Commit fcde931e authored by Charles Girardot's avatar Charles Girardot

Merge branch 'master' of git.embl.de:grp-gbcs/Je

parents c2e22edd e9754a8e
Copyright 2016 GBCS-EMBL
MIT License
Copyright (c) 2016 GBCS-EMBL
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
associated documentation files (the "Software"), to deal in the Software without restriction,
......@@ -11,4 +13,4 @@ The above copyright notice and this permission notice shall be included in all c
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Je
The main public repository is at [github](https://github.com/gbcs-embl/Je/), where issues or pull requests can be created.
Additional documentation and support can be found at http://gbcs.embl.de/je
## Installation
* Install from the bioconda channel with `` conda install -c bioconda je-suite ``
* Or, download the ``je_<version>.tar.gz`` from the ``dist/`` directory and unpack
## The Je tool suite
Je currently offers 4 tools:
* **je clip**
to remove UMIs contained in reads of fastq files that do not need sample demultiplexing
* **je demultiplex**
to demultiplex multi-sample fastq files whose reads contain barcodes and UMIs (or not)
* **je demultiplex-illu**
to demultiplex fastq files according to associated index files (which contain the sample-encoding barcodes).
Reads can additionally contain UMIs (inline)
* **je markdupes**
to filter BAM files for read duplicates taking UMIs into account
### Distributions
* ``dist/``
contains the different Je versions for download
* Bioconda
starting from version 1.2 je-suite can be installed through conda: https://anaconda.org/bioconda/je-suite
### Source
* ``src/shell/je``
is the wrapper script to call ``java -jar je_<version>_bundle.jar``
* ``src/galaxy/``
contains the Je wrappers for Galaxy
* ``src/test/``
holds the different test data
Je
--
Additional documentation and support can be found at http://gbcs.embl.de/je
The Je tool suite
=================
Contains
++++++++
Je currently offers 4 tools :
**je clip**
to remove UMIs contained in reads of fastq files that do not need sample demultiplexing
**je demultiplex**
to demultiplex multi-samples fastq files which reads contain barcodes and UMIs (or not)
**je demultiplex-illu**
to demultiplex fastq files according to associated index files (contain the sample encoding barcodes).
Reads can additionally contain UMIs (inline)
**je markdupes**
to filter BAM files for read duplicates taking UMIs into account
Distributions
+++++++++++++
dist/
contains the different Je versions for download
Source
++++++
src/shell/je
is the wrapper script to call ``java -jar je_1.0_bundle.jar``
src/galaxy/
contains the Je wrappers for Galaxy
src/test/
holds the different test data
\ No newline at end of file
......@@ -5,7 +5,7 @@ description: The Je tool suite
name: je
owner: gbcs-embl-heidelberg
homepage_url: http://gbcs.embl.de/Je
remote_repository_url: https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy
remote_repository_url: https://github.com/gbcs-embl/Je/tree/master/src/galaxy
auto_tool_repositories:
name_template: "{{ tool_id }}"
description_template: "Wrapper for Je tool: {{ tool_name }}"
......
#!/usr/bin/env bash
# Wrapper around je_1.2_bundle.jar
#
# Runs the bundled Je jar that sits next to this script, forwarding all
# command-line arguments to it and exiting with java's exit status.
#
# NOTE: this script must run under bash, not plain sh: it relies on the
# bash-only BASH_SOURCE array to find its own location.

# where are we stored ?
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# echo $DIR

# path to jar file to execute, this jar is supposed to be in the same dir as this script
JAR_FILE="$DIR/je_1.2_bundle.jar"

# set default _JAVA_OPTIONS (only used when the caller has not set them already)
_JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'}
export _JAVA_OPTIONS

# uncomment to change logging level using your own log4j.xml found in $DIR file
# OPTS="-Dlog4j.configuration=file:$DIR/log4j.xml"

# $OPTS is intentionally left unquoted so that it can carry several JVM options;
# $JAR_FILE is quoted so that an installation path containing spaces still works.
java $OPTS -jar "$JAR_FILE" "$@"
exit $?
......@@ -3,11 +3,12 @@
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements" />
<stdio>
<exit_code range="1:" level="fatal" description="Tool exception" />
</stdio>
<version_command>echo '1.0'</version_command>
<command interpreter="bash">
<expand macro="version_command" />
<command>
<![CDATA[
je clip
......@@ -85,8 +86,7 @@
</test>
</tests>
<help>
<help>
<![CDATA[
**What it does**
......@@ -242,6 +242,6 @@ This is an exhaustive list of options::
Default value: null.
]]>
</help>
</help>
<expand macro="citations"/>
</tool>
......@@ -3,11 +3,12 @@
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements" />
<stdio>
<exit_code range="1:" level="fatal" description="Tool exception" />
</stdio>
<version_command>echo '1.0'</version_command>
<command interpreter="bash">
<expand macro="version_command" />
<command>
<![CDATA[
je demultiplex-illu
......@@ -93,8 +94,9 @@
<param name="MM" value="3"/>
<param name="MMD" value="2"/>
<param name="Q" value="20"/>
<param name="DIAG" value="false"/>
<output name="METRICS_FILE_NAME" file="illu_summary_PE.txt" ftype="tabular" lines_diff="4">
<param name="DIAG" value="false"/>
<output name="METRICS_FILE_NAME" file="illu_summary_PE.txt" ftype="tabular" lines_diff="4"/>
<output name="DEMULTIPLEX_RESULTS" ftype="tabular">
<discovered_dataset designation="unassigned_1" file="illu_unassigned_1_PE.txt" />
<discovered_dataset designation="unassigned_2" file="illu_unassigned_2_PE.txt" />
<discovered_dataset designation="emb681m5_GGACTCCTCTCTCTAT_2" file="emb681m5_GGACTCCTCTCTCTAT_2.txt"/>
......@@ -111,8 +113,7 @@
</test>
</tests>
<help>
<help>
<![CDATA[
**What it does**
......@@ -124,6 +125,8 @@ Author: Charles Girardot (charles.girardot@embl.de).
Wrapper by: Jelle Scholtalbers (jelle.scholtalbers@embl.de).
With contributions by: Mehmet Tekman (@mtekman)
------
**Know what you are doing**
......@@ -423,6 +426,6 @@ This is an exhaustive list of options::
Default value: null.
]]>
</help>
</help>
<expand macro="citations"/>
</tool>
......@@ -3,11 +3,12 @@
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements" />
<stdio>
<exit_code range="1:" level="fatal" description="Tool exception" />
</stdio>
<version_command>echo '1.0'</version_command>
<command interpreter="bash">
<expand macro="version_command" />
<command>
<![CDATA[
je demultiplex
......@@ -59,7 +60,8 @@
<param name="type" value="single"/>
<param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/>
<param name="BARCODE_FILE" value="barcodes_SE.txt" ftype="tabular"/>
<output name="METRICS_FILE_NAME" file="summary_SE.txt" ftype="tabular" lines_diff="4">
<output name="METRICS_FILE_NAME" file="summary_SE.txt" ftype="tabular" lines_diff="4"/>
<output name="DEMULTIPLEX_RESULTS" ftype="tabular">
<discovered_dataset designation="unassigned_1" file="unassigned_1_SE.txt" />
</output>
</test>
......@@ -73,10 +75,12 @@
<param name="BM" value="BOTH"/>
<param name="BRED" value="false"/>
<param name="COLLECT_OUTPUTS" value="false" />
<param name="barcode_list_type_con" value="text"/>
<param name="barcode_text"
value="sample1 CACTGT:GTATAG&#10;sample2 ATTCCG:TCCGTC&#10;sample3 GCTACC:TGGTCA&#10;sample4 CGAAAC:CACTGT"/>
<output name="METRICS_FILE_NAME" file="summary_PE.txt" ftype="tabular" lines_diff="4">
<output name="METRICS_FILE_NAME" file="summary_PE.txt" ftype="tabular" lines_diff="4"/>
<output name="DEMULTIPLEX_RESULTS" ftype="tabular">
<discovered_dataset designation="unassigned_1" file="unassigned_1_PE.txt" />
<discovered_dataset designation="unassigned_2" file="unassigned_2_PE.txt" />
<discovered_dataset designation="sample4_CGAAACCACTGT_2" file="sample4_CGAAACCACTGT_2.txt"/>
......@@ -89,10 +93,36 @@
<discovered_dataset designation="sample1_CACTGTGTATAG_1" file="sample1_CACTGTGTATAG_1.txt"/>
</output>
</test>
</tests>
<test>
    <!-- Repeat of previous but with collection outputs -->
    <!-- With COLLECT_OUTPUTS set to true, the test checks the two list
         collections COLLECTION_1 and COLLECTION_2 instead of discovered datasets.
         Every element name must be unique within its collection and is compared
         against the expected file given in "value". -->
    <param name="type" value="paired"/>
    <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/>
    <param name="input_2" value="file_2_sequence.txt" ftype="fastqsanger"/>
    <param name="BPOS" value="BOTH"/>
    <param name="BM" value="BOTH"/>
    <param name="BRED" value="false"/>
    <param name="barcode_list_type_con" value="text"/>
    <param name="barcode_text"
           value="sample1 CACTGT:GTATAG&#10;sample2 ATTCCG:TCCGTC&#10;sample3 GCTACC:TGGTCA&#10;sample4 CGAAAC:CACTGT"/>
    <param name="COLLECT_OUTPUTS" value="true" />
    <output_collection name="COLLECTION_1" type="list">
        <element name="sample4_CGAAACCACTGT_1.txt" value="sample4_CGAAACCACTGT_1.txt"/>
        <element name="sample3_GCTACCTGGTCA_1.txt" value="sample3_GCTACCTGGTCA_1.txt"/>
        <element name="sample2_ATTCCGTCCGTC_1.txt" value="sample2_ATTCCGTCCGTC_1.txt"/>
        <element name="sample1_CACTGTGTATAG_1.txt" value="sample1_CACTGTGTATAG_1.txt"/>
    </output_collection>
    <output_collection name="COLLECTION_2" type="list">
        <element name="sample4_CGAAACCACTGT_2.txt" value="sample4_CGAAACCACTGT_2.txt"/>
        <element name="sample3_GCTACCTGGTCA_2.txt" value="sample3_GCTACCTGGTCA_2.txt"/>
        <element name="sample2_ATTCCGTCCGTC_2.txt" value="sample2_ATTCCGTCCGTC_2.txt"/>
        <element name="sample1_CACTGTGTATAG_2.txt" value="sample1_CACTGTGTATAG_2.txt"/>
    </output_collection>
</test>
</tests>
<help>
<help>
<![CDATA[
**What it does**
......@@ -104,6 +134,8 @@ Author: Charles Girardot (charles.girardot@embl.de).
Wrapper by: Jelle Scholtalbers (jelle.scholtalbers@embl.de).
With contributions by: Mehmet Tekman (@mtekman)
------
**Know what you are doing**
......@@ -434,6 +466,6 @@ This is an exhaustive list of options::
Default value: null.
]]>
</help>
</help>
<expand macro="citations"/>
</tool>
......@@ -3,11 +3,12 @@
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements" />
<stdio>
<exit_code range="1:" level="fatal" description="Tool exception" />
</stdio>
<version_command>echo '1.0'</version_command>
<command interpreter="bash">
<expand macro="version_command" />
<command>
<![CDATA[
je markdupes
......@@ -182,8 +183,7 @@
</test>
</tests>
<help>
<help>
<![CDATA[
**What it does**
......@@ -403,6 +403,6 @@ This is an exhaustive list of options::
to clear the default value.
]]>
</help>
</help>
<expand macro="citations"/>
</tool>
<macros>
<token name="@VERSION_STRING@">1.2</token>
<token name="@VERSION_STRING@">1.2.1</token>
<xml name="requirements">
<requirements>
<requirement type="package" version="1.2">je-suite</requirement>
</requirements>
</xml>
<xml name="version_command">
<version_command>je version 2&gt;&amp;1 | tail -n 1</version_command>
</xml>
<token name="@single_or_paired_cmd@">
#if str( $library.type ) == "single":
......@@ -19,8 +27,8 @@
<xml name="paired_options">
<when value="paired">
<param name="input_1" format="fastq,gz" type="data" label="FASTQ file #1"/>
<param name="input_2" format="fastq,gz" type="data" label="FASTQ file #2"/>
<param name="input_1" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" type="data" label="FASTQ file #1"/>
<param name="input_2" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" type="data" label="FASTQ file #2"/>
<yield />
<param name="SAME_HEADERS" type="boolean"
label="Ensure headers of both reads of a pair are identical (SAME_HEADERS)"
......@@ -29,7 +37,7 @@
falsevalue="false" checked="false"/>
</when>
<when value="paired_collection">
<param name="input_1" format="fastq,gz" type="data_collection" collection_type="paired"
<param name="input_1" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired"
label="FASTQ Paired Dataset"/>
<yield />
<param name="SAME_HEADERS" type="boolean"
......@@ -62,18 +70,18 @@
<option value="paired_collection">Paired-end Dataset Collection</option>
</param>
<when value="single">
<param name="input_1" format="fastq,gz" type="data" label="FASTQ file"/>
<param name="I1" type="data" format="fastq,gz" label="First Index File"/>
<param name="input_1" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" type="data" label="FASTQ file"/>
<param name="I1" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="First Index File"/>
</when>
<expand macro="paired_options">
<param name="I1" type="data" format="fastq,gz" label="First Index File"/>
<param name="I1" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="First Index File"/>
<conditional name="I2_CONDITIONAL">
<param name="I2_AVAILABLE" type="select" label="Do you have a second index file?">
<option value="true">Yes</option>
<option value="false" selected="true">No</option>
</param>
<when value="true">
<param name="I2" type="data" format="fastq,gz" label="Second Index File"/>
<param name="I2" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="Second Index File"/>
<expand macro="bred_conditional"/>
</when>
<when value="false"/>
......@@ -90,7 +98,7 @@
<option value="paired_collection">Paired-end Dataset Collection</option>
</param>
<when value="single">
<param name="input_1" format="fastq,gz" type="data" label="FASTQ file"/>
<param name="input_1" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" type="data" label="FASTQ file"/>
</when>
<expand macro="paired_options">
<yield />
......@@ -186,7 +194,7 @@
<when value="text">
<param name="barcode_text" type="text" area="True" size="10x30"
value="sample_name\tbarcode\n" label="Barcode List"
value="sample_name &lt;tab&gt; barcode" label="Barcode List"
help="Barcode list format: one sample per line. First column is sample name and second is the
barcode, separated by tab or space. A third and fourth column can be used to specify the resulting
file names. See help below.">
......@@ -270,19 +278,19 @@ ${from_text_area}</configfile>
</xml>
<token name="@demultiplexer_common_output_options_cmd@">
KEEP_UNASSIGNED_READ=$KEEP_UNASSIGNED_READ
STATS_ONLY=${adv_options.STATS_ONLY}
#if str( $adv_options.DIAG ) == "true":
KEEP_UNASSIGNED_READ=${KEEP_UNASSIGNED_READ}
STATS_ONLY=${STATS_ONLY}
#if str( $DIAG ) == "true":
BARCODE_DIAG_FILE=$BARCODE_DIAG_FILE
#end if
</token>
<xml name="demultiplexer_common_output_options">
<param name="KEEP_UNASSIGNED_READ" type="boolean" label="Keep unassigned reads (KEEP_UNASSIGNED_READ)"
truealue="true"
falsevalue="false"
checked="true"
/>
<section name="adv_options" title="Advanced Options" expanded="False">
<!--<section name="output_options" title="Output Options" expanded="True">-->
<param name="COLLECT_OUTPUTS" type="boolean"
truevalue="true" falsevalue="false" checked="false"
label="Output forward and reverse reads in dataset collections" />
<param name="KEEP_UNASSIGNED_READ" type="boolean" label="Keep unassigned reads (KEEP_UNASSIGNED_READ)"
truevalue="true" falsevalue="false" checked="true"/>
<param name="DIAG" type="boolean" label="Output barcode match reporting file (DIAG)"
truevalue="true" falsevalue="false" checked="false"
help="This file will
......@@ -292,7 +300,7 @@ ${from_text_area}</configfile>
<param name="STATS_ONLY" type="boolean" label="Only produce metric and diagnostic reports (STATS_ONLY)"
truevalue="true" falsevalue="false" checked="false"
help="do not demultiplex." />
</section>
<!--</section>-->
</xml>
<token name="@common_options_cmd@">
......@@ -317,24 +325,36 @@ ${from_text_area}</configfile>
help="Set to empty or null for no replacement."
value=":"/>
<param name="GZ" type="boolean" hidden="true" label="Compress output (GZ)"
<param name="GZ" type="hidden" label="Compress output (GZ)"
help="will result in fastq.gz files."
truevalue="true"
falsevalue="false"
checked="false"/>
value="false"/>
</xml>
<token name="@demultiplexer_common_outputs_cmd@">
METRICS_FILE_NAME=$METRICS_FILE_NAME
</token>
<xml name="demultiplexer_common_outputs">
<data name="METRICS_FILE_NAME" format="tabular" label="Je-Demultiplex result">
<!--<discover_datasets pattern="(?P&lt;name&gt;.*)\.txt" ext="fastqsanger"-->
<data name="DEMULTIPLEX_RESULTS" format="tabular" label="Je-Demultiplex result placeholder" hidden="true">
<filter>COLLECT_OUTPUTS == False and STATS_ONLY == False</filter>
<discover_datasets pattern="(?P&lt;name&gt;.*)\.txt" directory="results" visible="true" ext="fastqsanger"/>
</data>
<data name="METRICS_FILE_NAME" format="tabular" label="Je-Demultiplex Metrics"/>
<data name="BARCODE_DIAG_FILE" format="tabular" label="Barcode statistics">
<filter>(adv_options['DIAG'] == 'true')</filter>
<filter>DIAG</filter>
</data>
<collection name="COLLECTION_1" type="list" label="${tool.name} on ${on_string} : Reads_1" >
<filter>COLLECT_OUTPUTS and STATS_ONLY == False</filter>
<discover_datasets pattern="(?P&lt;designation&gt;.+_1\..+)" ext="fastqsanger" directory="results" visible="false" />
</collection>
<collection name="COLLECTION_2" type="list" label="${tool.name} on ${on_string} : Reads_2" >
<filter>COLLECT_OUTPUTS and STATS_ONLY == False</filter>
<discover_datasets pattern="(?P&lt;designation&gt;.+_2\..+)" ext="fastqsanger" directory="results" visible="false" />
</collection>
</xml>
<xml name="citations">
<citations>
<citation type="doi">10.1186/s12859-016-1284-2</citation>
</citations>
</xml>
</macros>
......@@ -4,8 +4,8 @@ Processed Reads (pairs) 25
Assigned Reads (pairs) 10
Unassigned Reads (pairs) 15
# Individual sample read (pair) counts :
emb681m1 2
emb681m4 2
emb681m5 2
emb6801m2 2
emb681m1 2
emb6801m1 2
emb6801m2 2
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment