Commit d46acaec authored by Charles Girardot's avatar Charles Girardot

merge conflicts resolution

parents fcde931e 62c01919
...@@ -5,3 +5,4 @@ target/ ...@@ -5,3 +5,4 @@ target/
.DS_Store .DS_Store
._* ._*
embl.properties embl.properties
test.properties
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>Je</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding//src/test/resources=UTF-8
encoding/<project>=UTF-8
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
org.eclipse.jdt.core.compiler.compliance=1.5
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.5
eclipse.preferences.version=1
org.eclipse.jdt.ui.javadoc=false
org.eclipse.jdt.ui.text.custom_code_templates=<?xml version\="1.0" encoding\="UTF-8" standalone\="no"?><templates><template autoinsert\="true" context\="gettercomment_context" deleted\="false" description\="Comment for getter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.gettercomment" name\="gettercomment">/**\n * @return the ${bare_field_name}\n */</template><template autoinsert\="true" context\="settercomment_context" deleted\="false" description\="Comment for setter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.settercomment" name\="settercomment">/**\n * @param ${param} the ${bare_field_name} to set\n */</template><template autoinsert\="true" context\="constructorcomment_context" deleted\="false" description\="Comment for created constructors" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.constructorcomment" name\="constructorcomment">/**\n * ${tags}\n */</template><template autoinsert\="true" context\="filecomment_context" deleted\="false" description\="Comment for created Java files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.filecomment" name\="filecomment">/**\n * \n */</template><template autoinsert\="true" context\="typecomment_context" deleted\="false" description\="Comment for created types" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.typecomment" name\="typecomment">/**\n * @author ${user}\n *\n * ${tags}\n */</template><template autoinsert\="true" context\="fieldcomment_context" deleted\="false" description\="Comment for fields" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.fieldcomment" name\="fieldcomment">/**\n * \n */</template><template autoinsert\="true" context\="methodcomment_context" deleted\="false" description\="Comment for non-overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodcomment" name\="methodcomment">/**\n * ${tags}\n */</template><template autoinsert\="true" context\="overridecomment_context" 
deleted\="false" description\="Comment for overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.overridecomment" name\="overridecomment">/* (non-Javadoc)\n * ${see_to_overridden}\n */</template><template autoinsert\="true" context\="delegatecomment_context" deleted\="false" description\="Comment for delegate methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.delegatecomment" name\="delegatecomment">/**\n * ${tags}\n * ${see_to_target}\n */</template><template autoinsert\="false" context\="newtype_context" deleted\="false" description\="Newly created files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.newtype" name\="newtype">/*\n * The MIT License\n *\n * Copyright (c) 2009 The Broad Institute\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the "Software"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions\:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n${filecomment}\n${package_declaration}\n\n${typecomment}\n${type_declaration}</template><template autoinsert\="true" context\="classbody_context" deleted\="false" description\="Code in new class type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.classbody" name\="classbody">\n</template><template autoinsert\="true" context\="interfacebody_context" deleted\="false" description\="Code in new interface type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.interfacebody" name\="interfacebody">\n</template><template autoinsert\="true" context\="enumbody_context" deleted\="false" description\="Code in new enum type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.enumbody" name\="enumbody">\n</template><template autoinsert\="true" context\="annotationbody_context" deleted\="false" description\="Code in new annotation type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.annotationbody" name\="annotationbody">\n</template><template autoinsert\="true" context\="catchblock_context" deleted\="false" description\="Code in new catch blocks" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.catchblock" name\="catchblock">// ${todo} Auto-generated catch block\n${exception_var}.printStackTrace();</template><template autoinsert\="true" context\="methodbody_context" deleted\="false" description\="Code in created method stubs" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodbody" name\="methodbody">// ${todo} Auto-generated method stub\n${body_statement}</template><template autoinsert\="true" context\="constructorbody_context" deleted\="false" description\="Code in created constructor stubs" enabled\="true" 
id\="org.eclipse.jdt.ui.text.codetemplates.constructorbody" name\="constructorbody">${body_statement}\n// ${todo} Auto-generated constructor stub</template><template autoinsert\="true" context\="getterbody_context" deleted\="false" description\="Code in created getters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.getterbody" name\="getterbody">return ${field};</template><template autoinsert\="true" context\="setterbody_context" deleted\="false" description\="Code in created setters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.setterbody" name\="setterbody">${field} \= ${param};</template></templates>
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
...@@ -11,24 +11,40 @@ Additional documentation and support can be found at http://gbcs.embl.de/je ...@@ -11,24 +11,40 @@ Additional documentation and support can be found at http://gbcs.embl.de/je
## The Je tool suite ## The Je tool suite
Je currently offers 4 tools: Je currently offers the following tools:
* **je debarcode**
demultiplexes multi-sample fastq files using user-defined input *read-layouts* and writes output files following user-defined *output-layouts*.
Replaces both **demultiplex-illu** and **demultiplex** since version 2.0.
* **je dropseq**
to process drop-seq results: clips cell barcode and UMI from read 1 and adds them to header of read 2 (a unique output fastq is created).
* **je retag**
extracts barcode(s) and UMI sequence(s) embedded in read names of a BAM file and migrates them to the proper BAM tags.
* **je clip** * **je clip**
to remove UMIs contained in reads of fastq files that do not need sample demultiplexing to remove UMIs contained in reads of fastq files that do not need sample demultiplexing
* **je markdupes**
filters BAM files for read duplicates taking UMIs into account.
* **je demultiplex** * **je demultiplex**
to demultiplex multi-samples fastq files which reads contain barcodes and UMIs (or not) to demultiplex multi-samples fastq files which reads contain barcodes and UMIs (or not). Deprecated since version 2.0 (use *je debarcode* instead).
* **je demultiplex-illu** * **je demultiplex-illu**
to demultiplex fastq files according to associated index files (contain the sample encoding barcodes). to demultiplex fastq files according to associated index files (contain the sample encoding barcodes).
Reads can additionally contain UMIs (inline) Reads can additionally contain UMIs (inline). Deprecated since version 2.0 (use *je debarcode* instead).
* **je markdupes**
to filter BAM files for read duplicates taking UMIs into account
### Distributions ### Distributions
...@@ -45,7 +61,7 @@ Je currently offers 4 tools: ...@@ -45,7 +61,7 @@ Je currently offers 4 tools:
* ``src/shell/je`` * ``src/shell/je``
is the wrapper script to call ``java -jar je_1.0_bundle.jar`` is the wrapper script to call ``java -jar je_*_bundle.jar``
* ``src/galaxy/`` * ``src/galaxy/``
......
1. Create a dir with :
* the je wrapper
* the log4j.xml
* the Je bundle jar
2. Make sure the je wrapper calls the correct jar
3. tar czf <tarname>.tar.gz <dirname>/*
e.g.
`tar czf je_2.0.RC.tar.gz je_2.0.RC/*`
\ No newline at end of file
#!/bin/sh #!/bin/sh
# Wrapper around je_1.1_bundle.jar # Wrapper around je_*_bundle.jar
# where are we stored ? # where are we stored ?
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# echo $DIR # echo $DIR
# path to jar file to execute, this jar is supposed to be in the same dir as this script # path to jar file to execute, this jar is supposed to be in the same dir as this script
JAR_FILE=$DIR"/je_1.0_bundle.jar" JAR_FILE=$DIR"/je_2.0.RC_bundle.jar"
# set default _JAVA_OPTIONS # set default _JAVA_OPTIONS
_JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'} _JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'}
......
#!/usr/bin/env bash
# Wrapper around je_*_bundle.jar
# NOTE: bash is required (not plain sh) because the script location is
# resolved through the bash-only BASH_SOURCE array.

# where are we stored ?
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# echo $DIR

# path to jar file to execute, this jar is supposed to be in the same dir as this script
JAR_FILE="$DIR/je_2.0.RC_bundle.jar"

# set default _JAVA_OPTIONS (only when the caller did not provide any)
_JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'}
export _JAVA_OPTIONS

# uncomment to change logging level using your own log4j.xml found in $DIR file
# OPTS="-Dlog4j.configuration=file:$DIR/log4j.xml"

# $OPTS is left unquoted on purpose: it must expand to nothing when unset and
# may hold several options. The jar path is quoted so that an installation
# directory containing spaces still works.
java $OPTS -jar "$JAR_FILE" "$@"
exit $?
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
<appender name="CONSOLE" class="org.apache.log4j.ConsoleAppender">
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{dd-MM-yy HH:mm:ss} %-5p [%t] %c{1}.%M(%L) | %m%n" />
</layout>
</appender>
<root>
<level value="INFO" />
<appender-ref ref="CONSOLE" />
</root>
</log4j:configuration>
#jars found in this folder are artifacts that are not found in maven central, you can then push them in your local maven repo with the following commands: #jars found in this folder are artifacts that are not found in maven central, you can then push them in your local maven repo with the following commands:
#ADAPT fpath to YOUR Je/lib #ADAPT path to YOUR Je/lib
LIBPATH="/Users/girardot/Work/eclipse_ws/Je/lib/" LIBPATH="/Users/girardot/git/Je/lib/"
cd ~/.m2 cd ~/.m2
mvn install:install-file -DgroupId=net.sf -DartifactId=htsjdk -Dversion=1.140custom -Dfile=$LIBPATH/custom-picard/htsjdk-1.140.jar -Dpackaging=jar -DgeneratePom=true mvn install:install-file -DgroupId=org.broadinstitute -DartifactId=picard -Dversion=2.9.4 -Dfile=$LIBPATH/picard_2.9.4.jar -Dpackaging=jar -DgeneratePom=true
mvn install:install-file -DgroupId=net.sf -DartifactId=picard -Dversion=1.140custom -Dfile=$LIBPATH/custom-picard/picard.jar -Dpackaging=jar -DgeneratePom=true
# Uncomment to ADD GBCS artifacts if needed (ie if you don t have access to these repos) # Uncomment to ADD GBCS artifacts if needed (ie if you don t have access to these repos)
# IF you are at embl, you rather want to checkout the relevant projects and build them locally # IF you are at embl, you rather want to checkout the relevant projects and build them locally
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>Je</groupId> <groupId>Je</groupId>
<artifactId>Je</artifactId> <artifactId>Je</artifactId>
<version>1.2</version> <version>2.0.RC</version>
<name>Je</name> <name>Je</name>
<description>Je provides command line utilities to deal with barcoded FASTQ files with or without Unique Molecular Index (UMI)</description> <description>Je provides command line utilities to deal with barcoded FASTQ files with or without Unique Molecular Index (UMI)</description>
...@@ -233,34 +233,9 @@ ...@@ -233,34 +233,9 @@
<dependency> <dependency>
<groupId>org.embl.cg.utilitytools</groupId> <groupId>org.embl.cg.utilitytools</groupId>
<artifactId>ut_utils</artifactId> <artifactId>ut_utils</artifactId>
<version>1.0</version> <version>1.0.1</version>
</dependency>
<!-- <dependency> -->
<!-- <groupId>net.sf</groupId> -->
<!-- <artifactId>picard</artifactId> -->
<!-- <version>1.140</version> -->
<!-- </dependency> -->
<!-- <dependency> -->
<!-- <groupId>net.sf</groupId> -->
<!-- <artifactId>htsjdk</artifactId> -->
<!-- <version>1.140</version> -->
<!-- </dependency> -->
<dependency>
<groupId>net.sf</groupId>
<artifactId>picard</artifactId>
<version>1.140custom</version>
</dependency> </dependency>
<dependency>
<groupId>net.sf</groupId>
<artifactId>htsjdk</artifactId>
<version>1.140custom</version>
</dependency>
<dependency> <dependency>
<groupId>org.slf4j</groupId> <groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId> <artifactId>slf4j-api</artifactId>
...@@ -291,6 +266,11 @@ ...@@ -291,6 +266,11 @@
<version>4.11</version> <version>4.11</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency>
<groupId>org.broadinstitute</groupId>
<artifactId>picard</artifactId>
<version>2.9.4</version>
</dependency>
</dependencies> </dependencies>
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
package org.embl.gbcs.je.jemultiplexer; package org.embl.gbcs.je;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
......
...@@ -21,13 +21,15 @@ ...@@ -21,13 +21,15 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
package org.embl.gbcs.je.jemultiplexer; package org.embl.gbcs.je;
/** Utility class to hang onto data about the best match for a given barcode */ /** Utility class to hang onto data about the best match for a given barcode */
public class BarcodeMatch { public class BarcodeMatch {
/** /**
* indicates if a barcode match has been found, in which case 'barcode' is not null * indicates if this barcode match fulfils the thresholds for barcode matching
*/ */
public boolean matched; public boolean matched;
...@@ -35,6 +37,12 @@ public class BarcodeMatch { ...@@ -35,6 +37,12 @@ public class BarcodeMatch {
* sequence of the matched barcode * sequence of the matched barcode
*/ */
public String barcode; public String barcode;
/**
* sequence extracted from read
*/
public String readSequence;
/** /**
* number of mismatches with 'barcode' * number of mismatches with 'barcode'
...@@ -48,8 +56,9 @@ public class BarcodeMatch { ...@@ -48,8 +56,9 @@ public class BarcodeMatch {
public String toString(){ public String toString(){
if(matched) if(matched)
return "matched :"+ barcode+" [MM="+mismatches+", MMD="+mismatchesToSecondBest+"]"; return "Match for "+readSequence+ " read sequence : barcode "+ barcode+" identified with [MM="+mismatches+", MMD="+mismatchesToSecondBest+"]";
return "no match"; else
return "No Match for "+readSequence+ " read sequence (best barcode is "+ barcode+" identified with [MM="+mismatches+", MMD="+mismatchesToSecondBest+"])";
} }
} }
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.embl.gbcs.je;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import htsjdk.samtools.fastq.FastqRecord;
import htsjdk.samtools.util.FastqQualityFormat;
/**
*
* Describes how and what to write in FASTQ output files when demultiplexing input FASTQ files
* More precisely, it describes the output file layout(s) using the slots defined in read layouts e.g. '<BARCODE1><UMI1><UMI2>' or '<SAMPLE1>'
* Each FastqWriterLayout needs two of these descriptors :
* 1. One describing how to write the read sequence e.g. '<SAMPLE1>' (only writes the sample sequence) or '<BARCODE1><SAMPLE1>' to also
* keep the barcode in the output read sequence (and qualities)
* 2. A second one describing how to write the read name (header) e.g. '<BARCODE1><UMI1><UMI2>' to add the barcode and two extracted UMIs
* in the final read name, in addition to the original read name (ie header up to the space). Here each written slot is separated with ':' by default
*
*
* Note that in case of barcode, one might want to write the barcode or the read sequence corresponding to the looked up sample barcode.
*
* The possible keys are :<br/>
* <ul>
* <li>SAMPLEn : refers to the SAMPLE slot with idx 'n' defined in the {@link ReadLayout} objects</li>
* <li>UMIn : refers to the UMI slot with idx 'n' defined in the {@link ReadLayout} objects</li>
* <li>BARCODEn : refers to the sample barcode resolved from the read sequence found in the BARCODE slot with idx 'n' defined in the {@link ReadLayout} objects</li>
* <li>READBARn : refers to the read sequence found in the BARCODE slot with idx 'n' defined in the {@link ReadLayout} objects ; this is only valid in the read name layout
* i.e. the read sequence always contains the original sequence</li>
* </ul>
*
* Note that a short layout format can also be used like 'B1', 'U2', 'S1' or 'R1' instead of '<BARCODE1>' , '<UMI2>' , '<SAMPLE1>' and '<READBAR1>', respectively.
* For example, 'B1U1U2' is the same as '<BARCODE1><UMI1><UMI2>'.
*
* Technically speaking, the short layout format is the only one used.
*
* @author girardot
*
*/
public class FastqWriterLayout {
// SLF4J logger shared by all instances of this class
private static Logger log = LoggerFactory.getLogger(FastqWriterLayout.class);
// Delimiter placed between slots written in the read name when none is specified
public static final String DEFAULT_READNAME_DELIMITOR = ":";
// Long layout format: one or more BARCODE/UMI/SAMPLE/READBAR keywords, each followed by digits
// and optionally wrapped in '<' and '>' (e.g. "<BARCODE1><UMI1>")
private static final String LONG_LAYOUT_REGEX = "^(<?(BARCODE|UMI|SAMPLE|READBAR)\\d+>?)+$";
// Short layout format: one or more single letters among B, U, S, R, each followed by digits (e.g. "B1U1")
private static final String SHORT_LAYOUT_REGEX = "^([BUSR]\\d+)+$";
/**
* char to use to delineate slots in read name ; if needed
*/
protected String readNameDelimitor = DEFAULT_READNAME_DELIMITOR;
/**
* Should the quality string be injected into read name together with READBAR and UMI slots ?
*/
protected boolean withQualityInReadName = false;
/**
* Layout for writing the read sequence ; in short format
*/
protected String readSequenceLayout;
/**
* Consumer associated with the readSequenceLayout
*/
protected ReadLayoutConsumer sequenceConsumer;
/**
* Layout for writing the read name ; in short format
*/
protected String readNameLayout;
/**
* Consumer associated with the readNameLayout
*/
protected ReadLayoutConsumer readNameConsumer ;
/**
* All the {@link ReadLayout} defined in the demultiplexing ; used to find how to extract needed information
*/
protected ReadLayout [] readLayouts;
/**
* the {@link FastqQualityFormat} of the input fastq files
*/
protected FastqQualityFormat fastqQualityFormat = null;
/**
 * Creates a writer layout from a read sequence layout and an optional read name layout.
 * Both layouts may be given in short (e.g. "B1U1") or long (e.g. "&lt;BARCODE1&gt;&lt;UMI1&gt;") format ;
 * they are stored in short format.
 *
 * @param readSequenceLayout the string representation of the output layout to use for the read sequence e.g. "S1"
 * @param readNameLayout the string representation of the output layout to use for the read name e.g. "B1U1S1" ;
 *        a null or blank value means no read name layout (stored as null)
 * @param readLayouts the ordered {@link ReadLayout} objects defining how input fastq files are formatted
 * @param withQualityInReadName indicates if the Barcode/UMI quality should be injected in the read name together with their sequence
 * @param readNameDelimitor the character to use to split up the read name (':' is the default)
 * @param convertBarcodeToReadbar if true all BARCODE slots are converted to READBAR in the readNameLayout (BARCODE == READBAR in readSequenceLayout)
 * @param fastqQualityFormat the {@link FastqQualityFormat} of the input fastq files
 * @throws LayoutMalformedException if a non-blank layout matches neither the short nor the long format
 */
public FastqWriterLayout(
        final String readSequenceLayout,
        final String readNameLayout,
        final ReadLayout [] readLayouts,
        final boolean withQualityInReadName,
        final String readNameDelimitor,
        final boolean convertBarcodeToReadbar,
        final FastqQualityFormat fastqQualityFormat) {

    // a blank read name layout is normalized to null
    this.readNameLayout = (StringUtils.isBlank(readNameLayout) ? null : convertToShortLayout(readNameLayout));
    this.readSequenceLayout = convertToShortLayout(readSequenceLayout);
    this.readLayouts = readLayouts;
    this.withQualityInReadName = withQualityInReadName;
    this.readNameDelimitor = readNameDelimitor;

    // Test the normalized field, not the raw argument: a blank but non-null
    // argument (e.g. "") leaves the field null and must not be dereferenced.
    if (convertBarcodeToReadbar && this.readNameLayout != null) {
        // in short format a BARCODE slot is 'B<n>' and a READBAR slot is 'R<n>'
        this.readNameLayout = this.readNameLayout.replace('B', 'R');
    }
    this.fastqQualityFormat = fastqQualityFormat;

    init(); //build all maps for easy lookup
}
/**
 * Same as the full constructor, without converting BARCODE slots to READBAR in the read name layout.
 *
 * @param readSequenceLayout the string representation of the output layout to use for the read sequence e.g. "S1"
 * @param readNameLayout the string representation of the output layout to use for the read name e.g. "B1U1S1"
 * @param readLayouts the ordered {@link ReadLayout} objects defining how input fastq files are formatted
 * @param withQualityInReadName indicates if the Barcode/UMI quality should be injected in the read name together with their sequence
 * @param readNameDelimitor the character to use to split up the read name (':' is the default)
 * @param fastqQualityFormat the {@link FastqQualityFormat} of the input fastq files
 */
public FastqWriterLayout(
        final String readSequenceLayout,
        final String readNameLayout,
        final ReadLayout [] readLayouts,
        final boolean withQualityInReadName,
        final String readNameDelimitor,
        final FastqQualityFormat fastqQualityFormat) {
    // delegate with convertBarcodeToReadbar switched off
    this(
        readSequenceLayout,
        readNameLayout,
        readLayouts,
        withQualityInReadName,
        readNameDelimitor,
        false,
        fastqQualityFormat);
}
/**
 * Normalizes an output layout to its short format.
 * A blank (or null) layout is returned untouched, as is a layout already in short format.
 * A layout in long format has its '&lt;' and '&gt;' removed and each keyword reduced to
 * its first letter (BARCODE to B, UMI to U, SAMPLE to S, READBAR to R).
 *
 * @param layout the layout in short or long format
 * @return the layout in short format, or the given value when it is blank
 * @throws LayoutMalformedException if the layout matches neither the short nor the long format
 */
private String convertToShortLayout(final String layout) {
    log.debug("given layout : "+layout);
    if (StringUtils.isBlank(layout)) {
        return layout;
    }

    final String shortLayout;
    if (Pattern.matches(SHORT_LAYOUT_REGEX, layout)) {
        // already short : nothing to do
        shortLayout = layout;
    } else if (Pattern.matches(LONG_LAYOUT_REGEX, layout)) {
        // drop the brackets and everything but the first letter of each keyword
        shortLayout = layout
            .replace("<", "")
            .replace(">", "")
            .replace("ARCODE", "")
            .replace("MI", "")
            .replace("AMPLE", "")
            .replace("EADBAR", "");
    } else {
        throw new LayoutMalformedException("FASTQ Output Layout does not match expected short ("+SHORT_LAYOUT_REGEX+") nor long ("+LONG_LAYOUT_REGEX+") formats", layout);
    }

    log.debug("short layout : "+shortLayout);
    return shortLayout;
}
/**
 * Assemble the {@link FastqRecord} that should be written in the output file according to the layout(s) ; this method should be used when a barcode has been matched
 *
 * The sequence, quality header and quality string come from the record built by the sequence consumer.
 * The read name comes from the read name consumer when one is set, otherwise the name of the record
 * built by the sequence consumer is kept.
 *
 * @param reads the {@link FastqRecord} from the input fastq files in the order matching the {@link ReadLayout} given at construction
 * @param sampleMatch a {@link SampleMatch} holding all the barcode matches
 * @return the newly assembled {@link FastqRecord}
 */
public FastqRecord assembleRecord( FastqRecord[] reads, SampleMatch sampleMatch ){
    final FastqRecord rec = sequenceConsumer.assembleNewRead(reads);
    String name = rec.getReadName();
    if (readNameConsumer != null) {
        name = readNameConsumer.assembleNewReadName(reads, sampleMatch);
    }

    final FastqRecord assembled = new FastqRecord(name, rec.getReadString(), rec.getBaseQualityHeader(), rec.getBaseQualityString());

    // This runs once per output record: only build the message (and the
    // FASTQ string) when debug logging is actually enabled.
    if (log.isDebugEnabled()) {
        log.debug("Assembled read for output using layout [NameLayout="+this.readNameLayout+" ; SequenceLayout="+this.readSequenceLayout+"] => \n"+assembled.toFastQString());
    }
    return assembled;
}
/**
 * Single end shortcut : wraps the read in a one-element array and delegates to
 * {@link #assembleRecord(FastqRecord[], SampleMatch)}.
 *
 * @param read the {@link FastqRecord} from the input fastq file
 * @param m a {@link SampleMatch} holding all the barcode matches
 * @return the {@link FastqRecord} assembled by the array based method
 */
public FastqRecord assembleRecord( FastqRecord read, SampleMatch m ){
    final FastqRecord[] singleEnd = new FastqRecord[]{ read };
    return assembleRecord(singleEnd, m);
}
/**
*
*/