Commit d46acaec authored by Charles Girardot's avatar Charles Girardot

merge conflicts resolution

parents fcde931e 62c01919
......@@ -5,3 +5,4 @@ target/
.DS_Store
._*
embl.properties
test.properties
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>Je</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding//src/test/resources=UTF-8
encoding/<project>=UTF-8
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
org.eclipse.jdt.core.compiler.compliance=1.5
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.5
eclipse.preferences.version=1
org.eclipse.jdt.ui.javadoc=false
org.eclipse.jdt.ui.text.custom_code_templates=<?xml version\="1.0" encoding\="UTF-8" standalone\="no"?><templates><template autoinsert\="true" context\="gettercomment_context" deleted\="false" description\="Comment for getter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.gettercomment" name\="gettercomment">/**\n * @return the ${bare_field_name}\n */</template><template autoinsert\="true" context\="settercomment_context" deleted\="false" description\="Comment for setter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.settercomment" name\="settercomment">/**\n * @param ${param} the ${bare_field_name} to set\n */</template><template autoinsert\="true" context\="constructorcomment_context" deleted\="false" description\="Comment for created constructors" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.constructorcomment" name\="constructorcomment">/**\n * ${tags}\n */</template><template autoinsert\="true" context\="filecomment_context" deleted\="false" description\="Comment for created Java files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.filecomment" name\="filecomment">/**\n * \n */</template><template autoinsert\="true" context\="typecomment_context" deleted\="false" description\="Comment for created types" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.typecomment" name\="typecomment">/**\n * @author ${user}\n *\n * ${tags}\n */</template><template autoinsert\="true" context\="fieldcomment_context" deleted\="false" description\="Comment for fields" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.fieldcomment" name\="fieldcomment">/**\n * \n */</template><template autoinsert\="true" context\="methodcomment_context" deleted\="false" description\="Comment for non-overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodcomment" name\="methodcomment">/**\n * ${tags}\n */</template><template autoinsert\="true" context\="overridecomment_context" 
deleted\="false" description\="Comment for overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.overridecomment" name\="overridecomment">/* (non-Javadoc)\n * ${see_to_overridden}\n */</template><template autoinsert\="true" context\="delegatecomment_context" deleted\="false" description\="Comment for delegate methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.delegatecomment" name\="delegatecomment">/**\n * ${tags}\n * ${see_to_target}\n */</template><template autoinsert\="false" context\="newtype_context" deleted\="false" description\="Newly created files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.newtype" name\="newtype">/*\n * The MIT License\n *\n * Copyright (c) 2009 The Broad Institute\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the "Software"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions\:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n${filecomment}\n${package_declaration}\n\n${typecomment}\n${type_declaration}</template><template autoinsert\="true" context\="classbody_context" deleted\="false" description\="Code in new class type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.classbody" name\="classbody">\n</template><template autoinsert\="true" context\="interfacebody_context" deleted\="false" description\="Code in new interface type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.interfacebody" name\="interfacebody">\n</template><template autoinsert\="true" context\="enumbody_context" deleted\="false" description\="Code in new enum type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.enumbody" name\="enumbody">\n</template><template autoinsert\="true" context\="annotationbody_context" deleted\="false" description\="Code in new annotation type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.annotationbody" name\="annotationbody">\n</template><template autoinsert\="true" context\="catchblock_context" deleted\="false" description\="Code in new catch blocks" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.catchblock" name\="catchblock">// ${todo} Auto-generated catch block\n${exception_var}.printStackTrace();</template><template autoinsert\="true" context\="methodbody_context" deleted\="false" description\="Code in created method stubs" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodbody" name\="methodbody">// ${todo} Auto-generated method stub\n${body_statement}</template><template autoinsert\="true" context\="constructorbody_context" deleted\="false" description\="Code in created constructor stubs" enabled\="true" 
id\="org.eclipse.jdt.ui.text.codetemplates.constructorbody" name\="constructorbody">${body_statement}\n// ${todo} Auto-generated constructor stub</template><template autoinsert\="true" context\="getterbody_context" deleted\="false" description\="Code in created getters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.getterbody" name\="getterbody">return ${field};</template><template autoinsert\="true" context\="setterbody_context" deleted\="false" description\="Code in created setters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.setterbody" name\="setterbody">${field} \= ${param};</template></templates>
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
......@@ -11,24 +11,40 @@ Additional documentation and support can be found at http://gbcs.embl.de/je
## The Je tool suite
Je currently offers 4 tools:
Je currently offers the following tools:
* **je debarcode**
demultiplexes multi-sample fastq files using user-defined input *read-layouts* and writes output files following user-defined *output-layouts*.
Replaces both **demultiplex-illu** and **demultiplex** since version 2.0.
* **je dropseq**
to process drop-seq results: clips cell barcode and UMI from read 1 and adds them to header of read 2 (a unique output fastq is created).
* **je retag**
extracts barcode(s) and UMI sequence(s) embedded in read names of a BAM file and migrates them to proper BAM tags.
* **je clip**
to remove UMIs contained in reads of fastq files that do not need sample demultiplexing
* **je markdupes**
filters BAM files for read duplicates taking UMIs into account.
* **je demultiplex**
to demultiplex multi-samples fastq files which reads contain barcodes and UMIs (or not)
to demultiplex multi-sample fastq files whose reads contain barcodes and UMIs (or not). Deprecated since version 2.0 (use *je debarcode* instead).
* **je demultiplex-illu**
to demultiplex fastq files according to associated index files (contain the sample encoding barcodes).
Reads can additionally contain UMIs (inline)
Reads can additionally contain UMIs (inline). Deprecated since version 2.0 (use *je debarcode* instead).
* **je markdupes**
to filter BAM files for read duplicates taking UMIs into account
### Distributions
......@@ -45,7 +61,7 @@ Je currently offers 4 tools:
* ``src/shell/je``
is the wrapper script to call ``java -jar je_1.0_bundle.jar``
is the wrapper script to call ``java -jar je_*_bundle.jar``
* ``src/galaxy/``
......
1. Create a dir with :
* the je wrapper
* the log4j.xml
* the Je bundle jar
2. Make sure the je wrapper calls the correct jar
3. tar czf <tarname>.tar.gz <dirname>/*
e.g.
`tar czf je_2.0.RC.tar.gz je_2.0.RC/*`
\ No newline at end of file
#!/bin/sh
# Wrapper around je_1.1_bundle.jar
# Wrapper around je_*_bundle.jar
# where are we stored ?
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# echo $DIR
# path to jar file to execute, this jar is supposed to be in the same dir as this script
JAR_FILE=$DIR"/je_1.0_bundle.jar"
JAR_FILE=$DIR"/je_2.0.RC_bundle.jar"
# set default _JAVA_OPTIONS
_JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'}
......
#!/bin/sh
# Wrapper around je_*_bundle.jar

# where are we stored ?
# NOTE: "$0" is used instead of "${BASH_SOURCE[0]}": the latter is a bash-only array
# expansion that aborts the script ("Bad substitution") when /bin/sh is a strict POSIX
# shell. For an executed (not sourced) script both give the path of this script.
DIR="$( cd "$( dirname "$0" )" && pwd )"
# echo $DIR

# path to jar file to execute, this jar is supposed to be in the same dir as this script
JAR_FILE="$DIR/je_2.0.RC_bundle.jar"

# set default _JAVA_OPTIONS (only when the caller did not provide any)
_JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'}
export _JAVA_OPTIONS

# uncomment to change logging level using your own log4j.xml found in $DIR file
# OPTS="-Dlog4j.configuration=file:$DIR/log4j.xml"

# $OPTS is left unquoted on purpose so that it expands to zero or more separate options ;
# the jar path is quoted so that an install dir containing spaces still works
java $OPTS -jar "$JAR_FILE" "$@"

# propagate the exit status of the java process
exit $?
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
<appender name="CONSOLE" class="org.apache.log4j.ConsoleAppender">
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{dd-MM-yy HH:mm:ss} %-5p [%t] %c{1}.%M(%L) | %m%n" />
</layout>
</appender>
<root>
<level value="INFO" />
<appender-ref ref="CONSOLE" />
</root>
</log4j:configuration>
#jars found in this folder are artifacts that are not available in maven central; you can push them to your local maven repo with the following commands:
#ADAPT fpath to YOUR Je/lib
LIBPATH="/Users/girardot/Work/eclipse_ws/Je/lib/"
#ADAPT path to YOUR Je/lib
LIBPATH="/Users/girardot/git/Je/lib/"
cd ~/.m2
mvn install:install-file -DgroupId=net.sf -DartifactId=htsjdk -Dversion=1.140custom -Dfile=$LIBPATH/custom-picard/htsjdk-1.140.jar -Dpackaging=jar -DgeneratePom=true
mvn install:install-file -DgroupId=net.sf -DartifactId=picard -Dversion=1.140custom -Dfile=$LIBPATH/custom-picard/picard.jar -Dpackaging=jar -DgeneratePom=true
mvn install:install-file -DgroupId=org.broadinstitute -DartifactId=picard -Dversion=2.9.4 -Dfile=$LIBPATH/picard_2.9.4.jar -Dpackaging=jar -DgeneratePom=true
# Uncomment to ADD GBCS artifacts if needed (ie if you don t have access to these repos)
# IF you are at embl, you rather want to checkout the relevant projects and build them locally
......
......@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>Je</groupId>
<artifactId>Je</artifactId>
<version>1.2</version>
<version>2.0.RC</version>
<name>Je</name>
<description>Je provides command line utilities to deal with barcoded FASTQ files with or without Unique Molecular Index (UMI)</description>
......@@ -233,34 +233,9 @@
<dependency>
<groupId>org.embl.cg.utilitytools</groupId>
<artifactId>ut_utils</artifactId>
<version>1.0</version>
</dependency>
<!-- <dependency> -->
<!-- <groupId>net.sf</groupId> -->
<!-- <artifactId>picard</artifactId> -->
<!-- <version>1.140</version> -->
<!-- </dependency> -->
<!-- <dependency> -->
<!-- <groupId>net.sf</groupId> -->
<!-- <artifactId>htsjdk</artifactId> -->
<!-- <version>1.140</version> -->
<!-- </dependency> -->
<dependency>
<groupId>net.sf</groupId>
<artifactId>picard</artifactId>
<version>1.140custom</version>
<version>1.0.1</version>
</dependency>
<dependency>
<groupId>net.sf</groupId>
<artifactId>htsjdk</artifactId>
<version>1.140custom</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
......@@ -291,6 +266,11 @@
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.broadinstitute</groupId>
<artifactId>picard</artifactId>
<version>2.9.4</version>
</dependency>
</dependencies>
......
......@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.embl.gbcs.je.jemultiplexer;
package org.embl.gbcs.je;
import java.io.IOException;
import java.io.InputStream;
......
......@@ -21,13 +21,15 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.embl.gbcs.je.jemultiplexer;
package org.embl.gbcs.je;
/** Utility class to hang onto data about the best match for a given barcode */
public class BarcodeMatch {
/**
* indicates if a barcode match has been found, in which case 'barcode' is not null
* indicates if this barcode match fulfills the thresholds for barcode matching
*/
public boolean matched;
......@@ -35,6 +37,12 @@ public class BarcodeMatch {
* sequence of the matched barcode
*/
public String barcode;
/**
* sequence extracted from read
*/
public String readSequence;
/**
* number of mismatches with 'barcode'
......@@ -48,8 +56,9 @@ public class BarcodeMatch {
public String toString(){
if(matched)
return "matched :"+ barcode+" [MM="+mismatches+", MMD="+mismatchesToSecondBest+"]";
return "no match";
return "Match for "+readSequence+ " read sequence : barcode "+ barcode+" identified with [MM="+mismatches+", MMD="+mismatchesToSecondBest+"]";
else
return "No Match for "+readSequence+ " read sequence (best barcode is "+ barcode+" identified with [MM="+mismatches+", MMD="+mismatchesToSecondBest+"])";
}
}
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.embl.gbcs.je;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import htsjdk.samtools.fastq.FastqRecord;
import htsjdk.samtools.util.FastqQualityFormat;
/**
*
* Describes how and what to write in FASTQ output files when demultiplexing input FASTQ files
* More precisely, it describes the output file layout(s) using the slots defined in read layouts e.g. '<BARCODE1><UMI1><UMI2>' or '<SAMPLE1>'
* Each FastqWriterLayout needs two of these descriptors :
* 1. One describing how to write the read sequence e.g. '<SAMPLE1>' (only writes the sample sequence) or '<BARCODE1><SAMPLE1>' to also
* keep the barcode in the output read sequence (and qualities)
* 2. A second one describing how to write the read name (header) e.g. '<BARCODE1><UMI1><UMI2>' to add the barcode and two extracted UMIs
* in the final read name, in addition to the original read name (ie header up to the space). Here each written slot is separated with ':' by default
*
*
* Note that in case of barcode, one might want to write the barcode or the read sequence corresponding to the looked up sample barcode.
*
* The possible keys are :<br/>
* <ul>
* <li>SAMPLEn : refers to the SAMPLE slot with idx 'n' defined in the {@link ReadLayout} objects</li>
* <li>UMIn : refers to the UMI slot with idx 'n' defined in the {@link ReadLayout} objects</li>
* <li>BARCODEn : refers to the sample barcode resolved from the read sequence found in the BARCODE slot with idx 'n' defined in the {@link ReadLayout} objects</li>
* <li>READBARn : refers to the read sequence found in the BARCODE slot with idx 'n' defined in the {@link ReadLayout} objects ; this is only valid in the read name layout
* i.e. the read sequence always contains the original sequence</li>
* </ul>
*
* Note that a short layout format can also be used like 'B1', 'U2', 'S1' or 'R1' instead of '<BARCODE1>', '<UMI2>', '<SAMPLE1>' and '<READBAR1>', respectively.
* For example, 'B1U1U2' is the same as '<BARCODE1><UMI1><UMI2>'.
*
* Technically speaking, the short layout format is the only one used.
*
* @author girardot
*
*/
public class FastqWriterLayout {
/** Logger of this class. */
private static Logger log = LoggerFactory.getLogger(FastqWriterLayout.class);
/** Default value of the delimiter placed between the slots written in the read name. */
public static final String DEFAULT_READNAME_DELIMITOR = ":";
/**
 * Regex of the long layout format : one or more keys among BARCODE, UMI, SAMPLE or READBAR, each
 * followed by a number and optionally enclosed in '&lt;' and '&gt;' e.g. '&lt;BARCODE1&gt;&lt;UMI1&gt;'
 */
private static final String LONG_LAYOUT_REGEX = "^(<?(BARCODE|UMI|SAMPLE|READBAR)\\d+>?)+$";
/**
 * Regex of the short layout format : one or more single-letter keys (B, U, S or R), each followed
 * by a number e.g. 'B1U1U2'
 */
private static final String SHORT_LAYOUT_REGEX = "^([BUSR]\\d+)+$";
/**
* char to use to delineate slots in read name ; if needed
*/
protected String readNameDelimitor = DEFAULT_READNAME_DELIMITOR;
/**
* Should the quality string be injected into read name together with READBAR and UMI slots ?
*/
protected boolean withQualityInReadName = false;
/**
* Layout for writing the read sequence ; in short format
*/
protected String readSequenceLayout;
/**
* Consumer associated with the readSequenceLayout
*/
protected ReadLayoutConsumer sequenceConsumer;
/**
* Layout for writing the read name ; in short format
*/
protected String readNameLayout;
/**
* Consumer associated with the readNameLayout
*/
protected ReadLayoutConsumer readNameConsumer ;
/**
* All the {@link ReadLayout} defined in the demultiplexing ; used to find how to extract needed information
*/
protected ReadLayout [] readLayouts;
/**
* the {@link FastqQualityFormat} of the input fastq files
*/
protected FastqQualityFormat fastqQualityFormat = null;
/**
 * Creates a writer layout from the given read sequence and read name output layouts. Both layouts
 * are accepted in long or short format and are stored in short format. A blank read name layout is
 * stored as null, in which case no read name consumer is built.
 *
 * @param readSequenceLayout the string representation of the output layout to use for the read sequence e.g. "S1"
 * @param readNameLayout the string representation of the output layout to use for the read name e.g. "B1U1S1" ; can be blank
 * @param readLayouts the ordered {@link ReadLayout} objects defining how input fastq files are formatted
 * @param withQualityInReadName indicates if the Barcode/UMI quality should be injected in the read name together with their sequence
 * @param readNameDelimitor the character to use to split up the read name (':' is the default)
 * @param convertBarcodeToReadbar if true all BARCODE slots are converted to READBAR in the readNameLayout (BARCODE == READBAR in readSequenceLayout)
 * @param fastqQualityFormat the {@link FastqQualityFormat} of the input fastq files
 * @throws LayoutMalformedException if a layout matches neither the short nor the long layout format
 */
public FastqWriterLayout(
        final String readSequenceLayout,
        final String readNameLayout,
        final ReadLayout [] readLayouts,
        final boolean withQualityInReadName,
        final String readNameDelimitor,
        final boolean convertBarcodeToReadbar,
        final FastqQualityFormat fastqQualityFormat) {

    this.readNameLayout = (StringUtils.isBlank(readNameLayout) ? null : convertToShortLayout(readNameLayout));
    this.readSequenceLayout = convertToShortLayout(readSequenceLayout);
    this.readLayouts = readLayouts;
    this.withQualityInReadName = withQualityInReadName;
    this.readNameDelimitor = readNameDelimitor;

    // Test the normalized field and not the raw argument : a blank but non-null argument
    // (e.g. "") leaves the field null, and dereferencing it here threw a NullPointerException.
    if (convertBarcodeToReadbar && this.readNameLayout != null) {
        // the layout is in short format at this point, so 'B' can only be a BARCODE key
        this.readNameLayout = this.readNameLayout.replace('B', 'R');
    }
    this.fastqQualityFormat = fastqQualityFormat;

    init(); //build all maps for easy lookup
}
/**
 * Same as the full constructor, with BARCODE slots of the read name layout left as is (no
 * conversion to READBAR).
 *
 * @param readSequenceLayout the string representation of the output layout to use for the read sequence e.g. "S1"
 * @param readNameLayout the string representation of the output layout to use for the read name e.g. "B1U1S1"
 * @param readLayouts the ordered {@link ReadLayout} objects defining how input fastq files are formatted
 * @param withQualityInReadName indicates if the Barcode/UMI quality should be injected in the read name together with their sequence
 * @param readNameDelimitor the character to use to split up the read name (':' is the default)
 * @param fastqQualityFormat the {@link FastqQualityFormat} of the input fastq files
 */
public FastqWriterLayout(
        final String readSequenceLayout,
        final String readNameLayout,
        final ReadLayout [] readLayouts,
        final boolean withQualityInReadName,
        final String readNameDelimitor,
        final FastqQualityFormat fastqQualityFormat) {
    this(
        readSequenceLayout,
        readNameLayout,
        readLayouts,
        withQualityInReadName,
        readNameDelimitor,
        false, // convertBarcodeToReadbar
        fastqQualityFormat
    );
}
/**
 * Normalizes an output layout to the short format e.g. '&lt;BARCODE1&gt;&lt;UMI1&gt;' becomes 'B1U1'.
 * A blank layout is returned untouched and a layout already in short format is returned as is.
 *
 * @param layout the output layout in short or long format ; can be blank
 * @return the layout in short format, or the given layout when it is blank
 * @throws LayoutMalformedException if the layout matches neither the short nor the long format
 */
private String convertToShortLayout(final String layout) {
    log.debug("given layout : "+layout);

    // nothing to convert
    if (StringUtils.isBlank(layout)) {
        return layout;
    }

    final String converted;
    if (Pattern.matches(SHORT_LAYOUT_REGEX, layout)) {
        converted = layout;
    } else if (Pattern.matches(LONG_LAYOUT_REGEX, layout)) {
        // drop the brackets and everything but the first letter of each key
        converted = layout
                .replace("<", "")
                .replace(">", "")
                .replace("ARCODE", "")
                .replace("MI", "")
                .replace("AMPLE", "")
                .replace("EADBAR", "");
    } else {
        throw new LayoutMalformedException("FASTQ Output Layout does not match expected short ("+SHORT_LAYOUT_REGEX+") nor long ("+LONG_LAYOUT_REGEX+") formats", layout);
    }

    log.debug("short layout : "+converted);
    return converted;
}
/**
 * Assemble the {@link FastqRecord} that should be written in the output file according to the layout(s) ; this method should be used when a barcode has been matched.
 * The sequence, quality header and qualities come from the record assembled by the sequence consumer ; the read name
 * is the one assembled by the read name consumer when a read name layout was given, else the name of the record
 * assembled by the sequence consumer.
 *
 * @param reads the {@link FastqRecord} from the input fastq files in the order matching the {@link ReadLayout} given at construction
 * @param sampleMatch a {@link SampleMatch} holding all the barcode matches
 * @return the {@link FastqRecord} to write in the output file
 */
public FastqRecord assembleRecord( FastqRecord[] reads, SampleMatch sampleMatch ){
    FastqRecord rec = sequenceConsumer.assembleNewRead(reads);

    String name = rec.getReadName();
    if (readNameConsumer != null) {
        name = readNameConsumer.assembleNewReadName(reads, sampleMatch);
    }

    FastqRecord ass = new FastqRecord(name, rec.getReadString(), rec.getBaseQualityHeader(), rec.getBaseQualityString());

    // this method runs for every output read : only build the (costly) message when it is really logged
    if (log.isDebugEnabled()) {
        log.debug("Assembled read for output using layout [NameLayout="+this.readNameLayout+" ; SequenceLayout="+this.readSequenceLayout+"] => \n"+ass.toFastQString());
    }
    return ass;
}
/**
 * Single end shortcut : wraps the read in a one-element array and delegates to the array based method.
 *
 * @param read the {@link FastqRecord} from the input fastq file
 * @param m a {@link SampleMatch} holding all the barcode matches
 * @return the {@link FastqRecord} assembled by the array based method
 */
public FastqRecord assembleRecord( FastqRecord read, SampleMatch m ){
    final FastqRecord[] singleEnd = { read };
    return assembleRecord(singleEnd, m);
}
/**
 * Validates the (short format) layouts and builds the consumers used to assemble output at FASTQ
 * writing time : always one for the read sequence, and one for the read name when a read name
 * layout is set.
 *
 * @throws LayoutMalformedException if a layout is not in short format
 */
protected void init(){

    // read sequence : mandatory
    final boolean sequenceLayoutIsShort = Pattern.matches(SHORT_LAYOUT_REGEX, this.readSequenceLayout);
    if (!sequenceLayoutIsShort) {
        throw new LayoutMalformedException("FASTQ Output Layout for read sequence does not match expected short format (regex is :"+SHORT_LAYOUT_REGEX+")", this.readSequenceLayout);
    }
    sequenceConsumer = new ReadLayoutConsumer(this.readSequenceLayout, this.readLayouts);

    // read name : optional
    if (this.readNameLayout == null) {
        return;
    }
    final boolean nameLayoutIsShort = Pattern.matches(SHORT_LAYOUT_REGEX, this.readNameLayout);
    if (!nameLayoutIsShort) {
        throw new LayoutMalformedException("FASTQ Output Layout for read name does not match expected short format (regex is :"+SHORT_LAYOUT_REGEX+")", this.readNameLayout);
    }
    readNameConsumer = new ReadLayoutConsumer(
            this.readNameLayout,
            this.readLayouts,
            this.withQualityInReadName,
            this.readNameDelimitor,
            this.fastqQualityFormat);
}
/**
 * Extracts the sequence and quality of every BARCODE slot found in the given reads. Each extracted
 * sequence/quality pair is stored as a {@link FastqRecord} with null read name and null quality header.
 *
 * @param reads the {@link FastqRecord} from the input fastq files, in the same order as readLayouts
 * @param readLayouts the {@link ReadLayout} describing each read ; reads[i] is parsed with readLayouts[i]
 * @return a map from BARCODE idx to the records extracted for this idx, in read layout order ;
 * empty when no layout contains a BARCODE slot
 */
public static Map<Integer, List<FastqRecord>> extractBarcodeSlots(FastqRecord[] reads, ReadLayout[] readLayouts) {
    Map<Integer, List<FastqRecord>> m = new HashMap<Integer, List<FastqRecord>>();

    for (int i = 0; i < readLayouts.length; i++) {
        // only layouts with BARCODE slot(s) are of interest
        if (!readLayouts[i].containsBarcode()) {
            continue;
        }

        // extract the BARCODE subsequences
        String [] sequences = readLayouts[i].extractBarcodes(reads[i].getReadString());
        // and the corresponding quality strings (same slot positions applied to the quality line)
        String [] qualities = readLayouts[i].extractBarcodes(reads[i].getBaseQualityString());
        // and the BARCODE idx corresponding to them
        // NOTE(review): assumes the ids are returned in the same order as the extracted subsequences -- confirm in ReadLayout
        List<Integer> barcodeBlockIds = readLayouts[i].getOrderedBarcodeBlockUniqueIds();

        // save each subsequence/quality as a FastqRecord in the list corresponding to the BARCODE idx
        for (int j = 0; j < barcodeBlockIds.size(); j++) {
            Integer blockId = barcodeBlockIds.get(j);

            // single lookup instead of containsKey() followed by get()
            List<FastqRecord> slotRecords = m.get(blockId);
            if (slotRecords == null) {
                slotRecords = new ArrayList<FastqRecord>();
                m.put(blockId, slotRecords);
            }

            FastqRecord fr = new FastqRecord(null, sequences[j], null, qualities[j]);
            // runs for every barcode of every read : only build the message when it is really logged
            if (log.isDebugEnabled()) {
                log.debug("Extracted Barcode : "+fr.toFastQString());
            }
            slotRecords.add(fr);
        }
    }

    return m;
}
/**
 * @return the delimiter currently set to separate slots in the read name
 */
public String getReadNameDelimitor() {
    return this.readNameDelimitor;
}
/**
 * Stores a new delimiter to separate slots in the read name.
 * NOTE(review): the read name consumer receives the delimiter when it is built in init() ; this
 * setter only updates the field, so it presumably does not affect an already built consumer -- confirm.
 *
 * @param readNameDelimitor the readNameDelimitor to set
 */
public void setReadNameDelimitor(String readNameDelimitor) {
    final String newDelimitor = readNameDelimitor;
    this.readNameDelimitor = newDelimitor;
}
/**
 * @return true if the quality string is to be injected into the read name together with READBAR and UMI slots
 */
public boolean isWithQualityInReadName() {
    return this.withQualityInReadName;
}