Commit e6dcce0a authored by Charles Girardot's avatar Charles Girardot

2.0 RC version. je markdupes does not deal with UMI from BAM TAG (still

expected in read names)
parent 542b8ac9
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse project descriptor for the "Je" project: declares the builders
     run on the project and the natures attached to it. -->
<projectDescription>
  <name>Je</name>
  <comment/>
  <projects/>
  <buildSpec>
    <!-- Java builder, declared first -->
    <buildCommand>
      <name>org.eclipse.jdt.core.javabuilder</name>
      <arguments/>
    </buildCommand>
    <!-- Maven builder, declared second -->
    <buildCommand>
      <name>org.eclipse.m2e.core.maven2Builder</name>
      <arguments/>
    </buildCommand>
  </buildSpec>
  <natures>
    <nature>org.eclipse.m2e.core.maven2Nature</nature>
    <nature>org.eclipse.jdt.core.javanature</nature>
  </natures>
</projectDescription>
# Text encoding settings: UTF-8 for each source/resource folder listed below
# and for the project as a whole (the encoding/<project> key).
# NOTE(review): looks like Eclipse's .settings/org.eclipse.core.resources.prefs - confirm.
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding//src/test/resources=UTF-8
encoding/<project>=UTF-8
# Java compiler settings: source level, compliance level and target platform
# are all 1.5; use of 'assert' / 'enum' as identifiers is reported as an error
# and forbidden references as a warning.
# NOTE(review): confirm these levels agree with the compiler level configured in pom.xml.
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
org.eclipse.jdt.core.compiler.compliance=1.5
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.5
eclipse.preferences.version=1
org.eclipse.jdt.ui.javadoc=false
org.eclipse.jdt.ui.text.custom_code_templates=<?xml version\="1.0" encoding\="UTF-8" standalone\="no"?><templates><template autoinsert\="true" context\="gettercomment_context" deleted\="false" description\="Comment for getter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.gettercomment" name\="gettercomment">/**\n * @return the ${bare_field_name}\n */</template><template autoinsert\="true" context\="settercomment_context" deleted\="false" description\="Comment for setter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.settercomment" name\="settercomment">/**\n * @param ${param} the ${bare_field_name} to set\n */</template><template autoinsert\="true" context\="constructorcomment_context" deleted\="false" description\="Comment for created constructors" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.constructorcomment" name\="constructorcomment">/**\n * ${tags}\n */</template><template autoinsert\="true" context\="filecomment_context" deleted\="false" description\="Comment for created Java files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.filecomment" name\="filecomment">/**\n * \n */</template><template autoinsert\="true" context\="typecomment_context" deleted\="false" description\="Comment for created types" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.typecomment" name\="typecomment">/**\n * @author ${user}\n *\n * ${tags}\n */</template><template autoinsert\="true" context\="fieldcomment_context" deleted\="false" description\="Comment for fields" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.fieldcomment" name\="fieldcomment">/**\n * \n */</template><template autoinsert\="true" context\="methodcomment_context" deleted\="false" description\="Comment for non-overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodcomment" name\="methodcomment">/**\n * ${tags}\n */</template><template autoinsert\="true" context\="overridecomment_context" 
deleted\="false" description\="Comment for overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.overridecomment" name\="overridecomment">/* (non-Javadoc)\n * ${see_to_overridden}\n */</template><template autoinsert\="true" context\="delegatecomment_context" deleted\="false" description\="Comment for delegate methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.delegatecomment" name\="delegatecomment">/**\n * ${tags}\n * ${see_to_target}\n */</template><template autoinsert\="false" context\="newtype_context" deleted\="false" description\="Newly created files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.newtype" name\="newtype">/*\n * The MIT License\n *\n * Copyright (c) 2009 The Broad Institute\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the "Software"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions\:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n${filecomment}\n${package_declaration}\n\n${typecomment}\n${type_declaration}</template><template autoinsert\="true" context\="classbody_context" deleted\="false" description\="Code in new class type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.classbody" name\="classbody">\n</template><template autoinsert\="true" context\="interfacebody_context" deleted\="false" description\="Code in new interface type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.interfacebody" name\="interfacebody">\n</template><template autoinsert\="true" context\="enumbody_context" deleted\="false" description\="Code in new enum type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.enumbody" name\="enumbody">\n</template><template autoinsert\="true" context\="annotationbody_context" deleted\="false" description\="Code in new annotation type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.annotationbody" name\="annotationbody">\n</template><template autoinsert\="true" context\="catchblock_context" deleted\="false" description\="Code in new catch blocks" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.catchblock" name\="catchblock">// ${todo} Auto-generated catch block\n${exception_var}.printStackTrace();</template><template autoinsert\="true" context\="methodbody_context" deleted\="false" description\="Code in created method stubs" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodbody" name\="methodbody">// ${todo} Auto-generated method stub\n${body_statement}</template><template autoinsert\="true" context\="constructorbody_context" deleted\="false" description\="Code in created constructor stubs" enabled\="true" 
id\="org.eclipse.jdt.ui.text.codetemplates.constructorbody" name\="constructorbody">${body_statement}\n// ${todo} Auto-generated constructor stub</template><template autoinsert\="true" context\="getterbody_context" deleted\="false" description\="Code in created getters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.getterbody" name\="getterbody">return ${field};</template><template autoinsert\="true" context\="setterbody_context" deleted\="false" description\="Code in created setters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.setterbody" name\="setterbody">${field} \= ${param};</template></templates>
# No active profile is set and workspace project resolution is enabled.
# NOTE(review): presumably the Maven integration (m2e) preferences of the project - confirm.
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
1. Create a directory containing:
* the je wrapper script
* the log4j.xml file
* the Je bundle jar
2. Make sure the je wrapper points to the correct jar (see the `JAR_FILE` variable in the wrapper)
3. Create the archive: `tar czf <tarname>.tar.gz <dirname>/*`
e.g.
`tar czf je_2.0.RC.tar.gz je_2.0.RC/*`
\ No newline at end of file
#!/bin/sh #!/bin/sh
# Wrapper around je_1.1_bundle.jar # Wrapper around je_*_bundle.jar
# where are we stored ? # where are we stored ?
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# echo $DIR # echo $DIR
# path to jar file to execute, this jar is supposed to be in the same dir as this script # path to jar file to execute, this jar is supposed to be in the same dir as this script
JAR_FILE=$DIR"/je_1.1_bundle.jar" JAR_FILE=$DIR"/je_2.0.RC_bundle.jar"
# set default _JAVA_OPTIONS # set default _JAVA_OPTIONS
_JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'} _JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'}
......
#!/bin/sh
# Wrapper around je_*_bundle.jar: runs the bundled jar with default JVM
# options and forwards all command line arguments to it.

# where are we stored ?
# "$0" is used instead of BASH_SOURCE: BASH_SOURCE is a bash-only array and
# "${BASH_SOURCE[0]}" aborts with "Bad substitution" when /bin/sh is a strict
# POSIX shell (e.g. dash).
DIR="$( cd "$( dirname "$0" )" && pwd )"
# echo $DIR

# path to jar file to execute, this jar is supposed to be in the same dir as this script
JAR_FILE="$DIR/je_2.0.RC_bundle.jar"

# set default _JAVA_OPTIONS (only used when the variable is unset or empty)
_JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'}
export _JAVA_OPTIONS

# uncomment to change logging level using your own log4j.xml found in $DIR file
# OPTS="-Dlog4j.configuration=file:$DIR/log4j.xml"

# $OPTS is deliberately left unquoted: it must expand to nothing when unset and
# to several words when it holds several options. The jar path is quoted so
# that an installation directory containing spaces still works.
java $OPTS -jar "$JAR_FILE" "$@"
exit $?
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<!-- log4j configuration: a single console appender attached to the root
     logger, which is set to level INFO. -->
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">

  <appender name="CONSOLE" class="org.apache.log4j.ConsoleAppender">
    <layout class="org.apache.log4j.PatternLayout">
      <!-- date, level, thread, short logger name, method(line), then the message -->
      <param name="ConversionPattern" value="%d{dd-MM-yy HH:mm:ss} %-5p [%t] %c{1}.%M(%L) | %m%n" />
    </layout>
  </appender>

  <root>
    <level value="INFO" />
    <appender-ref ref="CONSOLE" />
  </root>

</log4j:configuration>
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>Je</groupId> <groupId>Je</groupId>
<artifactId>Je</artifactId> <artifactId>Je</artifactId>
<version>2.0.beta</version> <version>2.0.RC</version>
<name>Je</name> <name>Je</name>
<description>Je provides command line utilities to deal with barcoded FASTQ files with or without Unique Molecular Index (UMI)</description> <description>Je provides command line utilities to deal with barcoded FASTQ files with or without Unique Molecular Index (UMI)</description>
......
...@@ -34,6 +34,7 @@ import org.slf4j.Logger; ...@@ -34,6 +34,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import htsjdk.samtools.fastq.FastqRecord; import htsjdk.samtools.fastq.FastqRecord;
import htsjdk.samtools.util.FastqQualityFormat;
/** /**
* *
...@@ -112,24 +113,32 @@ public class FastqWriterLayout { ...@@ -112,24 +113,32 @@ public class FastqWriterLayout {
*/ */
protected ReadLayout [] readLayouts; protected ReadLayout [] readLayouts;
/** /**
* @param readSequenceLayout * the {@link FastqQualityFormat} of the input fastq files
* @param readNameLayout can be null when the read name should be reused unmodified
* @param readLayouts
*/ */
public FastqWriterLayout(final String readSequenceLayout, final String readNameLayout, final ReadLayout [] readLayouts, final boolean withQualityInReadName, final String readNameDelimitor) { protected FastqQualityFormat fastqQualityFormat = null;
this(readSequenceLayout, readNameLayout, readLayouts, withQualityInReadName, readNameDelimitor, false);
}
/** /**
* @param readSequenceLayout * @param readSequenceLayout the string representation of the output layout to use for the read sequence e.g. "S1"
* @param readNameLayout * @param readNameLayout the string representation of the output layout to use for the read name e.g. "B1U1S1"
* @param readLayouts * @param readLayouts the ordered {@link ReadLayout} objects defining how input fastq files are formatted
* @param withQualityInReadName * @param withQualityInReadName indicates if the Barcode/UMI quality should be injected in the read name together with their sequence
* @param readNameDelimitor * @param readNameDelimitor the character to use to split up the read name (':' is the default)
* @param convertBarcodeToReadbar if true all BARCODE slots are converted to READBAR in the readNameLayout (BARCODE == READBAR in readSequenceLayout) * @param convertBarcodeToReadbar if true all BARCODE slots are converted to READBAR in the readNameLayout (BARCODE == READBAR in readSequenceLayout)
* @param fastqQualityFormat the {@link FastqQualityFormat} of the input fastq files
*/ */
public FastqWriterLayout(final String readSequenceLayout, final String readNameLayout, final ReadLayout [] readLayouts, final boolean withQualityInReadName, final String readNameDelimitor, final boolean convertBarcodeToReadbar) { public FastqWriterLayout(
final String readSequenceLayout,
final String readNameLayout,
final ReadLayout [] readLayouts,
final boolean withQualityInReadName,
final String readNameDelimitor,
final boolean convertBarcodeToReadbar,
final FastqQualityFormat fastqQualityFormat) {
this.readNameLayout = (StringUtils.isBlank(readNameLayout) ? null : convertToShortLayout(readNameLayout)); this.readNameLayout = (StringUtils.isBlank(readNameLayout) ? null : convertToShortLayout(readNameLayout));
this.readSequenceLayout = convertToShortLayout(readSequenceLayout); this.readSequenceLayout = convertToShortLayout(readSequenceLayout);
...@@ -139,24 +148,23 @@ public class FastqWriterLayout { ...@@ -139,24 +148,23 @@ public class FastqWriterLayout {
if(convertBarcodeToReadbar && readNameLayout!=null) { if(convertBarcodeToReadbar && readNameLayout!=null) {
this.readNameLayout = this.readNameLayout.replaceAll("B", "R"); this.readNameLayout = this.readNameLayout.replaceAll("B", "R");
} }
this.fastqQualityFormat = fastqQualityFormat;
init(); //build all maps for easy lookup init(); //build all maps for easy lookup
} }
/** /**
* @param readSequenceLayout * @param readSequenceLayout the string representation of the output layout to use for the read sequence e.g. "S1"
* @param readNameLayout can be null when the read name should be reused unmodified * @param readNameLayout the string representation of the output layout to use for the read name e.g. "B1U1S1"
* @param readLayout * @param readLayouts the ordered {@link ReadLayout} objects defining how input fastq files are formatted
* @param withQualityInReadName indicates if the Barcode/UMI quality should be injected in the read name together with their sequence
* @param readNameDelimitor the character to use to split up the read name (':' is the default)
* @param fastqQualityFormat the {@link FastqQualityFormat} of the input fastq files
*/ */
public FastqWriterLayout(final String readSequenceLayout, final String readNameLayout, final ReadLayout readLayout, final boolean withQualityInReadName, final String readNameDelimitor) { public FastqWriterLayout(final String readSequenceLayout, final String readNameLayout, final ReadLayout [] readLayouts, final boolean withQualityInReadName, final String readNameDelimitor, final FastqQualityFormat fastqQualityFormat) {
this(readSequenceLayout, readNameLayout, new ReadLayout[]{ readLayout }, withQualityInReadName, readNameDelimitor, false); this(readSequenceLayout, readNameLayout, readLayouts, withQualityInReadName, readNameDelimitor, false, fastqQualityFormat);
} }
public FastqWriterLayout(final String readSequenceLayout, final String readNameLayout, final ReadLayout readLayout, final boolean withQualityInReadName, final String readNameDelimitor, final boolean convertBarcodeToReadbar) {
this(readSequenceLayout, readNameLayout, new ReadLayout[]{ readLayout }, withQualityInReadName, readNameDelimitor, convertBarcodeToReadbar);
}
/** /**
* @param layout * @param layout
...@@ -186,36 +194,18 @@ public class FastqWriterLayout { ...@@ -186,36 +194,18 @@ public class FastqWriterLayout {
/** /**
* Assemble the {@link FastqRecord} that should be written in the output file according to the layout(s). * Assemble the {@link FastqRecord} that should be written in the output file according to the layout(s) ; this method should be used when a barcode has been matched
* This method also use the read sequence to write BARCODE in read name
* @param reads the {@link FastqRecord} from the input fastq files in the order matching the {@link ReadLayout} given at construction
* *
* @param reads the {@link FastqRecord} from the input fastq files in the order matching the {@link ReadLayout} given at construction
* @param sampleMatch a {@link SampleMatch} holding all the barcode matches
* @return * @return
*/ */
public FastqRecord assembleRecord( FastqRecord[] reads ){ public FastqRecord assembleRecord( FastqRecord[] reads, SampleMatch sampleMatch ){
FastqRecord rec = sequenceConsumer.assembleNewRead(reads);
String name = rec.getReadName();
if(readNameConsumer != null)
name = readNameConsumer.assembleNewReadName(reads);
FastqRecord ass = new FastqRecord(name, rec.getReadString(), rec.getBaseQualityHeader(), rec.getBaseQualityString());
log.debug("Assembled read for output using layout [NameLayout="+this.readNameLayout+" ; SequenceLayout="+this.readSequenceLayout+"] => \n"+ass.toFastQString());
return ass;
}
/**
* Assemble the {@link FastqRecord} that should be written in the output file according to the layout(s)
* @param reads the {@link FastqRecord} from the input fastq files in the order matching the {@link ReadLayout} given at construction
* @param m a {@link SampleMatch} holding all the barcode matches
* @return
*/
public FastqRecord assembleRecord( FastqRecord[] reads, SampleMatch m ){
FastqRecord rec = sequenceConsumer.assembleNewRead(reads); FastqRecord rec = sequenceConsumer.assembleNewRead(reads);
String name = rec.getReadName(); String name = rec.getReadName();
if(readNameConsumer != null) if(readNameConsumer != null)
name = readNameConsumer.assembleNewReadName(reads, m); name = readNameConsumer.assembleNewReadName(reads, sampleMatch);
FastqRecord ass = new FastqRecord(name, rec.getReadString(), rec.getBaseQualityHeader(), rec.getBaseQualityString()); FastqRecord ass = new FastqRecord(name, rec.getReadString(), rec.getBaseQualityHeader(), rec.getBaseQualityString());
log.debug("Assembled read for output using layout [NameLayout="+this.readNameLayout+" ; SequenceLayout="+this.readSequenceLayout+"] => \n"+ass.toFastQString()); log.debug("Assembled read for output using layout [NameLayout="+this.readNameLayout+" ; SequenceLayout="+this.readSequenceLayout+"] => \n"+ass.toFastQString());
...@@ -224,7 +214,7 @@ public class FastqWriterLayout { ...@@ -224,7 +214,7 @@ public class FastqWriterLayout {
/** /**
* Convenient wrapper for single end configuration * Convenient wrapper for single end configuration
* @param read the {@link FastqRecord} from the input fastq file * @param read the {@link FastqRecord} from the input fastq file
* @param m a {@link SampleMatch} holding all the barcode matches * @param m a {@link SampleMatch} holding all the barcode matches
* @return * @return
*/ */
...@@ -255,7 +245,7 @@ public class FastqWriterLayout { ...@@ -255,7 +245,7 @@ public class FastqWriterLayout {
if(!Pattern.matches(SHORT_LAYOUT_REGEX, this.readNameLayout)){ if(!Pattern.matches(SHORT_LAYOUT_REGEX, this.readNameLayout)){
throw new LayoutMalformedException("FASTQ Output Layout for read name does not match expected short format (regex is :"+SHORT_LAYOUT_REGEX+")", this.readNameLayout); throw new LayoutMalformedException("FASTQ Output Layout for read name does not match expected short format (regex is :"+SHORT_LAYOUT_REGEX+")", this.readNameLayout);
} }
readNameConsumer = new ReadLayoutConsumer(this.readNameLayout, this.readLayouts, this.withQualityInReadName, this.readNameDelimitor); readNameConsumer = new ReadLayoutConsumer(this.readNameLayout, this.readLayouts, this.withQualityInReadName , this.readNameDelimitor, this.fastqQualityFormat);
} }
} }
......
...@@ -28,6 +28,8 @@ import java.util.Arrays; ...@@ -28,6 +28,8 @@ import java.util.Arrays;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import htsjdk.samtools.SAMUtils;
public class JeTry { public class JeTry {
private static Logger log = LoggerFactory.getLogger(JeTry.class); private static Logger log = LoggerFactory.getLogger(JeTry.class);
...@@ -36,9 +38,7 @@ public class JeTry { ...@@ -36,9 +38,7 @@ public class JeTry {
} }
public static void main(String[] args) { public static void main(String[] args) {
System.out.println(JeUtils.toBytesThenPhred(
"26242516303031"
));
} }
......
...@@ -26,12 +26,23 @@ package org.embl.gbcs.je; ...@@ -26,12 +26,23 @@ package org.embl.gbcs.je;
import java.util.Set; import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.developpez.adiguba.shell.ProcessConsumer; import com.developpez.adiguba.shell.ProcessConsumer;
import htsjdk.samtools.SAMUtils; import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.fastq.FastqReader;
import htsjdk.samtools.util.FastqQualityFormat;
import htsjdk.samtools.util.QualityEncodingDetector;
import htsjdk.samtools.util.SolexaQualityConverter;
public class JeUtils { public class JeUtils {
private static Logger log = LoggerFactory.getLogger(JeUtils.class);
/* /*
* BC : for sample barcode, raw or corrected, with QT to store its quality string * BC : for sample barcode, raw or corrected, with QT to store its quality string
*/ */
...@@ -69,7 +80,7 @@ public class JeUtils { ...@@ -69,7 +80,7 @@ public class JeUtils {
public static final String SAMTAG_MI = "MI"; public static final String SAMTAG_MI = "MI";
/**convert a string of quality numbers (each quality has 2 char) to the Phred String /**convert a string of quality numbers in Phred Scale (each quality has 2 char) to the Standard Phred + 33 encoding
* ie * ie
* @param s * @param s
* @return * @return
...@@ -85,6 +96,54 @@ public class JeUtils { ...@@ -85,6 +96,54 @@ public class JeUtils {
} }
/**
 * Converts the given quality values to a numeric byte[] in phred scale, picking the
 * conversion routine that matches the quality format the values come in.
 * Nothing is returned: the array is handed over to the selected converter.
 *
 * @param quals the quality values to convert
 * @param version the {@link FastqQualityFormat} the quality values are encoded in
 */
public static void convertQualityToPhred(byte[] quals, final FastqQualityFormat version) {
    switch (version) {
    case Illumina:
        SolexaQualityConverter.getSingleton().convertSolexa_1_3_QualityCharsToPhredBinary(quals);
        return;
    case Solexa:
        SolexaQualityConverter.getSingleton().convertSolexaQualityCharsToPhredBinary(quals);
        return;
    case Standard:
        SAMUtils.fastqToPhred(quals);
        return;
    default:
        // any other format is not handled here: the array is left untouched
        return;
    }
}
/**
 * Looks at fastq input(s) and attempts to determine the proper quality format
 *
 * Closes the reader(s) by side effect; the readers are also closed when
 * reading records from them fails.
 *
 * @param readers readers on the input fastq files
 * @param expectedQuality If provided, will be used for sanity checking. If left null, autodetection will occur
 * @return the {@link FastqQualityFormat} guessed by the {@link QualityEncodingDetector}
 */
public static FastqQualityFormat determineQualityFormat(final FastqReader [] readers, final FastqQualityFormat expectedQuality) {
    final QualityEncodingDetector detector = new QualityEncodingDetector();

    try {
        //add all fastq readers
        detector.add(QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, readers);
    } finally {
        //close all readers, even when the detector failed while reading them,
        //so that the underlying resources are never leaked
        for (FastqReader reader : readers) {
            reader.close();
        }
    }

    final FastqQualityFormat qualityFormat = detector.generateBestGuess(QualityEncodingDetector.FileContext.FASTQ, expectedQuality);

    //in case there is no expected quality and different options were possible, warn user
    if (detector.isDeterminationAmbiguous()) {
        log.warn("Making ambiguous determination about fastq's quality encoding; more than one format possible based on observed qualities.");
    }

    log.info(String.format("Auto-detected quality format as: %s.", qualityFormat));

    return qualityFormat;
}
/** /**
* @return the result of executing whoami on the underlying OS * @return the result of executing whoami on the underlying OS
*/ */
......
...@@ -30,11 +30,13 @@ import java.util.TreeSet; ...@@ -30,11 +30,13 @@ import java.util.TreeSet;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.embl.gbcs.je.demultiplexer.Demultiplexer;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import htsjdk.samtools.SAMUtils; import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.fastq.FastqRecord; import htsjdk.samtools.fastq.FastqRecord;
import htsjdk.samtools.util.FastqQualityFormat;
public class ReadLayoutConsumer { public class ReadLayoutConsumer {
private static Logger log = LoggerFactory.getLogger(ReadLayoutConsumer.class); private static Logger log = LoggerFactory.getLogger(ReadLayoutConsumer.class);
...@@ -53,26 +55,36 @@ public class ReadLayoutConsumer { ...@@ -53,26 +55,36 @@ public class ReadLayoutConsumer {
String outPutLayout; String outPutLayout;
boolean withQualityInReadName; boolean withQualityInReadName;
String readNameDelimitor = ":"; String readNameDelimitor = ":";
FastqQualityFormat fastqQualityFormat = null;
/** /**
* @param outPutLayout in short format * Creates a simple ReadLayoutConsumer with default read name delimitor (':') and standard
* @param readLayouts all ordered layout (order is as the reads are read from files) * fastq quality format {@link FastqQualityFormat#Standard}.
*
* @param outPutLayout the string representation of the output layout e.g. "B1U1S1"
* @param readLayouts the ordered {@link ReadLayout} objects defining how input fastq files are formatted
*
*/ */
public ReadLayoutConsumer(String outPutLayout, ReadLayout [] readLayouts){ public ReadLayoutConsumer(String outPutLayout, ReadLayout [] readLayouts){
this(outPutLayout, readLayouts, false, ":"); this(outPutLayout, readLayouts, false, ":", FastqQualityFormat.Standard);
} }
/** /**
* @param outPutLayout in short format * Creates a ReadLayoutConsumer
* @param readLayouts all ordered layout (order is as the reads are read from files) *
* @param outPutLayout the string representation of the output layout e.g. "B1U1S1"
* @param readLayouts the ordered {@link ReadLayout} objects defining how input fastq files are formatted
* @param withQualityInReadName indicates if the Barcode/UMI quality should be injected in the read name together with their sequence
* @param readNameDelimitor the character to use to split up the read name (':' is the default)
* @param fastqQualityFormat the {@link FastqQualityFormat} of the input fastq files
*/ */
public ReadLayoutConsumer(String outPutLayout, ReadLayout [] readLayouts, boolean withQualityInReadName, String readNameDelimitor){ public ReadLayoutConsumer(String outPutLayout, ReadLayout [] readLayouts, boolean withQualityInReadName, String readNameDelimitor, final FastqQualityFormat fastqQualityFormat){
this.outPutLayout = outPutLayout; this.outPutLayout = outPutLayout;
this.readLayouts = readLayouts; this.readLayouts = readLayouts;
this.withQualityInReadName = withQualityInReadName; this.withQualityInReadName = withQualityInReadName;
this.readNameDelimitor = readNameDelimitor; this.readNameDelimitor = readNameDelimitor;
this.fastqQualityFormat = fastqQualityFormat;
Pattern sub = Pattern.compile("([BUSR])(\\d+)"); Pattern sub = Pattern.compile("([BUSR])(\\d+)");
Matcher subMatcher = sub.matcher(""); Matcher subMatcher = sub.matcher("");
...@@ -142,19 +154,6 @@ public class ReadLayoutConsumer { ...@@ -142,19 +154,6 @@ public class ReadLayoutConsumer {
/**
* Assemble a read name by concatenating the output layout to the original read name.
* Concatenation is made by inserting a readNameDelimitor between each added slot
* In this method, the read sequence is always used in BARCODE slots
*
* @param reads the reads in order matching that of the {@link ReadLayout} array used at construction
*
* @return
*/
public String assembleNewReadName(FastqRecord [] reads){
return assembleNewReadName(reads, null);
}
/** /**
* Assemble a read name by concatenating the output layout to the original read name. * Assemble a read name by concatenating the output layout to the original read name.
...@@ -183,54 +182,61 @@ public class ReadLayoutConsumer { ...@@ -183,54 +182,61 @@ public class ReadLayoutConsumer {
String subseq = null; String subseq = null;
byte[] qualB = null; byte[] qualB = null;
int bestQual = 0; int bestQual = 0;
if(slotTypeCode == BYTECODE_BARCODE ){
// we init the subseq with the matched barcode directly for(int rlIdx : layoutIndicesToUseForSlots.get(i)){
subseq = m.getBarcodeMatches().get(slotIdx).barcode;
}else{ ReadLayout rl = readLayouts[rlIdx];
for(int rlIdx : layoutIndicesToUseForSlots.get(i)){ FastqRecord readForLayout = reads[rlIdx];
ReadLayout rl = readLayouts[rlIdx]; String _subseq = null;
FastqRecord readForLayout = reads[rlIdx]; String _subqual = null;
switch (slotTypeCode) {
String _subseq = null; case BYTECODE_BARCODE:
String _subqual = null; // we init the subseq with the matched barcode directly
switch (slotTypeCode) { _subseq = m.getBarcodeMatches().get(slotIdx).barcode;
case BYTECODE_READBAR: _subqual = rl.extractBarcode(readForLayout.getBaseQualityString(), slotIdx);
_subseq = rl.extractBarcode(readForLayout.getReadString(), slotIdx); break;
_subqual = rl.extractBarcode(readForLayout.getBaseQualityString(), slotIdx);