Commit e6dcce0a authored by Charles Girardot's avatar Charles Girardot
Browse files

2.0 RC version. je markdupes does not deal with UMI from BAM TAG (still

expected in read names)
parent 542b8ac9
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>Je</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding//src/test/resources=UTF-8
encoding/<project>=UTF-8
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
org.eclipse.jdt.core.compiler.compliance=1.5
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.5
eclipse.preferences.version=1
org.eclipse.jdt.ui.javadoc=false
org.eclipse.jdt.ui.text.custom_code_templates=<?xml version\="1.0" encoding\="UTF-8" standalone\="no"?><templates><template autoinsert\="true" context\="gettercomment_context" deleted\="false" description\="Comment for getter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.gettercomment" name\="gettercomment">/**\n * @return the ${bare_field_name}\n */</template><template autoinsert\="true" context\="settercomment_context" deleted\="false" description\="Comment for setter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.settercomment" name\="settercomment">/**\n * @param ${param} the ${bare_field_name} to set\n */</template><template autoinsert\="true" context\="constructorcomment_context" deleted\="false" description\="Comment for created constructors" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.constructorcomment" name\="constructorcomment">/**\n * ${tags}\n */</template><template autoinsert\="true" context\="filecomment_context" deleted\="false" description\="Comment for created Java files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.filecomment" name\="filecomment">/**\n * \n */</template><template autoinsert\="true" context\="typecomment_context" deleted\="false" description\="Comment for created types" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.typecomment" name\="typecomment">/**\n * @author ${user}\n *\n * ${tags}\n */</template><template autoinsert\="true" context\="fieldcomment_context" deleted\="false" description\="Comment for fields" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.fieldcomment" name\="fieldcomment">/**\n * \n */</template><template autoinsert\="true" context\="methodcomment_context" deleted\="false" description\="Comment for non-overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodcomment" name\="methodcomment">/**\n * ${tags}\n */</template><template autoinsert\="true" context\="overridecomment_context" 
deleted\="false" description\="Comment for overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.overridecomment" name\="overridecomment">/* (non-Javadoc)\n * ${see_to_overridden}\n */</template><template autoinsert\="true" context\="delegatecomment_context" deleted\="false" description\="Comment for delegate methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.delegatecomment" name\="delegatecomment">/**\n * ${tags}\n * ${see_to_target}\n */</template><template autoinsert\="false" context\="newtype_context" deleted\="false" description\="Newly created files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.newtype" name\="newtype">/*\n * The MIT License\n *\n * Copyright (c) 2009 The Broad Institute\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the "Software"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions\:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n${filecomment}\n${package_declaration}\n\n${typecomment}\n${type_declaration}</template><template autoinsert\="true" context\="classbody_context" deleted\="false" description\="Code in new class type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.classbody" name\="classbody">\n</template><template autoinsert\="true" context\="interfacebody_context" deleted\="false" description\="Code in new interface type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.interfacebody" name\="interfacebody">\n</template><template autoinsert\="true" context\="enumbody_context" deleted\="false" description\="Code in new enum type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.enumbody" name\="enumbody">\n</template><template autoinsert\="true" context\="annotationbody_context" deleted\="false" description\="Code in new annotation type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.annotationbody" name\="annotationbody">\n</template><template autoinsert\="true" context\="catchblock_context" deleted\="false" description\="Code in new catch blocks" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.catchblock" name\="catchblock">// ${todo} Auto-generated catch block\n${exception_var}.printStackTrace();</template><template autoinsert\="true" context\="methodbody_context" deleted\="false" description\="Code in created method stubs" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodbody" name\="methodbody">// ${todo} Auto-generated method stub\n${body_statement}</template><template autoinsert\="true" context\="constructorbody_context" deleted\="false" description\="Code in created constructor stubs" enabled\="true" 
id\="org.eclipse.jdt.ui.text.codetemplates.constructorbody" name\="constructorbody">${body_statement}\n// ${todo} Auto-generated constructor stub</template><template autoinsert\="true" context\="getterbody_context" deleted\="false" description\="Code in created getters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.getterbody" name\="getterbody">return ${field};</template><template autoinsert\="true" context\="setterbody_context" deleted\="false" description\="Code in created setters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.setterbody" name\="setterbody">${field} \= ${param};</template></templates>
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
1. Create a directory containing:
* the je wrapper
* the log4j.xml
* the Je bundle jar
2. Make sure the je wrapper calls the correct jar
3. tar czf <tarname>.tar.gz <dirname>/*
e.g.
`tar czf je_2.0.RC.tar.gz je_2.0.RC/*`
\ No newline at end of file
#!/bin/sh
# Wrapper around je_1.1_bundle.jar
# Wrapper around je_*_bundle.jar
# where are we stored ?
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# echo $DIR
# path to jar file to execute, this jar is supposed to be in the same dir as this script
JAR_FILE=$DIR"/je_1.1_bundle.jar"
JAR_FILE=$DIR"/je_2.0.RC_bundle.jar"
# set default _JAVA_OPTIONS
_JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'}
......
#!/bin/sh
# Wrapper around je_*_bundle.jar: runs the Je bundle jar stored next to this
# script and forwards all command line arguments to it.

# where are we stored ?
# "$0" is used instead of the bash-only ${BASH_SOURCE[0]} array: this script is
# declared as a /bin/sh script, and a non-bash sh (e.g. dash) rejects the array
# syntax with "Bad substitution".
DIR="$( cd "$( dirname "$0" )" && pwd )"
# echo $DIR

# path to jar file to execute, this jar is supposed to be in the same dir as this script
JAR_FILE="$DIR/je_2.0.RC_bundle.jar"

# set default _JAVA_OPTIONS (a value already set by the caller is kept)
_JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'}
export _JAVA_OPTIONS

# uncomment to change logging level using your own log4j.xml found in $DIR file
# OPTS="-Dlog4j.configuration=file:$DIR/log4j.xml"

# $OPTS is deliberately unquoted so it expands to nothing when it is not set;
# the jar path is quoted so that an install directory containing spaces works.
java $OPTS -jar "$JAR_FILE" "$@"
exit $?
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
<appender name="CONSOLE" class="org.apache.log4j.ConsoleAppender">
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{dd-MM-yy HH:mm:ss} %-5p [%t] %c{1}.%M(%L) | %m%n" />
</layout>
</appender>
<root>
<level value="INFO" />
<appender-ref ref="CONSOLE" />
</root>
</log4j:configuration>
......@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>Je</groupId>
<artifactId>Je</artifactId>
<version>2.0.beta</version>
<version>2.0.RC</version>
<name>Je</name>
<description>Je provides command line utilities to deal with barcoded FASTQ files with or without Unique Molecular Index (UMI)</description>
......
......@@ -34,6 +34,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import htsjdk.samtools.fastq.FastqRecord;
import htsjdk.samtools.util.FastqQualityFormat;
/**
*
......@@ -112,24 +113,32 @@ public class FastqWriterLayout {
*/
protected ReadLayout [] readLayouts;
/**
* @param readSequenceLayout
* @param readNameLayout can be null when the read name should be reused unmodified
* @param readLayouts
* the {@link FastqQualityFormat} of the input fastq files
*/
public FastqWriterLayout(final String readSequenceLayout, final String readNameLayout, final ReadLayout [] readLayouts, final boolean withQualityInReadName, final String readNameDelimitor) {
this(readSequenceLayout, readNameLayout, readLayouts, withQualityInReadName, readNameDelimitor, false);
}
protected FastqQualityFormat fastqQualityFormat = null;
/**
* @param readSequenceLayout
* @param readNameLayout
* @param readLayouts
* @param withQualityInReadName
* @param readNameDelimitor
* @param readSequenceLayout the string representation of the output layout to use for the read sequence e.g. "S1"
* @param readNameLayout the string representation of the output layout to use for the read name e.g. "B1U1S1"
* @param readLayouts the ordered {@link ReadLayout} objects defining how input fastq files are formatted
* @param withQualityInReadName indicates if the Barcode/UMI quality should be injected in the read name together with their sequence
* @param readNameDelimitor the character to use to split up the read name (':' is the default)
* @param convertBarcodeToReadbar if true all BARCODE slots are converted to READBAR in the readNameLayout (BARCODE == READBAR in readSequenceLayout)
* @param fastqQualityFormat the {@link FastqQualityFormat} of the input fastq files
*/
public FastqWriterLayout(final String readSequenceLayout, final String readNameLayout, final ReadLayout [] readLayouts, final boolean withQualityInReadName, final String readNameDelimitor, final boolean convertBarcodeToReadbar) {
public FastqWriterLayout(
final String readSequenceLayout,
final String readNameLayout,
final ReadLayout [] readLayouts,
final boolean withQualityInReadName,
final String readNameDelimitor,
final boolean convertBarcodeToReadbar,
final FastqQualityFormat fastqQualityFormat) {
this.readNameLayout = (StringUtils.isBlank(readNameLayout) ? null : convertToShortLayout(readNameLayout));
this.readSequenceLayout = convertToShortLayout(readSequenceLayout);
......@@ -139,24 +148,23 @@ public class FastqWriterLayout {
if(convertBarcodeToReadbar && readNameLayout!=null) {
this.readNameLayout = this.readNameLayout.replaceAll("B", "R");
}
this.fastqQualityFormat = fastqQualityFormat;
init(); //build all maps for easy lookup
}
/**
* @param readSequenceLayout
* @param readNameLayout can be null when the read name should be reused unmodified
* @param readLayout
* @param readSequenceLayout the string representation of the output layout to use for the read sequence e.g. "S1"
* @param readNameLayout the string representation of the output layout to use for the read name e.g. "B1U1S1"
* @param readLayouts the ordered {@link ReadLayout} objects defining how input fastq files are formatted
* @param withQualityInReadName indicates if the Barcode/UMI quality should be injected in the read name together with their sequence
* @param readNameDelimitor the character to use to split up the read name (':' is the default)
* @param fastqQualityFormat the {@link FastqQualityFormat} of the input fastq files
*/
public FastqWriterLayout(final String readSequenceLayout, final String readNameLayout, final ReadLayout readLayout, final boolean withQualityInReadName, final String readNameDelimitor) {
this(readSequenceLayout, readNameLayout, new ReadLayout[]{ readLayout }, withQualityInReadName, readNameDelimitor, false);
public FastqWriterLayout(final String readSequenceLayout, final String readNameLayout, final ReadLayout [] readLayouts, final boolean withQualityInReadName, final String readNameDelimitor, final FastqQualityFormat fastqQualityFormat) {
this(readSequenceLayout, readNameLayout, readLayouts, withQualityInReadName, readNameDelimitor, false, fastqQualityFormat);
}
public FastqWriterLayout(final String readSequenceLayout, final String readNameLayout, final ReadLayout readLayout, final boolean withQualityInReadName, final String readNameDelimitor, final boolean convertBarcodeToReadbar) {
this(readSequenceLayout, readNameLayout, new ReadLayout[]{ readLayout }, withQualityInReadName, readNameDelimitor, convertBarcodeToReadbar);
}
/**
* @param layout
......@@ -186,36 +194,18 @@ public class FastqWriterLayout {
/**
* Assemble the {@link FastqRecord} that should be written in the output file according to the layout(s).
* This method also use the read sequence to write BARCODE in read name
* @param reads the {@link FastqRecord} from the input fastq files in the order matching the {@link ReadLayout} given at construction
* Assemble the {@link FastqRecord} that should be written in the output file according to the layout(s) ; this method should be used when a barcode has been matched
*
* @param reads the {@link FastqRecord} from the input fastq files in the order matching the {@link ReadLayout} given at construction
* @param sampleMatch a {@link SampleMatch} holding all the barcode matches
* @return
*/
public FastqRecord assembleRecord( FastqRecord[] reads ){
FastqRecord rec = sequenceConsumer.assembleNewRead(reads);
String name = rec.getReadName();
if(readNameConsumer != null)
name = readNameConsumer.assembleNewReadName(reads);
FastqRecord ass = new FastqRecord(name, rec.getReadString(), rec.getBaseQualityHeader(), rec.getBaseQualityString());
log.debug("Assembled read for output using layout [NameLayout="+this.readNameLayout+" ; SequenceLayout="+this.readSequenceLayout+"] => \n"+ass.toFastQString());
return ass;
}
/**
* Assemble the {@link FastqRecord} that should be written in the output file according to the layout(s)
* @param reads the {@link FastqRecord} from the input fastq files in the order matching the {@link ReadLayout} given at construction
* @param m a {@link SampleMatch} holding all the barcode matches
* @return
*/
public FastqRecord assembleRecord( FastqRecord[] reads, SampleMatch m ){
public FastqRecord assembleRecord( FastqRecord[] reads, SampleMatch sampleMatch ){
FastqRecord rec = sequenceConsumer.assembleNewRead(reads);
String name = rec.getReadName();
if(readNameConsumer != null)
name = readNameConsumer.assembleNewReadName(reads, m);
name = readNameConsumer.assembleNewReadName(reads, sampleMatch);
FastqRecord ass = new FastqRecord(name, rec.getReadString(), rec.getBaseQualityHeader(), rec.getBaseQualityString());
log.debug("Assembled read for output using layout [NameLayout="+this.readNameLayout+" ; SequenceLayout="+this.readSequenceLayout+"] => \n"+ass.toFastQString());
......@@ -224,7 +214,7 @@ public class FastqWriterLayout {
/**
* Convenient wrapper for single end configuration
* @param read the {@link FastqRecord} from the input fastq file
* @param read the {@link FastqRecord} from the input fastq file
* @param m a {@link SampleMatch} holding all the barcode matches
* @return
*/
......@@ -255,7 +245,7 @@ public class FastqWriterLayout {
if(!Pattern.matches(SHORT_LAYOUT_REGEX, this.readNameLayout)){
throw new LayoutMalformedException("FASTQ Output Layout for read name does not match expected short format (regex is :"+SHORT_LAYOUT_REGEX+")", this.readNameLayout);
}
readNameConsumer = new ReadLayoutConsumer(this.readNameLayout, this.readLayouts, this.withQualityInReadName, this.readNameDelimitor);
readNameConsumer = new ReadLayoutConsumer(this.readNameLayout, this.readLayouts, this.withQualityInReadName , this.readNameDelimitor, this.fastqQualityFormat);
}
}
......
......@@ -28,6 +28,8 @@ import java.util.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import htsjdk.samtools.SAMUtils;
public class JeTry {
private static Logger log = LoggerFactory.getLogger(JeTry.class);
......@@ -36,9 +38,7 @@ public class JeTry {
}
public static void main(String[] args) {
System.out.println(JeUtils.toBytesThenPhred(
"26242516303031"
));
}
......
......@@ -26,12 +26,23 @@ package org.embl.gbcs.je;
import java.util.Set;
import java.util.TreeSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.developpez.adiguba.shell.ProcessConsumer;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.fastq.FastqReader;
import htsjdk.samtools.util.FastqQualityFormat;
import htsjdk.samtools.util.QualityEncodingDetector;
import htsjdk.samtools.util.SolexaQualityConverter;
public class JeUtils {
private static Logger log = LoggerFactory.getLogger(JeUtils.class);
/*
* BC : for sample barcode, raw or corrected, with QT to store its quality string
*/
......@@ -69,7 +80,7 @@ public class JeUtils {
public static final String SAMTAG_MI = "MI";
/**convert a string of quality numbers (each quality has 2 char) to the Phred String
/**convert a string of quality numbers in Phred Scale (each quality has 2 char) to the Standard Phred + 33 encoding
* ie
* @param s
* @return
......@@ -85,6 +96,54 @@ public class JeUtils {
}
/**
 * Converts the given quality values to numeric values on the Phred scale, choosing the
 * conversion routine that matches the encoding the values currently use.
 * The array itself is handed to the converter and nothing is returned.
 *
 * @param quals the quality bytes to convert
 * @param version the {@link FastqQualityFormat} the bytes are encoded in
 */
public static void convertQualityToPhred(byte[] quals, final FastqQualityFormat version) {
    switch (version) {
        case Illumina:
            // pipeline 1.3+ style scores
            SolexaQualityConverter.getSingleton().convertSolexa_1_3_QualityCharsToPhredBinary(quals);
            return;
        case Solexa:
            // pre-1.3 pipeline style scores
            SolexaQualityConverter.getSingleton().convertSolexaQualityCharsToPhredBinary(quals);
            return;
        case Standard:
            SAMUtils.fastqToPhred(quals);
            return;
        default:
            // any other format: values are left untouched
            return;
    }
}
/**
 * Looks at fastq input(s) and attempts to determine the proper quality format.
 *
 * Closes the reader(s) by side effect, including when sampling the readers fails.
 *
 * @param readers readers on the input fastq files
 * @param expectedQuality If provided, will be used for sanity checking. If left null, autodetection will occur
 * @return the quality format picked by the {@link QualityEncodingDetector}
 */
public static FastqQualityFormat determineQualityFormat(final FastqReader [] readers, final FastqQualityFormat expectedQuality) {
    final QualityEncodingDetector detector = new QualityEncodingDetector();

    try {
        //add all fastq readers
        detector.add(QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, readers);
    } finally {
        //close all readers, even if reading from one of them failed, so no file handle leaks
        for (final FastqReader reader : readers) {
            reader.close();
        }
    }

    final FastqQualityFormat qualityFormat = detector.generateBestGuess(QualityEncodingDetector.FileContext.FASTQ, expectedQuality);

    //in case there is no expected quality and different options were possible, warn user
    if (detector.isDeterminationAmbiguous()) {
        log.warn("Making ambiguous determination about fastq's quality encoding; more than one format possible based on observed qualities.");
    }

    log.info(String.format("Auto-detected quality format as: %s.", qualityFormat));
    return qualityFormat;
}
/**
* @return the result of executing whoami on the underlying OS
*/
......
......@@ -30,11 +30,13 @@ import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.embl.gbcs.je.demultiplexer.Demultiplexer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.fastq.FastqRecord;
import htsjdk.samtools.util.FastqQualityFormat;
public class ReadLayoutConsumer {
private static Logger log = LoggerFactory.getLogger(ReadLayoutConsumer.class);
......@@ -53,26 +55,36 @@ public class ReadLayoutConsumer {
String outPutLayout;
boolean withQualityInReadName;
String readNameDelimitor = ":";
FastqQualityFormat fastqQualityFormat = null;
/**
* @param outPutLayout in short format
* @param readLayouts all ordered layout (order is as the reads are read from files)
* Creates a simple ReadLayoutConsumer with default read name delimitor (':') and standard
* fastq quality format {@link FastqQualityFormat#Standard}.
*
* @param outPutLayout the string representation of the output layout e.g. "B1U1S1"
* @param readLayouts the ordered {@link ReadLayout} objects defining how input fastq files are formatted
*
*/
public ReadLayoutConsumer(String outPutLayout, ReadLayout [] readLayouts){
this(outPutLayout, readLayouts, false, ":");
this(outPutLayout, readLayouts, false, ":", FastqQualityFormat.Standard);
}
/**
* @param outPutLayout in short format
* @param readLayouts all ordered layout (order is as the reads are read from files)
* Creates a ReadLayoutConsumer
*
* @param outPutLayout the string representation of the output layout e.g. "B1U1S1"
* @param readLayouts the ordered {@link ReadLayout} objects defining how input fastq files are formatted
* @param withQualityInReadName indicates if the Barcode/UMI quality should be injected in the read name together with their sequence
* @param readNameDelimitor the character to use to split up the read name (':' is the default)
* @param fastqQualityFormat the {@link FastqQualityFormat} of the input fastq files
*/
public ReadLayoutConsumer(String outPutLayout, ReadLayout [] readLayouts, boolean withQualityInReadName, String readNameDelimitor){
public ReadLayoutConsumer(String outPutLayout, ReadLayout [] readLayouts, boolean withQualityInReadName, String readNameDelimitor, final FastqQualityFormat fastqQualityFormat){
this.outPutLayout = outPutLayout;
this.readLayouts = readLayouts;
this.withQualityInReadName = withQualityInReadName;
this.readNameDelimitor = readNameDelimitor;
this.fastqQualityFormat = fastqQualityFormat;
Pattern sub = Pattern.compile("([BUSR])(\\d+)");
Matcher subMatcher = sub.matcher("");
......@@ -142,19 +154,6 @@ public class ReadLayoutConsumer {
/**
* Assemble a read name by concatenating the output layout to the original read name.
* Concatenation is made by inserting a readNameDelimitor between each added slot
* In this method, the read sequence is always used in BARCODE slots
*
* @param reads the reads in order matching that of the {@link ReadLayout} array used at construction
*
* @return
*/
public String assembleNewReadName(FastqRecord [] reads){
return assembleNewReadName(reads, null);
}
/**
* Assemble a read name by concatenating the output layout to the original read name.
......@@ -183,54 +182,61 @@ public class ReadLayoutConsumer {
String subseq = null;
byte[] qualB = null;
int bestQual = 0;
if(slotTypeCode == BYTECODE_BARCODE ){
// we init the subseq with the matched barcode directly
subseq = m.getBarcodeMatches().get(slotIdx).barcode;
}else{
for(int rlIdx : layoutIndicesToUseForSlots.get(i)){
ReadLayout rl = readLayouts[rlIdx];
FastqRecord readForLayout = reads[rlIdx];
String _subseq = null;
String _subqual = null;
switch (slotTypeCode) {
case BYTECODE_READBAR:
_subseq = rl.extractBarcode(readForLayout.getReadString(), slotIdx);
_subqual = rl.extractBarcode(readForLayout.getBaseQualityString(), slotIdx);
break;
case BYTECODE_UMI:
_subseq = rl.extractUMI(readForLayout.getReadString(), slotIdx);
_subqual = rl.extractUMI(readForLayout.getBaseQualityString(), slotIdx);
break;
default:
_subseq = rl.extractSample(readForLayout.getReadString(), slotIdx);
_subqual = rl.extractSample(readForLayout.getBaseQualityString(), slotIdx);
break;
}
byte[] _qualB = SAMUtils.fastqToPhred(_subqual);
int _qualsum = overallQuality( _qualB );
if(subseq == null || _qualsum > bestQual){
subseq = _subseq;
qualB = _qualB;
bestQual = _qualsum;
}
for(int rlIdx : layoutIndicesToUseForSlots.get(i)){
ReadLayout rl = readLayouts[rlIdx];
FastqRecord readForLayout = reads[rlIdx];
String _subseq = null;
String _subqual = null;
switch (slotTypeCode) {
case BYTECODE_BARCODE:
// we init the subseq with the matched barcode directly
_subseq = m.getBarcodeMatches().get(slotIdx).barcode;
_subqual = rl.extractBarcode(readForLayout.getBaseQualityString(), slotIdx);
break;
case BYTECODE_READBAR:
_subseq = rl.extractBarcode(readForLayout.getReadString(), slotIdx);
_subqual = rl.extractBarcode(readForLayout.getBaseQualityString(), slotIdx);
break;
case BYTECODE_UMI:
_subseq = rl.extractUMI(readForLayout.getReadString(), slotIdx);
_subqual = rl.extractUMI(readForLayout.getBaseQualityString(), slotIdx);
break;
default:
_subseq = rl.extractSample(readForLayout.getReadString(), slotIdx);
_subqual = rl.extractSample(readForLayout.getBaseQualityString(), slotIdx);
break;
}
byte[] _qualB = _subqual.getBytes();
int _qualsum = overallQuality( _qualB );
if(subseq == null || _qualsum > bestQual){
subseq = _subseq;
qualB = _qualB;
bestQual = _qualsum;
}
}
//concatenate to the growing name
newname += this.readNameDelimitor + subseq;
if(withQualityInReadName)
newname += qualityToNumberString(qualB);
if(withQualityInReadName) {
newname += qualityToNumberString(qualB, this.fastqQualityFormat);
}
log.debug("header is now : "+newname);
}
return newname;
}
/*
* returns a string made of 2-digits quality scores for injection in the read name
/**
* @param qualbytes byte representation of initial quality string
* @param fastqQualityFormat the encoding of these bytes
* @return
*/
public synchronized static String qualityToNumberString(byte[] qualbytes) {
public synchronized static String qualityToNumberString(byte[] qualbytes, FastqQualityFormat fastqQualityFormat) {
JeUtils.convertQualityToPhred(qualbytes, fastqQualityFormat);
NumberFormat nf = NumberFormat.getIntegerInstance();
nf.setMinimumIntegerDigits(2);
StringBuffer sb = new StringBuffer(qualbytes.length*2);
......
......@@ -41,6 +41,9 @@ public class SampleMatch {
*/
protected Map<Integer, BarcodeMatch> barcodeMatches;
/**
* A note to propagate for diagnostic file
*/
protected String diagnosticNote = "";
public SampleMatch(String sample, Map<Integer, BarcodeMatch> barcodeMatches){
......
......@@ -255,9 +255,11 @@ public class DemultiplexCLI extends CommandLineProgram {
@Option(shortName="WQ", optional = true,
printOrder=35,
doc="Should quality string also be injected in read names. Only applies to READBAR and UMI described in the read name slot of output layout \n"+
"If turned on, the quality string is translated into 2 digits number and a e.g. UMI will look like\n"+
"\t"+" '...:ATGCAT333423212322:...' instead of '...:ATGCAT:...'\n"+
doc="Set to True to keep Phred sequence qualities in output read names. \n"+
"This option only applies to BARCODE, READBAR and UMI described in the read name slot of output layout. "+
"For BARCODE, the equivalent READBAR quality is used. In case of redundant slots, the best found quality is used.\n"+
"The quality string is translated into 2 digits number representing the quality scores on the Phred scale and a e.g. UMI will look like\n"+
"\t"+" '...:ATGCAT333023212322:...' instead of '...:ATGCAT:...'\n"+
"This option is particularly useful with the retag module that knows how to extract quality numbers into BAM tags."
)
public boolean WITH_QUALITY_IN_READNAME = false;
......@@ -424,7 +426,7 @@ public class DemultiplexCLI extends CommandLineProgram {
printOrder=190,
doc="A value describing how the quality values are encoded in the fastq files. Either 'Solexa' for pre-pipeline 1.3 " +
"style scores (solexa scaling + 66), 'Illumina' for pipeline 1.3 and above (phred scaling + 64) or 'Standard' for phred scaled " +
"scores with a character shift of 33. If this value is not specified (or 'null' is given), the quality format will be detected.\n"
"scores with a character shift of 33. If this value is not specified (or 'null' is given), the quality format is assumed to be will the 'Standard' for phred scale.\n"
)
public FastqQualityFormat QUALITY_FORMAT = DEFAULT_QUALITY_FORMAT;
......@@ -501,7 +503,23 @@ public class DemultiplexCLI extends CommandLineProgram {
if(_names.contains(f.getAbsolutePath())){
return new String[]{"Found twice the same file in FASTQ options: "+f.getAbsolutePath()};
}
_names.add(f.getAbsolutePath());