Commit d46acaec authored by Charles Girardot's avatar Charles Girardot

merge conflicts resolution

parents fcde931e 62c01919
......@@ -5,3 +5,4 @@ target/
.DS_Store
._*
embl.properties
test.properties
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse project descriptor for the Je project: lists its builders and natures. -->
<projectDescription>
  <name>Je</name>
  <comment></comment>
  <projects>
  </projects>
  <!-- Builders, in the order they are declared. -->
  <buildSpec>
    <buildCommand>
      <name>org.eclipse.jdt.core.javabuilder</name>
      <arguments>
      </arguments>
    </buildCommand>
    <buildCommand>
      <name>org.eclipse.m2e.core.maven2Builder</name>
      <arguments>
      </arguments>
    </buildCommand>
  </buildSpec>
  <!-- Natures attached to the project. -->
  <natures>
    <nature>org.eclipse.m2e.core.maven2Nature</nature>
    <nature>org.eclipse.jdt.core.javanature</nature>
  </natures>
</projectDescription>
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding//src/test/resources=UTF-8
encoding/<project>=UTF-8
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
org.eclipse.jdt.core.compiler.compliance=1.5
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.5
eclipse.preferences.version=1
org.eclipse.jdt.ui.javadoc=false
org.eclipse.jdt.ui.text.custom_code_templates=<?xml version\="1.0" encoding\="UTF-8" standalone\="no"?><templates><template autoinsert\="true" context\="gettercomment_context" deleted\="false" description\="Comment for getter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.gettercomment" name\="gettercomment">/**\n * @return the ${bare_field_name}\n */</template><template autoinsert\="true" context\="settercomment_context" deleted\="false" description\="Comment for setter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.settercomment" name\="settercomment">/**\n * @param ${param} the ${bare_field_name} to set\n */</template><template autoinsert\="true" context\="constructorcomment_context" deleted\="false" description\="Comment for created constructors" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.constructorcomment" name\="constructorcomment">/**\n * ${tags}\n */</template><template autoinsert\="true" context\="filecomment_context" deleted\="false" description\="Comment for created Java files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.filecomment" name\="filecomment">/**\n * \n */</template><template autoinsert\="true" context\="typecomment_context" deleted\="false" description\="Comment for created types" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.typecomment" name\="typecomment">/**\n * @author ${user}\n *\n * ${tags}\n */</template><template autoinsert\="true" context\="fieldcomment_context" deleted\="false" description\="Comment for fields" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.fieldcomment" name\="fieldcomment">/**\n * \n */</template><template autoinsert\="true" context\="methodcomment_context" deleted\="false" description\="Comment for non-overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodcomment" name\="methodcomment">/**\n * ${tags}\n */</template><template autoinsert\="true" context\="overridecomment_context" 
deleted\="false" description\="Comment for overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.overridecomment" name\="overridecomment">/* (non-Javadoc)\n * ${see_to_overridden}\n */</template><template autoinsert\="true" context\="delegatecomment_context" deleted\="false" description\="Comment for delegate methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.delegatecomment" name\="delegatecomment">/**\n * ${tags}\n * ${see_to_target}\n */</template><template autoinsert\="false" context\="newtype_context" deleted\="false" description\="Newly created files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.newtype" name\="newtype">/*\n * The MIT License\n *\n * Copyright (c) 2009 The Broad Institute\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the "Software"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions\:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n${filecomment}\n${package_declaration}\n\n${typecomment}\n${type_declaration}</template><template autoinsert\="true" context\="classbody_context" deleted\="false" description\="Code in new class type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.classbody" name\="classbody">\n</template><template autoinsert\="true" context\="interfacebody_context" deleted\="false" description\="Code in new interface type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.interfacebody" name\="interfacebody">\n</template><template autoinsert\="true" context\="enumbody_context" deleted\="false" description\="Code in new enum type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.enumbody" name\="enumbody">\n</template><template autoinsert\="true" context\="annotationbody_context" deleted\="false" description\="Code in new annotation type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.annotationbody" name\="annotationbody">\n</template><template autoinsert\="true" context\="catchblock_context" deleted\="false" description\="Code in new catch blocks" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.catchblock" name\="catchblock">// ${todo} Auto-generated catch block\n${exception_var}.printStackTrace();</template><template autoinsert\="true" context\="methodbody_context" deleted\="false" description\="Code in created method stubs" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodbody" name\="methodbody">// ${todo} Auto-generated method stub\n${body_statement}</template><template autoinsert\="true" context\="constructorbody_context" deleted\="false" description\="Code in created constructor stubs" enabled\="true" 
id\="org.eclipse.jdt.ui.text.codetemplates.constructorbody" name\="constructorbody">${body_statement}\n// ${todo} Auto-generated constructor stub</template><template autoinsert\="true" context\="getterbody_context" deleted\="false" description\="Code in created getters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.getterbody" name\="getterbody">return ${field};</template><template autoinsert\="true" context\="setterbody_context" deleted\="false" description\="Code in created setters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.setterbody" name\="setterbody">${field} \= ${param};</template></templates>
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
......@@ -11,24 +11,40 @@ Additional documentation and support can be found at http://gbcs.embl.de/je
## The Je tool suite
Je currently offers 4 tools:
Je currently offers the following tools:
* **je debarcode**
demultiplexes multi-sample fastq files using user-defined input *read-layouts* and writes output files following user-defined *output-layouts*.
Replaces both **demultiplex-illu** and **demultiplex** since version 2.0.
* **je dropseq**
to process drop-seq results: clips cell barcode and UMI from read 1 and adds them to header of read 2 (a unique output fastq is created).
* **je retag**
extracts barcode(s) and UMI sequence(s) embedded in read names of a BAM file and migrates them to the proper BAM tags.
* **je clip**
to remove UMIs contained in reads of fastq files that do not need sample demultiplexing
* **je markdupes**
filters BAM files for read duplicates taking UMIs into account.
* **je demultiplex**
to demultiplex multi-samples fastq files which reads contain barcodes and UMIs (or not)
to demultiplex multi-sample fastq files whose reads contain barcodes and UMIs (or not). Deprecated since version 2.0 (use *je debarcode* instead).
* **je demultiplex-illu**
to demultiplex fastq files according to associated index files (contain the sample encoding barcodes).
Reads can additionally contain UMIs (inline)
Reads can additionally contain UMIs (inline). Deprecated since version 2.0 (use *je debarcode* instead).
* **je markdupes**
to filter BAM files for read duplicates taking UMIs into account
### Distributions
......@@ -45,7 +61,7 @@ Je currently offers 4 tools:
* ``src/shell/je``
is the wrapper script to call ``java -jar je_1.0_bundle.jar``
is the wrapper script to call ``java -jar je_*_bundle.jar``
* ``src/galaxy/``
......
1. Create a dir with :
* the je wrapper
* the log4j.xml
* the Je bundle jar
2. Make sure the je wrapper calls the correct jar
3. tar czf <tarname>.tar.gz <dirname>/*
e.g.
`tar czf je_2.0.RC.tar.gz je_2.0.RC/*`
\ No newline at end of file
#!/bin/sh
# Wrapper around je_1.1_bundle.jar
# Wrapper around je_*_bundle.jar
# where are we stored ?
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# echo $DIR
# path to jar file to execute, this jar is supposed to be in the same dir as this script
JAR_FILE=$DIR"/je_1.0_bundle.jar"
JAR_FILE=$DIR"/je_2.0.RC_bundle.jar"
# set default _JAVA_OPTIONS
_JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'}
......
#!/bin/sh
# Wrapper around je_*_bundle.jar: locates the bundle jar stored next to this
# script and runs it with java, forwarding every command-line argument.

# where are we stored ?
# "$0" is used rather than ${BASH_SOURCE[0]}: the latter is a bash-only array
# expansion and fails ("Bad substitution") when /bin/sh is a strict POSIX
# shell such as dash.
DIR="$( cd "$( dirname "$0" )" && pwd )"
# echo $DIR

# path to jar file to execute, this jar is supposed to be in the same dir as this script
JAR_FILE="$DIR/je_2.0.RC_bundle.jar"

# set default _JAVA_OPTIONS (only when the caller has not set it already)
_JAVA_OPTIONS=${_JAVA_OPTIONS:-'-Xmx4G -Xms256m'}
export _JAVA_OPTIONS

# uncomment to change logging level using your own log4j.xml found in $DIR file
# OPTS="-Dlog4j.configuration=file:$DIR/log4j.xml"

# $OPTS is deliberately left unquoted so it expands to nothing when unset;
# the jar path is quoted so installation directories containing spaces work.
java $OPTS -jar "$JAR_FILE" "$@"

# propagate the exit status of java to the caller
exit $?
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<!-- log4j configuration: one console appender, referenced by the root logger at INFO level. -->
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">

  <!-- Console appender; the pattern below defines the layout of each log line. -->
  <appender name="CONSOLE" class="org.apache.log4j.ConsoleAppender">
    <layout class="org.apache.log4j.PatternLayout">
      <param name="ConversionPattern"
             value="%d{dd-MM-yy HH:mm:ss} %-5p [%t] %c{1}.%M(%L) | %m%n" />
    </layout>
  </appender>

  <!-- Root logger: INFO level, attached to the CONSOLE appender. -->
  <root>
    <level value="INFO" />
    <appender-ref ref="CONSOLE" />
  </root>

</log4j:configuration>
# Jars found in this folder are artifacts that are not available in Maven Central; you can push them to your local maven repo with the following commands:
#ADAPT fpath to YOUR Je/lib
LIBPATH="/Users/girardot/Work/eclipse_ws/Je/lib/"
#ADAPT path to YOUR Je/lib
LIBPATH="/Users/girardot/git/Je/lib/"
cd ~/.m2
mvn install:install-file -DgroupId=net.sf -DartifactId=htsjdk -Dversion=1.140custom -Dfile=$LIBPATH/custom-picard/htsjdk-1.140.jar -Dpackaging=jar -DgeneratePom=true
mvn install:install-file -DgroupId=net.sf -DartifactId=picard -Dversion=1.140custom -Dfile=$LIBPATH/custom-picard/picard.jar -Dpackaging=jar -DgeneratePom=true
mvn install:install-file -DgroupId=org.broadinstitute -DartifactId=picard -Dversion=2.9.4 -Dfile=$LIBPATH/picard_2.9.4.jar -Dpackaging=jar -DgeneratePom=true
# Uncomment to ADD GBCS artifacts if needed (i.e. if you don't have access to these repos)
# If you are at EMBL, you would rather check out the relevant projects and build them locally
......
......@@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>Je</groupId>
<artifactId>Je</artifactId>
<version>1.2</version>
<version>2.0.RC</version>
<name>Je</name>
<description>Je provides command line utilities to deal with barcoded FASTQ files with or without Unique Molecular Index (UMI)</description>
......@@ -233,34 +233,9 @@
<dependency>
<groupId>org.embl.cg.utilitytools</groupId>
<artifactId>ut_utils</artifactId>
<version>1.0</version>
</dependency>
<!-- <dependency> -->
<!-- <groupId>net.sf</groupId> -->
<!-- <artifactId>picard</artifactId> -->
<!-- <version>1.140</version> -->
<!-- </dependency> -->
<!-- <dependency> -->
<!-- <groupId>net.sf</groupId> -->
<!-- <artifactId>htsjdk</artifactId> -->
<!-- <version>1.140</version> -->
<!-- </dependency> -->
<dependency>
<groupId>net.sf</groupId>
<artifactId>picard</artifactId>
<version>1.140custom</version>
<version>1.0.1</version>
</dependency>
<dependency>
<groupId>net.sf</groupId>
<artifactId>htsjdk</artifactId>
<version>1.140custom</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
......@@ -291,6 +266,11 @@
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.broadinstitute</groupId>
<artifactId>picard</artifactId>
<version>2.9.4</version>
</dependency>
</dependencies>
......
......@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.embl.gbcs.je.jemultiplexer;
package org.embl.gbcs.je;
import java.io.IOException;
import java.io.InputStream;
......
......@@ -21,13 +21,15 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.embl.gbcs.je.jemultiplexer;
package org.embl.gbcs.je;
/** Utility class to hang onto data about the best match for a given barcode */
public class BarcodeMatch {
/**
* indicates if a barcode match has been found, in which case 'barcode' is not null
* indicates if this barcode match fullfils the thresholds for barcode matching
*/
public boolean matched;
......@@ -35,6 +37,12 @@ public class BarcodeMatch {
* sequence of the matched barcode
*/
public String barcode;
/**
* sequence extracted from read
*/
public String readSequence;
/**
* number of mismatches with 'barcode'
......@@ -48,8 +56,9 @@ public class BarcodeMatch {
public String toString(){
if(matched)
return "matched :"+ barcode+" [MM="+mismatches+", MMD="+mismatchesToSecondBest+"]";
return "no match";
return "Match for "+readSequence+ " read sequence : barcode "+ barcode+" identified with [MM="+mismatches+", MMD="+mismatchesToSecondBest+"]";
else
return "No Match for "+readSequence+ " read sequence (best barcode is "+ barcode+" identified with [MM="+mismatches+", MMD="+mismatchesToSecondBest+"])";
}
}
This diff is collapsed.
......@@ -26,12 +26,15 @@ package org.embl.gbcs.je;
import java.util.Set;
import java.util.TreeSet;
import org.embl.cg.utilitytools.utils.ExceptionUtil;
import org.embl.cg.utilitytools.utils.StringUtil;
import org.embl.gbcs.je.demultiplexer.DemultiplexCLI;
import org.embl.gbcs.je.jeclipper.Jeclipper;
import org.embl.gbcs.je.jedropseq.Jedropseq;
import org.embl.gbcs.je.jeduplicates.MarkDuplicatesWithMolecularCode;
import org.embl.gbcs.je.jemultiplexer.Jemultiplexer;
import org.embl.gbcs.je.jemultiplexer.JemultiplexerIllumina;
import org.embl.gbcs.je.retag.TagFromReadName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -48,6 +51,8 @@ public class Je {
private static Logger log = LoggerFactory.getLogger(Je.class);
public static final String COMMAND_RETAG = "retag";
public static final String COMMAND_DEMULTIPLEX = "debarcode";
public static final String COMMAND_DROPSEQ = "dropseq";
public static final String COMMAND_CLIP = "clip";
public static final String COMMAND_DUPES = "markdupes";
......@@ -61,7 +66,9 @@ public class Je {
ALLOWED_COMMANDS.add(COMMAND_DUPES);
ALLOWED_COMMANDS.add(COMMAND_MULTIPLEX);
ALLOWED_COMMANDS.add(COMMAND_MULTIPLEX_ILLUMINA);
//ALLOWED_COMMANDS.add(COMMAND_DROPSEQ);
ALLOWED_COMMANDS.add(COMMAND_DROPSEQ);
ALLOWED_COMMANDS.add(COMMAND_DEMULTIPLEX);
ALLOWED_COMMANDS.add(COMMAND_RETAG);
}
......@@ -90,51 +97,63 @@ public class Je {
System.exit(0);
}
else if(!ALLOWED_COMMANDS.contains(option.toLowerCase())){
System.err.println("Unknown command name : "+option);
System.err.println(getUsage());
System.err.println("Unknown command name : "+option+" ; please check help with -h");
//System.err.println(getUsage());
System.exit(1); //error
}
/*
* looks good , we delegate to proper implementation
*/
String [] argv = {"-h"}; // init to get help
if(args.length > 1){
argv = StringUtil.subArray(args, 1, args.length-1);
}
if(option.equalsIgnoreCase(COMMAND_CLIP)){
new Jeclipper().instanceMainWithExit(argv);
}
else if(option.equalsIgnoreCase(COMMAND_MULTIPLEX)){
new Jemultiplexer().instanceMainWithExit(argv);
}
else if(option.equalsIgnoreCase(COMMAND_MULTIPLEX_ILLUMINA)){
new JemultiplexerIllumina().instanceMainWithExit(argv);
}
else if(option.equalsIgnoreCase(COMMAND_DUPES)){
new MarkDuplicatesWithMolecularCode().instanceMainWithExit(argv);
}
else if(option.equalsIgnoreCase(COMMAND_DROPSEQ)){
new Jedropseq().instanceMainWithExit(argv);
}
else{
System.err.println(
"FATAL : We just reached a supposedly unreachable part of the code. Please report this bug to Je developpers indicating the options you used i.e. : \n "+
StringUtil.mergeArray(args, " ")
);
try{
String [] argv = {"-h"}; // init to get help
if(args.length > 1){
argv = StringUtil.subArray(args, 1, args.length-1);
}
if(option.equalsIgnoreCase(COMMAND_CLIP)){
new Jeclipper().instanceMainWithExit(argv);
}
else if(option.equalsIgnoreCase(COMMAND_DEMULTIPLEX)){
new DemultiplexCLI().instanceMainWithExit(argv);
}
else if(option.equalsIgnoreCase(COMMAND_MULTIPLEX)){
new Jemultiplexer().instanceMainWithExit(argv);
}
else if(option.equalsIgnoreCase(COMMAND_MULTIPLEX_ILLUMINA)){
new JemultiplexerIllumina().instanceMainWithExit(argv);
}
else if(option.equalsIgnoreCase(COMMAND_DUPES)){
new MarkDuplicatesWithMolecularCode().instanceMainWithExit(argv);
}
else if(option.equalsIgnoreCase(COMMAND_DROPSEQ)){
new Jedropseq().instanceMainWithExit(argv);
}
else if(option.equalsIgnoreCase(COMMAND_RETAG)){
new TagFromReadName().instanceMainWithExit(argv);
}
else{
System.err.println(
"FATAL : We just reached a supposedly unreachable part of the code. Please report this bug to Je developpers indicating the options you used i.e. : \n "+
StringUtil.mergeArray(args, " ")
);
System.exit(1); //error
}
} catch(Exception e){
log.error(ExceptionUtil.getStackTrace(e));
System.exit(1); //error
}
}
protected static String getUsage(){
return "Usage: je <command> [options] \n\n"+
"with command in : \n"
+"\t "+COMMAND_CLIP+" \t\t clips molecular barcodes from fastq sequence and places them in read name headers for further use in 'dupes' module\n"
+"\t "+COMMAND_MULTIPLEX+" \t\t demultiplex fastq file(s), with optional handling of molecular barcodes for further use in 'dupes' module\n"
+"\t "+COMMAND_MULTIPLEX_ILLUMINA+" \t demultiplex fastq file(s) using Illumina Index files, with optional handling of molecular barcodes for further use in 'dupes' module\n"
+"\t "+COMMAND_CLIP+" \t\t clips barcodes/UMIs from fastq sequence and places them in read name headers \n"
+"\t "+COMMAND_DEMULTIPLEX+" \t\t demultiplexes fastq file(s) into user-defined output files, with optional handling of molecular barcodes\n"
+"\t "+COMMAND_MULTIPLEX+" \t\t demultiplexes fastq file(s) with Je 1.x implementation, with optional handling of molecular barcodes for further use in 'dupes' module\n"
+"\t "+COMMAND_MULTIPLEX_ILLUMINA+" \t demultiplexes fastq file(s) using Illumina Index files with Je 1.x implementation, with optional handling of molecular barcodes for further use in 'dupes' module\n"
+"\t "+COMMAND_DUPES+" \t\t removes read duplicates based on molecular barcodes found in read name headers (as produced by clip or plex)\n"
//+"\t "+COMMAND_DROPSEQ+" \t\t clips cell barcode and UMI from read 1 and adds them to header of read 2. This command is for processing drop-seq results.\n"
+"\t "+COMMAND_DROPSEQ+" \t\t clips cell barcode and UMI from read 1 and adds them to header of read 2. This command is for processing drop-seq results.\n"
+"\t "+COMMAND_RETAG+" \t\t extracts barcode and UMI sequence(s) embedded in read names and tag reads with proper BAM tag.\n"
+"\n"
+"Version : "+getVersion()
;
......
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.embl.gbcs.je;
import java.util.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import htsjdk.samtools.SAMUtils;
/**
 * Scratch class exposing a command-line entry point that currently performs no work.
 */
public class JeTry {

    private static Logger log = LoggerFactory.getLogger(JeTry.class);

    /**
     * Entry point; the body is currently empty.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
    }

    /**
     * Creates an instance; no state is initialised.
     */
    public JeTry() {
    }
}
......@@ -23,11 +23,127 @@
*/
package org.embl.gbcs.je;
import java.util.Set;
import java.util.TreeSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.developpez.adiguba.shell.ProcessConsumer;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.fastq.FastqReader;
import htsjdk.samtools.util.FastqQualityFormat;
import htsjdk.samtools.util.QualityEncodingDetector;
import htsjdk.samtools.util.SolexaQualityConverter;
public class JeUtils {
/** Logger shared by the static utilities of this class. */
private static Logger log = LoggerFactory.getLogger(JeUtils.class);
/**
 * BC : for sample barcode, raw or corrected, with QT to store its quality string
 */
public static final String SAMTAG_BC = "BC";
/**
 * QT : Phred quality of the sample-barcode sequence in the BC (or RT) tag
 */
public static final String SAMTAG_QT = "QT";
/**
 * RX : Sequence bases of the (possibly corrected) unique molecular identifier
 */
public static final String SAMTAG_RX = "RX";
/**
 * QX : Quality score of the unique molecular identifier in the RX tag
 */
public static final String SAMTAG_QX = "QX";
/**
 * OX : Original unique molecular barcode bases
 */
public static final String SAMTAG_OX = "OX";
/**
 * BZ : Phred quality of the unique molecular barcode bases in the OX tag
 */
public static final String SAMTAG_BZ = "BZ";
/**
 * MI : Molecular identifier; a string that uniquely identifies the molecule from which the record was derived
 */
public static final String SAMTAG_MI = "MI";
/**
 * Converts a string of concatenated Phred quality numbers, each written with exactly
 * 2 characters, to the Standard Phred + 33 encoding.
 * <p>
 * The input is cut into consecutive 2-character chunks (e.g. <code>"3035"</code> gives the
 * values 30 and 35); each chunk is parsed with {@link Byte#parseByte(String)} and the
 * resulting byte array is passed to <code>SAMUtils.phredToFastq(byte[])</code>.
 * An empty input is passed on as an empty byte array.
 *
 * @param s the qualities as concatenated 2-character numbers; must not be null
 * @return the value returned by <code>SAMUtils.phredToFastq</code> for the parsed qualities
 * @throws IllegalArgumentException if <code>s</code> has an odd number of characters, i.e. cannot
 *         be cut into 2-character qualities
 * @throws NumberFormatException if a 2-character chunk is not a parsable byte value
 */
public static String toBytesThenPhred(String s) {
    final int length = s.length();
    if (length % 2 != 0) {
        // fail with an explicit message instead of overrunning the result array
        throw new IllegalArgumentException(
                "Quality string must hold 2 characters per quality but has an odd length ("
                + length + ") : " + s);
    }

    final byte[] arr = new byte[length / 2];
    for (int i = 0; i < arr.length; i++) {
        final int start = 2 * i;
        arr[i] = Byte.parseByte(s.substring(start, start + 2));
    }
    return SAMUtils.phredToFastq(arr);
}
/**
 * Converts, in the given array, quality values of the stated fastq encoding to
 * numeric Phred-scale values.
 *
 * @param quals the quality values to convert; handed as-is to the matching htsjdk converter
 * @param version the encoding of <code>quals</code>; must not be null
 */
public static void convertQualityToPhred(byte[] quals, final FastqQualityFormat version) {
    if (version == null) {
        // same failure as switching on a null enum value
        throw new NullPointerException();
    }

    if (version == FastqQualityFormat.Standard) {
        SAMUtils.fastqToPhred(quals);
    } else if (version == FastqQualityFormat.Solexa) {
        SolexaQualityConverter.getSingleton().convertSolexaQualityCharsToPhredBinary(quals);
    } else if (version == FastqQualityFormat.Illumina) {
        SolexaQualityConverter.getSingleton().convertSolexa_1_3_QualityCharsToPhredBinary(quals);
    }
    // any other format leaves quals untouched
}
/**
 * Looks at fastq input(s) and attempts to determine the proper quality format.
 * <p>
 * Closes the reader(s) by side effect, including when sampling the readers fails.
 *
 * @param readers readers on the input fastq files
 * @param expectedQuality If provided, will be used for sanity checking. If left null, autodetection will occur
 * @return the best guess produced by the htsjdk <code>QualityEncodingDetector</code>
 */
public static FastqQualityFormat determineQualityFormat(final FastqReader [] readers, final FastqQualityFormat expectedQuality) {
    final QualityEncodingDetector detector = new QualityEncodingDetector();

    try {
        // feed all fastq readers to the detector
        detector.add(QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, readers);
    } finally {
        // close all readers even if the detector failed, so no file handle is leaked
        for (FastqReader reader : readers) {
            reader.close();
        }
    }

    final FastqQualityFormat qualityFormat = detector.generateBestGuess(QualityEncodingDetector.FileContext.FASTQ, expectedQuality);

    // in case there is no expected quality and different options were possible, warn user
    if (detector.isDeterminationAmbiguous()) {
        log.warn("Making ambiguous determination about fastq's quality encoding; more than one format possible based on observed qualities.");
    }
    log.info(String.format("Auto-detected quality format as: %s.", qualityFormat));

    return qualityFormat;
}
/**
* @return the result of executing whoami on the underlying OS
*/
......@@ -61,6 +177,19 @@ public class JeUtils {
}
/**
 * Counts the distinct barcode block ids found across the given read layouts.
 *
 * @param readLayouts the read layouts to inspect
 * @return the size of the set built by {@link #barcodeBlockUniqueIdSet(ReadLayout[])}
 */
public static int barcodeSlotCount(ReadLayout[] readLayouts) {
    final Set<Integer> distinctIds = barcodeBlockUniqueIdSet(readLayouts);
    return distinctIds.size();
}
/**
 * Gathers the barcode block ids of all given read layouts into one sorted set.
 *
 * @param readLayouts the read layouts whose barcode block ids are collected
 * @return a new {@link TreeSet} holding every id reported by the layouts, without duplicates
 */
public static Set<Integer> barcodeBlockUniqueIdSet(ReadLayout[] readLayouts) {
    final Set<Integer> collected = new TreeSet<Integer>();
    for (int idx = 0; idx < readLayouts.length; idx++) {
        collected.addAll(readLayouts[idx].getBarcodeBlockUniqueIds());
    }
    return collected;
}
......
......@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.embl.gbcs.je.jemultiplexer;
package org.embl.gbcs.je;
import htsjdk.samtools.Defaults;
import htsjdk.samtools.fastq.AsyncFastqWriter;
......@@ -37,7 +37,7 @@ import java.util.zip.GZIPOutputStream;
public class JemultiplexerFastqWriterFactory {
boolean useAsyncIo = Defaults.USE_ASYNC_IO;
boolean useAsyncIo = Defaults.USE_ASYNC_IO_WRITE_FOR_SAMTOOLS;
/** Sets whether or not to use async io (i.e. a dedicated thread per writer. */
......
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.embl.gbcs.je;
/**
 * Exception carrying a message to which the offending layout string is appended.
 */
public class LayoutMalformedException extends Jexception {

    private static final long serialVersionUID = -4024288961353288534L;

    /**
     * @param message description of the problem
     * @param layout the layout string, appended to the message on a new line
     */
    public LayoutMalformedException(String message, String layout) {
        super(describe(message, layout));
    }

    /** Builds the full exception text: the message followed by the layout. */
    private static String describe(String message, String layout) {
        final StringBuilder text = new StringBuilder();
        text.append(message);
        text.append("\n Layout was : ");
        text.append(layout);
        return text.toString();
    }
}
This diff is collapsed.
This diff is collapsed.
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.embl.gbcs.je;