Commit 1425bbbf authored by Paul Igor Costea's avatar Paul Igor Costea
Browse files

Merge branch 'master' into 'master'

Make thresholds command line options instead of hard coded

Also:
- Fix a crash when -i option is missing
- Add Makefile dependency

See merge request !4
parents 7db3e741 bcaeb5da
......@@ -13,8 +13,8 @@ all: snpCall
snpCall: snpCall.o
$(CC) snpCall.o -o snpCall
snpCall.o:
$(CC) -I$(BOOST) -I$(INCLUDE) $(CFLAGS) call_vC.cpp -o snpCall.o
snpCall.o: call_vC.cpp
$(CC) -I$(BOOST) -I$(INCLUDE) $(CFLAGS) $< -o $@
clean:
rm -rf *o snpCall
......@@ -20,9 +20,17 @@
#include <boost/lexical_cast.hpp>
using namespace boost::icl;
#define MIN_COVERAGE_FOR_CALLING 4
#define CALLING_THRESHOLD 4
#define CALLING_MIN_FRACTION 0.01
struct SNPCallOptions {
SNPCallOptions()
:min_coverage(4)
,calling_threshold(4)
,calling_min_fraction(0.01)
{ }
int min_coverage;
int calling_threshold;
double calling_min_fraction;
};
#define READ_BUFFER_SIZE 10000000
......@@ -342,8 +350,9 @@ int main(int argc, char** argv) {
//Define the char map!
std::map<char,long*> map;
SNPCallOptions options;
while ((c = getopt (argc, argv, "hdab:f:g:i:")) != -1)
while ((c = getopt (argc, argv, "hdab:f:g:i:c:p:t:")) != -1)
switch (c)
{
case 'h': // help message
......@@ -380,7 +389,16 @@ int main(int argc, char** argv) {
return -1;
}
// printf("Writing individual SNPs to %s \n", optarg);
break;
break;
case 'c':
options.min_coverage = atol(optarg);
break;
case 'p':
options.calling_min_fraction = atof(optarg);
break;
case 't':
options.calling_threshold = atol(optarg);
break;
case '?':
if (( optopt == 'f') || ( optopt == 'g') || ( optopt == 'i') ){
if ( optopt == 'f'){
......@@ -540,10 +558,10 @@ std::map<std::string, char> CodonMap(codons, codons + sizeof codons / sizeof cod
cov = getSum(map,"actgACTG,.");
if (cov < MIN_COVERAGE_FOR_CALLING) {//We don't have enough coverage
if (cov < options.min_coverage) {//We don't have enough coverage
continue;
}
if (getSum(map,"actgACTG") < CALLING_THRESHOLD) {//We don't have enough SNP calls
if (getSum(map,"actgACTG") < options.calling_threshold) {//We don't have enough SNP calls
continue;
}
......@@ -581,14 +599,14 @@ std::map<std::string, char> CodonMap(codons, codons + sizeof codons / sizeof cod
bool writeThis = false;
std::string* sToWrite = NULL;
if ((snpCount >= CALLING_THRESHOLD) && (snpCount >= cov*CALLING_MIN_FRACTION)) { // This is a "common" variant
if ((snpCount >= options.calling_threshold) && (snpCount >= cov*options.calling_min_fraction)) { // This is a "common" variant
write = true;
writeThis = true;
sToWrite = &s;
} else { //May be a individual variant
for (int i=1; i<=nrSamples; ++i) {
int s = getSum(map,check,i);
if (s >= CALLING_THRESHOLD) {//We observe the variant "CALLING_THRESHOLD" times in one sample.
if (s >= options.calling_threshold) {//We observe the variant "options.calling_threshold" times in one sample.
writeThis = true;
sToWrite = &indiv;
break;
......@@ -638,13 +656,28 @@ std::map<std::string, char> CodonMap(codons, codons + sizeof codons / sizeof cod
s.erase(0,1);//There's an extra comma here
//We now have information about all of the snp's. If multiple, sperate them by ,
fprintf(stdout,"%s\t%s\t%ld\t%c\t%s\t%s\n",name.c_str(),geneName.c_str(),lP+1,//again, this one gets converted to 1 based representation
base,getCoverageString(map,nrSamples,"actgACTG,.").c_str(),s.c_str());
}
fprintf(stdout,"%s\t%s\t%ld\t%c\t%s\t%s\n",
name.c_str(),
geneName.c_str(),
long(lP+1),//again, this one gets converted to 1 based representation
base,
getCoverageString(map,nrSamples,"actgACTG,.").c_str(),
s.c_str());
}
if (indiv.length() != 0) {
indiv.erase(0,1);
fprintf(individualFile,"%s\t%s\t%ld\t%c\t%s\t%s\n",name.c_str(),geneName.c_str(),lP+1,//again, this one gets converted to 1 based representation
base,getCoverageString(map,nrSamples,"actgACTG,.").c_str(),indiv.c_str());
if (individualFile == NULL) {
fprintf(stderr, "Individual SNPs detected, but no individual output file specified (-i option).\n");
individualFile = fopen("/dev/null", "w"); // This way, in the next iteration, warning above will not be printed
} else {
fprintf(individualFile,"%s\t%s\t%ld\t%c\t%s\t%s\n",
name.c_str(),
geneName.c_str(),
long(lP+1),//again, this one gets converted to 1 based representation
base,
getCoverageString(map,nrSamples,"actgACTG,.").c_str(),
indiv.c_str());
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment