Commit 95468b33 authored by Robin Erich Muench's avatar Robin Erich Muench
Browse files

Updated qaCompute to work with latest samtools

parent 86879e42
......@@ -31,7 +31,7 @@ typedef khash_t(rg) *rghash_t;
typedef struct
{
int doMedian,maxCoverage,minQual,maxInsert,windowSize;
bool spanCov,silent,skipZero;
bool spanCov,silent;
uint32_t subsam_seed;
double subsample;
FILE* detailed,*profile,*specific,*regionDef;
......@@ -74,8 +74,8 @@ static bool is_mapped(const bam1_core_t *core)
static int print_usage()
{
fprintf(stderr, "\n");
fprintf(stderr, "Version: 3.0\n");
fprintf(stderr, "Contact: Paul Costea <costea@embl.de>\n\n");
fprintf(stderr, "Version: 1.5\n");
fprintf(stderr, "Contact: Paul Costea <paul.igor.costea@scilifelab.se>\n\n");
fprintf(stderr, "Usage: qaCompute [options] <in.bam/sam> <output.out>\n");
fprintf(stderr, "Options: \n");
fprintf(stderr, " -m Also compute median coverage\n");
......@@ -83,7 +83,6 @@ static int print_usage()
fprintf(stderr, " -d Print per-chromosome histogram [<output.out>.detail]\n");
fprintf(stderr, " -p [INT] Print coverage profile at INT window size to bed file [<output.out>.profile] [50000]\n");
fprintf(stderr, " -x [STR] Print coverage over the features in STR. Should be of format: Chr<TAB>Start<TAB>End<TAB>Alias\n");
fprintf(stderr, " -z Don't print stuff that has zero average coverage\n");
fprintf(stderr, " -a [FLOAT] Subsample reads with probability [FLOAT]. [1.0]\n");
fprintf(stderr, " -i Silent.Don't print too much stuff!\n");
fprintf(stderr, " -s [INT] Compute 'span coverage' rather than base coverage, limiting insert size to INT. -1 -> consider all!\n");
......@@ -222,9 +221,6 @@ static void compute_print_cov(FILE* outputFile, Options userOpt, int* data, char
*/
void printSkipped(FILE* outputFile, Options userOpt, bam_header_t* head, int start, int end)
{
if (userOpt.skipZero) {//Nope, not writing these
return;
}
for (int i = start; i < end; ++i) {
if (!userOpt.silent) {
printf("Computing %s of size %u... \n",head->target_name[i],head->target_len[i]);
......@@ -270,10 +266,10 @@ samfile_t * open_alignment_file(std::string path)
{
samfile_t * fp = NULL;
std::string flag = "r";
if (path.substr(path.size()-3).compare("bam") == 0) {
//BAM file!
flag += "b";
}
// if (path.substr(path.size()-3).compare("bam") == 0) {
//BAM file!
flag += "b";
//}
if ((fp = samopen(path.c_str(), flag.c_str() , 0)) == 0) {
fprintf(stderr, "qaCompute: Failed to open file %s\n", path.c_str());
}
......@@ -304,21 +300,19 @@ int main(int argc, char *argv[])
userOpt.maxInsert = -1;
userOpt.subsample = -1.0;
userOpt.subsam_seed = 0.0;
userOpt.skipZero = false;
bool doDetail = false;
bool doProfile = false;
std::map<std::string,std::list<Interval> > intervalMap;
int arg;
char *q;
//Get args
while ((arg = getopt(argc, argv, "zmdip:s:q:c:h:x:a:")) >= 0) {
while ((arg = getopt(argc, argv, "mdip:s:q:c:h:x:a:")) >= 0) {
switch (arg) {
case 'm': userOpt.doMedian = 1; break;
case 'd': doDetail = true; break;
case 'i': userOpt.silent = true; break;
case 'q': userOpt.minQual = atoi(optarg); break;
case 'c': userOpt.maxCoverage = atoi(optarg); break;
case 'z': userOpt.skipZero = true; break;
case 'a':
if ((userOpt.subsam_seed = strtol(optarg, &q, 10)) != 0) {
srand(userOpt.subsam_seed);
......@@ -603,7 +597,7 @@ int main(int argc, char *argv[])
fprintf(stdout,"\nDuplicates:%u \n", duplicates);
//Print header for next table in output file
//fprintf(outputFile,"\nCov*X\tPercentage\tNr. of bases\n");
fprintf(outputFile,"\nCov*X\tPercentage\tNr. of bases\n");
fprintf(stdout,"Total genome lenght %lu \n", totalGenomeLength);
//Compute procentages of genome cover!.
......@@ -617,27 +611,27 @@ int main(int argc, char *argv[])
//All that has been covered i, had been covered i+1, i+2 and so on times. Thus, do this addition
for (int x = i; x<=userOpt.maxCoverage; ++x) coverage += coverageHist[x];
fprintf(stdout,"%3.2f of genome has been covered at least %dX \n", (double)(coverage)/totalGenomeLength*100, i);
//fprintf(outputFile,"%d\t%3.5f\t%lu\n",i, (double)(coverage)/totalGenomeLength*100, coverage);
fprintf(outputFile,"%d\t%3.5f\t%lu\n",i, (double)(coverage)/totalGenomeLength*100, coverage);
}
}
//fprintf(outputFile,"\nOther\n");
fprintf(outputFile,"\nOther\n");
//Printout procentage of mapped/unmapped reads
double procentageOfUnmapped = 100*((double)unmappedReads/totalNumberOfReads);
double procentageOfZeroQuality = 100*((double)zeroQualityReads/totalNumberOfReads);
//fprintf(outputFile,"Total number of reads: %u\n", totalNumberOfReads);
//fprintf(outputFile,"Total number of duplicates found and ignored: %u\n", duplicates);
//fprintf(outputFile,"Percentage of unmapped reads: %3.5f\n", procentageOfUnmapped);
//fprintf(outputFile,"Percentage of sub-par quality mappings: %3.5f\n", procentageOfZeroQuality);
fprintf(outputFile,"Total number of reads: %u\n", totalNumberOfReads);
fprintf(outputFile,"Total number of duplicates found and ignored: %u\n", duplicates);
fprintf(outputFile,"Percentage of unmapped reads: %3.5f\n", procentageOfUnmapped);
fprintf(outputFile,"Percentage of sub-par quality mappings: %3.5f\n", procentageOfZeroQuality);
int32_t nrOfPaires = totalNumberOfReads/2;
double procOfProperPaires = (double)(100*(double)totalProperPaires/2)/nrOfPaires;
//fprintf(outputFile,"Number of proper paired reads: %u\n", totalProperPaires);
//fprintf(outputFile,"Percentage of proper pairs: %3.5f\n", procOfProperPaires);
//if (userOpt.spanCov) {
// fprintf(outputFile, "Number of interchromosomal pairs: %u\n",interChr);
//}
fprintf(outputFile,"Number of proper paired reads: %u\n", totalProperPaires);
fprintf(outputFile,"Percentage of proper pairs: %3.5f\n", procOfProperPaires);
if (userOpt.spanCov) {
fprintf(outputFile, "Number of interchromosomal pairs: %u\n",interChr);
}
printf("Out of %u reads, you have %3.5f unmapped reads\n and %3.5f sub-par quality mappings\n", totalNumberOfReads ,procentageOfUnmapped, procentageOfZeroQuality);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment