Commit 1794b1b2 authored by Robin Erich Muench's avatar Robin Erich Muench
Browse files

Merge branch 'coelho/metaSNP-refactor'

Conflicts:
	src/computeGenomeCoverage.py
parents cf423e71 53beda6e
import sys
# We assume that contigs are sorted within the genome
# We assume that contings are sorted within the genome, so that a contig won't
# show up randomly in the file, but together with its friends.
cov = open(sys.argv[1],'r')
xcov = open(sys.argv[2],'r')
......@@ -14,42 +15,39 @@ while True:
xcovL = xcov.readline()
if not xcovL:
break
name = covL.split('\t')[0]
namex = xcovL.split('\t')[0]
covL = covL.split('\t')
xcovL = xcovL.split('\t')
name = covL[0]
namex = xcovL[0]
if name != namex:
print name
print namex
print 'oups'
print("Mismatch in names {} != {}".format(name, namex))
taxId = name.split('.')[0]
if taxId not in genomeMap:
genomeMap[taxId] = [0.0,0.0,0.0,0.0] #why not [0,0.0,0.0,0.0] since 1st is int?
genomeMap[taxId] = [0.0, 0.0, 0.0, 0.0]
#Add the length
genomeMap[taxId][0] += int(covL.split('\t')[1])
#genomeMap[taxId][0] += 1
#Add the average coverage weighted with the length
genomeMap[taxId][1] += float(covL.split('\t')[2]) * int(covL.split('\t')[1])
#genomeMap[taxId][1] += int(float(covL.split('\t')[2]))
#Add the number of bases covered at 1x
genomeMap[taxId][2] += int(xcovL.split('\t')[2])
#Add the number of bases covered at 2x
genomeMap[taxId][3] += int(xcovL.split('\t')[3])
genomeMap[taxId][0] += int(covL[1])
outf = open(sys.argv[3],'w')
#Write header
outf.write('TaxId\tAverage_cov\tPercentage_1x\tPercentage_2x\n')
#Add the average coverage weighted with the length
genomeMap[taxId][1] += float(covL[2]) * int(covL[1])
moreThan10x = 0
#Print the average coverage over the taxId's
for k in genomeMap.keys():
if genomeMap[k][1]/genomeMap[k][0] > 10:
moreThan10x += 1
outf.write('%s\t%f\t%f\t%f\n'%(k,genomeMap[k][1]/genomeMap[k][0],genomeMap[k][2]/genomeMap[k][0]*100,genomeMap[k][3]/genomeMap[k][0]*100))
#Add the number of bases covered at 1x
genomeMap[taxId][2] += int(xcovL[2])
outf.close()
#Add the number of bases covered at 2x
genomeMap[taxId][3] += int(xcovL[3])
cov.close()
xcov.close()
#print moreThan10x
print 'Done'
with open(sys.argv[3],'w') as outf:
#Write header
outf.write('TaxId\tAverage_cov\tPercentage_1x\tPercentage_2x\n')
#Print the average coverage over the taxId's
for k in genomeMap:
outf.write('%s\t%f\t%f\t%f\n'%(k,
genomeMap[k][1]/genomeMap[k][0],
genomeMap[k][2]/genomeMap[k][0]*100,
genomeMap[k][3]/genomeMap[k][0]*100))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment