Commit 86879e42 by Robin Erich Muench

### update createOptSplit.py

parent 444b593a
 ... ... @@ -2,8 +2,10 @@ import sys import operator cov = open(sys.argv[1],'r') genomes = open(sys.argv[2],'r') nrToSplit = int(sys.argv[3]) perc = open(sys.argv[2],'r') genomes = open(sys.argv[3],'r') nrToSplit = int(sys.argv[4]) outf = sys.argv[5] genomeDict = dict() contigDict = dict() ... ... @@ -35,15 +37,33 @@ for line in cov: print 'Found %d genomes again. Hope they match'%(len(covDict)) perc.readline() perc.readline() percDict = dict() for line in perc: #Get the sum coverage s = 0.0 l = line.rstrip().split('\t') for i in range(1,len(l)): s += float(l[i]) percDict[l[0]] = s print 'Found %d genomes again. Hope they match'%(len(percDict)) table = [] #Get an approximation of how many reads hit each genome. This is as close as you will get to figuring out how long running it is going to take for k in genomeDict.keys(): read = genomeDict[k]*covDict[k]/100 read = genomeDict[k]*covDict[k]/100 #*percDict[k]/100 table.append((k,read)) #Now, sort the table t = sorted(table,key=operator.itemgetter(1),reverse=True) #Now, sort the table t = sorted(table,key=operator.itemgetter(1),reverse=True)#must be True #print(head(t)) #Great, now get these into X bins res = [0]*nrToSplit ... ... @@ -55,7 +75,7 @@ for k in range(len(t)): res[pos] += table[k][1] names[t[k][0]] = pos outf = sys.argv[4] #outf = sys.argv[5] #Open as many files as bins fileDict = dict() for i in xrange(nrToSplit): ... ...
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment