Commit cb901241 authored by Thomas Schwarzl's avatar Thomas Schwarzl

added command line usage

parent 2f25b844
......@@ -30,102 +30,253 @@
# -> total number of gaps can be controlled
# -> Stem length and loop length can be controlled
# ---------------------------------------------------------------------------------
# Module metadata and default settings for the stem loop generator.
# NOTE(review): several names below (MAX_GAP_SIZE, MAX_TOTAL_GAP_COUNT,
# MIN_LOOP_SIZE, MAX_LOOP_SIZE, STEM_SIZE) are assigned twice in this view;
# if executed top to bottom the later assignment wins - confirm which value
# is intended.
__author__ = 'Tom'
# Default variables
# program version (a float; rendered with %s where it is displayed)
__VERSION__ = 0.1
# verbose flag for debug purposes (default of the --verbose switch)
__VERBOSE__ = False
import lib
from lib.StemLoopFactory import StemLoopFactory
# output file name (empty for writing to stdout)
OUTFILE = ""
# log output file name (empty for writing to stderr)
LOGOUT = ""
# stem size
STEM_SIZE = 3#5
# minimum loop size
MIN_LOOP_SIZE = 2#4
# maximum loop size
MAX_LOOP_SIZE = 2#6
# maximum gap size
MAX_GAP_SIZE = 2
MAX_GAP_SIZE = 0 #2
# maximum total gap count
MAX_TOTAL_GAP_COUNT = 0#3
MAX_TOTAL_GAP_COUNT = 2
# how many matches must come after the loop before the first gap is allowed
MIN_MATCHES_BEFORE_FIRST_GAP = 2
# minimum loop size
MIN_LOOP_SIZE = 4#4
# minimum GC content
MIN_GC_CONTENT = 0.9
# maximum loop size
MAX_LOOP_SIZE = 4#8
# stem size
STEM_SIZE = 4#7
# Usage/help text handed to optparse.OptionParser(usage=...).
# The nine %s placeholders are filled, in order, with the module-level
# defaults listed in the tuple at the end of the literal.
__HELP__ = '''
SYNOPSIS

    StemLoopGenerator.py <outputFile> [options]

DESCRIPTION

    <outputFile>
        output file for all sequences
        empty for writing to stdout
        default: %s

    [-l, --log=STRING]
        log out
        empty for writing to stderr
        default: %s

    [-s, --stem-size=INT]
        nucleotides of stem
        default: %s

    [-m, --min-loop-size=INT]
        min loop size
        default: %s

    [-n, --max-loop-size=INT]
        max loop size
        default: %s

    [-a, --max-gap-size=INT]
        maximum gap size
        default: %s

    [-g, --max-total-gaps=INT]
        maximum total gap count
        default: %s

    [-b, --min_matches_before_gap=INT]
        how many matches must follow the loop before the first gap
        default: %s

    [-c, --min-gc-content=FLOAT]
        minimum gc content of generated sequence
        default: %s

    [-t, --test]
        test run without sequence evaluation

    [-h, --help]
        display help message

    [-v, --verbose]
        display verbose output for debugging

EXAMPLES

    python StemLoopGenerator.py > sequences.txt 2> summary.txt
    python StemLoopGenerator.py sequences.txt
    python StemLoopGenerator.py --help

AUTHOR

    Thomas Schwarzl <schwarzl@embl.de>
''' % (OUTFILE,
       LOGOUT,
       STEM_SIZE,
       MIN_LOOP_SIZE,
       MAX_LOOP_SIZE,
       MAX_GAP_SIZE,
       MAX_TOTAL_GAP_COUNT,
       MIN_MATCHES_BEFORE_FIRST_GAP,
       MIN_GC_CONTENT)
import lib, sys, optparse, logging, traceback, os
from lib.StemLoopFactory import StemLoopFactory
# Logging
LOGGER = logging.getLogger()
def exceptionHandling(exception):
    """Report a fatal error and terminate the process with exit status 1.

    The message and the traceback of the exception currently being handled
    are written to stderr, so that stdout (which may carry the generated
    sequences) is not polluted with error text.

    exception: the exception instance to report.

    Does not return: the process is ended with os._exit(1).
    """
    sys.stderr.write("%s\n" % (exception,))
    traceback.print_exc()
    # os._exit() ends the process without the normal interpreter shutdown,
    # so buffered output has to be flushed by hand or it may be lost.
    for stream in (sys.stdout, sys.stderr):
        try:
            stream.flush()
        except (ValueError, OSError):
            # stream already closed or the pipe is gone; nothing left to flush
            pass
    os._exit(1)
# Entry point of the command line tool: configures a StemLoopFactory from the
# parsed options, logs its summary and, unless this is a test run, generates
# the sequences and writes them out.
# Reads the module-level names `options` and `args`, which the
# `if __name__ == '__main__'` block assigns from parser.parse_args().
# NOTE(review): indentation is lost in this view, so which of the statements
# after `if not options.TEST:` belong to its body cannot be determined from
# here - confirm against the real file.
def main():
# only read here, never rebound
global options, args
# set output file: first positional argument, or None when none was given
# (this local name is distinct from the module-level OUTFILE default)
OUTFILE = args[0] if len(args) > 0 else None
# initialise logger (the returned handler is not used again in this function)
logFH = init_logger(options.LOGOUT)
# create stem loop factory and copy every command line setting onto it
f = StemLoopFactory()
f.max_gap_size = options.MAX_GAP_SIZE
f.max_total_gap_count = options.MAX_TOTAL_GAP_COUNT
f.min_loop_size = options.MIN_LOOP_SIZE
f.max_loop_size = options.MAX_LOOP_SIZE
f.stem_size = options.STEM_SIZE
f.min_matches_before_first_gap = options.MIN_MATCHES_BEFORE_FIRST_GAP
f.min_gc_content = options.MIN_GC_CONTENT
#logging.getLogger().setLevel(logging.INFO)
# print the summary (emitted through the logger at WARNING level)
LOGGER.warning(f.summary(__VERSION__))
# if it is not a test run: build the stem loop, compute the sequences,
# log their count and the first 10 of them, then write them to OUTFILE
if not options.TEST:
f.init_stemloop()
f.calculate_sequences()
LOGGER.warning("# " + str(f.sequences_summary()))
LOGGER.warning("# example sequences: " + str(f.sequences_header(10)))
f.print_sequences(OUTFILE)
# close to avoid broken pipe
sys.stderr.close()
def init_logger(LOGOUT):
    """Set the level of the module logger and optionally attach a log file.

    The level is DEBUG when options.__VERBOSE__ is set, INFO otherwise.

    LOGOUT: path of the log file. None or an empty string means that no
            file handler is attached.

    Returns the logging.FileHandler that was attached, or None when no log
    file was requested.
    """
    LOGGER.setLevel(logging.DEBUG if options.__VERBOSE__ else logging.INFO)

    # Plain truthiness covers both "" and None; identity comparison against
    # a string literal is unreliable, and FileHandler(None) would raise.
    if not LOGOUT:
        return None

    logFH = logging.FileHandler(LOGOUT)
    LOGGER.addHandler(logFH)
    return logFH
# program can be used as standalone or as module
if __name__ == '__main__':
    try:
        # the hand-written usage text doubles as the help message
        parser = optparse.OptionParser(usage=__HELP__,
                                       version="%s" % (__VERSION__))

        # options that take a value: (short, long, default, type, dest)
        typed_options = (
            ('-s', '--stem-size', STEM_SIZE, "int", "STEM_SIZE"),
            ('-m', '--min-loop-size', MIN_LOOP_SIZE, "int", 'MIN_LOOP_SIZE'),
            ('-n', '--max-loop-size', MAX_LOOP_SIZE, "int", 'MAX_LOOP_SIZE'),
            ('-a', '--max-gap-size', MAX_GAP_SIZE, "int", 'MAX_GAP_SIZE'),
            ('-g', '--max-total-gaps', MAX_TOTAL_GAP_COUNT, "int",
             'MAX_TOTAL_GAP_COUNT'),
            ('-b', '--min_matches_before_gap', MIN_MATCHES_BEFORE_FIRST_GAP,
             "int", 'MIN_MATCHES_BEFORE_FIRST_GAP'),
            ('-c', '--min-gc-content', MIN_GC_CONTENT, "float",
             'MIN_GC_CONTENT'),
            ('-l', '--log', LOGOUT, "string", "LOGOUT"),
        )
        for short_flag, long_flag, fallback, value_type, target in typed_options:
            parser.add_option(short_flag, long_flag,
                              action='store',
                              default=fallback,
                              type=value_type,
                              dest=target)

        # boolean switches: (short, long, default, dest)
        switch_options = (
            ('-t', '--test', False, "TEST"),
            ('-v', '--verbose', __VERBOSE__, "__VERBOSE__"),
        )
        for short_flag, long_flag, fallback, target in switch_options:
            parser.add_option(short_flag, long_flag,
                              action='store_true',
                              default=fallback,
                              dest=target)

        (options, args) = parser.parse_args()

        # at most one positional argument (the output file) is accepted
        if len(args) > 1:
            parser.error('too many arguments')

        main()
    # interruption (Control - C) and regular exits are passed through
    except (KeyboardInterrupt, SystemExit) as exception:
        raise exception
    # anything else is reported and ends the process
    except Exception as exception:
        print('Error.')
        exceptionHandling(exception)
\ No newline at end of file
__author__ = 'Tom'
import sys
from lib.Nucleotide import Nucleotide
from lib.Match import Match
from lib.MatchMate import MatchMate
......@@ -18,7 +19,6 @@ class StemLoopFactory:
self.min_matches_before_first_gap = 0
self.min_gc_content = 0
self.stemloop = None
self.sequences = None
......@@ -70,6 +70,7 @@ class StemLoopFactory:
self.init_stemloop()
def min_length(self):
    """Run check_init(), then return what the stem loop's min_length() reports."""
    self.check_init()
    shortest = self.stemloop.min_length()
    return shortest
def length(self):
......@@ -79,18 +80,27 @@ class StemLoopFactory:
def backbone(self):
    """Return the result of the stem loop's str_all()."""
    rendering = self.stemloop.str_all()
    return rendering
def summary(self, version="unknown"):
    """Return a multi-line, '#'-prefixed report of the factory settings.

    The report lists the configured sizes and limits, the sequence lengths
    obtained from min_length() and length(), and the backbone string.

    version: program version shown in the title line.

    Returns the report as one string without a trailing newline.
    """
    rule = "# --------------------------------- #"
    shortest = self.min_length()
    # upper bound exactly as computed before: the shortest length plus the
    # smaller of the two gap limits
    longest = shortest + min(self.max_gap_size, self.max_total_gap_count)

    rows = [
        "# STEM LOOP GENERATOR v%s" % (version,),
        rule,
        "# stem size: %s" % self.stem_size,
        "# minimal loop size: %s" % self.min_loop_size,
        "# maximal loop size: %s" % self.max_loop_size,
        "# maximum gap size: %s" % self.max_gap_size,
        "# maximum total gap count: %s" % self.max_total_gap_count,
        "# minimal matches before first gap: %s" % self.min_matches_before_first_gap,
        "# minimal gc content [0-1]: %s" % self.min_gc_content,
        rule,
        "# minimal sequence length: %s" % shortest,
        "# maximal sequence length: %s" % longest,
        "# total length: %s" % self.length(),
        rule,
        "# " + self.backbone(),
        rule,
    ]
    return "\n".join(rows)
def sequences_summary(self):
    """Return a one-line text stating the value of sequences_count()."""
    return "number of unique sequences: %s" % self.sequences_count()
def check_sequences_init(self):
......@@ -104,15 +114,24 @@ class StemLoopFactory:
def sequences_count(self):
    """Run check_sequences_init(), then return len(self.sequences)."""
    self.check_sequences_init()
    total = len(self.sequences)
    return total
def print_sequences(self, file):
    """Write every sequence on its own line to *file*, or to stdout.

    file: path of the file to (over)write. None or an empty string selects
          sys.stdout instead.

    I/O errors while opening or writing are not raised; a message is
    printed instead.
    """
    self.check_sequences_init()
    try:
        if file:
            # the context manager closes the file even when a write fails
            with open(file, 'w') as fh:
                fh.writelines("%s\n" % sequence for sequence in self.sequences)
        else:
            sys.stdout.writelines("%s\n" % sequence for sequence in self.sequences)
    except IOError as e:
        print("IOError, problems opening or writing to the file:\n" + str(e))
def sequences_header(self, i):
    """Return the first *i* entries of self.sequences as a list."""
    as_list = list(self.sequences)
    return as_list[:i]
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment