Commit af11001d authored by Christian Arnold

Version 1.2, see Changelog for details

parent c311749f
@@ -107,7 +107,7 @@ checkAndLoadPackages(c("tidyverse", "futile.logger", "modeest", "checkmate", "gg
 checkAndLoadPackages(c("tidyverse", "futile.logger", "checkmate", "tools", "methods", "boot"), verbose = FALSE)
 # Step 6
-checkAndLoadPackages(c("tidyverse", "futile.logger", "lsr", "ggrepel", "checkmate", "tools", "methods", "grDevices", "pheatmap"), verbose = TRUE)
+checkAndLoadPackages(c("tidyverse", "futile.logger", "lsr", "ggrepel", "checkmate", "tools", "methods", "grDevices", "pheatmap"), verbose = FALSE)
@@ -310,7 +310,9 @@ for (TFCur in allTFs) {
     checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
   }
-  tableCur.df = read_tsv(fileCur, col_names = FALSE, col_types = cols())
+  tableCur.df = read_tsv(fileCur, col_names = FALSE,
+                         col_types = "ciicic")
   if (nrow(problems(tableCur.df)) > 0) {
     flog.fatal(paste0("Parsing errors: "), problems(tableCur.df), capture = TRUE)
     stop("Parsing errors for file ", TFCur, ". See the log file for more information")
@@ -322,6 +324,10 @@ for (TFCur in allTFs) {
     checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
   }
+  assertIntegerish(tableCur.df$X2, lower = 0)
+  assertIntegerish(tableCur.df$X3, lower = 0)
 }
 flog.info(paste0("Done checking."))
...
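For context, the two changes above tighten input validation. A minimal R sketch of what they buy, assuming a six-column TFBS file (chromosome, start, end, ID, score, strand); the file name and column semantics are illustrative assumptions, not taken from the pipeline:

    # Sketch only: an explicit column specification turns type mismatches
    # into recorded parsing problems instead of silently guessed types.
    library(readr)      # read_tsv(), problems()
    library(checkmate)  # assertIntegerish()

    tableCur.df = read_tsv("exampleTF.bed", col_names = FALSE,
                           col_types = "ciicic")  # c = character, i = integer

    # Fail loudly if any field did not match its declared type:
    stopifnot(nrow(problems(tableCur.df)) == 0)

    # Coordinates (columns X2/X3) must be non-negative integers:
    assertIntegerish(tableCur.df$X2, lower = 0)
    assertIntegerish(tableCur.df$X3, lower = 0)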
@@ -318,6 +318,7 @@ rule checkParameterValidity:
     message: "{ruleDisplayMessage}Check parameter validity {script_checkParValidity}..."
     threads: 1
     priority: 1
+    singularity: "shub://chrarnold/Singularity_images:difftf_r"
     params:
     script: dir_scripts + script_checkParValidity
@@ -331,6 +332,7 @@ rule produceConsensusPeaks:
     log: expand('{dir}/produceConsensusPeaks.R.log', dir = LOG_BENCHMARK_DIR)
     message: "{ruleDisplayMessage}Calculate consensus peaks for all peak files with the script {script_createConsensusPeaks}..."
     threads: 1
+    singularity: "shub://chrarnold/Singularity_images:difftf_r"
     params:
     script: dir_scripts + script_createConsensusPeaks
@@ -349,6 +351,7 @@ rule filterSexChromosomesAndSortPeaks:
     consensusPeaks_sorted = PEAKS_DIR + "/" + compType + "consensusPeaks.filtered.sorted.bed"
     message: "{ruleDisplayMessage}Filter sex and unassembled chromosomes..."
     threads: 1
+    singularity: "shub://chrarnold/Singularity_images:difftf_conda"
     shell: """
         grep -v "^chrX\|^chrY\|^chrM\|^chrUn\|random\|hap\|_gl" {input.consensusPeaks} | sort -k1,1 -k2,2n > {output.consensusPeaks_sorted}
         """
@@ -386,11 +389,12 @@ rule resortBAM:
     BAMSorted = TEMP_BAM_DIR + "/" + "{BAM}.bam"
     message: "{ruleDisplayMessage} Sort BAM file {input.BAM}..."
     threads: 1
+    singularity: "shub://chrarnold/Singularity_images:difftf_conda"
     params:
         compression = "-c",
         noSeqInf = "-t"
     shell:
-        """sh -c 'repair {params.compression} {params.noSeqInf} -i {input.BAM} -o {output.BAMSorted} '"""
+        """repair {params.compression} {params.noSeqInf} -i {input.BAM} -o {output.BAMSorted}"""
@@ -414,6 +418,7 @@ rule intersectPeaksAndBAM:
     log:
     message: "{ruleDisplayMessage} Intersect for file {input.consensusPeaks} with all BAM files..."
     threads: threadsMax
+    singularity: "shub://chrarnold/Singularity_images:difftf_conda"
     params:
         pairedEnd = pairedEndOptions,
         readFiltering = "-Q 10",
@@ -453,6 +458,7 @@ rule intersectPeaksAndTFBS:
     TFBSinPeaksMod_bed = expand('{dir}/{compType}allTFBS.peaks.extension.bed.gz', dir = TEMP_EXTENSION_DIR, compType = compType)
     message: "{ruleDisplayMessage} Obtain binding sites from peaks: Intersect all TFBS files and {input.consensusPeaks}..."
     threads: 1
+    singularity: "shub://chrarnold/Singularity_images:difftf_conda"
     params:
         extension = config["par_general"]["regionExtension"],
         ulimitMax = ulimitMax
@@ -480,6 +486,7 @@ rule intersectTFBSAndBAM:
     log:
     message: "{ruleDisplayMessage} Intersect file {input.bed} against all BAM files for TF {wildcards.TF}..."
     threads: 4
+    singularity: "shub://chrarnold/Singularity_images:difftf_conda"
     params:
         pairedEnd = pairedEndOptions,
         readFiltering = "-Q 10",
@@ -526,6 +533,7 @@ rule DiffPeaks:
     log: expand('{dir}/DiffPeaks.R.log', dir = LOG_BENCHMARK_DIR)
     message: "{ruleDisplayMessage}Run R script {script_DiffPeaks}"
     threads: 1
+    singularity: "shub://chrarnold/Singularity_images:difftf_r"
     params:
         doCyclicLoess = "true"
     script: dir_scripts + script_DiffPeaks
@@ -549,6 +557,7 @@ rule analyzeTF:
     log: expand('{dir}/analyzeTF.{{TF}}.R.log', dir = LOG_BENCHMARK_DIR)
     message: "{ruleDisplayMessage}Run R script {script_analyzeTF} for TF {wildcards.TF}..."
     threads: 1
+    singularity: "shub://chrarnold/Singularity_images:difftf_r"
     params:
         doCyclicLoess = "true",
         allBAMS = list(allBamFiles)
@@ -564,6 +573,7 @@ rule summary1:
     log: expand('{dir}/summary1.R.log', dir = LOG_BENCHMARK_DIR)
     message: "{ruleDisplayMessage}Run R script {script_summary1} ..."
     threads: 1
+    singularity: "shub://chrarnold/Singularity_images:difftf_r"
     script: dir_scripts + script_summary1
@@ -579,7 +589,7 @@ rule concatenateMotifsPerm:
     log:
     message: "{ruleDisplayMessage}Concatenate all motifs for permutation {wildcards.perm}..."
     threads: 1
-    benchmark: LOG_BENCHMARK_DIR + "/concatenateMotifsPerm.{perm}.benchmark"
+    singularity: "shub://chrarnold/Singularity_images:difftf_conda"
     params:
         motifsShortPerm = TF_DIR + "/*/" + extDir + "/" + compType + "*.outputPerm.tsv.gz",
         colToExtract= lambda wc: str(int(wc.perm) + 3)
@@ -601,6 +611,7 @@ rule calcNucleotideContent:
     log:
     message: "{ruleDisplayMessage}Calculate nucleotide content via bedtools nuc for all TFBS..."
     threads: 1
+    singularity: "shub://chrarnold/Singularity_images:difftf_conda"
     params:
         motifsShort = TF_DIR + "/*/" + extDir + "/" + compType + "*.output.tsv.gz",
         # TFMotifs = construct a string that resembles the call to tail,
@@ -622,6 +633,7 @@ rule binningTF:
     log: expand('{dir}/binningTF.{{TF}}.R.log', dir = LOG_BENCHMARK_DIR)
     message: "{ruleDisplayMessage}Run R script {script_binningTF} for TF {wildcards.TF}..."
     threads: 1
+    singularity: "shub://chrarnold/Singularity_images:difftf_r"
     script: dir_scripts + script_binningTF
 # Determine which files are produced by the rule depending on whether the classification should be run
@@ -647,6 +659,7 @@ rule summaryFinal:
     log: expand('{dir}/summaryFinal.R.log', dir = LOG_BENCHMARK_DIR)
     message: "{ruleDisplayMessage}Run R script {script_summaryFinal} ..."
     threads: 1
+    singularity: "shub://chrarnold/Singularity_images:difftf_r"
     params: TFs = ",".join(allTF)
     script: dir_scripts + script_summaryFinal
@@ -660,6 +673,7 @@ rule cleanUpLogFiles:
     message: "{ruleDisplayMessage}Clean and summarize Logs_and_Benchmark directory..."
     threads: 1
     params: dir = LOG_BENCHMARK_DIR
+    singularity: "shub://chrarnold/Singularity_images:difftf_conda"
     shell:
         """
         grep -i "^WARN" {params.dir}/*.log > {output.warnLog} || true &&
...
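Note: the singularity: directives added throughout this commit take effect only when Snakemake is invoked with the --use-singularity flag; without it, all rules keep running in the host environment as before. The image split follows the rule type visible in the diff: rules that execute R scripts reference the difftf_r image, while rules built on command-line tools (grep/sort, repair, bedtools) reference difftf_conda.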