diff --git a/src/Snakefile b/src/Snakefile index 47b75bf85b73d3bf86928369ddd4d9e5acc9f6d5..e8942af24255da9e192604675735d324d3d19317 100755 --- a/src/Snakefile +++ b/src/Snakefile @@ -1795,12 +1795,41 @@ def getControlSample(filePrefix, addCommandLinePrefix): ctrl_param = prefix + FINAL_OUTPUT_dir + "/" + controlSample + ".final.bam" return ctrl_param +def getAllInputFiles_macs(filePrefix): +# + # For ATACSeq, no control sample + if dataType != "ChIPseq": + return "" + + if mergeReplicates: + sampleTableCol= "individual" + samples_regex = "|".join(allIndividualsUnique) + else: + sampleTableCol= "sampleName" + samples_regex = "|".join(allSamplesUnique) + + + file_base = os.path.basename(filePrefix.replace('.final', '')) + #print(file_base) + #print(samples_regex) + sample_cur = re.search(samples_regex, file_base).group(0).replace(".merged","") # since the matching contains the merged suffix, we remove it here + controlSample = samplesData["control_sampleName"][samplesData[sampleTableCol]==sample_cur].iloc[0] + + #print(controlSample) + bamFile = FINAL_OUTPUT_dir + "/" + filePrefix + ".bam" + + if pandas.isnull(controlSample): + inputFiles = [bamFile] + else: + controlFile = FINAL_OUTPUT_dir + "/" + controlSample + ".final.bam" + inputFiles = [bamFile, controlFile] + + return inputFiles # Runs for all bam files in the final output folder, also the replicated one rule macs2_stringent: input: - bam = FINAL_OUTPUT_dir + "/{basename}.bam", - index = lambda wildcards: [getControlSample(wildcards.basename, False)] + lambda wildcards: getAllInputFiles_macs(wildcards.basename) output: peaks_bedT = temp(macs2_stringent_outputName + '_peaks.narrowPeak'), peaks_bed = temp(macs2_stringent_outputName + '.narrowPeak.gz'), @@ -1809,7 +1838,7 @@ rule macs2_stringent: xls = temp(macs2_stringent_outputName + '_peaks.xls') log: LOG_BENCHMARK_dir + "/macs2_stringent.{basename}.log" #log: LOG_BENCHMARK_dir + expand("/macs2_stringent.{sample}{{specifics}}.log", sample = [allSamplesUnique, allIndividualsUnique] if mergeReplicates else allSamplesUnique) - message: "{ruleDisplayMessage}Run MACS2 (stringent) for {input.bam:q} ..." + message: "{ruleDisplayMessage}Run MACS2 (stringent) for {input[0]:q} ..." threads: 1 # benchmark: LOG_BENCHMARK_dir + "/macs2_stringent.{basename}.benchmark" conda: "condaEnvironments/macs2.yaml" @@ -1828,7 +1857,7 @@ rule macs2_stringent: shell: """PYTHONPATH="" && macs2 callpeak \ - --treatment {input.bam} \ + --treatment {input[0]} \ {params.controlFile} \ -q {params.qValue} \ --outdir {params.outputDir}\ @@ -1859,7 +1888,7 @@ use rule macs2_stringent as macs2_nonStringent with: log: LOG_BENCHMARK_dir + "/macs2_nonStringent.{basename}.log" message: - "{ruleDisplayMessage}Run MACS2 (non-stringent) for {input.bam:q} ..." + "{ruleDisplayMessage}Run MACS2 (non-stringent) for {input[0]:q} ..." params: qValue = config["peakCalling"]["modelNonStringent_minQValue"], slocal = "--slocal " + str(config["peakCalling"]["modelNonStringent_slocal"]), @@ -1877,8 +1906,7 @@ macs2_Encode_outputName = PEAK_ENCODE_dir + '/{basename}' + '.Encode' rule macs2_Encode: input: - bam = expand('{dir}/{{basename}}.bam', dir = FINAL_OUTPUT_dir), - index = lambda wildcards: [getControlSample(wildcards.basename, False)] + lambda wildcards: getAllInputFiles_macs(wildcards.basename) output: broadPeakfileT = temp(macs2_Encode_outputName + '_peaks.broadPeak'), gappedPeakfileT = temp(macs2_Encode_outputName + '_peaks.gappedPeak'), @@ -1896,7 +1924,7 @@ rule macs2_Encode: log: broadAndGapped = LOG_BENCHMARK_dir + "/macs2_Encode_broadAndGapped.{basename}.log", narrow = LOG_BENCHMARK_dir + "/macs2_Encode_narrow.{basename}.log" - message: "{ruleDisplayMessage}Run MACS2 (Encode version) for {input.bam:q} ..." + message: "{ruleDisplayMessage}Run MACS2 (Encode version) for {input[0]:q} ..." threads: 1 # benchmark: LOG_BENCHMARK_dir + "/macs2_Encode.{basename}.benchmark" params: @@ -1918,7 +1946,7 @@ rule macs2_Encode: # After peak calling, sort by col 8and 14 in descending order and replace long peak names in Column 4 with Peak_<peakRank> """ PYTHONPATH="" && macs2 callpeak \ - --treatment {input.bam} \ + --treatment {input[0]} \ {params.controlFile} \ -p {params.pValue} \ --outdir {params.outputDir}\