More LOG messages

a46a4107 · Sascha Meiers · 07d8af42 · a46a4107 · a46a4107 · a46a4107
Commit a46a4107 authored 7 years ago by Sascha Meiers
--- a/Snakefile
+++ b/Snakefile
@@ -36,11 +36,13 @@ rule plot_mosaic_counts:
        info   = "counts/" + config["sample"] + ".{file_name}.info"
    output:
        "plots/" + config["sample"] + ".{file_name}.pdf"
+    log:
+        "log/plot_mosaic_counts.{file_name}.txt"
    params:
        plot_command = "Rscript " + config["plot_script"]
    shell:
        """
-        {params.plot_command} {input.counts} {input.info} {output}
+        {params.plot_command} {input.counts} {input.info} {output} > {log} 2>&1
        """

 rule plot_SV_calls:
@@ -49,12 +51,13 @@ rule plot_SV_calls:
        probs  = "sv_probabilities/" + config["sample"] + ".{windows}.{bpdens}/probabilities.txt"
    output:
        "sv_calls/" + config["sample"] + ".{windows}.{bpdens}.SV_probs.pdf"
+    log:
+        "log/plot_SV_call.{windows}.{bpdens}.txt"
    params:
        plot_command = "Rscript " + config["sv_plot_script"]
    shell:
        """
-        {params.plot_command} {input.counts} {input.probs} {output}
-        touch {output}
+        {params.plot_command} {input.counts} {input.probs} {output}  > {log} 2>&1
        """


@@ -69,17 +72,21 @@ rule mosaic_count_fixed:
    output:
        counts = "counts/" + config["sample"] + ".{window}_fixed.txt.gz",
        info   = "counts/" + config["sample"] + ".{window}_fixed.info"
+    log:
+        "log/mosaic_count_fixed.{window}.txt"
    params:
        mc_command = config["mosaicatcher"],
        mc_exclfile = config["exclude_file"]
    shell:
        """
        {params.mc_command} count \
+            --verbose \
            -o {output.counts} \
            -i {output.info} \
            -x {params.mc_exclfile} \
            -w {wildcards.window} \
-            {input.bam}
+            {input.bam} \
+        > {log} 2>&1
        """


@@ -91,15 +98,19 @@ rule mosaic_count_variable:
    output:
        counts = "counts/" + config["sample"] + ".{window}_variable.txt.gz",
        info   = "counts/" + config["sample"] + ".{window}_variable.info"
+    log:
+        "log/mosaic_count_variable.{window}.txt"
    params:
        mc_command = config["mosaicatcher"]
    shell:
        """
        {params.mc_command} count \
+            --verbose \
            -o {output.counts} \
            -i {output.info} \
            -b {input.bed} \
-            {input.bam}
+            {input.bam} \
+        > {log} 2>&1
        """


@@ -116,13 +127,15 @@ rule segmentation:
        "counts/" + config["sample"] + ".{file_name}.txt.gz"
    output:
        "segmentation/" + config["sample"] + ".{file_name}.txt"
+    log:
+        "log/segmentation.{file_name}.txt"
    params:
        mc_command = config["mosaicatcher"]
    shell:
        """
        {params.mc_command} segment \
        -o {output} \
-        {input}
+        {input} > {log} 2>&1
        """

 # Pick a few segmentations and prepare the input files for SV classification
@@ -131,6 +144,8 @@ rule prepare_segments:
        "segmentation/" + config["sample"] + ".{windows}.txt"
    output:
        "segmentation2/" + config["sample"] + ".{windows}.{bpdens}.txt"
+    log:
+        "log/prepare_segments.{windows}.{bpdens}.txt"
    params:
        quantile = lambda wc: config["bp_density"][wc.bpdens]
    script:
@@ -145,10 +160,10 @@ rule install_MaRyam:
    output:
        "utils/R-packages2/MaRyam/R/MaRyam"
    log:
-        "log/maryam-install.log"
+        "log/install_MaRyam.log"
    shell:
        """
-        Rscript  utils/install_maryam.R > {log} 2>&1
+        Rscript utils/install_maryam.R > {log} 2>&1
        """

 rule run_sv_classification:
@@ -161,12 +176,14 @@ rule run_sv_classification:
    output:
        outdir = "sv_probabilities/" + config["sample"] + ".{windows}.{bpdens}/",
        out1   = "sv_probabilities/" + config["sample"] + ".{windows}.{bpdens}/allSegCellProbs.table"
+    log:
+        "run_sv_classification.{windows}.{bpdens}.txt"
    params:
        windowsize    = lambda wc: wc.windows.split("_")[0]
    shell:
        """
        set -x
-        # set haplotypeInfo if phasing info is available		
+        # set haplotypeInfo if phasing info is available
        Rscript utils/MaRyam_pipeline.R \
                binRCfile={input.counts} \
                BRfile={input.bp} \
@@ -176,7 +193,7 @@ rule run_sv_classification:
                bin.size={params.windowsize} \
                K=22 \
                maximumCN=4 \
-                utils/R-packages2/
+                utils/R-packages2/ > {log} 2>&1
        """

 rule convert_SVprob_output:
@@ -185,6 +202,8 @@ rule convert_SVprob_output:
        info  = "counts/" + config["sample"] + ".{windows}.info"
    output:
        "sv_probabilities/" + config["sample"] + ".{windows}.{bpdens}/probabilities.txt"
+    log:
+        "log/convert_SVprob_output.{windows}.{bpdens}.txt"
    script:
        "utils/helper.convert_svprob_output.R"

@@ -198,13 +217,15 @@ rule determine_initial_strand_states:
        "counts/" + config["sample"] + ".500000_fixed.txt.gz"
    output:
        "strand_states/" + config["sample"] + ".txt"
+    log:
+        "log/determine_initial_strand_states.txt"
    params:
        sce_command = "Rscript " + config["sce_script"]
    shell:
        """
        echo "[Note] This is a dirty hack to remove chrX and chrY."
        tmp=$(mktemp)
-        {params.sce_command} {input} $tmp
+        {params.sce_command} {input} $tmp > {log} 2>&1
        grep -vP '^Y|^chrY' $tmp | grep -vP '^X|^chrX' > {output}
        """

@@ -216,6 +237,8 @@ rule convert_strandphaser_input:
        info   = "counts/" + config["sample"] + ".500000_fixed.info"
    output:
        "strand_states/" + config["sample"] + ".strandphaser_input.txt"
+    log:
+        "log/convert_strandphaser_input.txt"
    script:
        "utils/helper.convert_strandphaser_input.R"

@@ -226,7 +249,7 @@ rule install_StrandPhaseR:
        "log/strandphaser-install.log"
    shell:
        """
-        Rscript  utils/install_strandphaser.R > {log} 2>&1
+        Rscript utils/install_strandphaser.R > {log} 2>&1
        """

 rule prepare_strandphaser_config:
@@ -259,7 +282,7 @@ rule prepare_strandphaser_config:
            print("translateBases   = TRUE",     file = f)
            print("fillMissAllele   = NULL",     file = f)
            print("splitPhasedReads = TRUE",     file = f)
-	    print("compareSingleCells = TRUE",     file = f)	
+            print("compareSingleCells = TRUE",     file = f)
            print("callBreaks       = FALSE",    file = f)
            print("exportVCF        = '", config["sample"], ".txt'", sep = "", file = f)
            print("bsGenome         = 'BSgenome.Hsapiens.UCSC.hg19'", file = f)
@@ -278,7 +301,7 @@ rule run_strandphaser:
    output:
        "strand_states/" + config["sample"] + ".strandphaser_output.txt"
    log:
-        "log/phased_haps.txt.log"
+        "log/run_strandphaser.txt"
    shell:
        """
        Rscript utils/StrandPhaseR_pipeline.R \
@@ -300,10 +323,13 @@ rule convert_strandphaser_output:
        info           = "counts/" + config["sample"] + ".500000_fixed.info"
    output:
        "strand_states/" + config["sample"] + ".final.txt"
+    log:
+        "log/convert_strandphaser_output.txt"
    script:
        "utils/helper.convert_strandphaser_output.R"


+
 ################################################################################
 # Call SNVs                                                                    #
 ################################################################################
@@ -333,13 +359,15 @@ rule call_SNVs_bcftools_chrom:
        bai   = "snv_calls/merged.bam.bai"
    output:
        "snv_calls/" + config["sample"] + ".{chrom}.vcf"
+    log:
+        "log/call_SNVs_bcftools_chrom.{chrom}.txt"
    params:
        samtools = config["samtools"],
        bcftools = config["bcftools"]
    shell:
        """
        {params.samtools} mpileup -r {wildcards.chrom} -g -f {input.fa} {input.bam} \
-        | {params.bcftools} call -mv - > {output}
+        | {params.bcftools} call -mv - > {output} > {log} 2>&1
        """

 # Write one file per chromosome that should be analysed.
@@ -360,3 +388,4 @@ rule merge_SNV_calls:
        expand("snv_calls/" + config["sample"] + ".vcf")
    shell:
        config["bcftools"] + " concat -O v -o {output} {input}"
+
--- a/utils/helper.convert_strandphaser_input.R
+++ b/utils/helper.convert_strandphaser_input.R
+sink(snakemake@log[[1]])
 library(data.table);
 d = fread(snakemake@input[["states"]])
+d
 e = fread(snakemake@input[["info"]])
 e$bam = basename(e$bam);
+e
 f = merge(d, e, by = c("sample","cell"))[class == "WC", .(chrom,start,end,bam)]
-write.table(f, file=snakemake@output[[1]], quote=F, row.names=F, col.names=F, sep="\t")
\ No newline at end of file
+f
+write.table(f, file=snakemake@output[[1]], quote=F, row.names=F, col.names=F, sep="\t")
--- a/utils/helper.convert_strandphaser_output.R
+++ b/utils/helper.convert_strandphaser_output.R
+sink(snakemake@log[[1]])
 library(data.table);
 library(assertthat)
 e = fread(snakemake@input[["phased_states"]])
+e
 d = fread(snakemake@input[["info"]])
+d
 g = fread(snakemake@input[["initial_states"]])
+g

 d$bam = basename(d$bam);

@@ -10,9 +14,11 @@ e$bam = e$cell
 e$cell = NULL
 e$sample = NULL
 f = merge(d, e, by = "bam")[, .(chrom,start,end,sample,cell,class)]
+f

 # Note that there is still a bug in Venla's strand state detection.
 g = merge(g,f, by = c("chrom","start","end","sample","cell"), all.x = T)
+g


 # Overwrite with David's phased strand state if available!
@@ -20,5 +26,6 @@ g = g[, class := ifelse(!is.na(class.y), class.y, class.x)][]
 g$class.x = NULL
 g$class.y = NULL
 g = g[, .(chrom, start, end, sample, cell, class)]
+g

-write.table(g, file=snakemake@output[[1]], quote=F, row.names=F, col.names=T, sep="\t")
\ No newline at end of file
+write.table(g, file=snakemake@output[[1]], quote=F, row.names=F, col.names=T, sep="\t")
--- a/utils/helper.convert_svprob_output.R
+++ b/utils/helper.convert_svprob_output.R
+sink(snakemake@log[[1]])
 library(data.table)
 library(assertthat)

 f_maryam = snakemake@input[["probs"]]
+f_maryam
 f_info   = snakemake@input[["info"]]
+f_info
 sample   = snakemake@params[["sample_name"]]
+sample

 d = fread(f_maryam, header = T)
+d
 f = fread(f_info)
+f

 # Map cell number to cell name
 assert_that(max(d$cells) <= nrow(f))
 d$cell = f$cell[d$cells]
 d$sample = sample
+d

 d <- d[, .(chrom = chr, start, end, sample, cell, type = types, w = Wcount, c = Ccount,
           p_cn0 = CN0, p_cn1 = CN1, p_cn2 = CN2, p_cn3 = CN3, p_cn4 = CN4,
@@ -19,4 +26,5 @@ d <- d[, .(chrom = chr, start, end, sample, cell, type = types, w = Wcount, c =
           p_inv_hom = `0101`, p_inv_h1 = `0110`, p_inv_h2 = `1001`,
           p_dup_hom = `2020`, p_dup_h1 = `2010`, p_dup_h2 = `1020`,
           p_idup_h1 = `1110`, p_idup_h2 = `1011`)]
+d
 write.table(d, file = snakemake@output[[1]], quote=F, col.names = T, row.names = F, sep = "\")
--- a/utils/helper.prepare_segments.R
+++ b/utils/helper.prepare_segments.R
+sink(snakemake@log[[1]])
 library(data.table)
 qu = snakemake@params[["quantile"]]
+qu
 d = fread(snakemake@input[[1]])
+d
 # type = 1 is important to get discrete values!
 d = d[, .SD[k == quantile(1:max(k), qu, type = 1)], by=chrom][,.(k, chrom, bps = breakpoint)]
-write.table(d, file = snakemake@output[[1]], row.names=F, quote=F, sep="\t")
\ No newline at end of file
+d
+write.table(d, file = snakemake@output[[1]], row.names=F, quote=F, sep="\t")