From f7fa6c66eb9d6ed28e1bfba49ca0b272b2b18211 Mon Sep 17 00:00:00 2001
From: Sascha Meiers <meiers@embl.de>
Date: Thu, 11 Jan 2018 14:02:35 +0100
Subject: [PATCH] Require external VCF to be zipped and indexed

---
 README.md | 21 +++++++++++++++++----
 Snakefile |  3 ++-
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 3c82929..8af551f 100644
--- a/README.md
+++ b/README.md
@@ -15,17 +15,17 @@ Preliminary SV calling using Strand-seq data - summarized in a [Snakemake](https
       source("https://bioconductor.org/biocLite.R")
       biocLite('BSgenome.Hsapiens.UCSC.hg38')
       ```
-    * [Strand-Phaser](https://github.com/daewoooo/StrandPhaseR) is installed automagically
+    * [Strand-Phaser](https://github.com/daewoooo/StrandPhaseR) is installed automatically
 
   2. **Set up the configuration of the smakemake pipeline**
 
     * Open `Snake.config.json` and specify the path to the executatables
       (such as Mosaicatcher) and to the R scripts.
     * Create a subdirectory `bam/` and another subdirectory per sample (e.g.
-      `bam/NA12878`). **Multiple samples can be run together not**.
+      `bam/NA12878/`). **Multiple samples can be run together now**.
       Then copy (or soft-link) the Strand-seq single-cell libraries (one BAM
-      file per cell) in there. Note that bam files need to contain a read group
-      and should have duplicates marked.
+      file per cell) in there. Note that bam files need to be sorted and indexed,
+      contain a read group and should have duplicates marked.
 
   3. **Run Snakemake**
 
@@ -37,3 +37,16 @@ Preliminary SV calling using Strand-seq data - summarized in a [Snakemake](https
         --cluster-config Snake.cluster.json \
         --cluster "???"
       ```
+
+### SNV calls
+
+  The pipeline will run simple SNV calling using [samtools](https://github.com/samtools/samtools)
+  and [bcftools](https://github.com/samtools/bcftools). If you **already have
+  SNV calls**, you can avoid that by entering your VCF files into the pipeline.
+  To do so, make sure the files are [tabix](https://github.com/samtools/tabix)-indexed
+  and specify them inside the `Snake.config.json` file:
+  ```
+  "snv_calls"     : {
+        "NA12878" : "path/to/snp/calls.vcf.gz"
+    },
+  ```
diff --git a/Snakefile b/Snakefile
index 1d58145..12a4480 100644
--- a/Snakefile
+++ b/Snakefile
@@ -422,7 +422,8 @@ rule merge_SNV_calls:
 
 rule split_external_snv_calls:
     input:
-        vcf = lambda wc: config["snv_calls"][wc.sample]
+        vcf = lambda wc: config["snv_calls"][wc.sample],
+        tbi = lambda wc: config["snv_calls"][wc.sample] + ".tbi"
     output:
         vcf = "external_snv_calls/{sample}/{chrom}.vcf"
     log: "log/{sample}/external_snv_calls.{chrom}.vcf.log"
-- 
GitLab