From 19a85189b06ecc16caa4b54a622cd10f7efd9896 Mon Sep 17 00:00:00 2001
From: Christian Arnold <christian.arnold@embl.de>
Date: Wed, 23 Feb 2022 13:52:10 +0100
Subject: [PATCH] Singularity changes

---
 example/input/config.yaml              |  3 ---
 example/templates/hg19/config.yaml     | 27 ++++++++++-----------
 example/templates/hg38/config.yaml     |  3 ---
 example/templates/mm10/config.yaml     |  3 ---
 src/Snakefile                          | 21 ++++++++--------
 src/singularityContainers/recipe_R     | 16 +++++++++++++
 src/singularityContainers/recipe_conda | 33 ++++++++++++++++++++++++++
 7 files changed, 71 insertions(+), 35 deletions(-)
 create mode 100644 src/singularityContainers/recipe_R
 create mode 100755 src/singularityContainers/recipe_conda

diff --git a/example/input/config.yaml b/example/input/config.yaml
index 2cf7482..19ff7af 100644
--- a/example/input/config.yaml
+++ b/example/input/config.yaml
@@ -82,9 +82,6 @@ samples:
 ###########################
 additionalInput:
 
-  # STRING. Absolute path to the folder that contains the required Singularity images needed for Singularity support. For the Zaugg group, the default directory is /g/scb2/zaugg/zaugg_shared/Programs/Snakemake/singularity. Only adjustment necessary if you moved them somewhere else (e.g., scratch)
-  singularity_baseFolder: "/g/scb2/zaugg/zaugg_shared/Programs/Snakemake/singularity"
-
   # STRING. Absolute path to the adapters file for Trimmomatic in fasta format. Default “/g/scb2/zaugg/zaugg_shared/Programs/Trimmomatic-0.33/adapters/NexteraPE-PE.fa”. There is usually no need to change this unless for your experiment, this adapter file is not suited.
   trimmomatic_adapters: "/g/scb2/zaugg/zaugg_shared/Programs/Trimmomatic-0.33/adapters/NexteraPE-PE.fa"
 
diff --git a/example/templates/hg19/config.yaml b/example/templates/hg19/config.yaml
index fe69e60..19ff7af 100644
--- a/example/templates/hg19/config.yaml
+++ b/example/templates/hg19/config.yaml
@@ -82,25 +82,22 @@ samples:
 ###########################
 additionalInput:
 
-  # STRING. Absolute path to the folder that contains the required Singularity images needed for Singularity support. For the Zaugg group, the default directory is /g/scb2/zaugg/zaugg_shared/Programs/Snakemake/singularity. Only adjustment necessary if you moved them somewhere else (e.g., scratch)
-  singularity_baseFolder: "/g/scb2/zaugg/zaugg_shared/Programs/Snakemake/singularity"
+  # STRING. Absolute path to the adapters file for Trimmomatic in fasta format. Default “/g/scb2/zaugg/zaugg_shared/Programs/Trimmomatic-0.33/adapters/NexteraPE-PE.fa”. There is usually no need to change this unless this adapter file is not suited to your experiment.
+  trimmomatic_adapters: "/g/scb2/zaugg/zaugg_shared/Programs/Trimmomatic-0.33/adapters/NexteraPE-PE.fa"
 
-# STRING. Absolute path to the adapters file for Trimmomatic in fasta format. Default “/g/scb2/zaugg/zaugg_shared/Programs/Trimmomatic-0.33/adapters/NexteraPE-PE.fa”. There is usually no need to change this unless for your experiment, this adapter file is not suited.
-trimmomatic_adapters: "/g/scb2/zaugg/zaugg_shared/Programs/Trimmomatic-0.33/adapters/NexteraPE-PE.fa"
+  # STRING. Absolute path to a BED file that contains the genomic regions that should be filtered from the peaks. The default depends on the genome assembly, see the templates for details. Only needed if doPeakCalling is set to true.
+  blacklistRegions: "/g/scb2/zaugg/zaugg_shared/annotations/hg19/blacklisted/hg19-blacklist.v2.bed"
 
-# STRING. Absolute path to a BED file that contains the genomic regions that should be filtered from the peaks. The default depends on the genome assembly, see the templates for details. Only needed if doPeakCalling is set to true.
-blacklistRegions: "/g/scb2/zaugg/zaugg_shared/annotations/hg19/blacklisted/hg19-blacklist.v2.bed"
+  # STRING. Absolute path to a database of known polymorphic sites (SNPs and Indels, respectively). This is only needed if (1) doBaseRecalibration is set to true and (2) the genome is either hg19 or hg38 and ignored otherwise. The default depends on the genome assembly, see the templates for details. Supported formats from GATK: BCF2, BEAGLE, BED, BEDTABLE, EXAMPLEBINARY, GELITEXT, RAWHAPMAP, REFSEQ, SAMPILEUP, SAMREAD, TABLE, VCF, VCF3.
+  knownSNPs: "/g/scb2/zaugg/zaugg_shared/annotations/hg19/GATK_bundle/dbsnp_138.hg19.vcf.gz"
+  knownIndels: "/g/scb2/zaugg/zaugg_shared/annotations/hg19/GATK_bundle/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf.gz"
 
-# STRING. Absolute path to a database of known polymorphic sites (SNPs and Indels, respectively). This is only needed if (1) doBaseRecalibration is set to true and (2) the genome is either hg19 or hg38 and ignored otherwise. The default depends on the genome assembly, see the templates for details. Supported formats from GATK: BCF2, BEAGLE, BED, BEDTABLE, EXAMPLEBINARY, GELITEXT, RAWHAPMAP, REFSEQ, SAMPILEUP, SAMREAD, TABLE, VCF, VCF3.
-knownSNPs: "/g/scb2/zaugg/zaugg_shared/annotations/hg19/GATK_bundle/dbsnp_138.hg19.vcf.gz"
-knownIndels: "/g/scb2/zaugg/zaugg_shared/annotations/hg19/GATK_bundle/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf.gz"
+  # STRING. The default depends on the genome assembly, see the templates for details. Absolute path to the reference genome in fasta and 2bit format, respectively, both of which have to correspond to the same genome assembly version as used for the alignment as well as the database of polymorphic sites (knownSNPs and knownIndels, if applicable).
+  refGenome_fasta: "/g/scb2/zaugg/zaugg_shared/annotations/hg19/GATK_bundle/ucsc.hg19.onlyRefChr.fasta"
+  refGenome_2bit: "/g/scb2/zaugg/zaugg_shared/annotations/hg19/GATK_bundle/ucsc.hg19.2bit"
 
-# STRING. The default depends on the genome assembly, see the templates for details. Absolute path to the reference genome in fasta and 2bit format, respectively, both of which have to correspond to the same genome assembly version as used for the alignment as well as the database of polymorphic sites (knownSNPs and knownIndels, if applicable).
-refGenome_fasta: "/g/scb2/zaugg/zaugg_shared/annotations/hg19/GATK_bundle/ucsc.hg19.onlyRefChr.fasta"
-refGenome_2bit: "/g/scb2/zaugg/zaugg_shared/annotations/hg19/GATK_bundle/ucsc.hg19.2bit"
-
-# STRING. The default depends on the genome assembly, see the templates for details. Absolute path to an genome annotation file in GTF format.
-annotationGTF: "/g/scb2/zaugg/zaugg_shared/annotations/hg19/Gencode_v19/gencode.v19.annotation.gtf"
+  # STRING. The default depends on the genome assembly, see the templates for details. Absolute path to a genome annotation file in GTF format.
+  annotationGTF: "/g/scb2/zaugg/zaugg_shared/annotations/hg19/Gencode_v19/gencode.v19.annotation.gtf"
 
 
 #######################
diff --git a/example/templates/hg38/config.yaml b/example/templates/hg38/config.yaml
index b1d6c13..0c4ed4d 100644
--- a/example/templates/hg38/config.yaml
+++ b/example/templates/hg38/config.yaml
@@ -82,9 +82,6 @@ samples:
 ###########################
 additionalInput:
 
-  # STRING. Absolute path to the folder that contains the required Singularity images needed for Singularity support. For the Zaugg group, the default directory is /g/scb2/zaugg/zaugg_shared/Programs/Snakemake/singularity. Only adjustment necessary if you moved them somewhere else (e.g., scratch)
-  singularity_baseFolder: "/g/scb2/zaugg/zaugg_shared/Programs/Snakemake/singularity"
-
   # STRING. Absolute path to the adapters file for Trimmomatic in fasta format. Default “/g/scb2/zaugg/zaugg_shared/Programs/Trimmomatic-0.33/adapters/NexteraPE-PE.fa”. There is usually no need to change this unless for your experiment, this adapter file is not suited.
   trimmomatic_adapters: "/g/scb2/zaugg/zaugg_shared/Programs/Trimmomatic-0.33/adapters/NexteraPE-PE.fa"
 
diff --git a/example/templates/mm10/config.yaml b/example/templates/mm10/config.yaml
index 82e46e1..07908df 100644
--- a/example/templates/mm10/config.yaml
+++ b/example/templates/mm10/config.yaml
@@ -82,9 +82,6 @@ samples:
 ###########################
 additionalInput:
 
-  # STRING. Absolute path to the folder that contains the required Singularity images needed for Singularity support. For the Zaugg group, the default directory is /g/scb2/zaugg/zaugg_shared/Programs/Snakemake/singularity. Only adjustment necessary if you moved them somewhere else (e.g., scratch)
-  singularity_baseFolder: "/g/scb2/zaugg/zaugg_shared/Programs/Snakemake/singularity"
-
   # STRING. Absolute path to the adapters file for Trimmomatic in fasta format. Default “/g/scb2/zaugg/zaugg_shared/Programs/Trimmomatic-0.33/adapters/NexteraPE-PE.fa”. There is usually no need to change this unless for your experiment, this adapter file is not suited.
   trimmomatic_adapters: "/g/scb2/zaugg/zaugg_shared/Programs/Trimmomatic-0.33/adapters/NexteraPE-PE.fa"
 
diff --git a/src/Snakefile b/src/Snakefile
index 570c6bb..2c14395 100755
--- a/src/Snakefile
+++ b/src/Snakefile
@@ -139,7 +139,7 @@ configDict = {
             "samples":
                 ["summaryFile", "pairedEnd", "dataType"],
             "additionalInput":
-                ["singularity_baseFolder", "trimmomatic_adapters", "blacklistRegions", "refGenome_fasta", "refGenome_2bit", "annotationGTF"],
+                ["trimmomatic_adapters", "blacklistRegions", "refGenome_fasta", "refGenome_2bit", "annotationGTF"],
             "executables":
                 ["GATK_jar", "java", "PICARD_jar"],
             "trimming":
@@ -339,10 +339,11 @@ else:
 if nIndividualsUniqueNoControl % 5 != 0 and nIndividualsUniqueNoControl not in rangeOverlapMerged:
     rangeOverlapMerged = rangeOverlapMerged + list([nIndividualsUniqueNoControl])
 
-singularity_conda = config["additionalInput"]["singularity_baseFolder"] + "/Singularity.ATAC_seq_conda_all.sif"
+#singularity_conda = config["additionalInput"]["singularity_baseFolder"] + "/Singularity.ATAC_seq_conda_all.sif"
+singularity_conda = "oras://git.embl.de:4567/grp-zaugg/singularity-reg/atac_conda:stable"
 
-# TODO fix R
-singularity_R     = config["additionalInput"]["singularity_baseFolder"] + "/Singularity.ATAC_seq_R.sif"
+#singularity_R     = config["additionalInput"]["singularity_baseFolder"] + "/Singularity.ATAC_seq_R.sif"
+singularity_R     = "oras://git.embl.de:4567/grp-zaugg/singularity-reg/atac_r:stable"
 
 #
 # print(samplesData.loc[:,"sampleName"])
@@ -2044,8 +2045,7 @@ if nSamplesUnique > 1 and doPeakCalling:
             summaryData                 = REPORTS_dir_consensus + '/consensusPeaks_summary.tsv'
         log: expand('{dir}/produceConsensusPeaks.R.log', dir = LOG_BENCHMARK_dir)
         # benchmark: LOG_BENCHMARK_dir +  "/produceConsensusPeaks.benchmark"
-        singularity: "shub://chrarnold/Singularity_images:atac_seq_r"
-        # TODO singularity: singularity_R
+        singularity: singularity_R
         message: "{ruleDisplayMessage}Calculate consensus peaks for all peak files with the script {script_consPeaks}..."
         threads: 1
         params:
@@ -2101,7 +2101,7 @@ if nSamplesUnique > 1 and doPeakCalling:
             cor_data     = REPORTS_dir_corr + '/consensusPeaks_sampleCorrelation.data.all.rds'
         log: expand('{dir}/consensusPeaksPCA_correlation.R.log', dir = LOG_BENCHMARK_dir)
         # benchmark: LOG_BENCHMARK_dir +  "/consensusPeaksPCA.benchmark"
-        # TODO singularity: singularity_R
+        singularity: singularity_R
         message: "{ruleDisplayMessage}Calculate PCA and sample correlation for consensus peaks for all peak files with the script {script_PCA}..."
         threads: 1
         params:
@@ -2121,7 +2121,7 @@ if annotatePeaks and doPeakCalling:
             annotation_individual_stringent = annotation_individualPeaksStringent,
             annotation_individual_nonStringent = annotation_individualPeaksNonStringent,
         log: expand('{dir}/annotatePeaks_individual.R.log', dir = LOG_BENCHMARK_dir)
-        # TODO singularity: singularity_R
+        singularity: singularity_R
         message: "{ruleDisplayMessage} Annotating individual peaks for all peak files with the script {script_annoPeaks}..."
         threads: 1
         params:
@@ -2227,7 +2227,7 @@ if dataType == "ATACseq":
         message: "{ruleDisplayMessage}Create fragment length distribution..."
         threads: 1
         # benchmark: LOG_BENCHMARK_dir + "/fragment_length_distr.benchmark"
-        # TODO singularity: singularity_R
+        singularity: singularity_R
         params:
             FL_distr_cutoff = config["scripts"]["FL_distr_script_cutoff"],
             inputString     = lambda wildcards, input: ','.join(input)
@@ -2312,8 +2312,7 @@ rule stats:
     message: "{ruleDisplayMessage}Generate statistics about pipeline and produce file {output:q}..."
     threads: 1
     # benchmark: LOG_BENCHMARK_dir + "/stats.benchmark"
-    singularity: "shub://chrarnold/Singularity_images:atac_seq_r"
-    # TODO singularity: singularity_R
+    singularity: singularity_R
     params:
         sampleNames          = allSamplesUniqueStr,
         pairedEnd            = pairedEnd,
diff --git a/src/singularityContainers/recipe_R b/src/singularityContainers/recipe_R
new file mode 100644
index 0000000..1483be3
--- /dev/null
+++ b/src/singularityContainers/recipe_R
@@ -0,0 +1,16 @@
+Bootstrap: docker
+FROM: bioconductor/bioconductor_docker
+
+%labels
+  Version v1.2
+
+%help
+  Singularity image for the ATAC-Seq pipeline (R 4 + all packages)
+
+
+%post
+  # Bioconductor packages via BiocManager, including TxDb/BSgenome data for hg19, hg38, mm9 and mm10
+  R --slave -e "BiocManager::install(c('DESeq2', 'GenomicRanges', 'DiffBind', 'ChIPseeker', 'org.Hs.eg.db', 'org.Mm.eg.db', 'TxDb.Hsapiens.UCSC.hg19.knownGene', 'TxDb.Hsapiens.UCSC.hg38.knownGene', 'BSgenome.Hsapiens.UCSC.hg19', 'BSgenome.Hsapiens.UCSC.hg38', 'TxDb.Mmusculus.UCSC.mm10.knownGene',
+                                        'TxDb.Mmusculus.UCSC.mm9.knownGene', 'BSgenome.Mmusculus.UCSC.mm9', 'BSgenome.Mmusculus.UCSC.mm10'))"
+  # Additional R packages installed through install.packages (repositories as configured by the base image)
+  R --slave -e "install.packages(c('checkmate', 'futile.logger', 'RColorBrewer', 'matrixStats', 'rlist', 'gridExtra', 'tidyverse', 'reshape2', 'scales', 'corrplot'))"
diff --git a/src/singularityContainers/recipe_conda b/src/singularityContainers/recipe_conda
new file mode 100755
index 0000000..d24c018
--- /dev/null
+++ b/src/singularityContainers/recipe_conda
@@ -0,0 +1,33 @@
+Bootstrap: docker
+FROM: continuumio/miniconda3
+
+%labels
+  Version v1.3
+
+%help
+  Singularity image for the ATAC-Seq pipeline (Python 3)
+
+
+
+%post
+  # Build steps run unattended, so every install command passes --yes.
+  # Add channels for Bioconda
+  /opt/conda/bin/conda config --add channels defaults
+  /opt/conda/bin/conda config --add channels bioconda
+  /opt/conda/bin/conda config --add channels conda-forge
+
+  # Install the tools (mamba is called by absolute path, like conda above)
+  /opt/conda/bin/conda install --yes mamba
+  /opt/conda/bin/mamba install --yes bedtools samtools fastqc trimmomatic multiqc bowtie2 picard gatk deeptools macs2 subread
+
+%environment
+
+%test
+   # bedtools --version
+   # samtools --version
+   # fastqc --version
+   # trimmomatic -version
+   # bowtie2 --version
+   # picard SortSam --version
+   # gatk --version
+   # deeptools --version
-- 
GitLab