Skip to content
Snippets Groups Projects
Commit a4f6175c authored by cyril.cros's avatar cyril.cros
Browse files

Squashing origin/next_work into dev

parent 43d16b08
No related branches found
No related tags found
1 merge request!8Merging back to main
Pipeline #59583 passed
tmp/*
*/.nextflow.log*
*/.nextflow
\ No newline at end of file
*/.nextflow
.nextflow.log*
.nextflow
\ No newline at end of file
......@@ -71,7 +71,7 @@ kustomize_build_apply:
JBROWSE_HOSTNAME: "genomes-dev.arendt.embl.de"
RELEASE_LABEL: "dev"
INGRESS_CLASS: "internal-users"
- when: always
- when: on_success
clean_up_k8s_deployment:
image: dtzar/helm-kubectl:3.8.2
......@@ -98,4 +98,4 @@ clean_up_k8s_deployment:
JBROWSE_HOSTNAME: "genomes-dev.arendt.embl.de"
RELEASE_LABEL: "dev"
INGRESS_CLASS: "internal-users"
- when: always
- when: on_success
......@@ -2,28 +2,41 @@
# Platy only for now
jbrowse create annelids
cd annelids
jbrowse add-assembly https://s3.embl.de/annelids/pdumv021/pdumv021_genome.fa.gz --name pdumv021
jbrowse add-track https://s3.embl.de/annelids/pdumv021/pdumv021.gff.gz -d "Kevin pdumv021 GTF file" -n "Genes models" --category "Annotations"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/transdecoder.gff.gz -d "Kevin runs transdecoder" -n "Transdecoder results" --category "Annotations"
jbrowse text-index
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN001.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN001" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN002.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN002" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN003.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN003" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN004.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN004" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN015.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN015" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN016.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN016" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN018.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN018" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN019.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN019" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN020.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN020" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN021.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN021" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN028.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN028" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN029.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN029" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN030.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN030" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN031.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN031" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN035.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN035" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN054.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN054" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN055.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN055" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN056.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN056" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN057.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN057" --category "Variants"
jbrowse add-track https://s3.embl.de/annelids/pdumv021/variation/SN058.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN058" --category "Variants"
jbrowse add-assembly https://s3.embl.de/annelids/pdumv021/pdumv021.fa.gz -n "pdumv021" --displayName "Platynereis dumerilii genome v2.1 (pdumv021)" --gziLocation https://s3.embl.de/annelids/pdumv021/pdumv021.fa.gz.gzi --faiLocation https://s3.embl.de/annelids/pdumv021/pdumv021.fa.gz.fai -t bgzipFasta
jbrowse add-assembly https://s3.embl.de/annelids/pmassv1_masked/pmassv1_masked.fa.gz -n "pmassv1_masked" --displayName "Platynereis massiliensis genome v1 - masked (pmassv1)" --gziLocation https://s3.embl.de/annelids/pmassv1_masked/pmassv1_masked.fa.gz.gzi --faiLocation https://s3.embl.de/annelids/pmassv1_masked/pmassv1_masked.fa.gz.fai -t bgzipFasta
jbrowse add-track https://s3.embl.de/annelids/pdumv021/ATACseq/1.final.bam -a "pdumv021" -d "Lara's Snakemake outputs" -n "Library 1" --category "ATACseq" --trackId "library_bam_1" --indexFile https://s3.embl.de/annelids/pdumv021/ATACseq/1.final.bam.bai
jbrowse add-track https://s3.embl.de/annelids/pdumv021/ATACseq/1.final.bigwig -a "pdumv021" -d "Lara's Snakemake outputs" -n "Library 1" --category "ATACseq" --trackId "library_bigwig_1" --indexFile https://s3.embl.de/annelids/pdumv021/ATACseq/
jbrowse add-track https://s3.embl.de/annelids/pdumv021/ATACseq/2.final.bam -a "pdumv021" -d "Lara's Snakemake outputs" -n "Library 2" --category "ATACseq" --trackId "library_bam_2" --indexFile https://s3.embl.de/annelids/pdumv021/ATACseq/2.final.bam.bai
jbrowse add-track https://s3.embl.de/annelids/pdumv021/ATACseq/2.final.bigwig -a "pdumv021" -d "Lara's Snakemake outputs" -n "Library 2" --category "ATACseq" --trackId "library_bigwig_2" --indexFile https://s3.embl.de/annelids/pdumv021/ATACseq/
jbrowse add-track https://s3.embl.de/annelids/pdumv021/ATACseq/3.final.bam -a "pdumv021" -d "Lara's Snakemake outputs" -n "Library 3" --category "ATACseq" --trackId "library_bam_3" --indexFile https://s3.embl.de/annelids/pdumv021/ATACseq/3.final.bam.bai
jbrowse add-track https://s3.embl.de/annelids/pdumv021/ATACseq/3.final.bigwig -a "pdumv021" -d "Lara's Snakemake outputs" -n "Library 3" --category "ATACseq" --trackId "library_bigwig_3" --indexFile https://s3.embl.de/annelids/pdumv021/ATACseq/
jbrowse add-track https://s3.embl.de/annelids/pdumv021/ATACseq/4.final.bam -a "pdumv021" -d "Lara's Snakemake outputs" -n "Library 4" --category "ATACseq" --trackId "library_bam_4" --indexFile https://s3.embl.de/annelids/pdumv021/ATACseq/4.final.bam.bai
jbrowse add-track https://s3.embl.de/annelids/pdumv021/ATACseq/4.final.bigwig -a "pdumv021" -d "Lara's Snakemake outputs" -n "Library 4" --category "ATACseq" --trackId "library_bigwig_4" --indexFile https://s3.embl.de/annelids/pdumv021/ATACseq/
jbrowse add-track https://s3.embl.de/annelids/pdumv021/ATACseq/5.final.bam -a "pdumv021" -d "Lara's Snakemake outputs" -n "Library 5" --category "ATACseq" --trackId "library_bam_5" --indexFile https://s3.embl.de/annelids/pdumv021/ATACseq/5.final.bam.bai
jbrowse add-track https://s3.embl.de/annelids/pdumv021/ATACseq/5.final.bigwig -a "pdumv021" -d "Lara's Snakemake outputs" -n "Library 5" --category "ATACseq" --trackId "library_bigwig_5" --indexFile https://s3.embl.de/annelids/pdumv021/ATACseq/
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Annotation/pdumv021_Annotation_annotation.gff.gz -a "pdumv021" -d "Kevin's GTF file" -n "Gene models" --category "Annotation" --trackId "annotation" --indexFile https://s3.embl.de/annelids/pdumv021/Annotation/pdumv021_Annotation_annotation.gff.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Annotation/pdumv021_EMAPPER.emapper.gff.gz -a "pdumv021" -d "Kevin's GFF file from EMAPPER" -n "EMAPPER models" --category "Annotation" --trackId "emapper" --indexFile https://s3.embl.de/annelids/pdumv021/Annotation/pdumv021_EMAPPER.emapper.gff.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/MASseq/collapsed.gff.gz -a "pdumv021" -d "MASseq outputs from running isoseq collapse against the Pdum genome" -n "SN073 MASseq isoforms from the 15ng prep" --category "MASseq" --trackId "SN073_15ng_collapsed" --indexFile https://s3.embl.de/annelids/pdumv021/MASseq/collapsed.gff.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/MASseq/collapsed.gff.gz -a "pdumv021" -d "MASseq outputs from running isoseq collapse against the Pdum genome" -n "SN073 MASseq isoforms" --category "MASseq" --trackId "SN073_60ng_collapsed" --indexFile https://s3.embl.de/annelids/pdumv021/MASseq/collapsed.gff.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN001_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN001" --category "Variants" --trackId "SN001_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN001_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN002_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN002" --category "Variants" --trackId "SN002_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN002_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN003_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN003" --category "Variants" --trackId "SN003_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN003_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN004_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN004" --category "Variants" --trackId "SN004_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN004_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN015_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN015" --category "Variants" --trackId "SN015_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN015_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN016_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN016" --category "Variants" --trackId "SN016_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN016_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN018_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN018" --category "Variants" --trackId "SN018_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN018_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN019_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN019" --category "Variants" --trackId "SN019_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN019_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN020_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN020" --category "Variants" --trackId "SN020_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN020_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN021_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN021" --category "Variants" --trackId "SN021_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN021_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN028_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN028" --category "Variants" --trackId "SN028_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN028_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN029_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN029" --category "Variants" --trackId "SN029_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN029_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN030_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN030" --category "Variants" --trackId "SN030_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN030_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN031_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN031" --category "Variants" --trackId "SN031_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN031_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN035_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN035" --category "Variants" --trackId "SN035_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN035_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN054_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN054" --category "Variants" --trackId "SN054_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN054_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN055_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN055" --category "Variants" --trackId "SN055_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN055_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN056_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN056" --category "Variants" --trackId "SN056_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN056_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN057_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN057" --category "Variants" --trackId "SN057_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN057_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN058_vcf.vcf.gz -a "pdumv021" -d "Tobi's variants" -n "SN058" --category "Variants" --trackId "SN058_vcf" --indexFile https://s3.embl.de/annelids/pdumv021/Variants/pdumv021_Variants_SN058_vcf.vcf.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pmassv1_masked/Annotation/pmassv1_EMAPPER.gff.gz -a "pmassv1_masked" -d "Kevin's GFF file from EMAPPER" -n "EMAPPER models" --category "Annotation" --trackId "emapper-pmass" --indexFile https://s3.embl.de/annelids/pmassv1_masked/Annotation/pmassv1_EMAPPER.gff.gz.tbi
jbrowse add-track https://s3.embl.de/annelids/pmassv1_masked/Annotation/pmassv1_masked_Annotation_annotation.gff.gz -a "pmassv1_masked" -d "Kevin's GTF file" -n "Gene models" --category "Annotation" --trackId "annotation-pmass" --indexFile https://s3.embl.de/annelids/pmassv1_masked/Annotation/pmassv1_masked_Annotation_annotation.gff.gz.tbi
name,path,display_name
pdumv021,/g/arendt/data/genome_assemblies/pdum-v2.1/genome-assemblies/FINAL_v021_ASM/pdumv021_genome.fa,platychou
pmassv1_masked,/g/arendt/data/platynereis_massiliensis/genome/final_assembly/pmassv1.genome.masked.fa,mid platy
name,path,display,path_to_LUT
pdumv021,/g/arendt/data/genome_assemblies/pdum-v2.1/genome-assemblies/FINAL_v021_ASM/pdumv021_genome.fa,Platynereis dumerilii genome v2.1 (pdumv021),
pmassv1_masked,/g/arendt/data/platynereis_massiliensis/genome/final_assembly/pmassv1.genome.masked.fa,Platynereis massiliensis genome v1 - masked (pmassv1),
\ No newline at end of file
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
process compress_genomes {
    // Compress one genome FASTA with bgzip.
    tag "$genome"
    label "htslib"
    label "small_job"

    input:
    // genome: assembly name; the FASTA is staged in the task dir as <genome>.fa
    tuple val(genome), path("${genome}.fa")

    output:
    // The assembly name paired with the bgzip-compressed FASTA
    tuple val(genome), path("${genome}.fa.gz")

    script:
    """
    echo $genome
    bgzip --threads $task.cpus -c ${genome}.fa > ${genome}.fa.gz
    ls -alh
    """
}
process index_genomes {
    // Build the .fai and .gzi indexes for a bgzip-compressed genome FASTA.
    tag "$genome"
    label "samtools"
    label "small_job"
    // Declared outputs are published under the bucket, in a folder named after the assembly
    publishDir "s3://$params.s3_bucket/$genome"

    input:
    // genome: assembly name; the compressed FASTA is staged as <genome>.fa.gz
    tuple val(genome), path("${genome}.fa.gz")

    output:
    // Assembly name, the compressed FASTA itself, then its .gzi and .fai indexes
    tuple val(genome), path("${genome}.fa.gz"), path("${genome}.fa.gz.gzi"), path("${genome}.fa.gz.fai")

    script:
    """
    echo $genome
    samtools faidx ${genome}.fa.gz --fai-idx ${genome}.fa.gz.fai --gzi-idx ${genome}.fa.gz.gzi
    ls -alh
    """
}
process gtf_to_gff_agat {
    // Convert a GTF annotation into a GFF file with AGAT.
    tag "$id"
    label "agat"
    label "small_job"

    input:
    // id / dest / type are carried through untouched; the GTF is staged as <id>.gtf
    tuple val(id), val(dest), val(type), path("${id}.gtf")

    output:
    // Same metadata, with the converted file named <id>.gff
    tuple val(id), val(dest), val(type), path("${id}.gff")

    script:
    """
    echo $id
    agat_convert_sp_gxf2gxf.pl -g ${id}.gtf -o ${id}.gff
    """
}
process vcf_bcftools {
    // Sort a VCF, compress it and build its tabix (.tbi) index with bcftools.
    tag "$id"
    label "bcftools"
    label "small_job"
    // Declared outputs are published to the destination carried in the input tuple
    publishDir "$dest"

    input:
    // The raw VCF is staged as <id>.unsorted.vcf so the sorted result can be named <id>.vcf
    tuple val(id), val(dest), val(type), path("${id}.unsorted.vcf")

    output:
    // Same metadata, followed by the compressed VCF and its index
    tuple val(id), val(dest), val(type), path("${id}.vcf.gz"), path("${id}.vcf.gz.tbi")

    script:
    """
    echo $id
    bcftools sort ${id}.unsorted.vcf > ${id}.vcf
    bcftools view ${id}.vcf --output-type z > ${id}.vcf.gz
    bcftools index --tbi ${id}.vcf.gz
    ls -alh
    """
}
process index_compress_sort_gff {
    // Sort a GFF by sequence name and start position, bgzip it and build its tabix index.
    //
    // Outputs are named after the unique track id rather than after the staged input
    // file. Several tracks can share an input basename (e.g. two "collapsed.gff" files)
    // and the same publish destination; naming by input would make them overwrite each
    // other. This matches how vcf_bcftools and gtf_to_gff_agat name their outputs.
    tag "$id"
    label "htslib"
    label "small_job"
    publishDir "$dest"

    input:
    // id / dest / type are carried through untouched; gff keeps its original file name
    tuple val(id), val(dest), val(type), path(gff)

    output:
    // Same metadata, followed by the compressed sorted GFF and its .tbi index
    tuple val(id), val(dest), val(type), path("${id}.gff.gz"), path("${id}.gff.gz.tbi")

    script:
    """
    echo $id
    # Header lines first (a GFF without any '#' line must not abort the task under 'bash -e'),
    # then the records sorted by column 1 and numerically by column 4.
    (grep "^#" $gff || true; grep -v "^#" $gff | sort -t"`printf '\t'`" -k1,1 -k4,4n) > ${id}.sorted.gff
    bgzip --threads $task.cpus -i -c ${id}.sorted.gff > ${id}.gff.gz
    # tabix writes <file>.tbi next to its input by itself; nothing useful goes to stdout
    tabix -f -p gff ${id}.gff.gz
    ls -alh
    """
}
process index_samtools {
    // Build the BAI index of a BAM file and publish both next to each other.
    tag "$id"
    label "samtools"
    label "small_job"
    publishDir "$dest"

    input:
    // id / dest / type are carried through untouched; bam keeps its original file name
    tuple val(id), val(dest), val(type), path(bam)

    output:
    // Same metadata, followed by the BAM itself and its .bai index
    tuple val(id), val(dest), val(type), path("${bam}"), path("${bam}.bai")

    script:
    """
    echo $id
    # samtools index creates <bam>.bai itself; redirecting its (empty) stdout onto the
    # same file was redundant and only risked truncating the index.
    samtools index --threads $task.cpus -b ${bam}
    ls -alh
    """
}
process upload_only {
    // Pass a file through unchanged so that publishDir places it at the destination.
    tag "$id"
    label "small_job"
    publishDir "$dest"

    input:
    // id / dest / type are carried through untouched; infile keeps its original file name
    tuple val(id), val(dest), val(type), path(infile)

    output:
    // Same metadata with the very same file; no index is produced here
    tuple val(id), val(dest), val(type), path("${infile}")

    script:
    """
    echo $id
    ls -alh
    """
}
workflow {
    // Input tables: one record per genome assembly / per track, keyed by the CSV header.
    // splitCsv is applied exactly once per channel (comma is its default separator).
    Channel.fromPath(params.csv_genomes_to_process, checkIfExists: true) |
        splitCsv(header: true) |
        set {genomes}

    Channel.fromPath(params.csv_tracks_to_process, checkIfExists: true) |
        splitCsv(header: true) |
        set {tracks}

    // Fail early if any genome or track path listed in the CSVs does not exist
    genomes |
        map { it.path } |
        concat( tracks.map( { it.path } ) ) |
        map { file(it, checkIfExists: true) }

    ///////////////////// GENOME PARTS
    // Compress and index the genomes, then emit one "jbrowse add-assembly" line per assembly
    genomes |
        map {it -> [it.name, it.path]} |
        compress_genomes |
        index_genomes |
        // assembly name + public URLs of the FASTA, its .gzi and its .fai
        map{ it -> [ it[0], "https://$params.s3_url/$params.s3_bucket/" + it[0] + "/" + it[1].name, \
                     "https://$params.s3_url/$params.s3_bucket/" + it[0] + "/" + it[2].name,\
                     "https://$params.s3_url/$params.s3_bucket/" + it[0] + "/" + it[3].name ]} |
        // bring back the display name and optional refName alias table from the CSV
        combine (genomes.map(it -> [it.name, it.display, it.path_to_LUT]), by: 0) |
        map{ it -> [assembly: it[0], s3: it[1], s3_idx_gzi: it[2], s3_idx_fai: it[3], display: it[4], LUT: it[5]]} |
        map {it -> "jbrowse add-assembly ${it.s3} -n \"${it.assembly}\" --displayName \"${it.display}\" " + \
                   "--gziLocation ${it.s3_idx_gzi} --faiLocation ${it.s3_idx_fai} -t bgzipFasta " + \
                   ((it.LUT.isEmpty()) ? " " : " --refNameAliases ${it.LUT}" )} |
        collectFile(name: 'jbrowse_add_assemblies.sh', newLine: true, storeDir: params.out_dir, sort: true)

    ///////////////////// TRACK PART
    // Build a unique id and the publish destination for each track, then split by type
    tracks |
        map {it -> [id: it.assembly+"_"+it.category+"_"+it.trackID, \
                    dest: "s3://$params.s3_bucket" + "/" + it.assembly +"/"+it.category, \
                    type: it.type, path: it.path ]} |
        branch {
            gff: it.type == 'GFF'
            gtf: it.type == 'GTF'
            vcf: it.type == 'VCF'
            bam: it.type == 'BAM'
            just_upload_it: it.type == 'BigWig' || it.type == 'BigBed'
            not_handled: true
        } |
        set {track_by_type}

    // GTFs are converted to GFF first, then all GFFs are sorted, compressed and indexed
    track_by_type.gff |
        concat(gtf_to_gff_agat(track_by_type.gtf)) |
        index_compress_sort_gff |
        set {GFF}

    // VCF
    track_by_type.vcf |
        vcf_bcftools |
        set {VCF}

    // BAM
    track_by_type.bam |
        index_samtools |
        set {BAM}

    // BigWig and other files that are only uploaded; an empty string stands in for the
    // missing index so every tuple has the same length
    track_by_type.just_upload_it |
        upload_only |
        map { it -> it + [""]} |
        set {track_with_no_idx}

    track_by_type.not_handled |
        view{"Nothing gets done with tracks of unhandled type $it"}

    // Reunite those files for downstream track processing
    track_with_no_idx |
        concat(GFF,VCF, BAM) |
        // id / published file name / published index name ("" when there is no index)
        map {it -> [it[0], file(it[3]).getName(), (it[4].isEmpty()) ? "" : file(it[4]).getName()]} |
        set {merged_tracks}

    // Add back the early info: the CSV columns follow the id at positions 1..8,
    // the published file name and index name land at positions 9 and 10
    tracks |
        map {it -> [it.assembly+"_"+it.category+"_"+it.trackID] + it.values()} |
        combine(merged_tracks, by:0) |
        map {it -> [assembly: it[1], trackId: it[2], name: it[3], \
                    category: it[5], desc: it[6], \
                    s3: "https://" + params.s3_url +"/"+params.s3_bucket+"/"+ it[1]+ "/" + it[5] + "/" + it[9], \
                    // Keep the index URL empty when the track has no index; otherwise the
                    // bare folder URL would be emitted as a bogus --indexFile argument
                    s3_idx: ((it[10].isEmpty()) ? "" : "https://" + params.s3_url +"/"+params.s3_bucket+"/"+ it[1]+ "/" + it[5] + "/" + it[10]), \
                    text_idx: it[8]
        ]} |
        set {processed_tracks}

    // One "jbrowse add-track" line per track; --indexFile only when an index exists
    processed_tracks |
        map {it -> "jbrowse add-track ${it.s3} -a \"${it.assembly}\" -d \"${it.desc}\" -n \"${it.name}\" " + \
                   "--category \"${it.category}\" --trackId \"${it.trackId}\" " + \
                   ((it.s3_idx.isEmpty()) ? " " : " --indexFile ${it.s3_idx}" )} |
        collectFile(name: 'jbrowse_add_tracks.sh', newLine: true, storeDir: params.out_dir, sort: true)
}
workflow.onComplete {
    // Log a one-line verdict once the run has finished
    if( workflow.success ) {
        log.info("\nWorkflow exited successfully!")
    }
    else {
        log.info("\nOops .. something went wrong")
    }
}
\ No newline at end of file
}
......@@ -2,6 +2,16 @@ manifest {
nextflowVersion = '>= 23.04.0'
}
//storage
// Object-storage settings used for s3:// paths.
aws {
// Credentials are read from the secrets named ACCESS_KEY and SECRET_KEY
accessKey = secrets.ACCESS_KEY
secretKey = secrets.SECRET_KEY
client {
// Custom S3 endpoint instead of the default AWS one
endpoint = 'https://s3.embl.de/'
// Address buckets as <endpoint>/<bucket> rather than <bucket>.<endpoint>
s3PathStyleAccess = true
}
}
executor {
$slurm {
queueSize = 100
......@@ -11,6 +21,20 @@ executor {
}
}
// Conda environments requested through the `conda` process directive.
conda {
enabled = true
// Use mamba to create the environments
useMamba = true
// Directory where the created environments are stored
cacheDir = '/g/arendt/Cyril/envs/nextflow'
}
// Apptainer container runtime settings.
apptainer {
enabled = true
// NOTE(review): nested here this sets `apptainer.docker.enabled`, not the top-level
// `docker.enabled` — confirm which one was intended.
docker.enabled = false
autoMounts = true
// Environment variable forwarded from the host into the container
envWhitelist = 'CUDA_VISIBLE_DEVICES'
// Directory where pulled images are stored
cacheDir = '/scratch/cros/singularity-cache'
}
process {
executor = "slurm"
scratch = '$SCRATCHDIR'
......@@ -21,12 +45,29 @@ process {
clusterOptions = '-p htc -N 1 --no-requeue'
withLabel: jbrowse2 { // include htslib
singularity ='https://depot.galaxyproject.org/singularity/jbrowse2:2.11.0--hd6180af_0'
withLabel: htslib {
    // Processes labelled "htslib" run in a conda environment providing htslib
    conda ='bioconda::htslib=1.20.0'
    // One hour per attempt, as Duration arithmetic; multiplying the String '1h'
    // only repeats the text ('1h1h', ...) instead of scaling a duration
    time = { 1.h * task.attempt }
}
withLabel: samtools { // include htslib
singularity ='https://depot.galaxyproject.org/singularity/jbrowse2:2.11.0--hd6180af_0'
withLabel: agat {
    // Processes labelled "agat" run in a conda environment providing AGAT
    conda ='bioconda::agat=1.4.0-0'
    // One hour per attempt, as Duration arithmetic; multiplying the String '1h'
    // only repeats the text ('1h1h', ...) instead of scaling a duration
    time = { 1.h * task.attempt }
}
withLabel: bcftools {
    // Processes labelled "bcftools" run in a conda environment providing bcftools
    conda ='bioconda::bcftools=1.20-0'
    // One hour per attempt, as Duration arithmetic; multiplying the String '1h'
    // only repeats the text ('1h1h', ...) instead of scaling a duration
    time = { 1.h * task.attempt }
}
withLabel: samtools {
    // Processes labelled "samtools" run in a conda environment providing samtools
    conda ='bioconda::samtools=1.20-0'
    // One hour per attempt, as Duration arithmetic; multiplying the String '1h'
    // only repeats the text ('1h1h', ...) instead of scaling a duration
    time = { 1.h * task.attempt }
}
// Resource request shared by every process labelled "small_job"
withLabel: small_job {
cpus = 4
memory = '8 G'
}
}
\ No newline at end of file
......@@ -3,5 +3,5 @@ csv_genomes_to_process: "./genomes.csv"
csv_tracks_to_process: "./tracks.csv"
out_dir: "/scratch/cros/testing_jbrowse"
name: "Jbrowse2 processing"
s3_endpoint: "https://s3.embl.de"
s3_bucket: "annelids"
\ No newline at end of file
s3_bucket: "annelids"
s3_url: "s3.embl.de"
\ No newline at end of file
assembly,trackID,name,type,category,description,path,is_text_index
pmassv1_masked,annotation,Gene models,GTF,Annotation,Kevin's GTF file,/g/arendt/data/platynereis_massiliensis/genes/pmassv1.gtf,TRUE
pmassv1_masked,emapper,EMAPPER models,GFF,Annotation,Kevin's GFF file from EMAPPER,/g/arendt/data/platynereis_massiliensis/proteome/pmassv1_EMAPPER.gff,FALSE
pmassv1_masked,emapper,EMAPPER models,GFF,Annotation,Kevin's GFF file from EMAPPER,/g/arendt/data/platynereis_massiliensis/proteome/annotation/pmassv1_EMAPPER.gff,FALSE
pdumv021,annotation,Gene models,GTF,Annotation,Kevin's GTF file,/g/arendt/data/genome_assemblies/pdum-v2.1/genes/pdumv021.gtf,TRUE
pdumv021,emapper,EMAPPER models,GFF,Annotation,Kevin's GFF file from EMAPPER,/g/arendt/data/genome_assemblies/pdum-v2.1/proteome/pdumv021_EMAPPER.emapper.gff,FALSE
pdumv021,SN035_vcf,SN035,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN035.vcf",FALSE
pdumv021,SN054_vcf,SN054,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN054.vcf",FALSE
pdumv021,SN055_vcf,SN055,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN055.vcf",FALSE
pdumv021,SN056_vcf,SN056,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN056.vcf",FALSE
pdumv021,SN057_vcf,SN057,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN057.vcf",FALSE
pdumv021,SN058_vcf,SN058,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN058.vcf",FALSE
pdumv021,SN001_vcf,SN001,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN001.vcf",FALSE
pdumv021,SN002_vcf,SN002,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN002.vcf",FALSE
pdumv021,SN003_vcf,SN003,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN003.vcf",FALSE
pdumv021,SN004_vcf,SN004,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN004.vcf",FALSE
pdumv021,SN015_vcf,SN015,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN015.vcf",FALSE
pdumv021,SN016_vcf,SN016,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN016.vcf",FALSE
pdumv021,SN018_vcf,SN018,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN018.vcf",FALSE
pdumv021,SN019_vcf,SN019,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN019.vcf",FALSE
pdumv021,SN020_vcf,SN020,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN020.vcf",FALSE
pdumv021,SN021_vcf,SN021,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN021.vcf",FALSE
pdumv021,SN028_vcf,SN028,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN028.vcf",FALSE
pdumv021,SN029_vcf,SN029,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN029.vcf",FALSE
pdumv021,SN030_vcf,SN030,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN030.vcf",FALSE
pdumv021,SN031_vcf,SN031,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN031.vcf",FALSE
pdumv021,library_bam_1,Library 1,BAM,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/1.final.bam",FALSE
pdumv021,library_bam_2,Library 2,BAM,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/2.final.bam",FALSE
pdumv021,library_bam_3,Library 3,BAM,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/3.final.bam",FALSE
pdumv021,library_bam_4,Library 4,BAM,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/4.final.bam",FALSE
pdumv021,library_bam_5,Library 5,BAM,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/5.final.bam",FALSE
pdumv021,library_bigwig_1,Library 1,BigWig,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/1.final.bam",FALSE
pdumv021,library_bigwig_2,Library 2,BigWig,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/2.final.bam",FALSE
pdumv021,library_bigwig_3,Library 3,BigWig,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/3.final.bam",FALSE
pdumv021,library_bigwig_4,Library 4,BigWig,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/4.final.bam",FALSE
pdumv021,library_bigwig_5,Library 5,BigWig,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/5.final.bam",FALSE
pdumv021,SN073_15ng_collapsed,SN073 MASseq isoforms from the 15ng prep,GFF,MASseq,MASseq outputs from running isoseq collapse against the Pdum genome,"/scratch/cros/MASseq_output_new/SN073/collapsed.gff",FALSE
pdumv021,SN073_60ng_collapsed,SN073 MASseq isoforms,GFF,MASseq,MASseq outputs from running isoseq collapse against the Pdum genome,"/scratch/cros/MASseq_output_new/SN073_60ng/collapsed.gff",FALSE
pdumv021,emapper,EMAPPER models,GFF,Annotation,Kevin's GFF file from EMAPPER,/g/arendt/data/genome_assemblies/pdum-v2.1/proteome/annotation/pdumv021_EMAPPER.emapper.gff,FALSE
pdumv021,SN035_vcf,SN035,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN035.vcf,FALSE
pdumv021,SN054_vcf,SN054,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN054.vcf,FALSE
pdumv021,SN055_vcf,SN055,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN055.vcf,FALSE
pdumv021,SN056_vcf,SN056,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN056.vcf,FALSE
pdumv021,SN057_vcf,SN057,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN057.vcf,FALSE
pdumv021,SN058_vcf,SN058,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN058.vcf,FALSE
pdumv021,SN001_vcf,SN001,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN001.vcf,FALSE
pdumv021,SN002_vcf,SN002,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN002.vcf,FALSE
pdumv021,SN003_vcf,SN003,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN003.vcf,FALSE
pdumv021,SN004_vcf,SN004,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN004.vcf,FALSE
pdumv021,SN015_vcf,SN015,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN015.vcf,FALSE
pdumv021,SN016_vcf,SN016,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN016.vcf,FALSE
pdumv021,SN018_vcf,SN018,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN018.vcf,FALSE
pdumv021,SN019_vcf,SN019,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN019.vcf,FALSE
pdumv021,SN020_vcf,SN020,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN020.vcf,FALSE
pdumv021,SN021_vcf,SN021,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN021.vcf,FALSE
pdumv021,SN028_vcf,SN028,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN028.vcf,FALSE
pdumv021,SN029_vcf,SN029,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN029.vcf,FALSE
pdumv021,SN030_vcf,SN030,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN030.vcf,FALSE
pdumv021,SN031_vcf,SN031,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN031.vcf,FALSE
pdumv021,library_bam_1,Library 1,BAM,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/1.final.bam,FALSE
pdumv021,library_bam_2,Library 2,BAM,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/2.final.bam,FALSE
pdumv021,library_bam_3,Library 3,BAM,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/3.final.bam,FALSE
pdumv021,library_bam_4,Library 4,BAM,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/4.final.bam,FALSE
pdumv021,library_bam_5,Library 5,BAM,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/5.final.bam,FALSE
pdumv021,library_bigwig_1,Library 1,BigWig,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/1.final.bigwig,FALSE
pdumv021,library_bigwig_2,Library 2,BigWig,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/2.final.bigwig,FALSE
pdumv021,library_bigwig_3,Library 3,BigWig,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/3.final.bigwig,FALSE
pdumv021,library_bigwig_4,Library 4,BigWig,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/4.final.bigwig,FALSE
pdumv021,library_bigwig_5,Library 5,BigWig,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/5.final.bigwig,FALSE
pdumv021,SN073_15ng_collapsed,SN073 MASseq isoforms from the 15ng prep,GFF,MASseq,MASseq outputs from running isoseq collapse against the Pdum genome,/scratch/cros/MASseq_output_new/SN073/collapsed.gff,FALSE
pdumv021,SN073_60ng_collapsed,SN073 MASseq isoforms,GFF,MASseq,MASseq outputs from running isoseq collapse against the Pdum genome,/scratch/cros/MASseq_output_new/SN073_60ng/collapsed.gff,FALSE
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment