From 8b85f1b02162a8355311ccabfffc49e20b167472 Mon Sep 17 00:00:00 2001
From: "cyril.cros" <cyril.cros@polytechnique.org>
Date: Thu, 9 May 2024 00:18:44 +0200
Subject: [PATCH] Main from dev and edits to tracks included

---
 .gitignore                                |   4 +-
 .gitlab-ci.yml                            |   4 +-
 image/jbrowse_script.sh                   |  10 +-
 resources_to_s3_NF/genomes.csv            |   6 +-
 resources_to_s3_NF/jbrowse2_processing.nf | 197 +++++++++++++++++++++-
 resources_to_s3_NF/nextflow.config        |  49 +++++-
 resources_to_s3_NF/params.yaml            |   4 +-
 resources_to_s3_NF/tracks.csv             |  68 ++++----
 8 files changed, 283 insertions(+), 59 deletions(-)

diff --git a/.gitignore b/.gitignore
index 706e6c0..2571725 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 tmp/*
 */.nextflow.log*
-*/.nextflow
\ No newline at end of file
+*/.nextflow
+.nextflow.log*
+.nextflow
\ No newline at end of file
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 859bfd8..bde764c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -71,7 +71,7 @@ kustomize_build_apply:
         JBROWSE_HOSTNAME: "genomes-dev.arendt.embl.de"
         RELEASE_LABEL: "dev"
         INGRESS_CLASS: "internal-users"
-    - when: always
+    - when: on_success
 
 clean_up_k8s_deployment:
   image: dtzar/helm-kubectl:3.8.2
@@ -98,4 +98,4 @@ clean_up_k8s_deployment:
         JBROWSE_HOSTNAME: "genomes-dev.arendt.embl.de"
         RELEASE_LABEL: "dev"
         INGRESS_CLASS: "internal-users"
-    - when: always
+    - when: on_success
diff --git a/image/jbrowse_script.sh b/image/jbrowse_script.sh
index d14d979..e7ec75b 100755
--- a/image/jbrowse_script.sh
+++ b/image/jbrowse_script.sh
@@ -2,7 +2,9 @@
 # Platy only for now
 jbrowse create annelids
 cd annelids
-jbrowse add-assembly https://s3.embl.de/annelids/pdumv021/pdumv021_genome.fa.gz --name pdumv021
-jbrowse add-track https://s3.embl.de/annelids/pdumv021/pdumv021.gff.gz -d "Kevin pdumv021 GTF file" -n "Genes models" --category "Annotations"
-jbrowse add-track https://s3.embl.de/annelids/pdumv021/transdecoder.gff.gz -d "Kevin runs transdecoder" -n "Transdecoder results" --category "Annotations"
-jbrowse text-index
+jbrowse add-assembly https://s3.embl.de/annelids/pdumv021/pdumv021.fa.gz -n "pdumv021" --displayName "Platynereis dumerilii genome v2.1 (pdumv021)" --gziLocation https://s3.embl.de/annelids/pdumv021/pdumv021.fa.gz.gzi --faiLocation https://s3.embl.de/annelids/pdumv021/pdumv021.fa.gz.fai -t bgzipFasta  
+jbrowse add-assembly https://s3.embl.de/annelids/pmassv1_masked/pmassv1_masked.fa.gz -n "pmassv1_masked" --displayName "Platynereis massiliensis genome v1 - masked (pmassv1)" --gziLocation https://s3.embl.de/annelids/pmassv1_masked/pmassv1_masked.fa.gz.gzi --faiLocation https://s3.embl.de/annelids/pmassv1_masked/pmassv1_masked.fa.gz.fai -t bgzipFasta  
+jbrowse add-track https://s3.embl.de/annelids/pdumv021/Annotation/pdumv021_Annotation_annotation.gff.gz -a "pdumv021" -d "Kevin's GTF file" -n "Gene models" --category "Annotation" --trackId "annotation"  --indexFile https://s3.embl.de/annelids/pdumv021/Annotation/pdumv021_Annotation_annotation.gff.gz.tbi
+jbrowse add-track https://s3.embl.de/annelids/pdumv021/Annotation/pdumv021_EMAPPER.emapper.gff.gz -a "pdumv021" -d "Kevin's GFF file from EMAPPER" -n "EMAPPER models" --category "Annotation" --trackId "emapper"  --indexFile https://s3.embl.de/annelids/pdumv021/Annotation/pdumv021_EMAPPER.emapper.gff.gz.tbi
+jbrowse add-track https://s3.embl.de/annelids/pmassv1_masked/Annotation/pmassv1_EMAPPER.gff.gz -a "pmassv1_masked" -d "Kevin's GFF file from EMAPPER" -n "EMAPPER models" --category "Annotation" --trackId "emapper-pmass"  --indexFile https://s3.embl.de/annelids/pmassv1_masked/Annotation/pmassv1_EMAPPER.gff.gz.tbi
+jbrowse add-track https://s3.embl.de/annelids/pmassv1_masked/Annotation/pmassv1_masked_Annotation_annotation.gff.gz -a "pmassv1_masked" -d "Kevin's GTF file" -n "Gene models" --category "Annotation" --trackId "annotation-pmass"  --indexFile https://s3.embl.de/annelids/pmassv1_masked/Annotation/pmassv1_masked_Annotation_annotation.gff.gz.tbi
diff --git a/resources_to_s3_NF/genomes.csv b/resources_to_s3_NF/genomes.csv
index 567db74..fd57d70 100644
--- a/resources_to_s3_NF/genomes.csv
+++ b/resources_to_s3_NF/genomes.csv
@@ -1,3 +1,3 @@
-name,path,display_name
-pdumv021,/g/arendt/data/genome_assemblies/pdum-v2.1/genome-assemblies/FINAL_v021_ASM/pdumv021_genome.fa,platychou
-pmassv1_masked,/g/arendt/data/platynereis_massiliensis/genome/final_assembly/pmassv1.genome.masked.fa,mid platy
+name,path,display,path_to_LUT
+pdumv021,/g/arendt/data/genome_assemblies/pdum-v2.1/genome-assemblies/FINAL_v021_ASM/pdumv021_genome.fa,Platynereis dumerilii genome v2.1 (pdumv021),
+pmassv1_masked,/g/arendt/data/platynereis_massiliensis/genome/final_assembly/pmassv1.genome.masked.fa,Platynereis massiliensis genome v1 - masked (pmassv1),
\ No newline at end of file
diff --git a/resources_to_s3_NF/jbrowse2_processing.nf b/resources_to_s3_NF/jbrowse2_processing.nf
index e1c7711..4153339 100755
--- a/resources_to_s3_NF/jbrowse2_processing.nf
+++ b/resources_to_s3_NF/jbrowse2_processing.nf
@@ -1,24 +1,203 @@
 #!/usr/bin/env nextflow
 nextflow.enable.dsl=2
 
+process compress_genomes { // bgzip-compresses one genome FASTA and emits (genome, <genome>.fa.gz)
+    tag "$genome"
+    label "htslib"
+    label "small_job"
+    input:
+        tuple val(genome), path("${genome}.fa") // the FASTA is staged as <genome>.fa
+    output:
+        tuple val(genome), path("${genome}.fa.gz")
+    """
+    echo $genome
+    bgzip --threads $task.cpus -c ${genome}.fa > ${genome}.fa.gz
+    ls -alh
+    """
+}
+
+process index_genomes { // builds the .fai and .gzi indexes of a bgzipped genome with samtools faidx
+    tag "$genome"
+    label "samtools"
+    label "small_job"
+    publishDir "s3://$params.s3_bucket/$genome" // outputs are published under <bucket>/<genome>
+    input:
+        tuple val(genome), path("${genome}.fa.gz")
+    output:
+        tuple val(genome), path("${genome}.fa.gz"), path("${genome}.fa.gz.gzi"), path("${genome}.fa.gz.fai") // the compressed FASTA is re-emitted with both indexes
+    """
+    echo $genome
+    samtools faidx ${genome}.fa.gz --fai-idx ${genome}.fa.gz.fai --gzi-idx ${genome}.fa.gz.gzi
+    ls -alh
+    """
+}
+
+process gtf_to_gff_agat { // converts one GTF track to GFF with AGAT; id, dest and type pass through unchanged
+    tag "$id"
+    label "agat"
+    label "small_job"
+    input:
+        tuple val(id), val(dest), val(type), path("${id}.gtf") // the GTF is staged as <id>.gtf
+    output:
+        tuple val(id), val(dest), val(type), path("${id}.gff")
+    """
+    echo $id
+    agat_convert_sp_gxf2gxf.pl -g ${id}.gtf -o ${id}.gff
+    """
+}
+
+process vcf_bcftools { // sorts a VCF, compresses it (--output-type z) and builds a .tbi index with bcftools
+    tag "$id"
+    label "bcftools"
+    label "small_job"
+    publishDir "$dest" // dest comes from the input tuple
+    input:
+        tuple val(id), val(dest), val(type), path("${id}.unsorted.vcf") // the VCF is staged as <id>.unsorted.vcf
+    output:
+        tuple val(id), val(dest), val(type), path("${id}.vcf.gz"), path("${id}.vcf.gz.tbi")
+    """
+    echo $id
+    bcftools sort ${id}.unsorted.vcf > ${id}.vcf
+    bcftools view  ${id}.vcf --output-type z > ${id}.vcf.gz
+    bcftools index --tbi ${id}.vcf.gz
+    ls -alh
+    """
+}
+
+process index_compress_sort_gff { // sorts a GFF by column 1 then numeric column 4, bgzips it and builds a tabix index
+    tag "$id"
+    label "htslib"
+    label "small_job"
+    publishDir "$dest" // dest comes from the input tuple
+    input:
+        tuple val(id), val(dest), val(type), path(gff) // the GFF keeps its incoming file name
+    output:
+        tuple val(id), val(dest), val(type), path("${gff}.gz"), path("${gff}.gz.tbi")
+    """
+    echo $id
+    (grep "^#" $gff; grep -v "^#" $gff | sort -t"`printf '\t'`" -k1,1 -k4,4n) > sorted_${gff};
+    bgzip --threads $task.cpus -i -c sorted_${gff} > ${gff}.gz
+    tabix -f -p gff ${gff}.gz
+    ls -alh
+    """
+}
+
+process index_samtools { // builds a .bai index for one BAM and re-emits the BAM together with it
+    tag "$id"
+    label "samtools"
+    label "small_job"
+    publishDir "$dest" // dest comes from the input tuple
+    input:
+        tuple val(id), val(dest), val(type), path(bam)
+    output:
+        tuple val(id), val(dest), val(type), path("${bam}"), path("${bam}.bai")
+    """
+    echo $id
+    samtools index --threads $task.cpus -b ${bam}
+    ls -alh
+    """
+}
+
+process upload_only { // runs no conversion: re-emits the staged file unchanged so it is published to dest
+    tag "$id"
+    label "small_job"
+    publishDir "$dest" // dest comes from the input tuple
+    input:
+        tuple val(id), val(dest), val(type), path(infile)
+    output:
+        tuple val(id), val(dest), val(type), path("${infile}")
+    """
+    echo $id
+    ls -alh
+    """
+}
+
 workflow {
-    // Path
     Channel.fromPath(params.csv_genomes_to_process, checkIfExists: true) |
-        splitCsv(header:true, sep:",") |
+        splitCsv(header: true) |
         set {genomes}
     Channel.fromPath(params.csv_tracks_to_process, checkIfExists: true) |
-        splitCsv(header:true, sep:",") |
+        splitCsv(header: true) |
         set {tracks}
     // Simple checking for the genome files path
     genomes |
         map { it.path } |
-    tracks.map( { it.path } )
-        view
-   // tracks |
-     //   map { file(it.path, checkIfExists: true) } |
-       // view
+        concat( tracks.map( { it.path } ) ) |
+        map { file(it, checkIfExists: true) }
+    ///////////////////// GENOME PARTS
+    // Compress and index each genome, publish it to S3, then write one "jbrowse add-assembly" line per genome
+    genomes |
+        map {it -> [it.name, it.path]} |
+        compress_genomes |
+        index_genomes |
+        map{ it -> [ it[0], "https://$params.s3_url/$params.s3_bucket/" + it[0] + "/" + it[1].name, \
+                    "https://$params.s3_url/$params.s3_bucket/" + it[0] + "/" + it[2].name,\
+                    "https://$params.s3_url/$params.s3_bucket/" + it[0] + "/" + it[3].name ]} |
+        combine (genomes.map(it -> [it.name, it.display, it.path_to_LUT]), by: 0) |
+        map{ it -> [assembly: it[0], s3: it[1], s3_idx_gzi: it[2], s3_idx_fai: it[3], display: it[4], LUT: it[5]]} |
+        map {it -> "jbrowse add-assembly ${it.s3} -n \"${it.assembly}\" --displayName \"${it.display}\" " + \
+                    "--gziLocation ${it.s3_idx_gzi} --faiLocation ${it.s3_idx_fai} -t bgzipFasta " + \
+                    (it.LUT ? " --refNameAliases ${it.LUT}" : " ")} |
+        collectFile(name: 'jbrowse_add_assemblies.sh', newLine: true, storeDir: params.out_dir, sort: true)
+    ///////////////////// TRACK PART
+    // Build a per-track id and S3 destination, then branch on the declared track type
+    tracks |
+        map {it -> [id: it.assembly+"_"+it.category+"_"+it.trackID, \
+                    dest: "s3://$params.s3_bucket" + "/" + it.assembly +"/"+it.category, \
+                    type: it.type, path: it.path ]} |
+        branch {
+            gff: it.type == 'GFF'
+            gtf: it.type == 'GTF'
+            vcf: it.type == 'VCF'
+            bam: it.type == 'BAM'
+            just_upload_it: it.type == 'BigWig' || it.type == 'BigBed'
+            not_handled: true
+        } |
+        set {track_by_type}
+    // GFF tracks (GTFs are converted first). NOTE(review): native GFFs keep their source file name, so same-named files in one assembly/category (e.g. two collapsed.gff) publish to the same S3 key
+    track_by_type.gff |
+        concat(gtf_to_gff_agat(track_by_type.gtf)) |
+        index_compress_sort_gff |
+        set {GFF}
+    // VCF
+    track_by_type.vcf |
+        vcf_bcftools |
+        set {VCF}
+    // BAM
+    track_by_type.bam |
+        index_samtools |
+        set {BAM}
+    // BigWig/BigBed are uploaded as-is; an empty string is appended in place of the index they do not have
+    track_by_type.just_upload_it |
+        upload_only |
+        map { it -> it + [""]} |
+        set {track_with_no_idx}
+    track_by_type.not_handled |
+        view{"Nothing gets done with tracks of unhandled type $it"}
+    // Reunite all processed tracks for the downstream "jbrowse add-track" step
+    track_with_no_idx |
+        concat(GFF,VCF, BAM) |
+        // -> [id, data file name, index file name or ""]
+        map {it -> [it[0], file(it[3]).getName(), (it[4].isEmpty()) ? "" :  file(it[4]).getName()]} |
+        set {merged_tracks}
+    // Join with the original CSV row: positions 1-8 follow the tracks.csv column order, 9-10 come from merged_tracks
+    tracks |
+        map {it -> [it.assembly+"_"+it.category+"_"+it.trackID] + it.values()} |
+        combine(merged_tracks, by:0) |
+        map {it -> [assembly: it[1], trackId: it[2], name: it[3], \
+                    category: it[5], desc: it[6], \
+                    s3: "https://" + params.s3_url +"/"+params.s3_bucket+"/"+ it[1]+ "/" + it[5] + "/" + it[9], \
+                    s3_idx: (it[10] ? "https://" + params.s3_url +"/"+params.s3_bucket+"/"+ it[1]+ "/" + it[5] + "/" + it[10] : ""), \
+                    text_idx: it[8]
+        ]} |
+        set {processed_tracks}
+    processed_tracks |
+        map {it -> "jbrowse add-track ${it.s3} -a \"${it.assembly}\" -d \"${it.desc}\" -n \"${it.name}\" " + \
+                    "--category \"${it.category}\" --trackId \"${it.trackId}\" " + \
+                    ((it.s3_idx.isEmpty()) ? " " : " --indexFile ${it.s3_idx}" )} |
+        collectFile(name: 'jbrowse_add_tracks.sh', newLine: true, storeDir: params.out_dir, sort: true)
 }
 
 workflow.onComplete {
 	log.info ( workflow.success ? "\nWorkflow exited successfully!" : "\nOops .. something went wrong" )
-}
\ No newline at end of file
+}
diff --git a/resources_to_s3_NF/nextflow.config b/resources_to_s3_NF/nextflow.config
index a1f7afe..462aa02 100644
--- a/resources_to_s3_NF/nextflow.config
+++ b/resources_to_s3_NF/nextflow.config
@@ -2,6 +2,16 @@ manifest {
   nextflowVersion = '>= 23.04.0'
 }
 
+//storage
+aws {
+  accessKey = secrets.ACCESS_KEY
+  secretKey = secrets.SECRET_KEY
+    client {
+        endpoint = 'https://s3.embl.de/'
+        s3PathStyleAccess = true
+    }
+}
+
 executor {
     $slurm {
         queueSize = 100
@@ -11,6 +21,20 @@ executor {
     }
 }
 
+conda {
+  enabled = true
+  useMamba = true
+  cacheDir = '/g/arendt/Cyril/envs/nextflow'
+}
+
+apptainer {
+  enabled = true
+  docker.enabled = false
+  autoMounts = true
+  envWhitelist = 'CUDA_VISIBLE_DEVICES'
+  cacheDir = '/scratch/cros/singularity-cache'
+}
+
 process {
     executor = "slurm"
     scratch = '$SCRATCHDIR'
@@ -21,12 +45,29 @@ process {
     clusterOptions = '-p htc -N 1 --no-requeue'
 
 
-    withLabel: jbrowse2 { // include htslib
-        singularity ='https://depot.galaxyproject.org/singularity/jbrowse2:2.11.0--hd6180af_0'
+    withLabel: htslib { // include htslib
+        conda ='bioconda::htslib=1.20.0'
         time = { '1h' * task.attempt }
     }
-    withLabel: samtools { // include htslib
-        singularity ='https://depot.galaxyproject.org/singularity/jbrowse2:2.11.0--hd6180af_0'
+
+    withLabel: agat {
+        conda ='bioconda::agat=1.4.0-0'
         time = { '1h' * task.attempt }
     }
+
+    withLabel: bcftools { 
+        conda ='bioconda::bcftools=1.20-0'
+        time = { '1h' * task.attempt }
+    }
+
+    withLabel: samtools { 
+        conda ='bioconda::samtools=1.20-0'
+        time = { '1h' * task.attempt }
+    }
+
+    withLabel: small_job {
+        cpus = 4
+        memory = '8 G'
+    }
+
 }
\ No newline at end of file
diff --git a/resources_to_s3_NF/params.yaml b/resources_to_s3_NF/params.yaml
index 7b576f2..9b78410 100644
--- a/resources_to_s3_NF/params.yaml
+++ b/resources_to_s3_NF/params.yaml
@@ -3,5 +3,5 @@ csv_genomes_to_process:  "./genomes.csv"
 csv_tracks_to_process:  "./tracks.csv"
 out_dir: "/scratch/cros/testing_jbrowse"
 name: "Jbrowse2 processing"
-s3_endpoint: "https://s3.embl.de"
-s3_bucket: "annelids"
\ No newline at end of file
+s3_bucket: "annelids"
+s3_url: "s3.embl.de"
\ No newline at end of file
diff --git a/resources_to_s3_NF/tracks.csv b/resources_to_s3_NF/tracks.csv
index fa046e7..c498561 100644
--- a/resources_to_s3_NF/tracks.csv
+++ b/resources_to_s3_NF/tracks.csv
@@ -1,37 +1,37 @@
 assembly,trackID,name,type,category,description,path,is_text_index
 pmassv1_masked,annotation,Gene models,GTF,Annotation,Kevin's GTF file,/g/arendt/data/platynereis_massiliensis/genes/pmassv1.gtf,TRUE
-pmassv1_masked,emapper,EMAPPER models,GFF,Annotation,Kevin's GFF file from EMAPPER,/g/arendt/data/platynereis_massiliensis/proteome/pmassv1_EMAPPER.gff,FALSE
+pmassv1_masked,emapper,EMAPPER models,GFF,Annotation,Kevin's GFF file from EMAPPER,/g/arendt/data/platynereis_massiliensis/proteome/annotation/pmassv1_EMAPPER.gff,FALSE
 pdumv021,annotation,Gene models,GTF,Annotation,Kevin's GTF file,/g/arendt/data/genome_assemblies/pdum-v2.1/genes/pdumv021.gtf,TRUE
-pdumv021,emapper,EMAPPER models,GFF,Annotation,Kevin's GFF file from EMAPPER,/g/arendt/data/genome_assemblies/pdum-v2.1/proteome/pdumv021_EMAPPER.emapper.gff,FALSE
-pdumv021,SN035_vcf,SN035,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN035.vcf",FALSE
-pdumv021,SN054_vcf,SN054,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN054.vcf",FALSE
-pdumv021,SN055_vcf,SN055,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN055.vcf",FALSE
-pdumv021,SN056_vcf,SN056,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN056.vcf",FALSE
-pdumv021,SN057_vcf,SN057,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN057.vcf",FALSE
-pdumv021,SN058_vcf,SN058,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN058.vcf",FALSE
-pdumv021,SN001_vcf,SN001,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN001.vcf",FALSE
-pdumv021,SN002_vcf,SN002,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN002.vcf",FALSE
-pdumv021,SN003_vcf,SN003,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN003.vcf",FALSE
-pdumv021,SN004_vcf,SN004,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN004.vcf",FALSE
-pdumv021,SN015_vcf,SN015,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN015.vcf",FALSE
-pdumv021,SN016_vcf,SN016,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN016.vcf",FALSE
-pdumv021,SN018_vcf,SN018,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN018.vcf",FALSE
-pdumv021,SN019_vcf,SN019,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN019.vcf",FALSE
-pdumv021,SN020_vcf,SN020,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN020.vcf",FALSE
-pdumv021,SN021_vcf,SN021,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN021.vcf",FALSE
-pdumv021,SN028_vcf,SN028,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN028.vcf",FALSE
-pdumv021,SN029_vcf,SN029,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN029.vcf",FALSE
-pdumv021,SN030_vcf,SN030,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN030.vcf",FALSE
-pdumv021,SN031_vcf,SN031,VCF,Variants,Tobi's variants,"/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN031.vcf",FALSE
-pdumv021,library_bam_1,Library 1,BAM,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/1.final.bam",FALSE
-pdumv021,library_bam_2,Library 2,BAM,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/2.final.bam",FALSE
-pdumv021,library_bam_3,Library 3,BAM,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/3.final.bam",FALSE
-pdumv021,library_bam_4,Library 4,BAM,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/4.final.bam",FALSE
-pdumv021,library_bam_5,Library 5,BAM,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/5.final.bam",FALSE
-pdumv021,library_bigwig_1,Library 1,BigWig,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/1.final.bam",FALSE
-pdumv021,library_bigwig_2,Library 2,BigWig,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/2.final.bam",FALSE
-pdumv021,library_bigwig_3,Library 3,BigWig,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/3.final.bam",FALSE
-pdumv021,library_bigwig_4,Library 4,BigWig,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/4.final.bam",FALSE
-pdumv021,library_bigwig_5,Library 5,BigWig,ATACseq,Lara's Snakemake outputs,"/g/arendt/Lara/8.FinalOutput/5.final.bam",FALSE
-pdumv021,SN073_15ng_collapsed,SN073 MASseq isoforms from the 15ng prep,GFF,MASseq,MASseq outputs from running isoseq collapse against the Pdum genome,"/scratch/cros/MASseq_output_new/SN073/collapsed.gff",FALSE
-pdumv021,SN073_60ng_collapsed,SN073 MASseq isoforms,GFF,MASseq,MASseq outputs from running isoseq collapse against the Pdum genome,"/scratch/cros/MASseq_output_new/SN073_60ng/collapsed.gff",FALSE
+pdumv021,emapper,EMAPPER models,GFF,Annotation,Kevin's GFF file from EMAPPER,/g/arendt/data/genome_assemblies/pdum-v2.1/proteome/annotation/pdumv021_EMAPPER.emapper.gff,FALSE
+pdumv021,SN035_vcf,SN035,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN035.vcf,FALSE
+pdumv021,SN054_vcf,SN054,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN054.vcf,FALSE
+pdumv021,SN055_vcf,SN055,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN055.vcf,FALSE
+pdumv021,SN056_vcf,SN056,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN056.vcf,FALSE
+pdumv021,SN057_vcf,SN057,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN057.vcf,FALSE
+pdumv021,SN058_vcf,SN058,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_PlanetaryBiology/Variants_SN058.vcf,FALSE
+pdumv021,SN001_vcf,SN001,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN001.vcf,FALSE
+pdumv021,SN002_vcf,SN002,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN002.vcf,FALSE
+pdumv021,SN003_vcf,SN003,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN003.vcf,FALSE
+pdumv021,SN004_vcf,SN004,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN004.vcf,FALSE
+pdumv021,SN015_vcf,SN015,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN015.vcf,FALSE
+pdumv021,SN016_vcf,SN016,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN016.vcf,FALSE
+pdumv021,SN018_vcf,SN018,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN018.vcf,FALSE
+pdumv021,SN019_vcf,SN019,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN019.vcf,FALSE
+pdumv021,SN020_vcf,SN020,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN020.vcf,FALSE
+pdumv021,SN021_vcf,SN021,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN021.vcf,FALSE
+pdumv021,SN028_vcf,SN028,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN028.vcf,FALSE
+pdumv021,SN029_vcf,SN029,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN029.vcf,FALSE
+pdumv021,SN030_vcf,SN030,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN030.vcf,FALSE
+pdumv021,SN031_vcf,SN031,VCF,Variants,Tobi's variants,/g/arendt/gerber/Processed_Files/Variants_Atlas/Variants_Libs/Variants_SN031.vcf,FALSE
+pdumv021,library_bam_1,Library 1,BAM,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/1.final.bam,FALSE
+pdumv021,library_bam_2,Library 2,BAM,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/2.final.bam,FALSE
+pdumv021,library_bam_3,Library 3,BAM,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/3.final.bam,FALSE
+pdumv021,library_bam_4,Library 4,BAM,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/4.final.bam,FALSE
+pdumv021,library_bam_5,Library 5,BAM,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/5.final.bam,FALSE
+pdumv021,library_bigwig_1,Library 1,BigWig,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/1.final.bigwig,FALSE
+pdumv021,library_bigwig_2,Library 2,BigWig,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/2.final.bigwig,FALSE
+pdumv021,library_bigwig_3,Library 3,BigWig,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/3.final.bigwig,FALSE
+pdumv021,library_bigwig_4,Library 4,BigWig,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/4.final.bigwig,FALSE
+pdumv021,library_bigwig_5,Library 5,BigWig,ATACseq,Lara's Snakemake outputs,/g/arendt/Lara/8.FinalOutput/5.final.bigwig,FALSE
+pdumv021,SN073_15ng_collapsed,SN073 MASseq isoforms from the 15ng prep,GFF,MASseq,MASseq outputs from running isoseq collapse against the Pdum genome,/scratch/cros/MASseq_output_new/SN073/collapsed.gff,FALSE
+pdumv021,SN073_60ng_collapsed,SN073 MASseq isoforms,GFF,MASseq,MASseq outputs from running isoseq collapse against the Pdum genome,/scratch/cros/MASseq_output_new/SN073_60ng/collapsed.gff,FALSE
-- 
GitLab