Skip to content
Snippets Groups Projects
Commit d84904a5 authored by Thomas Weber's avatar Thomas Weber
Browse files

Correct SMK download BAM example files

parent 5ed61ec9
No related branches found
No related tags found
No related merge requests found
......@@ -26,8 +26,9 @@ segmentation2/
snv_calls/
strand_states/
sv_probabilities/
config/config_df.tsv
config/exclude_file.txt
workflow/config/config_df.tsv
workflow/config/exclude_file.txt
workflow/config/exclude_file
# Docs
docs/build/
......
......@@ -65,7 +65,7 @@ if config["mode"] != "download_data":
complete_df_list = list()
input_dir = config["input_bam_location"]
for sample in tqdm(os.listdir(input_dir)):
for sample in os.listdir(input_dir):
l_files_all = [f for f in os.listdir(input_dir + sample + "/all/") if f.endswith('.bam')]
l_files_selected = [f for f in os.listdir(input_dir + sample + "/selected/") if f.endswith('.bam')]
join = list(set(l_files_all).intersection(set(l_files_selected)))
......
#######################################
# MOSAICATCHER CONFIGURATION FILE. #
#######################################
#######################################
## Command-line options
#######################################
### Modes ["count", "segmentation", "mosaiclassifier"]
mode: "count"
### Plot enabled [True] or disabled [False]
plot: False
### Enable / Disable comparison for each BAM file between folder name & SM tag
check_sm_tag: False
### Enable / Disable download of BAM examples (RPE-BM510)
dl_bam_example: False
### Enable / Disable download of external files (1000G SNV & Fasta ref genome)
dl_external_files: False
## Input BAM location
input_bam_location: "TEST_EXAMPLE_DATA/"
## Output location
output_location: "TEST_OUTPUT/"
# External files
## 1000G SNV sites to genotype : https://sandbox.zenodo.org/record/1062182/files/ALL.chr1-22plusX_GRCh38_sites.20170504.renamedCHR.vcf.gz
snv_sites_to_genotype: "sandbox.zenodo.org/record/1062182/files/ALL.chr1-22plusX_GRCh38_sites.20170504.renamedCHR.vcf.gz"
## Reference genome : https://sandbox.zenodo.org/record/1062182/files/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
reference: "sandbox.zenodo.org/record/1062182/files/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna"
tmp_check: "sandbox.zenodo.org/record/1062186/files/mosaic_logo.png"
# Chromosomes list to process
chromosomes:
- chr1
- chr2
- chr3
- chr4
- chr5
- chr6
- chr7
- chr8
- chr9
- chr10
- chr11
- chr12
- chr13
- chr14
- chr15
- chr16
- chr17
- chr18
- chr19
- chr20
- chr21
- chr22
- chrX
##############################
# Advanced configuration. #
##############################
# External static data
# R reference genome package (BSgenome)
R_reference: "BSgenome.Hsapiens.UCSC.hg38"
# SegDups file
segdups: "data/segdups/segDups_hg38_UCSCtrack.bed.gz"
# Exclude cells
exclude_list: []
# Strandphaser
git_commit_strandphaser: "69c9fb4"
git_repo_strandphaser: "https://github.com/daewoooo/StrandPhaseR"
paired_end: True
# CHECKME: Do the parameters below need optimization?
# PARAMETERS
## WINDOW
window: 100000
## METHODS
methods:
- "simpleCalls_llr4_poppriorsTRUE_haplotagsTRUE_gtcutoff0_regfactor6_filterFALSE"
- "simpleCalls_llr4_poppriorsTRUE_haplotagsFALSE_gtcutoff0.05_regfactor6_filterTRUE"
llr: 4
poppriors: TRUE
haplotags:
- TRUE
- FALSE
gtcutoff:
- 0
- 0.05
regfactor: 6
filter:
- TRUE
- FALSE
## BPDENS
### JOINT SEG
min_diff_jointseg: 0.1
### SINGLE SEG
min_diff_singleseg: 0.5
### SCE CUTOFF
additional_sce_cutoff: 20000000
### SCE MIN DISTANCE
sce_min_distance: 500000
chr1
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr9
chr10
chr11
chr12
chr13
chr14
chr15
chr16
chr17
chr18
chr19
chr20
chr22
chrX
chrY
chrM
chr1_KI270706v1_random
chr1_KI270707v1_random
chr1_KI270708v1_random
chr1_KI270709v1_random
chr1_KI270710v1_random
chr1_KI270711v1_random
chr1_KI270712v1_random
chr1_KI270713v1_random
chr1_KI270714v1_random
chr2_KI270715v1_random
chr2_KI270716v1_random
chr3_GL000221v1_random
chr4_GL000008v2_random
chr5_GL000208v1_random
chr9_KI270717v1_random
chr9_KI270718v1_random
chr9_KI270719v1_random
chr9_KI270720v1_random
chr11_KI270721v1_random
chr14_GL000009v2_random
chr14_GL000225v1_random
chr14_KI270722v1_random
chr14_GL000194v1_random
chr14_KI270723v1_random
chr14_KI270724v1_random
chr14_KI270725v1_random
chr14_KI270726v1_random
chr15_KI270727v1_random
chr16_KI270728v1_random
chr17_GL000205v2_random
chr17_KI270729v1_random
chr17_KI270730v1_random
chr22_KI270731v1_random
chr22_KI270732v1_random
chr22_KI270733v1_random
chr22_KI270734v1_random
chr22_KI270735v1_random
chr22_KI270736v1_random
chr22_KI270737v1_random
chr22_KI270738v1_random
chr22_KI270739v1_random
chrY_KI270740v1_random
chrUn_KI270302v1
chrUn_KI270304v1
chrUn_KI270303v1
chrUn_KI270305v1
chrUn_KI270322v1
chrUn_KI270320v1
chrUn_KI270310v1
chrUn_KI270316v1
chrUn_KI270315v1
chrUn_KI270312v1
chrUn_KI270311v1
chrUn_KI270317v1
chrUn_KI270412v1
chrUn_KI270411v1
chrUn_KI270414v1
chrUn_KI270419v1
chrUn_KI270418v1
chrUn_KI270420v1
chrUn_KI270424v1
chrUn_KI270417v1
chrUn_KI270422v1
chrUn_KI270423v1
chrUn_KI270425v1
chrUn_KI270429v1
chrUn_KI270442v1
chrUn_KI270466v1
chrUn_KI270465v1
chrUn_KI270467v1
chrUn_KI270435v1
chrUn_KI270438v1
chrUn_KI270468v1
chrUn_KI270510v1
chrUn_KI270509v1
chrUn_KI270518v1
chrUn_KI270508v1
chrUn_KI270516v1
chrUn_KI270512v1
chrUn_KI270519v1
chrUn_KI270522v1
chrUn_KI270511v1
chrUn_KI270515v1
chrUn_KI270507v1
chrUn_KI270517v1
chrUn_KI270529v1
chrUn_KI270528v1
chrUn_KI270530v1
chrUn_KI270539v1
chrUn_KI270538v1
chrUn_KI270544v1
chrUn_KI270548v1
chrUn_KI270583v1
chrUn_KI270587v1
chrUn_KI270580v1
chrUn_KI270581v1
chrUn_KI270579v1
chrUn_KI270589v1
chrUn_KI270590v1
chrUn_KI270584v1
chrUn_KI270582v1
chrUn_KI270588v1
chrUn_KI270593v1
chrUn_KI270591v1
chrUn_KI270330v1
chrUn_KI270329v1
chrUn_KI270334v1
chrUn_KI270333v1
chrUn_KI270335v1
chrUn_KI270338v1
chrUn_KI270340v1
chrUn_KI270336v1
chrUn_KI270337v1
chrUn_KI270363v1
chrUn_KI270364v1
chrUn_KI270362v1
chrUn_KI270366v1
chrUn_KI270378v1
chrUn_KI270379v1
chrUn_KI270389v1
chrUn_KI270390v1
chrUn_KI270387v1
chrUn_KI270395v1
chrUn_KI270396v1
chrUn_KI270388v1
chrUn_KI270394v1
chrUn_KI270386v1
chrUn_KI270391v1
chrUn_KI270383v1
chrUn_KI270393v1
chrUn_KI270384v1
chrUn_KI270392v1
chrUn_KI270381v1
chrUn_KI270385v1
chrUn_KI270382v1
chrUn_KI270376v1
chrUn_KI270374v1
chrUn_KI270372v1
chrUn_KI270373v1
chrUn_KI270375v1
chrUn_KI270371v1
chrUn_KI270448v1
chrUn_KI270521v1
chrUn_GL000195v1
chrUn_GL000219v1
chrUn_GL000220v1
chrUn_GL000224v1
chrUn_KI270741v1
chrUn_GL000226v1
chrUn_GL000213v1
chrUn_KI270743v1
chrUn_KI270744v1
chrUn_KI270745v1
chrUn_KI270746v1
chrUn_KI270747v1
chrUn_KI270748v1
chrUn_KI270749v1
chrUn_KI270750v1
chrUn_KI270751v1
chrUn_KI270752v1
chrUn_KI270753v1
chrUn_KI270754v1
chrUn_KI270755v1
chrUn_KI270756v1
chrUn_KI270757v1
chrUn_GL000214v1
chrUn_KI270742v1
chrUn_GL000216v2
chrUn_GL000218v1
chrEBV
chrY
chrM
chr1_KI270706v1_random
chr1_KI270707v1_random
chr1_KI270708v1_random
chr1_KI270709v1_random
chr1_KI270710v1_random
chr1_KI270711v1_random
chr1_KI270712v1_random
chr1_KI270713v1_random
chr1_KI270714v1_random
chr2_KI270715v1_random
chr2_KI270716v1_random
chr3_GL000221v1_random
chr4_GL000008v2_random
chr5_GL000208v1_random
chr9_KI270717v1_random
chr9_KI270718v1_random
chr9_KI270719v1_random
chr9_KI270720v1_random
chr11_KI270721v1_random
chr14_GL000009v2_random
chr14_GL000225v1_random
chr14_KI270722v1_random
chr14_GL000194v1_random
chr14_KI270723v1_random
chr14_KI270724v1_random
chr14_KI270725v1_random
chr14_KI270726v1_random
chr15_KI270727v1_random
chr16_KI270728v1_random
chr17_GL000205v2_random
chr17_KI270729v1_random
chr17_KI270730v1_random
chr22_KI270731v1_random
chr22_KI270732v1_random
chr22_KI270733v1_random
chr22_KI270734v1_random
chr22_KI270735v1_random
chr22_KI270736v1_random
chr22_KI270737v1_random
chr22_KI270738v1_random
chr22_KI270739v1_random
chrY_KI270740v1_random
chrUn_KI270302v1
chrUn_KI270304v1
chrUn_KI270303v1
chrUn_KI270305v1
chrUn_KI270322v1
chrUn_KI270320v1
chrUn_KI270310v1
chrUn_KI270316v1
chrUn_KI270315v1
chrUn_KI270312v1
chrUn_KI270311v1
chrUn_KI270317v1
chrUn_KI270412v1
chrUn_KI270411v1
chrUn_KI270414v1
chrUn_KI270419v1
chrUn_KI270418v1
chrUn_KI270420v1
chrUn_KI270424v1
chrUn_KI270417v1
chrUn_KI270422v1
chrUn_KI270423v1
chrUn_KI270425v1
chrUn_KI270429v1
chrUn_KI270442v1
chrUn_KI270466v1
chrUn_KI270465v1
chrUn_KI270467v1
chrUn_KI270435v1
chrUn_KI270438v1
chrUn_KI270468v1
chrUn_KI270510v1
chrUn_KI270509v1
chrUn_KI270518v1
chrUn_KI270508v1
chrUn_KI270516v1
chrUn_KI270512v1
chrUn_KI270519v1
chrUn_KI270522v1
chrUn_KI270511v1
chrUn_KI270515v1
chrUn_KI270507v1
chrUn_KI270517v1
chrUn_KI270529v1
chrUn_KI270528v1
chrUn_KI270530v1
chrUn_KI270539v1
chrUn_KI270538v1
chrUn_KI270544v1
chrUn_KI270548v1
chrUn_KI270583v1
chrUn_KI270587v1
chrUn_KI270580v1
chrUn_KI270581v1
chrUn_KI270579v1
chrUn_KI270589v1
chrUn_KI270590v1
chrUn_KI270584v1
chrUn_KI270582v1
chrUn_KI270588v1
chrUn_KI270593v1
chrUn_KI270591v1
chrUn_KI270330v1
chrUn_KI270329v1
chrUn_KI270334v1
chrUn_KI270333v1
chrUn_KI270335v1
chrUn_KI270338v1
chrUn_KI270340v1
chrUn_KI270336v1
chrUn_KI270337v1
chrUn_KI270363v1
chrUn_KI270364v1
chrUn_KI270362v1
chrUn_KI270366v1
chrUn_KI270378v1
chrUn_KI270379v1
chrUn_KI270389v1
chrUn_KI270390v1
chrUn_KI270387v1
chrUn_KI270395v1
chrUn_KI270396v1
chrUn_KI270388v1
chrUn_KI270394v1
chrUn_KI270386v1
chrUn_KI270391v1
chrUn_KI270383v1
chrUn_KI270393v1
chrUn_KI270384v1
chrUn_KI270392v1
chrUn_KI270381v1
chrUn_KI270385v1
chrUn_KI270382v1
chrUn_KI270376v1
chrUn_KI270374v1
chrUn_KI270372v1
chrUn_KI270373v1
chrUn_KI270375v1
chrUn_KI270371v1
chrUn_KI270448v1
chrUn_KI270521v1
chrUn_GL000195v1
chrUn_GL000219v1
chrUn_GL000220v1
chrUn_GL000224v1
chrUn_KI270741v1
chrUn_GL000226v1
chrUn_GL000213v1
chrUn_KI270743v1
chrUn_KI270744v1
chrUn_KI270745v1
chrUn_KI270746v1
chrUn_KI270747v1
chrUn_KI270748v1
chrUn_KI270749v1
chrUn_KI270750v1
chrUn_KI270751v1
chrUn_KI270752v1
chrUn_KI270753v1
chrUn_KI270754v1
chrUn_KI270755v1
chrUn_KI270756v1
chrUn_KI270757v1
chrUn_GL000214v1
chrUn_KI270742v1
chrUn_GL000216v2
chrUn_GL000218v1
chrEBV
callset cell_count segments total_sce avg_sce_per_cell total_calls unique_calls unique_calls_merged complex_lengths_mb total_calls_complex unique_calls_complex avg_sv_load_per_cell_mb avg_sv_load_per_cell_complex_mb calls_af0to10 calls_af10to80 calls_af80to100 length_sum_af0to10_mb length_sum_af10to80_mb length_sum_af80to100_mb calls_af0to10_complex calls_af10to80_complex calls_af80to100_complex length_sum_af0to10_complex_mb length_sum_af10to80_complex_mb length_sum_af80to100_complex_mb
/g/korbel2/weber/MosaiCatcher_output/Mosaicatcher_output_singularity_LCL-TALL/mosaiclassifier/sv_calls/TALL03-DEA5/simpleCalls_llr4_poppriorsTRUE_haplotagsFALSE_gtcutoff0.05_regfactor6_filterTRUE.tsv 77 1113 1692 21.974025974025974 558 28 13 3.4 351 17 6.86635 1.2467532467532467 11 13 4 14.640895 8.840895 4.8 6 9 2 3.9 2.0 0.6
/g/korbel2/weber/MosaiCatcher_output/Mosaicatcher_output_singularity_LCL-TALL/mosaiclassifier/sv_calls/TALL03-DEA5/simpleCalls_llr4_poppriorsTRUE_haplotagsTRUE_gtcutoff0_regfactor6_filterFALSE.tsv 77 1113 1692 21.974025974025974 789 88 n/a 17.3 532 50 9.93311487012987 3.15974025974026 66 19 3 170.28179 6.840895 4.5 33 16 1 57.8 3.9 0.3
......@@ -11,13 +11,13 @@ rule dl_example_data:
"""
input:
HTTP.remote("https://sandbox.zenodo.org/record/1062182/files/TEST_EXAMPLE_DATA.zip", keep_local=True)
# HTTP.remote("https://sandbox.zenodo.org/record/1062186/files/report_TALL.zip", keep_local=True)
output:
# directory(config["input_bam_location"])
touch(config["output_location"] + "config/dl_example_data.ok")
shell:
"mkdir {output};"
"unzip {input} -d .;"
"mv TEST_EXAMPLE_DATA/* {config[input_bam_location]}"
"unzip {input} -d ."
# TODO: Adapt according to the reference
rule dl_external_data:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment