From 80c3f94fd85ec697efab04fa81d30a33bbaff48b Mon Sep 17 00:00:00 2001
From: Tobias Marschall <tobias.marschall@0ohm.net>
Date: Wed, 3 Oct 2018 15:58:13 +0200
Subject: [PATCH] Change read group IDs to reflect the cell name (sample mixing)

---
Reviewer notes (text between the three-dash line and the diff is not part of
the commit message and is skipped when the patch is applied):

  * targets: adds ('WT', 'C7', 80, 5, 1) and four ('WT', 'BM510', ...) tuples.
  * sample_paths: all three entries now point below
    /MMCI/TM/scratch/strandseq/input-data/.
  * create_new_header: the header is now written with every @RG line filtered
    out (grep -v) instead of rewriting the SM: field with sed.
  * translate_bam: the reheadered BAM is piped into `samtools addreplacerg`,
    which is given an @RG line with ID CELL<i>.<cell> and SM set to the
    target_sample wildcard; a trailing comma is added after the `hd` input.
  * NOTE(review): this file had lost its line breaks; the line structure was
    rebuilt from the hunk headers (old/new line counts check out), but the
    indentation of the hunk lines is assumed to be four spaces per level.
    Confirm against utils/Snakefile.samplemixing (it may use tabs) before
    running git am / git apply.

 utils/Snakefile.samplemixing | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/utils/Snakefile.samplemixing b/utils/Snakefile.samplemixing
index b99e5a5..dc55bc9 100644
--- a/utils/Snakefile.samplemixing
+++ b/utils/Snakefile.samplemixing
@@ -2,15 +2,20 @@ from collections import defaultdict
 import random
 
 targets = {
+    ('WT', 'C7', 80, 5, 1),
     ('WT', 'C7', 80, 10, 1),
     ('WT', 'C7', 80, 20, 1),
     ('WT', 'C7', 50, 50, 1),
+    ('WT', 'BM510', 80, 5, 1),
+    ('WT', 'BM510', 80, 10, 1),
+    ('WT', 'BM510', 80, 20, 1),
+    ('WT', 'BM510', 50, 50, 1),
 }
 
 sample_paths = {
-    'BM510': '/MMCI/TM/scratch/strandseq/rpe-2018-07-18-BM510/bam/RPE-BM510/selected/',
-    'C7': '/MMCI/TM/scratch/strandseq/rpe-2018-07-18-C7/bam/C7_data/selected/',
-    'WT': '/MMCI/TM/scratch/strandseq/rpe-2018-07-18-WT/bam/RPE1-WT/selected/',
+    'BM510': '/MMCI/TM/scratch/strandseq/input-data/RPE-BM510/selected/',
+    'C7': '/MMCI/TM/scratch/strandseq/input-data/C7_data/selected/',
+    'WT': '/MMCI/TM/scratch/strandseq/input-data/RPE1-WT/selected/',
 }
 
 samples = sorted(sample_paths.keys())
@@ -52,16 +57,16 @@ rule create_new_header:
     output:
         hd=temp('bam/{target_sample}/all/CELL{i,[0-9]+}.{cell}.header.sam')
     shell:
-        'samtools view -H {input.bam} | sed -r \'/^@RG/ s/(SM:[A-Za-z0-9_-]+)/SM:{wildcards.target_sample}/g\' > {output.hd}'
+        'samtools view -H {input.bam} | grep -v "^@RG" > {output.hd}'
 
 rule translate_bam:
     input:
         bam=lambda wc: bam_mapping['bam/{}/all/CELL{}.{}.bam'.format(wc.target_sample,wc.i,wc.cell)],
-        hd='bam/{target_sample}/all/CELL{i,[0-9]+}.{cell}.header.sam'
+        hd='bam/{target_sample}/all/CELL{i,[0-9]+}.{cell}.header.sam',
     output:
         bam='bam/{target_sample}/all/CELL{i,[0-9]+}.{cell}.bam'
     shell:
-        'samtools reheader {input.hd} {input.bam} > {output.bam}'
+        'samtools reheader {input.hd} {input.bam} | samtools addreplacerg -r "@RG\tID:CELL{wildcards.i}.{wildcards.cell}\tSM:{wildcards.target_sample}" -o {output.bam} -'
 
 rule index_bam:
     input:
-- 
GitLab