From 80c3f94fd85ec697efab04fa81d30a33bbaff48b Mon Sep 17 00:00:00 2001
From: Tobias Marschall <tobias.marschall@0ohm.net>
Date: Wed, 3 Oct 2018 15:58:13 +0200
Subject: [PATCH] Change read group IDs to reflect the cell name (sample
 mixing)

---
 utils/Snakefile.samplemixing | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/utils/Snakefile.samplemixing b/utils/Snakefile.samplemixing
index b99e5a5..dc55bc9 100644
--- a/utils/Snakefile.samplemixing
+++ b/utils/Snakefile.samplemixing
@@ -2,15 +2,20 @@ from collections import defaultdict
 import random
 
 targets = {
+	('WT', 'C7', 80, 5, 1),
 	('WT', 'C7', 80, 10, 1),
 	('WT', 'C7', 80, 20, 1),
 	('WT', 'C7', 50, 50, 1),
+	('WT', 'BM510', 80, 5, 1),
+	('WT', 'BM510', 80, 10, 1),
+	('WT', 'BM510', 80, 20, 1),
+	('WT', 'BM510', 50, 50, 1),
 }
 
 sample_paths = {
-	'BM510': '/MMCI/TM/scratch/strandseq/rpe-2018-07-18-BM510/bam/RPE-BM510/selected/',
-	'C7':    '/MMCI/TM/scratch/strandseq/rpe-2018-07-18-C7/bam/C7_data/selected/',
-	'WT':    '/MMCI/TM/scratch/strandseq/rpe-2018-07-18-WT/bam/RPE1-WT/selected/',
+	'BM510': '/MMCI/TM/scratch/strandseq/input-data/RPE-BM510/selected/',
+	'C7':    '/MMCI/TM/scratch/strandseq/input-data/C7_data/selected/',
+	'WT':    '/MMCI/TM/scratch/strandseq/input-data/RPE1-WT/selected/',
 }
 
 samples = sorted(sample_paths.keys())
@@ -52,16 +57,16 @@ rule create_new_header:
 	output:
 		hd=temp('bam/{target_sample}/all/CELL{i,[0-9]+}.{cell}.header.sam')
 	shell:
-		'samtools view -H {input.bam} | sed -r \'/^@RG/ s/(SM:[A-Za-z0-9_-]+)/SM:{wildcards.target_sample}/g\' > {output.hd}'
+		'samtools view -H {input.bam} | grep -v "^@RG" > {output.hd}'
 
 rule translate_bam:
 	input:
 		bam=lambda wc: bam_mapping['bam/{}/all/CELL{}.{}.bam'.format(wc.target_sample,wc.i,wc.cell)],
-		hd='bam/{target_sample}/all/CELL{i,[0-9]+}.{cell}.header.sam'
+		hd='bam/{target_sample}/all/CELL{i,[0-9]+}.{cell}.header.sam',
 	output:
 		bam='bam/{target_sample}/all/CELL{i,[0-9]+}.{cell}.bam'
 	shell:
-		'samtools reheader {input.hd} {input.bam} > {output.bam}'
+		'samtools reheader {input.hd} {input.bam} | samtools addreplacerg -r "@RG\tID:CELL{wildcards.i}.{wildcards.cell}\tSM:{wildcards.target_sample}" -o {output.bam} -'
 
 rule index_bam:
 	input:
-- 
GitLab