Skip to content
Snippets Groups Projects
Commit 4c5619d4 authored by Tobias Marschall's avatar Tobias Marschall
Browse files

Update on filter/merge scripts sent by Jan today.

parent 94be77d6
No related branches found
No related tags found
No related merge requests found
......@@ -3,7 +3,7 @@ use strict;
my $min_N_inv = 3;
my $min_WC = 0.2;
my $safe_llr_to_ref = 20;
my $safe_llr_to_ref = 50;
my $SegDup_file = "./utils/segdups/segDups_hg38_UCSCtrack.bed.gz";
my $MaxSegDup_overlap = 0.5;
......@@ -118,6 +118,10 @@ close FH_1;
#-- 5. filter Del events overlapping SegDups to much (based on defined value)
print STDERR "Testing for overlap with SegDups...\n";
#my %SEGDUPOV;
my $UCSC_artefact_chr="chr2";
my $UCSC_artefact_start=90400000;
my $UCSC_artefact_end=91400000;
foreach my $chrom (sort keys %STARTs) {
my %Seen;
for (my $i=0; $i<@{$STARTs{$chrom}}; $i++) {
......@@ -151,6 +155,28 @@ foreach my $chrom (sort keys %STARTs) {
$overlap/=length($overlap_string);
#$SEGDUPOV{$chrom}{$stD}{$enD}=$overlap;
$FILTER{$chrom}{$stD}{$enD}= sprintf ("FAIL(SegDup:%4.2f)", $overlap) if ($overlap > $MaxSegDup_overlap);
#------------------------------------------------------------
#-- remove build38 / ucsc 'hole' region in annotated assembly
#------------------------------------------------------------
next unless ($chrom eq $UCSC_artefact_chr);
my $UCSC_exception=0;
if (($stD<=$UCSC_artefact_start) && ($UCSC_artefact_start<=$enD)) {
my ($EndPos) = sort {$a<=>$b} ($enD, $UCSC_artefact_end); #sort smaller EndVal
my $DupOvL = $EndPos-$UCSC_artefact_start+1;
substr ($overlap_string, $UCSC_artefact_start-$stD+1, $DupOvL) = sprintf "1"x$DupOvL; #fill in "1" for each overlapping base
$UCSC_exception=1;
} elsif (($UCSC_artefact_start<=$stD) && ($stD<=$UCSC_artefact_end)) {
my ($EndPos) = sort {$a<=>$b} ($enD, $UCSC_artefact_end); #sort smaller EndVal
my $DupOvL = $EndPos-$stD+1;
substr ($overlap_string, 0, $DupOvL) = sprintf "1"x$DupOvL; #fill in "1" for each overlapping base
$UCSC_exception = 1;
}
next unless ($UCSC_exception);
for (my $M=0; $M<length($overlap_string); $M++) {
$overlap+=1 if (substr($overlap_string, $M, 1) eq "1");
}
$overlap/=length($overlap_string);
$FILTER{$chrom}{$stD}{$enD}= sprintf ("FAIL(SegD/UCSC:%4.2f)", $overlap) if ($overlap > $MaxSegDup_overlap);
}
}
......
......@@ -2,12 +2,14 @@
use strict;
my $min_AF_for_grouping =0.1;
my $AF_simiarity_threshold = 0.25;
my $input_file = $ARGV[0];
if (!$ARGV[0]) {
print STDERR "inputfile missing\n";
print STDERR "-> cluster into groups by chromosome, if similar AF (relative threshold=$AF_simiarity_threshold), if same SV event, if SVs are directly adjacent, and if at least one cell shared\n";
print STDERR "Ignores SVs with VAF<$min_AF_for_grouping, which are never grouped\n";
die;
}
......@@ -78,6 +80,7 @@ my $group_name = 0;
foreach my $chrom (sort keys %STARTs) {
my $previous_shared = 0;
for (my $i=0; $i<@{$STARTs{$chrom}}-1; $i++) {
next if ($averaged_AF{$chrom}{$STARTs{$chrom}[$i]} < $min_AF_for_grouping); #ignore events with too low VAF
unless ($ENDs{$chrom}[$i] == $STARTs{$chrom}[$i+1]) { #only consider events that are directly adjacent
$previous_shared=0;#initialize
next;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment