diff --git a/utils/filter_MosaiCatcher_calls.pl b/utils/filter_MosaiCatcher_calls.pl index 849bc89cad9e46904cab17134e737af641193bdc..e2687c4a9ba235766c76f62672253319587c1004 100755 --- a/utils/filter_MosaiCatcher_calls.pl +++ b/utils/filter_MosaiCatcher_calls.pl @@ -3,7 +3,7 @@ use strict; my $min_N_inv = 3; my $min_WC = 0.2; -my $safe_llr_to_ref = 20; +my $safe_llr_to_ref = 50; my $SegDup_file = "./utils/segdups/segDups_hg38_UCSCtrack.bed.gz"; my $MaxSegDup_overlap = 0.5; @@ -118,6 +118,10 @@ close FH_1; #-- 5. filter Del events overlapping SegDups to much (based on defined value) print STDERR "Testing for overlap with SegDups...\n"; #my %SEGDUPOV; +my $UCSC_artefact_chr="chr2"; +my $UCSC_artefact_start=90400000; +my $UCSC_artefact_end=91400000; + foreach my $chrom (sort keys %STARTs) { my %Seen; for (my $i=0; $i<@{$STARTs{$chrom}}; $i++) { @@ -151,6 +155,28 @@ foreach my $chrom (sort keys %STARTs) { $overlap/=length($overlap_string); #$SEGDUPOV{$chrom}{$stD}{$enD}=$overlap; $FILTER{$chrom}{$stD}{$enD}= sprintf ("FAIL(SegDup:%4.2f)", $overlap) if ($overlap > $MaxSegDup_overlap); + #------------------------------------------------------------ + #-- remove build38 / ucsc 'hole' region in annotated assembly + #------------------------------------------------------------ + next unless ($chrom eq $UCSC_artefact_chr); + my $UCSC_exception=0; + if (($stD<=$UCSC_artefact_start) && ($UCSC_artefact_start<=$enD)) { + my ($EndPos) = sort {$a<=>$b} ($enD, $UCSC_artefact_end); #sort smaller EndVal + my $DupOvL = $EndPos-$UCSC_artefact_start+1; + substr ($overlap_string, $UCSC_artefact_start-$stD+1, $DupOvL) = sprintf "1"x$DupOvL; #fill in "1" for each overlapping base + $UCSC_exception=1; + } elsif (($UCSC_artefact_start<=$stD) && ($stD<=$UCSC_artefact_end)) { + my ($EndPos) = sort {$a<=>$b} ($enD, $UCSC_artefact_end); #sort smaller EndVal + my $DupOvL = $EndPos-$stD+1; + substr ($overlap_string, 0, $DupOvL) = sprintf "1"x$DupOvL; #fill in "1" for each overlapping base + $UCSC_exception = 1; + } + next unless ($UCSC_exception); + for (my $M=0; $M<length($overlap_string); $M++) { + $overlap+=1 if (substr($overlap_string, $M, 1) eq "1"); + } + $overlap/=length($overlap_string); + $FILTER{$chrom}{$stD}{$enD}= sprintf ("FAIL(SegD/UCSC:%4.2f)", $overlap) if ($overlap > $MaxSegDup_overlap); } } diff --git a/utils/group_nearby_calls_of_same_AF_and_generate_output_table.pl b/utils/group_nearby_calls_of_same_AF_and_generate_output_table.pl index 9b7951ac38842dac1fb67e462cd04b03f0b5e779..d316ebf313bdd17951e5df13c9d96dbcf595d109 100755 --- a/utils/group_nearby_calls_of_same_AF_and_generate_output_table.pl +++ b/utils/group_nearby_calls_of_same_AF_and_generate_output_table.pl @@ -2,12 +2,14 @@ use strict; +my $min_AF_for_grouping =0.1; my $AF_simiarity_threshold = 0.25; my $input_file = $ARGV[0]; if (!$ARGV[0]) { print STDERR "inputfile missing\n"; print STDERR "-> cluster into groups by chromosome, if similar AF (relative threshold=$AF_simiarity_threshold), if same SV event, if SVs are directly adjacent, and if at least one cell shared\n"; + print STDERR "Ignores SVs with VAF<$min_AF_for_grouping, which are never grouped\n"; die; } @@ -78,6 +80,7 @@ my $group_name = 0; foreach my $chrom (sort keys %STARTs) { my $previous_shared = 0; for (my $i=0; $i<@{$STARTs{$chrom}}-1; $i++) { + next if ($averaged_AF{$chrom}{$STARTs{$chrom}[$i]} < $min_AF_for_grouping); #ignore events with too low VAF unless ($ENDs{$chrom}[$i] == $STARTs{$chrom}[$i+1]) { #only consider events that are directly adjacent $previous_shared=0;#initialize next;