Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Mosaicatcher Update
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Thomas Weber
Mosaicatcher Update
Commits
5699687e
Commit
5699687e
authored
6 years ago
by
Tobias Marschall
Browse files
Options
Downloads
Patches
Plain Diff
Replace Jan's merging script by new one (sent by mail Oct 4; 19:11)
parent
c8e5c181
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
Snakefile
+1
-1
1 addition, 1 deletion
Snakefile
utils/group_nearby_calls_of_same_AF_and_generate_output_table.pl
+190
-0
190 additions, 0 deletions
...roup_nearby_calls_of_same_AF_and_generate_output_table.pl
with
191 additions
and
1 deletion
Snakefile
+
1
−
1
View file @
5699687e
...
...
@@ -652,7 +652,7 @@ rule postprocessing_merge:
output:
calls = "postprocessing/merge/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt"
shell:
'utils/group_nearby_calls_of_same_AF.pl {input.calls} > {output.calls}'
'utils/group_nearby_calls_of_same_AF
_and_generate_output_table
.pl {input.calls} > {output.calls}'
################################################################################
...
...
This diff is collapsed.
Click to expand it.
utils/group_nearby_calls_of_same_AF.pl
→
utils/group_nearby_calls_of_same_AF
_and_generate_output_table
.pl
+
190
−
0
View file @
5699687e
...
...
@@ -12,7 +12,7 @@ if (!$ARGV[0]) {
}
#--- read input file
my
(
%STARTs
,
%ENDs
,
%SV_TYPEs
,
%CELLs
,
%AFs
,
%SEEN
,
%averaged_AF
,
%MAIN_SV_TYPE
);
my
(
%MAIN_HAP
,
%Sv_call_haplotype
,
%Num_bins
,
%Group_ID
,
%STARTs
,
%ENDs
,
%SV_TYPEs
,
%CELLs
,
%AFs
,
%SEEN
,
%averaged_AF
,
%MAIN_SV_TYPE
,
%Llr_to_2nds
,
%Llr_to_refs
);
print
STDERR
"
Reading
$input_file
...
\n
";
open
FH
,
"
$input_file
"
or
die
;
while
(
<
FH
>
)
{
...
...
@@ -29,6 +29,10 @@ while (<FH>) {
push
(
@
{
$SV_TYPEs
{
$chrom
}{
$start
}},
$sv_call_name
);
push
(
@
{
$CELLs
{
$chrom
}{
$start
}},
$cell
);
push
(
@
{
$AFs
{
$chrom
}{
$start
}},
$af
);
push
(
@
{
$Llr_to_refs
{
$chrom
}{
$start
}},
$llr_to_ref
);
push
(
@
{
$Llr_to_2nds
{
$chrom
}{
$start
}},
$llr_to_2nd
);
push
(
@
{
$Num_bins
{
$chrom
}{
$start
}},
$num_bins
);
push
(
@
{
$Sv_call_haplotype
{
$chrom
}{
$start
}},
$sv_call_haplotype
);
}
%SEEN
=
();
#reinitialize
close
FH
;
...
...
@@ -56,6 +60,19 @@ foreach my $chrom (sort keys %STARTs) {
}
}
#-- define majority haplotype
# For every call (identified by chromosome and start position) count how often
# each haplotype label occurs among the entries collected for that call in
# %Sv_call_haplotype, and store the most frequent label in
# $MAIN_HAP{chrom}{start}.
foreach my $chrom (sort keys %STARTs) {
    foreach my $start (@{ $STARTs{$chrom} }) {
        my %count_abundance;
        foreach my $hapname (@{ $Sv_call_haplotype{$chrom}{$start} }) {
            $count_abundance{$hapname}++;
        }

        # Most abundant label first. Ties are broken alphabetically so the
        # result does not depend on Perl's per-process randomised hash order
        # (otherwise the same input could yield different output between runs).
        my ($main_hap) = sort {
            $count_abundance{$b} <=> $count_abundance{$a}
                or $a cmp $b
        } keys %count_abundance;

        $MAIN_HAP{$chrom}{$start} = $main_hap;
    }
}
#--- cluster into groups by chromosome, if similar averaged AF, same primary SV type, directly adjacent call, and at least one cell shared
my
$group_name
=
0
;
foreach
my
$chrom
(
sort
keys
%STARTs
)
{
...
...
@@ -85,11 +102,81 @@ foreach my $chrom (sort keys %STARTs) {
next
;
}
$group_name
++
unless
(
$previous_shared
);
print
"
SHARED:
$chrom
\t
$STARTs
{
$chrom
}[
$i
]-
$ENDs
{
$chrom
}[
$i
]|
$STARTs
{
$chrom
}[
$i
+1]-
$ENDs
{
$chrom
}[
$i
+1] (AF=
$averaged_AF
{
$chrom
}{
$STARTs
{
$chrom
}[
$i
]}|
$averaged_AF
{
$chrom
}{
$STARTs
{
$chrom
}[
$i
+1]}) (SV_type=
$MAIN_SV_TYPE
{
$chrom
}{
$STARTs
{
$chrom
}[
$i
]}) group_name=
$group_name
\n
";
print
STDERR
"
Grouping/merging
$chrom
\t
$STARTs
{
$chrom
}[
$i
]-
$ENDs
{
$chrom
}[
$i
]|
$STARTs
{
$chrom
}[
$i
+1]-
$ENDs
{
$chrom
}[
$i
+1] (AF=
$averaged_AF
{
$chrom
}{
$STARTs
{
$chrom
}[
$i
]}|
$averaged_AF
{
$chrom
}{
$STARTs
{
$chrom
}[
$i
+1]}) (SV_type=
$MAIN_SV_TYPE
{
$chrom
}{
$STARTs
{
$chrom
}[
$i
]}) group_name=
$group_name
...
\n
";
$previous_shared
=
1
;
$Group_ID
{
$chrom
}{
$STARTs
{
$chrom
}[
$i
]}{
$ENDs
{
$chrom
}[
$i
]}
=
$group_name
;
$Group_ID
{
$chrom
}{
$STARTs
{
$chrom
}[
$i
+
1
]}{
$ENDs
{
$chrom
}[
$i
+
1
]}
=
$group_name
;
}
}
#--------- Generating output
# Writes the result table to STDOUT (progress messages go to STDERR): one header
# line followed by one comma-separated row per reported call.
#
# Calls are walked per chromosome in the order stored in @{$STARTs{$chrom}}.
# * A call that has an entry in %Group_ID is reported together with all directly
#   following calls carrying the same group ID: the row spans from the first
#   member's start to the last member's end, and the "segments" column lists the
#   group ID and every member segment.
# * A call without a %Group_ID entry is reported on its own.
#
# num_bins, llr_to_ref, llr_to_2nd and af are the maxima over all values
# collected for the reported segment(s). The consensus SV type and haplotype are
# those stored for the first segment's start position.
print STDERR "--\nGenerating Results_Output_Table\n";
print "chrom, start, end, num_bins, consensus_sv, consensus_sv_call_haplotype, llr_to_ref_max, llr_to_2nd_max, af, segments\n";
foreach my $chrom (sort keys %STARTs) {
    my $starts = $STARTs{$chrom};
    my $ends   = $ENDs{$chrom};

    for (my $i = 0; $i < @{$starts}; $i++) {
        # Indices of all calls that are reported in this row; always contains
        # at least the current call.
        my @members = ($i);
        my $group_id;

        if (exists($Group_ID{$chrom}{ $starts->[$i] }{ $ends->[$i] })) {
            $group_id = $Group_ID{$chrom}{ $starts->[$i] }{ $ends->[$i] };

            # Collect the directly following calls that belong to the same
            # group; stop at the end of the chromosome, at an ungrouped call,
            # or where a different group starts.
            while ($members[-1] + 1 < @{$starts}) {
                my $y = $members[-1] + 1;
                last unless exists($Group_ID{$chrom}{ $starts->[$y] }{ $ends->[$y] });
                last if $group_id ne $Group_ID{$chrom}{ $starts->[$y] }{ $ends->[$y] };
                push(@members, $y);
            }
        }

        # Gather the per-cell values of every member segment. The previous code
        # re-read the first segment's values for each additional member, so the
        # reported maxima silently ignored all but the first segment.
        my (@afs, @llr_to_refs, @llr_to_2nds, @num_bins, @segment_names);
        foreach my $idx (@members) {
            my $start = $starts->[$idx];
            push(@afs,           @{ $AFs{$chrom}{$start} });
            push(@llr_to_refs,   @{ $Llr_to_refs{$chrom}{$start} });
            push(@llr_to_2nds,   @{ $Llr_to_2nds{$chrom}{$start} });
            push(@num_bins,      @{ $Num_bins{$chrom}{$start} });
            push(@segment_names, "$chrom:$start-$ends->[$idx]");
        }

        # Use the maximum of each measure in the consensus report.
        my ($af)         = sort { $b <=> $a } @afs;
        my ($llr_to_ref) = sort { $b <=> $a } @llr_to_refs;
        my ($llr_to_2nd) = sort { $b <=> $a } @llr_to_2nds;
        my ($num_bins)   = sort { $b <=> $a } @num_bins;

        my $first_start = $starts->[ $members[0] ];
        my $last_end    = $ends->[ $members[-1] ];

        # Merged calls are labelled with their group ID; single calls only
        # show their own segment.
        my $segments = join('|', @segment_names);
        my $segments_column = defined($group_id)
            ? "[Group_$group_id/$segments]"
            : "[$segments]";

        print "$chrom, $first_start, $last_end, $num_bins, "
            . "$MAIN_SV_TYPE{$chrom}{$first_start}, $MAIN_HAP{$chrom}{$first_start}, "
            . "$llr_to_ref, $llr_to_2nd, $af, $segments_column\n";

        # Jump forward past the members that were just reported.
        $i = $members[-1];
    }
}
#---------------
#--- subroutines
#---------------
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment