Contents
Setup and data
source("../utils/utils.R")
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.2 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.4 ✔ tibble 3.2.1
✔ lubridate 1.9.2 ✔ tidyr 1.3.0
✔ purrr 1.0.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Attaching package: 'magrittr'
The following object is masked from 'package:purrr':
set_names
The following object is masked from 'package:tidyr':
extract
Loading required package: GenomicRanges
Loading required package: stats4
Loading required package: BiocGenerics
Attaching package: 'BiocGenerics'
The following objects are masked from 'package:lubridate':
intersect, setdiff, union
The following objects are masked from 'package:dplyr':
combine, intersect, setdiff, union
The following objects are masked from 'package:stats':
IQR, mad, sd, var, xtabs
The following objects are masked from 'package:base':
anyDuplicated, aperm, append, as.data.frame, basename, cbind,
colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
table, tapply, union, unique, unsplit, which.max, which.min
Loading required package: S4Vectors
Attaching package: 'S4Vectors'
The following objects are masked from 'package:lubridate':
second, second<-
The following objects are masked from 'package:dplyr':
first, rename
The following object is masked from 'package:tidyr':
expand
The following objects are masked from 'package:base':
expand.grid, I, unname
Loading required package: IRanges
Attaching package: 'IRanges'
The following object is masked from 'package:lubridate':
%within%
The following objects are masked from 'package:dplyr':
collapse, desc, slice
The following object is masked from 'package:purrr':
reduce
Loading required package: GenomeInfoDb
Attaching package: 'GenomicRanges'
The following object is masked from 'package:magrittr':
subtract
Loading required package: grid
Loading required package: Biostrings
Loading required package: XVector
Attaching package: 'XVector'
The following object is masked from 'package:purrr':
compact
Attaching package: 'Biostrings'
The following object is masked from 'package:grid':
pattern
The following object is masked from 'package:base':
strsplit
Attaching package: 'gridExtra'
The following object is masked from 'package:BiocGenerics':
combine
The following object is masked from 'package:dplyr':
combine
Attaching package: 'data.table'
The following object is masked from 'package:GenomicRanges':
shift
The following object is masked from 'package:IRanges':
shift
The following objects are masked from 'package:S4Vectors':
first, second
The following objects are masked from 'package:lubridate':
hour, isoweek, mday, minute, month, quarter, second, wday, week,
yday, year
The following objects are masked from 'package:dplyr':
between, first, last
The following object is masked from 'package:purrr':
transpose
# load the configuration object used by later chunks
config = load_config()

# load CHT results: one result table per element of ab_tp_list, stacked row-wise.
# remove_chr is spelled FALSE (not F): F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved constant.
cht_full = lapply(ab_tp_list, function(ab_tp) load_cht_results(ab_tp, remove_chr = FALSE)) %>% bind_rows()
# drop rows whose test SNP lies on chrX, chrY or chrM
cht = cht_full %>% filter(!TEST.SNP.CHROM %in% c("chrX", "chrY", "chrM"))
# keep only rows where the signif_strongAI column is TRUE
cht_sign = cht %>% filter(signif_strongAI)

# genes and promoters
genes = load_genes()
# NOTE(review): assuming genes is a GRanges, resize(..., fix = "start") keeps the
# start anchored and sets the width to 1000, so the window extends from the gene
# start into the gene body rather than upstream - confirm this is the intended
# promoter definition.
promoters = resize(genes, width = 1000, fix = "start")

# combined motif set (all TFs, peaks + alleles)
fimo = get_full_motif_sets(cht, ab_tp_list)
# only alleles: one table per element of ab_tp_list, stacked row-wise
fimo_alleles = lapply(ab_tp_list, function(ab_tp) parse_motifs_in_two_alleles(ab_tp, cht)) %>% bind_rows()
LS0tCnRpdGxlOiAiRmlndXJlXzEiCm91dHB1dDoKICAgQmlvY1N0eWxlOjpodG1sX2RvY3VtZW50OgogICAgICB0b2M6IHRydWUKICAgICAgZGZfcHJpbnQ6IHBhZ2VkCiAgICAgIHNlbGZfY29udGFpbmVkOiB0cnVlCiAgICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKICAgICAgaGlnaGxpZ2h0OiB0YW5nbwojYmlibGlvZ3JhcGh5OiBrbm5fbWxfaW50cm8uYmliCmVkaXRvcl9vcHRpb25zOiAKICBjaHVua19vdXRwdXRfdHlwZTogaW5saW5lCi0tLQoKYGBge3Igc3R5bGUsIGVjaG89RkFMU0UsIHJlc3VsdHM9ImFzaXMifQpsaWJyYXJ5KCJrbml0ciIpCm9wdGlvbnMoZGlnaXRzID0gMiwgd2lkdGggPSA4MCkKb3B0aW9ucyhiaXRtYXBUeXBlID0gJ2NhaXJvJykKZ29sZGVuX3JhdGlvIDwtICgxICsgc3FydCg1KSkgLyAyCm9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFLCB0aWR5ID0gRkFMU0UsIGluY2x1ZGUgPSBUUlVFLCBjYWNoZSA9IEZBTFNFLAogICAgICAgICAgICAgICBkZXY9YygncG5nJywgJ3BkZicpLCBjb21tZW50ID0gJyAgJywgZHBpID0gMzAwKQoKb3B0aW9ucyhzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpCmtuaXRyOjpvcHRzX2NodW5rJHNldChjYWNoZT1GQUxTRSkKb3B0aW9ucyhkaWdpdHMgPSA1KSAgICAgICAgIApgYGAKCgojIFNldHVwIGFuZCBkYXRhCgpgYGB7cn0Kc291cmNlKCIuLi91dGlscy91dGlscy5SIikKY29uZmlnID0gbG9hZF9jb25maWcoKQoKIyBsb2FkIENIVCByZXN1bHRzCmNodF9mdWxsID0gbGFwcGx5KGFiX3RwX2xpc3QsIGZ1bmN0aW9uKGFiX3RwKSBsb2FkX2NodF9yZXN1bHRzKGFiX3RwLCByZW1vdmVfY2hyID0gRikpICU+JSBiaW5kX3Jvd3MoKQpjaHQgPSBjaHRfZnVsbCAlPiUgZmlsdGVyKCFURVNULlNOUC5DSFJPTSAlaW4lIGMoImNoclgiLCAiY2hyWSIsICJjaHJNIikpCmNodF9zaWduID0gY2h0ICU+JSBmaWx0ZXIoc2lnbmlmX3N0cm9uZ0FJKSAKCiMgZ2VuZXMgYW5kIHByb21vdGVycwpnZW5lcyA9IGxvYWRfZ2VuZXMoKQpwcm9tb3RlcnMgPSByZXNpemUoZ2VuZXMsIHdpZHRoID0gMTAwMCwgZml4ID0gInN0YXJ0IikKCiMgY29tYmluZWQgbW90aWYgc2V0IChhbGwgVEZzLCBwZWFrcyArIGFsbGVsZXMpCmZpbW8gPSBnZXRfZnVsbF9tb3RpZl9zZXRzKGNodCwgYWJfdHBfbGlzdCkKIyBvbmx5IGFsbGVsZXMKZmltb19hbGxlbGVzICA9IGxhcHBseShhYl90cF9saXN0LCBmdW5jdGlvbihhYl90cCkgcGFyc2VfbW90aWZzX2luX3R3b19hbGxlbGVzKGFiX3RwLCBjaHQpKSAlPiUgYmluZF9yb3dzKCkgCmBgYAoKCiMgRmlndXJlIDEgQyBtb3RpZiBsb2dvcwoKCmBgYHtyfQoKb3V0Zl9iYXNlID0gZmlsZS5wYXRoKG91dGRpcl9maWdfbWFpbiwgIi9tb3RpZl9sb2dvcy8iKQpmID0gY29uZmlnJGRhdGEkbW90aWZfZGF0YWJhc2VzJGRlbm92b19tb3RpZnMKcGZtcyA9IHJlYWRfbWVtZShmKQoKZm9yKHBmbSBpbiBwZm1zKSB7CiAgCiAgI21vdGlmX25hbWUgPSBnc3ViKCJcXC8iLCAiXyIs
IHBmbUBuYW1lKQogIG1vdGlmX25hbWUgPSBwZm1AbmFtZQogIHByaW50KG1vdGlmX25hbWUpCiAgb3V0ZiA9IGZpbGUucGF0aChvdXRmX2Jhc2UsIHBhc3RlMChtb3RpZl9uYW1lLCAiLnBkZiIpKQogIAogIHAgPSB2aWV3X21vdGlmcyhwZm0sIHVzZS50eXBlID0gIklDTSIsIGNvbG91ci5zY2hlbWUgPSBsZXR0ZXJfY29sb3JzLCBzb3J0LnBvc2l0aW9ucyA9IFQpICsKICAgIHRoZW1lX2NsYXNzaWMoKSArCiAgICBzY2FsZV94X2NvbnRpbnVvdXMoYnJlYWtzID0gMTpuY29sKHBmbSksIGxhYmVscyA9IDE6bmNvbChwZm0pLCBuYW1lID0gIlBvc2l0aW9uIikgKwogICAgeWxhYigiQml0cyIpICsKICAgIHRoZW1lKGF4aXMudGV4dC55ID0gZWxlbWVudF90ZXh0KHNpemU9MTIpLCBheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChzaXplPTEyKSwKICAgICAgICAgIGF4aXMudGl0bGUueSA9IGVsZW1lbnRfdGV4dChzaXplPTEyKSwgYXhpcy50aXRsZS54ID0gZWxlbWVudF90ZXh0KHNpemU9MTIpLAogICAgICAgICAgbGVnZW5kLnBvc2l0aW9uPSJub25lIiwKICAgICAgICAgIHBsb3QudGl0bGUgPSBlbGVtZW50X3RleHQoY29sb3I9ImJsYWNrIiwgZmFjZT0iYm9sZCIsIHNpemU9MTYsIGhqdXN0PTAuNSkpCiAgCiAgcHJpbnQocCkKICBnZ3NhdmUob3V0ZiwgcCwgIHdpZHRoID0gNCwgaGVpZ2h0ID0gMikKICAKICAKfQoKYGBgCgoKIyBGaWd1cmUgMSBDIE1vdGlmIGNlbnRyYWwgZW5yaWNobWVudAoKYGBge3J9CnBwID0gbGFwcGx5KGFiX3RwX2xpc3QsIGZ1bmN0aW9uKGFiX3RwKSB7cHJpbnQoYWJfdHApOyBwbG90X21vdGlmX2NlbnRyYWxfZW5yaWNobWVudChhYl90cCwgcHdtID0gbW90aWZzX2Rlbm92b19saXN0W2FiX3RwXSl9KQoKcCA9IGRvLmNhbGwoImdyaWQuYXJyYW5nZSIsIGMocHAsIG5jb2w9IDYpKQpnZ3NhdmUoZmlsZS5wYXRoKG91dGRpcl9maWdfbWFpbiwgIkZpZzFDX21vdGlmX2NlbnRyYWxfZW5yaWNobWVudC5wZGYiKSwgcCwgd2lkdGggPSAxNSwgaGVpZ2h0ID0gMikKCmBgYAoKCiMgRmlndXJlIDEgRCBVcHNldCBwbG90CgpgYGB7cn0KIyBwYXRocyB0byBjb25zZW5zdXMgcGVhayBzZXRzIGZvciBhbGwgVEZzCmZmID0gbGFwcGx5KGFiX3RwX2xpc3QsIGZ1bmN0aW9uKGFiX3RwKSBnZXRfcGF0aF9jb25zZW5zdXNfcGVha3NldChhYl90cCkpCgojIHJlYWQgcGVhayBzZXRzCnBlYWtzID0gbGFwcGx5KGZmLCBmdW5jdGlvbihmKSBsb2FkX2NvbnNlbnN1c19wZWFrX3NldChmLCBmaWx0ZXJfY2hyID0gRikpICU+JSBHUmFuZ2VzTGlzdCgpCm5hbWVzKHBlYWtzKSA9IGFiX3RwX2xhYmVscwoKY29tYmluZWRfcGVha3NldCA9IGNvbnN0cnVjdF9jb21iaW5lZF9jb25zZW5zdXNfc2V0KHBlYWtzKQoKZGYgPSBjb21iaW5lZF9wZWFrc2V0WyAsIDY6bmNvbChjb21iaW5lZF9wZWFrc2V0KV0KbmFtZXMoZGYpID0gYWJfdHBfbGFiZWxzCgoKI3twZGYoZmlsZS5wYXRoKG91dGRpcl9maWdfbWFpbiwgIkZpZzFEX1Vwc2V0UGxv
dF9mdWxsLnBkZiIpLCB3aWR0aCA9IDcsIGhlaWdodCA9IDUpCgpwID0gdXBzZXQoZGYsICBvcmRlci5ieSA9ICJmcmVxIiwgbmludGVyc2VjdHMgPSAxNSwga2VlcC5vcmRlciA9IFQsIHNldHMgPSByZXYobmFtZXMoZGYpKSwKICAgICAgc2V0cy5iYXIuY29sb3IgPSByZXYoVEZjb2xzKSwgdGV4dC5zY2FsZSA9IDEuNSkKCnByaW50KHApCgpwZGYoZmlsZS5wYXRoKG91dGRpcl9maWdfbWFpbiwgIkZpZzFEX1Vwc2V0UGxvdF9mdWxsLnBkZiIpLCB3aWR0aCA9IDcsIGhlaWdodCA9IDUpCnAKZGV2Lm9mZigpCgpgYGAKCgojIEZpZ3VyZSAxIEUgTnVtYmVyIG9mIHZhcmlhbnRzIHBlciBwZWFrCgpgYGB7cn0KIyBwYXRocyB0byBjb25zZW5zdXMgcGVhayBzZXRzIGZvciBhbGwgVEZzCmZmID0gbGFwcGx5KGFiX3RwX2xpc3QsIGZ1bmN0aW9uKGFiX3RwKSBnZXRfcGF0aF9jb25zZW5zdXNfcGVha3NldChhYl90cCkpCnBlYWtzID0gbGFwcGx5KGZmLCBmdW5jdGlvbihmKSBsb2FkX2NvbnNlbnN1c19wZWFrX3NldChmLCBmaWx0ZXJfY2hyID0gRikgJT4lIGFzLmRhdGEuZnJhbWUoKSkgJT4lIGJpbmRfcm93cygpICU+JSBzZWxlY3QocGVha19pZCwgY29uZGl0aW9uKQoKIyBhbm5vdGF0ZSBieSBudW1iZXIgb2YgdmFyaWFudHMgcGVyIHBlYWsKcGVha3Nfd2l0aF92YXJpYW50cyA9IGdldF9udW1iZXJfb2ZfdmFyaWFudHNfcGVyX3BlYWsoY2h0X2Z1bGwpCgpkZiA9IG1lcmdlKHBlYWtzLCBwZWFrc193aXRoX3ZhcmlhbnRzLCBieSA9IGMoInBlYWtfaWQiLCAiY29uZGl0aW9uIiksIGFsbC54ID0gVCkKZGYkbl92YXJbaXMubmEoZGYkbl92YXIpXSA9IDAKCmRmICU8PiUgbXV0YXRlKGJpbl9uX3ZhciA9IGN1dChuX3ZhciwgYnJlYWtzID0gYygtMSwgMCwgMjAsIDUwLCA4MCwgSW5mKSwgbGFiZWxzID0gYygiMCIsICIxLTIwIiwgIjIxLTUwIiwgIjUxLTgwIiwgIj44MCIpKSkKCmRmX3N1bSA9IGRmICU+JSBncm91cF9ieShjb25kaXRpb24pICU+JSAKICAgICAgICBtdXRhdGUobl90b3QgPSBuKCkpICU+JQogICAgICAgIGdyb3VwX2J5KGNvbmRpdGlvbiwgYmluX25fdmFyKSAlPiUKICAgICAgICBzdW1tYXJpc2Uobl9iaW5fdmFyID0gbigpLCBzaGFyZV9iaW5fdmFyID0gbl9iaW5fdmFyIC8gbWVhbihuX3RvdCkpICU+JQogICAgICAgIG11dGF0ZShiaW5fbl92YXIgPSBmYWN0b3IoYmluX25fdmFyLCBsZXZlbHMgPSByZXYobGV2ZWxzKGJpbl9uX3ZhcikpKSkKCmRmX3N1bSRsYWJlbCA9IGZhY3RvcihhYl90cF9sYWJlbHNbZGZfc3VtJGNvbmRpdGlvbl0sIGxldmVscyA9IHJldihhYl90cF9sYWJlbHMpKQoKcCA9IGdncGxvdChkZl9zdW0sIGFlcyh4ID0gbGFiZWwsIHkgPSBzaGFyZV9iaW5fdmFyLCBmaWxsID0gYmluX25fdmFyKSkgKwogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCB3aWR0aCA9IDAuNikgKwogIHNjYWxlX2ZpbGxfbWFudWFsKHZhbHVlcyA9IGNiUGFsZXR0ZSwgbmFtZSA9ICIjIHZhcmlhbnRzIHBlciBwZWFrIikg
KwogIHhsYWIoIiIpICsKICB5bGFiKCJTaGFyZSBvZiBwZWFrcyIpICsKICB0aGVtZV9idygpICsKICB5bGltKGMoMCwgMSkpICsKICBjb29yZF9mbGlwKCkgKwogIHRoZW1lKGF4aXMudGV4dC55ID0gZWxlbWVudF90ZXh0KHNpemU9MTgsIGNvbG9yID0gcmV2KFRGY29scykpLCBheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChzaXplPTE2KSwgCiAgICAgICAgYXhpcy50aXRsZS54ID0gZWxlbWVudF90ZXh0KHNpemU9MTgpLCBheGlzLnRpdGxlLnkgPSBlbGVtZW50X3RleHQoc2l6ZT0xNiksCiAgICAgICAgbGVnZW5kLnRleHQ9ZWxlbWVudF90ZXh0KHNpemU9MTYpLCBsZWdlbmQudGl0bGU9ZWxlbWVudF90ZXh0KHNpemU9MTYpKQoKCnAKZ2dzYXZlKGZpbGUucGF0aChvdXRkaXJfZmlnX21haW4sICJGaWcxRV9udW1fdnJpYW50c19wZXJfcGVhay5wZGYiKSwgcCwgd2lkdGggPSA4LCBoZWlnaHQgPSAzKQoKYGBg