Commit 74cc60e6 authored by Thomas Schwarzl's avatar Thomas Schwarzl
Browse files

update ricbase files,

enrichment
parent 1fc65a8f
......@@ -6,6 +6,17 @@ output: html_document
2C RIC Analysis
There are two 2C-RIC runs. The first one with
is RBPBASE000000056.1 Sc_Asencio-2C Sc_2C-TRIC
The second one is
RBPBASE000000068.1 Sc_Asencio-2C-TRIC-p Sc_2C-TRIC-p
RBPBASE000000069.1 Sc_Asencio-2C-SRIC-p Sc_2C-SRIC-p
# Setup
## Paths
......@@ -14,7 +25,6 @@ output: html_document
output_dir <- file.path("out")
lookup_dir <- file.path("..", "RBPbaseBackend", "input", "Review")
input_dir <- file.path("input")
background_dir <- file.path("..", "RBPbaseBackend", "background")
script_dir <- file.path("..", "RBPbaseServer", "app")
```
......@@ -53,7 +63,7 @@ suppressPackageStartupMessages({
ReactomePA,
ggupset,
IHW,
org.Mm.eg.db,
org.Sc.sgd.db,
openxlsx)
#library(cowplot)
require(termEnrichment)
......@@ -80,8 +90,9 @@ suppressPackageStartupMessages({
file_table <- file.path(input_dir, "COMPILED_TABLE.Rda")
file_anno <- file.path(input_dir, "ANNO_STUDIES.Rda")
file_studies <- file.path(input_dir, "RIC_STUDIES.Rda")
file_background <- file.path(background_dir, "Sc_Ascencio2019_Background.Rds")
file_lookup <- file.path(lookup_dir, "LOOKUP.rda")
file_background <- file.path(input_dir, "Sc_Ascencio2019_Background.Rds")
file_short_long_background <- file.path(input_dir, "Sc_Ascencio2020_short_long_Background.Rds")
#file_lookup <- file.path(lookup_dir, "LOOKUP.rda")
if (!file.exists(file_table) | !file.exists(file_anno) | !file.exists(file_studies)) {
stop("You need to clone the RBPbase project")
......@@ -90,8 +101,9 @@ if (!file.exists(file_table) | !file.exists(file_anno) | !file.exists(file_studi
load(file_table)
load(file_anno)
load(file_studies)
load(file_lookup)
#load(file_lookup)
TRIC_Background <- readRDS(file_background)
TRIC_SHORT_LONG_Background <- readRDS(file_short_long_background)
```
......@@ -109,15 +121,12 @@ mytheme <- theme_minimal() + theme(text = element_text(size=20), axis.text.x =
Ids
```{r}
TRIC <- "RBPBASE000000056.1"
LTRIC <- "RBPBASE000000068.1"
STRIC <- "RBPBASE000000069.1"
TRIC <- "RBPBASE000000056.1"
TRIC_2 <- "RBPBASE000000068.1"
sTRIC_2 <- "RBPBASE000000069.1"
#selected_studies_ids <- c(TRIC, LTRIC, STRIC)
selected_studies_ids <- c(TRIC)
selected_studies_ids <- c(TRIC, TRIC_2, sTRIC_2)
```
Define organisms which to compare to
......@@ -191,10 +200,6 @@ anySc_id <- RA$add_annotation(name = "any_Sc",
pull(property) )
# Custom selection for Sc studies wanted by collaborators
#
anyScExpTRAPP_id <- RA$add_annotation(name = "any_Sc",
values =
RA_but_TRAPP$get_studies_only_table(count_comparison_hits = T,
......@@ -222,24 +227,117 @@ RA$set_anyRIC_id(id = anyScMmHs_id, update = TRUE)
## Add custom annotations
is RBPBASE000000056.1 Sc_Asencio-2C Sc_2C-TRIC
The second one is
RBPBASE000000068.1 Sc_Asencio-2C-TRIC-p Sc_2C-TRIC-p
RBPBASE000000069.1 Sc_Asencio-2C-SRIC-p Sc_2C-SRIC-p
### any_2C
```{r}
any2C_id <- RA$add_annotation(name = "any_2C",
values =
RA$get_main_table() %>%
mutate(property = RBPBASE000000068.1 |
RBPBASE000000056.1 |
RBPBASE000000069.1
) %>%
pull(property) )
```
### any_2CTRIC
```{r}
anyTRIC_id <- RA$add_annotation(name = "any_2C_TRIC",
values =
RA$get_main_table() %>%
mutate(property = RBPBASE000000068.1 |
RBPBASE000000056.1
) %>%
pull(property) )
```
### only short 2C run2
```{r}
onlySRIC <- RA$add_annotation(name = "only_2C_SRIC",
values =
RA$get_main_table() %>%
mutate(property = RBPBASE000000069.1 &
!RBPBASE000000068.1
) %>%
pull(property) )
```
### only 2C TRIC run2 compared to 2C SRIC 2 run 2
```{r}
onlyTRIC2 <- RA$add_annotation(name = "only_2C_TRIC2",
values =
RA$get_main_table() %>%
mutate(property = !RBPBASE000000069.1 &
RBPBASE000000068.1
) %>%
pull(property) )
```
### any_2CTRIC2
### Found in any TRIC study
```{r}
# TRIC <- RA$add_annotation(name = "cell_line_studies",
# values =
# RA$get_main_table() %>%
# mutate(property = !!sym(brain_id) |
# !!sym(kidney_id) |
# !!sym(liver_id)) %>%
# pull(property) )
any2C_run2 <- RA$add_annotation(name = "2C_TRIC_SRIC_run2",
values =
RA$get_main_table() %>%
mutate(property = RBPBASE000000068.1 |
RBPBASE000000069.1
) %>%
pull(property) )
```
### Background 2C TRIC run 1
```{r}
background_run1 <- RA$add_annotation(name = "background_2C_run1",
values =
RA$get_main_table() %>%
mutate(property = ID %in% TRIC_Background$GENEID
) %>%
pull(property) )
```
### Background 2C TRIC run2
```{r}
background_run2 <- RA$add_annotation(name = "background_2C_run2",
values =
RA$get_main_table() %>%
mutate(property = ID %in% TRIC_SHORT_LONG_Background$GENEID
) %>%
pull(property) )
```
### TRAPP
```{r}
TRAPP_desc <- "TRAPP"
TRAPP_id <- RA$add_annotation(name = TRAPP_desc,
TRAPP_id <- RA$add_annotation(name = "TRAPP",
values =
RA$get_main_table() %>%
mutate(property = RBPBASE000000042.1 |
......@@ -251,6 +349,8 @@ TRAPP_id <- RA$add_annotation(name = TRAPP_desc,
```
### Found in more than 20 studies
```{r}
......@@ -484,16 +584,36 @@ p
First we add a custom annotation for merged studies
```{r}
RA$save_venn(selection_ids = c(TRIC, RA$get_anyRIC_id()),
names = c("TRIC", "any Sc, Mm, Hs"),
RA$save_venn(selection_ids = c(TRIC, RA$get_anyRIC_id()),
names = c("2C TRIC", "any Sc, Mm, Hs"),
file_root = file.path(output_dir, "TRIC_ScMmHs_venn"),
fill = cbbPalette[c(6,7,3)])
```
```{r}
RA$save_venn(selection_ids = c(TRIC, any2C_run2, RA$get_anyRIC_id()),
names = c("2C TRIC", "2C TRIC/SRIC", "any Sc, Mm, Hs"),
file_root = file.path(output_dir, "TRIC_TRIC2_ScMmHs_venn"),
fill = cbbPalette[c(6,7,3)])
```
```{r}
RA$save_venn(selection_ids = c(TRIC, TRIC_2, sTRIC_2),
names = c("2C TRIC(1)", "2C TRIC(2)", "2C RIC(2)"),
file_root = file.path(output_dir, "TRIC1_TRIC2_SRIC2"),
fill = cbbPalette[c(6,7,3)])
```
```{r}
RA$save_euler(selection_ids = c(TRIC, TRIC_2, sTRIC_2),
names = c("2C TRIC(1)", "2C TRIC(2)", "2C RIC(2)"),
file_root = file.path(output_dir, "TRIC1_TRIC2_SRIC2_euler"),
fill = cbbPalette[c(6,7,3)])
```
## Comparison of TRIC with TRAPP
### with mouse studies - venn
### TRIC / TRAPP / any Sc
```{r}
RA$save_venn(selection_ids = c(TRIC, anyScExpTRAPP_id, TRAPP_id),
......@@ -502,62 +622,31 @@ RA$save_venn(selection_ids = c(TRIC, anyScExpTRAPP_id, TRAPP_id),
fill = cbbPalette[c(6,7,3)])
```
### with mouse studies - upset
### Short
```{r}
# p <- RA$plot_upset(ids = c(only_kidney_id,
# RA$get_studies_only_ids(
# selected_studies = F,
# comparison_studies = F,
# main_organism_studies = T)
# ),
# descriptive_names = T,
# n_intersections = 100)
#
#
# cowplot::save_plot(filename = file.path(output_dir,
# "kidney_specific_Mm_upset.pdf"),
# plot = p,
# base_aspect_ratio = 2,
# base_height = 4)
#
# p
RA$save_venn(selection_ids = c(TRIC_2, sTRIC_2, TRAPP_id),
names = c("2C TRIC(2)", "2C SRIC(2)", "any TRAPP"),
file_root = file.path(output_dir, "TRIC2_SRIC_TRAPP"),
fill = cbbPalette[c(6,7,3)])
```
Showing heatmap for only kidney specific
### combined total
```{r}
# x <- RA$get_main_table(selection_ids = c(only_kidney_id,
# RA$get_studies_only_ids(
# selected_studies = F,
# comparison_studies = F,
# main_organism_studies = T)
# ),
# descriptive_names = T)
#
# v <- RA$get_venn_data(selection_ids = c(
# RA$get_studies_only_ids(
# selected_studies = F,
# comparison_studies = F,
# main_organism_studies = T)
# ),
# filter = x$kidney_specific)
RA$save_venn(selection_ids = c(anyTRIC_id, sTRIC_2, TRAPP_id),
names = c("2C TRIC(1/2)", "2C SRIC(2)", "any TRAPP"),
file_root = file.path(output_dir, "TRIC12_SRIC_TRAPP_venn"),
fill = cbbPalette[c(6,7,3)])
```
```{r}
# pdf(file.path(output_dir,
# "kidney_specific_Mm_heatmap.pdf"))
# setmap(v, method="ward.D2")
# dev.off()
RA$save_euler(selection_ids = c(anyTRIC_id, sTRIC_2, TRAPP_id),
names = c("2C TRIC(1/2)", "2C SRIC(2)", "any TRAPP"),
file_root = file.path(output_dir, "TRIC12_SRIC_TRAPP_euler"),
fill = cbbPalette[c(6,7,3)])
```
### with proteins containing RNA-binding domains
```{r}
......@@ -567,7 +656,13 @@ RA$save_venn(selection_ids = c(TRIC, anySc_id, RA$get_hasRBD_id()),
fill = cbbPalette[c(6,7,3)])
```
```{r}
RA$save_venn(selection_ids = c(anyTRIC_id, sTRIC_2, RA$get_hasRBD_id()),
names = c("2C-TRIC(1/2)", "2C-STRIC(2)", "RB domain"),
file_root = file.path(output_dir, "2CRIC_RBD_Venn"),
fill = cbbPalette[c(6,7,3)])
```
## Comparison of primary with annotation
......@@ -671,29 +766,26 @@ cowplot::save_plot(filename = file.path(output_dir,
p
```
## Comparison of
```{r}
```
## Enrichment Analysis
TRIC_Background
TRIC_SHORT_LONG_Background
### Define Inputs
Helpfer function to retreive IDs
Helpfer function to retrieve IDs
```{r}
get_ids = function(x, colfilter, type = "Ensembl") {
id = "ID"
if (type == "Entrez") {
id = "RBPANNO000000066.1" # Mouse Entrez Gene IDs
}
if(type == "Entrez") {
id = "RBPANNO000000068.1"
}
x %>%
dplyr::filter(!!sym(colfilter)) %>%
......@@ -702,10 +794,10 @@ get_ids = function(x, colfilter, type = "Ensembl") {
unlist %>%
sort %>%
unique
}
```
background_run1
#### As Ensembl IDs
......@@ -714,63 +806,64 @@ get_ids = function(x, colfilter, type = "Ensembl") {
#source("TermEnrichment.R")
EHE <- EnrichmentHandler$new()
EHE$add_comparison_manually("kidney",
EHE$add_comparison_manually("SRIC",
get_ids(RA$get_main_table(), sTRIC_2),
get_ids(RA$get_main_table(), background_run2))
EHE$add_comparison_manually("TRIC2",
get_ids(RA$get_main_table(), TRIC_2),
get_ids(RA$get_main_table(), background_run2))
EHE$add_comparison_manually("TRIC",
get_ids(RA$get_main_table(), TRIC),
TRIC_Background$GENEID)
#get_ids(RA$get_main_table(), kidney_id),
#get_ids(RA$get_main_table(), wcl_kidney_id))
get_ids(RA$get_main_table(), background_run1))
EHE$add_comparison_manually("onlyTRIC",
get_ids(RA$get_main_table(), onlyTRIC2),
get_ids(RA$get_main_table(), background_run2))
EHE$add_comparison_manually("onlySRIC",
get_ids(RA$get_main_table(), onlySRIC),
get_ids(RA$get_main_table(), background_run2))
EHE$write_comparisons(path = output_dir)
```
#### As Entrez / NCBI Gene IDs
```{r}
source("TermEnrichment.R")
EHN <- EnrichmentHandler$new()
EHN$add_comparison_manually("kidney",
get_ids(RA$get_main_table(), kidney_id, type = "Entrez"),
get_ids(RA$get_main_table(), wcl_kidney_id, type = "Entrez"))
EHN$add_comparison_manually("SRIC",
get_ids(RA$get_main_table(), sTRIC_2, type="Entrez"),
get_ids(RA$get_main_table(), background_run2, type="Entrez"))
EHN$add_comparison_manually("TRIC2",
get_ids(RA$get_main_table(), TRIC_2, type="Entrez"),
get_ids(RA$get_main_table(), background_run2, type="Entrez"))
EHN$add_comparison_manually("TRIC",
get_ids(RA$get_main_table(), TRIC, type="Entrez"),
get_ids(RA$get_main_table(), background_run1, type="Entrez"))
EHN$add_comparison_manually("onlyTRIC",
get_ids(RA$get_main_table(), onlyTRIC2, type="Entrez"),
get_ids(RA$get_main_table(), background_run2, type="Entrez"))
EHN$add_comparison_manually("onlySRIC",
get_ids(RA$get_main_table(), onlySRIC, type="Entrez"),
get_ids(RA$get_main_table(), background_run2, type="Entrez"))
EHN$add_comparison_manually("brain",
get_ids(RA$get_main_table(), brain_id, type = "Entrez"),
get_ids(RA$get_main_table(), wcl_brain_id, type = "Entrez")
)
EHN$add_comparison_manually("liver",
get_ids(RA$get_main_table(), liver_id, type = "Entrez"),
get_ids(RA$get_main_table(), wcl_liver_id, type = "Entrez")
)
EHN$add_comparison_manually("more20",
get_ids(RA$get_main_table() %>% filter(!!sym(anyMm_id)), more20_id, type = "Entrez"),
get_ids(RA$get_main_table(), TRIC, type = "Entrez")
)
#EHN$add_enrichment(ReactomePAEnrichment$new(organism = "mouse"))
#EHN$options$verbose <- T
#EHN$run()
#EHN
EHN$write_comparisons(path = output_dir)
```
### KEGG enrichment
#### Tissue comparison
#### Multiple
```{r}
kegg_cluster <- compareCluster(geneCluster = list(
Brain = EHN$get_comparison("brain")$get_foreground(),
Kidney = EHN$get_comparison("kidney")$get_foreground(),
Liver = EHN$get_comparison("liver")$get_foreground()),
organism = 'mmu',
# universe = c(EHN$get("brain")$get_universe() ,
# EHN$get("kidney")$get_universe(),
# EHN$get("liver")$get_universe()) %>% sort %>% unique,
keyType = 'ncbi-geneid',
SRIC = EHE$get_comparison("SRIC")$get_foreground(),
TRIC2 = EHE$get_comparison("TRIC2")$get_foreground(),
TRIC = EHE$get_comparison("TRIC")$get_foreground()),
organism = 'sce',
#keyType = 'ncbi-geneid',
pAdjustMethod = "BH",
fun = "enrichKEGG")
p <- dotplot(kegg_cluster)
......@@ -783,9 +876,9 @@ kegg_cluster <- compareCluster(geneCluster = list(
Perform enrichment for all comparisons
```{r}
for (i in names(EHN$get_comparison_names())) {
RK <- KEGGClusterProfilerEnrichment$new(organism = "mmu")
RK$set_input(EHN$get_comparison(i))
for (i in EHE$get_comparison_names()) {
RK <- KEGGClusterProfilerEnrichment$new(organism = "sce")
RK$set_input(EHE$get_comparison(i))
RK$run_enrichment()
RK$plot_enrichment()
RK$save_enrichment(path = output_dir)
......@@ -795,27 +888,29 @@ for (i in names(EHN$get_comparison_names())) {
#### Translate IDs
TODO
```{r}
files_kegg_tables <- list.files(output_dir, pattern = "_kegg_table.xlsx")
# files_kegg_tables <- list.files(output_dir, pattern = "_kegg_table.xlsx")
```
```{r}
KEGGresults_translated <- lapply(files_kegg_tables, function(input_file) {
y <- read.xlsx(file.path(output_dir, input_file))
y <- y %>% as.data.frame %>% mutate(gene_name = lapply(geneID, function(x) {
i <- strsplit(x, split = "/") %>% unlist %>% as.numeric
LOOKUP[["Mm_EnsemblID_Entrez"]] %>%
dplyr::filter(Entrez %in% i) %>%
left_join(y = LOOKUP[["Mm_EnsemblID_Name_Description"]], by = c("ID" = "ID")) %>%
.[["Name"]] %>% paste0(collapse = "|")
}))
new_file <- gsub(input_file, pattern = ".xlsx$", replacement = "_translated.xlsx")
write.xlsx(y, file = file.path(output_dir, new_file))
})
# KEGGresults_translated <- lapply(files_kegg_tables, function(input_file) {
# y <- read.xlsx(file.path(output_dir, input_file))
#
# y <- y %>% as.data.frame %>% mutate(gene_name = lapply(geneID, function(x) {
# i <- strsplit(x, split = "/") %>% unlist %>% as.numeric
# LOOKUP[["Mm_EnsemblID_Entrez"]] %>%
# dplyr::filter(Entrez %in% i) %>%
# left_join(y = LOOKUP[["Mm_EnsemblID_Name_Description"]], by = c("ID" = "ID")) %>%
# .[["Name"]] %>% paste0(collapse = "|")
# }))
#
# new_file <- gsub(input_file, pattern = ".xlsx$", replacement = "_translated.xlsx")
#
# write.xlsx(y, file = file.path(output_dir, new_file))
# })
```
......@@ -826,11 +921,12 @@ KEGGresults_translated <- lapply(files_kegg_tables, function(input_file) {
Perform enrichment for all comparisons
```{r}
for (i in names(EHN$get_comparison_names())) {
RE <- ReactomePAEnrichment$new(organism = "mouse")
for (i in EHN$get_comparison_names()) {
RE <- ReactomePAEnrichment$new(organism = "yeast", readable = FALSE)
RE$set_input(EHN$get_comparison(i))
RE$run_enrichment()
RE$save_enrichment(path = output_dir)
#RE$plot_enrichment() # TODO TO IMPLEMENT
#RE$save_enrichment(path = output_dir) # TODO TO IMPLEMENT
}
#RE <- ReactomePAEnrichment$new(organism = "mouse")
......@@ -844,43 +940,37 @@ for (i in names(EHN$get_comparison_names())) {
```{r}
for (gotype in c("BP", "CC", "MF")) {
for (i in EHE$get_comparison_names()) {
RO <- GOProfilerEnrichment$new(org_db = org.Mm.eg.db,
ont = gotype)
RO <- GOProfilerEnrichment$new(org_db = org.Sc.sgd.db::org.Sc.sgd.db,
ont = gotype,
readable = FALSE)
RO$set_input(EHE$get_comparison(i))
RO$run_enrichment()
RO$save_enrichment(path = output_dir)
}
}
RO <- GOProfilerEnrichment$new(org_db = org.Mm.eg.db,
ont = "BP")
RO$set_input(EHE$get_comparison("brain"))
RO$run_enrichment()
RO$plot_enrichment()
RO$enrichment
#RO$save_enrichment(path = output_dir)
#
#
emapplot(RO$enrichment)
#emapplot(RO$enrichment)
#ridgeplot(RO$enrichment)
```
#### Tissue comparison
#### Comparison
```{r}
for(i in c("BP", "CC", "MF")) {
GO_cluster <- compareCluster(geneCluster = list(
Brain = EHE$get_comparison("brain")$get_foreground(),
Kidney = EHE$get_comparison("kidney")$get_foreground(),
Liver = EHE$get_comparison("liver")$get_foreground()),
onlySRIC = EHE$get_comparison("onlySRIC")$get_foreground(),
onlyTRIC = EHE$get_comparison("onlyTRIC")$get_foreground()),
#universe = c(EHE$get_comparison("brain")$get_universe() ,
# EHE$get_comparison("kidney")$get_universe(),
# EHE$get_comparison("liver")$get_universe()) %>% sort %>% unique,
pAdjustMethod = "BH",
OrgDb = org.Mm.eg.db,
OrgDb = org.Sc.sgd.db::org.Sc.sgd.db,
keyType = "ENSEMBL",
ont = i,
fun = "enrichGO")
......@@ -891,115 +981,11 @@ for(i in c("BP", "CC", "MF")) {
```
### Domain Enrichment
#### Mouse Mine results
```{r}
for (i in c("brain", "liver", "kidney", "more20")) {
x <- read_tsv(file.path( "domain", paste0(i, ".txt")),
col_names = c("domain", "padj", "domainlist", "ID"),
col_types = "cdcc") %>% mutate(domain = factor(domain, level = rev(domain)))
p <- x %>%
ggplot(aes(y = domain, x = -log10(padj), fill = padj)) + geom_bar(stat = "identity") +
ggtitle(i) + ylab("") + theme_minimal()
cowplot::save_plot(p, filename = file.path(output_dir, paste0("domain_", i ,".pdf")))
cowplot::save_plot(p, filename = file.path(output_dir, paste0("domain_", i ,".png")))
}
```
# Session
#### Mouse Mine results - combined
```{r}
mouse_mine <- list()
for (i in c("brain", "liver", "kidney")) {
mouse_mine[[i]] <- read_tsv(file.path( "domain", paste0(i, ".txt")),
col_names = c("domain", "padj", "domainlist", "ID"),
col_types = "cdcc") %>% mutate(domain = factor(domain, level = rev(domain))) %>%
mutate(cell_line = i)
}
library(RColorBrewer)
mouse_mine_combined <- do.call(bind_rows, mouse_mine)