Commit 1fc65a8f authored by Thomas Schwarzl's avatar Thomas Schwarzl
Browse files

added inputs

parent 693d421d
.Rproj.user
.Rhistory
.RData
.Ruserdata
---
title: "2CRICAnalysis"
author: "Thomas Schwarzl"
output: html_document
---
2C RIC Analysis
# Setup
## Paths
```{r}
output_dir <- file.path("out")
lookup_dir <- file.path("..", "RBPbaseBackend", "input", "Review")
input_dir <- file.path("input")
background_dir <- file.path("..", "RBPbaseBackend", "background")
script_dir <- file.path("..", "RBPbaseServer", "app")
```
Create directories
```{r}
dir.create(output_dir, showWarnings = FALSE)
```
## Install Packages
```{r, message=FALSE}
#require(devtools)
#remotes::install_github("Distue/termEnrichment")
```
## Load libraries
```{r}
if (!require("pacman")) install.packages("pacman")
setRepositories(ind = c(1,2,3,4,5))
suppressPackageStartupMessages({
pacman::p_load(
R6,
tidyverse,
eulerr,
seqsetvis,
RVenn,
ggExtra,
Cairo,
#UpSetR,
RColorBrewer,
stringr,
cowplot,
clusterProfiler,
ReactomePA,
ggupset,
IHW,
org.Mm.eg.db,
openxlsx)
#library(cowplot)
require(termEnrichment)
# theme_set(cowplot::theme_cowplot())
#require(venneuler)
#require(rJava)
})
```
## Load scripts
```{r}
suppressPackageStartupMessages({
source(file.path(script_dir, "annotationFunctions.R"))
source(file.path("..", "RBPbaseAnalysis", "RBPbaseFunctions.R"))
source(file.path("..", "RBPbaseAnalysisMmCelllines", "TermEnrichment.R"))
})
```
## Load data
```{r}
file_table <- file.path(input_dir, "COMPILED_TABLE.Rda")
file_anno <- file.path(input_dir, "ANNO_STUDIES.Rda")
file_studies <- file.path(input_dir, "RIC_STUDIES.Rda")
file_background <- file.path(background_dir, "Sc_Ascencio2019_Background.Rds")
file_lookup <- file.path(lookup_dir, "LOOKUP.rda")
if (!file.exists(file_table) | !file.exists(file_anno) | !file.exists(file_studies)) {
stop("You need to clone the RBPbase project")
}
load(file_table)
load(file_anno)
load(file_studies)
load(file_lookup)
TRIC_Background <- readRDS(file_background)
```
## Graphical setup
```{r}
cbbPalette <- c("#D55E00", "#0072B2", "#888888", "#E69F00", "#009E73", "#F0E442", "#56B4E9", "#CC79A7")
cbbPalette2 <- c("#D55E00", "#F0E442", "#0072B2", "#888888", "#E69F00", "#009E73", "#56B4E9", "#CC79A7")
mytheme <- theme_minimal() + theme(text = element_text(size=20), axis.text.x = element_text(size=20, face = "bold"), plot.title = element_text(size = 25, face = "bold", colour = "black", vjust=-1))
```
## Define study ids and properties
Ids
```{r}
TRIC <- "RBPBASE000000056.1"
LTRIC <- "RBPBASE000000068.1"
STRIC <- "RBPBASE000000069.1"
#selected_studies_ids <- c(TRIC, LTRIC, STRIC)
selected_studies_ids <- c(TRIC)
```
Define organisms which to compare to
```{r}
comparison_studies_organism <- c("Hs", "Mm")
```
```{r}
excluded_comparison_studies <- NULL
```
```{r}
TRAPP_RBP_ids <- c("RBPBASE000000042.1",
"RBPBASE000000043.1",
"RBPBASE000000044.1",
"RBPBASE000000045.1")
```
# Analysis
## Initialize
Create RBPbase Analysis Handler
```{r}
RA <- new_RBPbaseAnalysis(COMPILED_TABLE,
RIC_STUDIES,
ANNO_STUDIES,
selected_studies_ids,
comparison_studies_organism,
excluded_comparison_studies,
enzyme_id = "RBPANNO000000007.1",
hasRBD_id = "RBPANNO000000019.1",
metabolic_enzyme_id = "RBPANNO000000036.1",
knownRBPs_study_ids = c("RBPANNO000000040.1", # GO
"RBPANNO000000033.1" #RNAcompete
),
DEBUG = FALSE,
update = TRUE)
RA_but_TRAPP <- new_RBPbaseAnalysis(COMPILED_TABLE,
RIC_STUDIES,
ANNO_STUDIES,
selected_studies_ids,
comparison_studies_organism,
TRAPP_RBP_ids, # exclude
enzyme_id = "RBPANNO000000007.1",
hasRBD_id = "RBPANNO000000019.1",
metabolic_enzyme_id = "RBPANNO000000036.1",
knownRBPs_study_ids = c("RBPANNO000000040.1", # GO
"RBPANNO000000033.1" #RNAcompete
),
DEBUG = FALSE,
update = TRUE)
```
```{r}
# Custom selection for Sc studies wanted by collaborators
anySc_id <- RA$add_annotation(name = "any_Sc",
values =
RA$get_studies_only_table(count_comparison_hits = T,
selected_studies = F,
comparison_studies = F,
main_organism_studies = T) %>%
mutate(property = number_hits_compare > 0) %>%
pull(property) )
# Custom selection for Sc studies wanted by collaborators
#
anyScExpTRAPP_id <- RA$add_annotation(name = "any_Sc",
values =
RA_but_TRAPP$get_studies_only_table(count_comparison_hits = T,
selected_studies = F,
comparison_studies = F,
main_organism_studies = T) %>%
mutate(property = number_hits_compare > 0) %>%
pull(property) )
# Custom selection for MM or Hs studies wanted by collaborators
anyScMmHs_id <- RA$add_annotation(name = "any_MmHs",
values =
RA$get_studies_only_table(count_comparison_hits = T,
selected_studies = F,
comparison_studies = T,
main_organism_studies = T) %>%
mutate(property = number_hits_compare > 0) %>%
mutate(property = ifelse(is.na(property), F, property)) %>%
pull(property) )
RA$set_anyRIC_id(id = anyScMmHs_id, update = TRUE)
```
## Add custom annotations
### Found in any TRIC study
```{r}
# TRIC <- RA$add_annotation(name = "cell_line_studies",
# values =
# RA$get_main_table() %>%
# mutate(property = !!sym(brain_id) |
# !!sym(kidney_id) |
# !!sym(liver_id)) %>%
# pull(property) )
```
### TRAPP
```{r}
TRAPP_desc <- "TRAPP"
TRAPP_id <- RA$add_annotation(name = TRAPP_desc,
values =
RA$get_main_table() %>%
mutate(property = RBPBASE000000042.1 |
RBPBASE000000043.1 |
RBPBASE000000044.1 |
RBPBASE000000045.1
) %>%
pull(property) )
```
### Found in more than 20 studies
```{r}
more20_id <- RA$add_annotation(
name = "found_in_more_than_20",
values =
RA$get_studies_only_table(count_comparison_hits = T,
selected_studies = T,
comparison_studies = T,
main_organism_studies = T) %>%
mutate(property = number_hits_compare >= 20) %>%
pull(property)
)
```
### Found in more than 30 studies
```{r}
more30_id <- RA$add_annotation(
name = "found_in_more_than_30",
values =
RA$get_studies_only_table(count_comparison_hits = T,
selected_studies = T,
comparison_studies = T,
main_organism_studies = T) %>%
mutate(property = number_hits_compare >= 30) %>%
pull(property)
)
```
## Comparison between the cell line studies
```{r}
# RA$save_venn(selection_ids = RA$selected_studies_ids,
# names = c("Brain", "Kidney", "Liver"),
# file_root = file.path(output_dir, "primary_venn"),
# fill = cbbPalette[c(6,7,3)])
```
## Comparison with other studies
### Upset for mouse studies
ggupset plot for studies
```{r}
p <- RA$plot_upset_for_studies(comparison_studies = F,
main_organism_studies = T,
descriptive_names = T,
n_intersections = 100) +
geom_text(stat = 'count',
aes(label = ..count..),
size = 2.3,
vjust = 0)
cowplot::save_plot(filename = file.path(output_dir,
"upset.pdf"),
plot = p,
base_aspect_ratio = 3.6,
base_height = 4)
p
```
### Hits in RIC studies
#### Histogram
```{r}
x <- RA$get_studies_only_table(count_comparison_hits = T,
selected_studies = T,
comparison_studies = T,
main_organism_studies = T,
additional_ids = c(TRIC,
RA$get_found_in_at_least_id()))
x <- x %>% dplyr::filter(!!sym(TRIC) | number_hits_compare > 0)
#x <- x %>% mutate(number_hits_compare = ifelse(!!sym(TRIC),
# -number_hits_compare,
# number_hits_compare))
```
```{r}
p <- x %>% ggplot(aes(x = number_hits_compare,
fill = !!sym(TRIC))) +
geom_histogram(bins = 30) +
guides(fill=guide_legend(title="RBP found in\nTRIC")) +
scale_fill_manual(values = cbbPalette[c(3,2)]) +
xlab("# hits in RIC studies") + theme_minimal()
p
```
```{r}
p <- x %>% filter(!(!!sym(TRIC))) %>%
ggplot(aes(x = number_hits_compare),
fill = cbbPalette[3]) +
geom_histogram(aes(x = number_hits_compare,
y = ..count..),
bins = 32) +
geom_histogram(data = x %>% filter((!!sym(TRIC))),
aes(x = number_hits_compare, y = -..count..),
bins = 32, fill = cbbPalette[2]) +
guides(fill=guide_legend(title="RBP found in\nTRIC")) +
xlab("# hits in RIC studies") + theme_minimal()
p
```
```{r}
cowplot::save_plot(p, filename = file.path(output_dir, "TRIC_in_ScMmHs_ric_studies.pdf"))
cowplot::save_plot(p + coord_cartesian(ylim = c(0,300)), filename = file.path(output_dir, "TRIC_in_ScMmHs_ric_studies_y300.pdf"))
cowplot::save_plot(p + coord_cartesian(ylim = c(0,100)), filename = file.path(output_dir, "TRIC_in_ScMmHs_ric_studies_y100.pdf"))
cowplot::save_plot(p + coord_cartesian(xlim = c(10,33), ylim = c(-25,140)), filename = file.path(output_dir, "TRIC_in_ScMmHs_ric_studies_x10.pdf"))
cowplot::save_plot(p + coord_cartesian(xlim = c(15,33), ylim = c(-25,50)), filename = file.path(output_dir, "TRIC_in_ScMmHs_ric_studies_x15.pdf"))
```
```{r}
# p <- x %>% ggplot(aes(x = number_hits_compare,
# fill = !!sym(TRIC))) +
# facet_grid(~CANNO_cell_line_studies) + #### HERE
# geom_histogram(bins = 30) +
# guides(fill=guide_legend(title="RBP found in\nTRIC")) +
# scale_fill_manual(values = cbbPalette[c(3,2)]) +
# xlab("# hits in RIC studies") + theme_minimal()
#
#
# cowplot::save_plot(p, filename = file.path(output_dir, "TRIC_in_ScMmHs_ric_studies_side.pdf"))
#
# cowplot::save_plot(p + coord_cartesian(ylim = c(0,500)), filename = file.path(output_dir, "TRIC_in_ScMmHs_ric_studies_side_max500.pdf"))
#
# cowplot::save_plot(p + coord_cartesian(ylim = c(0,100)), filename = file.path(output_dir, "TRIC_in_ScMmHs_ric_studies_side_max100.pdf"))
#
# p
```
#### Barplot of non-hits
```{r}
p <- x %>% filter(!(!!sym(TRIC))) %>%
dplyr::select(UNIQUE, any_of(RA$get_found_in_at_least_id())) %>%
dplyr::filter(!is.na(!!sym(RA$get_found_in_at_least_id()))) %>%
ggplot(aes( x = !!sym(RA$get_found_in_at_least_id()) ))+
geom_bar(position = "dodge") +
guides(fill = guide_legend(title="RBP found in\nTRIC")) +
scale_fill_manual(values = cbbPalette[c(3,2,1,4)]) +
xlab("# hits in RIC studies") + theme_minimal() + geom_text(stat = 'count',
aes(label = ..count..),
position = position_stack(vjust = 0.5),
size = 5.5)
p
cowplot::save_plot(p, filename = file.path(output_dir, "non_TRIC_in_MmHs_ric_studies_barplot.pdf"), base_aspect_ratio = 2)
cowplot::save_plot(p, filename = file.path(output_dir, "non_TRIC_in_MmHs_ric_studies_barplot.png"), base_aspect_ratio = 2)
p
```
#### Save IDs and numbers
```{r}
hits_background <- x %>% filter(!(!!sym(TRIC))) %>% dplyr::select(UNIQUE, number_hits_compare) %>% arrange(desc(number_hits_compare))
write.xlsx(file = file.path(output_dir, "hits_non_TRIC.xlsx"),
x = hits_background)
hits_foreground <- x %>% filter((!!sym(TRIC))) %>%
dplyr::select(UNIQUE, any_of(selected_studies_ids), number_hits_compare) %>%
arrange(desc(number_hits_compare))
write.xlsx(file = file.path(output_dir, "hits_TRIC.xlsx"),
x = hits_foreground)
```
#### Barplot of TRIC
```{r}
p <- x %>%
dplyr::select(any_of(c(selected_studies_ids,
RA$get_found_in_at_least_id()))) %>%
gather(selected_studies_ids,
key = "studies",
value = "found") %>%
dplyr::filter(found) %>%
dplyr::filter(!is.na(!!sym(RA$get_found_in_at_least_id()))) %>%
mutate(studies = RA$get_descriptiveid_for_rbpbaseids(studies)) %>%
ggplot(aes(fill = studies,
x = !!sym(RA$get_found_in_at_least_id()) )) +
#facet_grid(~CANNO_cell_line_studies) +
geom_bar(position = "dodge") +
guides(fill = guide_legend(title="RBP found in\nTRIC")) +
scale_fill_manual(values = cbbPalette[c(3,2,1,4)]) +
xlab("# hits in RIC studies") + theme_minimal()
cowplot::save_plot(p, filename = file.path(output_dir, "TRIC_in_ScMmHs_ric_studies_barplot.pdf"), base_aspect_ratio = 2)
cowplot::save_plot(p, filename = file.path(output_dir, "TRIC_in_ScMmHs_ric_studies_barplot.png"), base_aspect_ratio = 2)
p
```
### Overlap TRIC with any Sc study
First we add a custom annotation for merged studies
```{r}
RA$save_venn(selection_ids = c(TRIC, RA$get_anyRIC_id()),
names = c("TRIC", "any Sc, Mm, Hs"),
file_root = file.path(output_dir, "TRIC_ScMmHs_venn"),
fill = cbbPalette[c(6,7,3)])
```
## Comparison of TRIC with TRAPP
### with mouse studies - venn
```{r}
RA$save_venn(selection_ids = c(TRIC, anyScExpTRAPP_id, TRAPP_id),
names = c("TRIC", "any Sc RIC", "any TRAPP"),
file_root = file.path(output_dir, "TRIC_anySc_TRAPP"),
fill = cbbPalette[c(6,7,3)])
```
### with mouse studies - upset
```{r}
# p <- RA$plot_upset(ids = c(only_kidney_id,
# RA$get_studies_only_ids(
# selected_studies = F,
# comparison_studies = F,
# main_organism_studies = T)
# ),
# descriptive_names = T,
# n_intersections = 100)
#
#
# cowplot::save_plot(filename = file.path(output_dir,
# "kidney_specific_Mm_upset.pdf"),
# plot = p,
# base_aspect_ratio = 2,
# base_height = 4)
#
# p
```
Showing heatmap for only kidney specific
```{r}
# x <- RA$get_main_table(selection_ids = c(only_kidney_id,
# RA$get_studies_only_ids(
# selected_studies = F,
# comparison_studies = F,
# main_organism_studies = T)
# ),
# descriptive_names = T)
#
# v <- RA$get_venn_data(selection_ids = c(
# RA$get_studies_only_ids(
# selected_studies = F,
# comparison_studies = F,
# main_organism_studies = T)
# ),
# filter = x$kidney_specific)
```
```{r}
# pdf(file.path(output_dir,
# "kidney_specific_Mm_heatmap.pdf"))
# setmap(v, method="ward.D2")
# dev.off()
```
### with proteins containing RNA-binding domains
```{r}
RA$save_venn(selection_ids = c(TRIC, anySc_id, RA$get_hasRBD_id()),
names = c("TRIC", "any Sc", "RB domain"),
file_root = file.path(output_dir, "TRIC_RBD"),
fill = cbbPalette[c(6,7,3)])
```
## Comparison of primary with annotation
### known RNA-binding domains
```{r}
p <- RA$plot_barplot(id = RA$get_hasRBD_id(),
col = cbbPalette[c(3,4,7,1)]) +
guides(fill = guide_legend(title="known RBD")) +
theme(legend.title = element_text(size = 15))
cowplot::save_plot(filename = file.path(output_dir,
"TRIC_RBD_barplot.pdf"),
plot = p,
base_aspect_ratio = 1.9)
p
```
### Enzyme
```{r}
p <- RA$plot_barplot(id = RA$get_enzyme_desc_id(),
col = cbbPalette[c(3,4,7,1)]) +
#guides(fill = guide_legend(title="enzyme")) +
theme(legend.title = element_text(size = 15))
cowplot::save_plot(filename = file.path(output_dir,
"TRIC_enzyme_barplot.pdf"),
plot = p,
base_aspect_ratio = 1.9)
p
```
### known RBPs
```{r}
p <- RA$plot_barplot(id = RA$get_knownRBPs_id(),
col = cbbPalette[c(3,7)]) +
guides(fill = guide_legend(title="known RBPs")) +
theme(legend.title = element_text(size = 15))
cowplot::save_plot(filename = file.path(output_dir,
"TRIC_knownRBPs_barplot.pdf"),
plot = p,
base_aspect_ratio = 1.9)
p
```
### Any RIC
```{r}
p <- RA$plot_barplot(id = anySc_id,
col = cbbPalette[c(3,1)]) +
guides(fill = guide_legend(title="found in any Sc RIC")) +
theme(legend.title = element_text(size = 15))
cowplot::save_plot(filename = file.path(output_dir,
"TRIC_anyScRIC_barplot.pdf"),
plot = p,
base_aspect_ratio = 1.9)
p
```
### known vs RIC
```{r}
p <- RA$plot_barplot(id = RA$get_known_RIC_desc_id(),
col = cbbPalette[c(2,1,4,3)]) +
guides(fill = guide_legend(title="")) +
theme(legend.title = element_text(size = 15))
cowplot::save_plot(filename = file.path(output_dir,
"TRIC_known_vs_anyRIC_barplot.pdf"),
plot = p,
base_aspect_ratio = 1.9)
p