Add solution to exercise 4 for day 3

b66e163a · Renato Alves · 27b4cae3 · b66e163a
Verified Commit b66e163a authored 4 years ago by Renato Alves
--- a/Day_3/Solutions/Exercise_4_cluster_comparative_analysis_2020.R
+++ b/Day_3/Solutions/Exercise_4_cluster_comparative_analysis_2020.R
+
+#---------------------------------------------------------------------------------------
+# Exercise 4
+# Comparative analysis using ClusterProfiler and custom annotation with enricher
+# go to the subdirectory 
+# /Exercise 4
+#---------------------------------------------------------------------------------------
+# here you will find 2 files:
+# cluster1.txt
+# and
+# cluster2.txt
+# all_gene_disease_associations.tsv
+
+
+# Matt Rogon
+# rogon@embl.de
+
+#source("http://bioconductor.org/biocLite.R") 
+# biocLite("ReactomePA")
+
+library(clusterProfiler)
+library(ReactomePA)
+library(pathview)
+library(org.Hs.eg.db)
+
+#----------------------------------------------------------------------------------------
+# load data - atm and dlblc hit lists (gene symbols)
+setwd("/Users/rogon/Work/01. Teaching/CBNA Courses/2020 CBNA-DeNBI Enrichment course - my materials/Part 2 - R-code session ClusterProfiler, ReactomePA, pathfindR/Exercise 4 (custom ontology with enricher, and multicluster)")
+
+# perform comparative cluster analysis for all clusters in the source file against the custom annotation file
+# compareCluster accepts fun= switch which works in a similar way of passing fun='enrichGO' or fun='enrichKEGG'.
+# you can pass fun='enricher'
+#----------------------------------------------------------------------------------------
+
+
+# load data - cluster 1 and cluster 2 into individual hit lists 
+# the first column contains gene symbols
+# convert them to entrez id's
+# here is an example on how to proceed with bitr function for annotation: 
+# keytypes(org.Hs.eg.db)
+# hit_ids <- bitr(hit_list$gene, fromType="SYMBOL", toType=c("ENTREZID"), OrgDb="org.Hs.eg.db")
+
+# create a custom annotation for use with enricher for that we will use the 
+# all_gene_disease_associations.tsv file and create two dataframes from this annotation:
+# 1. disease2gene
+# 2. disease2name
+
+# the first maps disease id to gene id
+# the second maps disease id to disease names
+# as you may imagine you will need the gene id to be entrez for both the hit list and the annotation file
+# first column of the source annotation file is the entrezID
+
+
+# Once all is constructed you will use the compareCluster function to
+# run enricher with the above created annotations
+
+# produce dotplot comparing the two clusters
+
+
+#----------------------------------------------------------------------------------------
+# load clusters and create the input list of lists
+#----------------------------------------------------------------------------------------
+
+atm_hit_list <- read.delim("cluster1.txt", stringsAsFactors=FALSE)
+dlbcl_hit_list <- read.delim("cluster2.txt", stringsAsFactors=FALSE)
+
+#Convert id's to entrez
+# If organism is not supported by AnnotationHub, user can use AnnotationForge to build OrgDb.
+keytypes(org.Hs.eg.db)
+hit_ids <- bitr(atm_hit_list$gene, fromType="SYMBOL", toType=c("ENTREZID"), OrgDb="org.Hs.eg.db")
+hit_ids2 <- bitr(dlbcl_hit_list$gene, fromType="SYMBOL", toType=c("ENTREZID"), OrgDb="org.Hs.eg.db")
+
+# entrez lists for each analysis
+hit_list_Entrez <- as.list(hit_ids$ENTREZID)
+hit_list_Entrez <- unique(hit_list_Entrez)
+
+hit_list_Entrez2 <- as.list(hit_ids2$ENTREZID)
+hit_list_Entrez2 <- unique(hit_list_Entrez2)
+#hit_list_Entrez2 <- noquote(hit_list_Entrez2)
+
+hit_list_Entrez_flat <- unlist(hit_list_Entrez)
+hit_list_Entrez2_flat <- unlist(hit_list_Entrez2)
+
+input = list()
+input$X1 <- hit_list_Entrez_flat
+input$X2 <- hit_list_Entrez2_flat
+summary(input)
+
+
+
+
+
+#----------------------------------------------------------------------------------------
+# 
+# Build a custom ontology from the provided annotation file
+# all_gene_disease_associations.tsv
+#----------------------------------------------------------------------------------------
+# Currently clusterProfiler supports user’s own annotations via the enricher and GSEA functions, 
+# which require users provide their own annotation in a data frame 
+#----------------------------------------------------------------------------------------
+# Custom enrichment with clusterProfiler - preparation of 2 data frames
+# TERM2GENE is a data.frame with first column of term ID and second column of corresponding mapped gene 
+# TERM2NAME is a data.frame with first column of term ID and second column of corresponding term name. 
+# TERM2NAME is optional.
+#----------------------------------------------------------------------------------------
+
+require(DOSE)
+require(clusterProfiler)
+
+# load this file and look at the content of the data frame
+# load the annotation file (this is from DisGeNet)
+gda <- read.delim("/Users/rogon/Work/01. Teaching/CBNA Courses/2020 CBNA-DeNBI Enrichment course - my materials/Part 2 - R-code session ClusterProfiler, ReactomePA, pathfindR/Exercise 3 comparing conditions/all_gene_disease_associations.tsv")
+dim(gda)
+View(gda)
+
+# to create a custom annotation file we need to create two dataframes from this annotation
+# 1. disease2gene
+# 2. disease2name
+
+# Let's create the two required dataframes - diseaseID to geneID, and diseaseID to diseaseName
+disease2gene=gda[, c("diseaseId", "geneId")]
+disease2name=gda[, c("diseaseId", "diseaseName")]
+
+
+
+#----------------------------------------------------------------------------------------
+# # Explore the enricher function in ClusterProfiler
+#
+# Build an enrichment against the custom ontology/annotation
+# and compare the input clusters all in one
+#----------------------------------------------------------------------------------------
+# set p-value to 0.05
+# q value to 0.1
+# Benjamini-Hochberg correction
+
+res_go <- compareCluster(input, fun="enricher", TERM2GENE=disease2gene, TERM2NAME=disease2name,pvalueCutoff = 0.05, pAdjustMethod = "BH", qvalueCutoff = 0.1)
+
+# create a dotplot and a cnetplot
+dotplot(res_go, showCategory=10,includeAll=TRUE, title="Custom Disease Enrichment")
+cnetplot(res_go)
+
+
+
+