#---------------------------------------------------------------------------------------
# Exercise 2
#---------------------------------------------------------------------------------------

# Given a list of hits and a background in folder /Exercise 2
# background.txt and id_list.txt

# perform enrichment analysis against 
# 1. Gene Ontology,
# 2. Kegg modules, and
# 3. DisGeNet (diseases)

# For each result produce a cnetplot and an upsetplot.
# - you will need to provide the correct IDs for the functions to execute - use converters
# - vary the p-value and q-value thresholds in case of an empty result
# - make the DisGeNET output readable (gene names, not Entrez IDs)


# Load the custom background table. sep = "" makes the reader split fields on
# any run of whitespace instead of on commas.
background <- read.csv(
  file = "/Users/rogon/Work/01. Teaching/CBNA Courses/2020 Online Course Series/2020 CBNA-DeNBI Enrichment course - my materials/Part 2 - R-code session ClusterProfiler, ReactomePA, pathfindR/Exercise 2 (custom background)/Background.txt",
  sep = ""
)

# Load the hit list with read.delim()'s defaults.
id_list <- read.delim(
  file = "/Users/rogon/Work/01. Teaching/CBNA Courses/2020 Online Course Series/2020 CBNA-DeNBI Enrichment course - my materials/Part 2 - R-code session ClusterProfiler, ReactomePA, pathfindR/Exercise 2 (custom background)/id_list.txt"
)



# Print the identifier types that org.Hs.eg.db can translate between.
# NOTE(review): no library() call for org.Hs.eg.db or for the package that
# provides bitr() is visible in this script - presumably they are attached
# earlier in the session; confirm.
keytypes(org.Hs.eg.db)

# Translate the background identifiers (treated as gene symbols) to Entrez IDs.
# drop = FALSE is forwarded to bitr(); presumably it keeps symbols that have
# no Entrez match in the result - verify against the bitr() documentation.
entrez_map <- bitr(
  background$GeneID,
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = "org.Hs.eg.db",
  drop = FALSE
)


# or biomart:

# You will need to convert identifiers. For that purpose I suggest using the biomaRt package in R; alternatively, you can use the website itself:
# Ensembl BioMart http://www.ensembl.org/biomart/martview/
library("biomaRt")
# Connect to the "ensembl" mart, then select the human gene dataset on it.
ensembl <- biomaRt::useMart("ensembl")
ensembl <- useDataset(dataset = "hsapiens_gene_ensembl", mart = ensembl)

# Keep the dataset and attribute listings around for interactive lookup of
# valid names to use in the getBM() queries.
datasets_biomart <- listDatasets(mart = ensembl)
attributes_biomart <- listAttributes(mart = ensembl)

# Annotate the hit list: it is queried by Entrez gene ID and returns the
# symbol, Entrez ID and Ensembl gene ID for every match.
id_list_biomart <- getBM(
  attributes = c("hgnc_symbol", "entrezgene_id", "ensembl_gene_id"),
  filters = "entrezgene_id",
  values = id_list$EntrezGene.ID,
  mart = ensembl
)

# Annotate the background: it is queried by HGNC symbol and returns the same
# three identifier columns as above.
background_biomart <- getBM(
  attributes = c("hgnc_symbol", "entrezgene_id", "ensembl_gene_id"),
  filters = "hgnc_symbol",
  values = background$GeneID,
  mart = ensembl
)


# part 1
# Gene Ontology over-representation analysis of the hits against the custom
# background.
#
# The universe must be the background Entrez IDs themselves. The previous
# names(background_biomart$entrezgene_id) evaluated to NULL because the column
# is an unnamed vector, so the background was silently ignored. The IDs are
# converted to character (biomaRt returns them as numbers and a non-character
# universe is not honoured), with NAs dropped and duplicates removed.
background_universe <- background_biomart$entrezgene_id
background_universe <- unique(
  as.character(background_universe[!is.na(background_universe)])
)

# `ont` is not set, so enrichGO()'s default ontology is used.
id_list_go <- enrichGO(
  gene = id_list_biomart$entrezgene_id,
  OrgDb = "org.Hs.eg.db",
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  universe = background_universe,
  minGSSize = 5,
  maxGSSize = 500,
  qvalueCutoff = 0.05,
  readable = TRUE
)

# If the result is empty, relax pvalueCutoff / qvalueCutoff before plotting.
head(id_list_go)
cnetplot(id_list_go)
upsetplot(id_list_go)

# part 2 
# KEGG module over-representation analysis of the hits against the custom
# background.
#
# The universe must be the background Entrez IDs themselves. The previous
# names(background_biomart$entrezgene_id) evaluated to NULL because the column
# is an unnamed vector, so the background was silently ignored. The IDs are
# converted to character (biomaRt returns them as numbers and a non-character
# universe is not honoured), with NAs dropped and duplicates removed.
background_universe <- background_biomart$entrezgene_id
background_universe <- unique(
  as.character(background_universe[!is.na(background_universe)])
)

# Thresholds are looser (0.1) here than in the GO / DisGeNET runs.
id_list_mkegg <- enrichMKEGG(
  gene = id_list_biomart$entrezgene_id,
  pvalueCutoff = 0.1,
  pAdjustMethod = "BH",
  universe = background_universe,
  minGSSize = 5,
  maxGSSize = 500,
  qvalueCutoff = 0.1
)

# If the result is empty, relax pvalueCutoff / qvalueCutoff before plotting.
head(id_list_mkegg)
cnetplot(id_list_mkegg)
upsetplot(id_list_mkegg)

# part 3
# DisGeNET (disease) over-representation analysis of the hits against the
# custom background.
#
# The universe must be the background Entrez IDs themselves. The previous
# names(background_biomart$entrezgene_id) evaluated to NULL because the column
# is an unnamed vector, so the background was silently ignored. The IDs are
# converted to character (biomaRt returns them as numbers and a non-character
# universe is not honoured), with NAs dropped and duplicates removed.
background_universe <- background_biomart$entrezgene_id
background_universe <- unique(
  as.character(background_universe[!is.na(background_universe)])
)

# readable = TRUE asks for gene names rather than Entrez IDs in the output.
id_list_dgn <- enrichDGN(
  gene = id_list_biomart$entrezgene_id,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  universe = background_universe,
  minGSSize = 5,
  maxGSSize = 500,
  qvalueCutoff = 0.05,
  readable = TRUE
)

# If the result is empty, relax pvalueCutoff / qvalueCutoff before plotting.
head(id_list_dgn)
cnetplot(id_list_dgn)
upsetplot(id_list_dgn)

# GO without background 
# Same GO enrichment as in part 1, but with no `universe` argument, so
# enrichGO() falls back to its default background. Useful for comparing
# against the custom-background run.
id_list_go_noBckg <- enrichGO(
  gene = id_list_biomart$entrezgene_id,
  OrgDb = "org.Hs.eg.db",
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  minGSSize = 5,
  maxGSSize = 500,
  qvalueCutoff = 0.05,
  readable = TRUE
)

# Inspect the top terms, then draw the gene-concept network and the upset plot.
head(id_list_go_noBckg)
cnetplot(id_list_go_noBckg)
upsetplot(id_list_go_noBckg)