Skip to content
Snippets Groups Projects
Commit a5896961 authored by Rim Moussa's avatar Rim Moussa
Browse files

edited consensus peak script to exclude control samples

parent 1e28aa2d
No related branches found
No related tags found
1 merge request!3Chipseq updates
No preview for this file type
start.time <- Sys.time()
source(paste0(snakemake@config$additionalInput$scriptsDir, "/functions.R"))
########################################################################
# SAVE SNAKEMAKE S4 OBJECT THAT IS PASSED ALONG FOR DEBUGGING PURPOSES #
########################################################################
# Use the following line to load the Snakemake object to manually rerun this script (e.g., for debugging purposes)
# Replace {outputFolder} correspondingly.
# snakemake = readRDS("{outputFolder}/LOGS_AND_BENCHMARKS/1.annotatePeaks.R.rds")
createDebugFile(snakemake)
initFunctionsScript(packagesReq = NULL, minRVersion = "3.1.0", warningsLevel = 1, disableScientificNotation = TRUE)
#checkAndLoadPackages(c("tidyverse", "futile.logger", "DiffBind", "checkmate", "stats", "stringr", "ChIPseeker"), verbose = FALSE)
checkAndLoadPackages(c("futile.logger", "checkmate", "stats", "ChIPseeker"), verbose = FALSE)
assertClass(snakemake, "Snakemake")
assertDirectoryExists(snakemake@config$additionalInput$scriptsDir)
###################
#### PARAMETERS ###
###################
par.l = list()
par.l$allPeakTypes = c("broadPeak", "gappedPeak", "narrowPeak", "stringent", "non-stringent")
par.l$verbose = TRUE
par.l$log_minlevel = "INFO"
#####################
# VERIFY PARAMETERS #
#####################
## INPUT ##
assertList(snakemake@input, min.len = 1)
assertSubset(names(snakemake@input), c("", "individual_stringent", "individual_nonStringent", "individual_encode",
"consensus_stringent", "consensus_nonStringent", "consensus_encode"))
fileTypes = c("", "individual_stringent", "individual_nonStringent", "individual_encode",
"consensus_stringent", "consensus_nonStringent", "consensus_encode")
par.l$input_peaks = snakemake@input
## OUTPUT ##
assertList(snakemake@output, min.len = 1)
assertSubset(names(snakemake@output), c("", "annotation_individual_stringent", "annotation_individual_nonStringent", "annotation_individual_encode",
"annotation_consensus_stringent", "annotation_consensus_nonStringent", "annotation_consensus_encode"))
par.l$output = snakemake@output
## CONFIG ##
assertList(snakemake@config, min.len = 1)
## PARAMS ##
assertList(snakemake@params, min.len = 3)
par.l$tabulateOutput = as.logical(snakemake@params$tabulateOutput)
par.l$tssRegion = as.integer(unlist(strsplit(snakemake@params$tssRegion, ",")))
par.l$assemblyVersion = snakemake@params$assemblyVersion
## LOG ##
assertList(snakemake@log, min.len = 1)
par.l$file_log = snakemake@log[[1]]
######################
# FINAL PREPARATIONS #
######################
startLogger(par.l$file_log, par.l$log_minlevel, removeOldLog = TRUE)
printParametersLog(par.l)
allDirs = unique(dirname(unlist(par.l$output)))
testExistanceAndCreateDirectoriesRecursively(allDirs)
###################
# Annotate peaks #
##################
for (file_grp in unique(names(par.l$input_peaks))){
#
# print("~~~~~~~~")
# print(file_grp)
if (file_grp == "") {next}
# for (inFiles in par.l$input_peaks[file_grp]){
# if (is.null(inFiles)) {next}
# for (file in inFiles[[1]]){
for (file in unlist(par.l$input_peaks[file_grp])){
futile.logger::flog.info(paste0("Peak annotation for the file: ", file))
if (readLines(file, n=1) == "# No consensus peaks found"){next}
peaks.annotated = suppressMessages(ChIPseeker::annotatePeak(
file,
tssRegion = par.l$tssRegion,
TxDb = .getGenomeObject(par.l$assemblyVersion, type = "txbd"),
level = "gene",
assignGenomicAnnotation = TRUE, # the default
genomicAnnotationPriority = c("Promoter", "5UTR", "3UTR", "Exon", "Intron",
"Downstream", "Intergenic"), # the default
annoDb = .getGenomeObject(par.l$assemblyVersion, type = "packageName"), # optional, if provided, extra columns including SYMBOL, GENENAME, ENSEMBL/ENTREZID will be added
sameStrand = FALSE, ignoreOverlap = FALSE, ignoreUpstream = FALSE, ignoreDownstream = FALSE, overlap = "TSS", verbose = TRUE
))
outIMG = grep(pattern = paste0(gsub(basename(file), pattern = "\\.bed\\.gz", replacement = ""), ".pdf"),
x = par.l$output[paste0("annotation_", file_grp)][[1]], value = T)
outTbl = grep(pattern = paste0(gsub(basename(file), pattern = "\\.bed\\.gz", replacement = ""), ".csv"),
x = par.l$output[paste0("annotation_", file_grp)][[1]], value = T)
pdf(outIMG)
print(plotAnnoPie(peaks.annotated, main = "Peak Annotation"))
print(plotDistToTSS(peaks.annotated))
dev.off()
if (par.l$tabulateOutput){
write.csv(as.data.frame(peaks.annotated), file = outTbl, row.names = FALSE)
}
# }
}
}
.printExecutionTime(start.time)
flog.info("Session info: ", sessionInfo(), capture = TRUE)
......@@ -43,9 +43,12 @@ assertClass(snakemake, "Snakemake")
assertList(snakemake@input, min.len = 1)
assertSubset(names(snakemake@input), c("", "stringent", "nonStringent", "encode"))
par.l$files_input_stringent = snakemake@input$stringent
par.l$files_input_nonStringent = snakemake@input$nonStringent
par.l$files_input_ENCODE = snakemake@input$encode
par.l$files_input_stringent = setdiff(snakemake@input$stringent,
grep(pattern = paste(snakemake@params$controlFiles, collapse = "|"), x = snakemake@input$stringent, value = T))
par.l$files_input_nonStringent = setdiff(snakemake@input$nonStringent,
grep(pattern = paste(snakemake@input$nonStringent, collapse = "|"), x = snakemake@input$stringent, value = T))
par.l$files_input_ENCODE = setdiff(snakemake@input$encode,
grep(pattern = paste(snakemake@params$controlFiles, collapse = "|"), x = snakemake@input$encode, value = T))
#par.l$file_input_sampleFile = snakemake@input$sampleFile
## OUTPUT ##
......@@ -159,6 +162,8 @@ for (GCBiasStrCur in par.l$GCBiasStr) {
sampleMetaData.df = tibble(Peaks = filesInput, PeakCaller = "narrow")
# here exclude the control files based on sample name grep
# but how will the merged replicated be handled? maybe add an extra parameter for the merged control replicates
assertIntegerish(max(par.l$minOverlap), upper = nrow(sampleMetaData.df), lower = 1)
......@@ -235,12 +240,14 @@ for (GCBiasStrCur in par.l$GCBiasStr) {
} else {
write_lines("# No consensus peaks found", outputCur)
}
#break
}
#break
}
#break
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment