Commit 71af0a2d authored by Christian Arnold's avatar Christian Arnold

Version 1.2.2, see Changelog for details

parent 59425da2
......@@ -53,6 +53,8 @@ We also put the paper on *bioRxiv*, please read all methodological details here:
Change log
============================
Version 1.2.2 (2019-02-01)
- Minor code fixed. Removed the creation of the circular plot, which has been replaced with the Volcano plot over time. Fixed a bug that could have led to wrong log2 fold-change values for the RNA-Seq data under special circumstances. We recommend rerunning the ``summaryFinal`` rule. Ask us for more details if you are concerned about this.
Version 1.2.1 (2019-01-22)
- Increased the value for ``expressions`` in R from 5000 (the R default) to 10000. Some users reported that they receive a "Error: evaluation nested too deeply: infinite recursion / options(expressions=)?" error message. Thanks to Benedict Man Hung Choi!
......
......@@ -202,9 +202,11 @@ if (nrow(overlapsAll.df) > 0) {
nTFBS = nrow(overlapsAll.df)
# Create formula based on user-defined design
designFormula = convertToFormula(par.l$designFormula, colnames(sampleData.df))
formulaVariables = attr(terms(designFormula), "term.labels")
# Extract the variable that defines the contrast. Always the last element in the formula
variableToPermute = formulaVariables[length(formulaVariables)]
if (nTFBS >= par.l$minNoDatapoints) {
......@@ -241,16 +243,14 @@ if (skipTF) {
} else {
# Create formula based on user-defined design
designFormula = convertToFormula(par.l$designFormula, colnames(sampleData.df))
normFacs = readRDS(par.l$file_input_normFacs)
TF.cds = tryCatch( {
# create Deseq object from the TF specific data
# The correct order is already enforced due to the creation of the TF.table.m matrix before that is sorted after sampleData.df
TF.cds <- DESeqDataSetFromMatrix(countData = TF.table.m,
colData = sampleData.df,
design = designFormula)
......@@ -319,6 +319,8 @@ if (skipTF) {
peaksFiltered.df = readRDS(par.l$file_input_peaks)
conditionComparison = readRDS(par.l$file_input_conditionComparison)
################################
# ITERATE THROUGH PERMUTATIONS #
################################
......@@ -402,7 +404,9 @@ if (skipTF) {
}
if (!skipTF) {
res_DESeq.df <- as.data.frame(DESeq2::results(res_DESeq))
# Enforce the correct order
res_DESeq.df <- as.data.frame(DESeq2::results(res_DESeq, contrast = c(variableToPermute, conditionComparison[1], conditionComparison[2])))
final.TF.df = data_frame("TFBSID" = rownames(res_DESeq.df),
"DESeq_baseMean" = res_DESeq.df$baseMean,
......@@ -441,9 +445,7 @@ if (skipTF) {
######################
## DIAGNOSTIC PLOTS ##
######################
conditionComparison = readRDS(par.l$file_input_conditionComparison)
pdf(par.l$file_output_plot_diagnostic)
if (par.l$nPermutations == 0) {
plotDiagnosticPlots(TF.cds.filt, res_DESeq, conditionComparison, filename = NULL, maxPairwiseComparisons = 5)
......
## README: prepare Permutations, to make it more efficient
start.time <- Sys.time()
# Increase the default of 5000, some users reported issues with the limit being reached
options(expressions=10000)
#########################
# LIBRARY AND FUNCTIONS #
......
......@@ -214,7 +214,7 @@ if (file_peaks != "") {
assertFileExists(snakemake@config$peaks$consensusPeaks)
peaks.df = read_tsv(snakemake@config$peaks$consensusPeaks, col_names = FALSE)
if (nrow(problems(peaks.df)) > 0) {
flog.fatal(paste0("Parsing errors: "), problems(overlapsAll.df), capture = TRUE)
flog.fatal(paste0("Parsing errors: "), problems(peaks.df), capture = TRUE)
stop("Parsing errors with file ", snakemake@config$peaks$consensusPeaks, ". See the log file for more information")
}
......
......@@ -149,7 +149,6 @@ if (!all(sapply(components2,length) == 2)) {
}
components3 = unlist(lapply(components2, "[[", 1))
components3types = tolower(unlist(lapply(components2, "[[", 2)))
names(components3types) = components3
checkAndLogWarningsAndErrors(sort(formulaVariables), checkSetEqual(sort(formulaVariables), sort(components3)))
......@@ -193,8 +192,6 @@ if (datatypeVariableToPermute == "factor" & nLevels != 2) {
##############################
# ITERATE THROUGH PEAK FILES #
##############################
......@@ -302,8 +299,14 @@ while (nPermutationsDone < par.l$nPermutations) {
designFormula = convertToFormula(par.l$designFormula, colnames(sampleData.df))
cds.peaks <- DESeqDataSetFromMatrix(countData = coverageAll.m,
colData = sampleData.df,
# Enforce the correct order of sampleData and coverageAll so that the rownames match
stopifnot(sampleData.df$SampleID %in% colnames(coverageAll.m))
sampleData.temp.df = as.data.frame(sampleData.df)
rownames(sampleData.temp.df) = sampleData.temp.df$SampleID
cds.peaks <- DESeqDataSetFromMatrix(countData = coverageAll.m[, sampleData.temp.df$SampleID],
colData = sampleData.temp.df,
design = designFormula)
# Recent versions of DeSeq seem to do this automatically, whereas older versions don't, so enforce it here
......@@ -422,9 +425,9 @@ if (par.l$nPermutations == 0) {
}
)
cds.peaks.df <- as.data.frame(DESeq2::results(cds.peaks.filt))
#Enforce the correct order of the comparison
cds.peaks.df <- as.data.frame(DESeq2::results(cds.peaks.filt, contrast = c(variableToPermute, comparisonDESeq[1], comparisonDESeq[2])))
# TODO: "peakID" = rownames(results.df), CHECK
final.peaks.df = data_frame(
"permutation" = 0,
"peakID" = rownames(cds.peaks.df),
......
This diff is collapsed.
......@@ -647,7 +647,6 @@ rule summaryFinal:
sampleDataR = rules.DiffPeaks.output.sampleDataR,
output:
summary = FINAL_DIR + "/" + compType + "summary.tsv.gz",
circularPlot = FINAL_DIR + "/" + compType + "summary.circular.pdf",
volcanoPlot = FINAL_DIR + "/" + compType + "summary.volcano.pdf",
diagnosticPlots = allDiagnosticFiles,
plotsRDS = FINAL_DIR + "/" + compType + "summary.circular.rds",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment