Commit e8951dc0 authored by Christian Arnold's avatar Christian Arnold
Browse files

Version 1.3.2, see Changelog for details

parent dd053146
......@@ -57,7 +57,7 @@ author = 'Christian Arnold, Ivan Berest, Judith B. Zaugg'
# The short X.Y version.
version = '1.3'
# The full version, including alpha/beta/rc tags.
release = '1.3.1'
release = '1.3.2'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
......
......@@ -53,6 +53,11 @@ We also put the paper on *bioRxiv*, please read all methodological details here:
Change log
============================
Version 1.3.2 (2019-07-24)
- Fixed the error "unable to find an inherited method for function ‘assay’ for signature ‘"matrix", "character"’" that arises due to a new implementation of the *normOffsets* function from the *csaw* package in versions above 1.14.1. The Singularity image that comes with *diffTF* still uses csaw version 1.14.1, for which the original implementation works fine, but for newer R installations (that is, *csaw* >= 1.16) the above error is thrown. In the code, the function call is now dependent on the version of the *csaw* package. This was the most common error that was thrown when running *diffTF* without Singularity, and should therefore increase compatibility.
- Minor changes to make the *diffTF* code more compatible for future releases of R (e.g., replacing the deprecated *data_frame* by *tibble*, specifying the package for functions that are defined in different packages)
- updated the *startAnalysis* scripts due to a changed folder structure in the *examples* subfolder
Version 1.3 and 1.3.1 (2019-07-17)
- Various minor changes and small bug fixes as reported by users
- improved the RNA-Seq classification, further information will follow soon.
......
......@@ -12,7 +12,7 @@
"nBootstraps": 0,
"nCGBins": 10,
"TFs": "CTCF,CEBPB,SNAI2,CEBPA,UBIP1,CEBPG,CEBPD,ZFX,AP2D,PAX5.S,SNAI1,ZEB1,SP4,MBD2,IRF1,MECP2,PAX5.D,SP3,NFIA.C,SP1.A,IRF7,MYF6,NRF1,DBP,MAZ,NKX28,DLX2,GATA1,P53,ZN143,AIRE,NR2C2,HMGA1,FUBP1,TEAD3,OVOL1,HXD4,KLF1,RXRG,HNF1B,ZIC3,HNF1A,NANOG.S,GFI1,PO3F1,NR2C1,ELF5,TF65.C,NFAC3,TEAD1",
"dir_scripts": "../../src/R",
"dir_scripts": "../../../src/R",
"RNASeqIntegration": true
},
"samples": {
......@@ -28,6 +28,6 @@
"refGenome_fasta": "referenceGenome/mm10.fa",
"dir_TFBS": "mm10/PWMScan_HOCOMOCOv10",
"RNASeqCounts": "data/RNA-Seq/RNA.counts.tsv",
"HOCOMOCO_mapping": "../../src/TF_Gene_TranslationTables/HOCOMOCO_v10/translationTable_mm10.csv"
"HOCOMOCO_mapping": "../../../src/TF_Gene_TranslationTables/HOCOMOCO_v10/translationTable_mm10.csv"
}
}
......@@ -17,4 +17,4 @@ echo "# INCREASING THE NUMBER OF CORES SPEEDS UP THE ANALYSIS #"
echo "#########################################################"
# Real run, using 2 cores
snakemake --snakefile ../../src/Snakefile --cores 2 --configfile config.json
snakemake --snakefile ../../../src/Snakefile --cores 2 --configfile config.json
......@@ -12,4 +12,4 @@ echo "####################################################################"
echo "\nThis wrapper script calls Snakemake in dryrun mode to start diffTF. Nothing will actually be executed.\n"
# Dryrun, using 2 cores
snakemake --snakefile ../../src/Snakefile --dryrun --quiet --cores 2 --configfile config.json
snakemake --snakefile ../../../src/Snakefile --dryrun --quiet --cores 2 --configfile config.json
......@@ -319,7 +319,7 @@ if (skipTF) {
}
# low RC, check by rowMean
TF.cds.filt = TF.cds[rowMeans(counts(TF.cds)) > 0, ]
TF.cds.filt = TF.cds[rowMeans(DESeq2::counts(TF.cds)) > 0, ]
nPeaks = nrow(TF.cds.filt)
......@@ -344,7 +344,7 @@ if (skipTF) {
if (par.l$nPermutations > 0) {
# Generate normalized counts for limma analysis
countsNorm = counts(TF.cds.filt, norm = TRUE)
countsNorm = DESeq2::counts(TF.cds.filt, norm = TRUE)
countsNorm.transf = log2(countsNorm + par.l$pseudocountAddition)
rownames(countsNorm.transf) = rownames(TF.cds.filt)
......@@ -373,7 +373,7 @@ if (skipTF) {
fit <- eBayes(lmFit(countsNorm.transf, design = model.matrix(designFormula, data = sampleData.df)))
results.df <- topTable(fit, coef = colnames(fit$design)[ncol(fit$design)], number = Inf, sort.by = "none")
final.TF.df = data_frame("TFBSID" = rownames(results.df),
final.TF.df = tibble("TFBSID" = rownames(results.df),
"limma_avgExpr" = results.df$AveExpr,
"l2FC" = results.df$logFC,
"limma_B" = results.df$B,
......@@ -435,7 +435,7 @@ if (skipTF) {
}
final.TF.df = data_frame("TFBSID" = rownames(res_DESeq.df),
final.TF.df = tibble("TFBSID" = rownames(res_DESeq.df),
"DESeq_baseMean" = res_DESeq.df$baseMean,
"l2FC" = res_DESeq.df$log2FoldChange,
"DESeq_ldcSE" = res_DESeq.df$lfcSE,
......
......@@ -348,9 +348,14 @@ if (!par.l$doCyclicLoess) {
# since counts returns,by default, non-normalized counts, the following code should be fine and there is no need to
# also run estimateSizeFactors beforehand
normFacs <- exp(normOffsets(counts(cds.peaks),
lib.sizes = colSums(counts(cds.peaks)),
type = "loess"))
if (packageVersion("csaw") <= "1.14.1") {
normFacs = exp(normOffsets(DESeq2::counts(cds.peaks), lib.sizes = colSums(DESeq2::counts(cds.peaks)), type = "loess"))
} else {
object = SummarizedExperiment(list(counts=DESeq2::counts(cds.peaks)))
object$totals = colSums(DESeq2::counts(cds.peaks))
normFacs = exp(normOffsets(object, se.out = FALSE))
}
# sanity check: is the geometric mean across samples equal to one?
#library("psych")
......@@ -366,7 +371,7 @@ if (!par.l$doCyclicLoess) {
# Filter peaks with zero counts
cds.peaks.filt = cds.peaks[rowMeans(counts(cds.peaks)) > 0, ]
cds.peaks.filt = cds.peaks[rowMeans(DESeq2::counts(cds.peaks)) > 0, ]
# DESeq log2fc are not used at all afterwards, as we currently only take the normalization factors to normalize the TFBS subsequently
......@@ -387,7 +392,7 @@ if (comparisonMode == "pairwise") {
# GET LOG2FC #
##############
countsNorm = counts(cds.peaks.filt, norm = TRUE)
countsNorm = DESeq2::counts(cds.peaks.filt, norm = TRUE)
countsNorm.df = as.data.frame(countsNorm) %>%
dplyr::mutate(peakID = rownames(cds.peaks.filt)) %>%
dplyr::select(one_of("peakID", colnames(countsNorm)))
......@@ -412,7 +417,7 @@ if (par.l$nPermutations == 0) {
fit <- eBayes(lmFit(countsNorm.transf, design = designMatrix))
results.df <- topTable(fit, coef = colnames(fit$design)[ncol(fit$design)], number = Inf, sort.by = "none")
final.peaks.df = data_frame(
final.peaks.df = tibble(
"permutation" = 0,
"peakID" = rownames(results.df),
"limma_avgExpr" = results.df$AveExpr,
......@@ -452,7 +457,7 @@ if (par.l$nPermutations == 0) {
}
final.peaks.df = data_frame(
final.peaks.df = tibble(
"permutation" = 0,
"peakID" = rownames(cds.peaks.df),
"DESeq_baseMean" = cds.peaks.df$baseMean,
......
......@@ -110,10 +110,26 @@ printParametersLog <- function(par.l, verbose = FALSE) {
if (verbose) cat("Could not find the following packages: ", paste( packagesToInstall , collapse = ", "), "\n")
install.packages(packagesToInstall, repos = "http://cran.rstudio.com/")
source("http://bioconductor.org/biocLite.R")
for (packageCur in packagesToInstall) {
biocLite(packageCur, suppressUpdates = TRUE)
if (getRversion() < "3.5.0") {
source("http://bioconductor.org/biocLite.R")
for (packageCur in packagesToInstall) {
biocLite(packageCur, suppressUpdates = TRUE)
}
} else {
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
for (packageCur in packagesToInstall) {
BiocManager::install(packageCur)
}
}
} else {
if (verbose) cat("All packages are already installed\n")
}
......@@ -460,21 +476,21 @@ plotDiagnosticPlots <- function(dd, differentialResults, conditionComparison, fi
if (nSamples < 10) {
multidensity(log(counts(dd, normalized = FALSE) + 0.5), xlab = xlabCur, main = "Non-normalized log counts", col = colors)
multidensity(log(counts(dd, normalized = TRUE) + 0.5) , xlab = xlabCur, main = "Normalized log counts", col = colors)
multidensity(log(DESeq2::counts(dd, normalized = FALSE) + 0.5), xlab = xlabCur, main = "Non-normalized log counts", col = colors)
multidensity(log(DESeq2::counts(dd, normalized = TRUE) + 0.5) , xlab = xlabCur, main = "Normalized log counts", col = colors)
multiecdf(log(counts(dd, normalized = FALSE) + 0.5), xlab = xlabCur, main = "Non-normalized log counts", col = colors)
multiecdf(log(counts(dd, normalized = TRUE) + 0.5) , xlab = xlabCur, main = "Normalized log counts", col = colors)
multiecdf(log(DESeq2::counts(dd, normalized = FALSE) + 0.5), xlab = xlabCur, main = "Non-normalized log counts", col = colors)
multiecdf(log(DESeq2::counts(dd, normalized = TRUE) + 0.5) , xlab = xlabCur, main = "Normalized log counts", col = colors)
} else {
warning("Omitting legend due to large number of samples (threshold is 10 at the moment)")
multidensity(log(counts(dd, normalized = FALSE) + 0.5), xlab = xlabCur, main = "Non-normalized log counts", legend = NULL, col = colors)
multidensity(log(counts(dd, normalized = TRUE) + 0.5) , xlab = xlabCur, main = "Normalized log counts", legend = NULL, col = colors)
multidensity(log(DESeq2::counts(dd, normalized = FALSE) + 0.5), xlab = xlabCur, main = "Non-normalized log counts", legend = NULL, col = colors)
multidensity(log(DESeq2::counts(dd, normalized = TRUE) + 0.5) , xlab = xlabCur, main = "Normalized log counts", legend = NULL, col = colors)
multiecdf(log(counts(dd, normalized = FALSE) + 0.5), xlab = xlabCur, main = "Non-normalized log counts", legend = NULL, col = colors)
multiecdf(log(counts(dd, normalized = TRUE) + 0.5) , xlab = xlabCur, main = "Normalized log counts", legend = NULL, col = colors)
multiecdf(log(DESeq2::counts(dd, normalized = FALSE) + 0.5), xlab = xlabCur, main = "Non-normalized log counts", legend = NULL, col = colors)
multiecdf(log(DESeq2::counts(dd, normalized = TRUE) + 0.5) , xlab = xlabCur, main = "Normalized log counts", legend = NULL, col = colors)
}
......@@ -496,7 +512,7 @@ plotDiagnosticPlots <- function(dd, differentialResults, conditionComparison, fi
flog.info(paste0(" Plotting pairwise comparison ", i, " out of ", nrow(MA.idx.filt)))
label = paste0(colnames(dd)[MA.idx.filt[i,1]], " vs ", colnames(dd)[MA.idx.filt[i,2]])
suppressWarnings(print(myMAPlot(counts(dd, normalized = TRUE), c(MA.idx[i,1], MA.idx.filt[i,2]), main = label)))
suppressWarnings(print(myMAPlot(DESeq2::counts(dd, normalized = TRUE), c(MA.idx[i,1], MA.idx.filt[i,2]), main = label)))
}
# Show an empty page with a warning if plots have been omitted
......@@ -510,7 +526,7 @@ plotDiagnosticPlots <- function(dd, differentialResults, conditionComparison, fi
# 4. Mean SD plot: Plot row standard deviations versus row means
notAllZeroPeaks <- (rowSums(counts(dd)) > 0)
notAllZeroPeaks <- (rowSums(DESeq2::counts(dd)) > 0)
suppressWarnings(meanSdPlot(assay(dd[notAllZeroPeaks,])))
......
......@@ -415,7 +415,7 @@ if (par.l$plotRNASeqClassification) {
dd = estimateSizeFactors(dd)
# dd = DESeq(dd)
dd_counts = counts(dd, normalized=TRUE)
dd_counts = DESeq2::counts(dd, normalized=TRUE)
######################################
# Filtering of lowly expressed genes #
......@@ -446,11 +446,11 @@ if (par.l$plotRNASeqClassification) {
}
dd.filt <- DESeq(dd.filt)
dd_counts.filt = counts(dd.filt, normalized=TRUE)
dd_counts.filt = DESeq2::counts(dd.filt, normalized=TRUE)
RNA.counts.filt.df = dd_counts.filt %>% as.data.frame() %>% rownames_to_column("ENSEMBL") %>% as.tibble()
# Raw counts, used for other types of normalization thereafter
dd_counts.raw.filt = counts(dd.filt, normalized=FALSE)
dd_counts.raw.filt = DESeq2::counts(dd.filt, normalized=FALSE)
dd_counts.filt.quantile = normalize.quantiles(as.matrix(dd_counts.raw.filt))
RNA.counts.quantile.df.all = dd_counts.filt.quantile %>% as.data.frame() %>% as.tibble()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment