Commit abdca3c3 authored by Christian Arnold's avatar Christian Arnold

Version 1.1.8, see Changelog for details

parent d8d37de5
...@@ -31,6 +31,9 @@ We also put the paper on *bioRxiv*, please read all methodological details here: ...@@ -31,6 +31,9 @@ We also put the paper on *bioRxiv*, please read all methodological details here:
Change log Change log
============================ ============================
Version 1.1.8 (2018-11-07)
- changed the call to the ``mlv`` function from the ``modeest`` package because version 2.3.2 of ``modeest``, published at the end of October 2018, introduced a breaking implementation change. ``diffTF`` now checks the installed ``modeest`` version and calls the appropriate functions depending on that version.
Version 1.1.7 (2018-10-25) Version 1.1.7 (2018-10-25)
- the default value of the minimum number of data points for a CG bin to be included has been raised from 5 to 20 to make the variance calculation more reliable - the default value of the minimum number of data points for a CG bin to be included has been raised from 5 to 20 to make the variance calculation more reliable
- various small updates to the ``summaryFinal.R`` script - various small updates to the ``summaryFinal.R`` script
......
...@@ -494,7 +494,21 @@ if (skipTF) { ...@@ -494,7 +494,21 @@ if (skipTF) {
# d) Comparisons between peaks and binding sites # d) Comparisons between peaks and binding sites
modeNum = mlv(final.TF.df$l2FC, method = "mfv", na.rm = TRUE) # Check the version of modeest, because version 2.3.2 introduced an implementation change that breaks things
# Derive mode and skewness of the TF-specific l2FC values. Which calls are
# used depends on the installed modeest version.
if (packageVersion("modeest") >= "2.3.2") {
  # Version 2.3.2 and later: take the first value returned by mlv() as the
  # mode and obtain the skewness from a separate skewness() call.
  l2fc_mode = mlv(final.TF.df$l2FC, method = "mfv", na.rm = TRUE)[1]
  l2fc_skewness = skewness(final.TF.df$l2FC, na.rm = TRUE)[1]
} else {
  # Earlier versions: the mlv() result must be a list; mode and skewness are
  # read from its M and skewness components.
  modeNum = mlv(final.TF.df$l2FC, method = "mfv", na.rm = TRUE)
  stopifnot(is.list(modeNum))
  # A missing component yields NA; otherwise only the first value is kept,
  # without attributes.
  l2fc_mode = if (is.null(modeNum$M)) NA else as.vector(modeNum$M)[1]
  l2fc_skewness = if (is.null(modeNum$skewness)) NA else as.vector(modeNum$skewness)[1]
}
# We have to filter now by NAs because for some permutations, limma might have been unable to calculate the coefficients # We have to filter now by NAs because for some permutations, limma might have been unable to calculate the coefficients
final.TF.filtered.df = dplyr::filter(final.TF.df, !is.na(l2FC)) final.TF.filtered.df = dplyr::filter(final.TF.df, !is.na(l2FC))
...@@ -518,10 +532,10 @@ if (skipTF) { ...@@ -518,10 +532,10 @@ if (skipTF) {
Pos_l2FC = nrow(final.TF.df[final.TF.df$l2FC > 0,]) / nrow(final.TF.df), Pos_l2FC = nrow(final.TF.df[final.TF.df$l2FC > 0,]) / nrow(final.TF.df),
Mean_l2FC = mean(final.TF.df$l2FC, na.rm = TRUE), Mean_l2FC = mean(final.TF.df$l2FC, na.rm = TRUE),
Median_l2FC = median(final.TF.df$l2FC, na.rm = TRUE), Median_l2FC = median(final.TF.df$l2FC, na.rm = TRUE),
Mode_l2FC = modeNum[[1]], Mode_l2FC = l2fc_mode,
sd_l2FC = sd(final.TF.df$l2FC, na.rm = TRUE), sd_l2FC = sd(final.TF.df$l2FC, na.rm = TRUE),
pvalue_raw = tTest_pVal, pvalue_raw = tTest_pVal,
skewness_l2FC = modeNum[[2]], skewness_l2FC = l2fc_skewness,
T_statistic = tTest_stat, T_statistic = tTest_stat,
TFBS_num = nrow(final.TF.df) TFBS_num = nrow(final.TF.df)
) )
......
...@@ -146,16 +146,30 @@ if (nTFMissing == nrow(summary.df)) { ...@@ -146,16 +146,30 @@ if (nTFMissing == nrow(summary.df)) {
# Replace p-values of 0 with the smallest p-value on the system # Replace p-values of 0 with the smallest p-value on the system
summary.df$pvalue_raw[summary.df$pvalue_raw == 0] = .Machine$double.xmin summary.df$pvalue_raw[summary.df$pvalue_raw == 0] = .Machine$double.xmin
# Check the version of modeest, because version 2.3.2 introduced an implementation change that breaks things
# Derive mode and skewness of the peak l2FC values. Which calls are used
# depends on the installed modeest version.
if (packageVersion("modeest") >= "2.3.2") {
  # Version 2.3.2 and later: take the first value returned by mlv() as the
  # mode and obtain the skewness from a separate skewness() call.
  l2fc_mode = mlv(peaks.df$l2FC, method = "mfv", na.rm = TRUE)[1]
  l2fc_skewness = skewness(peaks.df$l2FC, na.rm = TRUE)[1]
} else {
  # Earlier versions: the mlv() result must be a list; mode and skewness are
  # read from its M and skewness components.
  mode_peaks = mlv(peaks.df$l2FC, method = "mfv", na.rm = TRUE)
  stopifnot(is.list(mode_peaks))
  # A missing component yields NA; otherwise only the first value is kept,
  # without attributes.
  l2fc_mode = if (is.null(mode_peaks$M)) NA else as.vector(mode_peaks$M)[1]
  l2fc_skewness = if (is.null(mode_peaks$skewness)) NA else as.vector(mode_peaks$skewness)[1]
}
mode_peaks = mlv(peaks.df$l2FC, method = "mfv", na.rm = TRUE)
summary.df = summary.df %>% summary.df = summary.df %>%
dplyr::mutate( dplyr::mutate(
adj_pvalue = p.adjust(pvalue_raw, method = "fdr"), adj_pvalue = p.adjust(pvalue_raw, method = "fdr"),
Diff_mean = Mean_l2FC - mean(peaks.df$l2FC, na.rm = TRUE), Diff_mean = Mean_l2FC - mean(peaks.df$l2FC, na.rm = TRUE),
Diff_median = Median_l2FC - median(peaks.df$l2FC, na.rm = TRUE), Diff_median = Median_l2FC - median(peaks.df$l2FC, na.rm = TRUE),
Diff_mode = Mode_l2FC - mode_peaks[[1]], Diff_mode = Mode_l2FC - l2fc_mode,
Diff_skew = skewness_l2FC - mode_peaks[[2]]) %>% Diff_skew = skewness_l2FC - l2fc_skewness) %>%
na.omit(summary.df) na.omit(summary.df)
summary.df = mutate_if(summary.df, is.numeric, as.character) summary.df = mutate_if(summary.df, is.numeric, as.character)
......
...@@ -471,6 +471,8 @@ for (fileCur in par.l$files_input_permResults) { ...@@ -471,6 +471,8 @@ for (fileCur in par.l$files_input_permResults) {
output.global.TFs.orig$weighted_meanDifference = as.numeric(output.global.TFs.orig$weighted_meanDifference) output.global.TFs.orig$weighted_meanDifference = as.numeric(output.global.TFs.orig$weighted_meanDifference)
output.global.TFs.orig$variance = as.numeric(output.global.TFs.orig$variance) output.global.TFs.orig$variance = as.numeric(output.global.TFs.orig$variance)
output.global.TFs.orig$weighted_CD = as.numeric(output.global.TFs.orig$weighted_CD) output.global.TFs.orig$weighted_CD = as.numeric(output.global.TFs.orig$weighted_CD)
output.global.TFs.orig$weighted_Tstat = as.numeric(output.global.TFs.orig$weighted_Tstat)
# Remove rows with NA # Remove rows with NA
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment