diff --git a/README.md b/README.md index bf6b6492b6b8865056e26c0e15454954b7c1cfc6..8798226532ada23fbcd0d3c029098173d857673a 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,11 @@ Installation and Quick Start The following quick start briefly summarizes the necessary steps to use our pipeline: -1. Install the necessary tools (Snakemake, samtools, and bedtools). We recommend installing them via conda, in which case the installation is as easy as +1. Install the necessary tools (Snakemake, samtools, bedtools, and Subread). We recommend installing them via conda, in which case the installation is as easy as - ``conda install -c bioconda snakemake bedtools samtools`` + ``conda install -c bioconda snakemake bedtools samtools subread`` - If conda is not yet installed, follow the [installation instructions](https://conda.io/docs/user-guide/install/index.html). If you want to install the tools manually and outside of the conda framework, see the following instructions for each of the tools: [snakemake](http://snakemake.readthedocs.io/en/stable/getting_started/installation.html), [samtools](http://www.htslib.org/download/), [bedtools](http://bedtools.readthedocs.io/en/latest/content/installation.html). + If conda is not yet installed, follow the [installation instructions](https://conda.io/docs/user-guide/install/index.html). If you want to install the tools manually and outside of the conda framework, see the following instructions for each of the tools: [snakemake](http://snakemake.readthedocs.io/en/stable/getting_started/installation.html), [samtools](http://www.htslib.org/download/), [bedtools](http://bedtools.readthedocs.io/en/latest/content/installation.html), [Subread](http://subread.sourceforge.net/). 2. Clone the Git repository: ``git clone https://git.embl.de/grp-zaugg/diffTF`` diff --git a/src/R/4.summary1.R b/src/R/4.summary1.R index a5f946d8f6de645021818e9ac67d72c5d7171bfb..3c8324fb1fe6562f535c0e6c19666b4bd4c9b909 100755 --- a/src/R/4.summary1.R +++ b/src/R/4.summary1.R @@ -154,65 +154,16 @@ mode_peaks = mlv(round(peaks.df$D2_l2FC, 2), method = "mfv", na.rm = TRUE) summary.df = summary.df %>% dplyr::mutate( - pvalue_adj = p.adjust(pvalue_raw, method = "fdr"), + adj_pvalue = p.adjust(Ttest_pval, method = "fdr"), Diff_mean = Mean_l2FC - mean (peaks.df$D2_l2FC, na.rm = TRUE), - Diff_median = Median_l2FC - median(peaks.df$D2_l2FC, na.rm = TRUE), + DiffMedian = Median_l2FC - median(peaks.df$D2_l2FC, na.rm = TRUE), Diff_mode = Mode_l2FC - mode_peaks[[1]], Diff_skew = Modeskewness - mode_peaks[[2]]) %>% na.omit(summary.df) -# Loop through summary files and use the TFBS_num column - - - - -# Automatically calculate the significance thresholds -# Reverse Ivans heuristic approach earlier -# TODO: Old code, how to make this up to date? -# threshold1 = par.l$FDR_threshold / nTF -# min_T_stat = qt(threshold1/2, median(summary.df$TFBS_num), lower.tail = FALSE) -# -# -# -# # Filter rows -# plot_thr.df = summary.df %>% -# filter(Diff_mean > par.l$plot_min_diffMean | Diff_mean < -par.l$plot_min_diffMean) %>% -# filter(abs(T_statistic) > min_T_stat) -# -# -# -# TF_volcano = ggplot() + -# geom_point(aes(x = summary.df$Diff_mean, -# y = abs(summary.df$T_statistic), -# label = summary.df$TF),size = 1) + -# geom_vline(xintercept = 0, size = 0.7, -# linetype = "longdash", color = "blue") + -# geom_hline(yintercept = min_T_stat, size = 0.7, -# linetype = "longdash", color = "red") + -# geom_text_repel(aes(x = plot_thr.df$Diff_mean, -# y = abs(plot_thr.df$T_statistic), -# label = plot_thr.df$TF),size = 2.5, -# segment.size = 0.5,box.padding = unit(0.05,"lines")) + -# ylab("Absolute T-statistic") + -# xlab(paste0("Mean(TF distr) - mean(peaks)")) + -# theme(axis.text.x = element_text(face = "bold", color = "black", size = 20), -# axis.text.y = element_text(face = "bold", color = "black", size = 20), -# axis.title.x = element_text(face = "bold", colour = "black", size = 24,margin = margin(25,0,0,0)), -# axis.title.y = element_text(face = "bold", colour = "black", size = 24,margin = margin(0,25,0,0)), -# axis.line.x = element_line(color = "black"), axis.line.y = element_line(color = "black"), -# panel.grid.major = element_blank(), -# panel.grid.minor = element_blank(), -# panel.border = element_blank(), -# panel.background = element_blank(), -# legend.position = c(0.1,0.9), -# legend.justification = "center", -# legend.title = element_blank()) -# -# ggsave(plot = TF_volcano, filename = par.l$file_output_volcanoPlot, width = 12, height = 8, useDingbats = FALSE, dpi = 600) - - -write_tsv(summary.df, par.l$file_output_table) # TODO: check the dec = "." parameter + +write_tsv(summary.df, par.l$file_output_table) .printExecutionTime(start.time) diff --git a/src/R/7.summaryFinal.R b/src/R/7.summaryFinal.R index 93af655e67024ae24f28c52ab80dbc94bb2b3bea..ee2e0b1dc6daec59b9836d804f117fb17b5e4d8b 100755 --- a/src/R/7.summaryFinal.R +++ b/src/R/7.summaryFinal.R @@ -453,12 +453,19 @@ if (par.l$plotRNASeqClassification) { } # Filter by rowMeans to eliminate rows with an sd of 0 - rowMeans1 = rowMeans(expressed.TF.counts.df) - rowsToDelete = which(rowMeans1 < 1) + # rowMeans1 = rowMeans(expressed.TF.counts.df) + # rowsToDelete = which(rowMeans1 < 1) + # if (length(rowsToDelete) > 0) { + # expressed.TF.counts.df = expressed.TF.counts.df[-rowsToDelete,] + # flog.info(paste0("Removed ", length(rowsToDelete), " TFs out of ", nrow(expressed.TF.counts.df), " because they had a row mean of < 1.")) + # } + rowSds = rowSds(expressed.TF.counts.df) + rowsToDelete = which(rowSds == 0) if (length(rowsToDelete) > 0) { - expressed.TF.counts.df = expressed.TF.counts.df[-rowsToDelete,] - flog.info(paste0("Removed ", length(rowsToDelete), " TFs out of ", nrow(expressed.TF.counts.df), " because they had a row mean of < 1.")) + expressed.TF.counts.df = expressed.TF.counts.df[-rowsToDelete,] + flog.info(paste0("Removed ", length(rowsToDelete), " TFs out of ", nrow(expressed.TF.counts.df), " because they had a standard deviation of 0.")) } + rowMeans2 = rowMeans(peak.counts) rowsToDelete = which(rowMeans2 == 0) if (length(rowsToDelete) > 0) {