Minor updates

1c3e7071 · Christian Arnold · 00df536c · 1c3e7071 · 1c3e7071 · 1c3e7071
Commit 1c3e7071 authored 7 years ago by Christian Arnold
--- a/README.md
+++ b/README.md
@@ -20,11 +20,11 @@ Installation and Quick Start

 The following quick start briefly summarizes the necessary steps to use our pipeline:

-1. Install the necessary tools (Snakemake, samtools, and bedtools). We recommend installing them via conda, in which case the installation is as easy as 
+1. Install the necessary tools (Snakemake, samtools, bedtools, and Subread). We recommend installing them via conda, in which case the installation is as easy as 

-    ``conda install -c bioconda snakemake bedtools samtools``
+    ``conda install -c bioconda snakemake bedtools samtools subread``

-    If conda is not yet installed, follow the [installation instructions](https://conda.io/docs/user-guide/install/index.html). If you want to install the tools manually and outside of the conda framework, see the following instructions for each of the tools: [snakemake](http://snakemake.readthedocs.io/en/stable/getting_started/installation.html), [samtools](http://www.htslib.org/download/), [bedtools](http://bedtools.readthedocs.io/en/latest/content/installation.html).
+    If conda is not yet installed, follow the [installation instructions](https://conda.io/docs/user-guide/install/index.html). If you want to install the tools manually and outside of the conda framework, see the following instructions for each of the tools: [snakemake](http://snakemake.readthedocs.io/en/stable/getting_started/installation.html), [samtools](http://www.htslib.org/download/), [bedtools](http://bedtools.readthedocs.io/en/latest/content/installation.html), [Subread](http://subread.sourceforge.net/).
 2. Clone the Git repository: 

    ``git clone https://git.embl.de/grp-zaugg/diffTF``

--- a/src/R/4.summary1.R
+++ b/src/R/4.summary1.R
@@ -154,65 +154,16 @@ mode_peaks = mlv(round(peaks.df$D2_l2FC, 2), method = "mfv", na.rm = TRUE)

 summary.df = summary.df %>%
              dplyr::mutate(
-                  pvalue_adj = p.adjust(pvalue_raw, method = "fdr"),
+                  adj_pvalue = p.adjust(Ttest_pval, method = "fdr"),
                  Diff_mean  = Mean_l2FC   - mean  (peaks.df$D2_l2FC, na.rm = TRUE), 
-                  Diff_median = Median_l2FC - median(peaks.df$D2_l2FC, na.rm = TRUE),
+                  DiffMedian = Median_l2FC - median(peaks.df$D2_l2FC, na.rm = TRUE),
                  Diff_mode  = Mode_l2FC - mode_peaks[[1]],    
                  Diff_skew  = Modeskewness - mode_peaks[[2]])  %>%
              na.omit(summary.df)


-# Loop through summary files and use the TFBS_num column
-
-
-
-
-# Automatically calculate the significance thresholds
-# Reverse Ivans heuristic approach earlier
-# TODO: Old code, how to make this up to date?
-# threshold1 = par.l$FDR_threshold / nTF
-# min_T_stat = qt(threshold1/2, median(summary.df$TFBS_num), lower.tail = FALSE)
-# 
-# 
-# 
-# # Filter rows
-# plot_thr.df = summary.df %>%
-#                 filter(Diff_mean > par.l$plot_min_diffMean | Diff_mean < -par.l$plot_min_diffMean)  %>%
-#                 filter(abs(T_statistic) > min_T_stat)
-# 
-# 
-# 
-# TF_volcano = ggplot() +
-#   geom_point(aes(x = summary.df$Diff_mean,
-#                  y = abs(summary.df$T_statistic),
-#                  label = summary.df$TF),size = 1)   +
-#   geom_vline(xintercept = 0, size = 0.7,
-#              linetype = "longdash", color = "blue") +
-#   geom_hline(yintercept = min_T_stat, size = 0.7,
-#              linetype = "longdash", color = "red") +
-#   geom_text_repel(aes(x = plot_thr.df$Diff_mean,
-#                       y = abs(plot_thr.df$T_statistic),
-#                       label = plot_thr.df$TF),size = 2.5,
-#                   segment.size = 0.5,box.padding = unit(0.05,"lines")) +
-#   ylab("Absolute T-statistic") +
-#   xlab(paste0("Mean(TF distr) - mean(peaks)")) +
-#   theme(axis.text.x = element_text(face = "bold", color = "black", size = 20),
-#         axis.text.y = element_text(face = "bold", color = "black", size = 20),
-#         axis.title.x = element_text(face = "bold", colour = "black", size = 24,margin = margin(25,0,0,0)),
-#         axis.title.y = element_text(face = "bold", colour = "black", size = 24,margin = margin(0,25,0,0)),
-#         axis.line.x = element_line(color = "black"), axis.line.y = element_line(color = "black"),
-#         panel.grid.major = element_blank(),
-#         panel.grid.minor = element_blank(),
-#         panel.border = element_blank(),
-#         panel.background = element_blank(),
-#         legend.position = c(0.1,0.9),
-#         legend.justification = "center",
-#         legend.title = element_blank())
-# 
-# ggsave(plot = TF_volcano, filename = par.l$file_output_volcanoPlot, width = 12, height = 8, useDingbats = FALSE, dpi = 600)
-
-
-write_tsv(summary.df, par.l$file_output_table) # TODO: check the dec = "." parameter
+
+write_tsv(summary.df, par.l$file_output_table) 

 .printExecutionTime(start.time)


--- a/src/R/7.summaryFinal.R
+++ b/src/R/7.summaryFinal.R
@@ -453,12 +453,19 @@ if (par.l$plotRNASeqClassification) {
    }
    
    # Filter by rowMeans to eliminate rows with an sd of 0
-    rowMeans1 = rowMeans(expressed.TF.counts.df)
-    rowsToDelete = which(rowMeans1 < 1)
+    # rowMeans1 = rowMeans(expressed.TF.counts.df)
+    # rowsToDelete = which(rowMeans1 < 1)
+    # if (length(rowsToDelete) > 0) {
+    #   expressed.TF.counts.df = expressed.TF.counts.df[-rowsToDelete,]
+    #   flog.info(paste0("Removed ", length(rowsToDelete), " TFs out of ", nrow(expressed.TF.counts.df), " because they had a row mean of < 1."))
+    # }
+    rowSds = rowSds(expressed.TF.counts.df)
+    rowsToDelete = which(rowSds == 0)
    if (length(rowsToDelete) > 0) {
-      expressed.TF.counts.df = expressed.TF.counts.df[-rowsToDelete,]
-      flog.info(paste0("Removed ", length(rowsToDelete), " TFs out of ", nrow(expressed.TF.counts.df), " because they had a row mean of < 1."))
+        expressed.TF.counts.df = expressed.TF.counts.df[-rowsToDelete,]
+        flog.info(paste0("Removed ", length(rowsToDelete), " TFs out of ", nrow(expressed.TF.counts.df), " because they had a standard deviation of 0."))
    }
+    
    rowMeans2 = rowMeans(peak.counts)
    rowsToDelete = which(rowMeans2 == 0)
    if (length(rowsToDelete) > 0) {