Commit bc432e8c authored by Bernd Klaus's avatar Bernd Klaus

added a grouped mutate operation as an alternative to the join

parent a54244ea
......@@ -18,3 +18,4 @@ Slides_stat_methods_bioinf/slides_data_handling_bioinf_cache
Slides_stat_methods_bioinf/slides_factor_ana_testing_ml_cache
Slides_stat_methods_bioinf/slides_graphics_bioinf_cache
Slides_stat_methods_bioinf/SRP022054
grouped_mutated.R
......@@ -13,11 +13,10 @@ print(date())
library("readxl")
library("BiocStyle")
library("knitr")
library("tidyverse")
library("matrixStats")
library("RColorBrewer")
library("stringr")
library("pheatmap")
library("matrixStats")
library("purrr")
library("fdrtool")
library("readr")
......@@ -34,6 +33,7 @@ library("openxlsx")
library("readxl")
library("limma")
library("ggthemes")
library("tidyverse")
theme_set(theme_solarized(base_size = 18))
......@@ -240,8 +240,8 @@ all(round(sc_qc_stats_wide$sum_perc) == 100)
sc_qc_stats_per_batch <- group_by(sc_qc_stats, type, batch)
sc_qc_mean_align_per_batch <- filter(summarize(sc_qc_stats_per_batch,
mean_align_rate = mean(percent)),
type == "concordantUniq")
mean_align_rate = mean(percent)),
type == "concordantUniq")
sc_qc_mean_align_per_batch
......@@ -296,16 +296,19 @@ head(summary_across_reps, 5)
augmented_table <- left_join(q_pcr, summary_across_reps )
head(augmented_table, 5)
## ----ex_high_qual, echo=FALSE, results="hide"----------------------------
## ----grouped_mutate------------------------------------------------------
augmented_table_mutate <- group_by(q_pcr, sample_name,
target_Name) %>%
mutate(mean = mean(C_T, na.rm = TRUE),
spread = abs(diff(C_T)))
head(augmented_table_mutate, 5)
hq_cells <- group_by(sc_qc_stats, type, percent) %>%
mutate(high_qual = percent > 60) %>%
## ----ex_high_qual, echo=FALSE, results="hide"----------------------------
hq_cells <- mutate(sc_qc_stats, high_qual = percent > 60) %>%
filter(type == "concordantUniq")
prop_hgh_qual <- group_by(hq_cells, type, percent, high_qual) %>%
filter(type == "concordantUniq") %>%
ungroup() %>%
dplyr::count(high_qual) %>%
prop_high_qual <- dplyr::count(hq_cells, high_qual) %>%
arrange(high_qual) %>%
mutate(perc_qual = n / sum(n))
......@@ -313,7 +316,6 @@ prop_hgh_qual <- group_by(hq_cells, type, percent, high_qual) %>%
## ----ex_high_qual_batch, echo=FALSE, results="hide"----------------------
group_by(hq_cells, batch) %>%
filter(type == "concordantUniq") %>%
dplyr::count(high_qual) %>%
mutate(perc_qual = n / sum(n)) %>%
filter(high_qual == TRUE) %>%
......
......@@ -5,7 +5,6 @@ date: "`r doc_date()`"
output:
BiocStyle::html_document:
toc: true
highlight: tango
self_contained: true
toc_float: false
code_download: true
......@@ -13,7 +12,6 @@ output:
toc_depth: 2
BiocStyle::pdf_document:
toc: true
highlight: tango
toc_depth: 2
bibliography: stat_methods_bioinf.bib
---
......@@ -51,11 +49,10 @@ print(date())
library("readxl")
library("BiocStyle")
library("knitr")
library("tidyverse")
library("matrixStats")
library("RColorBrewer")
library("stringr")
library("pheatmap")
library("matrixStats")
library("purrr")
library("fdrtool")
library("readr")
......@@ -72,6 +69,7 @@ library("openxlsx")
library("readxl")
library("limma")
library("ggthemes")
library("tidyverse")
theme_set(theme_solarized(base_size = 18))
......@@ -835,8 +833,8 @@ Let's look at the mean alignment rates per batch:
sc_qc_stats_per_batch <- group_by(sc_qc_stats, type, batch)
sc_qc_mean_align_per_batch <- filter(summarize(sc_qc_stats_per_batch,
mean_align_rate = mean(percent)),
type == "concordantUniq")
mean_align_rate = mean(percent)),
type == "concordantUniq")
sc_qc_mean_align_per_batch
```
......@@ -917,7 +915,8 @@ Often, one wants to combine two (or more) tables that contain related informatio
This is often the case after a summarization on grouped data.
Consider the following data table showing qPCR data for
various genes in two replicates. In quantitative real time PCR,
various genes (`target_Name`) in various cell lines (`sample_name`)
in two replicates. In quantitative real time PCR,
the complete PCR amplification process is monitored and a fluorescence intensity
is obtained for each cycle. A common way to reduce the data for the complete
process to a single number is to give the number of times the fluorescence
......@@ -987,27 +986,39 @@ head(augmented_table, 5)
tell you which ones it has used, but you can always specify them manually
using the __by__ argument.
## Alternative: a grouped mutate
Instead of creating a summary of the data and then joining the summary to
the original table, we can also directly use the `mutate` function on
a grouped data set:
```{r grouped_mutate}
augmented_table_mutate <- group_by(q_pcr, sample_name,
target_Name) %>%
mutate(mean = mean(C_T, na.rm = TRUE),
spread = abs(diff(C_T)))
head(augmented_table_mutate, 5)
```
### Exercise: High quality cells
1. Assume you want to retain only high-quality cells, defined by a
percentage greater than 60% of uniquely and concordantly mapping
read pairs.
Which proportion of cells are of high quality? Use grouping, filtering
Which proportion of cells are of high quality? Use filtering
and the function `count()` to answer this. Make sure to ungroup the
data set before using the function `count()`!
```{r ex_high_qual, echo=FALSE, results="hide"}
hq_cells <- group_by(sc_qc_stats, type, percent) %>%
mutate(high_qual = percent > 60) %>%
hq_cells <- mutate(sc_qc_stats, high_qual = percent > 60) %>%
filter(type == "concordantUniq")
prop_hgh_qual <- group_by(hq_cells, type, percent, high_qual) %>%
filter(type == "concordantUniq") %>%
ungroup() %>%
dplyr::count(high_qual) %>%
prop_high_qual <- dplyr::count(hq_cells, high_qual) %>%
arrange(high_qual) %>%
mutate(perc_qual = n / sum(n))
......@@ -1019,7 +1030,6 @@ prop_hgh_qual <- group_by(hq_cells, type, percent, high_qual) %>%
```{r ex_high_qual_batch, echo=FALSE, results="hide"}
group_by(hq_cells, batch) %>%
filter(type == "concordantUniq") %>%
dplyr::count(high_qual) %>%
mutate(perc_qual = n / sum(n)) %>%
filter(high_qual == TRUE) %>%
......
This source diff could not be displayed because it is too large. You can view the blob instead.
No preview for this file type
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment