Commit 8b106a93 authored by Christian Arnold's avatar Christian Arnold

Fixed an issue with the linear model

parent 6d2e520e
......@@ -65,6 +65,8 @@ assertIntegerish(par.l$nBootstraps, len = 1)
par.l$file_input_sampleData = snakemake@config$samples$summaryFile
checkAndLogWarningsAndErrors(par.l$file_input_sampleData, checkFileExists(par.l$file_input_sampleData, access = "r"))
par.l$designFormulaVariableTypes = snakemake@config$par_general$designVariableTypes
par.l$designFormula = snakemake@config$par_general$designContrast
## LOG ##
assertList(snakemake@log, min.len = 1)
par.l$file_log = snakemake@log[[1]]
......@@ -119,23 +121,76 @@ if (par.l$nPermutations == 0 && par.l$nBootstraps < 1000) {
}
#############################
# CHECK FASTA AND BAM FILES #
# CHECK PARAMETER VALIDITY #
#############################
sampleData.df = read_tsv(par.l$file_input_sampleData, col_names = TRUE, col_types = cols())
# Check the sample table
designFormula = as.formula(par.l$designFormula)
formulaVariables = attr(terms(designFormula), "term.labels")
# Extract the variable that defines the contrast. Always the last element in the formula
variableToPermute = formulaVariables[length(formulaVariables)]
par.l$designFormulaVariableTypes = gsub(" ", "", par.l$designFormulaVariableTypes)
components = strsplit(par.l$designFormulaVariableTypes, ",")[[1]]
checkAndLogWarningsAndErrors(components, checkVector(components, len = length(formulaVariables)))
# Split further
components2 = strsplit(components, ":")
par.l$designFormulaVariableTypes = gsub(" ", "", par.l$designFormulaVariableTypes)
components = strsplit(par.l$designFormulaVariableTypes, ",")[[1]]
checkAndLogWarningsAndErrors(components, checkVector(components, len = length(formulaVariables)))
# Split further
components2 = strsplit(components, ":")
if (!all(sapply(components2,length) == 2)) {
message = "The parameter \"designVariableTypes\" has not been specified correctly. It must contain all the variables that appear in the parameter \"designContrast\". See the documentation for details"
checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
}
components3 = unlist(lapply(components2, "[[", 1))
components3types = tolower(unlist(lapply(components2, "[[", 2)))
names(components3types) = components3
checkAndLogWarningsAndErrors(sort(formulaVariables), checkSetEqual(sort(formulaVariables), sort(components3)))
checkAndLogWarningsAndErrors(components3types, checkSubset(components3types, c("factor", "integer", "numeric", "logical")))
datatypeVariableToPermute = components3types[variableToPermute]
nDistValues = length(unique(sampleData.df$conditionSummary))
if (nDistValues != 2) {
if (datatypeVariableToPermute %in% c("integer", "numeric")) {
if (nDistValues < 3) {
message = paste0("The column 'conditionSummary' must contain at least 3 different values for the linear model, but ", nDistValues, " were found.")
checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
}
} else {
if (nDistValues != 2) {
message = paste0("The column 'conditionSummary' must contain exactly 2 different values, but ", nDistValues, " were found.")
checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
}
if (!testSubset(unique(sampleData.df$conditionSummary), conditionComparison)) {
}
if (!testSubset(unique(sampleData.df$conditionSummary), conditionComparison)) {
message = paste0("The elements specified in 'conditionComparison' in the config file must be a subset of the values in the column 'conditionSummary' in the sample file")
checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
}
}
#############################
# CHECK FASTA AND BAM FILES #
#############################
# Build the fasta index. Requires write access to the folder where the fasta is stored (limitation of samtools faidx)
fastaIndex = paste0(fastaFile, ".fai")
......
......@@ -123,11 +123,6 @@ sampleData.df = read_tsv(par.l$file_input_sampleData, col_names = TRUE, col_type
checkAndLogWarningsAndErrors(colnames(sampleData.df), checkSubset(c("bamReads"), colnames(sampleData.df)))
conditionsVec = strsplit(par.l$conditionComparison, ",")[[1]]
if (!testSubset(sampleData.df$conditionSummary, conditionsVec)) {
message = "The parameter conditionComparison does not correspond to the sample summary table"
checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
}
designFormula = as.formula(par.l$designFormula)
formulaVariables = attr(terms(designFormula), "term.labels")
......@@ -157,6 +152,8 @@ checkAndLogWarningsAndErrors(components3types, checkSubset(components3types, c("
datatypeVariableToPermute = components3types[variableToPermute]
# Read and modify samples metadata
sampleData.df = mutate(sampleData.df, name = file_path_sans_ext(basename(sampleData.df$bamReads)))
......@@ -180,21 +177,32 @@ for (colnameCur in names(components3types)) {
}
# Change the conditionSummary specifically and enforce the direction as specified in the config file
sampleData.df$conditionSummary = factor(sampleData.df$conditionSummary, levels = conditionsVec)
# If variable to permute is a factor, check that is has 2 levels
nLevels = length(unique(unlist(sampleData.df[,variableToPermute])))
if (datatypeVariableToPermute == "factor" & nLevels != 2) {
message = paste0("The variable ", variableToPermute, " was specified as a factor, but it does not have two different levels but instead ", nLevels, ".")
checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
if (datatypeVariableToPermute %in% c("factor", "logical")) {
conditionsVec = strsplit(par.l$conditionComparison, ",")[[1]]
if (!testSubset(sampleData.df$conditionSummary, conditionsVec)) {
message = "The parameter conditionComparison does not correspond to the sample summary table"
checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
}
# If variable to permute is a factor, check that is has 2 levels
nLevels = length(unique(unlist(sampleData.df[,variableToPermute])))
if (datatypeVariableToPermute == "factor" & nLevels != 2) {
message = paste0("The variable ", variableToPermute, " was specified as a factor, but it does not have two different levels but instead ", nLevels, ".")
checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
}
# Change the conditionSummary specifically and enforce the direction as specified in the config file
sampleData.df$conditionSummary = factor(sampleData.df$conditionSummary, levels = conditionsVec)
} else {
}
##############################
# ITERATE THROUGH PEAK FILES #
##############################
......@@ -361,7 +369,10 @@ cds.peaks.filt = cds.peaks[rowMeans(counts(cds.peaks)) > 0, ]
# model.matrix uses the first level in the specified column as reference, and so the corresponding column name and values are relative to that reference.
# That is, if the levels are "GMP" and "MPP", then all log2 fc will be the log2fc of MPP as compared to GMP.
# The levels have to be reversed because the first element is the one appering at the right of the plot, with positive values as. compared to the reference
# The levels have to be reversed because the first element is the one appearing at the right of the plot, with positive values as. compared to the reference
# TODO: Double-check because limma and DeSEQ are different: post vs pre and pre vs post (DESeq). This has a big influence in the final visualization!
# dds$condition <- relevel(dds$condition, ref = "untreated")
# limma: whatever comes first for model.matrix is taken as first value, then log2fc is of the second condition over the first
comparisonDESeq = rev(levels(sampleData.df$conditionSummary))
##############
......
......@@ -428,7 +428,7 @@ plotDiagnosticPlots <- function(dd, differentialResults, conditionComparison, fi
assertClass(dd, "DESeqDataSet")
assert(testClass(differentialResults, "MArrayLM"), testClass(differentialResults, "DESeqDataSet"))
assertVector(conditionComparison, len = 2)
assert(testNull(conditionComparison), testVector(conditionComparison, len = 2))
assert(checkNull(filename), checkDirectory(dirname(filename), access = "w"))
......@@ -438,7 +438,13 @@ plotDiagnosticPlots <- function(dd, differentialResults, conditionComparison, fi
}
if (testClass(differentialResults, "MArrayLM")) {
title = paste0("limma results\n", conditionComparison[1], " vs. ", conditionComparison[2])
if (!is.null(conditionComparison)) {
title = paste0("limma results\n", conditionComparison[1], " vs. ", conditionComparison[2])
} else {
title = paste0("limma results\n")
}
for (alphaCur in alpha) {
isSign = ifelse(p.adjust(differentialResults$p.value[,ncol(differentialResults$p.value)], method = "BH") < alphaCur,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment