Commit ccb37018 authored by Sudeep Sahadevan's avatar Sudeep Sahadevan

unit tests for DESeqDataSetFromSlidingWindows

parent 5a7db256
......@@ -21,6 +21,7 @@ Depends:
Suggests:
knitr,
rmarkdown,
testthat,
BiocStyle
VignetteBuilder:
knitr
......
#' @export
#' @importFrom methods as
#' @importFrom methods as is
#' @importFrom BiocGenerics sort
#' @importFrom GenomeInfoDb sortSeqlevels
#' @importFrom GenomicRanges makeGRangesFromDataFrame
......@@ -12,10 +12,10 @@
#'
#' @param countData sliding window count data
#' @param colData phenotype data
#' @param annotObj can either be a data.frame or a file name, see details
#' @param design design of the experiment, see \code{\link[DESeq2:DESeqDataSet]{DESeqDataSet}}
#' @param tidy If TRUE, first column is of countData is treated as rownames (defalt: FALSE), see \code{\link[DESeq2:DESeqDataSet]{DESeqDataSet}}
#' @param ignoreRank ignore rank, see \code{\link[DESeq2:DESeqDataSet]{DESeqDataSet}}
#' @param annotObj can either be a data.frame or a file name, see details
#' @param start0based TRUE (default) or FALSE. If TRUE, then the start positions in \code{annotObj} is considered to be 0-based
#'
#' @details
......@@ -36,18 +36,27 @@
#' window_number: window number \cr
#'
#' @return DESeq object
DESeqDataSetFromSlidingWindows <- function(countData, colData, design,tidy=FALSE,ignoreRank=FALSE,annotObj, start0based=TRUE){
DESeqDataSetFromSlidingWindows <- function(countData, colData, annotObj, design, tidy=FALSE,ignoreRank=FALSE, start0based=TRUE){
stopifnot(ncol(countData) > 1)
if(is(countData,'data.table')){
countData <- data.frame(countData)
warning('countData is a data.table object, converting it to data.frame. First column will be used as rownames')
if(!tidy){
tidy <- TRUE
}
}
if (tidy) {
# this code is copied from DESeq2 source, credits to authors
rownms <- as.character(countData[,1])
countData <- countData[,-1,drop=FALSE]
rownames(countData) <- rownms
}else if(is.null(rownames(countData))){
stop('rownames of countData cannot be empty')
}
if(class(annotObj)=="character"){
if(is(annotObj,"character")){
annotData <- .readAnnotation(fname=annotObj,asGRange=FALSE,start0based=start0based)
}else if(class(annotObj)=="data.frame"){
}else if(is(annotObj,"data.frame")){
neededCols <- c('chromosome','unique_id','begin','end','strand','gene_id','gene_name','gene_type','gene_region','Nr_of_region',
'Total_nr_of_region','window_number')
missingCols <- setdiff(neededCols,colnames(annotObj))
......@@ -68,17 +77,17 @@ DESeqDataSetFromSlidingWindows <- function(countData, colData, design,tidy=FALSE
Missing columns:
',paste(missingCols,collapse=", "),'')
}
commonIds <- intersect(as.character(annotObj$unique_id),rownames(countData))
if(length(commonIds)==0){
stop('There are no common unique ids between the input file and uniqueIds. Please check your data sets!')
if(length(intersect(as.character(annotObj$unique_id),rownames(countData)))==0){
stop('There are no common unique ids between the countData and annotObj. Please check your data sets!')
}
annotData <- annotObj
rownames(annotData) <- annotData$unique_id
}else{
stop('annotObj MUST be a data.frame or character')
}
if(length(setdiff(rownames(countData),rownames(annotData)))>0){
warning('Cannot find chromosomal positions for all entries
in countData and these rows will be removed.
Please check your data sets!')
warning('Cannot find chromosomal positions for all entries in countData.
countData rows with missing annotation will be removed !')
}
commonIds <- intersect(rownames(annotData),rownames(countData))
countData <- countData[commonIds,]
......@@ -109,7 +118,7 @@ DESeqDataSetFromSlidingWindows <- function(countData, colData, design,tidy=FALSE
keep.extra.columns=TRUE,starts.in.df.are.0based=start0based)
gr <- sortSeqlevels(gr)
gr <- sort(gr)
countData <- as.matrix(countData[mcols(gr)$unique_id,])
countData <- as.matrix(countData[rownames(mcols(gr)),])
se <- SummarizedExperiment(assays = SimpleList(counts=countData),colData = colData,rowRanges=gr)
return(DESeqDataSet(se, design = design, ignoreRank))
}
......@@ -4,22 +4,22 @@
\alias{DESeqDataSetFromSlidingWindows}
\title{create DESeq data object}
\usage{
DESeqDataSetFromSlidingWindows(countData, colData, design, tidy = FALSE,
ignoreRank = FALSE, annotObj, start0based = TRUE)
DESeqDataSetFromSlidingWindows(countData, colData, annotObj, design,
tidy = FALSE, ignoreRank = FALSE, start0based = TRUE)
}
\arguments{
\item{countData}{sliding window count data}
\item{colData}{phenotype data}
\item{annotObj}{can either be a data.frame or a file name, see details}
\item{design}{design of the experiment, see \code{\link[DESeq2:DESeqDataSet]{DESeqDataSet}}}
\item{tidy}{If TRUE, first column is of countData is treated as rownames (defalt: FALSE), see \code{\link[DESeq2:DESeqDataSet]{DESeqDataSet}}}
\item{ignoreRank}{ignore rank, see \code{\link[DESeq2:DESeqDataSet]{DESeqDataSet}}}
\item{annotObj}{can either be a data.frame or a file name, see details}
\item{start0based}{TRUE (default) or FALSE. If TRUE, then the start positions in \code{annotObj} is considered to be 0-based}
}
\value{
......
context("DESeqDataSetFromSlidingWindows")
test_that("DESeqDataSetFromSlidingWindows throws proper errors during object construction", {
set.seed(1)
library(data.table)
phenoDat <- DataFrame(condition=factor(c('test','test','control','control')),row.names=c('S1','S2','S3','S4'))
testCols <- c('chromosome','unique_id','begin','end','strand','gene_id',
'gene_name','gene_type','gene_region','Nr_of_region',
'Total_nr_of_region','window_number')
annotDf <- data.frame(chromosome = paste('chr',c(1,1,3,3,5,10,15,20,'X','Y'),sep = ''),
unique_id = paste('uid',c(1:10),sep='_'), begin = c(100,124,212,215,986,763,3221,2311,1212,3322),
end=c(150,174,262,265,1036,813,3271,2361,1262,3372), strand=c('-', '+', '+', '+', '-', '-', '+', '+', '-', '-'),
gene_id=paste('gene',c(1,1,3,3,5,10,15,20,'X','Y'),sep = ''),gene_name=paste('gene',c(1,1,3,3,5,10,15,20,'X','Y'),sep = ''),
gene_type=c(rep('protein_coding',3),'rRNA',' miRNA',rep('lncRNA',2),'pseudogene','snoRNA','protein_coding'),
gene_region=c(rep('exon',5),rep('intron',5)),Nr_of_region=c(1,2,4,5,11,10,5,7,1,33),
Total_nr_of_region=c(6,7,9,10,16,15,10,12,6,38),window_number=c(287,614,145,329,487,855,851,630,498,858))
expect_error(DESeqDataSetFromSlidingWindows(matrix(0,10,4),phenoDat,annotDf),'rownames of countData cannot be empty')
expect_error(DESeqDataSetFromSlidingWindows(data.frame(matrix(0,10,4)),phenoDat,annotObj=matrix(0,10,4)),'annotObj MUST be a data.frame or character')
errMsg <- 'Input annotation file is missing required columns, needed columns:
chromosome: chromosome name
unique_id: unique id of the window
begin: window start co-ordinate
end: window end co-ordinate
strand: strand
gene_id: gene id
gene_name: gene name
gene_type: gene type annotation
gene_region: gene region
Nr_of_region: number of the current region
Total_nr_of_region: total number of regions
window_number: window number
Missing columns:
window_number'
expect_error(DESeqDataSetFromSlidingWindows(data.frame(matrix(0,10,4)),phenoDat,annotDf[,c(1:11)]),errMsg)
expect_error(DESeqDataSetFromSlidingWindows(data.frame(matrix(0,10,4)),phenoDat,annotDf),
'There are no common unique ids between the countData and annotObj. Please check your data sets!')
sampleData <- data.frame(matrix(rnbinom(48,10,0.8),12,4,dimnames=list(c(paste('uid',c(1:12),sep='_')),c('S1','S2','S3','S4'))))
warnMsg <- 'Cannot find chromosomal positions for all entries in countData.
countData rows with missing annotation will be removed !'
expect_warning(DESeqDataSetFromSlidingWindows(sampleData,phenoDat,annotDf,design = ~condition),warnMsg)
expect_warning(DESeqDataSetFromSlidingWindows(data.table(data.frame(uid=rownames(sampleData),sampleData))[c(1:10),],phenoDat,annotDf,design = ~condition),
'countData is a data.table object, converting it to data.frame. First column will be used as rownames')
})
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment