Commit 957c54e9 authored by Sudeep Sahadevan's avatar Sudeep Sahadevan

R CMD check compatible version

parent de6e04e2
......@@ -10,15 +10,25 @@ Maintainer: Hentze bioinformatics team <biohentze@embl.de>
Description: Differential expression analysis of windows for next-generation sequencing data like eCLIP or iCLIP data.
Imports:
DESeq2,
BiocGenerics,
BiocParallel,
data.table,
GenomeInfoDb,
GenomicRanges,
methods,
S4Vectors,
SummarizedExperiment,
utils
Depends:
DESeq2,
BiocGenerics,
BiocParallel,
data.table,
GenomeInfoDb,
GenomicRanges,
methods,
S4Vectors,
SummarizedExperiment,
utils
Suggests:
knitr
......
# Generated by roxygen2: do not edit by hand
export(countGeneRegion)
export(extractRegions)
export(resultsDEWSeq)
export(toBED)
export(topWindowStats)
import(BiocParallel)
import(DESeq2)
import(S4Vectors)
importFrom(BiocGenerics,sort)
importFrom(GenomeInfoDb,sortSeqlevels)
importFrom(GenomicRanges,findOverlaps)
importFrom(GenomicRanges,makeGRangesFromDataFrame)
importFrom(GenomicRanges,reduce)
importFrom(S4Vectors,na.omit)
importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
importFrom(data.table,fread)
importFrom(methods,is)
importFrom(stats,p.adjust)
importFrom(stats,pnorm)
importFrom(stats,pt)
importFrom(stats,qf)
importFrom(stats,terms)
importFrom(stats,terms.formula)
importFrom(utils,packageVersion)
importFrom(utils,read.table)
importFrom(utils,setTxtProgressBar)
importFrom(utils,txtProgressBar)
......@@ -2,9 +2,16 @@
# Author: Sudeep Sahadevan, sudeep.sahadevan@embl.de
#' @export
#'
#' @importFrom BiocGenerics sort
#' @importFrom GenomeInfoDb sortSeqlevels
#' @importFrom GenomicRanges makeGRangesFromDataFrame reduce
#' @importFrom S4Vectors na.omit
#' @importFrom utils setTxtProgressBar txtProgressBar
#'
#' @title extract significant regions
#' @description extract significant windows from output of
#' \code{\link{results_DEWSeq}} using the supplied padj and
#' \code{\link{resultsDEWSeq}} using the supplied padj and
#' log2FoldChange cut-offs and merge these significant windows
#' to regions and create the following columns for each
#' significant region:
......@@ -43,7 +50,7 @@
#' }
#'
#' @param windowRes output data.frame from \code{\link{results_DEWSeq}}
#' @param windowRes output data.frame from \code{\link{resultsDEWSeq}}
#' @param padjCol name of the adjusted pvalue column (default: padj)
#' @param padjThresh threshold for p-adjusted value (default: 0.05)
#' @param log2FoldChangeCol name of the log2foldchange column (default: log2FoldChange)
......@@ -97,17 +104,17 @@ extractRegions <- function(windowRes,
message('There are no significant windows/regions under the current threshold!\nPlease lower your significance cut-off thresholds and manually check if there are any significant windows under the threshold')
return(NULL)
}
geneRange <- GenomicRanges::makeGRangesFromDataFrame(sigDat,seqnames.field = 'gene_id',start.field = 'begin',end.field = 'end',strand.field = 'strand',
geneRange <- makeGRangesFromDataFrame(sigDat,seqnames.field = 'gene_id',start.field = 'begin',end.field = 'end',strand.field = 'strand',
ignore.strand=FALSE,starts.in.df.are.0based=begin0based,keep.extra.columns = TRUE)
geneRange <- GenomeInfoDb::sortSeqlevels(geneRange)
geneRange <- BiocGenerics::sort(geneRange)
geneReduce <- GenomicRanges::reduce(geneRange,drop.empty.ranges=TRUE,with.revmap=TRUE,min.gapwidth=1)
geneRange <- sortSeqlevels(geneRange)
geneRange <- sort(geneRange)
geneReduce <- reduce(geneRange,drop.empty.ranges=TRUE,with.revmap=TRUE,min.gapwidth=1)
mcols(geneReduce)$regionStartId <- mcols(geneReduce)$gene_name <- mcols(geneReduce)$gene_type <- mcols(geneReduce)$gene_region <- 'Undefined'
mcols(geneReduce)$chromosome <- mcols(geneReduce)$unique_ids <- 'Undefined'
mcols(geneReduce)$windows_in_region <- mcols(geneReduce)$Nr_of_region <- mcols(geneReduce)$Total_nr_of_region <- mcols(geneReduce)$window_number <- 1
mcols(geneReduce)$padj_min <- mcols(geneReduce)$padj_max <- mcols(geneReduce)$padj_mean <- 1.0
mcols(geneReduce)$log2FoldChange_min <- mcols(geneReduce)$log2FoldChange_max <- mcols(geneReduce)$log2FoldChange_mean <- 0.0
pb <- utils::txtProgressBar(min=0,max=length(geneReduce),style = 3)
pb <- txtProgressBar(min=0,max=length(geneReduce),style = 3)
for(i in c(1:length(geneReduce))){
# values
mergeInd <- unlist(mcols(geneReduce)[i,1])
......@@ -129,7 +136,7 @@ extractRegions <- function(windowRes,
mcols(geneReduce)[i,'Total_nr_of_region'] <- mcols(geneRange)[min(mergeInd),'Total_nr_of_region']
mcols(geneReduce)[i,'window_number'] <- mcols(geneRange)[min(mergeInd),'window_number']
# progress
utils::setTxtProgressBar(pb=pb,value=i)
setTxtProgressBar(pb=pb,value=i)
}
close(pb)
regionRes <- as.data.frame(geneReduce)
......
# Function(s) to read in large files
# Author: Sudeep Sahadevan, sudeep.sahadevan@embl.de
#'
#' @importFrom BiocGenerics sort
#' @importFrom GenomeInfoDb sortSeqlevels
#' @importFrom GenomicRanges makeGRangesFromDataFrame reduce
#' @importFrom data.table fread
#' @importFrom utils read.table
#'
#' @title read annotation data
#' @description read annotation data for windows
#' The file MUST be tab separated and MUST have the following columns:\cr
......@@ -35,9 +42,9 @@
if(gzlen>0 && platform=='Windows'){
annTable <- read.table(gzfile(fname),sep="\t",stringsAsFactors=FALSE,header=TRUE)
}else if(gzlen>0){ # assuming that zcat binary is installed in Linux and Mac distributions
annTable <- data.table::fread(input=paste('zcat',fname),sep="\t",stringsAsFactors = FALSE,header=TRUE)
annTable <- fread(input=paste('zcat',fname),sep="\t",stringsAsFactors = FALSE,header=TRUE)
}else if (gzlen==0){
annTable <- data.table::fread(fname,sep="\t",stringsAsFactors = FALSE,header=TRUE)
annTable <- fread(fname,sep="\t",stringsAsFactors = FALSE,header=TRUE)
}
missingCols <- setdiff(neededCols,colnames(annTable))
if(length(missingCols)>0 & checkWindowNumber){
......@@ -78,10 +85,10 @@
}
if(is.null(uniqIds)){
if(asGRange){
gr <- GenomicRanges::makeGRangesFromDataFrame(annTable,seqnames.field='chromosome',start.field='begin',end.field='end',strand.field='strand',
gr <- makeGRangesFromDataFrame(annTable,seqnames.field='chromosome',start.field='begin',end.field='end',strand.field='strand',
ignore.strand=FALSE,keep.extra.columns=TRUE, starts.in.df.are.0based=begin0based)
gr <- GenomeInfoDb::sortSeqlevels(gr)
gr <- BiocGenerics::sort(gr)
gr <- sortSeqlevels(gr)
gr <- sort(gr)
rm(annTable)
gc()
return(gr)
......@@ -94,10 +101,10 @@
stop('There are no common unique ids between the input file and uniqueIds. Please check your data sets!')
}
if(asGRange){
gr <- GenomicRanges::makeGRangesFromDataFrame(annTable[commonIds,], seqnames.field='chromosome',start.field='begin',end.field='end',
gr <- makeGRangesFromDataFrame(annTable[commonIds,], seqnames.field='chromosome',start.field='begin',end.field='end',
strand.field='strand', ignore.strand=FALSE,keep.extra.columns=TRUE, starts.in.df.are.0based=begin0based)
gr <- GenomeInfoDb::sortSeqlevels(gr)
gr <- BiocGenerics::sort(gr)
gr <- sortSeqlevels(gr)
gr <- sort(gr)
rm(annTable)
gc()
return(gr)
......
#' @export
#'
#' @import BiocParallel DESeq2 S4Vectors
#'
#' @importFrom GenomicRanges findOverlaps
#' @importFrom methods is
#' @importFrom SummarizedExperiment assays colData
#' @importFrom S4Vectors na.omit
#' @importFrom stats p.adjust pnorm pt qf terms terms.formula
#' @importFrom utils packageVersion
#'
#' @title extract DEWseq results
#' @description This is a modified version of the
#' \code{\link[DESeq2:results]{results}} function.
......@@ -43,6 +53,7 @@
#' @param contrast this argument specifies what comparison to extract from the \code{object} to build a results table.
#' @param name the name of the individual effect (coefficient) for building a results table.
#' \code{name} argument is ignored if \code{contrast} is specified
#' @param listValues check \code{\link[DESeq2:results]{results}} for details of this parameter
#' @param cooksCutoff theshold on Cook's distance
#' @param test this is automatically detected internally if not provided.
#' @param addMLE if \code{betaPrior=TRUE} was used
......@@ -139,7 +150,7 @@ resultsDEWSeq <- function(object, annotationFile,contrast, name, listValues=c(1,
test=test, useT=useT, minmu=minmu)
} else if (parallel) {
# parallel execution
nworkers <- getNworkers(BPPARAM)
nworkers <- DESeq2:::getNworkers(BPPARAM)
idx <- factor(sort(rep(seq_len(nworkers),length.out=nrow(object))))
res <- do.call(rbind, bplapply(levels(idx), function(l) {
DESeq2:::cleanContrast(object[idx == l,,drop=FALSE], contrast,
......@@ -240,7 +251,7 @@ resultsDEWSeq <- function(object, annotationFile,contrast, name, listValues=c(1,
}
# calculate the number of windows that overlaps with each other by atleast 2 bp,
# this way, any intron/exon junctions will not be counted, but all ovelapping windows will be counted
resOvs <- GenomicRanges::findOverlaps(resGrange,minoverlap=1,drop.redundant=FALSE,drop.self=FALSE,ignore.strand=FALSE)
resOvs <- findOverlaps(resGrange,minoverlap=1,drop.redundant=FALSE,drop.self=FALSE,ignore.strand=FALSE)
nOvWindows <- as.data.frame(table(queryHits(resOvs)))[,2]
names(nOvWindows) <- rownames(mcols(resGrange))
nOvWindows <- nOvWindows[rownames(res)]
......
......@@ -3,10 +3,11 @@
#' @export
#' @title windows/regions to BED
#' @description given output of \code{\link{extractRegions}}, \code{\link({results_DEWSeq}} and significance thresholds, extract significant windows,
#' @description given output of \code{\link{extractRegions}}, \code{\link({resultsDEWSeq}} and significance thresholds, extract significant windows,
#' regions (merged neighboring significant windows) and create a BED file for visualization
#' @param windowRes output data.frame from \code{\link{results_DEWSeq}}
#' @param windowRes output data.frame from \code{\link{resultsDEWSeq}}
#' @param regionRes output data.frame from \code{\link{extractRegions}}
#' @param fileName filename to save BED output
#' @param padjCol name of the adjusted pvalue column (default: padj)
#' @param padjThresh threshold for p-adjusted value (default: 0.05)
#' @param log2FoldChangeCol name of the log2foldchange column (default: log2FoldChange)
......
#' @export
#'
#' @importFrom BiocGenerics sort
#' @importFrom GenomeInfoDb sortSeqlevels
#' @importFrom GenomicRanges makeGRangesFromDataFrame reduce
#' @importFrom S4Vectors na.omit
#' @importFrom utils setTxtProgressBar txtProgressBar
#'
#' @title stats for the top windows in each region
#' @description given window resutls and normalized counts, combine significant overlapping windows into regions and
#' for each region, pick two candidate winodws:
......@@ -50,7 +57,7 @@
#' \item the next columns will be normalized expression values of the meanWindow from individual treatment and control samples
#' }
#'
#' @param windowRes output data.frame from \code{\link{results_DEWSeq}}
#' @param windowRes output data.frame from \code{\link{resultsDEWSeq}}
#' @param padjCol name of the adjusted pvalue column (default: padj)
#' @param padjThresh threshold for p-adjusted value (default: 0.05)
#' @param log2FoldChangeCol name of the log2foldchange column (default: log2FoldChange)
......@@ -112,11 +119,11 @@ topWindowStats <- function(windowRes,padjCol='padj',padjThresh=0.05,log2FoldChan
stop('There are no common elements between significant windows/regions in under the current threshold and windows in normalizedCounts data.frame!\nPlease lower your significance cut-off thresholds and manually check if there are any significant windows under the threshold')
}
sigDat <- cbind(sigDat[commonids,],normalizedCounts[commonids,])
sigRange <- GenomicRanges::makeGRangesFromDataFrame(sigDat,seqnames.field = 'gene_id',start.field = 'begin',end.field = 'end',strand.field = 'strand',
sigRange <- makeGRangesFromDataFrame(sigDat,seqnames.field = 'gene_id',start.field = 'begin',end.field = 'end',strand.field = 'strand',
ignore.strand=FALSE,starts.in.df.are.0based=begin0based,keep.extra.columns = TRUE)
sigRange <- GenomeInfoDb::sortSeqlevels(sigRange)
sigRange <- BiocGenerics::sort(sigRange)
sigReduce <- GenomicRanges::reduce(sigRange,drop.empty.ranges=TRUE,with.revmap=TRUE,min.gapwidth=1)
sigRange <- sortSeqlevels(sigRange)
sigRange <- sort(sigRange)
sigReduce <- reduce(sigRange,drop.empty.ranges=TRUE,with.revmap=TRUE,min.gapwidth=1)
if(nrow(mcols(sigReduce))<1){
stop('Cannot find overlapping windows (regions) in input results!')
}
......@@ -134,7 +141,7 @@ topWindowStats <- function(windowRes,padjCol='padj',padjThresh=0.05,log2FoldChan
outDat$unique_id.meanWindow <- 'same window'
outDat[,c('begin.meanWindow','end.meanWindow',meanMean,meanWindowCols)] <- 0
# mean, log2foldchange comparison data.frame
pb <- utils::txtProgressBar(min=0,max=length(sigReduce),style = 3)
pb <- txtProgressBar(min=0,max=length(sigReduce),style = 3)
for(i in c(1:length(sigReduce))){
mergeInd <- sigReduce$revmap[[i]]
outDat[i,'regionStartId'] <- mcols(sigRange)[min(mergeInd),'unique_id']
......@@ -173,7 +180,7 @@ topWindowStats <- function(windowRes,padjCol='padj',padjThresh=0.05,log2FoldChan
outDat[i,meanWindowCols] <- unlist(mcols(sigRange)[mergeInd[meanInd],colnames(normalizedCounts)])
# fill out mean window data
# progress
utils::setTxtProgressBar(pb=pb,value=i)
setTxtProgressBar(pb=pb,value=i)
}
close(pb)
if(begin0based){ # return 0 based start positions
......
......@@ -9,7 +9,7 @@ extractRegions(windowRes, padjCol = "padj", padjThresh = 0.05,
begin0based = TRUE)
}
\arguments{
\item{windowRes}{output data.frame from \code{\link{results_DEWSeq}}}
\item{windowRes}{output data.frame from \code{\link{resultsDEWSeq}}}
\item{padjCol}{name of the adjusted pvalue column (default: padj)}
......@@ -26,7 +26,7 @@ data.frame
}
\description{
extract significant windows from output of
\code{\link{results_DEWSeq}} using the supplied padj and
\code{\link{resultsDEWSeq}} using the supplied padj and
log2FoldChange cut-offs and merge these significant windows
to regions and create the following columns for each
significant region:
......
......@@ -22,6 +22,8 @@ the file MUST be <TAB> separated. See Details for a description of the columns a
\item{name}{the name of the individual effect (coefficient) for building a results table.
\code{name} argument is ignored if \code{contrast} is specified}
\item{listValues}{check \code{\link[DESeq2:results]{results}} for details of this parameter}
\item{cooksCutoff}{theshold on Cook's distance}
\item{test}{this is automatically detected internally if not provided.}
......
......@@ -10,10 +10,12 @@ toBED(windowRes, regionRes, fileName, padjCol = "padj",
description = "sliding windows")
}
\arguments{
\item{windowRes}{output data.frame from \code{\link{results_DEWSeq}}}
\item{windowRes}{output data.frame from \code{\link{resultsDEWSeq}}}
\item{regionRes}{output data.frame from \code{\link{extractRegions}}}
\item{fileName}{filename to save BED output}
\item{padjCol}{name of the adjusted pvalue column (default: padj)}
\item{padjThresh}{threshold for p-adjusted value (default: 0.05)}
......@@ -27,6 +29,6 @@ toBED(windowRes, regionRes, fileName, padjCol = "padj",
\item{description}{description of this track, for visualization}
}
\description{
given output of \code{\link{extractRegions}}, \code{\link({results_DEWSeq}} and significance thresholds, extract significant windows,
given output of \code{\link{extractRegions}}, \code{\link({resultsDEWSeq}} and significance thresholds, extract significant windows,
regions (merged neighboring significant windows) and create a BED file for visualization
}
......@@ -10,7 +10,7 @@ topWindowStats(windowRes, padjCol = "padj", padjThresh = 0.05,
treatmentName = "treatment", controlName = "control", op = "max")
}
\arguments{
\item{windowRes}{output data.frame from \code{\link{results_DEWSeq}}}
\item{windowRes}{output data.frame from \code{\link{resultsDEWSeq}}}
\item{padjCol}{name of the adjusted pvalue column (default: padj)}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment