Commit 89cbe0eb authored by Sudeep Sahadevan's avatar Sudeep Sahadevan

cleaned up help annotations

parent f0f520a6
{
"activeTab" : 7
"activeTab" : 0
}
\ No newline at end of file
build-last-errors="[]"
build-last-errors-base-dir="~/workspace/DEWSeq/"
build-last-outputs="[{\"output\":\"==> devtools::document(roclets = c('rd', 'collate', 'namespace'))\\n\\n\",\"type\":0},{\"output\":\"Updating DEWSeq documentation\\n\",\"type\":2},{\"output\":\"Writing NAMESPACE\\n\",\"type\":1},{\"output\":\"Loading DEWSeq\\n\",\"type\":2},{\"output\":\"Writing extractRegions.Rd\\nWriting results_DEWSeq.Rd\\nWriting topWindowStats.Rd\\nWriting NAMESPACE\\n\",\"type\":1},{\"output\":\"Warning message:\\ncountGeneRegion.Rd is missing name/title. Skipping \\n\",\"type\":2},{\"output\":\"Documentation completed\\n\\n\",\"type\":1},{\"output\":\"==> R CMD INSTALL --preclean --no-multiarch --with-keep.source DEWSeq\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/home/sahadeva/R/x86_64-pc-linux-gnu-library/3.6’\\n\",\"type\":1},{\"output\":\"* installing *source* package ‘DEWSeq’ ...\\n\",\"type\":1},{\"output\":\"** using staged installation\\n\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"Warning: /home/sahadeva/workspace/DEWSeq/man/toBED.Rd:30: unexpected TEXT '(', expecting '{'\\n\",\"type\":2},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** installing vignettes\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from temporary location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from final location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\",\"type\":1},{\"output\":\"* DONE (DEWSeq)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]"
build-last-outputs="[{\"output\":\"==> devtools::document(roclets = c('rd', 'collate', 'namespace'))\\n\\n\",\"type\":0},{\"output\":\"Updating DEWSeq documentation\\n\",\"type\":2},{\"output\":\"Writing NAMESPACE\\n\",\"type\":1},{\"output\":\"Loading DEWSeq\\n\",\"type\":2},{\"output\":\"Writing NAMESPACE\\n\",\"type\":1},{\"output\":\"Warning message:\\ncountGeneRegion.Rd is missing name/title. Skipping \\n\",\"type\":2},{\"output\":\"Documentation completed\\n\\n\",\"type\":1},{\"output\":\"==> R CMD INSTALL --preclean --no-multiarch --with-keep.source DEWSeq\\n\\n\",\"type\":0},{\"output\":\"* installing to library ‘/home/sahadeva/R/x86_64-pc-linux-gnu-library/3.6’\\n\",\"type\":1},{\"output\":\"* installing *source* package ‘DEWSeq’ ...\\n\",\"type\":1},{\"output\":\"** using staged installation\\n\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"Warning: /home/sahadeva/workspace/DEWSeq/man/toBED.Rd:30: unexpected TEXT '(', expecting '{'\\n\",\"type\":2},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"** installing vignettes\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from temporary location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package can be loaded from final location\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** testing if installed package keeps a record of temporary installation path\\n\",\"type\":1},{\"output\":\"* DONE (DEWSeq)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]"
compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}"
files.monitored-path=""
find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":true,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}"
......
{
"cursorPosition" : "15,18",
"cursorPosition" : "25,35",
"scrollLine" : "0"
}
\ No newline at end of file
......@@ -8,6 +8,7 @@
~%2Fworkspace%2FDEWSeq%2FDESCRIPTION="19FCEC81"
~%2Fworkspace%2FDEWSeq%2FNAMESPACE="283CE13C"
~%2Fworkspace%2FDEWSeq%2FR%2Fannotate.R="76AA8897"
~%2Fworkspace%2FDEWSeq%2FR%2FcountGeneRegion.R="D15B2889"
~%2Fworkspace%2FDEWSeq%2FR%2FcreateRegions.R="C3B9B5A7"
~%2Fworkspace%2FDEWSeq%2FR%2Fcreate_regions.R="382777EF"
~%2Fworkspace%2FDEWSeq%2FR%2Fextract.R="BF52E426"
......
......@@ -2,7 +2,7 @@
/home/sahadeva/hentze/projects/eCLIP/Gal3/analysis/RNAse_01/analysis/Gal3_RNAseq01_analysis.Rmd="9DE3ECE4"
/home/sahadeva/hentze/projects/eCLIP/Gal3/analysis/htseqclip2/R-files/Gal3_slidingWindow_w50_s20.Rmd="C1F852C9"
/home/sahadeva/workspace/DEWSeq/NAMESPACE="6F715446"
/home/sahadeva/workspace/DEWSeq/R/annotate.R="7A2667F5"
/home/sahadeva/workspace/DEWSeq/R/countGeneRegion.R="23FC5E9A"
/home/sahadeva/workspace/DEWSeq/R/readers.R="E867891D"
/home/sahadeva/workspace/DEWSeq/R/topWindow.R="CDA9F0A5"
/home/sahadeva/workspace/kruschke-doing_bayesian_data_analysis/2e/BernMetrop.R="D644F739"
......
......@@ -7,8 +7,7 @@ Authors@R: c(
person("Thomas", "Schwarzl", email="schwarzl@embl.de", role = c("aut", "ctb"))
)
Maintainer: Hentze bioinformatics team <biohentze@embl.de>
Description: Differential expression analysis of windows from eCLIP data
Use four spaces when indenting paragraphs within the Description.
Description: Differential expression analysis of windows for next-generation sequencing data like eCLIP or iCLIP data.
Imports:
DESeq2,
data.table,
......
# Generated by roxygen2: do not edit by hand
export(annotateResults)
export(countGeneRegion)
export(extractRegions)
export(results_DEWSeq)
......
#' Annotate regions with the gene regions they overlap
#'
#' @description
#' For the given region table and sliding window annotation file, find the
#' overlaps between regions and annotation entries and return one row per
#' overlap that covers at least \code{minOverlap} of the region width.
#' Both inputs are keyed by \code{gene_id} and \code{strand}, so a region is
#' only compared against annotation entries of the same gene.
#'
#' @param regionRes data.frame of regions, MUST have the following columns:\cr
#' \code{chromosome} chromosome name \cr
#' \code{regionStartId} unique id of the left most window of the region \cr
#' \code{region_begin} region start co-ordinate \cr
#' \code{region_end} region end co-ordinate \cr
#' \code{strand} strand \cr
#' \code{gene_id} gene id \cr
#' \code{gene_name} gene name \cr
#' @param annotationFile sliding window annotation file, handed to the
#' package internal annotation reader
#' @param begin0based TRUE (default) if the start co-ordinates in
#' \code{regionRes} and \code{annotationFile} are 0-based
#' @param minOverlap minimum fraction of the region width that must be covered
#' by an annotation entry for the overlap to be reported (default: 0.5)
#'
#' @return data.frame with the columns \code{unique_id}, \code{gene_type},
#' \code{gene_name}, \code{gene_region} (taken from the annotation),
#' \code{regionStartId} and \code{gene_id}, one row per selected overlap
#'
#' @export
countGeneRegion <- function(regionRes, annotationFile, begin0based = TRUE, minOverlap = 0.5) {
  requiredCols <- c('chromosome', 'regionStartId', 'region_begin', 'region_end',
                    'strand', 'gene_id', 'gene_name')
  missingCols <- setdiff(requiredCols, colnames(regionRes))
  if (length(missingCols) > 0) {
    # the listed names must match requiredCols, otherwise the message
    # sends the user looking for columns this function never reads
    stop('Input data.frame is missing required columns, needed columns:\n',
         '  chromosome: chromosome name\n',
         '  regionStartId: unique id of the left most window of the region\n',
         '  region_begin: region start co-ordinate\n',
         '  region_end: region end co-ordinate\n',
         '  strand: strand\n',
         '  gene_id: gene id\n',
         '  gene_name: gene name\n',
         'Missing columns: ', paste(missingCols, collapse = ", "))
  }

  # regions as GRanges; gene_id is used as the sequence name so that overlaps
  # are only searched within the same gene. The co-ordinate columns are named
  # explicitly instead of relying on name matching inside GenomicRanges.
  regDat <- GenomicRanges::makeGRangesFromDataFrame(
    regionRes[, requiredCols],
    seqnames.field = 'gene_id',
    start.field = 'region_begin',
    end.field = 'region_end',
    strand.field = 'strand',
    ignore.strand = FALSE,
    starts.in.df.are.0based = begin0based,
    keep.extra.columns = TRUE
  )
  regDat <- GenomeInfoDb::sortSeqlevels(regDat)
  regDat <- BiocGenerics::sort(regDat)

  # annotation entries as GRanges, keyed the same way as the regions
  annDat <- .readAnnotation(fname = annotationFile, uniqIds = NULL,
                            asGRange = FALSE, checkWindowNumber = FALSE)
  annDat <- GenomicRanges::makeGRangesFromDataFrame(
    annDat,
    seqnames.field = 'gene_id',
    start.field = 'begin',
    end.field = 'end',
    strand.field = 'strand',
    ignore.strand = FALSE,
    starts.in.df.are.0based = begin0based,
    keep.extra.columns = TRUE
  )
  annDat <- GenomeInfoDb::sortSeqlevels(annDat)
  annDat <- BiocGenerics::sort(annDat)

  # keep only the hits whose intersection covers >= minOverlap of the region
  resAnnOv <- GenomicRanges::findOverlaps(regDat, annDat)
  hitRegions <- regDat[queryHits(resAnnOv)]
  ovRegion <- GenomicRanges::pintersect(hitRegions, annDat[subjectHits(resAnnOv)],
                                        drop.nohit.ranges = TRUE)
  selectedOV <- BiocGenerics::width(ovRegion) / BiocGenerics::width(hitRegions) >= minOverlap
  if (!any(selectedOV)) {
    stop('Cannot find overlaps between regionRes and annotationFile. Please lower minOverlap threshold and try again')
  }

  regionStartId <- mcols(regDat[queryHits(resAnnOv)[selectedOV]])[, 'regionStartId']
  # the gene id is everything in front of the first ':' of the region start id
  geneId <- gsub('\\:.*$', '', regionStartId)
  annCols <- c('unique_id', 'gene_type', 'gene_name', 'gene_region')
  outDat <- data.frame(mcols(annDat[subjectHits(resAnnOv)[selectedOV]])[, annCols],
                       regionStartId = regionStartId, gene_id = geneId)
  return(outDat)
}
......@@ -124,44 +124,3 @@ extractRegions <- function(windowRes,padjCol='padj',padjThresh=0.05,log2FoldChan
rownames(regionRes) <- NULL
return(regionRes[with(regionRes,order(chromosome,region_begin)),])
}
#' Annotate regions with the gene regions they overlap
#'
#' @description
#' For the given region table and sliding window annotation file, find the
#' overlaps between regions and annotation entries and return one row per
#' overlap that covers at least \code{minOverlap} of the region width.
#' Both inputs are keyed by \code{gene_id} and \code{strand}, so a region is
#' only compared against annotation entries of the same gene.
#'
#' @param regionRes data.frame of regions, MUST have the following columns:\cr
#' \code{chromosome} chromosome name \cr
#' \code{regionStartId} unique id of the left most window of the region \cr
#' \code{region_begin} region start co-ordinate \cr
#' \code{region_end} region end co-ordinate \cr
#' \code{strand} strand \cr
#' \code{gene_id} gene id \cr
#' \code{gene_name} gene name \cr
#' @param annotationFile sliding window annotation file, handed to the
#' package internal annotation reader
#' @param begin0based TRUE (default) if the start co-ordinates in
#' \code{regionRes} and \code{annotationFile} are 0-based
#' @param minOverlap minimum fraction of the region width that must be covered
#' by an annotation entry for the overlap to be reported (default: 0.5)
#'
#' @return data.frame with the columns \code{unique_id}, \code{gene_type},
#' \code{gene_name}, \code{gene_region} (taken from the annotation),
#' \code{regionStartId} and \code{gene_id}, one row per selected overlap
#'
#' @export
countGeneRegion <- function(regionRes, annotationFile, begin0based = TRUE, minOverlap = 0.5) {
  requiredCols <- c('chromosome', 'regionStartId', 'region_begin', 'region_end',
                    'strand', 'gene_id', 'gene_name')
  missingCols <- setdiff(requiredCols, colnames(regionRes))
  if (length(missingCols) > 0) {
    # the listed names must match requiredCols, otherwise the message
    # sends the user looking for columns this function never reads
    stop('Input data.frame is missing required columns, needed columns:\n',
         '  chromosome: chromosome name\n',
         '  regionStartId: unique id of the left most window of the region\n',
         '  region_begin: region start co-ordinate\n',
         '  region_end: region end co-ordinate\n',
         '  strand: strand\n',
         '  gene_id: gene id\n',
         '  gene_name: gene name\n',
         'Missing columns: ', paste(missingCols, collapse = ", "))
  }

  # regions as GRanges; gene_id is used as the sequence name so that overlaps
  # are only searched within the same gene. The co-ordinate columns are named
  # explicitly instead of relying on name matching inside GenomicRanges.
  regDat <- GenomicRanges::makeGRangesFromDataFrame(
    regionRes[, requiredCols],
    seqnames.field = 'gene_id',
    start.field = 'region_begin',
    end.field = 'region_end',
    strand.field = 'strand',
    ignore.strand = FALSE,
    starts.in.df.are.0based = begin0based,
    keep.extra.columns = TRUE
  )
  regDat <- GenomeInfoDb::sortSeqlevels(regDat)
  regDat <- BiocGenerics::sort(regDat)

  # annotation entries as GRanges, keyed the same way as the regions
  annDat <- .readAnnotation(fname = annotationFile, uniqIds = NULL,
                            asGRange = FALSE, checkWindowNumber = FALSE)
  annDat <- GenomicRanges::makeGRangesFromDataFrame(
    annDat,
    seqnames.field = 'gene_id',
    start.field = 'begin',
    end.field = 'end',
    strand.field = 'strand',
    ignore.strand = FALSE,
    starts.in.df.are.0based = begin0based,
    keep.extra.columns = TRUE
  )
  annDat <- GenomeInfoDb::sortSeqlevels(annDat)
  annDat <- BiocGenerics::sort(annDat)

  # keep only the hits whose intersection covers >= minOverlap of the region
  resAnnOv <- GenomicRanges::findOverlaps(regDat, annDat)
  hitRegions <- regDat[queryHits(resAnnOv)]
  ovRegion <- GenomicRanges::pintersect(hitRegions, annDat[subjectHits(resAnnOv)],
                                        drop.nohit.ranges = TRUE)
  selectedOV <- BiocGenerics::width(ovRegion) / BiocGenerics::width(hitRegions) >= minOverlap
  if (!any(selectedOV)) {
    stop('Cannot find overlaps between regionRes and annotationFile. Please lower minOverlap threshold and try again')
  }

  regionStartId <- mcols(regDat[queryHits(resAnnOv)[selectedOV]])[, 'regionStartId']
  # the gene id is everything in front of the first ':' of the region start id
  geneId <- gsub('\\:.*$', '', regionStartId)
  annCols <- c('unique_id', 'gene_type', 'gene_name', 'gene_region')
  outDat <- data.frame(mcols(annDat[subjectHits(resAnnOv)[selectedOV]])[, annCols],
                       regionStartId = regionStartId, gene_id = geneId)
  return(outDat)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/annotate.R
\name{annotateResults}
\alias{annotateResults}
\title{annotate DESeq2 results}
\usage{
annotateResults(results, annotationFile)
}
\arguments{
\item{results}{DESeq2 results data.frame, from \code{\link{DESeq}}, \code{\link{results}}}
\item{annotationFile}{sliding window annotation file, can be either plain text or .gz file,
the file MUST be TAB separated, and MUST have the following columns:\cr
\code{chromosome} chromosome name \cr
\code{unique_id} unique id of the window \cr
\code{begin} window start co-ordinate \cr
\code{end} window end co-ordinate \cr
\code{strand} strand \cr
\code{gene_id} gene id \cr
\code{gene_name} gene name \cr
\code{gene_type} gene type annotation \cr
\code{gene_region} gene region \cr
\code{Nr_of_region} number of the current region \cr
\code{Total_nr_of_region} total number of regions \cr
\code{window_number} window number \cr}
}
\value{
data.frame
}
\description{
annotate DESeq2 results using annotation file from htseq-clip
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment