summarize_features.R 7.68 KB
Newer Older
Konrad Zych's avatar
updates  
Konrad Zych committed
1 2 3 4 5 6
#!/usr/bin/Rscript
### SIAMCAT - Statistical Inference of Associations between
### Microbial Communities And host phenoTypes R flavor EMBL
### Heidelberg 2012-2018 GNU GPL 3.0

#' @title Summarize features
7
#'
Jakob Wirbel's avatar
Jakob Wirbel committed
8
#' @description This function summarize features on a specific taxonomic level
9
#'
10 11
#' @usage summarize.features(siamcat, level = 'g__',
#'                     feature.type='original', verbose=1)
12
#'
Konrad Zych's avatar
updates  
Konrad Zych committed
13
#' @param siamcat object of class \link{siamcat-class}
14
#'
Jakob Wirbel's avatar
Jakob Wirbel committed
15
#' @param level string, at which level to summarize (e.g. \code{g__} = genus)
16
#'
Jakob Wirbel's avatar
Jakob Wirbel committed
17 18 19 20
#' @param feature.type string, on which type of features should the function
#'   work? Can be either \code{"original"}, \code{"filtered"}, or
#'   \code{"normalized"}. Please only change this paramter if you know what
#'   you are doing!
21
#'
Jakob Wirbel's avatar
Jakob Wirbel committed
22 23 24
#' @param verbose integer, control output: \code{0} for no output at all,
#'     \code{1} for only information about progress and success, \code{2} for
#'     normal level of information and \code{3} for full debug information,
Konrad Zych's avatar
updates  
Konrad Zych committed
25
#'     defaults to \code{1}
26
#'
Konrad Zych's avatar
updates  
Konrad Zych committed
27
#' @keywords internal
28
#'
29
#' @export summarize.features
30
#'
Jakob Wirbel's avatar
Jakob Wirbel committed
31 32 33 34
#' @details This function will summarize features at different taxonomic
#' levels, e.g. transform species-level relative abundance into genus-level
#' taxonomic profiles.
#'
35 36 37
#' The function expects a SIAMCAT object that either contains an entry in the
#' \link{phyloseq}{tax_table} slot of its \link{phyloseq} object, \strong{OR}
#' a set of feature names which encode taxonomic information, e.g.
Jakob Wirbel's avatar
Jakob Wirbel committed
38 39 40 41 42 43 44 45 46 47
#'
#' \code{k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Acidimicrobiales;..}
#'
#' Then, for a given taxonomic level (e.g. \code{g__}), the function will
#' sum up all the relative abundances of features belonging to the same group
#' at that specific taxonomic level.
#'
#' \strong{Please note that this function is currently maturing and not
#' necessarily reliable!!!}
#'
Jakob Wirbel's avatar
Jakob Wirbel committed
48
#' @return object of class \link{siamcat-class} with a summarized feature table
49
#'
Konrad Zych's avatar
updates  
Konrad Zych committed
50
#' @examples
Jakob Wirbel's avatar
Jakob Wirbel committed
51 52
#' ## load the phyloseq example data
#' data("GlobalPatterns")
53
#' ## create an example label
54
#' label <- create.label(meta=sample_data(GlobalPatterns),
55
#'     label = "SampleType",
Jakob Wirbel's avatar
Jakob Wirbel committed
56
#'     case = c("Freshwater", "Freshwater (creek)", "Ocean"))
57
#' # run the constructor function
58 59
#' siamcat <- siamcat(phyloseq=GlobalPatterns, label=label, verbose=1)
#' siamcat <- summarize.features(siamcat, level='Genus', verbose=3)
60
summarize.features <- function(siamcat, level = "g__",
Jakob Wirbel's avatar
Jakob Wirbel committed
61
    feature.type='original', verbose=1){
62 63 64 65 66 67 68

    if (verbose > 1) message("+ starting summarize.features")

    s.time <- proc.time()[3]

    if (verbose > 2) message("+++ summarizing on level: ",level)

69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
    if (!feature.type %in% c('original', 'filtered', 'normalized')){
        stop("Unrecognised feature type, exiting...\n")
    }
    # get the right features
    if (feature.type == 'original'){
        feat <- get.orig_feat.matrix(siamcat)
    } else if (feature.type == 'filtered'){
        if (is.null(filt_feat(siamcat, verbose=0))){
            stop('Features have not yet been filtered, exiting...\n')
        }
        feat <- get.filt_feat.matrix(siamcat)
    } else if (feature.type == 'normalized'){
        if (is.null(norm_feat(siamcat, verbose=0))){
            stop('Features have not yet been normalized, exiting...\n')
        }
        feat <- get.norm_feat.matrix(siamcat)
    }
86

87 88 89 90
    if (is.null(tax_table(physeq(siamcat), errorIfNULL = FALSE))){
        warning(paste0("Tax_table slot is empty! Will try to infer the tax",
            " table from the feature names"))
        # make sure that seperating characters are dots
91
        taxa.names <- make.names(rownames(feat))
92 93 94
        # TODO
        # checks and balances
        # check that all feature names are on the same taxonomic Level
Jakob Wirbel's avatar
Jakob Wirbel committed
95

96
        # check that desired level is in the names
97
        tax.table <- str_split(taxa.names,
98 99
            pattern='(\\.[a-z]__)|(^[a-z]__)',
            simplify = TRUE)[,-1]
100
        colnames(tax.table) <- str_extract_all(taxa.names[1],
101
            '[a-z]__')[[1]]
102
        rownames(tax.table) <- rownames(feat)
103 104 105 106
        tax_table(physeq(siamcat)) <- tax.table
    } else {
        tax.table <- tax_table(physeq(siamcat))
    }
Jakob Wirbel's avatar
Jakob Wirbel committed
107

108 109 110
    tax.table <- tax.table[rownames(feat),]
    tax.table <- tax.table@.Data
    # search for bins
Jakob Wirbel's avatar
Jakob Wirbel committed
111 112 113 114 115 116
    idx <- match(level, colnames(tax.table))
    if (is.na(idx)){
        stop('Level ', level, ' not found in the feature names. Exiting...')
    }
    # rename unclassified features
    tax.table[tax.table[,idx] == '', idx] <- 'unclassified'
117
    tax.table[is.na(tax.table[,idx]), idx] <- 'unclassified'
Jakob Wirbel's avatar
Jakob Wirbel committed
118
    bins.unique <- unique(tax.table[,idx])
119 120 121

    # make empty matrix
    summarized.feat <- matrix(NA, nrow=length(bins.unique), ncol=ncol(feat),
122
        dimnames=list(bins.unique, colnames(feat)))
123

124
    if (verbose > 1) pb <- progress_bar$new(total=length(bins.unique))
125
    for (x in bins.unique){
Jakob Wirbel's avatar
Jakob Wirbel committed
126
        summarized.feat[x,] <- colSums(feat[which(tax.table[,idx] == x),,
127
            drop=FALSE])
128
        if (verbose > 1) pb$tick()
Konrad Zych's avatar
updates  
Konrad Zych committed
129
    }
Konrad Zych's avatar
Konrad Zych committed
130

Jakob Wirbel's avatar
Jakob Wirbel committed
131
    if (verbose > 2) message("+++ summarized features table contains: ",
132
        nrow(summarized.feat)," features")
Jakob Wirbel's avatar
Jakob Wirbel committed
133 134

    if (feature.type == 'original'){
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
        # adjust tax table and entire phyloseq object for reduced feat table
        tax.table.red <- tax.table[,seq_len(idx), drop=FALSE]
        tax.table.red <- tax.table.red[tax.table.red[,idx]!='unclassified',]
        tax.table.red <- tax.table.red[!duplicated(tax.table.red),]
        if (nrow(tax.table.red) > (nrow(summarized.feat)-1)){
            warning(paste0("Tax table does not seem to be consistent in ",
                "all cases...\nWill be collapsed at level ", level))
            for (g in unique(tax.table.red[,idx])){
                temp <- tax.table.red[tax.table.red[,idx]==g,,drop=FALSE]
                if (nrow(temp)!=1){
                    tax.table.red <- tax.table.red[-which(
                        tax.table.red[,idx]==g),]
                    temp <- vapply(colnames(temp), FUN=function(x){
                        y=temp[,x]
                        if(length(unique(y))==1){
                            return(unique(y))
                        } else {
                            return(paste(y, collapse='|'))
                        }
                    }, FUN.VALUE=character(1))
                    tax.table.red <- rbind(tax.table.red, temp)
                }
            }
        }
        # add unclassified again
        tax.table.red <- rbind(tax.table.red, NA)
        tax.table.red[nrow(tax.table.red), idx] <- 'unclassified'
        rownames(tax.table.red) <- tax.table.red[,idx]
        tax.table.red <- tax.table.red[rownames(summarized.feat),]
        temp.phyloseq <- phyloseq(tax_table=tax_table(tax.table.red),
            otu_table=otu_table(summarized.feat, taxa_are_rows=TRUE))
        if (!is.null(sample_data(physeq(siamcat), errorIfNULL=FALSE))){
            sample_data(temp.phyloseq) <- sample_data(physeq(siamcat))
        }
        if (!is.null(phy_tree(physeq(siamcat), errorIfNULL=FALSE))){
            warning("Phylogenetic tree in original SIAMCAT object had ",
                "to be deleted...")
        }
        physeq(siamcat) <- temp.phyloseq
Jakob Wirbel's avatar
Jakob Wirbel committed
174
    } else if (feature.type == 'filtered'){
Jakob Wirbel's avatar
Jakob Wirbel committed
175
        filt_feat(siamcat) <- list(
Jakob Wirbel's avatar
Jakob Wirbel committed
176 177 178
            filt.feat=otu_table(summarized.feat,taxa_are_rows = TRUE),
            filt.param=filt_params(siamcat))
    } else if (feature.type == 'normalized'){
Jakob Wirbel's avatar
Jakob Wirbel committed
179
        norm_feat(siamcat) <- list(
Jakob Wirbel's avatar
Jakob Wirbel committed
180 181 182
            norm.feat=otu_table(summarized.feat,taxa_are_rows = TRUE),
            norm.param=norm_params(siamcat))
    }
183

184 185 186 187 188 189 190 191
    e.time <- proc.time()[3]
    if (verbose > 1)
    message(paste("+ finished summarize.features in",
        formatC(e.time - s.time, digits = 3), "s" ))

    if (verbose == 1)
        message("Summarized features successfully.")

192
    return(siamcat)
Konrad Zych's avatar
updates  
Konrad Zych committed
193
}