Commit eccf630e authored by Lars Velten's avatar Lars Velten
Browse files

Initial commit

parents
Pipeline #7900 failed with stages
in 40 seconds
Package: RNAMagnet
Type: Package
Title: Infers physical and signaling interactions from single-cell RNA-seq data
Version: 0.1.0
Author: Lars Velten [aut,cre], Chiara Baccin [ctb], Julia Schnell [ctb]
Maintainer: Lars Velten <lars.velten@embl.de>
Description: RNAMagnet uses a curated list of ligand-receptor pairs
to predict interactions between cell types identified by single-cell
RNA-seq.
License: GPLv3
Encoding: UTF-8
LazyData: true
Imports:
Seurat,
Matrix,
Rmagic,
igraph,
e1071,
parallel,
reshape2,
plyr
Suggests:
rmarkdown,
knitr,
pheatmap,
ggplot2,
DESeq2
RoxygenNote: 6.1.0
VignetteBuilder: knitr
exportPattern("^[[:alpha:]]+")
CIBERSORT <- function(response,features, transform, usegenes, norm=T, nu= c(0.25,0.5,0.75), optim.nu =F, mc.cores=3, ...) {
if (norm) features <- apply(features, 2, function(x) x / mean(x) * mean(response))
response <- transform(response) -> allresponse
response <- response[usegenes]
allresponse <- allresponse[intersect(rownames(features),names(allresponse))]
features <- transform(features[usegenes, ])
#
# features <- apply(features,2,scale)
# response <- scale(response)
#proper way to choose nu
#set up CV scheme
if (optim.nu & length(nu)>1) {
cvorder <- sample(1:nrow(features), nrow(features))
cvset <- rep(c(1:5), length.out = nrow(features))
nuerror <- sapply(nu, function(n) {
tuning <- do.call(rbind,parallel::mclapply(1:5, function(i) {
train <- cvorder[cvset != i]
test <- cvorder[cvset == i]
svr <- e1071::svm(features[train,], y = response[train], type="nu-regression", kernel="linear", nu = n, ...)
out <- data.frame(real = response[test], predicted = e1071:::predict.svm(svr, newdata = features[test,]))
}, mc.cores=mc.cores))
sqrt(mean((tuning$predicted - tuning$real) ^2))
})
nu <- nu[which.min(nuerror)]
}
SVR <- parallel::mclapply(nu, function(n) e1071::svm(features, y = response, type="nu-regression", kernel="linear", nu = n, ...))
get_RMSE <- function(SVR, truey) {
predicted <- e1071:::predict.svm(SVR)
sqrt(mean((predicted - truey) ^2))
}
RMSE <- sapply(SVR, get_RMSE, truey = response)
SVR <- SVR[[which.min(RMSE)]]
coef <- t(SVR$coefs) %*% SVR$SV
coef[coef<0] <- 0
#P value estimation.
predicted <- e1071:::predict.svm(SVR)
test_statistics <- cor(predicted , response)
#CIBERSORT is then run on m*i to produce a vector of estimated cellular fractions, f*i. CIBERSORT determines the correlation coefficient R*i between the random mixture m*i and the reconstituted mixture, f*i ?? B. This process is repeated for I iterations (I = 500 in this work) to produce R*.
out <- t(coef/sum(coef))
attr(out, "p") <- cor.test(predicted , response)$p.value
attr(out, "SV") <- SVR$SV
out
}
#'Runs CIBERSORT for decomposing bulk RNA-seq samples using a single cell reference
#'
#' This is a custom implementation of the algorithm described by Newman et al (Nautre Methods 12:453-457). CIBERSORT is an algorithm for estimating the cell type composition of a bulk sample, given a gene expression profile of the sample and a known gene expression profile for each cell type potentially contributing to the sample.
#'@param exprs A data frame or matrix of raw read counts of \epmh{bulk} RNA-seq samples. Column names correspond to sample names, row names to genes.
#'@param base A matrix of read counts representing the gene expression profiles of all cell types that might contribute to the bulk samples. See examples below for how to generate this object from an object of class \code{seurat}.
#'@param design A named vector assigning sample names to sample class, see examples below.
#'@param markergenes A vector of genes to be included in the analysis, defaults to \code{intersect( rownames(mean_by_cluster), rownames(exprs) )}
#'@param transform A function to be applied to columns of \code{exprs} and \{base} following normalization. Defaults to no transformation since bulk RNA-seq profiles are generated by pooling up RNA from constituent cell types. In the original CIBERSORT paper, a logarithmic transform was used.
#'@param nu Different values of nu to evaluate support vector regression at, see \code{\link[e1071]svm}. Nu defines how many support vectors (i.e. genes) to use in regression.
#'@param optim.nu In the original CIBERSORT implementation, SVR is evaluated at several values of nu and the value with the best RSME is chosen. This can lead to overfitting. If \code{optim.nu} is set to \code{TRUE}, the value for nu is chosen by cross validation, which leads to longer runtimes.
#'@param mc.cores Number of cores used, e.g. for the parallel evaluation at different balues of nu.
#'@param ... Parameters passed to \code{\link[e1071]svm}
#'@return
#'@examples
#'\dontrun{
#'#See also package vignette CIBERORT.Rmd
#'#1. identify marker genes from a seurat object
#'NicheMarkers10x <- FindAllMarkers(NicheData10x, test.use = "roc")
#'usegenes <- unique(NicheMarkers10x$gene[(NicheMarkers10x$myAUC > 0.8 |NicheMarkers10x$myAUC < 0.2) ])
#'
#'#2. compute mean expression per cell type
#'mean_by_cluster <- do.call(cbind, lapply(unique(NicheData10x@ident), function(x) {
#'apply(NicheData10x@raw.data[usegenes,NicheData10x@cell.names][,NicheData10x@ident == x], 1,mean )
#'}))
#'colnames(mean_by_cluster) <- unique(NicheData10x@ident)
#'
#'#3. Create a vector that maps samples to biological class
#'LCM_design <- NicheMetaDataLCM$biological.class
#'names(LCM_design) <- NicheMetaDataLCM$id
#'
#'4. Run CIBERSORT
#'CIBER <- runCIBERSORT(NicheDataLCM, mean_by_cluster,usegenes, LCM_design, mc.cores=3)
#'}
#'@export
runCIBERSORT <- function(exprs, base,design, markergenes = intersect( rownames(mean_by_cluster), rownames(exprs) ),transform=function(x) x,nu = c(0.25,0.5,0.75), optim.nu = F, mc.cores= 3, ...) {
res <- list()
for (i in 1:ncol(exprs)) {
x <- exprs[,i]
names(x) <- rownames(exprs)
res[[i]] <- CIBERSORT(x, features=base, transform=transform, usegenes = intersect(markergenes, rownames(exprs)), nu=nu, optim.nu = optim.nu, mc.cores = mc.cores, ...)
}
#out <- apply(exprs,2, CIBERSORT, features=mean_by_cluster, kernel=kernel, cost =cost, method = method,alpha=alpha, gamma=gamma, transform=transform, usegenes = intersect(markergenes, rownames(exprs)), norm=norm, nu=nu)
pvals <- data.frame(
pvals = sapply(res, attr, "p"),
samples = colnames(exprs)
)
svs <- lapply(res, attr, "SV")
out <- do.call(cbind,res)
colnames(out) <- colnames(exprs)
rownames(out) <- colnames(mean_by_cluster)
out <- reshape2::melt(out)
out$experiment <- design[out$Var2]
colnames(out) <- c("CellType","SampleID","Fraction","SampleClass")
out
}
plotCIBER <- function(ciber,nrow=NULL) {
ggplot(aes(x = Var1, y = value, fill=Var1), data=ciber) + geom_bar(stat="summary", fun.y=mean) + scale_x_discrete(labels = annos) + scale_fill_manual(values=colors, labels=annos, guide=F) + facet_wrap(~experiment,nrow=nrow) + geom_errorbar(stat="summary",fun.ymin = function(x) mean(x)-sd(x), fun.ymax = function(x) mean(x)+sd(x), width=0.2) + theme(axis.text.x = element_text(angle=90, size=8)) + ylab("% of population") + xlab("")
}
get_sample <- function(runid, usegenes,npop=10,ncell=1000, replicates = 1, kernel ="radial",cost = 1, transform="log2", method="SVR",alpha=0.5, gamma=NULL, nu = c(0.25,0.5,0.75)){
raw <- seurat@raw.data[,seurat@cell.names]
#if (transform == "log2") transform <- log2p1 else transform <- lin
what.to.sample <- sample(as.character(unique(seurat@ident)), npop)
fractions <- rdirichlet(1, rep(1,npop))
fractions <- round(ncell*rdirichlet(1, rep(1,npop)))
out <- replicate(replicates, {
gex <- lapply(1:length(what.to.sample), function(x) {
use <- sample(seurat@cell.names[seurat@ident == what.to.sample[x]], fractions[x], replace=T)
if (length(use)==1) raw[,use] else apply(raw[,use],1,sum)
})
gex <- do.call(cbind,gex)
gex <- apply(gex,1,sum)
# all <- rep(0, length(unique(seurat@ident)) )
# names(all ) <- unique(seurat@ident)
# all[what.to.sample] <- fractions
#
# gex <- mean_by_cluster %*% all
# gex <- gex[,1]
usegenes <- intersect(usegenes, names(gex))#,size = ngene)
#features <- transform(mean_by_cluster[usegenes, ])
test_ciber <- CIBERSORT(gex, mean_by_cluster, kernel, cost, transform, usegenes,method = method,alpha=alpha, gamma=gamma, nu=nu)
})
all <- rep(0, length(unique(seurat@ident)) )
names(all ) <- unique(seurat@ident)
all[what.to.sample] <- fractions/ncell
test_ciber <- apply(out,1,mean)
if (method == "SVR") merged <- data.frame(truth = all, result = test_ciber, cluster = names(all),runid = runid) else merged <- data.frame(truth = all, result = test_ciber, cluster = names(all),runid = runid)
merged
}
runSampling <- function(usegenes, nsamples=15,npop=10,replicates=1,ncell=1000, kernel ="radial",cost = 1, transform="log2", method = "SVR",alpha=0.5, gamma = NULL, nu=c(0.25,0.5,0.75)) {
samples <- lapply(1:nsamples, get_sample, usegenes=usegenes, npop=npop,ncell=ncell, replicates=replicates,kernel =kernel,cost = cost, transform=transform, method=method, alpha=alpha, gamma=gamma, nu=nu)
rsqrs <- sapply(samples, function(x) cor(x$truth,x$result)^2)
rho <- sapply(samples, function(x) cor(x$truth,x$result,method="spearman"))
out <- do.call(rbind, samples)
out$gamma <- gamma; out$npop <- npop
list(rsqr = mean(rsqrs), rho = mean(rho), samples = out, allrho = rho, allrsqrs = rsqrs)
}
get_sample_fixed <- function(runid, populations,frequencies,usegenes,ncell=1000, kernel ="radial",cost = 1, transform="log2", method="SVR",alpha=0.5, gamma=NULL){
raw <- as.matrix(raw)
# if (transform == "log2") transform <- log2p1 else transform <- lin
what.to.sample <- populations
fractions <- round(ncell*frequencies)
gex <- lapply(1:length(what.to.sample), function(x) {
use <- sample(cell.names[ident == what.to.sample[x]], fractions[x], replace=T)
if (length(use)==1) raw[,use] else apply(raw[,use],1,sum)
})
gex <- do.call(cbind,gex)
gex <- apply(gex,1,sum)
all <- rep(0, length(unique(ident)) )
names(all ) <- unique(ident)
all[what.to.sample] <- fractions/ncell
# all <- rep(0, length(unique(seurat@ident)) )
# names(all ) <- unique(seurat@ident)
# all[what.to.sample] <- fractions
#
# gex <- mean_by_cluster %*% all
# gex <- gex[,1]
usegenes <- intersect(usegenes, names(gex))#,size = ngene)
# mean_by_cluster <- apply(mean_by_cluster, 2, function(x) x / mean(x) * mean(gex))
# features <- transform(mean_by_cluster[usegenes, ])
# test_ciber <- CIBERSORT(transform(gex[usegenes]), features, kernel, cost, method = method,alpha=alpha)
test_ciber <- CIBERSORT(gex, mean_by_cluster, kernel, cost, transform = transform, usegenes = usegenes, method = method,alpha=alpha, gamma = gamma)
if (method == "SVR") merged <- data.frame(truth = all, result = test_ciber[,1], cluster = names(all),runid = runid, stringsAsFactors = F) else merged <- data.frame(truth = all, result = test_ciber, cluster = names(all),runid = runid, stringsAsFactors = F)
merged
}
#'Runs RNAMagnet for identifying localization of single cells to anchor populations
#'
#' RNAMagnet comes in two flavors: \code{RNAMagnetAnchors} and \code{\link{RNAMagnetSignaling}}. This function is meant to identify, for each single cell from a \code{\link[Seurat]{seurat}} object, the propensity to physically adhere to a set of anchor populations. For example, this function can identify if a single cell is more likely to bind to arteriolar or sinusoidal vessels.
#'@param seurat An object of class \code{\link[Seurat]{seurat}} containing a valid clustering and t-SNE information. For information on how to create such an object, see https://satijalab.org/seurat/get_started.html
#'@param anchors A character vector of anchor populations. Entries must be levels of \code{seurat@@ident}
#'@param return Determines object to return; one of "summary" or "rnamagnet-class"
#'@param neighborhood.distance See detail
#'@param neighborhood.gradient See detail
#'@param ... For explanation of all further parameters, see \code{\link{RNAMagnetBase}}.
#'@return Returns a data frame containing, for each cell, the propensity to physically interact with the various anchor populations as well as an overall adhesiveness score and prefered interaction partner. Alternatively, if \code{return} ist set to \code{rnamagnet-class}, an object of class \code{\link{rnamagnet}}.
#'@details Highly similar cell types can localize to different physical structures. For example, one type of pericyte may localize to sinusoids and another type may localize to arterioles. To increase the resolution in this scenario, \code{RNAMagnetAnchors} for each pair of single cells and anchor populations therefore computes a specificity score that describes how the single cell differs from \emph{similar} cells.
#'@details In our hands, the behavior of RNAMagnet is largely insensitive to the parameters \code{neighborhood.distance} and \code{neighborhood.gradient} that define what actually constitutes a similar cell. To explore how these parameters affect the weight each single cell carries in specificity score computation, see the example code.
#'@examples
#'\dontrun{
#'result <- RNAMagnetAnchors(NicheData10x, anchors = c("Sinusoidal ECs","Arteriolar ECs","Smooth muscle","Osteoblasts"))
#'qplot(x =NicheData10x@dr$tsne@cell.embeddings[,1], y=NicheData10x@dr$tsne@cell.embeddings[,2], \
#' color = direction,size=I(0.75),alpha= adhesiveness,data=result) + \
#' scale_color_brewer(name = "RNAMagnet\nLocation",palette= "Set1") + \
#' scale_alpha_continuous(name = "RNAMagnet\nAdhesiveness")
#'
#'#To understand the effect of the neighborhood.distance and neighborhood.gradient parameters
#'#consider the following snippet:
#'myMagnet <- RNAMagnetAnchors(NicheData10x, return = "rnamagnet-class", \
#' anchors = c("Sinusoidal ECs","Arteriolar ECs","Smooth muscle","Osteoblasts"))
#'use <- 1234 #select some cell of interest
#'kernel <- function(x,k=10, x0=0.5) 1/(1+exp(-k * (x-x0))) #defines weighing function
#'plf <- data.frame(x = seurat@dr$tsne@cell.embeddings[,1],y = seurat@dr$tsne@cell.embeddings[,2], \
#' weight = 1-kernel(dbig[use,],k=neighborhood.gradient,x0=neighborhood.distance))
#'qplot(x = x, y= y, color = scores, data=plf) + \
#' scale_color_gradientn(name = "Weight in local neighborhood", colours = c("#EEEEEE","#999999","blue","red")) + \
#' geom_point(color = "black", shape = 17, size= 3, data=plf[use,])
#'}
#'@export
RNAMagnetAnchors <- function(seurat, anchors, return = "summary", neighborhood.distance = 0.7, neighborhood.gradient = 3, .k = 10, .x0 = 0.5, .minExpression = 0, .version = "latest", .cellularCompartment = c("Membrane","ECM","Both"), .manualAnnotation = "Correct" ) {
myMagnet <- RNAMagnetBase(seurat, anchors, neighborhood.distance,neighborhood.gradient, .k, .x0, .minExpression, .version, .cellularCompartment, .manualAnnotation,TRUE)
if (return=="rnamagnet-class") myMagnet else data.frame(direction = as.factor(colnames(myMagnet@specificity)[apply(myMagnet@specificity,1,which.max)]), adhesiveness = myMagnet@adhesiveness, myMagnet@specificity[,anchors])
}
#'Runs RNAMagnet for identifying signaling interactions between cells
#'
#' RNAMagnet comes in two flavors: \code{RNAMagnetSignaling} and \code{\link{RNAMagnetAnchors}}. This function is meant to identify, for each cell type from a \code{\link[Seurat]{seurat}} object, potential signaling interactions with other cell types.
#'@param seurat An object of class \code{\link[Seurat]{seurat}} containing a valid clustering and t-SNE information. For information on how to create such an object, see https://satijalab.org/seurat/get_started.html
#'@param return Determines object to return; one of "summary" or "rnamagnet-class"
#'@param ... For explanation of all further parameters, see \code{\link{RNAMagnetBase}}.
#'@return Returns an objects of class \code{\link{rnamagnet}}. \code{\link{plotNetwork}} or \code{\link{plotInteraction}} can be used for further analyses.
#'@export
RNAMagnetSignaling <- function(seurat, return = "summary", neighborhood.distance = NULL, neighborhood.gradient = NULL, .k = 10, .x0 = 0.5, .minExpression = 10, .version = "latest", .cellularCompartment = c("Secreted","Both"), .manualAnnotation = "Correct" ) {
RNAMagnetBase(seurat, anchors = NULL, neighborhood.distance,neighborhood.gradient, .k, .x0, .minExpression, .version, .cellularCompartment, .manualAnnotation, FALSE)
}
#'Low-level function to run core RNA magnet steps
#'
#' Users are advised to use the top-level functions \code{\link{RNAMagnetAnchors}} and \code{\link{RNAMagnetSignaling}} which appropriately set default parameters and return user-friendly return values.
#' This is a low level function for development purposes.
#'@param seurat An object of class \code{\link[Seurat]{seurat}} containing a valid clustering and t-SNE information. For information on how to create such an object, see https://satijalab.org/seurat/get_started.html
#'@param anchors A character vector of anchor populations. Entries must be levels of \code{seurat@@ident}. If \code{NULL}: All entries of \code{seurat@@ident} are used as anchors.
#'@param neighborhood.distance See \code{\link{RNAMagnetAnchors}}
#'@param neighborhood.gradient See \code{\link{RNAMagnetAnchors}}
#'@param .k Fuzzification parameter, see detail. Recommended to leave at the default value.
#'@param .x0 Fuzzification parameter, see detail. Recommended to leave at the default value.
#'@param .minExpression Minimal expression level of genes to be included, specified as the number of cells in the dataset that express the gene.
#'@param .version The version of the underlying ligand-receptor database. Recommended to leave at the default value. See \code{\link{getLigandsReceptors}}.
#'@param .cellularCompartment Types of ligands to be included. For physical interactions, defaults to \code{ c("Membrane","ECM","Both")}. See \code{\link{getLigandsReceptors}}.
#'@param .manualAnnotation Annotation status of ligands to be included. Default to \code{"Correct"}. See \code{\link{getLigandsReceptors}}.
#'@param .symmetric Assume that if A is a receptor for B, B is also a receptor for A
#'@details The algorithm takes the following steps: \enumerate{
#'\item Ligand-receptor pairs are selected based on the parameters \code{.version}, \code{.cellularCompartment} and \code{.manualAnnotation}. Choice of \code{.cellularCompartment} is crucial for determining the algorithm's behavior, e.g. if set to \code{c("Secreted","Both")}, paracrine signaling interactions involving soluble ligands are investigated.
#'\item Dropout values in the expression levels of ligands and receptors are imputed using \code{\link[Rmagic]{magic}}
#'\item Mean expression level of ligands and receptors is computed for all anchor populations
#'\item For each cell or anchor population, the expression of each ligand and receptor is encoded as a fuzzy logic variable
#'\item Fuzzy logic AND is used to compute probabilities for a given interaction to be active between a single cell and an anchor population
#'\item An interaction score is computed as the sum of interaction probabilities across all possible ligand-receptor pairs
#'\item Specificty scores are computed by comparing interaction scores to average interaction scores in a local neighborhood.
#'}
#'@details Add the methods section of the paper here!
#'@return Returns an object of class \code{\link{rnamagnet}}
#'@export
RNAMagnetBase <- function(seurat, anchors=NULL,neighborhood.distance=NULL, neighborhood.gradient =NULL, .k = 10, .x0 = 0.5, .minExpression, .version = "latest", .cellularCompartment, .manualAnnotation = "Correct", .symmetric = F) {
cat("Setting everything up...\n")
if (is.null(anchors)) anchors <- as.character(unique(seurat@ident))
out <- new("rnamagnet", celltype = seurat@ident, params = list("neighborhood.distance"=neighborhood.distance, "neighborhood.gradient" =neighborhood.gradient, ".k" = .k, ".x0" = .x0, ".minExpression" = .minExpression, ".cellularCompartment" = .cellularCompartment, ".manualAnnotation" = .manualAnnotation, ".symmetric" = .symmetric))
#compute cell-cell similarity
out@similarity <- as.matrix(1-cor(t(seurat@dr$pca@cell.embeddings[,1:15])))
#prepare database
ligrec <- getLigandsReceptors(.version, .cellularCompartment, .manualAnnotation)
if (.symmetric) ligrec <- makeSymmetric(ligrec) #for physical interactions: i a binds b, b binds a.
#prepare genes included into MAGIC
filteredGenes <- rownames(seurat@raw.data)[apply(seurat@raw.data[,seurat@cell.names] >0 ,1,sum) > .minExpression]
genes <- unique(c(ligrec$Receptor.Mouse,
ligrec$Ligand.Mouse))
genes <- genes[sapply(genes, function(x) {
entries <- strsplit(x, "[&|]")
if (grepl("&", x))
all(entries[[1]] %in% filteredGenes)
else any(entries[[1]] %in% filteredGenes)
})]
genes_formagic <- unlist(strsplit( genes,"[&|]"))
formagic <- Matrix::t(seurat@raw.data[,seurat@cell.names])
formagic <- Rmagic::library.size.normalize(formagic)
formagic <- sqrt(formagic)
#Run MAGIC
cat("Now running MAGIC to impute dropout values...\n")
mymagic <- Rmagic::magic(formagic, genes = genes_formagic, seed = 0xbeef)
mymagic <- as.data.frame(mymagic)
#handle expression levels of heterodimers
resolvedRawData <- resolveData(t(mymagic), genes)
resolvedRawData <- t(apply(resolvedRawData, 1, function(x) (x - min(x )) / (max(x) - min(x)) ))
annotated_genes <- rownames(resolvedRawData)
out@mylr <- subset(ligrec, Receptor.Mouse %in% annotated_genes &
Ligand.Mouse %in% annotated_genes)
stepf<- Vectorize(function(x) if (x<0) 0 else x)
cat("Now running RNAMagnet...\n")
#compute mean gene expression level per population
out@anchors <- do.call(cbind, lapply(anchors, function(id) {
apply(resolvedRawData[,seurat@ident == id],1,mean)
}));
colnames(out@anchors) <- anchors
#use fuzzy logic and operations to compute interaction score
out@interaction <- sapply(anchors, function(pop_l) {
out@mylr$expression_ligand <- out@anchors[out@mylr$Ligand.Mouse, pop_l]
sapply(seurat@cell.names, function(cell_r) {
out@mylr$expression_receptor <-resolvedRawData[out@mylr$Receptor.Mouse, cell_r]
sum(kernel(out@mylr$expression_ligand, k =.k, x0 = .x0) * kernel(out@mylr$expression_receptor, k = .k, x0=.x0 )) #kernel performs fuzzification
})
})
out@specificity <- t(sapply(rownames(out@interaction), function(cell) {
x <- out@interaction[cell,]
beta <- x/sum(x)
if (!is.null(neighborhood.distance)) alpha <- apply(out@interaction * (1-kernel(out@similarity[cell,],neighborhood.gradient,x0=neighborhood.distance )),2,sum) else alpha <- apply(out@interaction,2,mean)
alpha <- alpha / sum(alpha)
beta- alpha
}))
rownames(out@specificity) <- rownames(out@interaction)
out@adhesiveness <- apply(mymagic, 1,function(x) sum(kernel(x,x0 = .x0, k = .k)))
return(out)
}
makeSymmetric <- function(ligrec) {
toadd <- list()
for (i in 1:nrow(ligrec)) {
if (! any(ligrec$Ligand.Mouse[i] == ligrec$Receptor.Mouse & ligrec$Receptor.Mouse[i] == ligrec$Ligand.Mouse )) {
toadd[[length(toadd)+1]] <- ligrec[i,]
toadd[[length(toadd)]]$Receptor.Mouse <- ligrec[i,"Ligand.Mouse"]
toadd[[length(toadd)]]$Ligand.Mouse <- ligrec[i,"Receptor.Mouse"]
}
}
rbind(ligrec, do.call(rbind, toadd))
}
#function to handle heterodimeric complexes by applying fuzzy logic AND operations
resolveData <- function(rawdata, lr) {
#as ligands/receptor can contain gene lists (complex components), apply fuzzy logic to gene expression values
singleentries <- lr[!grepl("[&|]",lr)]
use_normdata <- rawdata[singleentries,]
doubleentries <- lr[grepl("[&|]",lr)]
add_normdata <- do.call(rbind,lapply(doubleentries, function(x) {
if (grepl("&",x)) {
entries <- strsplit(x, "&",fixed = T)[[1]]
apply(rawdata[entries,],2,min)
} else {
entries <- strsplit(x, "|",fixed=T)
entries <- entries[[1]][entries[[1]] %in% rownames(rawdata)]
if(length(entries) > 1) apply(rawdata[entries,],2,max) else rawdata[entries,]
}
}))
rownames(add_normdata) <- doubleentries
use_normdata <- rbind(use_normdata, add_normdata)
use_normdata[lr,]
}
kernel <- function(x,k=10, x0=0.5) 1/(1+exp(-k * (x-x0)))
#' 10x dataset from mouse bone marrow
#'
#' @format An object of class \code{\link[Seurat]{seurat}}, containing clustering and dimensionality reduction
"NicheData10x"
#' LCM dataset from mouse bone marrow
#'
#' @format Read count matrix, see \code{\link{NicheMetaDataLCM}} for per-sample metadata.
"NicheDataLCM"
#' Meta data for LCM dataset
#'
#' @format Data frame, column \code{id} corresponds to the column names of \code{\link{NicheDataLCM}}. Other colummns describe various parameters related to potential sources of batch effects (e.g. the microscopy slide, the day of sample collection and processing, the sequencing lane and the size of the area sampled) and biology (Basic sample class, presence of sinusoids and distance from the endosteum)
#' @describeIn NicheDataLCM
"NicheMetaDataLCM"
#' Markers for populations defined by 10x genomics
#'
#' @format Data frame of cell type specific markers - the output of running \code{FindMarkersAll{\link{NicheData10x}, method="roc"}}
#' @describeIn NicheData10x
"NicheMarkers10x"
#'Retrieve a database of ligand-receptor pairs
#'
#'@param version Currently supports the following values: \itemize{
#' \item{latest} points to \code{1.0.0}
#' \item{1.0.0} contains manual annotation for all genes expressed in bone marrow. This version was used for analysis is the Baccin et al paper.
#' \item{2.0.0} contains manual annotation for all genes in the geneome (not yet complete)
#' \item Alternatively, a data frame with the same column names as \code{ligandsReceptors_1.0.0} can be used.
#'}
#'@param cellularCompartment A character vector to select permitted localizations of the \strong{ligand}. Valid entries are \code{Membrane}, \code{ECM}, \code{Secreted} and \code{Both}, which refers to membrane-bound ligands that can also be secreted.
#'@param manualAnnotation A character vector to select permitted annotation status of the interaction. Valid entries are \code{Correct}, \code{Incorrect}, \code{Not Expressed} (i.e. entries not annotated in v1.0.0) and \code{Irrelevant} (i.e. entries we consider correct but not relevant at homeostasis, e.g. interactions involving activated components of the complement system)
#'@param ligandClass A character vector to select permitted classes of ligands. Defaults to all, i.e. \code{c("Other","Cytokine","Chemokine","GrowthFactor","Interleukin")}
#'@return Returns a data frame with 11 columns: \itemize{
#'\item{Pair.Name}{Unique identifier for the ligand-receptor pair}
#'\item{Ligand.Mouse}{MGI symbol of the ligand. }
#'\item{Receptor.Mouse}{MGI symbol of the receptor. In case multiple receptors are required for complex formation, multiple MGI symbols separated by \strong{&}, as in Itgal&Itgb2. }
#'\item{Source}{Ramilowski for entries taken from \emph{Ramilowski et al., Nature Communications 2015}; Baccin for entries added as part of this work}
#'\item{Reference}{Pubmed ID or similar}
#'\item{ManualAnnotation}{The literature evidence for all entries from Ramilowski et al with expression in bone where checked manually.}
#'\item{Ligand.CC}{Cellular Compartment of the ligand - one of Secreted, Membrane, Both, or ECM. Annotation is based on Gene Ontology, but was checked manually for entries with expression in bone}
#'\item{Ligand.GO}{Classification of the ligand by gene ontology into one of the following classes: GrowthFactor, Cytokine, Chemokine, Interleukin, or Other}
#'}
#'@export
getLigandsReceptors <- function(version = "latest", cellularCompartment = c("Membrane","ECM","Both","Secreted"), manualAnnotation = "Correct", ligandClass = c("Other","Cytokine","Chemokine","GrowthFactor","Interleukin")) {
if (is.character(version)) out <- switch(version,
latest = ligandsReceptors_2.0.0,
"1.0.0" = ligandsReceptors_1.0.0,
"2.0.0" = ligandsReceptors_2.0.0) else if (is.data.frame(version)) out <- version else stop("Version needs to be a character string or a data frame.")
if(is.null(out)) error("Version", version, "not supported. See documentation.")
out <- subset(out, Ligand.CC %in% cellularCompartment & ManualAnnotation %in% manualAnnotation & Ligand.GO %in% ligandClass)
out
}
#'Extracts receptor-ligand pairs mediating interaction between two cell types
#'
#'@param rnamagnet An object of class \code{\link{rnamagnet}}
#'@param pop_l Sending population to consider
#'@param pop_r Receiving population to consider
#'@param thresh Threshold score for ligand and receptor expression
#'@return A data frame conaining the score and the receptor-ligand pair
#'@export
getRNAMagnetGenes <- function(rnamagnet, pop_l, pop_r, thresh = 0.05) {
rnamagnet@mylr$expression_ligand <- rnamagnet@anchors[rnamagnet@mylr$Ligand.Mouse, pop_l]
rnamagnet@mylr$expression_receptor <- rnamagnet@anchors[rnamagnet@mylr$Receptor.Mouse, pop_r]
out <- data.frame(score = kernel(rnamagnet@mylr$expression_ligand, k =rnamagnet@params[[".k"]], x0 = rnamagnet@params[[".x0"]]) * kernel(rnamagnet@mylr$expression_receptor, k =rnamagnet@params[[".k"]], x0 = rnamagnet@params[[".x0"]] ),
pair = rnamagnet@mylr$Pair.Name)
subset(out[order(out$score, decreasing = T),], score > thresh)
}
#'Visualizes RNAMagnet output as a signaling network
#'
#'@param rnamagnet An object of class \code{\link{rnamagnet}}, as produced by \code{\link{RNAMagnetSignaling}}
#'@param return Determines object to return; one of "summary" or "rnamagnet-class"
#'@param threshold Threshold interaction strength for including an edge in the network.
#'@param colors A named vector of colors to be used for plotting; names correspond to levels of \code{rnamagnet@class}
#'@param useLabels Plot the population labels on the graph?
#'@return Plots the network and invisibly returns an object of class \code{\link[igraph]{igraph}}
#'@export
PlotSignalingNetwork <- function(rnamagnet, threshold = 0.01, colors = NULL, useLabels=T) {
pops <- colnames(rnamagnet@specificity)
mean_by_pop <- sapply(pops, function(id) {
sapply(pops, function(id2) {
mean(rnamagnet@specificity[rnamagnet@celltype == id2,id])
})
})
graphf <- reshape2::melt(t(mean_by_pop))
colnames(graphf) <- c("sender", "receiver", "value")
graphf$sender <- as.character(graphf$sender); graphf$receiver <- as.character(graphf$receiver);
graphf$width <- graphf$value^1.5 *1000
vertices <- data.frame(id = unique(graphf$sender), size = 5, stringsAsFactors = F, label = unique(graphf$sender))
if(!is.null(colors)){
vertices$label.color <- colors[vertices$id]
vertices$color <- colors[vertices$id]
graphf$color <- colors[graphf$sender]
}
plotgraph <- igraph::graph_from_data_frame(subset(graphf,value > 0.01 ), vertices = vertices)
if (useLabels) plot(plotgraph,vertex.frame.color=NA, edge.arrow.mode = "-", edge.curved=0.3) else plot(plotgraph,vertex.frame.color=NA, edge.arrow.mode = "-", vertex.label = NA, edge.curved=0.3)
invisible(plotgraph)
}
#'RNAMagnet class
#'
#'This class contains representation for the output of \code{\link{RNAMagnetBase}}.
#'
#'@slot similarity Cell-cell distance matrix
#'@slot anchors Mean gene expression matrix of anchor populations
#'@slot interaction Interaction score matrix between cells and anchor populations
#'@slot specificity Specificity of interaction scores relative to similar cells