Skip to content
Snippets Groups Projects
Commit a6f91df4 authored by Constantin Pape's avatar Constantin Pape
Browse files

Add gene expression computation

parent 49c9683e
No related branches found
No related tags found
No related merge requests found
#! /g/arendt/pape/miniconda3/envs/platybrowser/bin/python
import argparse
import os
from scripts import get_latest_version
from scripts.analysis import get_cells_expressing_genes
def count_gene_expression(gene_names, threshold, version):
# path are hard-coded, so we need to change the pwd to '..'
os.chdir('..')
try:
if version == '':
version = get_latest_version()
# TODO enable using vc assignments once we have them on master
table_path = 'data/%s/tables/sbem-6dpf-1-whole-segmented-cells-labels/genes.csv' % version
ids = get_cells_expressing_genes(table_path, threshold, gene_names)
n = len(ids)
print("Found", n, "cells expressing:", ",".join(gene_names))
except Exception as e:
os.chdir('analysis')
raise e
os.chdir('analysis')
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Compute number of cells co-expressing genes.')
parser.add_argument('gene_names', type=str, nargs='+',
help='Names of the genes for which to express co-expression.')
parser.add_argument('--threshold', type=float, default=.5,
help='Threshold to count gene expression. Default is 0.5.')
parser.add_argument('--version', type=str, default='',
help='Version of the platy browser data. Default is latest.')
args = parser.parse_args()
count_gene_expression(args.gene_names, args.threshold, args.version)
from .release_helper import get_latest_version
from .counts import cell_counts
from .expression import get_cells_expressing_genes
import numpy as np
import pandas as pd
def get_cells_expressing_genes(table_path, expression_threshold, gene_names):
if isinstance(gene_names, str):
gene_names = [gene_names]
if not isinstance(gene_names, list):
raise ValueError("Gene names must be a str or a list of strings")
table = pd.read_csv(table_path, sep='\t')
label_ids = table['label_id']
columns = table.columns
unmatched = set(gene_names) - set(columns)
if len(unmatched) > 0:
raise RuntimeError("Could not find gene names %s in table %s" % (", ".join(unmatched),
table_path))
# find logical and of columns expressing the genes
expressing = np.logical_and.reduce(tuple(table[name] > expression_threshold
for name in gene_names))
# get ids of columns expressing all genes
label_ids = label_ids[expressing].values
return label_ids
......@@ -121,3 +121,9 @@ def add_version(tag):
versions.append(tag)
with open(VERSION_FILE, 'w') as f:
json.dump(versions, f)
def get_latest_version():
with open(VERSION_FILE) as f:
versions = json.load(f)
return versions[-1]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment