From 12bd0d5951367d2d85fb5a9d5f3fddbfaf404996 Mon Sep 17 00:00:00 2001 From: Constantin Pape <constantin.pape@iwr.uni-heidelberg.de> Date: Fri, 4 Oct 2019 18:19:28 +0200 Subject: [PATCH] Fix issues in vc assignment scripts --- scripts/attributes/genes.py | 1 + scripts/extension/attributes/genes.py | 2 +- scripts/extension/attributes/vc_assignments.py | 5 ++--- .../extension/attributes/vc_assignments_impl.py | 15 ++++++++++----- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/scripts/attributes/genes.py b/scripts/attributes/genes.py index fba1551..a894ec0 100755 --- a/scripts/attributes/genes.py +++ b/scripts/attributes/genes.py @@ -50,6 +50,7 @@ def vc_assignment_table(seg_path, vc_vol_path, vc_expression_path, t = task(tmp_folder=tmp_folder, config_dir=config_folder, max_jobs=1, segmentation_path=seg_path, vc_volume_path=vc_vol_path, + vc_expression_path=vc_expression_path, med_expression_path=med_expression_path, output_path=output_path) ret = luigi.build([t], local_scheduler=True) if not ret: diff --git a/scripts/extension/attributes/genes.py b/scripts/extension/attributes/genes.py index e5683cc..8094c6e 100644 --- a/scripts/extension/attributes/genes.py +++ b/scripts/extension/attributes/genes.py @@ -10,7 +10,7 @@ import numpy as np import cluster_tools.utils.function_utils as fu from cluster_tools.utils.task_utils import DummyTask from cluster_tools.cluster_tasks import SlurmTask, LocalTask -from .genes_impl import gene_assignments +from scripts.extension.attributes.genes_impl import gene_assignments # # Gene Attribute Tasks diff --git a/scripts/extension/attributes/vc_assignments.py b/scripts/extension/attributes/vc_assignments.py index d2b57a1..5b604b8 100644 --- a/scripts/extension/attributes/vc_assignments.py +++ b/scripts/extension/attributes/vc_assignments.py @@ -5,12 +5,11 @@ import sys import json import luigi -import numpy as np import cluster_tools.utils.function_utils as fu from cluster_tools.utils.task_utils import DummyTask from cluster_tools.cluster_tasks import SlurmTask, LocalTask -from .vc_assignments_impl import vc_assignments as vc_assignments_impl +from scripts.extension.attributes.vc_assignments_impl import vc_assignments as vc_assignments_impl # # Gene Attribute Tasks @@ -88,7 +87,7 @@ def vc_assignments(job_id, config_path): config = json.load(f) segmentation_path = config['segmentation_path'] - vc_volume_path = config['vc_assignments_path'] + vc_volume_path = config['vc_volume_path'] vc_expression_path = config['vc_expression_path'] med_expression_path = config['med_expression_path'] diff --git a/scripts/extension/attributes/vc_assignments_impl.py b/scripts/extension/attributes/vc_assignments_impl.py index 791be04..5534586 100644 --- a/scripts/extension/attributes/vc_assignments_impl.py +++ b/scripts/extension/attributes/vc_assignments_impl.py @@ -20,10 +20,12 @@ def get_common_genes(vc_genes_file_path, cells_gene_expression, med_gene_names): vc_gene_indices = [] common_gene_names = [] med_gene_names_lowercase = [i.lower().split('-')[0] for i in med_gene_names] + # get the names of genes used for vc's with open(vc_genes_file_path) as csv_file: - csv_reader = csv.DictReader(csv_file, delimiter=',') + csv_reader = csv.DictReader(csv_file, delimiter='\t') vc_gene_names = csv_reader.fieldnames + # find a subset of genes both used for vc's and available as MEDs for i in range(len(vc_gene_names)): name = vc_gene_names[i].split('--')[0] @@ -31,11 +33,12 @@ def get_common_genes(vc_genes_file_path, cells_gene_expression, med_gene_names): med_gene_indices.append(med_gene_names_lowercase.index(name.lower())) vc_gene_indices.append(i) common_gene_names.append(name) + # from expression_by_overlap assignment extract only the subset genes cells_expression_subset = np.take(cells_gene_expression, med_gene_indices, axis=1) # from vcs_expression extract only the subset genes - vc_expression_subset = np.loadtxt(vc_genes_file_path, delimiter=',', + vc_expression_subset = np.loadtxt(vc_genes_file_path, delimiter='\t', skiprows=1, usecols=vc_gene_indices) # add the null vc with no expression vc_expression_subset = np.insert(vc_expression_subset, 0, @@ -56,7 +59,8 @@ def get_bbs(data, offset): maxs = features['Coord<Maximum >'] + offset + 1 # to prevent 'out of range' due to offsets mins[np.where(mins < 0)] = 0 - maxs[np.where(maxs > shape)] = shape[np.where(maxs > shape)[1]] + exceed_bounds = np.where(maxs > shape) + maxs[exceed_bounds] = shape[exceed_bounds[1]] # get a bb for each cell cell_bbs = [tuple(slice(mi, ma) for mi, ma in zip(min_, max_)) for min_, max_ in zip(np.uint32(mins), np.uint32(maxs))] @@ -72,7 +76,7 @@ def get_distances(em_data, vc_data, cells_expression, vc_expression, n_threads, distance_matrix = np.full((num_cells, num_vcs), np.nan) bbs = get_bbs(em_data, offset) - def cell_ids(cell): + def get_distance(cell): if cell == 0: return @@ -92,7 +96,7 @@ def get_distances(em_data, vc_data, cells_expression, vc_expression, n_threads, distance_matrix[cell][vc_list] = distance with futures.ThreadPoolExecutor(n_threads) as tp: - tasks = [tp.submit(get_distances, cell)for cell in avail_cells] + tasks = [tp.submit(get_distance, cell_id) for cell_id in avail_cells] [t.result() for t in tasks] return distance_matrix @@ -137,6 +141,7 @@ def vc_assignments(segm_volume_file, vc_volume_file, vc_expr_file, dist_matrix = get_distances(downsampled_segm_data, vc_data, cells_expression_subset, vc_expression_subset, n_threads) + # assign the cells to the genetically closest vcs cell_assign = assign_vc(dist_matrix, vc_expression_subset) # write down a new table -- GitLab