From 12bd0d5951367d2d85fb5a9d5f3fddbfaf404996 Mon Sep 17 00:00:00 2001
From: Constantin Pape <constantin.pape@iwr.uni-heidelberg.de>
Date: Fri, 4 Oct 2019 18:19:28 +0200
Subject: [PATCH] Fix issues in vc assignment scripts

---
 scripts/attributes/genes.py                       |  1 +
 scripts/extension/attributes/genes.py             |  2 +-
 scripts/extension/attributes/vc_assignments.py    |  5 ++---
 .../extension/attributes/vc_assignments_impl.py   | 15 ++++++++++-----
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/scripts/attributes/genes.py b/scripts/attributes/genes.py
index fba1551..a894ec0 100755
--- a/scripts/attributes/genes.py
+++ b/scripts/attributes/genes.py
@@ -50,6 +50,7 @@ def vc_assignment_table(seg_path, vc_vol_path, vc_expression_path,
 
     t = task(tmp_folder=tmp_folder, config_dir=config_folder, max_jobs=1,
              segmentation_path=seg_path, vc_volume_path=vc_vol_path,
+             vc_expression_path=vc_expression_path,
              med_expression_path=med_expression_path, output_path=output_path)
     ret = luigi.build([t], local_scheduler=True)
     if not ret:
diff --git a/scripts/extension/attributes/genes.py b/scripts/extension/attributes/genes.py
index e5683cc..8094c6e 100644
--- a/scripts/extension/attributes/genes.py
+++ b/scripts/extension/attributes/genes.py
@@ -10,7 +10,7 @@ import numpy as np
 import cluster_tools.utils.function_utils as fu
 from cluster_tools.utils.task_utils import DummyTask
 from cluster_tools.cluster_tasks import SlurmTask, LocalTask
-from .genes_impl import gene_assignments
+from scripts.extension.attributes.genes_impl import gene_assignments
 
 #
 # Gene Attribute Tasks
diff --git a/scripts/extension/attributes/vc_assignments.py b/scripts/extension/attributes/vc_assignments.py
index d2b57a1..5b604b8 100644
--- a/scripts/extension/attributes/vc_assignments.py
+++ b/scripts/extension/attributes/vc_assignments.py
@@ -5,12 +5,11 @@ import sys
 import json
 
 import luigi
-import numpy as np
 
 import cluster_tools.utils.function_utils as fu
 from cluster_tools.utils.task_utils import DummyTask
 from cluster_tools.cluster_tasks import SlurmTask, LocalTask
-from .vc_assignments_impl import vc_assignments as vc_assignments_impl
+from scripts.extension.attributes.vc_assignments_impl import vc_assignments as vc_assignments_impl
 
 #
 # Gene Attribute Tasks
@@ -88,7 +87,7 @@ def vc_assignments(job_id, config_path):
         config = json.load(f)
 
     segmentation_path = config['segmentation_path']
-    vc_volume_path = config['vc_assignments_path']
+    vc_volume_path = config['vc_volume_path']
     vc_expression_path = config['vc_expression_path']
     med_expression_path = config['med_expression_path']
 
diff --git a/scripts/extension/attributes/vc_assignments_impl.py b/scripts/extension/attributes/vc_assignments_impl.py
index 791be04..5534586 100644
--- a/scripts/extension/attributes/vc_assignments_impl.py
+++ b/scripts/extension/attributes/vc_assignments_impl.py
@@ -20,10 +20,12 @@ def get_common_genes(vc_genes_file_path, cells_gene_expression, med_gene_names):
     vc_gene_indices = []
     common_gene_names = []
     med_gene_names_lowercase = [i.lower().split('-')[0] for i in med_gene_names]
+
     # get the names of genes used for vc's
     with open(vc_genes_file_path) as csv_file:
-        csv_reader = csv.DictReader(csv_file, delimiter=',')
+        csv_reader = csv.DictReader(csv_file, delimiter='\t')
         vc_gene_names = csv_reader.fieldnames
+
     # find a subset of genes both used for vc's and available as MEDs
     for i in range(len(vc_gene_names)):
         name = vc_gene_names[i].split('--')[0]
@@ -31,11 +33,12 @@ def get_common_genes(vc_genes_file_path, cells_gene_expression, med_gene_names):
             med_gene_indices.append(med_gene_names_lowercase.index(name.lower()))
             vc_gene_indices.append(i)
             common_gene_names.append(name)
+
     # from expression_by_overlap assignment extract only the subset genes
     cells_expression_subset = np.take(cells_gene_expression, med_gene_indices,
                                       axis=1)
     # from vcs_expression extract only the subset genes
-    vc_expression_subset = np.loadtxt(vc_genes_file_path, delimiter=',',
+    vc_expression_subset = np.loadtxt(vc_genes_file_path, delimiter='\t',
                                       skiprows=1, usecols=vc_gene_indices)
     # add the null vc with no expression
     vc_expression_subset = np.insert(vc_expression_subset, 0,
@@ -56,7 +59,8 @@ def get_bbs(data, offset):
     maxs = features['Coord<Maximum >'] + offset + 1
     # to prevent 'out of range' due to offsets
     mins[np.where(mins < 0)] = 0
-    maxs[np.where(maxs > shape)] = shape[np.where(maxs > shape)[1]]
+    exceed_bounds = np.where(maxs > shape)
+    maxs[exceed_bounds] = shape[exceed_bounds[1]]
     # get a bb for each cell
     cell_bbs = [tuple(slice(mi, ma) for mi, ma in zip(min_, max_))
                 for min_, max_ in zip(np.uint32(mins), np.uint32(maxs))]
@@ -72,7 +76,7 @@ def get_distances(em_data, vc_data, cells_expression, vc_expression, n_threads,
     distance_matrix = np.full((num_cells, num_vcs), np.nan)
     bbs = get_bbs(em_data, offset)
 
-    def cell_ids(cell):
+    def get_distance(cell):
         if cell == 0:
             return
 
@@ -92,7 +96,7 @@ def get_distances(em_data, vc_data, cells_expression, vc_expression, n_threads,
         distance_matrix[cell][vc_list] = distance
 
     with futures.ThreadPoolExecutor(n_threads) as tp:
-        tasks = [tp.submit(get_distances, cell)for cell in avail_cells]
+        tasks = [tp.submit(get_distance, cell_id) for cell_id in avail_cells]
         [t.result() for t in tasks]
 
     return distance_matrix
@@ -137,6 +141,7 @@ def vc_assignments(segm_volume_file, vc_volume_file, vc_expr_file,
     dist_matrix = get_distances(downsampled_segm_data, vc_data,
                                 cells_expression_subset,
                                 vc_expression_subset, n_threads)
+
     # assign the cells to the genetically closest vcs
     cell_assign = assign_vc(dist_matrix, vc_expression_subset)
     # write down a new table
-- 
GitLab