Skip to content
Snippets Groups Projects
Commit e72ff611 authored by SofiaTorchia's avatar SofiaTorchia
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
LICENSE 0 → 100755
MIT License
Copyright (c) 2023 Dominik Kutra, Christian Tischer, Matteo Spatuzzi, Jean-Karim Heriche;
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
Module for generating a dataset where for each mouse we compute
details about time spent is certain rois for hiding quantification
Input data structure:
| |video |animal |roi |entry|exit|
|- |- |- |- |- |- |
|0 |C57j-B6-US_MS |UM |Up-corn1 |1214 |1477|
|1 |C57j-B6-US_MS |UM |Up-corn1 |1690 |2500|
|2 |C57j-B6-US_MS |UM |Up-corn1 |2570 |2598|
|3 |C57j-B6-US_MS |UM |Up-corn1 |3688 |3737|
|4 |C57j-B6-US_MS |UM |Up-corn1 |7210 |7301|
|.. |.. |.. |.. |.. |.. |
"""
import numpy as np
import pandas as pd
FRAMERATE = 45
N = 27000 # Total number of frames in a 10-minute recording
def delete_hiding_noise(data: pd.DataFrame) -> pd.DataFrame:
'''
Sometimes, a mouse moving very close to an ROI is incorrectly detected as being
inside this ROI for a few frames. The following function deletes those rows where
the mouse is considered to be in one ROI for less than one second,
aiming to reduce noise in the dataset.
Args:
data: dataset containing ROI information for each mouse along with the
corresponding entry and exit time frames.
Returns:
denoised version of the input dataset
'''
indices = data[data.exit - data.entry <= FRAMERATE].index.values.tolist()
data.drop(indices, inplace = True)
data.reset_index(inplace = True)
return data
def hiding_and_unreachable_vectors(data: pd.DataFrame) -> pd.DataFrame:
'''
This function computes:
- a vector containing 1 for each frame when the mouse is hiding, i.e.
when it is in a corner (corn).
- a vector containing 1 for each frame when the mouse is unreachable,
i.e. it is in any high-position ROI (H-..).
Args:
data: dataset containing ROI information for each mouse along with the
corresponding entry and exit time frames.
Returns:
hiding: binary data with 1 for frames when the mouse is hiding
unreachable: binary data with 1 for frames when the mouse is
unreachable by the other mouse
'''
hiding = np.zeros(N*11)
unreachable = np.zeros(N*11)
for i in range(data.shape[0]):
if 'corn' in data.roi.iloc[i] or 'H' in data.roi.iloc[i]:
for j in range(data.entry.iloc[i],data.exit.iloc[i],1):
hiding[j] = 1
if 'H' in data.roi.iloc[i]:
for h in range(data.entry.iloc[i],data.exit.iloc[i],1):
unreachable[h] = 1
return hiding, unreachable
def time_in_rois(paths: str) -> list[list]:
'''
Here, for each video, we first apply 'delete_hiding_noise' to filter out
the noise related to hiding time.
Then, for each mouse belonging to the same recording, we compute:
- the total time of the recording
- the total time spent hiding
- the portion of time spent in different regions
(other compartment, other nest, other HF)
Args:
paths: input data path
Returns:
vector_list: list of vectors. Each vector contains specific
time measurements for each single mouse
'''
vector_list = []
for path in paths:
data = pd.read_csv(path)
data = delete_hiding_noise(data)
names = data.video.unique()
for file_name in names:
for mouse in ['UM','LM']:
if mouse == 'UM':
other_comp = 'Down'
else:
other_comp = 'Up'
vector = [file_name+'-'+mouse]
d1 = data[data.video == file_name]
d1 = d1[d1.animal == mouse]
t1 = (d1[d1.roi == 'Up-comp']['exit'] - d1[d1.roi == 'Up-comp']['entry']).sum()
t2 = (d1[d1.roi == 'Down-comp']['exit'] - d1[d1.roi == 'Down-comp']['entry']).sum()
total_time1 = t1 + t2
total_time_hidden1 = 0
for roi in ['Up-HF', 'Down-HF', 'Up-HT', 'Down-HT', 'Up-corn1','Up-corn2',
'Up-corn3','Up-corn4','Down-corn1','Down-corn2','Down-corn3',
'Down-corn4' ]:
t3 = (d1[d1.roi == roi]['exit'] - d1[d1.roi == roi]['entry']).sum()
total_time_hidden1 = total_time_hidden1 + t3
t_exit = d1[d1.roi == other_comp + '-comp']['exit']
t_entry = d1[d1.roi == other_comp + '-comp']['entry']
total_time_other_comp1 = (t_exit - t_entry).sum()
t_exit = d1[d1.roi == other_comp + '-nest']['exit']
t_entry = d1[d1.roi == other_comp + '-nest']['entry']
total_time_other_nest1 = (t_exit - t_entry).sum()
t_exit = d1[d1.roi == other_comp + '-HF']['exit']
t_entry = d1[d1.roi == other_comp + '-HF']['entry']
total_time_other_hf1 = (t_exit - t_entry).sum()
total_time_hidde_other_comp1 = 0
for roi in [other_comp + '-HF', other_comp + '-HT', other_comp + '-corn1',
other_comp + '-corn2', other_comp + '-corn3',other_comp + '-corn4']:
t4 = (d1[d1.roi == roi]['exit'] - d1[d1.roi == roi]['entry']).sum()
total_time_hidde_other_comp1 = total_time_hidde_other_comp1 + t4
portion_time_other_comp1 = round(total_time_other_comp1/total_time1,2)
portion_time_other_nest_hf1 = total_time_other_nest1 - total_time_other_hf1
portion_time_other_nest_hf1 = portion_time_other_nest_hf1 / total_time1
portion_time_other_nest_hf1 = round(portion_time_other_nest_hf1,2)
portion_time_hidden1 = round(total_time_hidden1/total_time1,2)
portion_time_hidde_other_comp1 = round(total_time_hidde_other_comp1/total_time1,2)
vector = vector + [portion_time_other_comp1,portion_time_other_nest_hf1,
portion_time_hidden1,portion_time_hidde_other_comp1]
vector_list.append(vector)
return vector_list
def traveled_distance(paths: str) -> list[list]:
'''
Here, we extract the information about the distance traveled by each mouse
Args:
paths: input data path
Returns:
vectors: list containing the recording and mouse names,
and the total distance traveled by the mouse
during the recording.
'''
vectors = []
for path in paths:
data = pd.read_csv(path)
names = data.video.unique()
for file_name in names:
for mouse in ['UM','LM']:
vector = [file_name+'-'+mouse]
cond1 = data.video == file_name
cond2 = data.animal == mouse
cond3 = data.measure == 'Distance (cm)'
distance = data[cond1][cond2][cond3].value.values[0]
vector.append(distance)
vectors.append(vector)
return vectors
def cumulative_time_in_rois(paths: str) -> pd.DataFrame:
'''
In the following snippet, for each video, we first apply 'delete_hiding_noise'
to filter out the noise related to hiding time.
Then, for each mouse belonging to the same recording, we compute:
- the cumulative time spent hiding
- the cumulative time spent in unreachable areas
For T2 videos, it identifies a restart frame and adjusts the cumulative
sums accordingly.
Args:
paths: input data path
Returns:
dataframe: Each raw of the dataframe contains specific
time measurements for each single mouse
'''
for path in paths:
data = pd.read_csv(path)
data = delete_hiding_noise(data)
names = data.video.unique()
for file_name in names:
for mouse in ['UM','LM']:
d = data[data.video == file_name]
d = d[d.animal == mouse]
hiding, unreachable = hiding_and_unreachable_vectors(d)
cumsum1 = np.cumsum(hiding)
cumsum2 = np.cumsum(unreachable)
time_in_seconds = [round(i/FRAMERATE,4) for i in range(N*11)]
dataframe = pd.DataFrame({'frames': list(range(N*11)),
'time_in_seconds': time_in_seconds,'hiding': hiding,
'unreachable': unreachable, 'hiding_cumsum': cumsum1,
'unreachable_cumsum': cumsum2})
total_time = (d[(d.roi == 'Up-comp') | (d.roi == 'Down-comp')]['exit'] -
d[(d.roi == 'Up-comp') | (d.roi == 'Down-comp')]['entry']).sum()
cumsum1_portion = dataframe.hiding_cumsum / total_time
cumsum2_portion = dataframe.unreachable_cumsum / total_time
dataframe['relative_hiding_cumsum'] = cumsum1_portion
dataframe['relative_unreachable_cumsum'] = cumsum2_portion
if '-T1.2' in file_name:
d1 = data[data.video == file_name]
d1 = d1[d1.animal == mouse]
restart_frame = d1.exit.max()
d_hiding_cumsum = dataframe.hiding_cumsum[dataframe.frames == restart_frame]
restart_h_cumsum = d_hiding_cumsum[restart_frame]
condition = dataframe.frames == restart_frame
d_unreachable_cumsum = dataframe.unreachable_cumsum[condition]
restart_unreachable_cumsum = d_unreachable_cumsum[restart_frame]
condition = dataframe.frames == restart_frame
d_restart_hiding = dataframe.relative_hiding_cumsum[condition]
restart_relative_hidden_cumsum = d_restart_hiding[restart_frame]
condition = dataframe.frames == restart_frame
d_restart_unreachable = dataframe.relative_unreachable_cumsum[condition]
restart_relative_unreachable_cumsum = d_restart_unreachable[restart_frame]
dataframe.drop(list(range(restart_frame + 1,N*9,1)), inplace = True)
d_h_subset = dataframe.hiding_cumsum.iloc[restart_frame + 1 :]
dataframe.hiding_cumsum.iloc[restart_frame + 1 :] = d_h_subset.apply(lambda x: x - restart_h_cumsum)
d_subset = dataframe.unreachable_cumsum.iloc[restart_frame + 1 :]
dataframe.unreachable_cumsum.iloc[restart_frame + 1 :] = d_subset.apply(lambda x: x - restart_unreachable_cumsum)
d_subset = dataframe.relative_hiding_cumsum.iloc[restart_frame + 1 :]
dataframe.relative_hiding_cumsum.iloc[restart_frame + 1 :] = d_subset.apply(lambda x: x - restart_relative_hidden_cumsum)
d_subset = dataframe.relative_unreachable_cumsum.iloc[restart_frame + 1 :]
dataframe.relative_unreachable_cumsum.iloc[restart_frame + 1 :] = d_subset.apply(lambda x: x - restart_relative_unreachable_cumsum)
return dataframe
"""
Module for exploring the following dataset ('pca_dataset.csv')
via PCA and for plotting 2 dimensional data
resulting from the projection into the space generate
by the first two principal components:
| | video |chases|attacks|other_comp|other_cage|locomotion|ur_postures|flights|hiding|mouse|line |time|interaction|
|- |- | - |- |- |- |- |- |- |- |- |- |- |- |
|0 |CD1-B1-UN_LS-T1 |0.0 |25.0 |9.5 |0.0 |15293.8694|6.0 |10.0 |21.5 |UN |CD1 |T1 |1 |
|1 |CD1-B1-UN_LS-T1 |15.0 |32.0 |50.5 |12.0 |12501.2516|0.0 |1.0 |12.5 |LS |CD1 |T1 |1 |
|2 |CD1-B1-MS_US-T1 |2.0 |25.0 |41.5 |2.0 |15990.6214|0.0 |0.0 |23.0 |MS |CD1 |T1 |1 |
|3 |CD1-B1-MS_US-T1 |6.0 |23.0 |32.0 |1.0 |13421.5625|0.0 |5.0 |22.5 |US |CD1 |T1 |1 |
|4 |CD1-B2-UN_LS-T1 |6.0 |38.0 |27.5 |5.5 |15003.1560|4.0 |9.0 |46.5 |UN |CD1 |T1 |1 |
|..|... |... |... |... |... |... |... |... |... |... |... |... |... |
"""
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
PATH = 'pca_dataset.csv'
STRAINS = ['CD1','C57j','Hyb']
def create_subset_for_pca(line: str, time: str, interaction: int = None) -> pd.DataFrame:
"""
Creates a subset of the entire dataset on which PCA will be performed.
Data are selected based on the mouse strain "line" and the recording
period "time".
Args:
data: entire dataset from which a subset is extracted
line: condition to select a specific strain on mice
time: recording time to filter out early/late data
interaction: if non None, 1 for selecting only interacting mice,
0 for non-interacting mice.
Returns:
dataset: subset of initial dataset
"""
data = pd.read_csv(PATH)
cond_time = data.time == time
dataset = data[cond_time]
if interaction:
cond_interaction = data.interaction == interaction
dataset = dataset[cond_interaction]
if line in STRAINS:
cond_line = dataset.line == line
dataset = dataset[cond_line]
return dataset
def plot_projected_data(new_data: pd.DataFrame, status_column: str=None) -> None:
"""
Plots data points projected into the space defined by the two principal component
and colors point based on their status (dominant or subordinate mouse) or mouse line
Args:
new_data: input 2-dimensional dataset with mice as data points and 'pca0', 'pca1',
'line', status' and 'interaction' as columns.
Mice belonging to the same pair have to appear consecutively in
the dataset
status_columns: dataset columns to be chosen as color criterion
Returns:
plot color-coded two dimensional data points
"""
if status_column == 'status_and_interaction':
colors = {'dominant': 'purple','subordinate':'orange','no interaction':'yellow'}
edgecolors = {'dominant': 'indigo','subordinate':'darkorange','no interaction':'gold'}
elif status_column == 'line':
colors = {'CD1': 'gold','Hyb':'firebrick','C57j':'deepskyblue'}
edgecolors = {'CD1': 'goldenrod','Hyb':'maroon','C57j':'dodgerblue'}
elif status_column == 'status':
colors = {'dominant': 'purple','subordinate':'orange'}
edgecolors = {'dominant': 'indigo','subordinate':'darkorange'}
for key,value in colors.items():
cond = new_data[status_column] == key
data_plot = new_data[cond]
color = value
edgecolor = edgecolors[key]
plt.scatter(data_plot['pca0'],data_plot['pca1'],color = color,
s = 200, edgecolors = edgecolor, label = key)
plt.legend()
plt.xlabel('First Principal Component', fontsize = 17)
plt.ylabel('Second Principal Component', fontsize = 17)
def plot_pairs(new_data: pd.DataFrame) -> None:
'''
Plots a segment between two projected data points if they represent
two mice belonging to the same pair.
Args: new_data: input 2-dimensional dataset with mice as data points and 'pca0', 'pca1',
'line', status' and 'interaction' as columns.
Mice belonging to the same pair have to appear consecutively in
the dataset
Returns:
plots segments between mice belonging to the same pair
'''
n = new_data.shape[0]
for i in range(0,n,2):
plt.plot([new_data.iloc[i]['pca0'],new_data.iloc[i+1]['pca0']],
[new_data.iloc[i]['pca1'],new_data.iloc[i+1]['pca1']],
color = 'grey', alpha = 0.1)
plt.xlabel('First Principal Component', fontsize = 17)
plt.ylabel('Second Principal Component', fontsize = 17)
def plot_loadings(v1: list,v2: list):
"""
Plots pca loadings.
Args:
v1: loadings for PC1
v2: loadings for PC2
"""
plt.figure(figsize=(5,3))
plt.bar([i for i in range(len(v1))],v1,color = 'teal', width = 0.8, alpha = 0.3,label = 'PC1')
plt.bar([i for i in range(len(v2))],v2,color = 'brown', width = 0.5, alpha = 0.4,label = 'PC2')
plt.vlines([i for i in range(len(v1))],-1,1,color = 'grey', linewidth = 1, alpha = 0.2)
plt.hlines(0,-10,10,color = 'grey', linewidth = 1, alpha = 0.2)
numerical_columns = ['chases','attacks','other_comp','locomotion',
'ur_postures','flights','hiding']
plt.xlim((-0.7,7.7))
plt.xticks([i for i in range(len(v1))],numerical_columns,rotation = 45)
plt.title('Factor loadings for PC1 and PC2')
plt.legend()
#plt.savefig('Pictures/loadings.svg')
plt.show()
try:
print('\nPerforming PCA analysis')
dataset = create_subset_for_pca('all animals','T2')
numerical_columns = ['chases','attacks','other_comp','locomotion',
'ur_postures','flights','hiding']
scaler = StandardScaler().set_output(transform = "pandas")
scaled_dataset = scaler.fit_transform(dataset[numerical_columns])
pca = PCA(n_components=scaled_dataset.shape[1]).set_output(transform="pandas")
pca.fit(scaled_dataset)
new_data = pca.transform(scaled_dataset)
print('explained variance: ', [round(i,3) for i in pca.explained_variance_ratio_])
new_data[['video','mouse','line',
'time','interaction']] = dataset[['video','mouse','line',
'time','interaction']]
plt.figure(figsize=(7,7))
plot_projected_data(new_data,'line')
plot_pairs(new_data)
plt.xlim((-4.7,6.3))
plt.ylim((-2.3,5.5))
plt.title('PCA', fontsize = 20)
plt.show()
videos = list(new_data.video.unique())
new_status = []
for video in videos:
if new_data.pca0[new_data.video == video].values[0] > new_data.pca0[new_data.video == video].values[1]:
new_status.extend(['dominant','subordinate'])
else:
new_status.extend(['subordinate','dominant'])
new_data['status'] = new_status
v1 = pca.components_[0,:]
v2 = pca.components_[1,:]
plot_loadings(v1,v2)
print()
except:
print('Failed')
\ No newline at end of file
# get arguments from command line
args = commandArgs(trailingOnly=TRUE)
# test if there is at least one argument: if not, return an error
if (length(args)==0) {
stop("At least one argument must be supplied (input file).n", call.=FALSE)
} else if (length(args)==1) {
# default output file
args[2] = "out.txt"
}
# check if seed was provided, otherwise set it to 21
if (length(args)>2){
seed <- args[3]
} else{
print("Setting seed to 21.")
seed <- 21
}
# check if number of permutations is provided, otherwise set them to 2000
if (length(args)>3){
N <- args[4]
}else{
print("Setting number of permutations to N=2000.")
N <- 2000
}
# reqiured libraries
library(tidyverse)
library(readxl)
#### DEFINE FUNCTIONS
# calculate the average difference
average_diff <- function(df){
df %>%
group_by(pair) %>%
summarize(diff = max(score) - min(score)) %>%
pull(diff) %>%
mean
}
# get a null distribution for average differences by permutation
null_mean <- function(df,n=2000){
replicate(n,df %>%
mutate(pair = sample(pair)) %>%
average_diff()
)
}
# the permutation test as a whole:
perm_test <- function(df, n){
obs <- average_diff(df)
null_dist <- null_mean(df,n)
mean(null_dist >= obs)
}
# load data
data <- read_xlsx(args[1])
data <- data %>%
rename(mouse_line = `Mouse_line`)
# Transform the data into long format
data_long <- data %>%
pivot_longer(
-c(mouse_line,pair),
names_to = "behavior",
values_to = "score"
)
# RUN PERMUTATION TESTS for all data in input
# set seed
set.seed(21)
print("calculating permutations...")
results <- data_long %>%
filter(!is.na(score)) %>%
group_by(mouse_line, behavior) %>%
summarize(p = perm_test(data.frame(score,pair),n=2000))
print("writing results to output file")
write_tsv(results,file=args[2])
"""
Module for plotting overlapping distance or speed curves extracted from
windows centered around all flight onsets.
"""
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import rcParams
import window_analysis as w
from scipy.stats import sem
MOUSE1 = 'M1'
MOUSE2 = 'M2'
RIGHT = 60
LEFT = 30
FRAMES_PER_SECOND = 45
FRAME_RATE_ADAPT = 9
PIXEL_TO_CM = 15.4
MAX_LENGTH = 3000
BEHAVIORS = ['Fl','A','C','U']
LABELS_PATH = 'Labels.csv'
def plot_behavioral_data(video: str) -> None:
'''
This function plots overlapping distance and speed traces of one pair of
mice for a single recording as long as vertical lines correspondig to
behavioral events (attack, flight, chase, upright posture).
These vertical lines are colored according to the bevahior
with _get_behavior_color(...).
Line style is chosen based on the mouse (M1 or M2) with _get_line_style(...)
Args:
video: name of the recording from which distance and speed are extracted
Returns:
plot distance and speed curves
'''
data = w.get_data(video)
behavioral_scoring_data = w.get_behavioral_scoring_data(video, BEHAVIORS)
beh_list = w.get_scoring_list(BEHAVIORS)
dist = w.compute_distance(data)
speed_mouse_1 = w.compute_speed(data,'M1')
speed_mouse_2 = w.compute_speed(data,'M2')
step = 3000
end = data.shape[0]
for i in range(0, end, step):
plt.figure(figsize=(25, 3))
for beh in beh_list:
color = _get_behavior_color(beh)
style = _get_line_style(beh)
func = lambda x: 3000 if x == beh else -100
plt.plot(behavioral_scoring_data.Default[i:i+step].apply(func),
label=beh, color=color, linestyle = style)
plt.plot(dist[i:i+step]/PIXEL_TO_CM, label='Mean distance', color='black')
plt.plot(speed_mouse_1.loc[i:i+step]/PIXEL_TO_CM,
label='speed mouse 1', color='orange', alpha=1)
plt.plot(speed_mouse_2.loc[i:i+step]/PIXEL_TO_CM,
label='speed mouse 2', color='purple', alpha=1)
plt.ylim((0, 150))
plt.xlabel('Frames', fontsize=15)
plt.ylabel('Distance (cm) / Speed (cm/s)', fontsize=15)
plt.axhline(y=300, color='black', linestyle='--')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
def _get_behavior_color(behavior: str) -> str:
'''
Returns the color to be used for the vertical lines in
plot_behavioral_data(...) based on the behavior the lines represent
Args:
behavior: type of behavior
Returns:
color name for vertical line
'''
if 'R' in behavior:
color = 'grey'
if 'Fl' in behavior:
color = 'deepskyblue'
if 'A' in behavior:
color = 'green'
if 'Cl' in behavior:
color = 'red'
if 'C-' in behavior:
color = 'chartreuse'
else:
color = 'blue'
return color
def _get_line_style(behavior: str) -> str:
'''
Returns line style for vertical lines in plot_behavioral_data(..)
Args:
behavior: type of behavior
Returns:
line style for vertical line
'''
if 'M1' in behavior:
style = 'dotted'
else:
style = 'solid'
return style
def plot_behavioral_data_example(start: int, end: int, video: str) -> None:
'''
This function plots distance and speed traces of one pair of mice for
a single recording separately, for a subset of the entire recording,
between frame=start and frame = end.
Both distance between mice and the pair of speed curves are
represented together with the vertical lines correspondig to behavioral
events (attack, flight, chase, upright posture).
These lines are colored according
to the bevahior with _get_behavior_color(...).
Line style is chosen based on the mouse (M1 or M2)
with _get_line_style(...)
Args:
start: start value for x ticks
end: end value for x ticks
video: name of the recording from which speed and distance
are extracted
Returns:
plot of distance and speed curves
'''
data = w.get_data(video)
behavioral_scoring_data = w.get_behavioral_scoring_data(video,BEHAVIORS)
beh_list = w.get_scoring_list(BEHAVIORS)
dist = w.compute_distance(data)
speed_mouse_1 = w.compute_speed(data,'M1')
speed_mouse_2 = w.compute_speed(data,'M2')
_set_plot_style()
plt.figure(figsize=(13, 3))
for beh in beh_list:
color = _get_behavior_color(beh)
style = _get_line_style(beh)
func = lambda x: 3000 if x == beh else -100
plt.plot(behavioral_scoring_data.Default[start:end].apply(func),
label=beh, color=color, linestyle=style)
plt.plot(dist[start:end] / PIXEL_TO_CM, label='Distance', color='red')
plt.ylim((0, 80))
plt.axhline(y=300, color='black', linestyle='--')
_set_plot_labels(start, end, 45)
plt.show()
plt.figure(figsize=(13, 3))
for beh in beh_list:
color = _get_behavior_color(beh)
style = _get_line_style(beh)
plt.plot(behavioral_scoring_data.Default[start:end].apply(func),
label=beh, color=color, linestyle=style)
plt.plot(speed_mouse_1.loc[start:end] / PIXEL_TO_CM,
label='speed 1', color='black', alpha=1, linestyle='dotted')
plt.plot(speed_mouse_2.loc[start:end] / PIXEL_TO_CM,
label='speed 2', color='black', alpha=1)
plt.ylim((0, 80))
_set_plot_labels(start, end, 45)
plt.show()
def _set_plot_style() -> None:
'''
Sets axes style for plot_behavioral_data_example(...)
'''
rcParams['axes.spines.bottom'] = True
rcParams['axes.spines.left'] = True
rcParams['axes.spines.right'] = False
rcParams['axes.spines.top'] = False
rcParams['axes.edgecolor'] = 'black'
def _set_plot_labels(start: int, end: int, frame_rate: int) -> None:
'''
Sets labels for plot_behavioral_data_example(...)
Args:
start: start value for x ticks
end: end value for x ticks
frame_rate: step between to consecutive x ticks
'''
plt.xlabel('Time (s)', fontsize=10)
plt.ylabel('Distance (cm)', fontsize=10)
plt.xticks(ticks=list(range(start, end + 1, frame_rate)),
labels=[round(i / frame_rate) for i in range(0, 1351, frame_rate)],
fontsize=10)
plt.yticks(fontsize=10)
plt.xlim((start, end))
def plot_window(curve_list: list[pd.Series], curve: pd.Series, title: str) -> None:
'''
Plots the curves produced in compute_window(..)
Args:
curve_list: list of curves to be represented as overlapping
in the same plot
curve: single curve to be plotted together with curve_list.
Generally, this curve represents the average curve
title: plot title
'''
if curve_list.shape[0] > 0:
plt.plot(curve_list.T, color = 'grey', alpha = 0.1)
if 'speed' in title:
start = 180
max_y = 140
step = 45
plt.vlines(start,0,max_y,color = 'black')
plt.ylim((-2,max_y))
plt.ylabel('Speed (cm/s)')
elif 'distance' in title:
start = 180
step = 45
plt.vlines(start,0,140,color = 'black')
plt.ylim((-4,140))
plt.ylabel('Distance (cm)')
plt.plot(curve, color = 'red')
plt.xlabel('Time (s)',fontsize = 12)
labels = [round((i - start)/45) for i in range(0,curve.shape[0],step)]
plt.xticks(ticks = list(range(0,curve.shape[0],step)),
labels = labels)
else:
print("empty curve list")
try:
# plot entire speed and distance trace of a recording
print('\nPlot speed and distance over time')
plot_behavioral_data('CD1-B1-MS_US-T1.2.csv')
# plot traces within one interval
video = 'CD1-B6-UN_LS-T1.2.csv'
plot_behavioral_data_example(2170, 3520, video)
line = 'C57j'
time = 'T1'
left_length = 180
right_length = 180
curve_to_visualize = 'distance'
behavior = 'flights_without_chases'
align_criterium = 'max_slope'
if align_criterium: crit_title = ' - alignment by ' + align_criterium
else: crit_title = ' - no aligment'
video_list_all = w.get_video_list()
video_list = [video for video in video_list_all if time in video and line in video]
print('Computing average distance and speed traces around flight onset for all listed recordings...\n')
curve_list = w.compute_window(video_list, behavior, left_length,
right_length, curve_to_visualize,
align_criterium)
mean_curve = np.nanmean(curve_list, axis = 0)
plt.figure(figsize = (6,6))
plot_window(curve_list, mean_curve, curve_to_visualize)
plt.show()
plt.figure(figsize = (6,6))
st_error = sem(curve_list)
plt.plot(mean_curve)
x = [i for i in range(left_length + right_length+1)]
plt.fill_between(x,mean_curve+st_error, mean_curve-st_error,alpha = 0.4)
plt.show()
except:
print('Failed')
\ No newline at end of file
"""
Co-habitation time spent within short distance
In this script, we measure the duration that two mice from the same pair spend in close proximity to each other. "Close" is defined as follows:
- They must be in the same ROI.
- The Euclidean distance between them must be less than a predefined threshold, referred to here as the "radius."
Input data:
| |video |animal |roi |entry|exit|
|- |- |- |- |- |- |
|0 |C57j-B6-US_MS |UM |Up-corn1 |1214 |1477|
|1 |C57j-B6-US_MS |UM |Up-corn1 |1690 |2500|
|2 |C57j-B6-US_MS |UM |Up-corn1 |2570 |2598|
|3 |C57j-B6-US_MS |UM |Up-corn1 |3688 |3737|
|4 |C57j-B6-US_MS |UM |Up-corn1 |7210 |7301|
|.. |.. |.. |.. |.. |.. |
"""
import importlib
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import window_analysis as d
PIXEL_TO_CM = 15.4
ROIS = ['Down-HF','Down-comp','Down-corn1','Down-corn2','Down-corn3','Down-corn4','Down-nest','Down-HT',
'Up-HF','Up-comp','Up-corn1','Up-corn2','Up-corn3','Up-corn4','Up-nest','Up-HT']
def delete_hiding_noise(data: pd.DataFrame) -> pd.DataFrame:
'''
Sometimes, a mouse moving very close to an ROI is incorrectly detected as being
inside this ROI for a few frames. The following function deletes those rows where
the mouse is considered to be in one ROI for less than one second,
aiming to reduce noise in the dataset.
Args:
data: dataset containing ROI information for each mouse along with the
corresponding entry and exit time frames.
Returns:
denoised version of the input dataset
'''
indices = data[data.exit - data.entry <= FRAMERATE].index.values.tolist()
data.drop(indices, inplace = True)
data.reset_index(inplace = True)
return data
def create_rois_per_frame(data: pd.DataFrame):
"""
Creates a dataset of the following format:
| | C57j-B6-UN_LS-T1.1-LM | C57j-B6-US_MS-T1.2-LM | C57j-B6-UN_LS-T1.2-UM | ... | CD1-B6-UN_LS-T1.1-LM | CD1-B6-UN_LS-T1.2-UM |
|-------------------------|-----------------------|-----------------------|-----------------------|-----|-----------------------|-----------------------|
| 0 | Down-corn2 | Down-comp | Up-comp | ... | Up-nest | Up-corn4 |
| 1 | Down-corn2 | Down-comp | Up-comp | ... | Up-nest | Up-corn4 |
| 2 | Down-corn2 | Down-comp | Up-comp | ... | Up-nest | Up-corn4 |
| 3 | Down-corn2 | Down-comp | Up-comp | ... | Up-nest | Up-corn4 |
| 4 | Down-corn2 | Down-comp | Up-comp | ... | Up-nest | Up-corn4 |
| ... | ... | ... | ... | ... | ... | ... |
Args:
data: dataset containing ROI information for each mouse along with the
corresponding entry and exit time frames.
Returns:
Reshaped version of data indicating ROIs information for each mouse and for each frame
"""
data = delete_hiding_noise(data)
videos = data.video.unique()
new_dataset = pd.DataFrame()
for video in tqdm(videos):
for mouse in ['UM','LM']:
column_name = video + '-' + mouse
column = pd.Series([None for i in range(27000)], name = column_name)
for roi in rois:
data1 = data[data.video == video]
data2 = data1[data1.roi == roi]
data3 = data2[data2.animal == mouse]
if data3.shape[0] > 0:
for i in range(data3.shape[0]):
start = data3.entry.iloc[i]
end = data3.exit.iloc[i]
column.iloc[start:end+1] = [roi for j in range(start,end+1,1)]
new_dataset = pd.concat([new_dataset, column],axis=1)
return new_dataset
def replace_names(rois_per_frame: pd.DataFrame):
"""
Adjust roi names:
We temporarily employ new roi names by considering the up/down corners as <br>
part of their respective up/down compartments.
Args:
rois_per_frame: data indicating ROIs information for each mouse and for each frame
Returns:
same dataset with simplified ROIs nomenclature
"""
for old_name in ['Down-comp','Down-corn1','Down-corn2','Down-corn3','Down-corn4']:
rois_per_frame.replace(old_name,'downcomp', inplace = True)
for old_name in ['Up-comp','Up-corn1','Up-corn2','Up-corn3','Up-corn4']:
rois_per_frame.replace(old_name,'upcomp', inplace = True)
return rois_per_frame
def cohabitation_distance(radius: int, times: str, rois_per_frame: pd.DataFrame) -> dict:
"""
Computes portion of time spent in the same ROI and within radius distance
Args:
radius: max distance
times: 'T1' (early) or 'T2' (late)
rois_per_frame: data indicating ROIs information for each mouse and for each frame
Returns:
time_close_dict: dictionary storing time spend close to each other
by each recording pair for all strains.
"""
rois_per_frame = replace_names(rois_per_frame)
time_close_dict = {}
for line in ['CD1','Hyb','C57j']:
time_cose_line_dict = {}
for time in times:
columns = [c for c in rois_per_frame.columns.tolist() if (line in c and time in c)]
n = len(columns)
time_close = []
for i in range(0,n,2):
video = columns[i][:-3] + '.csv'
data = d.get_data(video)
dist = d.compute_distance(data) / PIXEL_TO_CM
dist_filter = dist.apply(lambda x: 1 if x < radius else 0)
cond_same_roi = rois_per_frame[columns[i]] == rois_per_frame[columns[i+1]]
cond_same_roi.apply(lambda x: 1 if x else 0)
num_frames_close = np.dot(cond_same_roi[0:dist.shape[0]],dist_filter)
tot_time = dist.shape[0]
time_close.append(num_frames_close/tot_time)
time_cose_line_dict[time] = time_close
time_close_dict[line] = time_cose_line_dict
return time_close_dict
def plot_cohabitatio_distance(times: str, time_close_dict: dict):
"""
Plots mean portion of time spent in the same ROI and within radius distance
for each line.
Args:
time_close_dict: dictionary storing time spend close to each other
by each recording pair for all strains.
times: 'T1' (early) or 'T2' (late)
"""
plt.figure(figsize = (4,3))
for line in ['CD1','Hyb','C57j']:
means = []
for time in times:
means.append(np.mean(time_close_dict[line][time]))
plt.plot([0,1],means, label = line)
plt.scatter([0,1],means,s=100)
title = 'Radius distance: ' + str(radius) + 'cm'
plt.title(title)
plt.xticks(ticks = [i for i in range(len(times))],labels = times, fontsize = 15)
plt.ylim((0,0.5))
plt.xlim((-0.2,1.2))
plt.legend()
plt.show()
try:
data = pd.read_csv('input_data.csv',low_memory=False) # change path name for input data
rois_per_frame = create_rois_per_frame(data)
time_close_dict = cohabitation_distance(10,"T1", rois_per_frame)
plot_cohabitatio_distance("T1", time_close_dict)
except:
print("Failed")
source diff could not be displayed: it is too large. Options to address this: view the blob.
readme.md 0 → 100755
# Induction of Territorial Behavior and Dominance Hierarchies in Laboratory Mice
This repository contains the code used for analyzing the research described in the paper by
Dorian Battivelli, Lucas Boldrini, Mohit Jaiswal, Pradnya Patil, Sofia Torchia, Elizabeth Engelen, Luca Spagnoletti, Sarah Kaspar and Cornelius T. Gross: "**Induction of territorial behavior and dominance hierarchies in laboratory mice**".<br>
You can explore the data associated with this study [here]().
#### Abstract
Territorial behaviors comprise a set of coordinated actions and response patterns found across animal species that promote the exclusive access to resources. House mice are highly territorial with a subset of males consistently attacking and chasing competing males to expel them from their territories and performing urine marking behaviors to signal the extent of their territories. Natural variation in territorial behaviors within a mouse colony leads to the formation of dominance hierarchies in which subordinate males can reside within the territory of a dominant male. While the full repertoire of such territorial behaviors and hierarchies has been extensively studied in wild-derived mice in semi-natural enclosures, so far they have not been established in the smaller enclosures and with the genetically-defined laboratory strains required for the application of neural recording and manipulation methods. Here, we present a protocol to induce an extensive repertoire of territorial behaviors in small enclosures in laboratory mice, including a method for the simultaneous tracking of urine marking behavior in mouse pairs. Using this protocol we describe the emergence of robust dominant-subordinate hierarchies between pairs of CD1 outbred or CD1xB6 F1 hybrid mice, but unexpectedly not in C57BL/6 inbred animals. Our behavioral paradigm opens the door for neurocircuit studies of territorial behaviors and social hierarchy in the laboratory.
## Overview
The repository includes code for the following analyses: <br>
1. Average Distance and Speed:
- ```window_analysis.py``` computes the distance between two mice and the speed of a single mouse over time.
- ```plot_window.py``` generates plots showing overlapping distance or speed curves extracted from windows centered around all flight onsets.
2. Permutation test for comparison with baseline interaction
3. Unbiased Dominance Score via PCA Analysis
4. Proximity measure
5. Time Spent Hiding
### 1) Average distance and speed
```window_analysis.py``` implements the necessary steps to compute distance between two mice and speed of a mouse over time, while ```plot_window.py``` produces plots for overlapping distance or speed curves extracted from windows centered around all flight onsets. <br>
Here, input data (behavioral recordings and coordinates
files) are a .csv files stored in a folder named respectively
"Data" and "Behavioral scoring". We also assign each mouse a dominance status ("dominant" or "subordinate") through ```Labels.csv```.
In order to run this code, organize data and code as follows:
- Data:
- ```CD1-B1-MS_US-T1.1.csv```
- ```CD1-B1-MS_US-T1.2.csv```
- ...
- Behavioral scoring:
- ```CD1-B1-MS_US-T1.1.csv```
- ```CD1-B1-MS_US-T1.2.csv```
- ...
- ```Labels.csv```
- ```window_analysis.py```
- ```plot_window.py```
Each mouse is originally identified by the x and y coordinates of eight points on its body. These coordinates are tracked over the whole recording and stored in a .csv file in "Data":
| <span style="color:grey">UM_Ear_left_1_x</span> | <span style="color:grey">UM_Ear_left_1_y</span> | <span style="color:grey">UM_Ear_left_1_p</span> | <span style="color:grey">UM_Nose_1_x</span> | <span style="color:grey">UM_Nose_1_y</span> | ... | <span style="color:grey">LM_Lat_left_2_p</span> | <span style="color:grey">LM_Lat_right_2_x</span> | <span style="color:grey">LM_Lat_right_2_y</span> | <span style="color:grey">LM_Lat_right_2_p</span> | <span style="color:grey">LM_Tail_base_2_x</span> | <span style="color:grey">LM_Tail_base_2_y</span> | <span style="color:grey">LM_Tail_base_2_p</span> |
|-------------------------|-------------------------|-------------------------|--------------------|--------------------|-----|-------------------------|--------------------------|--------------------------|--------------------------|---------------------------|---------------------------|---------------------------|
| 1174.5143 | 247.42857 | 1.0 | 1139.6571 | 258.20000 | ... | 1.0 | 1968.7428 | 1937.9714 | 1.0 | 1932.0857 | 1974.3429 | 0.999257 |
| 1175.5714 | 248.74286 | 1.0 | 1139.6000 | 254.71428 | ... | 1.0 | 1968.1714 | 1935.8572 | 1.0 | 1931.7428 | 1974.4857 | 0.999743 |
| 1176.0000 | 249.91429 | 1.0 | 1139.4000 | 252.65715 | ... | 1.0 | 1968.1714 | 1935.1714 | 1.0 | 1931.2572 | 1974.3429 | 1.000086 |
| 1174.8286 | 250.17143 | 1.0 | 1139.2572 | 253.82857 | ... | 1.0 | 1968.8286 | 1936.5714 | 1.0 | 1930.9143 | 1973.9143 | 1.000000 |
| 1174.6857 | 249.82857 | 1.0 | 1139.2572 | 253.74286 | ... | 1.0 | 1968.8286 | 1936.9143 | 1.0 | 1931.0857 | 1973.9143 | 1.000000 |
|...|...|...|...|...|...|...|...|...|...|...|...|...|...|
Each behavioral annotation file in "Behavioral scoring" is similar to:
| |<span style="color:grey">Time</span> | <span style="color:grey">Default</span>|
| - | - | -|
|0 |0.00 | NaN|
|1 |0.20 | NaN|
|2 |0.60 | C-M2-C1 | # mouse M2 chases (C) mouse M1 in compartment C1
|3 |0.80 | NaN|
... |... | ...|
```Labels.csv``` has the following structure:
| | <span style="color:grey">Video</span> | <span style="color:grey">Dominant</span> | <span style="color:grey">Subordinate</span> | <span style="color:grey">Start_frame</span>|
|-|-|-|-|-|
|0 | C57j-B1-MS_US-T1.1.csv | M2 | M1 | 495|
|1 | C57j-B1-UN_LS-T1.1.csv | M1 | M2 | 0|
|2 | C57j-B2-MS_US-T1.1.csv | M1 | M2 | 675|
|3 | C57j-B2-UN_LS-T1.1.csv | M2 | M1 | 0|
|... | ... | ... | ... | ...|
Behavior is annotated at 5 frames per second,
while coordinates over time are extracted at 45 frames per second. For each behavioral scoring file the first frames are discarded; ```Labels.csv```stores the actual initial frame for these files inside the column "Start_frame".
### 2) Permutation test for comparison with baseline interaction
The permutation tests run on an `.xlsx` data file, which has the columns `Mouse_line`, `pair`, and one column for each behavior to be tested.
### 3) Unbiased dominance score via PCA analysis
File organization:
- pca_analysis.py
- pca_dataset.csv
The script pca_analysis.py operates on pca_dataset.csv which has the following structure:
| | <span style="color:grey">video&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;</span>| <span style="color:grey">chases</span>|<span style="color:grey">attacks</span>|<span style="color:grey">other_comp</span>|<span style="color:grey">other_cage</span>|<span style="color:grey">locomotion</span>| <span style="color:grey">ur_postures</span>|<span style="color:grey">flights</span>|<span style="color:grey">hiding</span>|<span style="color:grey">mouse</span>|<span style="color:grey">line</span>|<span style="color:grey">time</span>|<span style="color:grey">interaction</span>|
|- |- | -|- |- | -|- |- |- |- |- | - |- |- |
|0 |CD1-B1-UN_LS-T1|0.0| 25.0 | 9.5 | 0.0 | 15293.8694 | 6.0 | 10.0 | 21.5 | UN |CD1 | T1 | 1|
|1 |CD1-B1-UN_LS-T1|15.0| 32.0 | 50.5 | 12.0 | 12501.2516| 0.0 | 1.0 | 12.5 |LS |CD1| T1 | 1|
|2 |CD1-B1-MS_US-T1|2.0 | 25.0 | 41.5 | 2.0 | 15990.6214 | 0.0 | 0.0 | 23.0 | MS |CD1 | T1 | 1|
|3 |CD1-B1-MS_US-T1|6.0| 23.0 | 32.0 | 1.0 | 13421.5625 | 0.0| 5.0 | 22.5 | US |CD1 | T1 | 1|
|...|...|...|...|...|...|...|...|...|...|...|...|...|...|
### 4 - 5) Proximity measure and Hiding analysis
Both analysis require the following data format:
| |<span style="color:grey">video</span>| <span style="color:grey">animal</span> | <span style="color:grey">roi</span>| <span style="color:grey">entry</span>| <span style="color:grey">exit</span>|
|-|-|-|-|-|-|
|0 | C57j-B6-US_MS | UM | Up-corn1 | 1214 | 1477|
|1 | C57j-B6-US_MS| UM | Up-corn1 | 1690 | 2500|
|2 | C57j-B6-US_MS | UM | Up-corn1 | 2570 | 2598|
|3 | C57j-B6-US_MS | UM | Up-corn1 | 3688 | 3737|
The script for "proximity measure" quantifies the time two mice from the same pair spend close to each other. "Close" is defined by being in the same ROI and having an Euclidean distance less than a predefined threshold (here 10 centimeters), referred to as the "radius."<br>
The script for "hiding" computer for each mouse belonging to the same pair:
- the total time of the recording
- the total time spent hiding
- the portion of time spent in various regions (e.g. other compartment, other nest, other HF) <br><br>
## Running this code
Ensure Python version 3.10.0 is installed to execute the provided scripts successfully, and that all libraries listed in ```requirements.txt``` are available with specified versions.
You can do so by cloning this repository and running the following commands: <br>
```
conda create --name behavior_analysis python==3.10
conda activate behavior_analysis
pip install -r requirements.txt
```
You can now run the analysis for speed and distance from the repository directory by typing:
```
python plot_window.py
```
And the PCA exploration for dominance labeling with:
```
python pca_analysis.py
```
### Permutation tests
For running the analysis
- make sure you have the libraries `tidyverse` and `readxl` installed
- navigate to the folder that contains `permutations.R`
Run the script as follows:
```console
Rscript --vanilla permutations.R INPUT_FILE OUTPUT_FILE
```
where `INPUT_FILE` gives the path to an `.xlsx` table containing the data to be analyzed, and `OUTPUT_FILE` is the path to store the results. The output file should be `.tsv`.
Optionally, you can provide a seed as the third argument, and the number of permutations as the fourth argument. For example:
```console
Rscript --vanilla permutations.R data/T1.xlsx results/T1-results.tsv 22 1000
```
For this publication, the analysis was run with `seed = 21` and `N=10000` permutations, with R version 4.3.0 under macOS 14.4.1.
## Authors
Sofia Torchia <br>
Sarah Kaspar
matplotlib == 3.7.1
numpy == 1.24.3
pandas == 2.0.1
scipy == 1.10.1
scikit-learn == 1.2.2
\ No newline at end of file
"""
Description:
This module implements the necessary steps to compute
distance between two mice and speed of a mouse over time.
File organization:
Here, we assume that behavioral recordings and coordinates
files are a .csv file stored in a folder named respectively
Data and Behavioral scoring.
We also assign each mouse a dominance status ("dominant" or "subordinate")
through Labels.csv
In order to run this code, data and code have to be organized as follows:
- Data:
- CD1-B1-MS_US-T1.1.csv
- CD1-B1-MS_US-T1.2.csv
- ...
- Behavioral scoring:
- CD1-B1-MS_US-T1.1.csv
- CD1-B1-MS_US-T1.2.csv
- ...
- Labels.csv
- window_analysis.py
- plot_window.py
Data structure:
Each coordinate file in Data has a structure similar to:
MS_Ear_left_1_x MS_Ear_left_1_y ... US_Tail_end_2_y
0 1174.5143 247.42857 ... 1964.00000
1 1175.5714 248.74286 ... 1964.08570
2 1176.0000 249.91429 ... 1963.74280
3 1174.8286 250.17143 ... 1963.25720
4 1174.6857 249.82857 ... 1962.82860
... ... ... ... ...
Each behavioral file in Behavioral scoring folder is similar to:
Time Default
0 0.00 NaN
1 0.20 NaN
2 0.40 NaN
3 0.60 C-M2-C1 # mouse M2 chases (C) mouse M1 in compartment C1
4 0.80 NaN
... ... ...
Labels.csv has the following structure:
Video Dominant Subordinate Start_frame
0 C57j-B1-MS_US-T1.1.csv M2 M1 495
1 C57j-B1-UN_LS-T1.1.csv M1 M2 0
2 C57j-B2-MS_US-T1.1.csv M1 M2 675
3 C57j-B2-UN_LS-T1.1.csv M2 M1 0
4 C57j-B3-MS_US-T1.1.csv M1 M2 450
... ... ... ... ...
Frame rate:
Behavioral scoring is annotated at 5 frames per second,
while coordinates over time are extracted at 45 frames per second.
"""
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import rcParams
MOUSE1 = 'M1'
MOUSE2 = 'M2'
RIGHT = 60
LEFT = 30
FRAMES_PER_SECOND = 45
FRAME_RATE_ADAPT = 9
PIXEL_TO_CM = 15.4
MAX_LENGTH = 3000
BEHAVIORS = ['Fl','A','C','U'] # list of annotated behaviors
LABELS_PATH = 'Labels.csv' # stores dominance status for all mice
'''
TODO:
1) delete unused functions
3) constants
4) add comments and examples
5) find better names for functions
6) strain or line or both?
10) function order
13) append or extend
'''
def get_data(video: str) -> pd.DataFrame:
'''
This function processes a given video recording and extracts
the positional data of two mice over time.
Specifically, the output dataset contains the coordinates
along the x and y axes for the eight points identifying each mouse.
Args:
video: name of recording stored in Data folder
Returns:
pandas dataframe corresponding to mice coordinates extracted
from the recording
'''
data = pd.read_csv('Data/' + video, header = 0)
return data
def compute_distance(data: pd.DataFrame) -> pd.Series:
'''
This function computes the euclidean distance over time between MOUSE1 and MOUSE2.
Each mouse is originally identified by the x and y coordinates
of eight points on its body, for instance mouse1 is identified by
mouse1_x and mouse1_y data.
For each mouse we compute its mean position over time along
x and y coordinates, (for example: pos_mouse_1_x and pos_mouse_1_y for mouse1)
so that one mouse is identified only by a single two-dimensional point.
Finally, the euclidean distance between these 2 two-dimensional point over time is returned.
Args:
data: dataset with mice coordinates
Returns:
time series corresponding to distance between mice over time
'''
mouse1_x, mouse1_y = get_column_names(data, MOUSE1)
mouse2_x, mouse2_y = get_column_names(data, MOUSE2)
pos_mouse_1_x = data[mouse1_x].mean(axis = 1).values
pos_mouse_1_y = data[mouse1_y].mean(axis = 1).values
pos_mouse_1 = pd.DataFrame({'pos_x':pos_mouse_1_x,'pos_y':pos_mouse_1_y})
pos_mouse_2_x = data[mouse2_x].mean(axis = 1).values
pos_mouse_2_y = data[mouse2_y].mean(axis = 1).values
pos_mouse_2 = pd.DataFrame({'pos_x':pos_mouse_2_x,'pos_y':pos_mouse_2_y})
return ((pos_mouse_1 - pos_mouse_2)**2).sum(axis=1)**0.5
def get_column_names(data: pd.DataFrame, mouse_label: str) -> pd.DataFrame:
'''
Each mouse is originally identified by the x and y coordinates
of eight points on its body, such as tail_base, tail_end,
ear_left, ear_right, etc.
Given a mouse_label (MOUSE1 or MOUSE2), this function returns
the columns names of the x and y coordinates of these eigth points.
Args:
data: dataset with mice coordinates over time
mouse_label: name of one specific mouse
Returns:
colums of dataset corresponding to coordinates
of one specific mouse
'''
if mouse_label == MOUSE1:
columns = get_column_names_lbl(data, '_1_x', '_1_y')
if mouse_label == MOUSE2:
columns = get_column_names_lbl(data, '_2_x', '_2_y')
return columns
def get_column_names_lbl(data: pd.DataFrame, x_lbl: str, y_lbl: str) -> tuple[list, list]:
'''
Returns list of data columns name related to x and y coordinates of a mouse.
Args:
data: dataset with mice coordinates over time
x_lbl: label for x coordinates
y_lbl: label for y coordinates
Returns:
mouse-specific columns names for x and y coordinates
'''
mouse_x = [x for x in data.columns if x_lbl in str(x)]
mouse_y = [x for x in data.columns if y_lbl in str(x)]
return mouse_x, mouse_y
def compute_speed(data : pd.DataFrame, mouse_label: str) -> pd.Series:
'''
This function computes the speed over time of a mouse.
Each mouse is originally identified by the x and y coordinates of
eight points on its body: mouse_x and mouse_y.
First, these eight points are used to compute a mean two-dimensional
point on the mouse with coordinates pos_mouse_x and pos_mouse_y.
The mouse speed at time t is computed as the variation
of the mouse position from time t1=t-FRAMES_PER_SECOND to time t2=t
Args:
data: dataset with mice coordinates over time
mouse_label: name of one specific mouse
Returns:
speed of mouse over time
'''
mouse_x, mouse_y = get_column_names(data,mouse_label)
pos_mouse_x = data[mouse_x].mean(axis = 1)
pos_mouse_y = data[mouse_y].mean(axis = 1)
vel_mouse_x = pos_mouse_x - pos_mouse_x.shift(FRAMES_PER_SECOND)
vel_mouse_y = pos_mouse_y - pos_mouse_y.shift(FRAMES_PER_SECOND)
speed_mouse = (vel_mouse_x**2 + vel_mouse_y**2).apply(lambda x: np.sqrt(x))
return speed_mouse
def get_dominant(video: str) -> str:
'''
This function returns the dominant mouse name
(M1 or M2) for each couple
Args:
video: name of a recording
Returns:
name of dominant mouse in the couple
'''
labels = pd.read_csv(LABELS_PATH)
return labels[labels.Video == video]['Dominant'].values[0]
def get_subordinate(video: str) -> str:
'''
This function returns the subordinate mouse name
(M1 or M2) for each couple
Args:
video: name of a recording
Returns:
name of subordinate mouse in the couple
'''
labels = pd.read_csv(LABELS_PATH)
return labels[labels.Video == video]['Subordinate'].values[0]
def get_video_list() -> list:
'''
This function returns a list of the couples for which
a dominant/subordinate labeling is available.
'''
labels = pd.read_csv(LABELS_PATH)
return labels.Video.values.tolist()
def compute_window(video_list: list, target_behavior: str, left_length: int,
right_length: int, curve_to_visualize: str,
align_criterion: str=None) -> tuple[list, dict]:
'''
For each video of mice,
this function produces windows from the time series curve_to_visualize
of length l = left_length + right_length + 1
centered in all events occurred for a particular behavior (chase or flight)
taking into account the status of the fleeing mouse (dominant or subordinate)
The selected windows are aligned following a criterion (align_criterion)
and returned together with the mean curve.
Args:
video_list: list of recordings from which distance or speed curves
are extracted
target_behavior: specifies the kind of behavioral event around which
distance and speed windows are computed
left_length: right length of the window
right_length: left length of the window
curve_to_visualize: curves to be visualized in the plot (speed or distace)
align_criterion: re-calculation of the 'zero' of the windows based on
a specific criterion
Returns:
list of curves to visualize
'''
curve_list = []
for video in video_list:
new_list = compute_window_video(left_length, right_length,
video, target_behavior,
curve_to_visualize,
align_criterion)
curve_list.extend(new_list)
curve_list = np.array(curve_list)
return curve_list
def compute_window_video(left_length: int, right_length: int,
video, target_behavior: str, curve_to_visualize: str,
align_criterion: str=None) -> tuple[list, dict]:
'''
Given a time_series and a set of indices, this function produces a window
of length l = left_length + right_length + 1
centered in all events occurred for a particular behavior (chase or flight)
considering the status of the fleeing mouse (dominant or subordinate)
The selected windows are aligned together following a criterion (align_criterion)
and returned together with the mean curve.
Args:
video: recording from which distance or speed curves
are extracted
target_behavior: specifies the kind of behavioral event around which
distance and speed windows are computed
left_length: right length of the window
right_length: left length of the window
curve_to_visualize: curves to be visualized in the plot (speed or distace)
align_criterion: re-calculation of the 'zero' of the windows based on
a specific criterion
Returns:
list of curves to visualize
'''
dominant = get_dominant(video)
subordinate = get_subordinate(video)
data = get_data(video)
behavioral_scoring_data = get_behavioral_scoring_data(video, ['Fl','C'])
speed_subordinate = compute_speed(data,subordinate) / PIXEL_TO_CM
frames = get_flights(dominant, subordinate,
behavioral_scoring_data, target_behavior)
new_frames = indices_by_align_criterion(speed_subordinate,
frames, align_criterion)
time_series = get_curve_to_visualize(curve_to_visualize, data, video)
curve_list = []
cumsum = 0
dataset = time_series
for i in range(- left_length, right_length + 1, 1):
new_col = time_series.shift(-i)
new_col = new_col.rename(f'shift{i}')
dataset = pd.concat([dataset, new_col], axis = 1)
dataset.drop(columns = [0], inplace = True)
for frame in new_frames:
curve = dataset.loc[frame]
cumsum = cumsum + curve['shift0']
curve_list.append(list(curve))
return curve_list
def get_curve_to_visualize(curve_to_visualize: str,
data: pd.DataFrame, video: str) -> pd.DataFrame:
'''
Return time series to be decomposed in compute_window(...) and represented
through plot_window(...)
Args:
curve_to_visualize: name of the curve to use for window computation
data: set of mice coordinates to compute distance or speed over time
video: name of the recording from which the mice coordinates are
extracted.
Returns:
time series from which windows are computed
'''
if curve_to_visualize == 'distance':
curve = compute_distance(data) / PIXEL_TO_CM
if curve_to_visualize == 'speed subordinate mouse':
curve = compute_speed(data,get_subordinate(video)) / PIXEL_TO_CM
if curve_to_visualize == 'speed dominant mouse':
curve = compute_speed(data,get_dominant(video)) / PIXEL_TO_CM
return curve
def adapt_frame_rate(old_dataset: pd.Series, behavior_list: list) -> pd.DataFrame:
'''
This function performs an oversampling of the behavioral dataset (old_dataset)
from 5 frames per second to 45 frames per second, such that no behavior event
is duplicated.
Args:
old_dataset: raw dataset with behavioral annotation (frame rate of 5Hz)
behavioral_list: list of all possible behavioral annotations
Returns:
oversampled behavioral dataset with frame rate of 45Hz
Example:
old_dataset = [None, 'Flight', None]
new = [None, None, None, None, None, None, None, None, None,
'Flight', None, None, None, None, None, None, None, None,
None, None, None, None, None, None, None, None, None]
'''
new = []
for i in range(old_dataset.shape[0]):
if old_dataset[i] in behavior_list:
new.append(old_dataset[i])
new.extend([None for _ in range(FRAME_RATE_ADAPT-1)])
else:
new.extend([None for _ in range(FRAME_RATE_ADAPT)])
return pd.DataFrame({'Default': new})
def get_behavioral_scoring_data(video: str, behavior_list: list):
'''
Givena a couple (video), this function returns
the dataset containing the behavioral scoring, which is at 5 Hz.
'''
scoring_list = get_scoring_list(behavior_list)
behavioral_scoring_data = pd.read_csv('Behavioral scoring/' + video)
cond = behavioral_scoring_data.Time == 'Frequencies'
right_length_index = behavioral_scoring_data[cond].index[0] - 1
labels = pd.read_csv(LABELS_PATH)
init_index = labels[labels.Video == video]['Start_frame'].values[0]
right_length_index = min(right_length_index, init_index + MAX_LENGTH)
behavioral_scoring_data = behavioral_scoring_data[init_index:right_length_index].reset_index()
behavioral_scoring_data = adapt_frame_rate(behavioral_scoring_data.Default,scoring_list)
return behavioral_scoring_data
def index_at_behavior(beh: str, mouse: str, dataset: pd.Series) -> list:
'''
Given a mouse and a behavior label (beh), this function returns
the indices at which the behavioral event occurs for the given
mouse in both compartments (C1 and C2).
Args:
beh: name of bahavior
mouse: name of mouse
dataset: dataset containing mice coordinates for a single recording
Returns:
list of indices where all behavioral event of the same kind and
for specified mouse occur in the recording
'''
indices = []
for comp in ['-C1','-C2']:
condition = dataset == beh + '-' + mouse + comp
ind = dataset[condition].index.values.tolist()
indices.extend(ind)
return indices
def get_scoring_list(behaviors: list) -> list:
'''
Returns all (complete) behavior labels for the given behaviors in input.
Args:
behaviors: list of behavior names
Returns:
all possible behavioral annotation for input behavior names
(including information about the mouse doing the behavior
and the compartment in which the event occurs)
Example:
behaviors = ['Fl','C']
behavior_list = ['Fl-M1-C1','Fl-M1-C2','Fl-M2-C1','Fl-M2-C2',
'C-M1-C1','C-M1-C2','C-M2-C1','C-M2-C2']
'''
behavior_list = []
for mouse in ['-M1','-M2']:
for comp in ['-C1','-C2']:
for behavior in behaviors:
behavior_list.append(behavior + mouse + comp)
return behavior_list
def get_flights(dominant: str, subordinate: str, dataset: pd.Series, behavior: str) -> list:
'''
Returns the event indices for:
- all flights occurred for mouse_for_flights
- all flights occurred for mouse_for_flights when this is NOT chased
by mouse_for_chases whithin a time span given by time_span
- all flights occurred for mouse_for_flights ONLY when this is chased
by mouse_for_chases whithin a time span given by time_span
Args:
dominant: name of dominant mouse
subordinate: name of subordinate mouse
dataset: dataset with behavioral annotations
behavior: type of behavioral events
Returns:
list of frames when the specified type of behavior occurs
'''
time_span = FRAMES_PER_SECOND*2
for i in range(-time_span,1,1):
new_col = dataset.Default.shift(-i)
new_col = new_col.rename('shift' + str(i))
dataset = pd.concat([dataset, new_col], axis = 1)
cond1 = dataset.Default == 'Fl-' + subordinate + '-C1'
cond2 = dataset.Default == 'Fl-' + subordinate + '-C2'
filtered = dataset[cond1 | cond2].drop(columns = ['Default'])
cond3 = filtered.where(filtered == 'C-' + dominant + '-C2')
cond4 = filtered.where(filtered == 'C-' + dominant + '-C1')
flights_with_chases = filtered[cond3.any(axis = 1)].index.tolist()
flights_with_chases = flights_with_chases + filtered[cond4.any(axis = 1)].index.tolist()
if behavior == 'flights_with_chases':
return flights_with_chases
all_flights = index_at_behavior('Fl', subordinate, dataset.Default)
if behavior == 'all_flights':
return all_flights
if behavior == 'flights_without_chases':
return [f for f in all_flights if f not in flights_with_chases]
def indices_by_align_criterion(time_series: pd.Series,
frames: list, align_criterion: str) -> list:
'''
This function takes as input a time series and a set of indices,
selects a window of the time series around each index and
computes a new index based on one of the following criteria:
- None: the previous index is returned
- peak: the new index is the point of maximum of the time series
inside the window
- max_slope: the new index is the point of maximun slope of the time series
inside the window
Args:
time_series: time series based on which the new frames are computed
frames: list of old frames
align_criterion: criterion for calculation of the new set of frames
Returns:
list of new frames
'''
if align_criterion:
new_frames = []
for frame in frames:
curve = time_series.loc[frame - LEFT:frame + RIGHT]
if align_criterion == 'peak':
new_frame = curve.argmax() + max(frame - LEFT,0)
if align_criterion == 'max_slope':
gradient = np.gradient(curve)
new_frame = gradient.argmax() + max(frame - LEFT,0)
new_frames.append(new_frame)
else:
new_frames = frames
return new_frames
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment