Commit 34db5f59 authored by Antonio Politi's avatar Antonio Politi
Browse files

load data on the fly

parent d38502a0
function master_workflow_fit_protein(wd)
% MASTER_WORKFLOW_FIT_DYE call workflow_fit_dye for several directorie
% MASTER_WORKFLOW_FIT_PROTEIN call workflow_fit_protein for several directorie
% specified in the script
%
% Antonio Politi, EMBL, January 2017
force = 0;
session = '2c'
force = 1;
if nargin <1
wd = uigetdir('.', 'Specify main directory where to search protein folders');
......@@ -18,7 +19,7 @@ if ~exist(fullfile(wd, 'ProcessingHelpFiles', 'protein2cfiles.mat')) || force
[indirCell] = getAllFolders(wd, '\Mito(s|S)ys\d');
resfiles = [];
for i = 1:length(indirCell)
resfiles = [resfiles; getAllFiles(indirCell{i}, 'res', '2c.res', 0)];
resfiles = [resfiles; getAllFiles(indirCell{i}, 'res', [session '.res'], 0)];
end
if isempty(resfiles)
resftable = readtable(fullfile(wd, 'ProcessingHelpFiles', 'protein2cfiles.txt'), 'ReadVariableName', 0);
......@@ -30,14 +31,13 @@ if ~exist(fullfile(wd, 'ProcessingHelpFiles', 'protein2cfiles.mat')) || force
save(fullfile(wd, 'ProcessingHelpFiles', 'protein2cfiles.mat'), 'resfiles');
else
load(fullfile(wd, 'ProcessingHelpFiles', 'protein2cfiles.mat'));
end
%%
%6 9 11
for ifile = 1:length(resfiles)
resfile = resfiles{ifile};
display(sprintf('master_workflow_fit_proteins processing res file %d/%d %s', ifile, length(resfiles), resfiles{ifile}))
fnames = workflow_fit_protein(resfile, '2c');
fnames = workflow_fit_protein(resfile, session);
%fnames = {fullfile(fileparts(resfile), '1c_opt.res'), fullfile(fileparts(resfile),'2c_opt.res')};
for iout = 1:length(fnames)
distributefiles(fnames{iout})
......
function [focVol] = workflow_fit_dye(indir, kappaVal, outputdir, FAsession, force)
function [focVolA, focVolfile] = workflow_fit_dye(indir, kappaVal, outputdir, FAsession, force)
% COMPUTEDIRECTORY perform fitting of dye in a specific directory. program
% assumes that traces of one experiment are all in one directory!
% INPUT:
......@@ -65,7 +65,7 @@ if isempty(xmlFAfiles)
display('No FA xml found use .fcs file instead');
cond = 3;
else
cond = 2;
cond = 1;
end
switch cond
......@@ -91,12 +91,7 @@ if ~MO.weightChi2
prefix = [prefix '_unw'];
end
%% load data into cell array dataC
if ZEN
dataC = MO.readZeissfcs(zenfiles{1});
else
dataC = MO.readFAxml(xmlFAfiles);
end
norm = [];
%% Workflow
......@@ -115,6 +110,12 @@ if exist(fullfile(outdir, [prefix '.mat'])) %load result file if it exists alrea
end
end
if (process || force)
%% load data into cell array dataC
if ZEN
dataC = MO.readZeissfcs(zenfiles{1});
else
dataC = MO.readFAxml(xmlFAfiles);
end
% starting value of kappa
kappa = 5;
% number of repeats for each fit (starts from random values)
......@@ -183,7 +184,7 @@ if (process || force)
for i = 1:lk
% find indexes for given kappaValue
posExp = find(round(10*outPar(:,9))==round(10*kappaVal(i)));
% compute mean focal volume (pl) for all experiments at given
% compute mean focal volume (fl) for all experiments at given
% kappa
mVol(i) = MO.computeVolume(mean(outPar(posExp, 1:nrPar),1),iC);
% compute mean focal radius (um) for all experiments at given
......@@ -210,10 +211,10 @@ if (process || force)
end
% avoid cases were boundary of kappaVal is touched and use a value of kappa
% = 5 instead
% = 5.8 instead this is the average values obtained for many experiments
posB = find(kappaVal==5.8);
if all([Chfits.posBest] == [1 1]) || all([Chfits.posBest] == [lk lk])
posB = find(kappaVal==5);
posB = find(kappaVal==5.8);
elseif (Chfits(1).posBest == 1) || (Chfits(1).posBest == lk)
posB = Chfits(2).posBest;
elseif (Chfits(2).posBest == 1) || (Chfits(2).posBest == lk)
......@@ -224,7 +225,8 @@ end
focVolA = [Chfits(1).mfocRad(posB) Chfits(1).mfocVol(posB) Chfits(1).kappaVal(posB);...
Chfits(2).mfocRad(posB) Chfits(2).mfocVol(posB) Chfits(2).kappaVal(posB); ...
sqrt(Chfits(1).mfocRad(posB)*Chfits(2).mfocRad(posB)) sqrt(Chfits(1).mfocVol(posB)*Chfits(2).mfocVol(posB)) Chfits(2).kappaVal(posB)];
fid = fopen(fullfile(indir, 'focalVolume.txt'), 'w');
focVolfile = fullfile(indir, 'focalVolume.txt');
fid = fopen(focVolfile, 'w');
fprintf(fid,'w0_um\tVol_fl\tkappa\r\n');
for i = 1:3
fprintf(fid,'%.4f\t%.4f\t%.1f\r\n', focVolA(i,1),focVolA(i,2),focVolA(i,3) );
......@@ -233,6 +235,7 @@ fclose(fid);
%% save all rounds of fit
if process || force
for iC = 1:2
if (dataC{1}(1,iC*2) == 0) % do not process if channel has not been acquired
continue
......@@ -291,7 +294,7 @@ for iC = 1:2
for i = 1:lk
% find indexes for given kappaValue
posExp = find(round(10*Chfits(iC).outPar(:,9))==round(10*kappaVal(i)));
% compute mean focal volume (pl) for all experiments at given
% compute mean focal volume (fl) for all experiments at given
% kappa
mVol(i) = MO.computeVolume(mean(Chfits(iC).outPar(posExp, 1:end-1),1),iC);
% compute mean focal radius (um) for all experiments at given
......@@ -313,7 +316,7 @@ for iC = 1:2
ylim([min(Chfits(iC).normN(:)) max(Chfits(iC).normN(:))])
ylabel('normalized Chi2');
xlabel('kappa')
title(sprintf('Channel %d kappa %.1f; Vol %.3f pl; w0 %.3f um', iC, Chfits(iC).kappa, Chfits(iC).mfocVol(posB), Chfits(iC).mfocRad(posB)))
title(sprintf('Channel %d kappa %.1f; Vol %.3f fl; w0 %.3f um', iC, Chfits(iC).kappa, Chfits(iC).mfocVol(posB), Chfits(iC).mfocRad(posB)))
% focal volume
subplot(4,1,2)
......@@ -343,3 +346,4 @@ for iC = 1:2
saveas(hf,fullfile(outdir,[prefix '_Ch' num2str(iC) '.png']), 'png')
end
end
end
\ No newline at end of file
function outfiles = workflow_fit_protein(resfile, FAsession)
% WORKFLOW_FIT_PROTEIN perform fitting of protein data using as input a res file from FA
function outfiles = workflow_fit_protein(resfile, FAsession, focVolfile)
%% WORKFLOW_FIT_PROTEIN perform one component and 2 component fitting of protein data using the xml files specified in
% the res file from FluctuationAnalyzer (FA).
% [OUTFILES] = WORKFLOW_FIT_PROTEIN() prompt for a resfile to process. The xml files/FAsession are
% identified by prefix of filename (resfile = 2c.res -> FAsession = 2c).
% Search for focal volume file in subdirectories.
% [OUTFILES] = WORKFLOW_FIT_PROTEIN(RESFILE) process RESFILE. The xml files/FAsession are
% identified by prefix of filename (resfile = 2c.res -> FAsession = 2c). Search for focal volume
% file in subdirectories.
% [OUTFILES] = WORKFLOW_FIT_PROTEIN(RESFILE, FASESSION) process RESFILE. FASESSION specifies id of FA xml files. Search for focal volume file
% in subdirectories.
% [OUTFILES] = WORKFLOW_FIT_PROTEIN(RESFILE, FASESSION, FOCVOLFILE) process RESFILE.
% FASESSION specifies id of FA xml files. focVolfile is file to use for
% focal volume.
% INPUT:
% resfile - a result file containing the directory where to extract files
% FAsession - session name
% RESFILE - a result file containing the directory where to extract files
% FASESSION - session name
% FOCVOLFILE - file containing the focal volume data. Table of the
% for
% OUTPUT:
% OUTFILES - path to output resfiles.
%
% Antonio Politi, EMBL, January 2017
% The workflow performs a 1 and 2 component fitting using anomalous diffusion model and writes results to a
% *1c_opt.res/*2c_opt.res file of FA format. Also the xml files are created
% with ID *1c_opt and *2c_opt.
% FOCVOLFILE is a tab delimited txt file. The file is created by
% workflow_fit_dye and named focalVolume.txt
% "w0_um Vol_fl kappa
% 0.1792 0.1858 5.8
% 0.2174 0.3319 5.8
% 0.1974 0.2484 5.8"
% The rows are for Ch1, Ch2 and Cross
% FOCVOLFILE is not necessary for the fitting a default value for kappa of 5.5 will be used instead. The workflow will try to find one or
% just proceed and does not output volume to in *.res file.
%
% Antonio Politi, EMBL, January 2017, Modified March 2017 for
%% default values
%% read the inputs
if nargin < 1
% prompt for a directory
[resfile, pathname] = uigetfile({'*.res'; '*.*'},'Select res file', '');
%[resfile, pathname] = uigetfile({'*.res'; '*.*'},'Select res file', 'Z:\AntonioP_elltier1\CelllinesImaging\MitoSysPipelines\160308_gfpNUP107z26z31\MitoSys2');
if ~resfile
return
end
resfile = fullfile(pathname, resfile);
end
[pathname, resbase, ext] = fileparts(resfile);
[tmp, dirname] = fileparts(pathname);
% a file containing the fluorescence intensity at the FCS measurement point
% (optional)
flintfile = fullfile(pathname, [resbase '.fint']);
if nargin < 2
if nargin < 2 % the ID for the xml files
FAsession = resbase;
%FAsession = '2c'
FAsession = '2c';
end
if ~exist(resfile)
display(['Could not find resfile' resfile]);
return
end
% create focalVolume data
if nargin < 3
warning('No path to focalVolume.txt provided. Try to parse directory/subdirectory and find file')
focVolfile = getAllFiles(pathname, 'txt', 'focalVolume.txt');
if length(focVolfile) > 1
error('Several focalVolume.txt associated with resfile. Please specify only one file')
end
if isempty(focVolfile)
focVolfile = [];
else
focVolfile = focVolfile{1};
end
else
if ~exist(focVolfile)
warning(['Path to focalVolume.txt ' focVolfile ' does not exist!']);
focVolfile = [];
end
end
if isempty(focVolfile)
[focVolfile, pathnamefocVol] = uigetfile({'*.txt'; '*.*'},'No focaleVolume.txt found. Select focal volume file', fileparts(resfile));
if ~focVolfile
warning('No focal volume have been found. Volume data are not integrated in result file')
focVolfile = [];
else
focVolfile = fullfile(pathnamefocVol, focVolfile);
end
end
if isempty(focVolfile)
focVol = [];
else
focVol = readtable(focVolfile, 'Delimiter', '\t', 'FileType', 'text');
end
%% Initialization
% load model class
clear('MO');
......@@ -60,6 +128,7 @@ restable2c.P14Name = repmat({'pValueFtest'}, length(restable1c.P11Name),1);
restable2c.P15Name = repmat({'PAIC'}, length(restable1c.P11Name),1);
restable2c.P16Name = repmat({'BIC'}, length(restable1c.P11Name),1);
%% write fluorescence intensity to table
try
if ~isempty(flintT)
restable1c.class = flintT.class;
restable2c.class = flintT.class;
......@@ -70,7 +139,9 @@ if ~isempty(flintT)
end
end
end
catch
warning('could not create the fluorescence intensity columns');
end
%% this write empty entries as NaN (must double check afterwards in particular for annotation)
......@@ -78,25 +149,6 @@ end
% get names of xml files
xmlFAfiles = strrep(restable2c.FullPath, ['.zen'], ['.zen.' FAsession '.xml']);
%xmlFAfiles = strrep(restable2c.FullPath, ['.zen_mod.fcs'], ['.zen_mod.fcs.' FAsession '.xml']);
% name should be with respect to subfolders! TODO
%% read focal volume data
focVol = getAllFiles(fileparts(resfile), 'txt', 'focalVolume');
if length(focVol) == 0
display(['No focalVolume file associated with ' resfile]);
[focVol, pathname] = uigetfile({'*.txt'; '*.*'},'Select focal volume file', fileparts(resfile));
if ~focVol
return
end
focVol = {fullfile(pathname,focVol)};
end
if length(focVol) > 1
display(['several focalVolume files associated with ' resfile]);
return;
end
focVol = readtable(focVol{1}, 'Delimiter', '\t');
%%
if isempty(xmlFAfiles)
display('No FA xml found use .fcs file instead');
......@@ -121,9 +173,11 @@ end
%% load data into cell array dataC
display(['Load correlation data for ' resfile])
if ZEN
dataC = MO.readZeissfcs(zenfiles{1});
nrData = length(dataC);
else
dataC = MO.readFAxml(xmlFAfiles);
nrData = length(xmlFAfiles);
end
%% Workflow
......@@ -136,9 +190,38 @@ weightStr = {'','_w'};
% performed is not available. If we take best fit here and plugin into FA
% the Chi2 slightly increases
for iD = 1:length(dataC)
display(sprintf('Fitting of data set %d/%d', iD, length(dataC)));
% standard deviation needs to be decreased by sqrt(20)
stdCorr = 1/sqrt(20);
h = waitbar(0,sprintf('resfile %s\nfocVolfile %s', resfile, focVolfile),'Name',['Fitting ' num2str(nrData) ' FCS traces...'],...
'CreateCancelBtn',...
'setappdata(gcbf,''canceling'',1)');
for iD = 1:nrData
if ZEN
data = dataC{iD};
xmlfile = [];
else
if ~exist(xmlFAfiles{iD})
% search locally
[m, iend] = regexp(xmlFAfiles{iD},dirname, 'match', 'end');
if iend > 0
xmlfile = fullfile(pathname, xmlFAfiles{iD}(iend+1:end));
end
else
xmlfile = xmlFAfiles{iD};
end
if ~exist(xmlfile, 'file')
error(['was not able to find ' xmlfile]);
end
% this avoids to load huge amount of data in memory
data = MO.readFAxml({xmlfile});
data = data{1};
end
waitbar(iD/nrData, h)
if getappdata(h,'canceling')
delete(h)
outfiles = []
return
end
% Fit for all channels (if CC or XC is found)
outpar1c = repmat(struct2array(MO.par),3,1); % parameters for 1 component fit
......@@ -150,19 +233,23 @@ for iD = 1:length(dataC)
else
Chstr = ['Ch' num2str(iCh)];
end
if isempty(dataC{iD})
if isempty(data)
continue
end
if dataC{iD}(1,iCh*2) == 0 % exclude if no data available
if data(1,iCh*2) == 0 % exclude if no data available
continue
end
if isempty(focVol)
MO.par.kappa = 5.5;
else
MO.par.kappa = focVol.kappa(iCh);
end
% norm1c is sum of squared residuals used in the fit. norms1c = [unweighted, weighted norm]
% R2_1c is from norm used for the fit. R2s_1c:[R2_unweighted,
% R2_weighted norm]
[outpar1c(iCh,:), norm1c, R2_1c, norms1c, R2s_1c, Nrpts1c, nrpara1c] = MO.fitOneComponent(dataC{iD}(:, [1 2*iCh 2*iCh+1]), iCh==3);
[outpar2c(iCh,:), norm2c, R2_2c, norms2c, R2s_2c, Nrpts2c, nrpara2c] = MO.fitTwoComponents(dataC{iD}(:,[1 2*iCh 2*iCh+1]), iCh==3);
data(:,2*iCh+1) = data(:,2*iCh+1)*stdCorr;
[outpar1c(iCh,:), norm1c, R2_1c, norms1c, R2s_1c, Nrpts1c, nrpara1c] = MO.fitOneComponent(data(:, [1 2*iCh 2*iCh+1]), iCh==3);
[outpar2c(iCh,:), norm2c, R2_2c, norms2c, R2s_2c, Nrpts2c, nrpara2c] = MO.fitTwoComponents(data(:,[1 2*iCh 2*iCh+1]), iCh==3);
% adjusted R2s as used in FA
R2adj_1c = 1 - (1-R2s_1c).*(Nrpts1c-1)./(Nrpts1c - nrpara1c - 1);
......@@ -177,17 +264,18 @@ for iD = 1:length(dataC)
restable1c(iD, ['P0' num2str(ipar) 'Value' Chstr]) = table(outpar1c(iCh, ipar));
restable2c(iD, ['P0' num2str(ipar) 'Value' Chstr]) = table(outpar2c(iCh, ipar));
end
%% only report weighted norm
%% only report weighted norm divided by the nr points
restable1c(iD, ['ChiSq' Chstr]) = table(norms1c(2)/Nrpts1c);
restable2c(iD, ['ChiSq' Chstr]) = table(norms2c(2)/Nrpts2c);
%report unweighted R2 as the weight are largely overestimated. This
restable1c(iD, ['RSqAdj' Chstr]) = table(R2adj_1c(1));
restable2c(iD, ['RSqAdj' Chstr]) = table(R2adj_2c(1));
if ~isempty(focVol)
restable1c(iD, ['P11Value' Chstr]) = table(focVol.w0_um(iCh));
restable1c(iD, ['P12Value' Chstr]) = table(focVol.Vol_fl(iCh));
restable2c(iD, ['P11Value' Chstr]) = table(focVol.w0_um(iCh));
restable2c(iD, ['P12Value' Chstr]) = table(focVol.Vol_fl(iCh));
end
restable1c(iD, ['P13Value' Chstr]) = table(pvalueChi);
restable1c(iD, ['P14Value' Chstr]) = table(pvalueFtest);
......@@ -201,18 +289,22 @@ for iD = 1:length(dataC)
end
% write updated xml file
for ifit = 1:2
if isempty(dataC{iD})
if isempty(xmlfile)
continue
end
if isempty(data)
continue
end
node = xmlFA.readnode(xmlFAfiles{iD});
[path1, fout, ext] = fileparts(xmlFAfiles{iD});
node = xmlFA.readnode(xmlfile);
[path1, fout, ext] = fileparts(xmlfile);
[path, fout, ext] = fileparts(fout);
fname_out = fullfile(path1, sprintf('%s.%dc_opt%s.xml', fout, ifit, weightStr{MO.weightChi2+1}));
idxs = MO.getDataIndexes(dataC{iD}, MO.tauBoundary);
Ntfits = length(idxs(1):length(dataC{iD})-idxs(2));
idxs = MO.getDataIndexes(data, MO.tauBoundary);
Ntfits = length(idxs(1):length(data)-idxs(2));
fit = zeros(Ntfits, 3);
if ifit == 1
par = outpar1c;
......@@ -220,10 +312,10 @@ for iD = 1:length(dataC)
par = outpar2c;
end
for iC = 1:3
if (dataC{iD}(1,iC*2) == 0) % do not process if channel has not been acquired
if (data(1,iC*2) == 0) % do not process if channel has not been acquired
continue
end
fit(:,iC) = MO.Gcomp(MO.parvecToparstruct(par(iC,:)), dataC{iD}(idxs(1):end-idxs(2),1));
fit(:,iC) = MO.Gcomp(MO.parvecToparstruct(par(iC,:)), data(idxs(1):end-idxs(2),1));
end
xmlFA.writepar(node, par, struct2array(MO.lb), struct2array(MO.hb));
......@@ -259,6 +351,7 @@ for ifile = 1:2
appendFiles(tmpfiles{ifile}, outfiles{ifile});
delete(tmpfiles{ifile});
end
delete(h)
end
......@@ -269,19 +362,26 @@ function [PAIC, BIC, pvalueChi, pvalueF] = modelCompare(norms, nrpts, nrpara)
% so BIC is more strict)
AIC = 2*nrpara + norms + 2*(nrpara+1).*(nrpara)./(nrpts-nrpara-2);
PAIC = exp((min(AIC) - AIC)/length(AIC)); % probability of worse model to be correct
% PAIC(1) gives the probability
% BIC. The lowest value is better
BIC = log(nrpts).*nrpara + norms;
% log-likelihood ratio test
% log-likelihood ratio test. Lower pvalue indicates that more
% complicated model can describe the data
Dlog = norms(1) - norms(2);
df = nrpara(2) - nrpara(1);
pvalueChi = 1 - chi2cdf(Dlog, df);
%%
% F-test. Keep in mind that the sum(weighted_squares) is not ~ Nrpts.
% a low pValue indicate 2c model is probable
% F-test. Keep in mind that in the original FA
% the sum(weighted_squares) is not ~ Nrpts.
% so we are overestimating the error.
% we can reassign the norms (but this is rather unorthodox)
fact = nrpts(1)/norms(1);
% we can reassign the norm using 1/sqrt(20). This is the proper
% rescaling of the std. std should be standard error of the mean of 20
% equally spaced windows.
% no use for this fact = nrpts(1)/norms(1);
fact = 1;
F = (Dlog/df)/(fact*norms(2)/(nrpts(1)-nrpara(2)));
pvalueF = 1 - fcdf(F, df, nrpts(1)-nrpara(2));
end
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment