Commit e9a02bf6 authored by karius's avatar karius

status quo

parent c45fdad0
This diff is collapsed.
/*
* CMrcReader.h
*
* Created on: Jun 6, 2019
* Author: kkarius
*/
#ifndef CMRCREADER_H_
#define CMRCREADER_H_
#include <string>
#include <vector>
//'?' boolean
//'b' (signed) byte
//'B' unsigned byte
//'i' (signed) integer
//'u' unsigned integer
//'f' floating-point
//'c' complex-floating point
//'m' timedelta
//'M' datetime
//'O' (Python) objects
//'S', 'a' zero-terminated bytes (not recommended)
//'U' Unicode string
//'V' raw data (void)
#pragma pack(push, r1, 1)
struct SCellA {
// ('cella', [ # Cell size in angstroms
// ('x', 'f4'),
// ('y', 'f4'),
// ('z', 'f4')
// ]),
float x;
float y;
float z;
};
struct SCellB {
// ('cellb', [ # Cell angles in degrees
// ('alpha', 'f4'),
// ('beta', 'f4'),
// ('gamma', 'f4')
// ]),
float alpha;
float beta;
float gamma;
};
struct SMrcHeader {
// ('nx', 'i4'), # Number of columns
// ('ny', 'i4'), # Number of rows
// ('nz', 'i4'), # Number of sections
int nx;
int ny;
int nz;
// ('mode', 'i4'), # Mode; indicates type of values stored in data block
int mode;
// ('nxstart', 'i4'), # Starting point of sub-image
// ('nystart', 'i4'),
// ('nzstart', 'i4'),
int nxstart;
int nystart;
int nzstart;
//
// ('mx', 'i4'), # Grid size in X, Y and Z
// ('my', 'i4'),
// ('mz', 'i4'),
int mx;
int my;
int mz;
//
// ('cella', [ # Cell size in angstrom
SCellA cella;
//
// ('cellb', [ # Cell angles in degrees
SCellB cellb;
//
// ('mapc', 'i4'), # map column 1=x,2=y,3=z.
// ('mapr', 'i4'), # map row 1=x,2=y,3=z.
// ('maps', 'i4'), # map section 1=x,2=y,3=z.
int mapc;
int mapr;
int maps;
//
// ('dmin', 'f4'), # Minimum pixel value
// ('dmax', 'f4'), # Maximum pixel value
// ('dmean', 'f4'), # Mean pixel value
float dmin;
float dmax;
float dmean;
// ('ispg', 'i4'), # space group number
// ('nsymbt', 'i4'), # number of bytes in extended header
int ispg;
int nsymbt;
// ('extra1', 'V8'), # extra space, usage varies by application
int extra1[2];
// ('exttyp', 'S4'), # code for the type of extended header
char exttype[4];
// ('nversion', 'i4'), # version of the MRC format
int nversion;
// ('extra2', 'V84'), # extra space, usage varies by application
int extra2[42];
//
// ('origin', [ # Origin of image
// ('x', 'f4'),
// ('y', 'f4'),
// ('z', 'f4')
// ]),
//same shit
SCellA origin;
// ('map', 'S4'), # Contains 'MAP ' to identify file type
char map[4];
// ('machst', 'u1', 4), # Machine stamp; identifies byte order
char machst[4];
//
// ('rms', 'f4'), # RMS deviation of densities from mean density
float rms;
//
// ('nlabl', 'i4'), # Number of labels with useful data
int nlabl;
// ('label', 'S80', 10) # 10 labels of 80 characters
char label[800];
};
#pragma pack(pop, r1)
class CMrcReader {
public:
CMrcReader();
SMrcHeader header;
std::string mrc_file_name;
std::vector<float> data;
virtual ~CMrcReader();
void readHeader(std::string mrc_file_name);
void printHeader(void);
void readData(void);
};
#endif /* CMRCREADER_H_ */
//HEADER_DTYPE = np.dtype([
//
//])
//
//
//VOXEL_SIZE_DTYPE = np.dtype([
// ('x', 'f4'),
// ('y', 'f4'),
// ('z', 'f4')
//])
//
//
//# FEI extended header dtype for metadata version 0, as described in the EPU
//# manual. Note that the FEI documentation is unclear about the endianness of
//# the data in the extended header. Probably, it is always little-endian, and
//# therefore the data might be misinterpreted on a big-endian machine, but this
//# has not been tested.
//FEI_EXTENDED_HEADER_DTYPE = np.dtype([
// ('Metadata size', 'i4'),
// ('Metadata version', 'i4'),
// ('Bitmask 1', 'u4'),
// ('Timestamp', 'f8'), # Not specified, but suspect this is in days after 1/1/1900
// ('Microscope type', 'S16'),
// ('D-Number', 'S16'),
// ('Application', 'S16'),
// ('Application version', 'S16'),
// ('HT', 'f8'),
// ('Dose', 'f8'),
// ('Alpha tilt', 'f8'),
// ('Beta tilt', 'f8'),
// ('X-Stage', 'f8'),
// ('Y-Stage', 'f8'),
// ('Z-Stage', 'f8'),
// ('Tilt axis angle', 'f8'),
// ('Dual axis rotation', 'f8'),
// ('Pixel size X', 'f8'),
// ('Pixel size Y', 'f8'),
// ('Unused range', 'S48'),
// ('Defocus', 'f8'),
// ('STEM Defocus', 'f8'),
// ('Applied defocus', 'f8'),
// ('Instrument mode', 'i4'),
// ('Projection mode', 'i4'),
// ('Objective lens mode', 'S16'),
// ('High magnification mode', 'S16'),
// ('Probe mode', 'i4'),
// ('EFTEM On', '?'),
// ('Magnification', 'f8'),
// ('Bitmask 2', 'u4'),
// ('Camera length', 'f8'),
// ('Spot index', 'i4'),
// ('Illuminated area', 'f8'),
// ('Intensity', 'f8'),
// ('Convergence angle', 'f8'),
// ('Illumination mode', 'S16'),
// ('Wide convergence angle range', '?'),
// ('Slit inserted', '?'),
// ('Slit width', 'f8'),
// ('Acceleration voltage offset', 'f8'),
// ('Drift tube voltage', 'f8'),
// ('Energy shift', 'f8'),
// ('Shift offset X', 'f8'),
// ('Shift offset Y', 'f8'),
// ('Shift X', 'f8'),
// ('Shift Y', 'f8'),
// ('Integration time', 'f8'),
// ('Binning Width', 'i4'),
// ('Binning Height', 'i4'),
// ('Camera name', 'S16'),
// ('Readout area left', 'i4'),
// ('Readout area top', 'i4'),
// ('Readout area right', 'i4'),
// ('Readout area bottom', 'i4'),
// ('Ceta noise reduction', '?'),
// ('Ceta frames summed', 'i4'),
// ('Direct detector electron counting', '?'),
// ('Direct detector align frames', '?'),
// ('Camera param reserved 0', 'i4'),
// ('Camera param reserved 1', 'i4'),
// ('Camera param reserved 2', 'i4'),
// ('Camera param reserved 3', 'i4'),
// ('Bitmask 3', 'u4'),
// ('Camera param reserved 4', 'i4'),
// ('Camera param reserved 5', 'i4'),
// ('Camera param reserved 6', 'i4'),
// ('Camera param reserved 7', 'i4'),
// ('Camera param reserved 8', 'i4'),
// ('Camera param reserved 9', 'i4'),
// ('Phase Plate', '?'),
// ('STEM Detector name', 'S16'),
// ('Gain', 'f8'),
// ('Offset', 'f8'),
// ('STEM param reserved 0', 'i4'),
// ('STEM param reserved 1', 'i4'),
// ('STEM param reserved 2', 'i4'),
// ('STEM param reserved 3', 'i4'),
// ('STEM param reserved 4', 'i4'),
// ('Dwell time', 'f8'),
// ('Frame time', 'f8'),
// ('Scan size left', 'i4'),
// ('Scan size top', 'i4'),
// ('Scan size right', 'i4'),
// ('Scan size bottom', 'i4'),
// ('Full scan FOV X', 'f8'),
// ('Full scan FOV Y', 'f8'),
// ('Element', 'S16'),
// ('Energy interval lower', 'f8'),
// ('Energy interval higher', 'f8'),
// ('Method', 'i4'),
// ('Is dose fraction', '?'),
// ('Fraction number', 'i4'),
// ('Start frame', 'i4'),
// ('End frame', 'i4'),
// ('Input stack filename', 'S80'),
// ('Bitmask 4', 'u4'),
// ('Alpha tilt min', 'f8'),
// ('Alpha tilt max', 'f8')
//])
File added
/*
* CPdbBuffer.cpp
*
* Created on: Mar 29, 2019
* Author: kkarius
*/
#include <CPdbBuffer.h>
#include <algorithm>
#include <cstring>
using std::size_t;
CPdbBuffer::CPdbBuffer(FILE *fptr, size_t buff_sz, size_t put_back) :
fptr_(fptr),
put_back_(std::max(put_back, size_t(1))),
buffer_(std::max(buff_sz, put_back_) + put_back_)
{
char *end = &buffer_.front() + buffer_.size();
setg(end, end, end);
}
std::streambuf::int_type CPdbBuffer::underflow()
{
if (gptr() < egptr()) // buffer not exhausted
return traits_type::to_int_type(*gptr());
char *base = &buffer_.front();
char *start = base;
if (eback() == base) // true when this isn't the first fill
{
// Make arrangements for putback characters
std::memmove(base, egptr() - put_back_, put_back_);
start += put_back_;
}
// start is now the start of the buffer, proper.
// Read from fptr_ in to the provided buffer
size_t n = std::fread(start, 1, buffer_.size() - (start - base), fptr_);
if (n == 0)
return traits_type::eof();
// Set buffer pointers
setg(base, start, start + n);
return traits_type::to_int_type(*gptr());
}
CPdbBuffer::~CPdbBuffer() {
// TODO Auto-generated destructor stub
}
/*
* CPdbBuffer.h
*
* Created on: Mar 29, 2019
* Author: kkarius
*/
#ifndef CPDBBUFFER_H_
#define CPDBBUFFER_H_
#include <streambuf>
#include <vector>
#include <cstdlib>
#include <cstdio>
class CPdbBuffer : public std::streambuf {
public:
CPdbBuffer();
public:
explicit CPdbBuffer(FILE *fptr, std::size_t buff_sz = 256, std::size_t put_back = 8);
virtual ~CPdbBuffer();
private:
// overrides base class underflow()
int_type underflow();
// copy ctor and assignment not implemented;
// copying not allowed
CPdbBuffer(const CPdbBuffer &);
CPdbBuffer &operator= (const CPdbBuffer &);
private:
FILE *fptr_;
const std::size_t put_back_;
std::vector<char> buffer_;
};
#endif /* CPDBBUFFER_H_ */
File added
/*
* CPdbIfstream.cpp
*
* Created on: Mar 25, 2019
* Author: kkarius
*/
#include <CPdbIfstream.h>
#include <stdio.h>
#include <iostream>
bool CPdbIfstream::IS_PROPER_PDB(const boost::filesystem::path& filepath) {
return (bool) (boost::filesystem::extension(filepath) == PDB_EXTENSION
&& !(boost::filesystem::symlink_status(filepath).type()
== boost::filesystem::symlink_file)
&& boost::filesystem::is_regular_file(filepath));
}
CPdbIfstream::CPdbIfstream(const boost::filesystem::path& source) :
std::ifstream(), mstream_mode(SINGLE_FILE) {
if (boost::filesystem::is_directory(source)) {
mstream_mode = MULTI_FILE;
mpdb_list = CPdbIfstream::GET_PDB_LIST(source);
mpdb_it = mpdb_list.begin();
if (!mpdb_list.empty()) {
open(mpdb_it->string().c_str(), std::ios_base::binary);
}
} else {
if (CPdbIfstream::IS_PROPER_PDB(source)) {
open(source.string().c_str(), std::ios_base::binary);
}
}
}
int CPdbIfstream::tellg(){
std::cout<<"Called"<<std::endl;
return std::ifstream::tellg();
}
bool CPdbIfstream::eof(){
std::cout<<"Called"<<std::endl;
if (mpdb_it==mpdb_list.end()){
return std::ifstream::eof();
}
if (std::ifstream::eof()){
close();
clear();
mpdb_it++;
open(mpdb_it->string().c_str(),std::ios_base::binary);
}
return false;
}
int CPdbIfstream::peek(){
std::cout<<"Called"<<std::endl;
if (mpdb_it==mpdb_list.end()){
return std::ifstream::peek();
}
else {
if (std::ifstream::peek() == std::char_traits<char>::eof()){
return std::char_traits<char>::eof()+1;
}
return std::ifstream::peek();
}
}
std::list<boost::filesystem::path> CPdbIfstream::GET_PDB_LIST(
const boost::filesystem::path& dir_path) {
std::list<boost::filesystem::path> pdb_list;
if (boost::filesystem::is_directory(dir_path)) {
boost::filesystem::recursive_directory_iterator dir(dir_path);
for (boost::filesystem::directory_entry& x : dir) {
if (CPdbIfstream::IS_PROPER_PDB(x)) {
pdb_list.push_back(x);
}
}
}
return pdb_list;
}
CPdbIfstream::~CPdbIfstream() {
// TODO Auto-generated destructor stub
}
/*
* CPdbIfstream.h
*
* Created on: Mar 25, 2019
* Author: kkarius
*/
#ifndef CPDBIFSTREAM_H_
#define CPDBIFSTREAM_H_
#include <corelib/ncbistre.hpp>
#include <boost/filesystem.hpp>
#include <list>
#include <ios>
enum eStreamMode {SINGLE_FILE, MULTI_FILE};
const std::string PDB_EXTENSION(".pdb");
class CPdbIfstream : public std::ifstream{
public:
static bool IS_PROPER_PDB(const boost::filesystem::path& filepath);
static std::list<boost::filesystem::path> GET_PDB_LIST(const boost::filesystem::path& dir_path);
CPdbIfstream(const boost::filesystem::path& source);
bool eof(void);
int peek(void);
int tellg(void);
virtual ~CPdbIfstream();
boost::filesystem::path GetCurrentFile(void);
private:
std::list<boost::filesystem::path> mpdb_list;
std::list<boost::filesystem::path>::iterator mpdb_it;
eStreamMode mstream_mode;
};
#endif /* CPDBIFSTREAM_H_ */
File added
File added
/*
* CXlinkAsset.cpp
*
* Created on: Apr 24, 2019
* Author: kkarius
*/
#include <CXlinkAsset.h>
CXlinkAsset::CXlinkAsset() {
// TODO Auto-generated constructor stub
}
CXlinkAsset::~CXlinkAsset() {
// TODO Auto-generated destructor stub
}
/*
* CXlinkAsset.h
*
* Created on: Apr 24, 2019
* Author: kkarius
*/
#ifndef CXLINKASSET_H_
#define CXLINKASSET_H_
class CXlinkAsset {
public:
CXlinkAsset();
virtual ~CXlinkAsset();
};
#endif /* CXLINKASSET_H_ */
File added
/*
* CXlinkRegister.cpp
*
* Created on: Apr 24, 2019
* Author: kkarius
*/
#include <CXlink_record.h>
#include <CXlinkRegister.h>
#include <iostream>
CXlinkRegister::CXlinkRegister() {
}
bool CXlinkRegister::registerXlinkrecord(
ncbi::CRR_Row<ncbi::CRowReaderStream_IANA_CSV> row) {
std::string xl_type = row["XLType"].Get<std::string>();
if (XLINKTYPECONVERSION.find(xl_type) != XLINKTYPECONVERSION.end()) {
EXlinkType type = XLINKTYPECONVERSION.find(xl_type)->second;
SXlink_record record;
record.XLType = type;
record.Protein1 = row["Protein1"].Get<std::string>();
record.Protein2 = row["Protein1"].Get<std::string>();
record.ld_Score = row["ld-Score"].Get<float>();
record.LineFile = m_RowReader.GetCurrentLineNo();
record.FilePath = m_currentFile;
switch (type) {
case eIntralink:
case eInterlink:
record.Residue1 = ncbi::NStr::StringToInt(
ncbi::CTempString(row["Residue1"].Get<std::string>()));
record.Residue2 = ncbi::NStr::StringToInt(
ncbi::CTempString(row["Residue2"].Get<std::string>()));
return x_ParseTwoEndId(record, row["Id"].Get<std::string>());
case eMonolink:
try {
record.Residue1 = ncbi::NStr::StringToInt(
ncbi::CTempString(row["Residue1"].Get<std::string>()));
} catch (const std::exception& e) {
record.Residue1 = -1;
}
try {
record.Residue2 = ncbi::NStr::StringToInt(
ncbi::CTempString(row["Residue2"].Get<std::string>()));
} catch (const std::exception& e) {
record.Residue2 = -1;
}
// m_Residue1 = ncbi::NStr::StringToInt(
// ncbi::CTempString(m_Residue1_string));
return x_ParseMonoId(record, row["Id"].Get<std::string>());
case eUnknown:
break;
m_xlinkvec.push_back(record);
}
}
return true;
}
std::vector<SXlink_record>::iterator CXlinkRegister::iterRecords(){
return m_xlinkvec.begin();
}
std::string CXlinkRegister::getSignalSeq_id(int i){
return std::next(m_SeqEntryRegister.begin(),i)->first;
}
bool CXlinkRegister::addXlinkFile(boost::filesystem::path xlink_Filepath) {
bool success = true;
if (boost::filesystem::exists(xlink_Filepath)
&& boost::filesystem::is_regular_file(xlink_Filepath)) {
m_RowReader.SetDataSource(xlink_Filepath.string().c_str());
m_currentFile = xlink_Filepath;
for (const auto & row : m_RowReader) {
success = registerXlinkrecord(row) && success;
}
}
if (success){
std::vector<std::string>::iterator it;
for (it= m_bioseqvec.begin();it!=m_bioseqvec.end();++it){
success = x_registerXlinkSignal(*it) && success;
}
}
return success;
}
void CXlinkRegister::showSeqEntryRegister() {
std::cout << "Number of registered signal sequences: " << m_bioseqvec.size() << std::endl;
for (std::vector<std::string>::iterator iter =
m_bioseqvec.begin(); iter != m_bioseqvec.end();
++iter) {
std::cout << *iter << std::endl;
}
}
bool CXlinkRegister::x_registerXlinkSignal(std::string xlink_signal) {
ncbi::CRef<ncbi::objects::CSeq_entry> entry(new ncbi::objects::CSeq_entry);
ncbi::CRef<ncbi::objects::CBioseq> bioseq(new ncbi::objects::CBioseq);
ncbi::CRef<ncbi::objects::CSeq_data> data(
new ncbi::objects::CSeq_data(xlink_signal,
ncbi::objects::CSeq_data::e_Ncbieaa));
ncbi::objects::CSeq_inst& inst = bioseq->SetInst();
inst.SetSeq_data(*data);
inst.SetMol(ncbi::objects::CSeq_inst::eMol_aa);
inst.SetExt().SetSeg();
inst.SetLength(xlink_signal.size());
std::string xl_signal_id = "xlinksignal_"
+ std::to_string(m_SeqEntryRegister.size());
ncbi::CRef<ncbi::objects::CSeq_id> id;
id.Reset(
new ncbi::objects::CSeq_id(xl_signal_id,
ncbi::objects::CSeq_id::fParse_ValidLocal));
bioseq->SetId().push_back(id);
entry->SetSeq(*bioseq);
entry->Parentize();
m_SeqEntryRegister[xlink_signal] = entry;
m_SeqEntryIterator = m_SeqEntryRegister.begin();
return true;
}
ncbi::CRef<ncbi::objects::CSeq_entry> CXlinkRegister::ReadOneSeq(
ncbi::objects::ILineErrorListener * pMessageListener) {
ncbi::CRef<ncbi::objects::CSeq_entry> seq_entry(m_SeqEntryIterator->second);
m_SeqEntryIterator++;
return seq_entry;
}
ncbi::CRef<ncbi::objects::CSeq_loc> CXlinkRegister::x_SeqEntryToSeqloc(
ncbi::CScope& scope) {
ncbi::CRef<ncbi::objects::CSeq_entry> seq_entry(ReadOneSeq(nullptr));
ncbi::CRef<ncbi::objects::CSeq_annot> annot(new ncbi::objects::CSeq_annot);
// set id
ncbi::CSeq_id id(seq_entry->GetSeq().GetId().front()->GetSeqIdString(),
ncbi::CSeq_id::fParse_ValidLocal);
seq_entry->SetAnnot().push_back(annot);
scope.AddTopLevelSeqEntry(*seq_entry);
ncbi::CRef<ncbi::objects::CSeq_loc> retval(new ncbi::objects::CSeq_loc());
// set sequence range
retval->SetInt().SetFrom(0);
retval->SetInt().SetTo(seq_entry->GetSeq().GetInst().GetLength() - 1);
retval->SetInt().SetId().Assign(id);
return retval;
}
ncbi::blast::SSeqLoc CXlinkRegister::GetNextSSeqLoc(ncbi::CScope& scope) {
ncbi::CRef<ncbi::objects::CSeq_loc> seqloc = x_SeqEntryToSeqloc(scope);
ncbi::blast::SSeqLoc retval(seqloc, &scope);
return retval;
}
bool CXlinkRegister::x_ParseTwoEndId(SXlink_record xlink_record,
std::string xlink_signal) {
//TODO:Build some failsafes
std::string s = xlink_signal;
std::string delimiter = "-";
size_t pos = 0;
std::string token;
int vecpos;
std::vector<std::string>::iterator it;
pos = s.find(delimiter);
std::string signal = s.substr(0, pos);
it = std::find(m_bioseqvec.begin(), m_bioseqvec.end(), signal);
if (it == m_bioseqvec.end()) {
m_bioseqvec.push_back(signal);
}
vecpos = std::distance(it, m_bioseqvec.begin());
xlink_record.Protein1_signal = vecpos;
s.erase(0, pos + delimiter.length());
pos = s.find(delimiter);
signal = s.substr(0, pos);
it = std::find(m_bioseqvec.begin(), m_bioseqvec.end(), signal);
if (it == m_bioseqvec.end()) {
m_bioseqvec.push_back(signal);
}
vecpos = std::distance(it, m_bioseqvec.begin());
xlink_record.Protein2_signal = vecpos;
s.erase(0, pos + delimiter.length());
pos = s.find(delimiter);