diff --git a/include/fastani/__init__.pxd b/include/fastani/__init__.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/include/fastani/cgi/__init__.pxd b/include/fastani/cgi/__init__.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/include/fastani/cgi/cgid_types.pxd b/include/fastani/cgi/cgid_types.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..4bfb736be98afd5b364bcd88be29286ac69f11c8
--- /dev/null
+++ b/include/fastani/cgi/cgid_types.pxd
@@ -0,0 +1,33 @@
+# Cython declarations for the types declared in
+# "cgi/include/cgid_types.hpp" (C++ namespace "cgi").
+
+from libcpp cimport bool
+
+from fastani.map.base_types cimport seqno_t, offset_t
+
+
+cdef extern from "cgi/include/cgid_types.hpp" namespace "cgi" nogil:
+
+    # NOTE(review): offset_t is cimported above but unused; every integer
+    # field below is declared seqno_t. Both are ctypedef'd to int in
+    # fastani/map/base_types.pxd -- confirm field types against the header.
+    cdef cppclass MappingResult_CGI:
+
+        seqno_t refSequenceId
+        seqno_t genomeId
+        seqno_t querySeqId
+        seqno_t refStartPos
+        seqno_t queryStartPos
+        seqno_t mapRefPosBin
+        float nucIdentity
+
+
+    cdef cppclass CGI_Results:
+
+        seqno_t refGenomeId
+        seqno_t qryGenomeId
+        seqno_t countSeq
+        seqno_t totalQueryFragments
+        float identity
+
+        bool operator<(const CGI_Results& x)
diff --git a/include/fastani/cgi/compute_core_identity.pxd b/include/fastani/cgi/compute_core_identity.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..d86b3ab9ede9b3bb3b356d31c2463a06460da21c
--- /dev/null
+++ b/include/fastani/cgi/compute_core_identity.pxd
@@ -0,0 +1,61 @@
+# Cython declarations of the free functions in
+# "cgi/include/computeCoreIdentity.hpp" (C++ namespace "cgi").
+
+from libc.stdint cimport uint64_t
+from libcpp.string cimport string
+from libcpp.unordered_map cimport unordered_map
+from libcpp.vector cimport vector
+
+from fastani.cgi.cgid_types cimport MappingResult_CGI, CGI_Results
+from fastani.map.base_types cimport MappingResultsVector_t
+from fastani.map.compute_map cimport Map
+from fastani.map.map_parameters cimport Parameters
+from fastani.map.win_sketch cimport Sketch
+
+
+cdef extern from "cgi/include/computeCoreIdentity.hpp" namespace "cgi" nogil:
+
+
+    void reviseRefIdToGenomeId(vector[MappingResult_CGI] &shortResults, Sketch &refSketch)
+    void computeGenomeLengths(Parameters &parameters, unordered_map[string, uint64_t] &genomeLengths)
+
+    void outputVisualizationFile(
+        Parameters &parameters,
+        vector[MappingResult_CGI] &mappings_2way,
+        Map &mapper,
+        Sketch &refSketch,
+        uint64_t queryFileNo,
+        string &fileName
+    )
+
+    void computeCGI(
+        Parameters &parameters,
+        MappingResultsVector_t &results,
+        Map &mapper,
+        Sketch &refSketch,
+        uint64_t totalQueryFragments,
+        uint64_t queryFileNo,
+        string &fileName,
+        vector[CGI_Results] &CGI_ResultsVector
+    )
+
+    void outputCGI(
+        Parameters &parameters,
+        unordered_map[string, uint64_t] &genomeLengths,
+        vector[CGI_Results] &CGI_ResultsVector,
+        string &fileName
+    )
+
+    void outputPhylip(
+        Parameters &parameters,
+        unordered_map[string, uint64_t] &genomeLengths,
+        vector[CGI_Results] &CGI_ResultsVector,
+        string &fileName
+    )
+
+    void splitReferenceGenomes(
+        Parameters &parameters,
+        vector[Parameters] &parameters_split
+    )
+
+    void correctRefGenomeIds(vector[CGI_Results] &CGI_ResultsVector)
diff --git a/include/fastani/map/__init__.pxd b/include/fastani/map/__init__.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/include/fastani/map/base_types.pxd b/include/fastani/map/base_types.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..768e7397078a7c2ab323d3d5dd27eae0bf36f57f
--- /dev/null
+++ b/include/fastani/map/base_types.pxd
@@ -0,0 +1,67 @@
+# Cython declarations of the basic typedefs, classes and container aliases
+# in "map/include/base_types.hpp" (C++ namespace "skch").
+
+from libc.stdint cimport uint32_t
+from libcpp cimport bool
+from libcpp.vector cimport vector
+from libcpp.string cimport string
+from libcpp11.chrono cimport high_resolution_clock
+
+
+cdef extern from "map/include/base_types.hpp" namespace "skch" nogil:
+
+    ctypedef uint32_t hash_t
+    ctypedef int offset_t
+    ctypedef int seqno_t
+
+    # ctypedef high_resolution_clock Time
+
+    cdef cppclass MinimizerInfo:
+        hash_t hash
+        seqno_t seqId
+        offset_t wpos
+
+        bool operator<(const MinimizerInfo &x)
+        bool operator==(const MinimizerInfo &x)
+        bool operator!=(const MinimizerInfo &x)
+
+        bool equalityByHash(const MinimizerInfo &x, const MinimizerInfo &y)
+        bool lessByHash(const MinimizerInfo &x, const MinimizerInfo &y)
+
+
+    cdef cppclass MinimizerMetaData:
+        seqno_t seqId
+        offset_t wpos
+
+
+    ctypedef hash_t MinimizerMapKeyType
+    ctypedef vector[MinimizerMetaData] MinimizerMapValueType
+
+
+    cdef cppclass ContigInfo:
+        string name
+        offset_t len
+
+
+    cdef cppclass QueryMetaData[ K, MV ]:
+        K kseq
+        seqno_t seqCounter
+        int sketchsize
+        MV minimizerTableQuery
+
+
+    cdef cppclass MappingResult:
+        offset_t queryLen
+        offset_t refStartPos
+        offset_t refEndPos
+        offset_t queryStartPos
+        offset_t queryEndPos
+        seqno_t refSeqId
+        seqno_t querySeqId
+        float nucIdentity
+        float nucIdentityUpperBound
+        int sketchSize
+        int conservedSketches
+
+
+    ctypedef vector[MappingResult] MappingResultsVector_t
diff --git a/include/fastani/map/compute_map.pxd b/include/fastani/map/compute_map.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..cec09a9d942f9f12bcfa32c645e268debed22ddc
--- /dev/null
+++ b/include/fastani/map/compute_map.pxd
@@ -0,0 +1,51 @@
+# Cython declarations of the Map class, its nested locus classes and its
+# constructors, from "map/include/computeMap.hpp" (C++ namespace "skch").
+
+from libc.stdint cimport uint64_t
+from libcpp.vector cimport vector
+from libcpp.functional cimport function
+
+from fastani.map.base_types cimport seqno_t, offset_t, ContigInfo, MappingResult, MappingResultsVector_t
+from fastani.map.map_parameters cimport Parameters
+from fastani.map.win_sketch cimport Sketch
+
+
+cdef extern from "map/include/computeMap.hpp" namespace "skch" nogil:
+
+
+    cdef cppclass Map:
+
+        cppclass L1_candidateLocus_t:
+            seqno_t seqId
+            offset_t rangeStartPos
+            offset_t rangeEndPos
+
+        cppclass L2_mapLocus_t:
+            seqno_t seqId
+            offset_t meanOptimalPos
+            Sketch.MIIter_t optimalStart
+            Sketch.MIIter_t optimalEnd
+            int sharedSketchSize
+
+        ctypedef Sketch.MI_Type MinVec_Type
+        ctypedef Sketch.MIIter_t MIIter_t
+
+        vector[ContigInfo] metadata
+
+        # Overload taking a function[void(MappingResult&)], default nullptr.
+        Map(
+            const Parameters &p,
+            const Sketch &refsketch,
+            uint64_t &totalQueryFragments,
+            int queryno,
+            function[void(MappingResult&)] f = nullptr
+        )
+
+        # Overload taking a MappingResultsVector_t by reference.
+        Map(
+            const Parameters &p,
+            const Sketch &refsketch,
+            uint64_t &totalQueryFragments,
+            int queryno,
+            MappingResultsVector_t &r
+        )
diff --git a/include/fastani/map/map_parameters.pxd b/include/fastani/map/map_parameters.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..e9a7bf6dc91eb85daf977c3ca9543a57ebb14373
--- /dev/null
+++ b/include/fastani/map/map_parameters.pxd
@@ -0,0 +1,27 @@
+# Cython declaration of the Parameters class fields from
+# "map/include/map_parameters.hpp" (C++ namespace "skch").
+
+from libc.stdint cimport uint64_t
+from libcpp cimport bool
+from libcpp.vector cimport vector
+from libcpp.string cimport string
+
+
+cdef extern from "map/include/map_parameters.hpp" namespace "skch" nogil:
+
+    cdef cppclass Parameters:
+        int kmerSize
+        int windowSize
+        int minReadLength
+        float minFraction
+        int threads
+        int alphabetSize
+        uint64_t referenceSize
+        float percentageIdentity
+        double p_value
+        vector[string] refSequences
+        vector[string] querySequences
+        string outFileName
+        bool reportAll
+        bool visualize
+        bool matrixOutput
diff --git a/include/fastani/map/map_stats.pxd b/include/fastani/map/map_stats.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..1a0d109bec3f2db80062d3067e6a8f0e1259c1e5
--- /dev/null
+++ b/include/fastani/map/map_stats.pxd
@@ -0,0 +1,34 @@
+# Cython declarations of the free functions in
+# "map/include/map_stats.hpp" (C++ namespace "skch::Stat").
+
+from libc.stdint cimport uint64_t
+
+
+cdef extern from "map/include/map_stats.hpp" namespace "skch::Stat" nogil:
+
+    # NOTE(review): k is declared float here but int in every other
+    # declaration in this block -- confirm against map_stats.hpp.
+    cdef float j2md(float j, float k)
+    cdef float md2j(float d, int k)
+    cdef float md_lower_bound(float d, int s, int k, float ci)
+
+    cdef int estimateMinimumHits(int s, int k, float perc_identity)
+    cdef int estimateMinimumHitsRelaxed(int s, int k, float perc_identity)
+
+    cdef double estimate_pvalue(
+        int s,
+        int k,
+        int alphabetSize,
+        float identity,
+        int lengthQuery,
+        uint64_t lengthReference
+    )
+
+    cdef int recommendedWindowSize(
+        double pValue_cutoff,
+        int k,
+        int alphabetSize,
+        float identity,
+        int lengthQuery,
+        uint64_t lengthReference
+    )
diff --git a/include/fastani/map/win_sketch.pxd b/include/fastani/map/win_sketch.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..5a6cd1a6687764312ff0a93c028b2931f09f24f3
--- /dev/null
+++ b/include/fastani/map/win_sketch.pxd
@@ -0,0 +1,35 @@
+# Cython declarations of the Sketch class (typedefs, members, constructor
+# and methods) from "map/include/winSketch.hpp" (C++ namespace "skch").
+
+cimport libcpp11.iostream
+from libcpp.unordered_map cimport unordered_map
+from libcpp.vector cimport vector
+
+from fastani.map.base_types cimport (
+    offset_t,
+    seqno_t,
+    MinimizerMapKeyType,
+    MinimizerMapValueType,
+    MinimizerInfo,
+    ContigInfo
+)
+from fastani.map.map_parameters cimport Parameters
+
+
+cdef extern from "map/include/winSketch.hpp" namespace "skch" nogil:
+
+    cdef cppclass Sketch:
+
+        ctypedef vector[MinimizerInfo].const_iterator MIIter_t
+        ctypedef unordered_map[MinimizerMapKeyType, MinimizerMapValueType] MI_Map_t
+        ctypedef vector[MinimizerInfo] MI_Type
+
+        vector[ContigInfo] metadata
+        vector[seqno_t] sequencesByFileInfo
+        MI_Map_t minimizerPosLookupIndex
+
+        Sketch(const Parameters &p)
+
+        int getFreqThreshold()
+        MIIter_t searchIndex(seqno_t seqId, offset_t winpos)
+        MIIter_t getMinimizerIndexEnd()
diff --git a/include/libcpp11/__init__.pxd b/include/libcpp11/__init__.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/include/libcpp11/chrono.pxd b/include/libcpp11/chrono.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..65926811e62dbda7d2383de814a0867c266d2053
--- /dev/null
+++ b/include/libcpp11/chrono.pxd
@@ -0,0 +1,8 @@
+# Minimal Cython declaration of std::chrono::high_resolution_clock from
+# <chrono>; no members are declared (the class body is ``pass``).
+
+
+cdef extern from "<chrono>" namespace "std::chrono" nogil:
+
+    cdef cppclass high_resolution_clock:
+        pass
diff --git a/include/libcpp11/iostream.pxd b/include/libcpp11/iostream.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..f709cf9f4413befcea8ec47e8325669cf07d005a
--- /dev/null
+++ b/include/libcpp11/iostream.pxd
@@ -0,0 +1,32 @@
+# Cython declarations of the standard stream objects cin, cout and cerr
+# from <iostream> (C++ namespace "std").
+
+from libcpp11.istream cimport istream
+from libcpp11.ostream cimport ostream
+
+
+cdef extern from "<iostream>" namespace "std" nogil:
+
+    cdef istream cin
+    cdef ostream cout
+    cdef ostream cerr
+
+
+
+# template<class CharT, class Traits = char_traits<CharT>>
+# class basic_istream;
+#
+# using istream = basic_istream<char>;
+# using wistream = basic_istream<wchar_t>;
+#
+# template<class CharT, class Traits = char_traits<CharT>>
+# class basic_iostream;
+#
+# using iostream = basic_iostream<char>;
+# using wiostream = basic_iostream<wchar_t>;
+#
+# template<class CharT, class Traits>
+# basic_istream<CharT, Traits>& ws(basic_istream<CharT, Traits>& is);
+#
+# template<class Istream, class T>
+# Istream&& operator>>(Istream&& is, T&& x);
diff --git a/include/libcpp11/istream.pxd b/include/libcpp11/istream.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..b38e10c82a49106bf4569e338edd6b2a6795204e
--- /dev/null
+++ b/include/libcpp11/istream.pxd
@@ -0,0 +1,19 @@
+# Cython declarations of basic_istream / basic_iostream and their char and
+# wchar_t typedefs from <istream>; only the CharT parameter is declared.
+
+from libc.stddef cimport wchar_t
+
+
+cdef extern from "<istream>" namespace "std" nogil:
+
+    cdef cppclass basic_istream[CharT]:
+        pass
+
+    ctypedef basic_istream[char] istream
+    ctypedef basic_istream[wchar_t] wistream
+
+    cdef cppclass basic_iostream[CharT]:
+        pass
+
+    ctypedef basic_iostream[char] iostream
+    ctypedef basic_iostream[wchar_t] wiostream
diff --git a/include/libcpp11/ostream.pxd b/include/libcpp11/ostream.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..52e5d78e396d2225058ce0c360c0b05e111c87fd
--- /dev/null
+++ b/include/libcpp11/ostream.pxd
@@ -0,0 +1,13 @@
+# Cython declarations of basic_ostream and its char / wchar_t typedefs
+# from <ostream>; only the CharT template parameter is declared.
+
+from libc.stddef cimport wchar_t
+
+
+cdef extern from "<ostream>" namespace "std" nogil:
+
+    cdef cppclass basic_ostream[CharT]:
+        pass
+
+    ctypedef basic_ostream[char] ostream
+    ctypedef basic_ostream[wchar_t] wostream
diff --git a/include/libcpp11/utility.pxd b/include/libcpp11/utility.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391