From ca6d22e79a17f1ac62e9989c2b23b789cc0d6686 Mon Sep 17 00:00:00 2001
From: Martin Larralde <martin.larralde@embl.de>
Date: Sat, 12 Jun 2021 04:22:46 +0200
Subject: [PATCH] Add Cython headers for most of the FastANI interface plus
 some missing `std` headers

---
 include/fastani/__init__.pxd                  |  0
 include/fastani/cgi/__init__.pxd              |  0
 include/fastani/cgi/cgid_types.pxd            | 27 ++++++++
 include/fastani/cgi/compute_core_identity.pxd | 58 +++++++++++++++++
 include/fastani/map/__init__.pxd              |  0
 include/fastani/map/base_types.pxd            | 64 +++++++++++++++++++
 include/fastani/map/compute_map.pxd           | 46 +++++++++++++
 include/fastani/map/map_parameters.pxd        | 24 +++++++
 include/fastani/map/map_stats.pxd             | 29 +++++++++
 include/fastani/map/win_sketch.pxd            | 32 ++++++++++
 include/libcpp11/__init__.pxd                 |  0
 include/libcpp11/chrono.pxd                   |  6 ++
 include/libcpp11/iostream.pxd                 | 29 +++++++++
 include/libcpp11/istream.pxd                  | 16 +++++
 include/libcpp11/ostream.pxd                  | 10 +++
 include/libcpp11/utility.pxd                  |  0
 16 files changed, 341 insertions(+)
 create mode 100644 include/fastani/__init__.pxd
 create mode 100644 include/fastani/cgi/__init__.pxd
 create mode 100644 include/fastani/cgi/cgid_types.pxd
 create mode 100644 include/fastani/cgi/compute_core_identity.pxd
 create mode 100644 include/fastani/map/__init__.pxd
 create mode 100644 include/fastani/map/base_types.pxd
 create mode 100644 include/fastani/map/compute_map.pxd
 create mode 100644 include/fastani/map/map_parameters.pxd
 create mode 100644 include/fastani/map/map_stats.pxd
 create mode 100644 include/fastani/map/win_sketch.pxd
 create mode 100644 include/libcpp11/__init__.pxd
 create mode 100644 include/libcpp11/chrono.pxd
 create mode 100644 include/libcpp11/iostream.pxd
 create mode 100644 include/libcpp11/istream.pxd
 create mode 100644 include/libcpp11/ostream.pxd
 create mode 100644 include/libcpp11/utility.pxd

diff --git a/include/fastani/__init__.pxd b/include/fastani/__init__.pxd
new file mode 100644
index 0000000..e69de29
diff --git a/include/fastani/cgi/__init__.pxd b/include/fastani/cgi/__init__.pxd
new file mode 100644
index 0000000..e69de29
diff --git a/include/fastani/cgi/cgid_types.pxd b/include/fastani/cgi/cgid_types.pxd
new file mode 100644
index 0000000..4bfb736
--- /dev/null
+++ b/include/fastani/cgi/cgid_types.pxd
@@ -0,0 +1,27 @@
+from libcpp cimport bool
+
+from fastani.map.base_types cimport seqno_t, offset_t
+
+
+cdef extern from "cgi/include/cgid_types.hpp" namespace "cgi" nogil:
+    # Record types declared in the C++ `cgi` namespace (data members only, plus one operator).
+    cdef cppclass MappingResult_CGI:
+        # Ids use `seqno_t`; positions use `offset_t`, matching `MappingResult` in base_types.
+        seqno_t  refSequenceId
+        seqno_t  genomeId
+        seqno_t  querySeqId
+        offset_t refStartPos
+        offset_t queryStartPos
+        offset_t mapRefPosBin
+        float    nucIdentity
+
+
+    cdef cppclass CGI_Results:
+        # Record keyed by a reference genome id and a query genome id.
+        seqno_t refGenomeId
+        seqno_t qryGenomeId
+        seqno_t countSeq
+        seqno_t totalQueryFragments
+        float   identity
+        # Ordering operator provided by the C++ type (ordering criteria live in the header).
+        bool  operator<(const CGI_Results& x)
diff --git a/include/fastani/cgi/compute_core_identity.pxd b/include/fastani/cgi/compute_core_identity.pxd
new file mode 100644
index 0000000..d86b3ab
--- /dev/null
+++ b/include/fastani/cgi/compute_core_identity.pxd
@@ -0,0 +1,58 @@
+from libc.stdint cimport uint64_t
+from libcpp.string cimport string
+from libcpp.unordered_map cimport unordered_map
+from libcpp.vector cimport vector
+
+from fastani.cgi.cgid_types cimport MappingResult_CGI, CGI_Results
+from fastani.map.base_types cimport MappingResultsVector_t
+from fastani.map.compute_map cimport Map
+from fastani.map.map_parameters cimport Parameters
+from fastani.map.win_sketch cimport Sketch
+
+
+cdef extern from "cgi/include/computeCoreIdentity.hpp" namespace "cgi" nogil:
+    # Free functions declared in the C++ `cgi` namespace. Every parameter below is a
+    # non-const reference except the `uint64_t` values, so callers must pass lvalues.
+    void reviseRefIdToGenomeId(vector[MappingResult_CGI] &shortResults, Sketch &refSketch)
+    void computeGenomeLengths(Parameters &parameters, unordered_map[string, uint64_t] &genomeLengths)
+
+    void outputVisualizationFile(
+        Parameters &parameters,
+        vector[MappingResult_CGI] &mappings_2way,
+        Map &mapper,
+        Sketch &refSketch,
+        uint64_t queryFileNo,
+        string &fileName
+    )
+    # NOTE(review): presumably fills `CGI_ResultsVector` from `results` — confirm in the C++ header.
+    void computeCGI(
+        Parameters &parameters,
+        MappingResultsVector_t &results,
+        Map &mapper,
+        Sketch &refSketch,
+        uint64_t totalQueryFragments,
+        uint64_t queryFileNo,
+        string &fileName,
+        vector[CGI_Results] &CGI_ResultsVector
+    )
+    # `outputCGI` and `outputPhylip` share the same parameter list.
+    void outputCGI(
+        Parameters &parameters,
+        unordered_map[string, uint64_t] &genomeLengths,
+        vector[CGI_Results] &CGI_ResultsVector,
+        string &fileName
+    )
+
+    void outputPhylip(
+        Parameters &parameters,
+        unordered_map[string, uint64_t] &genomeLengths,
+        vector[CGI_Results] &CGI_ResultsVector,
+        string &fileName
+    )
+
+    void splitReferenceGenomes(
+        Parameters &parameters,
+        vector[Parameters] &parameters_split
+    )
+    # Returns void and takes the vector by non-const reference.
+    void correctRefGenomeIds(vector[CGI_Results] &CGI_ResultsVector)
diff --git a/include/fastani/map/__init__.pxd b/include/fastani/map/__init__.pxd
new file mode 100644
index 0000000..e69de29
diff --git a/include/fastani/map/base_types.pxd b/include/fastani/map/base_types.pxd
new file mode 100644
index 0000000..768e739
--- /dev/null
+++ b/include/fastani/map/base_types.pxd
@@ -0,0 +1,64 @@
+from libc.stdint cimport uint32_t
+from libcpp cimport bool
+from libcpp.vector cimport vector
+from libcpp.string cimport string
+from libcpp11.chrono cimport high_resolution_clock
+
+
+cdef extern from "map/include/base_types.hpp" namespace "skch" nogil:
+    # Scalar aliases used by the other `skch` declarations.
+    ctypedef uint32_t hash_t
+    ctypedef int      offset_t
+    ctypedef int      seqno_t
+    # The `Time` alias below is currently disabled (left commented out).
+    # ctypedef high_resolution_clock Time
+
+    cdef cppclass MinimizerInfo:
+        hash_t   hash
+        seqno_t  seqId
+        offset_t wpos
+        # Comparison operators provided by the C++ type.
+        bool operator<  (const MinimizerInfo &x)
+        bool operator== (const MinimizerInfo &x)
+        bool operator!= (const MinimizerInfo &x)
+        # NOTE(review): both helpers take two operands; presumably static in C++ — confirm.
+        bool equalityByHash(const MinimizerInfo &x, const MinimizerInfo &y)
+        bool lessByHash    (const MinimizerInfo &x, const MinimizerInfo &y)
+
+
+    cdef cppclass MinimizerMetaData:
+        seqno_t  seqId
+        offset_t wpos
+
+    # Key/value aliases; the value type is a vector of `MinimizerMetaData`.
+    ctypedef hash_t                    MinimizerMapKeyType
+    ctypedef vector[MinimizerMetaData] MinimizerMapValueType
+
+
+    cdef cppclass ContigInfo:
+        string   name
+        offset_t len
+
+    # Class template with two type parameters: `K` types `kseq`, `MV` types `minimizerTableQuery`.
+    cdef cppclass QueryMetaData[ K, MV ]:
+        K       kseq
+        seqno_t seqCounter
+        int     sketchsize
+        MV      minimizerTableQuery
+
+    # Mapping record; positions and lengths are `offset_t`, sequence ids are `seqno_t`.
+    cdef cppclass MappingResult:
+        offset_t queryLen
+        offset_t refStartPos
+        offset_t refEndPos
+        offset_t queryStartPos
+        offset_t queryEndPos
+        seqno_t  refSeqId
+        seqno_t  querySeqId
+        float    nucIdentity
+        float    nucIdentityUpperBound
+        int      sketchSize
+        int      conservedSketches
+
+    # Container alias used wherever a list of mappings is passed around.
+    ctypedef vector[MappingResult] MappingResultsVector_t
diff --git a/include/fastani/map/compute_map.pxd b/include/fastani/map/compute_map.pxd
new file mode 100644
index 0000000..cec09a9
--- /dev/null
+++ b/include/fastani/map/compute_map.pxd
@@ -0,0 +1,46 @@
+from libc.stdint cimport uint64_t
+from libcpp.vector cimport vector
+from libcpp.functional cimport function
+
+from fastani.map.base_types cimport seqno_t, offset_t, ContigInfo, MappingResult, MappingResultsVector_t
+from fastani.map.map_parameters cimport Parameters
+from fastani.map.win_sketch cimport Sketch
+
+
+cdef extern from "map/include/computeMap.hpp" namespace "skch" nogil:
+    # Declares the C++ class `skch::Map` together with its nested helper types.
+
+    cdef cppclass Map:
+        # Nested record types; `L2_mapLocus_t` stores two `Sketch.MIIter_t` iterators.
+        cppclass L1_candidateLocus_t:
+            seqno_t  seqId
+            offset_t rangeStartPos
+            offset_t rangeEndPos
+
+        cppclass L2_mapLocus_t:
+            seqno_t         seqId
+            offset_t        meanOptimalPos
+            Sketch.MIIter_t optimalStart
+            Sketch.MIIter_t optimalEnd
+            int             sharedSketchSize
+        # Aliases for types nested in `Sketch`.
+        ctypedef Sketch.MI_Type MinVec_Type
+        ctypedef Sketch.MIIter_t MIIter_t
+
+        vector[ContigInfo] metadata
+        # Two constructor overloads that differ only in their last parameter.
+        Map(
+            const Parameters &p,
+            const Sketch &refsketch,
+            uint64_t &totalQueryFragments,
+            int queryno,
+            function[void(MappingResult&)] f = nullptr
+        )
+        # Overload taking a `MappingResultsVector_t` reference instead of a callback.
+        Map(
+            const Parameters &p,
+            const Sketch &refsketch,
+            uint64_t &totalQueryFragments,
+            int queryno,
+            MappingResultsVector_t &r
+        )
diff --git a/include/fastani/map/map_parameters.pxd b/include/fastani/map/map_parameters.pxd
new file mode 100644
index 0000000..e9a7bf6
--- /dev/null
+++ b/include/fastani/map/map_parameters.pxd
@@ -0,0 +1,24 @@
+from libc.stdint cimport uint64_t
+from libcpp cimport bool
+from libcpp.vector cimport vector
+from libcpp.string cimport string
+
+
+cdef extern from "map/include/map_parameters.hpp" namespace "skch" nogil:
+    # Configuration record: only data members are declared here, no methods.
+    cdef cppclass Parameters:
+        int            kmerSize
+        int            windowSize
+        int            minReadLength
+        float          minFraction
+        int            threads
+        int            alphabetSize
+        uint64_t       referenceSize
+        float          percentageIdentity
+        double         p_value
+        vector[string] refSequences
+        vector[string] querySequences
+        string         outFileName
+        bool           reportAll
+        bool           visualize
+        bool           matrixOutput
diff --git a/include/fastani/map/map_stats.pxd b/include/fastani/map/map_stats.pxd
new file mode 100644
index 0000000..1a0d109
--- /dev/null
+++ b/include/fastani/map/map_stats.pxd
@@ -0,0 +1,29 @@
+from libc.stdint cimport uint64_t
+
+
+cdef extern from "map/include/map_stats.hpp" namespace "skch::Stat" nogil:
+    # Statistical helpers from the C++ `skch::Stat` namespace; `k` is an `int` throughout.
+    cdef float  j2md(float j, int k)
+    cdef float  md2j(float d, int k)
+    cdef float  md_lower_bound(float d, int s, int k, float ci)
+
+    cdef int estimateMinimumHits(int s, int k, float perc_identity)
+    cdef int estimateMinimumHitsRelaxed(int s, int k, float perc_identity)
+    # Returns a `double`; `lengthReference` is 64-bit while `lengthQuery` is a plain `int`.
+    cdef double estimate_pvalue(
+        int s,
+        int k,
+        int alphabetSize,
+        float identity,
+        int lengthQuery,
+        uint64_t lengthReference
+    )
+    # Same trailing parameters as `estimate_pvalue`, with a p-value cutoff in place of `s`.
+    cdef int recommendedWindowSize(
+        double pValue_cutoff,
+        int k,
+        int alphabetSize,
+        float identity,
+        int lengthQuery,
+        uint64_t lengthReference
+    )
diff --git a/include/fastani/map/win_sketch.pxd b/include/fastani/map/win_sketch.pxd
new file mode 100644
index 0000000..5a6cd1a
--- /dev/null
+++ b/include/fastani/map/win_sketch.pxd
@@ -0,0 +1,32 @@
+cimport libcpp11.iostream
+from libcpp.unordered_map cimport unordered_map
+from libcpp.vector cimport vector
+
+from fastani.map.base_types cimport (
+    offset_t,
+    seqno_t,
+    MinimizerMapKeyType,
+    MinimizerMapValueType,
+    MinimizerInfo,
+    ContigInfo
+)
+from fastani.map.map_parameters cimport Parameters
+
+
+cdef extern from "map/include/winSketch.hpp" namespace "skch" nogil:
+    # Declares the C++ class `skch::Sketch`.
+    cdef cppclass Sketch:
+        # Type aliases nested in the class.
+        ctypedef vector[MinimizerInfo].const_iterator MIIter_t
+        ctypedef unordered_map[MinimizerMapKeyType, MinimizerMapValueType] MI_Map_t
+        ctypedef vector[MinimizerInfo] MI_Type
+        # Data members exposed to Cython.
+        vector[ContigInfo] metadata
+        vector[seqno_t]    sequencesByFileInfo
+        MI_Map_t minimizerPosLookupIndex
+        # Constructed from a `Parameters` instance passed by const reference.
+        Sketch(const Parameters &p)
+        # Member functions; the last two return `MIIter_t`, a const iterator over `MinimizerInfo`.
+        int getFreqThreshold()
+        MIIter_t searchIndex(seqno_t seqId, offset_t winpos)
+        MIIter_t getMinimizerIndexEnd()
diff --git a/include/libcpp11/__init__.pxd b/include/libcpp11/__init__.pxd
new file mode 100644
index 0000000..e69de29
diff --git a/include/libcpp11/chrono.pxd b/include/libcpp11/chrono.pxd
new file mode 100644
index 0000000..6592681
--- /dev/null
+++ b/include/libcpp11/chrono.pxd
@@ -0,0 +1,6 @@
+
+
+cdef extern from "<chrono>" namespace "std::chrono" nogil:
+    # Opaque declaration: no members are exposed, so the type can only be named.
+    cdef cppclass high_resolution_clock:
+        pass
diff --git a/include/libcpp11/iostream.pxd b/include/libcpp11/iostream.pxd
new file mode 100644
index 0000000..f709cf9
--- /dev/null
+++ b/include/libcpp11/iostream.pxd
@@ -0,0 +1,29 @@
+from libcpp11.istream cimport istream
+from libcpp11.ostream cimport ostream
+
+
+cdef extern from "<iostream>" namespace "std" nogil:
+    # Standard stream objects, typed with the `istream`/`ostream` aliases cimported above.
+    cdef istream cin
+    cdef ostream cout
+    cdef ostream cerr
+
+
+# Commented-out C++ declarations, apparently copied from the standard <istream> synopsis:
+# template<class CharT, class Traits = char_traits<CharT>>
+#   class basic_istream;
+#
+# using istream  = basic_istream<char>;
+# using wistream = basic_istream<wchar_t>;
+#
+# template<class CharT, class Traits = char_traits<CharT>>
+#   class basic_iostream;
+#
+# using iostream  = basic_iostream<char>;
+# using wiostream = basic_iostream<wchar_t>;
+#
+# template<class CharT, class Traits>
+#   basic_istream<CharT, Traits>& ws(basic_istream<CharT, Traits>& is);
+#
+# template<class Istream, class T>
+#   Istream&& operator>>(Istream&& is, T&& x);
diff --git a/include/libcpp11/istream.pxd b/include/libcpp11/istream.pxd
new file mode 100644
index 0000000..b38e10c
--- /dev/null
+++ b/include/libcpp11/istream.pxd
@@ -0,0 +1,16 @@
+from libc.stddef cimport wchar_t
+
+
+cdef extern from "<istream>" namespace "std" nogil:
+    # Opaque class templates: only `CharT` is declared and no members are exposed.
+    cdef cppclass basic_istream[CharT]:
+        pass
+    # Narrow and wide character aliases.
+    ctypedef basic_istream[char]    istream
+    ctypedef basic_istream[wchar_t] wistream
+
+    cdef cppclass basic_iostream[CharT]:
+        pass
+    # Narrow and wide character aliases.
+    ctypedef basic_iostream[char]    iostream
+    ctypedef basic_iostream[wchar_t] wiostream
diff --git a/include/libcpp11/ostream.pxd b/include/libcpp11/ostream.pxd
new file mode 100644
index 0000000..52e5d78
--- /dev/null
+++ b/include/libcpp11/ostream.pxd
@@ -0,0 +1,10 @@
+from libc.stddef cimport wchar_t
+
+
+cdef extern from "<ostream>" namespace "std" nogil:
+    # Opaque class template: only `CharT` is declared and no members are exposed.
+    cdef cppclass basic_ostream[CharT]:
+        pass
+    # Narrow and wide character aliases.
+    ctypedef basic_ostream[char]    ostream
+    ctypedef basic_ostream[wchar_t] wostream
diff --git a/include/libcpp11/utility.pxd b/include/libcpp11/utility.pxd
new file mode 100644
index 0000000..e69de29
-- 
GitLab