Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
PyFastANI
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Martin Larralde
PyFastANI
Commits
df59a5ba
Commit
df59a5ba
authored
3 years ago
by
Martin Larralde
Browse files
Options
Downloads
Patches
Plain Diff
Add `pyfastani._fastani` Cython code wrapping the FastANI C++ code
parent
ca6d22e7
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
pyfastani/__init__.py
+3
-0
3 additions, 0 deletions
pyfastani/__init__.py
pyfastani/_fastani.pyx
+106
-0
106 additions, 0 deletions
pyfastani/_fastani.pyx
with
109 additions
and
0 deletions
pyfastani/__init__.py
0 → 100644
+
3
−
0
View file @
df59a5ba
__author__
=
"
Martin Larralde <martin.larralde@embl.de>
"
__license__
=
"
MIT
"
__version__
=
"
0.1.0
"
This diff is collapsed.
Click to expand it.
pyfastani/_fastani.pyx
0 → 100644
+
106
−
0
View file @
df59a5ba
# coding: utf-8
# cython: language_level=3, linetrace=True, language=cpp
cimport
libcpp11.chrono
from
libc.stdint
cimport
uint64_t
from
libc.stdlib
cimport
malloc
,
realloc
,
free
from
libcpp.functional
cimport
function
from
libcpp.string
cimport
string
from
libcpp.unordered_map
cimport
unordered_map
from
libcpp.vector
cimport
vector
cimport
fastani.map.map_stats
from
fastani.cgi.compute_core_identity
cimport
(
computeCGI
,
correctRefGenomeIds
,
computeGenomeLengths
,
outputCGI
)
from
fastani.cgi.cgid_types
cimport
CGI_Results
from
fastani.map
cimport
base_types
from
fastani.map.compute_map
cimport
Map
from
fastani.map.map_parameters
cimport
Parameters
from
fastani.map.map_stats
cimport
recommendedWindowSize
from
fastani.map.win_sketch
cimport
Sketch
from
fastani.map.base_types
cimport
(
MappingResult
,
MappingResultsVector_t
,
)
from
_utils
cimport
new_map_with_result_vector
import
os
cdef
extern
from
"
<chrono>
"
namespace
"
std
"
nogil
:
pass
# cdef class MinimizerInfo:
# cdef base_types.MinimizerInfo* _mi
#
# def __cinit__(self):
# self._mi = new base_types.MinimizerInfo()
#
# def __dealloc__(self):
# free(self._mi)
def
main
(
query_file
,
ref_file
,
output_file
):
cdef
Parameters
parameters
parameters
.
kmerSize
=
16
;
parameters
.
minReadLength
=
3000
;
parameters
.
alphabetSize
=
4
;
parameters
.
minFraction
=
0.2
;
parameters
.
threads
=
1
;
parameters
.
p_value
=
1e-03
;
parameters
.
percentageIdentity
=
80
;
parameters
.
visualize
=
False
;
parameters
.
matrixOutput
=
False
;
parameters
.
referenceSize
=
5000000
;
parameters
.
reportAll
=
True
;
# Add each query as an individual genome
parameters
.
querySequences
.
push_back
(
os
.
fsencode
(
query_file
))
parameters
.
refSequences
.
push_back
(
os
.
fsencode
(
ref_file
))
# Add output file
parameters
.
outFileName
=
os
.
fsencode
(
output_file
)
# Compute optimal window size
parameters
.
windowSize
=
fastani
.
map
.
map_stats
.
recommendedWindowSize
(
parameters
.
p_value
,
parameters
.
kmerSize
,
parameters
.
alphabetSize
,
parameters
.
percentageIdentity
,
parameters
.
minReadLength
,
parameters
.
referenceSize
)
# Build the sketch for the reference
cdef
Sketch
*
reference_sketch
=
new
Sketch
(
parameters
)
# Final output vector
cdef
vector
[
CGI_Results
]
finalResults
cdef
vector
[
CGI_Results
]
finalResults_local
# query/ref output vector
cdef
MappingResultsVector_t
mapResults
cdef
uint64_t
totalQueryFragments
=
0
# mapper
cdef
Map
*
mapper
=
new_map_with_result_vector
(
parameters
,
reference_sketch
[
0
],
totalQueryFragments
,
0
,
mapResults
)
#
computeCGI
(
parameters
,
mapResults
,
mapper
[
0
],
reference_sketch
[
0
],
totalQueryFragments
,
0
,
parameters
.
outFileName
,
finalResults_local
);
# correctRefGenomeIds(finalResults_local) <-- only useful for OpenMP multithreaded
finalResults
.
insert
(
finalResults
.
end
(),
finalResults_local
.
begin
(),
finalResults_local
.
end
());
# report
cdef
unordered_map
[
string
,
uint64_t
]
genomeLengths
computeGenomeLengths
(
parameters
,
genomeLengths
)
outputCGI
(
parameters
,
genomeLengths
,
finalResults
,
parameters
.
outFileName
);
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment