diff --git a/pyfastani/_fasta.pyx b/pyfastani/_fasta.pyx index c4381ea79399721d2cc2f00f0a84c9e4da48d499..394ba0846c068f01ed6d5ea579ee4bd2a0dd2dc8 100644 --- a/pyfastani/_fasta.pyx +++ b/pyfastani/_fasta.pyx @@ -11,7 +11,7 @@ from libc.errno cimport errno from libc.string cimport strlen, memcpy, memset from _unicode cimport PyUnicode_1BYTE_KIND, PyUnicode_FromKindAndData -from _simd cimport copy_upper +from _sequtils cimport copy_upper cdef extern from "<ctype.h>" nogil: cdef int toupper(int c) diff --git a/pyfastani/_fastani.pyx b/pyfastani/_fastani.pyx index 2afb2633b2a5526cb62fe19e5063e438f8e2c41f..663ed817b6e370980ebd0f18f37d10af7024aa04 100644 --- a/pyfastani/_fastani.pyx +++ b/pyfastani/_fastani.pyx @@ -46,8 +46,9 @@ from fastani.map.base_types cimport ( # HACK: we need kseq_t* as a template argument, which is not supported by # Cython at the moment, so we just `typedef kseq_t* kseq_ptr_t` in # an external C++ header to make Cython happy -from _utils cimport kseq_ptr_t, toupper, complement, distance +from _utils cimport kseq_ptr_t, toupper, distance from _unicode cimport * +from _sequtils cimport copy_upper, reverse_complement # --- Python imports --------------------------------------------------------- @@ -88,10 +89,17 @@ cdef ssize_t _read_nucl( else: length = 0 - for j in range(length): - nuc = toupper(<int> PyUnicode_READ(kind, data, i + j)) - fwd[_MAX_KMER_SIZE + j] = nuc - bwd[_MAX_KMER_SIZE - j - 1] = complement(nuc) + # if UCS-1, bytes are next to each other, so we can use the SIMD + # implementations to copy into uppercase + if kind == PyUnicode_1BYTE_KIND: + copy_upper(&fwd[_MAX_KMER_SIZE], &(<char*> data)[i], length) + else: + for j in range(length): + fwd[_MAX_KMER_SIZE + j] = toupper(<int> PyUnicode_READ(kind, data, i + j)) + + # reverse complement in backward buffer + reverse_complement(&bwd[_MAX_KMER_SIZE - length], &fwd[_MAX_KMER_SIZE], length) + return length diff --git a/pyfastani/_sequtils/__init__.pxd b/pyfastani/_sequtils/__init__.pxd new file mode 100644 index 0000000000000000000000000000000000000000..2ef1acee34702888c3b0f4f98117d044615bce87 --- /dev/null +++ b/pyfastani/_sequtils/__init__.pxd @@ -0,0 +1,4 @@ +cdef extern from "sequtils.h" nogil: + char complement(char base) + void copy_upper(char*, const char*, size_t) + void reverse_complement(char*, const char*, size_t) diff --git a/pyfastani/_sequtils/complement.h b/pyfastani/_sequtils/complement.h new file mode 100644 index 0000000000000000000000000000000000000000..2a3ccb84ac93001e49ef4a41f5bba95bbae997b6 --- /dev/null +++ b/pyfastani/_sequtils/complement.h @@ -0,0 +1,28 @@ +#ifndef __SEQUTILS_COMPLEMENT_H +#define __SEQUTILS_COMPLEMENT_H + +// efficient nucleotide complement with a lookup table +static const char COMPLEMENT_LOOKUP[128] = { + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\t', '\n', '\x0', '\x0c', '\r', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1', '\x1c', '\x1d', '\x1e', '\x1f', + ' ', '!', '"', '#', '$', '%', '&', '\'', + '(', ')', '*', '+', ',', '-', '.', '/', + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', ':', ';', '<', '=', '>', '?', + '@', 'T', 'V', 'G', 'H', 'E', 'F', 'C', + 'D', 'I', 'J', 'M', 'L', 'K', 'N', 'O', + 'P', 'Q', 'Y', 'S', 'A', 'U', 'B', 'W', + 'X', 'R', 'Z', '[', '\\', ']', '^', '_', + '`', 't', 'v', 'g', 'h', 'e', 'f', 'c', + 'd', 'i', 'j', 'm', 'l', 'k', 'n', 'o', + 'p', 'q', 'y', 's', 'a', 'u', 'b', 'w', + 'x', 'r', 'z', '{', '|', '}', '~', '\x7f' +}; + +static char complement(char base) { + return COMPLEMENT_LOOKUP[(size_t) (base & 0x7F)]; +} + +#endif diff --git a/pyfastani/_sequtils/sequtils.h b/pyfastani/_sequtils/sequtils.h new file mode 100644 index 0000000000000000000000000000000000000000..8bc1edfdceaa61692b4cb06a94b3a70f46f1dfa8 --- /dev/null +++ b/pyfastani/_sequtils/sequtils.h @@ -0,0 +1,29 @@ +#ifndef __SEQUTILS_H +#define __SEQUTILS_H + +#include "complement.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void default_copy_upper(char*, const char*, size_t); +#ifdef SSE2_BUILD_SUPPORTED +void sse2_copy_upper(char*, const char*, size_t); +#endif +#ifdef NEON_BUILD_SUPPORTED +void neon_copy_upper(char* dst, const char* src, size_t len); +#endif +void copy_upper(char*, const char*, size_t); + +void default_reverse_complement(char*, const char*, size_t); +#ifdef SSSE3_BUILD_SUPPORTED +extern void ssse3_reverse_complement(char* dst, const char* src, size_t len); +#endif +void reverse_complement(char*, const char*, size_t); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/pyfastani/_simd.c b/pyfastani/_simd.c deleted file mode 100644 index 5114c65343d7ed4aade13164229149aea6c3d806..0000000000000000000000000000000000000000 --- a/pyfastani/_simd.c +++ /dev/null @@ -1,87 +0,0 @@ -#include <ctype.h> -#include <stddef.h> -#ifdef __SSE2__ -#include <x86intrin.h> -#endif -#ifdef __ARM_NEON__ -#include <arm_neon.h> -#endif - -#include "_simd.h" - -void default_copy_upper(char* dst, const char* src, size_t len) { - while (len-- > 0) { - *dst = toupper(*src); - src++; - dst++; - } -} - -#ifdef __SSE2__ -void sse2_copy_upper(char* dst, const char* src, size_t len) { - - const __m128i ascii_a = _mm_set1_epi8('a' - 1); - const __m128i ascii_z = _mm_set1_epi8('z'); - const __m128i offset = _mm_set1_epi8('a' - 'A'); - - while (len >= sizeof(__m128i)) { - __m128i inp = _mm_loadu_si128((__m128i*) src); - __m128i greater_than_a = _mm_cmpgt_epi8(inp, ascii_a); - __m128i less_equal_z = _mm_cmpgt_epi8(ascii_z, inp); - __m128i mask = _mm_and_si128(greater_than_a, less_equal_z); - __m128i diff = _mm_and_si128(mask, offset); - __m128i added = _mm_sub_epi8(inp, diff); - _mm_storeu_si128((__m128i *) dst, added); - len -= sizeof(__m128i); - src += sizeof(__m128i); - dst += sizeof(__m128i); - } - - default_copy_upper(dst, src, len); -} -#endif -#ifdef __ARM_NEON__ -void neon_copy_upper(char* dst, const char* src, size_t len) { - - const int8x16_t ascii_a = vdupq_n_s8('a' - 1); - const int8x16_t ascii_z = vdupq_n_s8('z'); - const int8x16_t offset = vdupq_n_s8('a' - 'A'); - - while (len >= sizeof(int8x16_t)) { - int8x16_t inp = vld1q_u8((int8_t*) src); - int8x16_t greater_than_a = vcgtq_s8(inp, ascii_a); - int8x16_t less_equal_z = vcgtq_s8(ascii_z, inp); - int8x16_t mask = vandq_s8(greater_than_a, less_equal_z); - int8x16_t diff = vandq_s8(mask, offset); - int8x16_t added = vsubq_s8(inp, diff); - vst1q_s8((int8_t*) dst, added); - len -= sizeof(int8x16_t); - src += sizeof(int8x16_t); - dst += sizeof(int8x16_t); - } - - default_copy_upper(dst, src, len); -} -#endif - -void (*resolve_copy_upper (void))(char*, const char*, size_t) -{ - // ifunc resolvers fire before constructors, explicitly call the init - // function. -#ifdef __SSE2__ - __builtin_cpu_init (); - if (__builtin_cpu_supports ("sse2")) - return sse2_copy_upper; // fast copying plus upper. - else -#endif -#ifdef __ARM_NEON__ -__builtin_cpu_init (); -if (__builtin_cpu_supports ("neon")) - return neon_copy_upper; // fast copying plus upper. -else -#endif - return default_copy_upper; -} - -void copy_upper(char*, const char*, size_t) - __attribute__ ((ifunc ("resolve_copy_upper"))); diff --git a/pyfastani/_simd.h b/pyfastani/_simd.h deleted file mode 100644 index 1eeac380693c0b7dc81ad2daf87ab4c9d27641d6..0000000000000000000000000000000000000000 --- a/pyfastani/_simd.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef __SIMD_H -#define __SIMD_H - -#ifdef __cplusplus -extern "C" { -#endif - -void default_copy_upper(char*, const char*, size_t); -#ifdef __SSE2__ -void sse_copy_upper(char*, const char*, size_t); -#endif -#ifdef __ARM_NEON__ -void neon_copy_upper(char* dst, const char* src, size_t len); -#endif -void copy_upper(char*, const char*, size_t); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/pyfastani/_simd.pxd b/pyfastani/_simd.pxd deleted file mode 100644 index f545df43d38f279ff71b2e1b2802dfa82ef3580b..0000000000000000000000000000000000000000 --- a/pyfastani/_simd.pxd +++ /dev/null @@ -1,2 +0,0 @@ -cdef extern from "_simd.h" nogil: - void copy_upper(char*, const char*, size_t) diff --git a/pyfastani/_utils.cpp b/pyfastani/_utils.cpp index af2065688a01fce3675f30d8b223d2b82390c13a..8170735b0b1bdbcdec9230636455af6b8b09ed9b 100644 --- a/pyfastani/_utils.cpp +++ b/pyfastani/_utils.cpp @@ -1,14 +1,6 @@ -#include "omp.h" +#include <zlib.h> #include "_utils.hpp" -int omp_get_thread_num(void) { - return 1; // Make the logger shut up. -} - -int omp_get_num_threads(void) { - return 1; -} - ZEXTERN gzFile ZEXPORT gzdopen(int fd, const char* mode) { return NULL; } diff --git a/pyfastani/_utils.hpp b/pyfastani/_utils.hpp index 0c113608cfcff73cb74d795d46519b185ff4ac6f..6b9b317414e14e4a4af1f9a7fc70359c044e889b 100644 --- a/pyfastani/_utils.hpp +++ b/pyfastani/_utils.hpp @@ -14,34 +14,8 @@ #include "map/include/winSketch.hpp" extern "C" { - // compatibility layer for Cython typedef kseq_t* kseq_ptr_t; - - // efficient nucleotide complement with a lookup table - static const char COMPLEMENT_LOOKUP[128] = { - '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', - '\x08', '\t', '\n', '\x0', '\x0c', '\r', '\x0e', '\x0f', - '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', - '\x18', '\x19', '\x1a', '\x1', '\x1c', '\x1d', '\x1e', '\x1f', - ' ', '!', '"', '#', '$', '%', '&', '\'', - '(', ')', '*', '+', ',', '-', '.', '/', - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', ':', ';', '<', '=', '>', '?', - '@', 'T', 'V', 'G', 'H', 'E', 'F', 'C', - 'D', 'I', 'J', 'M', 'L', 'K', 'N', 'O', - 'P', 'Q', 'Y', 'S', 'A', 'U', 'B', 'W', - 'X', 'R', 'Z', '[', '\\', ']', '^', '_', - '`', 't', 'v', 'g', 'h', 'e', 'f', 'c', - 'd', 'i', 'j', 'm', 'l', 'k', 'n', 'o', - 'p', 'q', 'y', 's', 'a', 'u', 'b', 'w', - 'x', 'r', 'z', '{', '|', '}', '~', '\x7f' - }; - - inline char complement(char base) { - return COMPLEMENT_LOOKUP[(size_t) (base & 0x7F)]; - } - } -#endif // ifdef __UTILS_HPP +#endif diff --git a/pyfastani/_utils.pxd b/pyfastani/_utils.pxd index a630ee114bb9c7ac76cf15dc98e69cd796a617a8..11500d8402a1dd1d7d70624d8deef37ff319edd0 100644 --- a/pyfastani/_utils.pxd +++ b/pyfastani/_utils.pxd @@ -24,20 +24,18 @@ cdef extern from "<iterator>" namespace "std" nogil: cdef ssize_t distance[I](I first, I last); -cdef extern from "<zlib.h>" nogil: - cdef struct gzFile_s: - pass - - ctypedef gzFile_s* gzFile - - gzFile gzopen(int fd, const char* mode) - gzFile gzopen64(const char* path, const char* mode) - int gzread(gzFile file, void* buf, unsigned int len) - int gzclose(gzFile file) +# cdef extern from "<zlib.h>" nogil: +# cdef struct gzFile_s: +# pass +# +# ctypedef gzFile_s* gzFile +# +# gzFile gzopen(int fd, const char* mode) +# gzFile gzopen64(const char* path, const char* mode) +# int gzread(gzFile file, void* buf, unsigned int len) +# int gzclose(gzFile file) cdef extern from "_utils.hpp" nogil: ctypedef kseq_t* kseq_ptr_t - - int complement(int) diff --git a/pyfastani/omp.cpp b/pyfastani/omp.cpp index e670dd613d92b68c246869ddb9e59b50005d8f61..f6c68896fc6f483c2718c0b403f3ce734f4c1c0a 100644 --- a/pyfastani/omp.cpp +++ b/pyfastani/omp.cpp @@ -2,5 +2,10 @@ // not needed anywhere except in `cgi::correctRefGenomeIds` so we can just // patch these functions to disable logging -extern int omp_get_thread_num(void); -extern int omp_get_num_threads(void); +int omp_get_thread_num(void) { + return 1; // Make the logger shut up. +} + +int omp_get_num_threads(void) { + return 1; +}