Skip to content
Snippets Groups Projects
Commit 3ce1a799 authored by Martin Larralde
Browse files

Reorganize platform-specific code to allow independent compiling

parent 8e5851af
No related branches found
No related tags found
No related merge requests found
...@@ -11,7 +11,7 @@ from libc.errno cimport errno ...@@ -11,7 +11,7 @@ from libc.errno cimport errno
from libc.string cimport strlen, memcpy, memset from libc.string cimport strlen, memcpy, memset
from _unicode cimport PyUnicode_1BYTE_KIND, PyUnicode_FromKindAndData from _unicode cimport PyUnicode_1BYTE_KIND, PyUnicode_FromKindAndData
from _simd cimport copy_upper from _sequtils cimport copy_upper
cdef extern from "<ctype.h>" nogil: cdef extern from "<ctype.h>" nogil:
cdef int toupper(int c) cdef int toupper(int c)
......
...@@ -46,8 +46,9 @@ from fastani.map.base_types cimport ( ...@@ -46,8 +46,9 @@ from fastani.map.base_types cimport (
# HACK: we need kseq_t* as a template argument, which is not supported by # HACK: we need kseq_t* as a template argument, which is not supported by
# Cython at the moment, so we just `typedef kseq_t* kseq_ptr_t` in # Cython at the moment, so we just `typedef kseq_t* kseq_ptr_t` in
# an external C++ header to make Cython happy # an external C++ header to make Cython happy
from _utils cimport kseq_ptr_t, toupper, complement, distance from _utils cimport kseq_ptr_t, toupper, distance
from _unicode cimport * from _unicode cimport *
from _sequtils cimport copy_upper, reverse_complement
# --- Python imports --------------------------------------------------------- # --- Python imports ---------------------------------------------------------
...@@ -88,10 +89,17 @@ cdef ssize_t _read_nucl( ...@@ -88,10 +89,17 @@ cdef ssize_t _read_nucl(
else: else:
length = 0 length = 0
for j in range(length): # if UCS-1, bytes are next to each other, so we can use the SIMD
nuc = toupper(<int> PyUnicode_READ(kind, data, i + j)) # implementations to copy into uppercase
fwd[_MAX_KMER_SIZE + j] = nuc if kind == PyUnicode_1BYTE_KIND:
bwd[_MAX_KMER_SIZE - j - 1] = complement(nuc) copy_upper(&fwd[_MAX_KMER_SIZE], &(<char*> data)[i], length)
else:
for j in range(length):
fwd[_MAX_KMER_SIZE + j] = toupper(<int> PyUnicode_READ(kind, data, i + j))
# reverse complement in backward buffer
reverse_complement(&bwd[_MAX_KMER_SIZE - length], &fwd[_MAX_KMER_SIZE], length)
return length return length
......
# Cython declarations for the C helpers exposed through "sequtils.h".
# Everything in this block is declared `nogil`.
cdef extern from "sequtils.h" nogil:
# Complement of a single nucleotide character (lookup-table based).
char complement(char base)
# copy_upper(dst, src, len): copy `len` bytes from `src` into `dst`,
# converting them to uppercase.
void copy_upper(char*, const char*, size_t)
# reverse_complement(dst, src, len): presumably writes the reverse complement
# of the `len` bytes at `src` into `dst` -- the implementation is not visible
# here, confirm against sequtils.c.
void reverse_complement(char*, const char*, size_t)
#ifndef __SEQUTILS_COMPLEMENT_H
#define __SEQUTILS_COMPLEMENT_H

/*
 * Nucleotide complement lookup table, indexed by 7-bit ASCII code.
 *
 * Letters that have a complement in the table (A<->T, C<->G, B<->V, D<->H,
 * K<->M, R<->Y, in both cases) map to that complement; every other code maps
 * to itself. Entries 0x0B and 0x1B must therefore be spelled with both hex
 * digits: a truncated escape such as '\x0' silently maps the byte to NUL.
 */
static const char COMPLEMENT_LOOKUP[128] = {
    '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
    '\x08', '\t',   '\n',   '\x0b', '\x0c', '\r',   '\x0e', '\x0f',
    '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
    '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
    ' ',    '!',    '"',    '#',    '$',    '%',    '&',    '\'',
    '(',    ')',    '*',    '+',    ',',    '-',    '.',    '/',
    '0',    '1',    '2',    '3',    '4',    '5',    '6',    '7',
    '8',    '9',    ':',    ';',    '<',    '=',    '>',    '?',
    '@',    'T',    'V',    'G',    'H',    'E',    'F',    'C',
    'D',    'I',    'J',    'M',    'L',    'K',    'N',    'O',
    'P',    'Q',    'Y',    'S',    'A',    'U',    'B',    'W',
    'X',    'R',    'Z',    '[',    '\\',   ']',    '^',    '_',
    '`',    't',    'v',    'g',    'h',    'e',    'f',    'c',
    'd',    'i',    'j',    'm',    'l',    'k',    'n',    'o',
    'p',    'q',    'y',    's',    'a',    'u',    'b',    'w',
    'x',    'r',    'z',    '{',    '|',    '}',    '~',    '\x7f'
};

/*
 * Return the complement of `base`.
 *
 * Only the low 7 bits of `base` are used to index the table, so a byte with
 * the high bit set is looked up as its 7-bit counterpart. The index is built
 * from an `unsigned char` so the header needs no <stddef.h> for `size_t`.
 * `static inline` keeps the helper private to each translation unit without
 * triggering unused-function warnings where the header is included but the
 * helper is not called.
 */
static inline char complement(char base) {
    return COMPLEMENT_LOOKUP[(unsigned char) base & 0x7F];
}

#endif
#ifndef __SIMD_H #ifndef __SEQUTILS_H
#define __SIMD_H #define __SEQUTILS_H
#include "complement.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
void default_copy_upper(char*, const char*, size_t); void default_copy_upper(char*, const char*, size_t);
#ifdef __SSE2__ #ifdef SSE2_BUILD_SUPPORTED
void sse_copy_upper(char*, const char*, size_t); void sse2_copy_upper(char*, const char*, size_t);
#endif #endif
#ifdef __ARM_NEON__ #ifdef NEON_BUILD_SUPPORTED
void neon_copy_upper(char* dst, const char* src, size_t len); void neon_copy_upper(char* dst, const char* src, size_t len);
#endif #endif
void copy_upper(char*, const char*, size_t); void copy_upper(char*, const char*, size_t);
// Reverse-complement routines, all sharing the (dst, src, len) signature.
// `default_reverse_complement` is always declared.
void default_reverse_complement(char*, const char*, size_t);
// The SSSE3 variant is only declared when SSSE3_BUILD_SUPPORTED is defined
// (presumably by the build system once an SSSE3 object has been compiled --
// confirm against the build script).
#ifdef SSSE3_BUILD_SUPPORTED
extern void ssse3_reverse_complement(char* dst, const char* src, size_t len);
#endif
// Generic entry point; NOTE(review): presumably dispatches to one of the
// variants above -- its definition is not visible in this header.
void reverse_complement(char*, const char*, size_t);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
......
#include <ctype.h>
#include <stddef.h>
#ifdef __SSE2__
#include <x86intrin.h>
#endif
#ifdef __ARM_NEON__
#include <arm_neon.h>
#endif
#include "_simd.h"
/*
 * Portable fallback: copy `len` bytes from `src` to `dst`, mapping every byte
 * through toupper().
 *
 * dst: destination buffer, at least `len` bytes.
 * src: source buffer, at least `len` bytes.
 * len: number of bytes to copy; 0 is a no-op.
 */
void default_copy_upper(char* dst, const char* src, size_t len) {
    for (size_t i = 0; i < len; i++) {
        // <ctype.h> functions are only defined for EOF and values that fit in
        // an unsigned char; a plain `char` with the high bit set may be
        // negative, so convert before the call.
        dst[i] = (char) toupper((unsigned char) src[i]);
    }
}
#ifdef __SSE2__
/*
 * SSE2 implementation of copy_upper: copy `len` bytes from `src` to `dst`,
 * converting ASCII 'a'..'z' to uppercase, 16 bytes per iteration.
 *
 * The lanes to change are selected with two signed byte comparisons,
 * `x > 'a' - 1` and `'z' + 1 > x`, so both ends of the range are inclusive
 * (comparing against 'z' itself would leave 'z' in lowercase). Bytes with the
 * high bit set are negative under the signed comparison and are left as-is.
 * The remaining (< 16) bytes are handled by default_copy_upper().
 */
void sse2_copy_upper(char* dst, const char* src, size_t len) {
    const __m128i below_a  = _mm_set1_epi8('a' - 1);
    const __m128i above_z  = _mm_set1_epi8('z' + 1);
    const __m128i case_gap = _mm_set1_epi8('a' - 'A');
    while (len >= sizeof(__m128i)) {
        // unaligned load/store: callers give no alignment guarantee
        const __m128i inp  = _mm_loadu_si128((const __m128i*) src);
        const __m128i ge_a = _mm_cmpgt_epi8(inp, below_a);
        const __m128i le_z = _mm_cmpgt_epi8(above_z, inp);
        // lanes holding a lowercase letter get `case_gap`, all others get 0
        const __m128i diff = _mm_and_si128(_mm_and_si128(ge_a, le_z), case_gap);
        _mm_storeu_si128((__m128i*) dst, _mm_sub_epi8(inp, diff));
        len -= sizeof(__m128i);
        src += sizeof(__m128i);
        dst += sizeof(__m128i);
    }
    default_copy_upper(dst, src, len);
}
#endif
#ifdef __ARM_NEON__
/*
 * NEON implementation of copy_upper: copy `len` bytes from `src` to `dst`,
 * converting ASCII 'a'..'z' to uppercase, 16 bytes per iteration.
 *
 * The lanes to change are selected with two signed byte comparisons,
 * `x > 'a' - 1` and `'z' + 1 > x`, so both ends of the range are inclusive
 * (comparing against 'z' itself would leave 'z' in lowercase). NEON
 * comparisons yield unsigned masks (uint8x16_t), so the mask is kept in
 * unsigned vectors and reinterpreted once before being combined with the
 * signed offset. The remaining (< 16) bytes are handled by
 * default_copy_upper().
 */
void neon_copy_upper(char* dst, const char* src, size_t len) {
    const int8x16_t below_a  = vdupq_n_s8('a' - 1);
    const int8x16_t above_z  = vdupq_n_s8('z' + 1);
    const int8x16_t case_gap = vdupq_n_s8('a' - 'A');
    while (len >= sizeof(int8x16_t)) {
        // signed load, matching the int8x16_t destination type
        const int8x16_t  inp  = vld1q_s8((const int8_t*) src);
        const uint8x16_t ge_a = vcgtq_s8(inp, below_a);
        const uint8x16_t le_z = vcgtq_s8(above_z, inp);
        const uint8x16_t mask = vandq_u8(ge_a, le_z);
        // lanes holding a lowercase letter get `case_gap`, all others get 0
        const int8x16_t  diff = vandq_s8(vreinterpretq_s8_u8(mask), case_gap);
        vst1q_s8((int8_t*) dst, vsubq_s8(inp, diff));
        len -= sizeof(int8x16_t);
        src += sizeof(int8x16_t);
        dst += sizeof(int8x16_t);
    }
    default_copy_upper(dst, src, len);
}
#endif
/*
 * ifunc resolver for copy_upper(): returns the implementation to bind to.
 *
 * - Built with SSE2 enabled: use sse2_copy_upper if the running CPU reports
 *   SSE2, otherwise default_copy_upper.
 * - Built with NEON enabled: use neon_copy_upper. No runtime probe is made:
 *   __builtin_cpu_init()/__builtin_cpu_supports() are documented by GCC as
 *   x86 built-ins and "neon" is not one of their feature names, and a
 *   translation unit compiled with __ARM_NEON__ defined may already contain
 *   NEON instructions anyway.
 * - Otherwise: default_copy_upper.
 *
 * Each configuration has its own complete branch, so no `else` is left
 * dangling across an #endif.
 */
void (*resolve_copy_upper (void))(char*, const char*, size_t)
{
#if defined(__SSE2__)
    // ifunc resolvers fire before constructors, explicitly call the init
    // function.
    __builtin_cpu_init ();
    if (__builtin_cpu_supports ("sse2")) {
        return sse2_copy_upper; // fast copying plus upper.
    }
    return default_copy_upper;
#elif defined(__ARM_NEON__)
    return neon_copy_upper; // fast copying plus upper.
#else
    return default_copy_upper;
#endif
}
// Public entry point. `copy_upper` has no body of its own: the `ifunc`
// attribute names `resolve_copy_upper` as the resolver whose return value
// becomes the implementation of this symbol.
// NOTE(review): `ifunc` is a GNU extension -- confirm that every supported
// toolchain/target accepts it.
void copy_upper(char*, const char*, size_t)
__attribute__ ((ifunc ("resolve_copy_upper")));
# Cython declaration for the C helper exposed through "_simd.h" (declared `nogil`).
cdef extern from "_simd.h" nogil:
# copy_upper(dst, src, len): copy `len` bytes from `src` into `dst`,
# converting them to uppercase.
void copy_upper(char*, const char*, size_t)
#include "omp.h" #include <zlib.h>
#include "_utils.hpp" #include "_utils.hpp"
// Stand-in for the OpenMP runtime query: always reports thread number 1.
// The constant is chosen to keep the logger quiet.
int omp_get_thread_num(void)
{
    const int reported_thread = 1;
    return reported_thread;
}
// Stand-in for the OpenMP runtime query: always reports a team of one thread.
int omp_get_num_threads(void)
{
    const int team_size = 1;
    return team_size;
}
ZEXTERN gzFile ZEXPORT gzdopen(int fd, const char* mode) { ZEXTERN gzFile ZEXPORT gzdopen(int fd, const char* mode) {
return NULL; return NULL;
} }
......
...@@ -14,34 +14,8 @@ ...@@ -14,34 +14,8 @@
#include "map/include/winSketch.hpp" #include "map/include/winSketch.hpp"
extern "C" { extern "C" {
// compatibility layer for Cython // compatibility layer for Cython
typedef kseq_t* kseq_ptr_t; typedef kseq_t* kseq_ptr_t;
// Efficient nucleotide complement with a lookup table indexed by 7-bit ASCII
// code. Letters that have a complement in the table (A<->T, C<->G, B<->V,
// D<->H, K<->M, R<->Y, in both cases) map to that complement; every other
// code maps to itself. Entries 0x0B and 0x1B must be spelled with both hex
// digits: a truncated escape such as '\x0' silently maps the byte to NUL.
static const char COMPLEMENT_LOOKUP[128] = {
    '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
    '\x08', '\t',   '\n',   '\x0b', '\x0c', '\r',   '\x0e', '\x0f',
    '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
    '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
    ' ',    '!',    '"',    '#',    '$',    '%',    '&',    '\'',
    '(',    ')',    '*',    '+',    ',',    '-',    '.',    '/',
    '0',    '1',    '2',    '3',    '4',    '5',    '6',    '7',
    '8',    '9',    ':',    ';',    '<',    '=',    '>',    '?',
    '@',    'T',    'V',    'G',    'H',    'E',    'F',    'C',
    'D',    'I',    'J',    'M',    'L',    'K',    'N',    'O',
    'P',    'Q',    'Y',    'S',    'A',    'U',    'B',    'W',
    'X',    'R',    'Z',    '[',    '\\',   ']',    '^',    '_',
    '`',    't',    'v',    'g',    'h',    'e',    'f',    'c',
    'd',    'i',    'j',    'm',    'l',    'k',    'n',    'o',
    'p',    'q',    'y',    's',    'a',    'u',    'b',    'w',
    'x',    'r',    'z',    '{',    '|',    '}',    '~',    '\x7f'
};

// Return the complement of `base`. Only the low 7 bits of `base` index the
// table, so a byte with the high bit set is looked up as its 7-bit
// counterpart. The index is built from an `unsigned char`, which avoids
// depending on `size_t` being declared; `static inline` keeps the helper
// local to each translation unit that includes this header.
static inline char complement(char base) {
    return COMPLEMENT_LOOKUP[(unsigned char) base & 0x7F];
}
} }
#endif // ifdef __UTILS_HPP #endif
...@@ -24,20 +24,18 @@ cdef extern from "<iterator>" namespace "std" nogil: ...@@ -24,20 +24,18 @@ cdef extern from "<iterator>" namespace "std" nogil:
cdef ssize_t distance[I](I first, I last); cdef ssize_t distance[I](I first, I last);
cdef extern from "<zlib.h>" nogil: # cdef extern from "<zlib.h>" nogil:
cdef struct gzFile_s: # cdef struct gzFile_s:
pass # pass
#
ctypedef gzFile_s* gzFile # ctypedef gzFile_s* gzFile
#
gzFile gzopen(int fd, const char* mode) # gzFile gzopen(int fd, const char* mode)
gzFile gzopen64(const char* path, const char* mode) # gzFile gzopen64(const char* path, const char* mode)
int gzread(gzFile file, void* buf, unsigned int len) # int gzread(gzFile file, void* buf, unsigned int len)
int gzclose(gzFile file) # int gzclose(gzFile file)
cdef extern from "_utils.hpp" nogil: cdef extern from "_utils.hpp" nogil:
ctypedef kseq_t* kseq_ptr_t ctypedef kseq_t* kseq_ptr_t
int complement(int)
...@@ -2,5 +2,10 @@ ...@@ -2,5 +2,10 @@
// not needed anywhere except in `cgi::correctRefGenomeIds` so we can just // not needed anywhere except in `cgi::correctRefGenomeIds` so we can just
// patch these functions to disable logging // patch these functions to disable logging
extern int omp_get_thread_num(void); int omp_get_thread_num(void) {
extern int omp_get_num_threads(void); return 1; // Make the logger shut up.
}
int omp_get_num_threads(void) {
return 1;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment