Skip to content
Snippets Groups Projects
Commit 41cfdf1b authored by Martin Larralde's avatar Martin Larralde
Browse files

Add a minimal SIMD-accelerated FASTA parser for tests and CLI

parent 6f8d7676
No related branches found
No related tags found
No related merge requests found
# coding: utf-8
# cython: language_level=3, linetrace=True, language=cpp, binding=False
# --- C imports --------------------------------------------------------------
from cpython cimport PyObject
from cpython.bytes cimport PyBytes_FromStringAndSize
from libc.stdlib cimport malloc, realloc, free
from libc.stdio cimport FILE, fopen, fgets, fclose
from libc.errno cimport errno
from libc.string cimport strlen, memcpy, memset
from _unicode cimport PyUnicode_1BYTE_KIND, PyUnicode_FromKindAndData
from _simd cimport copy_upper
cdef extern from "<ctype.h>" nogil:
cdef int toupper(int c)
# --- Python imports ---------------------------------------------------------
import os
# --- Cython classes ---------------------------------------------------------
cdef class Record:
cdef public str id
cdef public bytes seq
def __init__(self, str id, bytes seq):
self.id = id
self.seq = seq
cdef class Parser:
cdef str path
cdef char line[2048]
cdef FILE* file
cdef char* seq_buffer
cdef size_t buffer_length
def __cinit__(self, str path):
self.path = path
self.file = fopen(os.fsencode(path), "r")
if self.file == NULL:
raise OSError(errno, path)
fgets(<char*> &self.line, sizeof(self.line), self.file)
self.buffer_length = 0x8000
self.seq_buffer = <char*> malloc(self.buffer_length * sizeof(char))
def __dealloc__(self):
fclose(self.file)
free(self.seq_buffer)
def __iter__(self):
return self
def __next__(self):
cdef size_t seq_length = 0
cdef size_t line_length = 0
cdef size_t line_pos = 0
cdef str id
cdef bytes seq
if self.line[0] != b">":
raise StopIteration()
line_length = strlen(<char*> &self.line)
id = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, <void*> &self.line[1], line_length - 1)
with nogil:
while fgets(<char*> &self.line, sizeof(self.line), self.file) != NULL:
if self.line[0] == b'>':
break
line_length = strlen(<char*> &self.line)
if line_length + seq_length >= self.buffer_length:
self.buffer_length *= 2
self.seq_buffer = <char*> realloc(<void*> self.seq_buffer, self.buffer_length)
line_pos = 0
if self.line[line_length - 1] == b"\n":
line_length -= 1
copy_upper(&self.seq_buffer[seq_length], <char*> &self.line, line_length)
#
#
# while self.line[line_pos] != b"\n" and self.line[line_pos] != b"\0":
# self.seq_buffer[seq_length] = toupper(self.line[line_pos])
# line_pos += 1
# seq_length += 1
seq = PyBytes_FromStringAndSize(self.seq_buffer, seq_length)
return Record(id, seq)
# return Record(id, seq)
#include <ctype.h>
#include <stddef.h>
#ifdef __SSE2__
#include <x86intrin.h>
#endif
#ifdef __ARM_NEON__
#include <arm_neon.h>
#endif
#include "_simd.h"
void default_copy_upper(char* dst, const char* src, size_t len) {
while (len-- > 0) {
*dst = tolower(*src);
src++;
dst++;
}
}
#ifdef __SSE2__
void sse_copy_upper(char* dst, const char* src, size_t len) {
const __m128i ascii_a = _mm_set1_epi8('a' - 1);
const __m128i ascii_z = _mm_set1_epi8('z');
const __m128i offset = _mm_set1_epi8('a' - 'A');
while (len >= sizeof(__m128i)) {
__m128i inp = _mm_loadu_si128((__m128i*) src);
__m128i greater_than_a = _mm_cmpgt_epi8(inp, ascii_a);
__m128i less_equal_z = _mm_cmpgt_epi8(ascii_z, inp);
__m128i mask = _mm_and_si128(greater_than_a, less_equal_z);
__m128i diff = _mm_and_si128(mask, offset);
__m128i added = _mm_sub_epi8(inp, diff);
_mm_storeu_si128((__m128i *) dst, added);
len -= sizeof(__m128i);
src += sizeof(__m128i);
dst += sizeof(__m128i);
}
default_copy_upper(dst, src, len);
}
#endif
#ifdef __ARM_NEON__
void neon_copy_upper(char* dst, const char* src, size_t len) {
const int8x16_t ascii_a = vdupq_n_s8('a' - 1);
const int8x16_t ascii_z = vdupq_n_s8('z');
const int8x16_t offset = vdupq_n_s8('a' - 'A');
while (len >= sizeof(int8x16_t)) {
int8x16_t inp = vld1q_u8((int8_t*) src);
int8x16_t greater_than_a = vcgtq_s8(inp, ascii_a);
int8x16_t less_equal_z = vcgtq_s8(ascii_z, inp);
int8x16_t mask = vandq_s8(greater_than_a, less_equal_z);
int8x16_t diff = vandq_s8(mask, offset);
int8x16_t added = vsubq_s8(inp, diff);
vst1q_s8((int8_t*) dst, added);
len -= sizeof(int8x16_t);
src += sizeof(int8x16_t);
dst += sizeof(int8x16_t);
}
default_copy_upper(dst, src, len);
}
#endif
void (*resolve_copy_upper (void))(char*, const char*, size_t)
{
// ifunc resolvers fire before constructors, explicitly call the init
// function.
#ifdef __SSE2__
__builtin_cpu_init ();
if (__builtin_cpu_supports ("sse2"))
return sse_copy_upper; // fast copying plus upper.
else
#endif
#ifdef __ARM_NEON__
__builtin_cpu_init ();
if (__builtin_cpu_supports ("neon"))
return neon_copy_upper; // fast copying plus upper.
else
#endif
return default_copy_upper;
}
void copy_upper(char*, const char*, size_t)
__attribute__ ((ifunc ("resolve_copy_upper")));
#ifndef __SIMD_H
#define __SIMD_H
#ifdef __cplusplus
extern "C" {
#endif
void default_copy_upper(char*, const char*, size_t);
#ifdef __SSE2__
void sse_copy_upper(char*, const char*, size_t);
#endif
#ifdef __ARM_NEON__
void neon_copy_upper(char* dst, const char* src, size_t len);
#endif
void copy_upper(char*, const char*, size_t);
#ifdef __cplusplus
}
#endif
#endif
cdef extern from "_simd.h" nogil:
void copy_upper(char*, const char*, size_t)
......@@ -204,18 +204,19 @@ class build_ext(_build_ext):
else:
ext.define_macros.append(("CYTHON_WITHOUT_ASSERTIONS", 1))
# make sure to build with C++11
if self.compiler.compiler_type == "msvc":
ext.extra_compile_args.append("/std:c++11")
ext.extra_link_args.append("/std:c++11")
else:
ext.extra_compile_args.append("-std=c++11")
ext.extra_link_args.append("-std=c++11")
# in case we are compiling with clang, make sure to use libstdc++
if self.compiler.compiler_type == "unix" and sys.platform == "darwin":
ext.extra_compile_args.append("-stdlib=libc++")
ext.extra_link_args.append("-stdlib=libc++")
# C++ OS-specific options
if ext.language == "c++":
# make sure to build with C++11
if self.compiler.compiler_type == "msvc":
ext.extra_compile_args.append("/std:c++11")
ext.extra_link_args.append("/std:c++11")
else:
ext.extra_compile_args.append("-std=c++11")
ext.extra_link_args.append("-std=c++11")
# in case we are compiling with clang, make sure to use libstdc++
if self.compiler.compiler_type == "unix" and sys.platform == "darwin":
ext.extra_compile_args.append("-stdlib=libc++")
ext.extra_link_args.append("-stdlib=libc++")
# build the rest of the extension as normal
_build_ext.build_extension(self, ext)
......@@ -249,6 +250,12 @@ extensions = [
language="c++",
include_dirs=["include", "pyfastani"],
define_macros=[("USE_BOOST", 1)],
),
Extension(
"pyfastani._fasta",
[os.path.join("pyfastani", x) for x in ("_fasta.pyx", "_simd.c")],
language="c",
# extra_compile_args=["-mavx2"],
)
]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment