Commit c3a88a37 authored by Chris Kerr's avatar Chris Kerr

Merge branch...

Merge branch '15-change-ylm-representation-to-store-magnitude-and-exp-i-phi-separately' into 'master'

Resolve "Change Ylm representation to store magnitude and exp(i*phi) separately"

Closes #15

See merge request !28
parents 7ce1d26b ca044587
Pipeline #15330 failed with stages
in 2 minutes and 3 seconds
/*
* Copyright (c) European Molecular Biology Laboratory
* SPDX-FileCopyrightText: 2016 European Molecular Biology Laboratory (EMBL)
* SPDX-FileCopyrightText: 2020 Christopher Kerr
*
* SPDX-License-Identifier: LGPL-3.0-or-later
*
......@@ -29,10 +30,12 @@
#ifdef WORKSIZE
__attribute__((reqd_work_group_size(1,WORKSIZE,1)))
#endif
__kernel void alm_add_atom(__global const cfloat_t *restrict ylm, //Y(L,M,theta,phi) dimensions[LM]
__global const float *restrict jl, //Jy(L,s*r) dimensions[L,s]
__global const float *restrict ff, //Form factor dimensions[s]
__global cfloat_t *restrict alm) //A(L,M,s) dimensions[LM,s]
__kernel void alm_add_atom(
const cfloat_t eiphi, //e^(i*phi) - used to calculate phase of ylm
__global const float *restrict ylm, //Y(L,M,theta,phi) dimensions[LM]
__global const float *restrict jl, //Jy(L,s*r) dimensions[L,s]
__global const float *restrict ff, //Form factor dimensions[s]
__global cfloat_t *restrict alm) //A(L,M,s) dimensions[LM,s]
{
const unsigned int L = get_global_id(0);
const size_t is = get_global_id(1);
......@@ -43,23 +46,27 @@ __kernel void alm_add_atom(__global const cfloat_t *restrict ylm, //Y(L,M,theta,
const float ffjl = jl[ils] * ff[is];
unsigned int M;
cfloat_t eiMphi = cfloat_new(1, 0);
for (M=0; M<=L; ++M) {
size_t ilm = il0 + M;
size_t ilms = ilm * NS + is;
cfloat_t almnew = cfloat_mulr(ylm[ilm], ffjl);
cfloat_t almnew = cfloat_mulr(eiMphi, ylm[ilm] * ffjl);
alm[ilms] = cfloat_add(alm[ilms], almnew);
eiMphi = cfloat_mul(eiMphi, eiphi);
}
}
#ifdef WORKSIZE
__attribute__((reqd_work_group_size(1,WORKSIZE,1)))
#endif
__kernel void alm_add_many_atoms(const unsigned int nAt,
__global const cfloat_t *restrict ylm, //Y(L,M,theta,phi) dimensions[LM]
__global const float *restrict jl, //Jy(L,s*r) dimensions[L,s]
__global const float *restrict ff, //Form factor dimensions[iAt,s]
__global cfloat_t *restrict alm, //A(L,M,s) dimensions[LM,s]
__local cfloat_t *restrict ylm_tmp)
__kernel void alm_add_many_atoms(
const unsigned int nAt,
__global const cfloat_t *restrict eiphi, //e^(i*phi) dimensions[iAt]
__global const float *restrict ylm, //Y(L,M,theta,phi) dimensions[iAt,LM]
__global const float *restrict jl, //Jy(L,s*r) dimensions[iAt,L,s]
__global const float *restrict ff, //Form factor dimensions[iAt,s]
__global cfloat_t *restrict alm, //A(L,M,s) dimensions[LM,s]
__local float *restrict ylm_local)
{
const uint L = get_global_id(0);
const uint LMAX1 = get_global_size(0);
......@@ -79,6 +86,7 @@ __kernel void alm_add_many_atoms(const unsigned int nAt,
const size_t ils = L * NS + is;
float ffjl[NATWORK];
cfloat_t eiMphi[NATWORK];
event_t ev = 0;
for (uint iAt=0; iAt<nAt; ++iAt) {
size_t iAtM0 = iAt * LMAX1;
......@@ -86,15 +94,9 @@ __kernel void alm_add_many_atoms(const unsigned int nAt,
size_t iAtS = iAt * NS + is;
size_t iAtLS = iAt * NLS + ils;
// Some OpenCL implementations cannot do async_work_group_copy for cfloat_t
// Instead, cast ylm and ylm_tmp as float arrays
__global float *fylm_global = (__global float*)(ylm+iAtLM0);
__local float *fylm_local = (__local float*)(ylm_tmp+iAtM0);
ev = async_work_group_copy(fylm_local, fylm_global, 2*LMAX1, ev);
// This is the line that was originally here
// ev = async_work_group_copy(ylm_tmp+iAtM0, ylm+iAtLM0, LMAX1, ev);
ev = async_work_group_copy(ylm_local+iAtM0, ylm+iAtLM0, LMAX1, ev);
ffjl[iAt] = ff[iAtS] * jl[iAtLS];
eiMphi[iAt] = cfloat_new(1, 0);
}
wait_group_events(1, &ev);
......@@ -111,8 +113,9 @@ __kernel void alm_add_many_atoms(const unsigned int nAt,
// Then use the builtin vector dot product function
for (uint iAt=0; iAt<nAt; ++iAt) {
size_t iAtM = iAt * LMAX1 + M;
cfloat_t almnew = cfloat_mulr(ylm_tmp[iAtM], ffjl[iAt]);
cfloat_t almnew = cfloat_mulr(eiMphi[iAt], ylm_local[iAtM] * ffjl[iAt]);
almtot = cfloat_add(almtot, almnew);
eiMphi[iAt] = cfloat_mul(eiMphi[iAt], eiphi[iAt]);
}
alm[ilms] = almtot;
......
/*
* Copyright (c) European Molecular Biology Laboratory, Christopher Kerr
* SPDX-FileCopyrightText: 2016 European Molecular Biology Laboratory (EMBL)
* SPDX-FileCopyrightText: 2018-2019 Christopher Kerr
* SPDX-FileCopyrightText: 2018-2020 Christopher Kerr
*
* SPDX-License-Identifier: LGPL-3.0-or-later
*
......@@ -81,27 +81,11 @@ void ylm_Mpart_local(const unsigned int L,
barrier(CLK_LOCAL_MEM_FENCE);
}
// Raise a complex number to a non-negative integer power
cfloat_t cfloat_pown(cfloat_t Z, uint N)
{
cfloat_t res = cfloat_new(1.0f, 0.0f);
// In theory the clz() function (number of leading zero bits)
// would be helpful here. However it doesn't seem to work
// Sticking to a simple if sub-optimal solution
for (uint bit = 0x80000000; bit > 0; bit >>= 1) {
res = cfloat_mul(res, res);
if (bit & N) {
res = cfloat_mul(res, Z);
}
}
return res;
}
/* Spherical harmonic Y_lm(theta, phi) for 0<=L<=LMAX, 0<=M<=L
/* Magnitude of the spherical harmonic Y_lm(theta, phi) for 0<=L<=LMAX, 0<=M<=L
*
* The value of theta is supplied as cos(theta) and sin(theta),
* since in practice the original data are in Cartesian coordinates.
* Likewise phi is supplied as exp(i*phi)
* Phi is not needed since it only affects the phase, not the magnitude.
*
* The output array is packed: index(L,M) is (L*(L+1))/2 + M;
*
......@@ -111,12 +95,12 @@ cfloat_t cfloat_pown(cfloat_t Z, uint N)
#ifdef LMAX1
__attribute__((reqd_work_group_size(LMAX1,1,1)))
#endif
__kernel void ylm(const float costheta,
const float sintheta,
const cfloat_t eiphi,
__global cfloat_t *restrict result,
__local float *MpartF,
__local int *MpartE)
__kernel void ylm_real(
const float costheta,
const float sintheta,
__global float *restrict result,
__local float *MpartF,
__local int *MpartE)
{
// assert(get_local_size(0) == get_global_size(0));
#ifndef LMAX1
......@@ -132,8 +116,6 @@ __kernel void ylm(const float costheta,
int termE = 0;
int sumexp = 0;
const cfloat_t eiMphi = cfloat_pown(eiphi, M);
for (L=0; L<LMAX1; ++L) {
ylm_Mpart_local(L, sintheta, MpartF, MpartE);
......@@ -155,11 +137,10 @@ __kernel void ylm(const float costheta,
// ldexp(term0, sumexp)
// and the sin(theta) part with the sqrt((L-M)!/(L+M)!) prefactor by:
// ldexp(mpartF[M], mpartE[M])
// Now calculate Ylm = (cospart) * (sinpart) * exp(i*M*phi)
// Now calculate abs(Ylm) = (cospart) * (sinpart)
if (L >= M) {
size_t ilm = (L*(L+1))/2 + M;
float magnitude = ldexp(term0 * MpartF[M], sumexp + MpartE[M]);
result[ilm] = cfloat_mulr(eiMphi, magnitude);
result[ilm] = ldexp(term0 * MpartF[M], sumexp + MpartE[M]);
}
// Rotate term0 to term1 and term1 to term2
......@@ -189,11 +170,10 @@ __kernel void ylm(const float costheta,
#ifdef LMAX1
__attribute__((reqd_work_group_size(LMAX1,1,1)))
#endif
__kernel void ylm_batch(
__kernel void ylm_real_batch(
__constant float *restrict costheta,
__constant float *restrict sintheta,
__constant cfloat_t *restrict eiphi,
__global cfloat_t *restrict result,
__global float *restrict result,
__local float *restrict MpartF,
__local int *restrict MpartE)
{
......@@ -205,6 +185,6 @@ __kernel void ylm_batch(
#endif
const unsigned int iAt = get_global_id(1);
ylm(costheta[iAt], sintheta[iAt], eiphi[iAt],
result + (iAt * NLM), MpartF, MpartE);
ylm_real(costheta[iAt], sintheta[iAt],
result + (iAt * NLM), MpartF, MpartE);
}
\ No newline at end of file
# SPDX-FileCopyrightText: 2016 European Molecular Biology Laboratory (EMBL)
# SPDX-FileCopyrightText: 2017-2019 Christopher Kerr
# SPDX-FileCopyrightText: 2017-2020 Christopher Kerr
#
# SPDX-License-Identifier: LGPL-3.0-or-later
......@@ -115,7 +115,7 @@ class AtomicScattering(PackedLMMixin, ClProgram):
r, costheta, sintheta, eiphi = sicopolar(xyz)
evY, ylm_dev = self.sphharm(costheta, sintheta, eiphi, queue=queue)
evY, ylm_dev = self.sphharm(costheta, sintheta, queue=queue)
evJ, jl_dev = self.sphbes(r, queue=queue)
ff32 = np.array(ff, dtype=np.float32)
......@@ -124,6 +124,7 @@ class AtomicScattering(PackedLMMixin, ClProgram):
ev = self.program.alm_add_atom(queue,
(LMAX1, NS),
(1, worksize),
np.complex64(eiphi),
ylm_dev.data,
jl_dev.data,
ff_dev.data,
......@@ -165,16 +166,19 @@ class AtomicScattering(PackedLMMixin, ClProgram):
r, costheta, sintheta, eiphi = sicopolar_array(xyz[iAt0:iAtN, :])
evY, ylm_dev = self.sphharm.batch(costheta, sintheta, eiphi, queue=queue)
evY, ylm_dev = self.sphharm.batch(costheta, sintheta, queue=queue)
evJ, jl_dev = self.sphbes.batch(r, queue=queue)
ff32 = np.array(ff[iAt0:iAtN, :], dtype=np.float32)
ff_dev = cl_array.to_device(queue, ff32)
eiphi32 = np.array(eiphi, dtype=np.complex64)
eiphi_dev = cl_array.to_device(queue, eiphi32)
ev = self.program.alm_add_many_atoms(queue,
(LMAX1, NS),
(1, worksize),
np.uint32(iAtN - iAt0),
eiphi_dev.data,
ylm_dev.data,
jl_dev.data,
ff_dev.data,
......
# SPDX-FileCopyrightText: 2016 European Molecular Biology Laboratory (EMBL)
# SPDX-FileCopyrightText: 2017-2019 Christopher Kerr
# SPDX-FileCopyrightText: 2017-2020 Christopher Kerr
#
# SPDX-License-Identifier: LGPL-3.0-or-later
......@@ -40,8 +40,8 @@ class SphericalHarmonic(PackedLMMixin, ClProgram):
options=["-DLMAX1=%d" % (self.LMAX1,)],
context=context)
@optional_out_array(shape=('NLM',), dtype='c8')
def __call__(self, costheta, sintheta, eiphi, queue=None, out=None):
@optional_out_array(shape=('NLM',), dtype='f4')
def __call__(self, costheta, sintheta, queue=None, out=None):
"""Calculate all non-negative spherical harmonics up to LMAX.
The calculation is done asynchronously, the returned OpenCL
......@@ -57,19 +57,18 @@ class SphericalHarmonic(PackedLMMixin, ClProgram):
localmemF = cl.LocalMemory(4*LMAX1)
localmemI = cl.LocalMemory(4*LMAX1)
ev = self.program.ylm(queue,
(LMAX1,),
(LMAX1,),
np.float32(costheta),
np.float32(sintheta),
np.complex64(eiphi),
out.data,
localmemF,
localmemI)
ev = self.program.ylm_real(
queue, (LMAX1,), (LMAX1,),
np.float32(costheta),
np.float32(sintheta),
out.data,
localmemF,
localmemI,
)
return ev, out
@optional_out_array(shape=('natwork', 'NLM'), dtype='c8')
def batch(self, costheta, sintheta, eiphi, queue=None, out=None):
@optional_out_array(shape=('natwork', 'NLM'), dtype='f4')
def batch(self, costheta, sintheta, queue=None, out=None):
"""Calculate spherical harmonics for a batch of atoms.
The calculation is done asynchronously, the returned OpenCL
......@@ -77,9 +76,7 @@ class SphericalHarmonic(PackedLMMixin, ClProgram):
"""
costheta = np.array(costheta, dtype=np.float32)
sintheta = np.array(sintheta, dtype=np.float32)
eiphi = np.array(eiphi, dtype=np.complex64)
assert np.shape(costheta) == np.shape(sintheta)
assert np.shape(costheta) == np.shape(eiphi)
assert np.all(-1 <= costheta)
assert np.all(costheta <= 1)
assert np.all(0 <= sintheta) # 0 <= theta <= pi
......@@ -92,18 +89,16 @@ class SphericalHarmonic(PackedLMMixin, ClProgram):
costh_dev = cl_array.to_device(queue, costheta)
sinth_dev = cl_array.to_device(queue, sintheta)
eiphi_dev = cl_array.to_device(queue, eiphi)
localmemF = cl.LocalMemory(4*LMAX1)
localmemI = cl.LocalMemory(4*LMAX1)
ev = self.program.ylm_batch(queue,
(LMAX1, nAt),
(LMAX1, 1),
costh_dev.data,
sinth_dev.data,
eiphi_dev.data,
out.data,
localmemF,
localmemI)
ev = self.program.ylm_real_batch(
queue, (LMAX1, nAt), (LMAX1, 1),
costh_dev.data,
sinth_dev.data,
out.data,
localmemF,
localmemI,
)
return ev, out
......@@ -13,34 +13,32 @@ from scipy.special import sph_harm
from galumph.sphericalharmonic import SphericalHarmonic
def scipy_SphHarm(LMAX, theta, phi):
def scipy_SphHarm(LMAX, theta):
NLM = (LMAX+1)*(LMAX+2)//2
ylm = np.empty((NLM,), dtype=np.complex128)
for L in range(LMAX+1):
il0 = L*(L+1)//2
ilend = il0+L+1
ylm[il0:ilend] = sph_harm(np.arange(L+1), L, phi, theta)
return ylm
ylm[il0:ilend] = sph_harm(np.arange(L+1), L, 0, theta)
return ylm.real
@settings(max_examples=25)
@given(
LMAX=st.integers(min_value=3, max_value=20),
theta=st.floats(min_value=0, max_value=np.pi),
phi=st.floats(min_value=0, max_value=2*np.pi),
)
@example(LMAX=3, theta=(np.pi/2), phi=0.0) # Issue #24
def test_single(cl_context, cl_queue, LMAX, theta, phi):
@example(LMAX=3, theta=(np.pi/2)) # Issue #24
def test_single(cl_context, cl_queue, LMAX, theta):
"""Test calculating the spherical harmonics for one (theta, phi) pair."""
clSphHarm = SphericalHarmonic(LMAX, context=cl_context)
costheta = np.cos(theta)
sintheta = np.sin(theta)
eiphi = np.exp(1j*phi)
# Rounding error sometimes gives theta slightly greater than pi
assume(sintheta >= 0)
ev, ylm_dev = clSphHarm(costheta, sintheta, eiphi, queue=cl_queue)
ylm_sp = scipy_SphHarm(LMAX, theta, phi)
ev, ylm_dev = clSphHarm(costheta, sintheta, queue=cl_queue)
ylm_sp = scipy_SphHarm(LMAX, theta)
ev.wait()
ylm_cl = ylm_dev.get(cl_queue)
......@@ -50,35 +48,29 @@ def test_single(cl_context, cl_queue, LMAX, theta, phi):
@settings(max_examples=10)
@given(
LMAX=st.integers(min_value=3, max_value=20),
# Generate theta and phi as one array so the are the same length
# All the values will be between 0 and pi, scale to get 0 <= phi <= 2pi
theta_phi=npst.arrays(
theta=npst.arrays(
shape=st.tuples(
st.integers(min_value=1, max_value=16),
st.just(2),
),
elements=st.floats(min_value=0, max_value=np.float32(np.pi), width=32),
dtype=st.sampled_from(('f4', 'f8')),
),
)
def test_multiple(cl_context, cl_queue, LMAX, theta_phi):
"""Test calculating the spherical harmonics for arrays of theta and phi."""
def test_multiple(cl_context, cl_queue, LMAX, theta):
"""Test calculating the spherical harmonics for arrays of theta."""
clSphHarm = SphericalHarmonic(LMAX, context=cl_context)
nAt = theta_phi.shape[0]
theta = theta_phi[:, 0]
phi = 2 * theta_phi[:, 1]
nAt = theta.shape[0]
costheta = np.cos(theta)
sintheta = np.sin(theta)
eiphi = np.exp(1j*phi)
# Rounding error sometimes gives theta slightly greater than pi
assume(np.all(sintheta >= 0))
ev, ylm_dev = clSphHarm.batch(costheta, sintheta, eiphi, queue=cl_queue)
ev, ylm_dev = clSphHarm.batch(costheta, sintheta, queue=cl_queue)
NLM = (LMAX+1)*(LMAX+2)//2
ylm_sp = np.empty((nAt, NLM), dtype=np.complex128)
for iAt in range(nAt):
ylm_sp[iAt, :] = scipy_SphHarm(LMAX, theta[iAt], phi[iAt])
ylm_sp[iAt, :] = scipy_SphHarm(LMAX, theta[iAt])
ev.wait()
ylm_cl = ylm_dev.get(cl_queue)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment