FFT accuracy using (py)vkfft
The methodology follows http://www.fftw.org/accuracy/method.html:
* random values are generated with a uniform distribution between -0.5 and 0.5 (for both the real and imaginary parts)
* the reference result is computed in long double precision with (py)fftw
* the comparison is made using the norms \(L_n(y) = \left[\sum_i \left|y_i\right|^n\right]^{1/n}\) (n = 1, 2 or \(\infty\))
* the reported average accuracy is the relative error \(\frac{L_n(fft_{ref} - fft)}{L_n(fft_{ref})}\)
Note that the observed differences between the OpenCL and CUDA backends of VkFFT are due to the different sine and cosine functions used when `useLUT` is not specified. With `useLUT=1` these differences disappear; they may also not exist on other GPUs.
[1]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt
# pyfftw supports long double accuracy
from pyfftw.interfaces.scipy_fft import fftn as fftwn, ifftn as ifftwn
from scipy.fft import fftn as fftsn, ifftn as ifftsn
from scipy import stats
from pyvkfft.fft import fftn as vkfftn, ifftn as ivkfftn
from pyvkfft.base import primes
from pyvkfft.version import __version__, vkfft_version
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
print("pyvkfft %s, VkFFT %s" % (__version__, vkfft_version()))
pyvkfft 2023.2, VkFFT 1.3.1
[2]:
# Init pycuda: select a CUDA device and define the CUDA-based FFT helpers.
# Sets has_pycuda / has_cufft so that later cells know which backends exist.
cuda_device_name = None
try:
    import pycuda.autoinit
    import pycuda.gpuarray as cua
    from pyvkfft.cuda import VkFFTApp as VkFFTAppcu

    has_pycuda = True
    cuda_device_name = pycuda.autoinit.device.name()
    print("Selected CUDA device: ", cuda_device_name)
    # cuFFT plans are parked here so they are never destroyed
    # (workaround described in the warning printed below).
    v_cufft_plan = []

    def fftncu(d):
        """Return the cuFFT transform of the host array ``d``.

        The array is copied to the GPU, transformed in place (same array used
        as input and output, ``scale=False``) and copied back to the host.
        Requires ``cu_fft`` (skcuda.fft), i.e. ``has_cufft`` must be True.
        """
        dcu = cua.to_gpu(d)
        plan = cu_fft.Plan(d.shape, d.dtype, d.dtype)
        cu_fft.fft(dcu, dcu, plan, scale=False)
        if cu_fft.cufft.cufftGetVersion() >= 10200:
            # Keep a reference so the plan is not destroyed
            v_cufft_plan.append(plan)
        return dcu.get()

    def fftnvcu(d):
        """Return the VkFFT (CUDA backend, ``useLUT=0``) transform of ``d``."""
        dcu = cua.to_gpu(d)
        app = VkFFTAppcu(d.shape, d.dtype, useLUT=0)
        return app.fft(dcu).get()

    def fftnvculut(d):
        """Return the VkFFT (CUDA backend, ``useLUT=1``) transform of ``d``."""
        dcu = cua.to_gpu(d)
        app = VkFFTAppcu(d.shape, d.dtype, useLUT=1)
        return app.fft(dcu).get()

    try:
        import skcuda.fft as cu_fft

        has_cufft = True
        if cu_fft.cufft.cufftGetVersion() >= 10200:
            print("WARNING: cuFFT plans destruction is inhibited as a workaround for "
                  "an issue with CUDA>=11.0. See https://github.com/lebedov/scikit-cuda/issues/308\n"
                  "=> all cuFFT plans will be kept in GPU memory, effectively creating a memory leak "
                  "(this should be fine to run this notebook)")
    except Exception:
        # scikit-cuda missing or unusable: only cuFFT is disabled.
        # (Exception, not a bare except, so Ctrl-C / SystemExit still propagate.)
        has_cufft = False
except Exception:
    # pycuda or the pyvkfft CUDA backend could not be imported/initialised
    print("CUDA is not available")
    has_pycuda = False
    has_cufft = False
Selected CUDA device: NVIDIA A40
/home/esrf/favre/miniconda3/envs/pynx-py311-cu11.7/lib/python3.11/site-packages/skcuda/cublas.py:284: UserWarning: creating CUBLAS context to get version number
warnings.warn('creating CUBLAS context to get version number')
WARNING: cuFFT plans destruction is inhibited as a workaround for an issue with CUDA>=11.0. See https://github.com/lebedov/scikit-cuda/issues/308
=> all cuFFT plans will be kept in GPU memory, effectively creating a memory leak (this should be fine to run this notebook)
[3]:
# Init pyopencl: create a context and queue, and define the OpenCL FFT helpers.
# Sets has_pyopencl so that later cells know whether this backend exists.
cl_device_name = None
try:
    import pyopencl as cl
    import pyopencl.array as cla
    import os
    from pyvkfft.opencl import VkFFTApp as VkFFTAppcl

    if 'PYOPENCL_CTX' in os.environ:
        # The user selected the device explicitly through PYOPENCL_CTX
        ctx = cl.create_some_context()
        # Record the device name too: it is used later for the figure titles
        cl_device_name = ctx.devices[0].name
        print("Selected OpenCL device: ", cl_device_name)
    else:
        ctx = None
        # Find the first OpenCL GPU available and use it, unless
        # PYOPENCL_CTX is set (handled in the branch above)
        for p in cl.get_platforms():
            for d in p.get_devices():
                if d.type & cl.device_type.GPU == 0:
                    continue  # not a GPU
                cl_device_name = d.name
                print("Selected OpenCL device: ", d.name)
                ctx = cl.Context(devices=(d,))
                break
            if ctx is not None:
                break
        if ctx is None:
            # Caught below: reported as "OpenCL is not available"
            raise RuntimeError("No OpenCL GPU device found")
    cq = cl.CommandQueue(ctx)

    def fftnvcl(d):
        """Return the VkFFT (OpenCL backend, ``useLUT=0``) transform of ``d``."""
        dcl = cla.to_device(cq, d)
        app = VkFFTAppcl(d.shape, d.dtype, queue=cq, useLUT=0)
        return app.fft(dcl).get()

    def fftnvcllut(d):
        """Return the VkFFT (OpenCL backend, ``useLUT=1``) transform of ``d``."""
        dcl = cla.to_device(cq, d)
        app = VkFFTAppcl(d.shape, d.dtype, queue=cq, useLUT=1)
        return app.fft(dcl).get()

    has_pyopencl = True
except Exception:
    # Exception rather than a bare except, so Ctrl-C / SystemExit still propagate
    print("OpenCL is not available")
    has_pyopencl = False
Selected OpenCL device: NVIDIA A40
[4]:
def l1(a, b):
    """Relative L1 error of ``b`` with respect to the reference ``a``.

    Computes sum(|a - b|) / sum(|a|).
    """
    deviation = abs(a - b).sum()
    reference = abs(a).sum()
    return deviation / reference
def l2(a, b):
    """Relative L2 error of ``b`` with respect to the reference ``a``.

    Computes sqrt(sum(|a - b|^2) / sum(|a|^2)).
    """
    deviation_sq = (abs(a - b) ** 2).sum()
    reference_sq = (abs(a) ** 2).sum()
    return np.sqrt(deviation_sq / reference_sq)
def li(a, b):
    """Relative L-infinity error of ``b`` with respect to the reference ``a``.

    Computes max(|a - b|) / max(|a|).
    """
    worst_deviation = abs(a - b).max()
    largest_reference = abs(a).max()
    return worst_deviation / largest_reference
def latex_float(f):
    """Format ``f`` with two significant digits as a LaTeX-ready string.

    Values rendered in exponent notation become ``m \\times 10^{e}``;
    all other values are returned in their plain ``.2g`` form.
    """
    text = format(f, ".2g")
    mantissa, marker, exponent = text.partition("e")
    if not marker:
        # No exponent part: the plain representation is already fine
        return text
    return r"{0} \times 10^{{{1}}}".format(mantissa, int(exponent))
[5]:
# Device label used in the figure titles. CUDA takes precedence when both
# backends are available.
if has_pycuda:
    # CUDA
    device_name = "CUDA: " + cuda_device_name
elif cl_device_name is not None:
    # OpenCL
    device_name = "OpenCL: " + cl_device_name
elif has_pyopencl:
    # OpenCL context available but the device name was not recorded
    device_name = "OpenCL: unknown device"
else:
    # Neither GPU backend is usable: avoid a TypeError on None concatenation
    device_name = "no GPU backend available"

# Map backend label -> callable computing the forward FFT of a host array.
# "fftw" is always present; GPU backends are added only when available.
fft_dic = {"fftw": fftwn}
if has_pycuda:
    fft_dic["vkfft-cuda"] = fftnvcu
    fft_dic["vkfft-cuda-LUT"] = fftnvculut
if has_cufft:
    fft_dic["cufft"] = fftncu
if has_pyopencl:
    fft_dic["vkfft-opencl"] = fftnvcl
    fft_dic["vkfft-opencl-LUT"] = fftnvcllut
1D, single precision
[6]:
# Random complex test vector (real and imaginary parts uniform in [-0.5, 0.5)),
# kept in long double precision (reference) and single precision (tested).
nmax = 2**18
d0 = np.random.uniform(-0.5, 0.5, nmax) + 1j * np.random.uniform(-0.5, 0.5, nmax)
d0ld = d0.astype(np.clongdouble)
d0s = d0.astype(np.complex64)


def accu_1d(n, fft_dic):
    """Measure the single-precision 1D FFT errors for the first ``n`` samples.

    :param n: length of the transform (leading slice of the test vector).
    :param fft_dic: dict mapping a backend label to its FFT callable.
    :return: dict mapping each label to the tuple (L1, L2, L-infinity) of
        relative errors against the long double fftw transform.
    """
    reference = fftwn(d0ld[:n])
    sample = d0s[:n]
    res = {}
    for label, transform in fft_dic.items():
        out = transform(sample)
        res[label] = tuple(norm(reference, out) for norm in (l1, l2, li))
    return res
# GPU backend columns of the printed table, in display order
backends = ("vkfft-cuda", "vkfft-cuda-LUT", "vkfft-opencl", "vkfft-opencl-LUT", "cufft")

# Tested sizes, and per-backend lists of L1, L2 and L-infinity relative errors
vn, vl1, vl2, vli = [], {}, {}, {}

# Header row: one small transform tells which backends are actually present
r = accu_1d(8, fft_dic)
s = "%7s %14s" % ("N", "fftw ")
s += "".join(" %14s" % k for k in backends if k in r)
print(s)

for n in range(8, len(d0) + 1):
    # Change this for a more complete test (only 2^N1*3^N2 sizes)
    if max(primes(n)) > 3:
        continue
    r = accu_1d(n, fft_dic)
    vn.append(n)
    for k, (err_l1, err_l2, err_li) in r.items():
        vl1.setdefault(k, []).append(err_l1)
        vl2.setdefault(k, []).append(err_l2)
        vli.setdefault(k, []).append(err_li)
    ref = vl2["fftw"][-1]
    s = "%7d %14e" % (n, ref)
    for k in backends:
        if k not in vl2:
            continue
        err = vl2[k][-1]
        # Red channel scales with log10(err / fftw err): 0 for a ratio <= 1,
        # saturating at 255 for a ratio >= 100
        red = min(255, max(0, int(np.log10(err / ref) / np.log10(100) * 255)))
        s += "\x1b[38;2;%d;0;0m %14e\x1b[0m" % (red, err)
    print(s)
N fftw vkfft-cuda vkfft-cuda-LUT vkfft-opencl vkfft-opencl-LUT cufft
8 5.661812e-08 4.771311e-08 4.771311e-08 4.771311e-08 4.771311e-08 4.538441e-08
9 6.925965e-08 7.249566e-08 7.249566e-08 7.422968e-08 7.422968e-08 8.148424e-08
12 6.159262e-08 6.638158e-08 6.638158e-08 6.638158e-08 6.638158e-08 7.102478e-08
16 6.543330e-08 1.039333e-07 8.245534e-08 1.031274e-07 8.245534e-08 1.048754e-07
18 6.431882e-08 1.475902e-07 9.247269e-08 1.475902e-07 1.026794e-07 7.987485e-08
24 7.437298e-08 1.118194e-07 7.039635e-08 1.063454e-07 7.663216e-08 9.952901e-08
27 1.101631e-07 2.058048e-07 7.791600e-08 1.934317e-07 8.185001e-08 8.483861e-08
32 8.698629e-08 1.209513e-07 6.916893e-08 1.241982e-07 8.133395e-08 1.410946e-07
36 7.085077e-08 2.411464e-07 9.978294e-08 2.446531e-07 9.479782e-08 1.257602e-07
48 8.651990e-08 1.417475e-07 7.830167e-08 1.494853e-07 8.045021e-08 1.064689e-07
54 8.836157e-08 2.339144e-07 8.168266e-08 2.323761e-07 8.433767e-08 1.439560e-07
64 1.022756e-07 1.441626e-07 9.541947e-08 1.436288e-07 9.778236e-08 1.194721e-07
72 8.032949e-08 1.897170e-07 8.997575e-08 1.934350e-07 9.029303e-08 1.011575e-07
81 9.224252e-08 2.426904e-07 1.057223e-07 2.317988e-07 9.535692e-08 1.291147e-07
96 9.801735e-08 2.243387e-07 9.796188e-08 2.162218e-07 1.000744e-07 1.313988e-07
108 1.095297e-07 2.498205e-07 1.109149e-07 2.554678e-07 1.061535e-07 1.772444e-07
128 9.785448e-08 1.850773e-07 9.511398e-08 1.859862e-07 9.421877e-08 1.527004e-07
144 1.153802e-07 2.039221e-07 9.885693e-08 2.074026e-07 9.331911e-08 1.763623e-07
162 1.058271e-07 2.952651e-07 1.100295e-07 2.893044e-07 1.089239e-07 1.298426e-07
192 1.034159e-07 2.286529e-07 9.648514e-08 2.320203e-07 9.776815e-08 1.936020e-07
216 1.026397e-07 3.446967e-07 1.136366e-07 3.540606e-07 1.162158e-07 1.348444e-07
243 1.146960e-07 2.997211e-07 1.136536e-07 2.975615e-07 1.138712e-07 1.489338e-07
256 9.955169e-08 2.118876e-07 1.024745e-07 2.105416e-07 1.041131e-07 2.010968e-07
288 1.184379e-07 2.616714e-07 9.917606e-08 2.621172e-07 9.961721e-08 1.667383e-07
324 1.213902e-07 2.986930e-07 1.147992e-07 2.982502e-07 1.092042e-07 1.575466e-07
384 1.105922e-07 2.723698e-07 1.081798e-07 2.741561e-07 1.041250e-07 1.502640e-07
432 1.279715e-07 2.773929e-07 1.120886e-07 2.748347e-07 1.099727e-07 2.185309e-07
486 1.264691e-07 3.618835e-07 1.241485e-07 3.655857e-07 1.288156e-07 1.400701e-07
512 1.150365e-07 2.727073e-07 1.173454e-07 2.744505e-07 1.176425e-07 3.322761e-07
576 1.293662e-07 3.059793e-07 1.099459e-07 3.064660e-07 1.071335e-07 2.345285e-07
648 1.188700e-07 3.512993e-07 1.143462e-07 3.473531e-07 1.174056e-07 1.991699e-07
729 1.256747e-07 3.280571e-07 1.258247e-07 3.217211e-07 1.262471e-07 1.795872e-07
768 1.144708e-07 3.169585e-07 1.175144e-07 3.150117e-07 1.139605e-07 1.985681e-07
864 1.375433e-07 3.426251e-07 1.174357e-07 3.447054e-07 1.176099e-07 1.868742e-07
972 1.351672e-07 3.669900e-07 1.207861e-07 3.641820e-07 1.193669e-07 2.001350e-07
1024 1.279255e-07 2.957719e-07 1.179705e-07 2.986286e-07 1.189138e-07 2.900168e-07
1152 1.315614e-07 3.665685e-07 1.177501e-07 3.656964e-07 1.166467e-07 2.262170e-07
1296 1.338465e-07 4.333911e-07 1.261911e-07 4.354325e-07 1.260905e-07 1.690936e-07
1458 1.331758e-07 4.355626e-07 1.299837e-07 4.336216e-07 1.288716e-07 2.429227e-07
1536 1.270332e-07 3.558997e-07 1.188751e-07 3.565947e-07 1.211796e-07 3.016196e-07
1728 1.439379e-07 3.477974e-07 1.173882e-07 3.484380e-07 1.206126e-07 2.288359e-07
1944 1.294519e-07 4.033637e-07 1.265031e-07 4.040633e-07 1.282332e-07 1.825159e-07
2048 1.311234e-07 3.512463e-07 1.309433e-07 3.392700e-07 1.257402e-07 2.668943e-07
2187 1.386178e-07 3.917401e-07 1.358261e-07 3.933939e-07 1.353502e-07 1.952800e-07
2304 1.291952e-07 4.086238e-07 1.222116e-07 4.116580e-07 1.238758e-07 2.479581e-07
2592 1.395300e-07 3.983599e-07 1.263009e-07 3.968862e-07 1.254950e-07 2.427453e-07
2916 1.378609e-07 4.355173e-07 1.338088e-07 4.329371e-07 1.302515e-07 3.612562e-07
3072 1.365998e-07 4.001772e-07 1.220737e-07 3.955397e-07 1.238067e-07 2.876277e-07
3456 1.448430e-07 3.929084e-07 1.278417e-07 3.924585e-07 1.279892e-07 2.359315e-07
3888 1.452707e-07 3.951435e-07 1.303788e-07 3.958168e-07 1.300982e-07 1.896389e-07
4096 1.374327e-07 4.227348e-07 1.419898e-07 4.013398e-07 1.346181e-07 2.739278e-07
4374 1.490227e-07 4.911756e-07 1.398336e-07 4.920331e-07 1.421305e-07 2.199837e-07
4608 1.473382e-07 4.699489e-07 1.277424e-07 4.686752e-07 1.270572e-07 2.177657e-07
5184 1.497707e-07 4.054957e-07 1.324129e-07 4.067427e-07 1.313687e-07 2.614033e-07
5832 1.427670e-07 4.637798e-07 1.419198e-07 4.627156e-07 1.411020e-07 3.163732e-07
6144 1.461489e-07 4.403711e-07 1.320455e-07 3.709079e-07 1.378164e-07 2.272958e-07
6561 1.506496e-07 4.284306e-07 1.478323e-07 4.919648e-07 1.479551e-07 2.114005e-07
6912 1.443263e-07 4.305760e-07 1.318745e-07 4.061516e-07 1.367010e-07 2.460669e-07
7776 1.523758e-07 4.987578e-07 1.451305e-07 4.606680e-07 1.425706e-07 2.235600e-07
8192 1.463298e-07 4.469246e-07 1.478802e-07 4.409693e-07 1.396761e-07 4.389304e-07
8748 1.496005e-07 4.923488e-07 1.427488e-07 4.894587e-07 1.441992e-07 2.195366e-07
9216 1.593096e-07 5.081581e-07 1.350635e-07 4.455401e-07 1.387715e-07 3.256638e-07
10368 1.540761e-07 4.429276e-07 1.390027e-07 4.341219e-07 1.399476e-07 2.597733e-07
11664 1.527387e-07 4.646269e-07 1.408878e-07 4.853375e-07 1.441207e-07 2.112244e-07
12288 1.551555e-07 4.841449e-07 1.387407e-07 4.052009e-07 1.437321e-07 2.526851e-07
13122 1.544250e-07 5.482336e-07 1.543670e-07 5.477291e-07 1.539420e-07 2.187722e-07
13824 1.567230e-07 4.407193e-07 1.447505e-07 4.394896e-07 1.450354e-07 2.272706e-07
15552 1.604332e-07 4.544390e-07 1.423638e-07 4.542038e-07 1.422464e-07 2.214296e-07
16384 1.532192e-07 4.011272e-07 1.499668e-07 4.850540e-07 1.468326e-07 2.936496e-07
17496 1.527100e-07 5.505847e-07 1.508470e-07 5.499863e-07 1.506793e-07 2.218428e-07
18432 1.647677e-07 4.411765e-07 1.450592e-07 4.381201e-07 1.444041e-07 2.992014e-07
19683 1.620029e-07 5.751107e-07 1.588397e-07 5.739652e-07 1.574418e-07 2.406210e-07
20736 1.546362e-07 3.857048e-07 1.418489e-07 3.849317e-07 1.428486e-07 2.741637e-07
23328 1.586530e-07 5.139474e-07 1.496062e-07 5.147107e-07 1.505448e-07 2.494354e-07
24576 1.551876e-07 4.433321e-07 1.483676e-07 4.419779e-07 1.498213e-07 2.764359e-07
26244 1.605376e-07 6.059876e-07 1.595808e-07 6.062914e-07 1.587859e-07 2.201023e-07
27648 1.676835e-07 4.429513e-07 1.468292e-07 4.436884e-07 1.453715e-07 3.281937e-07
31104 1.638566e-07 5.540975e-07 1.542658e-07 5.541096e-07 1.547016e-07 2.575961e-07
32768 1.609546e-07 4.459117e-07 1.557306e-07 4.446131e-07 1.569180e-07 3.061112e-07
34992 1.640913e-07 6.296191e-07 1.606009e-07 6.293764e-07 1.613646e-07 2.256591e-07
36864 1.677148e-07 5.174434e-07 1.500451e-07 5.178042e-07 1.495052e-07 2.600174e-07
39366 1.653863e-07 6.178460e-07 1.648943e-07 6.182127e-07 1.635293e-07 2.688381e-07
41472 1.645688e-07 5.443656e-07 1.565446e-07 5.443770e-07 1.572641e-07 2.637117e-07
46656 1.664472e-07 6.631293e-07 1.624201e-07 6.634179e-07 1.628490e-07 2.448943e-07
49152 1.625402e-07 4.688899e-07 1.540039e-07 4.667786e-07 1.544734e-07 3.103901e-07
52488 1.636480e-07 6.225651e-07 1.656405e-07 6.218054e-07 1.662586e-07 2.725278e-07
55296 1.761434e-07 5.524049e-07 1.607641e-07 5.506604e-07 1.614658e-07 3.018288e-07
59049 1.690452e-07 5.764474e-07 1.703297e-07 5.771309e-07 1.689326e-07 2.383780e-07
62208 1.637051e-07 5.286435e-07 1.634514e-07 5.270071e-07 1.632802e-07 2.677715e-07
65536 1.656372e-07 4.759224e-07 1.603294e-07 4.745700e-07 1.612591e-07 3.164788e-07
69984 1.703471e-07 5.628977e-07 1.613945e-07 5.626291e-07 1.604559e-07 2.571305e-07
73728 1.660060e-07 5.250533e-07 1.558256e-07 5.222549e-07 1.562680e-07 2.901202e-07
78732 1.709068e-07 6.223701e-07 1.660930e-07 6.229767e-07 1.659434e-07 2.668760e-07
82944 1.726384e-07 4.990407e-07 1.537106e-07 4.985179e-07 1.523260e-07 3.565637e-07
93312 1.715934e-07 5.716983e-07 1.574825e-07 5.723901e-07 1.574283e-07 2.794871e-07
98304 1.728357e-07 5.076330e-07 1.595007e-07 5.063123e-07 1.597662e-07 3.215259e-07
104976 1.712175e-07 6.111326e-07 1.627277e-07 6.111089e-07 1.619584e-07 2.616428e-07
110592 1.776956e-07 5.400699e-07 1.570824e-07 5.396469e-07 1.572924e-07 2.797803e-07
118098 1.739740e-07 6.371366e-07 1.743872e-07 6.364468e-07 1.741531e-07 2.580930e-07
124416 1.752412e-07 5.952115e-07 1.621353e-07 5.949920e-07 1.618229e-07 2.520142e-07
131072 1.696012e-07 5.097098e-07 1.697233e-07 4.981079e-07 1.655631e-07 3.035412e-07
139968 1.770749e-07 5.943713e-07 1.621396e-07 5.941507e-07 1.620690e-07 2.562030e-07
147456 1.803107e-07 5.952785e-07 1.616329e-07 5.954645e-07 1.612240e-07 3.236669e-07
157464 1.709750e-07 6.801359e-07 1.711262e-07 6.803316e-07 1.710486e-07 2.627029e-07
165888 1.809574e-07 5.459304e-07 1.611403e-07 5.462046e-07 1.611718e-07 3.295721e-07
177147 1.788280e-07 6.012366e-07 1.803042e-07 6.015966e-07 1.799429e-07 2.595503e-07
186624 1.748429e-07 5.852788e-07 1.614547e-07 5.857742e-07 1.615886e-07 2.878387e-07
196608 1.817188e-07 5.496654e-07 1.660428e-07 5.490561e-07 1.667814e-07 3.334495e-07
209952 1.778060e-07 6.405874e-07 1.701896e-07 6.404317e-07 1.701566e-07 3.014692e-07
221184 1.775296e-07 5.586580e-07 1.664262e-07 5.583502e-07 1.670919e-07 3.003483e-07
236196 1.783809e-07 7.032948e-07 1.794682e-07 7.033912e-07 1.797219e-07 2.582962e-07
248832 1.820535e-07 6.318924e-07 1.751290e-07 6.316520e-07 1.755134e-07 3.489647e-07
262144 1.785678e-07 5.437365e-07 1.707511e-07 5.433821e-07 1.723307e-07 2.862827e-07
[7]:
# Figure 1: one panel per backend showing the L1, L2 and L-infinity errors,
# with a linear fit of L2 and L-infinity against log10(size)
plt.figure(figsize=(13, 1 + (len(vl2) + 1) * 1.5))
vk = vl2.keys()
x = np.array(vn, dtype=np.float32)
xl = d0.ndim * np.log10(x)  # Use the size of the array
nrows = (len(vl2) + 1) // 2
for i, k in enumerate(vk, start=1):
    plt.subplot(nrows, 2, i)
    fit_l2 = stats.linregress(xl, np.array(vl2[k], dtype=np.float32))
    fit_li = stats.linregress(xl, np.array(vli[k], dtype=np.float32))
    label_l2 = r"$L2\approx %s+%s\log(size)$" % (
        latex_float(fit_l2.intercept), latex_float(fit_l2.slope))
    label_li = r"$L_{\infty}\approx %s+%s\log(size)$" % (
        latex_float(fit_li.intercept), latex_float(fit_li.slope))
    # Measured errors
    plt.semilogx(vn, vl1[k], '-ob', label="$L1$")
    plt.semilogx(vn, vl2[k], '-ok', label=label_l2)
    plt.semilogx(vn, vli[k], '-og', label=label_li)
    # Fitted lines
    plt.semilogx(x, fit_l2.intercept + fit_l2.slope * xl, "k-")
    plt.semilogx(x, fit_li.intercept + fit_li.slope * xl, "g-")
    plt.title(k)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.xlabel("N", loc='right')
plt.suptitle("1D FFT errors (single precision, radix-2,3) - " + device_name)
plt.tight_layout()

# Figure 2: L2 error of all backends on a single plot
plt.figure()
ms = 3
clrs = {
    "fftw": '-og',
    "vkfft-cuda": "-^k",
    "vkfft-cuda-LUT": "-^b",
    "vkfft-opencl": "-vk",
    "vkfft-opencl-LUT": "-vb",
    "cufft": "-or",
}
for k, v in vl2.items():
    plt.semilogx(vn, v, clrs[k], markersize=ms, label=k)
plt.legend(loc='upper left')
plt.suptitle("1D FFT L2 error (single precision, radix-2,3) - " + device_name)
plt.xlabel("N", loc='right')
plt.grid(True)
plt.tight_layout()
1D, double precision
[8]:
# Random complex test vector (real and imaginary parts uniform in [-0.5, 0.5)),
# kept in long double precision (reference) and double precision (tested).
nmax = 2**18
d0 = np.random.uniform(-0.5, 0.5, nmax) + 1j * np.random.uniform(-0.5, 0.5, nmax)
d0ld = d0.astype(np.clongdouble)
d0d = d0.astype(np.complex128)


def accu_1d(n, fft_dic):
    """Measure the double-precision 1D FFT errors for the first ``n`` samples.

    :param n: length of the transform (leading slice of the test vector).
    :param fft_dic: dict mapping a backend label to its FFT callable.
    :return: dict mapping each label to the tuple (L1, L2, L-infinity) of
        relative errors against the long double fftw transform.
    """
    reference = fftwn(d0ld[:n])
    sample = d0d[:n]
    res = {}
    for label, transform in fft_dic.items():
        out = transform(sample)
        res[label] = tuple(norm(reference, out) for norm in (l1, l2, li))
    return res
# GPU backend columns of the printed table, in display order
backends = ("vkfft-cuda", "vkfft-cuda-LUT", "vkfft-opencl", "vkfft-opencl-LUT", "cufft")

# Tested sizes, and per-backend lists of L1, L2 and L-infinity relative errors
vn, vl1, vl2, vli = [], {}, {}, {}

# Header row: one small transform tells which backends are actually present
r = accu_1d(8, fft_dic)
s = "%7s %14s" % ("N", "fftw ")
s += "".join(" %14s" % k for k in backends if k in r)
print(s)

for n in range(8, len(d0) + 1):
    # Skip every size that has a prime factor larger than 3
    if max(primes(n)) > 3:
        continue
    r = accu_1d(n, fft_dic)
    vn.append(n)
    for k, (err_l1, err_l2, err_li) in r.items():
        vl1.setdefault(k, []).append(err_l1)
        vl2.setdefault(k, []).append(err_l2)
        vli.setdefault(k, []).append(err_li)
    ref = vl2["fftw"][-1]
    s = "%7d %14e" % (n, ref)
    for k in backends:
        if k not in vl2:
            continue
        err = vl2[k][-1]
        # Red channel scales with log10(err / fftw err): 0 for a ratio <= 1,
        # saturating at 255 for a ratio >= 100
        red = min(255, max(0, int(np.log10(err / ref) / np.log10(100) * 255)))
        s += "\x1b[38;2;%d;0;0m %14e\x1b[0m" % (red, err)
    print(s)
N fftw vkfft-cuda vkfft-cuda-LUT vkfft-opencl vkfft-opencl-LUT cufft
8 5.367255e-17 5.656864e-17 5.656864e-17 5.656864e-17 5.656864e-17 5.840088e-17
9 9.619862e-17 1.069195e-16 1.069195e-16 1.067132e-16 1.067132e-16 1.067132e-16
12 7.087986e-17 1.069433e-16 1.069433e-16 1.069433e-16 1.069433e-16 9.472633e-17
16 9.872177e-17 1.219751e-16 1.219751e-16 1.067727e-16 1.067727e-16 1.104271e-16
18 1.133557e-16 1.162862e-16 1.162862e-16 1.087061e-16 1.087061e-16 1.259815e-16
24 1.299068e-16 1.543545e-16 1.543545e-16 1.539512e-16 1.539512e-16 1.660673e-16
27 1.494702e-16 1.227911e-16 1.227911e-16 1.501807e-16 1.501807e-16 1.830138e-16
32 1.133334e-16 1.606026e-16 1.606026e-16 1.516121e-16 1.516121e-16 1.497297e-16
36 1.885887e-16 1.740149e-16 1.740149e-16 2.135202e-16 2.135202e-16 2.157540e-16
48 1.216451e-16 1.539245e-16 1.539245e-16 1.575012e-16 1.575012e-16 1.581150e-16
54 1.575659e-16 2.347844e-16 2.347844e-16 2.206083e-16 2.206083e-16 2.230410e-16
64 1.281604e-16 1.429013e-16 1.429013e-16 1.598804e-16 1.598804e-16 1.602454e-16
72 1.643927e-16 1.607482e-16 1.607482e-16 1.630723e-16 1.630723e-16 1.749040e-16
81 2.040083e-16 2.156954e-16 2.156954e-16 2.175756e-16 2.175756e-16 2.011994e-16
96 1.961262e-16 1.544846e-16 1.544846e-16 1.698115e-16 1.698115e-16 1.626307e-16
108 1.560056e-16 1.995488e-16 1.995488e-16 2.029792e-16 2.029792e-16 2.402584e-16
128 1.679223e-16 1.627604e-16 1.627604e-16 1.623086e-16 1.623086e-16 1.970800e-16
144 2.077158e-16 1.991140e-16 1.991140e-16 2.002485e-16 2.002485e-16 2.271230e-16
162 1.923714e-16 2.607887e-16 2.607887e-16 2.687635e-16 2.687635e-16 2.261844e-16
192 1.722049e-16 1.811760e-16 1.811760e-16 1.705169e-16 1.705169e-16 2.677313e-16
216 2.117673e-16 2.514679e-16 2.514679e-16 2.615850e-16 2.615850e-16 2.457024e-16
243 2.187502e-16 2.250994e-16 2.250994e-16 2.399898e-16 2.399898e-16 2.507859e-16
256 2.005995e-16 1.766079e-16 1.766079e-16 1.815744e-16 1.815744e-16 2.074768e-16
288 2.156484e-16 2.083801e-16 2.083801e-16 2.095753e-16 2.095753e-16 2.101710e-16
324 2.105772e-16 2.466879e-16 2.466879e-16 2.542842e-16 2.542842e-16 2.617610e-16
384 2.040326e-16 1.976065e-16 1.976065e-16 1.941932e-16 1.941932e-16 1.909575e-16
432 2.326740e-16 2.278230e-16 2.278230e-16 2.342927e-16 2.342927e-16 3.047920e-16
486 2.252192e-16 2.944502e-16 2.944502e-16 2.926094e-16 2.926094e-16 2.383041e-16
512 1.952682e-16 2.006080e-16 2.006080e-16 2.078939e-16 2.078939e-16 2.358933e-16
576 2.328841e-16 2.187861e-16 2.187861e-16 2.277219e-16 2.277219e-16 2.660536e-16
648 2.279861e-16 2.741809e-16 2.741809e-16 2.765692e-16 2.765692e-16 2.812500e-16
729 2.397941e-16 2.745695e-16 2.745695e-16 2.776495e-16 2.776495e-16 2.685338e-16
768 2.217792e-16 2.050935e-16 2.050935e-16 2.112474e-16 2.112474e-16 2.549967e-16
864 2.392887e-16 2.464955e-16 2.464955e-16 2.396781e-16 2.396781e-16 2.845579e-16
972 2.437752e-16 2.969209e-16 2.969209e-16 2.948694e-16 2.948694e-16 2.879844e-16
1024 2.134838e-16 2.192733e-16 2.192733e-16 2.213998e-16 2.213998e-16 2.892022e-16
1152 2.344243e-16 2.362071e-16 2.362071e-16 2.263669e-16 2.263669e-16 3.024710e-16
1296 2.415099e-16 3.039216e-16 3.039216e-16 3.007685e-16 3.007685e-16 2.961018e-16
1458 2.618731e-16 3.227033e-16 3.227033e-16 3.204716e-16 3.204716e-16 3.092983e-16
1536 2.268523e-16 2.204308e-16 2.204308e-16 2.229394e-16 2.229394e-16 3.047860e-16
1728 2.469143e-16 2.590953e-16 2.590953e-16 2.587161e-16 2.587161e-16 3.184378e-16
1944 2.645351e-16 3.034015e-16 3.034015e-16 3.052625e-16 3.052625e-16 3.385453e-16
2048 2.262977e-16 2.262601e-16 2.262601e-16 2.295038e-16 2.295038e-16 3.141678e-16
2187 2.805185e-16 3.080530e-16 3.080530e-16 3.109135e-16 3.109135e-16 3.259160e-16
2304 2.510036e-16 2.384915e-16 2.384915e-16 2.450032e-16 2.450032e-16 3.448086e-16
2592 2.577191e-16 2.855486e-16 2.855486e-16 2.833299e-16 2.833299e-16 3.582828e-16
2916 2.718885e-16 3.276193e-16 3.276193e-16 3.252465e-16 3.252465e-16 3.454902e-16
3072 2.375548e-16 2.306371e-16 2.306371e-16 2.240963e-16 2.240963e-16 3.153804e-16
3456 2.513195e-16 2.715907e-16 2.715907e-16 2.570280e-16 2.570280e-16 3.364389e-16
3888 2.780577e-16 3.033602e-16 3.033602e-16 3.142853e-16 3.142853e-16 3.734568e-16
4096 2.407233e-16 2.445722e-16 2.445722e-16 2.416350e-16 2.416350e-16 5.001043e-16
4374 2.844914e-16 3.535308e-16 3.535308e-16 3.357152e-16 3.357152e-16 4.219991e-16
4608 2.704864e-16 2.544678e-16 2.544678e-16 2.450729e-16 2.450729e-16 3.487418e-16
5184 2.716405e-16 2.914734e-16 2.914734e-16 3.059825e-16 3.059825e-16 3.633577e-16
5832 2.793983e-16 3.411985e-16 3.411985e-16 3.195383e-16 3.195383e-16 3.389887e-16
6144 2.587074e-16 2.417811e-16 2.417811e-16 2.322334e-16 2.322334e-16 3.255483e-16
6561 2.957650e-16 3.294060e-16 3.294060e-16 3.317424e-16 3.317424e-16 3.745007e-16
6912 2.754060e-16 2.746749e-16 2.746749e-16 2.785481e-16 2.785481e-16 3.569971e-16
7776 2.849944e-16 2.955296e-16 2.955296e-16 2.915170e-16 2.915170e-16 3.987939e-16
8192 2.674059e-16 2.445088e-16 2.445088e-16 2.512990e-16 2.512990e-16 3.306980e-16
8748 2.857597e-16 3.321091e-16 3.321091e-16 3.312670e-16 3.312670e-16 4.658386e-16
9216 2.744919e-16 2.505807e-16 2.505807e-16 2.524278e-16 2.524278e-16 3.408578e-16
10368 2.771470e-16 2.939128e-16 2.939128e-16 2.921326e-16 2.921326e-16 3.594668e-16
11664 2.900195e-16 3.325539e-16 3.325539e-16 3.322498e-16 3.322498e-16 3.400178e-16
12288 2.663610e-16 2.410619e-16 2.410619e-16 2.424417e-16 2.424417e-16 4.097729e-16
13122 3.118571e-16 3.684103e-16 3.684103e-16 3.667145e-16 3.667145e-16 4.058685e-16
13824 2.864866e-16 2.664835e-16 2.664835e-16 2.689970e-16 2.689970e-16 3.573930e-16
15552 2.927195e-16 3.165552e-16 3.165552e-16 3.163312e-16 3.163312e-16 3.997006e-16
16384 2.734085e-16 2.520036e-16 2.520036e-16 2.592869e-16 2.592869e-16 3.431413e-16
17496 3.097311e-16 3.696773e-16 3.696773e-16 3.694079e-16 3.694079e-16 4.567620e-16
18432 2.838084e-16 2.566817e-16 2.566817e-16 2.591014e-16 2.591014e-16 3.491507e-16
19683 3.225921e-16 3.571714e-16 3.571714e-16 3.576653e-16 3.576653e-16 4.474667e-16
20736 2.907632e-16 3.041692e-16 3.041692e-16 3.044346e-16 3.044346e-16 4.007057e-16
23328 3.018126e-16 3.562740e-16 3.562740e-16 3.559010e-16 3.559010e-16 3.700049e-16
24576 2.801902e-16 2.504164e-16 2.504164e-16 2.522531e-16 2.522531e-16 4.033854e-16
26244 3.163100e-16 4.030064e-16 4.030064e-16 4.007280e-16 4.007280e-16 4.392310e-16
27648 2.965545e-16 2.900228e-16 2.900228e-16 2.904187e-16 2.904187e-16 3.614601e-16
31104 2.963796e-16 3.366356e-16 3.366356e-16 3.349188e-16 3.349188e-16 3.981820e-16
32768 2.804352e-16 2.754293e-16 2.754293e-16 2.791107e-16 2.791107e-16 3.668297e-16
34992 3.184266e-16 4.035335e-16 4.035335e-16 4.032879e-16 4.032879e-16 4.843679e-16
36864 2.919735e-16 2.755381e-16 2.755381e-16 2.750201e-16 2.750201e-16 4.280307e-16
39366 3.327346e-16 3.960490e-16 3.960490e-16 3.962953e-16 3.962953e-16 4.883068e-16
41472 3.021585e-16 3.357164e-16 3.357164e-16 3.377360e-16 3.377360e-16 3.986021e-16
46656 3.150570e-16 4.027811e-16 4.027811e-16 4.046402e-16 4.046402e-16 3.781815e-16
49152 2.894413e-16 2.618178e-16 2.618178e-16 2.639423e-16 2.639423e-16 4.046840e-16
52488 3.231218e-16 3.973656e-16 3.973656e-16 3.978064e-16 3.978064e-16 4.343594e-16
55296 3.008292e-16 3.013918e-16 3.013918e-16 3.020041e-16 3.020041e-16 3.712652e-16
59049 3.382884e-16 3.927921e-16 3.927921e-16 3.907209e-16 3.907209e-16 4.450151e-16
62208 3.181738e-16 3.016649e-16 3.016649e-16 3.001105e-16 3.001105e-16 4.324270e-16
65536 2.906882e-16 2.838052e-16 2.838052e-16 2.897250e-16 2.897250e-16 4.082329e-16
69984 3.277844e-16 3.545990e-16 3.545990e-16 3.536523e-16 3.536523e-16 5.056228e-16
73728 3.095879e-16 2.748330e-16 2.748330e-16 2.754066e-16 2.754066e-16 4.172611e-16
78732 3.318011e-16 3.939013e-16 3.939013e-16 3.928623e-16 3.928623e-16 5.219038e-16
82944 3.143504e-16 3.188615e-16 3.188615e-16 3.186202e-16 3.186202e-16 3.957595e-16
93312 3.185616e-16 3.584346e-16 3.584346e-16 3.580152e-16 3.580152e-16 3.815659e-16
98304 3.006042e-16 2.701318e-16 2.701318e-16 2.722451e-16 2.722451e-16 4.328090e-16
104976 3.304492e-16 3.974154e-16 3.974154e-16 3.975181e-16 3.975181e-16 4.638966e-16
110592 3.070421e-16 3.068377e-16 3.068377e-16 3.048006e-16 3.048006e-16 4.357884e-16
118098 3.558113e-16 4.234892e-16 4.234892e-16 4.216279e-16 4.216279e-16 4.775853e-16
124416 3.258323e-16 3.449687e-16 3.449687e-16 3.451990e-16 3.451990e-16 4.284158e-16
131072 2.993093e-16 2.998442e-16 2.998442e-16 3.004132e-16 3.004132e-16 4.158093e-16
139968 3.335338e-16 3.809354e-16 3.809354e-16 3.807334e-16 3.807334e-16 5.071593e-16
147456 3.222761e-16 2.946617e-16 2.946617e-16 2.960588e-16 2.960588e-16 4.198297e-16
157464 3.495228e-16 4.260938e-16 4.260938e-16 4.262866e-16 4.262866e-16 5.188830e-16
165888 3.242618e-16 3.282844e-16 3.282844e-16 3.283160e-16 3.283160e-16 4.091494e-16
177147 3.665945e-16 4.186164e-16 4.186164e-16 4.173338e-16 4.173338e-16 5.334907e-16
186624 3.299107e-16 3.633019e-16 3.633019e-16 3.630145e-16 3.630145e-16 4.071148e-16
196608 3.112031e-16 2.856495e-16 2.856495e-16 2.877122e-16 2.877122e-16 4.781679e-16
209952 3.407887e-16 4.094482e-16 4.094482e-16 4.089924e-16 4.089924e-16 4.856920e-16
221184 3.273546e-16 3.042240e-16 3.042240e-16 3.045988e-16 3.045988e-16 4.278760e-16
236196 3.594414e-16 4.542307e-16 4.542307e-16 4.561372e-16 4.561372e-16 4.968436e-16
248832 3.348508e-16 3.354448e-16 3.354448e-16 3.355906e-16 3.355906e-16 4.245576e-16
262144 3.192889e-16 3.152152e-16 3.152152e-16 3.203700e-16 3.203700e-16 4.084377e-16
[9]:
# Figure 1: one panel per backend showing the L1, L2 and L-infinity errors,
# with a linear fit of L2 and L-infinity against log10(size)
plt.figure(figsize=(13, 1 + (len(vl2) + 1) * 1.5))
vk = vl2.keys()
x = np.array(vn, dtype=np.float32)
xl = d0.ndim * np.log10(x)  # Use the size of the array
i = 1
for k in vk:
    plt.subplot((len(vl2) + 1) // 2, 2, i)
    plt.semilogx(vn, vl1[k], '-ob', label="$L1$")
    # linregress result: index 0 is the slope, index 1 the intercept
    r2 = stats.linregress(xl, np.array(vl2[k], dtype=np.float32))
    plt.semilogx(vn, vl2[k], '-ok',
                 label=r"$L2\approx %s+%s\log(size)$" % (latex_float(r2[1]), latex_float(r2[0])))
    ri = stats.linregress(xl, np.array(vli[k], dtype=np.float32))
    plt.semilogx(vn, vli[k], '-og',
                 label=r"$L_{\infty}\approx %s+%s\log(size)$" % (latex_float(ri[1]), latex_float(ri[0])))
    # Fitted lines
    plt.semilogx(x, r2[1] + r2[0] * xl, "k-")
    plt.semilogx(x, ri[1] + ri[0] * xl, "g-")
    plt.title(k)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.xlabel("N", loc='right')
    i += 1
# This figure shows all three norms, so it is titled "errors" like the
# single-precision and 2D figures (it previously said "L2 error").
plt.suptitle("1D FFT errors (double precision, radix-2,3) - " + device_name)
plt.tight_layout()

# Figure 2: L2 error of all backends on a single plot
plt.figure()
ms = 3
clrs = {"fftw": '-og', "vkfft-cuda": "-^k", "vkfft-cuda-LUT": "-^b",
        "vkfft-opencl": "-vk", "vkfft-opencl-LUT": "-vb", "cufft": "-or"}
for k, v in vl2.items():
    plt.semilogx(vn, v, clrs[k], markersize=ms, label=k)
plt.legend(loc='upper left')
plt.suptitle("1D FFT L2 error (double precision, radix-2,3) - " + device_name)
plt.xlabel("N", loc='right')
plt.grid(True)
plt.tight_layout()
2D, single precision
[10]:
# Random complex nmax x nmax test array (real and imaginary parts uniform in
# [-0.5, 0.5)), kept in long double (reference) and single precision (tested).
nmax = 512
d0 = np.random.uniform(-0.5, 0.5, (nmax, nmax)) + 1j * np.random.uniform(-0.5, 0.5, (nmax, nmax))
d0ld = d0.astype(np.clongdouble)
d0s = d0.astype(np.complex64)


def accu_2d(n, fft_dic):
    """Measure the single-precision 2D FFT errors on the leading n x n block.

    :param n: size of the transform along each of the two dimensions.
    :param fft_dic: dict mapping a backend label to its FFT callable.
    :return: dict mapping each label to the tuple (L1, L2, L-infinity) of
        relative errors against the long double fftw transform.
    """
    reference = fftwn(d0ld[:n, :n].copy())
    res = {}
    for label, transform in fft_dic.items():
        # A fresh copy of the n x n block is handed to every backend
        out = transform(d0s[:n, :n].copy())
        res[label] = tuple(norm(reference, out) for norm in (l1, l2, li))
    return res
# GPU backend columns of the printed table, in display order
backends = ("vkfft-cuda", "vkfft-cuda-LUT", "vkfft-opencl", "vkfft-opencl-LUT", "cufft")

# Tested sizes, and per-backend lists of L1, L2 and L-infinity relative errors
vn, vl1, vl2, vli = [], {}, {}, {}

# Header row: one small transform tells which backends are actually present
r = accu_2d(8, fft_dic)
s = "%7s %14s" % ("N", "fftw ")
s += "".join(" %14s" % k for k in backends if k in r)
print(s)

for n in range(8, len(d0) + 1):
    # Skip every size that has a prime factor larger than 3
    if max(primes(n)) > 3:
        continue
    r = accu_2d(n, fft_dic)
    vn.append(n)
    for k, (err_l1, err_l2, err_li) in r.items():
        vl1.setdefault(k, []).append(err_l1)
        vl2.setdefault(k, []).append(err_l2)
        vli.setdefault(k, []).append(err_li)
    ref = vl2["fftw"][-1]
    s = "%7d %14e" % (n, ref)
    for k in backends:
        if k not in vl2:
            continue
        err = vl2[k][-1]
        # Red channel scales with log10(err / fftw err): 0 for a ratio <= 1,
        # saturating at 255 for a ratio >= 100
        red = min(255, max(0, int(np.log10(err / ref) / np.log10(100) * 255)))
        s += "\x1b[38;2;%d;0;0m %14e\x1b[0m" % (red, err)
    print(s)
N fftw vkfft-cuda vkfft-cuda-LUT vkfft-opencl vkfft-opencl-LUT cufft
8 8.527774e-08 7.266479e-08 7.266479e-08 7.266479e-08 7.266479e-08 9.498406e-08
9 9.335190e-08 9.891788e-08 9.891788e-08 8.961366e-08 8.961366e-08 1.040164e-07
12 8.536343e-08 9.431391e-08 9.431391e-08 9.556699e-08 9.556699e-08 8.830248e-08
16 9.928524e-08 1.328003e-07 9.199517e-08 1.366377e-07 9.277042e-08 1.076327e-07
18 1.134702e-07 2.237842e-07 1.165017e-07 2.278635e-07 1.159992e-07 1.362995e-07
24 1.051133e-07 1.626263e-07 9.872837e-08 1.622961e-07 1.008451e-07 1.257961e-07
27 1.276222e-07 3.197227e-07 1.178340e-07 3.181370e-07 1.196095e-07 1.366535e-07
32 1.088421e-07 1.656500e-07 1.171188e-07 1.665413e-07 1.161752e-07 2.327413e-07
36 1.249738e-07 3.031020e-07 1.241648e-07 3.027040e-07 1.232112e-07 1.426325e-07
48 1.121796e-07 2.344185e-07 1.176478e-07 2.330087e-07 1.186958e-07 1.593699e-07
54 1.371896e-07 3.735055e-07 1.342523e-07 3.733109e-07 1.363872e-07 2.123211e-07
64 1.203335e-07 2.423184e-07 1.319832e-07 2.416615e-07 1.329602e-07 1.844784e-07
72 1.246935e-07 2.994335e-07 1.296193e-07 2.986124e-07 1.299276e-07 1.639534e-07
81 1.449360e-07 3.778978e-07 1.441958e-07 3.779404e-07 1.426087e-07 1.755133e-07
96 1.334507e-07 3.268282e-07 1.336894e-07 3.279344e-07 1.323218e-07 2.305902e-07
108 1.471534e-07 3.744680e-07 1.383366e-07 3.765097e-07 1.367569e-07 2.450587e-07
128 1.312548e-07 3.073083e-07 1.431903e-07 3.049531e-07 1.436841e-07 2.448643e-07
144 1.571986e-07 3.033074e-07 1.366514e-07 3.035363e-07 1.382916e-07 2.496395e-07
162 1.557316e-07 5.001706e-07 1.547347e-07 5.022002e-07 1.538532e-07 1.839227e-07
192 1.531230e-07 4.017444e-07 1.449496e-07 4.019279e-07 1.447932e-07 3.175503e-07
216 1.507109e-07 5.479016e-07 1.589944e-07 5.472586e-07 1.589780e-07 1.960320e-07
243 1.655783e-07 4.816184e-07 1.656449e-07 4.808104e-07 1.656831e-07 2.152531e-07
256 1.509464e-07 3.599852e-07 1.533763e-07 3.576926e-07 1.548415e-07 2.779536e-07
288 1.786177e-07 4.199985e-07 1.484688e-07 4.204004e-07 1.479948e-07 2.346000e-07
324 1.647224e-07 5.043536e-07 1.585540e-07 5.038923e-07 1.581418e-07 2.490004e-07
384 1.623741e-07 4.828982e-07 1.569511e-07 4.826630e-07 1.570796e-07 2.847407e-07
432 1.740673e-07 4.695336e-07 1.578176e-07 4.695530e-07 1.575228e-07 2.996553e-07
486 1.778964e-07 6.158958e-07 1.759388e-07 6.158558e-07 1.754382e-07 2.078483e-07
512 1.617633e-07 4.453922e-07 1.668406e-07 4.446452e-07 1.680485e-07 4.012341e-07
[11]:
# Figure 1: one panel per backend showing the L1, L2 and L-infinity errors,
# with a linear fit of L2 and L-infinity against log10(size)
plt.figure(figsize=(13, 1 + (len(vl2) + 1) * 1.5))
vk = vl2.keys()
x = np.array(vn, dtype=np.float32)
xl = d0.ndim * np.log10(x)  # Use the size of the array
nrows = (len(vl2) + 1) // 2
for i, k in enumerate(vk, start=1):
    plt.subplot(nrows, 2, i)
    fit_l2 = stats.linregress(xl, np.array(vl2[k], dtype=np.float32))
    fit_li = stats.linregress(xl, np.array(vli[k], dtype=np.float32))
    label_l2 = r"$L2\approx %s+%s\log(size)$" % (
        latex_float(fit_l2.intercept), latex_float(fit_l2.slope))
    label_li = r"$L_{\infty}\approx %s+%s\log(size)$" % (
        latex_float(fit_li.intercept), latex_float(fit_li.slope))
    # Measured errors
    plt.semilogx(vn, vl1[k], '-ob', label="$L1$")
    plt.semilogx(vn, vl2[k], '-ok', label=label_l2)
    plt.semilogx(vn, vli[k], '-og', label=label_li)
    # Fitted lines
    plt.semilogx(x, fit_l2.intercept + fit_l2.slope * xl, "k-")
    plt.semilogx(x, fit_li.intercept + fit_li.slope * xl, "g-")
    plt.title(k)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.xlabel("N", loc='right')
plt.suptitle("2D FFT errors (single precision, radix-2,3) - " + device_name)
plt.tight_layout()

# Figure 2: L2 error of all backends on a single plot
plt.figure()
ms = 3
clrs = {
    "fftw": '-og',
    "vkfft-cuda": "-^k",
    "vkfft-cuda-LUT": "-^b",
    "vkfft-opencl": "-vk",
    "vkfft-opencl-LUT": "-vb",
    "cufft": "-or",
}
for k, v in vl2.items():
    plt.semilogx(vn, v, clrs[k], markersize=ms, label=k)
plt.legend(loc='upper left')
plt.suptitle("2D FFT L2 error (single precision, radix-2,3) - " + device_name)
plt.grid(True)
plt.xlabel("N", loc='right')
plt.tight_layout()
1D, non-radix (Bluestein or Rader) transforms, single precision
[12]:
nmax = 512
d0 = np.random.uniform(-0.5, 0.5, nmax) + 1j * np.random.uniform(-0.5, 0.5, nmax)
d0ld = d0.astype(np.clongdouble)
d0s = d0.astype(np.complex64)
def accu_1d(n, fft_dic):
    """Measure the error of each FFT implementation on a 1D transform of size n.

    The reference is the transform of the first n samples of the long double
    array d0ld computed with fftwn. Every callable in fft_dic is applied to
    the first n samples of the single precision array d0s and compared to
    that reference with the l1, l2 and li helpers defined earlier in the
    notebook.

    :param n: number of samples to transform
    :param fft_dic: dictionary mapping a backend label to a callable taking
        the input array and returning its transform
    :return: dictionary mapping each backend label to the tuple
        (l1, l2, li) of its errors
    """
    reference = fftwn(d0ld[:n])
    errors = {}
    for backend, transform in fft_dic.items():
        result = transform(d0s[:n])
        errors[backend] = (
            l1(reference, result),
            l2(reference, result),
            li(reference, result),
        )
    return errors
# Registry of the FFT implementations to compare, keyed by the label used in
# the printed table and in the plots. fftw is always included; the GPU
# backends are added only when the has_pycuda / has_pyopencl flags are set.
# Insertion order matters: it is the order in which the backends are plotted.
fft_dic = {"fftw": fftwn}
if has_pycuda:
    fft_dic.update({
        "vkfft-cuda": fftnvcu,
        "vkfft-cuda-LUT": fftnvculut,
        "cufft": fftncu,
    })
if has_pyopencl:
    fft_dic.update({
        "vkfft-opencl": fftnvcl,
        "vkfft-opencl-LUT": fftnvcllut,
    })
# Quick sanity check, left disabled: print(accu_1d(16, fft_dic))
# Accumulators read by the plotting cell: the tested sizes and, for each
# backend label, the list of L1 / L2 / L-infinity errors (one entry per size).
vn, vl1, vl2, vli = [], {}, {}, {}
# GPU columns of the table, in display order (fftw is always the first column)
gpu_backends = ("vkfft-cuda", "vkfft-cuda-LUT", "vkfft-opencl",
                "vkfft-opencl-LUT", "cufft")

# Table header: a small transform is run first, and only the backends that
# returned a result get a column.
r = accu_1d(8, fft_dic)
header = "%7s %16s" % ("N", "fftw ")
header += "".join(" %16s" % backend for backend in gpu_backends if backend in r)
print(header)

for n in range(8, len(d0) + 1):
    if max(primes(n)) <= 13:
        continue  # test only transforms with non-radix sizes
    r = accu_1d(n, fft_dic)
    vn.append(n)
    for backend, (err_l1, err_l2, err_li) in r.items():
        vl1.setdefault(backend, []).append(err_l1)
        vl2.setdefault(backend, []).append(err_l2)
        vli.setdefault(backend, []).append(err_li)
    fftw_l2 = vl2["fftw"][-1]
    row = "%7d %16e" % (n, fftw_l2)
    for backend in gpu_backends:
        if backend not in vl2:
            continue
        backend_l2 = vl2[backend][-1]
        # Red channel of the ANSI 24-bit colour: 0 when the error equals the
        # fftw one, saturating at 255 when it is 100 times larger.
        red = int(np.log10(backend_l2 / fftw_l2) / np.log10(100) * 255)
        red = min(max(red, 0), 255)
        row += "\x1b[38;2;%d;0;0m %14e\x1b[0m" % (red, backend_l2)
    print(row)
N fftw vkfft-cuda vkfft-cuda-LUT vkfft-opencl vkfft-opencl-LUT cufft
17 6.254615e-08 1.015721e-07 1.102006e-07 1.102006e-07 1.102006e-07 7.038351e-08
19 8.248360e-08 1.645417e-07 1.248262e-07 1.105743e-07 1.105743e-07 7.713461e-08
23 9.364838e-08 2.120295e-07 1.265788e-07 1.307243e-07 1.307243e-07 9.525661e-08
29 9.151393e-08 1.697528e-07 1.473385e-07 1.526987e-07 1.526987e-07 6.975211e-08
31 9.476318e-08 2.802384e-07 1.140413e-07 1.201944e-07 1.201944e-07 7.906383e-08
34 7.735947e-08 1.776248e-07 1.045113e-07 1.034587e-07 1.034587e-07 8.285427e-08
37 1.484575e-07 3.000786e-07 1.426029e-07 1.731795e-07 1.731795e-07 9.075627e-08
38 9.740193e-08 2.774460e-07 1.702731e-07 1.763272e-07 1.763272e-07 1.093944e-07
41 1.353334e-07 3.260490e-07 1.226760e-07 1.258329e-07 1.258329e-07 9.509407e-08
43 9.872378e-08 2.399786e-07 1.835915e-07 1.852351e-07 1.852351e-07 1.046902e-07
46 8.949255e-08 2.700867e-07 1.125207e-07 1.034681e-07 1.034681e-07 1.174656e-07
47 1.830142e-07 1.198861e-07 1.198861e-07 1.198861e-07 1.198861e-07 1.057758e-07
51 8.422249e-08 2.138944e-07 1.312271e-07 1.413012e-07 1.413012e-07 9.646047e-08
53 2.024566e-07 2.477728e-07 1.450103e-07 1.570705e-07 1.570705e-07 1.102411e-07
57 9.803056e-08 3.179224e-07 1.579285e-07 1.649120e-07 1.649120e-07 9.402163e-08
58 9.713275e-08 2.461764e-07 1.489107e-07 1.607300e-07 1.607300e-07 1.089946e-07
59 1.950429e-07 1.029824e-07 1.029824e-07 1.029824e-07 1.029824e-07 1.092878e-07
61 1.497549e-07 2.960745e-07 1.713226e-07 1.514671e-07 1.514671e-07 1.104706e-07
62 1.136793e-07 2.791304e-07 1.393296e-07 1.469882e-07 1.469882e-07 1.049168e-07
67 2.155272e-07 3.261446e-07 1.541285e-07 1.611212e-07 1.611212e-07 1.426873e-07
68 9.695182e-08 2.457007e-07 1.163033e-07 1.161389e-07 1.161389e-07 1.830814e-07
69 9.709617e-08 3.187640e-07 1.400185e-07 1.366575e-07 1.366575e-07 1.235348e-07
71 2.174195e-07 2.688835e-07 2.061562e-07 1.606750e-07 1.606750e-07 1.163606e-07
73 1.692324e-07 3.746569e-07 1.741872e-07 1.690175e-07 1.690175e-07 1.162877e-07
74 1.575586e-07 3.353020e-07 1.659695e-07 1.662891e-07 1.662891e-07 1.084640e-07
76 9.244200e-08 2.818918e-07 1.555008e-07 1.548209e-07 1.548209e-07 1.972896e-07
79 1.975269e-07 2.576685e-07 1.454759e-07 1.567628e-07 1.567628e-07 1.084098e-07
82 1.635243e-07 3.941707e-07 1.374833e-07 1.506632e-07 1.506632e-07 1.209537e-07
83 1.789268e-07 1.317153e-07 1.317153e-07 1.317153e-07 1.317153e-07 1.263885e-07
85 1.095898e-07 2.689680e-07 1.206128e-07 1.350287e-07 1.350287e-07 1.750085e-07
86 1.076581e-07 2.863250e-07 1.877223e-07 1.826287e-07 1.826287e-07 1.247761e-07
87 1.133036e-07 2.994499e-07 1.791361e-07 1.937553e-07 1.937553e-07 1.125409e-07
89 1.967461e-07 4.042257e-07 1.658388e-07 1.578106e-07 1.578106e-07 1.361586e-07
92 1.008463e-07 3.297765e-07 1.286483e-07 1.372108e-07 1.372108e-07 2.697618e-07
93 1.062135e-07 2.926745e-07 1.844804e-07 1.793787e-07 1.793787e-07 1.180430e-07
94 1.852338e-07 1.138912e-07 1.138912e-07 1.137691e-07 1.137691e-07 1.217035e-07
95 9.282513e-08 3.283934e-07 1.558627e-07 1.430664e-07 1.430664e-07 1.145523e-07
97 1.614399e-07 3.290774e-07 1.905957e-07 1.978080e-07 1.978080e-07 1.307875e-07
101 1.732766e-07 3.416956e-07 1.809738e-07 1.770801e-07 1.770801e-07 1.239909e-07
102 1.060491e-07 2.507643e-07 1.439354e-07 1.360755e-07 1.360755e-07 1.296681e-07
103 1.856866e-07 3.637793e-07 1.877914e-07 3.538714e-07 1.845640e-07 1.289173e-07
106 2.004430e-07 3.015556e-07 1.506545e-07 1.355621e-07 1.355621e-07 1.373634e-07
107 1.931322e-07 3.856692e-07 1.847519e-07 3.872953e-07 1.884483e-07 1.281276e-07
109 2.000102e-07 3.536153e-07 1.780016e-07 1.988556e-07 1.988556e-07 1.377508e-07
111 1.664198e-07 3.192866e-07 1.421667e-07 1.502838e-07 1.502838e-07 1.311545e-07
113 2.054878e-07 3.076369e-07 1.966113e-07 2.046859e-07 2.046859e-07 1.374936e-07
114 1.019928e-07 3.318454e-07 1.351128e-07 1.431855e-07 1.431855e-07 1.839490e-07
115 1.023938e-07 3.135284e-07 1.434199e-07 1.309437e-07 1.309437e-07 1.140271e-07
116 1.092353e-07 3.303176e-07 1.723933e-07 1.752890e-07 1.752890e-07 1.411988e-07
118 1.852870e-07 1.243774e-07 1.243774e-07 1.250687e-07 1.250687e-07 1.553505e-07
119 1.104477e-07 2.485960e-07 1.557393e-07 1.515781e-07 1.515781e-07 1.075155e-07
122 1.581833e-07 3.233978e-07 1.773808e-07 1.685501e-07 1.685501e-07 1.341923e-07
123 1.409362e-07 3.653380e-07 1.448166e-07 1.379628e-07 1.379628e-07 1.157736e-07
124 1.003285e-07 2.969741e-07 1.668390e-07 1.808389e-07 1.808389e-07 1.165339e-07
127 2.089652e-07 3.815689e-07 2.064010e-07 1.999192e-07 1.999192e-07 1.465482e-07
129 1.193924e-07 3.597178e-07 1.871710e-07 1.774803e-07 1.774803e-07 1.274098e-07
131 2.081924e-07 3.716765e-07 1.918856e-07 1.823460e-07 1.823460e-07 1.013079e+00
133 1.018254e-07 2.984598e-07 1.557671e-07 1.501836e-07 1.501836e-07 2.170771e-07
134 2.099063e-07 3.904764e-07 1.645518e-07 1.702173e-07 1.702173e-07 1.470246e-07
136 1.098400e-07 3.201838e-07 1.493731e-07 1.498841e-07 1.498841e-07 2.141142e-07
137 2.387834e-07 4.598919e-07 1.623450e-07 4.394354e-07 1.701358e-07 2.027433e-07
138 1.095211e-07 3.408621e-07 1.376716e-07 1.298727e-07 1.298727e-07 2.245828e-07
139 2.280636e-07 4.499628e-07 1.600395e-07 4.485685e-07 1.711647e-07 2.062495e-07
141 1.990999e-07 1.150641e-07 1.150641e-07 1.123040e-07 1.123040e-07 1.204941e-07
142 2.047809e-07 3.131161e-07 1.897272e-07 1.899545e-07 1.899545e-07 1.339982e-07
145 1.150806e-07 2.897775e-07 1.821280e-07 1.971955e-07 1.971955e-07 1.101078e-07
146 1.659277e-07 4.249103e-07 1.950035e-07 1.772116e-07 1.772116e-07 1.546777e-07
148 1.579387e-07 3.514651e-07 1.569699e-07 1.581785e-07 1.581785e-07 1.166514e-07
149 2.219864e-07 4.673630e-07 1.809679e-07 4.716310e-07 1.858944e-07 2.225737e-07
151 1.971656e-07 2.967276e-07 1.910044e-07 1.835607e-07 1.835607e-07 2.252467e-07
152 1.059442e-07 2.693980e-07 1.450913e-07 1.568306e-07 1.568306e-07 1.783349e-07
153 1.025081e-07 2.127526e-07 1.397033e-07 1.368405e-07 1.368405e-07 2.066911e-07
155 1.186708e-07 3.282051e-07 1.736079e-07 1.717431e-07 1.717431e-07 1.108088e-07
157 1.967858e-07 3.286034e-07 1.837128e-07 1.744382e-07 1.744382e-07 2.352349e-07
158 2.035188e-07 3.164353e-07 1.684070e-07 1.764726e-07 1.764726e-07 1.574034e-07
159 1.794777e-07 3.200739e-07 1.532787e-07 1.430142e-07 1.430142e-07 1.335363e-07
161 1.067734e-07 3.343597e-07 1.435854e-07 1.424633e-07 1.424633e-07 1.090317e-07
163 2.089208e-07 4.299696e-07 1.937606e-07 1.967131e-07 1.967131e-07 2.044107e-07
164 1.500700e-07 3.919023e-07 1.551416e-07 1.463539e-07 1.463539e-07 1.604627e-07
166 2.001572e-07 1.282135e-07 1.282135e-07 1.276195e-07 1.276195e-07 1.356593e-07
167 2.106085e-07 4.942549e-07 1.732278e-07 4.825893e-07 1.650029e-07 2.329364e-07
170 1.135286e-07 2.669009e-07 1.454533e-07 1.388323e-07 1.388323e-07 2.498998e-07
171 1.169343e-07 3.053043e-07 1.715374e-07 1.667814e-07 1.667814e-07 1.978128e-07
172 1.280455e-07 3.232796e-07 1.898046e-07 1.929596e-07 1.929596e-07 1.323492e-07
173 2.063084e-07 4.856868e-07 1.806599e-07 4.943003e-07 1.886242e-07 2.237001e-07
174 1.123379e-07 3.299866e-07 1.801550e-07 1.921437e-07 1.921437e-07 1.367182e-07
177 1.927455e-07 1.320034e-07 1.320034e-07 1.339027e-07 1.339027e-07 1.582679e-07
178 2.105625e-07 4.347624e-07 1.596042e-07 1.638857e-07 1.638857e-07 1.524142e-07
179 2.231056e-07 4.764158e-07 1.842309e-07 4.880476e-07 1.847664e-07 2.154776e-07
181 1.929199e-07 3.110166e-07 1.871079e-07 1.806618e-07 1.806618e-07 2.252474e-07
183 1.564172e-07 3.317619e-07 1.709997e-07 1.810320e-07 1.810320e-07 1.434001e-07
184 1.085857e-07 3.120010e-07 1.317914e-07 1.359868e-07 1.359868e-07 2.275638e-07
185 1.555445e-07 3.967476e-07 1.645497e-07 1.640616e-07 1.640616e-07 1.782655e-07
186 1.067958e-07 3.180343e-07 1.788461e-07 1.658559e-07 1.658559e-07 1.543705e-07
187 1.159999e-07 2.607448e-07 1.385812e-07 1.380326e-07 1.380326e-07 1.186911e-07
188 1.916176e-07 1.122158e-07 1.122158e-07 1.089307e-07 1.089307e-07 1.255861e-07
190 1.276445e-07 3.446052e-07 1.479991e-07 1.594947e-07 1.594947e-07 2.638597e-07
191 2.153538e-07 4.911162e-07 1.867347e-07 4.815208e-07 1.804571e-07 2.182134e-07
193 1.891501e-07 3.513702e-07 1.766511e-07 1.818459e-07 1.818459e-07 2.350272e-07
194 1.623496e-07 3.814176e-07 1.905166e-07 1.879265e-07 1.879265e-07 1.634499e-07
197 2.112167e-07 3.561558e-07 2.226806e-07 2.097722e-07 2.097722e-07 2.386824e-07
199 2.132325e-07 4.444957e-07 1.875646e-07 1.922090e-07 1.922090e-07 2.273699e-07
201 1.991247e-07 4.068708e-07 1.676640e-07 1.622211e-07 1.622211e-07 1.617963e-07
202 1.716199e-07 3.756492e-07 1.874411e-07 1.744065e-07 1.744065e-07 1.608291e-07
203 1.182969e-07 2.946635e-07 1.887379e-07 1.885407e-07 1.885407e-07 1.309971e-07
204 1.141323e-07 2.440876e-07 1.583997e-07 1.529069e-07 1.529069e-07 2.074945e-07
205 1.704316e-07 4.050686e-07 1.540238e-07 1.544637e-07 1.544637e-07 1.159754e-07
206 1.986491e-07 4.488049e-07 2.115694e-07 4.661837e-07 2.150825e-07 1.511929e-07
207 1.188873e-07 3.344837e-07 1.373426e-07 1.371272e-07 1.371272e-07 1.346638e-07
209 1.146278e-07 3.246925e-07 1.530359e-07 1.487966e-07 1.487966e-07 2.378698e-07
211 2.209989e-07 3.128723e-07 2.008453e-07 1.996888e-07 1.996888e-07 2.443957e-07
212 1.850437e-07 3.224179e-07 1.433081e-07 1.363619e-07 1.363619e-07 1.545950e-07
213 1.963910e-07 3.433657e-07 1.949126e-07 1.946143e-07 1.946143e-07 1.380735e-07
214 2.055205e-07 4.556934e-07 2.114241e-07 4.605312e-07 1.992919e-07 1.539524e-07
215 1.228424e-07 3.465688e-07 1.926393e-07 1.734769e-07 1.734769e-07 1.383893e-07
217 1.149121e-07 3.645797e-07 1.747176e-07 1.663483e-07 1.663483e-07 1.487485e-07
218 1.923187e-07 3.720207e-07 1.798481e-07 1.793363e-07 1.793363e-07 1.591430e-07
219 1.627610e-07 4.714273e-07 1.901704e-07 1.850918e-07 1.850918e-07 1.640938e-07
221 1.161102e-07 2.267586e-07 1.315326e-07 1.354092e-07 1.354092e-07 1.993351e-07
222 1.634625e-07 3.810690e-07 1.619523e-07 1.570721e-07 1.570721e-07 1.739043e-07
223 2.094943e-07 4.385517e-07 2.088867e-07 4.470930e-07 2.010502e-07 2.553330e-07
226 2.219911e-07 3.661037e-07 1.895713e-07 1.964298e-07 1.964298e-07 1.521751e-07
227 2.091631e-07 4.503239e-07 2.074894e-07 4.400035e-07 2.137942e-07 2.370926e-07
228 1.113769e-07 3.727095e-07 1.734228e-07 1.639969e-07 1.639969e-07 2.000458e-07
229 2.132539e-07 4.377515e-07 2.098218e-07 4.455166e-07 2.106596e-07 2.454666e-07
230 1.157781e-07 3.321521e-07 1.455432e-07 1.372214e-07 1.372214e-07 1.690261e-07
232 1.041392e-07 3.557029e-07 1.749468e-07 1.791588e-07 1.791588e-07 3.322553e-07
233 2.118066e-07 4.420936e-07 2.168508e-07 4.429969e-07 2.162505e-07 2.370328e-07
235 1.889062e-07 1.194290e-07 1.194290e-07 1.201400e-07 1.201400e-07 1.340496e-07
236 1.997082e-07 1.239299e-07 1.239299e-07 1.249085e-07 1.249085e-07 1.541500e-07
237 2.021329e-07 3.507753e-07 1.697484e-07 1.683454e-07 1.683454e-07 1.727904e-07
238 1.200816e-07 2.791008e-07 1.499446e-07 1.449852e-07 1.449852e-07 1.298035e-07
239 2.228663e-07 4.719928e-07 2.220813e-07 4.660047e-07 2.131480e-07 2.547764e-07
241 1.815284e-07 3.981928e-07 2.010071e-07 1.983743e-07 1.983743e-07 2.420173e-07
244 1.628528e-07 3.164522e-07 1.765841e-07 1.774213e-07 1.774213e-07 1.397179e-07
246 1.588252e-07 3.849659e-07 1.429126e-07 1.416401e-07 1.416401e-07 1.580901e-07
247 1.253356e-07 3.067324e-07 1.490375e-07 1.448856e-07 1.448856e-07 1.881051e-07
248 1.093901e-07 3.226233e-07 1.759849e-07 1.597223e-07 1.597223e-07 3.386437e-07
249 1.961131e-07 1.312664e-07 1.312664e-07 1.334024e-07 1.334024e-07 1.520260e-07
251 2.078468e-07 4.599977e-07 2.041316e-07 1.966544e-07 1.966544e-07 2.643667e-07
253 1.138023e-07 3.518098e-07 1.385994e-07 1.364449e-07 1.364449e-07 2.909594e-07
254 2.137445e-07 3.606242e-07 2.057417e-07 2.044628e-07 2.044628e-07 1.634830e-07
255 1.169972e-07 2.841816e-07 1.387739e-07 1.383085e-07 1.383085e-07 2.405275e-07
257 1.860466e-07 3.666547e-07 2.074898e-07 2.182892e-07 2.182892e-07 3.579960e-07
258 1.150828e-07 3.576258e-07 1.924915e-07 1.881525e-07 1.881525e-07 1.438081e-07
259 1.705815e-07 3.788998e-07 1.709033e-07 1.658286e-07 1.658286e-07 1.358027e-07
261 1.212339e-07 3.061716e-07 1.912142e-07 1.919352e-07 1.919352e-07 1.249266e-07
262 2.153069e-07 3.803537e-07 1.853768e-07 1.859536e-07 1.859536e-07 3.763236e-07
263 2.213543e-07 4.796229e-07 2.403416e-07 4.603458e-07 2.338935e-07 3.783692e-07
265 2.019336e-07 3.566799e-07 1.516975e-07 1.531708e-07 1.531708e-07 1.309428e-07
266 1.182956e-07 3.383422e-07 1.641217e-07 1.645517e-07 1.645517e-07 1.556428e-07
267 2.033083e-07 5.448238e-07 1.674888e-07 1.708365e-07 1.708365e-07 1.648607e-07
268 2.015187e-07 4.221798e-07 1.666308e-07 1.565711e-07 1.565711e-07 1.343170e-07
269 2.062111e-07 5.254998e-07 2.387887e-07 5.274504e-07 2.406694e-07 3.798024e-07
271 1.977533e-07 4.726542e-07 1.972593e-07 1.959547e-07 1.959547e-07 3.796388e-07
272 1.145908e-07 3.577410e-07 1.402941e-07 1.390949e-07 1.390949e-07 1.788681e-07
274 2.456598e-07 5.155301e-07 2.437044e-07 5.256583e-07 2.386551e-07 3.690020e-07
276 1.233398e-07 3.986606e-07 1.365727e-07 1.329284e-07 1.329284e-07 2.615891e-07
277 2.539705e-07 5.281953e-07 2.507689e-07 5.113510e-07 2.636458e-07 3.613178e-07
278 2.500034e-07 5.125767e-07 2.543965e-07 4.943646e-07 2.586662e-07 3.722843e-07
279 1.230800e-07 3.426067e-07 1.727915e-07 1.706629e-07 1.706629e-07 1.300775e-07
281 2.600788e-07 4.421222e-07 2.178430e-07 2.192520e-07 2.192520e-07 3.548260e-07
282 1.923835e-07 1.256465e-07 1.256465e-07 1.232156e-07 1.232156e-07 1.356765e-07
283 2.539360e-07 5.033807e-07 2.497082e-07 5.140324e-07 2.414126e-07 3.838028e-07
284 2.027077e-07 3.349201e-07 1.928199e-07 1.911002e-07 1.911002e-07 1.471158e-07
285 1.150815e-07 3.526146e-07 1.717077e-07 1.675323e-07 1.675323e-07 2.339297e-07
287 1.639109e-07 4.147068e-07 1.750788e-07 1.703897e-07 1.703897e-07 1.222591e-07
289 1.116158e-07 2.800092e-07 1.795379e-07 1.757745e-07 1.757745e-07 1.930739e-07
290 1.231251e-07 3.149545e-07 1.788850e-07 1.861414e-07 1.861414e-07 1.675623e-07
291 1.689888e-07 4.075763e-07 1.925678e-07 2.027442e-07 2.027442e-07 1.506466e-07
292 1.581902e-07 4.443587e-07 1.772448e-07 1.758394e-07 1.758394e-07 1.576791e-07
293 2.145222e-07 5.690740e-07 2.249014e-07 5.626118e-07 2.197821e-07 3.759567e-07
295 2.081281e-07 1.285269e-07 1.285269e-07 1.283674e-07 1.283674e-07 1.404634e-07
296 1.628766e-07 3.589461e-07 1.556122e-07 1.612424e-07 1.612424e-07 4.028750e-07
298 2.174732e-07 5.931939e-07 2.265769e-07 5.886718e-07 2.272150e-07 3.750535e-07
299 1.251625e-07 3.636882e-07 1.384727e-07 1.381702e-07 1.381702e-07 2.277146e-07
301 1.263431e-07 3.312216e-07 1.908233e-07 1.806836e-07 1.806836e-07 1.326826e-07
302 2.098849e-07 3.635306e-07 1.855339e-07 1.893575e-07 1.893575e-07 3.865412e-07
303 1.731870e-07 3.952747e-07 1.843101e-07 1.750927e-07 1.750927e-07 1.863328e-07
304 1.100630e-07 3.539772e-07 1.561596e-07 1.586423e-07 1.586423e-07 1.829102e-07
305 1.705174e-07 3.479583e-07 1.647859e-07 1.727334e-07 1.727334e-07 1.382707e-07
306 1.141977e-07 3.333392e-07 1.460105e-07 1.460203e-07 1.460203e-07 2.167725e-07
307 2.440031e-07 5.704432e-07 2.291278e-07 5.584193e-07 2.180736e-07 3.534893e-07
309 1.988677e-07 6.125582e-07 2.252990e-07 6.210531e-07 2.312589e-07 1.559475e-07
310 1.285205e-07 3.491847e-07 1.762986e-07 1.725355e-07 1.725355e-07 1.681429e-07
311 2.426690e-07 6.063725e-07 2.161904e-07 5.889187e-07 2.295903e-07 3.794806e-07
313 2.464821e-07 3.865626e-07 1.817521e-07 1.897758e-07 1.897758e-07 3.754765e-07
314 1.917747e-07 3.616533e-07 1.686394e-07 1.728908e-07 1.728908e-07 3.787588e-07
316 1.945554e-07 3.289450e-07 1.671818e-07 1.787079e-07 1.787079e-07 1.468917e-07
317 2.208842e-07 5.542473e-07 1.927134e-07 5.618234e-07 1.891404e-07 3.794081e-07
318 1.991046e-07 3.641487e-07 1.538132e-07 1.517044e-07 1.517044e-07 1.544278e-07
319 1.214983e-07 3.224470e-07 1.846269e-07 1.805655e-07 1.805655e-07 1.241928e-07
321 2.056992e-07 5.471295e-07 1.928324e-07 5.515586e-07 1.902652e-07 1.890142e-07
322 1.226038e-07 3.495409e-07 1.575273e-07 1.578879e-07 1.578879e-07 1.443990e-07
323 1.056029e-07 3.658308e-07 1.915645e-07 1.861628e-07 1.861628e-07 1.897334e-07
326 1.961685e-07 4.727551e-07 1.964517e-07 2.006391e-07 2.006391e-07 3.792286e-07
327 1.906094e-07 4.003689e-07 1.954245e-07 1.917423e-07 1.917423e-07 1.566012e-07
328 1.611068e-07 4.071518e-07 1.474960e-07 1.538363e-07 1.538363e-07 4.229538e-07
329 2.006236e-07 1.274322e-07 1.274322e-07 1.300759e-07 1.300759e-07 1.373526e-07
331 2.159058e-07 4.720389e-07 2.057458e-07 2.115960e-07 2.115960e-07 3.938261e-07
332 2.034817e-07 1.323201e-07 1.323201e-07 1.299925e-07 1.299925e-07 1.504214e-07
333 1.734878e-07 3.816761e-07 1.794666e-07 1.683401e-07 1.683401e-07 1.357561e-07
334 2.143846e-07 5.358465e-07 1.966540e-07 5.327061e-07 1.966666e-07 4.071275e-07
335 2.087981e-07 4.658408e-07 1.697511e-07 1.647695e-07 1.647695e-07 1.449444e-07
337 2.278881e-07 4.637756e-07 2.324619e-07 2.240918e-07 2.240918e-07 3.727136e-07
339 2.086462e-07 3.530390e-07 2.099672e-07 2.078547e-07 2.078547e-07 1.653213e-07
340 1.240866e-07 3.350963e-07 1.433581e-07 1.461282e-07 1.461282e-07 2.466206e-07
341 1.234526e-07 3.809751e-07 1.752402e-07 1.733505e-07 1.733505e-07 1.684949e-07
342 1.181279e-07 4.157423e-07 1.586675e-07 1.565297e-07 1.565297e-07 2.236661e-07
344 1.182284e-07 3.338805e-07 1.849533e-07 1.796248e-07 1.796248e-07 3.418745e-07
345 1.226118e-07 3.341517e-07 1.470779e-07 1.471805e-07 1.471805e-07 2.598271e-07
346 1.993998e-07 5.553338e-07 2.046332e-07 5.613075e-07 1.963778e-07 3.755884e-07
347 2.216287e-07 5.428358e-07 1.963689e-07 5.421260e-07 2.022821e-07 3.674438e-07
348 1.214825e-07 3.640408e-07 1.731811e-07 1.764132e-07 1.764132e-07 3.111197e-07
349 2.151976e-07 5.504587e-07 2.008980e-07 5.523555e-07 1.996018e-07 3.688477e-07
353 2.264505e-07 5.409662e-07 1.934584e-07 1.861936e-07 1.861936e-07 3.660872e-07
354 1.998465e-07 1.257226e-07 1.257226e-07 1.266527e-07 1.266527e-07 2.085274e-07
355 2.054707e-07 3.689250e-07 2.035096e-07 2.026951e-07 2.026951e-07 1.386130e-07
356 2.116076e-07 4.886805e-07 1.674753e-07 1.735857e-07 1.735857e-07 1.519563e-07
357 1.203586e-07 3.012444e-07 1.577486e-07 1.588690e-07 1.588690e-07 2.204874e-07
358 2.102375e-07 5.391917e-07 1.991985e-07 5.374192e-07 2.003993e-07 3.767501e-07
359 2.179083e-07 5.633718e-07 2.044090e-07 5.541445e-07 2.116728e-07 3.888299e-07
361 1.169275e-07 4.323907e-07 2.025631e-07 2.097343e-07 2.097343e-07 2.364930e-07
362 1.900972e-07 3.643446e-07 1.873354e-07 1.914500e-07 1.914500e-07 3.850253e-07
365 1.760859e-07 4.756318e-07 1.887352e-07 1.905305e-07 1.905305e-07 1.552590e-07
366 1.624002e-07 3.487381e-07 1.752577e-07 1.751299e-07 1.751299e-07 1.635652e-07
367 2.445333e-07 5.490395e-07 2.035403e-07 5.292860e-07 1.949966e-07 3.944209e-07
368 1.227206e-07 3.478803e-07 1.402666e-07 1.405892e-07 1.405892e-07 2.679055e-07
369 1.717121e-07 4.411172e-07 1.589857e-07 1.555127e-07 1.555127e-07 1.637991e-07
370 1.795894e-07 4.042884e-07 1.741971e-07 1.756213e-07 1.756213e-07 2.346114e-07
371 1.971203e-07 4.091323e-07 1.664423e-07 1.708177e-07 1.708177e-07 1.692075e-07
372 1.235428e-07 3.312909e-07 1.713933e-07 1.646320e-07 1.646320e-07 3.646265e-07
373 2.479865e-07 5.509212e-07 2.142649e-07 5.471139e-07 2.085351e-07 3.855961e-07
374 1.159354e-07 3.151714e-07 1.370980e-07 1.455959e-07 1.455959e-07 1.346691e-07
376 1.849804e-07 1.189638e-07 1.189638e-07 1.142045e-07 1.142045e-07 5.225659e-07
377 1.223947e-07 2.816012e-07 1.776371e-07 1.757985e-07 1.757985e-07 1.427971e-07
379 2.256487e-07 4.858061e-07 2.176086e-07 2.206778e-07 2.206778e-07 3.899603e-07
380 1.146674e-07 4.304451e-07 1.549537e-07 1.572528e-07 1.572528e-07 2.871437e-07
381 2.023679e-07 4.077683e-07 2.126635e-07 2.260456e-07 2.260456e-07 2.139849e-07
382 2.373684e-07 5.818699e-07 2.167913e-07 5.705790e-07 2.100067e-07 4.063626e-07
383 2.344242e-07 5.416479e-07 2.061691e-07 5.488713e-07 2.161354e-07 3.858209e-07
386 2.024901e-07 4.119942e-07 1.949455e-07 1.931482e-07 1.931482e-07 4.051680e-07
387 1.281457e-07 3.261754e-07 1.926199e-07 1.837260e-07 1.837260e-07 1.660771e-07
388 1.731449e-07 4.111774e-07 1.927799e-07 1.940468e-07 1.940468e-07 4.273809e-07
389 2.172676e-07 5.133044e-07 2.033750e-07 5.004174e-07 2.005164e-07 4.179661e-07
391 1.231344e-07 3.756860e-07 1.723851e-07 1.698571e-07 1.698571e-07 2.499926e-07
393 2.169994e-07 4.228823e-07 1.994616e-07 1.947048e-07 1.947048e-07 4.070797e-07
394 2.276188e-07 3.889847e-07 2.306473e-07 2.114615e-07 2.114615e-07 4.184138e-07
395 1.979007e-07 3.828043e-07 1.772292e-07 1.773488e-07 1.773488e-07 2.008334e-07
397 2.196821e-07 4.728493e-07 1.949282e-07 1.950308e-07 1.950308e-07 3.920595e-07
398 2.106061e-07 5.164568e-07 1.897553e-07 1.871118e-07 1.871118e-07 4.071116e-07
399 1.162972e-07 3.982255e-07 1.725317e-07 1.718150e-07 1.718150e-07 2.836280e-07
401 2.000908e-07 5.168932e-07 1.952368e-07 1.926290e-07 1.926290e-07 3.956637e-07
402 2.104636e-07 4.237690e-07 1.711537e-07 1.740200e-07 1.740200e-07 1.867341e-07
403 1.325543e-07 3.394525e-07 1.736128e-07 1.724151e-07 1.724151e-07 1.331267e-07
404 1.743760e-07 4.090944e-07 1.775001e-07 1.697448e-07 1.697448e-07 4.105280e-07
406 1.248159e-07 2.909439e-07 1.891626e-07 1.891914e-07 1.891914e-07 1.522211e-07
407 1.735658e-07 4.081548e-07 1.646497e-07 1.711953e-07 1.711953e-07 1.457375e-07
408 1.192708e-07 3.021921e-07 1.406938e-07 1.428666e-07 1.428666e-07 2.116826e-07
409 2.617243e-07 5.228763e-07 2.070993e-07 4.955519e-07 2.043311e-07 4.020891e-07
410 1.666759e-07 4.183887e-07 1.586831e-07 1.561989e-07 1.561989e-07 1.707850e-07
411 2.439922e-07 5.070495e-07 2.102991e-07 5.155817e-07 1.978480e-07 4.067824e-07
412 2.033801e-07 4.969668e-07 2.006337e-07 5.025138e-07 2.043601e-07 4.079360e-07
413 1.935025e-07 1.388785e-07 1.388785e-07 1.394107e-07 1.394107e-07 1.387839e-07
414 1.249509e-07 4.333397e-07 1.450005e-07 1.430800e-07 1.430800e-07 1.469408e-07
415 1.958207e-07 1.408202e-07 1.408202e-07 1.440556e-07 1.440556e-07 1.490856e-07
417 2.521934e-07 5.048626e-07 2.258710e-07 5.237951e-07 2.402501e-07 4.212701e-07
418 1.139443e-07 3.926634e-07 1.592880e-07 1.617844e-07 1.617844e-07 1.584389e-07
419 2.530188e-07 4.994945e-07 2.242183e-07 5.027571e-07 2.179493e-07 4.018248e-07
421 2.705975e-07 4.421037e-07 2.237606e-07 2.111102e-07 2.111102e-07 4.032416e-07
422 2.304052e-07 3.361239e-07 2.032661e-07 2.061795e-07 2.061795e-07 3.947762e-07
423 1.895341e-07 1.242365e-07 1.242365e-07 1.257242e-07 1.257242e-07 1.569439e-07
424 1.982191e-07 3.699608e-07 1.483311e-07 1.482508e-07 1.482508e-07 1.588054e-07
425 1.183367e-07 3.837843e-07 1.479654e-07 1.459986e-07 1.459986e-07 1.356949e-07
426 2.064544e-07 3.473485e-07 2.025404e-07 2.013154e-07 2.013154e-07 1.606851e-07
427 1.715502e-07 3.426199e-07 1.770080e-07 1.901854e-07 1.901854e-07 1.371926e-07
428 2.186324e-07 5.096769e-07 2.175050e-07 5.021566e-07 2.169037e-07 4.015141e-07
430 1.390314e-07 3.663572e-07 1.927799e-07 1.951004e-07 1.951004e-07 1.769368e-07
431 2.607433e-07 5.090831e-07 2.089387e-07 5.076196e-07 2.186953e-07 4.202045e-07
433 2.199477e-07 5.861007e-07 2.052414e-07 2.088900e-07 2.088900e-07 4.066604e-07
434 1.256917e-07 3.683642e-07 1.813943e-07 1.777079e-07 1.777079e-07 1.604449e-07
435 1.275254e-07 3.040839e-07 1.809527e-07 1.858254e-07 1.858254e-07 1.736110e-07
436 1.856051e-07 4.020655e-07 1.892757e-07 1.974237e-07 1.974237e-07 4.063403e-07
437 1.208273e-07 4.721079e-07 1.901402e-07 1.976879e-07 1.976879e-07 1.526453e-07
438 1.771345e-07 4.849630e-07 1.843541e-07 1.956542e-07 1.956542e-07 2.069160e-07
439 2.265181e-07 5.146082e-07 2.180214e-07 5.173231e-07 2.224487e-07 4.034112e-07
442 1.254011e-07 2.598154e-07 1.424844e-07 1.409673e-07 1.409673e-07 2.329137e-07
443 2.380930e-07 5.319910e-07 2.309411e-07 5.285052e-07 2.325624e-07 4.136064e-07
444 1.709316e-07 3.848045e-07 1.734499e-07 1.679520e-07 1.679520e-07 2.267555e-07
445 2.069760e-07 5.080970e-07 1.678781e-07 1.760085e-07 1.760085e-07 1.659490e-07
446 2.233213e-07 5.137934e-07 2.237258e-07 5.116345e-07 2.192639e-07 3.922768e-07
447 2.162510e-07 4.952950e-07 2.143213e-07 5.139247e-07 2.236551e-07 3.989452e-07
449 2.436216e-07 4.622922e-07 2.348649e-07 2.299427e-07 2.299427e-07 4.129329e-07
451 1.675976e-07 3.959555e-07 1.617838e-07 1.590233e-07 1.590233e-07 1.280170e-07
452 2.256701e-07 3.752432e-07 2.066080e-07 2.102768e-07 2.102768e-07 4.182558e-07
453 2.001570e-07 3.876731e-07 2.054536e-07 1.905133e-07 1.905133e-07 4.025112e-07
454 2.245039e-07 5.107622e-07 2.183724e-07 5.185578e-07 2.269084e-07 4.119558e-07
456 1.152630e-07 4.146577e-07 1.554956e-07 1.587821e-07 1.587821e-07 2.047558e-07
457 2.117934e-07 5.190966e-07 2.248397e-07 5.215551e-07 2.265207e-07 4.124146e-07
458 2.143870e-07 5.249751e-07 2.375757e-07 5.318775e-07 2.382534e-07 4.181211e-07
459 1.213021e-07 2.900938e-07 1.508174e-07 1.593493e-07 1.593493e-07 2.082768e-07
460 1.209660e-07 4.101683e-07 1.491910e-07 1.444748e-07 1.444748e-07 2.112227e-07
461 2.182038e-07 5.019176e-07 2.149358e-07 5.004436e-07 2.216786e-07 4.172298e-07
463 2.169663e-07 5.210963e-07 2.260456e-07 2.201856e-07 2.201856e-07 4.124268e-07
464 1.195802e-07 3.762592e-07 1.737731e-07 1.785273e-07 1.785273e-07 3.389835e-07
465 1.320007e-07 3.409968e-07 1.738816e-07 1.712088e-07 1.712088e-07 1.766731e-07
466 2.084511e-07 5.109973e-07 2.142472e-07 5.047934e-07 2.204741e-07 4.085223e-07
467 2.223129e-07 5.237200e-07 2.343470e-07 5.231124e-07 2.162369e-07 4.418545e-07
469 2.134477e-07 4.375466e-07 1.766562e-07 1.770066e-07 1.770066e-07 1.575051e-07
470 1.939910e-07 1.220688e-07 1.220688e-07 1.207220e-07 1.207220e-07 1.907693e-07
471 2.008678e-07 3.992884e-07 1.841018e-07 1.719335e-07 1.719335e-07 4.335573e-07
472 1.926134e-07 1.276455e-07 1.276455e-07 1.289261e-07 1.289261e-07 1.711876e-07
473 1.297388e-07 3.916882e-07 1.894324e-07 1.822792e-07 1.822792e-07 1.546751e-07
474 1.946602e-07 3.982798e-07 1.811392e-07 1.883797e-07 1.883797e-07 2.106717e-07
475 1.236633e-07 4.591152e-07 1.640005e-07 1.693252e-07 1.693252e-07 1.423350e-07
476 1.230369e-07 3.127627e-07 1.502968e-07 1.543293e-07 1.543293e-07 2.400958e-07
477 2.094209e-07 3.986784e-07 1.537925e-07 1.611200e-07 1.611200e-07 1.451774e-07
478 2.186375e-07 5.027230e-07 2.170199e-07 5.069745e-07 2.217227e-07 4.268714e-07
479 2.237072e-07 5.163222e-07 2.271152e-07 5.135320e-07 2.271299e-07 4.277150e-07
481 1.732225e-07 3.729516e-07 1.666305e-07 1.794548e-07 1.794548e-07 1.299093e-07
482 1.842885e-07 4.089928e-07 1.876269e-07 1.834506e-07 1.834506e-07 4.245059e-07
483 1.244360e-07 3.564028e-07 1.616629e-07 1.564488e-07 1.564488e-07 1.825908e-07
485 1.788327e-07 4.253253e-07 2.037094e-07 1.918958e-07 1.918958e-07 1.944200e-07
487 2.229994e-07 5.296223e-07 2.369284e-07 2.288944e-07 2.288944e-07 4.310897e-07
488 1.692659e-07 3.465961e-07 1.733630e-07 1.719035e-07 1.719035e-07 1.426343e-07
489 2.062031e-07 5.178350e-07 2.042306e-07 2.127927e-07 2.127927e-07 4.234923e-07
491 2.582021e-07 4.592675e-07 2.384360e-07 2.473744e-07 2.473744e-07 4.462432e-07
492 1.724304e-07 4.194741e-07 1.597444e-07 1.564075e-07 1.564075e-07 4.958317e-07
493 1.258842e-07 3.713533e-07 2.083126e-07 2.096025e-07 2.096025e-07 1.548449e-07
494 1.345958e-07 3.429842e-07 1.548295e-07 1.573919e-07 1.573919e-07 2.452533e-07
496 1.124039e-07 3.645980e-07 1.677611e-07 1.694262e-07 1.694262e-07 2.837890e-07
497 2.158677e-07 3.431011e-07 2.144308e-07 2.079147e-07 2.079147e-07 1.566916e-07
498 1.991789e-07 1.490081e-07 1.490081e-07 1.482831e-07 1.482831e-07 1.789563e-07
499 2.615972e-07 5.283323e-07 2.384425e-07 5.273755e-07 2.335929e-07 4.323736e-07
501 2.078116e-07 5.490043e-07 2.338488e-07 5.490212e-07 2.389284e-07 4.491719e-07
502 2.111899e-07 5.074902e-07 2.051022e-07 2.101117e-07 2.101117e-07 4.236623e-07
503 2.432694e-07 5.422333e-07 2.413805e-07 5.398226e-07 2.444753e-07 4.413455e-07
505 1.854365e-07 4.179943e-07 1.867338e-07 1.905998e-07 1.905998e-07 1.635027e-07
506 1.180782e-07 4.147211e-07 1.484697e-07 1.422107e-07 1.422107e-07 1.800469e-07
508 1.985166e-07 3.892409e-07 2.175820e-07 2.139927e-07 2.139927e-07 4.507913e-07
509 2.526551e-07 5.177221e-07 2.290690e-07 5.198127e-07 2.356156e-07 4.521368e-07
510 1.221776e-07 3.492785e-07 1.486429e-07 1.442352e-07 1.442352e-07 2.301662e-07
511 1.703786e-07 4.975219e-07 1.831423e-07 1.854142e-07 1.854142e-07 1.445589e-07
[13]:
# Per-backend error curves: one subplot per FFT implementation showing the
# L1, L2 and L-infinity errors against N, together with straight-line fits of
# the L2 and L-infinity errors against log10 of the array size.
plt.figure(figsize=(13, 1 + (len(vl2) + 1) * 1.5))
vk = vl2.keys()
x = np.array(vn, dtype=np.float32)
xl = d0.ndim * np.log10(x)  # log10(N ** ndim): use the size of the array
n_rows = (len(vl2) + 1) // 2  # subplots are laid out two per row
# (error dict, data line format, legend template, fit line format)
fitted_norms = (
    (vl2, '-ok', r"$L2\approx %s+%s\log(size)$", "k-"),
    (vli, '-og', r"$L_{\infty}\approx %s+%s\log(size)$", "g-"),
)
for i, k in enumerate(vk, start=1):
    plt.subplot(n_rows, 2, i)
    plt.semilogx(vn, vl1[k], '-ob', label="$L1$")
    fit_lines = []
    for errors, data_fmt, legend_fmt, fit_fmt in fitted_norms:
        fit = stats.linregress(xl, np.array(errors[k], dtype=np.float32))
        slope, intercept = fit[0], fit[1]
        plt.semilogx(vn, errors[k], data_fmt,
                     label=legend_fmt % (latex_float(intercept), latex_float(slope)))
        fit_lines.append((intercept + slope * xl, fit_fmt))
    # The fit lines are drawn only after both data curves so that the draw
    # order stays: L1, L2, L-infinity, L2 fit, L-infinity fit.
    for fit_values, fit_fmt in fit_lines:
        plt.semilogx(x, fit_values, fit_fmt)
    plt.title(k)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.xlabel("N", loc='right')
plt.suptitle("1D FFT errors (single precision, Bluestein) - " + device_name)
plt.tight_layout()
# Overview figure: the L2 error of every backend on a single set of axes.
plt.figure()
marker_size = 3
# matplotlib format string (line style, marker, colour) for each backend label
line_styles = {
    "fftw": '-og',
    "vkfft-cuda": "-^k",
    "vkfft-cuda-LUT": "-^b",
    "vkfft-opencl": "-vk",
    "vkfft-opencl-LUT": "-vb",
    "cufft": "-or",
}
for backend in vl2:
    plt.semilogx(vn, vl2[backend], line_styles[backend],
                 markersize=marker_size, label=backend)
plt.legend(loc='upper left')
plt.suptitle("1D FFT L2 error (single precision, Bluestein) - " + device_name)
plt.grid(True)
plt.xlabel("N", loc='right')
plt.tight_layout()
1D, non-radix (Bluestein or Rader) transforms, double precision
[14]:
# Random complex test vector of length nmax; the real and imaginary parts are
# each drawn uniformly from [-0.5, 0.5). The real part is drawn first.
nmax = 512
real_part = np.random.uniform(-0.5, 0.5, nmax)
imag_part = np.random.uniform(-0.5, 0.5, nmax)
d0 = real_part + 1j * imag_part
d0ld = d0.astype(np.clongdouble)  # long double copy, input of the reference transform
d0 = d0.astype(np.complex128)  # double precision copy, input of the tested transforms
def accu_1d(n, fft_dic):
    """Measure the error of each FFT implementation on a 1D transform of size n.

    The reference is the transform of the first n samples of the long double
    array d0ld computed with fftwn. Every callable in fft_dic is applied to
    the first n samples of the double precision array d0 and compared to
    that reference with the l1, l2 and li helpers defined earlier in the
    notebook.

    :param n: number of samples to transform
    :param fft_dic: dictionary mapping a backend label to a callable taking
        the input array and returning its transform
    :return: dictionary mapping each backend label to the tuple
        (l1, l2, li) of its errors
    """
    reference = fftwn(d0ld[:n])
    errors = {}
    for backend, transform in fft_dic.items():
        result = transform(d0[:n])
        errors[backend] = (
            l1(reference, result),
            l2(reference, result),
            li(reference, result),
        )
    return errors
# Registry of the FFT implementations to compare, keyed by the label used in
# the printed table and in the plots. fftw is always included; the GPU
# backends are added only when the has_pycuda / has_pyopencl flags are set.
# Insertion order matters: it is the order in which the backends are plotted.
fft_dic = {"fftw": fftwn}
if has_pycuda:
    fft_dic.update({
        "vkfft-cuda": fftnvcu,
        "vkfft-cuda-LUT": fftnvculut,
        "cufft": fftncu,
    })
if has_pyopencl:
    fft_dic.update({
        "vkfft-opencl": fftnvcl,
        "vkfft-opencl-LUT": fftnvcllut,
    })
# Quick sanity check, left disabled: print(accu_1d(16, fft_dic))
# Accumulators read by the plotting cell: the tested sizes and, for each
# backend label, the list of L1 / L2 / L-infinity errors (one entry per size).
vn, vl1, vl2, vli = [], {}, {}, {}
# GPU columns of the table, in display order (fftw is always the first column)
gpu_backends = ("vkfft-cuda", "vkfft-cuda-LUT", "vkfft-opencl",
                "vkfft-opencl-LUT", "cufft")

# Table header: a small transform is run first, and only the backends that
# returned a result get a column.
r = accu_1d(8, fft_dic)
header = "%7s %16s" % ("N", "fftw ")
header += "".join(" %16s" % backend for backend in gpu_backends if backend in r)
print(header)

for n in range(8, len(d0) + 1):
    if max(primes(n)) <= 13:
        continue  # test only transforms with non-radix sizes
    r = accu_1d(n, fft_dic)
    vn.append(n)
    for backend, (err_l1, err_l2, err_li) in r.items():
        vl1.setdefault(backend, []).append(err_l1)
        vl2.setdefault(backend, []).append(err_l2)
        vli.setdefault(backend, []).append(err_li)
    fftw_l2 = vl2["fftw"][-1]
    row = "%7d %16e" % (n, fftw_l2)
    for backend in gpu_backends:
        if backend not in vl2:
            continue
        backend_l2 = vl2[backend][-1]
        # Red channel of the ANSI 24-bit colour: 0 when the error equals the
        # fftw one, saturating at 255 when it is 100 times larger.
        red = int(np.log10(backend_l2 / fftw_l2) / np.log10(100) * 255)
        red = min(max(red, 0), 255)
        row += "\x1b[38;2;%d;0;0m %14e\x1b[0m" % (red, backend_l2)
    print(row)
N fftw vkfft-cuda vkfft-cuda-LUT vkfft-opencl vkfft-opencl-LUT cufft
17 1.066556e-16 2.032744e-16 2.032744e-16 1.932772e-16 1.932772e-16 1.024254e-16
19 1.358658e-16 4.147412e-16 4.147412e-16 3.430604e-16 3.430604e-16 1.105945e-16
23 1.222737e-16 2.413451e-16 2.413451e-16 2.516965e-16 2.516965e-16 1.148165e-16
29 1.501406e-16 3.579374e-16 3.579374e-16 3.529350e-16 3.529350e-16 1.465995e-16
31 1.883550e-16 3.082450e-16 3.082450e-16 2.744713e-16 2.744713e-16 1.223319e-16
34 1.593584e-16 2.025947e-16 2.025947e-16 2.125895e-16 2.125895e-16 1.505484e-16
37 3.676175e-16 3.702819e-16 3.702819e-16 3.570183e-16 3.570183e-16 1.801324e-16
38 1.601503e-16 4.237837e-16 4.237837e-16 4.025553e-16 4.025553e-16 1.837089e-16
41 3.152464e-16 3.281737e-16 3.281737e-16 3.527551e-16 3.527551e-16 1.717301e-16
43 1.706233e-16 3.943163e-16 3.943163e-16 3.963092e-16 3.963092e-16 1.425642e-16
46 1.883136e-16 2.610338e-16 2.610338e-16 2.461951e-16 2.461951e-16 2.052718e-16
47 1.982760e-16 1.872361e-16 1.872361e-16 1.872361e-16 1.872361e-16 1.807893e-16
51 1.770969e-16 2.013288e-16 2.013288e-16 2.244093e-16 2.244093e-16 1.617509e-16
53 2.045219e-16 2.697018e-16 2.697018e-16 2.737883e-16 2.737883e-16 2.051781e-16
57 1.669975e-16 4.105691e-16 4.105691e-16 4.015106e-16 4.015106e-16 1.896102e-16
58 2.054844e-16 3.860140e-16 3.860140e-16 3.862643e-16 3.862643e-16 1.738323e-16
59 3.658351e-16 1.443108e-16 1.443108e-16 1.443108e-16 1.443108e-16 1.729608e-16
61 3.067896e-16 3.166648e-16 3.166648e-16 3.220561e-16 3.220561e-16 1.717918e-16
62 1.668922e-16 3.051567e-16 3.051567e-16 3.128923e-16 3.128923e-16 1.628782e-16
67 2.314708e-16 3.264476e-16 3.264476e-16 3.312149e-16 3.312149e-16 1.879661e-16
68 1.662272e-16 2.178020e-16 2.178020e-16 2.049310e-16 2.049310e-16 1.746297e-16
69 2.056034e-16 2.668543e-16 2.668543e-16 2.939738e-16 2.939738e-16 2.107158e-16
71 3.728700e-16 4.401986e-16 4.401986e-16 4.499767e-16 4.499767e-16 2.336482e-16
73 3.211524e-16 3.214904e-16 3.214904e-16 3.268311e-16 3.268311e-16 2.120713e-16
74 3.410121e-16 4.176166e-16 4.176166e-16 3.968245e-16 3.968245e-16 1.868322e-16
76 1.641933e-16 3.873916e-16 3.873916e-16 4.100067e-16 4.100067e-16 2.465960e-16
79 3.712197e-16 3.675608e-16 3.675608e-16 3.433692e-16 3.433692e-16 2.077833e-16
82 3.404349e-16 3.004966e-16 3.004966e-16 3.170214e-16 3.170214e-16 1.770641e-16
83 3.830541e-16 2.051431e-16 2.051431e-16 2.051431e-16 2.051431e-16 2.208298e-16
85 1.866519e-16 2.184219e-16 2.184219e-16 2.383326e-16 2.383326e-16 2.162865e-16
86 2.014463e-16 4.268616e-16 4.268616e-16 4.419241e-16 4.419241e-16 1.894896e-16
87 1.797631e-16 3.964417e-16 3.964417e-16 4.066787e-16 4.066787e-16 1.783458e-16
89 4.216439e-16 2.905135e-16 2.905135e-16 2.927240e-16 2.927240e-16 2.524720e-16
92 1.638247e-16 2.761034e-16 2.761034e-16 2.804747e-16 2.804747e-16 1.829325e-16
93 1.968851e-16 3.083718e-16 3.083718e-16 3.203025e-16 3.203025e-16 2.133215e-16
94 2.110990e-16 1.956110e-16 1.956110e-16 1.955015e-16 1.955015e-16 1.960184e-16
95 2.079143e-16 4.075900e-16 4.075900e-16 3.762110e-16 3.762110e-16 1.857351e-16
97 2.888369e-16 3.009593e-16 3.009593e-16 3.292453e-16 3.292453e-16 2.249417e-16
101 3.412749e-16 3.585316e-16 3.585316e-16 3.766311e-16 3.766311e-16 2.442822e-16
102 1.955521e-16 2.558419e-16 2.558419e-16 2.496954e-16 2.496954e-16 2.250308e-16
103 3.560505e-16 3.447515e-16 3.447515e-16 3.473488e-16 3.473488e-16 2.512852e-16
106 2.015267e-16 2.709866e-16 2.709866e-16 2.873229e-16 2.873229e-16 2.108172e-16
107 3.985238e-16 3.363602e-16 3.363602e-16 3.484215e-16 3.484215e-16 2.343304e-16
109 3.867450e-16 4.286241e-16 4.286241e-16 4.458877e-16 4.458877e-16 2.476720e-16
111 3.207092e-16 4.836789e-16 4.836789e-16 4.604139e-16 4.604139e-16 2.075605e-16
113 4.859810e-16 4.435949e-16 4.435949e-16 4.064003e-16 4.064003e-16 2.726163e-16
114 1.872245e-16 4.348822e-16 4.348822e-16 4.603168e-16 4.603168e-16 2.509529e-16
115 2.135527e-16 2.927767e-16 2.927767e-16 2.886522e-16 2.886522e-16 1.917635e-16
116 1.875593e-16 4.010563e-16 4.010563e-16 4.214003e-16 4.214003e-16 2.104462e-16
118 4.093327e-16 2.223871e-16 2.223871e-16 2.174214e-16 2.174214e-16 2.350523e-16
119 1.960130e-16 2.813889e-16 2.813889e-16 2.667188e-16 2.667188e-16 1.905773e-16
122 3.278021e-16 3.227205e-16 3.227205e-16 2.874891e-16 2.874891e-16 2.554785e-16
123 3.100847e-16 3.311754e-16 3.311754e-16 3.237449e-16 3.237449e-16 2.661508e-16
124 1.989735e-16 3.337404e-16 3.337404e-16 3.379497e-16 3.379497e-16 1.993340e-16
127 3.720486e-16 4.861899e-16 4.861899e-16 5.231844e-16 5.231844e-16 3.109651e-16
129 2.288744e-16 4.624426e-16 4.624426e-16 4.350685e-16 4.350685e-16 2.422310e-16
131 4.504955e-16 3.698660e-16 3.698660e-16 3.238832e-16 3.238832e-16 5.140240e-16
133 1.920109e-16 4.226630e-16 4.226630e-16 4.119946e-16 4.119946e-16 1.997344e-16
134 2.198039e-16 3.891507e-16 3.891507e-16 4.012934e-16 4.012934e-16 2.384162e-16
136 2.084956e-16 2.231501e-16 2.231501e-16 2.200405e-16 2.200405e-16 1.828021e-16
137 4.284987e-16 4.205853e-16 4.205853e-16 4.245394e-16 4.245394e-16 4.720658e-16
138 2.092334e-16 2.952860e-16 2.952860e-16 2.814172e-16 2.814172e-16 2.741673e-16
139 4.188772e-16 4.621012e-16 4.621012e-16 4.252904e-16 4.252904e-16 4.974237e-16
141 2.302913e-16 2.003624e-16 2.003624e-16 2.002325e-16 2.002325e-16 2.350283e-16
142 4.153879e-16 4.624765e-16 4.624765e-16 4.479296e-16 4.479296e-16 2.577220e-16
145 2.022253e-16 4.172456e-16 4.172456e-16 4.354485e-16 4.354485e-16 1.962583e-16
146 3.808002e-16 3.435374e-16 3.435374e-16 3.524562e-16 3.524562e-16 2.303773e-16
148 3.446394e-16 3.988110e-16 3.988110e-16 3.810151e-16 3.810151e-16 2.358893e-16
149 4.062796e-16 3.337224e-16 3.337224e-16 3.507061e-16 3.507061e-16 4.933272e-16
151 4.002610e-16 3.986196e-16 3.986196e-16 3.904609e-16 3.904609e-16 5.679273e-16
152 1.979888e-16 3.762176e-16 3.762176e-16 3.926671e-16 3.926671e-16 2.324557e-16
153 2.093900e-16 2.539712e-16 2.539712e-16 2.356858e-16 2.356858e-16 2.957654e-16
155 2.125133e-16 3.159057e-16 3.159057e-16 3.243829e-16 3.243829e-16 2.704413e-16
157 3.665922e-16 3.880987e-16 3.880987e-16 3.646680e-16 3.646680e-16 4.767506e-16
158 3.511630e-16 3.891027e-16 3.891027e-16 4.002845e-16 4.002845e-16 2.469131e-16
159 2.182624e-16 2.971595e-16 2.971595e-16 2.976541e-16 2.976541e-16 2.405648e-16
161 2.185268e-16 3.262269e-16 3.262269e-16 3.159330e-16 3.159330e-16 2.090939e-16
163 5.132026e-16 5.242517e-16 5.242517e-16 4.922753e-16 4.922753e-16 5.354424e-16
164 3.540930e-16 3.204207e-16 3.204207e-16 3.287711e-16 3.287711e-16 2.272630e-16
166 4.178859e-16 2.420661e-16 2.420661e-16 2.408705e-16 2.408705e-16 2.577730e-16
167 4.430471e-16 3.688898e-16 3.688898e-16 3.807752e-16 3.807752e-16 7.740158e-16
170 2.062950e-16 2.668753e-16 2.668753e-16 2.569752e-16 2.569752e-16 2.357323e-16
171 2.173697e-16 4.420927e-16 4.420927e-16 4.220043e-16 4.220043e-16 2.832620e-16
172 2.118488e-16 4.810829e-16 4.810829e-16 4.583838e-16 4.583838e-16 2.335068e-16
173 4.386330e-16 3.545772e-16 3.545772e-16 3.684199e-16 3.684199e-16 5.030760e-16
174 1.966625e-16 4.320914e-16 4.320914e-16 4.346338e-16 4.346338e-16 2.602417e-16
177 3.980790e-16 2.457636e-16 2.457636e-16 2.445310e-16 2.445310e-16 2.222503e-16
178 4.307858e-16 3.340961e-16 3.340961e-16 3.220665e-16 3.220665e-16 2.573625e-16
179 4.408395e-16 3.728112e-16 3.728112e-16 3.939275e-16 3.939275e-16 6.536899e-16
181 3.945121e-16 3.793207e-16 3.793207e-16 3.759944e-16 3.759944e-16 5.287358e-16
183 3.198437e-16 3.591010e-16 3.591010e-16 3.373155e-16 3.373155e-16 2.407143e-16
184 1.928248e-16 2.756121e-16 2.756121e-16 2.781794e-16 2.781794e-16 2.357639e-16
185 3.572576e-16 4.350504e-16 4.350504e-16 4.208205e-16 4.208205e-16 1.995552e-16
186 2.015435e-16 3.696052e-16 3.696052e-16 3.466084e-16 3.466084e-16 2.558334e-16
187 1.952256e-16 2.622070e-16 2.622070e-16 2.600007e-16 2.600007e-16 2.987652e-16
188 2.105454e-16 2.016576e-16 2.016576e-16 1.981467e-16 1.981467e-16 2.202767e-16
190 2.106325e-16 3.773636e-16 3.773636e-16 3.852194e-16 3.852194e-16 2.212831e-16
191 3.951166e-16 3.760999e-16 3.760999e-16 3.703903e-16 3.703903e-16 5.701175e-16
193 3.320902e-16 3.630357e-16 3.630357e-16 3.585508e-16 3.585508e-16 6.086125e-16
194 3.383942e-16 3.323661e-16 3.323661e-16 3.088008e-16 3.088008e-16 2.198132e-16
197 3.981957e-16 5.515412e-16 5.515412e-16 5.652031e-16 5.652031e-16 7.284824e-16
199 3.842122e-16 4.684283e-16 4.684283e-16 4.662818e-16 4.662818e-16 5.385862e-16
201 2.267111e-16 4.151502e-16 4.151502e-16 4.308448e-16 4.308448e-16 2.314105e-16
202 3.618266e-16 3.614846e-16 3.614846e-16 3.794759e-16 3.794759e-16 2.536718e-16
203 2.008576e-16 4.592167e-16 4.592167e-16 4.548037e-16 4.548037e-16 2.248447e-16
204 1.840370e-16 2.426777e-16 2.426777e-16 2.390891e-16 2.390891e-16 2.730761e-16
205 3.534615e-16 3.265867e-16 3.265867e-16 3.193702e-16 3.193702e-16 2.500830e-16
206 4.137018e-16 4.018045e-16 4.018045e-16 4.123807e-16 4.123807e-16 2.764055e-16
207 2.029172e-16 2.854071e-16 2.854071e-16 3.052030e-16 3.052030e-16 2.353100e-16
209 2.079554e-16 4.067970e-16 4.067970e-16 4.126865e-16 4.126865e-16 2.467300e-16
211 4.207708e-16 4.749237e-16 4.749237e-16 4.828406e-16 4.828406e-16 5.606828e-16
212 2.178517e-16 2.969481e-16 2.969481e-16 3.124649e-16 3.124649e-16 2.215099e-16
213 4.066487e-16 4.161368e-16 4.161368e-16 4.567764e-16 4.567764e-16 2.626519e-16
214 4.013386e-16 4.027706e-16 4.027706e-16 3.921899e-16 3.921899e-16 3.011889e-16
215 2.323581e-16 4.631091e-16 4.631091e-16 4.520387e-16 4.520387e-16 2.313819e-16
217 2.014112e-16 4.338211e-16 4.338211e-16 4.274623e-16 4.274623e-16 2.567983e-16
218 3.738077e-16 3.991135e-16 3.991135e-16 4.358038e-16 4.358038e-16 2.651147e-16
219 3.309379e-16 3.421003e-16 3.421003e-16 3.668241e-16 3.668241e-16 2.354015e-16
221 2.254187e-16 2.547771e-16 2.547771e-16 2.525488e-16 2.525488e-16 2.421736e-16
222 3.198251e-16 4.548480e-16 4.548480e-16 4.406397e-16 4.406397e-16 2.426449e-16
223 4.649334e-16 4.140162e-16 4.140162e-16 4.234457e-16 4.234457e-16 5.556637e-16
226 4.556817e-16 4.723412e-16 4.723412e-16 4.484493e-16 4.484493e-16 2.728222e-16
227 4.050698e-16 4.189205e-16 4.189205e-16 3.990821e-16 3.990821e-16 5.731509e-16
228 1.908995e-16 4.293922e-16 4.293922e-16 4.386623e-16 4.386623e-16 2.379902e-16
229 4.116690e-16 4.060416e-16 4.060416e-16 3.987697e-16 3.987697e-16 5.650542e-16
230 2.182557e-16 2.777721e-16 2.777721e-16 2.785502e-16 2.785502e-16 2.232426e-16
232 1.935038e-16 4.295693e-16 4.295693e-16 4.025064e-16 4.025064e-16 2.059679e-16
233 4.379194e-16 4.008820e-16 4.008820e-16 3.967666e-16 3.967666e-16 5.485785e-16
235 2.359735e-16 2.289978e-16 2.289978e-16 2.263088e-16 2.263088e-16 2.225156e-16
236 3.694819e-16 2.200808e-16 2.200808e-16 2.184442e-16 2.184442e-16 2.626275e-16
237 3.994447e-16 4.142501e-16 4.142501e-16 3.793456e-16 3.793456e-16 2.599157e-16
238 2.057381e-16 3.259522e-16 3.259522e-16 3.023206e-16 3.023206e-16 2.034470e-16
239 4.303439e-16 4.129857e-16 4.129857e-16 4.312417e-16 4.312417e-16 6.068418e-16
241 3.898456e-16 3.645930e-16 3.645930e-16 3.429079e-16 3.429079e-16 6.133368e-16
244 3.153901e-16 3.459627e-16 3.459627e-16 3.356776e-16 3.356776e-16 2.512832e-16
246 3.177716e-16 3.213002e-16 3.213002e-16 3.356220e-16 3.356220e-16 3.342745e-16
247 2.098875e-16 4.184590e-16 4.184590e-16 4.107725e-16 4.107725e-16 2.544384e-16
248 2.113783e-16 3.189088e-16 3.189088e-16 3.189737e-16 3.189737e-16 2.409934e-16
249 4.189447e-16 2.434623e-16 2.434623e-16 2.434838e-16 2.434838e-16 2.735400e-16
251 3.932799e-16 4.551911e-16 4.551911e-16 4.561028e-16 4.561028e-16 5.486927e-16
253 1.935493e-16 3.144230e-16 3.144230e-16 3.085725e-16 3.085725e-16 2.833915e-16
254 3.918738e-16 5.040980e-16 5.040980e-16 4.731031e-16 4.731031e-16 2.848777e-16
255 2.257604e-16 2.373266e-16 2.373266e-16 2.583114e-16 2.583114e-16 3.236405e-16
257 3.606148e-16 3.664810e-16 3.664810e-16 3.387670e-16 3.387670e-16 5.881190e-16
258 2.240237e-16 4.603611e-16 4.603611e-16 4.727578e-16 4.727578e-16 3.269233e-16
259 3.105543e-16 4.611773e-16 4.611773e-16 4.837644e-16 4.837644e-16 2.153012e-16
261 2.335936e-16 4.292606e-16 4.292606e-16 4.224598e-16 4.224598e-16 2.179332e-16
262 4.504228e-16 3.820135e-16 3.820135e-16 3.714197e-16 3.714197e-16 5.312039e-16
263 4.499065e-16 5.306187e-16 5.306187e-16 5.315015e-16 5.315015e-16 6.039527e-16
265 2.430568e-16 3.061193e-16 3.061193e-16 3.170129e-16 3.170129e-16 2.498435e-16
266 1.986538e-16 4.141403e-16 4.141403e-16 4.265732e-16 4.265732e-16 2.319189e-16
267 4.586972e-16 3.472214e-16 3.472214e-16 3.400729e-16 3.400729e-16 2.688599e-16
268 2.401297e-16 3.873853e-16 3.873853e-16 3.907927e-16 3.907927e-16 2.403332e-16
269 4.567868e-16 5.023242e-16 5.023242e-16 5.186498e-16 5.186498e-16 5.516824e-16
271 4.212166e-16 4.398502e-16 4.398502e-16 4.263670e-16 4.263670e-16 5.161765e-16
272 2.122421e-16 2.454751e-16 2.454751e-16 2.389727e-16 2.389727e-16 2.036869e-16
274 4.139624e-16 5.341366e-16 5.341366e-16 5.133030e-16 5.133030e-16 5.176935e-16
276 2.055527e-16 3.096288e-16 3.096288e-16 3.199842e-16 3.199842e-16 2.658387e-16
277 4.404688e-16 5.113123e-16 5.113123e-16 5.119446e-16 5.119446e-16 5.377449e-16
278 4.536224e-16 5.137798e-16 5.137798e-16 5.322734e-16 5.322734e-16 5.396899e-16
279 2.217484e-16 3.674293e-16 3.674293e-16 3.599327e-16 3.599327e-16 2.126634e-16
281 4.466869e-16 5.288474e-16 5.288474e-16 5.176549e-16 5.176549e-16 6.118109e-16
282 2.255889e-16 2.286690e-16 2.286690e-16 2.318738e-16 2.318738e-16 2.819492e-16
283 4.022360e-16 5.200618e-16 5.200618e-16 5.141904e-16 5.141904e-16 5.795763e-16
284 4.056160e-16 4.505878e-16 4.505878e-16 4.625566e-16 4.625566e-16 2.489041e-16
285 2.235094e-16 3.947394e-16 3.947394e-16 3.937464e-16 3.937464e-16 2.859044e-16
287 3.616424e-16 3.786879e-16 3.786879e-16 3.699689e-16 3.699689e-16 2.459823e-16
289 2.244310e-16 3.273120e-16 3.273120e-16 3.127297e-16 3.127297e-16 2.637970e-16
290 2.127957e-16 4.129002e-16 4.129002e-16 4.218636e-16 4.218636e-16 2.210733e-16
291 3.278867e-16 3.518950e-16 3.518950e-16 3.375856e-16 3.375856e-16 2.589637e-16
292 3.496717e-16 3.753425e-16 3.753425e-16 3.843060e-16 3.843060e-16 2.459724e-16
293 4.252048e-16 5.679563e-16 5.679563e-16 5.626417e-16 5.626417e-16 5.074463e-16
295 3.987306e-16 2.542718e-16 2.542718e-16 2.547957e-16 2.547957e-16 2.650414e-16
296 3.429963e-16 4.362115e-16 4.362115e-16 4.322252e-16 4.322252e-16 2.426680e-16
298 3.976622e-16 5.603865e-16 5.603865e-16 5.528722e-16 5.528722e-16 5.211148e-16
299 2.205340e-16 2.876882e-16 2.876882e-16 2.966815e-16 2.966815e-16 2.334871e-16
301 2.247091e-16 4.978535e-16 4.978535e-16 4.815244e-16 4.815244e-16 2.437074e-16
302 3.708065e-16 4.094584e-16 4.094584e-16 4.064159e-16 4.064159e-16 5.371969e-16
303 3.519356e-16 3.771127e-16 3.771127e-16 4.015522e-16 4.015522e-16 2.845445e-16
304 2.241276e-16 3.855226e-16 3.855226e-16 3.993954e-16 3.993954e-16 2.265670e-16
305 3.431844e-16 3.589180e-16 3.589180e-16 3.551306e-16 3.551306e-16 2.559310e-16
306 2.174840e-16 2.711774e-16 2.711774e-16 2.622224e-16 2.622224e-16 3.769455e-16
307 4.765977e-16 5.909385e-16 5.909385e-16 5.975148e-16 5.975148e-16 5.422187e-16
309 4.416886e-16 5.655617e-16 5.655617e-16 5.742615e-16 5.742615e-16 2.815834e-16
310 2.274252e-16 3.539044e-16 3.539044e-16 3.601667e-16 3.601667e-16 2.914733e-16
311 4.841171e-16 5.265861e-16 5.265861e-16 5.628839e-16 5.628839e-16 5.382997e-16
313 4.301800e-16 4.043976e-16 4.043976e-16 4.118433e-16 4.118433e-16 6.817123e-16
314 4.037395e-16 3.749561e-16 3.749561e-16 3.574227e-16 3.574227e-16 5.281438e-16
316 3.884867e-16 3.949738e-16 3.949738e-16 3.885426e-16 3.885426e-16 2.689181e-16
317 3.951267e-16 4.361437e-16 4.361437e-16 4.402120e-16 4.402120e-16 7.140741e-16
318 2.296233e-16 3.107057e-16 3.107057e-16 3.179396e-16 3.179396e-16 2.561462e-16
319 2.127062e-16 4.287095e-16 4.287095e-16 4.368500e-16 4.368500e-16 2.124074e-16
321 4.404682e-16 4.496236e-16 4.496236e-16 4.348619e-16 4.348619e-16 2.944202e-16
322 2.104195e-16 3.403837e-16 3.403837e-16 3.274292e-16 3.274292e-16 2.207092e-16
323 2.305595e-16 4.115702e-16 4.115702e-16 4.061381e-16 4.061381e-16 2.704614e-16
326 4.345080e-16 5.233691e-16 5.233691e-16 4.997438e-16 4.997438e-16 5.742139e-16
327 4.004922e-16 4.442778e-16 4.442778e-16 4.549182e-16 4.549182e-16 2.697508e-16
328 3.486878e-16 3.644010e-16 3.644010e-16 3.644615e-16 3.644615e-16 2.481652e-16
329 2.315735e-16 2.703042e-16 2.703042e-16 2.639123e-16 2.639123e-16 2.305321e-16
331 4.691042e-16 3.947969e-16 3.947969e-16 3.973702e-16 3.973702e-16 5.442337e-16
332 4.222593e-16 2.512098e-16 2.512098e-16 2.537442e-16 2.537442e-16 2.801125e-16
333 3.238470e-16 4.519223e-16 4.519223e-16 4.692280e-16 4.692280e-16 2.959127e-16
334 4.169577e-16 4.573448e-16 4.573448e-16 4.666044e-16 4.666044e-16 7.417690e-16
335 2.683303e-16 3.795257e-16 3.795257e-16 3.926932e-16 3.926932e-16 2.517444e-16
337 4.850852e-16 4.733831e-16 4.733831e-16 5.063587e-16 5.063587e-16 5.896164e-16
339 4.549069e-16 4.534603e-16 4.534603e-16 4.550205e-16 4.550205e-16 3.342645e-16
340 2.244549e-16 2.638299e-16 2.638299e-16 2.684636e-16 2.684636e-16 2.995269e-16
341 2.192033e-16 3.336948e-16 3.336948e-16 3.284659e-16 3.284659e-16 2.102115e-16
342 2.196978e-16 4.394662e-16 4.394662e-16 4.420263e-16 4.420263e-16 3.396546e-16
344 2.191307e-16 4.641263e-16 4.641263e-16 4.608098e-16 4.608098e-16 2.343412e-16
345 2.318773e-16 2.965797e-16 2.965797e-16 2.949499e-16 2.949499e-16 4.133579e-16
346 4.146525e-16 4.765834e-16 4.765834e-16 4.650029e-16 4.650029e-16 5.483127e-16
347 4.576647e-16 4.919012e-16 4.919012e-16 4.833519e-16 4.833519e-16 5.781621e-16
348 2.105215e-16 4.143864e-16 4.143864e-16 4.253093e-16 4.253093e-16 2.946552e-16
349 4.803904e-16 4.328752e-16 4.328752e-16 4.483396e-16 4.483396e-16 5.556987e-16
353 4.636717e-16 3.784891e-16 3.784891e-16 3.623746e-16 3.623746e-16 6.957535e-16
354 3.667740e-16 2.455426e-16 2.455426e-16 2.521484e-16 2.521484e-16 2.724729e-16
355 4.063957e-16 4.686091e-16 4.686091e-16 4.703910e-16 4.703910e-16 3.459256e-16
356 4.275822e-16 3.434922e-16 3.434922e-16 3.310776e-16 3.310776e-16 2.705112e-16
357 2.249814e-16 3.129011e-16 3.129011e-16 3.202446e-16 3.202446e-16 2.618991e-16
358 4.423923e-16 4.540961e-16 4.540961e-16 4.566750e-16 4.566750e-16 6.605479e-16
359 4.604351e-16 4.486373e-16 4.486373e-16 4.591504e-16 4.591504e-16 7.568075e-16
361 2.152814e-16 6.109397e-16 6.109397e-16 6.480191e-16 6.480191e-16 2.499273e-16
362 4.210193e-16 3.948298e-16 3.948298e-16 3.974537e-16 3.974537e-16 5.496687e-16
365 3.416778e-16 3.522027e-16 3.522027e-16 3.785637e-16 3.785637e-16 3.180973e-16
366 3.364970e-16 3.561232e-16 3.561232e-16 3.352433e-16 3.352433e-16 2.858453e-16
367 4.738596e-16 4.788143e-16 4.788143e-16 4.883752e-16 4.883752e-16 5.437239e-16
368 2.147653e-16 2.863575e-16 2.863575e-16 2.762466e-16 2.762466e-16 2.644321e-16
369 3.477856e-16 3.532663e-16 3.532663e-16 3.374932e-16 3.374932e-16 3.011164e-16
370 3.119328e-16 4.353787e-16 4.353787e-16 4.377745e-16 4.377745e-16 2.305324e-16
371 2.306585e-16 3.381922e-16 3.381922e-16 3.473304e-16 3.473304e-16 2.854896e-16
372 2.297127e-16 3.652244e-16 3.652244e-16 3.681493e-16 3.681493e-16 2.750904e-16
373 4.784392e-16 4.596506e-16 4.596506e-16 4.525283e-16 4.525283e-16 5.684160e-16
374 2.199592e-16 2.648148e-16 2.648148e-16 2.525418e-16 2.525418e-16 3.287146e-16
376 2.254823e-16 2.193314e-16 2.193314e-16 2.187124e-16 2.187124e-16 2.351097e-16
377 2.348134e-16 3.973743e-16 3.973743e-16 4.066423e-16 4.066423e-16 2.454729e-16
379 4.366325e-16 5.573529e-16 5.573529e-16 5.587239e-16 5.587239e-16 8.446140e-16
380 2.153783e-16 4.109959e-16 4.109959e-16 3.976027e-16 3.976027e-16 2.660615e-16
381 3.945235e-16 5.194882e-16 5.194882e-16 4.908172e-16 4.908172e-16 2.949664e-16
382 3.980268e-16 4.243672e-16 4.243672e-16 4.169758e-16 4.169758e-16 5.764166e-16
383 4.208039e-16 4.350881e-16 4.350881e-16 4.254624e-16 4.254624e-16 6.145900e-16
386 3.313086e-16 3.585882e-16 3.585882e-16 3.530873e-16 3.530873e-16 6.189568e-16
387 2.476523e-16 5.005858e-16 5.005858e-16 4.940193e-16 4.940193e-16 2.765656e-16
388 3.334975e-16 3.431799e-16 3.431799e-16 3.536376e-16 3.536376e-16 2.795207e-16
389 4.534249e-16 4.012172e-16 4.012172e-16 4.136554e-16 4.136554e-16 6.685074e-16
391 2.250132e-16 3.288109e-16 3.288109e-16 3.344614e-16 3.344614e-16 2.332006e-16
393 4.483899e-16 3.660825e-16 3.660825e-16 3.868008e-16 3.868008e-16 7.670915e-16
394 4.074309e-16 5.422990e-16 5.422990e-16 5.414215e-16 5.414215e-16 7.470965e-16
395 4.061293e-16 3.819859e-16 3.819859e-16 3.870493e-16 3.870493e-16 2.775116e-16
397 4.546222e-16 4.477367e-16 4.477367e-16 4.569631e-16 4.569631e-16 6.484007e-16
398 3.986398e-16 4.805902e-16 4.805902e-16 4.655313e-16 4.655313e-16 5.731040e-16
399 2.179817e-16 4.749482e-16 4.749482e-16 4.852004e-16 4.852004e-16 2.551743e-16
401 3.641570e-16 4.537890e-16 4.537890e-16 4.385749e-16 4.385749e-16 6.521790e-16
402 2.547605e-16 4.095842e-16 4.095842e-16 4.197842e-16 4.197842e-16 2.759522e-16
403 2.371265e-16 3.615626e-16 3.615626e-16 3.674748e-16 3.674748e-16 2.165406e-16
404 3.578523e-16 3.773839e-16 3.773839e-16 4.041103e-16 4.041103e-16 3.172097e-16
406 2.203063e-16 4.469431e-16 4.469431e-16 4.562960e-16 4.562960e-16 2.226624e-16
407 3.407111e-16 4.455074e-16 4.455074e-16 4.370183e-16 4.370183e-16 2.203054e-16
408 2.250566e-16 2.599335e-16 2.599335e-16 2.591405e-16 2.591405e-16 3.102238e-16
409 4.459972e-16 4.405106e-16 4.405106e-16 4.180883e-16 4.180883e-16 6.066592e-16
410 3.530948e-16 3.609879e-16 3.609879e-16 3.568410e-16 3.568410e-16 2.828219e-16
411 4.406156e-16 4.032047e-16 4.032047e-16 4.073707e-16 4.073707e-16 7.131128e-16
412 4.325561e-16 4.316762e-16 4.316762e-16 4.208343e-16 4.208343e-16 3.006191e-16
413 3.795352e-16 2.991110e-16 2.991110e-16 3.027518e-16 3.027518e-16 2.550085e-16
414 2.199854e-16 3.277582e-16 3.277582e-16 3.207243e-16 3.207243e-16 3.166114e-16
415 4.333893e-16 2.745321e-16 2.745321e-16 2.740133e-16 2.740133e-16 2.712208e-16
417 4.435986e-16 4.045203e-16 4.045203e-16 4.027409e-16 4.027409e-16 5.753028e-16
418 2.300417e-16 4.063278e-16 4.063278e-16 4.121372e-16 4.121372e-16 2.977369e-16
419 4.479669e-16 4.140895e-16 4.140895e-16 4.335225e-16 4.335225e-16 5.756406e-16
421 4.505356e-16 5.226405e-16 5.226405e-16 5.048513e-16 5.048513e-16 5.995219e-16
422 4.333064e-16 4.883745e-16 4.883745e-16 4.908159e-16 4.908159e-16 5.674477e-16
423 2.347486e-16 2.307141e-16 2.307141e-16 2.340541e-16 2.340541e-16 2.601520e-16
424 2.584542e-16 3.070476e-16 3.070476e-16 3.112950e-16 3.112950e-16 2.396436e-16
425 2.405092e-16 2.926105e-16 2.926105e-16 2.782895e-16 2.782895e-16 2.372834e-16
426 4.458562e-16 4.399177e-16 4.399177e-16 4.619101e-16 4.619101e-16 3.159384e-16
427 3.178348e-16 3.724478e-16 3.724478e-16 3.778633e-16 3.778633e-16 2.542602e-16
428 4.136415e-16 3.989878e-16 3.989878e-16 4.036388e-16 4.036388e-16 3.066449e-16
430 2.454803e-16 4.369650e-16 4.369650e-16 4.423259e-16 4.423259e-16 2.677730e-16
431 4.408204e-16 3.964946e-16 3.964946e-16 4.131919e-16 4.131919e-16 5.942937e-16
433 4.223632e-16 4.709574e-16 4.709574e-16 4.557268e-16 4.557268e-16 6.293978e-16
434 2.361074e-16 4.071958e-16 4.071958e-16 4.152329e-16 4.152329e-16 2.971950e-16
435 2.295792e-16 4.423498e-16 4.423498e-16 4.203942e-16 4.203942e-16 3.597191e-16
436 3.524861e-16 4.464411e-16 4.464411e-16 4.436311e-16 4.436311e-16 2.830192e-16
437 2.186560e-16 5.058341e-16 5.058341e-16 5.150317e-16 5.150317e-16 2.187293e-16
438 3.697465e-16 3.772502e-16 3.772502e-16 4.136676e-16 4.136676e-16 2.785747e-16
439 4.699810e-16 4.185354e-16 4.185354e-16 4.123208e-16 4.123208e-16 5.543589e-16
442 2.171584e-16 2.772936e-16 2.772936e-16 2.660441e-16 2.660441e-16 2.995558e-16
443 4.958746e-16 4.152065e-16 4.152065e-16 4.345575e-16 4.345575e-16 5.932955e-16
444 3.358226e-16 4.527715e-16 4.527715e-16 4.390338e-16 4.390338e-16 2.431729e-16
445 4.428114e-16 3.419863e-16 3.419863e-16 3.365053e-16 3.365053e-16 2.933957e-16
446 4.693574e-16 4.028203e-16 4.028203e-16 4.316694e-16 4.316694e-16 5.967187e-16
447 4.166388e-16 4.107732e-16 4.107732e-16 4.177250e-16 4.177250e-16 5.684658e-16
449 4.779489e-16 4.916625e-16 4.916625e-16 5.042315e-16 5.042315e-16 6.470232e-16
451 3.731203e-16 3.454496e-16 3.454496e-16 3.367108e-16 3.367108e-16 2.623106e-16
452 4.521838e-16 4.569107e-16 4.569107e-16 4.488081e-16 4.488081e-16 3.025968e-16
453 3.752138e-16 4.027194e-16 4.027194e-16 3.970457e-16 3.970457e-16 6.161256e-16
454 4.356927e-16 4.260680e-16 4.260680e-16 3.984764e-16 3.984764e-16 5.585017e-16
456 2.223502e-16 4.377104e-16 4.377104e-16 4.436412e-16 4.436412e-16 2.845021e-16
457 4.064637e-16 4.584909e-16 4.584909e-16 4.281186e-16 4.281186e-16 5.864570e-16
458 4.311810e-16 4.265438e-16 4.265438e-16 4.381598e-16 4.381598e-16 6.110807e-16
459 2.276173e-16 2.772439e-16 2.772439e-16 2.637197e-16 2.637197e-16 2.539484e-16
460 2.270926e-16 2.984909e-16 2.984909e-16 3.008670e-16 3.008670e-16 2.697652e-16
461 4.185490e-16 4.280652e-16 4.280652e-16 4.183564e-16 4.183564e-16 6.024082e-16
463 4.402809e-16 5.206069e-16 5.206069e-16 5.226720e-16 5.226720e-16 5.757983e-16
464 2.349617e-16 4.269825e-16 4.269825e-16 4.122941e-16 4.122941e-16 2.391928e-16
465 2.381781e-16 3.601220e-16 3.601220e-16 3.504457e-16 3.504457e-16 2.914530e-16
466 4.384737e-16 4.411578e-16 4.411578e-16 4.444572e-16 4.444572e-16 5.964198e-16
467 4.315914e-16 4.267229e-16 4.267229e-16 4.314943e-16 4.314943e-16 6.298230e-16
469 2.562792e-16 4.407079e-16 4.407079e-16 4.423371e-16 4.423371e-16 2.506215e-16
470 2.611323e-16 2.400088e-16 2.400088e-16 2.418638e-16 2.418638e-16 2.498508e-16
471 4.096718e-16 3.917565e-16 3.917565e-16 3.890869e-16 3.890869e-16 5.984302e-16
472 3.797157e-16 2.456149e-16 2.456149e-16 2.428081e-16 2.428081e-16 2.884509e-16
473 2.471074e-16 4.562845e-16 4.562845e-16 4.589379e-16 4.589379e-16 3.050556e-16
474 3.863249e-16 4.130609e-16 4.130609e-16 4.061693e-16 4.061693e-16 2.861199e-16
475 2.340054e-16 3.886565e-16 3.886565e-16 3.963861e-16 3.963861e-16 2.296002e-16
476 2.203823e-16 3.145705e-16 3.145705e-16 3.129197e-16 3.129197e-16 2.407926e-16
477 2.414661e-16 3.217269e-16 3.217269e-16 3.113420e-16 3.113420e-16 2.560642e-16
478 4.329472e-16 4.196160e-16 4.196160e-16 4.421878e-16 4.421878e-16 6.428448e-16
479 4.229005e-16 4.534632e-16 4.534632e-16 4.441220e-16 4.441220e-16 5.749513e-16
481 3.545958e-16 4.484231e-16 4.484231e-16 4.408979e-16 4.408979e-16 2.334183e-16
482 3.695478e-16 3.631799e-16 3.631799e-16 3.684506e-16 3.684506e-16 5.978246e-16
483 2.241180e-16 3.706330e-16 3.706330e-16 3.729883e-16 3.729883e-16 3.142881e-16
485 3.440491e-16 3.459042e-16 3.459042e-16 3.599548e-16 3.599548e-16 3.175534e-16
487 4.574951e-16 5.798463e-16 5.798463e-16 5.628864e-16 5.628864e-16 7.188753e-16
488 3.176733e-16 3.523134e-16 3.523134e-16 3.399705e-16 3.399705e-16 3.092616e-16
489 4.577790e-16 5.279113e-16 5.279113e-16 5.164502e-16 5.164502e-16 5.745423e-16
491 4.977715e-16 6.094328e-16 6.094328e-16 5.949345e-16 5.949345e-16 6.108383e-16
492 3.569061e-16 3.539576e-16 3.539576e-16 3.609381e-16 3.609381e-16 3.406413e-16
493 2.425425e-16 4.469755e-16 4.469755e-16 4.612446e-16 4.612446e-16 2.291785e-16
494 2.486894e-16 4.041481e-16 4.041481e-16 4.052317e-16 4.052317e-16 3.406617e-16
496 2.392158e-16 3.538974e-16 3.538974e-16 3.424186e-16 3.424186e-16 2.375339e-16
497 4.404625e-16 5.009285e-16 5.009285e-16 5.123642e-16 5.123642e-16 2.594489e-16
498 4.148436e-16 2.584082e-16 2.584082e-16 2.560726e-16 2.560726e-16 3.500145e-16
499 4.773732e-16 4.351097e-16 4.351097e-16 4.489492e-16 4.489492e-16 7.118935e-16
501 4.657366e-16 4.333194e-16 4.333194e-16 4.359787e-16 4.359787e-16 6.057919e-16
502 4.116226e-16 4.585403e-16 4.585403e-16 4.567751e-16 4.567751e-16 5.798328e-16
503 4.175961e-16 4.554120e-16 4.554120e-16 4.341163e-16 4.341163e-16 6.879124e-16
505 3.674330e-16 4.165634e-16 4.165634e-16 4.020705e-16 4.020705e-16 2.992717e-16
506 2.297001e-16 3.109442e-16 3.109442e-16 3.179573e-16 3.179573e-16 3.466802e-16
508 3.876398e-16 5.034659e-16 5.034659e-16 5.076236e-16 5.076236e-16 3.145135e-16
509 4.222445e-16 4.656826e-16 4.656826e-16 4.515350e-16 4.515350e-16 6.135904e-16
510 2.379677e-16 2.697083e-16 2.697083e-16 2.756163e-16 2.756163e-16 3.826891e-16
511 3.611723e-16 4.184942e-16 4.184942e-16 4.257446e-16 4.257446e-16 2.891353e-16
[15]:
# Figure 1: one panel per backend showing the L1, L2 and L-infinity errors
# against the transform size, plus straight-line fits of the L2 and
# L-infinity errors against log10 of the array size.
# vn / vl1 / vl2 / vli are filled by the accuracy loop of the previous cell;
# latex_float and device_name are defined earlier in the notebook.
plt.figure(figsize=(13, 1 + (len(vl2) + 1) * 1.5))
x = np.array(vn, dtype=np.float32)
xl = d0.ndim * np.log10(x)  # Use the size of the array
n_rows = (len(vl2) + 1) // 2  # two panels per row
for panel, backend in enumerate(vl2, start=1):
    # Fit error = intercept + slope * log10(size) for both norms.
    fit_l2 = stats.linregress(xl, np.array(vl2[backend], dtype=np.float32))
    fit_li = stats.linregress(xl, np.array(vli[backend], dtype=np.float32))
    label_l2 = r"$L2\approx %s+%s\log(size)$" % (
        latex_float(fit_l2.intercept), latex_float(fit_l2.slope))
    label_li = r"$L_{\infty}\approx %s+%s\log(size)$" % (
        latex_float(fit_li.intercept), latex_float(fit_li.slope))

    plt.subplot(n_rows, 2, panel)
    # Measured errors first (these carry the legend entries) ...
    plt.semilogx(vn, vl1[backend], '-ob', label="$L1$")
    plt.semilogx(vn, vl2[backend], '-ok', label=label_l2)
    plt.semilogx(vn, vli[backend], '-og', label=label_li)
    # ... then the fitted lines, drawn without a legend entry.
    plt.semilogx(x, fit_l2.intercept + fit_l2.slope * xl, "k-")
    plt.semilogx(x, fit_li.intercept + fit_li.slope * xl, "g-")
    plt.title(backend)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.xlabel("N", loc='right')
plt.suptitle("1D FFT errors (double precision, Bluestein) - " + device_name)
plt.tight_layout()

# Figure 2: L2 error of every backend overlaid on a single axis.
plt.figure()
ms = 3
# Line/marker style per backend label.
clrs = {
    "fftw": '-og',
    "vkfft-cuda": "-^k",
    "vkfft-cuda-LUT": "-^b",
    "vkfft-opencl": "-vk",
    "vkfft-opencl-LUT": "-vb",
    "cufft": "-or",
}
for backend, l2_errors in vl2.items():
    plt.semilogx(vn, l2_errors, clrs[backend], markersize=ms, label=backend)
plt.legend(loc='upper left')
plt.suptitle("1D FFT L2 error (double precision, Bluestein) - " + device_name)
plt.grid(True)
plt.xlabel("N", loc='right')
plt.tight_layout()
2D, non-radix (Bluestein) transforms, single precision
[17]:
# Random complex 2D test array of shape (nmax, nmax): real and imaginary
# parts are drawn independently and uniformly from [-0.5, 0.5).
nmax = 101
d0 = np.random.uniform(-0.5, 0.5, (nmax, nmax)) + 1j * np.random.uniform(-0.5, 0.5, (nmax, nmax))
d0ld = d0.astype(np.clongdouble)  # input of the long-double reference transform
d0s = d0.astype(np.complex64)  # single-precision input of the transforms under test


def accu_2d(n, fft_dic):
    """Measure the error of each transform on the leading ``n`` x ``n`` corner.

    The reference is ``fftwn`` applied to the long-double copy of the data.
    Every transform gets its own ``.copy()`` of the corner, i.e. a fresh
    C-contiguous array instead of a strided view of the full array.

    :param n: edge length of the square sub-array to transform.
    :param fft_dic: mapping ``label -> callable``; each callable receives a
        complex64 array of shape (n, n) and returns its transform.
    :return: dict ``label -> (l1, l2, li)`` where each entry is the result of
        the notebook's ``l1`` / ``l2`` / ``li`` helpers called with
        (reference, result). Those helpers are defined earlier in the
        notebook - presumably the relative L1, L2 and L-infinity errors
        described in the introduction.
    """
    reference = fftwn(d0ld[:n, :n].copy())
    errors = {}
    for label, transform in fft_dic.items():
        result = transform(d0s[:n, :n].copy())
        errors[label] = (
            l1(reference, result),
            l2(reference, result),
            li(reference, result),
        )
    return errors
# Transforms under test, keyed by the label used in the table and the plots.
# "fftw" is the baseline every GPU backend is compared against in the table.
fft_dic = {"fftw": fftwn}
if has_pycuda:
    fft_dic["vkfft-cuda"] = fftnvcu
    fft_dic["vkfft-cuda-LUT"] = fftnvculut
    fft_dic["cufft"] = fftncu
if has_pyopencl:
    fft_dic["vkfft-opencl"] = fftnvcl
    fft_dic["vkfft-opencl-LUT"] = fftnvcllut

# Table columns printed after "fftw", in display order, limited to the
# backends that are actually available.
gpu_backends = [k for k in ("vkfft-cuda", "vkfft-cuda-LUT", "vkfft-opencl",
                            "vkfft-opencl-LUT", "cufft") if k in fft_dic]

# Tested sizes and, per backend label, the list of L1 / L2 / L-infinity
# errors (one entry per tested size). These are read by the plotting cell.
vn, vl1, vl2, vli = [], {}, {}, {}

# Header row. Every column after N is 16 characters wide, in the header and
# in the data rows, so that the table stays aligned.
s = "%7s %16s" % ("N", "fftw ")
for k in gpu_backends:
    s += " %16s" % k
print(s)

# len(d0) is the number of rows of the square test array, i.e. the largest
# n x n transform that can be cut out of it.
for n in range(8, len(d0) + 1):
    if max(primes(n)) <= 13:
        # test only transforms with non-radix dimensions
        continue
    r = accu_2d(n, fft_dic)
    vn.append(n)
    for k, v in r.items():
        vl1.setdefault(k, []).append(v[0])
        vl2.setdefault(k, []).append(v[1])
        vli.setdefault(k, []).append(v[2])

    ref = vl2["fftw"][-1]
    s = "%7d %16e" % (n, ref)
    for k in gpu_backends:
        err = vl2[k][-1]
        # Red intensity of the cell: 0 when the backend is as accurate as
        # fftw (ratio 1), 255 when its L2 error is 100x larger or more.
        # The ratio can be non-finite when an error is exactly zero, so the
        # warnings are silenced and the level is clamped before int().
        with np.errstate(divide="ignore", invalid="ignore"):
            level = np.log10(np.divide(err, ref)) / np.log10(100) * 255
        red = 0 if np.isnan(level) else int(min(max(level, 0), 255))
        # 24-bit ANSI foreground colour around the value.
        s += "\x1b[38;2;%d;0;0m %16e\x1b[0m" % (red, err)
    print(s)
N fftw vkfft-cuda vkfft-cuda-LUT vkfft-opencl vkfft-opencl-LUT cufft
17 1.064493e-07 1.769288e-07 1.766706e-07 1.793724e-07 1.793724e-07 1.014472e-07
19 1.138276e-07 3.462510e-07 1.990153e-07 2.157018e-07 2.157018e-07 1.085927e-07
23 1.136545e-07 3.388370e-07 1.643277e-07 1.702715e-07 1.702715e-07 1.154740e-07
29 1.254697e-07 3.026991e-07 2.325725e-07 2.373313e-07 2.373313e-07 1.174079e-07
31 1.254689e-07 4.019228e-07 2.190677e-07 2.194561e-07 2.194561e-07 1.249641e-07
34 1.234050e-07 3.149019e-07 1.895455e-07 1.884011e-07 1.884011e-07 1.258486e-07
37 2.311431e-07 4.809977e-07 2.342342e-07 2.405885e-07 2.405885e-07 1.288560e-07
38 1.206409e-07 4.302840e-07 2.073467e-07 2.172952e-07 2.172952e-07 1.275125e-07
41 2.080894e-07 5.428678e-07 1.994068e-07 2.039316e-07 2.039316e-07 1.358058e-07
43 1.431978e-07 4.030685e-07 2.609898e-07 2.555937e-07 2.555937e-07 1.380594e-07
46 1.295018e-07 4.224610e-07 1.794655e-07 1.837880e-07 1.837880e-07 1.374309e-07
47 2.617369e-07 1.403620e-07 1.403620e-07 1.403620e-07 1.403620e-07 1.429748e-07
51 1.339754e-07 3.595894e-07 1.919911e-07 1.936782e-07 1.936782e-07 1.693986e-07
53 2.746606e-07 4.255515e-07 2.005767e-07 2.030934e-07 2.030934e-07 1.482253e-07
57 1.359685e-07 4.913498e-07 2.212892e-07 2.236271e-07 2.236271e-07 1.566432e-07
58 1.383858e-07 4.088250e-07 2.431134e-07 2.458935e-07 2.458935e-07 1.628219e-07
59 2.645646e-07 1.549736e-07 1.549736e-07 1.549736e-07 1.549736e-07 1.522756e-07
61 2.119116e-07 4.415614e-07 2.265996e-07 2.320769e-07 2.320769e-07 1.567412e-07
62 1.392797e-07 4.624509e-07 2.299051e-07 2.257486e-07 2.257486e-07 1.569656e-07
67 2.745849e-07 5.424466e-07 2.225806e-07 2.148022e-07 2.148022e-07 1.661600e-07
68 1.324792e-07 4.179806e-07 1.959072e-07 1.975950e-07 1.975950e-07 2.647303e-07
69 1.424612e-07 5.226099e-07 1.860048e-07 1.855560e-07 1.855560e-07 1.751677e-07
71 2.922181e-07 4.038621e-07 2.641228e-07 2.662511e-07 2.662511e-07 1.670022e-07
73 2.385595e-07 6.906754e-07 2.697811e-07 2.658394e-07 2.658394e-07 1.695733e-07
74 2.388433e-07 5.565384e-07 2.428012e-07 2.438724e-07 2.438724e-07 1.555605e-07
76 1.335076e-07 4.982163e-07 2.132034e-07 2.166279e-07 2.166279e-07 3.091439e-07
79 2.724505e-07 4.404951e-07 2.318821e-07 2.359840e-07 2.359840e-07 1.753737e-07
82 2.201404e-07 6.103246e-07 2.083082e-07 2.043014e-07 2.043014e-07 1.831030e-07
83 2.628864e-07 1.791628e-07 1.791628e-07 1.791628e-07 1.791628e-07 1.768091e-07
85 1.430311e-07 4.365876e-07 1.982840e-07 1.986417e-07 1.986417e-07 2.757655e-07
86 1.557046e-07 4.788947e-07 2.647328e-07 2.552991e-07 2.552991e-07 1.699690e-07
87 1.505856e-07 4.641769e-07 2.485872e-07 2.510426e-07 2.510426e-07 1.591133e-07
89 2.888630e-07 6.814944e-07 2.264877e-07 2.325447e-07 2.325447e-07 1.821610e-07
92 1.394571e-07 4.746074e-07 1.871657e-07 1.867921e-07 1.867921e-07 3.775324e-07
93 1.512917e-07 5.044389e-07 2.364509e-07 2.333803e-07 2.333803e-07 1.693592e-07
94 2.684735e-07 1.508294e-07 1.508294e-07 1.508263e-07 1.508263e-07 1.771150e-07
95 1.449898e-07 5.854121e-07 2.263481e-07 2.307058e-07 2.307058e-07 2.094239e-07
97 2.562577e-07 6.076271e-07 3.061153e-07 3.136416e-07 3.136416e-07 1.910664e-07
101 2.497812e-07 5.302290e-07 2.538472e-07 2.453008e-07 2.453008e-07 1.921079e-07
[18]:
# First figure: one panel per FFT implementation, showing the three error
# norms against the transform edge length together with a straight-line fit
# of the L2 and L-infinity errors versus log10 of the total array size.
n_backends = len(vl2)
plt.figure(figsize=(13, 1 + (n_backends + 1) * 1.5))
x = np.array(vn, dtype=np.float32)
xl = d0.ndim * np.log10(x)  # Use the size of the array
for panel, name in enumerate(vl2, start=1):
    plt.subplot((n_backends + 1) // 2, 2, panel)

    # Least-squares fits error = intercept + slope * log10(size).
    fit_l2 = stats.linregress(xl, np.array(vl2[name], dtype=np.float32))
    fit_linf = stats.linregress(xl, np.array(vli[name], dtype=np.float32))
    label_l2 = r"$L2\approx %s+%s\log(size)$" % (
        latex_float(fit_l2.intercept), latex_float(fit_l2.slope))
    label_linf = r"$L_{\infty}\approx %s+%s\log(size)$" % (
        latex_float(fit_linf.intercept), latex_float(fit_linf.slope))

    # Measured errors first, fitted lines last, so the legend lists only the
    # three measured curves (the fits carry no label).
    plt.semilogx(vn, vl1[name], '-ob', label="$L1$")
    plt.semilogx(vn, vl2[name], '-ok', label=label_l2)
    plt.semilogx(vn, vli[name], '-og', label=label_linf)
    plt.semilogx(x, fit_l2.intercept + fit_l2.slope * xl, "k-")
    plt.semilogx(x, fit_linf.intercept + fit_linf.slope * xl, "g-")

    plt.title(name)
    plt.grid(True)
    plt.legend(loc='upper left')
    plt.xlabel("N", loc='right')
plt.suptitle("2D FFT errors (single precision, Bluestein) - " + device_name)
plt.tight_layout()

# Second figure: the L2 error of every implementation overlaid on one axis.
plt.figure()
ms = 3
clrs = {"fftw":'-og', "vkfft-cuda": "-^k", "vkfft-cuda-LUT":"-^b", "vkfft-opencl": "-vk", "vkfft-opencl-LUT":"-vb","cufft":"-or"}
for name, l2_errors in vl2.items():
    plt.semilogx(vn, l2_errors, clrs[name], markersize=ms, label=name)
plt.legend(loc='upper left')
plt.suptitle("2D FFT L2 error (single precision, Bluestein) - " + device_name)
plt.grid(True)
plt.xlabel("N", loc='right')
plt.tight_layout()
[ ]: