Source code for pyfr.backends.cuda.generator

# -*- coding: utf-8 -*-

import re

from pyfr.backends.base.generator import BaseKernelGenerator


[docs]class CUDAKernelGenerator(BaseKernelGenerator): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Specialise if self.ndim == 1: self._dims = ['_nx'] self._limits = 'if (_x < _nx)' self._deref_arg_array = self._deref_arg_array_1d else: self._dims = ['_ny', '_nx'] self._limits = 'for (int _y = 0; _y < _ny && _x < _nx; ++_y)' self._deref_arg_array = self._deref_arg_array_2d
[docs] def render(self): # Get the kernel specification and main body spec = self._emit_spec() body = self._emit_body() # Iteration limits (if statement/for loop) limits = self._limits # Combine return '''{spec} {{ int _x = blockIdx.x*blockDim.x + threadIdx.x; {limits} {{ {body} }} }}'''.format(spec=spec, limits=limits, body=body)
[docs] def _emit_spec(self): # We first need the argument list; starting with the dimensions kargs = ['int ' + d for d in self._dims] # Now add any scalar arguments kargs.extend('{0.dtype} {0.name}'.format(sa) for sa in self.scalargs) # Finally, add the vector arguments for va in self.vectargs: # Views if va.isview: kargs.append('{0.dtype}* __restrict__ {0.name}_v'.format(va)) kargs.append('const int* __restrict__ {0.name}_vix' .format(va)) if va.ncdim >= 1: kargs.append('const int* __restrict__ {0.name}_vcstri' .format(va)) if va.ncdim == 2: kargs.append('const int* __restrict__ {0.name}_vrstri' .format(va)) # Arrays else: # Intent in arguments should be marked constant const = 'const' if va.intent == 'in' else '' kargs.append('{0} {1.dtype}* __restrict__ {1.name}_v' .format(const, va).strip()) # If we are a matrix (ndim = 2) or a non-MPI stacked # vector then a leading (sub) dimension is required if self.ndim == 2 or (va.ncdim > 0 and not va.ismpi): kargs.append('int lsd{0.name}'.format(va)) return '__global__ void {0}({1})'.format(self.name, ', '.join(kargs))
[docs] def _deref_arg_view(self, arg): ptns = ['{0}_v[{0}_vix[_x]]', r'{0}_v[{0}_vix[_x] + {0}_vcstri[_x]*\1]', r'{0}_v[{0}_vix[_x] + {0}_vrstri[_x]*\1 + {0}_vcstri[_x]*\2]'] return ptns[arg.ncdim].format(arg.name)
[docs] def _deref_arg_array_1d(self, arg): # Leading (sub) dimension lsdim = 'lsd' + arg.name if not arg.ismpi else '_nx' # Vector name_v[_x] if arg.ncdim == 0: ix = '_x' # Stacked vector; name_v[lsdim*\1 + _x] elif arg.ncdim == 1: ix = r'{0}*\1 + _x'.format(lsdim) # Doubly stacked vector; name_v[(nv*\1 + \2)*lsdim + _x] else: ix = r'({0}*\1 + \2)*{1} + _x'.format(arg.cdims[1], lsdim) return '{0}_v[{1}]'.format(arg.name, ix)
[docs] def _deref_arg_array_2d(self, arg): # Matrix name_v[lsdim*_y + _x] if arg.ncdim == 0: ix = 'lsd{}*_y + _x'.format(arg.name) # Stacked matrix; name_v[(_y*nv + \1)*lsdim + _x] elif arg.ncdim == 1: ix = r'(_y*{0} + \1)*lsd{1} + _x'.format(arg.cdims[0], arg.name) # Doubly stacked matrix; name_v[((\1*_ny + _y)*nv + \2)*lsdim + _x] else: ix = (r'((\1*_ny + _y)*{0} + \2)*lsd{1} + _x' .format(arg.cdims[1], arg.name)) return '{0}_v[{1}]'.format(arg.name, ix)
[docs] def _emit_body(self): body = self.body ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]'] for va in self.vectargs: # Dereference the argument if va.isview: darg = self._deref_arg_view(va) else: darg = self._deref_arg_array(va) # Substitute body = re.sub(ptns[va.ncdim].format(va.name), darg, body) return body