Source code for pyfr.backends.openmp.generator

# -*- coding: utf-8 -*-

import re

from pyfr.backends.base.generator import BaseKernelGenerator
from pyfr.util import ndrange


[docs]class OpenMPKernelGenerator(BaseKernelGenerator): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Specialise self._dims = ['_nx'] if self.ndim == 1 else ['_ny', '_nx']
[docs] def render(self): # Kernel spec spec = self._emit_spec() if self.ndim == 1: body = self._emit_body_1d() return ''' {spec} {{ #pragma omp parallel {{ int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t); int cb, ce; loop_sched_1d(_nx, align, &cb, &ce); for (int _x = cb; _x < ce; _x++) {{ {body} }} }} }}'''.format(spec=spec, body=body) else: innerfn = self._emit_inner_func() innercall = self._emit_inner_call() return '''{innerfn} {spec} {{ #pragma omp parallel {{ int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t); int rb, re, cb, ce; loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce); for (int _y = rb; _y < re; _y++) {{ {innercall} }} }} }}'''.format(innerfn=innerfn, spec=spec, innercall=innercall)
[docs] def _emit_inner_func(self): # Get the specification and body spec = self._emit_inner_spec() body = self._emit_body_2d() # Combine return '''{spec} {{ for (int _x = 0; _x < _nx; _x++) {{ {body} }} }}'''.format(spec=spec, body=body)
[docs] def _emit_inner_call(self): # Arguments for the inner function iargs = ['ce - cb'] iargs.extend(sa.name for sa in self.scalargs) for va in self.vectargs: iargs.extend(self._offset_arg_array_2d(va)) return '{0}_inner({1});'.format(self.name, ', '.join(iargs))
[docs] def _emit_inner_spec(self): # Inner dimension ikargs = ['int _nx'] # Add any scalar arguments ikargs.extend('{0.dtype} {0.name}'.format(sa) for sa in self.scalargs) # Vector arguments (always arrays as we're 2D) for va in self.vectargs: const = 'const' if va.intent == 'in' else '' stmt = '{0} {1.dtype} *__restrict__ {1.name}_v'.format(const, va) stmt = stmt.strip() if va.ncdim == 0: ikargs.append(stmt) else: for ij in ndrange(*va.cdims): ikargs.append(stmt + 'v'.join(str(n) for n in ij)) return ('static PYFR_NOINLINE void {0}_inner({1})' .format(self.name, ', '.join(ikargs)))
[docs] def _emit_spec(self): # We first need the argument list; starting with the dimensions kargs = ['int ' + d for d in self._dims] # Now add any scalar arguments kargs.extend('{0.dtype} {0.name}'.format(sa) for sa in self.scalargs) # Finally, add the vector arguments for va in self.vectargs: # Views if va.isview: kargs.append('{0.dtype}* __restrict__ {0.name}_v'.format(va)) kargs.append('const int* __restrict__ {0.name}_vix' .format(va)) if va.ncdim >= 1: kargs.append('const int* __restrict__ {0.name}_vcstri' .format(va)) if va.ncdim == 2: kargs.append('const int* __restrict__ {0.name}_vrstri' .format(va)) # Arrays else: # Intent in arguments should be marked constant const = 'const' if va.intent == 'in' else '' kargs.append('{0} {1.dtype}* __restrict__ {1.name}_v' .format(const, va).strip()) # If we are a matrix (ndim = 2) or a non-MPI stacked # vector then a leading (sub) dimension is required if self.ndim == 2 or (va.ncdim > 0 and not va.ismpi): kargs.append('int lsd{0.name}'.format(va)) return 'void {0}({1})'.format(self.name, ', '.join(kargs))
[docs] def _emit_body_1d(self): body = self.body ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]'] for va in self.vectargs: # Dereference the argument darg = self._deref_arg(va) # Substitute body = re.sub(ptns[va.ncdim].format(va.name), darg, body) return body
[docs] def _emit_body_2d(self): body = self.body ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]'] subs = ['{0}_v[_x]', r'{0}_v\1[_x]', r'{0}_v\1v\2[_x]'] for va in self.vectargs: body = re.sub(ptns[va.ncdim].format(va.name), subs[va.ncdim].format(va.name), body) return body
[docs] def _deref_arg(self, arg): if arg.isview: ptns = ['{0}_v[{0}_vix[_x]]', r'{0}_v[{0}_vix[_x] + {0}_vcstri[_x]*\1]', r'{0}_v[{0}_vix[_x] + {0}_vrstri[_x]*\1' r' + {0}_vcstri[_x]*\2]'] return ptns[arg.ncdim].format(arg.name) else: # Leading (sub) dimension lsdim = 'lsd' + arg.name if not arg.ismpi else '_nx' # Vector name_v[_x] if arg.ncdim == 0: ix = '_x' # Stacked vector; name_v[lsdim*\1 + _x] elif arg.ncdim == 1: ix = r'{0}*\1 + _x'.format(lsdim) # Doubly stacked vector; name_v[lsdim*nv*\1 + lsdim*\2 + _x] else: ix = r'{0}*{1}*\1 + {0}*\2 + _x'.format(lsdim, arg.cdims[1]) return '{0}_v[{1}]'.format(arg.name, ix)
[docs] def _offset_arg_array_2d(self, arg): stmts = [] # Matrix; name + _y*lsdim + cb if arg.ncdim == 0: stmts.append('{0}_v + _y*lsd{0} + cb'.format(arg.name)) # Stacked matrix; name + (_y*nv + <0>)*lsdim + cb elif arg.ncdim == 1: stmts.extend('{0}_v + (_y*{1} + {2})*lsd{0} + cb' .format(arg.name, arg.cdims[0], i) for i in range(arg.cdims[0])) # Doubly stacked matrix; name + ((<0>*_ny + _y)*nv + <1>)*lsdim + cb else: stmts.extend('{0}_v + (({1}*_ny + _y)*{2} + {3})*lsd{0} + cb' .format(arg.name, i, arg.cdims[1], j) for i, j in ndrange(*arg.cdims)) return stmts