/* Copyright (C) 2003-2005 Peter J. Verveer
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met: 
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 *
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.      
 */

#include "ni_support.h"

/* Initialize `iterator` for iteration over the single elements of `array`.
 *
 * The array's dimensions and strides are fetched straight into the
 * iterator.  Afterwards every stored dimension is decremented by one
 * (the form used by the iteration macros), every coordinate is reset to
 * zero, and the backstride of each axis is set to its stride multiplied
 * by the decremented dimension.
 *
 * Always returns 1. */
int NI_InitPointIterator(PyArrayObject *array, NI_Iterator *iterator)
{
  int axis, ndim;

  assert(array != NULL);
  assert(iterator != NULL);

  ndim = NI_GetArrayRank(array);
  NI_GetArrayDimensions(array, iterator->dimensions);
  NI_GetArrayStrides(array, iterator->strides);
  iterator->rank_m1 = ndim - 1;

  axis = 0;
  while (axis < ndim) {
    /* start at the origin: */
    iterator->coordinates[axis] = 0;
    /* adapt the dimension for use in the macros: */
    iterator->dimensions[axis]--;
    /* stride needed to move back at the end of this axis: */
    iterator->backstrides[axis] =
      iterator->dimensions[axis] * iterator->strides[axis];
    axis++;
  }
  return 1;
}


/* Initialize `iterator` for iteration over a lower sub-space of `array`.
 *
 * `axes` is a bit mask: array axis ii takes part in the iteration when
 * bit ii is set.  The selected axes are packed, in increasing order, into
 * the first slots of the iterator; for each of them the dimension is
 * stored minus one, the coordinate is zeroed and the backstride is set to
 * stride times the decremented dimension.
 *
 * Returns 1 on success.  Returns 0 with a RuntimeError set when the array
 * rank is larger than 32 or the array has a complex type. */
int NI_InitSubSpaceIterator(PyArrayObject *array, NI_Iterator *iterator,
                            UInt32 axes)
{
  int src, nsel = 0, ndim, type_id;
  int shape[NI_MAXDIM], steps[NI_MAXDIM];

  assert(array != NULL);
  assert(iterator != NULL);

  ndim = NI_GetArrayRank(array);
  NI_GetArrayDimensions(array, shape);
  NI_GetArrayStrides(array, steps);

  /* the axes mask has room for 32 axes only: */
  if (ndim > 32) {
    PyErr_SetString(PyExc_RuntimeError, "array rank too high");
    return 0;
  }

  /* complex arrays are not implemented: */
  type_id = NI_GetArrayType(array);
  if (type_id == tComplex32 || type_id == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    return 0;
  }

  /* pack the selected axes and finish each slot in the same pass: */
  for(src = 0; src < ndim; src++) {
    if (!(axes & (((UInt32)1) << src)))
      continue;
    iterator->strides[nsel] = steps[src];
    /* adapt the dimension for use in the macros: */
    iterator->dimensions[nsel] = shape[src] - 1;
    iterator->coordinates[nsel] = 0;
    /* stride needed to move back at the end of this axis: */
    iterator->backstrides[nsel] =
      iterator->strides[nsel] * iterator->dimensions[nsel];
    nsel++;
  }
  iterator->rank_m1 = nsel - 1;

  return 1;
}

/* Initialize `iterator` for iteration over the lines of `array` that run
 * along `axis`: the iterator covers every array axis except `axis`.
 *
 * A negative `axis` has the array rank added to it first.  Returns 0 with
 * a RuntimeError set if the resulting axis is negative, not below the
 * array rank, or not below 32; otherwise returns whatever
 * NI_InitSubSpaceIterator returns. */
int NI_InitLineIterator(PyArrayObject *array, NI_Iterator *iterator,
                        int axis)
{
  int ndim, k;
  UInt32 mask = 0;

  assert(array != NULL);
  assert(iterator != NULL);

  ndim = NI_GetArrayRank(array);

  /* negative axes count from the end: */
  if (axis < 0)
    axis += ndim;

  if (!(axis >= 0 && axis < ndim && axis < 32)) {
    PyErr_SetString(PyExc_RuntimeError, "invalid axis specified");
    return 0;
  }

  /* select every axis but the line axis: */
  k = 0;
  while (k < ndim) {
    if (k != axis)
      mask |= ((UInt32)1) << k;
    k++;
  }

  return NI_InitSubSpaceIterator(array, iterator, mask);
}


/******************************************************************/
/* Line buffers */
/******************************************************************/

/* Allocate line buffer data.
 *
 * Every buffered line has room for the array's extent along `axis` plus
 * `size1` leading and `size2` trailing elements (used for the boundary
 * conditions), all stored as doubles.
 *
 * array:    the array the buffer is meant for; complex types are rejected.
 * axis:     the line axis; must satisfy 0 <= axis < rank.
 * lines:    in/out.  On entry the proposed number of lines; a value < 1
 *           means "not proposed", in which case as many lines as fit in
 *           `max_size` bytes are used (at least one).  The value is then
 *           capped at the number of lines in the array and written back.
 * max_size: byte budget used only when no number of lines is proposed.
 * buffer:   out; *buffer must be NULL on entry and receives the malloc'ed
 *           storage, which the caller must release with free().
 *
 * Returns 1 on success, 0 with a Python exception set on failure. */
int NI_AllocateLineBuffer(PyArrayObject* array, int axis, int size1,
                     int size2, int *lines, int max_size, double **buffer)
{
  int line_elems, max_lines, arank, atype, adims[NI_MAXDIM];
  size_t line_bytes, total_bytes;

  assert(array != NULL);
  assert(buffer != NULL && *buffer == NULL);
  assert(lines != NULL);

  arank = NI_GetArrayRank(array);
  NI_GetArrayDimensions(array, adims);

  /* check the specified axis: */
  if (axis < 0 || axis >= arank) {
    PyErr_SetString(PyExc_RuntimeError, "invalid axis specified");
    return 0;
  }

  /* complex arrays not implemented, yet */
  atype = NI_GetArrayType(array);
  if (atype == tComplex32 || atype == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    return 0;
  }

  /* the number of lines of the array is an upper limit for the
     number of lines in the buffer (the axis check above already
     guarantees a rank of at least one): */
  max_lines = NI_GetArraySize(array);
  if (adims[axis] > 0)
    max_lines /= adims[axis];

  /* calculate the space needed for one line, including space to
     support the boundary conditions.  The byte counts are kept in
     size_t so that they cannot wrap around in int arithmetic: */
  line_elems = adims[axis] + size1 + size2;
  if (line_elems < 0) {
    /* a negative element count cannot be allocated: */
    PyErr_NoMemory();
    return 0;
  }
  line_bytes = sizeof(double) * (size_t)line_elems;

  /* if *lines < 1, no number of lines is proposed, so we calculate it
     from the maximum size allowed: */
  if (*lines < 1) {
    if (line_bytes > 0 && max_size > 0)
      *lines = (int)((size_t)max_size / line_bytes);
    else
      *lines = 0;
    if (*lines < 1)
      *lines = 1;
  }
  /* no need to allocate too many lines: */
  if (*lines > max_lines)
    *lines = max_lines;

  /* refuse requests whose total byte count does not fit in size_t
     (this also catches a negative line count): */
  if (line_bytes != 0 && (size_t)*lines > ((size_t)-1) / line_bytes) {
    PyErr_NoMemory();
    return 0;
  }
  total_bytes = (size_t)*lines * line_bytes;

  /* allocate data for the buffer.  malloc(0) is allowed to return NULL,
     which must not be mistaken for an out-of-memory condition when the
     array is empty, so at least one byte is always requested: */
  *buffer = (double*)malloc(total_bytes > 0 ? total_bytes : 1);
  if (!*buffer) {
    PyErr_NoMemory();
    return 0;
  }

  return 1;
}

/* Initialize the line buffer structure `buffer` for `array` along `axis`.
 *
 * `buffer_data` is the storage to use and `buffer_lines` the number of
 * lines it holds; `size1`, `size2`, `extend_mode` and `extend_value` are
 * stored in the structure unchanged.
 *
 * Returns 1 on success.  Returns 0 with a RuntimeError set when the array
 * is complex, when the axis is outside 0 .. rank-1 for an array of rank
 * above zero, or when the array is not empty and buffer_lines < 1.  Also
 * returns 0 when NI_InitLineIterator fails. */
int NI_InitLineBuffer(PyArrayObject *array, int axis, int size1,
    int size2, int buffer_lines, double *buffer_data,
    NI_ExtendMode extend_mode, double extend_value, NI_LineBuffer *buffer)
{
  int per_line, n_lines = 0, total, ndim, type_id;
  int shape[NI_MAXDIM], steps[NI_MAXDIM];

  assert(array != NULL);
  assert(buffer != NULL);
  assert(buffer_data != NULL);

  NI_GetArrayDimensions(array, shape);
  NI_GetArrayStrides(array, steps);

  /* complex arrays are not implemented: */
  type_id = NI_GetArrayType(array);
  if (type_id == tComplex32 || type_id == tComplex64) {
    PyErr_SetString(PyExc_RuntimeError, "complex arrays not supported");
    return 0;
  }

  /* the axis is only validated here for arrays of rank above zero: */
  ndim = NI_GetArrayRank(array);
  if (ndim > 0) {
    if (axis < 0 || axis >= ndim) {
      PyErr_SetString(PyExc_RuntimeError, "invalid axis specified");
      return 0;
    }
  }

  /* a non-empty array needs at least one buffer line: */
  total = NI_GetArraySize(array);
  if (total > 0 && buffer_lines < 1) {
    PyErr_SetString(PyExc_RuntimeError, "buffer too small");
    return 0;
  }

  /* the embedded iterator moves over the lines of the array: */
  if (!NI_InitLineIterator(array, &(buffer->iterator), axis))
    return 0;

  /* length of one line, and the number of lines (zero when the lines
     are empty): */
  if (ndim > 0)
    per_line = shape[axis];
  else
    per_line = 1;
  if (per_line > 0)
    n_lines = total / per_line;

  /* fill in the buffer structure: */
  buffer->array_data = NI_GetArrayData(array);
  buffer->array_type = type_id;
  buffer->array_lines = n_lines;
  buffer->buffer_data = buffer_data;
  buffer->buffer_lines = buffer_lines;
  buffer->next_line = 0;
  buffer->line_length = per_line;
  if (ndim > 0)
    buffer->line_stride = steps[axis];
  else
    buffer->line_stride = 0;
  buffer->size1 = size1;
  buffer->size2 = size2;
  buffer->extend_mode = extend_mode;
  buffer->extend_value = extend_value;

  return 1;
}

/* Extend a line in memory to implement boundary conditions.
 *
 * `line` is addressed as `size1` leading elements, then `length` data
 * elements starting at line[size1], then `size2` trailing elements.  Only
 * the leading and trailing elements are written; they are filled from
 * the data elements (or from `constant_value`) according to `mode`:
 *
 *   NI_EXTEND_WRAP      periodic repetition of the data
 *   NI_EXTEND_MIRROR    reflection that does not repeat the edge element
 *                       (for length == 1 the single element is replicated)
 *   NI_EXTEND_REFLECT   reflection that repeats the edge element
 *   NI_EXTEND_NEAREST   replication of the first / last data element
 *   NI_EXTEND_CONSTANT  every extension element set to `constant_value`
 *
 * Returns 1 on success.  For any other mode a RuntimeError is set and 0
 * is returned.
 *
 * NOTE(review): the WRAP and REFLECT branches divide by `length`, so a
 * zero length is a division by zero there; presumably callers always
 * pass length >= 1 -- confirm. */
int NI_ExtendLine(double *line, int length, int size1,
                  int size2, NI_ExtendMode mode, double constant_value)
{
  int ii, jj, length1, nextend, rextend;
  double *l1, *l2, *l3, val;

  assert(line != NULL);

  switch (mode) {
  case NI_EXTEND_WRAP:
    /* leading part: nextend whole copies of the data, preceded by a
       partial copy of rextend elements */
    nextend = size1 / length;
    rextend = size1 - nextend * length;
    /* the partial copy takes the last rextend data elements: */
    l1 = line + size1 + length - rextend;
    l2 = line;
    for(ii = 0; ii < rextend; ii++)
      *l2++ = *l1++;
    for(ii = 0; ii < nextend; ii++) {
      l1 = line + size1;
      for(jj = 0; jj < length; jj++)
        *l2++ = *l1++;
    }
    /* trailing part: nextend whole copies followed by the first rextend
       data elements */
    nextend = size2 / length;
    rextend = size2 - nextend * length;
    l1 = line + size1;
    l2 = line + size1 + length;
    for(ii = 0; ii < nextend; ii++) {
      l3 = l1;
      for(jj = 0; jj < length; jj++)
        *l2++ = *l3++;
    }
    for(ii = 0; ii < rextend; ii++)
      *l2++ = *l1++;
    break;
  case NI_EXTEND_MIRROR:
    if (length == 1) {
      /* a single element has nothing to mirror: replicate it on both
         sides */
      l1 = line;
      val = line[size1];
      for(ii = 0; ii < size1; ii++)
        *l1++ = val;
      l1 = line + size1 + length;
      val = line[size1 + length - 1];
      for(ii = 0; ii < size2; ii++)
        *l1++ = val;
    } else {
      /* the edge element is not repeated, so one pass covers
         length - 1 elements */
      length1 = length - 1;
      nextend = size1 / length1;
      rextend = size1 - nextend * length1;
      /* leading part: read upwards starting one past the first data
         element, write downwards starting just before it */
      l1 = line + size1 + 1;
      l2 = l1 - 2;
      for(ii = 0; ii < nextend; ii++) {
        l3 = l1;
        for(jj = 0; jj < length1; jj++)
          *l2-- = *l3++;
        /* the next pass reads from the elements written by this one: */
        l1 -= length1;
      }
      for(ii = 0; ii < rextend; ii++)
        *l2-- = *l1++;
      nextend = size2 / length1;
      rextend = size2 - nextend * length1;
      /* trailing part: read downwards starting one before the last data
         element, write upwards starting just after it */
      l1 = line + size1 + length1 - 1;
      l2 = l1 + 2;
      for(ii = 0; ii < nextend; ii++) {
        l3 = l1;
        for(jj = 0; jj < length1; jj++)
          *l2++ = *l3--;
        /* the next pass reads from the elements written by this one: */
        l1 += length1;
      }
      for(ii = 0; ii < rextend; ii++)
        *l2++ = *l1--;
    }
    break;
  case NI_EXTEND_REFLECT:
    nextend = size1 / length;
    rextend = size1 - nextend * length;
    /* leading part: read upwards starting at the first data element,
       write downwards starting just before it */
    l1 = line + size1;
    l2 = l1 - 1;
    for(ii = 0; ii < nextend; ii++) {
      l3 = l1;
      for(jj = 0; jj < length; jj++)
        *l2-- = *l3++;
      /* the next pass reads from the elements written by this one: */
      l1 -= length;
    }
    l3 = l1;
    for(ii = 0; ii < rextend; ii++)
      *l2-- = *l3++;
    nextend = size2 / length;
    rextend = size2 - nextend * length;
    /* trailing part: read downwards starting at the last data element,
       write upwards starting just after it */
    l1 = line + size1 + length - 1;
    l2 = l1 + 1;
    for(ii = 0; ii < nextend; ii++) {
      l3 = l1;
      for(jj = 0; jj < length; jj++)
        *l2++ = *l3--;
      /* the next pass reads from the elements written by this one: */
      l1 += length;
    }
    for(ii = 0; ii < rextend; ii++)
      *l2++ = *l1--;
    break;
  case NI_EXTEND_NEAREST:
    /* leading part takes the first data element: */
    l1 = line;
    val = line[size1];
    for(ii = 0; ii < size1; ii++)
      *l1++ = val;
    /* trailing part takes the last data element: */
    l1 = line + size1 + length;
    val = line[size1 + length - 1];
    for(ii = 0; ii < size2; ii++)
      *l1++ = val;
    break;
  case NI_EXTEND_CONSTANT:
    l1 = line;
    for(ii = 0; ii < size1; ii++)
      *l1++ = constant_value;
    l1 = line + size1 + length;
    for(ii = 0; ii < size2; ii++)
      *l1++ = constant_value;
    break;
  default:
    PyErr_SetString(PyExc_RuntimeError, "mode not supported");
    return 0;
  }
  return 1;
}


/* Copy `length` elements of C type `type` into the double array `po`.
 *
 * pi:     modifiable pointer (an lvalue) to the first source element; it
 *         is advanced by `stride` after every element, so on completion
 *         it has moved by length * stride.  The callers in this file pass
 *         a char pointer, which makes `stride` a byte stride.
 * po:     destination; element _ii receives source element _ii converted
 *         to double.
 *
 * Every argument is parenthesised in the expansion so that expression
 * arguments (for instance `dst + 1`) expand as intended. */
#define COPY_DATA_TO_LINE(pi, po, length, stride, type)    \
  {                                                        \
    int _ii;                                               \
    for(_ii = 0; _ii < (length); _ii++) {                  \
      (po)[_ii] = (double)*(type*)(pi);                    \
      (pi) += (stride);                                    \
    }                                                      \
  }


/* Copy lines from the array into the line buffer.
 *
 * Lines are copied, converted to double, until either every array line
 * has been processed or the buffer holds buffer->buffer_lines lines.
 * Within the buffer each line starts after buffer->size1 leading
 * elements and consecutive lines are line_length + size1 + size2 doubles
 * apart.  If size1 + size2 > 0, NI_ExtendLine is applied to every copied
 * line.  The buffer's iterator, array_data pointer and next_line counter
 * are advanced for every line copied.
 *
 * number_of_lines: out; the number of lines placed in the buffer.
 * more:            out; non-zero if array lines remain to be processed.
 *
 * Returns 1 on success.  Returns 0 with a RuntimeError set if the array
 * type is not supported, and 0 if NI_ExtendLine fails. */
int NI_ArrayToLineBuffer(NI_LineBuffer *buffer, int *number_of_lines,
                         int *more)
{
  double *pb;
  char *pa;
  int length;

  /* validate the arguments before anything is dereferenced: */
  assert(buffer != NULL);
  assert(number_of_lines != NULL);
  assert(more != NULL);

  length = buffer->line_length;
  /* the data of the first line starts after its leading extension: */
  pb = buffer->buffer_data + buffer->size1;

  *number_of_lines = 0;

  /* fill until all lines in the array have been processed, or until
     the buffer is full: */
  while (buffer->next_line < buffer->array_lines &&
         *number_of_lines < buffer->buffer_lines) {

    pa = buffer->array_data;
    /* copy the data from the array to the buffer: */
    switch (buffer->array_type) {
    case tBool:
      COPY_DATA_TO_LINE(pa, pb, length, buffer->line_stride, Bool);
      break;
    case tUInt8:
      COPY_DATA_TO_LINE(pa, pb, length, buffer->line_stride, UInt8);
      break;
    case tUInt16:
      COPY_DATA_TO_LINE(pa, pb, length, buffer->line_stride, UInt16);
      break;
    case tUInt32:
      COPY_DATA_TO_LINE(pa, pb, length, buffer->line_stride, UInt32);
      break;
#if HAS_UINT64
    case tUInt64:
      COPY_DATA_TO_LINE(pa, pb, length, buffer->line_stride, UInt64);
      break;
#endif
    case tInt8:
      COPY_DATA_TO_LINE(pa, pb, length, buffer->line_stride, Int8);
      break;
    case tInt16:
      COPY_DATA_TO_LINE(pa, pb, length, buffer->line_stride, Int16);
      break;
    case tInt32:
      COPY_DATA_TO_LINE(pa, pb, length, buffer->line_stride, Int32);
      break;
    case tInt64:
      COPY_DATA_TO_LINE(pa, pb, length, buffer->line_stride, Int64);
      break;
    case tFloat32:
      COPY_DATA_TO_LINE(pa, pb, length, buffer->line_stride, Float32);
      break;
    case tFloat64:
      COPY_DATA_TO_LINE(pa, pb, length, buffer->line_stride, Float64);
      break;
    default:
      PyErr_SetString(PyExc_RuntimeError, "array type not supported");
      return 0;
    }

    /* goto next line in the array: */
    NI_ITERATOR_NEXT(buffer->iterator, buffer->array_data);

    /* implement boundary conditions to the line: */
    if (buffer->size1 + buffer->size2 > 0)
      if (!NI_ExtendLine(pb - buffer->size1, length, buffer->size1,
                         buffer->size2, buffer->extend_mode,
                         buffer->extend_value))
        return 0;

    /* The number of the array lines copied: */
    ++(buffer->next_line);

    /* keep track of (and return) the number of lines in the buffer: */
    ++(*number_of_lines);

    /* move the buffer data pointer to the next line: */
    pb += buffer->line_length + buffer->size1 + buffer->size2;
  }

  /* if not all array lines were processed, *more is set true: */
  *more = buffer->next_line < buffer->array_lines;

  return 1;
}

/* Copy `length` doubles from the array `pi` into elements of C type
 * `type`.
 *
 * pi:     source; element _ii is cast to `type` before it is stored.
 * po:     modifiable pointer (an lvalue) to the first destination
 *         element; it is advanced by `stride` after every element, so on
 *         completion it has moved by length * stride.  The callers in
 *         this file pass a char pointer, which makes `stride` a byte
 *         stride.
 *
 * Every argument is parenthesised in the expansion so that expression
 * arguments (for instance `src + 1`) expand as intended. */
#define COPY_LINE_TO_DATA(pi, po, length, stride, type)    \
  {                                                        \
    int _ii;                                               \
    for(_ii = 0; _ii < (length); _ii++) {                  \
      *(type*)(po) = (type)(pi)[_ii];                      \
      (po) += (stride);                                    \
    }                                                      \
  }

/* Copy lines from the line buffer back into the array.
 *
 * At most buffer->buffer_lines lines are written; copying stops early
 * once buffer->next_line equals buffer->array_lines.  Each line is read
 * starting after the buffer->size1 leading elements, consecutive lines
 * being line_length + size1 + size2 doubles apart, and every value is
 * cast to the array's element type.  The buffer's iterator, array_data
 * pointer and next_line counter are advanced for every line written.
 *
 * Returns 1 on success, 0 with a RuntimeError set if the array type is
 * not supported. */
int NI_LineBufferToArray(NI_LineBuffer *buffer)
{
  double *pb;
  char *pa;
  int jj, length;

  /* validate the argument before it is dereferenced: */
  assert(buffer != NULL);

  length = buffer->line_length;
  /* the data of the first line starts after its leading extension: */
  pb = buffer->buffer_data + buffer->size1;

  for(jj = 0; jj < buffer->buffer_lines; jj++) {
    /* if all array lines are copied return: */
    if (buffer->next_line == buffer->array_lines)
      break;

    pa = buffer->array_data;
    /* copy data from the buffer to the array: */
    switch (buffer->array_type) {
    case tBool:
      COPY_LINE_TO_DATA(pb, pa, length, buffer->line_stride, Bool);
      break;
    case tUInt8:
      COPY_LINE_TO_DATA(pb, pa, length, buffer->line_stride, UInt8);
      break;
    case tUInt16:
      COPY_LINE_TO_DATA(pb, pa, length, buffer->line_stride, UInt16);
      break;
    case tUInt32:
      COPY_LINE_TO_DATA(pb, pa, length, buffer->line_stride, UInt32);
      break;
#if HAS_UINT64
    case tUInt64:
      COPY_LINE_TO_DATA(pb, pa, length, buffer->line_stride, UInt64);
      break;
#endif
    case tInt8:
      COPY_LINE_TO_DATA(pb, pa, length, buffer->line_stride, Int8);
      break;
    case tInt16:
      COPY_LINE_TO_DATA(pb, pa, length, buffer->line_stride, Int16);
      break;
    case tInt32:
      COPY_LINE_TO_DATA(pb, pa, length, buffer->line_stride, Int32);
      break;
    case tInt64:
      COPY_LINE_TO_DATA(pb, pa, length, buffer->line_stride, Int64);
      break;
    case tFloat32:
      COPY_LINE_TO_DATA(pb, pa, length, buffer->line_stride, Float32);
      break;
    case tFloat64:
      COPY_LINE_TO_DATA(pb, pa, length, buffer->line_stride, Float64);
      break;
    default:
      PyErr_SetString(PyExc_RuntimeError, "array type not supported");
      return 0;
    }

    /* move to the next line in the array: */
    NI_ITERATOR_NEXT(buffer->iterator, buffer->array_data);

    /* number of lines copied: */
    ++(buffer->next_line);

    /* move the buffer data pointer to the next line: */
    pb += buffer->line_length + buffer->size1 + buffer->size2;
  }

  return 1;
}

/******************************************************************/
/* Multi-dimensional filter support functions */
/******************************************************************/

/* Initialize a filter iterator.
 *
 * rank:         number of axes described by the shape and shift arrays.
 * filter_shape: filter extent along each axis; must not be negative.
 * size:         stride stored for the last axis.
 * array_shape:  array extent along each axis.
 * shifts:       per-axis shift; filter_shape[ii] / 2 + shifts[ii] must be
 *               zero, or positive and below filter_shape[ii].
 * iterator:     receives strides, backstrides, bound1 and bound2.
 *
 * Returns 1 on success, 0 with a RuntimeError set if a filter extent is
 * negative or a shift falls outside the filter. */
int NI_InitFilterIterator(int rank, int *filter_shape, int size,
                          int *array_shape, int *shifts,
                          NI_FilterIterator *iterator)
{
  int axis;

  assert(array_shape != NULL);
  assert(filter_shape != NULL);
  assert(shifts != NULL);
  assert(iterator != NULL);

  /* reject negative filter extents: */
  axis = 0;
  while (axis < rank) {
    if (filter_shape[axis] < 0) {
      PyErr_SetString(PyExc_RuntimeError, "filter shape must be >= 0");
      return 0;
    }
    axis++;
  }

  /* strides used to move the offsets pointer through the offsets table;
     each one is derived from the stride of the axis that follows it: */
  if (rank > 0) {
    iterator->strides[rank - 1] = size;
    for(axis = rank - 1; axis > 0; axis--) {
      /* the smaller of the array and filter extents: */
      int extent = filter_shape[axis];
      if (array_shape[axis] < extent)
        extent = array_shape[axis];
      iterator->strides[axis - 1] = iterator->strides[axis] * extent;
    }
  }

  /* the shifted origin must be zero or lie inside the filter: */
  for(axis = 0; axis < rank; axis++) {
    int origin = filter_shape[axis] / 2 + shifts[axis];
    if (!(origin == 0 || (origin > 0 && origin < filter_shape[axis]))) {
      PyErr_SetString(PyExc_RuntimeError,
                      "shift not within filter extent");
      return 0;
    }
  }

  for(axis = 0; axis < rank; axis++) {
    int origin = filter_shape[axis] / 2 + shifts[axis];
    int extent = filter_shape[axis];
    if (array_shape[axis] < extent)
      extent = array_shape[axis];
    /* boundary extension sizes: */
    iterator->bound1[axis] = origin;
    iterator->bound2[axis] = array_shape[axis] - filter_shape[axis] + origin;
    /* stride for stepping back to previous offsets: */
    iterator->backstrides[axis] = (extent - 1) * iterator->strides[axis];
  }

  return 1;
}

/* Calculate the offsets to the filter points, for all border regions and
   the interior of the array.

   array:              the array to be filtered; its rank, dimensions and
                       strides are used.
   pm:                 footprint, read as a flat array with one flag per
                       filter element (the product of fdims); an offset is
                       stored only for non-zero flags.
   fdims:              filter extent along each array axis, must be >= 0.
   shifts:             per-axis shift; fdims[ii] / 2 + shifts[ii] must be
                       zero, or positive and below fdims[ii].
   extend_mode:        boundary condition applied to filter points that
                       fall outside the array.
   offsets:            out; *offsets must be NULL on entry and receives a
                       malloc'ed table holding one offset per footprint
                       element for every stored region.
   border_flag_value:  out; the offset value that marks a point outside
                       the array (produced for NI_EXTEND_CONSTANT only).
   coordinate_offsets: optional out; when not NULL, *coordinate_offsets
                       receives a malloc'ed table holding `rank`
                       coordinate offsets for every stored offset.

   Returns 1 if no Python error is set on exit.  Otherwise returns 0; in
   that case both tables have been freed and *offsets and
   *coordinate_offsets are reset to NULL, so that a caller that also
   releases them during its own cleanup cannot free them twice. */
int NI_InitFilterOffsets(PyArrayObject *array,
        Bool *pm, int *fdims, int* shifts, NI_ExtendMode extend_mode,
        int **offsets, int *border_flag_value, int **coordinate_offsets)
{
  int filter_size = 1, footprint_size = 0, offsets_size = 1, ii, jj, kk;
  int shape[NI_MAXDIM], coordinates[NI_MAXDIM], position[NI_MAXDIM];
  int max_size = 0, max_stride = 0, *po, *pc = NULL;
  int rank, dimensions[NI_MAXDIM], strides[NI_MAXDIM];

  assert(array != NULL);
  assert(shifts != NULL);
  assert(offsets != NULL && *offsets == NULL);
  assert(border_flag_value != NULL);

  /* start from a defined state, so that the cleanup code below never
     frees a pointer that was not allocated here: */
  if (coordinate_offsets)
    *coordinate_offsets = NULL;

  rank = NI_GetArrayRank(array);
  NI_GetArrayDimensions(array, dimensions);
  NI_GetArrayStrides(array, strides);

  /* check footprint shape: */
  for(ii = 0; ii < rank; ii++) {
    shape[ii] = fdims[ii];
    if (shape[ii] < 0) {
      PyErr_SetString(PyExc_RuntimeError, "filter shape must be >= 0");
      *border_flag_value = 1;
      goto exit;
    }
  }
  /* the size of the footprint array: */
  for(ii = 0; ii < rank; ii++)
    filter_size *= fdims[ii];

  /* calculate the number of non-zero elements in the footprint: */
  for(ii = 0; ii < filter_size; ii++)
    if (pm[ii])
      ++footprint_size;

  /* check the filter shift: */
  for(ii = 0; ii < rank; ii++) {
    int shft = shape[ii] / 2 + shifts[ii];
    if (shft < 0 || (shft > 0 && shft >= shape[ii])) {
      PyErr_SetString(PyExc_RuntimeError,
                      "shift not within filter extent");
      goto exit;
    }
  }

  /* calculate how many sets of offsets must be stored: */
  for(ii = 0; ii < rank; ii++)
    offsets_size *=
      (dimensions[ii] < shape[ii] ? dimensions[ii] : shape[ii]);
  /* allocate offsets data: */
  *offsets = (int*)malloc(offsets_size * footprint_size * sizeof(int));
  if (!*offsets) {
    PyErr_NoMemory();
    goto exit;
  }
  if (coordinate_offsets) {
    *coordinate_offsets = (int*)malloc(offsets_size * rank *
                                       footprint_size * sizeof(int));
    if (!*coordinate_offsets) {
      PyErr_NoMemory();
      goto exit;
    }
  }

  for(ii = 0; ii < rank; ii++) {
    int stride;
    /* find maximum axis size: */
    if (dimensions[ii] > max_size)
      max_size = dimensions[ii];
    /* find maximum stride: */
    stride = strides[ii] < 0 ? -strides[ii] : strides[ii];
    if (stride > max_stride)
      max_stride = stride;
    /* coordinates for iterating over the kernel elements: */
    coordinates[ii] = 0;
    /* keep track of the kernel position: */
    position[ii] = 0;
  }
  /* the flag to indicate that we are outside the border must have a
     value that is larger than any possible offset: */
  *border_flag_value = max_size * max_stride + 1;

  /* calculate all possible offsets to elements in the filter kernel,
     for all regions in the array (interior and border regions): */
  po = *offsets;
  if (coordinate_offsets) {
    pc = *coordinate_offsets;
  }
  /* iterate over all regions: */
  for(jj = 0; jj < offsets_size; jj++) {
    /* iterate over the elements in the footprint array: */
    for(kk = 0; kk < filter_size; kk++) {
      int offset = 0;
      /* only calculate an offset if the footprint is 1: */
      if (pm[kk]) {
        /* find offsets along all axes: */
        for(ii = 0; ii < rank; ii++) {
          int shft = shape[ii] / 2 + shifts[ii];
          int cc = coordinates[ii] - shft + position[ii];
          int len = dimensions[ii];
          /* apply boundary conditions, if necessary: */
          switch (extend_mode) {
          case NI_EXTEND_MIRROR:
            if (cc < 0) {
              if (len <= 1) {
                cc = 0;
              } else {
                int sz2 = 2 * len - 2;
                cc = sz2 * (int)(-cc / sz2) + cc;
                cc = cc <= 1 - len ? cc + sz2 : -cc;
              }
            } else if (cc >= len) {
              if (len <= 1) {
                cc = 0;
              } else {
                int sz2 = 2 * len - 2;
                cc -= sz2 * (int)(cc / sz2);
                if (cc >= len)
                  cc = sz2 - cc;
              }
            }
            break;
          case NI_EXTEND_REFLECT:
            if (cc < 0) {
              if (len <= 1) {
                cc = 0;
              } else {
                int sz2 = 2 * len;
                if (cc < -sz2)
                  cc = sz2 * (int)(-cc / sz2) + cc;
                cc = cc < -len ? cc + sz2 : -cc - 1;
              }
            } else if (cc >= len) {
              if (len <= 1) {
                cc = 0;
              } else {
                int sz2 = 2 * len;
                cc -= sz2 * (int)(cc / sz2);
                if (cc >= len)
                  cc = sz2 - cc - 1;
              }
            }
            break;
          case NI_EXTEND_WRAP:
            if (cc < 0) {
              if (len <= 1) {
                cc = 0;
              } else {
                int sz = len;
                cc += sz * (int)(-cc / sz);
                if (cc < 0)
                  cc += sz;
              }
            } else if (cc >= len) {
              if (len <= 1) {
                cc = 0;
              } else {
                int sz = len;
                cc -= sz * (int)(cc / sz);
              }
            }
            break;
          case NI_EXTEND_NEAREST:
            if (cc < 0) {
              cc = 0;
            } else if (cc >= len) {
              cc = len - 1;
            }
            break;
          case NI_EXTEND_CONSTANT:
            if (cc < 0 || cc >= len)
              cc = *border_flag_value;
            break;
          default:
            PyErr_SetString(PyExc_RuntimeError, "mode not supported");
            goto exit;
          }

          /* calculate offset along current axis: */
          if (cc == *border_flag_value) {
            /* just flag that we are outside the border */
            offset = *border_flag_value;
            if (coordinate_offsets)
              pc[ii] = 0;
            break;
          } else {
            /* use an offset that is possibly mapped from outside the
               border: */
            cc = cc - position[ii];
            offset += strides[ii] * cc;
            if (coordinate_offsets)
              pc[ii] = cc;
          }
        }
        /* store the offset */
        *po++ = offset;
        if (coordinate_offsets)
          pc += rank;
      }
      /* next point in the filter: */
      for(ii = rank - 1; ii >= 0; ii--) {
        if (coordinates[ii] < shape[ii] - 1) {
          coordinates[ii]++;
          break;
        } else {
          coordinates[ii] = 0;
        }
      }
    }

    /* move to the next array region; once the position reaches the
       filter origin it jumps over the positions in between to the
       far border region: */
    for(ii = rank - 1; ii >= 0; ii--) {
      int shft = shape[ii] / 2 + shifts[ii];
      if (position[ii] == shft) {
        position[ii] += dimensions[ii] - shape[ii] + 1;
        if (position[ii] <= shft)
          position[ii] = shft + 1;
      } else {
        position[ii]++;
      }
      if (position[ii] < dimensions[ii]) {
        break;
      } else {
        position[ii] = 0;
      }
    }
  }

 exit:
  if (PyErr_Occurred()) {
    /* release the tables and clear the caller's pointers, so that no
       dangling pointer is handed back: */
    free(*offsets);
    *offsets = NULL;
    if (coordinate_offsets) {
      free(*coordinate_offsets);
      *coordinate_offsets = NULL;
    }
    return 0;
  } else {
    return 1;
  }
}


/* Create an empty coordinate list.
 *
 * `size` is stored as the list's block_size and `rank` as its rank; the
 * list starts without blocks.  Returns the malloc'ed list, or NULL with
 * a MemoryError set if the allocation fails. */
NI_CoordinateList* NI_InitCoordinateList(int size, int rank)
{
  NI_CoordinateList *result;

  result = (NI_CoordinateList*)malloc(sizeof(NI_CoordinateList));
  if (result == NULL) {
    PyErr_NoMemory();
    return NULL;
  }

  result->blocks = NULL;
  result->rank = rank;
  result->block_size = size;
  return result;
}

/* Move all blocks of `list2` to `list1`, leaving `list2` empty.
 *
 * Both lists must have the same block_size and rank, and `list1` must
 * not hold any blocks yet.
 *
 * NOTE: the return convention is the opposite of most other functions in
 * this file: 0 is returned on success, and 1 with a RuntimeError set on
 * failure. */
int NI_CoordinateListStealBlocks(NI_CoordinateList *list1,
                                 NI_CoordinateList *list2)
{
  if (list1->block_size != list2->block_size ||
      list1->rank != list2->rank) {
    PyErr_SetString(PyExc_RuntimeError, "coordinate lists not compatible");
    return 1;
  }
  if (list1->blocks) {
    PyErr_SetString(PyExc_RuntimeError, "first list is not empty");
    return 1;
  }
  /* hand over the whole chain: */
  list1->blocks = list2->blocks;
  list2->blocks = NULL;
  return 0;
}

/* Allocate a new, empty block and put it at the head of `list`.
 *
 * The block gets room for list->block_size * list->rank ints and a size
 * of zero.  Returns the new block, or NULL with a MemoryError set if an
 * allocation fails; in that case the list is left unchanged.
 *
 * Failure is detected from the allocation results themselves rather than
 * from PyErr_Occurred(), so that an exception that was already pending on
 * entry can never cause a block that has been linked into the list to be
 * freed. */
NI_CoordinateBlock* NI_CoordinateListAddBlock(NI_CoordinateList *list)
{
  NI_CoordinateBlock *block;

  block = (NI_CoordinateBlock*)malloc(sizeof(NI_CoordinateBlock));
  if (!block) {
    PyErr_NoMemory();
    return NULL;
  }
  block->coordinates = (int*)malloc(list->block_size * list->rank *
                                    sizeof(int));
  if (!block->coordinates) {
    /* the block has not been linked yet, so it can simply be dropped: */
    free(block);
    PyErr_NoMemory();
    return NULL;
  }
  block->size = 0;
  /* link the block in only after it is completely set up: */
  block->next = list->blocks;
  list->blocks = block;

  return block;
}

/* Remove the block at the head of `list`, freeing it together with its
 * coordinates, and return the new head (NULL when the list is or becomes
 * empty). */
NI_CoordinateBlock* NI_CoordinateListDeleteBlock(NI_CoordinateList *list)
{
  NI_CoordinateBlock *head = list->blocks;

  /* nothing to remove from an empty list: */
  if (head == NULL)
    return NULL;

  list->blocks = head->next;
  /* free() accepts NULL, so the coordinates need no separate check: */
  free(head->coordinates);
  free(head);
  return list->blocks;
}

/* Free `list`, every block it holds and the coordinates of each block.
 * A NULL `list` is ignored. */
void NI_FreeCoordinateList(NI_CoordinateList *list)
{
  NI_CoordinateBlock *cur, *following;

  if (list == NULL)
    return;

  for(cur = list->blocks; cur != NULL; cur = following) {
    /* remember the successor before the block goes away: */
    following = cur->next;
    /* free() accepts NULL, so the coordinates need no separate check: */
    free(cur->coordinates);
    free(cur);
  }
  list->blocks = NULL;
  free(list);
}

/*********************************************************************/
/* new stuff */
/*********************************************************************/

/* Initialize `iterator` for iteration over the single elements of
 * `array`.
 *
 * For every array axis the iterator receives the dimension minus one
 * (the form used by the iteration macros), a zero coordinate, the stride,
 * and a backstride equal to the stride times the decremented dimension.
 *
 * Always returns 1. */
int NI_InitPointIterator2(PyArrayObject *array, NI_Iterator2 *iterator)
{
  int axis;

  iterator->rank_m1 = array->nd - 1;

  /* the axes are independent of each other, so the order in which they
     are visited does not matter: */
  for(axis = array->nd - 1; axis >= 0; axis--) {
    iterator->coordinates[axis] = 0;
    iterator->strides[axis] = array->strides[axis];
    /* adapt the dimension for use in the macros: */
    iterator->dimensions[axis] = array->dimensions[axis] - 1;
    /* stride needed to move back at the end of this axis: */
    iterator->backstrides[axis] =
        array->strides[axis] * iterator->dimensions[axis];
  }
  return 1;
}


/* Restrict an initialized iterator to a lower sub-space.
 *
 * `axes` is a bit mask: axis ii is kept when bit ii is set.  The
 * dimensions, strides and backstrides of the kept axes are moved down so
 * that they occupy the first slots, and rank_m1 is set to the number of
 * kept axes minus one.
 *
 * Always returns 1. */
int NI_SubspaceIterator2(NI_Iterator2 *iterator, UInt32 axes)
{
  int src, dst = 0;

  for(src = 0; src <= iterator->rank_m1; src++) {
    /* skip the axes that are not selected: */
    if (!(axes & (((UInt32)1) << src)))
      continue;
    /* an axis that is already in place needs no copy: */
    if (dst != src) {
      iterator->dimensions[dst] = iterator->dimensions[src];
      iterator->strides[dst] = iterator->strides[src];
      iterator->backstrides[dst] = iterator->backstrides[src];
    }
    dst++;
  }
  iterator->rank_m1 = dst - 1;
  return 1;
}

/* Turn an initialized iterator into an iterator over array lines, by
 * keeping every one of its axes except `axis`.
 *
 * Returns the result of NI_SubspaceIterator2. */
int NI_LineIterator2(NI_Iterator2 *iterator, int axis)
{
  int k = 0, ndim = iterator->rank_m1 + 1;
  UInt32 mask = 0;

  /* select every axis but the line axis: */
  while (k < ndim) {
    if (k != axis)
      mask |= ((UInt32)1) << k;
    k++;
  }
  return NI_SubspaceIterator2(iterator, mask);
}


/******************************************************************/
/* Line buffers */
/******************************************************************/

/* Allocate line buffer data.
 *
 * Every buffered line has room for the array's extent along `axis` plus
 * `size1` leading and `size2` trailing elements (used for the boundary
 * conditions), all stored as doubles.
 *
 * lines:    in/out.  On entry the proposed number of lines; a value < 1
 *           means "not proposed", in which case as many lines as fit in
 *           `max_size` bytes are used (at least one).  The value is then
 *           capped at the number of lines in the array and written back.
 * max_size: byte budget used only when no number of lines is proposed.
 * buffer:   out; receives the malloc'ed storage, which the caller must
 *           release with free().
 *
 * Returns 1 on success, 0 with a MemoryError set if the allocation
 * fails. */
int NI_AllocateLineBuffer2(PyArrayObject* array, int axis, maybelong size1,
    maybelong size2, maybelong *lines, maybelong max_size, double **buffer)
{
  maybelong line_length, line_size, max_lines, alloc_size;

  /* an array of rank zero has no axis that could be indexed; it is
     treated as a single line of one element, the same convention that
     NI_InitLineBuffer2 uses for its line length: */
  line_length = array->nd > 0 ? array->dimensions[axis] : 1;

  /* the number of lines of the array is an upper limit for the
     number of lines in the buffer: */
  max_lines = NI_GetArraySize(array);
  if (line_length > 0)
    max_lines /= line_length;
  /* calculate the space needed for one line, including space to
     support the boundary conditions: */
  line_size = sizeof(double) * (line_length + size1 + size2);
  /* if *lines < 1, no number of lines is proposed, so we calculate it
     from the maximum size allowed: */
  if (*lines < 1) {
    *lines = line_size > 0 ? max_size / line_size : 0;
    if (*lines < 1)
      *lines = 1;
  }
  /* no need to allocate too many lines: */
  if (*lines > max_lines)
    *lines = max_lines;
  /* allocate data for the buffer.  malloc(0) is allowed to return NULL,
     which must not be mistaken for an out-of-memory condition when the
     array is empty, so a zero-sized request is turned into one byte: */
  alloc_size = *lines * line_size;
  *buffer = (double*)malloc(alloc_size != 0 ? (size_t)alloc_size : 1);
  if (!*buffer) {
    PyErr_NoMemory();
    return 0;
  }
  return 1;
}

/* Set up a line buffer structure for moving the lines that run along
   `axis` between an array and the storage in `buffer_data`.
   Returns 1 on success, or 0 with a Python error set. */
int NI_InitLineBuffer2(PyArrayObject *array, int axis, maybelong size1,
    maybelong size2, maybelong buffer_lines, double *buffer_data,
    NI_ExtendMode extend_mode, double extend_value, NI_LineBuffer2 *buffer)
{
  maybelong n_elements, length, stride, n_lines = 0;

  n_elements = NI_GetArraySize(array);
  /* a non-empty array needs room for at least one line: */
  if (buffer_lines < 1 && n_elements > 0) {
    PyErr_SetString(PyExc_RuntimeError, "buffer too small");
    return 0;
  }
  /* build an iterator over all axes except `axis`, which is used to
     move from line to line: */
  if (!NI_InitPointIterator2(array, &(buffer->iterator)) ||
      !NI_LineIterator2(&(buffer->iterator), axis))
    return 0;
  /* a rank-0 array counts as a single line of one element: */
  if (array->nd > 0) {
    length = array->dimensions[axis];
    stride = array->strides[axis];
  } else {
    length = 1;
    stride = 0;
  }
  /* lines of zero length leave the line count at zero: */
  if (length > 0)
    n_lines = n_elements / length;
  /* the array side: */
  buffer->array_data = NA_OFFSETDATA(array);
  buffer->array_type = array->descr->type_num;
  buffer->array_lines = n_lines;
  buffer->line_length = length;
  buffer->line_stride = stride;
  /* the buffer side: */
  buffer->buffer_data = buffer_data;
  buffer->buffer_lines = buffer_lines;
  buffer->next_line = 0;
  /* the boundary extension: */
  buffer->size1 = size1;
  buffer->size2 = size2;
  buffer->extend_mode = extend_mode;
  buffer->extend_value = extend_value;
  return 1;
}

/* Extend a line in memory to implement boundary conditions.

   `line` points to size1 + length + size2 doubles: the `length` data
   elements are stored at line[size1] .. line[size1 + length - 1], and
   this function fills the size1 elements in front of them and the
   size2 elements behind them. For the data "abcd" the modes give:

     NI_EXTEND_WRAP      ..cd abcd | abcd | abcd ab..
     NI_EXTEND_MIRROR          ..dcb | abcd | cba..   (edge not repeated)
     NI_EXTEND_REFLECT        ..dcba | abcd | dcba..  (edge repeated)
     NI_EXTEND_NEAREST        ..aaaa | abcd | dddd..
     NI_EXTEND_CONSTANT       ..kkkk | abcd | kkkk..  (k = constant_value)

   Returns 1 on success. Returns 0 with a RuntimeError set if `mode` is
   none of the above.

   NOTE(review): WRAP and REFLECT divide by `length`, so they need
   length >= 1. Callers are presumably expected to guarantee this;
   confirm. */
int NI_ExtendLine2(double *line, maybelong length, maybelong size1,
                maybelong size2, NI_ExtendMode mode, double constant_value)
{
  maybelong ii, jj, length1, nextend, rextend;
  double *l1, *l2, *l3, val;

  switch (mode) {
  case NI_EXTEND_WRAP:
    /* front: nextend whole copies of the data, preceded by a partial
       copy of rextend elements: */
    nextend = size1 / length;
    rextend = size1 - nextend * length;
    /* the partial copy is made of the last rextend data elements: */
    l1 = line + size1 + length - rextend;
    l2 = line;
    for(ii = 0; ii < rextend; ii++)
      *l2++ = *l1++;
    for(ii = 0; ii < nextend; ii++) {
      l1 = line + size1;
      for(jj = 0; jj < length; jj++)
        *l2++ = *l1++;
    }
    /* back: nextend whole copies followed by the first rextend data
       elements: */
    nextend = size2 / length;
    rextend = size2 - nextend * length;
    l1 = line + size1;
    l2 = line + size1 + length;
    for(ii = 0; ii < nextend; ii++) {
      l3 = l1;
      for(jj = 0; jj < length; jj++)
        *l2++ = *l3++;
    }
    for(ii = 0; ii < rextend; ii++)
      *l2++ = *l1++;
    break;
  case NI_EXTEND_MIRROR:
    if (length == 1) {
      /* a single element has nothing to mirror, so it is repeated on
         both sides: */
      l1 = line;
      val = line[size1];
      for(ii = 0; ii < size1; ii++)
        *l1++ = val;
      l1 = line + size1 + length;
      val = line[size1 + length - 1];
      for(ii = 0; ii < size2; ii++)
        *l1++ = val;
    } else {
      /* the edge element is not repeated, so each mirrored segment
         has length - 1 elements: */
      length1 = length - 1;
      nextend = size1 / length1;
      rextend = size1 - nextend * length1;
      /* front: read forward starting at the second data element,
         write backward starting just in front of the data: */
      l1 = line + size1 + 1;
      l2 = l1 - 2;
      for(ii = 0; ii < nextend; ii++) {
        l3 = l1;
        for(jj = 0; jj < length1; jj++)
          *l2-- = *l3++;
        /* the next segment is read from the one that was just
           written: */
        l1 -= length1;
      }
      for(ii = 0; ii < rextend; ii++)
        *l2-- = *l1++;
      nextend = size2 / length1;
      rextend = size2 - nextend * length1;
      /* back: read backward starting at the one-but-last data element,
         write forward starting just behind the data: */
      l1 = line + size1 + length1 - 1;
      l2 = l1 + 2;
      for(ii = 0; ii < nextend; ii++) {
        l3 = l1;
        for(jj = 0; jj < length1; jj++)
          *l2++ = *l3--;
        /* the next segment is read from the one that was just
           written: */
        l1 += length1;
      }
      for(ii = 0; ii < rextend; ii++)
        *l2++ = *l1--;
    }
    break;
  case NI_EXTEND_REFLECT:
    /* the edge element is repeated, so each reflected segment has
       `length` elements: */
    nextend = size1 / length;
    rextend = size1 - nextend * length;
    /* front: read forward starting at the first data element, write
       backward starting just in front of the data: */
    l1 = line + size1;
    l2 = l1 - 1;
    for(ii = 0; ii < nextend; ii++) {
      l3 = l1;
      for(jj = 0; jj < length; jj++)
        *l2-- = *l3++;
      /* the next segment is read from the one that was just written: */
      l1 -= length;
    }
    l3 = l1;
    for(ii = 0; ii < rextend; ii++)
      *l2-- = *l3++;
    nextend = size2 / length;
    rextend = size2 - nextend * length;
    /* back: read backward starting at the last data element, write
       forward starting just behind the data: */
    l1 = line + size1 + length - 1;
    l2 = l1 + 1;
    for(ii = 0; ii < nextend; ii++) {
      l3 = l1;
      for(jj = 0; jj < length; jj++)
        *l2++ = *l3--;
      /* the next segment is read from the one that was just written: */
      l1 += length;
    }
    for(ii = 0; ii < rextend; ii++)
      *l2++ = *l1--;
    break;
  case NI_EXTEND_NEAREST:
    /* front: repeat the first data element: */
    l1 = line;
    val = line[size1];
    for(ii = 0; ii < size1; ii++)
      *l1++ = val;
    /* back: repeat the last data element: */
    l1 = line + size1 + length;
    val = line[size1 + length - 1];
    for(ii = 0; ii < size2; ii++)
      *l1++ = val;
    break;
  case NI_EXTEND_CONSTANT:
    /* both sides are filled with constant_value, the data is not
       read: */
    l1 = line;
    for(ii = 0; ii < size1; ii++)
      *l1++ = constant_value;
    l1 = line + size1 + length;
    for(ii = 0; ii < size2; ii++)
      *l1++ = constant_value;
    break;
  default:
    PyErr_SetString(PyExc_RuntimeError, "mode not supported");
    return 0;
  }
  return 1;
}


/* Expands to one complete `case t<_type>: { ... } break` arm of a switch
   over the array type; the caller supplies the closing semicolon.

   The arm reads _length elements of type _type through the pointer
   _pi, converts each one to double and stores it in _po[0] ..
   _po[_length - 1]. _pi must be a modifiable pointer: it is advanced
   by _stride after every element. _length and _stride are evaluated
   on every pass of the loop.

   All value arguments are parenthesized so that expressions such as
   `buf + 1` or `a < b ? a : b` can be passed safely. */
#define CASE_COPY_DATA_TO_LINE(_pi, _po, _length, _stride, _type) \
case t ## _type:                                                  \
{                                                                 \
  maybelong _ii;                                                  \
  for(_ii = 0; _ii < (_length); _ii++) {                          \
    (_po)[_ii] = (double)*(_type*)(_pi);                          \
    (_pi) += (_stride);                                           \
  }                                                               \
}                                                                 \
break


/* Fill the line buffer with the next lines of the array, converted to
   double and extended at both ends.

   On return *number_of_lines holds the number of lines placed in the
   buffer by this call and *more is non-zero if the array still has
   lines that were not copied. Returns 1 on success, or 0 with a Python
   error set. */
int NI_ArrayToLineBuffer2(NI_LineBuffer2 *buffer, maybelong *number_of_lines,
                         int *more)
{
  /* the data of a line starts behind its leading extension: */
  double *dest = buffer->buffer_data + buffer->size1;
  maybelong length = buffer->line_length;
  char *src;

  *number_of_lines = 0;
  for(;;) {
    /* stop when the array has no more lines ... */
    if (buffer->next_line >= buffer->array_lines)
      break;
    /* ... or when the buffer cannot hold another one: */
    if (*number_of_lines >= buffer->buffer_lines)
      break;
    src = buffer->array_data;
    /* convert one line of the array to double: */
    switch (buffer->array_type) {
      CASE_COPY_DATA_TO_LINE(src, dest, length, buffer->line_stride, Bool);
      CASE_COPY_DATA_TO_LINE(src, dest, length, buffer->line_stride, UInt8);
      CASE_COPY_DATA_TO_LINE(src, dest, length, buffer->line_stride, UInt16);
      CASE_COPY_DATA_TO_LINE(src, dest, length, buffer->line_stride, UInt32);
#if HAS_UINT64
      CASE_COPY_DATA_TO_LINE(src, dest, length, buffer->line_stride, UInt64);
#endif
      CASE_COPY_DATA_TO_LINE(src, dest, length, buffer->line_stride, Int8);
      CASE_COPY_DATA_TO_LINE(src, dest, length, buffer->line_stride, Int16);
      CASE_COPY_DATA_TO_LINE(src, dest, length, buffer->line_stride, Int32);
      CASE_COPY_DATA_TO_LINE(src, dest, length, buffer->line_stride, Int64);
      CASE_COPY_DATA_TO_LINE(src, dest, length, buffer->line_stride, Float32);
      CASE_COPY_DATA_TO_LINE(src, dest, length, buffer->line_stride, Float64);
    default:
      PyErr_SetString(PyExc_RuntimeError, "array type not supported");
      return 0;
    }
    /* step the array pointer to the start of the following line: */
    NI_ITERATOR_NEXT(buffer->iterator, buffer->array_data);
    /* fill in the extensions, if any were requested: */
    if (buffer->size1 + buffer->size2 > 0 &&
        !NI_ExtendLine2(dest - buffer->size1, length, buffer->size1,
                        buffer->size2, buffer->extend_mode,
                        buffer->extend_value))
      return 0;
    /* one more array line consumed, one more line in the buffer: */
    ++(buffer->next_line);
    ++(*number_of_lines);
    /* advance to the data of the next line in the buffer: */
    dest += buffer->line_length + buffer->size1 + buffer->size2;
  }
  /* tell the caller if there are array lines left: */
  *more = buffer->next_line < buffer->array_lines;
  return 1;
}

/* Expands to one complete `case t<_type>: { ... } break` arm of a switch
   over the array type; the caller supplies the closing semicolon.

   The arm converts _pi[0] .. _pi[_length - 1] to _type and stores each
   value through the pointer _po. _po must be a modifiable pointer: it
   is advanced by _stride after every element. _length and _stride are
   evaluated on every pass of the loop.

   All value arguments are parenthesized so that expressions such as
   `buf + 1` or `a < b ? a : b` can be passed safely. */
#define CASE_COPY_LINE_TO_DATA(_pi, _po, _length, _stride, _type) \
case t ## _type:                                                  \
{                                                                 \
  maybelong _ii;                                                  \
  for(_ii = 0; _ii < (_length); _ii++) {                          \
    *(_type*)(_po) = (_type)(_pi)[_ii];                           \
    (_po) += (_stride);                                           \
  }                                                               \
}                                                                 \
break

/* Write the lines held in the buffer back to the array, converting
   them from double to the array type. The extensions of the lines are
   skipped. Returns 1 on success, or 0 with a Python error set. */
int NI_LineBufferToArray2(NI_LineBuffer2 *buffer)
{
  /* the data of a line starts behind its leading extension: */
  double *src = buffer->buffer_data + buffer->size1;
  maybelong length = buffer->line_length;
  maybelong line = 0;
  char *dest;

  /* go on until the buffer is exhausted or until every line of the
     array has been written: */
  while (line < buffer->buffer_lines &&
         buffer->next_line != buffer->array_lines) {
    dest = buffer->array_data;
    /* convert one buffered line to the array type: */
    switch (buffer->array_type) {
      CASE_COPY_LINE_TO_DATA(src, dest, length, buffer->line_stride, Bool);
      CASE_COPY_LINE_TO_DATA(src, dest, length, buffer->line_stride, UInt8);
      CASE_COPY_LINE_TO_DATA(src, dest, length, buffer->line_stride, UInt16);
      CASE_COPY_LINE_TO_DATA(src, dest, length, buffer->line_stride, UInt32);
#if HAS_UINT64
      CASE_COPY_LINE_TO_DATA(src, dest, length, buffer->line_stride, UInt64);
#endif
      CASE_COPY_LINE_TO_DATA(src, dest, length, buffer->line_stride, Int8);
      CASE_COPY_LINE_TO_DATA(src, dest, length, buffer->line_stride, Int16);
      CASE_COPY_LINE_TO_DATA(src, dest, length, buffer->line_stride, Int32);
      CASE_COPY_LINE_TO_DATA(src, dest, length, buffer->line_stride, Int64);
      CASE_COPY_LINE_TO_DATA(src, dest, length, buffer->line_stride, Float32);
      CASE_COPY_LINE_TO_DATA(src, dest, length, buffer->line_stride, Float64);
    default:
      PyErr_SetString(PyExc_RuntimeError, "array type not supported");
      return 0;
    }
    /* step the array pointer to the start of the following line: */
    NI_ITERATOR_NEXT(buffer->iterator, buffer->array_data);
    /* one more array line written: */
    ++(buffer->next_line);
    /* advance to the data of the next line in the buffer: */
    src += buffer->line_length + buffer->size1 + buffer->size2;
    ++line;
  }
  return 1;
}

/******************************************************************/
/* Multi-dimensional filter support functions */
/******************************************************************/

/* Initialize a filter iterator.

   rank         - number of array axes.
   filter_shape - filter extents, one entry for each axis that is NOT
                  flagged in `frame`, in axis order.
   filter_size  - stride stored for the last axis; presumably the number
                  of offsets in one set of the offsets table (confirm
                  against the caller).
   array_shape  - array extents, one entry per axis.
   frame        - bit mask; an axis whose bit is set gets a filter
                  extent of 1 and an origin of 0.
   origins      - filter origins, laid out like filter_shape, or NULL
                  for an origin of 0 along every axis.
   iterator     - receives strides, backstrides, bound1 and bound2.

   Always returns 1. */
int
NI_InitFilterIterator2(int rank, maybelong *filter_shape,
          maybelong filter_size, maybelong *array_shape, UInt32 frame,
          maybelong *origins, NI_FilterIterator2 *iterator)
{
  int ii;
  maybelong fshape[MAXDIM], forigins[MAXDIM];

  /* expand the filter shape and origins to one entry per axis: */
  for(ii = 0; ii < rank; ii++) {
    if (!((frame >> ii) & (UInt32)1)) {
      fshape[ii] = *filter_shape++;
      /* an integer zero keeps the whole expression integral; a
         floating point zero would send the origin through double: */
      forigins[ii] = origins ? *origins++ : 0;
    } else {
      fshape[ii] = 1;
      forigins[ii] = 0;
    }
  }
  /* calculate the strides, used to move the offsets pointer through
     the offsets table: */
  if (rank > 0) {
    iterator->strides[rank - 1] = filter_size;
    for(ii = rank - 2; ii >= 0; ii--) {
      /* along each axis there are min(array extent, filter extent)
         sets of offsets: */
      maybelong step = array_shape[ii + 1] < fshape[ii + 1] ?
                                     array_shape[ii + 1] : fshape[ii + 1];
      iterator->strides[ii] =  iterator->strides[ii + 1] * step;
    }
  }
  for(ii = 0; ii < rank; ii++) {
    maybelong step = array_shape[ii] < fshape[ii] ?
                                             array_shape[ii] : fshape[ii];
    maybelong orgn = fshape[ii] / 2 + forigins[ii];
    /* stride for stepping back to previous offsets: */
    iterator->backstrides[ii] = (step - 1) * iterator->strides[ii];
    /* initialize boundary extension sizes: */
    iterator->bound1[ii] = orgn;
    iterator->bound2[ii] = array_shape[ii] - fshape[ii] + orgn;
  }
  return 1;
}

/* Calculate the offsets to the filter points, for all border regions and
   the interior of the array.

   array             - array the filter is applied to; its rank, shape
                       and strides are used.
   footprint         - one flag per filter element, only elements with
                       a non-zero flag get an offset; NULL selects all
                       elements.
   filter_shape      - filter extents, one entry for each axis that is
                       NOT flagged in `frame`, in axis order.
   frame             - bit mask; an axis whose bit is set gets a filter
                       extent of 1 and an origin of 0.
   origins           - filter origins, laid out like filter_shape, or
                       NULL for an origin of 0 along every axis.
   mode              - boundary condition for filter elements that fall
                       outside the array.
   offsets           - out: table allocated with malloc that holds one
                       set of offsets per region. Each offset is
                       relative to the element the filter is centered
                       on, in the units of the array strides.
   border_flag_value - out: value stored instead of an offset when the
                       filter element is outside the array and `mode`
                       is NI_EXTEND_CONSTANT.

   Returns 1 on success. If a Python error is set on exit, the table is
   released, *offsets is set to NULL and 0 is returned. */
int NI_InitFilterOffsets2(PyArrayObject *array, Bool *footprint,
     maybelong *filter_shape, UInt32 frame, maybelong* origins,
     NI_ExtendMode mode, maybelong **offsets, maybelong *border_flag_value)
{
  int rank, ii;
  maybelong kk, ll, filter_size = 1, offsets_size = 1, max_size = 0;
  maybelong max_stride = 0, *ashape = NULL, *astrides = NULL, *po;
  maybelong footprint_size = 0, coordinates[MAXDIM], position[MAXDIM];
  maybelong fshape[MAXDIM], forigins[MAXDIM], alloc_count;

  rank = array->nd;
  ashape = array->dimensions;
  astrides = array->strides;
  /* expand the filter shape and origins to one entry per axis: */
  for(ii = 0; ii < rank; ii++) {
    if (!((frame >> ii) & (UInt32)1)) {
      fshape[ii] = *filter_shape++;
      /* an integer zero keeps the whole expression integral; a
         floating point zero would send the origin through double: */
      forigins[ii] = origins ? *origins++ : 0;
    } else {
      fshape[ii] = 1;
      forigins[ii] = 0;
    }
  }
  /* the size of the footprint array: */
  for(ii = 0; ii < rank; ii++)
    filter_size *= fshape[ii];
  /* calculate the number of non-zero elements in the footprint: */
  if (footprint) {
    for(kk = 0; kk < filter_size; kk++)
      if (footprint[kk])
        ++footprint_size;
  } else {
    footprint_size = filter_size;
  }
  /* calculate how many sets of offsets must be stored: */
  for(ii = 0; ii < rank; ii++)
    offsets_size *= (ashape[ii] < fshape[ii] ? ashape[ii] : fshape[ii]);
  /* allocate offsets data. malloc(0) is allowed to return NULL, which
     must not be mistaken for running out of memory, so an empty table
     still gets room for one element: */
  alloc_count = offsets_size * footprint_size;
  if (alloc_count == 0)
    alloc_count = 1;
  *offsets = (maybelong*)malloc(alloc_count * sizeof(maybelong));
  if (!*offsets) {
    PyErr_NoMemory();
    goto exit;
  }
  for(ii = 0; ii < rank; ii++) {
    maybelong stride;
    /* find maximum axis size: */
    if (ashape[ii] > max_size)
      max_size = ashape[ii];
    /* find maximum stride: */
    stride = astrides[ii] < 0 ? -astrides[ii] : astrides[ii];
    if (stride > max_stride)
      max_stride = stride;
    /* coordinates for iterating over the kernel elements: */
    coordinates[ii] = 0;
    /* keep track of the kernel position: */
    position[ii] = 0;
  }
  /* the flag to indicate that we are outside the border must have a
     value that is larger than any possible offset: */
  *border_flag_value = max_size * max_stride + 1;
  /* calculate all possible offsets to elements in the filter kernel,
     for all regions in the array (interior and border regions): */
  po = *offsets;
  /* iterate over all regions: */
  for(ll = 0; ll < offsets_size; ll++) {
    /* iterate over the elements in the footprint array: */
    for(kk = 0; kk < filter_size; kk++) {
      maybelong offset = 0;
      /* only calculate an offset if the footprint is 1: */
      if (!footprint || footprint[kk]) {
        /* find offsets along all axes: */
        for(ii = 0; ii < rank; ii++) {
          maybelong orgn = fshape[ii] / 2 + forigins[ii];
          maybelong cc = coordinates[ii] - orgn + position[ii];
          maybelong len = ashape[ii];
          /* apply boundary conditions, if necessary. The periods are
             kept in maybelong so that large axes are not truncated: */
          switch (mode) {
          case NI_EXTEND_MIRROR:
            if (cc < 0) {
              if (len <= 1) {
                cc = 0;
              } else {
                maybelong sz2 = 2 * len - 2;
                cc = sz2 * (-cc / sz2) + cc;
                cc = cc <= 1 - len ? cc + sz2 : -cc;
              }
            } else if (cc >= len) {
              if (len <= 1) {
                cc = 0;
              } else {
                maybelong sz2 = 2 * len - 2;
                cc -= sz2 * (cc / sz2);
                if (cc >= len)
                  cc = sz2 - cc;
              }
            }
            break;
          case NI_EXTEND_REFLECT:
            if (cc < 0) {
              if (len <= 1) {
                cc = 0;
              } else {
                maybelong sz2 = 2 * len;
                if (cc < -sz2)
                  cc = sz2 * (-cc / sz2) + cc;
                cc = cc < -len ? cc + sz2 : -cc - 1;
              }
            } else if (cc >= len) {
              if (len <= 1) {
                cc = 0;
              } else {
                maybelong sz2 = 2 * len;
                cc -= sz2 * (cc / sz2);
                if (cc >= len)
                  cc = sz2 - cc - 1;
              }
            }
            break;
          case NI_EXTEND_WRAP:
            if (cc < 0) {
              if (len <= 1) {
                cc = 0;
              } else {
                maybelong sz = len;
                cc += sz * (-cc / sz);
                if (cc < 0)
                  cc += sz;
              }
            } else if (cc >= len) {
              if (len <= 1) {
                cc = 0;
              } else {
                maybelong sz = len;
                cc -= sz * (cc / sz);
              }
            }
            break;
          case NI_EXTEND_NEAREST:
            if (cc < 0) {
              cc = 0;
            } else if (cc >= len) {
              cc = len - 1;
            }
            break;
          case NI_EXTEND_CONSTANT:
            if (cc < 0 || cc >= len)
              cc = *border_flag_value;
            break;
          default:
            PyErr_SetString(PyExc_RuntimeError,
                                            "boundary mode not supported");
            goto exit;
          }

          /* calculate offset along current axis: */
          if (cc == *border_flag_value) {
            /* just flag that we are outside the border */
            offset = *border_flag_value;
            break;
          } else {
            /* use an offset that is possibly mapped from outside the
               border: */
            cc = cc - position[ii];
            offset += astrides[ii] * cc;
          }
        }
        /* store the offset */
        *po++ = offset;
      }
      /* next point in the filter: */
      for(ii = rank - 1; ii >= 0; ii--) {
        if (coordinates[ii] < fshape[ii] - 1) {
          coordinates[ii]++;
          break;
        } else {
          coordinates[ii] = 0;
        }
      }
    }

    /* move to the next array region: */
    for(ii = rank - 1; ii >= 0; ii--) {
      maybelong orgn = fshape[ii] / 2 + forigins[ii];
      if (position[ii] == orgn) {
        /* a single set of offsets is stored for the position at the
           filter origin, so jump from there straight to the positions
           near the far border: */
        position[ii] += ashape[ii] - fshape[ii] + 1;
        if (position[ii] <= orgn)
          position[ii] = orgn + 1;
      } else {
        position[ii]++;
      }
      if (position[ii] < ashape[ii]) {
        break;
      } else {
        /* this axis is done, start it over and carry on to the
           previous axis: */
        position[ii] = 0;
      }
    }
  }

 exit:
  if (PyErr_Occurred()) {
    /* do not hand a dangling pointer back to the caller: */
    free(*offsets);
    *offsets = NULL;
    return 0;
  } else {
    return 1;
  }
}


