/*
 * Decompiled with CFR 0.152.
 */
package boofcv.alg.filter.convolve.noborder;

import boofcv.concurrency.BoofConcurrency;
import boofcv.concurrency.IWorkArrays;
import boofcv.struct.convolve.Kernel1D_S32;
import boofcv.struct.convolve.Kernel2D_S32;
import boofcv.struct.image.GrayS32;

public class ConvolveImageUnrolled_SB_MT_S32_S32_Div {
    /**
     * Selects the unrolled horizontal convolution that matches the kernel width and runs it.
     *
     * @param kernel 1D kernel; only handled when its width is odd and {@code offset == width / 2}
     * @param image input image
     * @param dest output image
     * @param divisor value each convolution sum is divided by
     * @return {@code true} if the kernel width was 3, 5, 7, 9 or 11 and the matching unrolled
     *         method was invoked; {@code false} if the kernel is not supported, in which case
     *         nothing is computed
     */
    public static boolean horizontal(Kernel1D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor) {
        final boolean centered = kernel.offset == kernel.width / 2;
        final boolean oddWidth = kernel.width % 2 != 0;
        if (!centered || !oddWidth) {
            return false;
        }
        switch (kernel.width) {
            case 3:
                horizontal3(kernel, image, dest, divisor);
                return true;
            case 5:
                horizontal5(kernel, image, dest, divisor);
                return true;
            case 7:
                horizontal7(kernel, image, dest, divisor);
                return true;
            case 9:
                horizontal9(kernel, image, dest, divisor);
                return true;
            case 11:
                horizontal11(kernel, image, dest, divisor);
                return true;
            default:
                // No unrolled variant exists for this width.
                return false;
        }
    }

    /**
     * Selects the unrolled vertical convolution that matches the kernel width and runs it.
     *
     * @param kernel 1D kernel; only handled when its width is odd and {@code offset == width / 2}
     * @param image input image
     * @param dest output image
     * @param divisor value each convolution sum is divided by
     * @return {@code true} if the kernel width was 3, 5, 7, 9 or 11 and the matching unrolled
     *         method was invoked; {@code false} if the kernel is not supported, in which case
     *         nothing is computed
     */
    public static boolean vertical(Kernel1D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor) {
        final boolean centered = kernel.offset == kernel.width / 2;
        final boolean oddWidth = kernel.width % 2 != 0;
        if (!centered || !oddWidth) {
            return false;
        }
        switch (kernel.width) {
            case 3:
                vertical3(kernel, image, dest, divisor);
                return true;
            case 5:
                vertical5(kernel, image, dest, divisor);
                return true;
            case 7:
                vertical7(kernel, image, dest, divisor);
                return true;
            case 9:
                vertical9(kernel, image, dest, divisor);
                return true;
            case 11:
                vertical11(kernel, image, dest, divisor);
                return true;
            default:
                // No unrolled variant exists for this width.
                return false;
        }
    }

    /**
     * Selects the unrolled 2D convolution that matches the kernel width and runs it.
     *
     * @param kernel 2D kernel; only handled when its width is odd and {@code offset == width / 2}
     * @param image input image
     * @param dest output image
     * @param divisor value each convolution sum is divided by
     * @param work scratch-array pool forwarded unchanged to the selected implementation
     * @return {@code true} if the kernel width was 3, 5, 7, 9 or 11 and the matching unrolled
     *         method was invoked; {@code false} if the kernel is not supported, in which case
     *         nothing is computed
     */
    public static boolean convolve(Kernel2D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor, IWorkArrays work) {
        final boolean centered = kernel.offset == kernel.width / 2;
        final boolean oddWidth = kernel.width % 2 != 0;
        if (!centered || !oddWidth) {
            return false;
        }
        switch (kernel.width) {
            case 3:
                convolve3(kernel, image, dest, divisor, work);
                return true;
            case 5:
                convolve5(kernel, image, dest, divisor, work);
                return true;
            case 7:
                convolve7(kernel, image, dest, divisor, work);
                return true;
            case 9:
                convolve9(kernel, image, dest, divisor, work);
                return true;
            case 11:
                convolve11(kernel, image, dest, divisor, work);
                return true;
            default:
                // No unrolled variant exists for this width.
                return false;
        }
    }

    /**
     * Horizontal convolution with a 3-element kernel, fully unrolled, followed by a rounded
     * integer division.
     *
     * <p>For every row index in {@code [0, image.height)}, output columns {@code radius} up to
     * (but excluding) {@code width - radius} receive {@code (sum + divisor / 2) / divisor}, where
     * {@code sum} is the dot product of the kernel with the 3 consecutive source pixels that
     * start {@code radius} columns to the left of the output pixel. Other columns of
     * {@code dest} are not written. Rows are dispatched through
     * {@code BoofConcurrency.loopFor}.</p>
     *
     * @param kernel kernel whose first 3 coefficients and radius are used
     * @param image input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     */
    public static void horizontal3(Kernel1D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor) {
        final int[] srcPixels = image.data;
        final int[] dstPixels = dest.data;
        final int c1 = kernel.data[0];
        final int c2 = kernel.data[1];
        final int c3 = kernel.data[2];
        final int radius = kernel.getRadius();
        final int cols = image.getWidth();
        final int roundOffset = divisor / 2;
        BoofConcurrency.loopFor(0, image.height, row -> {
            int dst = dest.startIndex + row * dest.stride + radius;
            // The kernel window for the first output pixel begins at the row's first pixel.
            final int srcBegin = image.startIndex + row * image.stride;
            final int srcStop = srcBegin + cols - 2 * radius;
            for (int s = srcBegin; s < srcStop; s++) {
                final int sum = srcPixels[s] * c1
                        + srcPixels[s + 1] * c2
                        + srcPixels[s + 2] * c3;
                dstPixels[dst++] = (sum + roundOffset) / divisor;
            }
        });
    }

    /**
     * Horizontal convolution with a 5-element kernel, fully unrolled, followed by a rounded
     * integer division.
     *
     * <p>For every row index in {@code [0, image.height)}, output columns {@code radius} up to
     * (but excluding) {@code width - radius} receive {@code (sum + divisor / 2) / divisor}, where
     * {@code sum} is the dot product of the kernel with the 5 consecutive source pixels that
     * start {@code radius} columns to the left of the output pixel. Other columns of
     * {@code dest} are not written. Rows are dispatched through
     * {@code BoofConcurrency.loopFor}.</p>
     *
     * @param kernel kernel whose first 5 coefficients and radius are used
     * @param image input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     */
    public static void horizontal5(Kernel1D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor) {
        final int[] srcPixels = image.data;
        final int[] dstPixels = dest.data;
        final int c1 = kernel.data[0];
        final int c2 = kernel.data[1];
        final int c3 = kernel.data[2];
        final int c4 = kernel.data[3];
        final int c5 = kernel.data[4];
        final int radius = kernel.getRadius();
        final int cols = image.getWidth();
        final int roundOffset = divisor / 2;
        BoofConcurrency.loopFor(0, image.height, row -> {
            int dst = dest.startIndex + row * dest.stride + radius;
            // The kernel window for the first output pixel begins at the row's first pixel.
            final int srcBegin = image.startIndex + row * image.stride;
            final int srcStop = srcBegin + cols - 2 * radius;
            for (int s = srcBegin; s < srcStop; s++) {
                final int sum = srcPixels[s] * c1
                        + srcPixels[s + 1] * c2
                        + srcPixels[s + 2] * c3
                        + srcPixels[s + 3] * c4
                        + srcPixels[s + 4] * c5;
                dstPixels[dst++] = (sum + roundOffset) / divisor;
            }
        });
    }

    /**
     * Horizontal convolution with a 7-element kernel, fully unrolled, followed by a rounded
     * integer division.
     *
     * <p>For every row index in {@code [0, image.height)}, output columns {@code radius} up to
     * (but excluding) {@code width - radius} receive {@code (sum + divisor / 2) / divisor}, where
     * {@code sum} is the dot product of the kernel with the 7 consecutive source pixels that
     * start {@code radius} columns to the left of the output pixel. Other columns of
     * {@code dest} are not written. Rows are dispatched through
     * {@code BoofConcurrency.loopFor}.</p>
     *
     * @param kernel kernel whose first 7 coefficients and radius are used
     * @param image input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     */
    public static void horizontal7(Kernel1D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor) {
        final int[] srcPixels = image.data;
        final int[] dstPixels = dest.data;
        final int c1 = kernel.data[0];
        final int c2 = kernel.data[1];
        final int c3 = kernel.data[2];
        final int c4 = kernel.data[3];
        final int c5 = kernel.data[4];
        final int c6 = kernel.data[5];
        final int c7 = kernel.data[6];
        final int radius = kernel.getRadius();
        final int cols = image.getWidth();
        final int roundOffset = divisor / 2;
        BoofConcurrency.loopFor(0, image.height, row -> {
            int dst = dest.startIndex + row * dest.stride + radius;
            // The kernel window for the first output pixel begins at the row's first pixel.
            final int srcBegin = image.startIndex + row * image.stride;
            final int srcStop = srcBegin + cols - 2 * radius;
            for (int s = srcBegin; s < srcStop; s++) {
                final int sum = srcPixels[s] * c1
                        + srcPixels[s + 1] * c2
                        + srcPixels[s + 2] * c3
                        + srcPixels[s + 3] * c4
                        + srcPixels[s + 4] * c5
                        + srcPixels[s + 5] * c6
                        + srcPixels[s + 6] * c7;
                dstPixels[dst++] = (sum + roundOffset) / divisor;
            }
        });
    }

    /**
     * Horizontal convolution with a 9-element kernel, fully unrolled, followed by a rounded
     * integer division.
     *
     * <p>For every row index in {@code [0, image.height)}, output columns {@code radius} up to
     * (but excluding) {@code width - radius} receive {@code (sum + divisor / 2) / divisor}, where
     * {@code sum} is the dot product of the kernel with the 9 consecutive source pixels that
     * start {@code radius} columns to the left of the output pixel. Other columns of
     * {@code dest} are not written. Rows are dispatched through
     * {@code BoofConcurrency.loopFor}.</p>
     *
     * @param kernel kernel whose first 9 coefficients and radius are used
     * @param image input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     */
    public static void horizontal9(Kernel1D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor) {
        final int[] srcPixels = image.data;
        final int[] dstPixels = dest.data;
        final int c1 = kernel.data[0];
        final int c2 = kernel.data[1];
        final int c3 = kernel.data[2];
        final int c4 = kernel.data[3];
        final int c5 = kernel.data[4];
        final int c6 = kernel.data[5];
        final int c7 = kernel.data[6];
        final int c8 = kernel.data[7];
        final int c9 = kernel.data[8];
        final int radius = kernel.getRadius();
        final int cols = image.getWidth();
        final int roundOffset = divisor / 2;
        BoofConcurrency.loopFor(0, image.height, row -> {
            int dst = dest.startIndex + row * dest.stride + radius;
            // The kernel window for the first output pixel begins at the row's first pixel.
            final int srcBegin = image.startIndex + row * image.stride;
            final int srcStop = srcBegin + cols - 2 * radius;
            for (int s = srcBegin; s < srcStop; s++) {
                final int sum = srcPixels[s] * c1
                        + srcPixels[s + 1] * c2
                        + srcPixels[s + 2] * c3
                        + srcPixels[s + 3] * c4
                        + srcPixels[s + 4] * c5
                        + srcPixels[s + 5] * c6
                        + srcPixels[s + 6] * c7
                        + srcPixels[s + 7] * c8
                        + srcPixels[s + 8] * c9;
                dstPixels[dst++] = (sum + roundOffset) / divisor;
            }
        });
    }

    /**
     * Horizontal convolution with an 11-element kernel, fully unrolled, followed by a rounded
     * integer division.
     *
     * <p>For every row index in {@code [0, image.height)}, output columns {@code radius} up to
     * (but excluding) {@code width - radius} receive {@code (sum + divisor / 2) / divisor}, where
     * {@code sum} is the dot product of the kernel with the 11 consecutive source pixels that
     * start {@code radius} columns to the left of the output pixel. Other columns of
     * {@code dest} are not written. Rows are dispatched through
     * {@code BoofConcurrency.loopFor}.</p>
     *
     * @param kernel kernel whose first 11 coefficients and radius are used
     * @param image input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     */
    public static void horizontal11(Kernel1D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor) {
        final int[] srcPixels = image.data;
        final int[] dstPixels = dest.data;
        final int c1 = kernel.data[0];
        final int c2 = kernel.data[1];
        final int c3 = kernel.data[2];
        final int c4 = kernel.data[3];
        final int c5 = kernel.data[4];
        final int c6 = kernel.data[5];
        final int c7 = kernel.data[6];
        final int c8 = kernel.data[7];
        final int c9 = kernel.data[8];
        final int c10 = kernel.data[9];
        final int c11 = kernel.data[10];
        final int radius = kernel.getRadius();
        final int cols = image.getWidth();
        final int roundOffset = divisor / 2;
        BoofConcurrency.loopFor(0, image.height, row -> {
            int dst = dest.startIndex + row * dest.stride + radius;
            // The kernel window for the first output pixel begins at the row's first pixel.
            final int srcBegin = image.startIndex + row * image.stride;
            final int srcStop = srcBegin + cols - 2 * radius;
            for (int s = srcBegin; s < srcStop; s++) {
                final int sum = srcPixels[s] * c1
                        + srcPixels[s + 1] * c2
                        + srcPixels[s + 2] * c3
                        + srcPixels[s + 3] * c4
                        + srcPixels[s + 4] * c5
                        + srcPixels[s + 5] * c6
                        + srcPixels[s + 6] * c7
                        + srcPixels[s + 7] * c8
                        + srcPixels[s + 8] * c9
                        + srcPixels[s + 9] * c10
                        + srcPixels[s + 10] * c11;
                dstPixels[dst++] = (sum + roundOffset) / divisor;
            }
        });
    }

    /**
     * Vertical convolution with a 3-element kernel, fully unrolled, followed by a rounded
     * integer division.
     *
     * <p>For every output row {@code y} in {@code [radius, height - radius)} and every column in
     * {@code [0, width)} (dimensions taken from {@code dest}), the output pixel receives
     * {@code (sum + divisor / 2) / divisor}, where {@code sum} is the dot product of the kernel
     * with the 3 source pixels in that column starting at row {@code y - radius}. Other rows of
     * {@code dest} are not written. Rows are dispatched through
     * {@code BoofConcurrency.loopFor}.</p>
     *
     * @param kernel kernel whose first 3 coefficients and radius are used
     * @param image input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     */
    public static void vertical3(Kernel1D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor) {
        int[] dataSrc = image.data;
        int[] dataDst = dest.data;
        int k1 = kernel.data[0];
        int k2 = kernel.data[1];
        int k3 = kernel.data[2];
        int radius = kernel.getRadius();
        int imgWidth = dest.getWidth();
        int imgHeight = dest.getHeight();
        int halfDivisor = divisor / 2;
        int yEnd = imgHeight - radius;
        BoofConcurrency.loopFor(radius, yEnd, y -> {
            int stride = image.stride;
            int indexDst = dest.startIndex + y * dest.stride;
            // The start index has to be assigned before iEnd is derived from it; reading it
            // while still unassigned is rejected by the compiler.
            int i = image.startIndex + (y - radius) * stride;
            int iEnd = i + imgWidth;
            for (; i < iEnd; i++) {
                // Walk down the column one image row (stride) per kernel coefficient.
                int indexSrc = i;
                int total = dataSrc[indexSrc] * k1;
                total += dataSrc[indexSrc += stride] * k2;
                total += dataSrc[indexSrc += stride] * k3;
                dataDst[indexDst++] = (total + halfDivisor) / divisor;
            }
        });
    }

    /**
     * Vertical convolution with a 5-element kernel, fully unrolled, followed by a rounded
     * integer division.
     *
     * <p>For every output row {@code y} in {@code [radius, height - radius)} and every column in
     * {@code [0, width)} (dimensions taken from {@code dest}), the output pixel receives
     * {@code (sum + divisor / 2) / divisor}, where {@code sum} is the dot product of the kernel
     * with the 5 source pixels in that column starting at row {@code y - radius}. Other rows of
     * {@code dest} are not written. Rows are dispatched through
     * {@code BoofConcurrency.loopFor}.</p>
     *
     * @param kernel kernel whose first 5 coefficients and radius are used
     * @param image input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     */
    public static void vertical5(Kernel1D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor) {
        int[] dataSrc = image.data;
        int[] dataDst = dest.data;
        int k1 = kernel.data[0];
        int k2 = kernel.data[1];
        int k3 = kernel.data[2];
        int k4 = kernel.data[3];
        int k5 = kernel.data[4];
        int radius = kernel.getRadius();
        int imgWidth = dest.getWidth();
        int imgHeight = dest.getHeight();
        int halfDivisor = divisor / 2;
        int yEnd = imgHeight - radius;
        BoofConcurrency.loopFor(radius, yEnd, y -> {
            int stride = image.stride;
            int indexDst = dest.startIndex + y * dest.stride;
            // The start index has to be assigned before iEnd is derived from it; reading it
            // while still unassigned is rejected by the compiler.
            int i = image.startIndex + (y - radius) * stride;
            int iEnd = i + imgWidth;
            for (; i < iEnd; i++) {
                // Walk down the column one image row (stride) per kernel coefficient.
                int indexSrc = i;
                int total = dataSrc[indexSrc] * k1;
                total += dataSrc[indexSrc += stride] * k2;
                total += dataSrc[indexSrc += stride] * k3;
                total += dataSrc[indexSrc += stride] * k4;
                total += dataSrc[indexSrc += stride] * k5;
                dataDst[indexDst++] = (total + halfDivisor) / divisor;
            }
        });
    }

    /**
     * Vertical convolution with a 7-element kernel, fully unrolled, followed by a rounded
     * integer division.
     *
     * <p>For every output row {@code y} in {@code [radius, height - radius)} and every column in
     * {@code [0, width)} (dimensions taken from {@code dest}), the output pixel receives
     * {@code (sum + divisor / 2) / divisor}, where {@code sum} is the dot product of the kernel
     * with the 7 source pixels in that column starting at row {@code y - radius}. Other rows of
     * {@code dest} are not written. Rows are dispatched through
     * {@code BoofConcurrency.loopFor}.</p>
     *
     * @param kernel kernel whose first 7 coefficients and radius are used
     * @param image input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     */
    public static void vertical7(Kernel1D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor) {
        int[] dataSrc = image.data;
        int[] dataDst = dest.data;
        int k1 = kernel.data[0];
        int k2 = kernel.data[1];
        int k3 = kernel.data[2];
        int k4 = kernel.data[3];
        int k5 = kernel.data[4];
        int k6 = kernel.data[5];
        int k7 = kernel.data[6];
        int radius = kernel.getRadius();
        int imgWidth = dest.getWidth();
        int imgHeight = dest.getHeight();
        int halfDivisor = divisor / 2;
        int yEnd = imgHeight - radius;
        BoofConcurrency.loopFor(radius, yEnd, y -> {
            int stride = image.stride;
            int indexDst = dest.startIndex + y * dest.stride;
            // The start index has to be assigned before iEnd is derived from it; reading it
            // while still unassigned is rejected by the compiler.
            int i = image.startIndex + (y - radius) * stride;
            int iEnd = i + imgWidth;
            for (; i < iEnd; i++) {
                // Walk down the column one image row (stride) per kernel coefficient.
                int indexSrc = i;
                int total = dataSrc[indexSrc] * k1;
                total += dataSrc[indexSrc += stride] * k2;
                total += dataSrc[indexSrc += stride] * k3;
                total += dataSrc[indexSrc += stride] * k4;
                total += dataSrc[indexSrc += stride] * k5;
                total += dataSrc[indexSrc += stride] * k6;
                total += dataSrc[indexSrc += stride] * k7;
                dataDst[indexDst++] = (total + halfDivisor) / divisor;
            }
        });
    }

    /**
     * Vertical convolution with a 9-element kernel, fully unrolled, followed by a rounded
     * integer division.
     *
     * <p>For every output row {@code y} in {@code [radius, height - radius)} and every column in
     * {@code [0, width)} (dimensions taken from {@code dest}), the output pixel receives
     * {@code (sum + divisor / 2) / divisor}, where {@code sum} is the dot product of the kernel
     * with the 9 source pixels in that column starting at row {@code y - radius}. Other rows of
     * {@code dest} are not written. Rows are dispatched through
     * {@code BoofConcurrency.loopFor}.</p>
     *
     * @param kernel kernel whose first 9 coefficients and radius are used
     * @param image input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     */
    public static void vertical9(Kernel1D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor) {
        int[] dataSrc = image.data;
        int[] dataDst = dest.data;
        int k1 = kernel.data[0];
        int k2 = kernel.data[1];
        int k3 = kernel.data[2];
        int k4 = kernel.data[3];
        int k5 = kernel.data[4];
        int k6 = kernel.data[5];
        int k7 = kernel.data[6];
        int k8 = kernel.data[7];
        int k9 = kernel.data[8];
        int radius = kernel.getRadius();
        int imgWidth = dest.getWidth();
        int imgHeight = dest.getHeight();
        int halfDivisor = divisor / 2;
        int yEnd = imgHeight - radius;
        BoofConcurrency.loopFor(radius, yEnd, y -> {
            int stride = image.stride;
            int indexDst = dest.startIndex + y * dest.stride;
            // The start index has to be assigned before iEnd is derived from it; reading it
            // while still unassigned is rejected by the compiler.
            int i = image.startIndex + (y - radius) * stride;
            int iEnd = i + imgWidth;
            for (; i < iEnd; i++) {
                // Walk down the column one image row (stride) per kernel coefficient.
                int indexSrc = i;
                int total = dataSrc[indexSrc] * k1;
                total += dataSrc[indexSrc += stride] * k2;
                total += dataSrc[indexSrc += stride] * k3;
                total += dataSrc[indexSrc += stride] * k4;
                total += dataSrc[indexSrc += stride] * k5;
                total += dataSrc[indexSrc += stride] * k6;
                total += dataSrc[indexSrc += stride] * k7;
                total += dataSrc[indexSrc += stride] * k8;
                total += dataSrc[indexSrc += stride] * k9;
                dataDst[indexDst++] = (total + halfDivisor) / divisor;
            }
        });
    }

    /**
     * Vertical convolution with an 11-element kernel, fully unrolled, followed by a rounded
     * integer division.
     *
     * <p>For every output row {@code y} in {@code [radius, height - radius)} and every column in
     * {@code [0, width)} (dimensions taken from {@code dest}), the output pixel receives
     * {@code (sum + divisor / 2) / divisor}, where {@code sum} is the dot product of the kernel
     * with the 11 source pixels in that column starting at row {@code y - radius}. Other rows of
     * {@code dest} are not written. Rows are dispatched through
     * {@code BoofConcurrency.loopFor}.</p>
     *
     * @param kernel kernel whose first 11 coefficients and radius are used
     * @param image input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     */
    public static void vertical11(Kernel1D_S32 kernel, GrayS32 image, GrayS32 dest, int divisor) {
        int[] dataSrc = image.data;
        int[] dataDst = dest.data;
        int k1 = kernel.data[0];
        int k2 = kernel.data[1];
        int k3 = kernel.data[2];
        int k4 = kernel.data[3];
        int k5 = kernel.data[4];
        int k6 = kernel.data[5];
        int k7 = kernel.data[6];
        int k8 = kernel.data[7];
        int k9 = kernel.data[8];
        int k10 = kernel.data[9];
        int k11 = kernel.data[10];
        int radius = kernel.getRadius();
        int imgWidth = dest.getWidth();
        int imgHeight = dest.getHeight();
        int halfDivisor = divisor / 2;
        int yEnd = imgHeight - radius;
        BoofConcurrency.loopFor(radius, yEnd, y -> {
            int stride = image.stride;
            int indexDst = dest.startIndex + y * dest.stride;
            // The start index has to be assigned before iEnd is derived from it; reading it
            // while still unassigned is rejected by the compiler.
            int i = image.startIndex + (y - radius) * stride;
            int iEnd = i + imgWidth;
            for (; i < iEnd; i++) {
                // Walk down the column one image row (stride) per kernel coefficient.
                int indexSrc = i;
                int total = dataSrc[indexSrc] * k1;
                total += dataSrc[indexSrc += stride] * k2;
                total += dataSrc[indexSrc += stride] * k3;
                total += dataSrc[indexSrc += stride] * k4;
                total += dataSrc[indexSrc += stride] * k5;
                total += dataSrc[indexSrc += stride] * k6;
                total += dataSrc[indexSrc += stride] * k7;
                total += dataSrc[indexSrc += stride] * k8;
                total += dataSrc[indexSrc += stride] * k9;
                total += dataSrc[indexSrc += stride] * k10;
                total += dataSrc[indexSrc += stride] * k11;
                dataDst[indexDst++] = (total + halfDivisor) / divisor;
            }
        });
    }

    /**
     * 2D convolution with a 3x3 kernel, unrolled along each kernel row, followed by a rounded
     * integer division.
     *
     * <p>Output rows {@code [radius, height - radius)} and columns
     * {@code [radius, width - radius)} of {@code dest} receive
     * {@code (sum + divisor / 2) / divisor}; other pixels are not written. Row ranges are
     * dispatched through {@code BoofConcurrency.loopBlocks}; each invocation takes one scratch
     * row from the pool, uses it to accumulate per-column sums one kernel row at a time, and
     * returns it to the pool when the range is done.</p>
     *
     * @param kernel kernel whose first 3*3 coefficients (row-major) and radius are used
     * @param src input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     * @param work scratch-array pool; when {@code null} a new {@code IWorkArrays(src.width)} is
     *        created, otherwise {@code reset(src.width)} is called on it
     */
    public static void convolve3(Kernel2D_S32 kernel, GrayS32 src, GrayS32 dest, int divisor, IWorkArrays work) {
        final IWorkArrays pool;
        if (work != null) {
            work.reset(src.width);
            pool = work;
        } else {
            pool = new IWorkArrays(src.width);
        }
        final int[] srcPixels = src.data;
        final int[] dstPixels = dest.data;
        final int cols = src.getWidth();
        final int rows = src.getHeight();
        final int roundOffset = divisor / 2;
        final int radius = kernel.getRadius();
        final int kernelWidth = 2 * radius + 1;
        final int xStop = cols - radius;
        BoofConcurrency.loopBlocks(radius, rows - radius, kernelWidth, (y0, y1) -> {
            final int[] rowSums = pool.pop();
            for (int y = y0; y < y1; y++) {
                // Kernel row 0 overwrites the accumulator, so the scratch row needs no clearing.
                int c1 = kernel.data[0];
                int c2 = kernel.data[1];
                int c3 = kernel.data[2];
                int srcBase = src.startIndex + (y - radius) * src.stride - radius;
                for (int x = radius; x < xStop; x++) {
                    final int s = srcBase + x;
                    rowSums[x] = srcPixels[s] * c1
                            + srcPixels[s + 1] * c2
                            + srcPixels[s + 2] * c3;
                }
                // The remaining kernel rows are added onto the running per-column sums.
                for (int ky = 1; ky < 3; ky++) {
                    srcBase = src.startIndex + (y + ky - radius) * src.stride - radius;
                    final int w = ky * 3;
                    c1 = kernel.data[w];
                    c2 = kernel.data[w + 1];
                    c3 = kernel.data[w + 2];
                    for (int x = radius; x < xStop; x++) {
                        final int s = srcBase + x;
                        rowSums[x] += srcPixels[s] * c1
                                + srcPixels[s + 1] * c2
                                + srcPixels[s + 2] * c3;
                    }
                }
                int dst = dest.startIndex + y * dest.stride + radius;
                for (int x = radius; x < xStop; x++) {
                    dstPixels[dst++] = (rowSums[x] + roundOffset) / divisor;
                }
            }
            pool.recycle(rowSums);
        });
    }

    /**
     * 2D convolution with a 5x5 kernel, unrolled along each kernel row, followed by a rounded
     * integer division.
     *
     * <p>Output rows {@code [radius, height - radius)} and columns
     * {@code [radius, width - radius)} of {@code dest} receive
     * {@code (sum + divisor / 2) / divisor}; other pixels are not written. Row ranges are
     * dispatched through {@code BoofConcurrency.loopBlocks}; each invocation takes one scratch
     * row from the pool, uses it to accumulate per-column sums one kernel row at a time, and
     * returns it to the pool when the range is done.</p>
     *
     * @param kernel kernel whose first 5*5 coefficients (row-major) and radius are used
     * @param src input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     * @param work scratch-array pool; when {@code null} a new {@code IWorkArrays(src.width)} is
     *        created, otherwise {@code reset(src.width)} is called on it
     */
    public static void convolve5(Kernel2D_S32 kernel, GrayS32 src, GrayS32 dest, int divisor, IWorkArrays work) {
        final IWorkArrays pool;
        if (work != null) {
            work.reset(src.width);
            pool = work;
        } else {
            pool = new IWorkArrays(src.width);
        }
        final int[] srcPixels = src.data;
        final int[] dstPixels = dest.data;
        final int cols = src.getWidth();
        final int rows = src.getHeight();
        final int roundOffset = divisor / 2;
        final int radius = kernel.getRadius();
        final int kernelWidth = 2 * radius + 1;
        final int xStop = cols - radius;
        BoofConcurrency.loopBlocks(radius, rows - radius, kernelWidth, (y0, y1) -> {
            final int[] rowSums = pool.pop();
            for (int y = y0; y < y1; y++) {
                // Kernel row 0 overwrites the accumulator, so the scratch row needs no clearing.
                int c1 = kernel.data[0];
                int c2 = kernel.data[1];
                int c3 = kernel.data[2];
                int c4 = kernel.data[3];
                int c5 = kernel.data[4];
                int srcBase = src.startIndex + (y - radius) * src.stride - radius;
                for (int x = radius; x < xStop; x++) {
                    final int s = srcBase + x;
                    rowSums[x] = srcPixels[s] * c1
                            + srcPixels[s + 1] * c2
                            + srcPixels[s + 2] * c3
                            + srcPixels[s + 3] * c4
                            + srcPixels[s + 4] * c5;
                }
                // The remaining kernel rows are added onto the running per-column sums.
                for (int ky = 1; ky < 5; ky++) {
                    srcBase = src.startIndex + (y + ky - radius) * src.stride - radius;
                    final int w = ky * 5;
                    c1 = kernel.data[w];
                    c2 = kernel.data[w + 1];
                    c3 = kernel.data[w + 2];
                    c4 = kernel.data[w + 3];
                    c5 = kernel.data[w + 4];
                    for (int x = radius; x < xStop; x++) {
                        final int s = srcBase + x;
                        rowSums[x] += srcPixels[s] * c1
                                + srcPixels[s + 1] * c2
                                + srcPixels[s + 2] * c3
                                + srcPixels[s + 3] * c4
                                + srcPixels[s + 4] * c5;
                    }
                }
                int dst = dest.startIndex + y * dest.stride + radius;
                for (int x = radius; x < xStop; x++) {
                    dstPixels[dst++] = (rowSums[x] + roundOffset) / divisor;
                }
            }
            pool.recycle(rowSums);
        });
    }

    /**
     * 2D convolution with a 7x7 kernel, unrolled along each kernel row, followed by a rounded
     * integer division.
     *
     * <p>Output rows {@code [radius, height - radius)} and columns
     * {@code [radius, width - radius)} of {@code dest} receive
     * {@code (sum + divisor / 2) / divisor}; other pixels are not written. Row ranges are
     * dispatched through {@code BoofConcurrency.loopBlocks}; each invocation takes one scratch
     * row from the pool, uses it to accumulate per-column sums one kernel row at a time, and
     * returns it to the pool when the range is done.</p>
     *
     * @param kernel kernel whose first 7*7 coefficients (row-major) and radius are used
     * @param src input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     * @param work scratch-array pool; when {@code null} a new {@code IWorkArrays(src.width)} is
     *        created, otherwise {@code reset(src.width)} is called on it
     */
    public static void convolve7(Kernel2D_S32 kernel, GrayS32 src, GrayS32 dest, int divisor, IWorkArrays work) {
        final IWorkArrays pool;
        if (work != null) {
            work.reset(src.width);
            pool = work;
        } else {
            pool = new IWorkArrays(src.width);
        }
        final int[] srcPixels = src.data;
        final int[] dstPixels = dest.data;
        final int cols = src.getWidth();
        final int rows = src.getHeight();
        final int roundOffset = divisor / 2;
        final int radius = kernel.getRadius();
        final int kernelWidth = 2 * radius + 1;
        final int xStop = cols - radius;
        BoofConcurrency.loopBlocks(radius, rows - radius, kernelWidth, (y0, y1) -> {
            final int[] rowSums = pool.pop();
            for (int y = y0; y < y1; y++) {
                // Kernel row 0 overwrites the accumulator, so the scratch row needs no clearing.
                int c1 = kernel.data[0];
                int c2 = kernel.data[1];
                int c3 = kernel.data[2];
                int c4 = kernel.data[3];
                int c5 = kernel.data[4];
                int c6 = kernel.data[5];
                int c7 = kernel.data[6];
                int srcBase = src.startIndex + (y - radius) * src.stride - radius;
                for (int x = radius; x < xStop; x++) {
                    final int s = srcBase + x;
                    rowSums[x] = srcPixels[s] * c1
                            + srcPixels[s + 1] * c2
                            + srcPixels[s + 2] * c3
                            + srcPixels[s + 3] * c4
                            + srcPixels[s + 4] * c5
                            + srcPixels[s + 5] * c6
                            + srcPixels[s + 6] * c7;
                }
                // The remaining kernel rows are added onto the running per-column sums.
                for (int ky = 1; ky < 7; ky++) {
                    srcBase = src.startIndex + (y + ky - radius) * src.stride - radius;
                    final int w = ky * 7;
                    c1 = kernel.data[w];
                    c2 = kernel.data[w + 1];
                    c3 = kernel.data[w + 2];
                    c4 = kernel.data[w + 3];
                    c5 = kernel.data[w + 4];
                    c6 = kernel.data[w + 5];
                    c7 = kernel.data[w + 6];
                    for (int x = radius; x < xStop; x++) {
                        final int s = srcBase + x;
                        rowSums[x] += srcPixels[s] * c1
                                + srcPixels[s + 1] * c2
                                + srcPixels[s + 2] * c3
                                + srcPixels[s + 3] * c4
                                + srcPixels[s + 4] * c5
                                + srcPixels[s + 5] * c6
                                + srcPixels[s + 6] * c7;
                    }
                }
                int dst = dest.startIndex + y * dest.stride + radius;
                for (int x = radius; x < xStop; x++) {
                    dstPixels[dst++] = (rowSums[x] + roundOffset) / divisor;
                }
            }
            pool.recycle(rowSums);
        });
    }

    /**
     * 2D convolution with a 9x9 kernel, unrolled along each kernel row, followed by a rounded
     * integer division.
     *
     * <p>Output rows {@code [radius, height - radius)} and columns
     * {@code [radius, width - radius)} of {@code dest} receive
     * {@code (sum + divisor / 2) / divisor}; other pixels are not written. Row ranges are
     * dispatched through {@code BoofConcurrency.loopBlocks}; each invocation takes one scratch
     * row from the pool, uses it to accumulate per-column sums one kernel row at a time, and
     * returns it to the pool when the range is done.</p>
     *
     * @param kernel kernel whose first 9*9 coefficients (row-major) and radius are used
     * @param src input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     * @param work scratch-array pool; when {@code null} a new {@code IWorkArrays(src.width)} is
     *        created, otherwise {@code reset(src.width)} is called on it
     */
    public static void convolve9(Kernel2D_S32 kernel, GrayS32 src, GrayS32 dest, int divisor, IWorkArrays work) {
        final IWorkArrays pool;
        if (work != null) {
            work.reset(src.width);
            pool = work;
        } else {
            pool = new IWorkArrays(src.width);
        }
        final int[] srcPixels = src.data;
        final int[] dstPixels = dest.data;
        final int cols = src.getWidth();
        final int rows = src.getHeight();
        final int roundOffset = divisor / 2;
        final int radius = kernel.getRadius();
        final int kernelWidth = 2 * radius + 1;
        final int xStop = cols - radius;
        BoofConcurrency.loopBlocks(radius, rows - radius, kernelWidth, (y0, y1) -> {
            final int[] rowSums = pool.pop();
            for (int y = y0; y < y1; y++) {
                // Kernel row 0 overwrites the accumulator, so the scratch row needs no clearing.
                int c1 = kernel.data[0];
                int c2 = kernel.data[1];
                int c3 = kernel.data[2];
                int c4 = kernel.data[3];
                int c5 = kernel.data[4];
                int c6 = kernel.data[5];
                int c7 = kernel.data[6];
                int c8 = kernel.data[7];
                int c9 = kernel.data[8];
                int srcBase = src.startIndex + (y - radius) * src.stride - radius;
                for (int x = radius; x < xStop; x++) {
                    final int s = srcBase + x;
                    rowSums[x] = srcPixels[s] * c1
                            + srcPixels[s + 1] * c2
                            + srcPixels[s + 2] * c3
                            + srcPixels[s + 3] * c4
                            + srcPixels[s + 4] * c5
                            + srcPixels[s + 5] * c6
                            + srcPixels[s + 6] * c7
                            + srcPixels[s + 7] * c8
                            + srcPixels[s + 8] * c9;
                }
                // The remaining kernel rows are added onto the running per-column sums.
                for (int ky = 1; ky < 9; ky++) {
                    srcBase = src.startIndex + (y + ky - radius) * src.stride - radius;
                    final int w = ky * 9;
                    c1 = kernel.data[w];
                    c2 = kernel.data[w + 1];
                    c3 = kernel.data[w + 2];
                    c4 = kernel.data[w + 3];
                    c5 = kernel.data[w + 4];
                    c6 = kernel.data[w + 5];
                    c7 = kernel.data[w + 6];
                    c8 = kernel.data[w + 7];
                    c9 = kernel.data[w + 8];
                    for (int x = radius; x < xStop; x++) {
                        final int s = srcBase + x;
                        rowSums[x] += srcPixels[s] * c1
                                + srcPixels[s + 1] * c2
                                + srcPixels[s + 2] * c3
                                + srcPixels[s + 3] * c4
                                + srcPixels[s + 4] * c5
                                + srcPixels[s + 5] * c6
                                + srcPixels[s + 6] * c7
                                + srcPixels[s + 7] * c8
                                + srcPixels[s + 8] * c9;
                    }
                }
                int dst = dest.startIndex + y * dest.stride + radius;
                for (int x = radius; x < xStop; x++) {
                    dstPixels[dst++] = (rowSums[x] + roundOffset) / divisor;
                }
            }
            pool.recycle(rowSums);
        });
    }

    /**
     * 2D convolution with an 11x11 kernel, unrolled along each kernel row, followed by a rounded
     * integer division.
     *
     * <p>Output rows {@code [radius, height - radius)} and columns
     * {@code [radius, width - radius)} of {@code dest} receive
     * {@code (sum + divisor / 2) / divisor}; other pixels are not written. Row ranges are
     * dispatched through {@code BoofConcurrency.loopBlocks}; each invocation takes one scratch
     * row from the pool, uses it to accumulate per-column sums one kernel row at a time, and
     * returns it to the pool when the range is done.</p>
     *
     * @param kernel kernel whose first 11*11 coefficients (row-major) and radius are used
     * @param src input image
     * @param dest output image
     * @param divisor value each sum is divided by after {@code divisor / 2} has been added
     * @param work scratch-array pool; when {@code null} a new {@code IWorkArrays(src.width)} is
     *        created, otherwise {@code reset(src.width)} is called on it
     */
    public static void convolve11(Kernel2D_S32 kernel, GrayS32 src, GrayS32 dest, int divisor, IWorkArrays work) {
        final IWorkArrays pool;
        if (work != null) {
            work.reset(src.width);
            pool = work;
        } else {
            pool = new IWorkArrays(src.width);
        }
        final int[] srcPixels = src.data;
        final int[] dstPixels = dest.data;
        final int cols = src.getWidth();
        final int rows = src.getHeight();
        final int roundOffset = divisor / 2;
        final int radius = kernel.getRadius();
        final int kernelWidth = 2 * radius + 1;
        final int xStop = cols - radius;
        BoofConcurrency.loopBlocks(radius, rows - radius, kernelWidth, (y0, y1) -> {
            final int[] rowSums = pool.pop();
            for (int y = y0; y < y1; y++) {
                // Kernel row 0 overwrites the accumulator, so the scratch row needs no clearing.
                int c1 = kernel.data[0];
                int c2 = kernel.data[1];
                int c3 = kernel.data[2];
                int c4 = kernel.data[3];
                int c5 = kernel.data[4];
                int c6 = kernel.data[5];
                int c7 = kernel.data[6];
                int c8 = kernel.data[7];
                int c9 = kernel.data[8];
                int c10 = kernel.data[9];
                int c11 = kernel.data[10];
                int srcBase = src.startIndex + (y - radius) * src.stride - radius;
                for (int x = radius; x < xStop; x++) {
                    final int s = srcBase + x;
                    rowSums[x] = srcPixels[s] * c1
                            + srcPixels[s + 1] * c2
                            + srcPixels[s + 2] * c3
                            + srcPixels[s + 3] * c4
                            + srcPixels[s + 4] * c5
                            + srcPixels[s + 5] * c6
                            + srcPixels[s + 6] * c7
                            + srcPixels[s + 7] * c8
                            + srcPixels[s + 8] * c9
                            + srcPixels[s + 9] * c10
                            + srcPixels[s + 10] * c11;
                }
                // The remaining kernel rows are added onto the running per-column sums.
                for (int ky = 1; ky < 11; ky++) {
                    srcBase = src.startIndex + (y + ky - radius) * src.stride - radius;
                    final int w = ky * 11;
                    c1 = kernel.data[w];
                    c2 = kernel.data[w + 1];
                    c3 = kernel.data[w + 2];
                    c4 = kernel.data[w + 3];
                    c5 = kernel.data[w + 4];
                    c6 = kernel.data[w + 5];
                    c7 = kernel.data[w + 6];
                    c8 = kernel.data[w + 7];
                    c9 = kernel.data[w + 8];
                    c10 = kernel.data[w + 9];
                    c11 = kernel.data[w + 10];
                    for (int x = radius; x < xStop; x++) {
                        final int s = srcBase + x;
                        rowSums[x] += srcPixels[s] * c1
                                + srcPixels[s + 1] * c2
                                + srcPixels[s + 2] * c3
                                + srcPixels[s + 3] * c4
                                + srcPixels[s + 4] * c5
                                + srcPixels[s + 5] * c6
                                + srcPixels[s + 6] * c7
                                + srcPixels[s + 7] * c8
                                + srcPixels[s + 8] * c9
                                + srcPixels[s + 9] * c10
                                + srcPixels[s + 10] * c11;
                    }
                }
                int dst = dest.startIndex + y * dest.stride + radius;
                for (int x = radius; x < xStop; x++) {
                    dstPixels[dst++] = (rowSums[x] + roundOffset) / divisor;
                }
            }
            pool.recycle(rowSums);
        });
    }
}

