Skip to content
2024-02-17

Surface Curvature of an image in OpenCL

Introduction

I was wanting to learn a GPU shading language for a while and just needed an excuse to do so, and I finally got one. I wanted to try my hand at a blob/spot detection within an image. The challenge was I did not want any off-the-shelf libraries for image processing. At first I wanted to do something like Laplacian of Gaussian, but I got side-tracked and ended up computing the surface curvature

Getting the kernels together

I first started out by getting a pyopencl example running on my machine. Then, I copied the clij kernel for calculating the gradient but there are a couple changes I had to make. (btw, the clij/clEsperanto/Fiji/ImageJ community for image processing is great!).

c
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;


__kernel void gradient_x_2d
(
  IMAGE_dst_TYPE dst, 
  IMAGE_src_TYPE src
)
{
  const int i = get_global_id(0), j = get_global_id(1);
  const int2 coord = (int2){i,j};
  const int2 coordA = (int2){i-1,j};
  const int2 coordB = (int2){i+1,j};

  float valueA = READ_src_IMAGE(src, sampler, coordA).x;
  float valueB = READ_src_IMAGE(src, sampler, coordB).x;
  IMAGE_dst_PIXEL_TYPE res = CONVERT_dst_PIXEL_TYPE(valueB - valueA);

  WRITE_dst_IMAGE(dst, coord, res);
}

Notice the IMAGE_dst_TYPE, READ_src_IMAGE, WRITE_dst_IMAGE, CONVERT_dst_PIXEL_TYPE, IMAGE_dst_PIXEL_TYPE are not part of the OpenCL language. (See the README for what they transform to) These are macros that are transformed by CLesperanto and will not work outside the CLesperanto system, so we have to replace them with the correct syntax. Since we will be using a buffer of data rather than an image to avoid color channels and such, we can get rid of the sampler and convert the input/output types to float pointers.

c
__kernel void gradient_x_2d_buffer(__global float *dst, __global float *src) {
  const int i = get_global_id(0), j = get_global_id(1);
  const int width = get_global_size(0);
  const int2 coord = (int2){i, j};
  const int2 coordA = (int2){i - 1, j};
  const int2 coordB = (int2){i + 1, j};

  float valueA = src[coordA.x + coordA.y * width];
  float valueB = src[coordB.x + coordB.y * width];

  // clamp to edge if out of bounds by setting value to the same as the edge
  if (coordA.x < 0)
    valueA = src[coord.x + coord.y * width];
  if (coordB.x >= width)
    valueB = src[coord.x + coord.y * width];

  float res = valueB - valueA;

  dst[i + j * width] = res;
}

Okay, now we can find the derivative of a 2d matrix in the x direction, we just need to copy the kernel function and make one for the y direction.

Since we will be doing the processing as a buffer, we need a way to convert a color image to a float32 buffer, so I wrote a function that converts RGB to a weighted luminance f32 value.

grey.cl

c
__kernel void to_gray_buffer(__global float *output,
                             __read_only image2d_t input) {

  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                            CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;

  const int i = get_global_id(0), j = get_global_id(1);
  const int2 coord = (int2){i, j};
  int2 size = get_image_dim(input);
  int width = get_image_width(input);
  int height = get_image_height(input);

  uint4 color = read_imageui(input, sampler, coord);
  float gray = 0.2126 * color.x + 0.7152 * color.y + 0.0722 * color.z;
  output[i + j * width] = gray;
}

Writing the glue

I used python with pyopencl to call the cl code.

python
import os
import math
import numpy as np
import pyopencl as cl
import pyopencl.array
import time
from PIL import Image, ImageEnhance, ImageFilter
from matplotlib import cm

TASKS = 1048576
CL_TASKS = int(TASKS / 4)

filename = "./6_shapes.png"
img = Image.open(filename)
if img.mode != "RGBA":
    img = img.convert("RGBA")
img_width = img.size[0]
img_height = img.size[1]
img_size = img_width * img_height

print("Image size: %d x %d" % (img_width, img_height))

platforms = cl.get_platforms()
print(platforms)
my_gpu_devices = platforms[0].get_devices(device_type=cl.device_type.GPU)
print(my_gpu_devices)
ctx = cl.Context(devices=my_gpu_devices)
# ctx = cl.create_some_context()
print(ctx)
queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

# read kernel from files
kernels = ""
for file in os.listdir(f"./"):
    if file.endswith(".cl"):
        f = open(f"./{file}", "r", encoding="utf-8")
        kernels += "".join(f.readlines())
        f.close()
prg = cl.Program(ctx, kernels).build()

Then we setup out CL memory. cl.Image initializes memory on the device. We call it first on is for input_buf_1 and copies the image bytes to device and the second one is cl.Buffer which is for dest_buf_2 which will hold the results of the kernel function.

python

image_format = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.UNSIGNED_INT8)

# prepare device memory for OpenCL
input_buf_1 = cl.Image(
    ctx,
    cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
    image_format,
    img.size,
    None,
    img.tobytes(),
)

## to_gray

dest_buf_2 = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, img_width * img_height * np.dtype(np.float32).itemsize)

prg.to_gray_buffer(queue, (img_width, img_height), (1, 1), dest_buf_2, input_buf_1).wait()

dest = np.zeros(img_width * img_height, np.float32)
cl.enqueue_copy(
    queue,
    dest=dest,
    src=dest_buf_2,
    buffer_origin=(0,),
    host_origin=(0,),
    region=(img_width * img_height * np.dtype(np.float32).itemsize,),
).wait()

Now that we have a 2d buffer which we will treat as a heightmap or z-values, we can pass it into the gradient function a couple times to get the partial derivatives and then the surface curvature.

H=12(1+(fx)2)2fy22fxfy2fxy+(1+(fy)2)2fx2(1+(fx)2+(fy)2)3/2H = \frac{1}{2} \frac{ \left(1 + \left(\frac{\partial f}{\partial x}\right)^2\right) \frac{\partial^2 f}{\partial y^2} - 2 \frac{\partial f}{\partial x} \frac{\partial f}{\partial y} \frac{\partial^2 f}{\partial x \partial y} + \left(1 + \left(\frac{\partial f}{\partial y}\right)^2\right) \frac{\partial^2 f}{\partial x^2} }{\left(1 + \left(\frac{\partial f}{\partial x}\right)^2 + \left(\frac{\partial f}{\partial y}\right)^2\right)^{3/2}}

mean_curvature.cl

c
__kernel void mean_curvature(__global float *dst, __global float *dx,
                             __global float *dy, __global float *dxx,
                             __global float *dxy, __global float *dyy) {

  const int i = get_global_id(0), j = get_global_id(1);
  const int2 coord = (int2){i, j};
  int width = get_global_size(0);
  // int height = get_global_size(1);

  float dxi = dx[j * width + i];
  float dyi = dy[j * width + i];
  float dxxi = dxx[j * width + i];
  float dxyi = dxy[j * width + i];
  float dyyi = dyy[j * width + i];

  float H;
  H = (dxi * dxi + 1) * dyyi - 2 * dxi * dyi * dxyi + (dyi * dyi + 1) * dxxi;
  H = H / (2 * pow(dxi * dxi + dyi * dyi + 1, (float)1.5));

  dst[j * width + i] = H;
}
py
## gradient

dx = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, img_width * img_height * np.dtype(np.float32).itemsize)
dy = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, img_width * img_height * np.dtype(np.float32).itemsize)
dxx = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, img_width * img_height * np.dtype(np.float32).itemsize)
dxy = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, img_width * img_height * np.dtype(np.float32).itemsize)
dyx = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, img_width * img_height * np.dtype(np.float32).itemsize) # redundant
dyy = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, img_width * img_height * np.dtype(np.float32).itemsize)

prg.gradient_x_2d_buffer(queue, (img_width, img_height), (1, 1), dx, dest_buf_2).wait()
prg.gradient_y_2d_buffer(queue, (img_width, img_height), (1, 1), dy, dest_buf_2).wait()
prg.gradient_x_2d_buffer(queue, (img_width, img_height), (1, 1), dxx, dx).wait()
prg.gradient_y_2d_buffer(queue, (img_width, img_height), (1, 1), dxy, dx).wait()
prg.gradient_x_2d_buffer(queue, (img_width, img_height), (1, 1), dyx, dy).wait() # redundant
prg.gradient_y_2d_buffer(queue, (img_width, img_height), (1, 1), dyy, dy).wait()

# mean curvature

h = cl.Buffer(ctx, cl.mem_flags.READ_WRITE, img_width * img_height * np.dtype(np.float32).itemsize)

prg.mean_curvature(queue, (img_width, img_height), (1, 1), h, dx, dy, dxx, dxy, dyy).wait()

# read image back to cpu
dest = np.zeros(img_width * img_height, np.float32)
cl.enqueue_copy(
    queue,
    dest=dest,
    src=h,
    buffer_origin=(0,),
    host_origin=(0,),
    region=(img_width * img_height * np.dtype(np.float32).itemsize,),
).wait()

dest = np.reshape(dest, (img_height, img_width))
# normalize all values to be between 0 and 1
dest2 = (dest-np.min(dest))/(np.max(dest)-np.min(dest))


out_img = Image.fromarray(np.uint8(cm.Greys(dest2)*255))

if out_img.mode == "RGBA":
    out_img = out_img.convert("RGB")

display(img)
display(out_img)

imgout_img

Full example code can be found at https://github.com/phcreery/surface_curvature/tree/master/examples/OpenCL

This page is delivered to you from my garage.