Voxel raycasting algorithm not working correctly

Here is a full C99/C11 implementation of a simple voxel renderer:

// SPDX-License-Identifier: CC0-1.0
//
// Compile using e.g.
//      gcc -Wall -Wextra -O2 this.c -lm -o this

#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
#include <errno.h>

/*
 *  float3 support
*/

typedef struct {
    float   x;
    float   y;
    float   z;
} float3;

static inline float3  Float3(const float x, const float y, const float z)
{
    const float3  result = { x, y, z };
    return result;
}

static inline float3  float3_sub3(const float3 a, const float3 b)
{
    const float3  result = { a.x - b.x, a.y - b.y, a.z - b.z };
    return result;
}

static inline float3  float3_add3(const float3 a, const float3 b)
{
    const float3  result = { a.x + b.x, a.y + b.y, a.z + b.z };
    return result;
}

static inline float  float3_length(const float3 a)
{
    return sqrtf(a.x*a.x + a.y*a.y + a.z*a.z);
}

static inline float  float3_dot(const float3 a, const float3 b)
{
    return a.x*b.x + a.y*b.y + a.z*b.z;
}

static inline float3  float3_cross(const float3 a, const float3 b)
{
    const float3  result = { a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x };
    return result;
}

static inline float3  float3_mul1(const float3 a, const float b)
{
    const float3  result = { a.x*b, a.y*b, a.z*b };
    return result;
}

static inline float3  float3_scale_to_length(const float3 a, const float b)
{
    const float n = float3_length(a);
    const float3  result = { a.x*b/n, a.y*b/n, a.z*b/n };
    return result;
}

/*
 *  float4 support
*/

typedef struct {
    float   x;
    float   y;
    float   z;
    float   w;
} float4;

static inline float4  Float4(const float x, const float y, const float z, const float w)
{
    const float4  result = { x, y, z, w };
    return result;
}

/* float4_length(a) == || a ||, Euclidean length of vector a */
static inline float  float4_length(const float4 a)
{
    return sqrtf(a.x*a.x + a.y*a.y + a.z*a.z + a.w*a.w);
}

/* float4_normalize(a) == a / || a || */
static inline float4  float4_normalize(const float4 a)
{
    const float   n = float4_length(a);
    const float4  result = { a.x/n, a.y/n, a.z/n, a.w/n };
    return result;
}

/* float4_sign(a): Component-wise sign: -1.0, 0.0, or +1.0 */
static inline float4  float4_sign(const float4 a)
{
    const float4  result = { (a.x < 0.0f) ? -1.0f : (a.x > 0.0f) ? +1.0f : 0.0f,
                             (a.y < 0.0f) ? -1.0f : (a.y > 0.0f) ? +1.0f : 0.0f,
                             (a.z < 0.0f) ? -1.0f : (a.z > 0.0f) ? +1.0f : 0.0f,
                             (a.w < 0.0f) ? -1.0f : (a.w > 0.0f) ? +1.0f : 0.0f };
    return result;
}

/* float4_max(a, b): Component-wise maximum */
static inline float4  float4_max4(const float4 a, const float4 b)
{
    const float4  result = { (a.x >= b.x) ? a.x : b.x,
                             (a.y >= b.y) ? a.y : b.y,
                             (a.z >= b.z) ? a.z : b.z,
                             (a.w >= b.w) ? a.w : b.w };
    return result;
}

/* float4_min(a, b): Component-wise minimum */
static inline float4  float4_min4(const float4 a, const float4 b)
{
    const float4  result = { (a.x <= b.x) ? a.x : b.x,
                             (a.y <= b.y) ? a.y : b.y,
                             (a.z <= b.z) ? a.z : b.z,
                             (a.w <= b.w) ? a.w : b.w };
    return result;
}

/* float4_add(a, b) == a + b */
static inline float4  float4_add4(const float4 a, const float4 b)
{
    const float4  result = { a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w };
    return result;
}

/* float4_sub4(a, b) == a - b */
static inline float4  float4_sub4(const float4 a, const float4 b)
{
    const float4  result = { a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w };
    return result;
}

/* float4_floor4(a): Round each component towards negative infinity */
static inline float4  float4_floor4(const float4 a)
{
    const float4  result = { floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w) };
    return result;
}

/* float4_mul1(a, b) = { a.x*b, a.y*b, a.z*b, a.w*b } */
static inline float4  float4_mul1(const float4 a, const float b)
{
    const float4  result = { a.x*b, a.y*b, a.z*b, a.w*b };
    return result;
}

/* float4_div1(a, b) = { a.x/b, a.y/b, a.z/b, a.w/b } */
static inline float4  float4_div1(const float4 a, const float b)
{
    const float4  result = { a.x/b, a.y/b, a.z/b, a.w/b };
    return result;
}

/* float4_div4(a, b) = { a.x/b.x, a.y/b.y, a.z/b.x, a.w/b.w } */
static inline float4  float4_div4(const float4 a, const float4 b)
{
    const float4  result = { a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w };
    return result;
}

/*
 * int4 support
*/

typedef struct {
    int     x;
    int     y;
    int     z;
    int     w;
} int4;

/* float4_int4(a): Component-wise cast to int. */
static inline int4  float4_int4(const float4 a)
{
    const int4  result = { (int)floorf(a.x), (int)floorf(a.y), (int)floorf(a.z), (int)floorf(a.w) };
    return result;
}

/*
 *  Voxel map
*/

float4           voxel_rgba(256)(8);    /* Look-up table for voxel faces; all components (0..1). x is red, y is green, z is blue, w is opacity (0=transparent, 1=opaque). */
int4             voxel_size;
unsigned char   *voxel_cell = NULL;
size_t           voxel_xstride = 0;     /* Typically 1 */
size_t           voxel_ystride = 0;     /* Typically voxel_size.x */
size_t           voxel_zstride = 0;     /* Typically voxel_size.x * voxel_size.y */


/* Trace one voxel ray starting at (projection plane) pos, with eye/camera at eye.
 * Return the color and distance { .x=red, .y=green, .z=blue, .w=distance }
*/
float4 voxel_ray(float4 eye, float4 pos, float maxdist)
{
    const float  eps = 0.5f / maxdist;

    /* On input, eye and pos are really 3-component vectors; we need the fourth one to be zero,
       so that it won't affect the unit direction length vector below. */
    eye.w = 0.0f;
    pos.w = 0.0f;

    /* Ray unit direction vector */
    float4  dir = float4_normalize(float4_sub4(pos, eye));
    /* We rely on the .w component to track length, so set that one now. */
    dir.w = 1.0f;

    float4  posf = float4_sub4(pos, float4_floor4(pos));
    /* Note: 0 <= posf.x < 1,
             0 <= posf.y < 1,
             0 <= posf.z < 1. */

    /* Find first intersections with a voxel cell wall (*next),
       and the delta to the consecutive following intersections */
    float4  xnext,  ynext,  znext;
    float4  xdelta, ydelta, zdelta;

    if (dir.x > eps) {
        xnext  = float4_add4(pos, float4_mul1(dir, 1.0f - posf.x));
        xdelta = float4_div1(dir, dir.x);
    } else
    if (dir.x < -eps) {
        xnext  = float4_sub4(pos, float4_mul1(dir, 0.0f - posf.x));
        xdelta = float4_div1(dir, -dir.x);
    } else {
        xnext  = Float4(0.0f, 0.0f, 0.0f, maxdist);
        xdelta = Float4(0.0f, 0.0f, 0.0f, 0.0f);
    }

    if (dir.y > eps) {
        ynext  = float4_add4(pos, float4_mul1(dir, 1.0f - posf.y));
        ydelta = float4_div1(dir, dir.y);
    } else
    if (dir.y < -eps) {
        ynext  = float4_sub4(pos, float4_mul1(dir, 0.0f - posf.y));
        ydelta = float4_div1(dir, -dir.y);
    } else {
        ynext  = Float4(0.0f, 0.0f, 0.0f, maxdist);
        ydelta = Float4(0.0f, 0.0f, 0.0f, 0.0f);
    }

    if (dir.z > eps) {
        znext  = float4_add4(pos, float4_mul1(dir, 1.0f - posf.z));
        zdelta = float4_div1(dir, dir.z);
    } else
    if (dir.z < -eps) {
        znext  = float4_sub4(pos, float4_mul1(dir, 0.0f - posf.z));
        zdelta = float4_div1(dir, -dir.z);
    } else {
        znext  = Float4(0.0f, 0.0f, 0.0f, maxdist);
        zdelta = Float4(0.0f, 0.0f, 0.0f, 0.0f);
    }

    if (xnext.w < 0.0f || xdelta.w < 0.0f) fprintf(stderr, "Warning: xnext.w = %.6f, xdelta.w = %.6fn", xnext.w, xdelta.w);
    if (ynext.w < 0.0f || ydelta.w < 0.0f) fprintf(stderr, "Warning: ynext.w = %.6f, ydelta.w = %.6fn", ynext.w, ydelta.w);
    if (znext.w < 0.0f || zdelta.w < 0.0f) fprintf(stderr, "Warning: znext.w = %.6f, zdelta.w = %.6fn", znext.w, zdelta.w);

    float4  color = { 0.0f, 0.0f, 0.0f, 0.0f };  /* Transparent! */

    while (1) {
        unsigned char  intersection = 0;    /* 1:x, 2:y, 4:z */

        if (pos.w >= maxdist)
            break;
        if (color.w >= 1.0f)
            break;

        /* Pick the closest next step first. */
        pos = xnext;
        if (pos.w > ynext.w) {
            pos = ynext;
        }
        if (pos.w > znext.w) {
            pos = znext;
        }

        /* Update intersection and prepare for the next step. */
        if (pos.w == xnext.w) {
            intersection |= 1;
            xnext = float4_add4(xnext, xdelta);
        }
        if (pos.w == ynext.w) {
            intersection |= 2;
            ynext = float4_add4(ynext, ydelta);
        }
        if (pos.w == znext.w) {
            intersection |= 4;
            znext = float4_add4(znext, zdelta);
        }

        /* If pos.w == INF, we have intersection = 0. */
        if (!intersection) {
            pos.w = maxdist;
            break;
        }

        /* Position within the wraparound voxel space. */
        float4  temp = float4_floor4(pos);
        int4    posi = float4_int4(temp);
        /* We could use the fractional positive sub-voxel coordinates posf,
              posf = float4_sub4(pos, temp);
           where 0 <= posf.x < 1, 0 <= posf.y < 1, 0 <= posf.z < 1
           and if (intersection & 1), posf.x = 0 (except for rounding errors),
               if (intersection & 2), posf.y = 0 (except for rounding errors),
               if (intersection & 4), posf.z = 0 (except for rounding errors),
           for interpolation etc.
        */

        /* Ensure posi is within the positive voxel space. */
        posi.x = posi.x % voxel_size.x; if (posi.x < 0) posi.x += voxel_size.x;
        posi.y = posi.y % voxel_size.y; if (posi.y < 0) posi.y += voxel_size.y;
        posi.z = posi.z % voxel_size.z; if (posi.z < 0) posi.z += voxel_size.z;

        /* Look up the voxel cell properties for this intersection. */
        float4  c = voxel_rgba(voxel_cell( (size_t)posi.x * voxel_xstride
                                         + (size_t)posi.y * voxel_ystride
                                         + (size_t)posi.z * voxel_zstride ))( intersection );

        if (c.w >= 1.0f) {
            /* Opaque; good, ray ends here. Blend 'c' behind 'color'. */
            color = float4_add4(color, float4_mul1(c, 1.0f - color.w));
            break;
        } else
        if (c.w > 0.0f) {
            /* Blend color 'color' *behind* color 'c'. */
            color = float4_add4(color, float4_mul1(c, 1.0f - color.w));
        }
    }
    color.w = pos.w;
    return color;
}

void renderPPM(FILE *outppm, FILE *outpgm, int width, int height, const float3 eye, const float3 forward, const float3 right, const float maxdist)
{
    /* Assume 'right' is perpendicular to 'forward'.  'up' is perpendicular to both, with length (height/width) times that of 'right'. */
    const float3  up = float3_scale_to_length(float3_cross(forward, right), float3_length(right) * (float)height / (float)width);

    /* Image plane corner, rowstart = eye + forward - right + up */
    float3  rowstart = float3_add3(float3_sub3(float3_add3(eye, forward), right), up);

    /* Delta vectors per pixel for the image plane */
    const float3  dx = float3_mul1(right, 2.0f / (float)width);
    const float3  dy = float3_mul1(up,   -2.0f / (float)height);

    if (outppm) fprintf(outppm, "P6n%d %d 255n", width, height);
    if (outpgm) fprintf(outpgm, "P5n%d %d 255n", width, height);

    float  mind = +3.0f*maxdist;
    float  maxd = -3.0f*maxdist;

    for (int y = 0; y < height; y++, rowstart = float3_add3(rowstart, dy)) {
        float3  pos = rowstart;
        for (int x = 0; x < width; x++, pos = float3_add3(pos, dx)) {
            const float4  c = voxel_ray( Float4(eye.x, eye.y, eye.z, 0.0f),
                                         Float4(pos.x, pos.y, pos.z, 0.0f), maxdist);
            const int  r8 = (c.x <= 0.0f) ? 0 : (c.x < 1.0f) ? (int)(0.5f + 255.0f * c.x) : 255;
            const int  g8 = (c.y <= 0.0f) ? 0 : (c.y < 1.0f) ? (int)(0.5f + 255.0f * c.y) : 255;
            const int  b8 = (c.z <= 0.0f) ? 0 : (c.z < 1.0f) ? (int)(0.5f + 255.0f * c.z) : 255;
            const int  d8 = (c.w <= 0.0f) ? 0 : (c.w < maxdist) ? (int)(0.5f + 255.0f * c.w / maxdist) : 255;
            if (mind > c.w) mind = c.w;
            if (maxd < c.w) maxd = c.w;

            if (outppm) {
                fputc(r8, outppm);
                fputc(g8, outppm);
                fputc(b8, outppm);
            }
            if (outpgm) {
                fputc(d8, outpgm);
            }
        }
        fprintf(stderr, "rRow %d of %d completed.", y + 1, height);
        fflush(stderr);
    }

    if (outppm) fflush(outppm);
    if (outpgm) fflush(outpgm);
    fprintf(stderr, "rRendering complete. Distances varied between %.6f and %.6f.n", mind, maxd);
    fflush(stderr);
}

int main(int argc, char *argv())
{
    FILE *ppm, *pgm;

    if (argc != 3 || !strcmp(argv(1), "-h") || !strcmp(argv(1), "--help")) {
        const char *arg0 = (argc > 0 && argv && argv(0) && argv(0)(0)) ? argv(0) : "(this)";
        fprintf(stderr, "n");
        fprintf(stderr, "Usage: %s ( -h | --help )n", arg0);
        fprintf(stderr, "       %s OUT.ppm DEPTH.pgmn", arg0);
        fprintf(stderr, "n");
        return EXIT_FAILURE;
    }

    voxel_size.x = 64;
    voxel_size.y = 64;
    voxel_size.z = 64;

    voxel_xstride = 1;
    voxel_ystride = (size_t)voxel_size.x;
    voxel_zstride = voxel_ystride * (size_t)voxel_size.y;
    const size_t  size = voxel_zstride * (size_t)voxel_size.y;

    voxel_cell = (unsigned char *)malloc(size);
    if (!voxel_cell) {
        fprintf(stderr, "Not enough memory for a %d x %d x %d voxel map.n", voxel_size.x, voxel_size.y, voxel_size.z);
        return EXIT_FAILURE;
    }
    memset(voxel_cell, 0, size);

    /* Make all cell values transparent, */
    for (int i = 0; i < 256; i++) {
        for (int k = 0; k < 8; k++) {
            voxel_rgba(i)(k) = Float4(0.0f, 0.0f, 0.0f, 0.0f);
        }
    }

    /* Cell type 1 faces are blue, red, and green; edges and vertices their mix. */
    voxel_rgba(1)(1) = Float4(0.0f, 0.0f, 1.0f, 1.0f);
    voxel_rgba(1)(2) = Float4(0.0f, 1.0f, 0.0f, 1.0f);
    voxel_rgba(1)(4) = Float4(1.0f, 0.0f, 0.0f, 1.0f);
    voxel_rgba(1)(3) = Float4(0.0f, 0.8f, 0.8f, 1.0f);
    voxel_rgba(1)(5) = Float4(0.8f, 0.0f, 0.8f, 1.0f);
    voxel_rgba(1)(6) = Float4(0.8f, 0.8f, 0.0f, 1.0f);
    voxel_rgba(1)(7) = Float4(0.6f, 0.6f, 0.6f, 1.0f);

    /* Create a shell at the center, minradius 10, maxradius 12 */
    {
        const int  cx = 32;
        const int  cy = 32;
        const int  cz = 32;
        const int  rrmin = 10*10;
        const int  rrmax = 12*12;

        for (int z = 0; z < voxel_size.z; z++) {
            const int  zz = (z-cz)*(z-cz);
            for (int y = 0; y < voxel_size.y; y++) {
                const int  zzyy = zz + (y-cy)*(y-cy);
                for (int x = 0; x < voxel_size.x; x++) {
                    const int  dd = zzyy + (x-cx)*(x-cx);
                    if (dd >= rrmin && dd < rrmax) {
                        voxel_cell((size_t)x * voxel_xstride + (size_t)y * voxel_ystride + (size_t)z * voxel_zstride) = 1;
                    }
                }
            }
        }
    }

    fprintf(stderr, "Constructed a %d x %d x %d voxel map.n", voxel_size.x, voxel_size.y, voxel_size.z);

    ppm = fopen(argv(1), "wb");
    if (!ppm) {
        fprintf(stderr, "%s: %s.n", argv(1), strerror(errno));
        return EXIT_FAILURE;
    }
    pgm = fopen(argv(2), "wb");
    if (!pgm) {
        fprintf(stderr, "%s: %s.n", argv(2), strerror(errno));
        fclose(ppm);
        remove(argv(1));
        return EXIT_FAILURE;
    }

    renderPPM(ppm, pgm, 320, 160, Float3(68.0f, 32.0f, 68.0f), Float3(4.0f,4.0f,4.0f), Float3(4.0f, -4.0f, 0.0f), 256.0f);

    if (fclose(ppm)) {
        fprintf(stderr, "%s: Error closing file.n", argv(1));
        fclose(pgm);
        remove(argv(1));
        remove(argv(2));
        return EXIT_FAILURE;
    }
    if (fclose(pgm)) {
        fprintf(stderr, "%s: Error closing file.n", argv(2));
        remove(argv(1));
        remove(argv(2));
        return EXIT_FAILURE;
    }

    fprintf(stderr, "Saved PPM image as '%s', and depth graymap as PGM image '%s'.n", argv(1), argv(2));
    return EXIT_SUCCESS;
}

Most of the code is support for HLSL-like float3/float4/int4 types, so that the code is easier to port. (I don’t have anything that can compile and run HLSL, myself.)

Compile the program, then run it giving two file names as command-line parameters. The first one will be a NetPBM PPM image of the scene, and the second one a NetPBM PGM depth map:
Example rendering

The voxel space here is periodic, so although there is only one sphere in the voxel map, the rendering has multiple copies of it.

Note how the nearest sphere – in the lower right corner of the rendered image – has green faces on it, while they obviously shouldn’t exist on that side of the sphere? They’re better visible if you increase the image size, too. What is going on?

Since each voxel cell has only three faces (and not six, like a cube has), we see the faces we expect to see only when viewing in a negative direction (where unit ray direction vector has all nonpositive components). When we view in other directions, we do not see the surface face first, but the faces between nonempty voxel cells!

In 2D, the equivalent 2×2 block has extra protruding lines:

┌──┬──
│  │
├──┼──
│  │

In 3D, those extra faces are like vanes or plates standing up on the outside of the surface!

Simply put, if we want to draw a nice voxel box, we need to populate four cells:
$$begin{array}{ccl}
text{Cell} & text{Value} & text{Description} \
hline
(0,0,0) & 7 & text{x, y, and z faces} \
(1,0,0) & 1 & text{x face} \
(0,1,0) & 2 & text{y face} \
(1,1,1) & 4 & text{z face} \
end{array}$$

Alternatively, you can adjust the cell index calculation, by preparing an array of eight vectors (constant for each ray, depends only on the direction; each component is either 0 or -1), that are added to the current position just before the cell index calculation. (Essentially, each intersection type has their own adjustment.) That way we can shift the x faces, the y faces, and the z faces separately from the others for the voxel grid, depending on the ray direction; so that each cell then has SIX faces.