Add uniform buffers

This commit is contained in:
przmk 2024-07-22 00:27:24 +02:00
parent 8c60f79f7d
commit b1bbd65aaa
13 changed files with 5695 additions and 4 deletions

22
libs/zmath/LICENSE Normal file
View file

@ -0,0 +1,22 @@
MIT License
Copyright (c) 2021 Michal Ziulek
Copyright (c) 2024 zig-gamedev contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

133
libs/zmath/README.md Normal file
View file

@ -0,0 +1,133 @@
# zmath v0.10.0 - SIMD math library for game developers
Tested on x86_64 and AArch64.
Provides ~140 optimized routines and ~70 extensive tests.
Can be used with any graphics API.
Documentation can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/zmath.zig).
Benchmarks can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/benchmark.zig).
An intro article can be found [here](https://zig.news/michalz/fast-multi-platform-simd-math-library-in-zig-2adn).
## Getting started
Copy `zmath` into a subdirectory of your project and add the following to your `build.zig.zon` .dependencies:
```zig
.zmath = .{ .path = "libs/zmath" },
```
Then in your `build.zig` add:
```zig
pub fn build(b: *std.Build) void {
const exe = b.addExecutable(.{ ... });
const zmath = b.dependency("zmath", .{});
exe.root_module.addImport("zmath", zmath.module("root"));
}
```
Now in your code you may import and use zmath:
```zig
const zm = @import("zmath");
pub fn main() !void {
//
// OpenGL/Vulkan example
//
const object_to_world = zm.rotationY(..);
const world_to_view = zm.lookAtRh(
zm.f32x4(3.0, 3.0, 3.0, 1.0), // eye position
zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point
zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point)
);
// `perspectiveFovRhGl` produces Z values in [-1.0, 1.0] range (Vulkan app should use `perspectiveFovRh`)
const view_to_clip = zm.perspectiveFovRhGl(0.25 * math.pi, aspect_ratio, 0.1, 20.0);
const object_to_view = zm.mul(object_to_world, world_to_view);
const object_to_clip = zm.mul(object_to_view, view_to_clip);
// Transposition is needed because GLSL uses column-major matrices by default
gl.uniformMatrix4fv(0, 1, gl.TRUE, zm.arrNPtr(&object_to_clip));
// In GLSL: gl_Position = vec4(in_position, 1.0) * object_to_clip;
//
// DirectX example
//
const object_to_world = zm.rotationY(..);
const world_to_view = zm.lookAtLh(
zm.f32x4(3.0, 3.0, -3.0, 1.0), // eye position
zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point
zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point)
);
const view_to_clip = zm.perspectiveFovLh(0.25 * math.pi, aspect_ratio, 0.1, 20.0);
const object_to_view = zm.mul(object_to_world, world_to_view);
const object_to_clip = zm.mul(object_to_view, view_to_clip);
// Transposition is needed because HLSL uses column-major matrices by default
const mem = allocateUploadMemory(...);
zm.storeMat(mem, zm.transpose(object_to_clip));
// In HLSL: out_position_sv = mul(float4(in_position, 1.0), object_to_clip);
//
// 'WASD' camera movement example
//
{
const speed = zm.f32x4s(10.0);
const delta_time = zm.f32x4s(demo.frame_stats.delta_time);
const transform = zm.mul(zm.rotationX(demo.camera.pitch), zm.rotationY(demo.camera.yaw));
var forward = zm.normalize3(zm.mul(zm.f32x4(0.0, 0.0, 1.0, 0.0), transform));
zm.storeArr3(&demo.camera.forward, forward);
const right = speed * delta_time * zm.normalize3(zm.cross3(zm.f32x4(0.0, 1.0, 0.0, 0.0), forward));
forward = speed * delta_time * forward;
var cam_pos = zm.loadArr3(demo.camera.position);
if (keyDown('W')) {
cam_pos += forward;
} else if (keyDown('S')) {
cam_pos -= forward;
}
if (keyDown('D')) {
cam_pos += right;
} else if (keyDown('A')) {
cam_pos -= right;
}
zm.storeArr3(&demo.camera.position, cam_pos);
}
//
// SIMD wave equation solver example (works with vector width 4, 8 and 16)
// 'T' can be F32x4, F32x8 or F32x16
//
var z_index: i32 = 0;
while (z_index < grid_size) : (z_index += 1) {
const z = scale * @intToFloat(f32, z_index - grid_size / 2);
const vz = zm.splat(T, z);
var x_index: i32 = 0;
while (x_index < grid_size) : (x_index += zm.veclen(T)) {
const x = scale * @intToFloat(f32, x_index - grid_size / 2);
const vx = zm.splat(T, x) + voffset * zm.splat(T, scale);
const d = zm.sqrt(vx * vx + vz * vz);
const vy = zm.sin(d - vtime);
const index = @intCast(usize, x_index + z_index * grid_size);
zm.store(xslice[index..], vx, 0);
zm.store(yslice[index..], vy, 0);
zm.store(zslice[index..], vz, 0);
}
}
}
```

62
libs/zmath/build.zig Normal file
View file

@ -0,0 +1,62 @@
const std = @import("std");
/// Build-script entry point for zmath.
///
/// Declares the user-facing build options, exposes them to the library through a
/// generated `zmath_options` module, registers the public `root` module, and adds
/// two top-level steps: `test` (unit tests) and `benchmark` (benchmark executable).
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});

    // An explicit `-Doptimize=<mode>` takes precedence; otherwise fall back to the
    // standard optimize option with ReleaseFast as the preferred mode.
    const optimize_mode = b.option(
        std.builtin.OptimizeMode,
        "optimize",
        "Select optimization mode",
    ) orelse b.standardOptimizeOption(.{
        .preferred_optimize_mode = .ReleaseFast,
    });
    const cross_platform_determinism = b.option(
        bool,
        "enable_cross_platform_determinism",
        "Enable cross-platform determinism",
    ) orelse true;

    // Forward both option values into a generated module.
    const generated_options = b.addOptions();
    generated_options.addOption(std.builtin.OptimizeMode, "optimize", optimize_mode);
    generated_options.addOption(bool, "enable_cross_platform_determinism", cross_platform_determinism);
    const options_module = generated_options.createModule();

    const zmath_module = b.addModule("root", .{
        .root_source_file = b.path("src/main.zig"),
        .imports = &.{
            .{ .name = "zmath_options", .module = options_module },
        },
    });

    // `zig build test`: build, install and run the unit tests.
    const test_step = b.step("test", "Run zmath tests");
    const unit_tests = b.addTest(.{
        .name = "zmath-tests",
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize_mode,
    });
    b.installArtifact(unit_tests);
    unit_tests.root_module.addImport("zmath_options", options_module);
    const run_unit_tests = b.addRunArtifact(unit_tests);
    test_step.dependOn(&run_unit_tests.step);

    // `zig build benchmark`: build, install and run the benchmark executable.
    const benchmark_step = b.step("benchmark", "Run zmath benchmarks");
    const benchmark_exe = b.addExecutable(.{
        .name = "zmath-benchmarks",
        .root_source_file = b.path("src/benchmark.zig"),
        .target = target,
        .optimize = optimize_mode,
    });
    b.installArtifact(benchmark_exe);
    benchmark_exe.root_module.addImport("zmath", zmath_module);
    const run_benchmarks = b.addRunArtifact(benchmark_exe);
    benchmark_step.dependOn(&run_benchmarks.step);
}

10
libs/zmath/build.zig.zon Normal file
View file

@ -0,0 +1,10 @@
.{
.name = "zmath",
.version = "0.10.0",
.paths = .{
"build.zig",
"build.zig.zon",
"src",
"README.md",
},
}

View file

@ -0,0 +1,469 @@
// -------------------------------------------------------------------------------------------------
// zmath - benchmarks
// -------------------------------------------------------------------------------------------------
// 'zig build benchmark -Doptimize=ReleaseFast' will build and run the benchmarks with all optimisations.
//
// -------------------------------------------------------------------------------------------------
// 'AMD Ryzen 9 3950X 16-Core Processor', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f, ReleaseFast
// -------------------------------------------------------------------------------------------------
// matrix mul benchmark (AOS) - scalar version: 1.5880s, zmath version: 1.0642s
// cross3, scale, bias benchmark (AOS) - scalar version: 0.9318s, zmath version: 0.6888s
// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.2258s, zmath version: 1.1095s
// quaternion mul benchmark (AOS) - scalar version: 1.4123s, zmath version: 0.6958s
// wave benchmark (SOA) - scalar version: 4.8165s, zmath version: 0.7338s
//
// -------------------------------------------------------------------------------------------------
// 'AMD Ryzen 7 5800X 8-Core Processor', Linux 5.17.14, Zig 0.10.0-dev.2624+d506275a0, ReleaseFast
// -------------------------------------------------------------------------------------------------
// matrix mul benchmark (AOS) - scalar version: 1.3672s, zmath version: 0.8617s
// cross3, scale, bias benchmark (AOS) - scalar version: 0.6586s, zmath version: 0.4803s
// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.0620s, zmath version: 0.8942s
// quaternion mul benchmark (AOS) - scalar version: 1.1324s, zmath version: 0.6064s
// wave benchmark (SOA) - scalar version: 3.6598s, zmath version: 0.4231s
//
// -------------------------------------------------------------------------------------------------
// 'Apple M1 Max', macOS Version 12.4, Zig 0.10.0-dev.2657+74442f350, ReleaseFast
// -------------------------------------------------------------------------------------------------
// matrix mul benchmark (AOS) - scalar version: 1.0297s, zmath version: 1.0538s
// cross3, scale, bias benchmark (AOS) - scalar version: 0.6294s, zmath version: 0.6532s
// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 0.9807s, zmath version: 1.0988s
// quaternion mul benchmark (AOS) - scalar version: 1.5413s, zmath version: 0.7800s
// wave benchmark (SOA) - scalar version: 3.4220s, zmath version: 1.0255s
//
// -------------------------------------------------------------------------------------------------
// '11th Gen Intel(R) Core(TM) i7-11800H @ 2.30GHz', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f, ReleaseFast
// -------------------------------------------------------------------------------------------------
// matrix mul benchmark (AOS) - scalar version: 2.2308s, zmath version: 0.9376s
// cross3, scale, bias benchmark (AOS) - scalar version: 1.0821s, zmath version: 0.5110s
// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.6580s, zmath version: 0.9167s
// quaternion mul benchmark (AOS) - scalar version: 2.0139s, zmath version: 0.5856s
// wave benchmark (SOA) - scalar version: 3.7832s, zmath version: 0.3642s
//
// -------------------------------------------------------------------------------------------------
/// Benchmark driver: runs each benchmark once, in a fixed order. Every benchmark
/// prints one line comparing the scalar and the zmath timings.
pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    // AOS benchmarks; each data set fits in L1 cache.
    //   m = mul(ma, mb)
    try mat4MulBenchmark(gpa, 100_000);
    //   v = 0.01 * cross3(va, vb) + vec3(1.0)
    try cross3ScaleBiasBenchmark(gpa, 10_000);
    //   v = dot3(va, vb) * (0.1 * cross3(va, vb) + vec3(1.0))
    try cross3Dot3ScaleBiasBenchmark(gpa, 10_000);
    //   q = qmul(qa, qb)
    try quatBenchmark(gpa, 10_000);

    // SOA benchmark.
    //   d = sqrt(x * x + z * z); y = sin(d - t)
    try waveBenchmark(gpa, 1_000);
}
const std = @import("std");
const time = std.time;
const Timer = time.Timer;
const zm = @import("zmath");
var prng = std.Random.DefaultPrng.init(0);
const random = prng.random();
/// Benchmarks 4x4 matrix multiplication on an array-of-structures data set.
///
/// Fills two lists with 64 random `[16]f32` matrices each, runs a warm-up pass,
/// then times `count` sweeps over every (a, b) pair twice: first with a
/// hand-written scalar product, then with `zm.mul`. Both timings are printed
/// with `std.debug.print`.
///
/// Errors: propagates allocation failures from `append` and any error returned
/// by `Timer.start()`.
noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    std.debug.print("\n", .{});
    std.debug.print("{s:>42} - ", .{"matrix mul benchmark (AOS)"});

    var data0 = std.ArrayList([16]f32).init(allocator);
    defer data0.deinit();
    var data1 = std.ArrayList([16]f32).init(allocator);
    defer data1.deinit();

    for (0..64) |_| {
        var m0: [16]f32 = undefined;
        for (&m0) |*e| e.* = random.float(f32);
        try data0.append(m0);

        var m1: [16]f32 = undefined;
        for (&m1) |*e| e.* = random.float(f32);
        try data1.append(m1);
    }

    // Warmup, fills L1 cache.
    for (0..100) |_| {
        for (data1.items) |b| {
            for (data0.items) |a| {
                const ma = zm.loadMat(a[0..]);
                const mb = zm.loadMat(b[0..]);
                const r = zm.mul(ma, mb);
                std.mem.doNotOptimizeAway(&r);
            }
        }
    }

    // Scalar baseline.
    {
        // `Timer.start()` records the start instant, so `read()` afterwards is the
        // elapsed time of the loop. (Subtracting the value returned by `lap()` would
        // subtract an unrelated setup duration and could underflow for tiny runs.)
        var timer = try Timer.start();
        for (0..count) |_| {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const r = [16]f32{
                        a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12],
                        a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13],
                        a[0] * b[2] + a[1] * b[6] + a[2] * b[10] + a[3] * b[14],
                        a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3] * b[15],
                        a[4] * b[0] + a[5] * b[4] + a[6] * b[8] + a[7] * b[12],
                        a[4] * b[1] + a[5] * b[5] + a[6] * b[9] + a[7] * b[13],
                        a[4] * b[2] + a[5] * b[6] + a[6] * b[10] + a[7] * b[14],
                        a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7] * b[15],
                        a[8] * b[0] + a[9] * b[4] + a[10] * b[8] + a[11] * b[12],
                        a[8] * b[1] + a[9] * b[5] + a[10] * b[9] + a[11] * b[13],
                        a[8] * b[2] + a[9] * b[6] + a[10] * b[10] + a[11] * b[14],
                        a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11] * b[15],
                        a[12] * b[0] + a[13] * b[4] + a[14] * b[8] + a[15] * b[12],
                        a[12] * b[1] + a[13] * b[5] + a[14] * b[9] + a[15] * b[13],
                        a[12] * b[2] + a[13] * b[6] + a[14] * b[10] + a[15] * b[14],
                        a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15],
                    };
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_s = @as(f64, @floatFromInt(timer.read())) / time.ns_per_s;
        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath version.
    {
        var timer = try Timer.start();
        for (0..count) |_| {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const ma = zm.loadMat(a[0..]);
                    const mb = zm.loadMat(b[0..]);
                    const r = zm.mul(ma, mb);
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_s = @as(f64, @floatFromInt(timer.read())) / time.ns_per_s;
        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
/// Benchmarks `0.01 * cross3(a, b) + 1.0` on an array-of-structures data set.
///
/// Fills two lists with 256 random `[3]f32` vectors each, runs a warm-up pass,
/// then times `count` sweeps over every (a, b) pair with a scalar
/// implementation and with the zmath implementation, printing both timings.
///
/// Errors: propagates allocation failures from `append` and any error returned
/// by `Timer.start()`.
noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    std.debug.print("{s:>42} - ", .{"cross3, scale, bias benchmark (AOS)"});

    var data0 = std.ArrayList([3]f32).init(allocator);
    defer data0.deinit();
    var data1 = std.ArrayList([3]f32).init(allocator);
    defer data1.deinit();

    for (0..256) |_| {
        try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
        try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
    }

    // Warmup, fills L1 cache.
    for (0..100) |_| {
        for (data1.items) |b| {
            for (data0.items) |a| {
                const va = zm.loadArr3(a);
                const vb = zm.loadArr3(b);
                const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0);
                std.mem.doNotOptimizeAway(&cp);
            }
        }
    }

    // Scalar baseline.
    {
        // `read()` right after `Timer.start()` is the elapsed loop time; the value
        // returned by `lap()` is a duration, not a timestamp, so it must not be
        // subtracted from it.
        var timer = try Timer.start();
        for (0..count) |_| {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const r = [3]f32{
                        0.01 * (a[1] * b[2] - a[2] * b[1]) + 1.0,
                        0.01 * (a[2] * b[0] - a[0] * b[2]) + 1.0,
                        0.01 * (a[0] * b[1] - a[1] * b[0]) + 1.0,
                    };
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_s = @as(f64, @floatFromInt(timer.read())) / time.ns_per_s;
        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath version.
    {
        var timer = try Timer.start();
        for (0..count) |_| {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const va = zm.loadArr3(a);
                    const vb = zm.loadArr3(b);
                    const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0);
                    std.mem.doNotOptimizeAway(&cp);
                }
            }
        }
        const elapsed_s = @as(f64, @floatFromInt(timer.read())) / time.ns_per_s;
        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
/// Benchmarks `dot3(a, b) * (0.1 * cross3(a, b) + 1.0)` on an
/// array-of-structures data set.
///
/// Fills two lists with 256 random `[3]f32` vectors each, runs a warm-up pass,
/// then times `count` sweeps over every (a, b) pair with a scalar
/// implementation and with the zmath implementation, printing both timings.
///
/// Errors: propagates allocation failures from `append` and any error returned
/// by `Timer.start()`.
noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    std.debug.print("{s:>42} - ", .{"cross3, dot3, scale, bias benchmark (AOS)"});

    var data0 = std.ArrayList([3]f32).init(allocator);
    defer data0.deinit();
    var data1 = std.ArrayList([3]f32).init(allocator);
    defer data1.deinit();

    for (0..256) |_| {
        try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
        try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
    }

    // Warmup, fills L1 cache. Uses the same kernel as the timed zmath loop below.
    for (0..100) |_| {
        for (data1.items) |b| {
            for (data0.items) |a| {
                const va = zm.loadArr3(a);
                const vb = zm.loadArr3(b);
                const r = zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0));
                std.mem.doNotOptimizeAway(&r);
            }
        }
    }

    // Scalar baseline.
    {
        // `read()` right after `Timer.start()` is the elapsed loop time; the value
        // returned by `lap()` is a duration, not a timestamp, so it must not be
        // subtracted from it.
        var timer = try Timer.start();
        for (0..count) |_| {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const d = a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
                    const r = [3]f32{
                        d * (0.1 * (a[1] * b[2] - a[2] * b[1]) + 1.0),
                        d * (0.1 * (a[2] * b[0] - a[0] * b[2]) + 1.0),
                        d * (0.1 * (a[0] * b[1] - a[1] * b[0]) + 1.0),
                    };
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_s = @as(f64, @floatFromInt(timer.read())) / time.ns_per_s;
        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath version.
    {
        var timer = try Timer.start();
        for (0..count) |_| {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const va = zm.loadArr3(a);
                    const vb = zm.loadArr3(b);
                    const r = zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0));
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_s = @as(f64, @floatFromInt(timer.read())) / time.ns_per_s;
        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
/// Benchmarks quaternion multiplication on an array-of-structures data set.
///
/// Fills two lists with 256 random `[4]f32` quaternions each, runs a warm-up
/// pass, then times `count` sweeps over every (a, b) pair with a scalar
/// implementation and with `zm.qmul`, printing both timings.
///
/// Errors: propagates allocation failures from `append` and any error returned
/// by `Timer.start()`.
noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    std.debug.print("{s:>42} - ", .{"quaternion mul benchmark (AOS)"});

    var data0 = std.ArrayList([4]f32).init(allocator);
    defer data0.deinit();
    var data1 = std.ArrayList([4]f32).init(allocator);
    defer data1.deinit();

    for (0..256) |_| {
        try data0.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) });
        try data1.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) });
    }

    // Warmup, fills L1 cache.
    for (0..100) |_| {
        for (data1.items) |b| {
            for (data0.items) |a| {
                const va = zm.loadArr4(a);
                const vb = zm.loadArr4(b);
                const r = zm.qmul(va, vb);
                std.mem.doNotOptimizeAway(&r);
            }
        }
    }

    // Scalar baseline.
    {
        // `read()` right after `Timer.start()` is the elapsed loop time; the value
        // returned by `lap()` is a duration, not a timestamp, so it must not be
        // subtracted from it.
        var timer = try Timer.start();
        for (0..count) |_| {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const r = [4]f32{
                        (b[3] * a[0]) + (b[0] * a[3]) + (b[1] * a[2]) - (b[2] * a[1]),
                        (b[3] * a[1]) - (b[0] * a[2]) + (b[1] * a[3]) + (b[2] * a[0]),
                        (b[3] * a[2]) + (b[0] * a[1]) - (b[1] * a[0]) + (b[2] * a[3]),
                        (b[3] * a[3]) - (b[0] * a[0]) - (b[1] * a[1]) - (b[2] * a[2]),
                    };
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_s = @as(f64, @floatFromInt(timer.read())) / time.ns_per_s;
        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath version.
    {
        var timer = try Timer.start();
        for (0..count) |_| {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const va = zm.loadArr4(a);
                    const vb = zm.loadArr4(b);
                    const r = zm.qmul(va, vb);
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_s = @as(f64, @floatFromInt(timer.read())) / time.ns_per_s;
        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
/// Benchmarks `y = sin(sqrt(x * x + z * z) - t)` over a 1024 x 1024 grid.
///
/// The scalar version evaluates four consecutive x positions per inner
/// iteration; the zmath version evaluates `zm.veclen(zm.F32x16)` positions per
/// inner iteration using `F32x16` vectors. Each version performs `count` passes
/// over the grid, advancing `t` by 0.001 after every pass, and prints its timing.
///
/// `allocator` is unused; the parameter is kept so the signature matches the
/// other benchmarks.
///
/// Errors: propagates any error returned by `Timer.start()`.
noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    _ = allocator;
    std.debug.print("{s:>42} - ", .{"wave benchmark (SOA)"});

    const grid_size = 1024;

    // Scalar baseline.
    {
        var t: f32 = 0.0;
        const scale: f32 = 0.05;

        // `read()` right after `Timer.start()` is the elapsed loop time; the value
        // returned by `lap()` is a duration, not a timestamp, so it must not be
        // subtracted from it.
        var timer = try Timer.start();
        for (0..count) |_| {
            var z_index: i32 = 0;
            while (z_index < grid_size) : (z_index += 1) {
                const z = scale * @as(f32, @floatFromInt(z_index - grid_size / 2));

                var x_index: i32 = 0;
                while (x_index < grid_size) : (x_index += 4) {
                    const x0 = scale * @as(f32, @floatFromInt(x_index + 0 - grid_size / 2));
                    const x1 = scale * @as(f32, @floatFromInt(x_index + 1 - grid_size / 2));
                    const x2 = scale * @as(f32, @floatFromInt(x_index + 2 - grid_size / 2));
                    const x3 = scale * @as(f32, @floatFromInt(x_index + 3 - grid_size / 2));

                    const d0 = zm.sqrt(x0 * x0 + z * z);
                    const d1 = zm.sqrt(x1 * x1 + z * z);
                    const d2 = zm.sqrt(x2 * x2 + z * z);
                    const d3 = zm.sqrt(x3 * x3 + z * z);

                    const y0 = zm.sin(d0 - t);
                    const y1 = zm.sin(d1 - t);
                    const y2 = zm.sin(d2 - t);
                    const y3 = zm.sin(d3 - t);

                    std.mem.doNotOptimizeAway(&y0);
                    std.mem.doNotOptimizeAway(&y1);
                    std.mem.doNotOptimizeAway(&y2);
                    std.mem.doNotOptimizeAway(&y3);
                }
            }
            t += 0.001;
        }
        const elapsed_s = @as(f64, @floatFromInt(timer.read())) / time.ns_per_s;
        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath version.
    {
        const T = zm.F32x16;

        // Per-lane x offsets (0..15) added to the base x index of each vector.
        const static = struct {
            const offsets = [16]f32{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
        };
        const voffset = zm.load(static.offsets[0..], T, 0);

        var vt = zm.splat(T, 0.0);
        const scale: f32 = 0.05;

        var timer = try Timer.start();
        for (0..count) |_| {
            var z_index: i32 = 0;
            while (z_index < grid_size) : (z_index += 1) {
                const z = scale * @as(f32, @floatFromInt(z_index - grid_size / 2));
                const vz = zm.splat(T, z);

                var x_index: i32 = 0;
                while (x_index < grid_size) : (x_index += zm.veclen(T)) {
                    const x = scale * @as(f32, @floatFromInt(x_index - grid_size / 2));
                    const vx = zm.splat(T, x) + voffset * zm.splat(T, scale);

                    const d = zm.sqrt(vx * vx + vz * vz);
                    const vy = zm.sin(d - vt);

                    std.mem.doNotOptimizeAway(&vy);
                }
            }
            vt += zm.splat(T, 0.001);
        }
        const elapsed_s = @as(f64, @floatFromInt(timer.read())) / time.ns_per_s;
        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}

16
libs/zmath/src/main.zig Normal file
View file

@ -0,0 +1,16 @@
//--------------------------------------------------------------------------------------------------
//
// SIMD math library for game developers
// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath
//
// See zmath.zig for more details.
// See util.zig for additional functionality.
//
//--------------------------------------------------------------------------------------------------
pub usingnamespace @import("zmath.zig");
pub const util = @import("util.zig");
// Reference `util` from a comptime block so that util.zig is pulled into
// analysis whenever this file is, ensuring transitive closure of test coverage
// (the `test` blocks declared in util.zig).
comptime {
_ = util;
}

188
libs/zmath/src/util.zig Normal file
View file

@ -0,0 +1,188 @@
// ==============================================================================
//
// Collection of useful functions building on top of, and extending, core zmath.
// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath
//
// ------------------------------------------------------------------------------
// 1. Matrix functions
// ------------------------------------------------------------------------------
//
// As an example, in a left handed Y-up system:
// getAxisX is equivalent to the right vector
// getAxisY is equivalent to the up vector
// getAxisZ is equivalent to the forward vector
//
// getTranslationVec(m: Mat) Vec
// getAxisX(m: Mat) Vec
// getAxisY(m: Mat) Vec
// getAxisZ(m: Mat) Vec
//
// ==============================================================================
const zm = @import("zmath.zig");
const std = @import("std");
const math = std.math;
const expect = std.testing.expect;
/// Returns the x, y, z components of the matrix's fourth row (`m[3]`) as a
/// vector whose `w` component is 0.
pub fn getTranslationVec(m: zm.Mat) zm.Vec {
    const row = m[3];
    return zm.f32x4(row[0], row[1], row[2], 0);
}
/// Overwrites the fourth row (`m[3]`) of `m` with `translation`, keeping the
/// element `m[3][3]` that was there before the call. The `w` component of
/// `translation` is therefore ignored.
pub fn setTranslationVec(m: *zm.Mat, translation: zm.Vec) void {
    var new_row = translation;
    new_row[3] = m[3][3];
    m[3] = new_row;
}
/// Returns the lengths of the first three columns of the upper-left 3x3 part of
/// `m` as (x, y, z, 0).
pub fn getScaleVec(m: zm.Mat) zm.Vec {
    var scale = zm.f32x4s(0);
    inline for (0..3) |axis| {
        const column = zm.f32x4(m[0][axis], m[1][axis], m[2][axis], 0);
        scale[axis] = zm.length3(column)[0];
    }
    return scale;
}
/// Extracts a rotation quaternion from `_m`.
///
/// Each of the first three columns of the upper-left 3x3 part is normalized
/// (which removes per-axis scale) and written back into a copy of the matrix;
/// the copy is then converted with `zm.quatFromMat`. All other elements of the
/// copy are left as they were in `_m`.
pub fn getRotationQuat(_m: zm.Mat) zm.Quat {
    var unscaled = _m;
    inline for (0..3) |col| {
        const axis = zm.normalize3(zm.f32x4(_m[0][col], _m[1][col], _m[2][col], 0));
        unscaled[0][col] = axis[0];
        unscaled[1][col] = axis[1];
        unscaled[2][col] = axis[2];
    }
    return zm.quatFromMat(unscaled);
}
/// Returns the normalized x, y, z part of the first row (`m[0]`); `w` is zeroed
/// before normalizing.
pub fn getAxisX(m: zm.Mat) zm.Vec {
    var axis = m[0];
    axis[3] = 0.0;
    return zm.normalize3(axis);
}
/// Returns the normalized x, y, z part of the second row (`m[1]`); `w` is zeroed
/// before normalizing.
pub fn getAxisY(m: zm.Mat) zm.Vec {
    var axis = m[1];
    axis[3] = 0.0;
    return zm.normalize3(axis);
}
/// Returns the normalized x, y, z part of the third row (`m[2]`); `w` is zeroed
/// before normalizing.
pub fn getAxisZ(m: zm.Mat) zm.Vec {
    var axis = m[2];
    axis[3] = 0.0;
    return zm.normalize3(axis);
}
test "zmath.util.mat.translation" {
    // The matrix data is embedded at offset 1 of an 18-element buffer, so there
    // is one extra float on each side of the 16 matrix elements.
    // zig fmt: off
    const padded = [18]f32{
        1.0,
        2.0,  3.0,  4.0,  5.0,
        6.0,  7.0,  8.0,  9.0,
        10.0, 11.0, 12.0, 13.0,
        14.0, 15.0, 16.0, 17.0,
        18.0,
    };
    // zig fmt: on
    const m = zm.loadMat(padded[1..]);
    try zm.expectVecApproxEqAbs(
        getTranslationVec(m),
        zm.f32x4(14.0, 15.0, 16.0, 0.0),
        0.0001,
    );
}
test "zmath.util.mat.scale" {
    // Scale followed by translation: the scale factors must be recovered unchanged.
    const scale_part = zm.scaling(3, 4, 5);
    const translate_part = zm.translation(6, 7, 8);
    const m = zm.mul(scale_part, translate_part);
    try zm.expectVecApproxEqAbs(getScaleVec(m), zm.f32x4(3.0, 4.0, 5.0, 0.0), 0.0001);
}
test "zmath.util.mat.rotation" {
    // Compose rotation * scale * translation, then check that the extracted
    // quaternion transforms a probe vector the same way the original rotation does.
    const rotation = zm.matFromRollPitchYaw(0.1, 1.2, 2.3);
    const rotated_scaled = zm.mul(rotation, zm.scaling(3, 4, 5));
    const composed = zm.mul(rotated_scaled, zm.translation(6, 7, 8));

    const extracted = getRotationQuat(composed);

    const probe = zm.f32x4s(1);
    const expected = zm.mul(probe, rotation);
    const actual = zm.mul(zm.f32x4s(1), zm.quatToMat(extracted));
    try zm.expectVecApproxEqAbs(expected, actual, 0.0001);
}
test "zmath.util.mat.z_vec" {
    // Identity: Z axis is +Z.
    const start = zm.identity();
    try zm.expectVecApproxEqAbs(getAxisZ(start), zm.f32x4(0.0, 0.0, 1.0, 0), 0.0001);

    // After a 90 degree yaw the Z axis is expected along +X.
    const yawed = zm.mul(start, zm.rotationY(std.math.degreesToRadians(90)));
    try zm.expectVecApproxEqAbs(getAxisZ(yawed), zm.f32x4(1.0, 0.0, 0.0, 0), 0.0001);
}
test "zmath.util.mat.y_vec" {
    const tolerance = 0.01;

    // Identity: Y axis is +Y.
    const start = zm.identity();
    try zm.expectVecApproxEqAbs(getAxisY(start), zm.f32x4(0.0, 1.0, 0.0, 0), tolerance);

    // A yaw (rotation about Y) leaves the Y axis unchanged.
    const yawed = zm.mul(start, zm.rotationY(std.math.degreesToRadians(90)));
    try zm.expectVecApproxEqAbs(getAxisY(yawed), zm.f32x4(0.0, 1.0, 0.0, 0), tolerance);

    // Adding a 90 degree pitch is expected to move the Y axis to +Z.
    const yawed_pitched = zm.mul(yawed, zm.rotationX(std.math.degreesToRadians(90)));
    try zm.expectVecApproxEqAbs(getAxisY(yawed_pitched), zm.f32x4(0.0, 0.0, 1.0, 0), tolerance);
}
test "zmath.util.mat.right" {
    const tolerance = 0.01;

    // Identity: X axis is +X.
    const start = zm.identity();
    try zm.expectVecApproxEqAbs(getAxisX(start), zm.f32x4(1.0, 0.0, 0.0, 0), tolerance);

    // After a 90 degree yaw the X axis is expected along -Z.
    const yawed = zm.mul(start, zm.rotationY(std.math.degreesToRadians(90)));
    try zm.expectVecApproxEqAbs(getAxisX(yawed), zm.f32x4(0.0, 0.0, -1.0, 0), tolerance);

    // Adding a 90 degree pitch is expected to move the X axis to +Y.
    const yawed_pitched = zm.mul(yawed, zm.rotationX(std.math.degreesToRadians(90)));
    try zm.expectVecApproxEqAbs(getAxisX(yawed_pitched), zm.f32x4(0.0, 1.0, 0.0, 0), tolerance);
}
// ------------------------------------------------------------------------------
// This software is available under 2 licenses -- choose whichever you prefer.
// ------------------------------------------------------------------------------
// ALTERNATIVE A - MIT License
// Copyright (c) 2022 Michal Ziulek and Contributors
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// ------------------------------------------------------------------------------
// ALTERNATIVE B - Public Domain (www.unlicense.org)
// This is free and unencumbered software released into the public domain.
// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
// software, either in source code form or as a compiled binary, for any purpose,
// commercial or non-commercial, and by any means.
// In jurisdictions that recognize copyright laws, the author or authors of this
// software dedicate any and all copyright interest in the software to the public
// domain. We make this dedication for the benefit of the public at large and to
// the detriment of our heirs and successors. We intend this dedication to be an
// overt act of relinquishment in perpetuity of all present and future rights to
// this software under copyright law.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
// ------------------------------------------------------------------------------

4568
libs/zmath/src/zmath.zig Normal file

File diff suppressed because it is too large Load diff