Add uniform buffers
This commit is contained in:
parent
8c60f79f7d
commit
b1bbd65aaa
13 changed files with 5695 additions and 4 deletions
22
libs/zmath/LICENSE
Normal file
22
libs/zmath/LICENSE
Normal file
|
@ -0,0 +1,22 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2021 Michal Ziulek
|
||||
Copyright (c) 2024 zig-gamedev contributors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
133
libs/zmath/README.md
Normal file
133
libs/zmath/README.md
Normal file
|
@ -0,0 +1,133 @@
|
|||
# zmath v0.10.0 - SIMD math library for game developers
|
||||
|
||||
Tested on x86_64 and AArch64.
|
||||
|
||||
Provides ~140 optimized routines and ~70 extensive tests.
|
||||
|
||||
Can be used with any graphics API.
|
||||
|
||||
Documentation can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/zmath.zig).
|
||||
|
||||
Benchmarks can be found [here](https://github.com/michal-z/zig-gamedev/blob/main/libs/zmath/src/benchmark.zig).
|
||||
|
||||
An intro article can be found [here](https://zig.news/michalz/fast-multi-platform-simd-math-library-in-zig-2adn).
|
||||
|
||||
## Getting started
|
||||
|
||||
Copy `zmath` into a subdirectory of your project and add the following to your `build.zig.zon` .dependencies:
|
||||
```zig
|
||||
.zmath = .{ .path = "libs/zmath" },
|
||||
```
|
||||
|
||||
Then in your `build.zig` add:
|
||||
|
||||
```zig
|
||||
pub fn build(b: *std.Build) void {
|
||||
const exe = b.addExecutable(.{ ... });
|
||||
|
||||
const zmath = b.dependency("zmath", .{});
|
||||
exe.root_module.addImport("zmath", zmath.module("root"));
|
||||
}
|
||||
```
|
||||
|
||||
Now in your code you may import and use zmath:
|
||||
|
||||
```zig
|
||||
const zm = @import("zmath");
|
||||
|
||||
pub fn main() !void {
|
||||
//
|
||||
// OpenGL/Vulkan example
|
||||
//
|
||||
const object_to_world = zm.rotationY(..);
|
||||
const world_to_view = zm.lookAtRh(
|
||||
zm.f32x4(3.0, 3.0, 3.0, 1.0), // eye position
|
||||
zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point
|
||||
zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point)
|
||||
);
|
||||
// `perspectiveFovRhGl` produces Z values in [-1.0, 1.0] range (Vulkan app should use `perspectiveFovRh`)
|
||||
const view_to_clip = zm.perspectiveFovRhGl(0.25 * math.pi, aspect_ratio, 0.1, 20.0);
|
||||
|
||||
const object_to_view = zm.mul(object_to_world, world_to_view);
|
||||
const object_to_clip = zm.mul(object_to_view, view_to_clip);
|
||||
|
||||
// Transposition is needed because GLSL uses column-major matrices by default
|
||||
gl.uniformMatrix4fv(0, 1, gl.TRUE, zm.arrNPtr(&object_to_clip));
|
||||
|
||||
// In GLSL: gl_Position = vec4(in_position, 1.0) * object_to_clip;
|
||||
|
||||
//
|
||||
// DirectX example
|
||||
//
|
||||
const object_to_world = zm.rotationY(..);
|
||||
const world_to_view = zm.lookAtLh(
|
||||
zm.f32x4(3.0, 3.0, -3.0, 1.0), // eye position
|
||||
zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point
|
||||
zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point)
|
||||
);
|
||||
const view_to_clip = zm.perspectiveFovLh(0.25 * math.pi, aspect_ratio, 0.1, 20.0);
|
||||
|
||||
const object_to_view = zm.mul(object_to_world, world_to_view);
|
||||
const object_to_clip = zm.mul(object_to_view, view_to_clip);
|
||||
|
||||
// Transposition is needed because HLSL uses column-major matrices by default
|
||||
const mem = allocateUploadMemory(...);
|
||||
zm.storeMat(mem, zm.transpose(object_to_clip));
|
||||
|
||||
// In HLSL: out_position_sv = mul(float4(in_position, 1.0), object_to_clip);
|
||||
|
||||
//
|
||||
// 'WASD' camera movement example
|
||||
//
|
||||
{
|
||||
const speed = zm.f32x4s(10.0);
|
||||
const delta_time = zm.f32x4s(demo.frame_stats.delta_time);
|
||||
const transform = zm.mul(zm.rotationX(demo.camera.pitch), zm.rotationY(demo.camera.yaw));
|
||||
var forward = zm.normalize3(zm.mul(zm.f32x4(0.0, 0.0, 1.0, 0.0), transform));
|
||||
|
||||
zm.storeArr3(&demo.camera.forward, forward);
|
||||
|
||||
const right = speed * delta_time * zm.normalize3(zm.cross3(zm.f32x4(0.0, 1.0, 0.0, 0.0), forward));
|
||||
forward = speed * delta_time * forward;
|
||||
|
||||
var cam_pos = zm.loadArr3(demo.camera.position);
|
||||
|
||||
if (keyDown('W')) {
|
||||
cam_pos += forward;
|
||||
} else if (keyDown('S')) {
|
||||
cam_pos -= forward;
|
||||
}
|
||||
if (keyDown('D')) {
|
||||
cam_pos += right;
|
||||
} else if (keyDown('A')) {
|
||||
cam_pos -= right;
|
||||
}
|
||||
|
||||
zm.storeArr3(&demo.camera.position, cam_pos);
|
||||
}
|
||||
|
||||
//
|
||||
// SIMD wave equation solver example (works with vector width 4, 8 and 16)
|
||||
// 'T' can be F32x4, F32x8 or F32x16
|
||||
//
|
||||
var z_index: i32 = 0;
|
||||
while (z_index < grid_size) : (z_index += 1) {
|
||||
const z = scale * @as(f32, @floatFromInt(z_index - grid_size / 2));
|
||||
const vz = zm.splat(T, z);
|
||||
|
||||
var x_index: i32 = 0;
|
||||
while (x_index < grid_size) : (x_index += zm.veclen(T)) {
|
||||
const x = scale * @as(f32, @floatFromInt(x_index - grid_size / 2));
|
||||
const vx = zm.splat(T, x) + voffset * zm.splat(T, scale);
|
||||
|
||||
const d = zm.sqrt(vx * vx + vz * vz);
|
||||
const vy = zm.sin(d - vtime);
|
||||
|
||||
const index: usize = @intCast(x_index + z_index * grid_size);
|
||||
zm.store(xslice[index..], vx, 0);
|
||||
zm.store(yslice[index..], vy, 0);
|
||||
zm.store(zslice[index..], vz, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
62
libs/zmath/build.zig
Normal file
62
libs/zmath/build.zig
Normal file
|
@ -0,0 +1,62 @@
|
|||
const std = @import("std");
|
||||
|
||||
/// Build script for the zmath package.
///
/// Exposes the library as the module named "root" (with a generated
/// "zmath_options" import), and registers two top-level steps:
///   * `test`      - builds, installs and runs the unit tests in src/main.zig
///   * `benchmark` - builds, installs and runs src/benchmark.zig
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});

    // An explicit `-Doptimize=<mode>` wins; otherwise fall back to the
    // standard optimize option, which prefers ReleaseFast.
    const optimize = b.option(
        std.builtin.OptimizeMode,
        "optimize",
        "Select optimization mode",
    ) orelse b.standardOptimizeOption(.{
        .preferred_optimize_mode = .ReleaseFast,
    });

    // Defaults to true when the flag is not given on the command line.
    const enable_cross_platform_determinism = b.option(
        bool,
        "enable_cross_platform_determinism",
        "Enable cross-platform determinism",
    ) orelse true;

    // Forward both settings to the library as a generated options module.
    const options_step = b.addOptions();
    options_step.addOption(std.builtin.OptimizeMode, "optimize", optimize);
    options_step.addOption(
        bool,
        "enable_cross_platform_determinism",
        enable_cross_platform_determinism,
    );
    const options_module = options_step.createModule();

    const zmath = b.addModule("root", .{
        .root_source_file = b.path("src/main.zig"),
        .imports = &.{
            .{ .name = "zmath_options", .module = options_module },
        },
    });

    // Unit tests.
    const test_step = b.step("test", "Run zmath tests");
    const tests = b.addTest(.{
        .name = "zmath-tests",
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
    });
    b.installArtifact(tests);
    tests.root_module.addImport("zmath_options", options_module);
    test_step.dependOn(&b.addRunArtifact(tests).step);

    // Benchmarks; these import the library through the public "zmath" name.
    const benchmark_step = b.step("benchmark", "Run zmath benchmarks");
    const benchmarks = b.addExecutable(.{
        .name = "zmath-benchmarks",
        .root_source_file = b.path("src/benchmark.zig"),
        .target = target,
        .optimize = optimize,
    });
    b.installArtifact(benchmarks);
    benchmarks.root_module.addImport("zmath", zmath);
    benchmark_step.dependOn(&b.addRunArtifact(benchmarks).step);
}
|
10
libs/zmath/build.zig.zon
Normal file
10
libs/zmath/build.zig.zon
Normal file
|
@ -0,0 +1,10 @@
|
|||
.{
|
||||
.name = "zmath",
|
||||
.version = "0.10.0",
|
||||
.paths = .{
|
||||
"build.zig",
|
||||
"build.zig.zon",
|
||||
"src",
|
||||
"README.md",
|
||||
},
|
||||
}
|
469
libs/zmath/src/benchmark.zig
Normal file
469
libs/zmath/src/benchmark.zig
Normal file
|
@ -0,0 +1,469 @@
|
|||
// -------------------------------------------------------------------------------------------------
|
||||
// zmath - benchmarks
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// 'zig build benchmark -Doptimize=ReleaseFast' will build and run the benchmarks with all optimisations.
|
||||
//
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// 'AMD Ryzen 9 3950X 16-Core Processor', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f, ReleaseFast
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// matrix mul benchmark (AOS) - scalar version: 1.5880s, zmath version: 1.0642s
|
||||
// cross3, scale, bias benchmark (AOS) - scalar version: 0.9318s, zmath version: 0.6888s
|
||||
// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.2258s, zmath version: 1.1095s
|
||||
// quaternion mul benchmark (AOS) - scalar version: 1.4123s, zmath version: 0.6958s
|
||||
// wave benchmark (SOA) - scalar version: 4.8165s, zmath version: 0.7338s
|
||||
//
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// 'AMD Ryzen 7 5800X 8-Core Processor', Linux 5.17.14, Zig 0.10.0-dev.2624+d506275a0, ReleaseFast
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// matrix mul benchmark (AOS) - scalar version: 1.3672s, zmath version: 0.8617s
|
||||
// cross3, scale, bias benchmark (AOS) - scalar version: 0.6586s, zmath version: 0.4803s
|
||||
// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.0620s, zmath version: 0.8942s
|
||||
// quaternion mul benchmark (AOS) - scalar version: 1.1324s, zmath version: 0.6064s
|
||||
// wave benchmark (SOA) - scalar version: 3.6598s, zmath version: 0.4231s
|
||||
//
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// 'Apple M1 Max', macOS Version 12.4, Zig 0.10.0-dev.2657+74442f350, ReleaseFast
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// matrix mul benchmark (AOS) - scalar version: 1.0297s, zmath version: 1.0538s
|
||||
// cross3, scale, bias benchmark (AOS) - scalar version: 0.6294s, zmath version: 0.6532s
|
||||
// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 0.9807s, zmath version: 1.0988s
|
||||
// quaternion mul benchmark (AOS) - scalar version: 1.5413s, zmath version: 0.7800s
|
||||
// wave benchmark (SOA) - scalar version: 3.4220s, zmath version: 1.0255s
|
||||
//
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// '11th Gen Intel(R) Core(TM) i7-11800H @ 2.30GHz', Windows 11, Zig 0.10.0-dev.2620+0e9458a3f, ReleaseFast
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// matrix mul benchmark (AOS) - scalar version: 2.2308s, zmath version: 0.9376s
|
||||
// cross3, scale, bias benchmark (AOS) - scalar version: 1.0821s, zmath version: 0.5110s
|
||||
// cross3, dot3, scale, bias benchmark (AOS) - scalar version: 1.6580s, zmath version: 0.9167s
|
||||
// quaternion mul benchmark (AOS) - scalar version: 2.0139s, zmath version: 0.5856s
|
||||
// wave benchmark (SOA) - scalar version: 3.7832s, zmath version: 0.3642s
|
||||
//
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
|
||||
/// Benchmark entry point: creates a general purpose allocator and runs each
/// benchmark once, in a fixed order. Any error returned by a benchmark is
/// propagated to the caller.
pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const alloc = gpa_state.allocator();

    // Matrix product: m = mul(ma, mb); data set fits in L1 cache; AOS data layout.
    try mat4MulBenchmark(alloc, 100_000);

    // Scaled and biased cross product: v = 0.01 * cross3(va, vb) + vec3(1.0);
    // data set fits in L1 cache; AOS data layout.
    try cross3ScaleBiasBenchmark(alloc, 10_000);

    // Cross and dot product: v = dot3(va, vb) * (0.1 * cross3(va, vb) + vec3(1.0));
    // data set fits in L1 cache; AOS data layout.
    try cross3Dot3ScaleBiasBenchmark(alloc, 10_000);

    // Quaternion product: q = qmul(qa, qb); data set fits in L1 cache; AOS data layout.
    try quatBenchmark(alloc, 10_000);

    // Wave: d = sqrt(x * x + z * z); y = sin(d - t); SOA layout.
    try waveBenchmark(alloc, 1_000);
}
|
||||
|
||||
const std = @import("std");
|
||||
const time = std.time;
|
||||
const Timer = time.Timer;
|
||||
const zm = @import("zmath");
|
||||
|
||||
var prng = std.Random.DefaultPrng.init(0);
|
||||
const random = prng.random();
|
||||
|
||||
/// Benchmarks 4x4 matrix multiplication on an array-of-structures data set.
///
/// Fills two lists with 64 random `[16]f32` matrices each, runs a warmup pass
/// through the zmath path, then times `count` passes over every (a, b) pair:
/// first with a hand-written scalar product, then with `zm.mul`. Both timings
/// are printed in seconds via `std.debug.print`.
///
/// Returns an error if allocation fails or if no timer is available.
noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    std.debug.print("\n", .{});
    std.debug.print("{s:>42} - ", .{"matrix mul benchmark (AOS)"});

    var data0 = std.ArrayList([16]f32).init(allocator);
    defer data0.deinit();
    var data1 = std.ArrayList([16]f32).init(allocator);
    defer data1.deinit();

    var i: usize = 0;
    while (i < 64) : (i += 1) {
        try data0.append([16]f32{
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
        });
        try data1.append([16]f32{
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
            random.float(f32), random.float(f32), random.float(f32), random.float(f32),
        });
    }

    // Warmup, fills L1 cache.
    i = 0;
    while (i < 100) : (i += 1) {
        for (data1.items) |b| {
            for (data0.items) |a| {
                const ma = zm.loadMat(a[0..]);
                const mb = zm.loadMat(b[0..]);
                const r = zm.mul(ma, mb);
                std.mem.doNotOptimizeAway(&r);
            }
        }
    }

    // Scalar reference implementation.
    {
        i = 0;
        // `Timer.start()` begins measuring immediately and `read()` returns the
        // nanoseconds elapsed since then. `lap()` returns a duration, not a
        // timestamp, so it must not be subtracted from `read()`.
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const r = [16]f32{
                        a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12],
                        a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13],
                        a[0] * b[2] + a[1] * b[6] + a[2] * b[10] + a[3] * b[14],
                        a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3] * b[15],
                        a[4] * b[0] + a[5] * b[4] + a[6] * b[8] + a[7] * b[12],
                        a[4] * b[1] + a[5] * b[5] + a[6] * b[9] + a[7] * b[13],
                        a[4] * b[2] + a[5] * b[6] + a[6] * b[10] + a[7] * b[14],
                        a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7] * b[15],
                        a[8] * b[0] + a[9] * b[4] + a[10] * b[8] + a[11] * b[12],
                        a[8] * b[1] + a[9] * b[5] + a[10] * b[9] + a[11] * b[13],
                        a[8] * b[2] + a[9] * b[6] + a[10] * b[10] + a[11] * b[14],
                        a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11] * b[15],
                        a[12] * b[0] + a[13] * b[4] + a[14] * b[8] + a[15] * b[12],
                        a[12] * b[1] + a[13] * b[5] + a[14] * b[9] + a[15] * b[13],
                        a[12] * b[2] + a[13] * b[6] + a[14] * b[10] + a[15] * b[14],
                        a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15],
                    };
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;

        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath (SIMD) implementation.
    {
        i = 0;
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const ma = zm.loadMat(a[0..]);
                    const mb = zm.loadMat(b[0..]);
                    const r = zm.mul(ma, mb);
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;

        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
|
||||
|
||||
/// Benchmarks `0.01 * cross3(a, b) + 1.0` on an array-of-structures data set.
///
/// Fills two lists with 256 random `[3]f32` vectors each, runs a warmup pass
/// through the zmath path, then times `count` passes over every (a, b) pair:
/// first with scalar arithmetic, then with zmath vector operations. Both
/// timings are printed in seconds via `std.debug.print`.
///
/// Returns an error if allocation fails or if no timer is available.
noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    std.debug.print("{s:>42} - ", .{"cross3, scale, bias benchmark (AOS)"});

    var data0 = std.ArrayList([3]f32).init(allocator);
    defer data0.deinit();
    var data1 = std.ArrayList([3]f32).init(allocator);
    defer data1.deinit();

    var i: usize = 0;
    while (i < 256) : (i += 1) {
        try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
        try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
    }

    // Warmup, fills L1 cache.
    i = 0;
    while (i < 100) : (i += 1) {
        for (data1.items) |b| {
            for (data0.items) |a| {
                const va = zm.loadArr3(a);
                const vb = zm.loadArr3(b);
                const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0);
                std.mem.doNotOptimizeAway(&cp);
            }
        }
    }

    // Scalar reference implementation.
    {
        i = 0;
        // `Timer.start()` begins measuring immediately and `read()` returns the
        // nanoseconds elapsed since then. `lap()` returns a duration, not a
        // timestamp, so it must not be subtracted from `read()`.
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const r = [3]f32{
                        0.01 * (a[1] * b[2] - a[2] * b[1]) + 1.0,
                        0.01 * (a[2] * b[0] - a[0] * b[2]) + 1.0,
                        0.01 * (a[0] * b[1] - a[1] * b[0]) + 1.0,
                    };
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;

        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath (SIMD) implementation.
    {
        i = 0;
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const va = zm.loadArr3(a);
                    const vb = zm.loadArr3(b);
                    const cp = zm.f32x4s(0.01) * zm.cross3(va, vb) + zm.f32x4s(1.0);
                    std.mem.doNotOptimizeAway(&cp);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;

        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
|
||||
|
||||
/// Benchmarks `dot3(a, b) * (0.1 * cross3(a, b) + 1.0)` on an
/// array-of-structures data set.
///
/// Fills two lists with 256 random `[3]f32` vectors each, runs a warmup pass
/// through the zmath path, then times `count` passes over every (a, b) pair:
/// first with scalar arithmetic, then with zmath vector operations. Both
/// timings are printed in seconds via `std.debug.print`.
///
/// Returns an error if allocation fails or if no timer is available.
noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    std.debug.print("{s:>42} - ", .{"cross3, dot3, scale, bias benchmark (AOS)"});

    var data0 = std.ArrayList([3]f32).init(allocator);
    defer data0.deinit();
    var data1 = std.ArrayList([3]f32).init(allocator);
    defer data1.deinit();

    var i: usize = 0;
    while (i < 256) : (i += 1) {
        try data0.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
        try data1.append([3]f32{ random.float(f32), random.float(f32), random.float(f32) });
    }

    // Warmup, fills L1 cache.
    i = 0;
    while (i < 100) : (i += 1) {
        for (data1.items) |b| {
            for (data0.items) |a| {
                const va = zm.loadArr3(a);
                const vb = zm.loadArr3(b);
                const r = (zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0)))[0];
                std.mem.doNotOptimizeAway(&r);
            }
        }
    }

    // Scalar reference implementation.
    {
        i = 0;
        // `Timer.start()` begins measuring immediately and `read()` returns the
        // nanoseconds elapsed since then. `lap()` returns a duration, not a
        // timestamp, so it must not be subtracted from `read()`.
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const d = a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
                    const r = [3]f32{
                        d * (0.1 * (a[1] * b[2] - a[2] * b[1]) + 1.0),
                        d * (0.1 * (a[2] * b[0] - a[0] * b[2]) + 1.0),
                        d * (0.1 * (a[0] * b[1] - a[1] * b[0]) + 1.0),
                    };
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;

        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath (SIMD) implementation.
    {
        i = 0;
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const va = zm.loadArr3(a);
                    const vb = zm.loadArr3(b);
                    const r = zm.dot3(va, vb) * (zm.f32x4s(0.1) * zm.cross3(va, vb) + zm.f32x4s(1.0));
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;

        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
|
||||
|
||||
/// Benchmarks quaternion multiplication on an array-of-structures data set.
///
/// Fills two lists with 256 random `[4]f32` quaternions each, runs a warmup
/// pass through the zmath path, then times `count` passes over every (a, b)
/// pair: first with a hand-written scalar product, then with `zm.qmul`. Both
/// timings are printed in seconds via `std.debug.print`.
///
/// Returns an error if allocation fails or if no timer is available.
noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    std.debug.print("{s:>42} - ", .{"quaternion mul benchmark (AOS)"});

    var data0 = std.ArrayList([4]f32).init(allocator);
    defer data0.deinit();
    var data1 = std.ArrayList([4]f32).init(allocator);
    defer data1.deinit();

    var i: usize = 0;
    while (i < 256) : (i += 1) {
        try data0.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) });
        try data1.append([4]f32{ random.float(f32), random.float(f32), random.float(f32), random.float(f32) });
    }

    // Warmup, fills L1 cache.
    i = 0;
    while (i < 100) : (i += 1) {
        for (data1.items) |b| {
            for (data0.items) |a| {
                const va = zm.loadArr4(a);
                const vb = zm.loadArr4(b);
                const r = zm.qmul(va, vb);
                std.mem.doNotOptimizeAway(&r);
            }
        }
    }

    // Scalar reference implementation.
    {
        i = 0;
        // `Timer.start()` begins measuring immediately and `read()` returns the
        // nanoseconds elapsed since then. `lap()` returns a duration, not a
        // timestamp, so it must not be subtracted from `read()`.
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const r = [4]f32{
                        (b[3] * a[0]) + (b[0] * a[3]) + (b[1] * a[2]) - (b[2] * a[1]),
                        (b[3] * a[1]) - (b[0] * a[2]) + (b[1] * a[3]) + (b[2] * a[0]),
                        (b[3] * a[2]) + (b[0] * a[1]) - (b[1] * a[0]) + (b[2] * a[3]),
                        (b[3] * a[3]) - (b[0] * a[0]) - (b[1] * a[1]) - (b[2] * a[2]),
                    };
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;

        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath (SIMD) implementation.
    {
        i = 0;
        var timer = try Timer.start();
        while (i < count) : (i += 1) {
            for (data1.items) |b| {
                for (data0.items) |a| {
                    const va = zm.loadArr4(a);
                    const vb = zm.loadArr4(b);
                    const r = zm.qmul(va, vb);
                    std.mem.doNotOptimizeAway(&r);
                }
            }
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;

        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
|
||||
|
||||
/// Benchmarks the wave function `y = sin(sqrt(x*x + z*z) - t)` over a
/// 1024 x 1024 grid, repeated `count` times with `t` advancing by 0.001 per
/// iteration.
///
/// The scalar version evaluates four consecutive x positions per inner step
/// using `f32` values; the zmath version evaluates `zm.veclen(T)` positions per
/// step using `zm.F32x16`. Both timings are printed in seconds via
/// `std.debug.print`. The `allocator` parameter is unused; it is accepted so
/// the signature matches the other benchmarks.
///
/// Returns an error if no timer is available.
noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
    _ = allocator;
    std.debug.print("{s:>42} - ", .{"wave benchmark (SOA)"});

    const grid_size = 1024;

    // Scalar version.
    {
        var t: f32 = 0.0;

        const scale: f32 = 0.05;

        // `Timer.start()` begins measuring immediately and `read()` returns the
        // nanoseconds elapsed since then. `lap()` returns a duration, not a
        // timestamp, so it must not be subtracted from `read()`.
        var timer = try Timer.start();

        var iter: usize = 0;
        while (iter < count) : (iter += 1) {
            var z_index: i32 = 0;
            while (z_index < grid_size) : (z_index += 1) {
                const z = scale * @as(f32, @floatFromInt(z_index - grid_size / 2));

                var x_index: i32 = 0;
                while (x_index < grid_size) : (x_index += 4) {
                    const x0 = scale * @as(f32, @floatFromInt(x_index + 0 - grid_size / 2));
                    const x1 = scale * @as(f32, @floatFromInt(x_index + 1 - grid_size / 2));
                    const x2 = scale * @as(f32, @floatFromInt(x_index + 2 - grid_size / 2));
                    const x3 = scale * @as(f32, @floatFromInt(x_index + 3 - grid_size / 2));

                    const d0 = zm.sqrt(x0 * x0 + z * z);
                    const d1 = zm.sqrt(x1 * x1 + z * z);
                    const d2 = zm.sqrt(x2 * x2 + z * z);
                    const d3 = zm.sqrt(x3 * x3 + z * z);

                    const y0 = zm.sin(d0 - t);
                    const y1 = zm.sin(d1 - t);
                    const y2 = zm.sin(d2 - t);
                    const y3 = zm.sin(d3 - t);

                    std.mem.doNotOptimizeAway(&y0);
                    std.mem.doNotOptimizeAway(&y1);
                    std.mem.doNotOptimizeAway(&y2);
                    std.mem.doNotOptimizeAway(&y3);
                }
            }
            t += 0.001;
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;

        std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
    }

    // zmath (SIMD) version.
    {
        const T = zm.F32x16;

        // Per-lane x offsets (0..15) added to the splatted base x position.
        const static = struct {
            const offsets = [16]f32{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
        };
        const voffset = zm.load(static.offsets[0..], T, 0);
        var vt = zm.splat(T, 0.0);

        const scale: f32 = 0.05;

        var timer = try Timer.start();

        var iter: usize = 0;
        while (iter < count) : (iter += 1) {
            var z_index: i32 = 0;
            while (z_index < grid_size) : (z_index += 1) {
                const z = scale * @as(f32, @floatFromInt(z_index - grid_size / 2));
                const vz = zm.splat(T, z);

                var x_index: i32 = 0;
                while (x_index < grid_size) : (x_index += zm.veclen(T)) {
                    const x = scale * @as(f32, @floatFromInt(x_index - grid_size / 2));
                    const vx = zm.splat(T, x) + voffset * zm.splat(T, scale);

                    const d = zm.sqrt(vx * vx + vz * vz);

                    const vy = zm.sin(d - vt);

                    std.mem.doNotOptimizeAway(&vy);
                }
            }
            vt += zm.splat(T, 0.001);
        }
        const elapsed_ns = timer.read();
        const elapsed_s = @as(f64, @floatFromInt(elapsed_ns)) / time.ns_per_s;

        std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
    }
}
|
16
libs/zmath/src/main.zig
Normal file
16
libs/zmath/src/main.zig
Normal file
|
@ -0,0 +1,16 @@
|
|||
//--------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// SIMD math library for game developers
|
||||
// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath
|
||||
//
|
||||
// See zmath.zig for more details.
|
||||
// See util.zig for additional functionality.
|
||||
//
|
||||
//--------------------------------------------------------------------------------------------------
|
||||
pub usingnamespace @import("zmath.zig");
|
||||
pub const util = @import("util.zig");
|
||||
|
||||
// ensure transitive closure of test coverage
|
||||
comptime {
|
||||
_ = util;
|
||||
}
|
188
libs/zmath/src/util.zig
Normal file
188
libs/zmath/src/util.zig
Normal file
|
@ -0,0 +1,188 @@
|
|||
// ==============================================================================
|
||||
//
|
||||
// Collection of useful functions building on top of, and extending, core zmath.
|
||||
// https://github.com/michal-z/zig-gamedev/tree/main/libs/zmath
|
||||
//
|
||||
// ------------------------------------------------------------------------------
|
||||
// 1. Matrix functions
|
||||
// ------------------------------------------------------------------------------
|
||||
//
|
||||
// As an example, in a left handed Y-up system:
|
||||
// getAxisX is equivalent to the right vector
|
||||
// getAxisY is equivalent to the up vector
|
||||
// getAxisZ is equivalent to the forward vector
|
||||
//
|
||||
// getTranslationVec(m: Mat) Vec
|
||||
// getAxisX(m: Mat) Vec
|
||||
// getAxisY(m: Mat) Vec
|
||||
// getAxisZ(m: Mat) Vec
|
||||
//
|
||||
// ==============================================================================
|
||||
|
||||
const zm = @import("zmath.zig");
|
||||
const std = @import("std");
|
||||
const math = std.math;
|
||||
const expect = std.testing.expect;
|
||||
|
||||
/// Returns the fourth row of `m` as a vector, with the `w` component set to 0.
pub fn getTranslationVec(m: zm.Mat) zm.Vec {
    const row = m[3];
    return zm.f32x4(row[0], row[1], row[2], 0);
}
|
||||
|
||||
/// Overwrites the x, y and z components of the fourth row of `m` with those of
/// `translation`. The existing `m[3][3]` value is preserved; the `w` component
/// of `translation` is ignored.
pub fn setTranslationVec(m: *zm.Mat, translation: zm.Vec) void {
    var new_row = translation;
    new_row[3] = m[3][3];
    m[3] = new_row;
}
|
||||
|
||||
/// Returns a vector whose x, y and z components are the lengths of the first
/// three columns of the upper-left 3x3 part of `m`; `w` is 0.
pub fn getScaleVec(m: zm.Mat) zm.Vec {
    const col_x = zm.f32x4(m[0][0], m[1][0], m[2][0], 0);
    const col_y = zm.f32x4(m[0][1], m[1][1], m[2][1], 0);
    const col_z = zm.f32x4(m[0][2], m[1][2], m[2][2], 0);
    return zm.f32x4(
        zm.length3(col_x)[0],
        zm.length3(col_y)[0],
        zm.length3(col_z)[0],
        0,
    );
}
|
||||
|
||||
/// Extracts a rotation quaternion from `_m`.
///
/// Each of the first three columns of the upper-left 3x3 part is normalized
/// (removing per-column scale) in a copy of the matrix, and the copy is then
/// converted with `zm.quatFromMat`. All other elements are passed through
/// unchanged.
pub fn getRotationQuat(_m: zm.Mat) zm.Quat {
    var m = _m;

    // Normalize one column at a time, always reading from the untouched input.
    inline for (0..3) |col| {
        const axis = zm.normalize3(zm.f32x4(_m[0][col], _m[1][col], _m[2][col], 0));
        m[0][col] = axis[0];
        m[1][col] = axis[1];
        m[2][col] = axis[2];
    }

    // Extract rotation
    return zm.quatFromMat(m);
}
|
||||
|
||||
/// Returns the normalized x, y, z components of the first row of `m`, with
/// `w` set to 0.
pub fn getAxisX(m: zm.Mat) zm.Vec {
    const row = m[0];
    const axis = zm.f32x4(row[0], row[1], row[2], 0.0);
    return zm.normalize3(axis);
}
|
||||
|
||||
/// Returns the normalized x, y, z components of the second row of `m`, with
/// `w` set to 0.
pub fn getAxisY(m: zm.Mat) zm.Vec {
    const row = m[1];
    const axis = zm.f32x4(row[0], row[1], row[2], 0.0);
    return zm.normalize3(axis);
}
|
||||
|
||||
/// Returns the normalized x, y, z components of the third row of `m`, with
/// `w` set to 0.
pub fn getAxisZ(m: zm.Mat) zm.Vec {
    const row = m[2];
    const axis = zm.f32x4(row[0], row[1], row[2], 0.0);
    return zm.normalize3(axis);
}
|
||||
|
||||
test "zmath.util.mat.translation" {
    // The values 1.0 through 18.0: sixteen matrix elements with one padding
    // value on each side, so the matrix is loaded from index 1.
    var padded: [18]f32 = undefined;
    for (&padded, 0..) |*elem, idx| {
        elem.* = @floatFromInt(idx + 1);
    }

    const mat = zm.loadMat(padded[1..]);
    const expected = zm.f32x4(14.0, 15.0, 16.0, 0.0);
    try zm.expectVecApproxEqAbs(getTranslationVec(mat), expected, 0.0001);
}
|
||||
|
||||
test "zmath.util.mat.scale" {
    // Scale followed by translation; the translation must not affect the result.
    const scale_mat = zm.scaling(3, 4, 5);
    const translate_mat = zm.translation(6, 7, 8);
    const combined = zm.mul(scale_mat, translate_mat);

    const expected = zm.f32x4(3.0, 4.0, 5.0, 0.0);
    try zm.expectVecApproxEqAbs(getScaleVec(combined), expected, 0.0001);
}
|
||||
|
||||
test "zmath.util.mat.rotation" {
    // Build rotation * scale * translation, then check that the extracted
    // quaternion transforms a probe vector the same way as the original rotation.
    const rotate_origin = zm.matFromRollPitchYaw(0.1, 1.2, 2.3);
    const rotated_scaled = zm.mul(rotate_origin, zm.scaling(3, 4, 5));
    const mat = zm.mul(rotated_scaled, zm.translation(6, 7, 8));

    const recovered = zm.quatToMat(getRotationQuat(mat));
    const probe = zm.f32x4s(1);

    const v0 = zm.mul(probe, rotate_origin);
    const v1 = zm.mul(probe, recovered);
    try zm.expectVecApproxEqAbs(v0, v1, 0.0001);
}
|
||||
|
||||
test "zmath.util.mat.z_vec" {
    const tolerance = 0.0001;

    // Identity: Z axis is (0, 0, 1).
    const start = zm.identity();
    try zm.expectVecApproxEqAbs(getAxisZ(start), zm.f32x4(0.0, 0.0, 1.0, 0), tolerance);

    // After a 90 degree rotation about Y the Z axis is expected at (1, 0, 0).
    const quarter_turn_y = zm.rotationY(std.math.degreesToRadians(90));
    const yawed = zm.mul(start, quarter_turn_y);
    try zm.expectVecApproxEqAbs(getAxisZ(yawed), zm.f32x4(1.0, 0.0, 0.0, 0), tolerance);
}
|
||||
|
||||
test "zmath.util.mat.y_vec" {
    const tolerance = 0.01;

    // Identity: Y axis is (0, 1, 0).
    const start = zm.identity();
    try zm.expectVecApproxEqAbs(getAxisY(start), zm.f32x4(0.0, 1.0, 0.0, 0), tolerance);

    // A 90 degree rotation about Y is expected to leave the Y axis unchanged.
    const quarter_turn_y = zm.rotationY(std.math.degreesToRadians(90));
    const yawed = zm.mul(start, quarter_turn_y);
    try zm.expectVecApproxEqAbs(getAxisY(yawed), zm.f32x4(0.0, 1.0, 0.0, 0), tolerance);

    // Following that with a 90 degree rotation about X, the Y axis is
    // expected at (0, 0, 1).
    const quarter_turn_x = zm.rotationX(std.math.degreesToRadians(90));
    const yawed_then_pitched = zm.mul(yawed, quarter_turn_x);
    try zm.expectVecApproxEqAbs(getAxisY(yawed_then_pitched), zm.f32x4(0.0, 0.0, 1.0, 0), tolerance);
}
|
||||
|
||||
test "zmath.util.mat.right" {
    const tolerance = 0.01;

    // Identity: X axis is (1, 0, 0).
    const start = zm.identity();
    try zm.expectVecApproxEqAbs(getAxisX(start), zm.f32x4(1.0, 0.0, 0.0, 0), tolerance);

    // After a 90 degree rotation about Y the X axis is expected at (0, 0, -1).
    const quarter_turn_y = zm.rotationY(std.math.degreesToRadians(90));
    const yawed = zm.mul(start, quarter_turn_y);
    try zm.expectVecApproxEqAbs(getAxisX(yawed), zm.f32x4(0.0, 0.0, -1.0, 0), tolerance);

    // Following that with a 90 degree rotation about X, the X axis is
    // expected at (0, 1, 0).
    const quarter_turn_x = zm.rotationX(std.math.degreesToRadians(90));
    const yawed_then_pitched = zm.mul(yawed, quarter_turn_x);
    try zm.expectVecApproxEqAbs(getAxisX(yawed_then_pitched), zm.f32x4(0.0, 1.0, 0.0, 0), tolerance);
}
|
||||
|
||||
// ------------------------------------------------------------------------------
|
||||
// This software is available under 2 licenses -- choose whichever you prefer.
|
||||
// ------------------------------------------------------------------------------
|
||||
// ALTERNATIVE A - MIT License
|
||||
// Copyright (c) 2022 Michal Ziulek and Contributors
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
// this software and associated documentation files (the "Software"), to deal in
|
||||
// the Software without restriction, including without limitation the rights to
|
||||
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
// of the Software, and to permit persons to whom the Software is furnished to do
|
||||
// so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
// ------------------------------------------------------------------------------
|
||||
// ALTERNATIVE B - Public Domain (www.unlicense.org)
|
||||
// This is free and unencumbered software released into the public domain.
|
||||
// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
|
||||
// software, either in source code form or as a compiled binary, for any purpose,
|
||||
// commercial or non-commercial, and by any means.
|
||||
// In jurisdictions that recognize copyright laws, the author or authors of this
|
||||
// software dedicate any and all copyright interest in the software to the public
|
||||
// domain. We make this dedication for the benefit of the public at large and to
|
||||
// the detriment of our heirs and successors. We intend this dedication to be an
|
||||
// overt act of relinquishment in perpetuity of all present and future rights to
|
||||
// this software under copyright law.
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
// ------------------------------------------------------------------------------
|
4568
libs/zmath/src/zmath.zig
Normal file
4568
libs/zmath/src/zmath.zig
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue