diff --git a/build.bat b/build.bat index ad221b6..780abfa 100644 --- a/build.bat +++ b/build.bat @@ -1,5 +1,5 @@ @echo off pushd %~dp0 -clang main.cpp -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -I".." -g -o main.exe -Wl,user32.lib +clang main.cpp -O2 -mfma -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -I".." -g -o main.exe -Wl,user32.lib popd \ No newline at end of file diff --git a/main.cpp b/main.cpp index 87d1d1e..5c0a2d6 100644 --- a/main.cpp +++ b/main.cpp @@ -364,18 +364,17 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig U64 fill_pixels_begin = __rdtsc(); for (S64 y = min_y; y < max_y; y++) { - F32x8 Y = _mm256_set1_ps(y); for (S64 x8 = min_x; x8 < max_x; x8+=8) { F32x8 X = _mm256_add_ps(_mm256_set1_ps(x8), var07); - // Compute the edges // F32x8 edge0 = (p1.y - p0.y) * (p.x - p0.x) - (p1.x - p0.x) * (p.y - p0.y); F32x8 px_minus_0x = _mm256_sub_ps(X, p0_x); F32x8 py_minus_0y = _mm256_sub_ps(Y, p0_y); F32x8 left0 = _mm256_mul_ps(_dy10, px_minus_0x); F32x8 right0 = _mm256_mul_ps(_dx10, py_minus_0y); + // F32x8 edge0 = _mm256_fmsub_ps(_dy10, px_minus_0x, right0); F32x8 edge0 = _mm256_sub_ps(left0,right0); // F32 result = (p2.y - p1.y) * (p.x - p1.x) - (p2.x - p1.x) * (p.y - p1.y); @@ -1069,8 +1068,9 @@ test_array_list(){ array_add(scratch, &array, 31); array_add(scratch, &array, 32); array_ordered_remove(&array, 32); - array_ordered_remove(&array, 16); array_ordered_remove(&array, 0); + array_ordered_remove(&array, 16); + array_ordered_remove(&array, 29); array_print(&array); } @@ -1102,7 +1102,7 @@ test_array_list(){ array_print(&array); } - __debugbreak(); + // __debugbreak(); } FILE *global_file;