Tried to fix weird graphical glitches; thought it was overdraw, but it turns out it's not

This commit is contained in:
Krzosa Karol
2022-07-01 16:50:24 +02:00
parent c773d5ad3d
commit 345acba124
3 changed files with 34 additions and 11 deletions

View File

@@ -6,4 +6,4 @@ rem assets.exe
rem tracy/TracyClient.cpp -DTRACY_ENABLE
clang main.cpp -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -g -I".." -o main.exe -Wl,user32.lib -Wl,optick\lib\x64\release\OptickCore.lib
clang main.cpp -O2 -mavx2 -Wall -Wno-unused-function -Wno-missing-braces -fno-exceptions -fdiagnostics-absolute-paths -g -I".." -o main.exe -Wl,user32.lib -Wl,optick\lib\x64\release\OptickCore.lib

View File

@@ -331,7 +331,7 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig
Vec8 Dy10 = vec8(dy10) * var07;
Vec8 Dy21 = vec8(dy21) * var07;
Vec8 Dy02 = vec8(dy02) * var07;
Vec8 w0, w1, w2, invw0, invw1, invw2, u, v, interpolated_w, should_fill;
Vec8 w0, w1, w2, invw0, invw1, invw2, u, v, interpolated_w;
Vec8I ui, vi;
U32 *destination = dst->pixels + dst->x*min_y;
@@ -349,7 +349,15 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig
Cx2 = vec8(Cx2[7]) + Dy02;
should_fill = Cx0 >= vec8(0) & Cx1 >= vec8(0) & Cx2 >= vec8(0);
Vec8 should_fill;
{
Vec8 a = (vec8(x8) + var07);
Vec8 b = vec8(max_x);
should_fill = a < b;
should_fill = should_fill & (Cx0 >= vec8(0) & Cx1 >= vec8(0) & Cx2 >= vec8(0));
}
w0 = Cx1 / area8;
w1 = Cx2 / area8;
w2 = Cx0 / area8;
@@ -362,6 +370,7 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig
Vec8 depth = loadu8(depth_pointer);
should_fill = should_fill & (depth < interpolated_w);
invw0 = (w0 / vec8(p0.w));
invw1 = (w1 / vec8(p1.w));
invw2 = (w2 / vec8(p2.w));
@@ -378,17 +387,26 @@ void draw_triangle_nearest(Bitmap* dst, F32 *depth_buffer, Bitmap *src, Vec3 lig
vi = convert_vec8_to_vec8i(v);
// Origin UV (0,0) is in bottom left
U32 *dst_pixel = destination + x8;
_mm256_maskstore_epi32((int *)depth_pointer, should_fill.simd, interpolated_w.simd);
Vec8I indices = ui + ((vec8i(src->y) - vec8i(1) - vi) * vec8i(src->x));
U32 *pixel[8] = {
src->pixels + indices.e[0],
src->pixels + indices.e[1],
src->pixels + indices.e[2],
src->pixels + indices.e[3],
src->pixels + indices.e[4],
src->pixels + indices.e[5],
src->pixels + indices.e[6],
src->pixels + indices.e[7],
};
U32 *dst_pixel = destination + x8;
for(S64 i = 0; i < 8; i++){
if (should_fill[i]){
PROFILE_SCOPE(fill_triangle_after_depth_test);
depth_pointer[i] = interpolated_w[i];
U32 *pixel = src->pixels + (ui[i] + (src->y - 1ll - vi[i]) * src->x);
Vec4 result_color; {
U32 c = *pixel;
U32 c = *pixel[i];
F32 a = ((c & 0xff000000) >> 24) / 255.f;
F32 b = ((c & 0x00ff0000) >> 16) / 255.f;
F32 g = ((c & 0x0000ff00) >> 8) / 255.f;
@@ -652,7 +670,7 @@ main(int argc, char **argv) {
os.window_size.y = 720;
os.window_resizable = 1;
assert(os_init());
Font font = os_load_font(os.perm_arena, 16, "Arial", 0);
Font font = os_load_font(os.perm_arena, 12*os.dpi_scale, "Arial", 0);
f22 = load_obj_dump(os.perm_arena, "plane.bin"_s);
sponza = load_obj_dump(os.perm_arena, "sponza.bin"_s);

View File

@@ -36,10 +36,15 @@ union Vec8I{
// Broadcasts a single 32-bit integer into all eight lanes.
Vec8I vec8i(S32 x){
  Vec8I splat = {_mm256_set1_epi32(x)};
  return splat;
}
// Builds a vector from eight integers, with `a` in lane 0 and `h` in lane 7.
// _mm256_set_epi32 takes its arguments from the highest lane down to the
// lowest, which is why they are passed in reverse.
Vec8I vec8i(S32 a, S32 b, S32 c, S32 d, S32 e, S32 f, S32 g, S32 h){
  Vec8I lanes = {_mm256_set_epi32(h, g, f, e, d, c, b, a)};
  return lanes;
}
// Lane-wise signed 32-bit "greater than". Each result lane is all ones
// (0xFFFFFFFF) where a > b and zero otherwise, so the result is a mask.
Vec8I operator>(Vec8I a, Vec8I b){
  Vec8I mask = {_mm256_cmpgt_epi32(a.simd, b.simd)};
  return mask;
}
// Lane-wise 32-bit integer addition.
Vec8I operator+(Vec8I a, Vec8I b){
  Vec8I sum = {_mm256_add_epi32(a.simd, b.simd)};
  return sum;
}
// Lane-wise 32-bit integer subtraction (a - b).
Vec8I operator-(Vec8I a, Vec8I b){
  Vec8I difference = {_mm256_sub_epi32(a.simd, b.simd)};
  return difference;
}
Vec8I operator*(Vec8I a, Vec8I b){ return {_mm256_mul_epi32(a.simd, b.simd)}; }
// Lane-wise 32-bit integer multiply, keeping the low 32 bits of each product.
// Uses _mm256_mullo_epi32 rather than _mm256_mul_epi32: the latter multiplies
// only the even-indexed 32-bit lanes and produces four 64-bit products, so it
// is not a per-lane multiply.
Vec8I operator*(Vec8I a, Vec8I b){
  Vec8I product = {_mm256_mullo_epi32(a.simd, b.simd)};
  return product;
}
// Vec8I operator/(Vec8I a, Vec8I b){ return {_mm256_div_epi32(a.simd, b.simd)}; }
// Lane-wise 32-bit integer add-assign: stores a + b into `a` and returns the
// updated value. The sum must be written back through the reference;
// returning `a + b` alone would leave the left-hand operand unchanged.
Vec8I operator+=(Vec8I &a, Vec8I b){
  a = a + b;
  return a;
}
Vec8I convert_vec8_to_vec8i(Vec8 v){ return Vec8I{_mm256_cvtps_epi32(v.simd)}; }
// Converts eight packed floats to eight packed 32-bit integers.
// _mm256_cvtps_epi32 rounds according to the current MXCSR rounding mode
// (round-to-nearest-even by default); it does not truncate toward zero.
Vec8I convert_vec8_to_vec8i(Vec8 v){
  Vec8I converted = {_mm256_cvtps_epi32(v.simd)};
  return converted;
}