#version 460

#extension GL_EXT_debug_printf : enable

#define GEOMETRY_INFORMATION_STATIC 1
#define LIGHT_PROPERTIES_BINDING 1

// Noise lookup texture: lut_noise() samples it at mip 0 with coordinates divided by 256 and reads the .yx channels.
uniform sampler2D      s_NoiseRGBA;
// Blue-noise texture array: raymarch() fetches layer 0 at the pixel coordinate wrapped modulo 128 to offset the ray start.
uniform sampler2DArray s_BlueNoise;

#include <shaders/materials/commons.glsl>
#include <shaders/commons_hlsl.glsl>
#include <shaders/materials/noise/noise3d.glsl>
#include <shaders/materials/commons_instancing_buffers.h>
#include <shaders/materials/commons_gradient.glsl>
#include <shaders/deferred/lighting/lighting_support.glsl>
#include <shaders/geometry_partitioning/voxel_texture_build_mips_support.glsl>

#include <shaders/geometry_partitioning/raytrace_buffers.glsl>
#include <shaders/geometry_partitioning/raytrace_commons.glsl>

// Generic image inputs. In the visible part of this file only sImage2 is sampled (displacement() reads its .x channel).
uniform sampler2D sImage1;
uniform sampler2D sImage2;

// Voxelization volumes. The code in this file samples only the *_filtered variants
// (see sample_voxelization_data() and sample_voxelization_data_occupancy()).
uniform sampler3D s_voxel_colors;
uniform sampler3D s_voxel_colors_filtered;
uniform sampler3D s_voxel_occupancy;
uniform sampler3D s_voxel_occupancy_filtered;
// Depth texture: main() fetches it at the current pixel and feeds the value to linearizeDepth().
uniform sampler2D sDepth;


// Interpolated inputs from the previous shader stage.
// main() reads vCameraRelativeWorldPos (ray start) and vCoords (debug colouring of the bounding box).
layout(location = 1) in struct
{
	vec3 vCoords;
	vec3 vNorm;
	vec3 vWorldNorm;
	vec3 vLocalPos;
	vec3 vCameraRelativeWorldPos;	// main() adds transform_params.vCameraPosition to this before applying the inverse model matrix
	vec4 vColor;
	vec2 vUV0;
} vtx_input;
// Instance index: main() uses it to address instance_transform when instance_params.stride > 0.
layout(location = 0) in flat uint instanceID;

// Entity transform data; EntityTransformParams is declared in an included header.
// This file reads mModel, mModelInv, mView, mViewInv, mProjection and vCameraPosition from it.
layout(std140, row_major) uniform TransformParamsBuffer{
	EntityTransformParams transform_params;
};

// Parameters of the raymarched volume. The per-member notes describe how the code in this file uses each value;
// members without a note are not referenced in the visible part of this file.
struct RaymarchParams
{
	vec3  function_scale;		// main() multiplies the ray origin by it; fun()/raymarch() divide by it again before voxel lookups
	int   show_bounding_box;	// non-zero: pixels that miss are shaded in main() instead of being discarded
	vec3  function_origin;		// added to the ray origin in main(); subtracted again in fun()/raymarch()
	int   clamp_to_volume;		// non-zero: main() calls raymarchBBox()

	int   use_instance_origin;	// non-zero: main() derives the function origin from the instance matrix
	int   txt1_flip_y;
	int   txt2_flip_y;
	int   gradient_idx;
	vec4  near_far_plane;		// linearizeDepth() uses the .y, .z and .w components
	int   trace_inside;			// non-zero: main() starts the ray at the camera position
	int   _pad0;				// NOTE(review): presumably explicit padding for the std140 layout - confirm against the CPU-side struct
	int   _pad1;
	int   _pad2;
	vec4  camera_projection_params;	// used by get_view_direction()
	vec2  frustum_shift;			// used by get_view_direction()
	vec2  resolution;				// used by get_view_direction()

	float animation_time;
	float prestep;				// scales the step of the pre-trace loop in raymarch()
	float voxel_sampling_mip;	// occupancy LOD used by fun(); the pre-trace in raymarch() uses this value + 1.0
	float voxel_step_scale;		// scales the occupancy-derived step in fun()
	float dithering;			// scales the blue-noise offset of the ray start in raymarch()
	float param6;
	float param7;
	float param8;
};


#note rename param1 Animation Time
#note rename param2 Prestep
#note rename param3 Voxel Sampling Mip
#note rename param4 Voxel Step Scale
#note rename param5 Dithering
#note rename param5 Dithering


// Uniform block exposing the RaymarchParams instance read throughout this shader.
layout(std140, row_major) uniform RaymarchParamsBuffer{
	RaymarchParams raymarch_params;
};

// Per-draw material parameters.
layout(std140, row_major) uniform BaseMaterialPropertiesBuffer
{
	vec4 colorDiffuse;		// main() uses .rgb as the diffuse colour when MATERIAL_PROPERTIES_BINDING is not defined
	int gUseDerivedNormal;
	int gMaterialMode;
	int materialId;			// main() passes it to encode_normal_material()
	int materialIndex;		// indexes materials.material_properties and is written into the material tags in main()
	int componentTags;		// written into the material tags in main()
};

// NOTE: we try to always render using base object front faces so we have conservative depth
//       when inside the object we will just pass the define and have 2 shaders and use the second
//       one to not use the conservative depth

// Deferred-pass outputs: albedo is always declared; the packed normal/material, packed
// metalness/roughness/material-tags and emissive outputs are declared only when
// DEFERRED_PASS_ALBEDO_ONLY is not defined.
#ifdef DEFERRED_PASS
layout(location = 0) out vec4 outAlbedo;
#ifndef DEFERRED_PASS_ALBEDO_ONLY
layout(location = 1) out uint outNormalMaterial;
layout(location = 2) out uvec4 outMetalnessRoughnessMeterialTags;
layout(location = 3) out vec4 outEmissive;
#endif
#endif

//

// Maps a depth-buffer sample `d` to z / (y - d * w), where y, z and w are the
// components of raymarch_params.near_far_plane.
float linearizeDepth(float d)
{
	vec4  plane_coeffs = raymarch_params.near_far_plane;
	float denominator  = plane_coeffs.y - d * plane_coeffs.w;

	return plane_coeffs.z / denominator;
}

// Builds a view direction with z = 1.0 for a pixel position.
// screen_pos is first offset by frustum_shift * resolution * (0.5, -0.5), then mapped through
// camera_projection_params as -params.zw + params.xy * pos / resolution.
// The y component is negated when SPIRV_VULKAN is defined.
vec3 get_view_direction(vec2 screen_pos)
{
	vec2 viewport_size = raymarch_params.resolution.xy;
	vec4 projection    = raymarch_params.camera_projection_params;

	vec2 shifted_pos = screen_pos - raymarch_params.frustum_shift.xy * viewport_size * vec2(0.5, -0.5);
	vec2 dir_xy      = -projection.zw + projection.xy * shifted_pos / viewport_size;

	#ifdef SPIRV_VULKAN
	dir_xy.y = -dir_xy.y;
	#endif

	return vec3(dir_xy, 1.0);
}

// Samples the filtered voxel colour and occupancy volumes at a world position.
// The position is normalised with in_bbox_data.bbox_voxelize_min and
// in_bbox_data.grid_size_voxelize * GRID_RES; positions outside [0, 1) on any axis yield vec4(0.0).
// Returns rgb = colour (decoded with color_convert_rgbm_rgb, sampled at color_lod),
//         a   = occupancy (.r channel, sampled at occupancy_lod).
vec4 sample_voxelization_data(vec3 world_position, float color_lod, float occupancy_lod)
{
	vec3 bbox_min  = in_bbox_data.bbox_voxelize_min.xyz;
	vec3 bbox_size = in_bbox_data.grid_size_voxelize.xyz * GRID_RES;
	vec3 uvw       = (world_position - bbox_min) / bbox_size;

	bool outside_volume = any(lessThan(uvw, vec3(0.0))) || any(greaterThanEqual(uvw, vec3(1.0)));
	if (outside_volume)
		return vec4(0.0);

	vec4 result;
	result.rgb = color_convert_rgbm_rgb(textureLod(s_voxel_colors_filtered, uvw, color_lod));
	result.a   = textureLod(s_voxel_occupancy_filtered, uvw, occupancy_lod).r;

	return result;
}

// Occupancy-only variant of the voxel lookup: returns the .r channel of the filtered
// occupancy volume at occupancy_lod, or 0.0 when the normalised position falls outside [0, 1)
// on any axis. color_lod is accepted for signature parity but is not used.
float sample_voxelization_data_occupancy(vec3 world_position, float color_lod, float occupancy_lod)
{
	vec3 bbox_min  = in_bbox_data.bbox_voxelize_min.xyz;
	vec3 bbox_size = in_bbox_data.grid_size_voxelize.xyz * GRID_RES;
	vec3 uvw       = (world_position - bbox_min) / bbox_size;

	bool outside_volume = any(lessThan(uvw, vec3(0.0))) || any(greaterThanEqual(uvw, vec3(1.0)));
	if (outside_volume)
		return 0.0;

	return textureLod(s_voxel_occupancy_filtered, uvw, occupancy_lod).r;
}

// basic template based on the shadertoy framework template

// Signed distance from p to an origin-centred box with half extents b
// (negative inside, positive outside).
float sdBox(vec3 p, vec3 b)
{
	vec3  overshoot    = abs(p) - b;
	float outside_dist = length(max(overshoot, 0.0));
	float largest_axis = max(overshoot.x, max(overshoot.y, overshoot.z));
	float inside_dist  = min(largest_axis, 0.0);

	return outside_dist + inside_dist;
}

// Signed distance from p to a torus lying in the xz plane:
// t.x is the ring radius, t.y the tube radius.
float sdTorus( vec3 p, vec2 t )
{
	float ring_offset = length(p.xz) - t.x;

	return length(vec2(ring_offset, p.y)) - t.y;
}

// Sphere-traces the field max(sdBox(p, 1000), -sdBox(p, function_scale * 0.5 + 0.01))
// from ro along rd for at most 48 steps.
// Returns the travelled distance when it stays below the 20000.0 trace limit, otherwise -1.0.
float raymarchBBox(vec3 ro, vec3 rd, vec3 function_scale)
{
	const float MAX_DISTANCE = 20000.0;	// max trace distance
	const float HIT_EPSILON  = 0.001;	// precision of the intersection

	vec3  inner_half_extent = function_scale * 0.5 + 0.01;
	float step_size         = HIT_EPSILON * 2.0;
	float travelled         = 0.0;

	for (int iteration = 0; iteration < 48; ++iteration)
	{
		if (step_size < HIT_EPSILON || travelled > MAX_DISTANCE)
			break;

		vec3 sample_pos = ro + rd * travelled;
		step_size  = max(sdBox(sample_pos, vec3(1000.0)), -sdBox(sample_pos, inner_half_extent));
		travelled += step_size;
	}

	return (travelled < MAX_DISTANCE) ? travelled : -1.0;
}

// Iteration cap of the main marching loop in raymarch():
// RAYMARCH_STEPS when it is defined externally, otherwise 256.
#ifndef RAYMARCH_STEPS
#define MARCHINGITERATIONS 256
#else
#define MARCHINGITERATIONS RAYMARCH_STEPS
#endif

#if 1

// Texture-based 3D noise, after https://www.shadertoy.com/view/XsX3RB
// The integer cell of x selects texels of s_NoiseRGBA (the z cell shifts the lookup by (37, 17) texels),
// the smoothstepped fractional part blends them; the two channels read (.yx) are blended along z.
float lut_noise(vec3 x)
{
	vec3 cell  = floor(x);
	vec3 blend = fract(x);
	blend = blend*blend*(3.0-2.0*blend);

	// The single-fetch variant relies on hardware filtering and shows an artefact because the y channel
	// almost, but not exactly, matches the r channel shifted by (37,17); the original author suspected
	// browser-dependent texture compression. The enabled variant filters the four texels manually.
	#if 0
	vec2 uv = (cell.xy+vec2(37.0,17.0)*cell.z) + blend.xy;
	vec2 rg = textureLod(s_NoiseRGBA, (uv+ 0.5)/256.0, 0.0).yx;
	#else
	vec2 base_uv = (cell.xy+vec2(37.0,17.0)*cell.z);
	vec2 s00 = textureLod(s_NoiseRGBA, (base_uv + vec2(0.5,0.5))/256.0, 0.0).yx;
	vec2 s10 = textureLod(s_NoiseRGBA, (base_uv + vec2(1.5,0.5))/256.0, 0.0).yx;
	vec2 s01 = textureLod(s_NoiseRGBA, (base_uv + vec2(0.5,1.5))/256.0, 0.0).yx;
	vec2 s11 = textureLod(s_NoiseRGBA, (base_uv + vec2(1.5,1.5))/256.0, 0.0).yx;

	vec2 lower_row = mix(s00, s10, blend.x);
	vec2 upper_row = mix(s01, s11, blend.x);
	vec2 rg        = mix(lower_row, upper_row, blend.y);
	#endif

	return mix(rg.x, rg.y, blend.z);
}

// Arithmetic (non-texture) noise: forwards to snoise(), which comes from an included header.
float alu_noise(in vec3 p)
{
	float noise_value = snoise(p);
	return noise_value;
}

// Orthonormal 3x3 matrix (unit-length, mutually perpendicular columns) that fbm4()
// applies to the sample position between octaves.
const mat3 m = mat3( 0.00,  0.80,  0.60,
                    -0.80,  0.36, -0.48,
                    -0.60, -0.48,  0.64 );

// Noise source for fbm4(): lut_noise() at twice the input frequency, remapped with * 2.0 - 1.0
// (i.e. to [-1, 1] provided lut_noise() returns values in [0, 1]).
// alu_noise(p) is the arithmetic alternative.
float fbm4Noise(vec3 p)
{
	float unit_noise = lut_noise(p * 2.0);

	return unit_noise * 2.0 - 1.0;
}
// Four-octave fbm: amplitudes 0.5, 0.25, 0.125 and 0.0625; between octaves the position is
// transformed by the global matrix `m` and scaled by 2.02, 2.03 and 2.01 respectively.
float fbm4( in vec3 q )
{
	float octave0 = fbm4Noise( q );
	q = m*q*2.02;

	float octave1 = fbm4Noise( q );
	q = m*q*2.03;

	float octave2 = fbm4Noise( q );
	q = m*q*2.01;

	float octave3 = fbm4Noise( q );

	float total = 0.5000*octave0;
	total += 0.2500*octave1;
	total += 0.1250*octave2;
	total += 0.0625*octave3;

	return total;
}

// fbm4 noise at p + (1.0, 0.0, 0.8), lowered by 1% of the sImage2 .x channel sampled at p.xz (mip 0).
float displacement( in vec3 p )
{
	float image_value = textureLod(sImage2, p.xz, 0.0 ).x;
	float noise_value = fbm4(p + vec3(1.0, 0.0, 0.8));

	return noise_value - image_value*0.01;
}

// Terrain field: a height ramp (pos.y * 0.1) plus displacement detail that is faded out
// by smoothstep as pos.y goes from 1.0 to 3.0.
float mapTerrain( in vec3 pos )
{
	float height_term = pos.y*0.1;
	float detail      = displacement(pos*vec3(0.8,1.0,0.8)) - 0.4;
	float height_fade = 1.0-smoothstep(1.0,3.0,pos.y);

	return height_term + detail*height_fade;
}

// Cheap vec3 -> float hash; the result is the fractional part of a product, so it lies in [0, 1).
// Original author's warning: please, do not use in real projects - replace this by something better.
float hash(vec3 p)
{
    vec3  scrambled = 17.0*fract( p*0.3183099+vec3(.11,.17,.13) );
    float product   = scrambled.x*scrambled.y*scrambled.z;
    float sum       = scrambled.x+scrambled.y+scrambled.z;

    return fract( product*sum );
}

// Smooth maximum of a and b with blend width k (see https://iquilezles.org/articles/smin).
// Equals max(a, b) when |a - b| >= k; k must be non-zero because the blend term divides by it.
float smax( float a, float b, float k )
{
    float blend    = max(k-abs(a-b),0.0);
    float hard_max = max(a, b);

    return hard_max + blend*blend*0.25/k;
}

// Distance from p to the nearest of a set of spheres whose radii come from hash().
// Two lattice variants are selected at compile time through NOISE:
//   NOISE undefined or 0 : unit cubic grid, one sphere candidate per cell corner (8 candidates)
//   otherwise            : skewed lattice (skew factors K1 = 1/3, K2 = 1/6) with 4 candidates
// The helper macros are fully parenthesised and #undef'd after use so they cannot leak into
// the rest of the translation unit.
float sdBase( in vec3 p )
{
// An undefined NOISE is tested explicitly instead of relying on it evaluating to 0 inside #if.
#if !defined(NOISE) || NOISE==0
    vec3 i = floor(p);
    vec3 f = fract(p);

    // sphere radius as a function of the per-corner hash value
    #define RAD(r) ((r)*(r)*0.7)
    // distance from the in-cell position f to the sphere centred on cell corner c
    #define SPH(i,f,c) (length((f)-(c))-RAD(hash((i)+(c))))

    float nearest = min(min(min(SPH(i,f,vec3(0,0,0)),
                                SPH(i,f,vec3(0,0,1))),
                            min(SPH(i,f,vec3(0,1,0)),
                                SPH(i,f,vec3(0,1,1)))),
                        min(min(SPH(i,f,vec3(1,0,0)),
                                SPH(i,f,vec3(1,0,1))),
                            min(SPH(i,f,vec3(1,1,0)),
                                SPH(i,f,vec3(1,1,1)))));

    #undef SPH
    #undef RAD

    return nearest;
#else
    const float K1 = 0.333333333;
    const float K2 = 0.166666667;

    // skew the position onto the lattice and find the offset to the first lattice corner
    vec3 i = floor(p + (p.x + p.y + p.z) * K1);
    vec3 d0 = p - (i - (i.x + i.y + i.z) * K2);

    // rank the offset components to pick the two intermediate corners
    vec3 e = step(d0.yzx, d0);
    vec3 i1 = e*(1.0-e.zxy);
    vec3 i2 = 1.0-e.zxy*(1.0-e);

    vec3 d1 = d0 - (i1  - 1.0*K2);
    vec3 d2 = d0 - (i2  - 2.0*K2);
    vec3 d3 = d0 - (1.0 - 3.0*K2);

    float r0 = hash( i+0.0 );
    float r1 = hash( i+i1 );
    float r2 = hash( i+i2 );
    float r3 = hash( i+1.0 );

    // distance to a sphere at offset d whose radius derives from hash value r
    #define SPH(d,r) (length(d)-(r)*(r)*0.55)

    float nearest = min( min(SPH(d0,r0),
                             SPH(d1,r1)),
                         min(SPH(d2,r2),
                             SPH(d3,r3)));

    #undef SPH

    return nearest;
#endif
}

//---------------------------------------------------------------
// subtractive fbm
//---------------------------------------------------------------
// Carves six octaves of sdBase() spheres out of the incoming distance d.
// Each octave scales the sphere field by the current amplitude, combines it with d via
// smax(d, -field, 0.15 * amplitude), then transforms/doubles the position and multiplies
// the amplitude by 0.55.
// Returns vec2(final distance, sum of the distance after every octave).
vec2 subtractiveFBM( in vec3 p, float d )
{
	const mat3 octave_rotation = mat3( 0.00,  0.80,  0.60,
	                                  -0.80,  0.36, -0.48,
	                                  -0.60, -0.48,  0.64 );

	p *= 0.251;

	float accumulated = 0.0;
	float amplitude   = 1.0;

	for (int octave = 0; octave < 6; ++octave)
	{
		float carve = amplitude*sdBase(p);

		d = smax( d, -carve, 0.15*amplitude );
		accumulated += d;

		p = 2.0*octave_rotation*p;
		amplitude = 0.55*amplitude;
	}

	return vec2(d, accumulated);
}

// Fbm for Perlin noise based on iq's blog
// Noise source for perlinFBM(): lut_noise() at twice the input frequency, remapped with * 2.0 - 1.0
// (i.e. to [-1, 1] provided lut_noise() returns values in [0, 1]).
// alu_noise(p) is the arithmetic alternative.
float perlinFBMNoise(vec3 p)
{
	float unit_noise = lut_noise(p * 2.0);

	return unit_noise * 2.0 - 1.0;
}

// Sums `octaves` octaves of perlinFBMNoise(): the frequency starts at `freq` and doubles
// per octave, the amplitude starts at 1 and is multiplied by exp2(-0.85) per octave.
float perlinFBM(vec3 p, float freq, int octaves)
{
	float gain      = exp2(-.85);
	float amplitude = 1.;
	float total     = 0.;

	for (int octave = 0; octave < octaves; octave++)
	{
		total += amplitude * perlinFBMNoise(p * freq);

		amplitude *= gain;
		freq      *= 2.;
	}

	return total;
}

// Tileable Worley fbm inspired by Andrew Schneider's Real-Time Volumetric Cloudscapes
// chapter in GPU Pro 7.

// Noise source for worleyFBM(). Despite the name it samples lut_noise() (not a cellular noise)
// at twice the input frequency and remaps the value with * 2.0 - 1.0.
// alu_noise(p) is the arithmetic alternative.
float worleyFBMNoise(vec3 p)
{
	float unit_noise = lut_noise(p * 2.0);

	return unit_noise * 2.0 - 1.0;
}
// Three-octave sum of worleyFBMNoise() with weights 0.625, 0.25 and 0.125.
// Note that the position is scaled by freq twice per octave (p * freq * freq, with extra
// factors of 2 * 2 and 4 * 4 for the second and third octave).
float worleyFBM(vec3 p, float freq)
{
	vec3 scaled = p*freq;

	float octave0 = worleyFBMNoise(scaled *freq);
	float octave1 = worleyFBMNoise(scaled*2. * freq*2.);
	float octave2 = worleyFBMNoise(scaled*4. * freq*4.);

	return octave0 * .625 +
		   octave1 * .25 +
		   octave2 * .125;
}

// Compile-time selection of the fbm flavour evaluated by sdFbm(); 0 = subtractive sphere fbm.
#define NOISE_TYPE 0
// Evaluates the selected fbm at pos. Only result.x is consumed in this file (by map());
// d is the starting distance for the subtractive variant and a frequency factor for variant 3.
vec2 sdFbm(vec3 pos, float d)
{
	vec2 result;

	#if NOISE_TYPE == 0
	result = subtractiveFBM(pos, d);
	#elif NOISE_TYPE == 1
	result = vec2(perlinFBM(pos, 0.107, 7), 0.1) * 0.25;
	#elif NOISE_TYPE == 2
	result = vec2(worleyFBM(pos, 0.27) - 0.2, 0.1);
	#elif NOISE_TYPE == 3
	result = vec2(fbm4(pos * d * 500.0) + 0.1, 0.1) * 0.1;	// 0.1 for precision
	#else
	result = vec2(0.5, 0.5);
	#endif

	return result;
}
#undef NOISE_TYPE

// Detail distance field of the scene: the x component of sdFbm() started from a distance of -0.004.
// (sdBox(pos, vec3(5.0)) is a handy replacement when debugging.)
float map(vec3 pos)
{
	vec2 field = sdFbm(pos, -0.004);

	return field.x;
}

// Step distance for the main marching loop at function-space position `pos`.
//
// The position is mapped back through function_origin / function_scale and
// transform_params.mModel, then the filtered voxel occupancy is sampled at
// raymarch_params.voxel_sampling_mip. The occupancy is turned into a conservative step
// (large in empty space, shrinking towards occupied voxels); where any occupancy is
// present the step is additionally limited from below by the detail field map(pos).
float fun(vec3 pos)
{
	vec3 world_pos = pos;
	world_pos -= raymarch_params.function_origin;
	world_pos /= raymarch_params.function_scale;
	world_pos = vector_transform_by_mat43(world_pos, transform_params.mModel).xyz;

	float voxel_occupancy = sample_voxelization_data_occupancy(world_pos, 1.0, raymarch_params.voxel_sampling_mip);
	float d = voxel_occupancy;

	if (d > 0.0)
	{
		// remap range when close to the surface. surface at 0.25 ... 0.75 to control how tight it aligns
		float zero_point = 0.25;
		d = min(1.0, d * (1.0 / (1.0 - zero_point)));
	}

	// invert so empty space gives the largest step, then scale by the smallest function-scale axis
	d = 1.0 - d;
	d = d * raymarch_params.voxel_step_scale * min(min(raymarch_params.function_scale.x, raymarch_params.function_scale.y), raymarch_params.function_scale.z);

	// The detail field is evaluated only where the voxel data reports occupancy.
	// NOTE(review): [[branch]] needs GL_EXT_control_flow_attributes, presumably enabled by an included header - confirm.
	[[branch]]
	if (voxel_occupancy > 0.0)
	{
		d = max(d, map(pos));
	}

	return d;
}

// Marches the ray ro + rd * t through the voxel-guided distance field.
//
//   ro, rd       ray origin and direction in function space
//   max_t        distance at which marching stops
//   intersection out: min(max_t, t) when the surface is hit, 0.0 otherwise
//                (the initial -1.0 is always overwritten before returning)
//
// Returns vec4(1.01, 1.01, 1.01, 0.0) on a hit. On a miss it returns a debug colour derived
// from the last sampled position (rgb forced to 0 when the ray reached max_t) with alpha set
// to that colour's red channel.
//
// Stages:
//   1. the start distance is offset by blue noise scaled by raymarch_params.dithering
//   2. up to 32 fixed-size steps through a coarser occupancy mip until any occupancy is found
//   3. up to MARCHINGITERATIONS steps driven by fun()
vec4 raymarch( in vec3 ro, in vec3 rd, float max_t, out float intersection )
{
	vec4 sum = vec4(0.0);

	float t = 0.0;
	intersection = -1.0;

	// dithering
	float t_step = 0.1;
	float dither = texelFetch(s_BlueNoise, ivec3(uvec2(gl_FragCoord.xy) % uvec2(128), 0), 0).r;
	t += raymarch_params.dithering * dither;

	// loop invariants, computed once
	float prestep_size  = t_step * raymarch_params.prestep;
	float min_scale     = min(min(raymarch_params.function_scale.x, raymarch_params.function_scale.y), raymarch_params.function_scale.z);
	float hit_threshold = 0.0001 * min_scale;

	// pre-trace into the voxel field using a lower resolution mip
	for(int i = 0; i < 32; i++)
	{
		if (t >= max_t)
			break;

		vec3 world_pos = ro + rd * t;
		world_pos -= raymarch_params.function_origin;
		world_pos /= raymarch_params.function_scale;
		world_pos = vector_transform_by_mat43(world_pos, transform_params.mModel).xyz;

		float voxel_occupancy = sample_voxelization_data_occupancy(world_pos, 1.0, raymarch_params.voxel_sampling_mip + 1.0);
		if (voxel_occupancy > 0.0)
			break;

		t += prestep_size;
	}

	// actual tracing
	for(int i = 0; i < MARCHINGITERATIONS; i++)
	{
		if (t >= max_t)
			break;

		vec3 pos = ro + rd * t;
		float dt = fun(pos);

		if (dt < hit_threshold)
		{
			intersection = min(max_t, t);
			return vec4(vec3(1.01), 0.0);
		}

		// debug colour of the most recent sample; only reaches the caller when the ray misses
		vec3 world_pos = pos;
		world_pos -= raymarch_params.function_origin;
		world_pos /= raymarch_params.function_scale;
		world_pos = vector_transform_by_mat43(world_pos, transform_params.mModel).xyz;

		sum.rgb = vec3(fract(world_pos.xyz * 0.1012));
		t += dt;
	}

	// alpha is taken before the colour is optionally cleared
	sum.a = sum.r;

	if (t >= max_t)
		sum.rgb = vec3(0.0);

	intersection = 0.0;
	return vec4(max(vec3(0.0), sum.xyz), sum.a);
}

// Convenience wrapper around raymarch(): stores the marched colour in `color`
// and returns the intersection distance reported by raymarch().
float calcIntersection(in vec3 ro, in vec3 rd, float max_t, out vec4 color)
{
	float hit_distance;
	color = raymarch(ro, rd, max_t, hit_distance);

	return hit_distance;
}

// Surface normal of map() at pos, estimated from four samples offset along the
// (+,-,-), (-,-,+), (-,+,-) and (+,+,+) diagonals (tetrahedron-style gradient).
// When SHADOWMAP_PASS is defined a constant (0, 0, 1) is returned instead.
vec3 doModelNormal(vec3 pos)
{
#if defined(SHADOWMAP_PASS)
	return vec3(0.0, 0.0, 1.0);
#else
	float precis = 0.0001;
	vec2  e = vec2(1.0,-1.0) * 0.5773 * precis;

	vec3 k0 = e.xyy;
	vec3 k1 = e.yyx;
	vec3 k2 = e.yxy;
	vec3 k3 = e.xxx;

	vec3 gradient = k0*map( pos + k0 );
	gradient = gradient + k1*map( pos + k1 );
	gradient = gradient + k2*map( pos + k2 );
	gradient = gradient + k3*map( pos + k3 );

	return normalize( gradient ) * vec3(1.0, 1.0, 1.0);
#endif
}

#endif

#ifdef RAYMARCH_OUTPUT_DEPTH
// Redeclares gl_FragDepth with the depth_less layout qualifier. Per the GLSL conservative-depth
// rules this promises that main() only writes depth values less than or equal to the interpolated
// fragment depth, which ties in with the front-face / conservative-depth note near the top of the file.
layout (depth_less) out float gl_FragDepth;
//out float gl_FragDepth;
#endif

// Returns the transpose of `matrix`.
// Delegates to the built-in transpose(), which is available in this shader's GLSL version
// (#version 460), instead of rebuilding the matrix component by component.
mat3 transpose_mat3(mat3 matrix)
{
    return transpose(matrix);
}

// Determinant of a 2x2 matrix.
float det(mat2 matrix) {
    vec2 first  = matrix[0];
    vec2 second = matrix[1];

    return first.x * second.y - first.y * second.x;
}

// Inverse of a 3x3 matrix via the adjugate: the cofactor vectors are the cross products of
// the other two matrix vectors, the determinant is the dot product of the first vector with
// its cofactor. No guard against a zero determinant (singular input divides by zero).
mat3 inverse_mat3(mat3 matrix)
{
    vec3 v0 = matrix[0];
    vec3 v1 = matrix[1];
    vec3 v2 = matrix[2];

    vec3 cofactor0 = cross(v1, v2);
    vec3 cofactor1 = cross(v2, v0);
    vec3 cofactor2 = cross(v0, v1);

    float determinant_value = dot(v0, cofactor0);
    mat3  adjugate          = transpose(mat3(cofactor0, cofactor1, cofactor2));

    return (1.0 / determinant_value) * adjugate;
}



// Fragment entry point.
//
//   1. resolves the material (bound material table or BaseMaterialPropertiesBuffer fallback)
//   2. builds the combined model * instance matrix and its inverse
//   3. builds the ray in function space (origin at the rasterized surface, or at the camera
//      when raymarch_params.trace_inside is set)
//   4. limits the ray by the linearized scene depth
//   5. marches the field; on a hit writes albedo (plus normal/material data and depth when the
//      corresponding defines are set), otherwise shades the bounding box or discards
//
// Outputs other than outAlbedo are written only when they are actually declared, i.e. when
// DEFERRED_PASS is defined and DEFERRED_PASS_ALBEDO_ONLY is not.
void main() 
{
#ifdef MATERIAL_PROPERTIES_BINDING
	MaterialPropertiesGPU material = materials.material_properties[materialIndex];
#else
	MaterialPropertiesGPU material;
	material.diffuse = colorDiffuse.rgb;
	material.emmisive = vec3(0.0f);
	material.metalness = 0.0f;
	material.roughness = 0.5f;
	material.transparency = 0.0f;
	material.refraction = 0.0f;
	material.flags = 0;
#endif

#ifndef DEFERRED_PASS
	vec4 outAlbedo = vec4(1.0);	// this is dummy, will be optimized out
#endif

	// NOTE: Whole instancing support is pretty expensive, but maybe it doesn't matter when the marching code is going to cost 10x of it?
	mat4 mat_instance_model;
	mat4 mat_instance_model_inv;
	mat_instance_model[0] = vec4(1.0, 0.0, 0.0, 0.0);
	mat_instance_model[1] = vec4(0.0, 1.0, 0.0, 0.0);
	mat_instance_model[2] = vec4(0.0, 0.0, 1.0, 0.0);
	mat_instance_model[3] = vec4(0.0, 0.0, 0.0, 1.0);

	if (instance_params.stride > 0)
	{
		// the instance buffer stores three vec4 per instance; transpose them into the matrix columns
		vec4 inst_m0 = instance_transform[instanceID * instance_params.stride + 0];
		vec4 inst_m1 = instance_transform[instanceID * instance_params.stride + 1];
		vec4 inst_m2 = instance_transform[instanceID * instance_params.stride + 2];

		mat_instance_model[0].xyz = vec3(inst_m0.x, inst_m1.x, inst_m2.x);
		mat_instance_model[1].xyz = vec3(inst_m0.y, inst_m1.y, inst_m2.y);
		mat_instance_model[2].xyz = vec3(inst_m0.z, inst_m1.z, inst_m2.z);
		mat_instance_model[3].xyz = vec3(inst_m0.w, inst_m1.w, inst_m2.w);
	}

	// if we want to have each instance to be selfcontained. in case of continuous function this can be left out
	vec3 instance_function_origin = raymarch_params.function_origin;
	if (raymarch_params.use_instance_origin != 0)
	{
		instance_function_origin = (mat_instance_model * vec4(raymarch_params.function_origin, 1.0)).xyz * 0.001;
	}

	// combined model matrix and its inverse (inverse of the 3x3 part plus the matching translation)
	mat_instance_model = transform_params.mModel * mat_instance_model;
	mat_instance_model_inv = mat_instance_model;
	{
		mat3 inv = inverse_mat3(mat3(mat_instance_model_inv));
		mat_instance_model_inv[0].xyz = inv[0].xyz;
		mat_instance_model_inv[1].xyz = inv[1].xyz;
		mat_instance_model_inv[2].xyz = inv[2].xyz;
		mat_instance_model_inv[3].x = -(inv[0].x * mat_instance_model[3].x + inv[1].x * mat_instance_model[3].y + inv[2].x * mat_instance_model[3].z);
		mat_instance_model_inv[3].y = -(inv[0].y * mat_instance_model[3].x + inv[1].y * mat_instance_model[3].y + inv[2].y * mat_instance_model[3].z);
		mat_instance_model_inv[3].z = -(inv[0].z * mat_instance_model[3].x + inv[1].z * mat_instance_model[3].y + inv[2].z * mat_instance_model[3].z);
	}

	// These outputs exist only in the full deferred variant (see their declarations), so the
	// writes must be guarded by the same condition or the albedo-only variant fails to compile.
#if defined(DEFERRED_PASS) && !defined(DEFERRED_PASS_ALBEDO_ONLY)

	MetalnessRoughnessMeterialTags metalness_roughness_material_tags;
	metalness_roughness_material_tags.metalness               = material.metalness;
	metalness_roughness_material_tags.roughness               = material.roughness;
	metalness_roughness_material_tags.material_index          = materialIndex;
	metalness_roughness_material_tags.component_tags          = componentTags;
	metalness_roughness_material_tags.material_flag_overrides = 0;

	outMetalnessRoughnessMeterialTags.rgba = encode_metalness_roughness_material_tags(metalness_roughness_material_tags);
	outEmissive.rgba = vec4(0.0);
#endif

	// NOTE: when rendering to the shadowmap we should be rendering backfaces
	//       so maybe just mirror the camera and render 'back' towards the real one
	vec3 ro = (mat_instance_model_inv * vec4(vtx_input.vCameraRelativeWorldPos.xyz + transform_params.vCameraPosition.xyz, 1.0)).xyz; // start tracing at the cube surface. still should clamp at the "outgoing face"
	vec3 rd = (mat_instance_model_inv * vec4(transform_params.vCameraPosition, 1.0)).xyz;
	rd = normalize(ro - rd);
	if (raymarch_params.trace_inside != 0)
	{
		ro = (mat_instance_model_inv * vec4(transform_params.vCameraPosition, 1.0)).xyz;
	}

	ro *= raymarch_params.function_scale;
	vec3 ro_bounding_volume = ro;
	ro += instance_function_origin;

	float raw_depth = texelFetch(sDepth, int2(gl_FragCoord.xy), 0).r;
	float max_depth = linearizeDepth(raw_depth);

	vec4 color = vec4(1.0);
	float bounding_volume_intersection = 100000.0;
	if (raymarch_params.clamp_to_volume != 0)
		bounding_volume_intersection = raymarchBBox(ro_bounding_volume , rd, raymarch_params.function_scale);	// NOTE: yeah, we could just intersect analytically, but i want to have possibility to use custom shapes here

	// ok, this is a HACK! The bounding-volume result above is currently overridden:
	// first by the linear scene depth, then by the depth-derived distance computed below.
	bounding_volume_intersection = max_depth;

	{
		// distance from the camera to the scene surface behind this pixel, measured in model space
		vec3 view_direction = get_view_direction(vec2(gl_FragCoord.xy));
		vec3 world          = (transform_params.mViewInv * vec4(view_direction * max_depth, 1.0)).xyz;
		float d             = length((transform_params.mModelInv * vec4(world - transform_params.vCameraPosition.xyz, 0.0)).xyz);

		// need to scale by model scale / function scale. this is approx
		bounding_volume_intersection = d * raymarch_params.function_scale.z;
	}

	float intersection = calcIntersection(ro, rd, bounding_volume_intersection, color);
	if (intersection > 0.0)
	{
		vec3 ri = ro + intersection * rd;

#if defined(DEFERRED_PASS) && !defined(DEFERRED_PASS_ALBEDO_ONLY)
		vec3 normal = doModelNormal(ri);
		vec3 world_normal = (mat_instance_model * vec4(normal, 0.0)).xyz;
		outNormalMaterial = encode_normal_material(normalize(world_normal), materialId);
#endif

		// hit position: function space -> model space -> view space
		ri -= instance_function_origin;
		ri /= raymarch_params.function_scale;

		ri = (transform_params.mView * mat_instance_model * vec4(ri, 1.0)).xyz;

		outAlbedo.rgb = material.diffuse.rgb;
		outAlbedo.a   = 1.0 - color.a;

#ifdef RAYMARCH_OUTPUT_DEPTH
		{
			float near = -1.0;	// this is depth range, not the projection
			float far  =  1.0;
			vec4 clip_pos = transform_params.mProjection * vec4(ri, 1.0);
			float depth = clip_pos.z / clip_pos.w;
			depth = (((far - near) * depth) + near + far) / 2.0;
			gl_FragDepth = depth;
		}
#endif
	}
	else
	{
		if (raymarch_params.show_bounding_box != 0)
		{
			if (intersection == -1.0)
				outAlbedo = vec4(0.4, 0.2, 0.0, 1.0);
			else
				outAlbedo = vec4(5.0, 0.0, 0.0, 1.0);

			// the colour chosen above is replaced by a position-based pattern; alpha stays 1.0
			outAlbedo.rgb = fract(vtx_input.vCoords.xyz * 0.01);
		}
		else
		{
			outAlbedo.rgb = vec3(1.0, 0.0, 0.0);
			outAlbedo.a = 1.0;
			discard;
		}
	}
}
