Commit 9f6e006c authored by Florian Oetke's avatar Florian Oetke
Browse files

optimized diffuse gi sampler (5.5 -> 4.0ms for GI)

parent 2592b480
......@@ -23,13 +23,12 @@ layout(set=2, binding = 7) uniform sampler2D ao_sampler;
layout (constant_id = 0) const int LAST_SAMPLE = 0; // 1 if this is the last MIP level to sample
layout (constant_id = 1) const float R = 40; // the radius to fetch samples from
layout (constant_id = 2) const int SAMPLES = 128; // the number of samples to fetch
layout (constant_id = 3) const int UPSAMPLE_ONLY = 0;// 1 if only the previous result should be
layout (constant_id = 3) const int UPSAMPLE = 1; // 1 if there is a previous frame that should be upsampled
layout (constant_id = 4) const int UPSAMPLE_ONLY = 0;// 1 if only the previous result should be
// upsampled but no new samples calculated
layout (constant_id = 4) const int JITTER = 1; // 1 if the samples should be jittered to cover more area
layout (constant_id = 5) const int BLEND_HISTORY = 0;// 1 if this is the last call and should blend
// with the history buffer
layout(set=3, binding = 0) uniform Sample_points {
vec4 points[SAMPLES];
} sample_points;
// arguments are packed into the matrices to keep the pipeline layouts compatible between GI passes
layout(push_constant) uniform Push_constants {
......@@ -55,7 +54,7 @@ layout(push_constant) uniform Push_constants {
vec3 gi_sample(int lod, int base_mip);
vec3 calc_illumination_from(int lod, vec2 tex_size, ivec2 src_uv, vec2 shaded_uv, float shaded_depth,
vec3 shaded_point, vec3 shaded_normal, out float weight);
vec3 shaded_point, vec3 shaded_normal);
// calculate luminance of a color (used for normalization)
float luminance_norm(vec3 c) {
......@@ -69,7 +68,7 @@ void main() {
float base_mip = pcs.prev_projection[3][3];
// upsample the previous result (if there is one)
if(current_mip < max_mip)
if(UPSAMPLE==1)
out_color = vec4(upsampled_result(depth_sampler, mat_data_sampler,
prev_depth_sampler, prev_mat_data_sampler,
result_sampler, vertex_out.tex_coords), 1.0);
......@@ -81,7 +80,7 @@ void main() {
out_color.rgb += gi_sample(int(current_mip+0.5), int(base_mip+0.5));
// reached the last MIP level => blend with history
if(abs(current_mip - base_mip) < 0.00001) {
if(BLEND_HISTORY==1) {
// calculate the interpolation factor based on the depth error in its surroundings during reprojection
vec2 hws_step = 1.0 / textureSize(history_weight_sampler, 0);
......@@ -132,27 +131,25 @@ vec3 gi_sample(int lod, int base_mip) {
// fetch SAMPLES samples in a spiral pattern and combine their GI contribution
vec3 c = vec3(0,0,0);
float samples_used = 0.0;
float angle_step = PI*2 / pow((sqrt(5.0)+1.0)/2.0, 2.0);
float angle = PDnrand2(vec4(vertex_out.tex_coords, lod, global_uniforms.time.x*10.0)).r * 2*PI;
float sin_angle = sin(angle);
float cos_angle = cos(angle);
for(int i=0; i<SAMPLES/2; i++) {
vec4 rand = JITTER==0 ? vec4(0.5) : texture(noise_sampler, PDnrand2(vec4(vertex_out.tex_coords, lod+20+i, global_uniforms.time.x*10.0)));
float outer_radius = R;
float inner_radius = LAST_SAMPLE==0 ? outer_radius / 2.0 - 4.0 : 0.0;
float angle_step = PI * 2.0 / pow((sqrt(5.0) + 1.0) / 2.0, 2.0);
vec2 pp = sample_points.points[i].xy;
ivec2 p = ivec2(uv + vec2(pp.x*cos_angle - pp.y*sin_angle, pp.x*sin_angle + pp.y*cos_angle) + R*0.2 * (rand.rg*2-1));
float weight;
c += calc_illumination_from(lod, texture_size, p, uv, depth, P, N, weight);
samples_used += weight;
for(int i=0; i<SAMPLES; i++) {
float r = max(
4.0,
mix(inner_radius, outer_radius, sqrt(float(i) / float(SAMPLES))));
float a = i * angle_step + angle;
float sin_angle = sin(a);
float cos_angle = cos(a);
pp = sample_points.points[i].zw;
p = ivec2(uv + vec2(pp.x*cos_angle - pp.y*sin_angle, pp.x*sin_angle + pp.y*cos_angle) + R*0.2 * (rand.ba*2-1));
c += calc_illumination_from(lod, texture_size, p, uv, depth, P, N, weight);
samples_used += weight;
ivec2 p = ivec2(uv + vec2(sin_angle * r, cos_angle * r));
float weight;
c += calc_illumination_from(lod, texture_size, p, uv, depth, P, N);
}
return c;
......@@ -160,7 +157,7 @@ vec3 gi_sample(int lod, int base_mip) {
// calculate the light transfer between two pixels of the current level
vec3 calc_illumination_from(int lod, vec2 tex_size, ivec2 src_uv, vec2 shaded_uv, float shaded_depth,
vec3 shaded_point, vec3 shaded_normal, out float weight) {
vec3 shaded_point, vec3 shaded_normal) {
// fetch depth/normal at src pixel
vec4 mat_data = texelFetch(mat_data_sampler, src_uv, 0);
vec3 N = decode_normal(mat_data.rg);
......@@ -190,14 +187,18 @@ vec3 calc_illumination_from(int lod, vec2 tex_size, ivec2 src_uv, vec2 shaded_uv
// calculate the size of the differential area
float cos_alpha = Pn.z;
float cos_beta = dot(Pn, N);
float z = depth * global_uniforms.proj_planes.y;
float ds = pcs.prev_projection[2][3] * z*z * clamp(cos_alpha / cos_beta, 0.001, 1000.0);
float ds = pcs.prev_projection[2][3] * depth*depth * clamp(cos_alpha / cos_beta, 0.001, 1000.0);
// multiply all factors, that modulate the light transfer
weight = clamp(visibility * NdotL_dst * NdotL_src * ds / (0.1+r2), 0,1);
float weight = visibility * NdotL_dst * NdotL_src * ds / (0.1+r2);
if(weight<0.001) {
return vec3(0);
// fetch the light emitted by the src pixel, modulate it by the calculated factor and return it
vec3 radiance = texelFetch(color_sampler, src_uv, 0).rgb;
return max(vec3(0.0), radiance * weight);
} else {
// fetch the light emitted by the src pixel, modulate it by the calculated factor and return it
vec3 radiance = texelFetch(color_sampler, src_uv, 0).rgb;
return radiance * weight;
}
}
......@@ -487,9 +487,9 @@ namespace mirrage {
nk_property_int(ctx, "Sample Count", 8, &renderer_settings.gi_samples, 256, 1, 1);
bool_nk_wrapper = renderer_settings.gi_jitter_samples ? 1 : 0;
nk_checkbox_label(ctx, "Jitter GI Samples", &bool_nk_wrapper);
renderer_settings.gi_jitter_samples = bool_nk_wrapper == 1;
// bool_nk_wrapper = renderer_settings.gi_jitter_samples ? 1 : 0;
// nk_checkbox_label(ctx, "Jitter GI Samples", &bool_nk_wrapper);
// renderer_settings.gi_jitter_samples = bool_nk_wrapper == 1;
nk_property_int(ctx,
"Low-Quality MIP-Levels",
......
......@@ -53,7 +53,7 @@ namespace mirrage::graphic {
auto end() noexcept { return _sub_results.end(); }
private:
static constexpr auto history_size = 8;
static constexpr auto history_size = 60;
using Lookup_table = std::unordered_map<std::string, std::size_t>;
using Time_history = std::array<double, history_size>;
......@@ -119,7 +119,8 @@ namespace mirrage::graphic {
void start(vk::CommandBuffer);
auto push(const std::string& name,
vk::PipelineStageFlagBits = vk::PipelineStageFlagBits::eAllCommands) -> Push_raii;
auto push(const char* name, vk::PipelineStageFlagBits stage = vk::PipelineStageFlagBits::eAllCommands)
auto push(const char* name,
vk::PipelineStageFlagBits stage = vk::PipelineStageFlagBits::eAllCommands)
-> Push_raii {
if(_active)
return push(std::string(name), stage);
......
......@@ -42,7 +42,7 @@ namespace mirrage::renderer {
bool ssao = true;
bool bloom = true;
float background_intensity = 1.f;
float background_intensity = 0.f;
bool dynamic_shadows = false;
bool debug_disect = false;
......
......@@ -64,12 +64,6 @@ namespace mirrage::renderer {
vk::UniqueDescriptorSet _reproject_descriptor_set;
// GI sampling for diffuse illumination
vk::UniqueDescriptorSetLayout _sample_descriptor_set_layout;
vk::UniqueDescriptorSet _sample_descriptor_set;
vk::UniqueDescriptorSet _last_sample_descriptor_set;
graphic::Static_buffer _sample_points_buffer;
graphic::Static_buffer _last_sample_points_buffer;
int _resource_loading_delay = 4;
std::vector<graphic::Framebuffer> _sample_framebuffers;
graphic::Render_pass _sample_renderpass;
std::vector<vk::UniqueDescriptorSet> _sample_descriptor_sets;
......
......@@ -191,70 +191,11 @@ namespace mirrage::renderer {
return render_pass;
}
void calculate_sample_points(int count, bool ring, int radius, char* out) {
auto points = gsl::span<glm::vec2>(reinterpret_cast<glm::vec2*>(out), count);
const auto outer_radius = static_cast<float>(radius);
const auto inner_radius = ring ? outer_radius / 2.f - 4.f : 0.0f;
const auto angle_step =
glm::pi<float>() * 2.f / glm::pow((glm::sqrt(5.0f) + 1.0f) / 2.0f, 2.0f);
for(auto i : util::range(count)) {
float r = util::max(
4.f,
glm::mix(inner_radius, outer_radius, glm::sqrt(float(i) / float(count))));
auto angle = i * angle_step;
float sin_angle = glm::sin(angle);
float cos_angle = glm::cos(angle);
points[i].x = sin_angle * r;
points[i].y = cos_angle * r;
}
// sort for cache coherence
auto open_points = std::vector<std::int64_t>();
open_points.resize(gsl::narrow<std::size_t>(count - 1));
std::iota(open_points.begin(), open_points.end(), 1);
auto path = std::vector<glm::vec2>();
path.reserve(gsl::narrow<std::size_t>(points.size()));
path.emplace_back(0);
while(!open_points.empty()) {
auto min_dist = std::numeric_limits<float>::max();
auto min_i = open_points.begin();
auto last_point = path.back();
for(auto i = open_points.begin(); i != open_points.end(); i++) {
auto p = points.at(*i);
auto dx = last_point.x - p.x;
auto dy = last_point.y - p.y;
auto dist = dx * dx + dy * dy;
if(dist < min_dist) {
min_dist = dist;
min_i = i;
}
}
path.emplace_back(points[*min_i]);
*min_i = open_points.back();
open_points.pop_back();
}
std::copy(path.begin(), path.end(), points.begin());
}
auto build_sample_render_pass(Deferred_renderer& renderer,
vk::DescriptorSetLayout desc_set_layout,
vk::DescriptorSetLayout points_desc_set_layout,
int min_mip_level,
int max_mip_level,
int sample_count,
bool jitter_samples,
Render_target_2D& gi_buffer,
std::vector<Framebuffer>& out_framebuffers) {
......@@ -280,7 +221,6 @@ namespace mirrage::renderer {
pipeline.add_descriptor_set_layout(renderer.global_uniforms_layout());
pipeline.add_descriptor_set_layout(renderer.noise_descriptor_set_layout());
pipeline.add_descriptor_set_layout(desc_set_layout);
pipeline.add_descriptor_set_layout(points_desc_set_layout);
pipeline.add_push_constant(
"pcs"_strid,
sizeof(Gi_constants),
......@@ -289,16 +229,29 @@ namespace mirrage::renderer {
auto& pass = builder.add_subpass(pipeline).color_attachment(
color, graphic::all_color_components, graphic::blend_premultiplied_alpha);
// first sample (no upsampling of previous level)
pass.stage("sample_first"_strid)
.shader("frag_shader:gi_sample"_aid,
graphic::Shader_stage::fragment,
"main",
2,
sample_count,
3,
0)
.shader("vert_shader:gi_sample"_aid, graphic::Shader_stage::vertex);
// "normal" sample (+ upsampling)
pass.stage("sample"_strid)
.shader("frag_shader:gi_sample"_aid,
graphic::Shader_stage::fragment,
"main",
2,
sample_count,
4,
jitter_samples)
3,
1)
.shader("vert_shader:gi_sample"_aid, graphic::Shader_stage::vertex);
// circle instead of ring sample-pattern
pass.stage("sample_last"_strid)
.shader("frag_shader:gi_sample"_aid,
graphic::Shader_stage::fragment,
......@@ -309,11 +262,36 @@ namespace mirrage::renderer {
sample_count)
.shader("vert_shader:gi_sample"_aid, graphic::Shader_stage::vertex);
// circle instead of ring sample-pattern AND blend with history
pass.stage("sample_fin"_strid)
.shader("frag_shader:gi_sample"_aid,
graphic::Shader_stage::fragment,
"main",
0,
1,
2,
sample_count,
5,
1)
.shader("vert_shader:gi_sample"_aid, graphic::Shader_stage::vertex);
// only upsample the previous level
pass.stage("upsample"_strid)
.shader("frag_shader:gi_sample"_aid,
graphic::Shader_stage::fragment,
"main",
3,
4,
1)
.shader("vert_shader:gi_sample"_aid, graphic::Shader_stage::vertex);
// only upsample the previous level AND blend with history
pass.stage("upsample_fin"_strid)
.shader("frag_shader:gi_sample"_aid,
graphic::Shader_stage::fragment,
"main",
4,
1,
5,
1)
.shader("vert_shader:gi_sample"_aid, graphic::Shader_stage::vertex);
......@@ -714,36 +692,11 @@ namespace mirrage::renderer {
_integrated_brdf.view(),
_history_weight_prev.view()}))
, _sample_descriptor_set_layout(renderer.device().create_descriptor_set_layout(
vk::DescriptorSetLayoutBinding{0,
vk::DescriptorType::eUniformBuffer,
1,
vk::ShaderStageFlagBits::eFragment}))
, _sample_descriptor_set(
renderer.descriptor_pool().create_descriptor(*_sample_descriptor_set_layout))
, _last_sample_descriptor_set(
renderer.descriptor_pool().create_descriptor(*_sample_descriptor_set_layout))
, _sample_points_buffer(renderer.device().transfer().upload_buffer(
vk::BufferUsageFlagBits::eUniformBuffer,
renderer.queue_family(),
to_2prod(renderer.settings().gi_samples) * sizeof(glm::vec2),
[samples = renderer.settings().gi_samples](char* data) {
calculate_sample_points(to_2prod(samples), true, 40.f, data);
}))
, _last_sample_points_buffer(renderer.device().transfer().upload_buffer(
vk::BufferUsageFlagBits::eUniformBuffer,
renderer.queue_family(),
to_2prod(renderer.settings().gi_samples) * sizeof(glm::vec2),
[samples = renderer.settings().gi_samples](char* data) {
calculate_sample_points(to_2prod(samples), false, 40.f, data);
}))
, _sample_renderpass(build_sample_render_pass(renderer,
*_descriptor_set_layout,
*_sample_descriptor_set_layout,
_min_mip_level,
_max_mip_level,
to_2prod(renderer.settings().gi_samples),
renderer.settings().gi_jitter_samples,
_gi_diffuse,
_sample_framebuffers))
......@@ -801,41 +754,6 @@ namespace mirrage::renderer {
_sample_descriptor_sets.emplace_back(
_descriptor_set_layout.create_set(renderer.descriptor_pool(), images));
}
MIRRAGE_DEBUG("Init gi");
auto sample_count = gsl::narrow<unsigned int>(renderer.settings().gi_samples);
auto buffer_info = vk::DescriptorBufferInfo(
_sample_points_buffer.buffer(), 0, sample_count * sizeof(glm::vec2));
auto desc_writes = std::array<vk::WriteDescriptorSet, 1>();
desc_writes[0] = vk::WriteDescriptorSet{*_sample_descriptor_set,
0,
0,
1,
vk::DescriptorType::eUniformBuffer,
nullptr,
&buffer_info};
renderer.device().vk_device()->updateDescriptorSets(
desc_writes.size(), desc_writes.data(), 0, nullptr);
buffer_info = vk::DescriptorBufferInfo(
_last_sample_points_buffer.buffer(), 0, sample_count * sizeof(glm::vec2));
desc_writes[0] = vk::WriteDescriptorSet{*_last_sample_descriptor_set,
0,
0,
1,
vk::DescriptorType::eUniformBuffer,
nullptr,
&buffer_info};
renderer.device().vk_device()->updateDescriptorSets(
desc_writes.size(), desc_writes.data(), 0, nullptr);
MIRRAGE_DEBUG("Init gi done");
}
......@@ -846,11 +764,6 @@ namespace mirrage::renderer {
vk::DescriptorSet global_uniform_set,
std::size_t) {
if(_resource_loading_delay > 0) {
_resource_loading_delay--;
return;
}
if(!_renderer.settings().gi) {
_first_frame = true;
return;
......@@ -1009,7 +922,8 @@ namespace mirrage::renderer {
float fov_h,
float fov_v,
float screen_width,
float screen_height) {
float screen_height,
float proj_y_plane) {
auto dp = glm::pi<float>() * 40.f * 40.f;
if(last_sample)
dp -= glm::pi<float>() * 20.f * 20.f;
......@@ -1017,7 +931,7 @@ namespace mirrage::renderer {
dp /= samples;
return (4.0f * glm::tan(fov_h / 2.f) * glm::tan(fov_v / 2.f) * dp)
/ (screen_width * screen_height);
/ (screen_width * screen_height) * proj_y_plane * proj_y_plane;
}
} // namespace
void Gi_pass::_generate_gi_samples(vk::CommandBuffer& command_buffer) {
......@@ -1042,7 +956,8 @@ namespace mirrage::renderer {
{
auto _ = _renderer.profiler().push("Sample (diffuse)");
auto first_iteration = true;
for(auto i = end - 1; i >= std::min(_min_mip_level, begin); i--) {
auto last_i = std::min(_min_mip_level, begin);
for(auto i = end - 1; i >= last_i; i--) {
auto& fb = _sample_framebuffers.at(i - _min_mip_level);
if(i < end - 1) {
......@@ -1071,11 +986,21 @@ namespace mirrage::renderer {
}
_sample_renderpass.execute(command_buffer, fb, [&] {
if(i == begin) {
if(i == last_i) {
if(i < begin) {
_sample_renderpass.set_stage("upsample_fin"_strid);
} else {
_sample_renderpass.set_stage("sample_fin"_strid);
}
} else if(i == begin) {
_sample_renderpass.set_stage("sample_last"_strid);
} else if(i < begin) {
_sample_renderpass.set_stage("upsample"_strid);
} else if(i == end - 1) {
_sample_renderpass.set_stage("sample_first"_strid);
} else {
_sample_renderpass.set_stage("sample"_strid);
}
if(first_iteration) {
......@@ -1085,12 +1010,6 @@ namespace mirrage::renderer {
_sample_renderpass.bind_descriptor_set(
2, *_sample_descriptor_sets[i - _min_mip_level]);
if(i == begin) {
_sample_renderpass.bind_descriptor_set(3, *_last_sample_descriptor_set);
} else {
_sample_renderpass.bind_descriptor_set(3, *_sample_descriptor_set);
}
pcs.prev_projection[0][3] = i;
auto fov_h = _renderer.global_uniforms().proj_planes.z;
......@@ -1101,7 +1020,8 @@ namespace mirrage::renderer {
fov_h,
fov_v,
_color_in_out.width(i),
_color_in_out.height(i));
_color_in_out.height(i),
_renderer.global_uniforms().proj_planes.y);
_sample_renderpass.push_constant("pcs"_strid, pcs);
command_buffer.draw(3, 1, 0, 0);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment