diff --git a/src/client/client.cpp b/src/client/client.cpp index 222c1a2ac..b350e1cc5 100644 --- a/src/client/client.cpp +++ b/src/client/client.cpp @@ -563,7 +563,8 @@ void Client::step(float dtime) std::vector minimap_mapblocks; bool do_mapper_update = true; - MapSector *sector = m_env.getMap().emergeSector(v2s16(r.p.X, r.p.Z)); + ClientMap &map = m_env.getClientMap(); + MapSector *sector = map.emergeSector(v2s16(r.p.X, r.p.Z)); MapBlock *block = sector->getBlockNoCreateNoEx(r.p.Y); @@ -575,6 +576,8 @@ void Client::step(float dtime) if (block) { // Delete the old mesh + if (block->mesh) + map.invalidateMapBlockMesh(block->mesh); delete block->mesh; block->mesh = nullptr; block->solid_sides = r.solid_sides; @@ -589,9 +592,9 @@ void Client::step(float dtime) if (r.mesh->getMesh(l)->getMeshBufferCount() != 0) is_empty = false; - if (is_empty) + if (is_empty) { delete r.mesh; - else { + } else { // Replace with the new mesh block->mesh = r.mesh; if (r.urgent) diff --git a/src/client/clientmap.cpp b/src/client/clientmap.cpp index ee05d8816..0a5a3f963 100644 --- a/src/client/clientmap.cpp +++ b/src/client/clientmap.cpp @@ -18,6 +18,8 @@ #include "util/basic_macros.h" #include "util/tracy_wrapper.h" #include "client/renderingengine.h" +#include "util/quicktune.h" +#include "util/hex.h" #include @@ -788,29 +790,27 @@ void MeshBufListMaps::addFromBlock(v3s16 block_pos, MapBlockMesh *block_mesh, * @param src buffer list * @param dst draw order * @param get_world_pos returns translation for a buffer - * @param buffer_trash output container for temporary mesh buffers + * @param dynamic_buffers * @return number of buffers that were merged */ -template +template static u32 transformBuffersToDrawOrder( const MeshBufListMaps::MeshBufList &src, DrawDescriptorList &draw_order, - F get_world_pos, C &buffer_trash) + F get_world_pos, CachedMeshBuffers &dynamic_buffers) { /** * This is a tradeoff between time spent merging buffers and time spent * due to excess drawcalls. * Testing has shown that the ideal value is in the low hundreds, as extra - * CPU work quickly eats up the benefits. + * CPU work quickly eats up the benefits (though alleviated by a cache). * In MTG landscape scenes this was found to save around 20-40% of drawcalls. * * NOTE: if you attempt to test this with quicktune, it won't give you valid * results since HW buffers stick around and Irrlicht handles large amounts * inefficiently. - * - * TODO: as a next step we should cache merged meshes, so they do not need - * to be re-built *and* can be kept in GPU memory. */ - const u32 target_min_vertices = g_settings->getU32("mesh_buffer_min_vertices"); + u32 target_min_vertices = g_settings->getU32("mesh_buffer_min_vertices"); + QUICKTUNE_AUTONAME(QVT_INT, target_min_vertices, 0, 2000); const auto draw_order_pre = draw_order.size(); auto *driver = RenderingEngine::get_video_driver(); @@ -826,6 +826,30 @@ static u32 transformBuffersToDrawOrder( } } + // iterate in reverse to get closest blocks first + std::vector> to_merge; + for (auto it = src.rbegin(); it != src.rend(); ++it) { + v3f translate = get_world_pos(it->first); + auto *buf = it->second; + if (can_merge < 2 || buf->getVertexCount() >= target_min_vertices) { + draw_order.emplace_back(translate, buf); + continue; + } + to_merge.emplace_back(translate, buf); + // TEMP: workaround for updateAllHardwareBuffers() inefficiency + driver->removeHardwareBuffer(buf->getVertexBuffer()); + driver->removeHardwareBuffer(buf->getIndexBuffer()); + } + + // TODO: explain and document + std::string kkk; + std::sort(to_merge.begin(), to_merge.end(), [] (const auto &l, const auto &r) { + return static_cast(l.second) < static_cast(r.second); + }); + for (auto &it : to_merge) { + kkk.append(reinterpret_cast(&it.second), sizeof(it.second)); + } + scene::SMeshBuffer *tmp = nullptr; const auto &finish_buf = [&] () { if (tmp) { @@ -842,33 +866,40 @@ static u32 transformBuffersToDrawOrder( tmp = nullptr; }; - // iterate in reverse to get closest blocks first - for (auto it = src.rbegin(); it != src.rend(); ++it) { - v3f translate = get_world_pos(it->first); - auto *buf = it->second; - if (can_merge < 2 || buf->getVertexCount() >= target_min_vertices) { - draw_order.emplace_back(translate, buf); - continue; - } + // take from cache or run merging + auto it2 = dynamic_buffers.find(kkk); + if (it2 != dynamic_buffers.end()) { + g_profiler->avg("transformBuffersToDrawOrder: cache hit rate", 1); + for (auto *buf : it2->second.buf) + draw_order.emplace_back(v3f(0), buf); + it2->second.age = 0; + } else if (!kkk.empty()) { + g_profiler->avg("transformBuffersToDrawOrder: cache hit rate", 0); + auto &put_buffers = dynamic_buffers[kkk]; + //rawstream << "new " << hex_encode(kkk) << std::endl; + for (auto &it : to_merge) { + v3f translate = it.first; + auto *buf = it.second; - bool new_buffer = false; - if (!tmp) - new_buffer = true; - else if (tmp->getVertexCount() + buf->getVertexCount() > U16_MAX) - new_buffer = true; - if (new_buffer) { - finish_buf(); - tmp = new scene::SMeshBuffer(); - buffer_trash.push_back(tmp); - assert(tmp->getPrimitiveType() == buf->getPrimitiveType()); - tmp->Material = buf->getMaterial(); - // preallocate - tmp->Vertices->Data.reserve(total_vtx); - tmp->Indices->Data.reserve(total_idx); + bool new_buffer = false; + if (!tmp) + new_buffer = true; + else if (tmp->getVertexCount() + buf->getVertexCount() > U16_MAX) + new_buffer = true; + if (new_buffer) { + finish_buf(); + tmp = new scene::SMeshBuffer(); + put_buffers.buf.push_back(tmp); + assert(tmp->getPrimitiveType() == buf->getPrimitiveType()); + tmp->Material = buf->getMaterial(); + // preallocate + tmp->Vertices->Data.reserve(total_vtx); + tmp->Indices->Data.reserve(total_idx); + } + appendToMeshBuffer(tmp, buf, translate); } - appendToMeshBuffer(tmp, buf, translate); + finish_buf(); } - finish_buf(); // first call needs to set the material if (draw_order.size() > draw_order_pre) @@ -921,7 +952,6 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass) TimeTaker tt_collect(""); MeshBufListMaps grouped_buffers; - std::vector buffer_trash; DrawDescriptorList draw_order; auto is_frustum_culled = m_client->getCamera()->getFrustumCuller(); @@ -979,7 +1009,7 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass) for (auto &map : grouped_buffers.maps) { for (auto &list : map) { merged_count += transformBuffersToDrawOrder( - list.second, draw_order, get_block_wpos, buffer_trash); + list.second, draw_order, get_block_wpos, m_dynamic_buffers); } } @@ -1036,6 +1066,21 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass) if (pass == scene::ESNRP_SOLID) { g_profiler->avg("renderMap(): animated meshes [#]", mesh_animate_count); g_profiler->avg(prefix + "merged buffers [#]", merged_count); + + u32 USE_CACHE = 1; + QUICKTUNE_AUTONAME(QVT_INT, USE_CACHE, 0, 1); + u32 cached_count = 0; + for (auto it = m_dynamic_buffers.begin(); it != m_dynamic_buffers.end(); ) { + if (!USE_CACHE || ++it->second.age > 1) { + for (auto *buf : it->second.buf) + buf->drop(); + it = m_dynamic_buffers.erase(it); + } else { + cached_count += it->second.buf.size(); + it++; + } + } + g_profiler->avg(prefix + "merged buffers cached [#]", cached_count); } if (pass == scene::ESNRP_TRANSPARENT) { @@ -1045,9 +1090,55 @@ void ClientMap::renderMap(video::IVideoDriver* driver, s32 pass) g_profiler->avg(prefix + "vertices drawn [#]", vertex_count); g_profiler->avg(prefix + "drawcalls [#]", drawcall_count); g_profiler->avg(prefix + "material swaps [#]", material_swaps); +} - for (auto &x : buffer_trash) - x->drop(); +void ClientMap::invalidateMapBlockMesh(MapBlockMesh *mesh) +{ + ScopeProfiler sp(g_profiler, "CM::invalidateMapBlockMesh", SPT_ADD, PRECISION_MICRO); + + // find all buffers + MeshBufListMaps tmp; + tmp.addFromBlock(v3s16(), mesh, getSceneManager()->getVideoDriver()); + + std::vector to_delete; + void *maxp = 0; + for (auto &it : tmp.maps) { + for (auto &it2 : it) { + for (auto &it3 : it2.second) { + void *const p = it3.second; + to_delete.push_back(p); + maxp = std::max(maxp, p); + } + } + } + + g_profiler->add("CM::invalidateMapBlockMesh to_delete", to_delete.size()); + + if (to_delete.empty()) + return; + + // remove matching cache elements + u32 deleted = 0; + for (auto it = m_dynamic_buffers.begin(); it != m_dynamic_buffers.end(); ) { + const std::string &key = it->first; + assert(key.size() % sizeof(void*) == 0); + for (size_t off = 0; off < key.size(); off += sizeof(void*)) { + void *v; + memcpy(&v, &key[off], sizeof(void*)); + if (v > maxp) // early exit + break; + if (CONTAINS(to_delete, v)) { + it = m_dynamic_buffers.erase(it); + deleted++; + goto continue_outer; + } + } + it++; + continue_outer: + continue; + } + + g_profiler->add("CM::invalidateMapBlockMesh deleted", deleted); } static bool getVisibleBrightness(Map *map, const v3f &p0, v3f dir, float step, @@ -1263,7 +1354,6 @@ void ClientMap::renderMapShadows(video::IVideoDriver *driver, }; MeshBufListMaps grouped_buffers; - std::vector buffer_trash; DrawDescriptorList draw_order; std::size_t count = 0; @@ -1308,7 +1398,7 @@ void ClientMap::renderMapShadows(video::IVideoDriver *driver, for (auto &map : grouped_buffers.maps) { for (auto &list : map) { transformBuffersToDrawOrder( - list.second, draw_order, get_block_wpos, buffer_trash); + list.second, draw_order, get_block_wpos, m_dynamic_buffers); } } @@ -1373,9 +1463,6 @@ void ClientMap::renderMapShadows(video::IVideoDriver *driver, g_profiler->avg(prefix + "vertices drawn [#]", vertex_count); g_profiler->avg(prefix + "drawcalls [#]", drawcall_count); g_profiler->avg(prefix + "material swaps [#]", material_swaps); - - for (auto &x : buffer_trash) - x->drop(); } /* diff --git a/src/client/clientmap.h b/src/client/clientmap.h index 760a1a4db..b76e513e2 100644 --- a/src/client/clientmap.h +++ b/src/client/clientmap.h @@ -36,6 +36,14 @@ namespace irr::video class IVideoDriver; } +struct CachedMeshBuffer { + std::vector buf; + u8 age = 0; +}; + +using CachedMeshBuffers = std::unordered_map; + + /* ClientMap @@ -95,6 +103,8 @@ public: void renderPostFx(CameraMode cam_mode); + void invalidateMapBlockMesh(MapBlockMesh *mesh); + // For debug printing void PrintInfo(std::ostream &out) override; @@ -151,6 +161,7 @@ private: std::vector m_keeplist; std::map m_drawlist_shadow; bool m_needs_update_drawlist; + CachedMeshBuffers m_dynamic_buffers; bool m_cache_trilinear_filter; bool m_cache_bilinear_filter; diff --git a/src/defaultsettings.cpp b/src/defaultsettings.cpp index ca569486f..4851ebf73 100644 --- a/src/defaultsettings.cpp +++ b/src/defaultsettings.cpp @@ -104,7 +104,7 @@ void set_default_settings() settings->setDefault("sound_extensions_blacklist", ""); settings->setDefault("mesh_generation_interval", "0"); settings->setDefault("mesh_generation_threads", "0"); - settings->setDefault("mesh_buffer_min_vertices", "100"); + settings->setDefault("mesh_buffer_min_vertices", "300"); settings->setDefault("free_move", "false"); settings->setDefault("pitch_move", "false"); settings->setDefault("fast_move", "false"); @@ -211,7 +211,7 @@ void set_default_settings() settings->setDefault("keymap_slot31", ""); settings->setDefault("keymap_slot32", ""); -#ifndef NDEBUG +#if 1 // Default keybinds for quicktune in debug builds settings->setDefault("keymap_quicktune_prev", "KEY_HOME"); settings->setDefault("keymap_quicktune_next", "KEY_END");