- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hello,
I am trying to create a High Quality BVH4 structure using the BVH builder tutorial. I am aware that it is creating a BVH2 structure and I tried modifying a few arguments to accomplish this task. MaxBranchingFactor within the RTCBuildArguments was changed to a value of 4. Within the InnerNode stuct, I also increased the number of bounds and children to 4. When calling setChildren and setBounds, I made sure that it would loop through all 4 children and bounds instead of 2 as it was before. However, it seems to break when creating the leaf nodes. Could anyone point me in the right direction on how to create a BVH4/8 structure using the BVH builder tutorial?
Thank you,
Evan
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Evan,
could you post or PM your code so that I can have a look? Do you get a segfault when creating the leaf nodes? For a high quality BVH you also need to reserve some extra space (BuildSettings.extraSpace) to store spatial splits. Other than that one needs to modify a couple of BuildSettings entries like sahBlockSize, maxBranchingFactor, minLeafSize, maxLeafSize, quality etc to get a good high-quality n-wide BVH, but that should be it.
Carsten.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Carsten,
Thank you for the quick response. I commented out the asserts in the create(), setChildren(), and setBounds() functions so my code is no longer breaking when creating the leaf nodes. I also modified sah() to account for 4 children instead of 2, however it looks like not all inner nodes are being created with 4 children which is causing sah() to throw an exception and halt the program. Is there not supposed to be 4 children created at every inner node? Is this a result of my BuildSettings? Also how do you determine an appropriate amount of extraSpace and sahBlockSize?
Thank you for you help, here is my bvh_builder_device.cpp file:
// ======================================================================== // // Copyright 2009-2018 Intel Corporation // // // // Licensed under the Apache License, Version 2.0 (the "License"); // // you may not use this file except in compliance with the License. // // You may obtain a copy of the License at // // // // http://www.apache.org/licenses/LICENSE-2.0 // // // // Unless required by applicable law or agreed to in writing, software // // distributed under the License is distributed on an "AS IS" BASIS, // // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // // See the License for the specific language governing permissions and // // limitations under the License. // // ======================================================================== // #include "../common/tutorial/tutorial_device.h" namespace embree { RTCScene g_scene = nullptr; /* This function is called by the builder to signal progress and to * report memory consumption. */ bool memoryMonitor(void* userPtr, ssize_t bytes, bool post) { return true; } bool buildProgress (void* userPtr, double f) { return true; } void splitPrimitive (const RTCBuildPrimitive* prim, unsigned int dim, float pos, RTCBounds* lprim, RTCBounds* rprim, void* userPtr) { assert(dim < 3); assert(prim->geomID == 0); *(BBox3fa*) lprim = *(BBox3fa*) prim; *(BBox3fa*) rprim = *(BBox3fa*) prim; (&lprim->upper_x)[dim] = pos; (&rprim->lower_x)[dim] = pos; } struct Node { virtual float sah() = 0; unsigned int nodeType; }; struct InnerNode : public Node { BBox3fa bounds[4]; Node* children[4]; InnerNode() { bounds[0] = empty; bounds[1] = empty; bounds[3] = empty; bounds[4] = empty; children[0] = nullptr; children[1] = nullptr; children[2] = nullptr; children[3] = nullptr; nodeType = 1; } float sah() { return 1.0f + (area(bounds[0])*children[0]->sah() + area(bounds[1])*children[1]->sah() + area(bounds[2])*children[2]->sah() + area(bounds[3])*children[3]->sah())/area(merge(bounds[0],bounds[1],bounds[2],bounds[3])); } static void* create (RTCThreadLocalAllocator alloc, unsigned int numChildren, void* userPtr) { //assert(numChildren == 2); void* ptr = rtcThreadLocalAlloc(alloc,sizeof(InnerNode),16); return (void*) new (ptr) InnerNode; } static void setChildren (void* nodePtr, void** childPtr, unsigned int numChildren, void* userPtr) { //assert(numChildren == 2); for (size_t i=0; i<numChildren; i++) ((InnerNode*)nodePtr)->children = (Node*) childPtr; } static void setBounds (void* nodePtr, const RTCBounds** bounds, unsigned int numChildren, void* userPtr) { //assert(numChildren == 2); for (size_t i=0; i<numChildren; i++) ((InnerNode*)nodePtr)->bounds = *(const BBox3fa*) bounds; } }; //leaf node seems to be everything on the bottom/lowest nodes/triangles struct LeafNode : public Node { unsigned id; BBox3fa bounds; LeafNode(unsigned id_new, const BBox3fa& bounds_new) { id = id_new; bounds = bounds_new; nodeType = 2; } float sah() { return 1.0f; } static void* create (RTCThreadLocalAllocator alloc, const RTCBuildPrimitive* prims, size_t numPrims, void* userPtr) { assert(numPrims == 1); void* ptr = rtcThreadLocalAlloc(alloc,sizeof(LeafNode),16); return (void*) new (ptr) LeafNode(prims->primID,*(BBox3fa*)prims); } }; void build(RTCBuildQuality quality, avector<RTCBuildPrimitive>& prims_i, char* cfg, size_t extraSpace = 0) { rtcSetDeviceMemoryMonitorFunction(g_device,memoryMonitor,nullptr); RTCBVH bvh = rtcNewBVH(g_device); avector<RTCBuildPrimitive> prims; prims.reserve(prims_i.size()+extraSpace); prims.resize(prims_i.size()); /* settings for BVH build */ RTCBuildArguments arguments = rtcDefaultBuildArguments(); arguments.byteSize = sizeof(arguments); arguments.buildFlags = RTC_BUILD_FLAG_DYNAMIC; arguments.buildQuality = quality; arguments.maxBranchingFactor = 4; arguments.maxDepth = 1024; arguments.sahBlockSize = 2; arguments.minLeafSize = 1; arguments.maxLeafSize = 1; arguments.traversalCost = 1.0f; arguments.intersectionCost = 1.0f; arguments.bvh = bvh; arguments.primitives = prims.data(); arguments.primitiveCount = prims.size(); arguments.primitiveArrayCapacity = prims.capacity(); arguments.createNode = InnerNode::create; arguments.setNodeChildren = InnerNode::setChildren; arguments.setNodeBounds = InnerNode::setBounds; arguments.createLeaf = LeafNode::create; arguments.splitPrimitive = splitPrimitive; arguments.buildProgress = buildProgress; arguments.userPtr = nullptr; /* we recreate the prims array here, as the builders modify this array */ for (size_t j = 0; j < prims.size(); j++) { prims= prims_i ; } std::cout << "Building BVH over " << prims.size() << " primitives, " << std::flush; double t0 = getSeconds(); Node* root = (Node*) rtcBuildBVH(&arguments); double t1 = getSeconds(); const float sah = root ? root->sah() : 0.0f; std::cout << 1000.0f*(t1-t0) << "ms, " << 1E-6*double(prims.size())/(t1-t0) << " Mprims/s, sah = " << sah << " [DONE]" << std::endl; rtcReleaseBVH(bvh); } /* called by the C++ code for initialization */ extern "C" void device_init (char* cfg) { /* set start render mode */ renderTile = renderTileStandard; /* create random bounding boxes */ const size_t N = 200; const size_t extraSpace = 1000; avector<RTCBuildPrimitive> prims; prims.resize(N); /* Create primitives */ for (size_t i=0; i<N; i++) { const float x = float(drand48()); const float y = float(drand48()); const float z = float(drand48()); const Vec3fa p = 1000.0f*Vec3fa(x,y,z); const BBox3fa b = BBox3fa(p,p+Vec3fa(1.0f)); RTCBuildPrimitive prim; prim.lower_x = b.lower.x; prim.lower_y = b.lower.y; prim.lower_z = b.lower.z; prim.geomID = 0; prim.upper_x = b.upper.x; prim.upper_y = b.upper.y; prim.upper_z = b.upper.z; prim.primID = (unsigned) i; prims = prim; } /* only want to test high quality build std::cout << "Low quality BVH build:" << std::endl; build(RTC_BUILD_QUALITY_LOW,prims,cfg); std::cout << "Normal quality BVH build:" << std::endl; build(RTC_BUILD_QUALITY_MEDIUM,prims,cfg); */ std::cout << "High quality BVH build:" << std::endl; build(RTC_BUILD_QUALITY_HIGH,prims,cfg,extraSpace); } /* task that renders a single screen tile */ void renderTileStandard(int taskIndex, int threadIndex, int* pixels, const unsigned int width, const unsigned int height, const float time, const ISPCCamera& camera, const int numTilesX, const int numTilesY) { } /* task that renders a single screen tile */ void renderTileTask(int taskIndex, int threadIndex, int* pixels, const unsigned int width, const unsigned int height, const float time, const ISPCCamera& camera, const int numTilesX, const int numTilesY) { } /* called by the C++ code to render */ extern "C" void device_render (int* pixels, const int width, const int height, const float time, const ISPCCamera& camera) { } /* called by the C++ code for cleanup */ extern "C" void device_cleanup () { } }
-Evan
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Evan,
thanks for sharing the code. First, there's a little out of bounds access here:
InnerNode() {
...
bounds[4] = empty;
}
You are correct that not all inner nodes have always four valid children, therefore the sah() function should look something like this:
float sah() {
float sum_area = 0.0f;
BBox3fa merged_bounds(empty);
for (size_t i=0;i<4;i++)
{
if (children == nullptr) break;
sum_area += area(bounds)*children->sah();
merged_bounds.extend(bounds);
}
return 1.0f + sum_area / area(merged_bounds);
}
Appropriate amount of extra space should be in 40-100% range with respect to the number of input primitives. Obviously there's a performance vs. memory trade off here.
sahBlockSize should be 4 for a 4-wide BVH and 8 for a 8-wide BVH.
Hope this helps.
C.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Oops, didn't catch that out of bounds error.
This really helps thank you!
-Evan

- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page