Intel® Embree Ray Tracing Kernels
Discussion forum on the open source ray tracing kernels for fast photo-realistic rendering on Intel® CPU(s)

BVH4 traverser "optimization"

I've noticed an unnecessary "else" for leaf nodes, at bvh4_traverser.cpp:122. Interestingly, Composer 2011 on Windows seems to have a problem optimizing that. I get a 2-3% speed increase on my i7-920 after removing that "else". I tried something similar for the occlusion rays, but it doesn't seem to make a difference:
[bash]while (true) { if (__builtin_expect(stackPtr == 0, false)) break; stackPtr--; cur = stack[stackPtr]; next: /*! this is an inner node */ if (__builtin_expect(cur >= 0, true)) { /*! single ray intersection with 4 boxes */ const BVH4::Node& node = bvh->node(nodes,cur); ssef tNearX = (norg.x + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+nearX)) * rdir.x; ssef tNearY = (norg.y + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+nearY)) * rdir.y; ssef tNearZ = (norg.z + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+nearZ)) * rdir.z; ssef tNear = max(tNearX,tNearY,tNearZ,rayNear); ssef tFarX = (norg.x + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+farX)) * rdir.x; ssef tFarY = (norg.y + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+farY)) * rdir.y; ssef tFarZ = (norg.z + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+farZ)) * rdir.z; ssef tFar = min(tFarX,tFarY,tFarZ,rayFar); size_t _hit = movemask(tNear <= tFar); /*! push hit nodes onto stack */ if (__builtin_expect(_hit == 0, true)) continue; size_t r = __bsf(_hit); _hit = __btc(_hit,r); stack[stackPtr] = cur = node.child; if (__builtin_expect(_hit == 0, true)) goto next; r = __bsf(_hit); _hit = __btc(_hit,r); stack[++stackPtr] = cur = node.child; if (__builtin_expect(_hit == 0, true)) goto next; r = __bsf(_hit); _hit = __btc(_hit,r); stack[++stackPtr] = cur = node.child; if (__builtin_expect(_hit == 0, true)) goto next; r = __bsf(_hit); _hit = __btc(_hit,r); stack[++stackPtr] = cur = node.child; goto next; } /*! this is a leaf node */ { cur ^= 0x80000000; const size_t ofs = size_t(cur) >> 5; const size_t num = size_t(cur) & 0x1F; for (size_t i=ofs; itriangles.occluded(ray)) return true; } }[/bash]
0 Kudos
0 Replies