- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I've noticed an unnecessary "else" for leaf nodes. This is bvh4_traverser.cpp:122. Interestingly, Composer 2011 on Windows seems to have a problem optimizing that. I get a 2-3% speed increase on my i7-920 after removing that "else". I tried something similar for the occlusion rays, but it doesn't seem to make a difference:
[bash]while (true) {
if (__builtin_expect(stackPtr == 0, false)) break;
stackPtr--;
cur = stack[stackPtr];
next:
/*! this is an inner node */
if (__builtin_expect(cur >= 0, true))
{
/*! single ray intersection with 4 boxes */
const BVH4::Node& node = bvh->node(nodes,cur);
ssef tNearX = (norg.x + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+nearX)) * rdir.x;
ssef tNearY = (norg.y + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+nearY)) * rdir.y;
ssef tNearZ = (norg.z + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+nearZ)) * rdir.z;
ssef tNear = max(tNearX,tNearY,tNearZ,rayNear);
ssef tFarX = (norg.x + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+farX)) * rdir.x;
ssef tFarY = (norg.y + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+farY)) * rdir.y;
ssef tFarZ = (norg.z + *(ssef*)((const char*)nodes+BVH4::offsetFactor*size_t(cur)+farZ)) * rdir.z;
ssef tFar = min(tFarX,tFarY,tFarZ,rayFar);
size_t _hit = movemask(tNear <= tFar);
/*! push hit nodes onto stack */
if (__builtin_expect(_hit == 0, true)) continue;
size_t r = __bsf(_hit); _hit = __btc(_hit,r);
stack[stackPtr] = cur = node.child[r];
if (__builtin_expect(_hit == 0, true)) goto next;
r = __bsf(_hit); _hit = __btc(_hit,r);
stack[++stackPtr] = cur = node.child[r];
if (__builtin_expect(_hit == 0, true)) goto next;
r = __bsf(_hit); _hit = __btc(_hit,r);
stack[++stackPtr] = cur = node.child[r];
if (__builtin_expect(_hit == 0, true)) goto next;
r = __bsf(_hit); _hit = __btc(_hit,r);
stack[++stackPtr] = cur = node.child[r];
goto next;
}
/*! this is a leaf node */
{
cur ^= 0x80000000;
const size_t ofs = size_t(cur) >> 5;
const size_t num = size_t(cur) & 0x1F;
for (size_t i=ofs; i<ofs+num; i++) if (bvh->triangles[i].occluded(ray))
return true;
}
}[/bash]
Link Copied
0 Replies

Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page