- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
glTexSubImage2D is unacceptably slow with an Intel Series 4 on Windows 7, driver version 8.15.10.2141 (latest). Benchmarked with the following code:
Is there a pullback from texture memory to system memory happening here?
DWORD start = timeGetTime ();
for (int i = 0; i < NUM_TEXIMAGE; i++)
{
    glBindTexture (GL_TEXTURE_2D, teximage);
    glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, sibuffer);
}
DWORD end = timeGetTime ();
A single call requires some 20ms to return for a 512x512 texture; the code above (with TEX_WIDTH 64, TEX_HEIGHT 512, NUM_TEXIMAGE 16, and timeBeginPeriod (1) in effect) requires ~45ms, compared to < 5ms on an Intel 945 with an older driver revision on Windows XP. I have ensured that the formats match and have tried using a PBO, with no measurable performance difference. PFD_SUPPORT_COMPOSITION makes no difference either.
Is there a pullback from texture memory to system memory happening here?
1 Solution
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
The following test app (SDL, mostly portable, compiles with MSVC 2008) can be used to test/verify this issue:
#define WINDOW_WIDTH 800 #define WINDOW_HEIGHT 600 #include "SDL.h" #include "SDL_opengl.h" #pragma comment (lib, "SDL.lib") #pragma comment (lib, "SDLmain.lib") #pragma comment (lib, "opengl32.lib") #define TEX_WIDTH 512 #define TEX_HEIGHT 512 unsigned int sibuffer[TEX_WIDTH * TEX_HEIGHT]; unsigned int teximage = 0; unsigned int framecount = 0; // find the fastest modes to use for glTexSubImage2D typedef struct tsitest_s { char formatstr[64]; char typestr[64]; GLenum format; GLenum type; int modespeed; bool failed; } tsitest_t; tsitest_t tsimodes[] = { {"GL_RGBA", "GL_UNSIGNED_BYTE", GL_RGBA, GL_UNSIGNED_BYTE, 666, true}, {"GL_BGRA", "GL_UNSIGNED_BYTE", GL_BGRA, GL_UNSIGNED_BYTE, 666, true}, {"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, 666, true}, {"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, 666, true}, {"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true}, {"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true} }; int fastest = 666; int bestspeed = 32768; GLuint R_MakeMeATexture (int width, int height, GLenum format, GLenum type) { GLuint texnum = 0; glEnable (GL_TEXTURE_2D); glGenTextures (1, &texnum); glBindTexture (GL_TEXTURE_2D, texnum); glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, format, type, NULL); // commit the buffer so that timings are valid glFinish (); return texnum; } void R_SetTSIMode (void) { int numtsimodes = sizeof (tsimodes) / sizeof (tsitest_t); for (int i = 0; i < numtsimodes; i++) { // clear last the error (if any) glGetError (); // create a new texture object GLuint texnum = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes.format, tsimodes.type); Uint32 start = SDL_GetTicks (); // SDL_GetTicks has insufficient resolution to measure one call so we need to run a few of 
them for (int t = 0; t < 16; t++) glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes.format, tsimodes.type, sibuffer); Uint32 end = SDL_GetTicks (); // commit the buffer so that timings are valid glFinish (); glDeleteTextures (1, &texnum); if (glGetError () != GL_NO_ERROR) tsimodes.failed = true; else tsimodes.failed = false; tsimodes.modespeed = (end - start); } for (int i = 0; i < numtsimodes; i++) { printf ("mode: %i %4ims [%s/%s] (%s)\n", i, tsimodes.modespeed, tsimodes.formatstr, tsimodes.typestr, tsimodes.failed ? "FAILED" : "OK"); if (tsimodes.modespeed <= bestspeed && !tsimodes.failed) { bestspeed = tsimodes.modespeed; fastest = i; } } if (fastest == 666) { MessageBox (NULL, "Failed to find a format!", "Error", MB_OK | MB_ICONSTOP); exit (0); } } void RenderOpenGL (void) { framecount++; for (int i = 0, w = 0; w < TEX_WIDTH; w++) { for (int h = 0; h < TEX_HEIGHT; h++, i++) { unsigned char *rgba = (unsigned char *) &sibuffer; // 2 == red, 1 == green, 0 == blue rgba[2] = ((h * i) + framecount) & 255; rgba[1] = ((w * h) + framecount) & 255; rgba[0] = ((w * i) + framecount) & 255; rgba[3] = 255; } } glClear (GL_COLOR_BUFFER_BIT); glViewport (0, 0, WINDOW_WIDTH, WINDOW_HEIGHT); glMatrixMode (GL_MODELVIEW); glLoadIdentity (); glMatrixMode (GL_PROJECTION); glLoadIdentity (); glOrtho (0, WINDOW_WIDTH, WINDOW_HEIGHT, 0, -99999, 99999); glBindTexture (GL_TEXTURE_2D, teximage); glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type, sibuffer); glBegin (GL_QUADS); glTexCoord2f (0, 0); glVertex2f (0, 0); glTexCoord2f (1, 0); glVertex2f (TEX_WIDTH, 0); glTexCoord2f (1, 1); glVertex2f (TEX_WIDTH, TEX_HEIGHT); glTexCoord2f (0, 1); glVertex2f (0, TEX_HEIGHT); glEnd (); } int main (int argc, char *argv[]) { if (SDL_Init (SDL_INIT_VIDEO | SDL_INIT_NOPARACHUTE) != 0) { printf ("Unable to initialize SDL: %s\n", SDL_GetError ()); return 1; } 
SDL_GL_SetAttribute (SDL_GL_DOUBLEBUFFER, 1); SDL_Surface *screen = SDL_SetVideoMode (WINDOW_WIDTH, WINDOW_HEIGHT, 32, SDL_OPENGL); R_SetTSIMode (); teximage = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type); glClearColor (0, 0, 0, 1); int done = 0; SDL_Event evt; while (!done) { while (!done && SDL_PollEvent (&evt)) { if (evt.type == SDL_QUIT) { done = 1; break; } } // run the screen update here RenderOpenGL (); SDL_GL_SwapBuffers (); } return 0; }
Link Copied
5 Replies
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Update:
Internal Format: GL_RGBA
Format: GL_BGRA
Type: GL_UNSIGNED_INT_8_8_8_8_REV
Problem completely goes away.
I suspect that the driver was pulling the teximage data back to system memory otherwise. Can anyone confirm or deny?
Internal Format: GL_RGBA
Format: GL_BGRA
Type: GL_UNSIGNED_INT_8_8_8_8_REV
Problem completely goes away.
I suspect that the driver was pulling the teximage data back to system memory otherwise. Can anyone confirm or deny?
mode: 0 320ms [GL_RGBA/GL_UNSIGNED_BYTE] (OK)
mode: 1 317ms [GL_BGRA/GL_UNSIGNED_BYTE] (OK)
mode: 2 377ms [GL_RGBA/GL_UNSIGNED_INT_8_8_8_8] (OK)
mode: 3 375ms [GL_BGRA/GL_UNSIGNED_INT_8_8_8_8] (OK)
mode: 4 376ms [GL_RGBA/GL_UNSIGNED_INT_8_8_8_8_REV] (OK)
mode: 5 12ms [GL_BGRA/GL_UNSIGNED_INT_8_8_8_8_REV] (OK)
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi mfah,
What do you mean the problem completely goes away?
From your first post, I take the problem to be a performance difference between the older driver on the 945 and the newer driver on the Intel 4 Series card.
Are you saying that if you used the GL_UNSIGNED_INT_8_8_8_8_REV / GL_BGRA, this performance difference is not seen anymore?
Thanks,
-Ganesh
What do you mean the problem completely goes away?
From your first post, I take the problem to be a performance difference between the older driver on the 945 and the newer driver on the Intel 4 Series card.
Are you saying that if you used the GL_UNSIGNED_INT_8_8_8_8_REV / GL_BGRA, this performance difference is not seen anymore?
Thanks,
-Ganesh
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
Yes, this is correct. The older part/older driver did not exhibit this performance problem, the newer one does. Running a VMWare session (via VMWare's SVGA driver) on the newer part/newer driver also does not exhibit this problem.
By switching the type and format to GL_UNSIGNED_INT_8_8_8_8_REV/GL_BGRA the performance problem is removed.
Yes, this is correct. The older part/older driver did not exhibit this performance problem, the newer one does. Running a VMWare session (via VMWare's SVGA driver) on the newer part/newer driver also does not exhibit this problem.
By switching the type and format to GL_UNSIGNED_INT_8_8_8_8_REV/GL_BGRA the performance problem is removed.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Can you provide a test kernel? We will try to replicate the issue and try to provide a better explanation.
Thanks.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
The following test app (SDL, mostly portable, compiles with MSVC 2008) can be used to test/verify this issue:
#define WINDOW_WIDTH 800 #define WINDOW_HEIGHT 600 #include "SDL.h" #include "SDL_opengl.h" #pragma comment (lib, "SDL.lib") #pragma comment (lib, "SDLmain.lib") #pragma comment (lib, "opengl32.lib") #define TEX_WIDTH 512 #define TEX_HEIGHT 512 unsigned int sibuffer[TEX_WIDTH * TEX_HEIGHT]; unsigned int teximage = 0; unsigned int framecount = 0; // find the fastest modes to use for glTexSubImage2D typedef struct tsitest_s { char formatstr[64]; char typestr[64]; GLenum format; GLenum type; int modespeed; bool failed; } tsitest_t; tsitest_t tsimodes[] = { {"GL_RGBA", "GL_UNSIGNED_BYTE", GL_RGBA, GL_UNSIGNED_BYTE, 666, true}, {"GL_BGRA", "GL_UNSIGNED_BYTE", GL_BGRA, GL_UNSIGNED_BYTE, 666, true}, {"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, 666, true}, {"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, 666, true}, {"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true}, {"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true} }; int fastest = 666; int bestspeed = 32768; GLuint R_MakeMeATexture (int width, int height, GLenum format, GLenum type) { GLuint texnum = 0; glEnable (GL_TEXTURE_2D); glGenTextures (1, &texnum); glBindTexture (GL_TEXTURE_2D, texnum); glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, format, type, NULL); // commit the buffer so that timings are valid glFinish (); return texnum; } void R_SetTSIMode (void) { int numtsimodes = sizeof (tsimodes) / sizeof (tsitest_t); for (int i = 0; i < numtsimodes; i++) { // clear last the error (if any) glGetError (); // create a new texture object GLuint texnum = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes.format, tsimodes.type); Uint32 start = SDL_GetTicks (); // SDL_GetTicks has insufficient resolution to measure one call so we need to run a few of 
them for (int t = 0; t < 16; t++) glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes.format, tsimodes.type, sibuffer); Uint32 end = SDL_GetTicks (); // commit the buffer so that timings are valid glFinish (); glDeleteTextures (1, &texnum); if (glGetError () != GL_NO_ERROR) tsimodes.failed = true; else tsimodes.failed = false; tsimodes.modespeed = (end - start); } for (int i = 0; i < numtsimodes; i++) { printf ("mode: %i %4ims [%s/%s] (%s)\n", i, tsimodes.modespeed, tsimodes.formatstr, tsimodes.typestr, tsimodes.failed ? "FAILED" : "OK"); if (tsimodes.modespeed <= bestspeed && !tsimodes.failed) { bestspeed = tsimodes.modespeed; fastest = i; } } if (fastest == 666) { MessageBox (NULL, "Failed to find a format!", "Error", MB_OK | MB_ICONSTOP); exit (0); } } void RenderOpenGL (void) { framecount++; for (int i = 0, w = 0; w < TEX_WIDTH; w++) { for (int h = 0; h < TEX_HEIGHT; h++, i++) { unsigned char *rgba = (unsigned char *) &sibuffer; // 2 == red, 1 == green, 0 == blue rgba[2] = ((h * i) + framecount) & 255; rgba[1] = ((w * h) + framecount) & 255; rgba[0] = ((w * i) + framecount) & 255; rgba[3] = 255; } } glClear (GL_COLOR_BUFFER_BIT); glViewport (0, 0, WINDOW_WIDTH, WINDOW_HEIGHT); glMatrixMode (GL_MODELVIEW); glLoadIdentity (); glMatrixMode (GL_PROJECTION); glLoadIdentity (); glOrtho (0, WINDOW_WIDTH, WINDOW_HEIGHT, 0, -99999, 99999); glBindTexture (GL_TEXTURE_2D, teximage); glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type, sibuffer); glBegin (GL_QUADS); glTexCoord2f (0, 0); glVertex2f (0, 0); glTexCoord2f (1, 0); glVertex2f (TEX_WIDTH, 0); glTexCoord2f (1, 1); glVertex2f (TEX_WIDTH, TEX_HEIGHT); glTexCoord2f (0, 1); glVertex2f (0, TEX_HEIGHT); glEnd (); } int main (int argc, char *argv[]) { if (SDL_Init (SDL_INIT_VIDEO | SDL_INIT_NOPARACHUTE) != 0) { printf ("Unable to initialize SDL: %s\n", SDL_GetError ()); return 1; } 
SDL_GL_SetAttribute (SDL_GL_DOUBLEBUFFER, 1); SDL_Surface *screen = SDL_SetVideoMode (WINDOW_WIDTH, WINDOW_HEIGHT, 32, SDL_OPENGL); R_SetTSIMode (); teximage = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type); glClearColor (0, 0, 0, 1); int done = 0; SDL_Event evt; while (!done) { while (!done && SDL_PollEvent (&evt)) { if (evt.type == SDL_QUIT) { done = 1; break; } } // run the screen update here RenderOpenGL (); SDL_GL_SwapBuffers (); } return 0; }

Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page