- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
glTexSubImage2D is unacceptably slow with an Intel Series 4 on Windows 7, driver version 8.15.10.2141 (latest). Benchmarked with the following code:
Is there a pullback from texture memory to system memory happening here?
// Benchmark fragment: measures how long NUM_TEXIMAGE consecutive full-region
// glTexSubImage2D uploads take to return to the caller.
// timestamp taken before the first upload
DWORD start = timeGetTime ();
for (int i = 0; i < NUM_TEXIMAGE; i++)
{
// the same texture object is rebound and re-uploaded from sibuffer on every pass
glBindTexture (GL_TEXTURE_2D, teximage);
glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, sibuffer);
}
// timestamp taken after the last call returns; (end - start) is the figure quoted in the post
DWORD end = timeGetTime ();
A single call requires some 20ms to return for a 512x512 texture; the code above (TEX_WIDTH is 64, TEX_HEIGHT is 512, NUM_TEXIMAGE is 16, timeBeginPeriod (1)) requires ~45ms, compared to < 5ms on an Intel 945, older driver rev, Windows XP. Have ensured formats match and have attempted use of PBO with no measurable performance difference. PFD_SUPPORT_COMPOSITION makes no difference. Is there a pullback from texture memory to system memory happening here?
1 Solution
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
The following test app (SDL, mostly portable, compiles with MSVC 2008) can be used to test/verify this issue:
// size of the SDL/OpenGL window requested in main and used for the viewport/projection
#define WINDOW_WIDTH 800
#define WINDOW_HEIGHT 600
#include "SDL.h"
#include "SDL_opengl.h"
// MSVC-specific: link the required libraries without touching project settings
#pragma comment (lib, "SDL.lib")
#pragma comment (lib, "SDLmain.lib")
#pragma comment (lib, "opengl32.lib")
// dimensions of the texture that is created, benchmarked and re-uploaded every frame
#define TEX_WIDTH 512
#define TEX_HEIGHT 512
// source pixels for every glTexSubImage2D call; one element per texel
// (assumes unsigned int is 32 bits so one element holds one 4-byte pixel)
unsigned int sibuffer[TEX_WIDTH * TEX_HEIGHT];
// GL texture object used for rendering; assigned in main from R_MakeMeATexture
unsigned int teximage = 0;
// incremented once per RenderOpenGL call and mixed into the generated pattern
unsigned int framecount = 0;
// find the fastest modes to use for glTexSubImage2D
// One candidate format/type combination together with its benchmark result.
// Field order matters: tsimodes is initialised positionally.
typedef struct tsitest_s
{
// printable name of `format`, used in the results printout
char formatstr[64];
// printable name of `type`, used in the results printout
char typestr[64];
// pixel format argument passed to glTexImage2D / glTexSubImage2D
GLenum format;
// pixel data type argument passed to glTexImage2D / glTexSubImage2D
GLenum type;
// SDL_GetTicks delta recorded by R_SetTSIMode for the timed uploads
int modespeed;
// set by R_SetTSIMode: true when glGetError reported an error for this mode
bool failed;
} tsitest_t;
// Candidate format/type pairs benchmarked by R_SetTSIMode. Every entry starts
// out as modespeed 666 / failed == true and is overwritten once it is measured.
tsitest_t tsimodes[] =
{
{"GL_RGBA", "GL_UNSIGNED_BYTE", GL_RGBA, GL_UNSIGNED_BYTE, 666, true},
{"GL_BGRA", "GL_UNSIGNED_BYTE", GL_BGRA, GL_UNSIGNED_BYTE, 666, true},
{"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, 666, true},
{"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, 666, true},
{"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true},
{"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true}
};
// index into tsimodes of the selected mode; 666 is the "nothing selected" sentinel
// that R_SetTSIMode tests for after benchmarking
int fastest = 666;
// lowest modespeed accepted so far; starts at a large value and is lowered by R_SetTSIMode
int bestspeed = 32768;
// Generates a new 2D texture object, leaves it bound to GL_TEXTURE_2D, and
// allocates width x height storage for it with internal format GL_RGBA.
// `format` and `type` describe the (absent) source data handed to glTexImage2D.
// Returns the GL name of the new texture.
GLuint R_MakeMeATexture (int width, int height, GLenum format, GLenum type)
{
	GLuint handle = 0;

	glEnable (GL_TEXTURE_2D);
	glGenTextures (1, &handle);
	glBindTexture (GL_TEXTURE_2D, handle);

	// linear filtering in both directions; only level 0 is ever specified
	glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

	// NULL data pointer: storage is defined here, pixels arrive later via glTexSubImage2D
	glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, format, type, NULL);

	// wait for the GL to complete the above so later timings are not skewed by it
	glFinish ();

	return handle;
}
// Benchmarks every format/type pair in tsimodes and stores the index of the
// quickest one that raised no GL error in `fastest` (and its time in `bestspeed`).
// For each mode a scratch texture is created, 16 full-size glTexSubImage2D
// uploads from sibuffer are timed with SDL_GetTicks, and the texture is deleted.
// Results are printed for every mode. If no mode succeeds a message box is
// shown and the process exits.
void R_SetTSIMode (void)
{
	int numtsimodes = sizeof (tsimodes) / sizeof (tsimodes[0]);

	for (int i = 0; i < numtsimodes; i++)
	{
		// clear the last error (if any) so the check below only reflects this mode
		glGetError ();

		// create a new texture object; R_MakeMeATexture leaves it bound to GL_TEXTURE_2D
		GLuint texnum = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes[i].format, tsimodes[i].type);

		Uint32 start = SDL_GetTicks ();

		// SDL_GetTicks has insufficient resolution to measure one call so we need to run a few of them
		for (int t = 0; t < 16; t++)
		{
			glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes[i].format, tsimodes[i].type, sibuffer);
		}

		Uint32 end = SDL_GetTicks ();

		// commit the buffer so that timings are valid: the measured interval is the
		// time for the calls to return, and finishing here keeps this mode's
		// pending work out of the next mode's measurement
		glFinish ();
		glDeleteTextures (1, &texnum);

		tsimodes[i].failed = (glGetError () != GL_NO_ERROR);
		tsimodes[i].modespeed = (int) (end - start);
	}

	for (int i = 0; i < numtsimodes; i++)
	{
		printf ("mode: %i %4ims [%s/%s] (%s)\n", i, tsimodes[i].modespeed, tsimodes[i].formatstr,
			tsimodes[i].typestr, tsimodes[i].failed ? "FAILED" : "OK");

		// <= means that on a tie the later entry in the table wins
		if (tsimodes[i].modespeed <= bestspeed && !tsimodes[i].failed)
		{
			bestspeed = tsimodes[i].modespeed;
			fastest = i;
		}
	}

	// 666 is the initial sentinel value of `fastest`: nothing was selected
	if (fastest == 666)
	{
		MessageBox (NULL, "Failed to find a format!", "Error", MB_OK | MB_ICONSTOP);
		exit (0);
	}
}
// Renders one frame: regenerates an animated test pattern in sibuffer, uploads
// it to `teximage` with the format/type selected in tsimodes[fastest], and
// draws it as a TEX_WIDTH x TEX_HEIGHT textured quad at the window's top-left.
// Buffer swapping is left to the caller.
void RenderOpenGL (void)
{
	framecount++;

	// i is the running texel index into sibuffer across both loops
	for (int i = 0, w = 0; w < TEX_WIDTH; w++)
	{
		for (int h = 0; h < TEX_HEIGHT; h++, i++)
		{
			// address the four bytes of texel i (not the start of the buffer)
			unsigned char *rgba = (unsigned char *) &sibuffer[i];

			// 2 == red, 1 == green, 0 == blue
			// NOTE(review): this byte order matches a BGRA layout; with another
			// selected mode the channels may appear swapped - confirm if colours matter
			rgba[2] = ((h * i) + framecount) & 255;
			rgba[1] = ((w * h) + framecount) & 255;
			rgba[0] = ((w * i) + framecount) & 255;
			rgba[3] = 255;
		}
	}

	glClear (GL_COLOR_BUFFER_BIT);
	glViewport (0, 0, WINDOW_WIDTH, WINDOW_HEIGHT);

	glMatrixMode (GL_MODELVIEW);
	glLoadIdentity ();

	// pixel-space orthographic projection with the origin at the top-left
	glMatrixMode (GL_PROJECTION);
	glLoadIdentity ();
	glOrtho (0, WINDOW_WIDTH, WINDOW_HEIGHT, 0, -99999, 99999);

	glBindTexture (GL_TEXTURE_2D, teximage);
	glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);

	// this per-frame upload is the operation under investigation
	glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type, sibuffer);

	glBegin (GL_QUADS);
	glTexCoord2f (0, 0);
	glVertex2f (0, 0);
	glTexCoord2f (1, 0);
	glVertex2f (TEX_WIDTH, 0);
	glTexCoord2f (1, 1);
	glVertex2f (TEX_WIDTH, TEX_HEIGHT);
	glTexCoord2f (0, 1);
	glVertex2f (0, TEX_HEIGHT);
	glEnd ();
}
int main (int argc, char *argv[])
{
if (SDL_Init (SDL_INIT_VIDEO | SDL_INIT_NOPARACHUTE) != 0)
{
printf ("Unable to initialize SDL: %s\n", SDL_GetError ());
return 1;
}
SDL_GL_SetAttribute (SDL_GL_DOUBLEBUFFER, 1);
SDL_Surface *screen = SDL_SetVideoMode (WINDOW_WIDTH, WINDOW_HEIGHT, 32, SDL_OPENGL);
R_SetTSIMode ();
teximage = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type);
glClearColor (0, 0, 0, 1);
int done = 0;
SDL_Event evt;
while (!done)
{
while (!done && SDL_PollEvent (&evt))
{
if (evt.type == SDL_QUIT)
{
done = 1;
break;
}
}
// run the screen update here
RenderOpenGL ();
SDL_GL_SwapBuffers ();
}
return 0;
}
Link Copied
5 Replies
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Update:
Internal Format: GL_RGBA
Format: GL_BGRA
Type: GL_UNSIGNED_INT_8_8_8_8_REV
Problem completely goes away.
I suspect that the driver was pulling the teximage data back to system memory otherwise. Can anyone confirm or deny?
Internal Format: GL_RGBA
Format: GL_BGRA
Type: GL_UNSIGNED_INT_8_8_8_8_REV
Problem completely goes away.
I suspect that the driver was pulling the teximage data back to system memory otherwise. Can anyone confirm or deny?
mode: 0 320ms [GL_RGBA/GL_UNSIGNED_BYTE] (OK)
mode: 1 317ms [GL_BGRA/GL_UNSIGNED_BYTE] (OK)
mode: 2 377ms [GL_RGBA/GL_UNSIGNED_INT_8_8_8_8] (OK)
mode: 3 375ms [GL_BGRA/GL_UNSIGNED_INT_8_8_8_8] (OK)
mode: 4 376ms [GL_RGBA/GL_UNSIGNED_INT_8_8_8_8_REV] (OK)
mode: 5 12ms [GL_BGRA/GL_UNSIGNED_INT_8_8_8_8_REV] (OK)
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi mfah,
What do you mean the problem completely goes away?
From your first post, I take the problem to be a performance difference between the older driver on the 945 and the newer driver on the Intel 4 Series card.
Are you saying that if you used the GL_UNSIGNED_INT_8_8_8_8_REV / GL_BGRA, this performance difference is not seen anymore?
Thanks,
-Ganesh
What do you mean the problem completely goes away?
From your first post, I take the problem to be a performance difference between the older driver on the 945 and the newer driver on the Intel 4 Series card.
Are you saying that if you used the GL_UNSIGNED_INT_8_8_8_8_REV / GL_BGRA, this performance difference is not seen anymore?
Thanks,
-Ganesh
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
Yes, this is correct. The older part/older driver did not exhibit this performance problem, the newer one does. Running a VMWare session (via VMWare's SVGA driver) on the newer part/newer driver also does not exhibit this problem.
By switching the type and format to GL_UNSIGNED_INT_8_8_8_8_REV/GL_BGRA the performance problem is removed.
Yes, this is correct. The older part/older driver did not exhibit this performance problem, the newer one does. Running a VMWare session (via VMWare's SVGA driver) on the newer part/newer driver also does not exhibit this problem.
By switching the type and format to GL_UNSIGNED_INT_8_8_8_8_REV/GL_BGRA the performance problem is removed.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Can you provide a test kernel? We will try to replicate the issue and try to provide a better explanation.
Thanks.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
The following test app (SDL, mostly portable, compiles with MSVC 2008) can be used to test/verify this issue:
// size of the SDL/OpenGL window requested in main and used for the viewport/projection
#define WINDOW_WIDTH 800
#define WINDOW_HEIGHT 600
#include "SDL.h"
#include "SDL_opengl.h"
// MSVC-specific: link the required libraries without touching project settings
#pragma comment (lib, "SDL.lib")
#pragma comment (lib, "SDLmain.lib")
#pragma comment (lib, "opengl32.lib")
// dimensions of the texture that is created, benchmarked and re-uploaded every frame
#define TEX_WIDTH 512
#define TEX_HEIGHT 512
// source pixels for every glTexSubImage2D call; one element per texel
// (assumes unsigned int is 32 bits so one element holds one 4-byte pixel)
unsigned int sibuffer[TEX_WIDTH * TEX_HEIGHT];
// GL texture object used for rendering; assigned in main from R_MakeMeATexture
unsigned int teximage = 0;
// incremented once per RenderOpenGL call and mixed into the generated pattern
unsigned int framecount = 0;
// find the fastest modes to use for glTexSubImage2D
// One candidate format/type combination together with its benchmark result.
// Field order matters: tsimodes is initialised positionally.
typedef struct tsitest_s
{
// printable name of `format`, used in the results printout
char formatstr[64];
// printable name of `type`, used in the results printout
char typestr[64];
// pixel format argument passed to glTexImage2D / glTexSubImage2D
GLenum format;
// pixel data type argument passed to glTexImage2D / glTexSubImage2D
GLenum type;
// SDL_GetTicks delta recorded by R_SetTSIMode for the timed uploads
int modespeed;
// set by R_SetTSIMode: true when glGetError reported an error for this mode
bool failed;
} tsitest_t;
// Candidate format/type pairs benchmarked by R_SetTSIMode. Every entry starts
// out as modespeed 666 / failed == true and is overwritten once it is measured.
tsitest_t tsimodes[] =
{
{"GL_RGBA", "GL_UNSIGNED_BYTE", GL_RGBA, GL_UNSIGNED_BYTE, 666, true},
{"GL_BGRA", "GL_UNSIGNED_BYTE", GL_BGRA, GL_UNSIGNED_BYTE, 666, true},
{"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, 666, true},
{"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, 666, true},
{"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true},
{"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true}
};
// index into tsimodes of the selected mode; 666 is the "nothing selected" sentinel
// that R_SetTSIMode tests for after benchmarking
int fastest = 666;
// lowest modespeed accepted so far; starts at a large value and is lowered by R_SetTSIMode
int bestspeed = 32768;
// Generates a new 2D texture object, leaves it bound to GL_TEXTURE_2D, and
// allocates width x height storage for it with internal format GL_RGBA.
// `format` and `type` describe the (absent) source data handed to glTexImage2D.
// Returns the GL name of the new texture.
GLuint R_MakeMeATexture (int width, int height, GLenum format, GLenum type)
{
	GLuint handle = 0;

	glEnable (GL_TEXTURE_2D);
	glGenTextures (1, &handle);
	glBindTexture (GL_TEXTURE_2D, handle);

	// linear filtering in both directions; only level 0 is ever specified
	glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

	// NULL data pointer: storage is defined here, pixels arrive later via glTexSubImage2D
	glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, format, type, NULL);

	// wait for the GL to complete the above so later timings are not skewed by it
	glFinish ();

	return handle;
}
// Benchmarks every format/type pair in tsimodes and stores the index of the
// quickest one that raised no GL error in `fastest` (and its time in `bestspeed`).
// For each mode a scratch texture is created, 16 full-size glTexSubImage2D
// uploads from sibuffer are timed with SDL_GetTicks, and the texture is deleted.
// Results are printed for every mode. If no mode succeeds a message box is
// shown and the process exits.
void R_SetTSIMode (void)
{
	int numtsimodes = sizeof (tsimodes) / sizeof (tsimodes[0]);

	for (int i = 0; i < numtsimodes; i++)
	{
		// clear the last error (if any) so the check below only reflects this mode
		glGetError ();

		// create a new texture object; R_MakeMeATexture leaves it bound to GL_TEXTURE_2D
		GLuint texnum = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes[i].format, tsimodes[i].type);

		Uint32 start = SDL_GetTicks ();

		// SDL_GetTicks has insufficient resolution to measure one call so we need to run a few of them
		for (int t = 0; t < 16; t++)
		{
			glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes[i].format, tsimodes[i].type, sibuffer);
		}

		Uint32 end = SDL_GetTicks ();

		// commit the buffer so that timings are valid: the measured interval is the
		// time for the calls to return, and finishing here keeps this mode's
		// pending work out of the next mode's measurement
		glFinish ();
		glDeleteTextures (1, &texnum);

		tsimodes[i].failed = (glGetError () != GL_NO_ERROR);
		tsimodes[i].modespeed = (int) (end - start);
	}

	for (int i = 0; i < numtsimodes; i++)
	{
		printf ("mode: %i %4ims [%s/%s] (%s)\n", i, tsimodes[i].modespeed, tsimodes[i].formatstr,
			tsimodes[i].typestr, tsimodes[i].failed ? "FAILED" : "OK");

		// <= means that on a tie the later entry in the table wins
		if (tsimodes[i].modespeed <= bestspeed && !tsimodes[i].failed)
		{
			bestspeed = tsimodes[i].modespeed;
			fastest = i;
		}
	}

	// 666 is the initial sentinel value of `fastest`: nothing was selected
	if (fastest == 666)
	{
		MessageBox (NULL, "Failed to find a format!", "Error", MB_OK | MB_ICONSTOP);
		exit (0);
	}
}
// Renders one frame: regenerates an animated test pattern in sibuffer, uploads
// it to `teximage` with the format/type selected in tsimodes[fastest], and
// draws it as a TEX_WIDTH x TEX_HEIGHT textured quad at the window's top-left.
// Buffer swapping is left to the caller.
void RenderOpenGL (void)
{
	framecount++;

	// i is the running texel index into sibuffer across both loops
	for (int i = 0, w = 0; w < TEX_WIDTH; w++)
	{
		for (int h = 0; h < TEX_HEIGHT; h++, i++)
		{
			// address the four bytes of texel i (not the start of the buffer)
			unsigned char *rgba = (unsigned char *) &sibuffer[i];

			// 2 == red, 1 == green, 0 == blue
			// NOTE(review): this byte order matches a BGRA layout; with another
			// selected mode the channels may appear swapped - confirm if colours matter
			rgba[2] = ((h * i) + framecount) & 255;
			rgba[1] = ((w * h) + framecount) & 255;
			rgba[0] = ((w * i) + framecount) & 255;
			rgba[3] = 255;
		}
	}

	glClear (GL_COLOR_BUFFER_BIT);
	glViewport (0, 0, WINDOW_WIDTH, WINDOW_HEIGHT);

	glMatrixMode (GL_MODELVIEW);
	glLoadIdentity ();

	// pixel-space orthographic projection with the origin at the top-left
	glMatrixMode (GL_PROJECTION);
	glLoadIdentity ();
	glOrtho (0, WINDOW_WIDTH, WINDOW_HEIGHT, 0, -99999, 99999);

	glBindTexture (GL_TEXTURE_2D, teximage);
	glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);

	// this per-frame upload is the operation under investigation
	glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type, sibuffer);

	glBegin (GL_QUADS);
	glTexCoord2f (0, 0);
	glVertex2f (0, 0);
	glTexCoord2f (1, 0);
	glVertex2f (TEX_WIDTH, 0);
	glTexCoord2f (1, 1);
	glVertex2f (TEX_WIDTH, TEX_HEIGHT);
	glTexCoord2f (0, 1);
	glVertex2f (0, TEX_HEIGHT);
	glEnd ();
}
int main (int argc, char *argv[])
{
if (SDL_Init (SDL_INIT_VIDEO | SDL_INIT_NOPARACHUTE) != 0)
{
printf ("Unable to initialize SDL: %s\n", SDL_GetError ());
return 1;
}
SDL_GL_SetAttribute (SDL_GL_DOUBLEBUFFER, 1);
SDL_Surface *screen = SDL_SetVideoMode (WINDOW_WIDTH, WINDOW_HEIGHT, 32, SDL_OPENGL);
R_SetTSIMode ();
teximage = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type);
glClearColor (0, 0, 0, 1);
int done = 0;
SDL_Event evt;
while (!done)
{
while (!done && SDL_PollEvent (&evt))
{
if (evt.type == SDL_QUIT)
{
done = 1;
break;
}
}
// run the screen update here
RenderOpenGL ();
SDL_GL_SwapBuffers ();
}
return 0;
}
Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page