Community
cancel
Showing results for 
Search instead for 
Did you mean: 
mfah
Novice
223 Views

OpenGL - glTexSubImage2D on Intel 4 Series/Windows 7 Performance

Jump to solution
glTexSubImage2D is unacceptably slow with an Intel Series 4 on Windows 7, driver version 8.15.10.2141 (latest). Benchmarked with the following code:
	// timestamp taken immediately before the upload loop
	DWORD start = timeGetTime ();

	// re-bind the texture and re-upload the same region NUM_TEXIMAGE times so
	// that the total elapsed time is large enough to measure
	for (int i = 0; i < NUM_TEXIMAGE; i++)
	{
		glBindTexture (GL_TEXTURE_2D, teximage);
		glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, sibuffer);
	}

	// timestamp taken as soon as the last call returns; (end - start) is the
	// elapsed time for all NUM_TEXIMAGE uploads
	DWORD end = timeGetTime ();
A single call requires some 20ms to return for a 512x512 texture; the code above (TEX_WIDTH is 64, TEX_HEIGHT is 512, NUM_TEXIMAGE is 16, timeBeginPeriod (1)) requires ~45ms, compared to < 5ms on an Intel 945, older driver rev, Windows XP. Have ensured formats match and have attempted use of PBO with no measurable performance difference. PFD_SUPPORT_COMPOSITION makes no difference

Is there a pullback from texture memory to system memory happening here?
0 Kudos
1 Solution
mfah
Novice
223 Views
The following test app (SDL, mostly portable, compiles with MSVC 2008) can be used to test/verify this issue:
#define WINDOW_WIDTH    800
#define WINDOW_HEIGHT    600

#include "SDL.h"
#include "SDL_opengl.h"

#pragma comment (lib, "SDL.lib")
#pragma comment (lib, "SDLmain.lib")
#pragma comment (lib, "opengl32.lib")

#define TEX_WIDTH 512
#define TEX_HEIGHT 512

// CPU-side pixel data passed to glTexSubImage2D; one unsigned int per texel
// (assumes unsigned int is 4 bytes, matching the 4-component formats tested)
unsigned int sibuffer[TEX_WIDTH * TEX_HEIGHT];
// GL texture object drawn every frame; assigned in main from R_MakeMeATexture
unsigned int teximage = 0;
// incremented once per RenderOpenGL call and mixed into the generated pattern
unsigned int framecount = 0;

// find the fastest modes to use for glTexSubImage2D
// one entry per candidate format/type pair, together with its benchmark result
typedef struct tsitest_s
{
    char formatstr[64];     // printable name of format, used in the results listing
    char typestr[64];       // printable name of type, used in the results listing
    GLenum format;          // pixel format passed to glTexImage2D/glTexSubImage2D
    GLenum type;            // pixel data type passed to glTexImage2D/glTexSubImage2D
    int modespeed;          // elapsed SDL ticks for the benchmark loop, written by R_SetTSIMode
    bool failed;            // true when glGetError reported an error for this pair
} tsitest_t;

// candidate format/type pairs to benchmark; every entry starts out marked as
// failed with a placeholder speed of 666 until R_SetTSIMode overwrites both
tsitest_t tsimodes[] =
{
    {"GL_RGBA", "GL_UNSIGNED_BYTE", GL_RGBA, GL_UNSIGNED_BYTE, 666, true},
    {"GL_BGRA", "GL_UNSIGNED_BYTE", GL_BGRA, GL_UNSIGNED_BYTE, 666, true},
    {"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, 666, true},
    {"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, 666, true},
    {"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true},
    {"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true}
};


// index into tsimodes of the quickest working mode; 666 is the "nothing
// selected yet" sentinel that R_SetTSIMode tests for
int fastest = 666;
// lowest modespeed seen so far; starts high so that any measured time can replace it
int bestspeed = 32768;

// Generates a 2D texture object, leaves it bound, and allocates
// width x height storage for it without uploading any pixels.
//
// width, height - dimensions of the level 0 image
// format, type  - client pixel format/type handed to glTexImage2D; the
//                 internal format is always GL_RGBA
//
// Returns the new texture name.  As side effects GL_TEXTURE_2D is enabled
// and the new texture stays bound to it.
GLuint R_MakeMeATexture (int width, int height, GLenum format, GLenum type)
{
    const GLenum target = GL_TEXTURE_2D;
    GLuint handle = 0;

    glEnable (target);

    glGenTextures (1, &handle);
    glBindTexture (target, handle);

    // linear filtering in both directions
    glTexParameteri (target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri (target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

    // NULL data pointer: storage is specified but nothing is uploaded yet
    glTexImage2D (target, 0, GL_RGBA, width, height, 0, format, type, NULL);

    // block until the GL has finished so that work queued here does not
    // leak into timings taken by the caller
    glFinish ();

    return handle;
}


// Benchmarks glTexSubImage2D with every format/type pair in tsimodes and
// selects the quickest pair that did not raise a GL error.
//
// Results are written back into each tsimodes entry (modespeed, failed) and
// printed; the winning index is stored in the global `fastest` and its time
// in `bestspeed`.  If no pair succeeds a message box is shown and the
// process exits.
//
// Every access is to tsimodes[i]: tsimodes is an array, so the un-indexed
// `tsimodes.format` form does not compile.
void R_SetTSIMode (void)
{
    int numtsimodes = sizeof (tsimodes) / sizeof (tsimodes[0]);

    for (int i = 0; i < numtsimodes; i++)
    {
        // clear the last error (if any) so the check below only sees errors from this mode
        glGetError ();

        // create a new texture object (left bound to GL_TEXTURE_2D)
        GLuint texnum = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes[i].format, tsimodes[i].type);

        Uint32 start = SDL_GetTicks ();

        // SDL_GetTicks has insufficient resolution to measure one call so we need to run a few of them
        for (int t = 0; t < 16; t++)
            glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes[i].format, tsimodes[i].type, sibuffer);

        // taken as soon as the last call returns: this measures how long the
        // calls take to return, not how long the GL takes to complete them
        Uint32 end = SDL_GetTicks ();

        // drain the pipeline so that outstanding work from this mode cannot
        // spill into the next mode's timing
        glFinish ();

        glDeleteTextures (1, &texnum);

        tsimodes[i].failed = (glGetError () != GL_NO_ERROR);
        tsimodes[i].modespeed = (int) (end - start);
    }

    for (int i = 0; i < numtsimodes; i++)
    {
        printf ("mode: %i %4ims [%s/%s] (%s)\n", i, tsimodes[i].modespeed, tsimodes[i].formatstr,
            tsimodes[i].typestr, tsimodes[i].failed ? "FAILED" : "OK");

        // <= means that on a tie the later entry in the table wins
        if (tsimodes[i].modespeed <= bestspeed && !tsimodes[i].failed)
        {
            bestspeed = tsimodes[i].modespeed;
            fastest = i;
        }
    }

    // 666 is the initial sentinel: no mode was ever selected
    if (fastest == 666)
    {
        MessageBox (NULL, "Failed to find a format!", "Error", MB_OK | MB_ICONSTOP);
        exit (0);
    }
}


// Renders one frame: regenerates the animated test pattern in sibuffer,
// uploads it to teximage with the format/type pair chosen by R_SetTSIMode,
// and draws it as a TEX_WIDTH x TEX_HEIGHT quad at the window's top-left.
void RenderOpenGL (void)
{
    framecount++;

    // i is the running texel index into sibuffer, advanced by the inner loop
    for (int i = 0, w = 0; w < TEX_WIDTH; w++)
    {
        for (int h = 0; h < TEX_HEIGHT; h++, i++)
        {
            // address the four bytes of texel i; without the [i] subscript
            // every iteration would overwrite texel 0 and leave the rest untouched
            unsigned char *rgba = (unsigned char *) &sibuffer[i];

            // 2 == red, 1 == green, 0 == blue
            rgba[2] = ((h * i) + framecount) & 255;
            rgba[1] = ((w * h) + framecount) & 255;
            rgba[0] = ((w * i) + framecount) & 255;
            rgba[3] = 255;
        }
    }

    glClear (GL_COLOR_BUFFER_BIT);

    glViewport (0, 0, WINDOW_WIDTH, WINDOW_HEIGHT);

    glMatrixMode (GL_MODELVIEW);
    glLoadIdentity ();

    // pixel-space orthographic projection with the origin at the top-left
    glMatrixMode (GL_PROJECTION);
    glLoadIdentity ();
    glOrtho (0, WINDOW_WIDTH, WINDOW_HEIGHT, 0, -99999, 99999);

    glBindTexture (GL_TEXTURE_2D, teximage);
    glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);

    // this per-frame upload is the call whose performance is under test
    glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type, sibuffer);

    glBegin (GL_QUADS);

    glTexCoord2f (0, 0);
    glVertex2f (0, 0);

    glTexCoord2f (1, 0);
    glVertex2f (TEX_WIDTH, 0);

    glTexCoord2f (1, 1);
    glVertex2f (TEX_WIDTH, TEX_HEIGHT);

    glTexCoord2f (0, 1);
    glVertex2f (0, TEX_HEIGHT);

    glEnd ();
}


int main (int argc, char *argv[])
{
    if (SDL_Init (SDL_INIT_VIDEO | SDL_INIT_NOPARACHUTE) != 0)
    {
        printf ("Unable to initialize SDL: %s\n", SDL_GetError ());
        return 1;
    }

    SDL_GL_SetAttribute (SDL_GL_DOUBLEBUFFER, 1);
    SDL_Surface *screen = SDL_SetVideoMode (WINDOW_WIDTH, WINDOW_HEIGHT, 32, SDL_OPENGL);

    R_SetTSIMode ();
    teximage = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type);
    glClearColor (0, 0, 0, 1);

    int done = 0;
    SDL_Event evt;

    while (!done)
    {
        while (!done && SDL_PollEvent (&evt))
        {
            if (evt.type == SDL_QUIT)
            {
                done = 1;
                break;
            }
        }

        // run the screen update here
        RenderOpenGL ();
        SDL_GL_SwapBuffers ();
    }

    return 0;
}

View solution in original post

5 Replies
mfah
Novice
223 Views
Update:

Internal Format: GL_RGBA
Format: GL_BGRA
Type: GL_UNSIGNED_INT_8_8_8_8_REV

Problem completely goes away.

I suspect that the driver was pulling the teximage data back to system memory otherwise. Can anyone confirm or deny?


mode: 0 320ms [GL_RGBA/GL_UNSIGNED_BYTE] (OK)
mode: 1 317ms [GL_BGRA/GL_UNSIGNED_BYTE] (OK)
mode: 2 377ms [GL_RGBA/GL_UNSIGNED_INT_8_8_8_8] (OK)
mode: 3 375ms [GL_BGRA/GL_UNSIGNED_INT_8_8_8_8] (OK)
mode: 4 376ms [GL_RGBA/GL_UNSIGNED_INT_8_8_8_8_REV] (OK)
mode: 5 12ms [GL_BGRA/GL_UNSIGNED_INT_8_8_8_8_REV] (OK)

223 Views
Hi mfah,

What do you mean the problem completely goes away?

From your first post, I take the problem to be a performance difference between the older driver on the 945 and the newer driver on the Intel 4 Series card.

Are you saying that if you used the GL_UNSIGNED_INT_8_8_8_8_REV / GL_BGRA, this performance difference is not seen anymore?

Thanks,
-Ganesh
mfah
Novice
223 Views
Hi,

Yes, this is correct. The older part/older driver did not exhibit this performance problem, the newer one does. Running a VMWare session (via VMWare's SVGA driver) on the newer part/newer driver also does not exhibit this problem.

By switching the type and format to GL_UNSIGNED_INT_8_8_8_8_REV/GL_BGRA the performance problem is removed.
223 Views

Can you provide a test kernel? We will try to replicate the issue and try to provide a better explanation.

Thanks.

mfah
Novice
224 Views
The following test app (SDL, mostly portable, compiles with MSVC 2008) can be used to test/verify this issue:
#define WINDOW_WIDTH    800
#define WINDOW_HEIGHT    600

#include "SDL.h"
#include "SDL_opengl.h"

#pragma comment (lib, "SDL.lib")
#pragma comment (lib, "SDLmain.lib")
#pragma comment (lib, "opengl32.lib")

#define TEX_WIDTH 512
#define TEX_HEIGHT 512

// CPU-side pixel data passed to glTexSubImage2D; one unsigned int per texel
// (assumes unsigned int is 4 bytes, matching the 4-component formats tested)
unsigned int sibuffer[TEX_WIDTH * TEX_HEIGHT];
// GL texture object drawn every frame; assigned in main from R_MakeMeATexture
unsigned int teximage = 0;
// incremented once per RenderOpenGL call and mixed into the generated pattern
unsigned int framecount = 0;

// find the fastest modes to use for glTexSubImage2D
// one entry per candidate format/type pair, together with its benchmark result
typedef struct tsitest_s
{
    char formatstr[64];     // printable name of format, used in the results listing
    char typestr[64];       // printable name of type, used in the results listing
    GLenum format;          // pixel format passed to glTexImage2D/glTexSubImage2D
    GLenum type;            // pixel data type passed to glTexImage2D/glTexSubImage2D
    int modespeed;          // elapsed SDL ticks for the benchmark loop, written by R_SetTSIMode
    bool failed;            // true when glGetError reported an error for this pair
} tsitest_t;

// candidate format/type pairs to benchmark; every entry starts out marked as
// failed with a placeholder speed of 666 until R_SetTSIMode overwrites both
tsitest_t tsimodes[] =
{
    {"GL_RGBA", "GL_UNSIGNED_BYTE", GL_RGBA, GL_UNSIGNED_BYTE, 666, true},
    {"GL_BGRA", "GL_UNSIGNED_BYTE", GL_BGRA, GL_UNSIGNED_BYTE, 666, true},
    {"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, 666, true},
    {"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, 666, true},
    {"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true},
    {"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true}
};


// index into tsimodes of the quickest working mode; 666 is the "nothing
// selected yet" sentinel that R_SetTSIMode tests for
int fastest = 666;
// lowest modespeed seen so far; starts high so that any measured time can replace it
int bestspeed = 32768;

// Generates a 2D texture object, leaves it bound, and allocates
// width x height storage for it without uploading any pixels.
//
// width, height - dimensions of the level 0 image
// format, type  - client pixel format/type handed to glTexImage2D; the
//                 internal format is always GL_RGBA
//
// Returns the new texture name.  As side effects GL_TEXTURE_2D is enabled
// and the new texture stays bound to it.
GLuint R_MakeMeATexture (int width, int height, GLenum format, GLenum type)
{
    const GLenum target = GL_TEXTURE_2D;
    GLuint handle = 0;

    glEnable (target);

    glGenTextures (1, &handle);
    glBindTexture (target, handle);

    // linear filtering in both directions
    glTexParameteri (target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri (target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

    // NULL data pointer: storage is specified but nothing is uploaded yet
    glTexImage2D (target, 0, GL_RGBA, width, height, 0, format, type, NULL);

    // block until the GL has finished so that work queued here does not
    // leak into timings taken by the caller
    glFinish ();

    return handle;
}


// Benchmarks glTexSubImage2D with every format/type pair in tsimodes and
// selects the quickest pair that did not raise a GL error.
//
// Results are written back into each tsimodes entry (modespeed, failed) and
// printed; the winning index is stored in the global `fastest` and its time
// in `bestspeed`.  If no pair succeeds a message box is shown and the
// process exits.
//
// Every access is to tsimodes[i]: tsimodes is an array, so the un-indexed
// `tsimodes.format` form does not compile.
void R_SetTSIMode (void)
{
    int numtsimodes = sizeof (tsimodes) / sizeof (tsimodes[0]);

    for (int i = 0; i < numtsimodes; i++)
    {
        // clear the last error (if any) so the check below only sees errors from this mode
        glGetError ();

        // create a new texture object (left bound to GL_TEXTURE_2D)
        GLuint texnum = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes[i].format, tsimodes[i].type);

        Uint32 start = SDL_GetTicks ();

        // SDL_GetTicks has insufficient resolution to measure one call so we need to run a few of them
        for (int t = 0; t < 16; t++)
            glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes[i].format, tsimodes[i].type, sibuffer);

        // taken as soon as the last call returns: this measures how long the
        // calls take to return, not how long the GL takes to complete them
        Uint32 end = SDL_GetTicks ();

        // drain the pipeline so that outstanding work from this mode cannot
        // spill into the next mode's timing
        glFinish ();

        glDeleteTextures (1, &texnum);

        tsimodes[i].failed = (glGetError () != GL_NO_ERROR);
        tsimodes[i].modespeed = (int) (end - start);
    }

    for (int i = 0; i < numtsimodes; i++)
    {
        printf ("mode: %i %4ims [%s/%s] (%s)\n", i, tsimodes[i].modespeed, tsimodes[i].formatstr,
            tsimodes[i].typestr, tsimodes[i].failed ? "FAILED" : "OK");

        // <= means that on a tie the later entry in the table wins
        if (tsimodes[i].modespeed <= bestspeed && !tsimodes[i].failed)
        {
            bestspeed = tsimodes[i].modespeed;
            fastest = i;
        }
    }

    // 666 is the initial sentinel: no mode was ever selected
    if (fastest == 666)
    {
        MessageBox (NULL, "Failed to find a format!", "Error", MB_OK | MB_ICONSTOP);
        exit (0);
    }
}


// Renders one frame: regenerates the animated test pattern in sibuffer,
// uploads it to teximage with the format/type pair chosen by R_SetTSIMode,
// and draws it as a TEX_WIDTH x TEX_HEIGHT quad at the window's top-left.
void RenderOpenGL (void)
{
    framecount++;

    // i is the running texel index into sibuffer, advanced by the inner loop
    for (int i = 0, w = 0; w < TEX_WIDTH; w++)
    {
        for (int h = 0; h < TEX_HEIGHT; h++, i++)
        {
            // address the four bytes of texel i; without the [i] subscript
            // every iteration would overwrite texel 0 and leave the rest untouched
            unsigned char *rgba = (unsigned char *) &sibuffer[i];

            // 2 == red, 1 == green, 0 == blue
            rgba[2] = ((h * i) + framecount) & 255;
            rgba[1] = ((w * h) + framecount) & 255;
            rgba[0] = ((w * i) + framecount) & 255;
            rgba[3] = 255;
        }
    }

    glClear (GL_COLOR_BUFFER_BIT);

    glViewport (0, 0, WINDOW_WIDTH, WINDOW_HEIGHT);

    glMatrixMode (GL_MODELVIEW);
    glLoadIdentity ();

    // pixel-space orthographic projection with the origin at the top-left
    glMatrixMode (GL_PROJECTION);
    glLoadIdentity ();
    glOrtho (0, WINDOW_WIDTH, WINDOW_HEIGHT, 0, -99999, 99999);

    glBindTexture (GL_TEXTURE_2D, teximage);
    glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);

    // this per-frame upload is the call whose performance is under test
    glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type, sibuffer);

    glBegin (GL_QUADS);

    glTexCoord2f (0, 0);
    glVertex2f (0, 0);

    glTexCoord2f (1, 0);
    glVertex2f (TEX_WIDTH, 0);

    glTexCoord2f (1, 1);
    glVertex2f (TEX_WIDTH, TEX_HEIGHT);

    glTexCoord2f (0, 1);
    glVertex2f (0, TEX_HEIGHT);

    glEnd ();
}


int main (int argc, char *argv[])
{
    if (SDL_Init (SDL_INIT_VIDEO | SDL_INIT_NOPARACHUTE) != 0)
    {
        printf ("Unable to initialize SDL: %s\n", SDL_GetError ());
        return 1;
    }

    SDL_GL_SetAttribute (SDL_GL_DOUBLEBUFFER, 1);
    SDL_Surface *screen = SDL_SetVideoMode (WINDOW_WIDTH, WINDOW_HEIGHT, 32, SDL_OPENGL);

    R_SetTSIMode ();
    teximage = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type);
    glClearColor (0, 0, 0, 1);

    int done = 0;
    SDL_Event evt;

    while (!done)
    {
        while (!done && SDL_PollEvent (&evt))
        {
            if (evt.type == SDL_QUIT)
            {
                done = 1;
                break;
            }
        }

        // run the screen update here
        RenderOpenGL ();
        SDL_GL_SwapBuffers ();
    }

    return 0;
}

View solution in original post

Reply