I am working on a Decode-OpenCL-Encode pipeline on an Intel processor. Intel provides sample code for media interop, which is attached.
I am integrating an encoder into that sample.
If we look at the DecodeOneFrame() function below:
/**
 * Runs one iteration of the decode -> OpenCL filter -> (optional) encode pipeline
 * on the task slot m_Tasks[m_TaskIndex].
 *
 * Steps performed by each call:
 *   1. If the slot still holds sync points from an earlier call, wait on each of them and
 *      copy the resulting surface (OCL output if present, otherwise decoder output) into
 *      pDstSurface with StretchRect.
 *   2. Clear the slot.
 *   3. If m_DECODEFlag is set, submit one DecodeFrameAsync request and wait for it.
 *   4. If the OpenCL plugin is enabled, submit the last decoded surface to it.
 *   5. If built with ENCODER and m_ENCODEFlag is set, submit one frame to the encoder.
 *   6. Advance m_TaskIndex modulo SYNC_BUF_SIZE.
 *
 * @param Width       upper bound for the copied rectangle width (clamped to the decoder input width)
 * @param Height      upper bound for the copied rectangle height (clamped to the decoder input height)
 * @param pDstSurface destination surface of the StretchRect copy
 * @param pd3dDevice  device on which StretchRect is issued
 * @return MFX_ERR_NONE on success; MFX_ERR_MEMORY_ALLOC when no free surface is found;
 *         otherwise the status reported by the failing stage (sync wait, decoder,
 *         OpenCL plugin, encoder). On an early error return m_TaskIndex is not advanced.
 */
mfxStatus CDecodingPipeline::DecodeOneFrame(int Width, int Height, IDirect3DSurface9 *pDstSurface, IDirect3DDevice9* pd3dDevice)
{
    mfxStatus stsOut = MFX_ERR_NONE;

    if(m_Tasks[m_TaskIndex].m_DecodeSync || m_Tasks[m_TaskIndex].m_OCLSync || m_Tasks[m_TaskIndex].m_EncodeSync)
    {// wait until the task is finished and copy the result texture to the back buffer
        mfxStatus sts = MFX_ERR_NONE;
        mfxFrameSurface1_OCL* pOutSurface = NULL; // output surface.

        // wait for the previously submitted tasks
        if(m_Tasks[m_TaskIndex].m_DecodeSync)
        {
            sts = m_mfxSession.SyncOperation(m_Tasks[m_TaskIndex].m_DecodeSync, MSDK_DEC_WAIT_INTERVAL);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
            pOutSurface = m_Tasks[m_TaskIndex].m_pDecodeOutSurface;
        }
        if(m_Tasks[m_TaskIndex].m_OCLSync)
        {
            sts = m_mfxSession.SyncOperation(m_Tasks[m_TaskIndex].m_OCLSync, MSDK_VPP_WAIT_INTERVAL);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
            // the OCL result takes precedence over the raw decoder output for display
            pOutSurface = m_Tasks[m_TaskIndex].m_pOCLOutSurface;
        }
#ifdef ENCODER
        if(m_Tasks[m_TaskIndex].m_EncodeSync)
        {
            sts = m_mfxSession.SyncOperation(m_Tasks[m_TaskIndex].m_EncodeSync, MSDK_ENC_WAIT_INTERVAL);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
            // NOTE(review): nothing in this function consumes m_mfxEncBS after this wait --
            // confirm the encoded bitstream is written out / reset elsewhere.
            //pOutSurface = m_Tasks[m_TaskIndex].m_pEncodeOutSurface;
        }
#endif
        if(pOutSurface)
        {/* copy YUV texture to screen */
            HRESULT hr;
            IDirect3DSurface9* pSrcSurface = (IDirect3DSurface9*)(pOutSurface->Data.MemId);
            assert(pDstSurface && pSrcSurface);
            if(pSrcSurface && pDstSurface)
            {
                RECT r;
                r.left = 0;
                r.top = 0;
                r.right = min(Width,m_mfxDecodeVideoParams.vpp.In.Width);
                r.bottom = min(Height,m_mfxDecodeVideoParams.vpp.In.Height);
                // round both extents down to an even number
                r.right -= r.right&1;
                r.bottom -= r.bottom&1;
                V(pd3dDevice->StretchRect(pSrcSurface, &r, pDstSurface, &r,D3DTEXF_POINT));
            }
        }
#ifdef UNLOCK
        // release the extra lock this function took on each surface when the task was submitted
        if(m_Tasks[m_TaskIndex].m_pDecodeOutSurface && m_Tasks[m_TaskIndex].m_pDecodeOutSurface->Data.Locked)
            _InterlockedDecrement16((short*)&m_Tasks[m_TaskIndex].m_pDecodeOutSurface->Data.Locked);
        if(m_Tasks[m_TaskIndex].m_pOCLOutSurface && m_Tasks[m_TaskIndex].m_pOCLOutSurface->Data.Locked)
            _InterlockedDecrement16((short*)&m_Tasks[m_TaskIndex].m_pOCLOutSurface->Data.Locked);
#ifdef ENCODER
        if(m_Tasks[m_TaskIndex].m_pEncodeOutSurface && m_Tasks[m_TaskIndex].m_pEncodeOutSurface->Data.Locked)
            _InterlockedDecrement16((short*)&m_Tasks[m_TaskIndex].m_pEncodeOutSurface->Data.Locked);
#endif
#endif
    }

#if 1
    // clear the task slot so it can be reused
    m_Tasks[m_TaskIndex].m_OCLSync = 0;
    m_Tasks[m_TaskIndex].m_pOCLOutSurface = 0;
    m_Tasks[m_TaskIndex].m_DecodeSync = 0;
    m_Tasks[m_TaskIndex].m_pDecodeOutSurface = 0;
#ifdef ENCODER
    m_Tasks[m_TaskIndex].m_EncodeSync = 0;
    m_Tasks[m_TaskIndex].m_pEncodeOutSurface = 0;
#endif
#endif

    if(m_DECODEFlag)
    {// feed decoder
        mfxSyncPoint DecodeSyncPoint = 0;
        // static: the last index is handed back to GetFreeSurfaceIndex on the next call
        // (presumably as the position to resume the search from -- confirm against the helper)
        static mfxU16 nDecoderSurfIndex = 0;
        mfxStatus sts = MFX_ERR_NONE;

        m_pmfxDecodeSurfaceLast = NULL; // reset current decoder surface to get a new one from the decoder

        while(MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts || MFX_WRN_DEVICE_BUSY == sts)
        {// loop until the decoder reports that it accepted a request for a new frame
            if (MFX_WRN_DEVICE_BUSY == sts)
            {
                Sleep(1); // just wait and then repeat the same call to DecodeFrameAsync
            }
            else if (MFX_ERR_MORE_DATA == sts)
            { // read more data into the input bitstream
                sts = m_FileReader.ReadNextFrame(&m_mfxBS);
                MSDK_BREAK_ON_ERROR(sts);
            }
            else if (MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE == sts)
            {// find a new working/output surface in m_pmfxDecodeSurfaces
                //nDecoderSurfIndex = 0;
                nDecoderSurfIndex = GetFreeSurfaceIndex(m_pmfxDecodeSurfaces, m_mfxDecoderResponse.NumFrameActual,nDecoderSurfIndex);
                if (MSDK_INVALID_SURF_IDX == nDecoderSurfIndex)
                {
                    return MFX_ERR_MEMORY_ALLOC;
                }
            }

            // send request to decoder
            sts = m_pmfxDEC->DecodeFrameAsync(
                &m_mfxBS,
                (mfxFrameSurface1*)&(m_pmfxDecodeSurfaces[nDecoderSurfIndex]),
                (mfxFrameSurface1**)&m_pmfxDecodeSurfaceLast,
                &DecodeSyncPoint);

            // ignore warnings if output is available,
            // if no output and no action required just repeat the same call
            if (MFX_ERR_NONE < sts && DecodeSyncPoint)
            {
                sts = MFX_ERR_NONE;
            }
            if (MFX_ERR_NONE == sts)
            {// decoder returned a sync point: fill the current task and move on to the OCL plugin
                m_Tasks[m_TaskIndex].m_DecodeSync = DecodeSyncPoint;
                m_Tasks[m_TaskIndex].m_pDecodeOutSurface = m_pmfxDecodeSurfaceLast;
#ifdef UNLOCK
                // keep the surface locked until the wait stage of a later call releases it
                if(m_Tasks[m_TaskIndex].m_pDecodeOutSurface)
                    _InterlockedIncrement16((short*)&m_Tasks[m_TaskIndex].m_pDecodeOutSurface->Data.Locked);
#endif
                break;
            }
        }

        if(MFX_ERR_NONE != sts)
        {
            printf("ERROR: Decoder returns error %d!\n",sts);
            // No request was accepted, so the slot holds no decoder sync point. Waiting on a
            // null sync point would only replace the real decoder status with the failure of
            // that wait, so report the decoder status directly.
            return sts;
        }

        // decoder sync point: wait for the frame that was just submitted
        sts = m_mfxSession.SyncOperation(m_Tasks[m_TaskIndex].m_DecodeSync, MSDK_DEC_WAIT_INTERVAL);
        MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
    }//if(m_DECODEFlag)

    if(m_pOCLPlugin && m_pOCLPlugin->m_OCLFlag)
    {// OPENCL part
        mfxU16 nOCLSurfIndex=0;
        mfxSyncPoint OCLSyncPoint = 0;
        mfxStatus sts = MFX_ERR_NONE;

        // get index of the output surface for the OCL plugin
        nOCLSurfIndex = GetFreeSurfaceIndex(m_pmfxOCLSurfaces, m_mfxOCLResponse.NumFrameActual);
        MSDK_CHECK_ERROR(nOCLSurfIndex, MSDK_INVALID_SURF_IDX, MFX_ERR_MEMORY_ALLOC);

        //mfxHDL pOutSurf = &m_pmfxOCLSurfaces[nOCLSurfIndex];
        //mfxHDL pOutSurf = &m_pmfxEncSurfaces[nEncSurfIdx];
        //m_pmfxOCLSurfaces[nOCLSurfIndex] = m_pmfxEncSurfaces[nEncSurfIdx];
        mfxHDL pOutSurf = &m_pmfxOCLSurfaces[nOCLSurfIndex];
        mfxHDL inp = m_pmfxDecodeSurfaceLast;

        // OCL filter: retry the submission while the device is busy
        for(;;)
        {
            sts = MFXVideoUSER_ProcessFrameAsync(m_mfxSession, &inp, 1, &pOutSurf, 1, &OCLSyncPoint);
            if (MFX_WRN_DEVICE_BUSY == sts)
            {
                Sleep(1); // just wait and then repeat the same call
            }
            else
            {
                break;
            }
        }

        // ignore warnings if output is available,
        if (MFX_ERR_NONE < sts && OCLSyncPoint)
        {
            sts = MFX_ERR_NONE;
        }
        if(MFX_ERR_NONE!=sts)
        {
            printf("ERROR: OpenCL filter return error %d!\n",sts);
            return sts;
        }

        {
            m_Tasks[m_TaskIndex].m_OCLSync = OCLSyncPoint;
            m_Tasks[m_TaskIndex].m_pOCLOutSurface = &m_pmfxOCLSurfaces[nOCLSurfIndex];
            //m_Tasks[m_TaskIndex].m_pOCLOutSurface = &m_pmfxEncSurfaces[nEncSurfIdx];
#ifdef UNLOCK
            // keep the surface locked until the wait stage of a later call releases it
            _InterlockedIncrement16((short*)&m_Tasks[m_TaskIndex].m_pOCLOutSurface->Data.Locked);
#endif
        }
    }

#ifdef ENCODER
    if(m_ENCODEFlag)
    {// feed encoder
        static mfxU16 nEncSurfIdx = 0; // index of free surface
        // Zero-initialized: the loop below tests it, and it is stored in the task slot even
        // when the encoder produced no output for this frame.
        mfxSyncPoint EncSyncP = 0;
        mfxStatus sts = MFX_ERR_NONE;
        //mfxFrameSurface1* pSurf = NULL; // dispatching pointer

        // find free surface for encoder input
        // NOTE(review): this surface is picked independently of the OCL output surface chosen
        // above, and nothing in this function copies the OCL result into it -- confirm the
        // encoder really receives the processed frame.
        nEncSurfIdx = GetFreeSurface(m_pmfxEncSurfaces, m_mfxEncResponse.NumFrameActual);
        MSDK_CHECK_ERROR(nEncSurfIdx, MSDK_INVALID_SURF_IDX, MFX_ERR_MEMORY_ALLOC);

        // point pSurf to encoder surface
        //m_pmfxEncSurfaces[nEncSurfIdx] = m_pmfxOCLSurfaces[nOCLSurfIndex];
        for (;;)
        {
            // NOTE(review): the same m_mfxEncBS is handed to the encoder for every task slot --
            // confirm it cannot be reused while an earlier encode task is still in flight.
            sts = m_pmfxENC->EncodeFrameAsync(NULL, &m_pmfxEncSurfaces[nEncSurfIdx], &m_mfxEncBS, &EncSyncP);

            if (MFX_ERR_NONE < sts && !EncSyncP) // repeat the call if warning and no output
            {
                if (MFX_WRN_DEVICE_BUSY == sts)
                    MSDK_SLEEP(1); // wait if device is busy
            }
            else if (MFX_ERR_NONE < sts && EncSyncP)
            {
                sts = MFX_ERR_NONE; // ignore warnings if output is available
                break;
            }
            else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts)
            {
                // grow the output bitstream buffer, then retry the same call
                sts = AllocateSufficientBuffer(&m_mfxEncBS);
                MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
                printf("\n BUFFER allocated");
            }
            else
            {
                // get next surface and new task for 2nd bitstream in ViewOutput mode
                MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_BITSTREAM);
                break;
            }
        }

        // MFX_ERR_MORE_DATA is treated as success here; in that case EncSyncP may still be
        // zero, and the wait stage skips a zero m_EncodeSync.
        if (MFX_ERR_MORE_DATA == sts)
        {
            sts = MFX_ERR_NONE;
        }

        if (MFX_ERR_NONE == sts)
        {
            m_Tasks[m_TaskIndex].m_EncodeSync = EncSyncP;
#ifdef UNLOCK
            // remember the input surface and keep it locked until a later call releases it
            m_Tasks[m_TaskIndex].m_pEncodeOutSurface = &m_pmfxEncSurfaces[nEncSurfIdx];
            _InterlockedIncrement16((short*)&m_Tasks[m_TaskIndex].m_pEncodeOutSurface->Data.Locked);
#endif
        }
        else
        {
            // report encoder failures instead of dropping them silently; the task index is
            // still advanced below so already-submitted decode/OCL work gets synchronized
            printf("ERROR: Encoder returns error %d!\n",sts);
            stsOut = sts;
        }
    }
#endif

    // increase task index to point to next task.
    m_TaskIndex = (m_TaskIndex+1)%SYNC_BUF_SIZE;
    return stsOut;
}//CDecodingPipeline::DecodeOneFrame
If I use this code, the encoder output is corrupted. When I decode the encoder output, the frames do not appear to be displayed in the proper order.
I think I am not giving the right surface to encoder as the encoder surface index is independently calculated.
But when I give the OCL output surface to the encoder, the OCL stage also stops working.
Can anyone guide me here?
Link Copied
Hi Manish,
We have an OpenCL forum, you should be able to get the help you need there. I am moving your post to that forum.
For more complete information about compiler optimizations, see our Optimization Notice.