- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I am working on a Decode-OpenCL-Encode pipeline on an Intel processor. There is sample code provided by Intel for media interop, which is attached.
I am integrating the encoder into same.
If we look at the DecodeOneFrame() function below:
/**
 * Retire the task stored at m_TaskIndex, then submit one new frame through the
 * decoder, the OpenCL plugin and (when built with ENCODER) the encoder, storing
 * the new sync points in that same task slot before advancing m_TaskIndex.
 *
 * Retiring a task means: wait on every sync point recorded in the slot, copy the
 * resulting surface (OCL output if present, otherwise decoder output) to
 * pDstSurface with StretchRect, and, when built with UNLOCK, drop the extra
 * Data.Locked references this function took when the task was submitted.
 *
 * @param Width        upper bound for the width of the copied rectangle
 * @param Height       upper bound for the height of the copied rectangle
 * @param pDstSurface  destination surface for the StretchRect copy
 * @param pd3dDevice   device on which StretchRect is issued
 * @return MFX_ERR_NONE on success; the decoder or encoder submission error
 *         otherwise. A failed SyncOperation, a missing free surface
 *         (MFX_ERR_MEMORY_ALLOC) or an OCL plugin error returns immediately,
 *         in which case m_TaskIndex is NOT advanced.
 */
mfxStatus CDecodingPipeline::DecodeOneFrame(int Width, int Height, IDirect3DSurface9 *pDstSurface, IDirect3DDevice9* pd3dDevice)
{
    mfxU16 nOCLSurfIndex = 0;
    mfxStatus stsOut = MFX_ERR_NONE;

    if (m_Tasks[m_TaskIndex].m_DecodeSync || m_Tasks[m_TaskIndex].m_OCLSync || m_Tasks[m_TaskIndex].m_EncodeSync)
    {
        // Wait until the previously submitted task is finished and copy its result to the back buffer.
        mfxStatus sts = MFX_ERR_NONE;
        mfxFrameSurface1_OCL* pOutSurface = NULL; // surface that will be shown

        if (m_Tasks[m_TaskIndex].m_DecodeSync)
        {
            sts = m_mfxSession.SyncOperation(m_Tasks[m_TaskIndex].m_DecodeSync, MSDK_DEC_WAIT_INTERVAL);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
            pOutSurface = m_Tasks[m_TaskIndex].m_pDecodeOutSurface;
        }
        if (m_Tasks[m_TaskIndex].m_OCLSync)
        {
            sts = m_mfxSession.SyncOperation(m_Tasks[m_TaskIndex].m_OCLSync, MSDK_VPP_WAIT_INTERVAL);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
            // The OCL output takes precedence over the raw decoder output for display.
            pOutSurface = m_Tasks[m_TaskIndex].m_pOCLOutSurface;
        }
#ifdef ENCODER
        if (m_Tasks[m_TaskIndex].m_EncodeSync)
        {
            // The encoder result is only waited for; it is never used as the display surface.
            sts = m_mfxSession.SyncOperation(m_Tasks[m_TaskIndex].m_EncodeSync, MSDK_ENC_WAIT_INTERVAL);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
        }
#endif

        if (pOutSurface)
        {
            // Copy the YUV texture to the screen.
            HRESULT hr;
            IDirect3DSurface9* pSrcSurface = (IDirect3DSurface9*)(pOutSurface->Data.MemId);
            assert(pDstSurface && pSrcSurface);
            if (pSrcSurface && pDstSurface)
            {
                RECT r;
                r.left = 0;
                r.top = 0;
                r.right = min(Width, m_mfxDecodeVideoParams.vpp.In.Width);
                r.bottom = min(Height, m_mfxDecodeVideoParams.vpp.In.Height);
                // Round both extents down to an even number.
                r.right -= r.right & 1;
                r.bottom -= r.bottom & 1;
                V(pd3dDevice->StretchRect(pSrcSurface, &r, pDstSurface, &r, D3DTEXF_POINT));
            }
        }

#ifdef UNLOCK
        // Release the references taken at submission time (see the _InterlockedIncrement16 calls below).
        if (m_Tasks[m_TaskIndex].m_pDecodeOutSurface && m_Tasks[m_TaskIndex].m_pDecodeOutSurface->Data.Locked)
            _InterlockedDecrement16((short*)&m_Tasks[m_TaskIndex].m_pDecodeOutSurface->Data.Locked);
        if (m_Tasks[m_TaskIndex].m_pOCLOutSurface && m_Tasks[m_TaskIndex].m_pOCLOutSurface->Data.Locked)
            _InterlockedDecrement16((short*)&m_Tasks[m_TaskIndex].m_pOCLOutSurface->Data.Locked);
#ifdef ENCODER
        if (m_Tasks[m_TaskIndex].m_pEncodeOutSurface && m_Tasks[m_TaskIndex].m_pEncodeOutSurface->Data.Locked)
            _InterlockedDecrement16((short*)&m_Tasks[m_TaskIndex].m_pEncodeOutSurface->Data.Locked);
#endif
#endif
    }

    // Clear the task slot so it can be reused for the frame submitted below.
    m_Tasks[m_TaskIndex].m_OCLSync = 0;
    m_Tasks[m_TaskIndex].m_pOCLOutSurface = 0;
    m_Tasks[m_TaskIndex].m_DecodeSync = 0;
    m_Tasks[m_TaskIndex].m_pDecodeOutSurface = 0;
#ifdef ENCODER
    m_Tasks[m_TaskIndex].m_EncodeSync = 0;
    m_Tasks[m_TaskIndex].m_pEncodeOutSurface = 0;
#endif

    if (m_DECODEFlag)
    {
        // Feed the decoder.
        mfxSyncPoint DecodeSyncPoint = 0;
        static mfxU16 nDecoderSurfIndex = 0; // index of free surface, kept between calls
        mfxStatus sts = MFX_ERR_NONE;
        m_pmfxDecodeSurfaceLast = NULL; // reset current decoder surface to get a new one from the decoder

        // Loop until the decoder reports that it accepted the request for a new frame.
        while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts || MFX_WRN_DEVICE_BUSY == sts)
        {
            if (MFX_WRN_DEVICE_BUSY == sts)
            {
                Sleep(1); // just wait and then repeat the same call to DecodeFrameAsync
            }
            else if (MFX_ERR_MORE_DATA == sts)
            {
                // Read more data into the input bitstream.
                sts = m_FileReader.ReadNextFrame(&m_mfxBS);
                MSDK_BREAK_ON_ERROR(sts);
            }
            else if (MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE == sts)
            {
                // Find a new working/output surface in m_pmfxDecodeSurfaces.
                nDecoderSurfIndex = GetFreeSurfaceIndex(m_pmfxDecodeSurfaces, m_mfxDecoderResponse.NumFrameActual, nDecoderSurfIndex);
                if (MSDK_INVALID_SURF_IDX == nDecoderSurfIndex)
                {
                    return MFX_ERR_MEMORY_ALLOC;
                }
            }

            // Send the request to the decoder.
            sts = m_pmfxDEC->DecodeFrameAsync(
                &m_mfxBS,
                (mfxFrameSurface1*)&(m_pmfxDecodeSurfaces[nDecoderSurfIndex]),
                (mfxFrameSurface1**)&m_pmfxDecodeSurfaceLast,
                &DecodeSyncPoint);

            // Ignore warnings if output is available;
            // if there is no output and no action is required, just repeat the same call.
            if (MFX_ERR_NONE < sts && DecodeSyncPoint)
            {
                sts = MFX_ERR_NONE;
            }
            if (MFX_ERR_NONE == sts)
            {
                // The decoder returned a sync point: record it in the current task and move on to the OCL plugin.
                m_Tasks[m_TaskIndex].m_DecodeSync = DecodeSyncPoint;
                m_Tasks[m_TaskIndex].m_pDecodeOutSurface = m_pmfxDecodeSurfaceLast;
#ifdef UNLOCK
                // Hold the surface until this task is retired.
                if (m_Tasks[m_TaskIndex].m_pDecodeOutSurface)
                    _InterlockedIncrement16((short*)&m_Tasks[m_TaskIndex].m_pDecodeOutSurface->Data.Locked);
#endif
                break;
            }
        }

        if (MFX_ERR_NONE != sts)
        {
            printf("ERROR: Decoder returns error %d!\n", sts);
            stsOut = sts;
        }

        // Wait for the decode that was just submitted. Only do so when a sync point
        // was actually recorded: after a decoder failure the slot still holds 0, and
        // syncing on it would replace the real decoder error with an early return.
        if (m_Tasks[m_TaskIndex].m_DecodeSync)
        {
            sts = m_mfxSession.SyncOperation(m_Tasks[m_TaskIndex].m_DecodeSync, MSDK_DEC_WAIT_INTERVAL);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
        }
    }

    if (m_pOCLPlugin && m_pOCLPlugin->m_OCLFlag)
    {
        // OpenCL part.
        mfxSyncPoint OCLSyncPoint = 0;
        mfxStatus sts = MFX_ERR_NONE;

        // Get the index of the output surface for the OCL plugin.
        nOCLSurfIndex = GetFreeSurfaceIndex(m_pmfxOCLSurfaces, m_mfxOCLResponse.NumFrameActual);
        MSDK_CHECK_ERROR(nOCLSurfIndex, MSDK_INVALID_SURF_IDX, MFX_ERR_MEMORY_ALLOC);

        mfxHDL pOutSurf = &m_pmfxOCLSurfaces[nOCLSurfIndex];
        mfxHDL inp = m_pmfxDecodeSurfaceLast; // last decoder output is the plugin input

        // Submit to the OCL filter, retrying while the device is busy.
        for (;;)
        {
            sts = MFXVideoUSER_ProcessFrameAsync(m_mfxSession, &inp, 1, &pOutSurf, 1, &OCLSyncPoint);
            if (MFX_WRN_DEVICE_BUSY != sts)
            {
                break;
            }
            Sleep(1); // just wait and then repeat the same call
        }

        // Ignore warnings if output is available.
        if (MFX_ERR_NONE < sts && OCLSyncPoint)
        {
            sts = MFX_ERR_NONE;
        }
        if (MFX_ERR_NONE != sts)
        {
            printf("ERROR: OpenCL filter return error %d!\n", sts);
            return sts;
        }

        m_Tasks[m_TaskIndex].m_OCLSync = OCLSyncPoint;
        m_Tasks[m_TaskIndex].m_pOCLOutSurface = &m_pmfxOCLSurfaces[nOCLSurfIndex];
#ifdef UNLOCK
        // Hold the surface until this task is retired.
        _InterlockedIncrement16((short*)&m_Tasks[m_TaskIndex].m_pOCLOutSurface->Data.Locked);
#endif
    }

#ifdef ENCODER
    if (m_ENCODEFlag)
    {
        // Feed the encoder.
        static mfxU16 nEncSurfIdx = 0; // index of free surface
        mfxSyncPoint EncSyncP = 0;     // must start at 0: it is tested before the encoder is guaranteed to have written it
        mfxStatus sts = MFX_ERR_NONE;

        // Find a free surface for the encoder input.
        // NOTE(review): this surface is picked independently of the decoder/OCL output and
        // nothing in this function writes frame data into it. If the intent is to encode the
        // filtered frame, the encoder input presumably has to be the OCL output surface
        // (m_pmfxOCLSurfaces[nOCLSurfIndex]), and that pool must then satisfy the encoder's
        // surface requirements -- confirm against the allocation code.
        nEncSurfIdx = GetFreeSurface(m_pmfxEncSurfaces, m_mfxEncResponse.NumFrameActual);
        MSDK_CHECK_ERROR(nEncSurfIdx, MSDK_INVALID_SURF_IDX, MFX_ERR_MEMORY_ALLOC);

        for (;;)
        {
            sts = m_pmfxENC->EncodeFrameAsync(NULL, &m_pmfxEncSurfaces[nEncSurfIdx], &m_mfxEncBS, &EncSyncP);

            if (MFX_ERR_NONE < sts && !EncSyncP) // repeat the call if warning and no output
            {
                if (MFX_WRN_DEVICE_BUSY == sts)
                    MSDK_SLEEP(1); // wait if device is busy
            }
            else if (MFX_ERR_NONE < sts && EncSyncP)
            {
                sts = MFX_ERR_NONE; // ignore warnings if output is available
                break;
            }
            else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts)
            {
                // Grow the output bitstream buffer and retry the same frame.
                sts = AllocateSufficientBuffer(&m_mfxEncBS);
                MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
                printf("\n BUFFER allocated");
            }
            else
            {
                // get next surface and new task for 2nd bitstream in ViewOutput mode
                MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_BITSTREAM);
                break;
            }
        }

        // The encoder asking for more input is not an error for this call.
        if (MFX_ERR_MORE_DATA == sts)
        {
            sts = MFX_ERR_NONE;
        }

        if (MFX_ERR_NONE == sts)
        {
            // EncSyncP may legitimately still be 0 here (frame buffered, no output yet).
            m_Tasks[m_TaskIndex].m_EncodeSync = EncSyncP;
#ifdef UNLOCK
            // Hold the encoder input surface until this task is retired.
            m_Tasks[m_TaskIndex].m_pEncodeOutSurface = &m_pmfxEncSurfaces[nEncSurfIdx];
            _InterlockedIncrement16((short*)&m_Tasks[m_TaskIndex].m_pEncodeOutSurface->Data.Locked);
#endif
        }
        else
        {
            // Report encoder failures instead of dropping them; keep an earlier decoder error if there is one.
            printf("ERROR: Encoder returns error %d!\n", sts);
            if (MFX_ERR_NONE == stsOut)
            {
                stsOut = sts;
            }
        }
    }
#endif

    // Advance to the next task slot.
    m_TaskIndex = (m_TaskIndex + 1) % SYNC_BUF_SIZE;
    return stsOut;
}//CDecodingPipeline::DecodeOneFrame
If I use this code, the encoder output is corrupted. When I decode the encoder output, it seems the frames are not displayed in the proper order.
I think I am not giving the right surface to encoder as the encoder surface index is independently calculated.
But when I give the OCL output surface to encoder then my OCL also stops working.
Can anyone guide me here?
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Mr. Robert Loffe I need your help to understand Intel media sdk integrated with opencl
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Manish,
Could you please let me know parameters of your system: processor, Operating system, driver you are using?
Also, did you get the interoperability sample from here: https://software.intel.com/en-us/articles/opencl-and-intel-media-sdk ?
Thanks!
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
One more thing, which Visual Studio version are you using to build the project?
Also, the code snippet that you provided above: is this original Intel code or something that you created?
Can you run Original Intel sample out of the box?
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Robert,
Thanks for your reply.
I am using intel Haswell family processor i7 4770 CPU and intel HD 4600 card.
Visual Studio: 2010 version.
I am using the sample code of Decode-OPENCL pipeline from the link you mentioned: https://software.intel.com/en-us/articles/opencl-and-intel-media-sdk.
This code works fine. As expected it decodes the stream and applies filter functions as mentioned in .cl file.
Now I have to integrate the encoder code into the same framework so that I can create a Decoder-OpenCL-Encoder pipeline using Media SDK. So in the above code I have modified the pipeline and added my changes under the #ifdef ENCODER switch.
Now when I add the encoder to this pipeline, the encoder output is not OK. It seems the encoder output contains re-ordered frames.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Basically I want to understand how the encoder should be added to this code:
https://software.intel.com/en-us/articles/opencl-and-intel-media-sdk
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Robert,
Is there any reason I am not being answered for this question in any of the forum available.
Is something wrong here? Can you tell me where should I post this to get some help?
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Manish,
Your question is more of a Media SDK question: I contacted Technical Consulting Engineers for Media SDK within our team and they are looking into your question right now. They will update this post once they figure out what is wrong with your code.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thanks Robert. I will wait for their reply.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Surbhi,
I did not find any .cl file in sample_multi_transcode. This framework doesn't use any opencl memory objects created.
Please let me know if I am wrong.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Surbhi,
You were right: multi-transcode uses the OpenCL plugin. However, this framework uses an opencl_plugin.dll file, which contains the code for rotating the image through OpenCL.
Thanks. I was able to use the framework.
However, when I give the below parameters to the sample_multi_transcode.exe, I got a speed of 15 fps only.
Arguments --> -i::h264 MediaSDKInterop_video.h264 -o::h264 out.264 -hw -u 7 -angle 180 -opencl
And When I disable opencl and gave below arguments I get speed of apprx. 80 fps.
Arguments --> -i::h264 MediaSDKInterop_video.h264 -o::h264 out.264 -hw -u 7
Could you please let me know the reason why enabling opencl plugin slows the speed so much at my end ?
I am using i7-4770 CPU @3.4GHz on 64 bit Windows 7. My system contains Intel HD 4600 Graphics Card. Windows 7 contains support till Directx9 only.
Regards,
Manish

- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page