OpenCL* for CPU
Ask questions and share information on Intel® SDK for OpenCL™ Applications and OpenCL™ implementations for Intel® CPU.
Announcements
This forum covers OpenCL* for CPU only. OpenCL* for GPU questions can be asked in the GPU Compute Software forum. Intel® FPGA SDK for OpenCL™ questions can be asked in the FPGA Intel® High Level Design forum.
1719 Discussions

Decoding-opencl-encoding pipeline

Manish_K_
Beginner
325 Views

I am working on a Decode-OpenCL-Encode pipeline on an Intel processor. Intel provides sample code for media interop, which is attached.

I am integrating the encoder into the same sample.

If we look at the DecodeOneFrame() function below: 

mfxStatus CDecodingPipeline::DecodeOneFrame(int Width, int Height, IDirect3DSurface9 *pDstSurface, IDirect3DDevice9* pd3dDevice)
{
    mfxU16 nOCLSurfIndex=0;

    mfxStatus stsOut = MFX_ERR_NONE;
    if(m_Tasks[m_TaskIndex].m_DecodeSync || m_Tasks[m_TaskIndex].m_OCLSync || m_Tasks[m_TaskIndex].m_EncodeSync)

    {// wait task is finished and copy result texture to back buffer
        mfxStatus   sts = MFX_ERR_NONE;
        mfxFrameSurface1_OCL*   pOutSurface = NULL; // output surface.
        //wait the previous submitted tasks
        if(m_Tasks[m_TaskIndex].m_DecodeSync)
        {
            sts = m_mfxSession.SyncOperation(m_Tasks[m_TaskIndex].m_DecodeSync, MSDK_DEC_WAIT_INTERVAL);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
            pOutSurface = m_Tasks[m_TaskIndex].m_pDecodeOutSurface;
            
        }
        if(m_Tasks[m_TaskIndex].m_OCLSync)
        {
            sts = m_mfxSession.SyncOperation(m_Tasks[m_TaskIndex].m_OCLSync, MSDK_VPP_WAIT_INTERVAL);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
            pOutSurface = m_Tasks[m_TaskIndex].m_pOCLOutSurface;

        }
#ifdef ENCODER
        if(m_Tasks[m_TaskIndex].m_EncodeSync)
        {
            sts = m_mfxSession.SyncOperation(m_Tasks[m_TaskIndex].m_EncodeSync, MSDK_ENC_WAIT_INTERVAL);
            MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
                  //pOutSurface = m_Tasks[m_TaskIndex].m_pEncodeOutSurface;

        }
#endif
        if(pOutSurface)
        {/* copy YUV texture to screen */

            HRESULT hr;

            IDirect3DSurface9* pSrcSurface = (IDirect3DSurface9*)(pOutSurface->Data.MemId);
            assert(pDstSurface && pSrcSurface);
            if(pSrcSurface && pDstSurface)
            {
                RECT    r;
                r.left = 0;
                r.top = 0;
                r.right = min(Width,m_mfxDecodeVideoParams.vpp.In.Width);
                r.bottom = min(Height,m_mfxDecodeVideoParams.vpp.In.Height);

                r.right -= r.right&1;
                r.bottom -= r.bottom&1;

                V(pd3dDevice->StretchRect(pSrcSurface, &r, pDstSurface, &r,D3DTEXF_POINT));
            }

        }
#ifdef UNLOCK
        if(m_Tasks[m_TaskIndex].m_pDecodeOutSurface && m_Tasks[m_TaskIndex].m_pDecodeOutSurface->Data.Locked)
          _InterlockedDecrement16((short*)&m_Tasks[m_TaskIndex].m_pDecodeOutSurface->Data.Locked);
        if(m_Tasks[m_TaskIndex].m_pOCLOutSurface && m_Tasks[m_TaskIndex].m_pOCLOutSurface->Data.Locked)
            _InterlockedDecrement16((short*)&m_Tasks[m_TaskIndex].m_pOCLOutSurface->Data.Locked);
#ifdef ENCODER            
        if(m_Tasks[m_TaskIndex].m_pEncodeOutSurface && m_Tasks[m_TaskIndex].m_pEncodeOutSurface->Data.Locked)
            _InterlockedDecrement16((short*)&m_Tasks[m_TaskIndex].m_pEncodeOutSurface->Data.Locked);
#endif    

#endif
    }
#if 1
    // clear sync task for further using
    m_Tasks[m_TaskIndex].m_OCLSync = 0;
    m_Tasks[m_TaskIndex].m_pOCLOutSurface = 0;
    m_Tasks[m_TaskIndex].m_DecodeSync = 0;
    m_Tasks[m_TaskIndex].m_pDecodeOutSurface = 0;
#ifdef ENCODER    
    m_Tasks[m_TaskIndex].m_EncodeSync = 0;
    m_Tasks[m_TaskIndex].m_pEncodeOutSurface = 0;
#endif    
#endif
    if(m_DECODEFlag)
    {// feed decoder
        mfxSyncPoint        DecodeSyncPoint = 0;
        static mfxU16      nDecoderSurfIndex = 0; // index of free surface
        mfxStatus   sts = MFX_ERR_NONE;
        m_pmfxDecodeSurfaceLast = NULL; // reset curretn decoder surface to get new one from Decoder
        while(MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts || MFX_ERR_MORE_SURFACE == sts || MFX_WRN_DEVICE_BUSY == sts)
        {// loop until decoder report that it get request for new frame
            if (MFX_WRN_DEVICE_BUSY == sts)
            {
                Sleep(1); // just wait and then repeat the same call to DecodeFrameAsync
            }
            else if (MFX_ERR_MORE_DATA == sts)
            { // read more data to input bit stream
                sts = m_FileReader.ReadNextFrame(&m_mfxBS);
                MSDK_BREAK_ON_ERROR(sts);
            }
            else if (MFX_ERR_MORE_SURFACE == sts || MFX_ERR_NONE == sts)
            {// find new working-output surface in m_pmfxDecodeSurfaces
                //nDecoderSurfIndex = 0;
                nDecoderSurfIndex = GetFreeSurfaceIndex(m_pmfxDecodeSurfaces, m_mfxDecoderResponse.NumFrameActual,nDecoderSurfIndex);
                if (MSDK_INVALID_SURF_IDX == nDecoderSurfIndex)
                {
                    return MFX_ERR_MEMORY_ALLOC;
                }
            }

            // send request to decoder
            sts = m_pmfxDEC->DecodeFrameAsync(
                &m_mfxBS,
                (mfxFrameSurface1*)&(m_pmfxDecodeSurfaces[nDecoderSurfIndex]),
                (mfxFrameSurface1**)&m_pmfxDecodeSurfaceLast,
                &DecodeSyncPoint);
            // ignore warnings if output is available,
            // if no output and no action required just repeat the same call
            if (MFX_ERR_NONE < sts && DecodeSyncPoint)
            {
                sts = MFX_ERR_NONE;
            }

            if (MFX_ERR_NONE == sts)
            {// decoder return sync point then fill the curretn task nad switch to OCL Plugin feeding
                m_Tasks[m_TaskIndex].m_DecodeSync = DecodeSyncPoint;
                m_Tasks[m_TaskIndex].m_pDecodeOutSurface = m_pmfxDecodeSurfaceLast;
                // look for output process
#ifdef UNLOCK
                if(m_Tasks[m_TaskIndex].m_pDecodeOutSurface)
                    _InterlockedIncrement16((short*)&m_Tasks[m_TaskIndex].m_pDecodeOutSurface->Data.Locked);
#endif
                break;
            }
        }
        if(MFX_ERR_NONE != sts)
        {
            printf("ERROR: Decoder returns error %d!\n",sts);
            stsOut = sts;
        }
        //decoder sync point
        sts = m_mfxSession.SyncOperation(m_Tasks[m_TaskIndex].m_DecodeSync, MSDK_DEC_WAIT_INTERVAL);
        MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    }//if(m_DECODEFlag)

    if(m_pOCLPlugin && m_pOCLPlugin->m_OCLFlag)
    {// OPENCL part
        mfxU16 nOCLSurfIndex=0;
        mfxSyncPoint        OCLSyncPoint = 0;
        mfxStatus   sts = MFX_ERR_NONE;
        // get index for output surface for OCL plugin
        nOCLSurfIndex = GetFreeSurfaceIndex(m_pmfxOCLSurfaces, m_mfxOCLResponse.NumFrameActual);
        MSDK_CHECK_ERROR(nOCLSurfIndex, MSDK_INVALID_SURF_IDX, MFX_ERR_MEMORY_ALLOC);
        

        //mfxHDL pOutSurf = &m_pmfxOCLSurfaces[nOCLSurfIndex];
        //mfxHDL pOutSurf = &m_pmfxEncSurfaces[nEncSurfIdx];
        //m_pmfxOCLSurfaces[nOCLSurfIndex] = m_pmfxEncSurfaces[nEncSurfIdx];
        mfxHDL pOutSurf = &m_pmfxOCLSurfaces[nOCLSurfIndex];
        mfxHDL inp = m_pmfxDecodeSurfaceLast;

        // OCL filter
        for(;;)
        {
          sts = MFXVideoUSER_ProcessFrameAsync(m_mfxSession, &inp, 1, &pOutSurf, 1, &OCLSyncPoint);

            if (MFX_WRN_DEVICE_BUSY == sts)
            {
                Sleep(1); // just wait and then repeat the same call
            }
            else
            {
                break;
            }
        }

        // ignore warnings if output is available,
        if (MFX_ERR_NONE < sts && OCLSyncPoint)
        {
            sts = MFX_ERR_NONE;
        }

        if(MFX_ERR_NONE!=sts)
        {
            printf("ERROR: OpenCL filter return error %d!\n",sts);
            return sts;
        }

        {
            m_Tasks[m_TaskIndex].m_OCLSync = OCLSyncPoint;
            m_Tasks[m_TaskIndex].m_pOCLOutSurface = &m_pmfxOCLSurfaces[nOCLSurfIndex];
            //m_Tasks[m_TaskIndex].m_pOCLOutSurface = &m_pmfxEncSurfaces[nEncSurfIdx];
#ifdef UNLOCK
            // look for output process
            _InterlockedIncrement16((short*)&m_Tasks[m_TaskIndex].m_pOCLOutSurface->Data.Locked);
#endif
        }

    }

#ifdef ENCODER

    if(m_ENCODEFlag)
    {// feed encoder
        static mfxU16      nEncSurfIdx = 0; // index of free surface
        mfxSyncPoint EncSyncP;
        mfxStatus sts = MFX_ERR_NONE;
        //mfxFrameSurface1* pSurf = NULL; // dispatching pointer
        // find free surface for encoder input
        nEncSurfIdx = GetFreeSurface(m_pmfxEncSurfaces, m_mfxEncResponse.NumFrameActual);
        MSDK_CHECK_ERROR(nEncSurfIdx, MSDK_INVALID_SURF_IDX, MFX_ERR_MEMORY_ALLOC);

        // point pSurf to encoder surface
        //m_pmfxEncSurfaces[nEncSurfIdx] = m_pmfxOCLSurfaces[nOCLSurfIndex];
        for (;;)

        {
            // at this point surface for encoder contains either a frame from file or a frame processed by vpp
            sts = m_pmfxENC->EncodeFrameAsync(NULL, &m_pmfxEncSurfaces[nEncSurfIdx], &m_mfxEncBS, &EncSyncP);

            if (MFX_ERR_NONE < sts && !EncSyncP) // repeat the call if warning and no output
            {
                if (MFX_WRN_DEVICE_BUSY == sts)
                    MSDK_SLEEP(1); // wait if device is busy
            }
            else if (MFX_ERR_NONE < sts && EncSyncP)
            {
                sts = MFX_ERR_NONE; // ignore warnings if output is available

                break;
            }

            else if (MFX_ERR_NOT_ENOUGH_BUFFER == sts)
            {
              
                sts = AllocateSufficientBuffer(&m_mfxEncBS);
                MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
                printf("\n BUFFER allocated");
            }

            else
            {
                // get next surface and new task for 2nd bitstream in ViewOutput mode
                MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_BITSTREAM);
                break;
            }
        }

      if (MFX_ERR_MORE_DATA == sts) {
        sts = MFX_ERR_NONE;

      }
      if (MFX_ERR_NONE == sts)
      {
            m_Tasks[m_TaskIndex].m_EncodeSync = EncSyncP;
      }

#ifdef UNLOCK
      if (MFX_ERR_NONE == sts)
      {// encoder return sync point then fill the curretn task nad switch to encoder feeding
          m_Tasks[m_TaskIndex].m_pEncodeOutSurface = &m_pmfxEncSurfaces[nEncSurfIdx];
          _InterlockedIncrement16((short*)&m_Tasks[m_TaskIndex].m_pEncodeOutSurface->Data.Locked);
    }
#endif
    }
#endif

 

    // increase task index to point to next task.
    m_TaskIndex = (m_TaskIndex+1)%SYNC_BUF_SIZE;
    return stsOut;
}//CDecodingPipeline::DecodeOneFrame

If I use this code, the encoder output is corrupted. When I decode the encoder output, it seems the frames are not displayed in the proper order.

I think I am not giving the right surface to the encoder, as the encoder surface index is calculated independently.

But when I give the OCL output surface to the encoder, my OCL stage also stops working.

Can anyone guide me here?

 

0 Kudos
1 Reply
Michael_C_Intel2
Employee
325 Views

Hi Manish,

We have an OpenCL forum, you should be able to get the help you need there. I am moving your post to that forum.

0 Kudos
Reply