/*****************************************************************************

INTEL CORPORATION PROPRIETARY INFORMATION
This software is supplied under the terms of a license agreement or
nondisclosure agreement with Intel Corporation and may not be copied
or disclosed except in accordance with the terms of that agreement.
Copyright(c) 2005-2014 Intel Corporation. All Rights Reserved.

*****************************************************************************/

#include "common_utils.h"

// Reads a packed plane of w x h bytes from fSource, one row at a time, and
// scatters it into an interleaved destination.
//
//   w, h     - dimensions of the plane stored in the file, in bytes / rows
//   buf      - scratch buffer for one row; must hold at least w bytes
//   ptr      - base of the destination plane
//   pitch    - distance in bytes between consecutive destination rows
//   offset   - byte offset inside each 2-byte destination pair (the caller in
//              this file passes 0 for the first chroma plane and 1 for the second)
//   fSource  - input file, read sequentially from its current position
//
// Sample j of row i lands at ptr[i * pitch + 2 * j + offset].
// Returns MFX_ERR_MORE_DATA as soon as a row cannot be read completely,
// otherwise MFX_ERR_NONE.
mfxStatus ReadPlane(mfxU16 w, mfxU16 h, mfxU8* buf, mfxU8* ptr, mfxU16 pitch, mfxU16 offset, FILE* fSource);
mfxStatus ReadPlane(mfxU16 w, mfxU16 h, mfxU8* buf, mfxU8* ptr, mfxU16 pitch, mfxU16 offset, FILE* fSource)
{
    for (mfxU16 row = 0; row < h; ++row) {
        // Pull one packed row of samples from the file into the scratch buffer.
        const mfxU32 got = (mfxU32) fread(buf, 1, w, fSource);
        if (got != w)
            return MFX_ERR_MORE_DATA;

        // Spread the row over every second byte of the destination line.
        const int rowStart = row * pitch + offset;
        for (mfxU16 col = 0; col < w; ++col)
            ptr[rowStart + col * 2] = buf[col];
    }
    return MFX_ERR_NONE;
}

// Loads one planar 4:2:0 frame from fSource into an NV12 surface.
//
//   pSurface   - destination surface; pSurface->Info.Width/Height give the size
//                of the frame stored in the file and pSurface->Data.Y/UV/Pitch
//                describe the (already locked/mapped) destination memory
//   fSource    - input file, read sequentially from its current position
//   streamInfo - not used by this function; kept so existing callers compile
//
// The file is read as a full-resolution luma plane followed by two
// half-resolution chroma planes. The first chroma plane is written to the even
// bytes of the UV plane and the second to the odd bytes.
// NOTE(review): that is Y, U, V order in the file, which is the I420 layout
// rather than what the "YV12" in the name suggests - confirm against the
// input material.
//
// Returns MFX_ERR_MORE_DATA when the file ends before the frame is complete,
// MFX_ERR_UNSUPPORTED when the chroma width exceeds the row buffer, and
// MFX_ERR_NONE on success.
mfxStatus LoadRawFrame_YV12toNV12(mfxFrameSurface1* pSurface, FILE* fSource, mfxFrameInfo* streamInfo);
mfxStatus LoadRawFrame_YV12toNV12(mfxFrameSurface1* pSurface, FILE* fSource, mfxFrameInfo* streamInfo)
{
    (void) streamInfo;  // frame geometry is taken from pSurface->Info instead

    const mfxU16 kMaxChromaWidth = 2048;    // maximum supported chroma width for nv12

    mfxFrameInfo* pInfo = &pSurface->Info;
    mfxFrameData* pData = &pSurface->Data;
    const mfxU16 pitch = pData->Pitch;
    mfxU16 w = pInfo->Width;
    mfxU16 h = pInfo->Height;

    // load Y: rows are packed in the file but pitch-separated in the surface
    mfxU8* ptr = pData->Y;
    for (mfxU16 i = 0; i < h; i++) {
        const mfxU32 nBytesRead = (mfxU32) fread(ptr + i * pitch, 1, w, fSource);
        if (w != nBytesRead)
            return MFX_ERR_MORE_DATA;
    }

    // Chroma planes are half the luma size in both directions (4:2:0).
    mfxU8 buf[kMaxChromaWidth];
    w /= 2;
    h /= 2;
    ptr = pData->UV;
    if (w > kMaxChromaWidth)
        return MFX_ERR_UNSUPPORTED;

    // load U
    mfxStatus sts = ReadPlane(w, h, buf, ptr, pitch, 0, fSource);
    if (MFX_ERR_NONE != sts)
        return sts;
    // load V - the status must be captured here, otherwise a truncated V plane
    // would be reported as success
    sts = ReadPlane(w, h, buf, ptr, pitch, 1, fSource);
    if (MFX_ERR_NONE != sts)
        return sts;

    return MFX_ERR_NONE;
}

int main()
{
    mfxStatus sts = MFX_ERR_NONE;

    mfxU16 inputWidth = 352;
    mfxU16 inputHeight = 288;

    // =====================================================================
    // Intel Media SDK Video Composition pipeline set-up
    // - Showcasing multiple VPP features
    //   - Crop (frame width and height is halved)
    //   - Composition: Composition of multiple streams. Shown for 2 streams - can be easily extended to >2.
    //   - Video memory is used
    // NOTE: Composition feature is supported on Linux only.
    // =====================================================================

    // Open input two YV12 YUV files - 2 input streams
    // Note: The input streams can be of same of different resolutions.
    // In this tutorial, we choose two streamf of different resolutions.

#define NUM_STREAMS 2

    FILE* fSource[NUM_STREAMS];
    MSDK_FOPEN(fSource[0], "foreman_352_288.yuv", "rb");
    MSDK_CHECK_POINTER(fSource[0], MFX_ERR_NULL_PTR);

    MSDK_FOPEN(fSource[1], "foreman_176_144.yuv", "rb");
    MSDK_CHECK_POINTER(fSource[1], MFX_ERR_NULL_PTR);

    // Create output YUV file
    FILE* fSink;
    MSDK_FOPEN(fSink, "out_simple_composite_vmem.yuv", "wb");
    MSDK_CHECK_POINTER(fSink, MFX_ERR_NULL_PTR);

    // Initialize Intel Media SDK session
    // - MFX_IMPL_AUTO_ANY selects HW acceleration if available (on any adapter)
    // - Version 1.0 is selected for greatest backwards compatibility.
    // If more recent API features are needed, change the version accordingly
    mfxIMPL impl = MFX_IMPL_AUTO_ANY;
    mfxVersion ver = { 0, 1 };
    MFXVideoSession session;
    mfxFrameAllocator mfxAllocator;
    mfxFrameInfo inputStreams[NUM_STREAMS];		// This structure will be used to populate the frame info for each frame. Used when reading the frame for processing.

    sts = Initialize(impl, ver, &session, &mfxAllocator);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    /****** 
    Initialize VPP parameters
    For simplicity, we have filled these parameters for the streams used here.
    The developer is encouraged to geenralize the mfxVideoParams filling using either command-line options or par file usage
    ******/
    mfxVideoParam VPPParams;
    memset(&VPPParams, 0, sizeof(VPPParams));
    // Input data
    VPPParams.vpp.In.FourCC = MFX_FOURCC_NV12;
    VPPParams.vpp.In.ChromaFormat = MFX_CHROMAFORMAT_YUV420;
    VPPParams.vpp.In.CropX = 0;
    VPPParams.vpp.In.CropY = 0;
    VPPParams.vpp.In.CropW = inputWidth;
    VPPParams.vpp.In.CropH = inputHeight;
    VPPParams.vpp.In.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
    VPPParams.vpp.In.FrameRateExtN = 30;
    VPPParams.vpp.In.FrameRateExtD = 1;
      // width must be a multiple of 16
      // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture
    VPPParams.vpp.In.Width = MSDK_ALIGN16(inputWidth);
    VPPParams.vpp.In.Height =
        (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.In.PicStruct) ?
        MSDK_ALIGN16(inputHeight) :
        MSDK_ALIGN32(inputHeight);
    
    // Output data
    VPPParams.vpp.Out.FourCC = MFX_FOURCC_NV12;
    VPPParams.vpp.Out.ChromaFormat = MFX_CHROMAFORMAT_YUV420;
    VPPParams.vpp.Out.CropX = 0;
    VPPParams.vpp.Out.CropY = 0;
    VPPParams.vpp.Out.CropW = inputWidth;
    VPPParams.vpp.Out.CropH = inputHeight;
    VPPParams.vpp.Out.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
    VPPParams.vpp.Out.FrameRateExtN = 30;
    VPPParams.vpp.Out.FrameRateExtD = 1;
    // width must be a multiple of 16
    // height must be a multiple of 16 in case of frame picture and a multiple of 32 in case of field picture
    VPPParams.vpp.Out.Width = MSDK_ALIGN16(VPPParams.vpp.Out.CropW);
    VPPParams.vpp.Out.Height =
        (MFX_PICSTRUCT_PROGRESSIVE == VPPParams.vpp.Out.PicStruct) ?
        MSDK_ALIGN16(VPPParams.vpp.Out.CropH) :
        MSDK_ALIGN32(VPPParams.vpp.Out.CropH);

    // Video memory surfaces are used to storing the raw frames. Use with HW acceleration for better performance
    VPPParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY;

    /*************************************************************************************************
    COMPOSITION-SPECIFIC BEGINS: Setting Phase

    How we are compositing?
    Let us crop the second stream to W/2,H/2 size starting at (W/4,H/4) co-ordinate
    This cropped stream will be composited onto the first stream, which will be used at its original resolution.
    You can also choose where you would like the cropped second stream to go on the output surface - let's say we want it at (50,50) co-ordinates
    Here are the crop values that will achieve this
    *************************************************************************************************/

    mfxU16 W1 = 352, H1 = 288;
    mfxU16 Cx1 = 0, Cy1 = 0, Cw1 = W1, Ch1 = H1;

    mfxU16 W2 = 176, H2 = 144;
    mfxU16 Cx2 = W2 >> 2, Cy2 = H2 >> 2, Cw2 = W2 >> 1, Ch2 = H2 >> 1;

    /** Fill frame params in mFrameInfo structures with the above parameters **/
    for (mfxU16 i = 0; i < NUM_STREAMS; i++){
        memcpy(&inputStreams[i],&(VPPParams.vpp.In), sizeof(mfxFrameInfo));
        inputStreams[i].Width = i == 0 ? W1 : W2;
        inputStreams[i].Height = i == 0 ? H1 : H2;
        inputStreams[i].CropX = i == 0 ? Cx1 : Cx2;
        inputStreams[i].CropY = i == 0 ? Cy1 : Cy2;
        inputStreams[i].CropW = i == 0 ? Cw1 : Cw2;
        inputStreams[i].CropH = i == 0 ? Ch1 : Ch2;
    }

    // Create Media SDK VPP component
    MFXVideoVPP mfxVPP(session);

    // Initialize extended buffer for Composition
    mfxExtVPPComposite composite;
    memset(&composite, 0, sizeof(composite));
    composite.Header.BufferId = MFX_EXTBUFF_VPP_COMPOSITE;
    composite.Header.BufferSz = sizeof(mfxExtVPPComposite);
    composite.NumInputStream = 2;
    composite.Y = 10;
    composite.U = 80;
    composite.V = 80;
    composite.InputStream = (mfxVPPCompInputStream*) new mfxVPPCompInputStream * [2];

    composite.InputStream[0].DstX = (mfxU32)0;
    composite.InputStream[0].DstY = (mfxU32)0;
    composite.InputStream[0].DstW = (mfxU32)W1;
    composite.InputStream[0].DstH = (mfxU32)H1;

    composite.InputStream[1].DstX = (mfxU32)50;		//Co-ordinates for where the second stream should go on the output surface
    composite.InputStream[1].DstY = (mfxU32)50;
    composite.InputStream[1].DstW = (mfxU32)Cw2;
    composite.InputStream[1].DstH = (mfxU32)Ch2;

    mfxExtBuffer* ExtBuffer[1];
    ExtBuffer[0] = (mfxExtBuffer*) &composite;
    VPPParams.NumExtParam = 1;
    VPPParams.ExtParam = (mfxExtBuffer**) &ExtBuffer[0];
	
    /*************************************************************************************************
    COMPOSITION-SPECIFIC ENDS: Setting Phase
    *************************************************************************************************/

    // Query number of required surfaces for VPP
    mfxFrameAllocRequest VPPRequest[2];     // [0] - in, [1] - out
    memset(&VPPRequest, 0, sizeof(mfxFrameAllocRequest) * 2);
    sts = mfxVPP.QueryIOSurf(&VPPParams, VPPRequest);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    VPPRequest[0].Type |= WILL_WRITE; // This line is only required for Windows DirectX11 to ensure that surfaces can be written to by the application
    VPPRequest[1].Type |= WILL_READ; // This line is only required for Windows DirectX11 to ensure that surfaces can be retrieved by the application

    // Allocate required surfaces
    mfxFrameAllocResponse mfxResponseIn;
    mfxFrameAllocResponse mfxResponseOut;
    sts = mfxAllocator.Alloc(mfxAllocator.pthis, &VPPRequest[0], &mfxResponseIn);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
    sts = mfxAllocator.Alloc(mfxAllocator.pthis, &VPPRequest[1], &mfxResponseOut);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    mfxU16 nVPPSurfNumIn = mfxResponseIn.NumFrameActual;
    mfxU16 nVPPSurfNumOut = mfxResponseOut.NumFrameActual;

    // Allocate surface headers (mfxFrameSurface1) for VPP
    mfxFrameSurface1** pVPPSurfacesIn = new mfxFrameSurface1 *[nVPPSurfNumIn];
    MSDK_CHECK_POINTER(pVPPSurfacesIn, MFX_ERR_MEMORY_ALLOC);
    for (int i = 0; i < nVPPSurfNumIn; i++) {
        pVPPSurfacesIn[i] = new mfxFrameSurface1;
        memset(pVPPSurfacesIn[i], 0, sizeof(mfxFrameSurface1));
        pVPPSurfacesIn[i]->Data.MemId = mfxResponseIn.mids[i];  // MID (memory id) represent one D3D NV12 surface
    }

    mfxFrameSurface1** pVPPSurfacesOut = new mfxFrameSurface1 *[nVPPSurfNumOut];
    MSDK_CHECK_POINTER(pVPPSurfacesOut, MFX_ERR_MEMORY_ALLOC);
    for (int i = 0; i < nVPPSurfNumOut; i++) {
        pVPPSurfacesOut[i] = new mfxFrameSurface1;
        memset(pVPPSurfacesOut[i], 0, sizeof(mfxFrameSurface1));
        memcpy(&(pVPPSurfacesOut[i]->Info), &(VPPParams.vpp.Out), sizeof(mfxFrameInfo));
        pVPPSurfacesOut[i]->Data.MemId = mfxResponseOut.mids[i];    // MID (memory id) represent one D3D NV12 surface
    }

    // Initialize Media SDK VPP
    sts = mfxVPP.Init(&VPPParams);
    MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    // ===================================
    // Start processing the frames
    //

    mfxTime tStart, tEnd;
    mfxGetTime(&tStart);

    int nSurfIdxIn = 0, nSurfIdxOut = 0;
    mfxSyncPoint syncp;
    mfxU32 nFrame = 0;
    mfxU32 streamNum=0;

    //
    // Stage 1: Main processing loop
    //
    while (MFX_ERR_NONE <= sts || MFX_ERR_MORE_DATA == sts) {
        nSurfIdxIn = GetFreeSurfaceIndex(pVPPSurfacesIn, nVPPSurfNumIn);        // Find free input frame surface
        MSDK_CHECK_ERROR(MFX_ERR_NOT_FOUND, nSurfIdxIn, MFX_ERR_MEMORY_ALLOC);

        // Surface locking required when read/write video surfaces
        sts = mfxAllocator.Lock(mfxAllocator.pthis, pVPPSurfacesIn[nSurfIdxIn]->Data.MemId, &(pVPPSurfacesIn[nSurfIdxIn]->Data));
        MSDK_BREAK_ON_ERROR(sts);

        /******************************************************************************************************************
        COMPOSITION-SPECIFIC CODE BEGINS: 
        Loading data from each of the input streams, and
        Setting the surface parameters to the Crop, Width, Height, values of the input stream being loaded
        ******************************************************************************************************************/
        streamNum %= NUM_STREAMS;
        memcpy(&(pVPPSurfacesIn[nSurfIdxIn]->Info), &(inputStreams[streamNum]), sizeof(mfxFrameInfo));
        sts = LoadRawFrame_YV12toNV12(pVPPSurfacesIn[nSurfIdxIn], fSource[streamNum], &inputStreams[streamNum]);        // Load frame from file into surface
        streamNum++;
        MSDK_BREAK_ON_ERROR(sts);
        /******************************************************************************************************************
        COMPOSITION-SPECIFIC CODE ENDS: 
        ******************************************************************************************************************/

        sts = mfxAllocator.Unlock(mfxAllocator.pthis, pVPPSurfacesIn[nSurfIdxIn]->Data.MemId, &(pVPPSurfacesIn[nSurfIdxIn]->Data));
        MSDK_BREAK_ON_ERROR(sts);

        nSurfIdxOut = GetFreeSurfaceIndex(pVPPSurfacesOut, nVPPSurfNumOut);     // Find free output frame surface
        MSDK_CHECK_ERROR(MFX_ERR_NOT_FOUND, nSurfIdxOut, MFX_ERR_MEMORY_ALLOC);

        for (;;) {
            // Process a frame asychronously (returns immediately)
            sts = mfxVPP.RunFrameVPPAsync(pVPPSurfacesIn[nSurfIdxIn], pVPPSurfacesOut[nSurfIdxOut], NULL, &syncp);
            if (MFX_WRN_DEVICE_BUSY == sts) {
                MSDK_SLEEP(1);  // Wait if device is busy, then repeat the same call
            } else
                break;
        }

        if (MFX_ERR_MORE_DATA == sts)   // Fetch more input surfaces for VPP
            continue;

        // MFX_ERR_MORE_SURFACE means output is ready but need more surface (example: Frame Rate Conversion 30->60)
        // * Not handled in this example!

        MSDK_BREAK_ON_ERROR(sts);

        sts = session.SyncOperation(syncp, 60000);      // Synchronize. Wait until frame processing is ready
        MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

        ++nFrame;
        // Surface locking required when read/write video surfaces
        sts = mfxAllocator.Lock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data));
        MSDK_BREAK_ON_ERROR(sts);

        sts = WriteRawFrame(pVPPSurfacesOut[nSurfIdxOut], fSink);
        MSDK_BREAK_ON_ERROR(sts);

        sts = mfxAllocator.Unlock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data));
        MSDK_BREAK_ON_ERROR(sts);

        printf("Frame number: %d\n", nFrame);
        fflush(stdout);
    }

    // MFX_ERR_MORE_DATA means that the input file has ended, need to go to buffering loop, exit in case of other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    //
    // Stage 2: Retrieve the buffered VPP frames
    //
    while (MFX_ERR_NONE <= sts) {
        nSurfIdxOut = GetFreeSurfaceIndex(pVPPSurfacesOut, nVPPSurfNumOut);     // Find free frame surface
        MSDK_CHECK_ERROR(MFX_ERR_NOT_FOUND, nSurfIdxOut, MFX_ERR_MEMORY_ALLOC);

        for (;;) {
            // Process a frame asychronously (returns immediately)
            sts = mfxVPP.RunFrameVPPAsync(NULL, pVPPSurfacesOut[nSurfIdxOut], NULL, &syncp);
            if (MFX_WRN_DEVICE_BUSY == sts) {
                MSDK_SLEEP(1);  // Wait if device is busy, then repeat the same call
            } else
                break;
        }

        MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_SURFACE);
        MSDK_BREAK_ON_ERROR(sts);

        sts = session.SyncOperation(syncp, 60000);      // Synchronize. Wait until frame processing is ready
        MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

        ++nFrame;
        // Surface locking required when read/write video surfaces
        sts = mfxAllocator.Lock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data));
        MSDK_BREAK_ON_ERROR(sts);

        sts = WriteRawFrame(pVPPSurfacesOut[nSurfIdxOut], fSink);
        MSDK_BREAK_ON_ERROR(sts);

        sts = mfxAllocator.Unlock(mfxAllocator.pthis, pVPPSurfacesOut[nSurfIdxOut]->Data.MemId, &(pVPPSurfacesOut[nSurfIdxOut]->Data));
        MSDK_BREAK_ON_ERROR(sts);

        printf("Frame number: %d\n", nFrame);
        fflush(stdout);
    }

    // MFX_ERR_MORE_DATA indicates that there are no more buffered frames, exit in case of other errors
    MSDK_IGNORE_MFX_STS(sts, MFX_ERR_MORE_DATA);
    MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);

    mfxGetTime(&tEnd);
    double elapsed = TimeDiffMsec(tEnd, tStart) / 1000;
    double fps = ((double)nFrame / elapsed);
    printf("\nExecution time: %3.2f s (%3.2f fps)\n", elapsed, fps);

    // ===================================================================
    // Clean up resources
    //  - It is recommended to close Media SDK components first, before releasing allocated surfaces, since
    //    some surfaces may still be locked by internal Media SDK resources.

    mfxVPP.Close();
    //session closed automatically on destruction

    for (int i = 0; i < nVPPSurfNumIn; i++)
        delete pVPPSurfacesIn[i];
    MSDK_SAFE_DELETE_ARRAY(pVPPSurfacesIn);
    for (int i = 0; i < nVPPSurfNumOut; i++)
        delete pVPPSurfacesOut[i];
    MSDK_SAFE_DELETE_ARRAY(pVPPSurfacesOut);

    mfxAllocator.Free(mfxAllocator.pthis, &mfxResponseIn);
    mfxAllocator.Free(mfxAllocator.pthis, &mfxResponseOut);

    if (fSource[0]) fclose(fSource[0]);
    if (fSource[1]) fclose(fSource[1]);
    fclose(fSink);

    Release();

    return 0;
}
