What is the expected (or typical) time for a VPP RGB4-to-NV12 conversion?
I am running the 2013 SDK on a Core i7-4800MQ 2.7 GHz processor (Haswell).
I am running H.264 encoding on 1080p RGB4 frames at 30 Hz, with one key frame per frame.
The need is for a low-latency, real-time stream with very little lag.
The encoder is really fast and handles one key frame per frame without problem using native NV12 data.
But since my native data is RGB4, I need the VPP conversion.
It keeps running over my frame time because the VPP RGB4-to-NV12 conversion takes longer than 30 ms.
I have narrowed it down to RunFrameVPPAsync() and the subsequent SyncOperation().
My mfxSession is configured for MFX_IMPL_HARDWARE_ANY, but the VPP runs as if it is converting in software.
I wrote an NV12 converter by hand, and it takes about the same amount of time.
Any ideas what could be slowing down the conversion? Is this typical?
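A minimal check like the following sketch should confirm which implementation the session actually selected (MFXVideoSession::QueryIMPL is the standard Media SDK call for this):
// Sketch: report whether the session picked the software or a hardware implementation.
mfxIMPL actualImpl = MFX_IMPL_SOFTWARE;
if (MFX_ERR_NONE == mfxSession.QueryIMPL(&actualImpl))
{
if (MFX_IMPL_BASETYPE(actualImpl) == MFX_IMPL_SOFTWARE)
TRACE("Session selected the SOFTWARE implementation\n");
else
TRACE("Session selected a HARDWARE implementation\n");
}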
Code follows; it encodes one frame of data...
int QuickSyncEncode( bool UseHWOnly, int Mode, const Image &image, UInt64 timestamp, Buffer &OutputBitStream )
{
static bool Initialized = 0;
static mfxStatus sts = MFX_ERR_NONE;
static MFXVideoENCODE *pmfxENC = NULL;
static MFXVideoVPP *pmfxVPP = NULL;
static MFXVideoSession mfxSession;
static mfxVideoParam mfxEncParams;
static mfxVideoParam mfxVppParams;
static MFXFrameAllocator *pMFXAllocator = NULL;
static MFXFrameAllocator *pVPPAllocator = NULL;
static mfxAllocatorParams *pmfxAllocatorParams = NULL;
static mfxFrameSurface1 *pEncSurfaces = NULL;
static mfxFrameSurface1 *pVppSurfaces = NULL;
static mfxFrameAllocResponse EncResponse;
static mfxFrameAllocResponse VppResponse;
static mfxU32 nEncodedDataBufferSize = 0;
static mfxU16 nAsyncDepth = 1;
static mfxSyncPoint EncSyncP = NULL;
static mfxSyncPoint VppSyncPoint = NULL;
static mfxBitstream mfxBS;
static APIChangeFeatures features = {};
static unsigned short nWidth = 0;
static unsigned short nHeight = 0;
static mfxU16 nEncSurfNum = 0;
static mfxU16 nVppSurfNum = 0;
static mfxFrameAllocRequest EncRequest;
static mfxFrameAllocRequest VppRequest;
if( Mode == 1 )
goto Exit;
if( Initialized == 0 )
{
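// One-time initialization: session, allocators, VPP and encoder setup.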
nWidth = image.getWidth();
nHeight = image.getHeight();
MSDK_ZERO_MEMORY(mfxEncParams);
MSDK_ZERO_MEMORY(mfxVppParams);
MSDK_ZERO_MEMORY(EncResponse);
MSDK_ZERO_MEMORY(mfxBS);
features.MVCEncode = false;
features.ViewOutput = false;
features.JpegEncode = false;
features.LookAheadBRC = false;
mfxVersion version = getMinimalRequiredVersion(features);
if (UseHWOnly)
sts = mfxSession.Init(MFX_IMPL_HARDWARE_ANY, &version);
else
sts = mfxSession.Init(MFX_IMPL_AUTO_ANY, &version);
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
pMFXAllocator = new SysMemFrameAllocator;
EXIT_CHECK_POINTER(pMFXAllocator, MFX_ERR_MEMORY_ALLOC);
pVPPAllocator = new SysMemFrameAllocator;
EXIT_CHECK_POINTER(pVPPAllocator, MFX_ERR_MEMORY_ALLOC);
sts = pMFXAllocator->Init( pmfxAllocatorParams);
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
sts = pVPPAllocator->Init( pmfxAllocatorParams);
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
mfxEncParams.AsyncDepth = nAsyncDepth;
mfxEncParams.ExtParam = NULL;
mfxEncParams.NumExtParam = 0;
mfxEncParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY;
mfxEncParams.mfx.CodecId = MFX_CODEC_AVC;
mfxEncParams.mfx.TargetUsage = MFX_TARGETUSAGE_BALANCED;
mfxEncParams.mfx.TargetKbps = 10000;
mfxEncParams.mfx.RateControlMethod = (mfxU16)MFX_RATECONTROL_CBR;
mfxEncParams.mfx.EncodedOrder = 0;
mfxEncParams.mfx.CodecProfile = MFX_PROFILE_AVC_HIGH;
mfxEncParams.mfx.CodecLevel = MFX_LEVEL_AVC_5;
mfxEncParams.mfx.FrameInfo.FrameRateExtN = 30;
mfxEncParams.mfx.FrameInfo.FrameRateExtD = 1;
mfxEncParams.mfx.FrameInfo.FourCC = MFX_FOURCC_NV12;
mfxEncParams.mfx.FrameInfo.ChromaFormat = MFX_CHROMAFORMAT_YUV420;
mfxEncParams.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
mfxEncParams.mfx.FrameInfo.Width = MSDK_ALIGN16(nWidth);
if(MFX_PICSTRUCT_PROGRESSIVE == mfxEncParams.mfx.FrameInfo.PicStruct)
mfxEncParams.mfx.FrameInfo.Height = MSDK_ALIGN16(nHeight);
else
mfxEncParams.mfx.FrameInfo.Height = MSDK_ALIGN32(nHeight);
mfxEncParams.mfx.FrameInfo.CropX = 0;
mfxEncParams.mfx.FrameInfo.CropY = 0;
mfxEncParams.mfx.FrameInfo.CropW = nWidth;
mfxEncParams.mfx.FrameInfo.CropH = nHeight;
mfxEncParams.mfx.FrameInfo.AspectRatioW = 0;
mfxEncParams.mfx.FrameInfo.AspectRatioH = 0;
memcpy( &mfxVppParams, &mfxEncParams, sizeof(mfxVideoParam) );
memcpy( &mfxVppParams.vpp.In, &mfxEncParams.mfx.FrameInfo, sizeof(mfxFrameInfo) );
memcpy( &mfxVppParams.vpp.Out, &mfxEncParams.mfx.FrameInfo, sizeof(mfxFrameInfo) );
mfxVppParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_SYSTEM_MEMORY;
mfxVppParams.vpp.In.FourCC = MFX_FOURCC_RGB4;
pmfxENC = new MFXVideoENCODE(mfxSession);
EXIT_CHECK_POINTER(pmfxENC, MFX_ERR_MEMORY_ALLOC);
pmfxVPP = new MFXVideoVPP(mfxSession);
EXIT_CHECK_POINTER(pmfxVPP, MFX_ERR_MEMORY_ALLOC);
sts = pmfxENC->Close();
MSDK_IGNORE_MFX_STS(sts, MFX_ERR_NOT_INITIALIZED);
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
sts = pmfxVPP->Close();
MSDK_IGNORE_MFX_STS(sts, MFX_ERR_NOT_INITIALIZED);
MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
MSDK_SAFE_DELETE_ARRAY(pEncSurfaces);
MSDK_SAFE_DELETE_ARRAY(pVppSurfaces);
if (pMFXAllocator)
{
pMFXAllocator->Free(pMFXAllocator->pthis, &EncResponse);
}
if (pVPPAllocator)
{
pVPPAllocator->Free(pVPPAllocator->pthis, &VppResponse);
}
WipeMfxBitstream(&mfxBS);
MSDK_ZERO_MEMORY(EncRequest);
MSDK_ZERO_MEMORY(VppRequest);
sts = pmfxENC->QueryIOSurf(&mfxEncParams, &EncRequest);
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
nEncSurfNum = EncRequest.NumFrameSuggested + (nAsyncDepth - 1);
EncRequest.NumFrameMin = nEncSurfNum;
EncRequest.NumFrameSuggested = nEncSurfNum;
MSDK_MEMCPY_VAR(EncRequest.Info, &(mfxEncParams.mfx.FrameInfo), sizeof(mfxFrameInfo));
sts = pMFXAllocator->Alloc(pMFXAllocator->pthis, &EncRequest, &EncResponse);
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
pEncSurfaces = new mfxFrameSurface1 [EncResponse.NumFrameActual];
EXIT_CHECK_POINTER(pEncSurfaces, MFX_ERR_MEMORY_ALLOC);
for (int i = 0; i < EncResponse.NumFrameActual; i++)
{
memset(&(pEncSurfaces[i]), 0, sizeof(mfxFrameSurface1));
MSDK_MEMCPY_VAR(pEncSurfaces[i].Info, &(mfxEncParams.mfx.FrameInfo), sizeof(mfxFrameInfo));
sts = pMFXAllocator->Lock(pMFXAllocator->pthis, EncResponse.mids[i], &(pEncSurfaces[i].Data));
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
}
MSDK_ZERO_MEMORY(VppRequest);
sts = pmfxVPP->QueryIOSurf(&mfxVppParams, &VppRequest);
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
nVppSurfNum = VppRequest.NumFrameSuggested + (nAsyncDepth - 1);
VppRequest.NumFrameMin = nVppSurfNum;
VppRequest.NumFrameSuggested = nVppSurfNum;
MSDK_MEMCPY_VAR(VppRequest.Info, &(mfxVppParams.mfx.FrameInfo), sizeof(mfxFrameInfo));
sts = pVPPAllocator->Alloc(pVPPAllocator->pthis, &VppRequest, &VppResponse);
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
pVppSurfaces = new mfxFrameSurface1 [VppResponse.NumFrameActual];
EXIT_CHECK_POINTER(pVppSurfaces, MFX_ERR_MEMORY_ALLOC);
for (int i = 0; i < VppResponse.NumFrameActual; i++)
{
memset(&(pVppSurfaces[i]), 0, sizeof(mfxFrameSurface1));
MSDK_MEMCPY_VAR(pVppSurfaces[i].Info, &(mfxVppParams.mfx.FrameInfo), sizeof(mfxFrameInfo));
sts = pVPPAllocator->Lock(pVPPAllocator->pthis, VppResponse.mids[i], &(pVppSurfaces[i].Data));
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
}
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
sts = pmfxENC->Init(&mfxEncParams);
if (MFX_WRN_PARTIAL_ACCELERATION == sts)
{
TRACE("WARNING: partial acceleration\n");
MSDK_IGNORE_MFX_STS(sts, MFX_WRN_PARTIAL_ACCELERATION);
}
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
sts = pmfxVPP->Init(&mfxVppParams);
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
nEncodedDataBufferSize = mfxEncParams.mfx.FrameInfo.Width * mfxEncParams.mfx.FrameInfo.Height * 4;
sts = InitMfxBitstream(&mfxBS, nEncodedDataBufferSize);
EXIT_CHECK_RESULT_SAFE(sts, MFX_ERR_NONE, sts, WipeMfxBitstream(&mfxBS));
TRACE("Processing started\n");
Initialized = 1;
}
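// Per-frame path: pick free surfaces, copy the RGB4 frame in, convert, then encode.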
sts = MFX_ERR_NONE;
mfxFrameSurface1* pSurf = NULL;
mfxFrameSurface1* pVppSurf = NULL;
mfxU16 nEncSurfIdx = 0;
mfxU16 nVppSurfIdx = 0;
nEncSurfIdx = GetFreeSurface(pEncSurfaces, EncResponse.NumFrameActual);
EXIT_CHECK_ERROR(nEncSurfIdx, MSDK_INVALID_SURF_IDX, MFX_ERR_MEMORY_ALLOC);
pSurf = &pEncSurfaces[nEncSurfIdx];
pSurf->Data.TimeStamp = timestamp;
pSurf->Info.FrameId.ViewId = 0;
nVppSurfIdx = GetFreeSurface(pVppSurfaces, VppResponse.NumFrameActual);
EXIT_CHECK_ERROR(nVppSurfIdx, MSDK_INVALID_SURF_IDX, MFX_ERR_MEMORY_ALLOC);
pVppSurf = &pVppSurfaces[nVppSurfIdx];
DWORD DataSize = (nWidth * nHeight) * sizeof(DWORD);
char *pPtr = (char *)image.getBaseAddress();
memcpy( pVppSurf->Data.B, pPtr, DataSize );
pVppSurf->Data.TimeStamp = timestamp;
pVppSurf->Info.FrameId.ViewId = 0;
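// Run the RGB4->NV12 conversion, then encode the resulting NV12 surface.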
if(MFX_ERR_NONE == pmfxVPP->RunFrameVPPAsync(pVppSurf, pSurf, NULL, &VppSyncPoint))
sts = mfxSession.SyncOperation(VppSyncPoint, INFINITE);
if(MFX_ERR_NONE == pmfxENC->EncodeFrameAsync(NULL, pSurf, &mfxBS, &EncSyncP))
sts = mfxSession.SyncOperation(EncSyncP, INFINITE);
if (MFX_ERR_NONE == sts)
{
OutputBitStream.write(mfxBS.Data + mfxBS.DataOffset, mfxBS.DataLength );
mfxBS.DataOffset = 0;
mfxBS.DataLength = 0;
}
if(MFX_ERR_NONE == pmfxENC->EncodeFrameAsync(NULL, NULL, &mfxBS, &EncSyncP))
sts = mfxSession.SyncOperation(EncSyncP, INFINITE);
if (MFX_ERR_NONE == sts)
{
OutputBitStream.write(mfxBS.Data + mfxBS.DataOffset, mfxBS.DataLength );
mfxBS.DataOffset = 0;
mfxBS.DataLength = 0;
}
if (MFX_ERR_DEVICE_LOST == sts || MFX_ERR_DEVICE_FAILED == sts)
{
TRACE("\nERROR: Hardware device was lost or returned an unexpected error. Recovering...\n");
sts = pmfxENC->Close();
MSDK_IGNORE_MFX_STS(sts, MFX_ERR_NOT_INITIALIZED);
EXIT_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
sts = pmfxVPP->Close();
MSDK_IGNORE_MFX_STS(sts, MFX_ERR_NOT_INITIALIZED);
MSDK_CHECK_RESULT(sts, MFX_ERR_NONE, sts);
}
Exit:
if( Mode == 1 )
{
if( Initialized == 1 )
{
MSDK_SAFE_DELETE_ARRAY(pEncSurfaces);
MSDK_SAFE_DELETE_ARRAY(pVppSurfaces);
if (pMFXAllocator)
{
pMFXAllocator->Free(pMFXAllocator->pthis, &EncResponse);
}
if (pVPPAllocator)
{
pVPPAllocator->Free(pVPPAllocator->pthis, &VppResponse);
}
sts = MFX_ERR_NONE;
MSDK_ZERO_MEMORY(EncRequest);
MSDK_ZERO_MEMORY(VppRequest);
MSDK_SAFE_DELETE(pmfxENC);
MSDK_SAFE_DELETE(pmfxVPP);
mfxSession.Close();
MSDK_SAFE_DELETE(pmfxAllocatorParams);
WipeMfxBitstream(&mfxBS);
}
Initialized = 0;
return MFX_ERR_UNKNOWN;
}
return sts;
}
Thanks!
Hi Steven,
We do not provide specific benchmarks, since performance varies greatly with the workload and platform configuration.
That said, a 30 ms runtime for RGB->NV12 color space conversion is certainly very large for your workload.
On recent Core processor generations, the per-frame conversion time should be < 5 ms for a similar workload (just VPP, no raw file read, etc.).
For low latency, please make sure you configure both VPP and encoder with AsyncDepth=1. Also, for best HW acceleration performance, it is recommended that you use D3D memory surfaces throughout the pipeline. If your VPP input must be system memory, you can still configure the VPP output and encoder input surfaces as D3D. This will prevent internal copying between system memory and D3D memory.
Additionally, for best performance, delay synchronization as long as possible. In your case, sync on encode and skip the VPP sync, as in the sketch below.
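A rough sketch of those changes, using the variable names from your code (note that the D3D output path also requires a D3D frame allocator and a device handle set on the session, as the tutorial samples show):
// Sketch: low-latency, minimal-copy configuration.
mfxVppParams.AsyncDepth = 1;
mfxEncParams.AsyncDepth = 1;
// System-memory RGB4 into VPP, D3D NV12 out; the encoder reads the D3D surfaces directly.
mfxVppParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY;
mfxEncParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY;
// Sketch: per frame, chain VPP into encode and sync only on the encode sync point.
mfxSyncPoint vppSyncP = NULL, encSyncP = NULL;
sts = pmfxVPP->RunFrameVPPAsync(pVppSurf, pSurf, NULL, &vppSyncP);
if (MFX_ERR_NONE == sts)
sts = pmfxENC->EncodeFrameAsync(NULL, pSurf, &mfxBS, &encSyncP);
if (MFX_ERR_NONE == sts)
sts = mfxSession.SyncOperation(encSyncP, INFINITE); // single sync per frame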
Also, make sure you use the latest available Intel Graphics driver.
BTW, there is an RGB32->NV12->encode sample available as part of the Media SDK tutorial. It may be useful as a reference: http://software.intel.com/en-us/articles/intel-media-sdk-tutorial-simple-6-encode-d3d-vpp-preproc
Regards,
Petter
Petter,
Thanks for the prompt response. Will try out your suggestions and post my results.
Best Regards.
PS: Petter, sorry it took so long to give you feedback. The tutorial helped very much. It allowed me to take RGB frames and convert them to an H.264 elementary stream. Thank you.
Hi Petter,
I am curious how to configure the VPP to use system memory on the input side (a DirectX offscreen surface) and video memory on the output side. Using the sample_encode project, I attempted to modify the following line in
mfxStatus CEncodingPipeline::InitMfxVppParams(sInputParams *pInParams)
from:
m_mfxVppParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY;
to:
m_mfxVppParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_SYSTEM_MEMORY;
However, I get a "configuration not supported" error. What I am trying to do is use GetFrontBufferData to load directly into the surface that was allocated for the VPP in mfxFrameData and stored in MemId.
Thanks,
JoeT
Hi Joe,
If you want input to be located in system memory and output in D3D memory, then you need to set IOPattern as follows:
MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY
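In sample_encode terms, that would look something like the following (a sketch; it assumes the encoder parameters member in the same pipeline class is m_mfxEncParams):
// Sketch: VPP reads system-memory input and writes NV12 to D3D surfaces...
m_mfxVppParams.IOPattern = MFX_IOPATTERN_IN_SYSTEM_MEMORY | MFX_IOPATTERN_OUT_VIDEO_MEMORY;
// ...which the encoder then consumes directly from video memory.
m_mfxEncParams.IOPattern = MFX_IOPATTERN_IN_VIDEO_MEMORY;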
Please also explore the sample in the Media SDK tutorial I referred to earlier. You should be able to make the required changes to use system memory input by looking at the other tutorial samples that use system memory.
Regards,
Petter