#include "pch.h"

#include <windows.h>
#include <stdio.h>
#include "Image.h"
#include <ppl.h>
#include <thread>
#include <vector>
#include <climits>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <memory>

using namespace Concurrency;

// Resize back ends implemented in other translation units.
bool ResizeByIntel5_2(unsigned char* pSrcPixels, SIZE srcSize, int srcLineLength, RECT srcRoi,
                      unsigned char* pDestPixels, int destLineLength, SIZE destSize,
                      double xScale, double yScale, int inerpolation);
bool ResizeByIntel2019(unsigned char* pSrcPixels, SIZE srcSize, int srcLineLength, RECT srcRoi,
                       unsigned char* pDestPixels, int destLineLength, SIZE destSize,
                       double xScale, double yScale, int interpolation, int channels);

typedef unsigned long DWORD;
typedef unsigned short WORD;
typedef unsigned char BYTE;

BYTE* ResizeByIntel2019Mod_initSpec(int interpolation, int srcCols, int srcRows, int dstCols, int dstRows);
void ResizeByIntel2019Mod_free(BYTE* toFree);
BYTE* ResizeByIntel2019Mod_allocBuffer(int dstCols, int dstRows, int dstRowsLast, int channels, int numThreads,
                                       BYTE* pSpec, BYTE* pSpecLast, int* bufSize);
bool ResizeByIntel2019Mod_resize(unsigned char* pSrcPixels, SIZE srcSize, int srcStride,
                                 unsigned char* pDestPixels, SIZE dstSize, int dstStride,
                                 double xScale, double yScale, int interpolation, int channels,
                                 int ithr, BYTE* pSpec, BYTE* pBuff);

namespace
{
    // Closes a FILE* when the owning unique_ptr goes out of scope.
    struct FileCloser
    {
        void operator()(FILE* pFile) const
        {
            if (pFile)
                fclose(pFile);
        }
    };
    typedef std::unique_ptr<FILE, FileCloser> FilePtr;

    // Owns a block returned by ResizeByIntel2019Mod_initSpec / _allocBuffer and
    // releases it with ResizeByIntel2019Mod_free, so early returns cannot leak it.
    class IppBlock
    {
    public:
        IppBlock() : m_pBlock(nullptr) {}
        ~IppBlock() { reset(); }

        BYTE* get() const { return m_pBlock; }

        void reset(BYTE* pBlock = nullptr)
        {
            if (m_pBlock)
                ResizeByIntel2019Mod_free(m_pBlock);
            m_pBlock = pBlock;
        }

    private:
        IppBlock(const IppBlock&) = delete;
        IppBlock& operator=(const IppBlock&) = delete;

        BYTE* m_pBlock;
    };

    // Returns value, or 1 when value is zero or negative (guards divisors and worker counts).
    int AtLeastOne(int value)
    {
        return value < 1 ? 1 : value;
    }

    // Invokes worker(0) .. worker(workerCount - 1) and returns once all calls finished.
    // parallelMode == 1 uses one std::thread per index; any other value uses PPL parallel_for.
    template <typename Worker>
    void RunWorkers(int workerCount, int parallelMode, const Worker& worker)
    {
        if (parallelMode == 1)
        {
            std::vector<std::thread> threads;
            threads.reserve(workerCount);
            for (int index = 0; index < workerCount; index++)
            {
                threads.emplace_back([&worker, index]() { worker(index); });
            }
            for (auto& t : threads)
            {
                if (t.joinable())
                    t.join();
            }
        }
        else
        {
            parallel_for(0, workerCount, worker);
        }
    }
}

// Constructs an empty image: no pixel buffer, zero dimensions.
CImage::CImage()
{
    m_pData = nullptr;
    m_width = 0;
    m_height = 0;
    m_channels = 0;
}

// Allocates a zero-filled buffer of cols x |rows| x channels bytes.
// Any buffer the image already owned is released first. The sign of rows is
// stored as given (Write() uses it to choose the row order in the file).
void CImage::Create(int cols, int rows, int channels)
{
    delete[] m_pData;
    m_pData = nullptr;

    m_width = cols;
    m_height = rows;
    m_channels = channels;

    // Rows() is |m_height|, so a negative height no longer produces a bogus size.
    const size_t size = static_cast<size_t>(Cols()) * static_cast<size_t>(Rows()) * static_cast<size_t>(Channels());
    m_pData = new unsigned char[size];
    memset(m_pData, 0, size);
}

CImage::~CImage()
{
    delete[] m_pData;
}

// Returns the pixel buffer (nullptr while the image is empty).
unsigned char* CImage::pData()
{
    return m_pData;
}

// Number of rows; always non-negative even when the stored height is negative.
int CImage::Rows()
{
    return abs(m_height);
}

int CImage::Cols()
{
    return m_width;
}

int CImage::Channels()
{
    return m_channels;
}

// Total number of bytes in the pixel buffer.
int CImage::Size()
{
    return Rows() * Cols() * Channels();
}

// Loads an uncompressed BMP file into this image.
//
// Rows are stored top-to-bottom in memory without padding, whichever order the
// file uses. The signed biHeight from the file is kept in m_height.
// Returns false - leaving the image untouched - when the file cannot be opened,
// is not a BMP, has unusable dimensions / bit depth, or is truncated.
bool CImage::Read(const char* pFilename)
{
    if (!pFilename)
        return false;

    FilePtr file(fopen(pFilename, "rb"));
    if (!file)
        return false;
    FILE* const pFile = file.get();

    BITMAPFILEHEADER bmpFileH = { 0 };
    BITMAPINFOHEADER bmi = { 0 };
    if (fread(&bmpFileH, sizeof(bmpFileH), 1, pFile) != 1 ||
        fread(&bmi, sizeof(bmi), 1, pFile) != 1)
    {
        return false;
    }
    if (bmpFileH.bfType != 0x4D42) // "BM"
        return false;

    const int width = bmi.biWidth;
    const int height = bmi.biHeight; // negative => rows are stored top-down in the file
    const int channels = bmi.biBitCount / 8;
    const long long rows64 = height < 0 ? -static_cast<long long>(height) : static_cast<long long>(height);
    if (width <= 0 || rows64 == 0 || channels <= 0)
        return false;

    // Size() returns int, so refuse anything that would not fit.
    const unsigned long long total = static_cast<unsigned long long>(width) *
                                     static_cast<unsigned long long>(rows64) *
                                     static_cast<unsigned long long>(channels);
    if (total > static_cast<unsigned long long>(INT_MAX))
        return false;

    const int rows = static_cast<int>(rows64);
    const int lineLen = width * channels;
    const long padding = ((static_cast<long>(lineLen) + 3) & ~3L) - lineLen; // file rows are 4-byte aligned

    // Prefer the pixel offset recorded in the file header; fall back to the
    // fixed layout (headers, plus a 1024-byte palette for 8-bit images).
    const long headerBytes = static_cast<long>(sizeof(bmpFileH) + sizeof(bmi));
    long pixelOffset = headerBytes + (channels == 1 ? 1024 : 0);
    if (bmpFileH.bfOffBits >= static_cast<DWORD>(headerBytes) && bmpFileH.bfOffBits <= 0x7FFFFFFFUL)
        pixelOffset = static_cast<long>(bmpFileH.bfOffBits);
    if (fseek(pFile, pixelOffset, SEEK_SET) != 0)
        return false;

    std::unique_ptr<unsigned char[]> pixels(new unsigned char[static_cast<size_t>(total)]);

    const bool topDown = height < 0;
    for (int fileRow = 0; fileRow < rows; fileRow++)
    {
        const int memRow = topDown ? fileRow : rows - 1 - fileRow;
        unsigned char* pDest = pixels.get() + static_cast<size_t>(memRow) * lineLen;
        if (fread(pDest, sizeof(char), lineLen, pFile) != static_cast<size_t>(lineLen))
            return false;
        // Skip the alignment bytes; tolerate files that omit them after the final row.
        if (padding > 0 && fileRow + 1 < rows)
            fseek(pFile, padding, SEEK_CUR);
    }

    // Commit only after the whole file was read successfully.
    delete[] m_pData;
    m_pData = pixels.release();
    m_width = width;
    m_height = height;
    m_channels = channels;
    return true;
}

// Saves the image as an uncompressed BMP file.
//
// A positive stored height writes the rows bottom-up, a negative one top-down
// (biHeight keeps the sign). Single-channel images get a 256-entry grayscale
// palette. Returns false when the image is empty or any file operation fails.
bool CImage::Write(const char* pFilename)
{
    const int rows = Rows();
    const int lineLen = Stride();
    if (!pFilename || !m_pData || Cols() <= 0 || rows <= 0 || m_channels <= 0)
        return false;

    FilePtr file(fopen(pFilename, "wb"));
    if (!file)
        return false;
    FILE* const pFile = file.get();

    const long alignedLineLen = (static_cast<long>(lineLen) + 3) & ~3L;
    const DWORD paletteBytes = (m_channels == 1) ? 1024 : 0;
    const DWORD pixelBytes = static_cast<DWORD>(alignedLineLen) * static_cast<DWORD>(rows);

    BITMAPFILEHEADER BmFileH;
    memset(&BmFileH, 0, sizeof(BITMAPFILEHEADER));
    memcpy(&(BmFileH.bfType), "BM", 2);
    BmFileH.bfOffBits = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER) + paletteBytes;
    BmFileH.bfSize = BmFileH.bfOffBits + pixelBytes;
    // The file is padded with trailing zero bytes to a multiple of four.
    const int BmFileAlignmentBytes = (4 - (BmFileH.bfSize % 4)) % 4;
    BmFileH.bfSize += BmFileAlignmentBytes;
    BmFileH.bfReserved1 = 0;
    BmFileH.bfReserved2 = 0;

    if (fwrite(&BmFileH, sizeof(BmFileH), 1, pFile) != 1)
        return false;

    BITMAPINFOHEADER bmi = { 0 };
    bmi.biSize = sizeof(BITMAPINFOHEADER);
    bmi.biWidth = Cols();
    bmi.biHeight = m_height;
    bmi.biPlanes = 1;
    bmi.biBitCount = m_channels * 8;
    bmi.biCompression = 0;
    bmi.biXPelsPerMeter = 0;
    bmi.biYPelsPerMeter = 0;
    bmi.biClrUsed = (m_channels == 1) ? 256 : 0;
    bmi.biClrImportant = 0; // zero means "all colors are required"
    if (fwrite(&bmi, sizeof(bmi), 1, pFile) != 1)
        return false;

    if (m_channels == 1)
    {
        // bfOffBits accounts for the palette, so it has to be present in the file.
        BYTE palette[1024];
        for (int level = 0; level < 256; level++)
        {
            palette[level * 4 + 0] = static_cast<BYTE>(level); // blue
            palette[level * 4 + 1] = static_cast<BYTE>(level); // green
            palette[level * 4 + 2] = static_cast<BYTE>(level); // red
            palette[level * 4 + 3] = 0;                        // reserved
        }
        if (fwrite(palette, sizeof(BYTE), sizeof(palette), pFile) != sizeof(palette))
            return false;
    }

    // Zero-initialised so the alignment bytes of every row are deterministic.
    std::vector<BYTE> line(static_cast<size_t>(alignedLineLen), 0);
    const bool yFlip = m_height > 0;
    for (int fileRow = 0; fileRow < rows; fileRow++)
    {
        const int memRow = yFlip ? rows - 1 - fileRow : fileRow;
        memcpy(line.data(), m_pData + static_cast<size_t>(memRow) * lineLen, lineLen);
        if (fwrite(line.data(), sizeof(char), line.size(), pFile) != line.size())
            return false;
    }

    const char FileAlignmentBuff[] = { 0, 0, 0 };
    if (fwrite(FileAlignmentBuff, sizeof(char), BmFileAlignmentBytes, pFile) != static_cast<size_t>(BmFileAlignmentBytes))
        return false;

    // Close explicitly so a failed flush is reported to the caller.
    return fclose(file.release()) == 0;
}

// Bytes per row of the in-memory buffer (rows are not padded).
int CImage::Stride()
{
    return m_width * m_channels;
}

// Resizes the image into a newly allocated CImage, splitting the destination
// into one horizontal band per worker and processing the bands with parallel_for.
//
// intelVersion == 0 selects ResizeByIntel5_2, anything else ResizeByIntel2019.
// numThreads below 1 is treated as 1. elapseTime may be null; otherwise it
// receives the GetTickCount() delta spent resizing.
// The caller owns the returned image. It is left empty (no buffer) when the
// source has no pixels or a destination dimension rounds to zero.
CImage* CImage::Resize(float xScale, float yScale, int inerpolation, int numThreads, int intelVersion, int* elapseTime)
{
    CImage* dstImage = new CImage();
    if (elapseTime)
        *elapseTime = 0;

    const int dstCols = static_cast<int>(Cols() * xScale + 0.5);
    const int dstRows = static_cast<int>(Rows() * yScale + 0.5);
    if (!pData() || dstCols <= 0 || dstRows <= 0)
        return dstImage;

    const int workers = AtLeastOne(numThreads);
    const double verRatio = static_cast<double>(Rows()) / static_cast<double>(dstRows);

    dstImage->Create(dstCols, dstRows, m_channels);
    const int dstStride = dstImage->Stride();
    unsigned char* pSrcPixels = pData();
    const int srcStride = Stride();
    unsigned char* pDstPixels = dstImage->pData();

    const int tileheight = dstRows / workers + 1;

    const DWORD start = GetTickCount();
    parallel_for(0, workers, [&](int i)
    {
        const int destTop = i * tileheight;
        if (destTop >= dstRows)
            return; // more workers than bands: nothing left for this one

        const int destBottom = (destTop + tileheight < dstRows) ? destTop + tileheight : dstRows;
        int srcTop = static_cast<int>(destTop * verRatio);
        int srcBottom = static_cast<int>(destBottom * verRatio);
        if (destBottom == dstRows)
        {
            // The band that reaches the bottom is anchored to the last source row.
            srcBottom = Rows();
            srcTop = srcBottom - static_cast<int>((destBottom - destTop) * verRatio);
            if (srcTop < 0)
                srcTop = 0;
        }

        SIZE srcSize = { Cols(), srcBottom - srcTop };
        SIZE destSize = { dstCols, destBottom - destTop };
        RECT srcRoi = { 0, 0, Cols(), srcBottom - srcTop };
        BYTE* pS = pSrcPixels + srcTop * srcStride;
        BYTE* pD = pDstPixels + destTop * dstStride;

        if (intelVersion == 0)
        {
            ResizeByIntel5_2(pS, srcSize, srcStride, srcRoi, pD, dstStride, destSize, xScale, 1 / verRatio, inerpolation);
        }
        else
        {
            ResizeByIntel2019(pS, srcSize, srcStride, srcRoi, pD, dstStride, destSize, xScale, 1 / verRatio, inerpolation, m_channels);
        }
    });

    if (elapseTime)
        *elapseTime = static_cast<int>(GetTickCount() - start);
    return dstImage;
}

// Resizes the image using fixed-height destination tiles (yScale * 100 rows
// each) that are distributed over the workers as contiguous runs.
//
// intelVersion == 0 calls ResizeByIntel5_2 per tile; otherwise the
// ResizeByIntel2019Mod_* spec/buffer API is used, with one spec for full tiles
// and one for the final tile. parallelMode == 1 runs the workers on std::thread,
// any other value on parallel_for. numThreads below 1 is treated as 1 and
// elapseTime may be null.
// The caller owns the returned image. When spec/buffer initialisation fails a
// message is printed and the (zero-filled) destination is returned unresized.
CImage* CImage::ResizeMod(float xScale, float yScale, int inerpolation, int numThreads, int intelVersion, int parallelMode, int* elapseTime)
{
    CImage* dstImage = new CImage();
    if (elapseTime)
        *elapseTime = 0;

    const int dstCols = static_cast<int>(Cols() * xScale + 0.5);
    const int dstRows = static_cast<int>(Rows() * yScale + 0.5);
    if (!pData() || dstCols <= 0 || dstRows <= 0)
        return dstImage;

    const int workers = AtLeastOne(numThreads);

    // Tile geometry. The last tile takes whatever remains, which is a full tile
    // when dstRows divides evenly - it is never zero rows high.
    const int tileheight = AtLeastOne(static_cast<int>(yScale * 100.0));
    const int numIters = (dstRows + tileheight - 1) / tileheight;
    const int tileheightlast = dstRows - tileheight * (numIters - 1);

    // iters[t] .. iters[t + 1] is the half-open range of tile indices of worker t;
    // the first (numIters % workers) workers get one extra tile.
    const int iterPerThread = numIters / workers;
    const int iterPerThreadRem = numIters % workers;
    std::vector<int> iters(workers + 1, 0);
    for (int i = 1; i < workers + 1; i++)
    {
        iters[i] = iters[i - 1] + iterPerThread;
        if (i <= iterPerThreadRem)
            iters[i]++;
    }

    const double verRatio = static_cast<double>(Rows()) / static_cast<double>(dstRows);

    dstImage->Create(dstCols, dstRows, m_channels);
    const int dstStride = dstImage->Stride();
    unsigned char* pSrcPixels = pData();
    const int srcStride = Stride();
    unsigned char* pDstPixels = dstImage->pData();

    const int srctileheight = static_cast<int>(std::floor(tileheight * verRatio));
    int srctileheightlast = Rows() - srctileheight * (numIters - 1);
    if (srctileheightlast <= 0)
    {
        srctileheightlast = static_cast<int>(std::floor(tileheightlast * verRatio));
    }

    const DWORD start = GetTickCount();

    IppBlock oneSpec;
    IppBlock oneSpecLast;
    IppBlock oneBuff;
    int oneBuffSize = 0;
    if (intelVersion > 0)
    {
        oneSpec.reset(ResizeByIntel2019Mod_initSpec(inerpolation, Cols(), srctileheight, dstCols, tileheight));
        if (oneSpec.get() == nullptr)
        {
            printf("IPP initialization error 1\n");
            return dstImage;
        }

        oneSpecLast.reset(ResizeByIntel2019Mod_initSpec(inerpolation, Cols(), srctileheightlast, dstCols, tileheightlast));
        if (oneSpecLast.get() == nullptr)
        {
            printf("IPP initialization error 2\n");
            return dstImage;
        }

        oneBuff.reset(ResizeByIntel2019Mod_allocBuffer(dstCols, tileheight, tileheightlast, m_channels, workers,
                                                       oneSpec.get(), oneSpecLast.get(), &oneBuffSize));
        if ((oneBuff.get() == nullptr) || (oneBuffSize == 0))
        {
            printf("IPP initialization error 3\n");
            return dstImage;
        }
    }

    // Processes every tile assigned to worker i, using that worker's slice of the work buffer.
    auto worker = [&](int i)
    {
        BYTE* pCurrBuff = oneBuff.get() ? oneBuff.get() + i * oneBuffSize : nullptr;
        for (int curIndex = iters[i]; curIndex < iters[i + 1]; curIndex++)
        {
            const int destTop = curIndex * tileheight;
            int destBottom = destTop + tileheight;
            int srcTop = curIndex * srctileheight;
            int srcBottom = srcTop + srctileheight;
            BYTE* pSpecCurr = oneSpec.get();
            if (curIndex == numIters - 1)
            {
                // Final tile: anchored to the bottom of both images, with its own spec.
                destBottom = dstRows;
                srcBottom = Rows();
                srcTop = srcBottom - srctileheightlast;
                pSpecCurr = oneSpecLast.get();
            }

            SIZE srcSize = { Cols(), srcBottom - srcTop };
            SIZE destSize = { dstCols, destBottom - destTop };
            RECT srcRoi = { 0, 0, Cols(), srcBottom - srcTop };
            BYTE* pS = pSrcPixels + srcTop * srcStride;
            BYTE* pD = pDstPixels + destTop * dstStride;

            if (intelVersion == 0)
            {
                ResizeByIntel5_2(pS, srcSize, srcStride, srcRoi, pD, dstStride, destSize, xScale, 1 / verRatio, inerpolation);
            }
            else
            {
                ResizeByIntel2019Mod_resize(pS, srcSize, srcStride, pD, destSize, dstStride, xScale, yScale,
                                            inerpolation, m_channels, curIndex, pSpecCurr, pCurrBuff);
            }
        }
    };
    RunWorkers(workers, parallelMode, worker);

    // Release before taking the end time so the measurement covers the same work as before.
    oneSpec.reset();
    oneSpecLast.reset();
    oneBuff.reset();

    if (elapseTime)
        *elapseTime = static_cast<int>(GetTickCount() - start);
    return dstImage;
}

// Resizes the image using exactly one destination tile per worker; the last
// tile absorbs the rows left over by the integer division.
//
// Back-end selection, parallelMode, elapseTime and failure handling match
// ResizeMod(). The worker count is numThreads clamped to [1, dstRows] so that
// no tile is ever zero rows high.
CImage* CImage::ResizeMod2(float xScale, float yScale, int inerpolation, int numThreads, int intelVersion, int parallelMode, int* elapseTime)
{
    CImage* dstImage = new CImage();
    if (elapseTime)
        *elapseTime = 0;

    const int dstCols = static_cast<int>(Cols() * xScale + 0.5);
    const int dstRows = static_cast<int>(Rows() * yScale + 0.5);
    if (!pData() || dstCols <= 0 || dstRows <= 0)
        return dstImage;

    int workers = AtLeastOne(numThreads);
    if (workers > dstRows)
        workers = dstRows;

    const int tileheight = dstRows / workers;
    const int tileheightlast = dstRows - tileheight * (workers - 1);

    const double verRatio = static_cast<double>(Rows()) / static_cast<double>(dstRows);

    dstImage->Create(dstCols, dstRows, m_channels);
    const int dstStride = dstImage->Stride();
    unsigned char* pSrcPixels = pData();
    const int srcStride = Stride();
    unsigned char* pDstPixels = dstImage->pData();

    const int srctileheight = static_cast<int>(tileheight * verRatio + 0.5);
    int srctileheightlast = Rows() - srctileheight * (workers - 1);
    if (srctileheightlast <= 0)
    {
        // Same fallback as ResizeMod(): derive the height from the last destination tile.
        srctileheightlast = static_cast<int>(tileheightlast * verRatio + 0.5);
    }

    const DWORD start = GetTickCount();

    IppBlock oneSpec;
    IppBlock oneSpecLast;
    IppBlock oneBuff;
    int oneBuffSize = 0;
    if (intelVersion > 0)
    {
        oneSpec.reset(ResizeByIntel2019Mod_initSpec(inerpolation, Cols(), srctileheight, dstCols, tileheight));
        if (oneSpec.get() == nullptr)
        {
            printf("IPP initialization error 1\n");
            return dstImage;
        }

        oneSpecLast.reset(ResizeByIntel2019Mod_initSpec(inerpolation, Cols(), srctileheightlast, dstCols, tileheightlast));
        if (oneSpecLast.get() == nullptr)
        {
            printf("IPP initialization error 2\n");
            return dstImage;
        }

        oneBuff.reset(ResizeByIntel2019Mod_allocBuffer(dstCols, tileheight, tileheightlast, m_channels, workers,
                                                       oneSpec.get(), oneSpecLast.get(), &oneBuffSize));
        if ((oneBuff.get() == nullptr) || (oneBuffSize == 0))
        {
            printf("IPP initialization error 3\n");
            return dstImage;
        }
    }

    // Processes the single tile that belongs to worker i.
    auto worker = [&](int i)
    {
        const int curIndex = i;
        const int destTop = curIndex * tileheight;
        int destBottom = destTop + tileheight;
        int srcTop = curIndex * srctileheight;
        int srcBottom = srcTop + srctileheight;
        BYTE* pSpecCurr = oneSpec.get();
        BYTE* pCurrBuff = oneBuff.get() ? oneBuff.get() + i * oneBuffSize : nullptr;
        if (curIndex == workers - 1)
        {
            // Final tile: anchored to the bottom of both images, with its own spec.
            destBottom = dstRows;
            srcBottom = Rows();
            srcTop = srcBottom - srctileheightlast;
            pSpecCurr = oneSpecLast.get();
        }

        SIZE srcSize = { Cols(), srcBottom - srcTop };
        SIZE destSize = { dstCols, destBottom - destTop };
        RECT srcRoi = { 0, 0, Cols(), srcBottom - srcTop };
        BYTE* pS = pSrcPixels + srcTop * srcStride;
        BYTE* pD = pDstPixels + destTop * dstStride;

        if (intelVersion == 0)
        {
            ResizeByIntel5_2(pS, srcSize, srcStride, srcRoi, pD, dstStride, destSize, xScale, 1 / verRatio, inerpolation);
        }
        else
        {
            ResizeByIntel2019Mod_resize(pS, srcSize, srcStride, pD, destSize, dstStride, xScale, yScale,
                                        inerpolation, m_channels, curIndex, pSpecCurr, pCurrBuff);
        }
    };
    RunWorkers(workers, parallelMode, worker);

    // Release before taking the end time so the measurement covers the same work as before.
    oneSpec.reset();
    oneSpecLast.reset();
    oneBuff.reset();

    if (elapseTime)
        *elapseTime = static_cast<int>(GetTickCount() - start);
    return dstImage;
}