Developing Games on Intel Graphics
If you are gaming on graphics integrated in your Intel Processor, this is the place for you! Find answers to your questions or post your issues with PC games
492 Discussions

Intel UHD 设备上使用compute shader 将RGB转YUV 输出无效 显示绿屏

OlderW
Novice
3,901 Views

The application developed by Unity engine uses computer shader to convert RGB images into YUV data。

Compute Shader code

 

Texture2D<float4> InputTexture;
RWBuffer<uint> OutputBuffer;
bool useLinear;
#define GAMA_1 0.45

uint CalcValueY(uint yIndex, uint width)
{
	uint3 tid;
	tid.x = yIndex % width;
	tid.y = yIndex / width;
	vector fRGB = InputTexture[float2(tid.x,tid.y)];
	if (useLinear)
	{
	    fRGB.r = pow(abs(fRGB.r), GAMA_1);
	    fRGB.g = pow(abs(fRGB.g), GAMA_1);
	    fRGB.b = pow(abs(fRGB.b), GAMA_1);
	}
	
	uint rrr = fRGB.r * 255;
	uint ggg = fRGB.g * 255;
	uint bbb = fRGB.b * 255;
	uint ret = (0.299 * rrr + 0.587 * ggg + 0.114 * bbb);
	return ret;
}

uint CalcValueU(uint uIndex, uint width)
{
	uint3 tid;
	tid.x = (uIndex % (width / 2)) * 2;
	tid.y = (uIndex / (width / 2)) * 2;
	vector fRGB = InputTexture[float2(tid.x,tid.y)];
	if (useLinear)
	{
	    fRGB.r = pow(abs(fRGB.r), GAMA_1);
	    fRGB.g = pow(abs(fRGB.g), GAMA_1);
	    fRGB.b = pow(abs(fRGB.b), GAMA_1);
	}
	
	uint rrr = fRGB.r * 255;
	uint ggg = fRGB.g * 255;
	uint bbb = fRGB.b * 255;
    uint ret = (-0.169 * rrr - 0.331 * ggg + 0.5 * bbb + 128);
	return ret;
}

uint CalcValueV(uint vIndex, uint width)
{
	uint3 tid;
	tid.x = (vIndex % (width / 2)) * 2;
	tid.y = (vIndex / (width / 2)) * 2 + 1;
	vector fRGB = InputTexture[float2(tid.x,tid.y)];
	if (useLinear)
	{
	    fRGB.r = pow(abs(fRGB.r), GAMA_1);
	    fRGB.g = pow(abs(fRGB.g), GAMA_1);
	    fRGB.b = pow(abs(fRGB.b), GAMA_1);
	}
	
	uint rrr = fRGB.r * 255;
	uint ggg = fRGB.g * 255;
	uint bbb = fRGB.b * 255;
    uint ret = (0.5 * rrr - 0.419 * ggg - 0.081 * bbb + 128);
	return ret;
}

[numthreads(8, 8, 1)]
void ReadYUV420Data_Reverse(uint3 id : SV_DispatchThreadID)
{
	uint w, h;
	InputTexture.GetDimensions(w, h);
	uint ySize = w * h / 4;				
	uint index = (id.x + id.y * w);		
	
	if (index < ySize)	
	{
		uint yIndex = index * 4;
		uint yy0 = CalcValueY(yIndex, w);
		uint yy1 = CalcValueY(yIndex + 1, w);
		uint yy2 = CalcValueY(yIndex + 2, w);
		uint yy3 = CalcValueY(yIndex + 3, w);
		
		uint yyy = (yy0 & 0x000000ff) | ((yy1<<8) & 0x0000ff00) | ((yy2<<16) & 0x00ff0000) | ((yy3<<24) & 0xff000000);
		OutputBuffer[index] = yyy;
	}
	else if ((index >= ySize) && index < (ySize * 5 / 4))		
	{
		uint uIndex = index * 4 - ySize * 4;
		uint uu0 = CalcValueU(uIndex, w);
		uint uu1 = CalcValueU(uIndex + 1, w);
		uint uu2 = CalcValueU(uIndex + 2, w);
		uint uu3 = CalcValueU(uIndex + 3, w);
		
		uint uuu = (uu0 & 0x000000ff) | ((uu1<<8) & 0x0000ff00) | ((uu2<<16) & 0x00ff0000) | ((uu3<<24) & 0xff000000);
		OutputBuffer[index] = uuu;
	}
	else if ((index >= (ySize * 5 / 4)) && index < (ySize * 3 / 2))		//V分量
	{
		uint vIndex = index * 4 - ySize * 5;
		uint vv0 = CalcValueV(vIndex, w);
		uint vv1 = CalcValueV(vIndex + 1, w);
		uint vv2 = CalcValueV(vIndex + 2, w);
		uint vv3 = CalcValueV(vIndex + 3, w);
		
		uint vvv = (vv0 & 0x000000ff) | ((vv1<<8) & 0x0000ff00) | ((vv2<<16) & 0x00ff0000) | ((vv3<<24) & 0xff000000);
		OutputBuffer[index] = vvv;
	}
	else if (index >= (ySize * 3 / 2))
	{
		return;
	}
}

 

C# call code:

private byte[] GetYUVData(int width,int height) 
        {
            byteSize = width * height * 3 / 2 ;
            uintSize = byteSize / 4;

            if (m_outputBuffer == null)
            {
                m_outputBuffer = new ComputeBuffer(uintSize, sizeof(uint));

                if (m_yuv420RawData == null)
                {
                    m_yuv420RawData = new uint[uintSize];
                }
                m_outputBuffer.SetData(m_yuv420RawData);
            }

            if (m_yuv420RawData ==null || uintSize != m_yuv420RawData.Length)
            {
                if (m_outputBuffer != null)
                {
                    m_outputBuffer.Release();
                    m_outputBuffer = null;
                }
                m_outputBuffer = new ComputeBuffer(uintSize, sizeof(uint));

                m_outputBuffer.SetData(m_yuv420RawData);
            }


            int handle = shader.FindKernel("ReadYUV420Data_Reverse");

            if (rt != null)
            {
                rt.DiscardContents();

                if (rt.width != width || rt.height != height)
                {

                    RenderTexture.ReleaseTemporary(rt);
                    rt = null;
                }
            }

            if (rt == null )
            {
                rt = RenderTexture.GetTemporary(width,height,24, RenderTextureFormat.Default, RenderTextureReadWrite.Default);
                rt.enableRandomWrite= true;
                rt.name = "ScreenFrameLiving";
            }

            ScreenCapture.CaptureScreenshotIntoRenderTexture(rt);

          

            shader.SetTexture(handle, "InputTexture", rt);
            shader.SetBuffer(handle, "OutputBuffer", m_outputBuffer);
            shader.SetBool("useLinear", true);

            shader.Dispatch(handle, width / 8, height / 8, 1);

            if (result == null || result.Length != byteSize)
            {
                result = new byte[byteSize];
            }

            m_outputBuffer.GetData(m_yuv420RawData);

            MeetingNativeBridge.CopyUintToByte(m_yuv420RawData, byteSize, result);
            
            return result;
        }

This code can obtain data normally on NVIDIA device, but on Intel UHD Graphics 770 device with ComputeBuffer.GetData failed to obtain valid data. And save tests by ScreenCapture. CaptureScreenshotIntoRenderTexture interface to get to the rt is normal pictures, and rt is 1920 * 1080 pixels.

What causes this and how can I fix it?

Labels (4)
0 Kudos
1 Solution
Arturo_Intel
Moderator
3,653 Views

Thank you for the info @OlderW !

I have news, I found the problem and the fix is really simple

You just need to change the type of the OutputBuffer in your shader. from RWbuffer to RWStructuredBuffer.

RWbuffer is basically a 1D texture, the reason for this is in the past (DX10 times)  the HW did not allow any random access to the memory except via the texturing unit, so it was very common to use the 1D texture as a buffer.

I asume, Nvidia API/Drivers change the type of the buffer to a structured one automatically when the call arrives to the HW. 

 

With this change it will work perfectly in both HW. 

Please try it and let me know how it goes.

 

 

View solution in original post

11 Replies
karen_intel
Moderator
3,848 Views

Hi @OlderW  thank you for posting your findings

Questions: Can you help us share your SSU log and the Unity version you're using?

That way we can start debugging this behavior along with you. We will start investigating on our end, but please send us the info.

Talk to you soon

Karen

0 Kudos
karen_intel
Moderator
3,812 Views

BTW @OlderW if you could share your Unity project that will help us a lot

Hope to hear from you soon!

Karen

0 Kudos
OlderW
Novice
3,735 Views
0 Kudos
karen_intel
Moderator
3,711 Views

Hi @OlderW 
Thank you for the project  
I have already tested it in Unity, but I couldn't find the version you mention (2021.3.4f1c1) it says it is not a valid version.
However I could test with Unity v.2021.3.9f1 and I'm not getting the ComputeBuffer.GetData failed to obtain valid data
In the following screenshot you can see the tests were saved by screen capture in both .bmp and .png
I am using a 13900K desktop machine + ARC 770 + v 4255 Graphics driver
It was also tested using the UHD 770 iGPU with the same graphics driver and I did get output on both scenarios.

Unity SS Output with Intel Gfx v.4255Unity SS Output with Intel Gfx v.4255

 

Can you try with this Unity + driver combo? Please let me know if this is the expected output or what else can I do to help

Talk to you soon

Karen

0 Kudos
OlderW
Novice
3,693 Views

Hi @karen_intel 

The display adapter for the faulty device is UHD Graphics only.

image.png

 

I have tested that there are devices like the following that do not make mistakes.

image2.png

0 Kudos
OlderW
Novice
3,691 Views

You can use the following steps to get the same version of Unity, but I don't think it has much to do with the version of Unity.

image11.png

image12.png

image13.png

0 Kudos
OlderW
Novice
3,801 Views

Hi @karen_intel I'm sorry to reply you so late.

I can't use the equipment these days. I will provide relevant log information as required by you next week.

Unity uses version 2021.3.4f1c1.

W

Arturo_Intel
Moderator
3,673 Views

hey @OlderW thanks for the clarification on the actual issue.

We were able to reproduce this problem in our lab. I was reviewing the project code you share and debug it all day yesterday. Here are my findings:

The PNG creation is working on both GPUs, meaning that the ScreenCapture data is ok.

I modify a little bit the shader to use the c float definition (X.XXf), but still the problem comes up.

I assume then the problem was in the BMP creation itself, so I create a c++ program to generate from 0 (no 3rd party libs) a BMP image, that worked as intended. No problem in the BMP creation.

The only thing i could not verify/debug properly was the NDLivingModule Package (LiveAdapt.dll) used to convert the uint[] ComputerBuffer object (m_yv420RawData) to the byte[ ] result object. I didnt find any documentation online.

The lib could be using Nvidia API to do the convertions. 

I also need to verify if the data obtained from the computer buffer object is the same in both cases.

I am also working on create a convertor from uint to byte from scratch, maybe that would help.

 

Keep you post!

 

 

 

 

0 Kudos
OlderW
Novice
3,661 Views

@Arturo_Intel  I hope the following information can help you again.
LiveAdapt is my encapsulation library,I just called the memory copy.

You can also modify to call your own C++ library for verification.

1.png

0 Kudos
Arturo_Intel
Moderator
3,654 Views

Thank you for the info @OlderW !

I have news, I found the problem and the fix is really simple

You just need to change the type of the OutputBuffer in your shader. from RWbuffer to RWStructuredBuffer.

RWbuffer is basically a 1D texture, the reason for this is in the past (DX10 times)  the HW did not allow any random access to the memory except via the texturing unit, so it was very common to use the 1D texture as a buffer.

I asume, Nvidia API/Drivers change the type of the buffer to a structured one automatically when the call arrives to the HW. 

 

With this change it will work perfectly in both HW. 

Please try it and let me know how it goes.

 

 

OlderW
Novice
3,585 Views

@Arturo_Intel According to your modification, solved the problem!

I will learn the difference between RWStructuredBuffer and RWbuffer.

Thank you for your help.

 

0 Kudos
Reply