- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
My code fails to run even though it compiles without errors. Any help? See the code snippet below.
void tom::Transform(void* btr)
{
__declspec(align(16)) short* block =(short*)btr;
int j;
if(_mode != QuantOnly) ///< 2 = Q only.
{
__asm
{
mov eax,block // load first row
movq mm1, [eax] // load fist row
movq mm2, [eax+8] // load second row
movq mm3, [eax+16] // load the third row
movq mm4, [eax+24] //load the fourth row
// Step one
punpcklwd mm1, mm2
punpcklwd mm3, mm4
movq mm5, mm1
punpckldq mm1, mm3
punpckhdq mm5, mm3
movq [ecx], mm1
movq [ecx+8], mm5
movq mm1, [eax]
movq mm2, [eax+8]
movq mm3, [eax+16]
movq mm4, [eax+24]
// Step two
punpckhwd mm1, mm2
punpckhwd mm3, mm4
movq mm5, mm1
punpckldq mm1, mm3
punpckhdq mm5, mm3
movq [ecx+16], mm1
movq [ecx+24], mm5
// buterfly
mov eax,block
movq mm1, [ecx]
movq mm2, [ecx+8]
movq mm3, [ecx+16]
movq mm4, [ecx+24]
paddw mm1, mm4 // mm1 + mm4
paddw mm2, mm3 // mm2+ mm3
movq [eax], mm1
movq [eax+8], mm2
movq mm1, [ecx]
movq mm2, [ecx+8]
psubw mm2, mm3 // mm1 - mm2
psubw mm1, mm4 // mm0 - mm3
movq [eax+16], mm2
movq [eax+24], mm1
mov edx,block
movq mm1,[eax]
movq mm2,[eax+8]
movq mm3,[eax+16]
movq mm4,[eax+24]
paddw mm1, mm2// mm0 + mm1
psllw mm4,1 //mm4<<1
paddw mm3, mm4 // mm3+ mm4<<1
movq [edx], mm1
movq [edx+8], mm3
movq mm1,[eax]
movq mm2, [eax+8]
psubw mm1, mm2// mm0 - mm1
movq mm3, [eax+16]
movq mm4, [eax+24]
psllw mm3,1 //mm3<<1
psubw mm4, mm3 // mm4 - mm3<<1
movq [edx+16], mm1
movq [edx+24], mm4
emms
}
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
A program part that can be compiled may exhibit unsatisfied externals at link time, and execution errors at run-time.
Please provide a more complete description of what else goes into the program, which compiler was used, the compiler options in effect, the operating system, and what you mean by "fail to run".
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
The declaration `__declspec(align(16)) short* block = (short*)btr;` only declares that the pointer variable "block" itself is aligned to a 16-byte boundary.
It neither declares nor enforces that what block points to is aligned (in this case, what the void* btr points to). Your code does not declare that btr must be aligned to a 16-byte boundary.
However, since your asm code references memory only via movq, which has no alignment requirement, alignment has no bearing on your problem.
I suggest you step through the code and see what else is wrong.
Jim Dempsey
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
You are using ECX as a pointer to an output buffer, but you are not setting it to any value beforehand.
This is obviously C++ code, so ECX should already contain the `this` pointer on entry to the member function, but:
1. I do not think that is guaranteed behavior -- IMO compiler is free to save/restore register values as it sees fit.
2. You did not show the class definition so your intention is not clear -- does class object have adequate buffer which starts at this address?

- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page