- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
Can someone help me with the following problem?
I have the following SSE code, written for the GCC compiler.
// GCC (AT&T-syntax) inline-asm fragment: for each 16-byte group starting at
// tsh+i, sum five 16-bit rows into xmm0 and store the result to tshv+i; then,
// for each later row j, add row j, subtract row j-5, and store to row j-4 of
// tshv (a running 5-row sum).
// NOTE(review): the loop headers below were truncated when the page was
// extracted, so the loop bounds and step are not visible here.
// NOTE(review): memalign is documented as memalign(alignment, size); the two
// arguments below look swapped -- confirm.
tsh=(short int *)memalign(w*h*sizeof(short int), 16);
// tsh is filled in beforehand by another function
tshv=(short int *)memalign(w*tableH*sizeof(short int), 16);
for (i=0; i{
// First asm statement: %0 = tsh+i (source address), %1 = 2*w (byte step
// between successive loads), %2 = tshv+i (destination address).
// NOTE(review): %0 is declared as an input but is modified by addl; GCC
// requires modified operands to be declared as outputs (e.g. "+r"). xmm0 and
// memory are also not listed as clobbers -- confirm against the GCC manual.
// NOTE(review): addl on a pointer operand presumably assumes a 32-bit target.
asm volatile (
// xmm0 = 8 packed 16-bit words loaded from (%0); movdqa needs 16-byte alignment
"movdqa (%0),%%xmm0 "
// advance %0 by %1 bytes and accumulate the next 8 words; repeated four times
"addl %1,%0 "
"paddw (%0),%%xmm0 "
"addl %1,%0 "
"paddw (%0),%%xmm0 "
"addl %1,%0 "
"paddw (%0),%%xmm0 "
"addl %1,%0 "
"paddw (%0),%%xmm0 "
// store the five-load sum to (%2)
"movdqa %%xmm0,(%2) "
:
: "r" (tsh+i), "r" (2*w), "r" (tshv+i));
for (j=5; j
// Second asm statement: relies on xmm0 still holding the running sum left by
// the previous asm statement.
// NOTE(review): GCC does not guarantee register contents are preserved
// between separate asm statements -- confirm.
asm volatile (
// add the words at tsh+j*w+i
"paddw (%0),%%xmm0 "
// subtract the words at tsh+(j-5)*w+i
"psubw (%1),%%xmm0 "
// store the updated sum to tshv+(j-4)*w+i
"movdqa %%xmm0,(%2) "
:
: "r" (tsh+j*w+i), "r" (tsh+(j-5)*w+i), "r" (tshv+(j-4)*w+i));
}//END loop for(first)
I want to port this code to the Intel compiler. This is what I have written so far:
// Intel-syntax __asm port attempt of the running 5-load sum: load 16 bytes at
// [eax], add four more loads spaced `mul` bytes apart, store to [ebx]; then in
// the inner loop add one load, subtract another, and store the result.
// NOTE(review): the loop headers below were truncated when the page was
// extracted, so the loop bounds and step are not visible here.
// NOTE(review): tshv is declared here as int, yet below it receives the
// memalign pointer and is later overwritten with the index i.
int tsh1, tsh2, tshv, mul;
// NOTE(review): memalign is documented as memalign(alignment, size); the two
// arguments below look swapped -- confirm.
tsh=(short int *)memalign(w*h*sizeof(short int), 16);
// tsh is filled in beforehand by another function
tshv=(short int *)memalign(w*tableH*sizeof(short int), 16);
// byte step between successive loads in the first __asm sequence
mul=2*w;//w is int
for (i=0; i{
// Load the current values of tsh and tshv into eax/ebx.
// NOTE(review): the code assumes eax/ebx keep their values across the C
// statements between the two __asm blocks -- confirm the compiler guarantees it.
__asm {
mov eax, tsh
mov ebx, tshv
}
tsh1=i;
// NOTE(review): this overwrites tshv with i, so any later `mov ebx, tshv`
// loads the index rather than the buffer address.
tshv=i;
__asm {
// NOTE(review): tsh1/tshv hold the index i and are added as raw byte counts;
// tsh is cast to short int * above, so an element offset would presumably
// need scaling by sizeof(short int) -- confirm.
add eax, tsh1
add ebx, tshv
// ecx walks forward from eax in steps of mul bytes
mov ecx, eax
// xmm0 = 8 packed 16-bit words at [eax]; movdqa needs 16-byte alignment
movdqa xmm0, [eax]
add ecx, mul
paddw xmm0, [ecx]
add ecx, mul
paddw xmm0, [ecx]
add ecx, mul
paddw xmm0, [ecx]
add ecx, mul
paddw xmm0, [ecx]
// store the five-load sum to [ebx]
movdqa [ebx], xmm0
}
for (j=5; j
{
// NOTE(review): these load the int index variables tsh1/tsh2/tshv as base
// addresses, not the tsh/tshv buffers; tsh2 is read here before the
// assignment below has run on the first pass.
__asm {
mov eax, tsh1
mov ebx, tsh2
mov ecx, tshv
}
tsh1=j*w+i;
tsh2=(j-5)*w+i;
// NOTE(review): tshv1 is not among the variables declared at the top of this
// snippet, and the `add ecx, tshv` below reads tshv, not tshv1.
tshv1=(j-4)*w+i;
__asm {
add eax, tsh1
add ebx, tsh2
add ecx, tshv
// xmm0 += words at [eax]; xmm0 -= words at [ebx]; store result to [ecx]
paddw xmm0, [eax]
psubw xmm0, [ebx]
movdqa [ecx], xmm0
}
}
}
The problem is how to address the vectors tsh and tshv. I'm going crazy because I don't know how to select the position I want with this code format. The lines that I don't know how to convert are the following:
: "r" (tsh+i), "r" (2*w), "r" (tshv+i));
: "r" (tsh+j*w+i), "r" (tsh+(j-5)*w+i), "r"(tshv+(j-4)*w+i));
How can I convert those lines to Intel compiler code?
Thanks.
Can someone help me with the following problem?
I have the following SSE code, written for the GCC compiler.
// GCC (AT&T-syntax) inline-asm fragment: for each 16-byte group starting at
// tsh+i, sum five 16-bit rows into xmm0 and store the result to tshv+i; then,
// for each later row j, add row j, subtract row j-5, and store to row j-4 of
// tshv (a running 5-row sum).
// NOTE(review): the loop headers below were truncated when the page was
// extracted, so the loop bounds and step are not visible here.
// NOTE(review): memalign is documented as memalign(alignment, size); the two
// arguments below look swapped -- confirm.
tsh=(short int *)memalign(w*h*sizeof(short int), 16);
// tsh is filled in beforehand by another function
tshv=(short int *)memalign(w*tableH*sizeof(short int), 16);
for (i=0; i{
// First asm statement: %0 = tsh+i (source address), %1 = 2*w (byte step
// between successive loads), %2 = tshv+i (destination address).
// NOTE(review): %0 is declared as an input but is modified by addl; GCC
// requires modified operands to be declared as outputs (e.g. "+r"). xmm0 and
// memory are also not listed as clobbers -- confirm against the GCC manual.
// NOTE(review): addl on a pointer operand presumably assumes a 32-bit target.
asm volatile (
// xmm0 = 8 packed 16-bit words loaded from (%0); movdqa needs 16-byte alignment
"movdqa (%0),%%xmm0 "
// advance %0 by %1 bytes and accumulate the next 8 words; repeated four times
"addl %1,%0 "
"paddw (%0),%%xmm0 "
"addl %1,%0 "
"paddw (%0),%%xmm0 "
"addl %1,%0 "
"paddw (%0),%%xmm0 "
"addl %1,%0 "
"paddw (%0),%%xmm0 "
// store the five-load sum to (%2)
"movdqa %%xmm0,(%2) "
:
: "r" (tsh+i), "r" (2*w), "r" (tshv+i));
for (j=5; j
// Second asm statement: relies on xmm0 still holding the running sum left by
// the previous asm statement.
// NOTE(review): GCC does not guarantee register contents are preserved
// between separate asm statements -- confirm.
asm volatile (
// add the words at tsh+j*w+i
"paddw (%0),%%xmm0 "
// subtract the words at tsh+(j-5)*w+i
"psubw (%1),%%xmm0 "
// store the updated sum to tshv+(j-4)*w+i
"movdqa %%xmm0,(%2) "
:
: "r" (tsh+j*w+i), "r" (tsh+(j-5)*w+i), "r" (tshv+(j-4)*w+i));
}//END loop for(first)
I want to port this code to the Intel compiler. This is what I have written so far:
// Intel-syntax __asm port attempt of the running 5-load sum: load 16 bytes at
// [eax], add four more loads spaced `mul` bytes apart, store to [ebx]; then in
// the inner loop add one load, subtract another, and store the result.
// NOTE(review): the loop headers below were truncated when the page was
// extracted, so the loop bounds and step are not visible here.
// NOTE(review): tshv is declared here as int, yet below it receives the
// memalign pointer and is later overwritten with the index i.
int tsh1, tsh2, tshv, mul;
// NOTE(review): memalign is documented as memalign(alignment, size); the two
// arguments below look swapped -- confirm.
tsh=(short int *)memalign(w*h*sizeof(short int), 16);
// tsh is filled in beforehand by another function
tshv=(short int *)memalign(w*tableH*sizeof(short int), 16);
// byte step between successive loads in the first __asm sequence
mul=2*w;//w is int
for (i=0; i{
// Load the current values of tsh and tshv into eax/ebx.
// NOTE(review): the code assumes eax/ebx keep their values across the C
// statements between the two __asm blocks -- confirm the compiler guarantees it.
__asm {
mov eax, tsh
mov ebx, tshv
}
tsh1=i;
// NOTE(review): this overwrites tshv with i, so any later `mov ebx, tshv`
// loads the index rather than the buffer address.
tshv=i;
__asm {
// NOTE(review): tsh1/tshv hold the index i and are added as raw byte counts;
// tsh is cast to short int * above, so an element offset would presumably
// need scaling by sizeof(short int) -- confirm.
add eax, tsh1
add ebx, tshv
// ecx walks forward from eax in steps of mul bytes
mov ecx, eax
// xmm0 = 8 packed 16-bit words at [eax]; movdqa needs 16-byte alignment
movdqa xmm0, [eax]
add ecx, mul
paddw xmm0, [ecx]
add ecx, mul
paddw xmm0, [ecx]
add ecx, mul
paddw xmm0, [ecx]
add ecx, mul
paddw xmm0, [ecx]
// store the five-load sum to [ebx]
movdqa [ebx], xmm0
}
for (j=5; j
{
// NOTE(review): these load the int index variables tsh1/tsh2/tshv as base
// addresses, not the tsh/tshv buffers; tsh2 is read here before the
// assignment below has run on the first pass.
__asm {
mov eax, tsh1
mov ebx, tsh2
mov ecx, tshv
}
tsh1=j*w+i;
tsh2=(j-5)*w+i;
// NOTE(review): tshv1 is not among the variables declared at the top of this
// snippet, and the `add ecx, tshv` below reads tshv, not tshv1.
tshv1=(j-4)*w+i;
__asm {
add eax, tsh1
add ebx, tsh2
add ecx, tshv
// xmm0 += words at [eax]; xmm0 -= words at [ebx]; store result to [ecx]
paddw xmm0, [eax]
psubw xmm0, [ebx]
movdqa [ecx], xmm0
}
}
}
The problem is how to address the vectors tsh and tshv. I'm going crazy because I don't know how to select the position I want with this code format. The lines that I don't know how to convert are the following:
: "r" (tsh+i), "r" (2*w), "r" (tshv+i));
: "r" (tsh+j*w+i), "r" (tsh+(j-5)*w+i), "r"(tshv+(j-4)*w+i));
How can I convert those lines to Intel compiler code?
Thanks.
Link Copied
1 Reply
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
The code is not clear because the website does not let me format it correctly. I have attached a file with a clear version of the code.
Thanks
Thanks
Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page