- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
I am working with DPC++ on the CPU backend.
I have the following function:
// Debug kernel body: for each j in [0, num_vars) it builds the product
// 127 * 7.3^(j+1), adds it to a local accumulator, and finally prints the
// accumulated value with printf.
//
// As written, only num_vars is read; creatures, fitness_function, count and
// item_ct1 are accepted but never referenced, and nothing is written back
// through the pointer arguments.
void Evaluate_Kernel(Creature *creatures, double *fitness_function,
uint32_t count, uint32_t num_vars,
sycl::nd_item<3> item_ct1) {
double fitness = 0;
// j is a signed int while num_vars is uint32_t, so this comparison is
// carried out as an unsigned comparison.
for (int j = 0; j < num_vars; j++) {
// Running product for this j: starts at 127 and is multiplied by 7.3
// exactly j + 1 times by the inner loop.
// NOTE(review): the local name `pow` would hide any pow() function that is
// in scope here; confirm that is intended.
double pow = 127;
for (int k = 0; k < j + 1; k++) {
pow *= 7.3;
// One "add" line is printed per multiplication.
printf("add\n");
}
fitness += pow;
}
printf("fitness: %f\n", fitness);
}
When I look at the disassembled code for this function, I see:
0000000000407a60 <_Z15Evaluate_KernelP8CreaturePdjjN2cl4sycl7nd_itemILi3EEE>:
407a60: 55 push %rbp
407a61: 48 89 e5 mov %rsp,%rbp
407a64: 48 83 ec 40 sub $0x40,%rsp
407a68: 48 89 7d f8 mov %rdi,-0x8(%rbp)
407a6c: 48 89 75 f0 mov %rsi,-0x10(%rbp)
407a70: 89 55 ec mov %edx,-0x14(%rbp)
407a73: 89 4d e8 mov %ecx,-0x18(%rbp)
407a76: 0f 57 c0 xorps %xmm0,%xmm0
407a79: f2 0f 11 45 e0 movsd %xmm0,-0x20(%rbp)
407a7e: c7 45 dc 00 00 00 00 movl $0x0,-0x24(%rbp)
407a85: 8b 45 dc mov -0x24(%rbp),%eax
407a88: 3b 45 e8 cmp -0x18(%rbp),%eax
407a8b: 0f 83 73 00 00 00 jae 407b04 <_Z15Evaluate_KernelP8CreaturePdjjN2cl4sycl7nd_itemILi3EEE+0xa4>
407a91: f2 0f 10 05 87 16 02 movsd 0x21687(%rip),%xmm0 # 429120 <_ZTI5Timer+0x10>
407a98: 00
407a99: f2 0f 11 45 d0 movsd %xmm0,-0x30(%rbp)
407a9e: c7 45 cc 00 00 00 00 movl $0x0,-0x34(%rbp)
407aa5: 8b 45 cc mov -0x34(%rbp),%eax
407aa8: 8b 4d dc mov -0x24(%rbp),%ecx
407aab: 83 c1 01 add $0x1,%ecx
407aae: 39 c8 cmp %ecx,%eax
407ab0: 0f 8d 31 00 00 00 jge 407ae7 <_Z15Evaluate_KernelP8CreaturePdjjN2cl4sycl7nd_itemILi3EEE+0x87>
407ab6: f2 0f 10 05 6a 16 02 movsd 0x2166a(%rip),%xmm0 # 429128 <_ZTI5Timer+0x18>
407abd: 00
407abe: f2 0f 59 45 d0 mulsd -0x30(%rbp),%xmm0
407ac3: f2 0f 11 45 d0 movsd %xmm0,-0x30(%rbp)
407ac8: 48 bf 70 95 42 00 00 movabs $0x429570,%rdi
407acf: 00 00 00
407ad2: b0 00 mov $0x0,%al
407ad4: e8 97 c5 ff ff callq 404070 <printf@plt>
407ad9: 8b 45 cc mov -0x34(%rbp),%eax
407adc: 83 c0 01 add $0x1,%eax
407adf: 89 45 cc mov %eax,-0x34(%rbp)
407ae2: e9 be ff ff ff jmpq 407aa5 <_Z15Evaluate_KernelP8CreaturePdjjN2cl4sycl7nd_itemILi3EEE+0x45>
407ae7: f2 0f 10 45 d0 movsd -0x30(%rbp),%xmm0
407aec: f2 0f 58 45 e0 addsd -0x20(%rbp),%xmm0
407af1: f2 0f 11 45 e0 movsd %xmm0,-0x20(%rbp)
407af6: 8b 45 dc mov -0x24(%rbp),%eax
407af9: 83 c0 01 add $0x1,%eax
407afc: 89 45 dc mov %eax,-0x24(%rbp)
407aff: e9 81 ff ff ff jmpq 407a85 <_Z15Evaluate_KernelP8CreaturePdjjN2cl4sycl7nd_itemILi3EEE+0x25>
407b04: f2 0f 10 45 e0 movsd -0x20(%rbp),%xmm0
407b09: 48 bf 75 95 42 00 00 movabs $0x429575,%rdi
407b10: 00 00 00
407b13: b0 01 mov $0x1,%al
407b15: e8 56 c5 ff ff callq 404070 <printf@plt>
407b1a: 48 83 c4 40 add $0x40,%rsp
407b1e: 5d pop %rbp
407b1f: c3 retq
I am quite confused, as I cannot understand what the optimized instructions do. Could anyone please give me some hints on how to read the optimized code?
Thanks
Link Copied
0 Replies
Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page