; Header Section
; ---------------------------------------------------------------------------
; Syntax : NASM (Intel operand order), 64-bit, RIP-relative by default.
; ABI    : System V AMD64. External C helpers are called through the GOT.
;
; Exported entry points:
;   Main_Entry_fn   - records its two pointer arguments, sets the buffer
;                     size and runs Init_Cores_fn.
;   ListComp_01_fn  - worker routine: builds the squares 0..9, splits them
;                     into doubled odd values and even values, then emits
;                     every (doubled_odd, even) pair as doubles into the
;                     "collect" buffer once per main-loop element.
;
; NOTE(review): several code fragments are pulled in from include files
; under /opt/P01_SH/_Include_Utilities/ that are not visible here. Comments
; about what they need or preserve are assumptions and are marked as such.
; ---------------------------------------------------------------------------
[BITS 64]
[default rel]

global Main_Entry_fn
global ListComp_01_fn

extern sched_getcpu
extern fopen, fread, fwrite, fclose
extern malloc, calloc, realloc, free
extern posix_memalign
extern get_core_count_C, thread_create_in_C
extern Start_Clock, End_Clock, Print_Time
extern open_shm, get_shm_name, remap_shm, unlink_shm
extern site_begin, iter_task, site_end, itt_resume, itt_pause ; Advisor annotations

; Named constants used by the routines in this file.
LC01_FRAME_SIZE         equ 128         ; bytes of locals below rbp in ListComp_01_fn
LC01_NUMBERS_COUNT      equ 10          ; the squares loop runs for 0..9
LC01_MAIN_LOOP_BYTES    equ 400000      ; value ListComp_01_fn writes to shm_size
LC01_SHM_SIZE_BYTES     equ 40000000    ; value Main_Entry_fn writes to shm_size
LC01_K7_MASK            equ 65535       ; value loaded into opmask register k7

section .data align=64

Return_Pointer_Array:   dq 0, 0, 0
shm_lengths:            times 4 dq 0
Clock_Stop:             dq 0
numbers_ptr:            dq 0
numbers_ctr:            dq 0
numbers_length:         dq 0
doubled_odds_ptr:       dq 0
doubled_odds_ctr:       dq 0
doubled_odds_length:    dq 0
M_ptr:                  dq 0
M_ctr:                  dq 0
M_length:               dq 0
collect_ptr:            dq 0
collect_ctr:            dq 0
collect_length:         dq 0
input_array_ptr:        dq 0
data_master_ptr:        dq 0
Number_Of_Cores:        dq 0
Number_Of_Cores_Seq:    dq 0
Number_Of_Cores_Open:   dq 0
Number_Of_Cores_Calc:   dq 0
stride:                 dq 0
AVX_Width:              dq 0
Start_Time_C:           dq 0
writer_core:            dq 0
out_fname_ptr:          dq 0
shm_size:               dq 0
shm_fd:                 dq 0
shm_fds_ptr:            dq 0
shm_buffers_ptr:        dq 0
shm_buffers_count:      dq 0
core_count:             dq 0
remap_new_ptr:          dq 0
remap_new_len:          dq 0
cores_to_use:           dq 1
main_loop_ptr:          dq 0
main_loop_length:       dq 0
stack_lengths:          dq 0
stack_ptrs:             dq 0
pad_data_section:       times 5 dq 0

section .rodata align=64

shm_base_name:          db "shm_object_%d_%d",0x00
const_2:                dq 2
pad_const_section:      dq 0
; NOTE(review): a floating-point literal in DB is only accepted by some NASM
; versions; this looks like a timestamp that may have been meant as a quoted
; string - confirm before changing. Left exactly as found.
CURR_DTM:               db 1612890092.429779,0
out_fname_L:            db "/opt/Test_Output_Files/List_Comp_01_NASM_Test",0x00
file_mode_create:       db "wb+",0x00
file_mode_open:         db "a",0x00
file_mode_open_read:    db "r",0x00

; _________

section .text

;-----------------------------------------------------------------------
; Init_Cores_fn (file-local)
; Runs the included output-file and core-initialisation fragments, copies
; shm_size into all four shm_lengths slots, runs the included shared-memory
; creation fragment, then calls thread_create_in_C(Number_Of_Cores_Seq) and
; jumps to label_900, which returns to the caller.
; NOTE(review): rdi = &shm_lengths and rbx = 4 are set immediately before
; the POSIX_Shared_Memory_Multi include, presumably as its inputs - verify
; against that include.
; NOTE(review): stack alignment at the thread_create_in_C call depends on
; the included fragments and was not changed.
;-----------------------------------------------------------------------
Init_Cores_fn:
%include "/opt/P01_SH/_Include_Utilities/Output_Files.asm"
%include "/opt/P01_SH/_Include_Utilities/Init_Cores.asm"

        lea     rdi, [shm_lengths]
        mov     rax, [shm_size]
        mov     [rdi+0], rax
        mov     [rdi+8], rax
        mov     [rdi+16], rax
        mov     [rdi+24], rax
        mov     rbx, 4

%include "/opt/P01_SH/_Include_Utilities/POSIX_Shared_Memory_Multi.asm"

; Create Threads
label_first:
        mov     rdi, [Number_Of_Cores_Seq]              ; see above
        call    [rel thread_create_in_C wrt ..got]      ; Create_Threads_in_C.c
label_after:
        jmp     label_900

;____________
;-----------------------------------------------------------------------
; ListComp_01_fn (exported)
; ABI:  SysV AMD64. rbp, rbx and r12-r15 are saved on entry and restored
;       before returning; the original code overwrote them and returned
;       without restoring, which violates the ABI for a routine that is
;       declared global.
;
; Register roles after set-up:
;   r15 = collect buffer        (shm_buffers_ptr[0]),  r14 = byte offset
;   r13 = numbers buffer        (shm_buffers_ptr[1]),  r12 = byte offset
;   r11 = main_loop_ptr,                               r10 = byte offset
;   r9  = doubled_odds buffer   (shm_buffers_ptr[2]),  r8  = byte offset
;
; Frame slots (relative to rbp):
;   [rbp-8]  (sched_getcpu() - writer_core) * 8
;   [rbp-16] M_ptr               (shm_buffers_ptr[3])
;   [rbp-24] M_ctr               (byte offset into M)
;   [rbp-32] collect_length      (shm_lengths[0])
;   [rbp-40] numbers_length      (shm_lengths[1], later bytes written)
;   [rbp-48] main_loop_length    (copy of shm_size, see note below)
;   [rbp-56] doubled_odds_length (shm_lengths[2], later bytes written)
;   [rbp-64] M_length            (shm_lengths[3], later bytes written)
;
; Stack: six 8-byte pushes keep rsp at the same value modulo 16 that the
; original prologue produced, so the included push/pop fragments see the
; alignment they saw before. Calls made directly from frame level are
; wrapped in an 8-byte adjustment so rsp is 16-byte aligned at the call.
;-----------------------------------------------------------------------
ListComp_01_fn:
        push    rbp
        push    rbx
        push    r12
        push    r13
        push    r14
        push    r15
        mov     rbp, rsp
        sub     rsp, LC01_FRAME_SIZE

; Get the core number
        sub     rsp, 8                          ; rsp is 8 mod 16 here; align for the call
        call    [rel sched_getcpu wrt ..got]
        add     rsp, 8
        movsxd  rax, eax                        ; C int result: upper half of rax is not defined
        sub     rax, [writer_core]
        shl     rax, 3                          ; scale by 8
        mov     [rbp-8], rax

; Populate registers
        mov     rdi, [shm_buffers_ptr]
        mov     r15, [rdi+0]                    ; collect buffer
        xor     r14, r14
        mov     r13, [rdi+8]                    ; numbers buffer
        xor     r12, r12
        mov     r11, [main_loop_ptr]
        xor     r10, r10
        mov     r9, [rdi+16]                    ; doubled_odds buffer
        xor     r8, r8

        ; NOTE(review): this overwrites the global shm_size (set to
        ; LC01_SHM_SIZE_BYTES by Main_Entry_fn) and the value is then used
        ; as the main-loop byte length below. Behaviour kept as found
        ; because later includes may read shm_size - confirm the intent.
        mov     rax, LC01_MAIN_LOOP_BYTES
        mov     [shm_size], rax

; Populate stack
        lea     rax, [shm_lengths]
        mov     rbx, [rdi+24]
        mov     [rbp-16], rbx                   ; M_ptr
        mov     qword [rbp-24], 0               ; M_ctr
        mov     rbx, [rax+0]
        mov     [rbp-32], rbx                   ; collect_length
        mov     rbx, [rax+8]
        mov     [rbp-40], rbx                   ; numbers_length
        mov     rbx, [shm_size]
        mov     [rbp-48], rbx                   ; main_loop_length
        mov     rbx, [rax+16]
        mov     [rbp-56], rbx                   ; doubled_odds_length
        mov     rbx, [rax+24]
        mov     [rbp-64], rbx                   ; M_length
        xor     ecx, ecx                        ; rcx = 0, as the original left it

        mov     rax, LC01_K7_MASK
        kmovq   k7, rax

call_starts:
%include "/opt/P01_SH/_Include_Utilities/Registers_Push_NoAVX.asm"
        call    [rel itt_resume wrt ..got]      ; Intel Advisor
        call    [rel site_begin wrt ..got]      ; Intel Advisor
%include "/opt/P01_SH/_Include_Utilities/Registers_Pop_NoAVX.asm"

label_401:
%include "/opt/P01_SH/_Include_Utilities/Registers_Push_NoAVX.asm"
        call    [rel iter_task wrt ..got]       ; Intel Advisor
%include "/opt/P01_SH/_Include_Utilities/Registers_Pop_NoAVX.asm"

; numbers[i] = i * i for i in 0..LC01_NUMBERS_COUNT-1
        xor     rcx, rcx
Range_401_Loop:
        mov     rax, rcx
        imul    rax, rcx                        ; rax = i * i
        mov     [r13+r12], rax
return_label_401:
        add     r12, 8
        add     rcx, 1
        cmp     rcx, LC01_NUMBERS_COUNT
        jl      Range_401_Loop
        mov     [rbp-40], r12                   ; numbers_length = bytes written

; doubled_odds = every odd entry of numbers, times two
label_4020:
        xor     r12, r12
label_402:
        mov     rax, [r13+r12]
label_8010:
label_801:
        test    al, 1
        jz      return_label_4020               ; even value: not part of doubled_odds
        add     rax, rax                        ; rax = 2 * value
        mov     [r9+r8], rax
        add     r8, 8
return_label_4020:
        add     r12, 8
        cmp     r12, [rbp-40]
        jl      label_402
        mov     [rbp-56], r8                    ; doubled_odds_length = bytes written

; M = every even entry of numbers
label_4030:
        xor     r12, r12
label_403:
        mov     rax, [r13+r12]
label_8020:
label_802:
        test    al, 1
        jnz     return_label_4030               ; odd value: not part of M
        mov     rbx, [rbp-16]                   ; M_ptr
        mov     rcx, [rbp-24]                   ; M_ctr
        mov     [rbx+rcx], rax
        add     qword [rbp-24], 8
return_label_4030:
        add     r12, 8
        cmp     r12, [rbp-40]
        jl      label_403
        mov     rax, [rbp-24]
        mov     [rbp-64], rax                   ; M_length = bytes written

; For every main-loop element, emit each (doubled_odd, M value) pair as two
; doubles appended to the collect buffer.
label_4040:
        xor     r10, r10
label_404:
        movsd   xmm0, [r11+r10]                 ; load of the main-loop element (value is overwritten below)
label_8030:
        xor     r8, r8
label_803:
        mov     rcx, [r9+r8]                    ; rcx = doubled_odds[r8/8]
label_12010:
        xor     rbx, rbx
        mov     [rbp-24], rbx                   ; restart the M offset
label_1201:
        mov     rbx, [rbp-16]                   ; M_ptr
        mov     rax, [rbp-24]                   ; M_ctr
        mov     rbx, [rbx+rax]                  ; rbx = M[M_ctr/8]

        cvtsi2sd xmm0, rcx
        vmovsd  [r15+r14], xmm0
        add     r14, 8
        cmp     r14, [rbp-32]
        jl      next_26
        ; Buffer is full. The original branched to the next instruction
        ; either way, so the following store ran past collect_length.
        ; NOTE(review): assumes the included remap fragment enlarges the
        ; buffer and that the register push/pop includes preserve
        ; rbx, rcx, r8-r11 and r14 - verify against those includes.
        call    remap_collect
next_26:
        cvtsi2sd xmm0, rbx
        vmovsd  [r15+r14], xmm0
        add     r14, 8
        cmp     r14, [rbp-32]
        jl      next_27
        call    remap_collect                   ; same capacity handling as above
next_27:
        mov     rax, [rbp-24]
        add     rax, 8
        mov     [rbp-24], rax
        cmp     rax, [rbp-64]
        jl      label_1201

        add     r8, 8
        cmp     r8, [rbp-56]
        jl      label_803

return_to_top:
        add     r10, 8
        cmp     r10, [rbp-48]
        jl      label_404

; __________
label_899:
        sub     rsp, 8                          ; align rsp for the call
        call    [rel site_end wrt ..got]        ; Intel Advisor
        add     rsp, 8
%include "/opt/P01_SH/_Include_Utilities/Label_899_Clock_Stop.asm"
        mov     [collect_ptr], r15
        mov     [collect_length], r14           ; bytes written to collect

        add     rsp, LC01_FRAME_SIZE
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbx
        pop     rbp
        ret

; __________
; Tail of Init_Cores_fn: runs the included output and shared-memory delete
; fragments, then returns the address of Return_Pointer_Array in rax (the
; same address is left in rdi, as before).
; NOTE(review): rax = 8 is set immediately before the delete include,
; presumably as its input - verify against that include.
label_900:
%include "/opt/P01_SH/_Include_Utilities/Label_900_SglCore-SglOutput.asm"
        mov     rax, 8
%include "/opt/P01_SH/_Include_Utilities/POSIX_Shared_Memory_Delete.asm"
        lea     rdi, [Return_Pointer_Array]     ; RIP-relative instead of an absolute immediate
        mov     rax, rdi
        ret

;__________
; Main Entry
;-----------------------------------------------------------------------
; Main_Entry_fn (exported)
; ABI:  SysV AMD64
; In:   rdi = pointer stored in input_array_ptr; its first qword becomes
;             main_loop_ptr
;       rsi = pointer stored in data_master_ptr; its first qword becomes
;             main_loop_length
; Out:  rax = value left by Init_Cores_fn (label_900 ends by loading the
;             address of Return_Pointer_Array)
; Saves and restores rdi, rbp and rbx around the call to Init_Cores_fn.
;-----------------------------------------------------------------------
Main_Entry_fn:
        push    rdi
        push    rsi
        sub     rsp, 8                          ; two pushes leave rsp at 8 mod 16; align for the call
        call    [rel Start_Clock wrt ..got]
        add     rsp, 8
        mov     [Start_Time_C], rax
        pop     rsi
        pop     rdi

        push    rdi
        push    rbp
        push    rbx

        mov     [input_array_ptr], rdi
        mov     [data_master_ptr], rsi

        mov     rax, [rdi+0]
        mov     [main_loop_ptr], rax
        mov     rax, [rsi+0]
        mov     [main_loop_length], rax

        mov     rax, LC01_SHM_SIZE_BYTES
        mov     [shm_size], rax
        mov     [collect_length], rax

; __________
        call    Init_Cores_fn

        pop     rbx
        pop     rbp
        pop     rdi
        ret

;-----------------------------------------------------------------------
; remap_collect (file-local helper, called from ListComp_01_fn)
; In:   r15 = current collect buffer, [rbp-32] = current collect length
;       (rbp must be the ListComp_01_fn frame pointer)
; Out:  r15 = remap_new_ptr, also stored in collect_ptr
;       [rbp-32] = remap_new_len
; The original ended with r15 holding the new length rather than the new
; pointer, although r15 is the base register for collect stores; the two
; loads are now ordered so that r15 finishes as the pointer.
; NOTE(review): assumes the included fragment fills remap_new_ptr and
; remap_new_len - verify against Remap_Collect-SglOutput.asm.
;-----------------------------------------------------------------------
remap_collect:
%include "/opt/P01_SH/_Include_Utilities/Registers_Push_NoAVX.asm"
        mov     rdi, r15
        mov     rsi, [rbp-32]
%include "/opt/P01_SH/_Include_Utilities/Remap_Collect-SglOutput.asm"
%include "/opt/P01_SH/_Include_Utilities/Registers_Pop_NoAVX.asm"
        mov     r15, [remap_new_len]
        mov     [rbp-32], r15
        mov     r15, [remap_new_ptr]
        mov     [collect_ptr], r15
        ret