alex@thor|~/mpi_mic> setenv I_MPI_MIC 1
alex@thor|~/mpi_mic> setenv I_MPI_MIC_PREFIX $I_MPI_ROOT/mic/bin/
alex@thor|~/mpi_mic> setenv I_MPI_DEBUG 100
alex@thor|~/mpi_mic> setenv I_MPI_FABRICS shm:tcp
alex@thor|~/mpi_mic> mpirun -ppn 1 -n 2 -hosts thor,mic0 IMB-MPI1 pingpong
[0] MPI startup(): Intel(R) MPI Library, Version 5.0 Update 3  Build 20150128 (build id: 11250)
[0] MPI startup(): Copyright (C) 2003-2015 Intel Corporation.  All rights reserved.
[0] MPI startup(): Multi-threaded optimized library
[0] MPID_nem_impi_create_numa_nodes_map(): NUMA map->self_id = 0
[0] MPID_nem_impi_create_numa_nodes_map(): NUMA map->devices_num = 2
[0] MPID_nem_impi_create_numa_nodes_map(): NUMA map->nodes_num = 2
[0] MPID_nem_impi_create_numa_nodes_map(): NUMA map->devices:
[0] MPID_nem_impi_create_numa_nodes_map():     mic0:1
[0] MPID_nem_impi_create_numa_nodes_map():     mic1:1
[0] MPID_nem_impi_create_numa_nodes_map(): NUMA map->distances:
[0] MPID_nem_impi_create_numa_nodes_map():     0 -> 0 = 10
[0] MPID_nem_impi_create_numa_nodes_map():     0 -> 1 = 21
[1] MPID_nem_impi_create_numa_nodes_map(): Fetching extra numa information from /etc/ofed-mic.map
[0] MPI startup(): shm and tcp data transfer modes
[1] MPI startup(): shm and tcp data transfer modes
[1] MPI startup(): Recognition mode: 2, selected platform: 64 own platform: 64
[0] MPI startup(): Recognition mode: 2, selected platform: 64 own platform: 16
[0] MPI startup(): Device_reset_idx=1
[0] MPI startup(): Allgather: 1: 0-2147483647 & 0-2
[0] MPI startup(): Allgather: 1: 0-8192 & 0-2147483647
[0] MPI startup(): Allgather: 1: 0-131072 & 0-4
[0] MPI startup(): Allgather: 3: 0-2147483647 & 0-2147483647
[0] MPI startup(): Allgatherv: 0: 0-2147483647 & 0-2147483647
[0] MPI startup(): Allreduce: 0: 4194300-2147483647 & 0-3
[0] MPI startup(): Allreduce: 1: 0-1024 & 0-2147483647
[0] MPI startup(): Allreduce: 1: 0-2147483647 & 0-2
[0] MPI startup(): Allreduce: 1: 0-16384 & 0-4
[0] MPI startup(): Allreduce: 1: 0-8182 & 0-8
[0] MPI startup(): Allreduce: 1: 0-4096 & 0-16
[0] MPI startup(): Allreduce: 2: 0-2147483647 & 0-2147483647
[0] MPI startup(): Alltoall: 1: 0-16 & 9-2147483647
[0] MPI startup(): Alltoall: 1: 0-32 & 17-2147483647
[0] MPI startup(): Alltoall: 1: 4097-16384 & 0-2
[0] MPI startup(): Alltoall: 2: 0-2147483647 & 0-2
[0] MPI startup(): Alltoall: 2: 0-8192 & 0-2147483647
[0] MPI startup(): Alltoall: 2: 0-32768 & 0-16
[0] MPI startup(): Alltoall: 2: 0-262144 & 0-8
[0] MPI startup(): Alltoall: 2: 1048576-2147483647 & 0-4
[0] MPI startup(): Alltoall: 4: 32768-2147483647 & 3-16
[0] MPI startup(): Alltoall: 3: 0-2147483647 & 0-2147483647
[0] MPI startup(): Alltoallv: 1: 0-2147483647 & 0-2147483647
[0] MPI startup(): Alltoallw: 0: 0-2147483647 & 0-2147483647
[0] MPI startup(): Barrier: 1: 0-2147483647 & 0-2
[0] MPI startup(): Barrier: 2: 0-2147483647 & 0-2147483647
[0] MPI startup(): Bcast: 1: 0-2147483647 & 0-2
[0] MPI startup(): Bcast: 1: 0-1024 & 0-2147483647
[0] MPI startup(): Bcast: 1: 0-8192 & 0-4
[0] MPI startup(): Bcast: 7: 0-2147483647 & 0-2147483647
[0] MPI startup(): Exscan: 0: 0-2147483647 & 0-2147483647
[0] MPI startup(): Gather: 1: 0-1048576 & 0-2
[0] MPI startup(): Gather: 1: 262145-1048576 & 9-16
[0] MPI startup(): Gather: 3: 0-1024 & 0-2147483647
[0] MPI startup(): Gather: 0: 0-2147483647 & 0-2147483647
[0] MPI startup(): Gatherv: 1: 0-2147483647 & 0-2147483647
[0] MPI startup(): Reduce_scatter: 1: 0-32768 & 0-2147483647
[0] MPI startup(): Reduce_scatter: 1: 0-65536 & 0-2
[0] MPI startup(): Reduce_scatter: 1: 0-65536 & 9-2147483647
[0] MPI startup(): Reduce_scatter: 2: 0-2147483647 & 0-2147483647
[0] MPI startup(): Reduce: 1: 0-2147483647 & 0-2147483647
[0] MPI startup(): Scan: 0: 0-2147483647 & 0-2147483647
[0] MPI startup(): Scatter: 1: 0-524288 & 0-2
[0] MPI startup(): Scatter: 3: 0-1024 & 0-2147483647
[0] MPI startup(): Scatter: 2: 0-2147483647 & 0-2147483647
[0] MPI startup(): Scatterv: 1: 0-2147483647 & 0-2147483647
[0] MPI startup(): Rank    Pid      Node name  Pin cpu
[0] MPI startup(): 0       25433    thor       {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23}
[0] MPI startup(): 1       7658     thor-mic0  {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227}
[0] MPI startup(): Recognition=2 Platform(code=64 ippn=0 dev=5) Fabric(intra=1 inter=6 flags=0x0)
[0] MPI startup(): Topology split mode = 1
| rank | node | space=2
|    0 |    0 |
|    1 |    1 |
[0] MPI startup(): I_MPI_DEBUG=100
[0] MPI startup(): I_MPI_FABRICS=shm:tcp
[1] MPI startup(): Recognition=2 Platform(code=64 ippn=0 dev=5) Fabric(intra=1 inter=6 flags=0x0)
[0] MPI startup(): I_MPI_INFO_BRAND=Intel(R) Xeon(R)
[0] MPI startup(): I_MPI_INFO_CACHE1=0,1,2,3,4,5,16,17,18,19,20,21,0,1,2,3,4,5,16,17,18,19,20,21
[0] MPI startup(): I_MPI_INFO_CACHE2=0,1,2,3,4,5,16,17,18,19,20,21,0,1,2,3,4,5,16,17,18,19,20,21
[0] MPI startup(): I_MPI_INFO_CACHE3=0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1
[0] MPI startup(): I_MPI_INFO_CACHES=3
[0] MPI startup(): I_MPI_INFO_CACHE_SHARE=2,2,32
[0] MPI startup(): I_MPI_INFO_CACHE_SIZE=32768,262144,15728640
[0] MPI startup(): I_MPI_INFO_CORE=0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5
[0] MPI startup(): I_MPI_INFO_C_NAME=Unknown
[0] MPI startup(): I_MPI_INFO_DESC=1342177285
[0] MPI startup(): I_MPI_INFO_FLGB=641
[0] MPI startup(): I_MPI_INFO_FLGC=2143216639
[0] MPI startup(): I_MPI_INFO_FLGD=-1075053569
[0] MPI startup(): I_MPI_INFO_LCPU=24
[0] MPI startup(): I_MPI_INFO_MODE=775
[0] MPI startup(): I_MPI_INFO_NUMA_NODE_DIST=10,21,21,10
[0] MPI startup(): I_MPI_INFO_NUMA_NODE_MAP=mic0:1,mic1:1
[0] MPI startup(): I_MPI_INFO_NUMA_NODE_NUM=2
[0] MPI startup(): I_MPI_INFO_PACK=0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,1,1,1,1,1,1
[0] MPI startup(): I_MPI_INFO_SERIAL=E5-2620 v2
[0] MPI startup(): I_MPI_INFO_SIGN=198372
[0] MPI startup(): I_MPI_INFO_STATE=0
[0] MPI startup(): I_MPI_INFO_THREAD=0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1
[0] MPI startup(): I_MPI_INFO_VEND=1
[0] MPI startup(): I_MPI_MIC=1
[0] MPI startup(): I_MPI_PIN_INFO=x0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
[0] MPI startup(): I_MPI_PIN_MAPPING=1:0 0
benchmarks to run pingpong
#------------------------------------------------------------
#    Intel (R) MPI Benchmarks 4.0 Update 1, MPI-1 part
#------------------------------------------------------------
# Date                  : Mon Jan 18 13:14:59 2016
# Machine               : x86_64
# System                : Linux
# Release               : 3.10.0-327.4.4.el7.x86_64
# Version               : #1 SMP Tue Jan 5 16:07:00 UTC 2016
# MPI Version           : 3.0
# MPI Thread Environment:

# New default behavior from Version 3.2 on:
# the number of iterations per message size is cut down
# dynamically when a certain run time (per message size sample)
# is expected to be exceeded. Time limit is defined by variable
# "SECS_PER_SAMPLE" (=> IMB_settings.h)
# or through the flag => -time

# Calling sequence was:
# IMB-MPI1 pingpong

# Minimum message length in bytes:   0
# Maximum message length in bytes:   4194304
#
# MPI_Datatype                   :   MPI_BYTE
# MPI_Datatype for reductions    :   MPI_FLOAT
# MPI_Op                         :   MPI_SUM
#
#

# List of Benchmarks to run:
# PingPong

#---------------------------------------------------
# Benchmarking PingPong
# #processes = 2
#---------------------------------------------------
       #bytes #repetitions      t[usec]   Mbytes/sec
            0         1000       215.20         0.00
            1         1000       185.85         0.01
            2         1000       126.51         0.02
            4         1000       179.30         0.02
            8         1000       128.98         0.06
           16         1000       158.92         0.10
           32         1000       130.78         0.23
           64         1000       186.40         0.33
          128         1000       261.80         0.47
          256         1000       270.87         0.90
          512         1000       248.77         1.96
         1024         1000       129.55         7.54
         2048         1000       134.49        14.52
         4096         1000       136.24        28.67
         8192         1000       162.11        48.19
        16384         1000       197.71        79.03
        32768         1000       281.52       111.00
        65536          640       399.17       156.58
       131072          320       617.67       202.37
       262144          160      1761.63       141.91
       524288           80      2908.96       171.88
      1048576           40      4502.54       222.10
      2097152           20      7349.97       272.11
      4194304           10     12400.15       322.58

# All processes entering MPI_Finalize
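For reference, the communication pattern behind each row of the table is a simple ping-pong between the two ranks: rank 0 sends a message of the given size to rank 1, rank 1 sends it back, and the reported t[usec] is half the average round-trip time. Note that IMB counts Mbytes as 2^20 bytes, which is why the last row works out to 4194304 B / 2^20 = 4 Mbytes over 12400.15 usec, i.e. 322.58 Mbytes/sec. The code below is a minimal sketch of that pattern for a single message size, not the IMB source; MSG_BYTES and REPS are illustrative constants chosen here, and IMB additionally sweeps all sizes from 0 to 4194304 bytes and scales down the repetition count for large messages.

/* Minimal ping-pong sketch of what IMB-MPI1 "pingpong" measures.
 * Assumes exactly 2 ranks; MSG_BYTES and REPS are illustrative. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define MSG_BYTES 4096   /* one of the message sizes IMB sweeps */
#define REPS      1000   /* IMB uses 1000 repetitions at small sizes */

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != 2) {
        if (rank == 0) fprintf(stderr, "run with exactly 2 ranks\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    char *buf = malloc(MSG_BYTES);

    MPI_Barrier(MPI_COMM_WORLD);
    double t0 = MPI_Wtime();
    for (int i = 0; i < REPS; i++) {
        if (rank == 0) {            /* rank 0: send, then wait for the echo */
            MPI_Send(buf, MSG_BYTES, MPI_BYTE, 1, 0, MPI_COMM_WORLD);
            MPI_Recv(buf, MSG_BYTES, MPI_BYTE, 1, 0, MPI_COMM_WORLD,
                     MPI_STATUS_IGNORE);
        } else {                    /* rank 1: receive, then echo back */
            MPI_Recv(buf, MSG_BYTES, MPI_BYTE, 0, 0, MPI_COMM_WORLD,
                     MPI_STATUS_IGNORE);
            MPI_Send(buf, MSG_BYTES, MPI_BYTE, 0, 0, MPI_COMM_WORLD);
        }
    }
    double t1 = MPI_Wtime();

    if (rank == 0) {
        /* half round trip in seconds, as IMB reports it */
        double t_half = (t1 - t0) / (2.0 * REPS);
        /* IMB's Mbytes/sec uses 2^20-byte Mbytes */
        printf("%d bytes: t = %.2f usec, %.2f Mbytes/sec\n",
               MSG_BYTES, t_half * 1e6, (MSG_BYTES / 1048576.0) / t_half);
    }

    free(buf);
    MPI_Finalize();
    return 0;
}

Built for both architectures (e.g. with mpiicc, once with and once without -mmic) and launched the same way as IMB above, mpirun -ppn 1 -n 2 -hosts thor,mic0 ./pingpong, this reproduces the host-to-coprocessor exchange that the benchmark times.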