#------------------------------------------------------------ # Intel(R) MPI Benchmarks 2019 Update 3, MPI-1 part #------------------------------------------------------------ # Date : Fri Nov 29 16:18:42 2019 # Machine : x86_64 # System : Linux # Release : 3.10.0-957.1.3.el7.x86_64 # Version : #1 SMP Thu Nov 29 14:49:43 UTC 2018 # MPI Version : 3.1 # MPI Thread Environment: # Calling sequence was: # IMB-MPI1 # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_local # Reduce_scatter # Reduce_scatter_block # Allgather # Allgatherv # Gather # Gatherv # Scatter # Scatterv # Alltoall # Alltoallv # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 0.98 0.00 1 1000 0.99 1.01 2 1000 1.01 1.99 4 1000 1.02 3.90 8 1000 1.00 7.96 16 1000 1.03 15.58 32 1000 1.01 31.69 64 1000 1.11 57.71 128 1000 1.17 109.74 256 1000 1.29 198.75 512 1000 1.94 264.01 1024 1000 2.13 480.97 2048 1000 2.62 782.07 4096 1000 3.28 1250.39 8192 1000 4.92 1663.40 16384 1000 8.61 1902.07 32768 1000 12.47 2628.56 65536 640 24.45 2680.94 131072 320 37.26 3518.19 262144 160 71.15 3684.34 524288 80 105.51 4968.89 1048576 40 197.88 5298.98 2097152 20 431.24 4863.07 4194304 10 820.83 5109.86 #--------------------------------------------------- # Benchmarking PingPing # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 1.63 0.00 1 1000 1.65 0.61 2 1000 1.61 1.24 4 1000 1.65 2.43 8 1000 1.65 4.86 16 1000 1.66 9.62 32 1000 1.96 16.32 64 1000 2.00 32.08 128 1000 2.24 57.22 256 1000 2.27 112.53 512 1000 2.85 179.58 1024 1000 2.98 343.76 2048 1000 3.38 606.54 4096 1000 4.21 972.51 8192 1000 5.86 1399.11 16384 1000 9.73 1684.64 32768 1000 16.15 2029.09 65536 640 29.50 2221.85 131072 320 56.04 2338.80 262144 160 121.76 2153.00 524288 80 165.19 3173.91 1048576 40 330.68 3170.99 2097152 20 737.96 2841.81 4194304 10 1564.84 2680.34 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.44 1.44 1.44 0.00 1 1000 1.49 1.49 1.49 1.34 2 1000 1.45 1.45 1.45 2.75 4 1000 1.46 1.46 1.46 5.48 8 1000 1.48 1.48 1.48 10.82 16 1000 1.47 1.47 1.47 21.84 32 1000 1.51 1.51 1.51 42.38 64 1000 1.53 1.53 1.53 83.70 128 1000 1.63 1.63 1.63 157.38 256 1000 1.63 1.63 1.63 313.98 512 1000 2.29 2.29 2.29 447.79 1024 1000 2.57 2.57 2.57 796.27 2048 1000 3.07 3.08 3.07 1331.98 4096 1000 3.78 3.78 3.78 2166.60 8192 1000 5.37 5.37 5.37 3049.09 16384 1000 9.00 9.00 9.00 3642.43 32768 1000 15.81 15.81 15.81 4145.16 65536 640 35.92 35.94 35.93 3646.51 131072 320 55.52 55.52 55.52 4721.58 262144 160 121.19 121.44 121.31 4317.24 524288 80 164.27 164.28 164.27 6382.88 1048576 40 335.37 335.43 335.40 6252.07 2097152 20 662.00 662.05 662.02 6335.36 4194304 10 1298.27 1298.46 1298.37 6460.41 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.42 1.42 1.42 0.00 1 1000 1.49 1.49 1.49 1.34 2 1000 1.48 1.48 1.48 2.70 4 1000 1.46 1.46 1.46 5.49 8 1000 1.51 1.51 1.51 10.58 16 1000 1.47 1.47 1.47 21.72 32 1000 1.53 1.53 1.53 41.81 64 1000 1.57 1.57 1.57 81.74 128 1000 1.67 1.67 1.67 153.69 256 1000 1.62 1.62 1.62 316.78 512 1000 2.29 2.29 2.29 447.47 1024 1000 2.54 2.54 2.54 805.87 2048 1000 2.90 2.90 2.90 1412.71 4096 1000 3.76 3.76 3.76 2179.00 8192 1000 5.57 5.57 5.57 2939.81 16384 1000 9.32 9.33 9.33 3513.54 32768 1000 19.23 19.25 19.24 3405.19 65536 640 38.68 38.75 38.71 3382.89 131072 320 63.74 63.79 63.76 4109.29 262144 160 128.49 128.82 128.66 4070.03 524288 80 243.04 245.44 244.30 4272.22 1048576 40 543.17 549.38 546.85 3817.31 2097152 20 1034.34 1070.16 1058.72 3919.33 4194304 10 2024.27 2144.97 2089.03 3910.83 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 8 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.43 1.43 1.43 0.00 1 1000 1.47 1.47 1.47 1.36 2 1000 1.50 1.50 1.50 2.66 4 1000 1.49 1.49 1.49 5.36 8 1000 1.47 1.48 1.47 10.85 16 1000 1.55 1.55 1.55 20.62 32 1000 1.54 1.54 1.54 41.55 64 1000 1.55 1.56 1.56 82.24 128 1000 1.62 1.62 1.62 158.28 256 1000 1.73 1.73 1.73 295.74 512 1000 2.28 2.28 2.28 449.07 1024 1000 2.70 2.70 2.70 758.33 2048 1000 3.06 3.07 3.06 1336.37 4096 1000 3.86 3.86 3.86 2123.97 8192 1000 7.43 7.43 7.43 2204.43 16384 1000 12.35 12.35 12.35 2652.52 32768 1000 24.40 24.43 24.42 2682.34 65536 640 48.35 48.49 48.42 2702.82 131072 320 89.19 89.46 89.33 2930.22 262144 160 180.55 181.34 180.95 2891.22 524288 80 440.51 443.49 442.39 2364.40 1048576 40 986.34 999.88 993.85 2097.39 2097152 20 1733.90 1950.39 1862.78 2150.49 4194304 10 3428.63 3831.74 3617.38 2189.25 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 3.25 3.25 3.25 0.00 1 1000 3.36 3.36 3.36 1.19 2 1000 3.39 3.39 3.39 2.36 4 1000 3.37 3.37 3.37 4.75 8 1000 3.39 3.39 3.39 9.44 16 1000 3.43 3.43 3.43 18.66 32 1000 3.53 3.53 3.53 36.27 64 1000 3.79 3.80 3.80 67.44 128 1000 3.66 3.66 3.66 139.76 256 1000 3.87 3.87 3.87 264.58 512 1000 5.58 5.58 5.58 367.14 1024 1000 5.81 5.81 5.81 704.63 2048 1000 6.78 6.78 6.78 1208.57 4096 1000 8.45 8.45 8.45 1939.47 8192 1000 11.68 11.69 11.68 2804.25 16384 1000 19.16 19.17 19.17 3419.27 32768 1000 32.03 32.03 32.03 4091.88 65536 640 72.86 72.90 72.88 3595.73 131072 320 112.78 112.79 112.78 4648.45 262144 160 242.94 242.95 242.94 4316.08 524288 80 359.42 359.44 359.43 5834.49 1048576 40 744.56 744.82 744.69 5631.29 2097152 20 1582.55 1582.64 1582.60 5300.38 4194304 10 3157.21 3235.78 3196.50 5184.90 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 4.02 4.02 4.02 0.00 1 1000 4.17 4.17 4.17 0.96 2 1000 4.15 4.15 4.15 1.93 4 1000 4.21 4.21 4.21 3.80 8 1000 4.20 4.20 4.20 7.62 16 1000 4.28 4.28 4.28 14.95 32 1000 4.32 4.32 4.32 29.62 64 1000 4.52 4.52 4.52 56.58 128 1000 4.53 4.54 4.53 112.89 256 1000 4.65 4.65 4.65 220.10 512 1000 5.96 5.96 5.96 343.56 1024 1000 6.13 6.14 6.14 667.44 2048 1000 7.15 7.15 7.15 1145.48 4096 1000 8.69 8.70 8.70 1883.88 8192 1000 12.41 12.42 12.41 2639.14 16384 1000 20.49 20.49 20.49 3197.88 32768 1000 40.24 40.27 40.26 3254.70 65536 640 78.65 78.76 78.71 3328.47 131072 320 127.70 127.95 127.82 4097.55 262144 160 270.84 271.57 271.08 3861.10 524288 80 536.45 541.14 539.02 3875.40 1048576 40 964.86 982.60 974.72 4268.57 2097152 20 1782.44 1830.21 1805.93 4583.42 4194304 10 3368.72 3526.83 3425.30 4757.02 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 8 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 3.97 3.98 3.97 0.00 1 1000 4.15 4.15 4.15 0.96 2 1000 4.12 4.13 4.12 1.94 4 1000 4.18 4.18 4.18 3.83 8 1000 4.17 4.17 4.17 7.67 16 1000 4.29 4.30 4.29 14.90 32 1000 4.29 4.30 4.30 29.77 64 1000 4.51 4.52 4.51 56.66 128 1000 4.48 4.48 4.48 114.17 256 1000 4.68 4.68 4.68 218.75 512 1000 5.92 5.92 5.92 345.83 1024 1000 6.31 6.32 6.32 648.07 2048 1000 7.16 7.17 7.16 1143.17 4096 1000 8.88 8.89 8.89 1843.57 8192 1000 14.99 15.00 15.00 2184.70 16384 1000 25.28 25.28 25.28 2592.39 32768 1000 49.50 49.55 49.53 2645.07 65536 640 98.81 99.01 98.90 2647.65 131072 320 178.50 178.92 178.67 2930.27 262144 160 406.12 409.12 407.66 2563.03 524288 80 892.95 901.40 897.80 2326.55 1048576 40 1676.58 1716.99 1699.73 2442.83 2097152 20 3012.51 3141.98 3077.82 2669.85 4194304 10 5473.54 6057.46 5815.06 2769.68 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.58 1.64 1.61 4 1000 1.70 1.75 1.72 8 1000 1.74 1.76 1.75 16 1000 1.71 1.76 1.73 32 1000 1.72 1.76 1.74 64 1000 1.99 2.25 2.12 128 1000 2.18 2.46 2.32 256 1000 2.53 2.95 2.74 512 1000 2.83 3.25 3.04 1024 1000 3.53 3.94 3.73 2048 1000 4.91 5.31 5.11 4096 1000 84.48 85.95 85.22 8192 1000 89.49 90.56 90.02 16384 1000 46.12 46.86 46.49 32768 1000 56.38 57.07 56.72 65536 640 73.58 75.16 74.37 131072 320 106.34 107.10 106.72 262144 160 174.58 174.78 174.68 524288 80 547.45 547.47 547.46 1048576 40 1267.94 1268.43 1268.18 2097152 20 4830.16 4925.74 4877.95 4194304 10 9469.58 9549.50 9509.54 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.54 0.63 0.57 4 1000 2.11 2.18 2.15 8 1000 2.12 2.18 2.16 16 1000 2.11 2.19 2.16 32 1000 2.11 2.16 2.14 64 1000 2.62 3.26 2.83 128 1000 2.85 3.53 3.08 256 1000 3.35 3.90 3.53 512 1000 167.51 172.90 170.28 1024 1000 36.97 55.23 46.17 2048 1000 37.89 55.86 47.10 4096 1000 180.64 186.60 184.51 8192 1000 189.10 194.90 192.72 16384 1000 89.85 92.16 90.83 32768 1000 104.31 111.61 106.86 65536 640 132.98 144.93 136.75 131072 320 210.13 214.04 211.36 262144 160 311.82 319.38 314.94 524288 80 790.69 827.37 807.45 1048576 40 1785.34 1851.76 1820.27 2097152 20 4344.05 4509.44 4427.87 4194304 10 8522.85 8762.08 8640.20 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.55 0.68 0.57 4 1000 2.56 2.65 2.61 8 1000 2.62 2.79 2.69 16 1000 2.64 2.73 2.68 32 1000 2.64 2.73 2.68 64 1000 3.23 4.29 3.58 128 1000 228.32 237.99 235.14 256 1000 228.67 236.60 234.23 512 1000 275.71 283.75 280.83 1024 1000 276.77 284.42 281.64 2048 1000 290.43 297.03 294.66 4096 1000 294.00 303.55 299.95 8192 1000 311.20 318.87 315.85 16384 1000 145.32 147.71 146.06 32768 1000 165.77 172.46 168.84 65536 640 212.15 218.14 216.04 131072 320 435.65 442.42 440.24 262144 160 597.40 606.04 603.23 524288 80 1086.12 1109.49 1095.41 1048576 40 2540.44 2620.47 2576.49 2097152 20 5213.73 5337.77 5272.10 4194304 10 12242.73 12607.67 12473.87 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.57 0.65 0.61 4 1000 1.75 2.52 2.14 8 1000 1.74 2.47 2.10 16 1000 1.87 2.65 2.26 32 1000 1.91 2.66 2.28 64 1000 1.99 2.76 2.38 128 1000 2.12 2.98 2.55 256 1000 2.25 3.09 2.67 512 1000 2.54 3.44 2.99 1024 1000 3.01 4.03 3.52 2048 1000 4.07 5.02 4.55 4096 1000 6.12 7.19 6.65 8192 1000 10.62 11.84 11.23 16384 1000 20.07 22.30 21.18 32768 1000 13.92 64.72 39.32 65536 640 19.99 109.68 64.83 131072 320 34.97 209.25 122.11 262144 160 63.53 402.25 232.89 524288 80 118.61 1179.14 648.87 1048576 40 272.86 2331.03 1301.95 2097152 20 674.69 4639.33 2657.01 4194304 10 2626.95 9302.30 5964.62 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.54 0.62 0.56 4 1000 2.03 2.82 2.23 8 1000 2.09 2.87 2.29 16 1000 2.10 2.89 2.30 32 1000 2.09 2.86 2.29 64 1000 2.22 3.05 2.43 128 1000 2.50 3.41 2.73 256 1000 2.74 3.59 3.00 512 1000 3.30 4.14 3.55 1024 1000 4.31 5.28 4.60 2048 1000 6.25 7.21 6.54 4096 1000 10.49 11.64 10.83 8192 1000 19.01 20.32 19.39 16384 1000 38.14 49.34 44.28 32768 1000 65.60 108.47 84.34 65536 640 85.07 126.44 101.96 131072 320 35.16 291.47 215.91 262144 160 64.81 526.62 397.42 524288 80 125.37 1121.79 839.66 1048576 40 280.09 2523.59 1944.10 2097152 20 675.69 5008.46 3904.91 4194304 10 2761.62 9964.13 8148.45 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.57 0.66 0.59 4 1000 2.65 3.39 2.74 8 1000 2.65 3.44 2.75 16 1000 2.67 3.45 2.77 32 1000 2.60 3.40 2.71 64 1000 2.93 3.81 3.05 128 1000 2.89 3.70 3.10 256 1000 3.28 4.13 3.51 512 1000 4.08 4.99 4.31 1024 1000 5.65 6.65 5.89 2048 1000 6.11 62.64 24.03 4096 1000 6.84 61.83 24.10 8192 1000 8.56 67.80 27.13 16384 1000 75.69 121.93 89.38 32768 1000 88.79 144.34 105.32 65536 640 113.27 169.61 129.58 131072 320 35.18 364.51 278.84 262144 160 65.15 597.29 486.89 524288 80 125.57 1204.95 992.20 1048576 40 282.27 2666.50 2301.94 2097152 20 682.01 5194.10 4560.02 4194304 10 2683.64 10285.58 9242.65 #---------------------------------------------------------------- # Benchmarking Reduce_local # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.23 0.25 0.24 4 1000 0.28 0.28 0.28 8 1000 0.28 0.30 0.29 16 1000 0.28 0.30 0.29 32 1000 0.28 0.29 0.28 64 1000 0.28 0.29 0.29 128 1000 0.30 0.30 0.30 256 1000 0.31 0.34 0.33 512 1000 0.35 0.37 0.36 1024 1000 0.41 0.45 0.43 2048 1000 0.53 0.56 0.55 4096 1000 0.79 0.81 0.80 8192 1000 1.30 1.32 1.31 16384 1000 3.42 3.51 3.47 32768 1000 6.59 6.73 6.66 65536 640 12.94 13.79 13.36 131072 320 25.10 25.17 25.13 262144 160 49.82 50.11 49.96 524288 80 211.16 227.84 219.50 1048576 40 1566.40 1574.00 1570.20 2097152 20 3132.73 3138.39 3135.56 4194304 10 6272.13 6276.74 6274.43 #---------------------------------------------------------------- # Benchmarking Reduce_local # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.23 0.24 0.23 4 1000 0.27 0.27 0.27 8 1000 0.28 0.29 0.29 16 1000 0.28 0.29 0.29 32 1000 0.27 0.28 0.28 64 1000 0.28 0.29 0.29 128 1000 0.29 0.30 0.30 256 1000 0.31 0.31 0.31 512 1000 0.35 0.35 0.35 1024 1000 0.43 0.43 0.43 2048 1000 0.54 0.54 0.54 4096 1000 0.79 0.79 0.79 8192 1000 1.28 1.34 1.30 16384 1000 3.50 3.63 3.55 32768 1000 7.09 7.13 7.11 65536 640 12.56 13.78 13.19 131072 320 25.09 25.10 25.10 262144 160 49.52 50.25 49.89 524288 80 264.96 272.21 268.49 1048576 40 1571.84 1587.55 1579.26 2097152 20 3133.93 3160.99 3146.41 4194304 10 6276.68 6325.21 6298.33 #---------------------------------------------------------------- # Benchmarking Reduce_local # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.22 0.24 0.23 4 1000 0.27 0.28 0.27 8 1000 0.28 0.28 0.28 16 1000 0.28 0.29 0.29 32 1000 0.28 0.29 0.28 64 1000 0.28 0.29 0.29 128 1000 0.29 0.31 0.30 256 1000 0.31 0.32 0.32 512 1000 0.35 0.36 0.36 1024 1000 0.41 0.42 0.41 2048 1000 0.54 0.56 0.55 4096 1000 0.78 0.81 0.79 8192 1000 1.27 1.29 1.28 16384 1000 3.35 3.57 3.46 32768 1000 6.58 6.77 6.64 65536 640 12.58 13.79 12.88 131072 320 25.08 25.18 25.11 262144 160 49.63 50.28 49.83 524288 80 292.19 303.22 297.54 1048576 40 1614.93 1628.20 1620.99 2097152 20 3223.65 3244.41 3231.58 4194304 10 6444.42 6495.26 6469.09 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.81 0.94 0.88 4 1000 24.86 25.56 25.21 8 1000 24.83 25.46 25.14 16 1000 24.96 25.45 25.20 32 1000 24.89 25.56 25.22 64 1000 25.21 25.96 25.59 128 1000 25.26 26.28 25.77 256 1000 25.37 26.49 25.93 512 1000 26.79 27.60 27.20 1024 1000 26.98 27.82 27.40 2048 1000 28.46 28.94 28.70 4096 1000 30.62 31.22 30.92 8192 1000 34.04 34.73 34.39 16384 1000 41.31 41.70 41.51 32768 1000 54.94 56.90 55.92 65536 640 79.71 80.39 80.05 131072 320 181.56 186.74 184.15 262144 160 490.78 496.00 493.39 524288 80 1263.06 1272.74 1267.90 1048576 40 2506.69 2516.50 2511.60 2097152 20 4865.19 4870.13 4867.66 4194304 10 9481.37 9551.40 9516.38 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.87 0.97 0.90 4 1000 40.68 41.88 41.06 8 1000 40.70 41.83 41.09 16 1000 41.01 42.17 41.45 32 1000 41.21 42.38 41.65 64 1000 41.44 42.97 42.04 128 1000 41.68 42.97 42.14 256 1000 43.35 44.76 43.99 512 1000 44.54 45.80 45.02 1024 1000 46.01 47.20 46.51 2048 1000 49.21 50.14 49.56 4096 1000 54.91 55.91 55.33 8192 1000 64.97 67.48 65.88 16384 1000 82.27 87.33 83.98 32768 1000 118.49 119.68 118.93 65536 640 211.54 223.62 215.72 131072 320 344.97 350.49 348.17 262144 160 1212.05 1228.63 1218.22 524288 80 3543.46 3677.27 3614.20 1048576 40 7039.78 7228.62 7148.69 2097152 20 13726.43 13960.10 13864.89 4194304 10 26725.91 27288.62 27047.10 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.89 1.00 0.92 4 1000 55.71 57.17 56.40 8 1000 56.61 58.75 57.38 16 1000 56.84 58.74 57.54 32 1000 56.83 58.65 57.51 64 1000 57.04 59.01 57.93 128 1000 59.10 60.82 59.79 256 1000 60.96 63.04 61.66 512 1000 63.07 64.58 63.62 1024 1000 66.48 67.86 67.15 2048 1000 75.00 76.37 75.51 4096 1000 89.66 92.12 90.71 8192 1000 116.82 119.10 117.52 16384 1000 169.26 171.09 170.23 32768 1000 280.22 290.39 286.19 65536 640 480.07 514.60 490.85 131072 320 998.98 1036.11 1018.00 262144 160 3048.77 3080.98 3066.90 524288 80 9497.66 9762.07 9659.41 1048576 40 19196.35 19576.83 19445.87 2097152 20 36054.51 37508.20 36833.74 4194304 10 70810.52 71891.17 71545.55 #---------------------------------------------------------------- # Benchmarking Reduce_scatter_block # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.53 0.58 0.55 4 1000 23.80 25.01 24.40 8 1000 23.87 24.95 24.41 16 1000 23.83 24.87 24.35 32 1000 23.92 25.01 24.46 64 1000 24.28 25.26 24.77 128 1000 24.33 25.43 24.88 256 1000 24.61 25.79 25.20 512 1000 25.65 26.65 26.15 1024 1000 26.02 27.04 26.53 2048 1000 27.52 28.13 27.82 4096 1000 29.79 30.34 30.06 8192 1000 33.29 33.91 33.60 16384 1000 40.64 40.85 40.74 32768 1000 54.28 56.23 55.26 65536 640 79.04 79.84 79.44 131072 320 177.36 183.16 180.26 262144 160 487.06 491.31 489.18 524288 80 1259.65 1281.71 1270.68 1048576 40 2503.64 2518.17 2510.90 2097152 20 4860.34 4866.64 4863.49 4194304 10 9479.84 9556.19 9518.02 #---------------------------------------------------------------- # Benchmarking Reduce_scatter_block # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.49 0.57 0.52 4 1000 39.61 40.91 40.02 8 1000 39.54 40.63 39.95 16 1000 40.32 41.49 40.78 32 1000 40.41 41.43 40.78 64 1000 40.70 41.79 41.12 128 1000 41.08 42.26 41.46 256 1000 42.81 43.89 43.15 512 1000 43.72 44.90 44.10 1024 1000 45.14 46.10 45.45 2048 1000 48.18 48.89 48.40 4096 1000 53.91 54.89 54.41 8192 1000 63.62 66.29 64.63 16384 1000 81.56 86.14 83.02 32768 1000 117.98 119.30 118.47 65536 640 209.29 221.36 213.02 131072 320 342.04 347.83 345.43 262144 160 1202.11 1210.71 1205.75 524288 80 3542.39 3668.26 3607.64 1048576 40 7036.76 7226.07 7145.05 2097152 20 13765.50 13972.87 13891.23 4194304 10 26853.21 27464.94 27194.71 #---------------------------------------------------------------- # Benchmarking Reduce_scatter_block # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.52 0.59 0.54 4 1000 54.42 56.35 55.22 8 1000 55.70 57.10 56.25 16 1000 56.11 57.47 56.65 32 1000 56.39 57.70 56.89 64 1000 56.54 57.87 57.17 128 1000 58.23 59.54 58.75 256 1000 60.27 61.77 60.99 512 1000 62.00 63.27 62.45 1024 1000 65.58 66.42 65.91 2048 1000 73.89 75.00 74.34 4096 1000 88.80 90.79 89.43 8192 1000 114.62 118.14 115.95 16384 1000 168.43 170.56 169.59 32768 1000 282.49 291.19 287.57 65536 640 480.18 513.41 490.70 131072 320 998.17 1034.73 1017.16 262144 160 3033.55 3070.12 3053.94 524288 80 9488.65 9755.92 9653.32 1048576 40 19257.13 19618.03 19494.17 2097152 20 36044.79 37518.34 36830.35 4194304 10 70736.50 71970.20 71585.65 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 21.01 21.97 21.49 1 1000 19.57 20.35 19.96 2 1000 19.52 20.80 20.16 4 1000 19.66 21.18 20.42 8 1000 19.61 20.60 20.11 16 1000 19.68 21.16 20.42 32 1000 20.00 20.86 20.43 64 1000 20.16 20.94 20.55 128 1000 20.11 21.15 20.63 256 1000 20.22 21.17 20.70 512 1000 21.82 22.92 22.37 1024 1000 21.71 22.87 22.29 2048 1000 22.50 23.24 22.87 4096 1000 24.03 25.02 24.53 8192 1000 26.55 27.71 27.13 16384 1000 30.19 31.36 30.78 32768 1000 38.32 39.47 38.90 65536 640 54.09 54.97 54.53 131072 320 85.15 86.16 85.65 262144 160 230.90 239.58 235.24 524288 80 427.96 428.54 428.25 1048576 40 891.16 891.61 891.39 2097152 20 1699.35 1699.77 1699.56 4194304 10 3225.08 3225.13 3225.10 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.53 0.60 0.55 1 1000 34.29 35.66 34.74 2 1000 34.28 35.85 34.74 4 1000 34.44 35.80 34.87 8 1000 34.37 35.99 34.88 16 1000 35.25 36.55 35.65 32 1000 34.73 36.36 35.28 64 1000 34.96 36.60 35.52 128 1000 35.11 36.57 35.64 256 1000 37.17 38.49 37.59 512 1000 38.38 39.07 38.67 1024 1000 38.72 39.75 39.04 2048 1000 40.83 41.68 41.13 4096 1000 44.33 45.15 44.64 8192 1000 50.91 52.66 51.57 16384 1000 65.42 66.70 66.00 32768 1000 98.29 103.14 100.48 65536 640 132.02 140.08 135.03 131072 320 267.03 279.64 273.33 262144 160 590.49 597.52 594.80 524288 80 1090.81 1177.60 1135.17 1048576 40 2135.41 2227.10 2181.30 2097152 20 4118.58 4201.16 4158.02 4194304 10 7881.73 7962.33 7921.61 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.55 0.63 0.57 1 1000 48.47 50.14 49.12 2 1000 48.38 50.13 48.99 4 1000 48.05 50.08 48.81 8 1000 49.48 50.92 50.07 16 1000 49.68 51.27 50.26 32 1000 49.37 50.82 49.85 64 1000 49.32 51.07 50.05 128 1000 51.39 53.16 51.98 256 1000 52.86 54.55 53.47 512 1000 54.75 56.21 55.24 1024 1000 56.74 57.96 57.14 2048 1000 62.15 63.01 62.52 4096 1000 72.65 74.82 73.28 8192 1000 93.65 95.10 94.33 16384 1000 183.05 190.24 186.91 32768 1000 257.24 269.54 261.79 65536 640 376.28 384.04 379.14 131072 320 851.51 910.01 886.45 262144 160 1817.70 1979.46 1893.08 524288 80 3373.78 3965.92 3665.33 1048576 40 6645.39 7610.83 7116.70 2097152 20 11886.44 12196.48 12042.25 4194304 10 22774.23 23252.29 23028.73 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.62 0.72 0.67 1 1000 20.60 21.33 20.97 2 1000 20.56 21.42 20.99 4 1000 20.65 21.65 21.15 8 1000 20.67 21.67 21.17 16 1000 20.78 21.55 21.16 32 1000 20.71 21.79 21.25 64 1000 20.82 21.83 21.32 128 1000 21.12 22.12 21.62 256 1000 21.32 22.31 21.81 512 1000 22.18 23.36 22.77 1024 1000 22.68 23.76 23.22 2048 1000 23.26 24.49 23.88 4096 1000 24.77 25.88 25.33 8192 1000 27.12 28.23 27.68 16384 1000 30.96 32.14 31.55 32768 1000 39.06 40.16 39.61 65536 640 54.83 55.64 55.24 131072 320 85.91 87.52 86.72 262144 160 235.00 240.47 237.73 524288 80 428.30 429.21 428.76 1048576 40 891.32 891.38 891.35 2097152 20 1699.35 1699.46 1699.41 4194304 10 3233.79 3241.93 3237.86 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.66 0.76 0.69 1 1000 35.72 37.14 36.28 2 1000 35.69 37.23 36.26 4 1000 35.71 37.19 36.25 8 1000 35.79 37.13 36.30 16 1000 48.31 50.55 49.12 32 1000 48.50 50.46 49.27 64 1000 49.31 51.22 50.04 128 1000 49.15 51.05 49.91 256 1000 49.30 51.20 50.02 512 1000 51.97 54.11 52.68 1024 1000 53.46 55.58 54.15 2048 1000 55.26 56.90 55.78 4096 1000 58.80 60.13 59.29 8192 1000 65.70 68.19 66.89 16384 1000 78.06 79.59 78.56 32768 1000 99.59 104.97 101.94 65536 640 143.41 153.72 147.82 131072 320 240.88 260.08 251.59 262144 160 737.72 777.13 758.02 524288 80 1112.70 1222.86 1176.08 1048576 40 2085.40 2339.85 2254.29 2097152 20 4021.00 4328.94 4215.17 4194304 10 7657.87 8198.08 7977.62 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.72 0.80 0.73 1 1000 50.28 52.29 50.99 2 1000 50.20 52.13 50.90 4 1000 50.08 52.16 50.93 8 1000 50.89 53.12 51.76 16 1000 51.04 53.12 51.90 32 1000 51.38 52.88 51.90 64 1000 51.31 53.15 52.06 128 1000 53.58 55.26 54.31 256 1000 55.10 56.89 55.83 512 1000 57.21 58.67 57.81 1024 1000 59.94 60.96 60.30 2048 1000 66.40 67.27 66.81 4096 1000 77.57 79.86 78.16 8192 1000 100.34 105.05 101.58 16384 1000 181.58 191.74 187.96 32768 1000 259.06 271.91 263.31 65536 640 401.74 412.37 406.78 131072 320 978.12 1067.73 1023.77 262144 160 2060.86 2214.34 2139.92 524288 80 3687.37 3968.61 3867.95 1048576 40 7231.98 7661.75 7481.89 2097152 20 13463.24 14416.49 14004.77 4194304 10 25009.63 27527.16 26623.32 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.53 0.66 0.60 1 1000 1.31 18.76 10.04 2 1000 0.96 19.73 10.34 4 1000 0.97 20.07 10.52 8 1000 1.30 19.07 10.19 16 1000 0.98 19.98 10.48 32 1000 0.98 20.07 10.52 64 1000 1.30 18.97 10.14 128 1000 1.03 20.43 10.73 256 1000 1.06 20.45 10.76 512 1000 1.53 20.89 11.21 1024 1000 1.68 20.16 10.92 2048 1000 1.84 20.70 11.27 4096 1000 2.28 21.54 11.91 8192 1000 2.90 23.13 13.01 16384 1000 4.24 25.86 15.05 32768 1000 7.32 30.59 18.95 65536 640 15.53 52.36 33.94 131072 320 26.75 61.39 44.07 262144 160 49.70 147.64 98.67 524288 80 95.67 317.93 206.80 1048576 40 181.49 699.57 440.53 2097152 20 332.77 1366.97 849.87 4194304 10 676.40 2681.27 1678.83 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.56 0.63 0.58 1 1000 2.89 32.34 14.75 2 1000 2.89 32.64 14.91 4 1000 2.93 32.68 14.92 8 1000 2.94 32.84 14.97 16 1000 3.63 44.94 13.99 32 1000 2.90 32.59 15.03 64 1000 3.79 45.07 14.12 128 1000 3.65 45.44 14.10 256 1000 3.63 45.41 14.10 512 1000 4.17 46.87 14.85 1024 1000 4.39 47.53 15.20 2048 1000 4.62 48.35 15.59 4096 1000 5.18 50.45 16.55 8192 1000 5.95 55.06 18.27 16384 1000 7.76 62.16 21.39 32768 1000 10.97 73.64 26.73 65536 640 17.48 97.53 37.68 131072 320 25.69 204.77 95.02 262144 160 35.94 473.16 235.19 524288 80 63.28 903.53 465.74 1048576 40 159.97 1659.57 903.44 2097152 20 336.78 3158.35 1759.22 4194304 10 678.27 6120.96 3434.41 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.55 0.64 0.57 1 1000 4.13 45.00 16.98 2 1000 4.14 45.01 16.94 4 1000 4.17 45.10 17.07 8 1000 4.14 45.44 17.07 16 1000 4.15 45.35 17.14 32 1000 4.07 45.15 17.15 64 1000 4.17 45.38 17.30 128 1000 4.24 46.44 17.47 256 1000 4.30 47.49 18.23 512 1000 5.01 48.78 19.10 1024 1000 5.35 100.15 17.30 2048 1000 5.83 101.78 17.98 4096 1000 6.45 105.84 19.16 8192 1000 6.22 116.51 20.21 16384 1000 11.35 132.97 26.77 32768 1000 18.04 158.66 35.89 65536 640 18.43 271.68 88.32 131072 320 30.92 593.34 183.07 262144 160 58.58 1220.61 434.05 524288 80 129.17 2431.36 918.96 1048576 40 210.72 4638.79 1793.59 2097152 20 441.37 9096.32 3552.57 4194304 10 938.22 17716.27 6939.63 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.53 1.09 0.81 1 1000 21.77 28.18 24.98 2 1000 21.79 28.22 25.00 4 1000 21.89 28.21 25.05 8 1000 21.82 28.13 24.98 16 1000 21.79 28.28 25.04 32 1000 21.86 28.11 24.99 64 1000 21.91 28.15 25.03 128 1000 22.20 28.11 25.15 256 1000 22.32 28.14 25.23 512 1000 22.73 28.97 25.85 1024 1000 22.98 29.29 26.13 2048 1000 23.16 29.32 26.24 4096 1000 24.34 30.09 27.21 8192 1000 26.04 30.74 28.39 16384 1000 28.59 32.61 30.60 32768 1000 33.08 36.14 34.61 65536 640 42.04 42.28 42.16 131072 320 61.46 64.57 63.02 262144 160 157.56 160.59 159.08 524288 80 329.84 331.49 330.66 1048576 40 709.33 709.43 709.38 2097152 20 1374.06 1374.64 1374.35 4194304 10 2686.55 2686.82 2686.68 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.55 1.13 0.70 1 1000 36.82 47.71 41.46 2 1000 34.48 47.77 41.44 4 1000 34.30 47.68 41.45 8 1000 33.47 47.74 41.42 16 1000 32.60 47.78 41.52 32 1000 33.63 47.83 41.55 64 1000 36.04 47.88 41.55 128 1000 34.90 48.43 41.75 256 1000 35.61 48.48 41.86 512 1000 33.95 49.84 42.50 1024 1000 35.57 49.82 42.71 2048 1000 38.19 51.15 43.25 4096 1000 40.34 53.04 44.36 8192 1000 42.65 57.61 46.94 16384 1000 49.85 65.06 54.62 32768 1000 58.47 77.13 63.39 65536 640 73.61 101.17 81.50 131072 320 130.39 168.99 140.62 262144 160 390.46 524.33 425.34 524288 80 476.92 588.95 509.07 1048576 40 853.98 1117.46 964.62 2097152 20 1551.51 2173.30 1872.97 4194304 10 3046.80 4230.40 3639.75 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.54 1.18 0.63 1 1000 62.50 98.92 68.17 2 1000 62.02 98.71 67.98 4 1000 62.24 98.94 68.13 8 1000 62.34 98.99 68.19 16 1000 62.07 98.92 68.18 32 1000 62.41 98.80 68.04 64 1000 62.26 99.09 68.23 128 1000 62.39 100.07 69.13 256 1000 62.64 100.09 69.11 512 1000 65.30 102.73 71.42 1024 1000 65.01 103.59 71.77 2048 1000 65.33 105.94 73.29 4096 1000 69.36 109.93 75.82 8192 1000 75.71 120.35 82.96 16384 1000 83.67 137.91 93.35 32768 1000 101.22 165.21 112.04 65536 640 155.64 270.35 181.49 131072 320 332.66 524.27 374.49 262144 160 647.66 1069.77 727.70 524288 80 705.54 1040.97 761.70 1048576 40 1212.69 1952.13 1418.06 2097152 20 1992.89 3967.84 2887.32 4194304 10 4299.61 7894.49 5794.88 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.79 16.41 9.10 1 1000 2.05 17.08 9.57 2 1000 2.02 17.33 9.67 4 1000 1.93 17.11 9.52 8 1000 1.92 17.10 9.51 16 1000 1.95 17.12 9.53 32 1000 1.77 17.11 9.44 64 1000 1.91 17.13 9.52 128 1000 2.00 17.48 9.74 256 1000 2.04 17.38 9.71 512 1000 2.57 17.94 10.26 1024 1000 2.85 18.10 10.47 2048 1000 3.01 18.34 10.68 4096 1000 3.60 18.79 11.19 8192 1000 4.62 20.22 12.42 16384 1000 6.94 22.38 14.66 32768 1000 10.65 25.96 18.31 65536 640 18.01 32.84 25.42 131072 320 33.85 48.62 41.24 262144 160 75.58 125.17 100.38 524288 80 331.09 358.06 344.58 1048576 40 672.18 700.89 686.54 2097152 20 1297.59 1328.49 1313.04 4194304 10 2571.38 2603.31 2587.35 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.55 0.65 0.57 1 1000 2.77 16.70 13.16 2 1000 2.80 16.83 13.26 4 1000 2.70 16.67 13.16 8 1000 2.72 16.74 13.15 16 1000 2.72 16.79 13.20 32 1000 2.63 16.73 13.17 64 1000 2.76 16.87 13.30 128 1000 3.11 17.04 13.50 256 1000 3.08 17.06 13.51 512 1000 4.36 17.52 14.22 1024 1000 4.77 17.89 14.54 2048 1000 5.69 18.14 15.01 4096 1000 7.58 19.00 16.02 8192 1000 9.27 19.99 17.22 16384 1000 14.27 22.31 20.15 32768 1000 23.18 39.38 28.65 65536 640 42.13 68.07 53.75 131072 320 62.47 110.06 84.50 262144 160 218.49 331.13 273.21 524288 80 376.29 595.14 507.70 1048576 40 695.22 1121.02 955.45 2097152 20 1358.32 2169.73 1859.43 4194304 10 2599.56 4200.14 3593.03 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 3.97 17.04 15.04 1 1000 4.20 17.08 15.16 2 1000 4.23 17.15 15.24 4 1000 4.23 17.07 15.15 8 1000 4.21 17.04 15.12 16 1000 4.22 17.02 15.13 32 1000 4.18 17.00 15.12 64 1000 4.52 17.19 15.28 128 1000 4.83 17.28 15.49 256 1000 4.99 17.33 15.51 512 1000 7.11 17.91 16.27 1024 1000 8.11 18.22 16.72 2048 1000 10.41 18.72 17.42 4096 1000 14.67 20.03 18.97 8192 1000 17.49 29.43 20.97 16384 1000 21.87 33.75 28.58 32768 1000 25.79 60.95 44.89 65536 640 46.18 110.32 79.55 131072 320 99.21 246.96 180.31 262144 160 232.87 544.62 404.54 524288 80 358.38 986.96 708.29 1048576 40 726.11 1933.69 1406.38 2097152 20 1347.13 3763.84 2705.90 4194304 10 2537.51 7373.55 5254.06 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 3.06 17.08 10.07 1 1000 3.24 17.27 10.25 2 1000 3.22 17.15 10.18 4 1000 3.27 17.09 10.18 8 1000 3.29 17.10 10.19 16 1000 3.30 17.14 10.22 32 1000 3.13 17.45 10.29 64 1000 3.20 17.34 10.27 128 1000 3.34 17.65 10.49 256 1000 3.30 17.65 10.48 512 1000 3.87 17.95 10.91 1024 1000 4.02 18.10 11.06 2048 1000 4.29 18.34 11.31 4096 1000 4.89 18.87 11.88 8192 1000 6.33 20.41 13.37 16384 1000 8.54 22.55 15.55 32768 1000 12.32 26.05 19.19 65536 640 19.99 33.09 26.54 131072 320 35.90 48.48 42.19 262144 160 75.04 104.89 89.97 524288 80 157.61 333.79 245.70 1048576 40 269.01 672.59 470.80 2097152 20 484.41 1307.74 896.07 4194304 10 919.34 2602.65 1760.99 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 3.81 17.24 13.79 1 1000 4.02 17.21 13.85 2 1000 4.01 17.19 13.83 4 1000 4.05 17.21 13.84 8 1000 4.09 17.19 13.84 16 1000 4.08 17.20 13.86 32 1000 3.94 17.21 13.81 64 1000 4.05 17.27 13.88 128 1000 4.25 17.61 14.22 256 1000 4.22 17.61 14.20 512 1000 5.59 17.97 14.84 1024 1000 6.08 18.34 15.19 2048 1000 6.89 18.48 15.54 4096 1000 9.02 19.21 16.53 8192 1000 10.81 20.41 17.91 16384 1000 15.95 22.88 20.97 32768 1000 25.14 40.00 29.59 65536 640 34.50 54.90 44.48 131072 320 49.78 97.35 75.13 262144 160 122.48 261.40 195.87 524288 80 158.72 553.61 344.21 1048576 40 270.95 1068.87 633.16 2097152 20 486.57 2108.49 1206.63 4194304 10 905.25 4155.67 2328.35 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 5.45 40.18 26.58 1 1000 5.62 40.51 26.68 2 1000 5.62 40.39 26.68 4 1000 5.54 40.34 26.67 8 1000 5.69 40.60 26.86 16 1000 5.75 40.57 26.76 32 1000 5.82 40.44 26.88 64 1000 5.86 40.48 26.94 128 1000 6.74 41.00 27.13 256 1000 6.95 42.63 26.59 512 1000 8.20 50.93 30.81 1024 1000 9.28 51.23 31.33 2048 1000 11.03 52.58 33.36 4096 1000 14.99 53.16 34.83 8192 1000 19.79 63.91 40.41 16384 1000 31.15 76.94 52.54 32768 1000 48.77 103.01 77.10 65536 640 90.32 176.57 129.63 131072 320 178.61 313.15 251.96 262144 160 332.08 593.02 465.12 524288 80 616.98 1062.23 861.28 1048576 40 1172.09 2079.48 1626.47 2097152 20 2241.64 4059.53 3125.20 4194304 10 5007.74 8606.40 6718.51 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.59 0.64 0.61 1 1000 19.43 20.72 20.07 2 1000 19.43 20.52 19.97 4 1000 19.55 20.81 20.18 8 1000 19.56 20.76 20.16 16 1000 19.46 20.74 20.10 32 1000 19.77 20.65 20.21 64 1000 19.79 20.81 20.30 128 1000 20.15 21.11 20.63 256 1000 20.22 21.17 20.69 512 1000 21.31 22.42 21.86 1024 1000 21.64 22.55 22.09 2048 1000 22.32 23.57 22.94 4096 1000 23.93 24.99 24.46 8192 1000 26.04 27.31 26.68 16384 1000 29.76 31.10 30.43 32768 1000 38.90 40.80 39.85 65536 640 53.69 55.07 54.38 131072 320 83.73 86.08 84.90 262144 160 326.71 327.89 327.30 524288 80 480.17 482.51 481.34 1048576 40 896.76 897.44 897.10 2097152 20 1686.70 1687.53 1687.12 4194304 10 3235.37 3235.42 3235.40 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.57 0.64 0.59 1 1000 47.54 49.27 48.03 2 1000 47.49 49.08 47.98 4 1000 47.36 49.38 47.93 8 1000 47.46 49.44 48.00 16 1000 47.58 49.50 48.23 32 1000 48.37 49.54 48.75 64 1000 48.93 50.15 49.31 128 1000 48.97 50.40 49.39 256 1000 49.27 50.58 49.71 512 1000 52.28 53.63 52.67 1024 1000 53.09 54.58 53.62 2048 1000 55.18 56.24 55.57 4096 1000 59.58 60.37 59.83 8192 1000 66.61 68.22 67.27 16384 1000 78.29 79.00 78.56 32768 1000 124.80 127.85 126.33 65536 640 186.15 191.23 188.06 131072 320 402.45 454.15 434.98 262144 160 849.76 949.82 899.93 524288 80 1021.78 1159.25 1089.86 1048576 40 1919.19 2114.07 2012.27 2097152 20 3697.05 3969.75 3827.48 4194304 10 7118.19 7632.78 7414.96 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.56 0.64 0.57 1 1000 100.77 105.43 102.61 2 1000 100.24 104.77 102.03 4 1000 100.30 105.05 102.15 8 1000 100.10 104.92 102.17 16 1000 100.55 105.95 102.84 32 1000 101.57 107.91 104.47 64 1000 102.75 108.92 105.56 128 1000 103.12 108.76 105.65 256 1000 103.58 109.31 106.12 512 1000 110.65 115.39 112.19 1024 1000 113.14 117.03 114.45 2048 1000 117.94 121.21 118.95 4096 1000 127.80 130.16 128.86 8192 1000 153.97 157.40 155.05 16384 1000 193.05 194.87 193.88 32768 1000 267.51 270.39 269.56 65536 640 577.78 593.84 585.16 131072 320 1073.28 1185.04 1129.95 262144 160 2176.91 2346.49 2273.35 524288 80 3121.58 3518.04 3302.01 1048576 40 6139.16 7020.58 6615.89 2097152 20 11962.44 13780.70 13132.72 4194304 10 23874.13 27149.57 26100.36 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.12 1.16 1.14 1 1000 35.13 36.11 35.62 2 1000 35.05 36.09 35.57 4 1000 34.97 36.18 35.58 8 1000 35.06 36.15 35.61 16 1000 35.22 36.17 35.70 32 1000 35.54 36.62 36.08 64 1000 35.62 36.65 36.13 128 1000 35.52 36.89 36.20 256 1000 35.74 37.02 36.38 512 1000 37.35 38.55 37.95 1024 1000 37.63 38.67 38.15 2048 1000 38.76 39.87 39.32 4096 1000 40.71 41.59 41.15 8192 1000 45.89 46.79 46.34 16384 1000 52.99 53.87 53.43 32768 1000 66.48 67.38 66.93 65536 640 93.61 94.06 93.84 131072 320 138.75 138.99 138.87 262144 160 408.06 409.33 408.70 524288 80 436.87 438.95 437.91 1048576 40 785.94 786.42 786.18 2097152 20 1422.48 1424.22 1423.35 4194304 10 2668.83 2668.99 2668.91 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.28 1.37 1.31 1 1000 60.15 61.88 60.68 2 1000 59.96 61.65 60.51 4 1000 59.91 61.80 60.58 8 1000 59.95 62.00 60.66 16 1000 60.45 62.33 61.05 32 1000 60.85 62.61 61.45 64 1000 61.14 62.78 61.71 128 1000 61.21 62.79 61.76 256 1000 61.36 62.99 61.84 512 1000 64.02 65.80 64.62 1024 1000 64.93 66.67 65.45 2048 1000 66.81 68.76 67.46 4096 1000 70.83 72.49 71.45 8192 1000 80.88 84.05 82.14 16384 1000 96.19 103.05 98.45 32768 1000 125.22 128.07 126.40 65536 640 186.64 191.52 188.01 131072 320 403.12 452.97 435.09 262144 160 850.38 951.63 900.17 524288 80 1023.14 1157.58 1090.65 1048576 40 1915.34 2107.91 2009.00 2097152 20 3682.74 3964.60 3821.30 4194304 10 7132.85 7628.36 7440.39 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.70 1.79 1.73 1 1000 108.66 115.45 111.71 2 1000 108.70 115.03 111.61 4 1000 108.91 115.15 111.74 8 1000 108.76 115.16 111.73 16 1000 109.71 116.58 112.77 32 1000 110.02 116.68 113.08 64 1000 110.32 117.10 113.41 128 1000 110.85 117.30 113.80 256 1000 111.19 117.92 114.10 512 1000 115.06 121.82 118.32 1024 1000 117.14 123.45 120.02 2048 1000 121.47 127.27 124.17 4096 1000 129.33 135.30 132.53 8192 1000 155.95 161.60 157.86 16384 1000 194.42 199.20 196.45 32768 1000 270.02 275.67 273.43 65536 640 620.90 666.32 640.45 131072 320 1074.02 1185.12 1130.98 262144 160 2175.94 2343.81 2270.79 524288 80 3117.00 3470.10 3287.98 1048576 40 6141.68 7027.94 6622.42 2097152 20 11789.85 13801.62 13095.11 4194304 10 23315.63 27032.02 25859.61 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.56 0.87 0.71 1 1000 1.41 1.43 1.42 2 1000 1.43 1.45 1.44 4 1000 1.46 1.47 1.46 8 1000 1.49 1.50 1.49 16 1000 1.47 1.49 1.48 32 1000 1.38 1.40 1.39 64 1000 1.66 1.96 1.81 128 1000 1.63 1.94 1.78 256 1000 1.84 2.30 2.07 512 1000 1.96 2.40 2.18 1024 1000 1.98 2.44 2.21 2048 1000 2.70 3.15 2.92 4096 1000 3.16 3.61 3.39 8192 1000 4.48 4.93 4.70 16384 1000 4.91 22.64 13.77 32768 1000 7.67 26.05 16.86 65536 640 55.40 65.13 60.27 131072 320 25.35 48.64 37.00 262144 160 63.54 382.97 223.26 524288 80 100.30 139.32 119.81 1048576 40 218.98 257.79 238.38 2097152 20 442.53 477.91 460.22 4194304 10 863.31 899.93 881.62 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.50 0.57 0.53 1 1000 1.50 1.59 1.55 2 1000 1.48 1.52 1.50 4 1000 1.53 1.57 1.55 8 1000 1.45 1.50 1.48 16 1000 1.46 1.52 1.49 32 1000 1.39 1.45 1.42 64 1000 1.70 2.39 1.94 128 1000 1.64 2.40 1.92 256 1000 1.88 2.42 2.05 512 1000 2.00 2.55 2.17 1024 1000 2.17 2.71 2.34 2048 1000 2.50 3.04 2.67 4096 1000 3.30 3.86 3.48 8192 1000 4.76 5.31 4.96 16384 1000 8.33 9.60 8.90 32768 1000 11.86 102.91 72.99 65536 640 31.15 32.37 31.70 131072 320 61.27 62.50 61.80 262144 160 122.30 123.61 122.88 524288 80 244.46 245.68 244.99 1048576 40 738.39 739.65 738.97 2097152 20 1904.01 1907.29 1904.90 4194304 10 3669.27 3676.02 3672.94 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.72 0.87 0.77 1 1000 1.63 1.72 1.67 2 1000 1.70 1.80 1.75 4 1000 1.71 1.85 1.78 8 1000 1.66 1.81 1.74 16 1000 1.65 1.77 1.71 32 1000 1.57 1.71 1.64 64 1000 1.93 3.02 2.28 128 1000 1.99 3.11 2.39 256 1000 2.36 3.12 2.57 512 1000 2.37 3.18 2.61 1024 1000 2.60 3.47 2.85 2048 1000 2.92 3.95 3.20 4096 1000 3.59 4.64 3.94 8192 1000 5.18 6.26 5.56 16384 1000 12.76 60.98 52.91 32768 1000 16.97 20.83 18.39 65536 640 32.22 34.12 33.11 131072 320 68.11 73.49 69.70 262144 160 134.46 138.71 137.17 524288 80 284.99 295.16 289.05 1048576 40 828.01 832.80 830.14 2097152 20 1857.92 1861.94 1859.63 4194304 10 3693.18 3696.51 3694.78 #--------------------------------------------------- # Benchmarking Barrier # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 1.19 1.19 1.19 #--------------------------------------------------- # Benchmarking Barrier # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 1.15 1.15 1.15 #--------------------------------------------------- # Benchmarking Barrier # #processes = 8 #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 1.38 1.38 1.38 # All processes entering MPI_Finalize