#------------------------------------------------------------ # Intel(R) MPI Benchmarks 2019 Update 3, MPI-1 part #------------------------------------------------------------ # Date : Fri Nov 29 16:20:09 2019 # Machine : x86_64 # System : Linux # Release : 3.10.0-957.1.3.el7.x86_64 # Version : #1 SMP Thu Nov 29 14:49:43 UTC 2018 # MPI Version : 3.1 # MPI Thread Environment: # Calling sequence was: # IMB-MPI1 # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_local # Reduce_scatter # Reduce_scatter_block # Allgather # Allgatherv # Gather # Gatherv # Scatter # Scatterv # Alltoall # Alltoallv # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 0.85 0.00 1 1000 0.86 1.16 2 1000 0.86 2.33 4 1000 0.88 4.53 8 1000 0.87 9.19 16 1000 0.89 17.90 32 1000 0.90 35.69 64 1000 1.24 51.58 128 1000 1.38 92.74 256 1000 1.53 167.48 512 1000 2.21 231.99 1024 1000 2.57 398.64 2048 1000 2.91 703.92 4096 1000 3.33 1231.33 8192 1000 4.80 1706.94 16384 1000 7.54 2172.44 32768 1000 10.32 3174.59 65536 640 16.43 3989.71 131072 320 29.75 4405.51 262144 160 56.99 4599.50 524288 80 102.48 5116.07 1048576 40 196.95 5324.07 2097152 20 384.49 5454.31 4194304 10 782.89 5357.49 #--------------------------------------------------- # Benchmarking PingPing # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 2.08 0.00 1 1000 2.15 0.47 2 1000 2.09 0.96 4 1000 2.11 1.89 8 1000 2.15 3.72 16 1000 2.12 7.55 32 1000 2.55 12.55 64 1000 2.59 24.71 128 1000 2.93 43.75 256 1000 2.95 86.81 512 1000 3.67 139.48 1024 1000 3.73 274.36 2048 1000 4.15 493.05 4096 1000 4.71 869.41 8192 1000 5.42 1510.74 16384 1000 7.88 2079.90 32768 1000 14.75 2221.24 65536 640 26.47 2475.56 131072 320 49.67 2638.76 262144 160 103.70 2527.95 524288 80 181.09 2895.21 1048576 40 334.49 3134.82 2097152 20 571.92 3666.84 4194304 10 1146.80 3657.39 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.70 1.70 1.70 0.00 1 1000 1.61 1.61 1.61 1.24 2 1000 1.90 1.90 1.90 2.10 4 1000 1.92 1.92 1.92 4.18 8 1000 1.89 1.89 1.89 8.47 16 1000 1.89 1.89 1.89 16.93 32 1000 1.93 1.93 1.93 33.10 64 1000 2.01 2.01 2.01 63.68 128 1000 2.11 2.11 2.11 121.25 256 1000 2.12 2.12 2.12 241.05 512 1000 2.83 2.83 2.83 361.83 1024 1000 3.22 3.22 3.22 635.07 2048 1000 3.64 3.64 3.64 1126.15 4096 1000 4.11 4.11 4.11 1991.59 8192 1000 5.81 5.81 5.81 2820.50 16384 1000 8.89 8.89 8.89 3684.94 32768 1000 14.51 14.51 14.51 4516.74 65536 640 26.10 26.10 26.10 5021.33 131072 320 48.99 49.03 49.01 5347.00 262144 160 101.95 102.23 102.09 5128.29 524288 80 179.46 180.33 179.90 5814.75 1048576 40 344.53 344.57 344.55 6086.30 2097152 20 622.01 622.16 622.08 6741.52 4194304 10 1194.08 1194.13 1194.11 7024.86 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.26 1.26 1.26 0.00 1 1000 1.28 1.28 1.28 1.56 2 1000 1.27 1.27 1.27 3.16 4 1000 1.27 1.27 1.27 6.32 8 1000 1.30 1.30 1.30 12.31 16 1000 1.26 1.26 1.26 25.30 32 1000 1.33 1.33 1.33 48.09 64 1000 1.33 1.33 1.33 96.43 128 1000 1.43 1.43 1.43 179.55 256 1000 1.48 1.48 1.48 345.20 512 1000 2.03 2.03 2.03 503.98 1024 1000 2.30 2.30 2.30 891.86 2048 1000 2.60 2.60 2.60 1576.21 4096 1000 2.97 2.97 2.97 2754.57 8192 1000 4.86 4.86 4.86 3370.47 16384 1000 8.03 8.03 8.03 4080.96 32768 1000 13.89 13.89 13.89 4718.53 65536 640 24.87 24.90 24.89 5264.39 131072 320 45.58 45.69 45.64 5737.45 262144 160 107.49 108.05 107.77 4852.40 524288 80 210.07 211.44 210.87 4959.24 1048576 40 441.43 441.45 441.44 4750.63 2097152 20 859.53 862.99 860.81 4860.19 4194304 10 1652.08 1663.16 1659.36 5043.78 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 8 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 2.05 2.05 2.05 0.00 1 1000 1.90 1.90 1.90 1.05 2 1000 1.90 1.90 1.90 2.11 4 1000 1.92 1.92 1.92 4.17 8 1000 1.98 1.98 1.98 8.08 16 1000 1.73 1.73 1.73 18.49 32 1000 1.79 1.79 1.79 35.81 64 1000 1.78 1.78 1.78 71.91 128 1000 2.13 2.16 2.13 118.65 256 1000 1.98 1.99 1.98 257.89 512 1000 2.92 2.92 2.92 350.88 1024 1000 3.27 3.27 3.27 626.18 2048 1000 3.86 3.86 3.86 1061.35 4096 1000 4.31 4.37 4.33 1876.11 8192 1000 7.58 7.58 7.58 2161.02 16384 1000 11.98 11.98 11.98 2736.00 32768 1000 19.93 19.94 19.93 3287.28 65536 640 33.65 33.69 33.67 3890.95 131072 320 63.91 64.18 64.06 4084.76 262144 160 140.87 142.23 141.58 3686.13 524288 80 341.36 344.29 342.84 3045.58 1048576 40 698.72 710.07 705.98 2953.46 2097152 20 1386.86 1417.76 1406.42 2958.41 4194304 10 2688.28 2792.33 2753.94 3004.16 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 4.08 4.08 4.08 0.00 1 1000 4.28 4.28 4.28 0.93 2 1000 4.15 4.16 4.15 1.93 4 1000 4.28 4.28 4.28 3.74 8 1000 4.26 4.26 4.26 7.51 16 1000 4.24 4.24 4.24 15.10 32 1000 4.45 4.45 4.45 28.77 64 1000 4.83 4.83 4.83 53.04 128 1000 4.65 4.65 4.65 110.19 256 1000 4.96 4.96 4.96 206.42 512 1000 6.93 6.93 6.93 295.55 1024 1000 6.90 6.90 6.90 593.50 2048 1000 7.60 7.60 7.60 1077.38 4096 1000 8.50 8.50 8.50 1928.34 8192 1000 11.86 11.86 11.86 2763.22 16384 1000 17.53 17.53 17.53 3738.73 32768 1000 25.77 25.78 25.77 5084.85 65536 640 47.76 47.78 47.77 5486.95 131072 320 91.50 91.60 91.55 5723.83 262144 160 191.60 191.61 191.60 5472.56 524288 80 310.97 311.00 310.98 6743.32 1048576 40 592.09 592.10 592.09 7083.80 2097152 20 1178.70 1178.70 1178.70 7116.80 4194304 10 2270.87 2314.61 2292.74 7248.39 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 4.76 4.77 4.76 0.00 1 1000 5.39 5.39 5.39 0.74 2 1000 5.20 5.20 5.20 1.54 4 1000 5.30 5.31 5.31 3.01 8 1000 5.28 5.28 5.28 6.06 16 1000 5.38 5.38 5.38 11.89 32 1000 5.50 5.51 5.51 23.24 64 1000 5.74 5.74 5.74 44.61 128 1000 5.71 5.71 5.71 89.60 256 1000 5.94 5.94 5.94 172.32 512 1000 7.14 7.15 7.15 286.52 1024 1000 7.22 7.22 7.22 567.49 2048 1000 8.09 8.09 8.09 1012.47 4096 1000 9.18 9.18 9.18 1784.50 8192 1000 12.42 12.43 12.42 2636.90 16384 1000 19.01 19.01 19.01 3447.25 32768 1000 27.32 27.33 27.33 4795.41 65536 640 50.95 51.00 50.97 5140.31 131072 320 97.10 97.29 97.19 5389.12 262144 160 208.86 209.08 208.97 5015.30 524288 80 458.69 458.70 458.69 4571.99 1048576 40 743.38 748.47 746.25 5603.81 2097152 20 1313.23 1340.46 1329.05 6258.01 4194304 10 2744.73 3011.61 2918.20 5570.85 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 8 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 4.88 4.88 4.88 0.00 1 1000 5.19 5.19 5.19 0.77 2 1000 5.23 5.24 5.23 1.53 4 1000 5.26 5.27 5.27 3.04 8 1000 5.25 5.26 5.26 6.09 16 1000 5.37 5.38 5.38 11.90 32 1000 5.45 5.46 5.45 23.45 64 1000 5.71 5.72 5.72 44.77 128 1000 5.67 5.67 5.67 90.27 256 1000 5.94 5.95 5.94 172.19 512 1000 7.23 7.24 7.23 283.06 1024 1000 7.46 7.47 7.46 548.63 2048 1000 8.00 8.00 8.00 1023.55 4096 1000 9.29 9.30 9.29 1762.31 8192 1000 14.79 14.80 14.79 2214.60 16384 1000 21.61 21.62 21.61 3031.65 32768 1000 35.83 35.85 35.85 3655.68 65536 640 66.28 66.39 66.34 3948.74 131072 320 131.88 132.20 132.02 3965.86 262144 160 304.02 305.07 304.48 3437.13 524288 80 652.16 660.92 656.85 3173.07 1048576 40 1269.52 1300.50 1285.37 3225.14 2097152 20 2217.26 2340.77 2299.95 3583.70 4194304 10 3830.56 4245.52 3972.45 3951.75 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 2.02 2.06 2.04 4 1000 2.18 2.26 2.22 8 1000 2.10 2.18 2.14 16 1000 2.15 2.22 2.18 32 1000 2.25 2.30 2.27 64 1000 2.46 2.81 2.63 128 1000 2.53 2.82 2.67 256 1000 3.10 3.58 3.34 512 1000 3.36 3.85 3.60 1024 1000 3.73 4.21 3.97 2048 1000 4.18 4.65 4.42 4096 1000 76.05 76.83 76.44 8192 1000 79.22 79.84 79.53 16384 1000 39.70 40.09 39.90 32768 1000 47.72 48.05 47.88 65536 640 60.50 62.34 61.42 131072 320 87.84 89.34 88.59 262144 160 146.04 152.28 149.16 524288 80 348.40 351.99 350.20 1048576 40 585.26 585.98 585.62 2097152 20 1681.19 1686.11 1683.65 4194304 10 3240.62 3249.33 3244.97 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.47 0.53 0.49 4 1000 2.29 2.35 2.32 8 1000 2.69 2.75 2.72 16 1000 2.63 2.67 2.65 32 1000 2.60 2.63 2.62 64 1000 3.12 3.86 3.38 128 1000 3.42 4.28 3.76 256 1000 3.96 4.60 4.15 512 1000 151.50 159.15 156.37 1024 1000 32.21 47.37 40.07 2048 1000 32.76 48.30 40.83 4096 1000 164.11 169.82 167.18 8192 1000 177.04 179.30 178.11 16384 1000 77.68 79.11 78.49 32768 1000 90.26 91.46 91.03 65536 640 114.60 116.53 115.44 131072 320 179.40 181.07 179.99 262144 160 262.82 271.92 267.66 524288 80 540.62 553.52 545.24 1048576 40 1151.70 1163.49 1156.31 2097152 20 2027.83 2098.34 2061.12 4194304 10 3767.16 3858.51 3810.55 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.73 0.87 0.77 4 1000 2.78 2.96 2.86 8 1000 3.29 3.71 3.44 16 1000 2.80 2.95 2.87 32 1000 2.79 2.92 2.85 64 1000 3.84 5.11 4.24 128 1000 207.20 214.26 211.37 256 1000 206.37 213.45 209.79 512 1000 250.70 257.77 254.38 1024 1000 253.87 260.91 258.21 2048 1000 268.36 275.30 272.08 4096 1000 282.17 289.37 284.58 8192 1000 292.62 300.64 296.54 16384 1000 125.13 128.09 127.24 32768 1000 145.99 149.76 148.51 65536 640 185.35 191.18 188.88 131072 320 366.35 377.75 374.32 262144 160 485.60 499.42 491.52 524288 80 833.77 860.94 849.14 1048576 40 1628.60 1682.59 1658.62 2097152 20 3305.31 3402.70 3359.18 4194304 10 6410.50 6662.48 6567.08 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.74 0.86 0.80 4 1000 2.25 3.20 2.72 8 1000 2.29 3.28 2.78 16 1000 2.36 3.33 2.85 32 1000 2.46 3.43 2.94 64 1000 2.53 3.60 3.06 128 1000 2.68 3.84 3.26 256 1000 2.85 3.91 3.38 512 1000 2.92 4.02 3.47 1024 1000 3.12 4.35 3.74 2048 1000 3.30 4.51 3.90 4096 1000 3.79 5.01 4.40 8192 1000 5.06 6.52 5.79 16384 1000 7.99 10.42 9.20 32768 1000 12.26 53.99 33.13 65536 640 18.99 90.19 54.59 131072 320 32.74 177.99 105.37 262144 160 60.86 335.09 197.98 524288 80 114.52 739.94 427.23 1048576 40 257.01 1460.92 858.96 2097152 20 610.54 2895.32 1752.93 4194304 10 1704.91 5789.20 3747.05 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.70 0.81 0.73 4 1000 2.23 3.11 2.45 8 1000 2.33 3.20 2.55 16 1000 2.31 3.23 2.54 32 1000 2.60 3.61 2.86 64 1000 2.83 3.85 3.09 128 1000 3.08 4.18 3.36 256 1000 3.42 4.51 3.74 512 1000 3.60 4.65 3.91 1024 1000 3.87 5.14 4.24 2048 1000 4.26 5.36 4.58 4096 1000 5.20 6.51 5.58 8192 1000 7.72 9.21 8.14 16384 1000 12.25 14.50 12.85 32768 1000 55.94 84.56 67.52 65536 640 71.17 107.03 85.92 131072 320 32.64 245.91 182.62 262144 160 61.07 436.64 333.45 524288 80 120.32 905.23 696.19 1048576 40 265.74 1770.27 1381.12 2097152 20 616.16 3511.82 2775.89 4194304 10 1973.92 6935.03 5685.39 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.74 0.86 0.76 4 1000 2.51 3.39 2.63 8 1000 2.75 3.63 2.86 16 1000 2.68 3.55 2.79 32 1000 2.56 3.45 2.68 64 1000 3.25 4.15 3.37 128 1000 3.19 4.11 3.43 256 1000 3.22 4.04 3.44 512 1000 3.75 4.69 4.00 1024 1000 4.07 5.08 4.31 2048 1000 5.39 53.66 20.74 4096 1000 5.82 53.65 20.85 8192 1000 7.72 61.50 23.86 16384 1000 65.64 106.17 77.94 32768 1000 75.38 118.83 88.11 65536 640 97.47 146.57 110.88 131072 320 32.89 305.09 234.40 262144 160 61.29 505.24 410.89 524288 80 119.63 956.21 810.76 1048576 40 271.51 1844.13 1603.51 2097152 20 621.43 3566.46 3155.66 4194304 10 1900.48 7017.66 6341.51 #---------------------------------------------------------------- # Benchmarking Reduce_local # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.30 0.30 0.30 4 1000 0.37 0.39 0.38 8 1000 0.38 0.40 0.39 16 1000 0.38 0.41 0.40 32 1000 0.36 0.39 0.37 64 1000 0.37 0.40 0.38 128 1000 0.39 0.43 0.41 256 1000 0.42 0.46 0.44 512 1000 0.49 0.50 0.50 1024 1000 0.53 0.54 0.54 2048 1000 0.70 0.71 0.70 4096 1000 1.02 1.07 1.05 8192 1000 1.67 1.72 1.69 16384 1000 4.16 4.25 4.20 32768 1000 7.67 7.69 7.68 65536 640 14.50 15.75 15.13 131072 320 28.79 28.80 28.79 262144 160 57.24 58.71 57.97 524288 80 136.54 137.97 137.25 1048576 40 301.45 350.41 325.93 2097152 20 586.92 587.84 587.38 4194304 10 1158.31 1164.66 1161.49 #---------------------------------------------------------------- # Benchmarking Reduce_local # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.29 0.29 0.29 4 1000 0.35 0.38 0.36 8 1000 0.33 0.34 0.34 16 1000 0.33 0.34 0.33 32 1000 0.36 0.38 0.37 64 1000 0.37 0.38 0.38 128 1000 0.39 0.42 0.41 256 1000 0.40 0.41 0.41 512 1000 0.45 0.48 0.46 1024 1000 0.53 0.56 0.54 2048 1000 0.70 0.71 0.70 4096 1000 1.02 1.05 1.02 8192 1000 1.68 1.72 1.70 16384 1000 4.41 4.51 4.47 32768 1000 8.25 8.27 8.26 65536 640 14.15 15.63 14.95 131072 320 28.82 28.89 28.87 262144 160 57.04 58.17 57.61 524288 80 136.19 137.76 137.04 1048576 40 303.01 305.42 304.23 2097152 20 571.83 592.90 582.60 4194304 10 1124.90 1180.24 1153.88 #---------------------------------------------------------------- # Benchmarking Reduce_local # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.29 0.31 0.30 4 1000 0.37 0.39 0.38 8 1000 0.36 0.36 0.36 16 1000 0.37 0.37 0.37 32 1000 0.36 0.38 0.36 64 1000 0.37 0.39 0.38 128 1000 0.40 0.40 0.40 256 1000 0.41 0.42 0.42 512 1000 0.46 0.50 0.47 1024 1000 0.54 0.55 0.54 2048 1000 0.69 0.70 0.69 4096 1000 1.01 1.03 1.02 8192 1000 1.67 1.69 1.67 16384 1000 4.06 4.18 4.12 32768 1000 7.62 7.64 7.63 65536 640 10.79 11.85 10.99 131072 320 21.31 21.48 21.36 262144 160 56.28 57.64 56.76 524288 80 135.19 139.45 137.60 1048576 40 355.35 362.40 358.82 2097152 20 697.54 731.49 716.90 4194304 10 1397.73 1447.39 1425.58 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.09 1.21 1.15 4 1000 22.12 22.46 22.29 8 1000 21.75 22.05 21.90 16 1000 22.17 22.39 22.28 32 1000 22.31 22.72 22.52 64 1000 22.58 22.93 22.75 128 1000 23.17 23.27 23.22 256 1000 22.69 23.23 22.96 512 1000 23.53 24.03 23.78 1024 1000 23.88 24.28 24.08 2048 1000 24.70 24.96 24.83 4096 1000 26.32 26.58 26.45 8192 1000 29.39 29.58 29.49 16384 1000 35.33 35.43 35.38 32768 1000 46.00 46.87 46.44 65536 640 66.82 67.67 67.24 131072 320 165.39 171.94 168.67 262144 160 270.28 273.02 271.65 524288 80 498.21 499.26 498.73 1048576 40 908.69 913.08 910.89 2097152 20 1701.42 1707.88 1704.65 4194304 10 3249.63 3258.28 3253.96 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.14 1.30 1.18 4 1000 35.35 35.87 35.61 8 1000 35.44 35.91 35.66 16 1000 35.69 36.43 36.01 32 1000 35.92 36.47 36.18 64 1000 36.21 37.13 36.57 128 1000 36.44 37.14 36.71 256 1000 38.15 38.67 38.34 512 1000 39.04 39.60 39.27 1024 1000 40.12 40.93 40.46 2048 1000 42.49 43.39 42.85 4096 1000 47.98 48.96 48.34 8192 1000 56.43 56.97 56.58 16384 1000 71.65 72.25 71.92 32768 1000 101.01 104.64 102.30 65536 640 198.33 204.06 200.55 131072 320 312.91 319.18 315.89 262144 160 747.62 753.23 751.32 524288 80 1484.09 1513.66 1504.90 1048576 40 2796.45 2801.16 2799.32 2097152 20 5109.68 5125.99 5117.75 4194304 10 9506.36 9578.26 9559.13 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.17 1.40 1.21 4 1000 48.17 49.20 48.68 8 1000 48.79 50.41 49.64 16 1000 49.20 50.17 49.67 32 1000 49.29 50.61 49.84 64 1000 49.72 51.49 50.36 128 1000 51.29 52.56 51.93 256 1000 52.75 53.95 53.44 512 1000 54.76 55.70 55.40 1024 1000 57.38 58.27 57.86 2048 1000 65.65 66.75 66.40 4096 1000 78.37 80.17 79.54 8192 1000 101.28 103.85 103.19 16384 1000 148.91 150.70 149.97 32768 1000 263.50 271.42 268.42 65536 640 437.69 446.66 441.82 131072 320 858.08 878.59 867.62 262144 160 1906.79 1941.60 1928.32 524288 80 4260.19 4436.36 4361.60 1048576 40 8554.05 8723.99 8647.06 2097152 20 15747.79 16164.52 16010.65 4194304 10 31282.39 31580.18 31482.12 #---------------------------------------------------------------- # Benchmarking Reduce_scatter_block # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.70 0.80 0.75 4 1000 20.73 21.38 21.06 8 1000 21.24 21.83 21.54 16 1000 21.18 21.70 21.44 32 1000 21.42 22.02 21.72 64 1000 21.83 22.16 22.00 128 1000 21.57 22.26 21.92 256 1000 21.74 22.41 22.07 512 1000 23.09 23.35 23.22 1024 1000 23.42 23.61 23.52 2048 1000 23.91 24.23 24.07 4096 1000 25.49 25.81 25.65 8192 1000 28.73 28.81 28.77 16384 1000 34.57 34.62 34.60 32768 1000 45.09 45.86 45.48 65536 640 66.24 66.96 66.60 131072 320 163.00 171.84 167.42 262144 160 270.97 271.43 271.20 524288 80 499.10 501.92 500.51 1048576 40 907.10 908.92 908.01 2097152 20 1698.83 1703.39 1701.11 4194304 10 3261.64 3265.35 3263.50 #---------------------------------------------------------------- # Benchmarking Reduce_scatter_block # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.66 0.80 0.70 4 1000 34.64 35.03 34.78 8 1000 34.47 34.82 34.66 16 1000 35.22 35.64 35.40 32 1000 35.28 35.57 35.44 64 1000 35.72 36.04 35.91 128 1000 35.79 36.08 35.99 256 1000 37.46 37.73 37.57 512 1000 38.36 38.61 38.48 1024 1000 39.27 39.66 39.44 2048 1000 41.69 41.93 41.81 4096 1000 47.39 48.14 47.68 8192 1000 55.34 55.78 55.56 16384 1000 70.45 71.03 70.71 32768 1000 101.17 102.29 101.52 65536 640 196.71 200.49 198.36 131072 320 311.16 316.59 313.45 262144 160 742.88 746.56 744.91 524288 80 1510.47 1519.56 1515.61 1048576 40 2801.18 2803.64 2802.60 2097152 20 5135.17 5147.99 5140.27 4194304 10 9531.10 9595.75 9569.88 #---------------------------------------------------------------- # Benchmarking Reduce_scatter_block # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.67 0.79 0.69 4 1000 47.12 47.96 47.63 8 1000 48.52 49.15 48.75 16 1000 48.62 49.32 49.03 32 1000 48.78 49.52 49.26 64 1000 49.20 49.85 49.59 128 1000 50.70 51.42 51.14 256 1000 52.57 53.32 53.06 512 1000 53.84 54.67 54.31 1024 1000 56.57 57.35 57.02 2048 1000 64.88 66.03 65.49 4096 1000 77.36 79.31 78.53 8192 1000 100.82 103.21 102.51 16384 1000 148.99 150.93 149.84 32768 1000 259.25 268.34 264.78 65536 640 432.24 442.37 438.20 131072 320 864.58 883.32 873.58 262144 160 1913.36 1949.90 1935.06 524288 80 4296.21 4450.34 4376.06 1048576 40 8584.26 8749.47 8671.75 2097152 20 15978.76 16356.10 16221.07 4194304 10 30434.90 30860.20 30742.43 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 18.79 19.26 19.02 1 1000 18.06 18.72 18.39 2 1000 17.10 17.99 17.54 4 1000 18.12 19.12 18.62 8 1000 18.09 18.86 18.48 16 1000 18.00 19.05 18.52 32 1000 18.58 18.88 18.73 64 1000 18.54 18.88 18.71 128 1000 18.62 19.12 18.87 256 1000 17.74 18.26 18.00 512 1000 19.77 20.36 20.06 1024 1000 19.73 20.36 20.04 2048 1000 20.37 20.71 20.54 4096 1000 21.19 21.55 21.37 8192 1000 22.95 23.38 23.17 16384 1000 25.42 25.99 25.70 32768 1000 31.55 32.98 32.26 65536 640 43.84 44.80 44.32 131072 320 68.66 70.22 69.44 262144 160 192.87 193.06 192.96 524288 80 334.34 334.67 334.51 1048576 40 605.17 606.25 605.71 2097152 20 1103.95 1105.92 1104.93 4194304 10 2113.14 2113.86 2113.50 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.70 0.80 0.73 1 1000 30.07 30.55 30.21 2 1000 30.05 30.51 30.19 4 1000 30.10 30.60 30.27 8 1000 30.09 30.72 30.27 16 1000 30.69 31.27 30.91 32 1000 30.39 30.93 30.58 64 1000 30.68 31.40 30.92 128 1000 30.87 31.20 30.98 256 1000 32.50 32.96 32.69 512 1000 33.66 33.94 33.79 1024 1000 34.02 34.15 34.07 2048 1000 35.26 35.56 35.36 4096 1000 39.00 39.29 39.09 8192 1000 44.81 45.22 45.02 16384 1000 57.45 59.09 58.12 32768 1000 89.01 89.40 89.14 65536 640 119.31 121.04 120.21 131072 320 228.93 252.16 240.27 262144 160 497.51 510.61 505.87 524288 80 922.61 926.89 925.00 1048576 40 1707.50 1711.50 1709.51 2097152 20 3185.86 3195.43 3188.60 4194304 10 5863.59 5980.77 5921.61 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.70 0.80 0.72 1 1000 41.95 42.78 42.54 2 1000 41.64 42.88 42.41 4 1000 41.87 43.01 42.58 8 1000 42.79 43.62 43.29 16 1000 42.70 43.85 43.49 32 1000 42.33 43.53 43.04 64 1000 42.65 43.79 43.27 128 1000 44.60 46.11 45.38 256 1000 46.05 46.78 46.55 512 1000 47.55 48.40 48.10 1024 1000 48.84 49.44 49.20 2048 1000 55.36 56.00 55.70 4096 1000 64.94 65.85 65.56 8192 1000 83.20 84.96 84.04 16384 1000 161.92 166.65 164.56 32768 1000 230.05 234.84 233.13 65536 640 329.76 335.86 332.44 131072 320 691.69 754.57 735.27 262144 160 1468.33 1528.98 1508.33 524288 80 2880.91 2956.30 2921.56 1048576 40 5455.49 5642.77 5562.60 2097152 20 8433.56 8662.35 8555.96 4194304 10 16004.19 16135.87 16069.83 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.83 0.90 0.87 1 1000 18.74 19.10 18.92 2 1000 18.85 19.34 19.10 4 1000 18.19 18.61 18.40 8 1000 18.92 19.43 19.17 16 1000 18.83 19.35 19.09 32 1000 19.00 19.59 19.29 64 1000 18.19 18.93 18.56 128 1000 19.21 19.87 19.54 256 1000 19.36 20.08 19.72 512 1000 20.06 20.75 20.40 1024 1000 20.37 21.06 20.71 2048 1000 20.80 21.53 21.17 4096 1000 21.54 22.20 21.87 8192 1000 23.49 24.13 23.81 16384 1000 26.11 26.96 26.54 32768 1000 32.41 33.75 33.08 65536 640 44.83 45.83 45.33 131072 320 71.53 73.00 72.26 262144 160 193.58 193.60 193.59 524288 80 339.47 339.69 339.58 1048576 40 623.78 626.22 625.00 2097152 20 1143.04 1148.56 1145.80 4194304 10 2175.94 2176.69 2176.31 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.88 1.00 0.91 1 1000 31.40 31.88 31.60 2 1000 31.27 31.80 31.52 4 1000 31.21 31.88 31.48 8 1000 31.38 31.67 31.48 16 1000 42.27 42.79 42.47 32 1000 42.88 43.40 43.06 64 1000 43.08 43.54 43.25 128 1000 43.08 43.53 43.22 256 1000 43.10 43.51 43.25 512 1000 45.41 46.32 45.84 1024 1000 46.64 47.49 46.96 2048 1000 48.05 48.51 48.22 4096 1000 50.23 50.79 50.53 8192 1000 57.67 58.01 57.80 16384 1000 67.22 68.40 67.64 32768 1000 87.86 88.04 87.96 65536 640 126.21 127.17 126.49 131072 320 202.74 229.95 214.71 262144 160 497.80 526.17 511.74 524288 80 967.18 973.84 971.26 1048576 40 1780.34 1784.56 1781.91 2097152 20 3251.35 3267.22 3257.53 4194304 10 5899.34 6026.77 5971.83 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.96 1.07 0.99 1 1000 43.52 44.70 44.19 2 1000 43.39 44.57 44.03 4 1000 43.41 44.45 44.04 8 1000 44.08 45.28 44.71 16 1000 44.03 45.44 44.91 32 1000 44.31 45.26 44.89 64 1000 44.28 45.36 45.08 128 1000 46.50 47.41 47.06 256 1000 47.75 48.85 48.44 512 1000 49.90 50.57 50.32 1024 1000 51.50 52.21 52.00 2048 1000 58.38 59.39 59.10 4096 1000 68.68 70.12 69.66 8192 1000 88.32 90.26 89.51 16384 1000 165.34 169.18 167.25 32768 1000 232.38 236.43 234.74 65536 640 341.93 351.07 346.21 131072 320 735.41 789.45 767.30 262144 160 1523.00 1595.32 1557.38 524288 80 2804.90 2951.35 2896.27 1048576 40 5567.70 5786.65 5701.73 2097152 20 10613.54 10961.59 10841.28 4194304 10 20466.91 21042.40 20799.41 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.71 0.86 0.78 1 1000 1.24 17.22 9.23 2 1000 0.83 17.26 9.04 4 1000 0.90 18.36 9.63 8 1000 1.26 17.76 9.51 16 1000 0.92 18.35 9.63 32 1000 0.93 18.32 9.62 64 1000 1.20 17.66 9.43 128 1000 0.94 18.57 9.75 256 1000 0.94 18.56 9.75 512 1000 1.37 19.05 10.21 1024 1000 1.56 18.40 9.98 2048 1000 1.67 18.98 10.32 4096 1000 1.80 19.12 10.46 8192 1000 2.70 20.30 11.50 16384 1000 4.04 21.72 12.88 32768 1000 6.74 24.69 15.71 65536 640 12.29 32.73 22.51 131072 320 25.81 51.31 38.56 262144 160 48.91 124.00 86.45 524288 80 88.28 229.22 158.75 1048576 40 156.39 440.12 298.26 2097152 20 306.93 811.94 559.43 4194304 10 610.51 1567.39 1088.95 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.71 0.84 0.74 1 1000 2.43 27.41 12.54 2 1000 2.43 27.26 12.52 4 1000 2.48 27.63 12.67 8 1000 2.49 27.61 12.67 16 1000 3.15 38.27 11.94 32 1000 2.43 27.57 12.77 64 1000 3.17 38.42 12.00 128 1000 3.03 38.54 11.94 256 1000 3.08 38.63 11.99 512 1000 3.64 39.91 12.73 1024 1000 3.91 40.26 13.03 2048 1000 4.14 43.11 13.92 4096 1000 4.52 42.56 14.04 8192 1000 5.28 45.68 15.40 16384 1000 7.10 51.62 18.26 32768 1000 9.96 57.57 21.92 65536 640 16.19 75.14 31.02 131072 320 23.75 154.19 77.96 262144 160 36.71 331.18 167.99 524288 80 77.29 579.76 313.06 1048576 40 181.90 1049.65 601.20 2097152 20 335.43 1972.06 1150.16 4194304 10 651.32 3854.49 2269.44 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.71 0.81 0.73 1 1000 3.61 38.39 14.68 2 1000 3.56 38.40 14.72 4 1000 3.65 38.60 14.78 8 1000 3.64 38.68 14.78 16 1000 3.63 38.69 14.90 32 1000 3.59 38.57 14.96 64 1000 3.68 38.76 14.97 128 1000 3.67 39.61 15.20 256 1000 3.73 40.40 15.77 512 1000 4.49 41.68 16.70 1024 1000 4.69 84.93 15.05 2048 1000 4.91 86.36 15.37 4096 1000 5.48 88.79 16.20 8192 1000 7.83 97.02 19.13 16384 1000 7.49 107.53 20.14 32768 1000 16.64 123.56 30.26 65536 640 16.86 214.93 75.82 131072 320 31.77 413.89 140.59 262144 160 64.88 802.08 304.03 524288 80 154.65 1506.11 608.85 1048576 40 329.21 2894.97 1208.29 2097152 20 508.72 5591.49 2322.73 4194304 10 882.87 10912.62 4503.23 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.73 1.36 1.05 1 1000 18.64 24.23 21.44 2 1000 18.75 24.44 21.60 4 1000 18.82 24.47 21.65 8 1000 18.75 24.39 21.57 16 1000 18.70 24.39 21.54 32 1000 18.68 24.23 21.45 64 1000 18.99 24.57 21.78 128 1000 19.33 24.45 21.89 256 1000 19.24 24.39 21.81 512 1000 19.76 25.17 22.47 1024 1000 19.73 25.28 22.50 2048 1000 20.08 25.43 22.76 4096 1000 20.76 25.88 23.32 8192 1000 21.92 26.89 24.41 16384 1000 23.55 28.42 25.98 32768 1000 27.02 31.67 29.34 65536 640 33.24 37.79 35.51 131072 320 51.77 54.13 52.95 262144 160 129.76 129.90 129.83 524288 80 243.75 248.01 245.88 1048576 40 450.51 451.98 451.25 2097152 20 823.38 823.46 823.42 4194304 10 1577.15 1577.29 1577.22 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.70 1.43 0.90 1 1000 30.74 40.53 35.67 2 1000 32.92 40.66 35.74 4 1000 31.82 40.77 35.82 8 1000 32.93 40.73 35.79 16 1000 31.81 40.69 35.76 32 1000 32.37 40.76 35.74 64 1000 33.09 40.82 35.80 128 1000 32.69 41.37 35.93 256 1000 32.00 41.37 35.89 512 1000 32.07 42.49 36.66 1024 1000 33.95 42.57 36.87 2048 1000 29.80 45.55 38.85 4096 1000 31.72 44.99 38.16 8192 1000 36.55 48.01 39.43 16384 1000 44.06 54.65 46.95 32768 1000 47.53 61.30 51.67 65536 640 58.40 78.23 63.82 131072 320 103.09 125.53 109.13 262144 160 214.02 295.20 240.90 524288 80 338.14 445.65 369.53 1048576 40 604.31 803.75 665.78 2097152 20 1149.29 1516.06 1250.66 4194304 10 2207.69 2963.14 2451.79 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.72 1.55 0.83 1 1000 49.20 84.16 57.47 2 1000 49.90 84.31 57.62 4 1000 49.18 84.40 57.60 8 1000 49.70 84.34 57.60 16 1000 50.33 84.42 57.68 32 1000 50.15 84.20 57.48 64 1000 52.34 85.15 58.44 128 1000 53.17 85.59 58.85 256 1000 54.10 85.63 58.98 512 1000 56.75 88.28 61.52 1024 1000 56.53 88.83 61.86 2048 1000 56.78 90.89 63.14 4096 1000 60.01 93.92 65.21 8192 1000 64.65 102.65 71.51 16384 1000 69.15 113.73 77.45 32768 1000 80.39 132.10 90.34 65536 640 123.94 210.34 146.66 131072 320 188.54 280.30 207.80 262144 160 329.30 591.30 404.96 524288 80 498.02 828.91 577.63 1048576 40 934.21 1592.06 1118.31 2097152 20 1690.33 3019.99 2098.63 4194304 10 3520.09 5953.93 4118.83 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.73 15.56 8.64 1 1000 1.99 16.48 9.23 2 1000 1.94 17.24 9.59 4 1000 1.88 16.79 9.34 8 1000 1.92 17.04 9.48 16 1000 1.88 16.89 9.38 32 1000 1.82 17.06 9.44 64 1000 1.77 16.05 8.91 128 1000 2.01 17.22 9.62 256 1000 1.98 17.04 9.51 512 1000 2.51 17.50 10.01 1024 1000 2.75 17.35 10.05 2048 1000 2.66 16.53 9.60 4096 1000 3.06 17.84 10.45 8192 1000 4.36 18.18 11.27 16384 1000 6.69 19.65 13.17 32768 1000 9.99 21.23 15.61 65536 640 18.54 27.09 22.81 131072 320 32.59 35.95 34.27 262144 160 75.15 109.23 92.19 524288 80 209.13 234.23 221.68 1048576 40 403.50 426.84 415.17 2097152 20 785.40 806.39 795.90 4194304 10 1551.37 1567.81 1559.59 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.71 0.83 0.75 1 1000 2.83 16.44 13.00 2 1000 2.77 17.03 13.09 4 1000 2.80 16.46 13.01 8 1000 2.70 16.57 13.05 16 1000 2.38 14.35 11.33 32 1000 2.65 16.77 13.19 64 1000 2.71 16.69 13.16 128 1000 3.05 16.76 13.31 256 1000 3.06 16.55 13.16 512 1000 4.31 17.25 13.95 1024 1000 4.53 17.11 13.87 2048 1000 4.97 17.40 14.23 4096 1000 5.82 18.12 15.01 8192 1000 9.32 18.35 16.01 16384 1000 14.42 19.36 18.05 32768 1000 20.30 31.88 26.19 65536 640 35.99 54.86 44.66 131072 320 53.21 95.65 75.87 262144 160 138.91 249.27 198.31 524288 80 232.62 438.01 355.30 1048576 40 422.03 817.10 662.87 2097152 20 801.58 1594.20 1290.60 4194304 10 1561.93 3121.92 2534.06 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 4.39 17.98 16.09 1 1000 4.00 15.32 13.72 2 1000 3.79 14.61 13.07 4 1000 3.85 14.63 13.10 8 1000 4.35 16.51 14.81 16 1000 4.41 16.90 15.13 32 1000 4.48 16.65 14.94 64 1000 4.70 16.88 15.18 128 1000 4.96 16.63 15.01 256 1000 4.54 14.86 13.43 512 1000 7.09 17.46 16.01 1024 1000 7.91 17.62 16.22 2048 1000 8.69 18.16 16.65 4096 1000 10.39 18.77 17.20 8192 1000 17.07 26.80 18.56 16384 1000 20.07 39.15 27.58 32768 1000 19.91 59.71 40.05 65536 640 30.83 103.10 70.40 131072 320 67.91 215.76 149.87 262144 160 142.19 435.24 303.93 524288 80 233.42 830.93 566.84 1048576 40 420.48 1600.57 1082.90 2097152 20 804.13 3154.62 2122.75 4194304 10 1531.64 6225.02 4165.79 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 2.84 16.15 9.49 1 1000 2.74 14.85 8.79 2 1000 3.12 16.80 9.96 4 1000 3.16 16.82 9.99 8 1000 3.15 16.64 9.90 16 1000 3.20 16.80 10.00 32 1000 3.00 17.11 10.05 64 1000 3.01 16.91 9.96 128 1000 3.21 16.97 10.09 256 1000 3.15 17.05 10.10 512 1000 3.71 17.09 10.40 1024 1000 3.87 17.24 10.55 2048 1000 4.08 17.46 10.77 4096 1000 4.26 17.49 10.87 8192 1000 5.77 17.18 11.48 16384 1000 8.18 19.57 13.87 32768 1000 11.44 21.18 16.31 65536 640 20.19 27.19 23.69 131072 320 38.90 40.82 39.86 262144 160 76.13 96.34 86.24 524288 80 146.32 216.41 181.37 1048576 40 239.89 407.76 323.83 2097152 20 444.57 795.83 620.20 4194304 10 839.39 1562.02 1200.70 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 3.64 16.17 13.00 1 1000 3.97 16.90 13.64 2 1000 3.70 15.93 12.84 4 1000 3.93 16.53 13.35 8 1000 3.93 16.75 13.52 16 1000 3.93 16.82 13.52 32 1000 3.41 14.81 11.93 64 1000 4.02 16.78 13.54 128 1000 4.07 16.96 13.68 256 1000 4.17 17.12 13.84 512 1000 5.47 17.27 14.24 1024 1000 5.68 17.53 14.47 2048 1000 5.59 16.13 13.43 4096 1000 7.10 18.00 15.18 8192 1000 10.37 18.45 16.37 16384 1000 14.95 18.83 17.83 32768 1000 20.93 30.65 24.22 65536 640 25.15 55.20 42.30 131072 320 45.28 86.41 69.11 262144 160 97.59 208.74 161.58 524288 80 139.72 412.92 284.42 1048576 40 240.73 801.65 530.49 2097152 20 438.79 1577.31 1020.14 4194304 10 836.28 3107.48 1991.20 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 4.71 33.75 22.64 1 1000 4.82 34.13 22.92 2 1000 4.75 34.07 22.83 4 1000 4.76 34.07 22.90 8 1000 4.85 34.21 22.92 16 1000 4.88 34.18 22.96 32 1000 4.97 38.22 22.90 64 1000 5.12 41.89 23.31 128 1000 5.91 43.09 24.34 256 1000 6.02 43.29 24.47 512 1000 6.98 44.02 27.58 1024 1000 7.62 44.27 27.84 2048 1000 9.42 44.56 28.52 4096 1000 11.76 44.85 29.36 8192 1000 18.34 55.01 33.87 16384 1000 27.40 66.25 43.64 32768 1000 43.01 83.39 62.23 65536 640 81.54 148.49 114.53 131072 320 152.57 273.85 217.34 262144 160 281.22 482.95 389.25 524288 80 551.90 896.73 747.72 1048576 40 1073.43 1744.71 1436.50 2097152 20 2073.49 3499.76 2803.18 4194304 10 4404.29 7249.57 5881.38 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.75 0.83 0.79 1 1000 17.65 18.66 18.15 2 1000 17.79 18.61 18.20 4 1000 16.91 17.88 17.39 8 1000 17.97 18.97 18.47 16 1000 17.79 18.80 18.30 32 1000 18.22 18.90 18.56 64 1000 18.14 18.81 18.48 128 1000 18.30 19.11 18.71 256 1000 18.46 19.13 18.80 512 1000 18.47 19.28 18.88 1024 1000 18.81 21.41 20.11 2048 1000 19.33 20.01 19.67 4096 1000 20.19 20.87 20.53 8192 1000 22.20 23.16 22.68 16384 1000 25.06 26.15 25.60 32768 1000 31.41 32.72 32.07 65536 640 43.73 48.02 45.88 131072 320 70.63 74.82 72.73 262144 160 213.07 215.60 214.33 524288 80 356.15 357.38 356.76 1048576 40 619.72 623.20 621.46 2097152 20 1142.93 1149.86 1146.40 4194304 10 2164.52 2169.02 2166.77 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.74 0.86 0.77 1 1000 41.22 41.94 41.54 2 1000 41.23 42.06 41.52 4 1000 41.06 42.12 41.47 8 1000 41.00 42.02 41.41 16 1000 41.47 42.30 41.74 32 1000 42.11 42.45 42.27 64 1000 42.44 42.90 42.69 128 1000 42.49 43.09 42.80 256 1000 42.72 43.28 43.05 512 1000 45.27 45.93 45.59 1024 1000 46.11 46.59 46.44 2048 1000 47.58 47.81 47.73 4096 1000 49.94 50.31 50.16 8192 1000 57.86 58.08 57.98 16384 1000 67.30 68.03 67.52 32768 1000 106.92 108.28 107.43 65536 640 164.91 168.99 166.75 131072 320 333.01 336.19 334.54 262144 160 633.11 676.66 660.69 524288 80 918.55 941.83 930.98 1048576 40 1610.72 1643.49 1632.52 2097152 20 3079.64 3092.88 3088.33 4194304 10 5721.47 5818.46 5787.51 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.71 0.82 0.74 1 1000 86.46 89.19 88.46 2 1000 86.69 88.40 88.01 4 1000 86.55 88.84 88.27 8 1000 86.75 88.71 88.24 16 1000 86.82 89.20 88.64 32 1000 89.15 92.15 91.43 64 1000 89.04 92.24 91.22 128 1000 90.14 92.88 92.09 256 1000 90.84 93.04 92.22 512 1000 96.22 98.10 97.48 1024 1000 98.45 100.02 99.42 2048 1000 101.49 102.80 102.39 4096 1000 106.23 108.18 107.42 8192 1000 132.22 137.55 136.40 16384 1000 167.15 171.45 170.19 32768 1000 233.81 237.73 236.37 65536 640 443.86 456.79 450.25 131072 320 760.18 807.93 786.46 262144 160 1588.50 1641.48 1609.69 524288 80 2383.31 2575.10 2486.62 1048576 40 4450.22 4901.94 4684.76 2097152 20 8160.74 9257.70 8792.94 4194304 10 17082.78 19267.63 17846.83 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.50 1.58 1.54 1 1000 30.50 31.09 30.80 2 1000 30.34 31.10 30.72 4 1000 30.32 31.05 30.68 8 1000 30.33 31.16 30.74 16 1000 30.56 31.25 30.90 32 1000 30.82 31.53 31.17 64 1000 30.68 31.38 31.03 128 1000 30.84 31.59 31.22 256 1000 31.04 31.83 31.43 512 1000 32.70 33.25 32.97 1024 1000 32.91 33.35 33.13 2048 1000 33.60 34.09 33.84 4096 1000 34.67 35.11 34.89 8192 1000 39.21 40.01 39.61 16384 1000 44.65 45.48 45.07 32768 1000 54.87 57.12 55.99 65536 640 76.03 77.62 76.83 131072 320 119.57 119.61 119.59 262144 160 281.90 285.01 283.45 524288 80 401.86 406.05 403.95 1048576 40 704.66 712.81 708.74 2097152 20 1280.56 1295.47 1288.02 4194304 10 2382.78 2479.93 2431.35 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.69 1.87 1.74 1 1000 52.23 52.94 52.51 2 1000 52.25 52.79 52.48 4 1000 52.33 53.04 52.65 8 1000 52.37 52.87 52.60 16 1000 52.68 53.23 52.93 32 1000 53.08 53.57 53.30 64 1000 53.21 53.68 53.46 128 1000 53.32 53.64 53.53 256 1000 53.66 53.83 53.73 512 1000 55.86 56.42 56.14 1024 1000 56.66 59.09 57.34 2048 1000 57.83 58.23 58.04 4096 1000 59.90 60.34 60.16 8192 1000 70.41 71.33 71.04 16384 1000 83.18 85.11 84.09 32768 1000 108.34 109.64 108.85 65536 640 165.24 171.82 167.81 131072 320 336.07 338.95 337.31 262144 160 639.14 679.73 663.91 524288 80 934.67 949.17 942.42 1048576 40 1615.62 1644.06 1635.18 2097152 20 3069.38 3084.43 3080.31 4194304 10 5754.36 5824.62 5802.70 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 2.20 2.33 2.24 1 1000 93.55 97.29 96.14 2 1000 93.70 97.30 96.15 4 1000 93.84 97.17 96.23 8 1000 93.55 97.15 96.22 16 1000 94.19 98.03 96.96 32 1000 94.72 98.88 97.53 64 1000 94.82 98.66 97.66 128 1000 95.57 99.05 98.01 256 1000 95.75 99.30 98.27 512 1000 100.06 103.94 102.58 1024 1000 101.41 105.11 103.77 2048 1000 104.11 107.82 106.24 4096 1000 108.68 111.78 110.69 8192 1000 133.72 138.76 136.73 16384 1000 167.54 171.87 170.34 32768 1000 239.37 248.59 245.67 65536 640 464.91 496.68 482.16 131072 320 763.34 814.68 791.21 262144 160 1590.79 1636.24 1607.37 524288 80 2387.35 2577.50 2489.71 1048576 40 4499.38 4913.57 4689.78 2097152 20 8106.30 9255.22 8779.44 4194304 10 15428.48 17625.50 16767.09 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.71 1.13 0.92 1 1000 1.85 1.87 1.86 2 1000 1.83 1.84 1.84 4 1000 2.09 2.15 2.12 8 1000 1.92 1.95 1.93 16 1000 1.91 1.92 1.91 32 1000 1.84 1.86 1.85 64 1000 2.13 2.52 2.32 128 1000 2.05 2.44 2.24 256 1000 2.33 2.90 2.62 512 1000 2.41 2.95 2.68 1024 1000 2.52 3.06 2.79 2048 1000 2.93 3.51 3.22 4096 1000 3.62 4.14 3.88 8192 1000 4.35 4.89 4.62 16384 1000 4.61 19.54 12.07 32768 1000 7.45 21.50 14.48 65536 640 37.83 48.32 43.08 131072 320 27.49 40.70 34.09 262144 160 59.67 311.78 185.72 524288 80 107.97 149.64 128.81 1048576 40 229.98 269.71 249.85 2097152 20 429.88 458.73 444.30 4194304 10 805.72 829.08 817.40 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.65 0.74 0.68 1 1000 1.94 2.03 1.99 2 1000 1.88 1.96 1.92 4 1000 1.84 1.95 1.90 8 1000 1.91 1.99 1.96 16 1000 1.90 1.97 1.94 32 1000 1.79 1.88 1.83 64 1000 2.19 2.92 2.45 128 1000 2.09 2.97 2.42 256 1000 2.38 3.04 2.59 512 1000 2.54 3.35 2.84 1024 1000 2.78 3.46 3.00 2048 1000 3.25 3.92 3.45 4096 1000 3.82 4.55 4.04 8192 1000 4.89 5.67 5.14 16384 1000 7.30 8.06 7.56 32768 1000 10.97 85.70 61.88 65536 640 21.53 22.44 21.81 131072 320 42.44 44.22 43.37 262144 160 84.58 86.28 85.51 524288 80 167.53 168.80 168.13 1048576 40 382.47 384.10 383.18 2097152 20 810.08 811.40 810.51 4194304 10 1634.01 1635.78 1634.64 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.93 1.15 0.99 1 1000 2.07 2.19 2.12 2 1000 2.05 2.15 2.09 4 1000 2.06 2.23 2.15 8 1000 2.06 2.22 2.13 16 1000 2.00 2.15 2.07 32 1000 1.98 2.15 2.06 64 1000 2.33 3.64 2.72 128 1000 2.48 3.80 2.95 256 1000 2.86 3.72 3.08 512 1000 2.94 3.79 3.15 1024 1000 3.09 4.07 3.37 2048 1000 3.57 4.64 3.85 4096 1000 4.18 5.15 4.38 8192 1000 5.24 6.33 5.58 16384 1000 11.87 52.24 45.34 32768 1000 12.59 14.31 13.30 65536 640 21.61 23.76 22.55 131072 320 41.88 45.43 43.82 262144 160 85.28 88.70 87.10 524288 80 174.19 176.04 174.70 1048576 40 510.58 521.67 513.93 2097152 20 1062.86 1075.99 1068.74 4194304 10 2129.20 2143.74 2136.51 #--------------------------------------------------- # Benchmarking Barrier # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 1.54 1.54 1.54 #--------------------------------------------------- # Benchmarking Barrier # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 1.46 1.46 1.46 #--------------------------------------------------- # Benchmarking Barrier # #processes = 8 #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 1.62 1.62 1.62 # All processes entering MPI_Finalize