#--------------------------------------------------- # Intel (R) MPI Benchmark Suite V3.2.4, MPI-1 part #--------------------------------------------------- # Date : Sun Sep 21 10:11:00 2014 # Machine : x86_64 # System : Linux # Release : 2.6.32-279.el6.x86_64 # Version : #1 SMP Thu Jun 21 07:08:44 CDT 2012 # MPI Version : 2.2 # MPI Thread Environment: # New default behavior from Version 3.2 on: # the number of iterations per message size is cut down # dynamically when a certain run time (per message size sample) # is expected to be exceeded. Time limit is defined by variable # "SECS_PER_SAMPLE" (=> IMB_settings.h) # or through the flag => -time # Calling sequence was: # IMB-MPI1 # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_scatter # Allgather # Allgatherv # Gather # Gatherv # Scatter # Scatterv # Alltoall # Alltoallv # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 0.33 0.00 1 1000 0.33 2.86 2 1000 0.33 5.78 4 1000 0.34 11.24 8 1000 0.34 22.21 16 1000 0.35 43.21 32 1000 0.40 76.40 64 1000 0.39 155.10 128 1000 0.45 273.07 256 1000 0.51 479.63 512 1000 0.57 850.67 1024 1000 1.58 618.08 2048 1000 3.45 566.45 4096 1000 4.34 900.79 8192 1000 7.24 1078.39 16384 1000 8.62 1813.59 32768 1000 11.22 2785.57 65536 640 18.41 3394.00 131072 320 29.13 4290.46 262144 160 46.05 5428.90 524288 80 77.48 6453.15 1048576 40 353.56 2828.35 2097152 20 741.58 2696.95 4194304 10 1461.85 2736.26 #--------------------------------------------------- # Benchmarking PingPing # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 0.55 0.00 1 1000 0.59 1.63 2 1000 0.60 3.17 4 1000 0.60 6.39 8 1000 0.60 12.80 16 1000 0.64 23.92 32 1000 0.60 50.69 64 1000 0.61 100.71 128 1000 0.62 196.55 256 1000 0.69 354.33 512 1000 0.76 641.60 1024 1000 0.89 1101.08 2048 1000 1.12 1751.55 4096 1000 1.56 2499.47 8192 1000 3.21 2433.21 16384 1000 4.60 3400.40 32768 1000 8.72 3584.14 65536 640 30.77 2031.19 131072 320 49.78 2511.30 262144 160 87.46 2858.37 524288 80 149.80 3337.82 1048576 40 740.02 1351.31 2097152 20 1466.20 1364.07 4194304 10 2918.98 1370.34 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 0.46 0.46 0.46 0.00 1 1000 0.50 0.50 0.50 3.78 2 1000 0.51 0.51 0.51 7.45 4 1000 0.51 0.51 0.51 15.02 8 1000 0.51 0.51 0.51 30.09 16 1000 0.51 0.51 0.51 59.95 32 1000 0.50 0.50 0.50 121.62 64 1000 0.50 0.50 0.50 241.74 128 1000 0.55 0.55 0.55 447.94 256 1000 0.62 0.62 0.62 791.34 512 1000 0.65 0.65 0.65 1509.21 1024 1000 0.76 0.76 0.76 2556.01 2048 1000 0.98 0.98 0.98 3977.66 4096 1000 1.45 1.45 1.45 5402.80 8192 1000 2.33 2.33 2.33 6703.08 16384 1000 4.09 4.09 4.09 7636.89 32768 1000 7.57 7.57 7.57 8254.16 65536 640 21.69 21.69 21.69 5762.40 131072 320 39.77 39.79 39.78 6283.37 262144 160 74.95 74.97 74.96 6669.40 524288 80 137.59 137.64 137.61 7265.54 1048576 40 736.77 738.92 737.85 2706.64 2097152 20 1460.55 1464.80 1462.67 2730.76 4194304 10 2907.30 2916.00 2911.65 2743.48 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 0.59 0.59 0.59 0.00 1 1000 0.59 0.59 0.59 3.23 2 1000 0.60 0.60 0.60 6.37 4 1000 0.59 0.59 0.59 12.83 8 1000 0.58 0.58 0.58 26.09 16 1000 0.58 0.58 0.58 52.78 32 1000 0.59 0.59 0.59 103.27 64 1000 0.58 0.58 0.58 209.41 128 1000 0.65 0.65 0.65 377.44 256 1000 0.66 0.66 0.66 743.11 512 1000 0.70 0.70 0.70 1389.42 1024 1000 0.84 0.84 0.84 2322.00 2048 1000 1.45 1.45 1.45 2695.62 4096 1000 1.50 1.50 1.50 5212.02 8192 1000 2.36 2.36 2.36 6621.14 16384 1000 4.18 4.18 4.18 7483.41 32768 1000 7.68 7.68 7.68 8138.09 65536 640 21.58 21.59 21.58 5790.44 131072 320 39.67 39.70 39.68 6297.75 262144 160 74.47 74.59 74.53 6703.51 524288 80 139.26 139.41 139.34 7172.97 1048576 40 756.50 759.38 757.96 2633.74 2097152 20 1503.90 1508.70 1506.17 2651.29 4194304 10 3073.29 3083.49 3078.39 2594.46 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 0.77 0.78 0.78 0.00 1 1000 0.66 0.66 0.66 2.87 2 1000 0.62 0.62 0.62 6.12 4 1000 0.62 0.62 0.62 12.21 8 1000 0.60 0.60 0.60 25.64 16 1000 0.65 0.65 0.65 46.89 32 1000 0.63 0.63 0.63 97.01 64 1000 0.63 0.63 0.63 194.09 128 1000 0.68 0.68 0.68 359.05 256 1000 0.74 0.74 0.74 663.43 512 1000 0.77 0.77 0.77 1271.26 1024 1000 0.84 0.84 0.84 2319.37 2048 1000 1.42 1.42 1.42 2755.00 4096 1000 1.59 1.59 1.59 4910.53 8192 1000 2.50 2.50 2.50 6252.24 16384 1000 4.65 4.65 4.65 6714.41 32768 1000 8.80 8.80 8.80 7099.17 65536 640 21.37 21.40 21.39 5839.82 131072 320 40.56 40.62 40.59 6154.41 262144 160 76.70 76.86 76.81 6505.19 524288 80 148.50 148.84 148.67 6718.81 1048576 40 834.82 842.57 838.45 2373.69 2097152 20 1801.69 1813.20 1807.44 2206.05 4194304 10 4017.81 4331.90 4219.31 1846.77 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.21 1.21 1.21 0.00 1 1000 1.28 1.29 1.29 1.48 2 1000 1.28 1.29 1.29 2.95 4 1000 1.27 1.28 1.28 5.94 8 1000 1.27 1.28 1.27 11.97 16 1000 1.28 1.29 1.28 23.69 32 1000 1.27 1.28 1.27 47.75 64 1000 1.28 1.29 1.29 94.48 128 1000 1.28 1.29 1.29 188.96 256 1000 1.26 1.27 1.26 385.69 512 1000 1.58 1.59 1.58 615.38 1024 1000 1.85 1.87 1.86 1046.10 2048 1000 2.35 2.37 2.36 1650.95 4096 1000 3.37 3.40 3.39 2299.83 8192 1000 5.24 5.27 5.26 2963.28 16384 1000 8.73 8.78 8.75 3560.48 32768 1000 15.80 15.88 15.85 3935.03 65536 640 38.72 38.95 38.83 3209.11 131072 320 72.74 73.32 73.03 3409.76 262144 160 133.55 135.83 134.67 3681.03 524288 80 260.48 273.07 266.50 3662.03 1048576 40 873.60 889.72 881.14 2247.89 2097152 20 1879.85 1921.31 1902.42 2081.91 4194304 10 3973.32 4143.91 4039.95 1930.55 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.44 1.46 1.45 0.00 1 1000 1.42 1.45 1.44 1.31 2 1000 1.33 1.35 1.34 2.83 4 1000 1.32 1.33 1.32 5.74 8 1000 1.32 1.34 1.33 11.39 16 1000 1.30 1.32 1.31 23.03 32 1000 1.31 1.33 1.32 45.96 64 1000 1.35 1.38 1.37 88.52 128 1000 1.41 1.44 1.42 170.13 256 1000 1.46 1.48 1.47 331.02 512 1000 1.69 1.71 1.70 569.76 1024 1000 1.99 2.02 2.01 965.35 2048 1000 2.41 2.44 2.43 1600.16 4096 1000 3.42 3.45 3.43 2267.05 8192 1000 5.36 5.42 5.40 2880.70 16384 1000 9.16 9.28 9.22 3367.47 32768 1000 16.12 16.28 16.20 3838.35 65536 640 40.78 41.36 41.09 3021.97 131072 320 72.45 73.64 73.02 3394.99 262144 160 137.09 146.10 141.41 3422.32 524288 80 281.51 307.41 296.90 3252.97 1048576 40 902.95 956.50 931.22 2090.96 2097152 20 1912.84 2089.14 2004.38 1914.66 4194304 10 4006.98 7992.60 5196.49 1000.93 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.68 1.76 1.73 0.00 1 1000 1.47 1.53 1.50 1.25 2 1000 1.37 1.42 1.39 2.69 4 1000 1.36 1.42 1.39 5.36 8 1000 1.36 1.41 1.39 10.80 16 1000 1.39 1.45 1.42 21.12 32 1000 1.37 1.43 1.41 42.57 64 1000 1.41 1.47 1.44 83.16 128 1000 1.48 1.54 1.51 158.02 256 1000 1.51 1.56 1.54 313.39 512 1000 1.73 1.81 1.77 540.44 1024 1000 2.06 2.12 2.09 920.45 2048 1000 2.64 2.74 2.69 1427.68 4096 1000 3.62 3.74 3.68 2087.67 8192 1000 5.53 5.70 5.62 2742.67 16384 1000 9.31 9.58 9.46 3261.63 32768 1000 18.34 18.57 18.49 3365.83 65536 640 38.47 39.28 38.88 3182.05 131072 320 72.89 77.22 74.77 3237.44 262144 160 131.21 139.67 135.21 3579.75 524288 80 256.64 306.64 276.37 3261.16 1048576 40 833.35 920.25 880.77 2173.33 2097152 20 1771.01 2060.65 1951.24 1941.13 4194304 10 3792.60 4425.91 4189.18 1807.54 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.77 1.92 1.87 0.00 1 1000 1.50 1.63 1.57 1.17 2 1000 1.42 1.54 1.49 2.47 4 1000 1.43 1.54 1.48 4.96 8 1000 1.42 1.54 1.48 9.91 16 1000 1.42 1.52 1.47 20.13 32 1000 1.39 1.50 1.44 40.56 64 1000 1.44 1.53 1.49 79.69 128 1000 1.54 1.59 1.56 153.64 256 1000 1.52 1.63 1.57 300.29 512 1000 1.74 1.80 1.77 542.59 1024 1000 2.07 2.14 2.10 911.34 2048 1000 2.73 2.90 2.81 1347.92 4096 1000 3.64 3.77 3.70 2071.17 8192 1000 5.50 5.66 5.57 2758.60 16384 1000 9.23 9.71 9.45 3218.62 32768 1000 17.42 18.65 18.06 3350.47 65536 640 39.02 41.21 40.07 3033.06 131072 320 71.78 77.81 74.86 3212.98 262144 160 130.03 144.17 135.99 3468.16 524288 80 252.04 332.79 285.61 3004.92 1048576 40 856.10 1342.62 1097.97 1489.62 2097152 20 1891.26 3201.54 2475.15 1249.40 4194304 10 3874.49 6864.50 5340.61 1165.42 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.68 1.91 1.78 0.00 1 1000 1.43 1.57 1.51 1.21 2 1000 1.39 1.52 1.47 2.51 4 1000 1.39 1.55 1.46 4.91 8 1000 15.99 16.08 16.04 0.95 16 1000 1.37 1.61 1.50 18.97 32 1000 1.38 1.58 1.47 38.65 64 1000 1.38 1.59 1.48 77.02 128 1000 1.47 1.67 1.56 146.62 256 1000 1.50 1.59 1.55 306.31 512 1000 1.74 1.91 1.82 510.53 1024 1000 2.04 2.28 2.16 858.07 2048 1000 2.76 3.01 2.89 1298.57 4096 1000 3.63 3.84 3.73 2034.52 8192 1000 5.48 5.77 5.60 2705.64 16384 1000 10.31 11.14 10.71 2806.26 32768 1000 25.49 27.07 26.44 2308.93 65536 640 39.63 44.29 41.82 2822.38 131072 320 71.78 76.91 74.64 3250.58 262144 160 129.91 144.73 136.71 3454.70 524288 80 247.56 345.09 283.09 2897.82 1048576 40 868.40 1323.45 1159.00 1511.20 2097152 20 1810.10 3086.56 2651.07 1295.94 4194304 10 3744.51 6888.29 5853.99 1161.39 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 384 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.31 1.49 1.40 0.00 1 1000 1.35 1.57 1.45 1.22 2 1000 1.32 1.51 1.43 2.52 4 1000 1.33 1.59 1.45 4.79 8 1000 1.30 1.53 1.41 9.97 16 1000 1.35 1.55 1.45 19.64 32 1000 1.33 1.52 1.42 40.03 64 1000 1.32 1.55 1.43 78.71 128 1000 1.42 1.63 1.54 149.40 256 1000 1.43 1.59 1.51 308.06 512 1000 1.69 1.92 1.79 507.31 1024 1000 2.01 2.23 2.12 874.65 2048 1000 2.56 2.76 2.67 1414.36 4096 1000 3.37 3.79 3.63 2061.40 8192 1000 5.27 5.78 5.58 2703.30 16384 1000 8.61 9.40 9.09 3323.41 32768 1000 59.27 90.50 74.83 690.65 65536 640 36.08 40.32 38.81 3100.55 131072 320 61.17 76.31 71.66 3276.29 262144 160 107.50 140.94 130.25 3547.54 524288 80 189.34 294.18 249.83 3399.33 1048576 40 495.37 1323.97 1055.98 1510.60 2097152 20 1627.76 3317.45 2556.65 1205.74 4194304 10 3626.30 6949.31 5480.34 1151.19 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.13 1.13 1.13 0.00 1 1000 1.23 1.23 1.23 3.11 2 1000 1.23 1.23 1.23 6.21 4 1000 1.22 1.22 1.22 12.49 8 1000 1.22 1.22 1.22 25.01 16 1000 1.22 1.22 1.22 49.86 32 1000 1.21 1.21 1.21 100.47 64 1000 1.23 1.23 1.23 198.95 128 1000 1.30 1.30 1.30 374.20 256 1000 1.38 1.38 1.38 705.60 512 1000 1.50 1.50 1.50 1298.67 1024 1000 1.75 1.75 1.75 2234.59 2048 1000 2.19 2.19 2.19 3570.67 4096 1000 3.00 3.00 3.00 5201.68 8192 1000 4.62 4.62 4.62 6768.50 16384 1000 7.87 7.87 7.87 7939.67 32768 1000 14.45 14.45 14.45 8647.62 65536 640 47.13 47.13 47.13 5304.17 131072 320 82.87 82.88 82.88 6032.91 262144 160 151.19 151.22 151.21 6612.88 524288 80 276.10 276.15 276.13 7242.41 1048576 40 1483.07 1485.25 1484.16 2693.16 2097152 20 2938.85 2943.15 2941.00 2718.18 4194304 10 5898.31 5907.01 5902.66 2708.65 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.18 1.18 1.18 0.00 1 1000 1.20 1.20 1.20 3.18 2 1000 1.19 1.19 1.19 6.39 4 1000 1.19 1.19 1.19 12.87 8 1000 1.19 1.19 1.19 25.67 16 1000 1.19 1.19 1.19 51.24 32 1000 1.19 1.19 1.19 102.15 64 1000 1.21 1.21 1.21 202.29 128 1000 1.28 1.28 1.28 380.25 256 1000 1.38 1.38 1.38 709.76 512 1000 1.54 1.54 1.54 1266.54 1024 1000 1.82 1.82 1.82 2149.85 2048 1000 2.37 2.37 2.37 3296.25 4096 1000 3.08 3.08 3.08 5068.13 8192 1000 4.54 4.54 4.54 6880.42 16384 1000 8.15 8.15 8.15 7665.93 32768 1000 14.73 14.74 14.73 8482.53 65536 640 46.72 46.74 46.73 5348.85 131072 320 82.73 82.76 82.74 6041.82 262144 160 148.90 148.98 148.94 6712.30 524288 80 282.60 282.69 282.64 7074.96 1048576 40 1562.10 1565.15 1563.62 2555.66 2097152 20 3110.80 3117.81 3114.32 2565.90 4194304 10 6323.91 6334.40 6329.13 2525.89 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.21 1.21 1.21 0.00 1 1000 1.27 1.27 1.27 3.00 2 1000 1.30 1.31 1.31 5.84 4 1000 1.23 1.23 1.23 12.38 8 1000 1.21 1.21 1.21 25.12 16 1000 1.24 1.24 1.24 49.34 32 1000 1.20 1.20 1.20 101.31 64 1000 1.22 1.22 1.22 199.45 128 1000 1.30 1.30 1.30 374.75 256 1000 1.39 1.39 1.39 704.14 512 1000 1.60 1.61 1.60 1216.87 1024 1000 1.83 1.83 1.83 2134.45 2048 1000 2.56 2.57 2.57 3041.11 4096 1000 3.49 3.49 3.49 4478.34 8192 1000 4.91 4.91 4.91 6359.32 16384 1000 8.49 8.50 8.49 7352.86 32768 1000 15.26 15.28 15.27 8181.65 65536 640 48.90 48.93 48.91 5109.20 131072 320 88.86 88.93 88.90 5622.11 262144 160 160.17 160.77 160.50 6220.12 524288 80 342.35 342.64 342.48 5837.08 1048576 40 1827.25 1840.56 1834.37 2173.26 2097152 20 3867.55 3959.55 3913.73 2020.43 4194304 10 7900.00 8101.01 8039.85 1975.06 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1.95 1.96 1.95 0.00 1 1000 2.28 2.29 2.28 1.67 2 1000 2.28 2.29 2.29 3.33 4 1000 2.25 2.26 2.26 6.75 8 1000 2.26 2.27 2.27 13.43 16 1000 2.25 2.26 2.26 26.95 32 1000 2.32 2.32 2.32 52.53 64 1000 2.25 2.26 2.26 107.94 128 1000 2.58 2.59 2.59 188.37 256 1000 2.51 2.53 2.52 386.60 512 1000 2.99 3.01 3.00 649.75 1024 1000 3.59 3.60 3.60 1084.17 2048 1000 4.44 4.46 4.45 1751.64 4096 1000 6.04 6.06 6.05 2578.33 8192 1000 9.27 9.31 9.29 3356.26 16384 1000 16.02 16.08 16.05 3886.32 32768 1000 29.16 29.27 29.21 4271.19 65536 640 66.18 66.46 66.31 3761.88 131072 320 112.76 113.69 113.20 4397.90 262144 160 185.31 186.78 185.99 5353.85 524288 80 434.22 442.21 438.38 4522.74 1048576 40 1934.88 1975.38 1954.61 2024.93 2097152 20 4156.99 4316.84 4254.04 1853.21 4194304 10 8247.30 9321.52 8741.38 1716.46 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 2.08 2.09 2.08 0.00 1 1000 2.57 2.59 2.58 1.47 2 1000 2.37 2.39 2.38 3.19 4 1000 2.48 2.50 2.49 6.09 8 1000 2.34 2.37 2.35 12.90 16 1000 2.46 2.48 2.47 24.60 32 1000 2.33 2.36 2.35 51.77 64 1000 2.46 2.49 2.47 98.21 128 1000 2.65 2.67 2.66 183.09 256 1000 2.82 2.84 2.83 343.28 512 1000 3.17 3.21 3.19 608.84 1024 1000 3.64 3.67 3.66 1063.21 2048 1000 5.01 5.05 5.03 1546.10 4096 1000 7.06 7.13 7.09 2190.81 8192 1000 10.26 10.36 10.31 3015.51 16384 1000 16.42 16.56 16.49 3773.27 32768 1000 28.14 28.35 28.24 4409.78 65536 640 65.32 65.86 65.61 3795.96 131072 320 111.64 112.71 112.22 4436.07 262144 160 191.19 196.36 193.56 5092.76 524288 80 433.20 455.38 443.02 4391.97 1048576 40 1958.43 2014.40 1989.76 1985.70 2097152 20 4217.95 4441.70 4337.75 1801.11 4194304 10 8269.19 9281.11 8859.94 1723.93 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 2.10 2.14 2.11 0.00 1 1000 2.49 2.52 2.50 1.51 2 1000 2.46 2.50 2.48 3.06 4 1000 2.46 2.50 2.48 6.10 8 1000 2.49 2.53 2.50 12.07 16 1000 2.46 2.50 2.48 24.41 32 1000 2.47 2.51 2.49 48.71 64 1000 2.60 2.64 2.62 92.62 128 1000 2.86 2.90 2.88 168.38 256 1000 3.57 3.65 3.61 267.69 512 1000 4.37 4.44 4.41 439.70 1024 1000 6.67 6.75 6.70 578.45 2048 1000 10.44 10.62 10.53 735.65 4096 1000 16.40 16.76 16.56 932.22 8192 1000 22.59 22.99 22.76 1359.47 16384 1000 28.75 29.27 29.02 2135.51 32768 1000 32.60 33.11 32.85 3775.31 65536 640 65.62 66.42 66.05 3764.09 131072 320 111.73 113.06 112.38 4422.59 262144 160 199.29 207.22 203.27 4825.68 524288 80 437.20 487.85 465.36 4099.63 1048576 40 1842.50 2013.33 1951.02 1986.76 2097152 20 3881.30 4492.84 4210.93 1780.61 4194304 10 7737.68 9131.72 8668.50 1752.13 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 2.42 2.48 2.45 0.00 1 1000 2.55 2.61 2.58 1.46 2 1000 2.51 2.59 2.55 2.94 4 1000 2.50 2.57 2.53 5.95 8 1000 2.48 2.55 2.51 11.96 16 1000 2.51 2.58 2.54 23.67 32 1000 2.51 2.60 2.56 46.93 64 1000 2.52 2.60 2.56 93.87 128 1000 2.92 2.98 2.95 163.92 256 1000 3.59 3.68 3.63 265.44 512 1000 4.38 4.51 4.44 432.59 1024 1000 6.11 6.34 6.24 616.52 2048 1000 12.74 13.24 13.01 590.12 4096 1000 20.78 21.63 21.28 722.48 8192 1000 31.96 33.19 32.76 941.58 16384 1000 52.86 55.71 54.33 1121.92 32768 1000 60.07 62.88 61.42 1987.79 65536 640 65.57 67.42 66.46 3708.33 131072 320 115.25 120.11 117.55 4162.98 262144 160 203.19 212.71 208.13 4701.18 524288 80 452.67 531.39 492.67 3763.73 1048576 40 1993.90 3194.98 2664.13 1251.97 2097152 20 4245.45 6911.60 5637.18 1157.47 4194304 10 8039.00 16648.01 12048.52 961.08 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 2.34 2.54 2.44 0.00 1 1000 2.55 2.72 2.63 1.40 2 1000 2.57 2.69 2.64 2.83 4 1000 2.51 2.71 2.60 5.63 8 1000 2.53 2.65 2.60 11.50 16 1000 2.52 2.69 2.60 22.67 32 1000 2.48 2.66 2.57 45.82 64 1000 2.49 2.64 2.56 92.48 128 1000 2.89 3.04 2.96 160.51 256 1000 3.56 3.62 3.59 269.62 512 1000 4.32 4.46 4.39 437.63 1024 1000 6.23 6.48 6.36 603.11 2048 1000 12.77 13.84 13.27 564.40 4096 1000 21.00 22.66 21.77 689.63 8192 1000 53.67 61.32 56.39 509.65 16384 1000 58.85 62.33 60.74 1002.77 32768 1000 70.70 76.34 73.92 1637.31 65536 640 65.55 67.66 66.48 3695.08 131072 320 117.39 120.77 118.81 4140.25 262144 160 231.71 243.60 237.86 4105.07 524288 80 447.66 540.03 499.58 3703.52 1048576 40 1975.42 3188.70 2789.91 1254.43 2097152 20 4135.11 8193.50 6379.40 976.38 4194304 10 8216.19 15676.78 12464.92 1020.62 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 384 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 2.02 2.17 2.12 0.00 1 1000 2.56 2.78 2.67 1.37 2 1000 2.38 2.59 2.51 2.94 4 1000 2.42 2.61 2.54 5.84 8 1000 2.37 2.66 2.53 11.47 16 1000 2.34 2.62 2.51 23.28 32 1000 2.53 2.73 2.64 44.75 64 1000 2.39 2.60 2.51 93.86 128 1000 2.74 3.00 2.89 162.60 256 1000 3.34 3.64 3.52 268.36 512 1000 3.96 4.44 4.24 440.00 1024 1000 5.57 6.37 6.02 612.92 2048 1000 10.94 12.63 11.89 618.52 4096 1000 18.17 20.91 19.72 747.18 8192 1000 30.42 34.90 32.95 895.39 16384 1000 48.45 56.74 53.02 1101.44 32768 1000 87.99 105.37 96.54 1186.28 65536 640 63.27 67.52 65.94 3702.52 131072 320 102.09 119.53 113.86 4183.01 262144 160 175.33 224.58 208.77 4452.73 524288 80 349.87 541.11 457.06 3696.09 1048576 40 1236.47 3195.32 2372.79 1251.83 2097152 20 3336.00 7888.65 5435.05 1014.12 4194304 10 7265.11 15372.11 11353.62 1040.85 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 4 1000 0.79 0.79 0.79 8 1000 0.79 0.79 0.79 16 1000 0.79 0.79 0.79 32 1000 0.77 0.77 0.77 64 1000 0.79 0.79 0.79 128 1000 0.88 0.88 0.88 256 1000 0.97 0.97 0.97 512 1000 1.03 1.03 1.03 1024 1000 1.19 1.19 1.19 2048 1000 1.49 1.49 1.49 4096 1000 2.79 2.79 2.79 8192 1000 4.08 4.08 4.08 16384 1000 6.63 6.63 6.63 32768 1000 11.75 11.75 11.75 65536 640 22.05 22.05 22.05 131072 320 64.50 64.51 64.50 262144 160 122.05 122.07 122.06 524288 80 237.59 237.64 237.61 1048576 40 718.00 719.93 718.97 2097152 20 1422.20 1425.95 1424.07 4194304 10 3079.80 3087.90 3083.85 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 4 1000 1.40 1.40 1.40 8 1000 1.38 1.38 1.38 16 1000 1.39 1.39 1.39 32 1000 1.40 1.40 1.40 64 1000 1.40 1.40 1.40 128 1000 1.53 1.53 1.53 256 1000 1.67 1.67 1.67 512 1000 1.85 1.85 1.85 1024 1000 2.21 2.21 2.21 2048 1000 2.84 2.84 2.84 4096 1000 4.78 4.78 4.78 8192 1000 6.70 6.70 6.70 16384 1000 10.78 10.78 10.78 32768 1000 18.58 18.58 18.58 65536 640 35.03 35.03 35.03 131072 320 90.68 90.71 90.69 262144 160 177.97 178.01 177.99 524288 80 361.74 361.86 361.80 1048576 40 1158.95 1162.83 1160.89 2097152 20 2560.00 2568.15 2564.06 4194304 10 5838.20 5857.61 5847.85 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 4 1000 1.97 1.97 1.97 8 1000 1.95 1.95 1.95 16 1000 1.98 1.98 1.98 32 1000 2.00 2.00 2.00 64 1000 1.99 1.99 1.99 128 1000 2.22 2.22 2.22 256 1000 2.36 2.36 2.36 512 1000 2.70 2.70 2.70 1024 1000 3.11 3.11 3.11 2048 1000 4.03 4.03 4.03 4096 1000 6.47 6.47 6.47 8192 1000 8.70 8.71 8.70 16384 1000 13.27 13.27 13.27 32768 1000 22.43 22.43 22.43 65536 640 41.10 41.10 41.10 131072 320 102.45 102.47 102.46 262144 160 203.33 203.38 203.36 524288 80 413.21 414.34 414.09 1048576 40 1169.22 1173.25 1172.54 2097152 20 3647.01 3666.81 3663.81 4194304 10 7493.21 7584.12 7571.99 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 4 1000 3.95 3.95 3.95 8 1000 3.93 3.93 3.93 16 1000 4.03 4.03 4.03 32 1000 4.05 4.05 4.05 64 1000 3.99 3.99 3.99 128 1000 4.46 4.47 4.47 256 1000 4.87 4.87 4.87 512 1000 5.32 5.32 5.32 1024 1000 6.69 6.69 6.69 2048 1000 8.69 8.69 8.69 4096 1000 11.95 11.95 11.95 8192 1000 14.45 14.45 14.45 16384 1000 20.54 20.54 20.54 32768 1000 32.06 32.06 32.06 65536 640 56.47 56.47 56.47 131072 320 134.65 134.67 134.66 262144 160 268.92 268.96 268.94 524288 80 786.93 790.09 788.96 1048576 40 2073.57 2086.03 2082.91 2097152 20 5318.15 5372.20 5360.82 4194304 10 11214.90 11414.69 11372.76 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 4 1000 6.48 6.48 6.48 8 1000 6.64 6.64 6.64 16 1000 6.63 6.63 6.63 32 1000 6.60 6.60 6.60 64 1000 6.72 6.73 6.72 128 1000 7.40 7.40 7.40 256 1000 8.31 8.31 8.31 512 1000 9.28 9.28 9.28 1024 1000 11.57 11.57 11.57 2048 1000 19.75 19.75 19.75 4096 1000 18.27 18.27 18.27 8192 1000 22.22 22.23 22.22 16384 1000 29.83 29.83 29.83 32768 1000 43.77 43.78 43.78 65536 640 73.15 73.16 73.16 131072 320 156.34 156.36 156.35 262144 160 309.15 309.22 309.19 524288 80 632.26 632.39 632.33 1048576 40 1853.20 1855.05 1853.93 2097152 20 7969.80 7982.24 7975.26 4194304 10 12970.19 13005.09 12986.66 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 4 1000 21.66 21.69 21.67 8 1000 26.74 26.78 26.76 16 1000 28.33 28.36 28.35 32 1000 26.88 26.91 26.90 64 1000 26.90 26.93 26.91 128 1000 31.72 31.75 31.73 256 1000 44.95 45.00 44.97 512 1000 32.98 33.01 33.00 1024 1000 39.59 39.63 39.61 2048 1000 55.96 55.98 55.97 4096 1000 58.37 58.39 58.38 8192 1000 69.59 69.61 69.60 16384 1000 100.69 100.71 100.70 32768 1000 154.80 154.82 154.81 65536 640 254.60 254.65 254.62 131072 320 416.57 417.13 416.90 262144 160 646.59 648.13 647.61 524288 80 1166.83 1171.10 1169.80 1048576 40 2659.20 2674.53 2671.38 2097152 20 5646.90 5712.50 5700.13 4194304 10 10354.30 10619.59 10574.92 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 4 1000 14.07 14.08 14.08 8 1000 14.18 14.19 14.18 16 1000 14.23 14.24 14.24 32 1000 15.93 15.93 15.93 64 1000 15.98 15.98 15.98 128 1000 17.69 17.69 17.69 256 1000 20.76 20.76 20.76 512 1000 39.61 39.63 39.62 1024 1000 27.00 27.01 27.00 2048 1000 33.78 33.79 33.78 4096 1000 107.34 107.37 107.35 8192 1000 116.61 116.62 116.62 16384 1000 199.29 199.31 199.30 32768 1000 207.41 207.44 207.42 65536 640 319.07 319.14 319.10 131072 320 658.19 658.80 658.40 262144 160 1045.81 1047.35 1046.80 524288 80 1391.47 1396.30 1394.82 1048576 40 3833.95 3860.20 3850.37 2097152 20 8043.90 8170.86 8123.96 4194304 10 15045.50 15606.40 15397.12 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 4 1000 15.85 15.85 15.85 8 1000 15.70 15.71 15.71 16 1000 15.79 15.79 15.79 32 1000 17.81 17.81 17.81 64 1000 17.78 17.79 17.79 128 1000 19.51 19.52 19.52 256 1000 44.77 44.79 44.78 512 1000 52.27 52.30 52.29 1024 1000 30.32 30.33 30.33 2048 1000 38.14 38.15 38.14 4096 1000 236.58 236.61 236.59 8192 1000 173.53 173.56 173.54 16384 1000 202.48 202.53 202.50 32768 1000 258.67 258.72 258.70 65536 640 378.14 378.23 378.17 131072 320 721.40 721.81 721.60 262144 160 1072.19 1075.04 1073.60 524288 80 1437.82 1443.71 1441.84 1048576 40 3885.80 3915.15 3906.11 2097152 20 8107.70 8242.70 8203.49 4194304 10 15218.00 15830.49 15640.45 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 384 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.07 0.06 4 1000 16.67 16.70 16.68 8 1000 16.63 16.66 16.64 16 1000 16.64 16.67 16.65 32 1000 18.62 18.66 18.63 64 1000 42.13 42.16 42.15 128 1000 20.78 20.82 20.80 256 1000 24.35 24.37 24.36 512 1000 56.55 56.59 56.57 1024 1000 31.43 31.45 31.44 2048 1000 39.99 40.02 40.00 4096 1000 136.31 136.36 136.33 8192 1000 132.96 133.00 132.98 16384 1000 164.84 164.88 164.86 32768 1000 279.39 279.45 279.43 65536 640 419.67 419.94 419.80 131072 320 864.47 865.84 865.10 262144 160 1104.59 1107.61 1105.88 524288 80 1483.45 1490.14 1487.06 1048576 40 3924.88 3951.18 3942.49 2097152 20 8767.25 8904.96 8853.95 4194304 10 15455.29 16057.01 15846.89 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 4 1000 0.47 0.47 0.47 8 1000 0.48 0.48 0.48 16 1000 0.48 0.48 0.48 32 1000 0.53 0.54 0.53 64 1000 0.54 0.54 0.54 128 1000 0.61 0.61 0.61 256 1000 0.69 0.69 0.69 512 1000 0.74 0.74 0.74 1024 1000 0.88 0.88 0.88 2048 1000 1.21 1.21 1.21 4096 1000 1.72 1.72 1.72 8192 1000 2.74 2.74 2.74 16384 1000 5.33 5.34 5.34 32768 1000 8.45 8.45 8.45 65536 640 14.40 14.41 14.40 131072 320 29.67 29.71 29.69 262144 160 59.53 59.68 59.61 524288 80 116.75 117.34 117.04 1048576 40 227.07 229.40 228.24 2097152 20 458.49 467.80 463.15 4194304 10 1105.90 1151.80 1128.85 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 4 1000 0.97 0.97 0.97 8 1000 1.00 1.00 1.00 16 1000 1.03 1.03 1.03 32 1000 1.10 1.10 1.10 64 1000 1.13 1.13 1.13 128 1000 1.30 1.30 1.30 256 1000 1.41 1.41 1.41 512 1000 1.60 1.61 1.61 1024 1000 1.97 1.97 1.97 2048 1000 2.50 2.50 2.50 4096 1000 3.57 3.58 3.57 8192 1000 5.65 5.65 5.65 16384 1000 10.20 10.21 10.21 32768 1000 14.82 14.84 14.83 65536 640 24.95 24.97 24.96 131072 320 46.67 46.79 46.75 262144 160 87.84 88.21 88.10 524288 80 169.32 170.66 170.29 1048576 40 330.92 335.10 333.98 2097152 20 686.40 699.45 696.08 4194304 10 1526.81 1592.90 1576.25 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 4 1000 1.10 1.11 1.10 8 1000 1.14 1.14 1.14 16 1000 1.11 1.11 1.11 32 1000 1.15 1.15 1.15 64 1000 1.15 1.15 1.15 128 1000 1.36 1.36 1.36 256 1000 1.43 1.44 1.43 512 1000 1.65 1.66 1.65 1024 1000 2.04 2.05 2.04 2048 1000 2.60 2.61 2.61 4096 1000 3.71 3.72 3.72 8192 1000 5.75 5.77 5.76 16384 1000 10.54 10.57 10.55 32768 1000 15.58 15.62 15.60 65536 640 25.40 25.47 25.44 131072 320 48.00 48.16 48.11 262144 160 90.87 91.29 91.17 524288 80 177.69 178.86 178.59 1048576 40 366.55 370.42 369.62 2097152 20 969.90 985.65 983.06 4194304 10 1962.40 2037.22 2026.69 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 4 1000 1.67 1.68 1.68 8 1000 1.71 1.71 1.71 16 1000 1.72 1.73 1.72 32 1000 1.95 1.97 1.96 64 1000 1.95 1.96 1.96 128 1000 2.17 2.18 2.18 256 1000 2.24 2.25 2.24 512 1000 2.60 2.61 2.60 1024 1000 3.03 3.04 3.04 2048 1000 3.91 3.93 3.92 4096 1000 5.64 5.67 5.66 8192 1000 8.99 9.04 9.02 16384 1000 16.47 16.56 16.52 32768 1000 26.89 26.97 26.93 65536 640 46.88 47.03 46.97 131072 320 90.19 90.64 90.45 262144 160 173.56 174.91 174.37 524288 80 334.80 339.14 337.51 1048576 40 693.97 709.25 703.94 2097152 20 1437.00 1619.95 1594.80 4194304 10 2778.79 3313.71 3201.96 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 4 1000 1.95 1.97 1.96 8 1000 1.93 1.95 1.94 16 1000 2.03 2.05 2.04 32 1000 2.19 2.21 2.20 64 1000 2.22 2.24 2.23 128 1000 2.42 2.44 2.43 256 1000 2.56 2.58 2.57 512 1000 2.88 2.91 2.89 1024 1000 3.39 3.43 3.41 2048 1000 4.24 4.28 4.26 4096 1000 5.96 6.02 5.99 8192 1000 9.38 9.48 9.43 16384 1000 17.45 17.64 17.55 32768 1000 28.26 28.44 28.35 65536 640 70.03 70.32 70.20 131072 320 92.48 93.22 92.93 262144 160 179.83 181.69 181.06 524288 80 352.11 358.55 356.45 1048576 40 713.17 737.07 729.58 2097152 20 1487.66 1640.26 1622.70 4194304 10 2810.91 3393.51 3322.98 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 4 1000 1.97 2.01 1.99 8 1000 1.99 2.03 2.01 16 1000 2.07 2.11 2.09 32 1000 2.08 2.13 2.11 64 1000 2.17 2.22 2.20 128 1000 2.41 2.46 2.44 256 1000 2.59 2.65 2.62 512 1000 2.76 2.82 2.79 1024 1000 3.43 3.50 3.47 2048 1000 4.46 4.56 4.51 4096 1000 6.16 6.30 6.24 8192 1000 9.64 9.86 9.76 16384 1000 17.39 17.72 17.57 32768 1000 28.67 29.06 28.89 65536 640 49.84 50.65 50.35 131072 320 93.67 96.30 95.49 262144 160 175.44 186.07 183.73 524288 80 353.11 365.71 360.98 1048576 40 727.10 753.30 746.83 2097152 20 1460.60 1627.05 1605.40 4194304 10 2765.11 3363.30 3309.33 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.08 0.07 4 1000 2.06 2.14 2.10 8 1000 2.14 2.21 2.17 16 1000 2.05 2.16 2.11 32 1000 2.23 2.31 2.27 64 1000 2.21 2.29 2.26 128 1000 2.59 2.70 2.65 256 1000 2.73 2.85 2.80 512 1000 2.99 3.10 3.05 1024 1000 3.59 3.73 3.66 2048 1000 4.64 4.83 4.74 4096 1000 6.33 6.68 6.52 8192 1000 9.96 10.42 10.22 16384 1000 18.47 19.27 18.92 32768 1000 30.11 30.87 30.53 65536 640 50.95 52.40 51.81 131072 320 95.76 100.32 98.83 262144 160 179.62 193.94 190.44 524288 80 350.94 430.50 393.16 1048576 40 724.25 770.40 754.89 2097152 20 1403.20 1722.41 1628.27 4194304 10 2618.69 5367.90 4412.74 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.08 0.07 4 1000 2.16 2.32 2.25 8 1000 2.13 2.31 2.22 16 1000 2.13 2.32 2.24 32 1000 2.24 2.41 2.33 64 1000 2.34 2.51 2.43 128 1000 2.51 2.77 2.65 256 1000 2.85 3.06 2.96 512 1000 3.07 3.33 3.20 1024 1000 3.66 4.03 3.85 2048 1000 4.77 5.14 4.96 4096 1000 6.55 7.06 6.81 8192 1000 9.88 10.72 10.31 16384 1000 19.17 20.80 20.08 32768 1000 30.64 32.65 31.68 65536 640 51.53 54.51 53.20 131072 320 96.48 104.10 101.29 262144 160 179.84 200.76 193.43 524288 80 351.71 394.14 378.68 1048576 40 725.40 795.57 771.03 2097152 20 1349.45 2329.00 2143.03 4194304 10 2609.42 5461.50 4909.58 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 384 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.10 0.07 4 1000 2.15 2.34 2.25 8 1000 2.17 2.42 2.29 16 1000 2.12 2.34 2.24 32 1000 2.30 2.54 2.42 64 1000 2.34 2.60 2.49 128 1000 2.62 2.88 2.76 256 1000 2.87 3.16 3.01 512 1000 3.26 3.66 3.47 1024 1000 3.75 4.15 3.95 2048 1000 4.77 5.34 5.04 4096 1000 6.51 7.26 6.88 8192 1000 10.19 11.58 10.92 16384 1000 18.64 21.60 20.02 32768 1000 30.85 33.30 32.06 65536 640 51.10 55.50 53.23 131072 320 89.89 106.79 100.94 262144 160 180.13 345.58 247.59 524288 80 350.39 402.96 383.40 1048576 40 722.40 810.45 781.17 2097152 20 1345.15 2444.70 2276.00 4194304 10 2649.88 5328.61 4930.58 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.15 0.15 0.15 4 1000 0.78 0.89 0.83 8 1000 1.17 1.17 1.17 16 1000 1.16 1.16 1.16 32 1000 1.16 1.16 1.16 64 1000 1.16 1.16 1.16 128 1000 1.25 1.25 1.25 256 1000 1.46 1.46 1.46 512 1000 1.50 1.50 1.50 1024 1000 1.77 1.77 1.77 2048 1000 1.99 1.99 1.99 4096 1000 2.29 2.29 2.29 8192 1000 3.04 3.04 3.04 16384 1000 5.09 5.09 5.09 32768 1000 8.93 8.93 8.93 65536 640 16.79 16.79 16.79 131072 320 47.73 47.74 47.73 262144 160 67.09 67.10 67.09 524288 80 127.95 128.00 127.97 1048576 40 243.35 243.47 243.41 2097152 20 846.51 849.70 848.10 4194304 10 1816.80 1823.78 1820.29 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.16 0.16 0.16 4 1000 0.68 0.87 0.80 8 1000 1.32 1.51 1.42 16 1000 1.83 1.83 1.83 32 1000 1.80 1.80 1.80 64 1000 1.80 1.80 1.80 128 1000 1.87 1.87 1.87 256 1000 2.04 2.04 2.04 512 1000 2.10 2.10 2.10 1024 1000 2.46 2.46 2.46 2048 1000 2.74 2.74 2.74 4096 1000 3.22 3.22 3.22 8192 1000 4.19 4.19 4.19 16384 1000 6.63 6.63 6.63 32768 1000 11.41 11.41 11.41 65536 640 21.22 21.22 21.22 131072 320 56.54 56.54 56.54 262144 160 102.10 102.14 102.12 524288 80 183.66 183.72 183.69 1048576 40 341.62 341.82 341.72 2097152 20 668.94 669.15 669.03 4194304 10 2600.38 2615.28 2607.54 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.17 0.19 0.17 4 1000 0.69 0.98 0.85 8 1000 2.34 3.11 2.69 16 1000 1.85 3.94 2.96 32 1000 2.42 2.42 2.42 64 1000 2.44 2.44 2.44 128 1000 2.50 2.50 2.50 256 1000 2.53 2.53 2.53 512 1000 2.66 2.66 2.66 1024 1000 3.19 3.19 3.19 2048 1000 3.50 3.50 3.50 4096 1000 4.10 4.10 4.10 8192 1000 5.38 5.38 5.38 16384 1000 8.01 8.01 8.01 32768 1000 13.26 13.26 13.26 65536 640 26.90 26.90 26.90 131072 320 65.45 65.46 65.45 262144 160 80.54 80.55 80.55 524288 80 230.79 230.91 230.83 1048576 40 468.25 468.77 468.54 2097152 20 1407.30 1409.01 1407.86 4194304 10 3213.00 3222.80 3217.46 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.20 0.25 0.21 4 1000 0.71 1.25 1.05 8 1000 4.70 7.62 5.96 16 1000 3.28 5.74 4.35 32 1000 3.33 11.17 7.42 64 1000 4.57 4.58 4.57 128 1000 4.77 4.77 4.77 256 1000 4.93 4.93 4.93 512 1000 5.24 5.25 5.25 1024 1000 5.79 5.79 5.79 2048 1000 6.71 6.71 6.71 4096 1000 8.29 8.29 8.29 8192 1000 8.55 8.55 8.55 16384 1000 12.32 12.32 12.32 32768 1000 18.98 18.98 18.98 65536 640 35.18 35.19 35.19 131072 320 82.90 82.92 82.91 262144 160 266.88 266.98 266.93 524288 80 526.04 526.36 526.19 1048576 40 985.07 985.75 985.29 2097152 20 2357.45 2360.50 2358.96 4194304 10 5080.80 5091.60 5084.78 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.28 0.37 0.31 4 1000 0.81 1.42 1.29 8 1000 9.14 18.50 12.83 16 1000 7.15 12.09 9.50 32 1000 5.89 8.42 6.91 64 1000 3.97 19.90 12.25 128 1000 7.38 7.39 7.38 256 1000 7.87 7.87 7.87 512 1000 8.61 8.61 8.61 1024 1000 9.54 9.54 9.54 2048 1000 11.06 11.06 11.06 4096 1000 14.11 14.12 14.11 8192 1000 13.08 13.09 13.09 16384 1000 17.11 17.11 17.11 32768 1000 24.71 24.71 24.71 65536 640 42.17 42.18 42.17 131072 320 94.04 94.07 94.06 262144 160 430.36 430.44 430.40 524288 80 791.58 791.76 791.65 1048576 40 1459.95 1460.65 1460.18 2097152 20 3204.39 3208.60 3206.31 4194304 10 6906.70 6915.62 6911.30 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.42 0.53 0.43 4 1000 1.16 9.26 2.62 8 1000 10.74 10.87 10.80 16 1000 10.56 10.79 10.65 32 1000 13.53 13.79 13.66 64 1000 13.42 13.84 13.67 128 1000 9.49 9.58 9.54 256 1000 33.64 33.68 33.66 512 1000 32.40 32.43 32.41 1024 1000 32.70 32.73 32.71 2048 1000 32.79 32.82 32.80 4096 1000 34.19 34.22 34.20 8192 1000 39.24 39.28 39.26 16384 1000 50.78 50.83 50.80 32768 1000 75.33 75.39 75.35 65536 640 122.73 122.89 122.78 131072 320 330.74 331.20 330.88 262144 160 427.09 428.39 427.49 524288 80 1036.29 1042.14 1038.20 1048576 40 1676.90 1694.27 1683.04 2097152 20 3685.06 3817.75 3712.85 4194304 10 5664.90 5978.70 5773.41 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.69 0.81 0.71 4 1000 1.43 19.31 4.20 8 1000 25.71 26.36 26.08 16 1000 25.68 26.35 26.09 32 1000 26.81 27.49 27.20 64 1000 26.69 27.57 27.24 128 1000 28.32 28.69 28.53 256 1000 45.06 45.39 45.23 512 1000 60.40 60.45 60.42 1024 1000 63.59 63.64 63.61 2048 1000 109.05 109.10 109.07 4096 1000 65.15 65.20 65.17 8192 1000 69.11 69.17 69.14 16384 1000 84.23 84.29 84.26 32768 1000 112.74 112.81 112.78 65536 640 167.67 167.83 167.74 131072 320 355.86 356.68 356.22 262144 160 756.14 759.36 757.51 524288 80 1968.77 1978.92 1972.09 1048576 40 2916.25 2964.05 2931.07 2097152 20 4872.25 5085.21 4936.61 4194304 10 9735.61 10633.61 9991.31 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.21 1.42 1.25 4 1000 42.52 43.94 43.43 8 1000 42.39 43.76 43.21 16 1000 42.49 44.02 43.35 32 1000 40.05 41.88 41.20 64 1000 41.14 43.00 42.38 128 1000 45.68 46.71 46.30 256 1000 60.79 61.72 61.33 512 1000 111.81 112.56 112.24 1024 1000 124.04 124.08 124.06 2048 1000 101.39 101.45 101.42 4096 1000 102.07 102.14 102.11 8192 1000 105.86 105.93 105.90 16384 1000 121.06 121.12 121.09 32768 1000 147.39 147.47 147.43 65536 640 206.20 206.41 206.33 131072 320 404.37 405.28 404.83 262144 160 848.72 852.32 850.77 524288 80 2006.16 2024.94 2018.15 1048576 40 3206.20 3225.05 3210.39 2097152 20 7645.64 7879.54 7739.47 4194304 10 11913.90 12960.29 12324.71 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 384 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.42 1.96 1.67 4 1000 2.86 76.22 50.51 8 1000 3.17 76.36 52.77 16 1000 3.23 74.73 44.20 32 1000 3.41 61.77 32.30 64 1000 24.17 29.17 27.56 128 1000 25.10 29.76 28.21 256 1000 34.14 39.92 38.29 512 1000 45.08 52.23 50.79 1024 1000 80.52 81.20 80.99 2048 1000 544.31 544.77 544.53 4096 1000 1396.32 1397.40 1396.98 8192 1000 2840.30 2842.27 2841.48 16384 1000 6639.93 6644.01 6642.26 32768 808 12523.77 12538.32 12532.11 65536 640 13472.38 13483.38 13478.43 131072 320 19987.44 20006.45 19997.42 262144 160 29620.34 29713.79 29673.70 524288 80 60974.84 61308.69 61165.94 1048576 40 4607.15 4633.75 4613.04 2097152 20 6500.60 6631.60 6576.46 4194304 10 11875.92 12551.00 12274.17 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 0.63 0.63 0.63 2 1000 0.62 0.62 0.62 4 1000 0.63 0.63 0.63 8 1000 0.63 0.63 0.63 16 1000 0.65 0.65 0.65 32 1000 0.68 0.68 0.68 64 1000 0.68 0.68 0.68 128 1000 0.70 0.70 0.70 256 1000 0.72 0.72 0.72 512 1000 0.77 0.77 0.77 1024 1000 0.90 0.90 0.90 2048 1000 1.15 1.15 1.15 4096 1000 1.70 1.70 1.70 8192 1000 2.88 2.88 2.88 16384 1000 5.00 5.00 5.00 32768 1000 9.31 9.31 9.31 65536 640 27.63 27.63 27.63 131072 320 51.88 51.89 51.89 262144 160 101.02 101.04 101.03 524288 80 187.64 187.72 187.68 1048576 40 833.40 835.55 834.47 2097152 20 1651.30 1655.55 1653.43 4194304 10 3477.00 3485.61 3481.30 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 1.19 1.19 1.19 2 1000 1.19 1.19 1.19 4 1000 1.16 1.16 1.16 8 1000 1.20 1.20 1.20 16 1000 1.20 1.20 1.20 32 1000 1.22 1.22 1.22 64 1000 1.28 1.28 1.28 128 1000 2.05 2.05 2.05 256 1000 2.03 2.03 2.03 512 1000 2.14 2.14 2.14 1024 1000 2.57 2.57 2.57 2048 1000 3.38 3.38 3.38 4096 1000 4.91 4.91 4.91 8192 1000 7.57 7.57 7.57 16384 1000 13.07 13.07 13.07 32768 1000 26.31 26.31 26.31 65536 640 78.24 78.26 78.25 131072 320 137.42 137.44 137.43 262144 160 251.86 251.93 251.89 524288 80 475.31 475.50 475.40 1048576 40 2259.55 2262.43 2260.98 2097152 20 5354.61 5359.01 5356.73 4194304 10 10869.19 10878.49 10873.60 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 1.78 1.79 1.78 2 1000 1.77 1.77 1.77 4 1000 1.79 1.79 1.79 8 1000 1.76 1.76 1.76 16 1000 1.81 1.81 1.81 32 1000 1.86 1.86 1.86 64 1000 1.98 1.98 1.98 128 1000 16.81 16.82 16.82 256 1000 5.00 5.00 5.00 512 1000 11.92 11.93 11.93 1024 1000 14.67 14.67 14.67 2048 1000 8.06 8.06 8.06 4096 1000 11.22 11.22 11.22 8192 1000 17.46 17.46 17.46 16384 1000 30.99 30.99 30.99 32768 1000 71.18 71.19 71.19 65536 640 174.64 174.66 174.65 131072 320 310.41 310.46 310.44 262144 160 924.52 925.01 924.78 524288 80 2431.81 2433.43 2432.52 1048576 40 7265.65 7277.65 7271.77 2097152 20 15208.15 15220.30 15214.33 4194304 10 30428.60 30447.79 30438.11 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 3.73 3.73 3.73 2 1000 3.77 3.77 3.77 4 1000 3.86 3.86 3.86 8 1000 3.85 3.85 3.85 16 1000 4.06 4.06 4.06 32 1000 4.46 4.47 4.47 64 1000 4.72 4.72 4.72 128 1000 19.21 19.22 19.21 256 1000 18.35 18.36 18.35 512 1000 23.48 23.49 23.48 1024 1000 28.03 28.05 28.04 2048 1000 34.90 34.92 34.91 4096 1000 50.63 50.65 50.64 8192 1000 81.04 81.08 81.06 16384 1000 144.36 144.43 144.40 32768 1000 300.08 300.18 300.13 65536 640 606.29 606.44 606.36 131072 320 1472.47 1473.39 1472.88 262144 160 2962.94 2967.17 2964.96 524288 80 6200.68 6219.46 6210.45 1048576 40 16952.35 16975.38 16965.32 2097152 20 34278.00 34332.14 34306.71 4194304 10 69708.90 69912.10 69836.57 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 6.42 6.42 6.42 2 1000 6.45 6.46 6.46 4 1000 6.28 6.29 6.29 8 1000 6.79 6.79 6.79 16 1000 7.35 7.36 7.36 32 1000 8.16 8.17 8.16 64 1000 9.48 9.48 9.48 128 1000 56.71 56.73 56.72 256 1000 44.54 44.56 44.55 512 1000 52.46 52.49 52.48 1024 1000 63.70 63.73 63.72 2048 1000 77.32 77.34 77.33 4096 1000 108.50 108.55 108.52 8192 1000 225.16 225.24 225.20 16384 1000 307.13 307.27 307.20 32768 1000 576.62 576.84 576.73 65536 640 1499.57 1499.98 1499.74 131072 320 3351.77 3354.42 3353.06 262144 160 6158.47 6168.91 6163.87 524288 80 13588.91 13635.71 13613.84 1048576 40 33547.22 33602.23 33576.58 2097152 20 68948.25 69029.39 69003.67 4194304 10 138640.79 140606.81 139798.86 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 26.46 26.49 26.47 2 1000 26.53 26.55 26.54 4 1000 30.50 30.53 30.51 8 1000 43.37 43.42 43.39 16 1000 65.56 65.61 65.58 32 1000 23.43 23.45 23.44 64 1000 67.28 67.34 67.31 128 1000 95.93 95.98 95.96 256 1000 94.93 94.98 94.96 512 1000 111.21 111.29 111.26 1024 1000 129.97 130.04 130.01 2048 1000 160.02 160.09 160.06 4096 1000 242.33 242.44 242.38 8192 1000 418.38 418.57 418.48 16384 1000 626.69 626.84 626.77 32768 1000 1549.36 1549.81 1549.60 65536 640 3374.91 3375.69 3375.27 131072 320 6558.64 6562.76 6560.49 262144 160 12743.82 12764.74 12753.80 524288 80 26029.86 26118.33 26077.82 1048576 40 64719.00 64981.93 64862.65 2097152 20 134645.20 135647.31 135185.68 4194304 10 276008.20 279872.39 278099.81 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 35.87 35.89 35.88 2 1000 57.54 57.58 57.56 4 1000 46.33 46.35 46.34 8 1000 46.48 46.51 46.49 16 1000 30.56 30.57 30.57 32 1000 77.59 77.65 77.62 64 1000 111.02 111.11 111.07 128 1000 196.79 196.86 196.83 256 1000 198.02 198.11 198.07 512 1000 243.11 243.23 243.17 1024 1000 275.92 276.06 275.99 2048 1000 340.57 340.65 340.62 4096 1000 496.35 496.57 496.45 8192 1000 768.36 768.59 768.47 16384 1000 3156.96 3158.31 3157.72 32768 1000 7200.56 7203.52 7202.15 65536 640 10305.72 10310.37 10308.40 131072 320 20264.27 20299.16 20280.05 262144 160 40849.20 40931.39 40895.73 524288 80 89719.78 90061.43 89913.26 1048576 40 209747.75 211284.33 210657.76 2097152 20 424673.25 431208.19 428386.85 4194304 10 850323.10 876140.21 864797.19 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 1 1000 54.25 54.28 54.27 2 1000 56.27 56.31 56.29 4 1000 63.10 63.14 63.12 8 1000 50.56 50.57 50.56 16 1000 99.73 99.80 99.77 32 1000 120.19 120.29 120.24 64 1000 190.59 190.75 190.67 128 1000 208.19 208.36 208.28 256 1000 393.16 393.26 393.22 512 1000 574.74 575.17 574.98 1024 1000 570.24 570.51 570.37 2048 1000 739.52 739.76 739.62 4096 1000 1038.51 1038.78 1038.67 8192 1000 3112.90 3114.05 3113.37 16384 1000 8366.18 8371.87 8369.22 32768 694 14751.27 14760.99 14756.99 65536 492 20706.24 20713.07 20710.81 131072 249 40603.09 40633.61 40620.15 262144 121 82526.31 82694.55 82615.78 524288 55 181245.74 182347.35 181819.94 1048576 23 422460.17 426727.56 424879.68 2097152 12 853788.08 868739.98 862295.90 4194304 out-of-mem.; needed X= 1.005 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 384 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.06 0.06 1 1000 115.83 115.91 115.88 2 1000 127.46 127.55 127.51 4 1000 70.54 70.59 70.56 8 1000 279.21 279.38 279.30 16 1000 133.63 133.74 133.69 32 1000 193.20 193.35 193.28 64 1000 213.56 213.73 213.66 128 1000 292.16 292.34 292.26 256 1000 314.57 314.81 314.70 512 1000 826.52 826.72 826.61 1024 1000 904.33 904.61 904.45 2048 1000 1354.53 1354.84 1354.70 4096 1000 1699.24 1699.71 1699.48 8192 1000 5910.48 5913.99 5912.18 16384 788 12649.56 12654.51 12652.23 32768 19 516549.57 544796.89 530683.38 65536 19 30214.84 30624.53 30466.66 131072 19 59867.47 60838.21 60430.26 262144 19 122401.90 124144.89 123463.81 524288 19 268093.89 273077.85 270737.70 1048576 16 627392.58 638963.12 634235.45 2097152 8 1256875.51 1303613.63 1284264.48 4194304 out-of-mem.; needed X= 1.505 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 1 1000 0.89 0.89 0.89 2 1000 0.89 0.89 0.89 4 1000 0.86 0.86 0.86 8 1000 0.89 0.89 0.89 16 1000 0.85 0.85 0.85 32 1000 0.88 0.88 0.88 64 1000 0.93 0.93 0.93 128 1000 0.96 0.96 0.96 256 1000 1.04 1.04 1.04 512 1000 1.10 1.10 1.10 1024 1000 1.04 1.04 1.04 2048 1000 1.64 1.64 1.64 4096 1000 1.80 1.80 1.80 8192 1000 6.56 6.56 6.56 16384 1000 5.17 5.17 5.17 32768 1000 16.86 16.87 16.87 65536 640 44.67 44.68 44.68 131072 320 76.04 76.04 76.04 262144 160 101.64 101.67 101.66 524288 80 188.06 188.11 188.09 1048576 40 833.77 835.95 834.86 2097152 20 1651.45 1655.76 1653.61 4194304 10 3480.32 3488.71 3484.51 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.08 0.08 0.08 1 1000 1.57 1.57 1.57 2 1000 1.60 1.60 1.60 4 1000 1.54 1.54 1.54 8 1000 1.57 1.57 1.57 16 1000 1.53 1.53 1.53 32 1000 1.62 1.62 1.62 64 1000 1.61 1.61 1.61 128 1000 1.76 1.76 1.76 256 1000 1.87 1.87 1.87 512 1000 2.10 2.10 2.10 1024 1000 2.75 2.75 2.75 2048 1000 3.51 3.51 3.51 4096 1000 4.97 4.97 4.97 8192 1000 11.61 11.61 11.61 16384 1000 13.28 13.28 13.28 32768 1000 51.83 51.85 51.83 65536 640 102.94 102.96 102.95 131072 320 138.01 138.03 138.03 262144 160 253.11 253.18 253.14 524288 80 479.05 479.23 479.13 1048576 40 2277.35 2280.58 2278.97 2097152 20 5359.49 5365.35 5362.36 4194304 10 10854.20 10864.59 10859.29 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.10 0.10 1 1000 2.37 2.37 2.37 2 1000 2.39 2.39 2.39 4 1000 2.38 2.38 2.38 8 1000 2.36 2.36 2.36 16 1000 2.44 2.44 2.44 32 1000 2.45 2.45 2.45 64 1000 2.60 2.60 2.60 128 1000 2.67 2.67 2.67 256 1000 3.05 3.05 3.05 512 1000 3.53 3.53 3.53 1024 1000 6.80 6.81 6.80 2048 1000 9.08 9.08 9.08 4096 1000 11.37 11.37 11.37 8192 1000 21.72 21.72 21.72 16384 1000 31.34 31.35 31.34 32768 1000 111.13 111.14 111.13 65536 640 176.33 176.36 176.34 131072 320 318.36 318.40 318.38 262144 160 968.22 968.78 968.47 524288 80 2464.51 2466.16 2465.19 1048576 40 7333.20 7345.13 7338.68 2097152 20 15108.55 15145.55 15126.75 4194304 10 30524.80 30557.01 30539.69 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.12 0.12 0.12 1 1000 4.80 4.80 4.80 2 1000 4.82 4.82 4.82 4 1000 4.78 4.78 4.78 8 1000 4.89 4.89 4.89 16 1000 5.14 5.14 5.14 32 1000 5.43 5.43 5.43 64 1000 5.73 5.73 5.73 128 1000 6.61 6.62 6.62 256 1000 8.04 8.04 8.04 512 1000 12.56 12.56 12.56 1024 1000 28.39 28.40 28.39 2048 1000 44.81 44.83 44.82 4096 1000 51.01 51.03 51.02 8192 1000 154.89 154.96 154.93 16384 1000 144.63 144.70 144.67 32768 1000 256.76 256.85 256.81 65536 640 616.80 617.03 616.91 131072 320 1308.91 1309.50 1309.21 262144 160 2961.02 2965.82 2963.47 524288 80 6229.76 6249.30 6239.88 1048576 40 16013.95 16025.05 16019.77 2097152 20 32809.25 32916.05 32854.54 4194304 10 67035.29 67252.80 67138.85 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.18 0.19 0.18 1 1000 8.26 8.26 8.26 2 1000 8.23 8.23 8.23 4 1000 8.28 8.28 8.28 8 1000 8.56 8.56 8.56 16 1000 9.28 9.29 9.28 32 1000 9.99 10.00 9.99 64 1000 11.40 11.41 11.40 128 1000 14.12 14.13 14.13 256 1000 26.14 26.14 26.14 512 1000 35.86 35.87 35.87 1024 1000 62.20 62.23 62.22 2048 1000 138.80 138.83 138.83 4096 1000 110.03 110.07 110.05 8192 1000 452.14 452.35 452.27 16384 1000 308.36 308.50 308.44 32768 1000 530.34 530.43 530.38 65536 640 1422.54 1422.95 1422.73 131072 320 3376.07 3378.07 3377.01 262144 160 6436.44 6447.65 6442.18 524288 80 12690.41 12737.06 12715.33 1048576 40 33825.77 33929.45 33880.82 2097152 20 69496.15 69664.25 69578.88 4194304 10 141010.31 142741.20 142012.03 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.30 0.32 0.31 1 1000 29.80 29.84 29.83 2 1000 30.00 30.03 30.01 4 1000 33.97 34.01 33.99 8 1000 45.07 45.12 45.09 16 1000 46.65 46.66 46.66 32 1000 48.57 48.58 48.58 64 1000 51.83 51.85 51.85 128 1000 62.88 62.90 62.89 256 1000 96.53 96.61 96.58 512 1000 113.62 113.68 113.65 1024 1000 129.94 130.01 129.98 2048 1000 161.57 161.66 161.62 4096 1000 243.57 243.70 243.64 8192 1000 385.23 385.41 385.33 16384 1000 684.86 685.23 685.05 32768 1000 1492.79 1493.39 1493.17 65536 640 3457.70 3458.70 3458.20 131072 320 6702.71 6706.25 6704.25 262144 160 13042.73 13061.13 13050.97 524288 80 27185.30 27291.85 27240.72 1048576 40 65730.95 65903.78 65811.97 2097152 20 134521.34 135801.65 135315.37 4194304 10 274518.49 277864.60 276439.82 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.51 0.53 0.51 1 1000 91.54 91.54 91.54 2 1000 50.59 50.60 50.59 4 1000 74.51 74.56 74.54 8 1000 52.80 52.80 52.80 16 1000 55.31 55.32 55.31 32 1000 286.73 287.01 286.87 64 1000 661.02 661.61 661.36 128 1000 87.85 87.88 87.87 256 1000 135.77 135.80 135.79 512 1000 250.71 250.74 250.72 1024 1000 367.97 368.01 367.99 2048 1000 578.91 578.96 578.93 4096 1000 1000.41 1000.47 1000.43 8192 1000 2668.27 2668.82 2668.56 16384 1000 5139.71 5140.52 5140.13 32768 956 10406.56 10408.08 10407.29 65536 431 23389.97 23398.28 23394.05 131072 209 47880.32 47914.91 47897.12 262144 105 96694.07 96836.84 96762.29 524288 52 192850.04 193405.94 193116.50 1048576 40 209800.73 211311.88 210700.64 2097152 20 424986.90 431016.19 428584.63 4194304 10 853672.60 879317.19 868698.44 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.93 0.94 0.93 1 1000 64.23 64.24 64.23 2 1000 63.51 63.52 63.51 4 1000 127.90 127.99 127.94 8 1000 66.21 66.23 66.22 16 1000 69.97 69.99 69.98 32 1000 832.03 832.75 832.33 64 1000 1839.79 1841.47 1840.53 128 1000 166.13 166.17 166.15 256 1000 240.07 240.11 240.09 512 1000 351.91 351.95 351.93 1024 1000 616.41 616.47 616.44 2048 1000 1083.00 1083.06 1083.03 4096 1000 3835.44 3836.00 3835.79 8192 1000 7355.11 7355.95 7355.63 16384 891 11436.08 11437.71 11437.09 32768 410 25080.03 25088.69 25085.91 65536 172 51717.77 51759.95 51746.67 131072 96 103138.08 103289.58 103242.40 262144 49 204095.16 204691.63 204501.76 524288 25 407650.24 409986.40 409255.62 1048576 24 422223.75 425739.38 424476.82 2097152 12 852080.74 867051.68 861108.29 4194304 out-of-mem.; needed X= 1.005 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 384 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.10 1.36 1.26 1 1000 75.19 75.22 75.20 2 1000 75.76 75.78 75.77 4 1000 168.67 168.80 168.73 8 1000 79.18 79.21 79.19 16 1000 86.61 86.63 86.62 32 1000 1119.96 1120.80 1120.39 64 1000 2596.90 2599.14 2598.03 128 1000 224.37 224.49 224.45 256 1000 376.90 377.03 376.98 512 1000 508.55 508.72 508.64 1024 1000 959.96 964.52 960.15 2048 1000 1765.46 1765.87 1765.71 4096 1000 5964.36 5966.44 5965.58 8192 1000 10402.49 10405.43 10404.26 16384 492 19434.56 19444.66 19440.30 32768 250 40653.27 40699.77 40680.47 65536 122 81084.33 81277.79 81196.02 131072 62 161682.86 162445.73 162123.39 262144 31 321507.26 324564.32 323269.44 524288 15 638440.93 651092.40 645706.70 1048576 15 627761.40 640981.26 635172.99 2097152 8 1255587.25 1301221.76 1283912.60 4194304 out-of-mem.; needed X= 1.505 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Gather # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 0.41 0.41 0.41 2 1000 0.41 0.41 0.41 4 1000 0.42 0.42 0.42 8 1000 0.43 0.43 0.43 16 1000 0.43 0.43 0.43 32 1000 0.53 0.53 0.53 64 1000 0.54 0.54 0.54 128 1000 0.58 0.58 0.58 256 1000 0.62 0.62 0.62 512 1000 0.66 0.66 0.66 1024 1000 0.77 0.77 0.77 2048 1000 1.04 1.04 1.04 4096 1000 1.46 1.46 1.46 8192 1000 2.31 2.31 2.31 16384 1000 3.92 3.92 3.92 32768 1000 7.04 7.05 7.05 65536 640 38.70 38.70 38.70 131072 320 52.26 52.27 52.27 262144 160 56.72 56.75 56.73 524288 80 109.51 109.57 109.54 1048576 40 378.42 380.62 379.52 2097152 20 837.41 841.70 839.56 4194304 10 1921.51 1930.09 1925.80 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 0.43 0.43 0.43 2 1000 0.44 0.44 0.44 4 1000 0.44 0.44 0.44 8 1000 0.44 0.44 0.44 16 1000 0.45 0.45 0.45 32 1000 0.50 0.50 0.50 64 1000 0.53 0.54 0.53 128 1000 0.58 0.58 0.58 256 1000 0.60 0.60 0.60 512 1000 0.66 0.66 0.66 1024 1000 0.76 0.76 0.76 2048 1000 1.05 1.06 1.05 4096 1000 1.68 1.69 1.68 8192 1000 2.79 2.79 2.79 16384 1000 4.41 4.42 4.41 32768 1000 8.54 8.56 8.55 65536 640 21.11 21.16 21.14 131072 320 37.71 37.86 37.80 262144 160 71.31 71.82 71.62 524288 80 139.61 141.46 140.74 1048576 40 672.35 693.90 685.02 2097152 20 1492.55 1572.66 1540.90 4194304 10 3222.92 3535.51 3414.81 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 1 1000 0.47 0.47 0.47 2 1000 0.48 0.48 0.48 4 1000 0.48 0.48 0.48 8 1000 0.49 0.49 0.49 16 1000 0.50 0.50 0.50 32 1000 0.49 0.50 0.50 64 1000 0.51 0.51 0.51 128 1000 0.60 0.61 0.61 256 1000 0.64 0.64 0.64 512 1000 0.69 0.69 0.69 1024 1000 0.78 0.78 0.78 2048 1000 1.09 1.10 1.09 4096 1000 1.70 1.70 1.70 8192 1000 2.83 2.85 2.84 16384 1000 4.64 4.66 4.65 32768 1000 9.20 9.24 9.22 65536 640 22.18 22.33 22.26 131072 320 46.48 47.01 46.78 262144 160 198.75 202.57 201.02 524288 80 408.05 422.21 416.27 1048576 40 1103.80 1167.98 1139.75 2097152 20 2209.70 2453.10 2346.35 4194304 10 5161.60 6118.39 5702.90 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 0.95 0.96 0.96 2 1000 0.95 0.96 0.96 4 1000 0.98 0.99 0.99 8 1000 0.98 0.98 0.98 16 1000 0.98 0.99 0.98 32 1000 1.13 1.14 1.14 64 1000 1.22 1.23 1.23 128 1000 1.31 1.32 1.32 256 1000 1.38 1.39 1.39 512 1000 1.58 1.59 1.58 1024 1000 1.92 1.94 1.93 2048 1000 2.69 2.71 2.70 4096 1000 4.21 4.24 4.23 8192 1000 8.29 8.34 8.31 16384 1000 17.64 17.74 17.70 32768 1000 36.17 36.36 36.25 65536 640 64.53 65.30 65.04 131072 320 185.97 189.02 187.87 262144 160 388.21 401.23 396.63 524288 80 817.86 867.31 850.73 1048576 40 2794.33 2947.90 2878.77 2097152 20 4791.05 5416.20 5136.79 4194304 10 10300.21 12519.00 11490.68 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 5.05 5.07 5.06 2 1000 5.32 5.33 5.33 4 1000 5.49 5.50 5.49 8 1000 5.81 5.82 5.81 16 1000 6.29 6.31 6.30 32 1000 6.84 6.86 6.85 64 1000 7.45 7.48 7.46 128 1000 8.94 8.97 8.95 256 1000 11.46 11.50 11.48 512 1000 16.16 16.22 16.19 1024 1000 33.16 33.25 33.20 2048 1000 4.46 4.53 4.49 4096 1000 7.47 7.55 7.51 8192 1000 13.41 13.64 13.55 16384 1000 42.54 43.16 42.97 32768 1000 57.12 57.99 57.57 65536 640 131.61 134.78 133.55 131072 320 312.16 325.08 320.67 262144 160 575.53 634.97 618.08 524288 80 2490.71 2574.97 2540.77 1048576 40 4962.25 5327.30 5168.22 2097152 20 8979.55 10284.90 9666.34 4194304 10 24736.00 30067.09 27575.65 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 1 1000 9.11 9.15 9.13 2 1000 9.20 9.24 9.22 4 1000 9.55 9.58 9.57 8 1000 11.36 11.41 11.38 16 1000 12.88 12.94 12.91 32 1000 15.44 15.50 15.47 64 1000 25.74 25.89 25.81 128 1000 14.73 14.88 14.80 256 1000 18.73 18.93 18.83 512 1000 25.92 26.20 26.06 1024 1000 142.49 143.01 142.76 2048 1000 47.80 49.93 48.99 4096 1000 67.00 70.12 68.73 8192 1000 123.84 128.63 126.79 16384 1000 344.57 357.85 352.27 32768 1000 331.71 338.24 336.48 65536 640 790.23 840.30 834.87 131072 320 1331.98 1526.08 1486.67 262144 160 2240.26 2664.00 2543.37 524288 80 4219.78 5086.55 4808.79 1048576 40 10229.05 10698.38 10547.20 2097152 20 15157.40 21709.94 19066.95 4194304 10 28181.79 38806.89 34899.27 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 1 1000 12.54 12.59 12.57 2 1000 13.02 13.07 13.04 4 1000 14.80 14.86 14.83 8 1000 17.48 17.55 17.52 16 1000 20.45 20.52 20.48 32 1000 26.15 26.23 26.19 64 1000 17.55 18.87 18.34 128 1000 20.77 22.35 21.73 256 1000 26.50 28.65 27.79 512 1000 37.92 40.80 39.65 1024 1000 1987.20 2000.85 1994.21 2048 1000 158.52 174.64 169.24 4096 1000 164.69 181.38 175.37 8192 1000 308.62 334.58 322.89 16384 1000 1231.45 1333.27 1281.99 32768 1000 846.61 873.37 864.10 65536 640 2319.05 2533.36 2483.65 131072 320 3605.49 3824.29 3754.39 262144 160 2884.80 6908.97 6507.85 524288 80 11071.66 17434.92 15947.52 1048576 40 21653.40 25189.73 24465.04 2097152 20 8701.94 50501.39 42532.37 4194304 10 15562.11 78125.41 64246.62 #---------------------------------------------------------------- # Benchmarking Gather # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.08 0.06 1 1000 17.01 17.08 17.05 2 1000 18.71 18.80 18.76 4 1000 21.47 21.57 21.52 8 1000 25.24 25.35 25.30 16 1000 31.47 31.60 31.54 32 1000 23.64 29.24 26.76 64 1000 26.25 33.20 30.13 128 1000 30.00 37.35 34.19 256 1000 38.55 47.94 43.84 512 1000 56.12 70.58 64.33 1024 1000 828.90 954.51 927.13 2048 1000 664.22 813.80 773.69 4096 1000 608.17 724.23 688.04 8192 1000 761.39 873.53 830.59 16384 1000 1350.04 1636.34 1519.05 32768 1000 5859.70 8000.86 6971.59 65536 640 5298.59 6456.37 6202.13 131072 320 3952.15 9061.00 8401.18 262144 160 2001.89 18857.92 14364.40 524288 80 25347.58 38752.37 36226.76 1048576 40 49615.53 57384.05 56451.12 2097152 20 11526.30 112998.86 103027.27 4194304 out-of-mem.; needed X= 1.005 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Gather # #processes = 384 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.07 0.06 1 1000 16.80 16.90 16.85 2 1000 18.75 18.85 18.80 4 1000 21.16 21.27 21.22 8 1000 24.59 24.71 24.65 16 1000 62.14 62.27 62.20 32 1000 31.59 46.46 39.70 64 1000 32.32 48.16 40.94 128 1000 35.84 53.35 45.22 256 1000 43.87 65.06 54.88 512 1000 67.58 101.04 84.45 1024 1000 1067.02 1364.98 1274.34 2048 1000 1136.36 1493.01 1390.97 4096 1000 1017.05 1342.78 1254.89 8192 1000 1153.74 1509.58 1398.17 16384 1000 1485.70 1986.56 1768.50 32768 893 7178.56 13486.29 10290.24 65536 640 4420.35 7794.35 6822.68 131072 320 111.52 13293.07 10345.38 262144 160 3813.91 35551.63 27761.82 524288 80 39639.95 56031.53 53588.85 1048576 40 77603.15 89718.00 88783.03 2097152 20 10116.20 176432.70 164285.37 4194304 out-of-mem.; needed X= 1.505 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.15 0.15 0.15 1 1000 0.45 0.45 0.45 2 1000 0.46 0.46 0.46 4 1000 0.46 0.46 0.46 8 1000 0.47 0.47 0.47 16 1000 0.47 0.47 0.47 32 1000 0.51 0.51 0.51 64 1000 0.51 0.51 0.51 128 1000 0.55 0.55 0.55 256 1000 0.64 0.64 0.64 512 1000 0.68 0.68 0.68 1024 1000 0.79 0.79 0.79 2048 1000 1.04 1.04 1.04 4096 1000 1.49 1.49 1.49 8192 1000 2.33 2.34 2.34 16384 1000 3.94 3.94 3.94 32768 1000 7.06 7.06 7.06 65536 640 13.56 13.57 13.56 131072 320 26.86 26.87 26.87 262144 160 54.00 54.03 54.01 524288 80 108.95 109.00 108.98 1048576 40 377.33 379.50 378.41 2097152 20 837.99 842.34 840.17 4194304 10 1916.41 1924.92 1920.66 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.12 0.12 0.12 1 1000 0.49 0.49 0.49 2 1000 0.49 0.49 0.49 4 1000 0.49 0.49 0.49 8 1000 0.51 0.51 0.51 16 1000 0.52 0.52 0.52 32 1000 0.55 0.55 0.55 64 1000 0.54 0.54 0.54 128 1000 0.60 0.60 0.60 256 1000 0.66 0.66 0.66 512 1000 0.70 0.71 0.70 1024 1000 0.82 0.82 0.82 2048 1000 1.10 1.10 1.10 4096 1000 1.71 1.71 1.71 8192 1000 2.82 2.84 2.83 16384 1000 4.46 4.47 4.46 32768 1000 8.54 8.55 8.55 65536 640 21.03 21.08 21.06 131072 320 37.62 37.76 37.70 262144 160 70.91 71.43 71.22 524288 80 139.30 141.16 140.43 1048576 40 673.03 694.35 685.52 2097152 20 1490.75 1571.45 1539.64 4194304 10 3233.79 3545.78 3425.12 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.10 0.11 0.10 1 1000 0.60 0.60 0.60 2 1000 0.58 0.58 0.58 4 1000 0.60 0.60 0.60 8 1000 0.62 0.62 0.62 16 1000 0.59 0.59 0.59 32 1000 0.63 0.63 0.63 64 1000 0.61 0.61 0.61 128 1000 0.70 0.70 0.70 256 1000 0.73 0.73 0.73 512 1000 0.81 0.81 0.81 1024 1000 0.93 0.94 0.93 2048 1000 1.19 1.19 1.19 4096 1000 1.80 1.81 1.81 8192 1000 2.92 2.93 2.93 16384 1000 4.63 4.65 4.64 32768 1000 9.21 9.25 9.23 65536 640 22.03 22.16 22.10 131072 320 46.78 47.32 47.08 262144 160 198.72 202.60 201.04 524288 80 409.37 423.79 417.78 1048576 40 968.83 1032.27 1004.24 2097152 20 1810.00 2053.45 1946.54 4194304 10 4659.01 5622.79 5207.03 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.10 0.09 1 1000 0.99 1.00 1.00 2 1000 0.99 1.00 0.99 4 1000 0.99 1.00 0.99 8 1000 0.99 1.00 1.00 16 1000 1.06 1.08 1.08 32 1000 1.16 1.17 1.17 64 1000 1.17 1.18 1.18 128 1000 1.33 1.34 1.33 256 1000 1.46 1.47 1.47 512 1000 1.63 1.64 1.63 1024 1000 1.96 1.98 1.97 2048 1000 2.69 2.71 2.70 4096 1000 4.24 4.27 4.26 8192 1000 8.25 8.32 8.29 16384 1000 17.30 17.40 17.35 32768 1000 36.49 36.68 36.58 65536 640 64.55 65.31 65.05 131072 320 185.25 188.46 187.26 262144 160 391.57 404.54 399.94 524288 80 981.20 1010.84 997.82 1048576 40 2641.80 2861.52 2778.87 2097152 20 5562.54 6227.76 5928.53 4194304 10 13630.60 15971.11 14864.16 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 1 1000 8.21 8.25 8.24 2 1000 7.38 7.42 7.40 4 1000 7.68 7.74 7.72 8 1000 7.67 7.69 7.68 16 1000 7.66 7.69 7.68 32 1000 8.43 8.46 8.45 64 1000 7.84 7.88 7.87 128 1000 9.01 9.04 9.03 256 1000 9.08 9.14 9.12 512 1000 9.16 9.21 9.19 1024 1000 10.73 10.78 10.76 2048 1000 14.05 14.12 14.10 4096 1000 18.64 18.74 18.71 8192 1000 26.82 26.87 26.84 16384 1000 56.24 56.51 56.41 32768 1000 122.11 122.66 122.45 65536 640 133.18 136.34 135.07 131072 320 314.20 327.25 322.82 262144 160 567.83 626.36 609.17 524288 80 2122.00 2201.52 2168.25 1048576 40 4874.85 5406.42 5189.48 2097152 20 7903.85 9266.20 8637.52 4194304 10 16003.70 21254.49 18750.42 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.08 0.08 0.08 1 1000 35.35 35.60 35.49 2 1000 35.23 35.49 35.39 4 1000 35.24 35.50 35.40 8 1000 35.33 35.60 35.49 16 1000 35.35 35.61 35.49 32 1000 35.32 35.56 35.45 64 1000 39.55 39.82 39.71 128 1000 35.56 35.81 35.70 256 1000 39.84 40.15 40.02 512 1000 47.52 47.82 47.68 1024 1000 43.09 43.47 43.32 2048 1000 52.66 53.53 53.23 4096 1000 71.40 72.48 72.11 8192 1000 123.88 124.48 124.22 16384 1000 363.30 364.32 363.86 32768 1000 356.43 358.65 357.75 65536 640 653.29 685.78 676.56 131072 320 1291.21 1411.93 1377.48 262144 160 2267.99 2703.40 2582.02 524288 80 4226.59 4976.65 4709.81 1048576 40 9636.62 10437.97 10147.31 2097152 20 12921.25 21749.15 18183.16 4194304 10 27216.60 38971.21 34520.96 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.08 0.09 0.08 1 1000 104.42 105.03 104.85 2 1000 102.10 102.59 102.43 4 1000 112.75 114.17 113.53 8 1000 107.69 108.31 108.12 16 1000 108.10 108.79 108.60 32 1000 107.55 108.17 107.99 64 1000 107.71 108.38 108.19 128 1000 110.32 111.02 110.82 256 1000 114.00 114.75 114.57 512 1000 125.43 129.99 127.95 1024 1000 123.74 124.63 124.21 2048 1000 144.17 146.09 145.46 4096 1000 217.56 218.41 218.12 8192 1000 452.48 468.19 463.66 16384 1000 1069.24 1116.61 1104.09 32768 1000 2837.02 2843.88 2842.35 65536 640 4756.48 5129.01 5054.44 131072 320 7065.20 7428.13 7374.99 262144 160 12657.67 13877.39 13714.61 524288 80 11195.69 18266.95 16408.04 1048576 40 21655.52 25071.73 24350.42 2097152 20 9714.95 49441.55 41738.86 4194304 10 27092.39 77462.39 66526.54 #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.08 0.09 0.08 1 1000 298.50 300.37 299.94 2 1000 299.51 301.38 300.96 4 1000 298.54 300.40 299.98 8 1000 312.26 314.22 313.78 16 1000 304.67 306.62 306.19 32 1000 304.71 306.67 306.24 64 1000 304.08 306.03 305.60 128 1000 312.66 314.66 314.21 256 1000 326.86 328.94 328.47 512 1000 331.85 333.95 333.48 1024 1000 340.46 342.58 342.07 2048 1000 379.30 381.44 380.90 4096 1000 511.67 513.87 513.20 8192 1000 1010.83 1013.84 1012.90 16384 1000 2532.69 2538.33 2536.47 32768 1000 7685.98 7697.66 7695.46 65536 640 11515.39 12427.28 12256.65 131072 320 15335.26 15853.65 15696.52 262144 160 25161.04 31773.04 29983.84 524288 80 25348.87 39648.56 37535.49 1048576 40 49452.38 57243.97 56402.91 2097152 20 8721.15 113901.15 104000.27 4194304 out-of-mem.; needed X= 1.005 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Gatherv # #processes = 384 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.11 0.09 1 1000 1134.66 1359.86 1298.52 2 1000 1066.11 1290.40 1229.52 4 1000 1016.02 1253.75 1193.42 8 1000 1066.96 1269.36 1209.76 16 1000 1114.45 1355.98 1294.83 32 1000 995.16 1210.44 1152.96 64 1000 993.75 1206.39 1146.45 128 1000 943.38 1114.53 1060.10 256 1000 965.44 1140.97 1079.46 512 1000 738.59 904.29 841.44 1024 1000 502.86 677.38 602.03 2048 1000 594.13 813.30 717.16 4096 1000 667.53 949.47 818.62 8192 1000 871.43 1288.38 1089.64 16384 1000 1374.81 2083.86 1744.09 32768 963 9200.05 14855.99 13197.95 65536 620 6597.12 18616.10 14421.59 131072 320 111.64 25558.79 20934.12 262144 137 3444.56 71997.53 60630.58 524288 80 44298.24 54643.28 53683.23 1048576 40 81647.57 98639.93 96637.38 2097152 20 13885.15 187726.90 171333.82 4194304 out-of-mem.; needed X= 1.505 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 0.47 0.47 0.47 2 1000 0.46 0.46 0.46 4 1000 0.61 0.61 0.61 8 1000 0.61 0.61 0.61 16 1000 0.63 0.63 0.63 32 1000 0.62 0.62 0.62 64 1000 0.63 0.63 0.63 128 1000 0.65 0.65 0.65 256 1000 0.71 0.71 0.71 512 1000 0.76 0.76 0.76 1024 1000 0.89 0.89 0.89 2048 1000 1.16 1.16 1.16 4096 1000 1.72 1.72 1.72 8192 1000 2.86 2.86 2.86 16384 1000 5.11 5.12 5.12 32768 1000 9.18 9.18 9.18 65536 640 16.88 16.88 16.88 131072 320 31.82 31.83 31.82 262144 160 61.49 61.51 61.50 524288 80 116.25 116.31 116.28 1048576 40 486.65 488.55 487.60 2097152 20 969.10 972.95 971.03 4194304 10 2238.39 2246.59 2242.49 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.09 0.08 1 1000 0.74 0.74 0.74 2 1000 0.76 0.76 0.76 4 1000 0.65 0.65 0.65 8 1000 0.64 0.65 0.64 16 1000 0.65 0.65 0.65 32 1000 0.63 0.63 0.63 64 1000 0.65 0.65 0.65 128 1000 0.69 0.69 0.69 256 1000 0.77 0.77 0.77 512 1000 0.82 0.82 0.82 1024 1000 0.93 0.93 0.93 2048 1000 1.24 1.24 1.24 4096 1000 1.91 1.91 1.91 8192 1000 3.03 3.03 3.03 16384 1000 5.23 5.23 5.23 32768 1000 9.89 9.90 9.90 65536 640 27.57 27.60 27.59 131072 320 46.93 47.03 46.99 262144 160 87.85 88.26 88.10 524288 80 165.75 167.34 166.72 1048576 40 796.85 810.53 805.02 2097152 20 1756.00 1821.80 1796.15 4194304 10 3682.30 3967.50 3858.52 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 1 1000 0.90 0.90 0.90 2 1000 0.95 0.95 0.95 4 1000 0.62 0.62 0.62 8 1000 0.65 0.65 0.65 16 1000 0.62 0.62 0.62 32 1000 0.64 0.65 0.65 64 1000 0.64 0.64 0.64 128 1000 0.70 0.71 0.70 256 1000 0.73 0.74 0.73 512 1000 0.81 0.81 0.81 1024 1000 0.94 0.95 0.95 2048 1000 1.25 1.26 1.25 4096 1000 1.95 1.95 1.95 8192 1000 3.07 3.07 3.07 16384 1000 5.58 5.59 5.59 32768 1000 10.46 10.48 10.47 65536 640 30.77 30.85 30.81 131072 320 57.61 57.94 57.79 262144 160 176.06 177.83 177.08 524288 80 364.35 371.56 368.28 1048576 40 1164.75 1209.90 1190.02 2097152 20 2140.81 2361.70 2268.21 4194304 10 4194.31 5112.50 4721.09 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 2.08 2.09 2.08 2 1000 2.09 2.09 2.09 4 1000 1.34 1.35 1.35 8 1000 1.34 1.35 1.35 16 1000 1.35 1.36 1.36 32 1000 1.43 1.44 1.43 64 1000 1.36 1.37 1.36 128 1000 1.48 1.49 1.48 256 1000 1.58 1.59 1.58 512 1000 1.75 1.76 1.76 1024 1000 2.10 2.11 2.11 2048 1000 2.86 2.88 2.87 4096 1000 4.64 4.66 4.66 8192 1000 9.11 9.14 9.12 16384 1000 19.19 19.22 19.21 32768 1000 38.23 38.33 38.29 65536 640 84.35 84.56 84.47 131072 320 190.82 191.96 191.47 262144 160 415.93 421.13 418.82 524288 80 682.61 730.57 714.77 1048576 40 1595.68 1762.03 1697.34 2097152 20 3062.61 3779.60 3464.40 4194304 10 4751.11 7600.00 6346.29 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 1 1000 2.99 3.00 2.99 2 1000 3.20 3.20 3.20 4 1000 1.54 1.57 1.56 8 1000 1.53 1.56 1.55 16 1000 1.64 1.66 1.65 32 1000 1.54 1.56 1.55 64 1000 1.54 1.56 1.55 128 1000 1.75 1.77 1.76 256 1000 1.82 1.85 1.84 512 1000 2.23 2.27 2.25 1024 1000 2.95 2.99 2.97 2048 1000 4.84 4.90 4.88 4096 1000 8.13 8.22 8.19 8192 1000 14.40 14.57 14.51 16384 1000 28.17 28.53 28.42 32768 1000 59.21 59.80 59.62 65536 640 158.75 159.50 159.16 131072 320 371.34 374.80 373.16 262144 160 620.16 633.42 627.01 524288 80 1112.57 1261.15 1207.85 1048576 40 2432.27 3020.42 2767.85 2097152 20 3094.24 5340.29 4478.40 4194304 10 4841.40 14799.90 10540.74 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 1 1000 3.55 3.58 3.57 2 1000 3.76 3.79 3.78 4 1000 3.92 3.94 3.93 8 1000 4.21 4.31 4.28 16 1000 4.29 4.37 4.34 32 1000 4.58 4.71 4.66 64 1000 5.13 5.27 5.23 128 1000 6.34 6.50 6.45 256 1000 8.34 8.55 8.49 512 1000 13.68 13.71 13.69 1024 1000 69.83 70.41 70.17 2048 1000 112.33 113.25 112.79 4096 1000 174.26 175.85 175.04 8192 1000 240.68 242.84 241.76 16384 1000 364.25 367.35 365.79 32768 1000 1342.62 1351.66 1347.70 65536 640 1436.31 1495.00 1485.45 131072 320 1905.16 1998.21 1980.05 262144 160 3034.44 3140.24 3088.09 524288 80 3071.31 3726.55 3501.74 1048576 40 6040.30 7498.93 6998.89 2097152 20 3113.70 15325.95 11309.18 4194304 10 4867.39 32682.51 22913.05 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 1 1000 4.41 4.42 4.41 2 1000 12.43 16.44 15.27 4 1000 4.75 4.99 4.93 8 1000 8.71 13.78 12.31 16 1000 5.06 5.29 5.23 32 1000 5.82 6.01 5.95 64 1000 7.11 7.43 7.34 128 1000 9.60 10.05 9.91 256 1000 17.94 18.67 18.43 512 1000 26.10 26.12 26.11 1024 1000 169.79 172.18 171.05 2048 1000 189.11 198.32 192.36 4096 1000 247.95 257.13 252.54 8192 1000 437.66 443.02 441.08 16384 1000 1227.20 1242.78 1233.02 32768 1000 1463.80 1479.33 1475.01 65536 640 2321.83 2398.85 2388.67 131072 320 3773.87 3905.99 3873.11 262144 160 6025.91 6288.70 6215.09 524288 80 9423.23 11469.56 11012.50 1048576 40 18660.20 23624.02 21822.33 2097152 20 3137.70 42450.45 35118.03 4194304 10 4920.70 81628.89 65087.71 #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.08 0.06 1 1000 5.13 5.14 5.14 2 1000 5.17 5.24 5.21 4 1000 5.51 5.55 5.53 8 1000 5.28 5.63 5.51 16 1000 5.59 5.95 5.82 32 1000 6.45 6.76 6.63 64 1000 8.83 9.38 9.14 128 1000 14.63 14.83 14.72 256 1000 34.98 35.00 34.99 512 1000 50.35 50.37 50.36 1024 1000 2917.57 2918.87 2917.88 2048 1000 3267.70 3268.98 3268.45 4096 1000 909.52 914.88 912.37 8192 1000 933.43 959.08 948.64 16384 1000 1505.53 1578.53 1547.97 32768 1000 2305.48 2356.93 2337.26 65536 640 5849.43 5998.44 5982.54 131072 320 12153.94 12211.77 12176.11 262144 160 19641.74 19817.66 19737.47 524288 80 22979.92 27983.40 27165.11 1048576 40 42129.25 51833.65 49881.42 2097152 20 3123.65 103034.35 93705.21 4194304 out-of-mem.; needed X= 1.005 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Scatter # #processes = 384 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.07 0.06 1 1000 6.14 6.17 6.15 2 1000 5.87 5.89 5.88 4 1000 5.96 5.99 5.98 8 1000 5.69 5.73 5.72 16 1000 6.56 6.58 6.57 32 1000 7.75 7.79 7.77 64 1000 11.04 11.12 11.08 128 1000 17.63 17.66 17.65 256 1000 49.25 49.29 49.26 512 1000 74.83 74.87 74.85 1024 1000 3212.28 3215.15 3212.99 2048 1000 6575.48 6578.38 6576.91 4096 1000 1510.14 1518.33 1514.61 8192 1000 1659.15 1671.59 1664.26 16384 1000 1840.92 1929.17 1889.00 32768 879 3096.03 3155.67 3136.35 65536 327 13226.20 13334.91 13324.96 131072 320 18822.03 18991.19 18975.97 262144 160 21949.98 22131.41 22034.27 524288 80 37187.54 45459.92 44847.01 1048576 40 64840.58 79758.85 77785.58 2097152 20 3181.00 147983.21 139305.51 4194304 out-of-mem.; needed X= 1.505 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.16 0.16 0.16 1 1000 0.65 0.65 0.65 2 1000 0.64 0.64 0.64 4 1000 0.65 0.65 0.65 8 1000 0.64 0.64 0.64 16 1000 0.64 0.64 0.64 32 1000 0.65 0.65 0.65 64 1000 0.65 0.65 0.65 128 1000 0.69 0.69 0.69 256 1000 0.72 0.72 0.72 512 1000 0.79 0.79 0.79 1024 1000 0.90 0.90 0.90 2048 1000 1.18 1.18 1.18 4096 1000 1.67 1.67 1.67 8192 1000 2.63 2.63 2.63 16384 1000 4.60 4.61 4.60 32768 1000 8.21 8.21 8.21 65536 640 16.70 16.70 16.70 131072 320 31.43 31.44 31.44 262144 160 61.05 61.08 61.07 524288 80 115.94 115.99 115.96 1048576 40 485.72 487.73 486.73 2097152 20 976.00 979.95 977.98 4194304 10 2240.30 2248.60 2244.45 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.12 0.14 0.13 1 1000 0.85 0.85 0.85 2 1000 0.85 0.85 0.85 4 1000 0.84 0.84 0.84 8 1000 0.83 0.83 0.83 16 1000 0.84 0.84 0.84 32 1000 0.84 0.84 0.84 64 1000 0.87 0.88 0.88 128 1000 0.93 0.93 0.93 256 1000 0.97 0.97 0.97 512 1000 1.07 1.07 1.07 1024 1000 1.23 1.23 1.23 2048 1000 1.60 1.60 1.60 4096 1000 2.42 2.42 2.42 8192 1000 3.94 3.94 3.94 16384 1000 6.74 6.75 6.74 32768 1000 12.73 12.73 12.73 65536 640 28.61 28.64 28.62 131072 320 48.38 48.49 48.45 262144 160 90.04 90.44 90.29 524288 80 169.41 171.03 170.40 1048576 40 799.80 813.45 807.99 2097152 20 1791.60 1857.20 1831.54 4194304 10 3653.41 3939.10 3829.88 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.10 0.11 0.11 1 1000 1.34 1.34 1.34 2 1000 1.34 1.34 1.34 4 1000 1.36 1.36 1.36 8 1000 1.33 1.34 1.33 16 1000 1.33 1.33 1.33 32 1000 1.32 1.32 1.32 64 1000 1.34 1.34 1.34 128 1000 1.46 1.46 1.46 256 1000 1.55 1.55 1.55 512 1000 1.68 1.68 1.68 1024 1000 1.97 1.97 1.97 2048 1000 2.68 2.68 2.68 4096 1000 3.86 3.87 3.87 8192 1000 6.40 6.41 6.41 16384 1000 11.76 11.77 11.76 32768 1000 22.65 22.67 22.66 65536 640 30.54 30.63 30.59 131072 320 64.51 64.85 64.70 262144 160 178.41 180.22 179.44 524288 80 360.66 367.86 364.57 1048576 40 1200.78 1245.70 1225.86 2097152 20 2138.40 2360.15 2266.74 4194304 10 4196.21 5107.21 4716.91 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 1 1000 5.40 5.41 5.40 2 1000 5.38 5.39 5.39 4 1000 5.34 5.35 5.34 8 1000 5.37 5.38 5.38 16 1000 5.41 5.43 5.42 32 1000 5.36 5.37 5.36 64 1000 5.39 5.40 5.39 128 1000 5.61 5.62 5.61 256 1000 5.78 5.79 5.79 512 1000 6.84 6.86 6.85 1024 1000 7.91 7.92 7.92 2048 1000 10.27 10.29 10.28 4096 1000 13.78 13.81 13.79 8192 1000 22.59 22.63 22.62 16384 1000 40.85 40.95 40.91 32768 1000 121.18 121.38 121.29 65536 640 86.09 86.28 86.20 131072 320 189.08 190.21 189.72 262144 160 415.71 420.88 418.58 524288 80 683.07 705.65 696.15 1048576 40 1564.35 1731.57 1666.41 2097152 20 3093.80 3825.01 3505.50 4194304 10 4709.10 7596.49 6337.65 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.08 0.09 0.09 1 1000 14.12 14.15 14.14 2 1000 14.10 14.13 14.12 4 1000 14.05 14.08 14.06 8 1000 14.06 14.08 14.07 16 1000 14.03 14.06 14.05 32 1000 14.07 14.09 14.08 64 1000 14.07 14.10 14.09 128 1000 14.33 14.36 14.35 256 1000 14.49 14.52 14.50 512 1000 29.20 29.24 29.22 1024 1000 25.02 25.06 25.04 2048 1000 31.14 31.19 31.17 4096 1000 35.15 35.23 35.20 8192 1000 55.18 55.34 55.28 16384 1000 96.49 96.81 96.70 32768 1000 183.94 184.34 184.17 65536 640 167.16 167.91 167.57 131072 320 376.01 379.54 377.84 262144 160 639.31 690.09 667.62 524288 80 1114.79 1257.03 1207.18 1048576 40 2635.50 3220.78 2968.43 2097152 20 3087.00 5277.30 4426.86 4194304 10 4872.11 14875.70 10597.61 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.08 0.09 0.08 1 1000 36.99 37.11 37.05 2 1000 37.03 37.15 37.09 4 1000 36.99 37.12 37.06 8 1000 39.08 39.22 39.15 16 1000 38.91 39.26 39.20 32 1000 39.27 39.39 39.33 64 1000 39.04 39.15 39.09 128 1000 41.44 41.57 41.50 256 1000 45.67 45.88 45.77 512 1000 51.95 52.20 52.07 1024 1000 72.28 72.78 72.53 2048 1000 112.95 113.70 113.31 4096 1000 170.77 171.89 171.29 8192 1000 275.56 277.26 276.33 16384 1000 398.05 400.16 399.04 32768 1000 1366.74 1368.90 1367.76 65536 640 1550.29 1578.50 1570.21 131072 320 1797.02 1973.04 1957.23 262144 160 2761.72 3031.27 2987.34 524288 80 3068.81 3735.14 3505.36 1048576 40 6046.20 7400.85 6938.47 2097152 20 3120.60 14989.45 11085.87 4194304 10 4898.50 32915.12 23058.50 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.08 0.09 0.08 1 1000 86.91 87.01 86.96 2 1000 86.49 86.59 86.54 4 1000 88.11 88.67 88.16 8 1000 94.49 94.60 94.55 16 1000 93.02 93.13 93.08 32 1000 91.94 92.04 91.99 64 1000 90.84 90.95 90.90 128 1000 97.65 97.76 97.71 256 1000 123.68 123.97 123.86 512 1000 121.89 122.19 122.07 1024 1000 130.75 130.97 130.85 2048 1000 180.65 181.12 180.91 4096 1000 288.35 289.71 289.24 8192 1000 659.65 663.41 662.10 16384 1000 1981.78 1995.83 1991.06 32768 1000 1420.38 1435.61 1429.89 65536 640 2341.36 2403.91 2393.54 131072 320 3721.30 3875.72 3835.51 262144 160 5907.03 6174.66 6133.41 524288 80 10134.50 11871.69 11445.56 1048576 40 18080.55 22724.15 21497.12 2097152 20 3123.30 42756.59 35765.05 4194304 10 4918.69 81541.80 64936.06 #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.08 0.09 0.08 1 1000 547.71 547.94 547.83 2 1000 544.55 544.76 544.66 4 1000 538.63 538.85 538.74 8 1000 614.65 614.88 614.77 16 1000 634.03 634.27 634.16 32 1000 628.65 632.31 628.79 64 1000 585.22 585.46 585.35 128 1000 553.70 553.97 553.84 256 1000 683.49 684.59 684.29 512 1000 664.24 665.28 664.97 1024 1000 573.63 574.21 573.97 2048 1000 673.16 674.05 673.69 4096 1000 767.80 771.16 770.29 8192 1000 1481.91 1492.93 1490.48 16384 1000 4002.52 4039.97 4032.40 32768 1000 2155.33 2173.64 2169.54 65536 640 4123.83 4265.61 4246.49 131072 320 7829.27 8108.03 8075.61 262144 160 12838.00 12918.17 12888.54 524288 80 24177.44 28753.95 28122.09 1048576 40 42490.80 51347.10 49830.54 2097152 20 3128.80 93431.75 85317.66 4194304 out-of-mem.; needed X= 1.005 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Scatterv # #processes = 384 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.11 0.09 1 1000 2695.57 2702.19 2697.94 2 1000 2817.70 2839.02 2828.30 4 1000 2755.51 2762.41 2758.67 8 1000 2818.53 2819.30 2818.92 16 1000 2636.55 2648.10 2637.87 32 1000 2664.62 2681.86 2673.66 64 1000 2909.54 2915.96 2911.93 128 1000 2787.02 2787.71 2787.27 256 1000 2956.97 2969.37 2963.16 512 1000 2917.03 2917.73 2917.40 1024 1000 2745.63 2762.59 2748.11 2048 1000 2856.77 2863.20 2860.19 4096 1000 2757.02 2763.31 2762.30 8192 1000 2344.40 2363.56 2355.76 16384 1000 2712.56 2751.50 2735.89 32768 1000 3357.88 3399.83 3377.48 65536 640 13506.12 14122.69 14088.29 131072 320 17130.81 19641.36 19453.72 262144 160 23983.14 31217.13 30798.30 524288 80 43706.83 56361.80 55259.93 1048576 40 78914.55 102440.80 99576.10 2097152 20 3104.95 183144.70 168936.37 4194304 out-of-mem.; needed X= 1.505 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 1 1000 3.29 3.29 3.29 2 1000 3.27 3.27 3.27 4 1000 3.27 3.27 3.27 8 1000 3.30 3.30 3.30 16 1000 3.25 3.25 3.25 32 1000 3.26 3.26 3.26 64 1000 3.33 3.33 3.33 128 1000 3.35 3.35 3.35 256 1000 3.43 3.43 3.43 512 1000 3.63 3.63 3.63 1024 1000 3.12 3.12 3.12 2048 1000 3.64 3.64 3.64 4096 1000 4.91 4.91 4.91 8192 1000 3.56 3.56 3.56 16384 1000 5.87 5.87 5.87 32768 1000 10.31 10.31 10.31 65536 640 30.53 30.53 30.53 131072 320 53.32 53.33 53.33 262144 160 100.11 100.14 100.12 524288 80 187.08 187.13 187.10 1048576 40 837.43 839.63 838.53 2097152 20 1699.24 1703.45 1701.35 4194304 10 3885.60 3894.11 3889.86 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 5.48 5.48 5.48 2 1000 5.51 5.51 5.51 4 1000 5.46 5.46 5.46 8 1000 5.48 5.48 5.48 16 1000 5.56 5.56 5.56 32 1000 5.57 5.57 5.57 64 1000 5.53 5.53 5.53 128 1000 5.71 5.71 5.71 256 1000 5.98 5.98 5.98 512 1000 6.53 6.53 6.53 1024 1000 7.08 7.08 7.08 2048 1000 8.63 8.63 8.63 4096 1000 11.69 11.69 11.69 8192 1000 8.84 8.85 8.84 16384 1000 15.74 15.75 15.74 32768 1000 33.21 33.24 33.23 65536 640 83.80 83.81 83.80 131072 320 140.47 140.48 140.48 262144 160 264.88 264.94 264.90 524288 80 736.73 737.56 737.14 1048576 40 2674.06 2678.43 2676.24 2097152 20 5465.65 5473.04 5469.30 4194304 10 10895.80 10905.91 10900.76 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 9.30 9.30 9.30 2 1000 9.34 9.34 9.34 4 1000 9.24 9.24 9.24 8 1000 9.33 9.33 9.33 16 1000 9.35 9.35 9.35 32 1000 9.40 9.40 9.40 64 1000 9.43 9.43 9.43 128 1000 9.86 9.86 9.86 256 1000 10.68 10.68 10.68 512 1000 11.87 11.87 11.87 1024 1000 14.41 14.41 14.41 2048 1000 17.88 17.88 17.88 4096 1000 24.74 24.74 24.74 8192 1000 19.17 19.18 19.18 16384 1000 37.71 37.71 37.71 32768 1000 71.84 71.85 71.85 65536 640 210.20 210.22 210.21 131072 320 768.54 768.62 768.58 262144 160 2086.55 2086.89 2086.74 524288 80 3748.16 3749.04 3748.70 1048576 40 7697.40 7702.70 7699.70 2097152 20 14933.80 14946.90 14940.44 4194304 10 30993.80 31340.31 31228.96 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 26.02 26.02 26.02 2 1000 26.10 26.10 26.10 4 1000 25.94 25.95 25.94 8 1000 25.98 25.98 25.98 16 1000 26.02 26.02 26.02 32 1000 26.20 26.21 26.20 64 1000 26.23 26.23 26.23 128 1000 29.03 29.03 29.03 256 1000 30.51 30.51 30.51 512 1000 35.00 35.00 35.00 1024 1000 45.80 45.81 45.80 2048 1000 59.35 59.35 59.35 4096 1000 91.76 91.77 91.76 8192 1000 123.71 123.72 123.72 16384 1000 243.61 243.63 243.62 32768 1000 496.87 496.91 496.89 65536 640 1309.17 1309.27 1309.22 131072 320 2887.73 2888.12 2887.97 262144 160 5569.51 5570.63 5570.07 524288 80 10968.64 10973.97 10972.04 1048576 40 23024.05 23037.80 23032.69 2097152 20 46244.69 46263.95 46255.81 4194304 10 92724.90 93224.81 93071.30 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 1 1000 55.50 55.51 55.50 2 1000 55.58 55.59 55.58 4 1000 55.39 55.40 55.39 8 1000 55.43 55.44 55.43 16 1000 55.71 55.72 55.72 32 1000 55.78 55.78 55.78 64 1000 62.26 62.26 62.26 128 1000 83.18 83.20 83.19 256 1000 69.31 69.32 69.31 512 1000 86.22 86.24 86.23 1024 1000 117.24 117.25 117.25 2048 1000 169.16 169.19 169.17 4096 1000 268.11 268.14 268.12 8192 1000 406.32 406.35 406.34 16384 1000 805.94 806.00 805.97 32768 1000 1838.25 1838.43 1838.38 65536 640 4267.42 4267.74 4267.63 131072 320 8249.79 8250.71 8250.33 262144 160 15644.81 15655.82 15650.20 524288 80 34030.24 34082.37 34056.77 1048576 40 70217.97 70381.52 70298.46 2097152 20 142514.10 143157.99 142833.34 4194304 10 250974.32 252196.50 251556.46 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 1 1000 140.22 140.26 140.24 2 1000 146.77 146.81 146.79 4 1000 148.84 148.88 148.86 8 1000 161.22 161.26 161.24 16 1000 240.99 241.03 241.01 32 1000 122.70 122.82 122.76 64 1000 233.09 233.34 233.20 128 1000 469.75 469.87 469.81 256 1000 765.65 770.00 765.82 512 1000 1087.33 1088.21 1087.81 1024 1000 2735.39 2738.08 2737.17 2048 1000 3323.88 3326.15 3325.01 4096 1000 4469.22 4472.33 4471.06 8192 847 11734.81 11735.57 11735.19 16384 581 17459.03 17460.30 17459.76 32768 434 23091.73 23098.36 23094.31 65536 296 34432.11 34435.80 34434.30 131072 170 58821.96 58833.31 58828.11 262144 95 105485.39 105532.50 105516.51 524288 52 194283.87 194371.50 194352.06 1048576 27 383132.86 383408.37 383319.49 2097152 13 755826.84 756664.15 756351.44 4194304 7 1509369.54 1511471.71 1510848.09 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 1 1000 193.59 193.62 193.60 2 1000 195.27 195.30 195.28 4 1000 252.40 252.44 252.42 8 1000 228.26 228.30 228.28 16 1000 264.49 264.55 264.52 32 1000 532.33 532.60 532.50 64 1000 991.90 992.59 992.22 128 1000 843.01 843.23 843.11 256 1000 2395.49 2396.80 2396.14 512 1000 3250.87 3253.47 3252.12 1024 1000 4333.91 4336.34 4335.03 2048 1000 7344.56 7348.25 7346.39 4096 745 13528.83 13537.66 13534.65 8192 148 65752.69 65764.36 65758.59 16384 69 145570.98 145610.54 145593.55 32768 69 79645.06 79687.52 79665.04 65536 69 114355.38 114416.85 114395.91 131072 50 203568.18 203660.34 203611.02 262144 28 353870.26 354137.46 354009.99 524288 9 1146518.20 1152717.78 1149672.18 1048576 7 2058637.72 2084776.57 2071368.15 2097152 4 3187763.04 3206922.23 3197086.03 4194304 2 5355086.45 5391294.00 5373734.80 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 1 1000 287.84 287.88 287.86 2 1000 309.68 309.73 309.70 4 1000 325.99 326.04 326.02 8 1000 353.61 353.67 353.63 16 1000 431.24 431.40 431.28 32 1000 1533.35 1534.54 1533.97 64 1000 2870.72 2872.36 2871.67 128 1000 2378.32 2379.51 2378.73 256 1000 5399.33 5401.27 5400.24 512 935 8657.20 8665.75 8660.08 1024 789 12272.20 12282.74 12278.53 2048 463 21477.32 21493.98 21486.84 4096 191 53372.71 53455.57 53434.32 8192 58 172385.81 172595.57 172486.08 16384 26 386682.12 388182.73 387412.95 32768 26 313446.15 314746.88 314094.57 65536 26 354945.54 355382.96 355196.45 131072 21 494578.04 494877.10 494748.76 262144 11 799787.09 800867.73 800394.18 524288 6 2058008.00 2070900.48 2063821.15 1048576 3 3901220.00 3946913.64 3923411.63 2097152 2 6723312.50 6820899.01 6781886.13 4194304 out-of-mem.; needed X= 2.001 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 384 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.07 0.06 1 1000 233.62 233.73 233.69 2 1000 267.61 267.74 267.68 4 1000 397.75 397.93 397.86 8 1000 498.30 498.68 498.49 16 1000 1252.01 1252.63 1252.36 32 1000 2350.49 2351.87 2351.40 64 1000 4315.95 4318.11 4317.24 128 645 14206.83 14211.50 14209.07 256 622 15077.73 15090.46 15085.51 512 501 19309.80 19332.57 19323.93 1024 337 28073.79 28126.36 28109.00 2048 277 35926.17 35986.91 35970.37 4096 147 68409.20 68569.84 68518.63 8192 51 197815.10 197886.37 197858.15 16384 24 418497.00 419166.96 418847.85 32768 14 760272.86 761587.57 761068.33 65536 10 1036129.71 1037918.81 1037211.61 131072 6 2035670.48 2039363.19 2037896.28 262144 6 1425284.15 1429863.17 1427781.65 524288 4 2804956.26 2820026.76 2813422.51 1048576 2 5057878.02 5104152.44 5085252.52 2097152 out-of-mem.; needed X= 1.501 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) 4194304 out-of-mem.; needed X= 3.001 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.29 0.29 0.29 1 1000 1.17 1.17 1.17 2 1000 1.17 1.17 1.17 4 1000 1.19 1.19 1.19 8 1000 1.18 1.18 1.18 16 1000 1.18 1.18 1.18 32 1000 1.19 1.19 1.19 64 1000 1.19 1.19 1.19 128 1000 1.24 1.24 1.24 256 1000 1.32 1.32 1.32 512 1000 1.36 1.36 1.36 1024 1000 1.50 1.50 1.50 2048 1000 1.73 1.73 1.73 4096 1000 2.38 2.38 2.38 8192 1000 3.54 3.54 3.54 16384 1000 5.82 5.82 5.82 32768 1000 10.29 10.29 10.29 65536 640 31.87 31.88 31.87 131072 320 54.69 54.70 54.69 262144 160 101.22 101.24 101.23 524288 80 187.91 187.96 187.94 1048576 40 839.22 841.40 840.31 2097152 20 1700.04 1704.39 1702.22 4194304 10 3889.80 3898.60 3894.20 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.33 0.36 0.34 1 1000 2.37 2.38 2.37 2 1000 2.37 2.37 2.37 4 1000 2.36 2.36 2.36 8 1000 2.35 2.35 2.35 16 1000 2.34 2.34 2.34 32 1000 2.36 2.36 2.36 64 1000 2.36 2.36 2.36 128 1000 2.48 2.49 2.48 256 1000 2.65 2.65 2.65 512 1000 2.94 2.94 2.94 1024 1000 3.27 3.28 3.28 2048 1000 4.24 4.24 4.24 4096 1000 5.88 5.88 5.88 8192 1000 8.90 8.91 8.91 16384 1000 15.76 15.77 15.77 32768 1000 33.05 33.06 33.05 65536 640 84.07 84.09 84.08 131072 320 144.39 144.49 144.45 262144 160 267.07 267.29 267.23 524288 80 692.90 694.01 693.39 1048576 40 2611.40 2613.70 2612.54 2097152 20 5397.75 5408.25 5402.95 4194304 10 10914.49 10934.00 10924.21 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.41 0.42 0.42 1 1000 4.87 4.87 4.87 2 1000 4.89 4.89 4.89 4 1000 4.82 4.83 4.82 8 1000 4.83 4.83 4.83 16 1000 4.85 4.85 4.85 32 1000 4.86 4.87 4.87 64 1000 4.89 4.89 4.89 128 1000 5.15 5.15 5.15 256 1000 5.51 5.51 5.51 512 1000 5.94 5.94 5.94 1024 1000 6.87 6.87 6.87 2048 1000 8.77 8.77 8.77 4096 1000 12.02 12.02 12.02 8192 1000 39.67 39.68 39.67 16384 1000 37.80 37.81 37.81 32768 1000 72.11 72.12 72.12 65536 640 183.66 183.70 183.68 131072 320 685.20 685.46 685.37 262144 160 2545.90 2547.24 2546.54 524288 80 4217.01 4218.49 4217.65 1048576 40 7768.98 7780.45 7773.87 2097152 20 15536.55 15568.49 15555.92 4194304 10 31303.38 31416.89 31381.40 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.58 0.60 0.59 1 1000 19.21 19.21 19.21 2 1000 19.32 19.33 19.32 4 1000 19.12 19.12 19.12 8 1000 45.32 45.32 45.32 16 1000 31.16 31.17 31.17 32 1000 19.12 19.12 19.12 64 1000 19.14 19.15 19.14 128 1000 21.72 21.72 21.72 256 1000 22.27 22.27 22.27 512 1000 25.89 25.90 25.90 1024 1000 31.09 31.10 31.09 2048 1000 41.49 41.50 41.50 4096 1000 65.18 65.19 65.18 8192 1000 123.62 123.64 123.63 16384 1000 246.90 246.93 246.91 32768 1000 506.86 506.98 506.93 65536 640 1237.09 1237.24 1237.19 131072 320 2980.43 2981.25 2980.91 262144 160 6222.01 6224.91 6223.89 524288 80 12273.96 12283.76 12279.42 1048576 40 24147.58 24174.80 24165.98 2097152 20 46501.26 46578.80 46541.72 4194304 10 94451.62 95109.89 94837.01 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.91 1.00 0.93 1 1000 47.35 47.35 47.35 2 1000 48.76 48.76 48.76 4 1000 47.17 47.18 47.17 8 1000 47.35 47.35 47.35 16 1000 47.15 47.15 47.15 32 1000 47.12 47.12 47.12 64 1000 47.19 47.19 47.19 128 1000 53.22 53.23 53.22 256 1000 56.24 56.25 56.25 512 1000 69.72 69.73 69.72 1024 1000 94.02 94.03 94.03 2048 1000 197.57 197.59 197.59 4096 1000 243.79 243.82 243.80 8192 1000 404.84 404.87 404.86 16384 1000 810.62 810.76 810.69 32768 1000 1938.54 1938.91 1938.72 65536 640 3813.39 3813.64 3813.52 131072 320 7868.02 7868.91 7868.43 262144 160 15728.81 15733.51 15731.24 524288 80 30916.68 30935.86 30925.32 1048576 40 62894.05 62975.67 62953.47 2097152 20 125125.30 125463.46 125330.32 4194304 10 250121.21 251721.29 250648.93 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1.55 1.67 1.57 1 1000 1250.20 1250.30 1250.24 2 1000 1200.32 1200.43 1200.37 4 1000 1240.73 1240.84 1240.78 8 1000 1305.32 1305.42 1305.36 16 1000 1309.54 1309.61 1309.57 32 1000 1302.89 1302.99 1302.94 64 1000 1339.41 1339.53 1339.46 128 1000 1364.45 1364.55 1364.50 256 1000 1550.53 1550.64 1550.58 512 1000 1976.71 1976.85 1976.78 1024 1000 2929.16 2929.29 2929.22 2048 1000 4287.00 4287.21 4287.11 4096 1000 6722.66 6727.20 6725.56 8192 885 11395.26 11395.64 11395.50 16384 531 19099.72 19101.47 19100.72 32768 315 34220.41 34227.01 34224.49 65536 227 42585.45 42591.51 42588.89 131072 151 66245.27 66260.85 66254.99 262144 86 116909.21 116943.81 116928.29 524288 52 196687.48 196795.44 196744.34 1048576 27 374944.04 375093.07 375050.08 2097152 14 725942.92 726389.22 726216.99 4194304 7 1440304.86 1441622.97 1440988.34 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 2.80 2.90 2.83 1 1000 4307.58 4307.86 4307.72 2 1000 4275.70 4276.04 4275.89 4 1000 4325.47 4325.68 4325.57 8 1000 4435.48 4435.67 4435.59 16 1000 4492.26 4492.52 4492.38 32 1000 4431.38 4431.66 4431.51 64 1000 4485.12 4485.31 4485.20 128 1000 4720.98 4721.29 4721.12 256 1000 5738.43 5738.72 5738.58 512 1000 6933.54 6933.79 6933.69 1024 1000 9635.90 9636.20 9636.08 2048 613 16211.91 16212.50 16212.23 4096 353 28890.70 28896.18 28894.09 8192 171 57743.39 57772.26 57763.77 16384 70 145862.44 146068.33 145966.98 32768 70 137607.16 137669.56 137635.66 65536 63 158607.46 158694.25 158665.06 131072 42 241359.57 241596.26 241497.65 262144 24 425104.84 425894.13 425574.29 524288 13 810563.84 813779.08 812588.24 1048576 8 1468811.51 1476950.50 1474082.41 2097152 4 2561020.26 2587063.25 2577476.70 4194304 3 4888281.35 4951554.38 4926926.52 #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 5.30 5.49 5.34 1 834 11930.70 11931.18 11930.93 2 834 12588.99 12589.52 12589.29 4 834 11983.24 11983.71 11983.46 8 756 12570.47 12571.06 12570.81 16 756 12525.32 12525.79 12525.55 32 756 12578.16 12578.80 12578.51 64 756 12549.81 12550.36 12550.13 128 756 13136.03 13136.55 13136.31 256 568 18040.48 18041.64 18041.10 512 496 20620.88 20622.13 20621.49 1024 379 26418.28 26419.89 26419.10 2048 239 42579.33 42584.10 42582.01 4096 136 73173.11 73188.01 73181.72 8192 73 138551.56 138623.17 138588.06 16384 33 299321.15 299659.22 299534.81 32768 20 514629.85 515821.71 515317.44 65536 18 581614.67 582502.55 582221.33 131072 14 707279.36 709009.43 708307.10 262144 10 997917.01 1001281.19 999878.55 524288 6 1801054.99 1807646.04 1805267.13 1048576 4 3186446.25 3203816.24 3198451.93 2097152 2 5653410.55 5715161.44 5697278.36 4194304 out-of-mem.; needed X= 2.001 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Alltoallv # #processes = 384 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 6.28 8.01 7.33 1 490 20132.29 20133.39 20132.73 2 490 20054.72 20056.11 20055.29 4 490 20035.73 20037.05 20036.22 8 474 22102.05 22103.58 22102.78 16 471 21021.90 21023.54 21022.65 32 471 21230.41 21232.01 21231.13 64 458 20915.54 20917.01 20916.17 128 458 21928.42 21929.58 21928.94 256 324 30989.20 30991.03 30990.11 512 280 34967.68 34972.71 34970.38 1024 230 43204.92 43212.84 43209.72 2048 149 67050.77 67066.52 67059.10 4096 92 109583.64 109647.24 109620.46 8192 50 199578.24 199810.24 199713.24 16384 25 414693.20 416043.08 415487.74 32768 14 777045.93 779542.28 778599.93 65536 12 839132.67 842987.16 841537.74 131072 8 1405264.50 1410179.26 1408395.27 262144 7 1532216.45 1540672.00 1537469.58 524288 4 2834661.25 2855754.49 2846867.55 1048576 2 5073160.53 5145914.08 5118196.74 2097152 out-of-mem.; needed X= 1.501 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) 4194304 out-of-mem.; needed X= 3.001 GB; use flag "-mem X" or MAX_MEM_USAGE>=X (IMB_mem_info.h) #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 1 1000 0.39 0.39 0.39 2 1000 0.40 0.40 0.40 4 1000 0.40 0.40 0.40 8 1000 0.41 0.41 0.41 16 1000 0.41 0.41 0.41 32 1000 0.46 0.46 0.46 64 1000 0.46 0.46 0.46 128 1000 0.48 0.48 0.48 256 1000 0.55 0.55 0.55 512 1000 0.61 0.61 0.61 1024 1000 0.70 0.70 0.70 2048 1000 0.95 0.95 0.95 4096 1000 1.39 1.39 1.39 8192 1000 2.32 2.32 2.32 16384 1000 4.04 4.04 4.04 32768 1000 7.01 7.01 7.01 65536 640 11.10 11.10 11.10 131072 320 20.77 20.78 20.78 262144 160 38.04 38.06 38.05 524288 80 69.67 69.73 69.70 1048576 40 369.62 371.82 370.72 2097152 20 732.60 737.14 734.87 4194304 10 1455.59 1464.39 1459.99 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 1 1000 0.70 0.70 0.70 2 1000 0.70 0.70 0.70 4 1000 0.69 0.69 0.69 8 1000 0.73 0.73 0.73 16 1000 0.70 0.70 0.70 32 1000 0.78 0.78 0.78 64 1000 0.79 0.79 0.79 128 1000 0.86 0.86 0.86 256 1000 0.93 0.93 0.93 512 1000 1.02 1.02 1.02 1024 1000 1.15 1.15 1.15 2048 1000 1.54 1.54 1.54 4096 1000 2.35 2.35 2.35 8192 1000 3.95 3.95 3.95 16384 1000 10.79 10.81 10.81 32768 1000 15.64 15.67 15.66 65536 640 23.46 23.47 23.46 131072 320 42.62 42.64 42.63 262144 160 76.68 76.74 76.71 524288 80 140.41 140.55 140.48 1048576 40 756.20 762.30 759.14 2097152 20 1494.40 1510.25 1502.16 4194304 10 2995.30 3040.60 3017.46 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 1 1000 0.85 0.85 0.85 2 1000 0.83 0.83 0.83 4 1000 0.84 0.84 0.84 8 1000 0.85 0.85 0.85 16 1000 0.85 0.85 0.85 32 1000 1.10 1.11 1.11 64 1000 1.10 1.10 1.10 128 1000 1.22 1.22 1.22 256 1000 1.30 1.31 1.30 512 1000 1.44 1.45 1.44 1024 1000 1.61 1.62 1.62 2048 1000 1.89 1.90 1.90 4096 1000 3.05 3.06 3.05 8192 1000 4.61 4.62 4.61 16384 1000 9.15 9.17 9.16 32768 1000 13.86 13.89 13.88 65536 640 22.96 23.01 22.99 131072 320 41.39 41.49 41.46 262144 160 82.34 82.59 82.52 524288 80 168.25 168.91 168.74 1048576 40 370.65 372.75 372.30 2097152 20 1210.30 1223.95 1221.44 4194304 10 2561.40 2649.59 2637.31 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 1 1000 1.86 1.87 1.87 2 1000 1.87 1.87 1.87 4 1000 1.87 1.87 1.87 8 1000 1.92 1.92 1.92 16 1000 1.92 1.93 1.92 32 1000 1.77 1.78 1.78 64 1000 1.80 1.80 1.80 128 1000 1.93 1.94 1.94 256 1000 2.06 2.07 2.07 512 1000 2.31 2.32 2.31 1024 1000 2.83 2.84 2.83 2048 1000 4.76 4.76 4.76 4096 1000 5.22 5.24 5.23 8192 1000 11.11 11.12 11.12 16384 1000 15.30 15.36 15.33 32768 1000 24.68 24.73 24.71 65536 640 44.46 44.56 44.51 131072 320 83.21 83.43 83.32 262144 160 164.76 165.73 165.25 524288 80 326.07 327.85 327.02 1048576 40 679.03 685.24 682.41 2097152 20 1546.50 1737.84 1686.33 4194304 10 3264.59 4048.59 3815.29 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 1 1000 1.75 1.77 1.76 2 1000 1.76 1.78 1.77 4 1000 1.75 1.77 1.76 8 1000 1.74 1.76 1.75 16 1000 1.78 1.79 1.78 32 1000 1.90 1.92 1.91 64 1000 1.87 1.89 1.88 128 1000 2.11 2.13 2.12 256 1000 2.15 2.17 2.16 512 1000 2.53 2.56 2.54 1024 1000 2.86 2.89 2.88 2048 1000 6.81 6.81 6.81 4096 1000 5.33 5.38 5.36 8192 1000 8.17 8.26 8.22 16384 1000 15.84 15.96 15.90 32768 1000 25.64 25.77 25.71 65536 640 46.03 46.24 46.14 131072 320 87.01 87.77 87.36 262144 160 172.88 174.04 173.54 524288 80 340.80 346.19 343.87 1048576 40 1099.82 1187.25 1131.40 2097152 20 1666.20 1833.15 1810.44 4194304 10 3340.51 4052.59 3909.81 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 1 1000 1.74 1.78 1.76 2 1000 1.70 1.74 1.72 4 1000 1.76 1.80 1.79 8 1000 1.82 1.87 1.85 16 1000 1.81 1.84 1.83 32 1000 1.89 1.94 1.92 64 1000 1.88 1.93 1.91 128 1000 2.13 2.18 2.16 256 1000 2.23 2.29 2.26 512 1000 2.46 2.52 2.50 1024 1000 3.05 3.11 3.08 2048 1000 3.89 4.00 3.95 4096 1000 5.51 5.64 5.58 8192 1000 8.34 8.54 8.45 16384 1000 15.70 15.96 15.84 32768 1000 26.82 27.12 26.99 65536 640 46.95 47.68 47.43 131072 320 88.33 90.39 89.73 262144 160 171.36 180.01 177.93 524288 80 340.51 352.91 347.27 1048576 40 704.28 719.40 715.01 2097152 20 1630.45 1899.10 1864.66 4194304 10 3431.30 4529.81 4340.67 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.25 0.05 1 1000 1.91 1.99 1.95 2 1000 1.80 1.87 1.84 4 1000 1.84 1.91 1.88 8 1000 1.85 1.92 1.89 16 1000 1.88 1.99 1.95 32 1000 1.97 2.06 2.02 64 1000 1.92 2.00 1.96 128 1000 2.22 2.30 2.26 256 1000 2.31 2.43 2.37 512 1000 2.64 2.76 2.71 1024 1000 3.14 3.28 3.23 2048 1000 3.95 4.10 4.03 4096 1000 5.72 5.96 5.85 8192 1000 8.54 8.92 8.74 16384 1000 17.11 17.71 17.45 32768 1000 26.98 27.60 27.35 65536 640 48.19 49.33 48.88 131072 320 89.62 93.41 92.14 262144 160 171.96 184.49 181.02 524288 80 341.96 361.17 354.34 1048576 40 697.68 733.53 719.88 2097152 20 1667.70 3635.35 2974.37 4194304 10 3447.10 7813.10 6236.12 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.06 0.05 1 1000 1.85 2.01 1.93 2 1000 1.80 1.95 1.89 4 1000 1.91 2.05 1.98 8 1000 1.90 2.04 1.97 16 1000 1.86 2.01 1.94 32 1000 2.00 2.17 2.09 64 1000 2.02 2.18 2.10 128 1000 2.26 2.48 2.39 256 1000 2.37 2.58 2.48 512 1000 2.70 2.91 2.81 1024 1000 3.28 3.57 3.45 2048 1000 4.14 4.52 4.35 4096 1000 5.76 6.23 6.01 8192 1000 8.79 9.52 9.17 16384 1000 17.99 19.17 18.59 32768 1000 27.96 29.45 28.78 65536 640 49.35 52.08 50.93 131072 320 90.58 96.20 94.16 262144 160 172.86 190.96 185.75 524288 80 339.31 373.90 360.88 1048576 40 694.42 759.82 733.09 2097152 20 1693.40 3606.36 3261.43 4194304 10 3399.80 7946.99 7091.96 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 384 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.04 0.06 0.05 1 1000 1.95 2.17 2.08 2 1000 1.81 2.06 1.97 4 1000 1.89 2.09 2.00 8 1000 1.96 2.18 2.07 16 1000 1.90 2.11 2.02 32 1000 2.03 2.30 2.18 64 1000 2.04 2.30 2.19 128 1000 2.31 2.58 2.45 256 1000 2.43 2.70 2.56 512 1000 2.67 3.05 2.83 1024 1000 3.36 3.74 3.55 2048 1000 4.22 4.70 4.46 4096 1000 5.76 6.47 6.14 8192 1000 8.72 9.80 9.26 16384 1000 17.15 18.84 18.07 32768 1000 27.65 29.38 28.59 65536 640 48.38 51.69 49.99 131072 320 84.17 97.54 93.09 262144 160 172.90 194.47 187.24 524288 80 340.99 381.24 365.88 1048576 40 700.92 774.10 744.11 2097152 20 1684.30 4683.65 3787.09 4194304 10 3372.00 8041.79 7362.74 #--------------------------------------------------- # Benchmarking Barrier # #processes = 2 # ( 382 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 0.47 0.47 0.47 #--------------------------------------------------- # Benchmarking Barrier # #processes = 4 # ( 380 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 0.99 0.99 0.99 #--------------------------------------------------- # Benchmarking Barrier # #processes = 8 # ( 376 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 1.47 1.47 1.47 #--------------------------------------------------- # Benchmarking Barrier # #processes = 16 # ( 368 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 2.91 2.92 2.92 #--------------------------------------------------- # Benchmarking Barrier # #processes = 32 # ( 352 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 5.25 5.25 5.25 #--------------------------------------------------- # Benchmarking Barrier # #processes = 64 # ( 320 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 20.37 20.40 20.38 #--------------------------------------------------- # Benchmarking Barrier # #processes = 128 # ( 256 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 12.64 12.65 12.64 #--------------------------------------------------- # Benchmarking Barrier # #processes = 256 # ( 128 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 14.35 14.36 14.35 #--------------------------------------------------- # Benchmarking Barrier # #processes = 384 #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 15.11 15.13 15.11 # All processes entering MPI_Finalize