Intel® MPI Library
Get help with building, analyzing, optimizing, and scaling high-performance computing (HPC) applications.
2229 Discussions

Incorrect data distribution of subarrays with MPI_Scatterv

pavel_kratochvil
Beginner
877 Views

While working on an MPI application that scatters tiles of ints from the root rank to all ranks (including the root itself) using `MPI_Scatterv`, I encountered a bug in the data distribution for certain configurations (specific combinations of tile size and number of processes).

The program runs correctly with OpenMPI and, as was confirmed in the linked discussion, also with MPICH. However, it produces incorrect results when launched with Intel MPI: the data received by some ranks is wrong, although the counts and displacements passed to `MPI_Scatterv` have been verified to be correct.

This issue arises specifically when `np * 4 == global_edge_size` (i.e. each tile is 4 columns wide), while other configurations with different decompositions and tile sizes work fine.

In the provided code, `global_edge_size == 32` produces incorrect results with `-np 8`. The incorrect distribution shows up as ranks 5, 6 and 7 aborting when they compare the received values with the reference ones.

Code in C++ can be found in the original SO submission here.

main.c

 

#include "mpi.h"
#include <stdio.h>

int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    const int global_edge_size = 32;

    // fill the initial values for distribution on root rank
    int values_for_distribution[32 * 32] = { 0 };
    if (rank == 0) {
        for (int i = 0; i < global_edge_size * global_edge_size; i++) {
            values_for_distribution[i] = i;
        }
    }

    // 1d decomposition
    // num_ranks in x and y direction
    const int dims[2] = {8, 1};
    const int n_dims = 1;

    const int tile_size_x = global_edge_size / size;
    const int tile_size_y = global_edge_size;
    // local tile size with border around the tile
    const int border_size = 2;
    const int tile_size_with_halo_x = tile_size_x + (2 * border_size);
    const int tile_size_with_halo_y = tile_size_y + (2 * border_size);
    
    MPI_Datatype global_tile_type_float, local_tile_type_float;
    
    // only root needs the global tile type
    if (rank == 0) {
        const int domain_dims[2] = {global_edge_size, global_edge_size};
        const int tile_dims[2] = {tile_size_y, tile_size_x};
        const int start_arr[2] = {0, 0};

        // initial datatypes to derive the resized types from
        MPI_Datatype tile_org_type_float = MPI_DATATYPE_NULL;
        MPI_Type_create_subarray(2, domain_dims, tile_dims, start_arr, MPI_ORDER_C, MPI_INT, &tile_org_type_float);
        MPI_Type_create_resized(tile_org_type_float, 0, 1 * sizeof(float), &global_tile_type_float);
        MPI_Type_commit(&global_tile_type_float);

        MPI_Type_free(&tile_org_type_float);
    }

    int local_tile_with_halo_dims[2] = {tile_size_with_halo_y, tile_size_with_halo_x};
    int local_tile_dims[2] = {tile_size_y, tile_size_x};
    int start_arr[2] = {border_size, border_size};

    MPI_Type_create_subarray(2, local_tile_with_halo_dims, local_tile_dims, start_arr, MPI_ORDER_C, MPI_INT, &local_tile_type_float);
    MPI_Type_commit(&local_tile_type_float);

    // array of ones, each rank receives one tile
    int counts[8] = {1, 1, 1, 1, 1, 1, 1, 1};
    int displacements[8] = {0, 4, 8, 12, 16, 20, 24, 28};
    // local buffer for receiving
    // (tile_size_y + (2 * border_size)) * (tile_size_x + (2 * border_size))
    int tile_temps[(32 + (2 * 2)) * (4 + (2 * 2))] = {0};

    MPI_Scatterv(values_for_distribution, counts, displacements, global_tile_type_float, tile_temps, 1, local_tile_type_float, 0, MPI_COMM_WORLD);

    for (int i = 0; i < size; i++) {
        if (rank == i) {
            for (int i = 0; i < global_edge_size * global_edge_size; i++) {
                values_for_distribution[i] = i;
            }
            for (int r = 0; r < tile_size_y; r++) {
                for (int c = 0; c < tile_size_x; c++) {
                    int rank_value = tile_temps[(r + border_size) * tile_size_with_halo_x + c + border_size];
                    
                    int offset_in_row = rank * tile_size_x;
                    int reference_value = values_for_distribution[r * 32 + offset_in_row + c];

                    if (rank_value != reference_value) MPI_Abort(MPI_COMM_WORLD, -1);
                }
            }
        }
    }

    MPI_Finalize();
    return 0;
}

 

CMakeLists.txt 

 

cmake_minimum_required(VERSION 3.20)

project(test LANGUAGES C)

# Only the C language is enabled, so the C standard (not CMAKE_CXX_STANDARD)
# is what applies to main.c. C99 is needed for `//` comments and loop-scoped
# declarations used in the source.
set(CMAKE_C_STANDARD          99)
set(CMAKE_C_STANDARD_REQUIRED ON)

find_package(MPI REQUIRED COMPONENTS C)

add_executable(main ${CMAKE_CURRENT_SOURCE_DIR}/main.c)
target_link_libraries(main PRIVATE MPI::MPI_C)

 

Produced output:

Abort(-1) on node 5 (rank 5 in comm 0): application called MPI_Abort(MPI_COMM_WORLD, -1) - process 5
Abort(-1) on node 6 (rank 6 in comm 0): application called MPI_Abort(MPI_COMM_WORLD, -1) - process 6
Abort(-1) on node 7 (rank 7 in comm 0): application called MPI_Abort(MPI_COMM_WORLD, -1) - process 7

 Commands used for compilation

 

# Configure: sources in the current directory, build tree in ./build
cmake -Bbuild -S.
# Build the configured project (--config only matters for multi-config generators)
cmake --build build --config Release
# Launch the reproducer on 8 ranks
# NOTE(review): with -Bbuild the binary is presumably at build/main - confirm the working directory
mpirun -np 8 ./main

 

mpirun --version 

 

$ mpirun --version
Intel(R) MPI Library for Linux* OS, Version 2021.10 Build 20230619 (id: c2e19c2f3e)
Copyright 2003-2023, Intel Corporation.

 

2 Replies
TobiasK
Moderator
694 Views

@pavel_kratochvil


Thank you for reporting this. We are looking into the issue and will fix it in a future release.

In the meantime, please use:


export I_MPI_ADJUST_SCATTERV=0

or

export I_MPI_ADJUST_SCATTERV=1

or

export I_MPI_ADJUST_SCATTERV=2


depending on the overall performance impact for your application.


0 Kudos
TobiasK
Moderator
313 Views

@pavel_kratochvil


Can you please try again with Intel MPI 2021.13.1 (oneAPI 2024.2.1)?


0 Kudos
Reply