My code fails to complete an MPI_Gatherv on an MPI derived type for a struct that includes padding. In most cases, gatherv is entered on all processes but never completes on process 0, and the code does not exit (I would call this hanging). The issue occurs with icc 17.0.4 + Intel MPI 2017 Update 3, but not with icc 19.1.2 + Intel MPI 2019 Update 8. I am running in a managed HPC environment on Intel Xeon 6230s. The sample code is:
$ cat main.c
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <mpi.h>
// Simple demonstration code for calling gatherv on a struct that includes padding
// Workaround: uncomment the define below to add manual padding
// #define ADD_MANUAL_PADDING
struct my_struct
{
int i1, i2, i3; // Note odd number of integers if no padding added
int iv[18];
#ifdef ADD_MANUAL_PADDING
int manual_padding;
#endif
double x1, x2, x3, x4;
double xv[11];
};
#ifdef ADD_MANUAL_PADDING
# define MY_STRUCT_N_INTEGER (21 + 1)
#else
# define MY_STRUCT_N_INTEGER 21
#endif
#define MY_STRUCT_N_DOUBLE 15
// Main: initialization, MPI type create, and GATHERV call. In a real program, would be split.
int main(int argc, char *argv[])
{
int p, NP, N, i, j;
// For defining MPI type
int count, lengths[2];
MPI_Aint displacements[2];
MPI_Datatype types[2];
long unsigned int size_computed, size_actual;
MPI_Datatype MPI_my_struct;
// For testing gatherv
struct my_struct *ms_vect, *ms_vect_recv = NULL; // receive buffer is only allocated on rank 0
int *gather_sizes = NULL, *gather_offsets = NULL; // only allocated on rank 0
int size_recv;
// Initialize MPI
MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &NP);
MPI_Comm_rank(MPI_COMM_WORLD, &p);
if (p==0) {
printf(" -- Beginning test program, there are %d total processes\n", NP);
#ifdef ADD_MANUAL_PADDING
printf(" -- Adding manual padding\n");
#else
printf(" -- Not adding manual padding\n");
#endif
}
// ===== Define an MPI struct for my_struct
// Define what the struct contains
count = 2;
displacements[0] = 0;
types[0] = MPI_INT;
lengths[0] = MY_STRUCT_N_INTEGER;
displacements[1] = offsetof(struct my_struct, x1); // x1 must be the first double in struct
types[1] = MPI_DOUBLE;
lengths[1] = MY_STRUCT_N_DOUBLE;
// Print sizes to confirm padding
if (p==0) {
printf(" -- Size of integer times number of integers: %d\n", (MY_STRUCT_N_INTEGER) * sizeof(int));
printf(" -- Offset of first double in struct: %d\n", displacements[1]);
if ((MY_STRUCT_N_INTEGER) * sizeof(int) != displacements[1])
printf(" -- Size does not match offset, so the compiler added padding\n");
else
printf(" -- Size matches offset, so the compiler has not added padding\n");
}
// Check that final size matches sizeof
size_computed = displacements[1] + sizeof(double)*lengths[1];
size_actual = sizeof(struct my_struct);
if (size_computed != size_actual) {
printf("Size comparison failed: %lu %lu\n", size_computed, size_actual);
return 1; // Do not bother with MPI_FINALIZE or other cleanup in this example
}
// Actually create the type
MPI_Type_create_struct(count, lengths, displacements, types, &MPI_my_struct);
MPI_Type_commit(&MPI_my_struct);
// ===== Test gatherv operation
// Allocate my_struct on each process
N = 5000+10*p; // Number per process
ms_vect = (struct my_struct *) malloc(N * sizeof(struct my_struct));
if (ms_vect == NULL) {
printf("Failed to allocate ms_vect\n");
return 1;
}
// Initialize my_struct with some dummy data
for (i=0; i<N; i++) {
ms_vect[i].i1 = p*N + i + 11;
ms_vect[i].i2 = p*N + i + 22;
ms_vect[i].i3 = p*N + i + 33;
for (j=0; j<18; j++)
ms_vect[i].iv[j] = p*N + i + 100 + j;
ms_vect[i].x1 = (double) p*N + i + 111.;
ms_vect[i].x2 = (double) p*N + i + 222.;
ms_vect[i].x3 = (double) p*N + i + 333.;
ms_vect[i].x4 = (double) p*N + i + 444.;
for (j=0; j<11; j++)
ms_vect[i].xv[j] = (double) p*N + i + 1000 + j;
}
// Initialize receive struct and offset vector on process 0
if (p==0) {
gather_sizes = (int *) malloc(NP * sizeof(int));
if (gather_sizes == NULL) {
printf("Failed to allocate gather_sizes\n");
return 1;
}
for (i=0; i<NP; i++)
gather_sizes[i] = N+10*i;
gather_offsets = (int *) malloc(NP * sizeof(int));
if (gather_offsets == NULL) {
printf("Failed to allocate gather_offsets\n");
return 1;
}
gather_offsets[0] = 0;
for (i=1; i<NP; i++)
gather_offsets[i] = gather_offsets[i-1] + gather_sizes[i-1];
size_recv = gather_offsets[NP-1] + gather_sizes[NP-1];
printf(" -- Allocating %d structs with total memory %lu on proc 0\n",
size_recv, size_recv * sizeof(struct my_struct));
ms_vect_recv = (struct my_struct *) malloc(size_recv * sizeof(struct my_struct));
if (ms_vect_recv == NULL) {
printf("Failed to allocate ms_vect_recv\n");
return 1;
}
// Do not initialize ms_vect_recv to anything
}
MPI_Barrier(MPI_COMM_WORLD);
if (p==0)
printf(" -- Calling MPI_Gatherv\n");
MPI_Gatherv(ms_vect, N, MPI_my_struct, ms_vect_recv, gather_sizes, gather_offsets, MPI_my_struct, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
if (p==0)
printf(" -- Finished MPI_Gatherv\n");
// Do not bother to check values, error is hanging, not wrong values
free(ms_vect);
if (p==0) {
free(ms_vect_recv);
free(gather_offsets);
free(gather_sizes);
}
// Cleanup and deallocate
MPI_Finalize();
if (p==0)
printf(" -- complete, exiting\n");
}
The code is relatively simple. A struct is defined with an odd number of integers followed by some doubles, which results in the compiler adding 4 bytes of padding after the integers. An MPI type is created for the struct, and then gatherv is called with a non-trivial number of structs per process. The ADD_MANUAL_PADDING define adds an extra integer, eliminating the need for padding.
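For reference, a more defensive construction that I have seen recommended (a sketch, not what the test code above does) pins the type's lower bound and extent explicitly with MPI_Type_create_resized, so array elements are strided by sizeof(struct my_struct) even if the compiler adds trailing padding:
MPI_Datatype tmp_type, MPI_my_struct_resized;
int lengths[2] = { MY_STRUCT_N_INTEGER, MY_STRUCT_N_DOUBLE };
MPI_Aint displacements[2] = { 0, offsetof(struct my_struct, x1) };
MPI_Datatype types[2] = { MPI_INT, MPI_DOUBLE };
// Same two-block description as in main.c above
MPI_Type_create_struct(2, lengths, displacements, types, &tmp_type);
// Force lower bound 0 and extent sizeof(struct my_struct), so consecutive
// array elements line up even if the struct ends with trailing padding
MPI_Type_create_resized(tmp_type, 0, (MPI_Aint) sizeof(struct my_struct), &MPI_my_struct_resized);
MPI_Type_commit(&MPI_my_struct_resized);
MPI_Type_free(&tmp_type);
For this particular struct the default extent should already equal sizeof(struct my_struct), so this is belt-and-braces rather than a fix.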
Using the Intel 17 stack, without manual padding, the issue presents itself:
$ mpirun --version
Intel(R) MPI Library for Linux* OS, Version 2017 Update 3 Build 20170405 (id: 17193)
Copyright (C) 2003-2017, Intel Corporation. All rights reserved.
$ mpiicc --version
icc (ICC) 17.0.4 20170411
Copyright (C) 1985-2017 Intel Corporation. All rights reserved.
$ mpiicc main.c -o main
$ time mpirun -np 4 -genv I_MPI_FABRICS=shm:ofa ./main
-- Beginning test program, there are 4 total processes
-- Not adding manual padding
-- Size of integer times number of integers: 84
-- Offset of first double in struct: 88
-- Size does not match offset, so the compiler added padding
-- Allocating 20060 structs with total memory 4172480 on proc 0
-- Calling MPI_Gatherv
<time passes, code hangs, so I abort>
^C[mpiexec@host] Sending Ctrl-C to processes as requested
[mpiexec@host] Press Ctrl-C again to force abort
real 2m16.947s
user 0m0.016s
sys 0m0.017s
If manual padding is added, there is no problem:
$ mpirun --version
Intel(R) MPI Library for Linux* OS, Version 2017 Update 3 Build 20170405 (id: 17193)
Copyright (C) 2003-2017, Intel Corporation. All rights reserved.
$ mpiicc --version
icc (ICC) 17.0.4 20170411
Copyright (C) 1985-2017 Intel Corporation. All rights reserved.
$ mpiicc -DADD_MANUAL_PADDING main.c -o main
$ time mpirun -np 4 -genv I_MPI_FABRICS=shm:ofa ./main
-- Beginning test program, there are 4 total processes
-- Adding manual padding
-- Size of integer times number of integers: 88
-- Offset of first double in struct: 88
-- Size matches offset, so the compiler has not added padding
-- Allocating 20060 structs with total memory 4172480 on proc 0
-- Calling MPI_Gatherv
-- Finished MPI_Gatherv
-- complete, exiting
real 0m0.189s
user 0m0.014s
sys 0m0.021s
Finally, with a newer Intel stack that I have access to, there is no problem with or without manual padding:
$ mpirun --version
Intel(R) MPI Library for Linux* OS, Version 2019 Update 8 Build 20200624 (id: 4f16ad915)
Copyright 2003-2020, Intel Corporation.
$ mpiicc --version
icc (ICC) 19.1.2.254 20200623
Copyright (C) 1985-2020 Intel Corporation. All rights reserved.
$ mpiicc main.c -o main
$ time mpirun -np 4 ./main
-- Beginning test program, there are 4 total processes
-- Not adding manual padding
-- Size of integer times number of integers: 84
-- Offset of first double in struct: 88
-- Size does not match offset, so the compiler added padding
-- Allocating 20060 structs with total memory 4172480 on proc 0
-- Calling MPI_Gatherv
-- Finished MPI_Gatherv
-- complete, exiting
real 0m0.554s
user 0m0.017s
sys 0m0.012s
The issue seems to occur more readily with more processes, especially with inter-node (i.e., not shared-memory) communication. This is the simplest case I can reliably reproduce it with, though, so that is what I am showing. Furthermore, in the production code I sometimes get the following error:
Fatal error in PMPI_Gatherv: Other MPI error, error stack:
PMPI_Gatherv(1001).................: MPI_Gatherv failed(sbuf=0x28dc1d0, scount=2491, dtype=USER<struct>, rbuf=0x4a90660, rcnts=0x2483550, displs=0x2483480, dtype=USER<struct>, root=0, MPI_COMM_WORLD) failed
MPIR_Gatherv_impl(545).............: fail failed
I_MPIR_Gatherv_intra(611)..........: fail failed
MPIR_Gatherv(428)..................: fail failed
MPIR_Waitall_impl(221).............: fail failed
PMPIDI_CH3I_Progress(845)..........: fail failed
MPID_OFA_poll(230).................: fail failed
handle_read(1509)..................: fail failed
handle_read_individual(1718).......: fail failed
MPIDI_CH3_Packetized_recv_req(1533): fail failed
which appears to be related, but I have not been able to reproduce it with the toy code shown (and I won't share the production code). I have tried a variety of other things (Intel MPI runtime debugging, memory checking, building with GCC, and so on), but they do not add much information. The code runs fine with GCC + OpenMPI.
So it seems to me that this is a bug in Intel MPI 2017. I have the following questions/comments:
- Are other people able to reproduce the behavior in their environment? Could somebody more familiar with MPI_Type_create_struct confirm that I'm constructing the MPI type correctly?
- In my HPC environment with Intel 17, shm:ofa is required as the default fabric is not present. shm:ofa appears to work fine elsewhere, though. Could this be related to the issue?
- If this is a bug, I would appreciate some sort of confirmation, and perhaps this case should be added to some internal Intel database to ensure there are no regressions on it in the future.
Hi,
Thanks for reaching out to us.
We no longer support issues related to the Intel MPI Library 2017 version. Kindly upgrade to the latest version.
You can download the latest supported version from the link below:
Thanks & Regards
Santosh
Hi Santosh, thanks for the response. Yes, I do have access to a newer version of icc/IMPI, and as stated in the question, the issue does not present itself in that case. However, the issue still raises several concerns that I would like addressed, and your answer does not help with those.
First, a 4-year-old compiler/MPI stack is not that old in the context of a production HPC environment. Admins do not update with every release cycle, and users become familiar with one stack and do not wish to rebuild all dependencies, switch paths, and learn new fabric commands. I am not expecting bugfixes to be provided for 2017, but I do want a bit of information:
- Is this indeed a bug in 2017, or is the example code creating the MPI structure incorrectly? The fact that the issue does not arise with 2019 does not definitively answer this question for me, as the issue could still be present with a different run configuration. (A quick self-check sketch follows this list.)
- If it is a bug, then I want my workaround to be minimally invasive while still mitigating the problem. If manual padding is added (as in the test code), will the issue not present itself? I cannot expect all users to avoid specific compiler versions, nor do I even know which versions are affected.
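For the first point, the kind of self-check I have in mind is simply asking MPI what it thinks the committed type's size and extent are, and comparing against the C struct; a minimal sketch using the MPI_my_struct committed in the test code:
int type_size;
MPI_Aint lb, extent;
// size = bytes actually transferred per element (ints + doubles, excluding padding)
MPI_Type_size(MPI_my_struct, &type_size);
// extent = stride between consecutive array elements; should equal sizeof(struct my_struct)
MPI_Type_get_extent(MPI_my_struct, &lb, &extent);
if (p == 0)
    printf(" -- type size %d, lb %ld, extent %ld, sizeof(struct my_struct) %lu\n",
           type_size, (long)lb, (long)extent, (long unsigned int)sizeof(struct my_struct));
Without manual padding I would expect size 204 (21*4 + 15*8) and extent 208, matching sizeof(struct my_struct); a mismatched extent would mean the type is constructed wrongly, while a hang despite matching values points back at the library.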
Second, the nature of the error is unclear at this stage, and it's entirely possible that the bug is still present in newer versions. I hope that Intel takes this opportunity to understand what's actually going on in order to fix the issue for good. If this has already been done internally, then great - but this should be mentioned either way.
I don't mean to be a jerk about this, but the issue I'm describing appears to be serious, and I have reduced it to a relatively simple test case that demonstrates it. It should be looked at by a developer who can answer my questions. If this forum is not the appropriate place to receive this kind of support, please let me know and I will use other channels.
Hello,
We are indeed no longer supporting products from 2017, so we cannot verify your findings. You also stated that you do not see this issue with later versions of Intel MPI. The forum is intended for other community members to comment on questions as well, so hopefully somebody will reply to your question "Could somebody more familiar with MPI_Type_create_struct confirm that I'm constructing the MPI type correctly?" I will also alert our team with regard to MPI-standard compliance of MPI_Type_create_struct usage. It is a good idea to have a test case for checking whether MPI_Type_create_struct creates the MPI type correctly.
Hello,
I checked with our engineering team, and they are not aware of a bug related to your question in the 2017 product. Intel MPI went through a major code base change in the 2019 version, so it is likely that the potential bug you are observing is gone now.
For reference, I found many external tutorials on MPI_Type_create_struct usage, and you can likely find many more:
https://www.codingame.com/playgrounds/349/introduction-to-mpi/custom-types---exercise
https://www.rookiehpc.com/mpi/docs/mpi_type_create_struct.php
Mark,
Thanks for the update; it's good to know that 2019 is potentially the point at which the bug was fixed.
As if to illustrate my point, the second example you provided on MPI_Type_create_struct is incorrect: it does not account for any padding in C structs, which is the central problem leading to this issue. Their example assumes that the displacement of the first double is sizeof(int) from the start of the struct, when in reality most compilers will add 4 bytes of padding there. The lack of reliable information available on MPI_Type_create_struct is the main reason I asked somebody to look at my example code.
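To make that concrete, here is a minimal sketch (a hypothetical two-field struct, not taken from either link) showing the gap between the naive displacement and the real one:
#include <stdio.h>
#include <stddef.h>
// Hypothetical struct: one int followed by one double
struct pair { int i; double x; };
int main(void)
{
    // Naive displacement for the double, as assumed in the tutorial style I'm objecting to
    printf("sizeof(int)              = %lu\n", (long unsigned int) sizeof(int));              // typically 4
    // Actual displacement chosen by the compiler (4 bytes of padding on most 64-bit ABIs)
    printf("offsetof(struct pair, x) = %lu\n", (long unsigned int) offsetof(struct pair, x)); // typically 8
    return 0;
}
Building the MPI type with the naive value shifts every double in the message by 4 bytes, which is exactly the kind of padding mistake I am trying to avoid.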