PROGRAM possibly_recoverable_simulation USE, INTRINSIC :: ISO_FORTRAN_ENV, ONLY: TEAM_TYPE, STAT_FAILED_IMAGE IMPLICIT NONE INTEGER, ALLOCATABLE :: failures (:) INTEGER :: images_used, i, images_spare, status, k INTEGER :: id [*], me [*] TYPE (TEAM_TYPE) :: simulation_team LOGICAL :: read_checkpoint, done [*] ! Keep 1% spare images if we have a lot, just1 if 10-199 images, 0 if <10. images_spare = MAX (INT (0.01*NUM_IMAGES ()), 0, MIN (NUM_IMAGES () - 10, 1)) images_used = NUM_IMAGES () - images_spare read_checkpoint = THIS_IMAGE () > images_used setup : DO me = THIS_IMAGE () id = MERGE (1, 2, me<=images_used) ! ! Set up spare images as replacement for failed ones. ! IF (IMAGE_STATUS (1) == STAT_FAILED_IMAGE) & ERROR STOP "cannot recover" IF (me == 1) THEN failures = FAILED_IMAGES () k = images_used DO i = 1, SIZE (failures) DO k = k+1, NUM_IMAGES () IF (IMAGE_STATUS (k) == 0) EXIT END DO IF (k > NUM_IMAGES ()) ERROR STOP "cannot recover" me [k] = failures (i) id [k] = 1 END DO images_used = k END IF ! ! Set up a simulation team of constant size. ! Team 2 is the set of spares, so does not participate in the simulation. ! FORM TEAM (id, simulation_team, NEW_INDEX=me, STAT=status) simulation : CHANGE TEAM (simulation_team, STAT=status) IF (status == STAT_FAILED_IMAGE) EXIT simulation IF (TEAM_NUMBER () == 1) THEN iter : DO CALL simulation_procedure (read_checkpoint, status, done) ! The simulation_procedure: ! - sets up and performs some part of the simulation; ! - resets to the last checkpoint if requested; ! - sets status from its internal synchronizations; ! - sets done to .TRUE. when the simulation has completed. IF (status == STAT_FAILED_IMAGE) THEN read_checkpoint = .TRUE. EXIT simulation ELSE IF (done) THEN EXIT iter END IF read_checkpoint = .FALSE. END DO iter END IF END TEAM (STAT=status) simulation SYNC ALL (STAT=status) IF (THIS_IMAGE () > images_used) done = done[1] IF (done) EXIT setup END DO setup END PROGRAM possibly_recoverable_simulation