Intel® Fortran Compiler
Build applications that can scale for the future with optimized code designed for Intel® Xeon® and compatible processors.
29533 Discussions

Internal compiler error with OpenMP code in version 2026.0

caplanr
New Contributor II
27 Views

Hi,

 

I am getting an internal compiler error with one of our OpenMP codes:

 

getpb_reproducer.f90: error #5633: **Internal compiler error: segmentation violation signal raised** Please report this error along with the circumstances in which it occurred in a Software Problem Report. Note: File and line given may not be explicit cause of this error.
compilation aborted for getpb_reproducer.f90 (code 3)

 

The error did not happen with 2025 or 2024.

 

If I compile without "-fopenmp" it works correctly.

 

I have created a small reproducer:

 

! =============================================================================
! GETPB OpenMP ICE Reproducer for Intel 2026
! Compiles with: gfortran -O3 -fopenmp getpb_reproducer.f90 -o repro
! =============================================================================

module constants
  use, intrinsic :: iso_fortran_env, only: real64
  implicit none
  integer, parameter :: r_typ = real64
  real(r_typ), parameter :: pi = 3.141592653589793_r_typ
  real(r_typ), parameter :: twopi = 2.0_r_typ * pi
  real(r_typ), parameter :: halfpi = pi * 0.5_r_typ
  real(r_typ), parameter :: degtorad = pi / 180.0_r_typ
  real(r_typ), parameter :: au_in_rs = 214.939469_r_typ
end module constants

module params
  use constants
  implicit none
  integer :: nx, ny
  real(r_typ) :: x0, x1, y0, y1
  real(r_typ) :: rocc, int_rmin, int_rmax
  real(r_typ) :: r_obs, r_obs_rs
  logical :: r_obs_set
  real(r_typ) :: dsmult
  logical :: compute_pb, compute_b
  logical :: do_integral_weighting
  logical :: verbose
end module params

module image_region
  use constants
  implicit none
  real(r_typ), allocatable :: elong(:), alt(:)
end module image_region

module image_fields
  use constants
  implicit none
  real(r_typ), allocatable :: r_min(:,:), pb(:,:)
end module image_fields

! =============================================================================
program repro_getpb
! =============================================================================
  use constants
  use params
  use image_region
  use image_fields
  implicit none

  integer :: i
  real(r_typ) :: dummy_val
  logical :: twodee_local = .false.

  ! --------------------- Setup ---------------------
  nx = 101
  ny = 101
  x0 = -3.0_r_typ;  x1 =  3.0_r_typ
  y0 = -3.0_r_typ;  y1 =  3.0_r_typ
  rocc = 1.0_r_typ
  int_rmin = 1.0_r_typ
  int_rmax = 20.0_r_typ
  r_obs = 1.0_r_typ
  r_obs_set = .true.
  r_obs_rs = r_obs * au_in_rs
  dsmult = 1.0_r_typ
  compute_pb = .true.
  compute_b = .false.
  do_integral_weighting = .false.
  verbose = .true.

  allocate(r_min(nx,ny))
  allocate(pb(nx,ny))
  pb = 0.0_r_typ
  r_min = 0.0_r_typ

  allocate(elong(nx))
  allocate(alt(ny))

  if (verbose) then
    write(*,'(A)') '=== GETPB minimal reproducer (Intel 2026 ICE test) ==='
    write(*,'(A,I0,A,I0)') 'Image: ',nx,' x ',ny
  end if

  call get_image_obs_distance_specified

  dummy_val = pb(1,1) + pb(nx/2,ny/2) + pb(nx,ny)
  write(*,'(A,ES12.4)') 'Sample pB sum = ', dummy_val
  write(*,'(A)') 'SUCCESS: Reproducer compiled and ran.'

  deallocate(r_min, pb, elong, alt)

contains

  subroutine transform_position(x, y, z, r, t, p)
    !$acc routine seq
    real(r_typ), intent(in)  :: x, y, z
    real(r_typ), intent(out) :: r, t, p
    r = 5.0_r_typ; t = 0.0_r_typ; p = 0.0_r_typ
  end subroutine

  subroutine interp_field_ds(d1,d2,r,t,p,fv,drtp,d3,d4)
    !$acc routine seq
    real(r_typ), intent(in) :: d1,d2,r,t,p,d3,d4
    real(r_typ), intent(out) :: fv
    real(r_typ), dimension(3), intent(out) :: drtp
    fv = 1.0_r_typ
    drtp = 0.01_r_typ
  end subroutine

  subroutine get_kernels(rmin_loc, rv, ne, cp_pb, cp_b, pb_loc, b_loc)
    !$acc routine seq
    real(r_typ), intent(in) :: rmin_loc, rv, ne
    logical, intent(in) :: cp_pb, cp_b
    real(r_typ), intent(out) :: pb_loc, b_loc
    if (cp_pb) then
      pb_loc = (rmin_loc / rv)**2 * ne * 1.0e-15_r_typ
    else
      pb_loc = 0.0_r_typ
    end if
    b_loc = 0.0_r_typ
  end subroutine

  function ds_s2c(r, t, drtp, axisymmetric) result(res)
    !$acc routine seq
    real(r_typ), intent(in) :: r, t
    real(r_typ), dimension(3), intent(in) :: drtp
    logical, intent(in) :: axisymmetric
    real(r_typ) :: res
    res = 0.01_r_typ
  end function ds_s2c

  ! ===================================================================
  ! Critical subroutine (this is what triggers the Intel ICE)
  ! ===================================================================
  subroutine get_image_obs_distance_specified
    use constants
    use params
    use image_region
    use image_fields
    implicit none

    real(r_typ), parameter :: zero = 0.0_r_typ
    real(r_typ), parameter :: half = 0.5_r_typ
    real(r_typ), parameter :: one  = 1.0_r_typ

    integer :: i, j, nstop
    real(r_typ) :: dx, dy, s0, s1, dmax, s, t, ds, dsm, ds_local
    real(r_typ) :: e_rad, a_rad, cos_a, t_r_min, rv, tv, pv, pb_local
    real(r_typ), dimension(3) :: v_obs, v_ref, v_los, v_r_min
    real(r_typ) :: ds_fac
    real(r_typ), dimension(3) :: ex, ey, ez
    real(r_typ) :: n_e
    real(r_typ), dimension(3) :: drtp

    ! Build image axes
    if (nx == 1) then
      elong(1) = x0
    else
      dx = (x1-x0)/real(nx-1,r_typ)
      do i=1,nx; elong(i)=x0+(i-1)*dx; enddo
    endif

    if (ny == 1) then
      alt(1) = y0
    else
      dy = (y1-y0)/real(ny-1,r_typ)
      do j=1,ny; alt(j)=y0+(j-1)*dy; enddo
    endif

    v_obs(1) = r_obs_rs
    v_obs(2:3) = 0.0_r_typ

    ! ==================== CRITICAL PARALLEL REGION ====================
    !$omp parallel do &
    !$omp& shared(pb, r_min, v_obs) &
    !$omp& private(i,j, e_rad,a_rad,cos_a, v_ref,t_r_min,v_r_min) &
    !$omp& private(s,s0,s1,dmax, t,ds,dsm,ds_local,nstop,v_los,rv,tv,pv) &
    !$omp& private(pb_local, n_e, drtp, ds_fac) &
    !$omp& private(ex,ey,ez) &
    !$omp& collapse(2) schedule(dynamic,10)

    !$acc parallel loop collapse(2) &
    !$acc& private(i,j, e_rad,a_rad,cos_a, v_ref,t_r_min,v_r_min) &
    !$acc& private(s,s0,s1,dmax, t,ds,dsm,ds_local,nstop,v_los,rv,tv,pv) &
    !$acc& private(pb_local, n_e, drtp) &
    !$acc& private(ex,ey,ez)
    do j = 1, ny
      do i = 1, nx

        e_rad = degtorad * elong(i)
        a_rad = degtorad * alt(j)
        cos_a = cos(a_rad)

        v_ref(1) = r_obs_rs * (one - cos(e_rad)*cos_a)
        v_ref(2) = r_obs_rs * sin(e_rad) * cos_a
        v_ref(3) = r_obs_rs * sin(a_rad)

        t_r_min = -(v_obs(1)*(v_ref(1)-v_obs(1)) + &
                    v_obs(2)*(v_ref(2)-v_obs(2)) + &
                    v_obs(3)*(v_ref(3)-v_obs(3))) / (r_obs_rs**2)

        v_r_min = t_r_min*v_ref + (one-t_r_min)*v_obs
        r_min(i,j) = sqrt(dot_product(v_r_min, v_r_min))

        if (abs(e_rad) <= halfpi .and. r_min(i,j) <= rocc) then
          pb(i,j) = -1.0_r_typ
          cycle
        end if
        if (r_min(i,j) >= int_rmax) cycle

        dmax = sqrt(int_rmax**2 - r_min(i,j)**2)
        s0 = t_r_min*r_obs_rs - dmax
        s1 = t_r_min*r_obs_rs + dmax
        if (s1 <= zero) cycle
        s0 = max(s0, zero)

        s = s0
        dsm = 0.0_r_typ
        nstop = 0

        do
          t = s / r_obs_rs
          v_los = t*v_ref + (one-t)*v_obs

          call transform_position(v_los(1),v_los(2),v_los(3), rv,tv,pv)
          rv = max(int_rmin, min(rv, int_rmax))

          call interp_field_ds(0.0_r_typ,0.0_r_typ,rv,tv,pv,n_e,drtp,0.0_r_typ,0.0_r_typ)
          ds_local = ds_s2c(rv, tv, drtp, twodee_local)

          call get_kernels(r_min(i,j), rv, n_e, compute_pb, compute_b, pb_local, pb_local)

          ds = ds_local * dsmult
          s = s + ds
          if (s >= s1) then
            ds = ds - (s - s1)
            s = s1
            nstop = nstop + 1
          end if

          ds_fac = 0.5_r_typ * (dsm + ds)
          pb(i,j) = pb(i,j) + ds_fac * pb_local

          dsm = ds
          if (nstop >= 2) exit
        end do

      end do
    end do
    !$acc end parallel
    !$omp end parallel do

  end subroutine get_image_obs_distance_specified

end program repro_getpb
1 Reply
Harald_S_Intel
Employee
4 Views

Hello,

  we're taking a look at the issue. In the meantime, I think I found a workaround for it. Can you try using a do-while construct in your inner loop (lines 223-248) rather than using a do construct? I tried the following and it worked on my end using 2026.0

        do while (nstop < 2)
          t = s / r_obs_rs
          v_los = t*v_ref + (one-t)*v_obs

          call transform_position(v_los(1),v_los(2),v_los(3), rv,tv,pv)
          rv = max(int_rmin, min(rv, int_rmax))

          call interp_field_ds(0.0_r_typ,0.0_r_typ,rv,tv,pv,n_e,drtp,0.0_r_typ,0.0_r_typ)
          ds_local = ds_s2c(rv, tv, drtp, twodee_local)

          call get_kernels(r_min(i,j), rv, n_e, compute_pb, compute_b, pb_local, pb_local)

          ds = ds_local * dsmult
          s = s + ds
          if (s >= s1) then
            ds = ds - (s - s1)
            s = s1
            nstop = nstop + 1
          end if

          ds_fac = 0.5_r_typ * (dsm + ds)
          pb(i,j) = pb(i,j) + ds_fac * pb_local

          dsm = ds
          ! if (nstop >= 2) exit
       end do

 Best,

0 Kudos
Reply