Intel® Fortran Compiler
Build applications that can scale for the future with optimized code designed for Intel® Xeon® and compatible processors.
29576 Discussions

Internal compiler error with OpenMP code in version 2026.0

caplanr
New Contributor II
428 Views

Hi,

 

I am getting an internal compiler error with one of our OpenMP codes:

 

getpb_reproducer.f90: error #5633: **Internal compiler error: segmentation violation signal raised** Please report this error along with the circumstances in which it occurred in a Software Problem Report. Note: File and line given may not be explicit cause of this error.
compilation aborted for getpb_reproducer.f90 (code 3)

 

The error did not happen with 2025 or 2024.

 

If I compile without "-fopenmp" it works correctly.

 

I have created a small reproducer:

 

! =============================================================================
! GETPB OpenMP ICE Reproducer for Intel 2026
! Compiles with: gfortran -O3 -fopenmp getpb_reproducer.f90 -o repro
! =============================================================================

module constants
  use, intrinsic :: iso_fortran_env, only: real64
  implicit none
  integer, parameter :: r_typ = real64
  real(r_typ), parameter :: pi = 3.141592653589793_r_typ
  real(r_typ), parameter :: twopi = 2.0_r_typ * pi
  real(r_typ), parameter :: halfpi = pi * 0.5_r_typ
  real(r_typ), parameter :: degtorad = pi / 180.0_r_typ
  real(r_typ), parameter :: au_in_rs = 214.939469_r_typ
end module constants

module params
  use constants
  implicit none
  integer :: nx, ny
  real(r_typ) :: x0, x1, y0, y1
  real(r_typ) :: rocc, int_rmin, int_rmax
  real(r_typ) :: r_obs, r_obs_rs
  logical :: r_obs_set
  real(r_typ) :: dsmult
  logical :: compute_pb, compute_b
  logical :: do_integral_weighting
  logical :: verbose
end module params

module image_region
  use constants
  implicit none
  real(r_typ), allocatable :: elong(:), alt(:)
end module image_region

module image_fields
  use constants
  implicit none
  real(r_typ), allocatable :: r_min(:,:), pb(:,:)
end module image_fields

! =============================================================================
program repro_getpb
! =============================================================================
  use constants
  use params
  use image_region
  use image_fields
  implicit none

  integer :: i
  real(r_typ) :: dummy_val
  logical :: twodee_local = .false.

  ! --------------------- Setup ---------------------
  nx = 101
  ny = 101
  x0 = -3.0_r_typ;  x1 =  3.0_r_typ
  y0 = -3.0_r_typ;  y1 =  3.0_r_typ
  rocc = 1.0_r_typ
  int_rmin = 1.0_r_typ
  int_rmax = 20.0_r_typ
  r_obs = 1.0_r_typ
  r_obs_set = .true.
  r_obs_rs = r_obs * au_in_rs
  dsmult = 1.0_r_typ
  compute_pb = .true.
  compute_b = .false.
  do_integral_weighting = .false.
  verbose = .true.

  allocate(r_min(nx,ny))
  allocate(pb(nx,ny))
  pb = 0.0_r_typ
  r_min = 0.0_r_typ

  allocate(elong(nx))
  allocate(alt(ny))

  if (verbose) then
    write(*,'(A)') '=== GETPB minimal reproducer (Intel 2026 ICE test) ==='
    write(*,'(A,I0,A,I0)') 'Image: ',nx,' x ',ny
  end if

  call get_image_obs_distance_specified

  dummy_val = pb(1,1) + pb(nx/2,ny/2) + pb(nx,ny)
  write(*,'(A,ES12.4)') 'Sample pB sum = ', dummy_val
  write(*,'(A)') 'SUCCESS: Reproducer compiled and ran.'

  deallocate(r_min, pb, elong, alt)

contains

  subroutine transform_position(x, y, z, r, t, p)
    !$acc routine seq
    real(r_typ), intent(in)  :: x, y, z
    real(r_typ), intent(out) :: r, t, p
    r = 5.0_r_typ; t = 0.0_r_typ; p = 0.0_r_typ
  end subroutine

  subroutine interp_field_ds(d1,d2,r,t,p,fv,drtp,d3,d4)
    !$acc routine seq
    real(r_typ), intent(in) :: d1,d2,r,t,p,d3,d4
    real(r_typ), intent(out) :: fv
    real(r_typ), dimension(3), intent(out) :: drtp
    fv = 1.0_r_typ
    drtp = 0.01_r_typ
  end subroutine

  subroutine get_kernels(rmin_loc, rv, ne, cp_pb, cp_b, pb_loc, b_loc)
    !$acc routine seq
    real(r_typ), intent(in) :: rmin_loc, rv, ne
    logical, intent(in) :: cp_pb, cp_b
    real(r_typ), intent(out) :: pb_loc, b_loc
    if (cp_pb) then
      pb_loc = (rmin_loc / rv)**2 * ne * 1.0e-15_r_typ
    else
      pb_loc = 0.0_r_typ
    end if
    b_loc = 0.0_r_typ
  end subroutine

  function ds_s2c(r, t, drtp, axisymmetric) result(res)
    !$acc routine seq
    real(r_typ), intent(in) :: r, t
    real(r_typ), dimension(3), intent(in) :: drtp
    logical, intent(in) :: axisymmetric
    real(r_typ) :: res
    res = 0.01_r_typ
  end function ds_s2c

  ! ===================================================================
  ! Critical subroutine (this is what triggers the Intel ICE)
  ! ===================================================================
  subroutine get_image_obs_distance_specified
    use constants
    use params
    use image_region
    use image_fields
    implicit none

    real(r_typ), parameter :: zero = 0.0_r_typ
    real(r_typ), parameter :: half = 0.5_r_typ
    real(r_typ), parameter :: one  = 1.0_r_typ

    integer :: i, j, nstop
    real(r_typ) :: dx, dy, s0, s1, dmax, s, t, ds, dsm, ds_local
    real(r_typ) :: e_rad, a_rad, cos_a, t_r_min, rv, tv, pv, pb_local
    real(r_typ), dimension(3) :: v_obs, v_ref, v_los, v_r_min
    real(r_typ) :: ds_fac
    real(r_typ), dimension(3) :: ex, ey, ez
    real(r_typ) :: n_e
    real(r_typ), dimension(3) :: drtp

    ! Build image axes
    if (nx == 1) then
      elong(1) = x0
    else
      dx = (x1-x0)/real(nx-1,r_typ)
      do i=1,nx; elong(i)=x0+(i-1)*dx; enddo
    endif

    if (ny == 1) then
      alt(1) = y0
    else
      dy = (y1-y0)/real(ny-1,r_typ)
      do j=1,ny; alt(j)=y0+(j-1)*dy; enddo
    endif

    v_obs(1) = r_obs_rs
    v_obs(2:3) = 0.0_r_typ

    ! ==================== CRITICAL PARALLEL REGION ====================
    !$omp parallel do &
    !$omp& shared(pb, r_min, v_obs) &
    !$omp& private(i,j, e_rad,a_rad,cos_a, v_ref,t_r_min,v_r_min) &
    !$omp& private(s,s0,s1,dmax, t,ds,dsm,ds_local,nstop,v_los,rv,tv,pv) &
    !$omp& private(pb_local, n_e, drtp, ds_fac) &
    !$omp& private(ex,ey,ez) &
    !$omp& collapse(2) schedule(dynamic,10)

    !$acc parallel loop collapse(2) &
    !$acc& private(i,j, e_rad,a_rad,cos_a, v_ref,t_r_min,v_r_min) &
    !$acc& private(s,s0,s1,dmax, t,ds,dsm,ds_local,nstop,v_los,rv,tv,pv) &
    !$acc& private(pb_local, n_e, drtp) &
    !$acc& private(ex,ey,ez)
    do j = 1, ny
      do i = 1, nx

        e_rad = degtorad * elong(i)
        a_rad = degtorad * alt(j)
        cos_a = cos(a_rad)

        v_ref(1) = r_obs_rs * (one - cos(e_rad)*cos_a)
        v_ref(2) = r_obs_rs * sin(e_rad) * cos_a
        v_ref(3) = r_obs_rs * sin(a_rad)

        t_r_min = -(v_obs(1)*(v_ref(1)-v_obs(1)) + &
                    v_obs(2)*(v_ref(2)-v_obs(2)) + &
                    v_obs(3)*(v_ref(3)-v_obs(3))) / (r_obs_rs**2)

        v_r_min = t_r_min*v_ref + (one-t_r_min)*v_obs
        r_min(i,j) = sqrt(dot_product(v_r_min, v_r_min))

        if (abs(e_rad) <= halfpi .and. r_min(i,j) <= rocc) then
          pb(i,j) = -1.0_r_typ
          cycle
        end if
        if (r_min(i,j) >= int_rmax) cycle

        dmax = sqrt(int_rmax**2 - r_min(i,j)**2)
        s0 = t_r_min*r_obs_rs - dmax
        s1 = t_r_min*r_obs_rs + dmax
        if (s1 <= zero) cycle
        s0 = max(s0, zero)

        s = s0
        dsm = 0.0_r_typ
        nstop = 0

        do
          t = s / r_obs_rs
          v_los = t*v_ref + (one-t)*v_obs

          call transform_position(v_los(1),v_los(2),v_los(3), rv,tv,pv)
          rv = max(int_rmin, min(rv, int_rmax))

          call interp_field_ds(0.0_r_typ,0.0_r_typ,rv,tv,pv,n_e,drtp,0.0_r_typ,0.0_r_typ)
          ds_local = ds_s2c(rv, tv, drtp, twodee_local)

          call get_kernels(r_min(i,j), rv, n_e, compute_pb, compute_b, pb_local, pb_local)

          ds = ds_local * dsmult
          s = s + ds
          if (s >= s1) then
            ds = ds - (s - s1)
            s = s1
            nstop = nstop + 1
          end if

          ds_fac = 0.5_r_typ * (dsm + ds)
          pb(i,j) = pb(i,j) + ds_fac * pb_local

          dsm = ds
          if (nstop >= 2) exit
        end do

      end do
    end do
    !$acc end parallel
    !$omp end parallel do

  end subroutine get_image_obs_distance_specified

end program repro_getpb
6 Replies
Harald_S_Intel
Employee
405 Views

Hello,

  we're taking a look at the issue. In the meantime, I think I found a workaround for it. Can you try using a do-while construct in your inner loop (lines 223-248) rather than using a do construct? I tried the following and it worked on my end using 2026.0

        do while (nstop < 2)
          t = s / r_obs_rs
          v_los = t*v_ref + (one-t)*v_obs

          call transform_position(v_los(1),v_los(2),v_los(3), rv,tv,pv)
          rv = max(int_rmin, min(rv, int_rmax))

          call interp_field_ds(0.0_r_typ,0.0_r_typ,rv,tv,pv,n_e,drtp,0.0_r_typ,0.0_r_typ)
          ds_local = ds_s2c(rv, tv, drtp, twodee_local)

          call get_kernels(r_min(i,j), rv, n_e, compute_pb, compute_b, pb_local, pb_local)

          ds = ds_local * dsmult
          s = s + ds
          if (s >= s1) then
            ds = ds - (s - s1)
            s = s1
            nstop = nstop + 1
          end if

          ds_fac = 0.5_r_typ * (dsm + ds)
          pb(i,j) = pb(i,j) + ds_fac * pb_local

          dsm = ds
          ! if (nstop >= 2) exit
       end do

 Best,

0 Kudos
caplanr
New Contributor II
339 Views

Hi,

 
It looks like this work-around works!
 
It would still be good to have this fixed within the next couple of releases though so we don't have to change the code.
 
 - Ron
0 Kudos
Harald_S_Intel
Employee
321 Views

@caplanr -- there is another workaround available. If you remove the "collapse(2)" clause from line 182, it also works on my end.

0 Kudos
caplanr
New Contributor II
305 Views

I think that would reduce performance since the inner loop would not be parallelized?

0 Kudos
Harald_S_Intel
Employee
298 Views

Yes -- using this latter WA, performance will likely depend now on the parallelization over ny rather than ny*nx, and that could be impactful depending on the value of ny and nx. If you want a more performant solution, you could go with the first WA. If you want a less intrusive solution, you can go with the second. That said, compiler colleagues are taking a look at the bug.

0 Kudos
yzh_intel
Moderator
66 Views

Hi @caplanr, I did reproduce your issue with the 2026.0 compiler, but couldn't reproduce with latest internal compiler, so it looks like the problem has been fixed. The 2026.1 compiler will be released soon and the fix should be there. Please double check once the 2026.1 compiler is out. Thanks!

0 Kudos
Reply