- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
I am getting an internal compiler error with one of our OpenMP codes:
getpb_reproducer.f90: error #5633: **Internal compiler error: segmentation violation signal raised** Please report this error along with the circumstances in which it occurred in a Software Problem Report. Note: File and line given may not be explicit cause of this error.
compilation aborted for getpb_reproducer.f90 (code 3)
The error did not happen with 2025 or 2024.
If I compile without "-fopenmp" it works correctly.
I have created a small reproducer:
! =============================================================================
! GETPB OpenMP ICE Reproducer for Intel 2026
! Compiles with: gfortran -O3 -fopenmp getpb_reproducer.f90 -o repro
! =============================================================================
module constants
use, intrinsic :: iso_fortran_env, only: real64
implicit none
integer, parameter :: r_typ = real64
real(r_typ), parameter :: pi = 3.141592653589793_r_typ
real(r_typ), parameter :: twopi = 2.0_r_typ * pi
real(r_typ), parameter :: halfpi = pi * 0.5_r_typ
real(r_typ), parameter :: degtorad = pi / 180.0_r_typ
real(r_typ), parameter :: au_in_rs = 214.939469_r_typ
end module constants
module params
use constants
implicit none
integer :: nx, ny
real(r_typ) :: x0, x1, y0, y1
real(r_typ) :: rocc, int_rmin, int_rmax
real(r_typ) :: r_obs, r_obs_rs
logical :: r_obs_set
real(r_typ) :: dsmult
logical :: compute_pb, compute_b
logical :: do_integral_weighting
logical :: verbose
end module params
module image_region
use constants
implicit none
real(r_typ), allocatable :: elong(:), alt(:)
end module image_region
module image_fields
use constants
implicit none
real(r_typ), allocatable :: r_min(:,:), pb(:,:)
end module image_fields
! =============================================================================
program repro_getpb
! =============================================================================
use constants
use params
use image_region
use image_fields
implicit none
integer :: i
real(r_typ) :: dummy_val
logical :: twodee_local = .false.
! --------------------- Setup ---------------------
nx = 101
ny = 101
x0 = -3.0_r_typ; x1 = 3.0_r_typ
y0 = -3.0_r_typ; y1 = 3.0_r_typ
rocc = 1.0_r_typ
int_rmin = 1.0_r_typ
int_rmax = 20.0_r_typ
r_obs = 1.0_r_typ
r_obs_set = .true.
r_obs_rs = r_obs * au_in_rs
dsmult = 1.0_r_typ
compute_pb = .true.
compute_b = .false.
do_integral_weighting = .false.
verbose = .true.
allocate(r_min(nx,ny))
allocate(pb(nx,ny))
pb = 0.0_r_typ
r_min = 0.0_r_typ
allocate(elong(nx))
allocate(alt(ny))
if (verbose) then
write(*,'(A)') '=== GETPB minimal reproducer (Intel 2026 ICE test) ==='
write(*,'(A,I0,A,I0)') 'Image: ',nx,' x ',ny
end if
call get_image_obs_distance_specified
dummy_val = pb(1,1) + pb(nx/2,ny/2) + pb(nx,ny)
write(*,'(A,ES12.4)') 'Sample pB sum = ', dummy_val
write(*,'(A)') 'SUCCESS: Reproducer compiled and ran.'
deallocate(r_min, pb, elong, alt)
contains
subroutine transform_position(x, y, z, r, t, p)
!$acc routine seq
real(r_typ), intent(in) :: x, y, z
real(r_typ), intent(out) :: r, t, p
r = 5.0_r_typ; t = 0.0_r_typ; p = 0.0_r_typ
end subroutine
subroutine interp_field_ds(d1,d2,r,t,p,fv,drtp,d3,d4)
!$acc routine seq
real(r_typ), intent(in) :: d1,d2,r,t,p,d3,d4
real(r_typ), intent(out) :: fv
real(r_typ), dimension(3), intent(out) :: drtp
fv = 1.0_r_typ
drtp = 0.01_r_typ
end subroutine
subroutine get_kernels(rmin_loc, rv, ne, cp_pb, cp_b, pb_loc, b_loc)
!$acc routine seq
real(r_typ), intent(in) :: rmin_loc, rv, ne
logical, intent(in) :: cp_pb, cp_b
real(r_typ), intent(out) :: pb_loc, b_loc
if (cp_pb) then
pb_loc = (rmin_loc / rv)**2 * ne * 1.0e-15_r_typ
else
pb_loc = 0.0_r_typ
end if
b_loc = 0.0_r_typ
end subroutine
function ds_s2c(r, t, drtp, axisymmetric) result(res)
!$acc routine seq
real(r_typ), intent(in) :: r, t
real(r_typ), dimension(3), intent(in) :: drtp
logical, intent(in) :: axisymmetric
real(r_typ) :: res
res = 0.01_r_typ
end function ds_s2c
! ===================================================================
! Critical subroutine (this is what triggers the Intel ICE)
! ===================================================================
subroutine get_image_obs_distance_specified
use constants
use params
use image_region
use image_fields
implicit none
real(r_typ), parameter :: zero = 0.0_r_typ
real(r_typ), parameter :: half = 0.5_r_typ
real(r_typ), parameter :: one = 1.0_r_typ
integer :: i, j, nstop
real(r_typ) :: dx, dy, s0, s1, dmax, s, t, ds, dsm, ds_local
real(r_typ) :: e_rad, a_rad, cos_a, t_r_min, rv, tv, pv, pb_local
real(r_typ), dimension(3) :: v_obs, v_ref, v_los, v_r_min
real(r_typ) :: ds_fac
real(r_typ), dimension(3) :: ex, ey, ez
real(r_typ) :: n_e
real(r_typ), dimension(3) :: drtp
! Build image axes
if (nx == 1) then
elong(1) = x0
else
dx = (x1-x0)/real(nx-1,r_typ)
do i=1,nx; elong(i)=x0+(i-1)*dx; enddo
endif
if (ny == 1) then
alt(1) = y0
else
dy = (y1-y0)/real(ny-1,r_typ)
do j=1,ny; alt(j)=y0+(j-1)*dy; enddo
endif
v_obs(1) = r_obs_rs
v_obs(2:3) = 0.0_r_typ
! ==================== CRITICAL PARALLEL REGION ====================
!$omp parallel do &
!$omp& shared(pb, r_min, v_obs) &
!$omp& private(i,j, e_rad,a_rad,cos_a, v_ref,t_r_min,v_r_min) &
!$omp& private(s,s0,s1,dmax, t,ds,dsm,ds_local,nstop,v_los,rv,tv,pv) &
!$omp& private(pb_local, n_e, drtp, ds_fac) &
!$omp& private(ex,ey,ez) &
!$omp& collapse(2) schedule(dynamic,10)
!$acc parallel loop collapse(2) &
!$acc& private(i,j, e_rad,a_rad,cos_a, v_ref,t_r_min,v_r_min) &
!$acc& private(s,s0,s1,dmax, t,ds,dsm,ds_local,nstop,v_los,rv,tv,pv) &
!$acc& private(pb_local, n_e, drtp) &
!$acc& private(ex,ey,ez)
do j = 1, ny
do i = 1, nx
e_rad = degtorad * elong(i)
a_rad = degtorad * alt(j)
cos_a = cos(a_rad)
v_ref(1) = r_obs_rs * (one - cos(e_rad)*cos_a)
v_ref(2) = r_obs_rs * sin(e_rad) * cos_a
v_ref(3) = r_obs_rs * sin(a_rad)
t_r_min = -(v_obs(1)*(v_ref(1)-v_obs(1)) + &
v_obs(2)*(v_ref(2)-v_obs(2)) + &
v_obs(3)*(v_ref(3)-v_obs(3))) / (r_obs_rs**2)
v_r_min = t_r_min*v_ref + (one-t_r_min)*v_obs
r_min(i,j) = sqrt(dot_product(v_r_min, v_r_min))
if (abs(e_rad) <= halfpi .and. r_min(i,j) <= rocc) then
pb(i,j) = -1.0_r_typ
cycle
end if
if (r_min(i,j) >= int_rmax) cycle
dmax = sqrt(int_rmax**2 - r_min(i,j)**2)
s0 = t_r_min*r_obs_rs - dmax
s1 = t_r_min*r_obs_rs + dmax
if (s1 <= zero) cycle
s0 = max(s0, zero)
s = s0
dsm = 0.0_r_typ
nstop = 0
do
t = s / r_obs_rs
v_los = t*v_ref + (one-t)*v_obs
call transform_position(v_los(1),v_los(2),v_los(3), rv,tv,pv)
rv = max(int_rmin, min(rv, int_rmax))
call interp_field_ds(0.0_r_typ,0.0_r_typ,rv,tv,pv,n_e,drtp,0.0_r_typ,0.0_r_typ)
ds_local = ds_s2c(rv, tv, drtp, twodee_local)
call get_kernels(r_min(i,j), rv, n_e, compute_pb, compute_b, pb_local, pb_local)
ds = ds_local * dsmult
s = s + ds
if (s >= s1) then
ds = ds - (s - s1)
s = s1
nstop = nstop + 1
end if
ds_fac = 0.5_r_typ * (dsm + ds)
pb(i,j) = pb(i,j) + ds_fac * pb_local
dsm = ds
if (nstop >= 2) exit
end do
end do
end do
!$acc end parallel
!$omp end parallel do
end subroutine get_image_obs_distance_specified
end program repro_getpb
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hello,
we're taking a look at the issue. In the meantime, I think I found a workaround for it. Can you try using a do-while construct in your inner loop (lines 223-248) rather than using a do construct? I tried the following and it worked on my end using 2026.0
do while (nstop < 2)
t = s / r_obs_rs
v_los = t*v_ref + (one-t)*v_obs
call transform_position(v_los(1),v_los(2),v_los(3), rv,tv,pv)
rv = max(int_rmin, min(rv, int_rmax))
call interp_field_ds(0.0_r_typ,0.0_r_typ,rv,tv,pv,n_e,drtp,0.0_r_typ,0.0_r_typ)
ds_local = ds_s2c(rv, tv, drtp, twodee_local)
call get_kernels(r_min(i,j), rv, n_e, compute_pb, compute_b, pb_local, pb_local)
ds = ds_local * dsmult
s = s + ds
if (s >= s1) then
ds = ds - (s - s1)
s = s1
nstop = nstop + 1
end if
ds_fac = 0.5_r_typ * (dsm + ds)
pb(i,j) = pb(i,j) + ds_fac * pb_local
dsm = ds
! if (nstop >= 2) exit
end doBest,
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
@caplanr -- there is another workaround available. If you remove the "collapse(2)" clause from line 182, it also works on my end.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I think that would reduce performance since the inner loop would not be parallelized?
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Yes -- using this latter WA, performance will likely depend now on the parallelization over ny rather than ny*nx, and that could be impactful depending on the value of ny and nx. If you want a more performant solution, you could go with the first WA. If you want a less intrusive solution, you can go with the second. That said, compiler colleagues are taking a look at the bug.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi @caplanr, I did reproduce your issue with the 2026.0 compiler, but couldn't reproduce with latest internal compiler, so it looks like the problem has been fixed. The 2026.1 compiler will be released soon and the fix should be there. Please double check once the 2026.1 compiler is out. Thanks!
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page