Software Tuning, Performance Optimization & Platform Monitoring
Discussion regarding monitoring and software tuning methodologies, Performance Monitoring Unit (PMU) of Intel microprocessors, and platform updating.

intel-cmt-cat segfaults at lockf.c

Jones__Brian
New Contributor I
379 Views

I recently installed intel-cmt-cat (Cache Monitoring Technology / Cache Allocation Technology) from https://github.com/intel/intel-cmt-cat on Ubuntu 18.04.

I get a segfault in lockf when it is called by _pqos_api_lock. I debugged this with gdb, and below are two debug sessions -- the first one is from an AWS cloud server and the second one is from a Digital Ocean cloud server. Both servers run Linux kernel 4.15.0-58-generic with glibc 2.27.

The problem occurs where lockf is called from _pqos_api_lock. lockf takes three arguments (fd, cmd and len), which on x86-64 are passed in the rdi, rsi and rdx registers respectively. Below are the values of rdi, rsi and rdx as I step through lockf one instruction at a time. The rdi and rsi registers do not change, but at the point shown below, rdx suddenly switches from 0 to 0x7fffffffe0f8 (140737488347384), and then the program segfaults.

At the end of the AWS session below, I show the call stack as printed by gdb's "where" command.

AWS session:

Breakpoint 1, lockf (fd=4, cmd=cmd@entry=1, len=len@entry=0) at lockf.c:36

warning: Source file is more recent than executable.

36 {

(gdb) ni

39 memset ((char *) &fl, '\0', sizeof (fl));

(gdb) ni

36 {

(gdb) ni

42 fl.l_whence = SEEK_CUR;

(gdb) ni

36 {

(gdb) ni

36 {

(gdb) i r rdi

rdi 0x4 4

(gdb) i r rsi

rsi 0x1 1

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

0x00007ffff78f07bb 36 {

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

0x00007ffff78f07c0 36 {

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

46 switch (cmd)

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

39 memset ((char *) &fl, '\0', sizeof (fl));

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

0x00007ffff78f07c8 39 memset ((char *) &fl, '\0', sizeof (fl));

(gdb) i r rdx

rdx 0x7fffffffe0f8 140737488347384

(gdb) ni

Program received signal SIGSEGV, Segmentation fault.

0x00007ffff78f07c8 in lockf (fd=4, cmd=cmd@entry=1, len=len@entry=0)

at lockf.c:39

39 memset ((char *) &fl, '\0', sizeof (fl));

(gdb)

(gdb) where

#0 0x00007ffff78f07c8 in lockf (fd=4, cmd=cmd@entry=1, len=len@entry=0)

at lockf.c:39

#1 0x00007fffdf84ee66 in _pqos_api_lock () at cap.c:207

#2 0x00007fffdf84f06a in pqos_init (config=0x7fffffffe258) at cap.c:1375

#3 0x00007fffdf84e834 in L3_main (argc=-542744026, argv=0x7ffff7994c97)

at allocation_app_l3cat.c:197

#4 0x00007fffdf84d349 in get_cores () at While_Loop_List_Comp.asm:141

#5 0x00007fffdf84ddc1 in Write_vars_on_entry ()

at While_Loop_List_Comp.asm:849

#6 0x0000000000000000 in ?? ()

____________

Digital Ocean session:

Breakpoint 1, lockf (fd=4, cmd=cmd@entry=1, len=len@entry=0) at lockf.c:36

36 {

(gdb) ni

39 memset ((char *) &fl, '\0', sizeof (fl));

(gdb) i r rdi

rdi 0x4 4

(gdb) i r rsi

rsi 0x1 1

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

36 {

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

42 fl.l_whence = SEEK_CUR;

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

36 {

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

36 {

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

0x00007ffff78f07bb 36 {

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

0x00007ffff78f07c0 36 {

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

46 switch (cmd)

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

39 memset ((char *) &fl, '\0', sizeof (fl));

(gdb) i r rdx

rdx 0x0 0

(gdb) ni

0x00007ffff78f07c8 39 memset ((char *) &fl, '\0', sizeof (fl));

(gdb) i r rdx

rdx 0x7fffffffe0b8 140737488347320

(gdb) ni

Program received signal SIGSEGV, Segmentation fault.

0x00007ffff78f07c8 in lockf (fd=4, cmd=cmd@entry=1, len=len@entry=0)

at lockf.c:39

39 memset ((char *) &fl, '\0', sizeof (fl));

(gdb) i r rdi

rdi 0x4 4

(gdb) i r rsi

rsi 0x1 1

(gdb) i r rdx

rdx 0x7fffffffe0b8 140737488347320

(gdb)

__________

Here is the code for lockf (from glibc 2.27) for the output shown above:

[1] /* Copyright (C) 1994-2018 Free Software Foundation, Inc.

[2] This file is part of the GNU C Library.

[3]

[4] The GNU C Library is free software; you can redistribute it and/or

[5] modify it under the terms of the GNU Lesser General Public

[6] License as published by the Free Software Foundation; either

[7] version 2.1 of the License, or (at your option) any later version.

[8]

[9] The GNU C Library is distributed in the hope that it will be useful,

[10] but WITHOUT ANY WARRANTY; without even the implied warranty of

[11] MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

[12] Lesser General Public License for more details.

[13]

[14] You should have received a copy of the GNU Lesser General Public

[15] License along with the GNU C Library; if not, see

[16] <http://www.gnu.org/licenses/>. */

[17]

[18] /* We need to avoid the header declaration of lockf64, because

[19] the types don't match lockf and then the compiler will

[20] complain about the mismatch when we do the alias below. */

[21] #define lockf64 __renamed_lockf64

[22]

[23] #include <fcntl.h>

[24]

[25] #undef lockf64

[26]

[27] #include <sys/types.h>

[28] #include <unistd.h>

[29] #include <errno.h>

[30] #include <string.h>

[31]

[32] /* lockf is a simplified interface to fcntl's locking facilities. */

[33]

[34] int

[35] lockf (int fd, int cmd, off_t len)

[36] {

[37] struct flock fl;

[38]

[39] memset ((char *) &fl, '\0', sizeof (fl));

[40]

[41] /* lockf is always relative to the current file position. */

[42] fl.l_whence = SEEK_CUR;

[43] fl.l_start = 0;

[44] fl.l_len = len;

[45]

[46] switch (cmd)

[47] {

[48] case F_TEST:

[49] /* Test the lock: return 0 if FD is unlocked or locked by this process;

[50] return -1, set errno to EACCES, if another process holds the lock. */

[51] fl.l_type = F_RDLCK;

[52] if (__fcntl (fd, F_GETLK, &fl) < 0)

[53] return -1;

[54] if (fl.l_type == F_UNLCK || fl.l_pid == __getpid ())

[55] return 0;

[56] __set_errno (EACCES);

[57] return -1;

[58]

[59] case F_ULOCK:

[60] fl.l_type = F_UNLCK;

[61] cmd = F_SETLK;

[62] break;

[63] case F_LOCK:

[64] fl.l_type = F_WRLCK;

[65] cmd = F_SETLKW;

[66] break;

[67] case F_TLOCK:

[68] fl.l_type = F_WRLCK;

[69] cmd = F_SETLK;

[70] break;

[71]

[72] default:

[73] __set_errno (EINVAL);

[74] return -1;

[75] }

[76]

[77] /* lockf() is a cancellation point but so is fcntl() if F_SETLKW is

[78] used. Therefore we don't have to care about cancellation here,

[79] the fcntl() function will take care of it. */

[80] return __fcntl (fd, cmd, &fl);

[81] }

[82]

[83] #ifdef __OFF_T_MATCHES_OFF64_T

[84] weak_alias (lockf, lockf64)

[85] #endif

In my use case, I link the object files for all of the intel-cmt-cat programs into a single shared object. I renamed the "main" function in allocation_app_l3cat.c to "L3_main" so that it can be used from a shared object; L3_main is therefore the entry point from my program into the intel-cmt-cat suite. My purpose in using intel-cmt-cat is to control cache usage from shared objects.

The large value rdx switches to obviously triggers the segfault. Should we use lockf64 instead of lockf? If not, how can I avoid this problem?

Thanks for any ideas.

 

 

0 Kudos
0 Replies
Reply