Intel® oneAPI Data Parallel C++
Support for Intel® oneAPI DPC++ Compiler, Intel® oneAPI DPC++ Library, Intel ICX Compiler, Intel® DPC++ Compatibility Tool, and GDB*
584 Discussions

OneAPI 2023.0.0 -- OMPT Target callbacks issued after finalization

Reuter_Jan
Beginner
536 Views

The OpenMP specification provides a routine, ompt_finalize_tool, for manually finalizing the OMPT interface. A tool retrieves this routine through the lookup function while the OMPT interface is being initialized.

 

The OpenMP specification states the following effect when calling ompt_finalize_tool:

The ompt_finalize_tool routine detaches the tool from the runtime, unregisters all callbacks and invalidates all OMPT entry points passed to the tool in the lookup-function. Upon completion of ompt_finalize_tool, no further callbacks will be issued on any thread. Before the callbacks are unregistered, the OpenMP runtime should attempt to dispatch all outstanding registered callbacks as well as the callbacks that would be encountered during shutdown of the runtime, if possible in the current execution context. 

Link to the documentation: https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf#page=546

 

However, the runtime does not behave this way: callbacks are still issued after ompt_finalize_tool has been called. The following source code reproduces the problem:

 

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <omp-tools.h>

/* Set to true by finalize_tool(); every callback in this file asserts that it
 * is still false when the callback runs. */
bool                 tool_is_finalized = false;
/* Entry point obtained through the lookup function in initialize_tool() and
 * invoked from main() to finalize the tool manually. */
ompt_finalize_tool_t ompt_finalize_tool;


/*
 * Handler registered for ompt_callback_device_initialize.
 *
 * Asserts that the tool has not been finalized yet, then prints this
 * function's name on its own line. None of the arguments are inspected.
 */
void
callback_ompt_device_initialize( int                    device_num,
                                 const char*            type,
                                 ompt_device_t*         device,
                                 ompt_function_lookup_t lookup,
                                 const char*            documentation )
{
    /* The arguments are intentionally unused. */
    (void)device_num;
    (void)type;
    (void)device;
    (void)lookup;
    (void)documentation;

    assert(tool_is_finalized == false);
    printf("%s\n", __func__);
}

/*
 * Handler registered for ompt_callback_device_load.
 *
 * Asserts that the tool has not been finalized yet, then prints this
 * function's name on its own line. None of the arguments are inspected.
 *
 * NOTE(review): the OpenMP specification appears to name the third
 * parameter offset_in_file rather than offset_in_line -- confirm.
 */
void
callback_ompt_device_load( int device_num, const char* filename,
                           int64_t offset_in_line, void* vma_in_file,
                           size_t bytes, void* host_addr, void* device_addr,
                           uint64_t module_id )
{
    /* The arguments are intentionally unused. */
    (void)device_num;
    (void)filename;
    (void)offset_in_line;
    (void)vma_in_file;
    (void)bytes;
    (void)host_addr;
    (void)device_addr;
    (void)module_id;

    assert(tool_is_finalized == false);
    printf("%s\n", __func__);
}

/*
 * Handler registered for ompt_callback_device_unload.
 *
 * Asserts that the tool has not been finalized yet, then prints this
 * function's name on its own line. Neither argument is inspected.
 */
void
callback_ompt_device_unload( int device_num, uint64_t module_id )
{
    /* The arguments are intentionally unused. */
    (void)device_num;
    (void)module_id;

    assert(tool_is_finalized == false);
    printf("%s\n", __func__);
}

/*
 * Handler registered for ompt_callback_device_finalize.
 *
 * Asserts that the tool has not been finalized yet, then prints this
 * function's name on its own line. The device number is not inspected.
 */
void
callback_ompt_device_finalize( int device_num )
{
    /* The argument is intentionally unused. */
    (void)device_num;

    assert(tool_is_finalized == false);
    printf("%s\n", __func__);
}

/*
 * Handler registered for ompt_callback_target.
 *
 * Asserts that the tool has not been finalized yet, then prints this
 * function's name on its own line. None of the arguments are inspected.
 */
void
callback_ompt_target( ompt_target_t kind, ompt_scope_endpoint_t endpoint,
                      int device_num, ompt_data_t* task_data,
                      ompt_id_t target_id, const void* codeptr_ra )
{
    /* The arguments are intentionally unused. */
    (void)kind;
    (void)endpoint;
    (void)device_num;
    (void)task_data;
    (void)target_id;
    (void)codeptr_ra;

    assert(tool_is_finalized == false);
    printf("%s\n", __func__);
}

/*
 * Handler registered for ompt_callback_target_data_op.
 *
 * Asserts that the tool has not been finalized yet, then prints this
 * function's name on its own line. None of the arguments are inspected.
 */
void
callback_ompt_target_data_op( ompt_scope_endpoint_t endpoint,
                              ompt_id_t target_id, ompt_id_t host_op_id,
                              ompt_target_data_op_t optype,
                              void* src_addr, int src_device_num,
                              void* dest_addr, int dest_device_num,
                              size_t bytes, const void* codeptr_ra )
{
    /* The arguments are intentionally unused. */
    (void)endpoint;
    (void)target_id;
    (void)host_op_id;
    (void)optype;
    (void)src_addr;
    (void)src_device_num;
    (void)dest_addr;
    (void)dest_device_num;
    (void)bytes;
    (void)codeptr_ra;

    assert(tool_is_finalized == false);
    printf("%s\n", __func__);
}

/*
 * Tool initializer that ompt_start_tool() hands to the runtime.
 *
 * Looks up the "ompt_set_callback" and "ompt_finalize_tool" entry points,
 * stores the latter in the global ompt_finalize_tool, and then registers the
 * six device/target callbacks defined in this file. Every lookup and every
 * registration is checked with assert(); a registration only passes if it
 * yields ompt_set_always.
 *
 * initialDeviceNum and toolData are not used.
 *
 * Always returns 1.
 */
static int
initialize_tool( ompt_function_lookup_t lookup,
                 int                    initialDeviceNum,
                 ompt_data_t*           toolData )
{
    ompt_set_callback_t set_callback;
    ompt_set_result_t   registration_result;

    (void)initialDeviceNum;
    (void)toolData;

    /* Resolve the two runtime entry points this tool depends on. */
    set_callback = ( ompt_set_callback_t )lookup( "ompt_set_callback" );
    assert( set_callback != 0 );

    ompt_finalize_tool = ( ompt_finalize_tool_t )lookup( "ompt_finalize_tool" );
    assert( ompt_finalize_tool != 0 );

    /* Device lifecycle callbacks. */
    registration_result =
        set_callback( ompt_callback_device_initialize,
                      ( ompt_callback_t )callback_ompt_device_initialize );
    assert(registration_result == ompt_set_always);

    registration_result =
        set_callback( ompt_callback_device_load,
                      ( ompt_callback_t )callback_ompt_device_load );
    assert(registration_result == ompt_set_always);

    registration_result =
        set_callback( ompt_callback_device_unload,
                      ( ompt_callback_t )callback_ompt_device_unload );
    assert(registration_result == ompt_set_always);

    registration_result =
        set_callback( ompt_callback_device_finalize,
                      ( ompt_callback_t )callback_ompt_device_finalize );
    assert(registration_result == ompt_set_always);

    /* Target region and target data operation callbacks. */
    registration_result =
        set_callback( ompt_callback_target,
                      ( ompt_callback_t )callback_ompt_target );
    assert(registration_result == ompt_set_always);

    registration_result =
        set_callback( ompt_callback_target_data_op,
                      ( ompt_callback_t )callback_ompt_target_data_op );
    assert(registration_result == ompt_set_always);

    return 1;
}

/*
 * Tool finalizer that ompt_start_tool() hands to the runtime.
 *
 * Records that finalization has happened by setting tool_is_finalized, so
 * that any callback running afterwards trips its assertion. toolData is not
 * used.
 */
static void
finalize_tool( ompt_data_t* toolData )
{
    (void)toolData;

    tool_is_finalized = true;
}

ompt_start_tool_result_t*
ompt_start_tool( unsigned int omp_version, /* == _OPENMP */
                 const char*  runtime_version )
{
    static ompt_start_tool_result_t tool = { &initialize_tool,
                                             &finalize_tool,
                                             ompt_data_none };
    return &tool;
}

/* Number of elements in the array mapped to the target device. */
#define N 10
/*
 * Reproducer driver: runs one target region that writes the last element of
 * a mapped array, then finalizes the tool manually through the
 * ompt_finalize_tool entry point stored by initialize_tool().
 *
 * Returns a[N - 1], which the target region sets to 0.
 */
int main(void) {
    int a[N];
    /* Offload a single write; the whole array a[0..N-1] is mapped. */
    #pragma omp target map(a[:N])
    {
        a[N-1] = 0;
    }
    /* Manually finalize the tool; this is expected to invoke finalize_tool(),
     * after which no further callbacks should arrive. */
    ompt_finalize_tool();
    return a[N - 1];
}

 The tool registers some of the available device callbacks; each callback prints its own name when it is invoked and asserts that the tool has not yet been finalized. Compiling and running the tool yields the following output:

~/tmp/Error » icx -fiopenmp -fopenmp-targets=spir64 error_inteloneapi_2.c -o error_inteloneapi_2                                                                              
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
~/tmp/Error » ./error_inteloneapi_2                                                                                                                                           
callback_ompt_device_initialize
callback_ompt_device_load
callback_ompt_target
callback_ompt_target_data_op
callback_ompt_target_data_op
callback_ompt_target_data_op
callback_ompt_target_data_op
callback_ompt_target_data_op
callback_ompt_target_data_op
callback_ompt_target
error_inteloneapi_2: error_inteloneapi_2.c:38: void callback_ompt_device_unload(int, uint64_t): Assertion `tool_is_finalized == false' failed.
[1]    1095478 IOT instruction (core dumped)  ./error_inteloneapi_2

 

This suggests that callbacks are still being called after the interface is finalized. While this is just a small test case, I also encountered the same issue when cleaning up data after calling ompt_finalize_tool.

 

The issue was tested with Intel OneAPI 2023.0.0 on Ubuntu 22.04 and an Intel Core i7-1260P. 

 

0 Kudos
3 Replies
SantoshY_Intel
Moderator
494 Views

Hi,


Thanks for posting in Intel communities.


We were able to reproduce your issue. We are working on your issue and will get back to you soon.


Thanks & Regards,

Santosh


0 Kudos
SantoshY_Intel
Moderator
455 Views

Hi,


Thank you for your feedback. We have provided your feedback to the relevant team. At this moment there is no visibility of when it will be implemented and available for use. Please let me know if we can go ahead and close this case.


Thanks & Regards,

Santosh


0 Kudos
SantoshY_Intel
Moderator
426 Views

Hi,


This thread will no longer be monitored by Intel. If you need any additional information, please post a new question as this thread will no longer be monitored by Intel.


Thanks & Regards,

Santosh


0 Kudos
Reply