- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I tried directly calling the mkl from python with ctypes, but in that case, mkl can only use a single cpu. The cause of that problem is unknown.
I am writing a c extension for python that calls the mkl as an alternative approach.
The following c extension can be imported into python without problem. However, when I call the function, it created the following error message:
Intel MKL FATAL ERROR: Cannot load libmkl_mc.so or libmkl_def.so
What is the correct options for the icc compiler that I should use in setup.py?
I get some of the options in the setup.py from the intel link line advisor. I can't put all the options into setup.py.
mkl_helper.c
#include "Python.h" #include "mkl.h" #include "numpy/arrayobject.h" static PyObject* test4 (PyObject *self, PyObject *args) { // test4 (m, n, // a, ja, ia, // c, jc, ic) PyArrayObject *shape_array; PyArrayObject *a_array; // csr_matrix.data PyArrayObject *ja_array; // csr_matrix.indices PyArrayObject *ia_array; // csr_matrix.indptr PyArrayObject *c_array; PyArrayObject *jc_array; PyArrayObject *ic_array; if (!PyArg_ParseTuple(args, "O!O!O!O!O!O!O!", &PyArray_Type, &shape_array, &PyArray_Type, &a_array, &PyArray_Type, &ja_array, &PyArray_Type, &ia_array, &PyArray_Type, &c_array, &PyArray_Type, &jc_array, &PyArray_Type, &ic_array)) { return NULL; } int * ptr_int = shape_array->data; int m = ptr_int[0]; int n = ptr_int[1]; int k = n; float * a_data_ptr = a_array->data; float * ja_data_ptr = ja_array->data; float * ia_data_ptr = ia_array->data; float * c_data_ptr = c_array->data; float * jc_data_ptr = jc_array->data; float * ic_data_ptr = ic_array->data; char trans = 'T'; int sort = 0; int nzmax = n*n; int info = 0; int request = 0; // This is supposed to "suggest" mkl use 12 threads. // I also tried mkl_set_num_threads(&num_cpu); // That also doesn't work. int num_cpu = 12; mkl_set_num_threads(12); mkl_set_num_threads_local(12); mkl_domain_set_num_threads(12,0); mkl_scsrmultcsr(&trans, &request, &sort, &m, &n, &k, a_data_ptr, ja_data_ptr, ia_data_ptr, a_data_ptr, ja_data_ptr, ia_data_ptr, c_data_ptr, jc_data_ptr, ic_data_ptr, &nzmax, &info); return PyInt_FromLong(info); } static struct PyMethodDef methods[] = { {"test4", test4, METH_VARARGS, "test2(arr1)\n take a numpy array and return its shape as a tuple"}, {NULL, NULL, 0, NULL} }; PyMODINIT_FUNC initmkl_helper (void) { (void)Py_InitModule("mkl_helper", methods); import_array(); }
setup.py
from distutils.core import setup, Extension import numpy as np extra_link_args=["-Bstatic","-I${MKLROOT}/include", "-L{$MKLROOT}/lib/intel64/"] extra_link_args += ["-mkl"] extra_link_args += ["-lrt" ] extra_link_args += ["-L${MKLROOT}/lib/intel64/libmkl_intel_ilp64.a", "-L${MKLROOT}/lib/intel64/libmkl_core.a", "-L${MKLROOT}/lib/intel64/libmkl_intel_thread.a", "-lpthread", "-lm", "-ldl"] extra_link_args += ["-DMKL_ILP64", "-qopenmp" ,"-I${MKLROOT}/include"] ext_modules = [ Extension('mkl_helper', sources = ['mkl_helper.c'], extra_link_args=extra_link_args) ] setup( name = 'mkl_helper', version = '1.0', include_dirs = [np.get_include()], #Add Include path of numpy ext_modules = ext_modules )
test.py
import mkl_helper import numpy as np import numpy as np import scipy.sparse as spsp def get_csr_handle2(data, indices, indptr, shape): a_pointer = data.ctypes.data_as(POINTER(c_float)) ja_pointer = indices.ctypes.data_as(POINTER(c_int)) ia_pointer = indptr.ctypes.data_as(POINTER(c_int)) return (a_pointer, ja_pointer, ia_pointer, shape) def get_csr_handle(A,clear=False): if clear == True: A.indptr[:] = 0 A.indices[:] = 0 A.data[:] = 0 return get_csr_handle2(A.data, A.indices, A.indptr, A.shape) print "test4" test_size = 1200 test_size2 = 1200 AA = np.random.choice([0,1], size=(test_size,test_size2), replace=True, p=[0.99,0.01]) A_original = spsp.csr_matrix(AA) print "Answer from scipy:" print AA.dot(AA.T) A = A_original.astype(np.float32).tocsc() A = spsp.csr_matrix( (A.data, A.indices, A.indptr) ) A.indptr += 1 # convert to 1-based indexing A.indices += 1 # convert to 1-based indexing C = spsp.csr_matrix( np.ones((test_size,test_size)), dtype=np.float32) (m,n) = A.shape shape_arr = np.array([m,n], dtype=np.int32) while(True): ret = mkl_helper.test4(shape_arr, A.data, A.indices, A.indptr, C.data, C.indices, C.indptr) C.indptr -= 1 C.indices -= 1 nz = C.indptr[test_size] print "nz:",nz print "Answer from mkl" C_fix = spsp.csr_matrix( (C.data[:nz], C.indices[:nz], C.indptr[:(test_size+1)]), shape=(test_size, test_size)) print C_fix.todense().astype(int) #print C.todense() print "ret:", ret
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
if we are talking about mkl_scsrmultcsr(...), then the problem size is pretty small and internally this computation is dispatched to 1 thread execution mode.
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I finally made a setup.py that seems to work ...
from distutils.core import setup, Extension import numpy as np d = {} d['MKLROOT'] = "/gpfs/rxu/intel/compilers_and_libraries_2016.3.210/linux/mkl" extra_compile_args = "-DMKL_ILP64 -qopenmp -I{MKLROOT}/include".format(**d).split(' ') extra_link_args = "-Wl,--start-group {MKLROOT}/lib/intel64/libmkl_intel_ilp64.a {MKLROOT}/lib/intel64/libmkl_core.a {MKLROOT}/lib/intel64/libmkl_intel_thread.a -Wl,--end-group -lpthread -lm -ldl -liomp5".format(**d).split(' ') ext_modules = [ Extension('mkl_helper', sources = ['mkl_helper.c'], extra_link_args=extra_link_args, extra_compile_args=extra_compile_args)] setup( name = 'mkl_helper', version = '1.0', include_dirs = [np.get_include()], #Add Include path of numpy ext_modules = ext_modules )
From mkl linkline advisor
Intel® Math Kernel Library (Intel® MKL) Link Line Advisor v4.6 Select Intel® product: Intel(R) Parallel Studio XE 2016 Select OS: Linux* Select usage model of Intel® Xeon Phi™ Coprocessor: None Select compiler: Intel(R) C/C++ Select architecture: Intel(R) 64 Select dynamic or static linking: Static Select interface layer: ILP64 (64-bit integer) Select threading layer: OpenMP threading Select OpenMP library: Intel(R) (libiomp5) Select cluster library: [ ] Cluster PARDISO (BLACS required) [ ] CDFT (BLACS required) [ ] ScaLAPACK (BLACS required) [ ] BLACS Select MPI library: <Select MPI> Select the Fortran 95 interfaces: [ ] BLAS95 [ ] LAPACK95 Link with Intel® MKL libraries explicitly: [Check] Use this link line: -Wl,--start-group ${MKLROOT}/lib/intel64/libmkl_intel_ilp64.a ${MKLROOT}/lib/intel64/libmkl_core.a ${MKLROOT}/lib/intel64/libmkl_intel_thread.a -Wl,--end-group -lpthread -lm -ldl Compiler options: -DMKL_ILP64 -qopenmp -I${MKLROOT}/include
shorthand:
%INTELHOME=/gpfs/rxu/intel/compilers_and_libraries_2016.3.210 %MKLHOME=%INTELHOME/linux/mkl %USR=/home/rxu/local_icc/
Setting up evironment
source %CCEHOME/bin/iccvars.sh source %FCEHOME/bin/ifortvars.sh source %OPENMPIHOME/bin/mpivars.sh source %INTELHOME/bin/compilervars.sh intel64 export PATH=%USR/bin:/share/apps/pkgs/openmpi.1.8.1/bin:$PATH source %MKLHOME/bin/mklvars.sh intel64 export CC="icc" export CXX="icpc" export F77=ifort export LD=xild export AR=xiar export CPP="icc -E"
Output from python setup.py install
running install running build running build_ext building 'mkl_helper' extension icc -fno-strict-aliasing -O3 -fp-model strict -fp-model source -xHost -ipo -prec-div -prec-sqrt -DNDEBUG -g -O3 -Wall -Wstrict-prototypes -fPIC -I%USR/lib/python2.7/site-packages/numpy-1.11.0-py2.7-linux-x86_64.egg/numpy/core/include -I%USR/include/python2.7 -c mkl_helper.c -o build/temp.linux-x86_64-2.7/mkl_helper.o -DMKL_ILP64 -qopenmp -I%MKLHOME/include icc: command line warning #10006: ignoring unknown option '-qopenmp' ... some harmless warning ... icc -shared -L%USR/lib/ -L%USR/lib64/ -L%USR/lib/thread2.7.3 -L%USR/lib/itcl4.0.4 -L%USR/lib/tdbc1.0.4 -L%USR/lib/tdbcmysql1.0.4 -L%USR/lib/tdbcodbc1.0.4 -L%USR/lib/tdbcpostgres1.0.4 -L%USR/lib/sqlite3.11.0 -L%USR/lib/thread2.7.3/ -L%USR/lib/ -L%USR/lib64/ -L%USR/lib/thread2.7.3 -L%USR/lib/itcl4.0.4 -L%USR/lib/tdbc1.0.4 -L%USR/lib/tdbcmysql1.0.4 -L%USR/lib/tdbcodbc1.0.4 -L%USR/lib/tdbcpostgres1.0.4 -L%USR/lib/sqlite3.11.0 -L%USR/lib/thread2.7.3/ -L%USR/lib/ -L%USR/lib64/ -L%USR/thread2.7.3 build/temp.linux-x86_64-2.7/mkl_helper.o -L%USR/lib -lpython2.7 -o build/lib.linux-x86_64-2.7/mkl_helper.so -Wl,--start-group %MKLHOME/lib/intel64/libmkl_intel_ilp64.a %MKLHOME/lib/intel64/libmkl_core.a %MKLHOME/lib/intel64/libmkl_intel_thread.a -Wl,--end-group -lpthread -lm -ldl -liomp5 %INTELHOME/linux/compiler/lib/intel64/libimf.so: warning: warning: feupdateenv is not implemented and will always fail running install_lib copying build/lib.linux-x86_64-2.7/mkl_helper.so -> %USR/lib/python2.7/site-packages running install_egg_info Removing %USR/lib/python2.7/site-packages/mkl_helper-1.0-py2.7.egg-info Writing %USR/lib/python2.7/site-packages/mkl_helper-1.0-py2.7.egg-info
The problem was that distutil library of python adds a bunch of flags. Those flags seems to be the ones I use when I manually compile numpy linked to mkl.
The new error message is:
Intel MKL ERROR: Parameter 2 was incorrect on entry to MKL_SCSRMULTCSR.
Is this an error of the code, or an error of the compiler/linker flags in setup.py?
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
In setup.py, ilp64 is used.
The 2nd parameter of csrcsc is request and has a MKL_INT type.
MKL_INT should be long long instead of integer when ilp64 libraries are used.
New setup.py that works:
from distutils.core import setup, Extension import numpy as np d = {} d['MKLROOT'] = "/gpfs/rxu/intel/compilers_and_libraries_2016.3.210/linux/mkl" extra_compile_args = "-qopenmp -I{MKLROOT}/include".format(**d).split(' ') extra_link_args = "-Wl,--start-group {MKLROOT}/lib/intel64/libmkl_intel_lp64.a {MKLROOT}/lib/intel64/libmkl_core.a {MKLROOT}/lib/intel64/libmkl_intel_thread.a -Wl,--end-group -lpthread -lm -ldl -liomp5".format(**d).split(' ') ext_modules = [ Extension('mkl_helper', sources = ['mkl_helper.c'], extra_link_args=extra_link_args, extra_compile_args=extra_compile_args)] setup( name = 'mkl_helper', version = '1.0', include_dirs = [np.get_include()], #Add Include path of numpy ext_modules = ext_modules )
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Yet. after all of this, mkl still just use one of the 12 cpu.
Changing setup.py to the following would link mkl as shared library.
This doesn't give any warning about icc fail to interpret any flag.
extra_compile_args = "-I${MKLROOT}/include".format(**d).split(" ")
extra_link_args = "-L{MKLROOT}/lib/intel64 -lmkl_rt -lpthread -lm -ldl".format(**d).split(" ")
It still only use one out of the 12 cpu on the machine.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
if we are talking about mkl_scsrmultcsr(...), then the problem size is pretty small and internally this computation is dispatched to 1 thread execution mode.
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page