#pragma once #include #include #include #include #include #include #include #include #define allignment 256 //Used for the boost bandwidth compression typedef boost::adjacency_list< boost::vecS, boost::vecS, boost::undirectedS, boost::property< boost::vertex_color_t, boost::default_color_type, boost::property< boost::vertex_degree_t, int > > > Graph; typedef boost::graph_traits< Graph >::vertex_descriptor Vertex; typedef boost::graph_traits< Graph >::vertices_size_type size_type; typedef std::pair< std::size_t, std::size_t > Pair; //bandwidth compressiokn properties int _uncompressedBandwidth(0); int _compressedBandwidth(0); bool _reversed = true; //mkl setting sparse_index_base_t _indexSchema = SPARSE_INDEX_BASE_ZERO; matrix_descr _desc; //properties needed for creating the matrix int _doFX(0), _doFY(0), _doFZ(0); int _matrixDimension(0), _componentSize(0); int _iterations = 2000; /***************************** Auxiliary Functions ***************************************/ int GetCoordinateIndex(int x, int y, int z) { return z + _doFZ * y + _doFZ * _doFY * x; } /***************************** Functions for bandwidth compression ***************************************/ Graph GenerateGraph(int rows, int columns, int * column_indx, int * row_start, int * row_end, float * values) { int current_rowStart(0), current_rowEnd(0), current_column(0), valuePointer(0), nnz(0); Graph graph(rows); for (int r = 0; r < rows; r++) { current_rowStart = row_start[r]; current_rowEnd = row_end[r]; for (int i = current_rowStart; i < current_rowEnd; i++) { if (fabsf(values[valuePointer]) > FLT_EPSILON) { boost::add_edge(r, column_indx[valuePointer], graph); //edges[pairCounter] = Pair(r, colIndices[valuePointer]); //pairCounter++; } valuePointer++; } } return graph; } void GeneratePermutationVektor(int rows, int columns, int * column_indx, int * row_start, int * row_end, float * values, int * permutVector) { Graph graph = GenerateGraph(rows, columns, column_indx, row_start, row_end, values); _uncompressedBandwidth = boost::bandwidth(graph); std::vector< Vertex > inv_perm(num_vertices(graph)); std::vector< size_type > perm(num_vertices(graph)); boost::property_map< Graph, boost::vertex_index_t >::type index_map = get(boost::vertex_index, graph); if (_reversed) { boost::cuthill_mckee_ordering(graph, inv_perm.rbegin(), get(boost::vertex_color, graph), get(boost::vertex_degree, graph)); } else { boost::cuthill_mckee_ordering(graph, inv_perm.begin(), get(boost::vertex_color, graph), get(boost::vertex_degree, graph)); } for (size_type c = 0; c != inv_perm.size(); ++c) perm[index_map[inv_perm[c]]] = c; _compressedBandwidth = bandwidth(graph, make_iterator_property_map(&perm[0], index_map, perm[0])); for (std::vector< Vertex >::const_iterator i = inv_perm.begin(); i != inv_perm.end(); ++i) { permutVector[*i] = inv_perm[*i]; } } void SimilarityTransformation(int * permutationVector, sparse_matrix_t &curlMatrixA, sparse_matrix_t &curlMatrixB, int &source) { //1. Create permutation matrix sparse_matrix_t permut, temp; int rows(_matrixDimension); int columns(_matrixDimension); int *row_indx(nullptr), *col_indx(nullptr), *row_start(nullptr), *row_end(nullptr); float *values(nullptr); row_indx = (int*)mkl_malloc(rows * sizeof(int), allignment); col_indx = (int*)mkl_malloc(rows * sizeof(int), allignment); values = (float*)mkl_malloc(rows * sizeof(float), allignment); for (int i = 0; i < _matrixDimension; i++) { row_indx[i] = i; col_indx[i] = permutationVector[i]; values[i] = 1; } mkl_sparse_s_create_coo(&temp, _indexSchema, rows, columns, _matrixDimension, row_indx, col_indx, values); mkl_sparse_convert_csr(temp, SPARSE_OPERATION_NON_TRANSPOSE, &permut); mkl_sparse_destroy(temp); mkl_free(row_indx); mkl_free(col_indx); mkl_free(values); //2. Perform similarity transformation mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE, _desc, permut, SPARSE_OPERATION_NON_TRANSPOSE, _desc, curlMatrixA, SPARSE_STAGE_FULL_MULT, &temp); mkl_sparse_destroy(curlMatrixA); mkl_sparse_copy(temp, _desc, &curlMatrixA); mkl_sparse_destroy(temp); mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE, _desc, curlMatrixA, SPARSE_OPERATION_TRANSPOSE, _desc, permut, SPARSE_STAGE_FULL_MULT, &temp); mkl_sparse_destroy(curlMatrixA); mkl_sparse_copy(temp, _desc, &curlMatrixA); mkl_sparse_destroy(temp); mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE, _desc, permut, SPARSE_OPERATION_NON_TRANSPOSE, _desc, curlMatrixB, SPARSE_STAGE_FULL_MULT, &temp); mkl_sparse_destroy(curlMatrixB); mkl_sparse_copy(temp, _desc, &curlMatrixB); mkl_sparse_destroy(temp); mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE, _desc, curlMatrixB, SPARSE_OPERATION_TRANSPOSE, _desc, permut, SPARSE_STAGE_FULL_MULT, &temp); mkl_sparse_destroy(curlMatrixB); mkl_sparse_copy(temp, _desc, &curlMatrixB); mkl_sparse_destroy(temp); //3. Permutate source Index mkl_sparse_convert_csr(permut, SPARSE_OPERATION_TRANSPOSE, &temp); sparse_index_base_t indexing; int cols(0); int * colIndices(nullptr), *invPermVector(nullptr); mkl_sparse_s_export_csr(temp, &indexing, &rows, &cols, &row_start, &row_end, &colIndices, &values); invPermVector = new int[rows]; for (int i = 0; i < rows; i++) { invPermVector[i] = colIndices[i]; } source = invPermVector[source]; delete[] invPermVector; mkl_sparse_destroy(temp); mkl_sparse_destroy(permut); } /***************************** Functions for creating matrix ***************************************/ void CreateCurlMatrices(sparse_matrix_t &hCurlMatrix, sparse_matrix_t &eCurlMatrix) { int offset_Ey(_componentSize), offset_Hy(_componentSize); int offset_Ez(_componentSize * 2), offset_Hz(_componentSize * 2); // C~ matrix float * values = (float*)mkl_malloc(_matrixDimension * 4 * sizeof(float), allignment); int * rows_indx = (int*)mkl_malloc(_matrixDimension * 4 * sizeof(int), allignment); int * col_indx = (int*)mkl_malloc(_matrixDimension * 4 * sizeof(int), allignment); int val_index = 0; //Ex curl for (int x = 0; x < _doFX; x++) { for (int y = 0; y < _doFY; y++) { for (int z = 0; z < _doFZ; z++) { // Hy(x,y,z-1) if (z != 0) { values[val_index] = 1.f; col_indx[val_index] = offset_Hy + GetCoordinateIndex(x, y, z - 1); rows_indx[val_index] = GetCoordinateIndex(x, y, z); val_index++; //curlMat.insert(GetCoordinateIndex(x, y, z), offset_Hy + GetCoordinateIndex(x, y, z - 1)) = 1; } // - Hy(x,y,z) values[val_index] = -1.f; col_indx[val_index] = offset_Hy + GetCoordinateIndex(x, y, z); rows_indx[val_index] = GetCoordinateIndex(x, y, z); val_index++; // - Hz(x,y-1,z) if (y != 0) { values[val_index] = -1.f; col_indx[val_index] = offset_Hz + GetCoordinateIndex(x, y - 1, z); rows_indx[val_index] = GetCoordinateIndex(x, y, z); val_index++; //curlMat.insert(GetCoordinateIndex(x, y, z), offset_Hz + GetCoordinateIndex(x, y - 1, z)) = -1; } // Hz(x,y,z) values[val_index] = 1.f; col_indx[val_index] = offset_Hz + GetCoordinateIndex(x, y, z); rows_indx[val_index] = GetCoordinateIndex(x, y, z); val_index++; //curlMat.insert(GetCoordinateIndex(x,y,z), offset_Hz + GetCoordinateIndex(x, y, z)) = 1; //curlMat.insert(GetCoordinateIndex(x,y,z), offset_Hy + GetCoordinateIndex(x, y, z)) = - 1; } } } //Ey curl for (int x = 0; x < _doFX; x++) { for (int y = 0; y < _doFY; y++) { for (int z = 0; z < _doFZ; z++) { // - Hx(x,y,z -1) if (z != 0) { values[val_index] = -1.f; col_indx[val_index] = GetCoordinateIndex(x, y, z - 1); rows_indx[val_index] = offset_Ey + GetCoordinateIndex(x, y, z); val_index++; //curlMat.insert(offset_Ey + GetCoordinateIndex(x, y, z), GetCoordinateIndex(x, y, z - 1)) = -1; } // Hx(x,y,z) values[val_index] = 1.f; col_indx[val_index] = GetCoordinateIndex(x, y, z); rows_indx[val_index] = offset_Ey + GetCoordinateIndex(x, y, z); val_index++; //curlMat.insert(offset_Ey + GetCoordinateIndex(x,y,z), GetCoordinateIndex(x, y, z )) = 1; // Hz(x - 1,y ,z) if (x != 0) { values[val_index] = 1.f; col_indx[val_index] = offset_Hz + GetCoordinateIndex(x - 1, y, z); rows_indx[val_index] = offset_Ey + GetCoordinateIndex(x, y, z); val_index++; //curlMat.insert(offset_Ey + GetCoordinateIndex(x, y, z), offset_Hz + GetCoordinateIndex(x - 1, y, z)) = 1; } // - Hz(x,y,z) values[val_index] = -1.f; col_indx[val_index] = offset_Hz + GetCoordinateIndex(x, y, z); rows_indx[val_index] = offset_Ey + GetCoordinateIndex(x, y, z); val_index++; //curlMat.insert(offset_Ey + GetCoordinateIndex(x,y,z), offset_Hz + GetCoordinateIndex(x, y , z)) = -1; } } } //Ez curl for (int x = 0; x < _doFX; x++) { for (int y = 0; y < _doFY; y++) { for (int z = 0; z < _doFZ; z++) { // Hx(x,y - 1,z) if (y != 0) { values[val_index] = 1.f; col_indx[val_index] = GetCoordinateIndex(x, y - 1, z); rows_indx[val_index] = offset_Ez + GetCoordinateIndex(x, y, z); val_index++; //curlMat.insert(offset_Ez + GetCoordinateIndex(x, y, z), GetCoordinateIndex(x, y - 1, z)) = 1; } // - Hx(x,y,z) values[val_index] = -1.f; col_indx[val_index] = GetCoordinateIndex(x, y, z); rows_indx[val_index] = offset_Ez + GetCoordinateIndex(x, y, z); val_index++; // - Hy(x - 1,y,z) if (x != 0) { values[val_index] = -1.f; col_indx[val_index] = offset_Hy + GetCoordinateIndex(x - 1, y, z); rows_indx[val_index] = offset_Ez + GetCoordinateIndex(x, y, z); val_index++; //curlMat.insert(offset_Ez + GetCoordinateIndex(x, y, z), offset_Hy + GetCoordinateIndex(x - 1, y, z)) = -1; } // Hy(x,y,z) values[val_index] = 1.f; col_indx[val_index] = offset_Hy + GetCoordinateIndex(x, y, z); rows_indx[val_index] = offset_Ez + GetCoordinateIndex(x, y, z); val_index++; //curlMat.insert(offset_Ez + GetCoordinateIndex(x,y,z), offset_Hy + GetCoordinateIndex(x, y, z )) = 1; //curlMat.insert(offset_Ez + GetCoordinateIndex(x,y,z), GetCoordinateIndex(x, y , z)) = - 1; } } } sparse_matrix_t temp; sparse_status_t stat = mkl_sparse_s_create_coo(&temp, _indexSchema, _matrixDimension, _matrixDimension, val_index, rows_indx, col_indx, values); if (stat != 0) { throw std::runtime_error("Sparse matrix creation failed due to error: " + std::to_string(stat)); } stat = mkl_sparse_convert_csr(temp, SPARSE_OPERATION_NON_TRANSPOSE, &hCurlMatrix); mkl_sparse_destroy(temp); stat = mkl_sparse_s_create_coo(&temp, _indexSchema, _matrixDimension, _matrixDimension, val_index, col_indx, rows_indx, values); if (stat != 0) { throw std::runtime_error("Sparse matrix creation failed due to error: " + std::to_string(stat)); } stat = mkl_sparse_convert_csr(temp, SPARSE_OPERATION_NON_TRANSPOSE, &eCurlMatrix); mkl_sparse_destroy(temp); mkl_free(values); mkl_free(rows_indx); mkl_free(col_indx); } void CreateMaterialMatrix(float value, sparse_matrix_t & materialMatrix) { int * row_indx = (int*)mkl_malloc(_matrixDimension * sizeof(int), allignment); float * values = (float*)mkl_malloc(_matrixDimension * sizeof(float), allignment); int * col_indx = (int*)mkl_malloc(_matrixDimension * sizeof(int), allignment); for (int i = 0; i < _matrixDimension; i++) { values[i] = value; col_indx[i] = i; row_indx[i] = i; } sparse_matrix_t temp; sparse_status_t stat = mkl_sparse_s_create_coo(&temp, _indexSchema, _matrixDimension, _matrixDimension, _matrixDimension, row_indx, col_indx, values); if (stat != 0) { throw std::runtime_error("Creating material matrix failed due to error: " + std::to_string(stat)); } mkl_sparse_convert_csr(temp, SPARSE_OPERATION_NON_TRANSPOSE, &materialMatrix); mkl_sparse_destroy(temp); mkl_free(row_indx); mkl_free(values); mkl_free(col_indx); } void SetBoundary(sparse_matrix_t &permittivities, sparse_matrix_t &permeabilities) { int rowIndex; int offset_z = _componentSize * 2; int offset_y = _componentSize; //Ez & Hz Boundaries for (int z = 0; z < _doFZ; z++) { for (int x = 0; x < _doFX; x++) { //Ez: y = 0 rowIndex = GetCoordinateIndex(x, 0, z) + offset_z; mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); // y = DoFY-1 rowIndex = GetCoordinateIndex(x, _doFY - 1, z) + offset_z; mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); //Hz: y = DoFY-1 rowIndex = GetCoordinateIndex(x, _doFY - 1, z) + offset_z; mkl_sparse_s_set_value(permeabilities, rowIndex, rowIndex, 0.f); } for (int y = 0; y < _doFY; y++) { //Ez: x = 0 rowIndex = GetCoordinateIndex(0, y, z) + offset_z; mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); //x = DoFX - 1 rowIndex = GetCoordinateIndex(_doFX - 1, y, z) + offset_z; mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); //Hz: x = DoFX -1 rowIndex = GetCoordinateIndex(_doFX - 1, y, z) + offset_z; mkl_sparse_s_set_value(permeabilities, rowIndex, rowIndex, 0.f); } } //Ez: z= DoFZ for (int y = 0; y < _doFY; y++) { for (int x = 0; x < _doFX; x++) { rowIndex = GetCoordinateIndex(x, y, _doFZ - 1) + offset_z; mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); } } //Ey & Hy Boundaries for (int y = 0; y < _doFY; y++) { for (int x = 0; x < _doFX; x++) { //Ey: z = 0 rowIndex = GetCoordinateIndex(x, y, 0) + offset_y; mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); //Z = DoF -1 rowIndex = GetCoordinateIndex(x, y, _doFZ - 1) + offset_y; mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); //Hy: z = DoF -1 rowIndex = GetCoordinateIndex(x, y, _doFZ - 1) + _componentSize; mkl_sparse_s_set_value(permeabilities, rowIndex, rowIndex, 0.f); } for (int z = 0; z < _doFZ; z++) { //Ey: x = 0 rowIndex = GetCoordinateIndex(0, y, z) + offset_y; mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); //x = DoF -1 rowIndex = GetCoordinateIndex(_doFX - 1, y, z) + offset_y; mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); //Hy: x = DoF -1 rowIndex = GetCoordinateIndex(_doFX - 1, y, z) + offset_y; mkl_sparse_s_set_value(permeabilities, rowIndex, rowIndex, 0.f); } } for (int z = 0; z < _doFZ - 1; z++) { for (int x = 0; x < _doFX; x++) { //Ey: y = DoFY -1 rowIndex = GetCoordinateIndex(x, _doFY - 1, z) + offset_y; mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); } } //Ex & Hx boundary for (int x = 0; x < _doFX; x++) { for (int y = 0; y < _doFY; y++) { //Ex: z = 0 rowIndex = GetCoordinateIndex(x, y, 0); mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); //z = DoFZ -1 rowIndex = GetCoordinateIndex(x, y, _doFZ - 1); mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); //Hx: z = DoFZ -1 rowIndex = GetCoordinateIndex(x, y, _doFZ - 1); mkl_sparse_s_set_value(permeabilities, rowIndex, rowIndex, 0.f); } for (int z = 0; z < _doFZ; z++) { //Ex: y = 0 rowIndex = GetCoordinateIndex(x, 0, z); mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); //y = DoFY -1 rowIndex = GetCoordinateIndex(x, _doFY - 1, z); mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); //Hx: y = DoFY -1 rowIndex = GetCoordinateIndex(x, _doFY - 1, z); mkl_sparse_s_set_value(permeabilities, rowIndex, rowIndex, 0.f); } } for (int z = 0; z < _doFZ - 1; z++) { for (int y = 0; y < _doFY - 1; y++) { //Ex: x = DoFX -1 rowIndex = GetCoordinateIndex(_doFX - 1, y, z); mkl_sparse_s_set_value(permittivities, rowIndex, rowIndex, 0.f); } } } void AssembleCompleteMatrix(sparse_matrix_t &hCurlMatrix, sparse_matrix_t &eCurlMatrix, int& source) { CreateCurlMatrices(hCurlMatrix, eCurlMatrix); sparse_matrix_t permea; sparse_matrix_t permit; CreateMaterialMatrix(213.091354f, permit); CreateMaterialMatrix(0.00150232902, permea); //Implement PEC boundary by setting all entries in rows corresponding with boundary nodes to 0 SetBoundary(permit, permea); //HCurl = HCurl * Permit. Matrix sparse_matrix_t temp; sparse_status_t stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE, _desc, permit, SPARSE_OPERATION_NON_TRANSPOSE, _desc, hCurlMatrix, SPARSE_STAGE_FULL_MULT, &temp); mkl_sparse_destroy(hCurlMatrix); mkl_sparse_copy(temp, _desc, &hCurlMatrix); mkl_sparse_destroy(temp); //ECurl = ECurl * Permeab. Matrix stat = mkl_sparse_sp2m(SPARSE_OPERATION_NON_TRANSPOSE, _desc, permea, SPARSE_OPERATION_NON_TRANSPOSE, _desc, eCurlMatrix, SPARSE_STAGE_FULL_MULT, &temp); mkl_sparse_destroy(eCurlMatrix); mkl_sparse_copy(temp, _desc, &eCurlMatrix); mkl_sparse_destroy(temp); stat = mkl_sparse_destroy(permea); stat = mkl_sparse_destroy(permit); //Bandwidth compression sparse_index_base_t indexing; int rows(0), cols(0); int * row_start(nullptr), *row_end(nullptr), *colIndices(nullptr); float * values(nullptr); int * perm = new int[_matrixDimension]; mkl_sparse_s_export_csr(eCurlMatrix, &indexing, &rows, &cols, &row_start, &row_end, &colIndices, &values); GeneratePermutationVektor(rows, cols, colIndices, row_start, row_end, values, perm); SimilarityTransformation(perm, eCurlMatrix, hCurlMatrix, source); mkl_sparse_set_mv_hint(eCurlMatrix, SPARSE_OPERATION_NON_TRANSPOSE, _desc, _iterations); stat = mkl_sparse_optimize(eCurlMatrix); mkl_sparse_set_mv_hint(hCurlMatrix, SPARSE_OPERATION_NON_TRANSPOSE, _desc, _iterations); stat = mkl_sparse_optimize(hCurlMatrix); } /***************************** Execute this for testing scalability of process with bandwidth compression***************************************/ void TestMKLWithBWCompression() { _iterations = 2000; int middlePoint = 100; int deviation = 30; int finaldimension = 70; _desc.type = SPARSE_MATRIX_TYPE_GENERAL; //Iterations for testing different matrix dimensions std::cout << "Matrix dimension; Performance E-Feld Update in MCells/sec; Performance H-Feld Update in MCells/sec; Duration E-Feld Update ;Duration H-Feld Update ; Compression rate" << std::endl; for (int d = 3; d < finaldimension; d++) { _doFX = d; _doFY = d; _doFZ = d; _matrixDimension = _doFX * _doFY*_doFZ * 3; _componentSize = _doFX * _doFY*_doFZ; int center = d/2; int source = GetCoordinateIndex(center, center, center); float * fieldVectorE = (float*)mkl_malloc(_matrixDimension * sizeof(float), allignment); float * fieldVectorH = (float*)mkl_malloc(_matrixDimension * sizeof(float), allignment); for (int i = 0; i < _matrixDimension; i++) { fieldVectorE[i] = 0.f; fieldVectorH[i] = 0.f; } sparse_matrix_t eCurlMatrix, hCurlMatrix; AssembleCompleteMatrix(hCurlMatrix, eCurlMatrix, source); double durationE(0.), durationH(0.); //To ensure that both matrix-vektor equations perform equally, the duration for each operations are measurred separately. for (int t = 0; t < _iterations; t++) { float currentValue = expf(-(t - middlePoint)*(t - middlePoint) / deviation); auto start = std::chrono::high_resolution_clock::now(); mkl_sparse_s_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.f, hCurlMatrix, _desc, fieldVectorH, 1.f, fieldVectorE); auto middle = std::chrono::high_resolution_clock::now(); mkl_sparse_s_mv(SPARSE_OPERATION_NON_TRANSPOSE, -1.f, eCurlMatrix, _desc, fieldVectorE, 1.f, fieldVectorH); auto end = std::chrono::high_resolution_clock::now(); durationE += std::chrono::duration_cast>(middle - start).count(); durationH += std::chrono::duration_cast>(end -middle ).count(); //Hardwired source at the center of the grid fieldVectorE[source] = currentValue; } mkl_free(fieldVectorE); mkl_free(fieldVectorH); mkl_sparse_destroy(eCurlMatrix); mkl_sparse_destroy(hCurlMatrix); /*To make the performance of each single matrix-vektor multiplication comparrible with the performance of both operations together (this is what counts), the performance is devided by 2. */ std::cout << _componentSize << ";" << (_componentSize * _iterations) / (2 * durationE * pow(10, 6)) << ";" << (_componentSize * _iterations) / (2 * durationH * pow(10, 6)) << ";" << durationE << ";" << durationH << ";" << (float)_compressedBandwidth/_uncompressedBandwidth << std::endl; } }