- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Dear Intel
With your help a month ago, I could set up the 'cluster sparse solver 64' program using 'iparm[1]=10 (The MPI version of the nested dissection and symbolic factorization algorithms)' with my 4 cluster computers.
This program is designed for very large sparse matrices with about 10^8 - 10^9 rows.
However, the cluster run (4 MPI & 4 OpenMP) performs worse than a single machine (1 MPI & 4 OpenMP).
The following table shows the results of the test.
target : 4*10^8 rows matrix
time consumption (1MPI & 4OpenMP || 4MPI & 4OpenMP(iparm[1]=3) || 4MPI & 4OpenMP(iparm[1]=10))
-------------------------------------------------------------------------------------------------------------------------
Reorder time (1021.6 s || 2094.3 s || 7644.3 s)
Factorization time (1403.2 s || 2136.6 s || 9263.1 s)
Solution time (158.6 s || 684.9 s || 554.14 s)
-------------------------------------------------------------------------------------------------------------------------
Could you please look into this issue?
I attach my code below.
Thank you very much in advance!!!
Regards,
Yong-hee
P.S. This is my code (almost the same as the example code).
=================================================================================
/******************************************************************************* * Copyright 2004-2015 Intel Corporation All Rights Reserved. * * The source code, information and material ("Material") contained herein is * owned by Intel Corporation or its suppliers or licensors, and title to such * Material remains with Intel Corporation or its suppliers or licensors. The * Material contains proprietary information of Intel or its suppliers and * licensors. The Material is protected by worldwide copyright laws and treaty * provisions. No part of the Material may be used, copied, reproduced, * modified, published, uploaded, posted, transmitted, distributed or disclosed * in any way without Intel's prior express written permission. No license under * any patent, copyright or other intellectual property rights in the Material * is granted to or conferred upon you, either expressly, by implication, * inducement, estoppel or otherwise. Any license under such intellectual * property rights must be express and approved by Intel in writing. * * Unless otherwise agreed by Intel in writing, you may not remove or alter this * notice or any other notice embedded in Materials by Intel or Intel's * suppliers or licensors in any way. *******************************************************************************/ /* * * MKL Cluster Sparse Solver example demonstrating the case when initial data (matrix * and rhs) distributed between several MPI processes, final solution is * distributed between MPI processes in the same way as they hold initial data. 
* ******************************************************************************** */ #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <math.h> #include <time.h> #include "mpi.h" #include "mkl.h" #include "mkl_cluster_sparse_solver.h" #ifdef MKL_ILP64 #define MPI_DT MPI_LONG #else #define MPI_DT MPI_INT #endif #define MPI_REDUCE_AND_BCAST \ MPI_Reduce(&err_mem, &error, 1, MPI_DT, MPI_SUM, 0, MPI_COMM_WORLD); \ MPI_Bcast(&error, 1, MPI_DT, 0, MPI_COMM_WORLD); int main(void) { clock_t before; double result; MKL_INT64 AllocatedBytes; int N_AllocatedBuffers; mkl_peak_mem_usage(MKL_PEAK_MEM_ENABLE); AllocatedBytes = mkl_mem_stat(&N_AllocatedBuffers); mkl_set_num_threads(2); FILE* pre_inputFile=fopen("/user/source/CREATE_MATRIX/SPM_test_20000_whole.txt", "r"); if (pre_inputFile==NULL){ puts( "There is no file." );} MKL_INT64 n; fscanf(pre_inputFile, "%lld", &n); n -= 1; fclose(pre_inputFile); /* Matrix data. */ //MKL_INT64 n = 5; MKL_INT64 mtype = 2; /* Real symmetric definite matrix */ MKL_INT64 *ia = NULL; MKL_INT64 *ja = NULL; double *a = NULL; /* RHS and solution vectors. */ double *b = NULL; double *x = NULL; char hostName[1024] = "\0"; MKL_INT64 nrhs = 1; /* Number of right hand sides. */ /* Internal solver memory pointer pt, */ /* 32-bit: int pt[64]; 64-bit: long int pt[64] */ /* or void *pt[64] should be OK on both architectures */ void *pt[64] = { 0 }; /* Cluster Sparse Solver control parameters. */ MKL_INT64 iparm[64] = { 0 }; MKL_INT64 maxfct, mnum, phase, msglvl, error, err_mem; /* Auxiliary variables. */ double ddum; /* Double dummy */ MKL_INT64 idum; /* Integer dummy. */ MKL_INT64 j; int mpi_stat = 0; int argc = 0; int comm, rank, size; char** argv; printf("at the beginning of the program, peak memory : %ld bytes\n", mkl_peak_mem_usage(MKL_PEAK_MEM)); /* -------------------------------------------------------------------- */ /* .. Init MPI. 
*/ /* -------------------------------------------------------------------- */ mpi_stat = MPI_Init( &argc, &argv ); mpi_stat = MPI_Comm_rank( MPI_COMM_WORLD, &rank ); mpi_stat = MPI_Comm_size( MPI_COMM_WORLD, &size ); comm = MPI_Comm_c2f( MPI_COMM_WORLD ); //printf ("comm : %d, rank : %d, size : %d", comm, rank, size); if( size < 2 ) { printf("\nERROR: this example doesn't work on number of MPI less than 2"); mpi_stat = MPI_Finalize(); return 1; } /* -------------------------------------------------------------------- */ /* .. Setup Cluster Sparse Solver control parameters. */ /* -------------------------------------------------------------------- */ iparm[ 0] = 1; /* Solver default parameters overriden with provided by iparm */ iparm[ 1] = 10; /* Use METIS for fill-in reordering */ iparm[ 5] = 0; /* Write solution into x */ iparm[ 7] = 2; /* Max number of iterative refinement steps */ iparm[ 9] = 13; /* Perturb the pivot elements with 1E-13 */ iparm[10] = 0; /* Don't use nonsymmetric permutation and scaling MPS */ iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ //iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ //iparm[18] = -1; /* Output: Mflops for LU factorization */ //iparm[26] = 1; /* Check input data for correctness */ //iparm[34] = 1; /* Cluster Sparse Solver use C-style indexing for ia and ja arrays */ //iparm[39] = 2; /* Input: matrix/rhs/solution are distributed between MPI processes */ /* If iparm[39]=2, the matrix is provided in distributed assembled matrix input format. In this case, each MPI process stores only a part (or domain) of the matrix A data. The bounds of the domain should be set via iparm(41) and iparm(42). Solution vector is distributed between process in same manner with rhs. */ maxfct = 1; /* Maximum number of numerical factorizations. */ mnum = 1; /* Which factorization to use. 
*/ msglvl = 1; /* Print statistical information in file */ error = 0; /* Initialize error flag */ err_mem = 0; /* Initialize error flag for memory allocation */ /* Initialize matrix and rhs components on each process: In this example initial matrix is distributed between 2 processes so for MPI processes with rank > 1 input domains are empty */ MKL_INT64 ii, ia_index, ja_index, up_r, down_r; FILE * inputFile; if (rank == 0) { //mkl_set_num_threads(2); inputFile=fopen("/user/rail7/source/CREATE_MATRIX/SPM_test_20000_n_1.txt", "r"); fscanf(inputFile, "%lld %lld", &ia_index, &ja_index); iparm[40] = 1; /* The number of row in global matrix, rhs element and solution vector that begins the input domain belonging to this MPI process */ iparm[41] = n/4; /* The number of row in global matrix, rhs element and solution vector that ends the input domain belonging to this MPI process */ //printf("%d %d %d %d", ia_index, ja_index, up_r, down_r); ia = (MKL_INT64*) MKL_malloc (sizeof (MKL_INT64) * ia_index, 64); ja = (MKL_INT64*) MKL_malloc (sizeof (MKL_INT64) * ja_index, 64); a = (double*) MKL_malloc (sizeof (double) * ja_index, 64); x = (double*) MKL_malloc (sizeof (double) * n, 64); b = (double*) MKL_malloc (sizeof (double) * n, 64); MPI_REDUCE_AND_BCAST; for (ii=0; ii<ia_index; ii++) fscanf(inputFile, "%lld", &ia[ii]); for (ii=0; ii<ja_index; ii++) fscanf(inputFile, "%lld", &ja[ii]); for (ii=0; ii<ja_index; ii++) fscanf(inputFile, "%lf", &a[ii]); for (ii=0; ii<n; ii++) b[ii] = 1.0; fclose(inputFile); //printf("%d %d %d %d %lf %lf", ia[0], ia[ia_index-1], ja[0], ja[ja_index-1], a[0], a[ja_index-1]); //printf("%d %d", iparm[40], iparm[41]); //printf("%lf %lf %lf %lf",b[0],b[1],b[ia_index-3],b[ia_index-1]); if ( ia == NULL || ja == NULL || a == NULL || x == NULL || b == NULL ) { if ( rank == 0 ) printf ("\nERROR during memory allocation: %lli", (long long int)error); mpi_stat = MPI_Finalize(); return 1; } } else if (rank == 1) { //mkl_set_num_threads(2); 
inputFile=fopen("/user/rail7/source/CREATE_MATRIX/SPM_test_20000_n_2.txt", "r"); fscanf(inputFile, "%lld %lld", &ia_index, &ja_index); iparm[40] = (n/4)+1; /* The number of row in global matrix, rhs element and solution vector that begins the input domain belonging to this MPI process */ iparm[41] = (n/4)*2; /* The number of row in global matrix, rhs element and solution vector that ends the input domain belonging to this MPI process */ ia = (MKL_INT64*) MKL_malloc (sizeof (MKL_INT64) * ia_index, 64); ja = (MKL_INT64*) MKL_malloc (sizeof (MKL_INT64) * ja_index, 64); a = (double*) MKL_malloc (sizeof (double) * ja_index, 64); x = (double*) MKL_malloc (sizeof (double) * n, 64); b = (double*) MKL_malloc (sizeof (double) * n, 64); MPI_REDUCE_AND_BCAST; for (ii=0; ii<ia_index; ii++) fscanf(inputFile, "%lld", &ia[ii]); for (ii=0; ii<ja_index; ii++) fscanf(inputFile, "%lld", &ja[ii]); for (ii=0; ii<ja_index; ii++) fscanf(inputFile, "%lf", &a[ii]); fclose(inputFile); if ( ia == NULL || ja == NULL || a == NULL || x == NULL || b == NULL ) { if ( rank == 1 ) printf ("\nERROR during memory allocation: %lli", (long long int)error); mpi_stat = MPI_Finalize(); return 1; } } else if (rank == 2) { //mkl_set_num_threads(2); inputFile=fopen("/user/rail7/source/CREATE_MATRIX/SPM_test_20000_n_3.txt", "r"); fscanf(inputFile, "%lld %lld", &ia_index, &ja_index); iparm[40] = (n/4)*2+1; /* The number of row in global matrix, rhs element and solution vector that begins the input domain belonging to this MPI process */ iparm[41] = (n/4)*3; /* The number of row in global matrix, rhs element and solution vector that ends the input domain belonging to this MPI process */ ia = (MKL_INT64*) MKL_malloc (sizeof (MKL_INT64) * ia_index, 64); ja = (MKL_INT64*) MKL_malloc (sizeof (MKL_INT64) * ja_index, 64); a = (double*) MKL_malloc (sizeof (double) * ja_index, 64); x = (double*) MKL_malloc (sizeof (double) * n, 64); b = (double*) MKL_malloc (sizeof (double) * n, 64); MPI_REDUCE_AND_BCAST; for (ii=0; 
ii<ia_index; ii++) fscanf(inputFile, "%lld", &ia[ii]); for (ii=0; ii<ja_index; ii++) fscanf(inputFile, "%lld", &ja[ii]); for (ii=0; ii<ja_index; ii++) fscanf(inputFile, "%lf", &a[ii]); fclose(inputFile); if ( ia == NULL || ja == NULL || a == NULL || x == NULL || b == NULL ) { if ( rank == 0 ) printf ("\nERROR during memory allocation: %lli", (long long int)error); mpi_stat = MPI_Finalize(); return 1; } } else if (rank == 3) { //mkl_set_num_threads(2); inputFile=fopen("/user/rail7/source/CREATE_MATRIX/SPM_test_20000_n_4.txt", "r"); fscanf(inputFile, "%lld %lld", &ia_index, &ja_index); iparm[40] = (n/4)*3+1; /* The number of row in global matrix, rhs element and solution vector that begins the input domain belonging to this MPI process */ iparm[41] = (n/4)*4; /* The number of row in global matrix, rhs element and solution vector that ends the input domain belonging to this MPI process */ ia = (MKL_INT64*) MKL_malloc (sizeof (MKL_INT64) * ia_index, 64); ja = (MKL_INT64*) MKL_malloc (sizeof (MKL_INT64) * ja_index, 64); a = (double*) MKL_malloc (sizeof (double) * ja_index, 64); x = (double*) MKL_malloc (sizeof (double) * n, 64); b = (double*) MKL_malloc (sizeof (double) * n, 64); MPI_REDUCE_AND_BCAST; for (ii=0; ii<ia_index; ii++) fscanf(inputFile, "%lld", &ia[ii]); for (ii=0; ii<ja_index; ii++) fscanf(inputFile, "%lld", &ja[ii]); for (ii=0; ii<ja_index; ii++) fscanf(inputFile, "%lf", &a[ii]); fclose(inputFile); if ( ia == NULL || ja == NULL || a == NULL || x == NULL || b == NULL ) { if ( rank == 0 ) printf ("\nERROR during memory allocation: %lli", (long long int)error); mpi_stat = MPI_Finalize(); return 1; } } else { MPI_REDUCE_AND_BCAST; /* In this example MPI processes with rank > 1 doesn't have input domain so iparm[40] need to be greater then iparm[41] */ iparm[40] = 2; iparm[41] = 1; } //*************************************************************************************************************************************************** gethostname(hostName, 1023); 
printf("\n%d th rank at the end of the loading phase, hostname : %s peak memory : %ld bytes\n", rank, hostName, mkl_peak_mem_usage(MKL_PEAK_MEM)); if (rank == 0 ) before = clock(); /* -------------------------------------------------------------------- */ /* .. Reordering and Symbolic Factorization. This step also allocates */ /* all memory that is necessary for the factorization. */ /* -------------------------------------------------------------------- */ phase = 11; cluster_sparse_solver_64 ( pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &idum, &nrhs, iparm, &msglvl, &ddum, &ddum, &comm, &error ); if ( error != 0 ) { if ( rank == 0 ) printf ("\nERROR during symbolic factorization: %lli", (long long int)error); mpi_stat = MPI_Finalize(); return 1; } if ( rank == 0 ) printf ("\nReordering completed ... "); printf("\n%d th rank at the end of the phase 11, hostname : %s peak memory : %ld bytes\n", rank, hostName, mkl_peak_mem_usage(MKL_PEAK_MEM)); mkl_peak_mem_usage(MKL_PEAK_MEM_RESET); printf("\n%d th rank at the end of the phase 11, hostname : %s peak memory : %ld bytes\n", rank, hostName, mkl_peak_mem_usage(MKL_PEAK_MEM)); if (rank == 0 ) { result = (double)(clock()-before)/CLOCKS_PER_SEC; printf("##### Time consumption for reordering is %7.2lf seconds.\n", result); before = clock(); } //*************************************************************************************************************************************************** /* -------------------------------------------------------------------- */ /* .. Numerical factorization. 
*/ /* -------------------------------------------------------------------- */ phase = 22; cluster_sparse_solver_64 ( pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &idum, &nrhs, iparm, &msglvl, &ddum, &ddum, &comm, &error ); if ( error != 0 ) { if ( rank == 0 ) printf ("\nERROR during numerical factorization: %lli", (long long int)error); mpi_stat = MPI_Finalize(); return 2; } if ( rank == 0 ) printf ("\nFactorization completed ... "); printf("\n%d th rank at the end of the phase 22, hostname : %s peak memory : %ld bytes\n", rank, hostName, mkl_peak_mem_usage(MKL_PEAK_MEM)); mkl_peak_mem_usage(MKL_PEAK_MEM_RESET); printf("\n%d th rank at the end of the phase 22, hostname : %s peak memory : %ld bytes\n", rank, hostName, mkl_peak_mem_usage(MKL_PEAK_MEM)); if (rank == 0 ) { result = (double)(clock()-before)/CLOCKS_PER_SEC; printf("##### Time consumption for factorization is %7.2lf seconds.\n", result); before = clock(); } //*************************************************************************************************************************************************** /* -------------------------------------------------------------------- */ /* .. Back substitution and iterative refinement. 
*/ /* -------------------------------------------------------------------- */ phase = 33; if ( rank == 0 ) printf ("\nSolving system..."); cluster_sparse_solver_64 ( pt, &maxfct, &mnum, &mtype, &phase, &n, a, ia, ja, &idum, &nrhs, iparm, &msglvl, b, x, &comm, &error ); if ( error != 0 ) { if ( rank == 0 ) printf ("\nERROR during solution: %lli", (long long int)error); mpi_stat = MPI_Finalize(); return 4; } /* The solution of the system is distributed between MPI processes like as input matrix so MPI processes with rank 0 and 1 keep only part of solution */ printf("\n%d th rank at the end of the program, hostname : %s peak memory : %ld bytes\n", rank, hostName, mkl_peak_mem_usage(MKL_PEAK_MEM)); mkl_peak_mem_usage(MKL_PEAK_MEM_RESET); printf("\n%d th rank at the end of the program, hostname : %s peak memory : %ld bytes\n", rank, hostName, mkl_peak_mem_usage(MKL_PEAK_MEM)); if (rank == 0 ) { result = (double)(clock()-before)/CLOCKS_PER_SEC; printf("##### Time consumption for solving is %7.2lf seconds.\n", result); } //*************************************************************************************************************************************************** if ( rank == 0 ) { printf ("\nThe solution of the system is: "); for ( j = 0; j < 10; j++ ) { printf ("\n on zero process x [%lli] = % f", (long long int)j, x); } printf ("\n"); } MPI_Barrier(MPI_COMM_WORLD); if ( rank == 1 ) { printf ("\nThe solution of the system is: "); for ( j = n-10; j < n; j++ ) { printf ("\n on first process x [%lli] = % f", (long long int)j, x ); } printf ("\n"); } MPI_Barrier(MPI_COMM_WORLD); //double res, res0; //char* uplo; //uplo = "Upper-triangle"; //mkl_cspblas_scsrsymv ( uplo, &n, a, ia, ja, x, bs ); //res = 0.0; //res0 = 0.0; //printf ("%lf %lf %lf %lf \n", b[0], b[1], b[2], b[3]); //printf ("%lf %lf %lf %lf \n", bs[0], bs[1], bs[2], bs[3]); //printf("XXXX"); /* -------------------------------------------------------------------- */ /* .. Termination and release of memory. 
*/ /* -------------------------------------------------------------------- */ phase = -1; /* Release internal memory. */ cluster_sparse_solver_64 ( pt, &maxfct, &mnum, &mtype, &phase, &n, &ddum, ia, ja, &idum, &nrhs, iparm, &msglvl, &ddum, &ddum, &comm, &error ); //printf ("%lf %lf %lf %lf \n", b[0], b[1], b[2], b[3]); //printf ("%lf %lf %lf %lf \n", bs[0], bs[1], bs[2], bs[3]); if ( error != 0 ) { if ( rank == 0 ) printf ("\nERROR during release memory: %lli", (long long int)error); mpi_stat = MPI_Finalize(); return 5; } if ( rank < size ) { MKL_free(ia); MKL_free(ja); MKL_free(a); MKL_free(x); MKL_free(b); } printf("\n%d th rank at the end of the phase 33, hostname : %s peak memory : %ld bytes\n", rank, hostName, mkl_peak_mem_usage(MKL_PEAK_MEM)); mkl_peak_mem_usage(MKL_PEAK_MEM_RESET); //gethostname(hostName, 1023); //printf("\n%d th rank hostname : %s peak memory : %ld bytes\n", rank, hostName, mkl_peak_mem_usage(MKL_PEAK_MEM)); //printf("\nPeak memory allocated by Intel MKL memory allocator after reset of peak memory counter %ld bytes\n", mkl_peak_mem_usage(MKL_PEAK_MEM)); mpi_stat = MPI_Finalize(); return 0; }
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Yong-hee,
The results that you show are quite strange. Could I ask you to send us the tested matrix? Something is going wrong, and we need to experiment with a reproducer on our side.
Thanks,
Alex
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Alexander Kalinkin,
Below, I attach code for matrix generation..
If you execute this code, you can input the matrix size.
First, enter 10000 for the size and 25 for the interconnection; this generates a matrix with 4*10^8 rows, the same one that I used.
I really appreciate your consideration and help.
Regards,
Yong-hee
#include <stdio.h>
#include <stdlib.h>
#include <tgmath.h>

enum { NUM_DIES = 4 };

/* Upper-triangular CSR matrix under construction (1-based ia and ja values). */
struct csr_builder {
    unsigned long long *ia;   /* row pointers; ia[0] is preset to 1 */
    unsigned long long *ja;   /* column index of every stored entry */
    double *a;                /* value of every stored entry */
    unsigned long long nnz;   /* entries stored so far */
    unsigned long long rows;  /* row pointers stored so far, including ia[0] */
};

/* Append one (value, column) entry to the row currently being built. */
static void push_entry(struct csr_builder *m, double value, unsigned long long column)
{
    m->a[m->nnz] = value;
    m->ja[m->nnz] = column;
    m->nnz++;
}

/*
 * Append the rows of one side x side grid ("die").
 *
 * Every node stores its diagonal plus the couplings to its right and lower
 * neighbours.  Nodes on grid row side/2 whose 1-based column is a multiple of
 * `stride` are interconnection nodes: they additionally couple to the same
 * node of the next die (except on the last die) and get a diagonal of 5 on
 * the first and last die and 6 on the dies in between.
 * division_index[die + 1] receives the position of the die's final entry.
 */
static void build_die(struct csr_builder *m, unsigned die, unsigned long long side,
                      unsigned long long stride, unsigned long long *division_index)
{
    const unsigned long long die_rows = side * side;
    const int has_next_die = die + 1 < NUM_DIES;
    const double link_diagonal = (die == 0 || die + 1 == NUM_DIES) ? 5.0 : 6.0;
    unsigned long long row, col;

    for (row = 0; row < side; row++) {
        for (col = 0; col < side; col++) {
            const unsigned long long node = die_rows * die + side * row + col + 1;
            const int last_row = (row + 1 == side);
            const int last_col = (col + 1 == side);

            if (last_row && last_col) {
                division_index[die + 1] = m->nnz;
                push_entry(m, 4.0, node);
            } else if (last_row) {
                push_entry(m, 4.0, node);
                push_entry(m, -1.0, node + 1);
            } else if (last_col) {
                push_entry(m, 4.0, node);
                push_entry(m, -1.0, node + side);
            } else {
                const int linked = (row + 1 == side / 2) && ((col + 1) % stride == 0);

                push_entry(m, linked ? link_diagonal : 4.0, node);
                push_entry(m, -1.0, node + 1);
                push_entry(m, -1.0, node + side);
                if (linked && has_next_die)
                    push_entry(m, -1.0, node + die_rows);
            }
            m->ia[m->rows] = m->nnz + 1;
            m->rows++;
        }
    }
}

/* Open `name` for writing or terminate the program with a diagnostic. */
static FILE *open_output(const char *name)
{
    FILE *out = fopen(name, "w");

    if (out == NULL) {
        perror(name);
        exit(EXIT_FAILURE);
    }
    return out;
}

/* Close a written file; a failed flush means the file is incomplete. */
static void close_output(FILE *out, const char *name)
{
    if (fclose(out) != 0) {
        perror(name);
        exit(EXIT_FAILURE);
    }
}

/* Write values[first..last) minus `base` on one line. */
static void write_indices(FILE *out, const unsigned long long *values,
                          unsigned long long first, unsigned long long last,
                          unsigned long long base)
{
    unsigned long long i;

    for (i = first; i < last; i++)
        fprintf(out, "%llu ", values[i] - base);
    fprintf(out, "\n");
}

/* Write values[first..last) on one line. */
static void write_values(FILE *out, const double *values,
                         unsigned long long first, unsigned long long last)
{
    unsigned long long i;

    for (i = first; i < last; i++)
        fprintf(out, "%lf ", values[i]);
    fprintf(out, "\n");
}

/*
 * Generates a four-die grid matrix and writes it three ways:
 *   SPM_test_20000_whole.txt  - the complete matrix
 *   SPM_test_20000_n_<k>.txt  - four disjoint row blocks with local row pointers
 *   SPM_test_20000_<k>.txt    - four blocks sharing their boundary row, with the
 *                               shared diagonal halved
 * Reads the die size and the interconnection percentage (1..100) from stdin.
 */
int main(void)
{
    struct csr_builder m = { NULL, NULL, NULL, 0, 0 };
    unsigned long long division_index[NUM_DIES + 1] = { 0 };
    unsigned long long side = 0;
    unsigned long long percentage = 0;
    unsigned long long die_rows, stride, total_ia, total_ja;
    unsigned part;
    char name[64];
    FILE *out;

    printf("Size of a die(int) : ");
    if (scanf("%llu", &side) != 1 || side == 0) {
        fprintf(stderr, "The die size must be a positive integer.\n");
        return EXIT_FAILURE;
    }
    printf("Perc. of interconnection(int) : ");
    /* 100 / percentage is used as a modulus, so 0 and values above 100 would
       both end in a division by zero. */
    if (scanf("%llu", &percentage) != 1 || percentage == 0 || percentage > 100) {
        fprintf(stderr, "The percentage must be an integer from 1 to 100.\n");
        return EXIT_FAILURE;
    }

    die_rows = side * side;
    stride = 100 / percentage;
    total_ia = NUM_DIES * die_rows + 1;
    total_ja = NUM_DIES * 3 * die_rows;
    printf("Total number of the 'ia' is %llu and 'ja' is %llu\n", total_ia, total_ja);

    m.ia = malloc(sizeof *m.ia * (size_t)total_ia);
    m.ja = malloc(sizeof *m.ja * (size_t)total_ja);
    m.a = malloc(sizeof *m.a * (size_t)total_ja);
    if (m.ia == NULL || m.ja == NULL || m.a == NULL) {
        fprintf(stderr, "Out of memory.\n");
        free(m.ia);
        free(m.ja);
        free(m.a);
        return EXIT_FAILURE;
    }

    m.ia[0] = 1;
    m.rows = 1;
    for (part = 0; part < NUM_DIES; part++)
        build_die(&m, part, side, stride, division_index);
    m.a[0] = 1000.0; /* Voltage source */

    /* Complete matrix. */
    out = open_output("SPM_test_20000_whole.txt");
    fprintf(out, "%llu %llu\n", m.rows, m.nnz);
    write_indices(out, m.ia, 0, m.rows, 0);
    write_indices(out, m.ja, 0, m.nnz, 0);
    write_values(out, m.a, 0, m.nnz);
    close_output(out, "SPM_test_20000_whole.txt");

    /* Disjoint row blocks: row pointers are rebased to start at 1, column
       indices stay global. */
    for (part = 0; part < NUM_DIES; part++) {
        const unsigned long long lo = die_rows * part;
        const unsigned long long hi = die_rows * (part + 1);

        snprintf(name, sizeof name, "SPM_test_20000_n_%u.txt", part + 1);
        out = open_output(name);
        fprintf(out, "%llu %llu\n", die_rows + 1, m.ia[hi] - m.ia[lo]);
        write_indices(out, m.ia, lo, hi + 1, m.ia[lo] - 1);
        write_indices(out, m.ja, m.ia[lo] - 1, m.ia[hi] - 1, 0);
        write_values(out, m.a, m.ia[lo] - 1, m.ia[hi] - 1);
        close_output(out, name);
    }

    /* Blocks that share their boundary row: the shared diagonal entry is
       split evenly between the two blocks that contain it. */
    for (part = 1; part < NUM_DIES; part++)
        m.a[division_index[part]] = m.a[division_index[part]] / 2.0;

    for (part = 0; part < NUM_DIES; part++) {
        /* Blocks after the first start one row early, on the shared row. */
        const unsigned long long first_ptr = (part == 0) ? 0 : die_rows * part - 1;
        const unsigned long long last_ptr = die_rows * (part + 1);
        const unsigned long long first_row = (part == 0) ? 1ULL : die_rows * part;

        snprintf(name, sizeof name, "SPM_test_20000_%u.txt", part + 1);
        out = open_output(name);
        fprintf(out, "%llu %llu %llu %llu\n",
                last_ptr - first_ptr + 1,
                division_index[part + 1] - division_index[part] + 1,
                first_row, last_ptr);
        write_indices(out, m.ia, first_ptr, last_ptr + 1, m.ia[first_ptr] - 1);
        write_indices(out, m.ja, division_index[part], division_index[part + 1] + 1, 0);
        write_values(out, m.a, division_index[part], division_index[part + 1] + 1);
        close_output(out, name);
    }

    free(m.ia);
    free(m.ja);
    free(m.a);
    return EXIT_SUCCESS;
}
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page