00001 00011 #ifndef __SUPERLU_UTIL /* allow multiple inclusions */ 00012 #define __SUPERLU_UTIL 00013 00014 #include <stdio.h> 00015 #include <stdlib.h> 00016 #include <string.h> 00017 /* 00018 #ifndef __STDC__ 00019 #include <malloc.h> 00020 #endif 00021 */ 00022 #include <assert.h> 00023 #include "superlu_enum_consts.h" 00024 00025 /*********************************************************************** 00026 * Macros 00027 ***********************************************************************/ 00028 #define FIRSTCOL_OF_SNODE(i) (xsup[i]) 00029 /* No of marker arrays used in the symbolic factorization, 00030 each of size n */ 00031 #define NO_MARKER 3 00032 #define NUM_TEMPV(m,w,t,b) ( SUPERLU_MAX(m, (t + b)*w) ) 00033 00034 #ifndef USER_ABORT 00035 #define USER_ABORT(msg) superlu_abort_and_exit(msg) 00036 #endif 00037 00038 #define ABORT(err_msg) \ 00039 { char msg[256];\ 00040 sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\ 00041 USER_ABORT(msg); } 00042 00043 00044 #ifndef USER_MALLOC 00045 #if 1 00046 #define USER_MALLOC(size) superlu_malloc(size) 00047 #else 00048 /* The following may check out some uninitialized data */ 00049 #define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size) 00050 #endif 00051 #endif 00052 00053 #define SUPERLU_MALLOC(size) USER_MALLOC(size) 00054 00055 #ifndef USER_FREE 00056 #define USER_FREE(addr) superlu_free(addr) 00057 #endif 00058 00059 #define SUPERLU_FREE(addr) USER_FREE(addr) 00060 00061 #define CHECK_MALLOC(where) { \ 00062 extern int superlu_malloc_total; \ 00063 printf("%s: malloc_total %d Bytes\n", \ 00064 where, superlu_malloc_total); \ 00065 } 00066 00067 #define SUPERLU_MAX(x, y) ( (x) > (y) ? (x) : (y) ) 00068 #define SUPERLU_MIN(x, y) ( (x) < (y) ? (x) : (y) ) 00069 00070 /********************************************************* 00071 * Macros used for easy access of sparse matrix entries. * 00072 *********************************************************/ 00073 #define L_SUB_START(col) ( Lstore->rowind_colptr[col] ) 00074 #define L_SUB(ptr) ( Lstore->rowind[ptr] ) 00075 #define L_NZ_START(col) ( Lstore->nzval_colptr[col] ) 00076 #define L_FST_SUPC(superno) ( Lstore->sup_to_col[superno] ) 00077 #define U_NZ_START(col) ( Ustore->colptr[col] ) 00078 #define U_SUB(ptr) ( Ustore->rowind[ptr] ) 00079 00080 00081 /*********************************************************************** 00082 * Constants 00083 ***********************************************************************/ 00084 #define EMPTY (-1) 00085 /*#define NO (-1)*/ 00086 #define FALSE 0 00087 #define TRUE 1 00088 00089 #define NO_MEMTYPE 4 /* 0: lusup; 00090 1: ucol; 00091 2: lsub; 00092 3: usub */ 00093 00094 #define GluIntArray(n) (5 * (n) + 5) 00095 00096 /* Dropping rules */ 00097 #define NODROP ( 0x0000 ) 00098 #define DROP_BASIC ( 0x0001 ) /* ILU(tau) */ 00099 #define DROP_PROWS ( 0x0002 ) /* ILUTP: keep p maximum rows */ 00100 #define DROP_COLUMN ( 0x0004 ) /* ILUTP: for j-th column, 00101 p = gamma * nnz(A(:,j)) */ 00102 #define DROP_AREA ( 0x0008 ) /* ILUTP: for j-th column, use 00103 nnz(F(:,1:j)) / nnz(A(:,1:j)) 00104 to limit memory growth */ 00105 #define DROP_SECONDARY ( 0x000E ) /* PROWS | COLUMN | AREA */ 00106 #define DROP_DYNAMIC ( 0x0010 ) /* adaptive tau */ 00107 #define DROP_INTERP ( 0x0100 ) /* use interpolation */ 00108 00109 00110 #if 1 00111 #define MILU_ALPHA (1.0e-2) /* multiple of drop_sum to be added to diagonal */ 00112 #else 00113 #define MILU_ALPHA 1.0 /* multiple of drop_sum to be added to diagonal */ 00114 #endif 00115 00116 00117 /*********************************************************************** 00118 * Type definitions 00119 ***********************************************************************/ 00120 typedef float flops_t; 00121 typedef unsigned char Logical; 00122 00123 /* 00124 *-- This contains the options used to control the solution process. 00125 * 00126 * Fact (fact_t) 00127 * Specifies whether or not the factored form of the matrix 00128 * A is supplied on entry, and if not, how the matrix A should 00129 * be factorizaed. 00130 * = DOFACT: The matrix A will be factorized from scratch, and the 00131 * factors will be stored in L and U. 00132 * = SamePattern: The matrix A will be factorized assuming 00133 * that a factorization of a matrix with the same sparsity 00134 * pattern was performed prior to this one. Therefore, this 00135 * factorization will reuse column permutation vector 00136 * ScalePermstruct->perm_c and the column elimination tree 00137 * LUstruct->etree. 00138 * = SamePattern_SameRowPerm: The matrix A will be factorized 00139 * assuming that a factorization of a matrix with the same 00140 * sparsity pattern and similar numerical values was performed 00141 * prior to this one. Therefore, this factorization will reuse 00142 * both row and column scaling factors R and C, both row and 00143 * column permutation vectors perm_r and perm_c, and the 00144 * L & U data structures set up from the previous factorization. 00145 * = FACTORED: On entry, L, U, perm_r and perm_c contain the 00146 * factored form of A. If DiagScale is not NOEQUIL, the matrix 00147 * A has been equilibrated with scaling factors R and C. 00148 * 00149 * Equil (yes_no_t) 00150 * Specifies whether to equilibrate the system (scale A's row and 00151 * columns to have unit norm). 00152 * 00153 * ColPerm (colperm_t) 00154 * Specifies what type of column permutation to use to reduce fill. 00155 * = NATURAL: use the natural ordering 00156 * = MMD_ATA: use minimum degree ordering on structure of A'*A 00157 * = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A 00158 * = COLAMD: use approximate minimum degree column ordering 00159 * = MY_PERMC: use the ordering specified by the user 00160 * 00161 * Trans (trans_t) 00162 * Specifies the form of the system of equations: 00163 * = NOTRANS: A * X = B (No transpose) 00164 * = TRANS: A**T * X = B (Transpose) 00165 * = CONJ: A**H * X = B (Transpose) 00166 * 00167 * IterRefine (IterRefine_t) 00168 * Specifies whether to perform iterative refinement. 00169 * = NO: no iterative refinement 00170 * = SLU_SINGLE: perform iterative refinement in single precision 00171 * = SLU_DOUBLE: perform iterative refinement in double precision 00172 * = SLU_EXTRA: perform iterative refinement in extra precision 00173 * 00174 * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU) 00175 * Specifies the threshold used for a diagonal entry to be an 00176 * acceptable pivot. 00177 * 00178 * SymmetricMode (yest_no_t) 00179 * Specifies whether to use symmetric mode. Symmetric mode gives 00180 * preference to diagonal pivots, and uses an (A'+A)-based column 00181 * permutation algorithm. 00182 * 00183 * PivotGrowth (yes_no_t) 00184 * Specifies whether to compute the reciprocal pivot growth. 00185 * 00186 * ConditionNumber (ues_no_t) 00187 * Specifies whether to compute the reciprocal condition number. 00188 * 00189 * RowPerm (rowperm_t) (only for SuperLU_DIST or ILU) 00190 * Specifies whether to permute rows of the original matrix. 00191 * = NO: not to permute the rows 00192 * = LargeDiag: make the diagonal large relative to the off-diagonal 00193 * = MY_PERMR: use the permutation given by the user 00194 * 00195 * ILU_DropRule (int) 00196 * Specifies the dropping rule: 00197 * = DROP_BASIC: Basic dropping rule, supernodal based ILUTP(tau). 00198 * = DROP_PROWS: Supernodal based ILUTP(p,tau), p = gamma * nnz(A)/n. 00199 * = DROP_COLUMN: Variant of ILUTP(p,tau), for j-th column, 00200 * p = gamma * nnz(A(:,j)). 00201 * = DROP_AREA: Variation of ILUTP, for j-th column, use 00202 * nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory. 00203 * = DROP_DYNAMIC: Modify the threshold tau during factorizaion: 00204 * If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma 00205 * tau_L(j) := MIN(tau_0, tau_L(j-1) * 2); 00206 * Otherwise 00207 * tau_L(j) := MAX(tau_0, tau_L(j-1) / 2); 00208 * tau_U(j) uses the similar rule. 00209 * NOTE: the thresholds used by L and U are separate. 00210 * = DROP_INTERP: Compute the second dropping threshold by 00211 * interpolation instead of sorting (default). 00212 * In this case, the actual fill ratio is not 00213 * guaranteed to be smaller than gamma. 00214 * Note: DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive. 00215 * ( Default: DROP_BASIC | DROP_AREA ) 00216 * 00217 * ILU_DropTol (double) 00218 * numerical threshold for dropping. 00219 * 00220 * ILU_FillFactor (double) 00221 * Gamma in the secondary dropping. 00222 * 00223 * ILU_Norm (norm_t) 00224 * Specify which norm to use to measure the row size in a 00225 * supernode: infinity-norm, 1-norm, or 2-norm. 00226 * 00227 * ILU_FillTol (double) 00228 * numerical threshold for zero pivot perturbation. 00229 * 00230 * ILU_MILU (milu_t) 00231 * Specifies which version of MILU to use. 00232 * 00233 * ILU_MILU_Dim (double) 00234 * Dimension of the PDE if available. 00235 * 00236 * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST) 00237 * Specifies whether to replace the tiny diagonals by 00238 * sqrt(epsilon)*||A|| during LU factorization. 00239 * 00240 * SolveInitialized (yes_no_t) (only for SuperLU_DIST) 00241 * Specifies whether the initialization has been performed to the 00242 * triangular solve. 00243 * 00244 * RefineInitialized (yes_no_t) (only for SuperLU_DIST) 00245 * Specifies whether the initialization has been performed to the 00246 * sparse matrix-vector multiplication routine needed in iterative 00247 * refinement. 00248 * 00249 * PrintStat (yes_no_t) 00250 * Specifies whether to print the solver's statistics. 00251 */ 00252 typedef struct { 00253 fact_t Fact; 00254 yes_no_t Equil; 00255 colperm_t ColPerm; 00256 trans_t Trans; 00257 IterRefine_t IterRefine; 00258 double DiagPivotThresh; 00259 yes_no_t SymmetricMode; 00260 yes_no_t PivotGrowth; 00261 yes_no_t ConditionNumber; 00262 rowperm_t RowPerm; 00263 int ILU_DropRule; 00264 double ILU_DropTol; /* threshold for dropping */ 00265 double ILU_FillFactor; /* gamma in the secondary dropping */ 00266 norm_t ILU_Norm; /* infinity-norm, 1-norm, or 2-norm */ 00267 double ILU_FillTol; /* threshold for zero pivot perturbation */ 00268 milu_t ILU_MILU; 00269 double ILU_MILU_Dim; /* Dimension of PDE (if available) */ 00270 yes_no_t ParSymbFact; 00271 yes_no_t ReplaceTinyPivot; /* used in SuperLU_DIST */ 00272 yes_no_t SolveInitialized; 00273 yes_no_t RefineInitialized; 00274 yes_no_t PrintStat; 00275 int nnzL, nnzU; /* used to store nnzs for now */ 00276 int num_lookaheads; /* num of levels in look-ahead */ 00277 yes_no_t lookahead_etree; /* use etree computed from the 00278 serial symbolic factorization */ 00279 yes_no_t SymPattern; /* symmetric factorization */ 00280 } superlu_options_t; 00281 00283 typedef struct e_node { 00284 int size; /* length of the memory that has been used */ 00285 void *mem; /* pointer to the new malloc'd store */ 00286 } ExpHeader; 00287 00288 typedef struct { 00289 int size; 00290 int used; 00291 int top1; /* grow upward, relative to &array[0] */ 00292 int top2; /* grow downward */ 00293 void *array; 00294 } LU_stack_t; 00295 00296 typedef struct { 00297 int *panel_histo; /* histogram of panel size distribution */ 00298 double *utime; /* running time at various phases */ 00299 flops_t *ops; /* operation count at various phases */ 00300 int TinyPivots; /* number of tiny pivots */ 00301 int RefineSteps; /* number of iterative refinement steps */ 00302 int expansions; /* number of memory expansions */ 00303 } SuperLUStat_t; 00304 00305 typedef struct { 00306 float for_lu; 00307 float total_needed; 00308 } mem_usage_t; 00309 00310 00311 /*********************************************************************** 00312 * Prototypes 00313 ***********************************************************************/ 00314 #ifdef __cplusplus 00315 extern "C" { 00316 #endif 00317 00318 extern int input_error(char *, int *); 00319 00320 extern void Destroy_SuperMatrix_Store(SuperMatrix *); 00321 extern void Destroy_CompCol_Matrix(SuperMatrix *); 00322 extern void Destroy_CompRow_Matrix(SuperMatrix *); 00323 extern void Destroy_SuperNode_Matrix(SuperMatrix *); 00324 extern void Destroy_CompCol_Permuted(SuperMatrix *); 00325 extern void Destroy_Dense_Matrix(SuperMatrix *); 00326 extern void get_perm_c(int, SuperMatrix *, int *); 00327 extern void set_default_options(superlu_options_t *options); 00328 extern void ilu_set_default_options(superlu_options_t *options); 00329 extern void sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*, 00330 SuperMatrix*); 00331 extern void superlu_abort_and_exit(char*); 00332 extern void *superlu_malloc (size_t); 00333 extern int *intMalloc (int); 00334 extern int *intCalloc (int); 00335 extern void superlu_free (void*); 00336 extern void SetIWork (int, int, int, int *, int **, int **, int **, 00337 int **, int **, int **, int **); 00338 extern int sp_coletree (int *, int *, int *, int, int, int *); 00339 extern void relax_snode (const int, int *, const int, int *, int *); 00340 extern void heap_relax_snode (const int, int *, const int, int *, int *); 00341 extern int mark_relax(int, int *, int *, int *, int *, int *, int *); 00342 extern void ilu_relax_snode (const int, int *, const int, int *, 00343 int *, int *); 00344 extern void ilu_heap_relax_snode (const int, int *, const int, int *, 00345 int *, int*); 00346 extern void resetrep_col (const int, const int *, int *); 00347 extern int spcoletree (int *, int *, int *, int, int, int *); 00348 extern int *TreePostorder (int, int *); 00349 extern double SuperLU_timer_ (); 00350 extern int sp_ienv (int); 00351 extern int lsame_ (char *, char *); 00352 extern int xerbla_ (char *, int *); 00353 extern void ifill (int *, int, int); 00354 extern void snode_profile (int, int *); 00355 extern void super_stats (int, int *); 00356 extern void check_repfnz(int, int, int, int *); 00357 extern void PrintSumm (char *, int, int, int); 00358 extern void StatInit(SuperLUStat_t *); 00359 extern void StatPrint (SuperLUStat_t *); 00360 extern void StatFree(SuperLUStat_t *); 00361 extern void print_panel_seg(int, int, int, int, int *, int *); 00362 extern int print_int_vec(char *,int, int *); 00363 extern int slu_PrintInt10(char *, int, int *); 00364 00365 #ifdef __cplusplus 00366 } 00367 #endif 00368 00369 #endif /* __SUPERLU_UTIL */