ModFEM: lsd_mkb/lsd_mkb_superlu/superlu_seq/slu

00001 
00011 #ifndef __SUPERLU_UTIL /* allow multiple inclusions */
00012 #define __SUPERLU_UTIL
00013 
00014 #include <stdio.h>
00015 #include <stdlib.h>
00016 #include <string.h>
00017 /*
00018 #ifndef __STDC__
00019 #include <malloc.h>
00020 #endif
00021 */
00022 #include <assert.h>
00023 #include "superlu_enum_consts.h"
00024 
00025 /***********************************************************************
00026  * Macros
00027  ***********************************************************************/
00028 #define FIRSTCOL_OF_SNODE(i)    (xsup[i])
00029 /* No of marker arrays used in the symbolic factorization,
00030    each of size n */
00031 #define NO_MARKER     3
00032 #define NUM_TEMPV(m,w,t,b)  ( SUPERLU_MAX(m, (t + b)*w) )
00033 
00034 #ifndef USER_ABORT
00035 #define USER_ABORT(msg) superlu_abort_and_exit(msg)
00036 #endif
00037 
00038 #define ABORT(err_msg) \
00039  { char msg[256];\
00040    sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\
00041    USER_ABORT(msg); }
00042 
00043 
00044 #ifndef USER_MALLOC
00045 #if 1
00046 #define USER_MALLOC(size) superlu_malloc(size)
00047 #else
00048 /* The following may check out some uninitialized data */
00049 #define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size)
00050 #endif
00051 #endif
00052 
00053 #define SUPERLU_MALLOC(size) USER_MALLOC(size)
00054 
00055 #ifndef USER_FREE
00056 #define USER_FREE(addr) superlu_free(addr)
00057 #endif
00058 
00059 #define SUPERLU_FREE(addr) USER_FREE(addr)
00060 
00061 #define CHECK_MALLOC(where) {                 \
00062     extern int superlu_malloc_total;        \
00063     printf("%s: malloc_total %d Bytes\n",     \
00064        where, superlu_malloc_total); \
00065 }
00066 
00067 #define SUPERLU_MAX(x, y)   ( (x) > (y) ? (x) : (y) )
00068 #define SUPERLU_MIN(x, y)   ( (x) < (y) ? (x) : (y) )
00069 
00070 /*********************************************************
00071  * Macros used for easy access of sparse matrix entries. *
00072  *********************************************************/
00073 #define L_SUB_START(col)     ( Lstore->rowind_colptr[col] )
00074 #define L_SUB(ptr)           ( Lstore->rowind[ptr] )
00075 #define L_NZ_START(col)      ( Lstore->nzval_colptr[col] )
00076 #define L_FST_SUPC(superno)  ( Lstore->sup_to_col[superno] )
00077 #define U_NZ_START(col)      ( Ustore->colptr[col] )
00078 #define U_SUB(ptr)           ( Ustore->rowind[ptr] )
00079 
00080 
00081 /***********************************************************************
00082  * Constants 
00083  ***********************************************************************/
00084 #define EMPTY   (-1)
00085 /*#define NO    (-1)*/
00086 #define FALSE   0
00087 #define TRUE    1
00088 
00089 #define NO_MEMTYPE  4      /* 0: lusup;
00090                   1: ucol;
00091                   2: lsub;
00092                   3: usub */
00093 
00094 #define GluIntArray(n)   (5 * (n) + 5)
00095 
00096 /* Dropping rules */
00097 #define  NODROP         ( 0x0000 )
00098 #define  DROP_BASIC ( 0x0001 )  /* ILU(tau) */
00099 #define  DROP_PROWS ( 0x0002 )  /* ILUTP: keep p maximum rows */
00100 #define  DROP_COLUMN    ( 0x0004 )  /* ILUTP: for j-th column, 
00101                               p = gamma * nnz(A(:,j)) */
00102 #define  DROP_AREA  ( 0x0008 )  /* ILUTP: for j-th column, use
00103                           nnz(F(:,1:j)) / nnz(A(:,1:j))
00104                           to limit memory growth  */
00105 #define  DROP_SECONDARY ( 0x000E )  /* PROWS | COLUMN | AREA */
00106 #define  DROP_DYNAMIC   ( 0x0010 )  /* adaptive tau */
00107 #define  DROP_INTERP    ( 0x0100 )  /* use interpolation */
00108 
00109 
00110 #if 1
00111 #define MILU_ALPHA (1.0e-2) /* multiple of drop_sum to be added to diagonal */
00112 #else
00113 #define MILU_ALPHA  1.0 /* multiple of drop_sum to be added to diagonal */
00114 #endif
00115 
00116 
00117 /***********************************************************************
00118  * Type definitions
00119  ***********************************************************************/
00120 typedef float    flops_t;
00121 typedef unsigned char Logical;
00122 
00123 /* 
00124  *-- This contains the options used to control the solution process.
00125  *
00126  * Fact   (fact_t)
00127  *        Specifies whether or not the factored form of the matrix
00128  *        A is supplied on entry, and if not, how the matrix A should
00129  *        be factorizaed.
00130  *        = DOFACT: The matrix A will be factorized from scratch, and the
00131  *             factors will be stored in L and U.
00132  *        = SamePattern: The matrix A will be factorized assuming
00133  *             that a factorization of a matrix with the same sparsity
00134  *             pattern was performed prior to this one. Therefore, this
00135  *             factorization will reuse column permutation vector 
00136  *             ScalePermstruct->perm_c and the column elimination tree
00137  *             LUstruct->etree.
00138  *        = SamePattern_SameRowPerm: The matrix A will be factorized
00139  *             assuming that a factorization of a matrix with the same
00140  *             sparsity pattern and similar numerical values was performed
00141  *             prior to this one. Therefore, this factorization will reuse
00142  *             both row and column scaling factors R and C, both row and
00143  *             column permutation vectors perm_r and perm_c, and the
00144  *             L & U data structures set up from the previous factorization.
00145  *        = FACTORED: On entry, L, U, perm_r and perm_c contain the 
00146  *              factored form of A. If DiagScale is not NOEQUIL, the matrix
00147  *              A has been equilibrated with scaling factors R and C.
00148  *
00149  * Equil  (yes_no_t)
00150  *        Specifies whether to equilibrate the system (scale A's row and
00151  *        columns to have unit norm).
00152  *
00153  * ColPerm (colperm_t)
00154  *        Specifies what type of column permutation to use to reduce fill.
00155  *        = NATURAL: use the natural ordering 
00156  *        = MMD_ATA: use minimum degree ordering on structure of A'*A
00157  *        = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
00158  *        = COLAMD: use approximate minimum degree column ordering
00159  *        = MY_PERMC: use the ordering specified by the user
00160  *         
00161  * Trans  (trans_t)
00162  *        Specifies the form of the system of equations:
00163  *        = NOTRANS: A * X = B        (No transpose)
00164  *        = TRANS:   A**T * X = B     (Transpose)
00165  *        = CONJ:    A**H * X = B     (Transpose)
00166  *
00167  * IterRefine (IterRefine_t)
00168  *        Specifies whether to perform iterative refinement.
00169  *        = NO: no iterative refinement
00170  *        = SLU_SINGLE: perform iterative refinement in single precision
00171  *        = SLU_DOUBLE: perform iterative refinement in double precision
00172  *        = SLU_EXTRA: perform iterative refinement in extra precision
00173  *
00174  * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU)
00175  *        Specifies the threshold used for a diagonal entry to be an
00176  *        acceptable pivot.
00177  *
00178  * SymmetricMode (yest_no_t)
00179  *        Specifies whether to use symmetric mode. Symmetric mode gives 
00180  *        preference to diagonal pivots, and uses an (A'+A)-based column
00181  *        permutation algorithm.
00182  *
00183  * PivotGrowth (yes_no_t)
00184  *        Specifies whether to compute the reciprocal pivot growth.
00185  *
00186  * ConditionNumber (ues_no_t)
00187  *        Specifies whether to compute the reciprocal condition number.
00188  *
00189  * RowPerm (rowperm_t) (only for SuperLU_DIST or ILU)
00190  *        Specifies whether to permute rows of the original matrix.
00191  *        = NO: not to permute the rows
00192  *        = LargeDiag: make the diagonal large relative to the off-diagonal
00193  *        = MY_PERMR: use the permutation given by the user
00194  *
00195  * ILU_DropRule (int)
00196  *        Specifies the dropping rule:
00197  *    = DROP_BASIC:   Basic dropping rule, supernodal based ILUTP(tau).
00198  *    = DROP_PROWS:   Supernodal based ILUTP(p,tau), p = gamma * nnz(A)/n.
00199  *    = DROP_COLUMN:  Variant of ILUTP(p,tau), for j-th column,
00200  *                p = gamma * nnz(A(:,j)).
00201  *    = DROP_AREA:    Variation of ILUTP, for j-th column, use
00202  *                nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory.
00203  *    = DROP_DYNAMIC: Modify the threshold tau during factorizaion:
00204  *            If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma
00205  *                tau_L(j) := MIN(tau_0, tau_L(j-1) * 2);
00206  *            Otherwise
00207  *                tau_L(j) := MAX(tau_0, tau_L(j-1) / 2);
00208  *            tau_U(j) uses the similar rule.
00209  *            NOTE: the thresholds used by L and U are separate.
00210  *    = DROP_INTERP:  Compute the second dropping threshold by
00211  *                    interpolation instead of sorting (default).
00212  *                    In this case, the actual fill ratio is not
00213  *            guaranteed to be smaller than gamma.
00214  *        Note: DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
00215  *    ( Default: DROP_BASIC | DROP_AREA )
00216  *
00217  * ILU_DropTol (double)
00218  *        numerical threshold for dropping.
00219  *
00220  * ILU_FillFactor (double) 
00221  *        Gamma in the secondary dropping.
00222  *
00223  * ILU_Norm (norm_t)
00224  *        Specify which norm to use to measure the row size in a
00225  *        supernode: infinity-norm, 1-norm, or 2-norm.
00226  *
00227  * ILU_FillTol (double)
00228  *        numerical threshold for zero pivot perturbation.
00229  *
00230  * ILU_MILU (milu_t)
00231  *        Specifies which version of MILU to use.
00232  *
00233  * ILU_MILU_Dim (double) 
00234  *        Dimension of the PDE if available.
00235  *
00236  * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
00237  *        Specifies whether to replace the tiny diagonals by
00238  *        sqrt(epsilon)*||A|| during LU factorization.
00239  *
00240  * SolveInitialized (yes_no_t) (only for SuperLU_DIST)
00241  *        Specifies whether the initialization has been performed to the
00242  *        triangular solve.
00243  *
00244  * RefineInitialized (yes_no_t) (only for SuperLU_DIST)
00245  *        Specifies whether the initialization has been performed to the
00246  *        sparse matrix-vector multiplication routine needed in iterative
00247  *        refinement.
00248  *
00249  * PrintStat (yes_no_t)
00250  *        Specifies whether to print the solver's statistics.
00251  */
00252 typedef struct {
00253     fact_t        Fact;
00254     yes_no_t      Equil;
00255     colperm_t     ColPerm;
00256     trans_t       Trans;
00257     IterRefine_t  IterRefine;
00258     double        DiagPivotThresh;
00259     yes_no_t      SymmetricMode;
00260     yes_no_t      PivotGrowth;
00261     yes_no_t      ConditionNumber;
00262     rowperm_t     RowPerm;
00263     int       ILU_DropRule;
00264     double    ILU_DropTol;    /* threshold for dropping */
00265     double    ILU_FillFactor; /* gamma in the secondary dropping */
00266     norm_t    ILU_Norm;       /* infinity-norm, 1-norm, or 2-norm */
00267     double    ILU_FillTol;    /* threshold for zero pivot perturbation */
00268     milu_t    ILU_MILU;
00269     double    ILU_MILU_Dim;   /* Dimension of PDE (if available) */
00270     yes_no_t      ParSymbFact;
00271     yes_no_t      ReplaceTinyPivot; /* used in SuperLU_DIST */
00272     yes_no_t      SolveInitialized;
00273     yes_no_t      RefineInitialized;
00274     yes_no_t      PrintStat;
00275     int           nnzL, nnzU;      /* used to store nnzs for now       */
00276     int           num_lookaheads;  /* num of levels in look-ahead      */
00277     yes_no_t      lookahead_etree; /* use etree computed from the
00278                       serial symbolic factorization */
00279     yes_no_t      SymPattern;      /* symmetric factorization          */
00280 } superlu_options_t;
00281 
00283 typedef struct e_node {
00284     int size;      /* length of the memory that has been used */
00285     void *mem;     /* pointer to the new malloc'd store */
00286 } ExpHeader;
00287 
00288 typedef struct {
00289     int  size;
00290     int  used;
00291     int  top1;  /* grow upward, relative to &array[0] */
00292     int  top2;  /* grow downward */
00293     void *array;
00294 } LU_stack_t;
00295 
00296 typedef struct {
00297     int     *panel_histo; /* histogram of panel size distribution */
00298     double  *utime;       /* running time at various phases */
00299     flops_t *ops;         /* operation count at various phases */
00300     int     TinyPivots;   /* number of tiny pivots */
00301     int     RefineSteps;  /* number of iterative refinement steps */
00302     int     expansions;   /* number of memory expansions */
00303 } SuperLUStat_t;
00304 
00305 typedef struct {
00306     float for_lu;
00307     float total_needed;
00308 } mem_usage_t;
00309 
00310 
00311 /***********************************************************************
00312  * Prototypes
00313  ***********************************************************************/
00314 #ifdef __cplusplus
00315 extern "C" {
00316 #endif
00317 
00318 extern int     input_error(char *, int *);
00319 
00320 extern void    Destroy_SuperMatrix_Store(SuperMatrix *);
00321 extern void    Destroy_CompCol_Matrix(SuperMatrix *);
00322 extern void    Destroy_CompRow_Matrix(SuperMatrix *);
00323 extern void    Destroy_SuperNode_Matrix(SuperMatrix *);
00324 extern void    Destroy_CompCol_Permuted(SuperMatrix *);
00325 extern void    Destroy_Dense_Matrix(SuperMatrix *);
00326 extern void    get_perm_c(int, SuperMatrix *, int *);
00327 extern void    set_default_options(superlu_options_t *options);
00328 extern void    ilu_set_default_options(superlu_options_t *options);
00329 extern void    sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*,
00330                 SuperMatrix*);
00331 extern void    superlu_abort_and_exit(char*);
00332 extern void    *superlu_malloc (size_t);
00333 extern int     *intMalloc (int);
00334 extern int     *intCalloc (int);
00335 extern void    superlu_free (void*);
00336 extern void    SetIWork (int, int, int, int *, int **, int **, int **,
00337                          int **, int **, int **, int **);
00338 extern int     sp_coletree (int *, int *, int *, int, int, int *);
00339 extern void    relax_snode (const int, int *, const int, int *, int *);
00340 extern void    heap_relax_snode (const int, int *, const int, int *, int *);
00341 extern int     mark_relax(int, int *, int *, int *, int *, int *, int *);
00342 extern void    ilu_relax_snode (const int, int *, const int, int *,
00343                 int *, int *);
00344 extern void    ilu_heap_relax_snode (const int, int *, const int, int *,
00345                      int *, int*);
00346 extern void    resetrep_col (const int, const int *, int *);
00347 extern int     spcoletree (int *, int *, int *, int, int, int *);
00348 extern int     *TreePostorder (int, int *);
00349 extern double  SuperLU_timer_ ();
00350 extern int     sp_ienv (int);
00351 extern int     lsame_ (char *, char *);
00352 extern int     xerbla_ (char *, int *);
00353 extern void    ifill (int *, int, int);
00354 extern void    snode_profile (int, int *);
00355 extern void    super_stats (int, int *);
00356 extern void    check_repfnz(int, int, int, int *);
00357 extern void    PrintSumm (char *, int, int, int);
00358 extern void    StatInit(SuperLUStat_t *);
00359 extern void    StatPrint (SuperLUStat_t *);
00360 extern void    StatFree(SuperLUStat_t *);
00361 extern void    print_panel_seg(int, int, int, int, int *, int *);
00362 extern int     print_int_vec(char *,int, int *);
00363 extern int     slu_PrintInt10(char *, int, int *);
00364 
00365 #ifdef __cplusplus
00366   }
00367 #endif
00368 
00369 #endif /* __SUPERLU_UTIL */
lsd_mkb/lsd_mkb_superlu/superlu_seq/slu_util.h