template_lapack_stein.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_STEIN_HEADER
00036 #define TEMPLATE_LAPACK_STEIN_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_lapack_stein(const integer *n, const Treal *d__, const Treal *e, 
00041         const integer *m, const Treal *w, const integer *iblock, const integer *isplit, 
00042         Treal *z__, const integer *ldz, Treal *work, integer *iwork, 
00043         integer *ifail, integer *info)
00044 {
00045 /*  -- LAPACK routine (version 3.0) --   
00046        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00047        Courant Institute, Argonne National Lab, and Rice University   
00048        September 30, 1994   
00049 
00050 
00051     Purpose   
00052     =======   
00053 
00054     DSTEIN computes the eigenvectors of a real symmetric tridiagonal   
00055     matrix T corresponding to specified eigenvalues, using inverse   
00056     iteration.   
00057 
00058     The maximum number of iterations allowed for each eigenvector is   
00059     specified by an internal parameter MAXITS (currently set to 5).   
00060 
00061     Arguments   
00062     =========   
00063 
00064     N       (input) INTEGER   
00065             The order of the matrix.  N >= 0.   
00066 
00067     D       (input) DOUBLE PRECISION array, dimension (N)   
00068             The n diagonal elements of the tridiagonal matrix T.   
00069 
00070     E       (input) DOUBLE PRECISION array, dimension (N)   
00071             The (n-1) subdiagonal elements of the tridiagonal matrix   
00072             T, in elements 1 to N-1.  E(N) need not be set.   
00073 
00074     M       (input) INTEGER   
00075             The number of eigenvectors to be found.  0 <= M <= N.   
00076 
00077     W       (input) DOUBLE PRECISION array, dimension (N)   
00078             The first M elements of W contain the eigenvalues for   
00079             which eigenvectors are to be computed.  The eigenvalues   
00080             should be grouped by split-off block and ordered from   
00081             smallest to largest within the block.  ( The output array   
00082             W from DSTEBZ with ORDER = 'B' is expected here. )   
00083 
00084     IBLOCK  (input) INTEGER array, dimension (N)   
00085             The submatrix indices associated with the corresponding   
00086             eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to   
00087             the first submatrix from the top, =2 if W(i) belongs to   
00088             the second submatrix, etc.  ( The output array IBLOCK   
00089             from DSTEBZ is expected here. )   
00090 
00091     ISPLIT  (input) INTEGER array, dimension (N)   
00092             The splitting points, at which T breaks up into submatrices.   
00093             The first submatrix consists of rows/columns 1 to   
00094             ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1   
00095             through ISPLIT( 2 ), etc.   
00096             ( The output array ISPLIT from DSTEBZ is expected here. )   
00097 
00098     Z       (output) DOUBLE PRECISION array, dimension (LDZ, M)   
00099             The computed eigenvectors.  The eigenvector associated   
00100             with the eigenvalue W(i) is stored in the i-th column of   
00101             Z.  Any vector which fails to converge is set to its current   
00102             iterate after MAXITS iterations.   
00103 
00104     LDZ     (input) INTEGER   
00105             The leading dimension of the array Z.  LDZ >= max(1,N).   
00106 
00107     WORK    (workspace) DOUBLE PRECISION array, dimension (5*N)   
00108 
00109     IWORK   (workspace) INTEGER array, dimension (N)   
00110 
00111     IFAIL   (output) INTEGER array, dimension (M)   
00112             On normal exit, all elements of IFAIL are zero.   
00113             If one or more eigenvectors fail to converge after   
00114             MAXITS iterations, then their indices are stored in   
00115             array IFAIL.   
00116 
00117     INFO    (output) INTEGER   
00118             = 0: successful exit.   
00119             < 0: if INFO = -i, the i-th argument had an illegal value   
00120             > 0: if INFO = i, then i eigenvectors failed to converge   
00121                  in MAXITS iterations.  Their indices are stored in   
00122                  array IFAIL.   
00123 
00124     Internal Parameters   
00125     ===================   
00126 
00127     MAXITS  INTEGER, default = 5   
00128             The maximum number of iterations performed.   
00129 
00130     EXTRA   INTEGER, default = 2   
00131             The number of iterations performed after norm growth   
00132             criterion is satisfied, should be at least 1.   
00133 
00134     =====================================================================   
00135 
00136 
00137        Test the input parameters.   
00138 
00139        Parameter adjustments */
00140     /* Table of constant values */
00141      integer c__2 = 2;
00142      integer c__1 = 1;
00143      integer c_n1 = -1;
00144     
00145     /* System generated locals */
00146     integer z_dim1, z_offset, i__1, i__2, i__3;
00147     Treal d__1, d__2, d__3, d__4, d__5;
00148     /* Local variables */
00149      integer jblk, nblk;
00150      integer jmax;
00151      integer i__, j;
00152      integer iseed[4], gpind, iinfo;
00153      integer b1;
00154      integer j1;
00155      Treal ortol;
00156      integer indrv1, indrv2, indrv3, indrv4, indrv5, bn;
00157      Treal xj;
00158      integer nrmchk;
00159      integer blksiz;
00160      Treal onenrm, dtpcrt, pertol, scl, eps, sep, nrm, tol;
00161      integer its;
00162      Treal xjm, ztr, eps1;
00163 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
00164 
00165 
00166     --d__;
00167     --e;
00168     --w;
00169     --iblock;
00170     --isplit;
00171     z_dim1 = *ldz;
00172     z_offset = 1 + z_dim1 * 1;
00173     z__ -= z_offset;
00174     --work;
00175     --iwork;
00176     --ifail;
00177 
00178     /* Initialization added by Elias to get rid of compiler warnings. */
00179     ortol = dtpcrt = xjm = onenrm = gpind = 0;
00180     /* Function Body */
00181     *info = 0;
00182     i__1 = *m;
00183     for (i__ = 1; i__ <= i__1; ++i__) {
00184         ifail[i__] = 0;
00185 /* L10: */
00186     }
00187 
00188     if (*n < 0) {
00189         *info = -1;
00190     } else if (*m < 0 || *m > *n) {
00191         *info = -4;
00192     } else if (*ldz < maxMACRO(1,*n)) {
00193         *info = -9;
00194     } else {
00195         i__1 = *m;
00196         for (j = 2; j <= i__1; ++j) {
00197             if (iblock[j] < iblock[j - 1]) {
00198                 *info = -6;
00199                 goto L30;
00200             }
00201             if (iblock[j] == iblock[j - 1] && w[j] < w[j - 1]) {
00202                 *info = -5;
00203                 goto L30;
00204             }
00205 /* L20: */
00206         }
00207 L30:
00208         ;
00209     }
00210 
00211     if (*info != 0) {
00212         i__1 = -(*info);
00213         template_blas_erbla("STEIN ", &i__1);
00214         return 0;
00215     }
00216 
00217 /*     Quick return if possible */
00218 
00219     if (*n == 0 || *m == 0) {
00220         return 0;
00221     } else if (*n == 1) {
00222         z___ref(1, 1) = 1.;
00223         return 0;
00224     }
00225 
00226 /*     Get machine constants. */
00227 
00228     eps = template_lapack_lamch("Precision", (Treal)0);
00229 
00230 /*     Initialize seed for random number generator DLARNV. */
00231 
00232     for (i__ = 1; i__ <= 4; ++i__) {
00233         iseed[i__ - 1] = 1;
00234 /* L40: */
00235     }
00236 
00237 /*     Initialize pointers. */
00238 
00239     indrv1 = 0;
00240     indrv2 = indrv1 + *n;
00241     indrv3 = indrv2 + *n;
00242     indrv4 = indrv3 + *n;
00243     indrv5 = indrv4 + *n;
00244 
00245 /*     Compute eigenvectors of matrix blocks. */
00246 
00247     j1 = 1;
00248     i__1 = iblock[*m];
00249     for (nblk = 1; nblk <= i__1; ++nblk) {
00250 
00251 /*        Find starting and ending indices of block nblk. */
00252 
00253         if (nblk == 1) {
00254             b1 = 1;
00255         } else {
00256             b1 = isplit[nblk - 1] + 1;
00257         }
00258         bn = isplit[nblk];
00259         blksiz = bn - b1 + 1;
00260         if (blksiz == 1) {
00261             goto L60;
00262         }
00263         gpind = b1;
00264 
00265 /*        Compute reorthogonalization criterion and stopping criterion. */
00266 
00267         onenrm = (d__1 = d__[b1], absMACRO(d__1)) + (d__2 = e[b1], absMACRO(d__2));
00268 /* Computing MAX */
00269         d__3 = onenrm, d__4 = (d__1 = d__[bn], absMACRO(d__1)) + (d__2 = e[bn - 1],
00270                  absMACRO(d__2));
00271         onenrm = maxMACRO(d__3,d__4);
00272         i__2 = bn - 1;
00273         for (i__ = b1 + 1; i__ <= i__2; ++i__) {
00274 /* Computing MAX */
00275             d__4 = onenrm, d__5 = (d__1 = d__[i__], absMACRO(d__1)) + (d__2 = e[
00276                     i__ - 1], absMACRO(d__2)) + (d__3 = e[i__], absMACRO(d__3));
00277             onenrm = maxMACRO(d__4,d__5);
00278 /* L50: */
00279         }
00280         ortol = onenrm * .001;
00281 
00282         dtpcrt = template_blas_sqrt(.1 / blksiz);
00283 
00284 /*        Loop through eigenvalues of block nblk. */
00285 
00286 L60:
00287         jblk = 0;
00288         i__2 = *m;
00289         for (j = j1; j <= i__2; ++j) {
00290             if (iblock[j] != nblk) {
00291                 j1 = j;
00292                 goto L160;
00293             }
00294             ++jblk;
00295             xj = w[j];
00296 
00297 /*           Skip all the work if the block size is one. */
00298 
00299             if (blksiz == 1) {
00300                 work[indrv1 + 1] = 1.;
00301                 goto L120;
00302             }
00303 
00304 /*           If eigenvalues j and j-1 are too close, add a relatively   
00305              small perturbation. */
00306 
00307             if (jblk > 1) {
00308                 eps1 = (d__1 = eps * xj, absMACRO(d__1));
00309                 pertol = eps1 * 10.;
00310                 sep = xj - xjm;
00311                 if (sep < pertol) {
00312                     xj = xjm + pertol;
00313                 }
00314             }
00315 
00316             its = 0;
00317             nrmchk = 0;
00318 
00319 /*           Get random starting vector. */
00320 
00321             template_lapack_larnv(&c__2, iseed, &blksiz, &work[indrv1 + 1]);
00322 
00323 /*           Copy the matrix T so it won't be destroyed in factorization. */
00324 
00325             template_blas_copy(&blksiz, &d__[b1], &c__1, &work[indrv4 + 1], &c__1);
00326             i__3 = blksiz - 1;
00327             template_blas_copy(&i__3, &e[b1], &c__1, &work[indrv2 + 2], &c__1);
00328             i__3 = blksiz - 1;
00329             template_blas_copy(&i__3, &e[b1], &c__1, &work[indrv3 + 1], &c__1);
00330 
00331 /*           Compute LU factors with partial pivoting  ( PT = LU ) */
00332 
00333             tol = 0.;
00334             template_lapack_lagtf(&blksiz, &work[indrv4 + 1], &xj, &work[indrv2 + 2], &work[
00335                     indrv3 + 1], &tol, &work[indrv5 + 1], &iwork[1], &iinfo);
00336 
00337 /*           Update iteration count. */
00338 
00339 L70:
00340             ++its;
00341             if (its > 5) {
00342                 goto L100;
00343             }
00344 
00345 /*           Normalize and scale the righthand side vector Pb.   
00346 
00347    Computing MAX */
00348             d__2 = eps, d__3 = (d__1 = work[indrv4 + blksiz], absMACRO(d__1));
00349             scl = blksiz * onenrm * maxMACRO(d__2,d__3) / template_blas_asum(&blksiz, &work[
00350                     indrv1 + 1], &c__1);
00351             template_blas_scal(&blksiz, &scl, &work[indrv1 + 1], &c__1);
00352 
00353 /*           Solve the system LU = Pb. */
00354 
00355             template_lapack_lagts(&c_n1, &blksiz, &work[indrv4 + 1], &work[indrv2 + 2], &
00356                     work[indrv3 + 1], &work[indrv5 + 1], &iwork[1], &work[
00357                     indrv1 + 1], &tol, &iinfo);
00358 
00359 /*           Reorthogonalize by modified Gram-Schmidt if eigenvalues are   
00360              close enough. */
00361 
00362             if (jblk == 1) {
00363                 goto L90;
00364             }
00365             if ((d__1 = xj - xjm, absMACRO(d__1)) > ortol) {
00366                 gpind = j;
00367             }
00368             if (gpind != j) {
00369                 i__3 = j - 1;
00370                 for (i__ = gpind; i__ <= i__3; ++i__) {
00371                     ztr = -template_blas_dot(&blksiz, &work[indrv1 + 1], &c__1, &z___ref(
00372                             b1, i__), &c__1);
00373                     template_blas_axpy(&blksiz, &ztr, &z___ref(b1, i__), &c__1, &work[
00374                             indrv1 + 1], &c__1);
00375 /* L80: */
00376                 }
00377             }
00378 
00379 /*           Check the infinity norm of the iterate. */
00380 
00381 L90:
00382             jmax = template_blas_idamax(&blksiz, &work[indrv1 + 1], &c__1);
00383             nrm = (d__1 = work[indrv1 + jmax], absMACRO(d__1));
00384 
00385 /*           Continue for additional iterations after norm reaches   
00386              stopping criterion. */
00387 
00388             if (nrm < dtpcrt) {
00389                 goto L70;
00390             }
00391             ++nrmchk;
00392             if (nrmchk < 3) {
00393                 goto L70;
00394             }
00395 
00396             goto L110;
00397 
00398 /*           If stopping criterion was not satisfied, update info and   
00399              store eigenvector number in array ifail. */
00400 
00401 L100:
00402             ++(*info);
00403             ifail[*info] = j;
00404 
00405 /*           Accept iterate as jth eigenvector. */
00406 
00407 L110:
00408             scl = 1. / template_blas_nrm2(&blksiz, &work[indrv1 + 1], &c__1);
00409             jmax = template_blas_idamax(&blksiz, &work[indrv1 + 1], &c__1);
00410             if (work[indrv1 + jmax] < 0.) {
00411                 scl = -scl;
00412             }
00413             template_blas_scal(&blksiz, &scl, &work[indrv1 + 1], &c__1);
00414 L120:
00415             i__3 = *n;
00416             for (i__ = 1; i__ <= i__3; ++i__) {
00417                 z___ref(i__, j) = 0.;
00418 /* L130: */
00419             }
00420             i__3 = blksiz;
00421             for (i__ = 1; i__ <= i__3; ++i__) {
00422                 z___ref(b1 + i__ - 1, j) = work[indrv1 + i__];
00423 /* L140: */
00424             }
00425 
00426 /*           Save the shift to check eigenvalue spacing at next   
00427              iteration. */
00428 
00429             xjm = xj;
00430 
00431 /* L150: */
00432         }
00433 L160:
00434         ;
00435     }
00436 
00437     return 0;
00438 
00439 /*     End of DSTEIN */
00440 
00441 } /* dstein_ */
00442 
00443 #undef z___ref
00444 
00445 
00446 #endif

Generated on Mon Sep 17 14:32:56 2012 for ergo by  doxygen 1.4.7