template_lapack_larrv.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_LARRV_HEADER
00036 #define TEMPLATE_LAPACK_LARRV_HEADER
00037 
00038 template<class Treal>
00039 int template_lapack_larrv(const integer *n, Treal *vl, Treal *vu, 
00040         Treal *d__, Treal *l, Treal *pivmin, integer *isplit, 
00041         integer *m, integer *dol, integer *dou, Treal *minrgp, 
00042         Treal *rtol1, Treal *rtol2, Treal *w, Treal *werr, 
00043          Treal *wgap, integer *iblock, integer *indexw, Treal *gers, 
00044          Treal *z__, const integer *ldz, integer *isuppz, Treal *work, 
00045         integer *iwork, integer *info)
00046 {
00047     /* System generated locals */
00048     integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
00049     Treal d__1, d__2;
00050     logical L__1;
00051 
00052 
00053     /* Local variables */
00054     integer minwsize, i__, j, k, p, q, miniwsize, ii;
00055     Treal gl;
00056     integer im, in;
00057     Treal gu, gap, eps, tau, tol, tmp;
00058     integer zto;
00059     Treal ztz;
00060     integer iend, jblk;
00061     Treal lgap;
00062     integer done;
00063     Treal rgap, left;
00064     integer wend, iter;
00065     Treal bstw = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
00066     integer itmp1;
00067     integer indld;
00068     Treal fudge;
00069     integer idone;
00070     Treal sigma;
00071     integer iinfo, iindr;
00072     Treal resid;
00073     logical eskip;
00074     Treal right;
00075     integer nclus, zfrom;
00076     Treal rqtol;
00077     integer iindc1, iindc2;
00078     logical stp2ii;
00079     Treal lambda;
00080     integer ibegin, indeig;
00081     logical needbs;
00082     integer indlld;
00083     Treal sgndef, mingma;
00084     integer oldien, oldncl, wbegin;
00085     Treal spdiam;
00086     integer negcnt;
00087     integer oldcls;
00088     Treal savgap;
00089     integer ndepth;
00090     Treal ssigma;
00091     logical usedbs;
00092     integer iindwk, offset;
00093     Treal gaptol;
00094     integer newcls, oldfst, indwrk, windex, oldlst;
00095     logical usedrq;
00096     integer newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl;
00097     Treal bstres = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
00098     integer newsiz, zusedu, zusedw;
00099     Treal nrminv, rqcorr;
00100     logical tryrqc;
00101     integer isupmx;
00102 
00103 
00104 /*  -- LAPACK auxiliary routine (version 3.2) -- */
00105 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00106 /*     November 2006 */
00107 
00108 /*     .. Scalar Arguments .. */
00109 /*     .. */
00110 /*     .. Array Arguments .. */
00111 /*     .. */
00112 
00113 /*  Purpose */
00114 /*  ======= */
00115 
00116 /*  DLARRV computes the eigenvectors of the tridiagonal matrix */
00117 /*  T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */
00118 /*  The input eigenvalues should have been computed by DLARRE. */
00119 
00120 /*  Arguments */
00121 /*  ========= */
00122 
00123 /*  N       (input) INTEGER */
00124 /*          The order of the matrix.  N >= 0. */
00125 
00126 /*  VL      (input) DOUBLE PRECISION */
00127 /*  VU      (input) DOUBLE PRECISION */
00128 /*          Lower and upper bounds of the interval that contains the desired */
00129 /*          eigenvalues. VL < VU. Needed to compute gaps on the left or right */
00130 /*          end of the extremal eigenvalues in the desired RANGE. */
00131 
00132 /*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
00133 /*          On entry, the N diagonal elements of the diagonal matrix D. */
00134 /*          On exit, D may be overwritten. */
00135 
00136 /*  L       (input/output) DOUBLE PRECISION array, dimension (N) */
00137 /*          On entry, the (N-1) subdiagonal elements of the unit */
00138 /*          bidiagonal matrix L are in elements 1 to N-1 of L */
00139 /*          (if the matrix is not splitted.) At the end of each block */
00140 /*          is stored the corresponding shift as given by DLARRE. */
00141 /*          On exit, L is overwritten. */
00142 
00143 /*  PIVMIN  (in) DOUBLE PRECISION */
00144 /*          The minimum pivot allowed in the Sturm sequence. */
00145 
00146 /*  ISPLIT  (input) INTEGER array, dimension (N) */
00147 /*          The splitting points, at which T breaks up into blocks. */
00148 /*          The first block consists of rows/columns 1 to */
00149 /*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */
00150 /*          through ISPLIT( 2 ), etc. */
00151 
00152 /*  M       (input) INTEGER */
00153 /*          The total number of input eigenvalues.  0 <= M <= N. */
00154 
00155 /*  DOL     (input) INTEGER */
00156 /*  DOU     (input) INTEGER */
00157 /*          If the user wants to compute only selected eigenvectors from all */
00158 /*          the eigenvalues supplied, he can specify an index range DOL:DOU. */
00159 /*          Or else the setting DOL=1, DOU=M should be applied. */
00160 /*          Note that DOL and DOU refer to the order in which the eigenvalues */
00161 /*          are stored in W. */
00162 /*          If the user wants to compute only selected eigenpairs, then */
00163 /*          the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */
00164 /*          computed eigenvectors. All other columns of Z are set to zero. */
00165 
00166 /*  MINRGP  (input) DOUBLE PRECISION */
00167 
00168 /*  RTOL1   (input) DOUBLE PRECISION */
00169 /*  RTOL2   (input) DOUBLE PRECISION */
00170 /*           Parameters for bisection. */
00171 /*           An interval [LEFT,RIGHT] has converged if */
00172 /*           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
00173 
00174 /*  W       (input/output) DOUBLE PRECISION array, dimension (N) */
00175 /*          The first M elements of W contain the APPROXIMATE eigenvalues for */
00176 /*          which eigenvectors are to be computed.  The eigenvalues */
00177 /*          should be grouped by split-off block and ordered from */
00178 /*          smallest to largest within the block ( The output array */
00179 /*          W from DLARRE is expected here ). Furthermore, they are with */
00180 /*          respect to the shift of the corresponding root representation */
00181 /*          for their block. On exit, W holds the eigenvalues of the */
00182 /*          UNshifted matrix. */
00183 
00184 /*  WERR    (input/output) DOUBLE PRECISION array, dimension (N) */
00185 /*          The first M elements contain the semiwidth of the uncertainty */
00186 /*          interval of the corresponding eigenvalue in W */
00187 
00188 /*  WGAP    (input/output) DOUBLE PRECISION array, dimension (N) */
00189 /*          The separation from the right neighbor eigenvalue in W. */
00190 
00191 /*  IBLOCK  (input) INTEGER array, dimension (N) */
00192 /*          The indices of the blocks (submatrices) associated with the */
00193 /*          corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
00194 /*          W(i) belongs to the first block from the top, =2 if W(i) */
00195 /*          belongs to the second block, etc. */
00196 
00197 /*  INDEXW  (input) INTEGER array, dimension (N) */
00198 /*          The indices of the eigenvalues within each block (submatrix); */
00199 /*          for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
00200 /*          i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */
00201 
00202 /*  GERS    (input) DOUBLE PRECISION array, dimension (2*N) */
00203 /*          The N Gerschgorin intervals (the i-th Gerschgorin interval */
00204 /*          is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */
00205 /*          be computed from the original UNshifted matrix. */
00206 
00207 /*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */
00208 /*          If INFO = 0, the first M columns of Z contain the */
00209 /*          orthonormal eigenvectors of the matrix T */
00210 /*          corresponding to the input eigenvalues, with the i-th */
00211 /*          column of Z holding the eigenvector associated with W(i). */
00212 /*          Note: the user must ensure that at least max(1,M) columns are */
00213 /*          supplied in the array Z. */
00214 
00215 /*  LDZ     (input) INTEGER */
00216 /*          The leading dimension of the array Z.  LDZ >= 1, and if */
00217 /*          JOBZ = 'V', LDZ >= max(1,N). */
00218 
00219 /*  ISUPPZ  (output) INTEGER array, dimension ( 2*max(1,M) ) */
00220 /*          The support of the eigenvectors in Z, i.e., the indices */
00221 /*          indicating the nonzero elements in Z. The I-th eigenvector */
00222 /*          is nonzero only in elements ISUPPZ( 2*I-1 ) through */
00223 /*          ISUPPZ( 2*I ). */
00224 
00225 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (12*N) */
00226 
00227 /*  IWORK   (workspace) INTEGER array, dimension (7*N) */
00228 
00229 /*  INFO    (output) INTEGER */
00230 /*          = 0:  successful exit */
00231 
00232 /*          > 0:  A problem occured in DLARRV. */
00233 /*          < 0:  One of the called subroutines signaled an internal problem. */
00234 /*                Needs inspection of the corresponding parameter IINFO */
00235 /*                for further information. */
00236 
00237 /*          =-1:  Problem in DLARRB when refining a child's eigenvalues. */
00238 /*          =-2:  Problem in DLARRF when computing the RRR of a child. */
00239 /*                When a child is inside a tight cluster, it can be difficult */
00240 /*                to find an RRR. A partial remedy from the user's point of */
00241 /*                view is to make the parameter MINRGP smaller and recompile. */
00242 /*                However, as the orthogonality of the computed vectors is */
00243 /*                proportional to 1/MINRGP, the user should be aware that */
00244 /*                he might be trading in precision when he decreases MINRGP. */
00245 /*          =-3:  Problem in DLARRB when refining a single eigenvalue */
00246 /*                after the Rayleigh correction was rejected. */
00247 /*          = 5:  The Rayleigh Quotient Iteration failed to converge to */
00248 /*                full accuracy in MAXITR steps. */
00249 
00250 /*  Further Details */
00251 /*  =============== */
00252 
00253 /*  Based on contributions by */
00254 /*     Beresford Parlett, University of California, Berkeley, USA */
00255 /*     Jim Demmel, University of California, Berkeley, USA */
00256 /*     Inderjit Dhillon, University of Texas, Austin, USA */
00257 /*     Osni Marques, LBNL/NERSC, USA */
00258 /*     Christof Voemel, University of California, Berkeley, USA */
00259 
00260 /*  ===================================================================== */
00261 
00262 /*     .. Parameters .. */
00263 /*     .. */
00264 /*     .. Local Scalars .. */
00265 /*     .. */
00266 /*     .. External Functions .. */
00267 /*     .. */
00268 /*     .. External Subroutines .. */
00269 /*     .. */
00270 /*     .. Intrinsic Functions .. */
00271 /*     .. */
00272 /*     .. Executable Statements .. */
00273 /*     .. */
00274 /*     The first N entries of WORK are reserved for the eigenvalues */
00275 
00276 /* Table of constant values */
00277 
00278  Treal c_b5 = 0.;
00279  integer c__1 = 1;
00280  integer c__2 = 2;
00281 
00282 
00283     /* Parameter adjustments */
00284     --d__;
00285     --l;
00286     --isplit;
00287     --w;
00288     --werr;
00289     --wgap;
00290     --iblock;
00291     --indexw;
00292     --gers;
00293     z_dim1 = *ldz;
00294     z_offset = 1 + z_dim1;
00295     z__ -= z_offset;
00296     --isuppz;
00297     --work;
00298     --iwork;
00299 
00300     /* Function Body */
00301     indld = *n + 1;
00302     indlld = (*n << 1) + 1;
00303     indwrk = *n * 3 + 1;
00304     minwsize = *n * 12;
00305     i__1 = minwsize;
00306     for (i__ = 1; i__ <= i__1; ++i__) {
00307         work[i__] = 0.;
00308 /* L5: */
00309     }
00310 /*     IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */
00311 /*     factorization used to compute the FP vector */
00312     iindr = 0;
00313 /*     IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */
00314 /*     layer and the one above. */
00315     iindc1 = *n;
00316     iindc2 = *n << 1;
00317     iindwk = *n * 3 + 1;
00318     miniwsize = *n * 7;
00319     i__1 = miniwsize;
00320     for (i__ = 1; i__ <= i__1; ++i__) {
00321         iwork[i__] = 0;
00322 /* L10: */
00323     }
00324     zusedl = 1;
00325     if (*dol > 1) {
00326 /*        Set lower bound for use of Z */
00327         zusedl = *dol - 1;
00328     }
00329     zusedu = *m;
00330     if (*dou < *m) {
00331 /*        Set lower bound for use of Z */
00332         zusedu = *dou + 1;
00333     }
00334 /*     The width of the part of Z that is used */
00335     zusedw = zusedu - zusedl + 1;
00336     template_lapack_laset("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz);
00337     eps = template_lapack_lamch("Precision",(Treal)0);
00338     rqtol = eps * 2.;
00339 
00340 /*     Set expert flags for standard code. */
00341     tryrqc = TRUE_;
00342     if (*dol == 1 && *dou == *m) {
00343     } else {
00344 /*        Only selected eigenpairs are computed. Since the other evalues */
00345 /*        are not refined by RQ iteration, bisection has to compute to full */
00346 /*        accuracy. */
00347         *rtol1 = eps * 4.;
00348         *rtol2 = eps * 4.;
00349     }
00350 /*     The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */
00351 /*     desired eigenvalues. The support of the nonzero eigenvector */
00352 /*     entries is contained in the interval IBEGIN:IEND. */
00353 /*     Remark that if k eigenpairs are desired, then the eigenvectors */
00354 /*     are stored in k contiguous columns of Z. */
00355 /*     DONE is the number of eigenvectors already computed */
00356     done = 0;
00357     ibegin = 1;
00358     wbegin = 1;
00359     i__1 = iblock[*m];
00360     for (jblk = 1; jblk <= i__1; ++jblk) {
00361         iend = isplit[jblk];
00362         sigma = l[iend];
00363 /*        Find the eigenvectors of the submatrix indexed IBEGIN */
00364 /*        through IEND. */
00365         wend = wbegin - 1;
00366 L15:
00367         if (wend < *m) {
00368             if (iblock[wend + 1] == jblk) {
00369                 ++wend;
00370                 goto L15;
00371             }
00372         }
00373         if (wend < wbegin) {
00374             ibegin = iend + 1;
00375             goto L170;
00376         } else if (wend < *dol || wbegin > *dou) {
00377             ibegin = iend + 1;
00378             wbegin = wend + 1;
00379             goto L170;
00380         }
00381 /*        Find local spectral diameter of the block */
00382         gl = gers[(ibegin << 1) - 1];
00383         gu = gers[ibegin * 2];
00384         i__2 = iend;
00385         for (i__ = ibegin + 1; i__ <= i__2; ++i__) {
00386 /* Computing MIN */
00387             d__1 = gers[(i__ << 1) - 1];
00388             gl = minMACRO(d__1,gl);
00389 /* Computing MAX */
00390             d__1 = gers[i__ * 2];
00391             gu = maxMACRO(d__1,gu);
00392 /* L20: */
00393         }
00394         spdiam = gu - gl;
00395 /*        OLDIEN is the last index of the previous block */
00396         oldien = ibegin - 1;
00397 /*        Calculate the size of the current block */
00398         in = iend - ibegin + 1;
00399 /*        The number of eigenvalues in the current block */
00400         im = wend - wbegin + 1;
00401 /*        This is for a 1x1 block */
00402         if (ibegin == iend) {
00403             ++done;
00404             z__[ibegin + wbegin * z_dim1] = 1.;
00405             isuppz[(wbegin << 1) - 1] = ibegin;
00406             isuppz[wbegin * 2] = ibegin;
00407             w[wbegin] += sigma;
00408             work[wbegin] = w[wbegin];
00409             ibegin = iend + 1;
00410             ++wbegin;
00411             goto L170;
00412         }
00413 /*        The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */
00414 /*        Note that these can be approximations, in this case, the corresp. */
00415 /*        entries of WERR give the size of the uncertainty interval. */
00416 /*        The eigenvalue approximations will be refined when necessary as */
00417 /*        high relative accuracy is required for the computation of the */
00418 /*        corresponding eigenvectors. */
00419         template_blas_copy(&im, &w[wbegin], &c__1, &work[wbegin], &c__1);
00420 /*        We store in W the eigenvalue approximations w.r.t. the original */
00421 /*        matrix T. */
00422         i__2 = im;
00423         for (i__ = 1; i__ <= i__2; ++i__) {
00424             w[wbegin + i__ - 1] += sigma;
00425 /* L30: */
00426         }
00427 /*        NDEPTH is the current depth of the representation tree */
00428         ndepth = 0;
00429 /*        PARITY is either 1 or 0 */
00430         parity = 1;
00431 /*        NCLUS is the number of clusters for the next level of the */
00432 /*        representation tree, we start with NCLUS = 1 for the root */
00433         nclus = 1;
00434         iwork[iindc1 + 1] = 1;
00435         iwork[iindc1 + 2] = im;
00436 /*        IDONE is the number of eigenvectors already computed in the current */
00437 /*        block */
00438         idone = 0;
00439 /*        loop while( IDONE.LT.IM ) */
00440 /*        generate the representation tree for the current block and */
00441 /*        compute the eigenvectors */
00442 L40:
00443         if (idone < im) {
00444 /*           This is a crude protection against infinitely deep trees */
00445             if (ndepth > *m) {
00446                 *info = -2;
00447                 return 0;
00448             }
00449 /*           breadth first processing of the current level of the representation */
00450 /*           tree: OLDNCL = number of clusters on current level */
00451             oldncl = nclus;
00452 /*           reset NCLUS to count the number of child clusters */
00453             nclus = 0;
00454 
00455             parity = 1 - parity;
00456             if (parity == 0) {
00457                 oldcls = iindc1;
00458                 newcls = iindc2;
00459             } else {
00460                 oldcls = iindc2;
00461                 newcls = iindc1;
00462             }
00463 /*           Process the clusters on the current level */
00464             i__2 = oldncl;
00465             for (i__ = 1; i__ <= i__2; ++i__) {
00466                 j = oldcls + (i__ << 1);
00467 /*              OLDFST, OLDLST = first, last index of current cluster. */
00468 /*                               cluster indices start with 1 and are relative */
00469 /*                               to WBEGIN when accessing W, WGAP, WERR, Z */
00470                 oldfst = iwork[j - 1];
00471                 oldlst = iwork[j];
00472                 if (ndepth > 0) {
00473 /*                 Retrieve relatively robust representation (RRR) of cluster */
00474 /*                 that has been computed at the previous level */
00475 /*                 The RRR is stored in Z and overwritten once the eigenvectors */
00476 /*                 have been computed or when the cluster is refined */
00477                     if (*dol == 1 && *dou == *m) {
00478 /*                    Get representation from location of the leftmost evalue */
00479 /*                    of the cluster */
00480                         j = wbegin + oldfst - 1;
00481                     } else {
00482                         if (wbegin + oldfst - 1 < *dol) {
00483 /*                       Get representation from the left end of Z array */
00484                             j = *dol - 1;
00485                         } else if (wbegin + oldfst - 1 > *dou) {
00486 /*                       Get representation from the right end of Z array */
00487                             j = *dou;
00488                         } else {
00489                             j = wbegin + oldfst - 1;
00490                         }
00491                     }
00492                     template_blas_copy(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin]
00493 , &c__1);
00494                     i__3 = in - 1;
00495                     template_blas_copy(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[
00496                             ibegin], &c__1);
00497                     sigma = z__[iend + (j + 1) * z_dim1];
00498 /*                 Set the corresponding entries in Z to zero */
00499                     template_lapack_laset("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j 
00500                             * z_dim1], ldz);
00501                 }
00502 /*              Compute DL and DLL of current RRR */
00503                 i__3 = iend - 1;
00504                 for (j = ibegin; j <= i__3; ++j) {
00505                     tmp = d__[j] * l[j];
00506                     work[indld - 1 + j] = tmp;
00507                     work[indlld - 1 + j] = tmp * l[j];
00508 /* L50: */
00509                 }
00510                 if (ndepth > 0) {
00511 /*                 P and Q are index of the first and last eigenvalue to compute */
00512 /*                 within the current block */
00513                     p = indexw[wbegin - 1 + oldfst];
00514                     q = indexw[wbegin - 1 + oldlst];
00515 /*                 Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */
00516 /*                 thru' Q-OFFSET elements of these arrays are to be used. */
00517 /*                  OFFSET = P-OLDFST */
00518                     offset = indexw[wbegin] - 1;
00519 /*                 perform limited bisection (if necessary) to get approximate */
00520 /*                 eigenvalues to the precision needed. */
00521                     template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p, 
00522                              &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[
00523                             wbegin], &werr[wbegin], &work[indwrk], &iwork[
00524                             iindwk], pivmin, &spdiam, &in, &iinfo);
00525                     if (iinfo != 0) {
00526                         *info = -1;
00527                         return 0;
00528                     }
00529 /*                 We also recompute the extremal gaps. W holds all eigenvalues */
00530 /*                 of the unshifted matrix and must be used for computation */
00531 /*                 of WGAP, the entries of WORK might stem from RRRs with */
00532 /*                 different shifts. The gaps from WBEGIN-1+OLDFST to */
00533 /*                 WBEGIN-1+OLDLST are correctly computed in DLARRB. */
00534 /*                 However, we only allow the gaps to become greater since */
00535 /*                 this is what should happen when we decrease WERR */
00536                     if (oldfst > 1) {
00537 /* Computing MAX */
00538                         d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin + 
00539                                 oldfst - 1] - werr[wbegin + oldfst - 1] - w[
00540                                 wbegin + oldfst - 2] - werr[wbegin + oldfst - 
00541                                 2];
00542                         wgap[wbegin + oldfst - 2] = maxMACRO(d__1,d__2);
00543                     }
00544                     if (wbegin + oldlst - 1 < wend) {
00545 /* Computing MAX */
00546                         d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin + 
00547                                 oldlst] - werr[wbegin + oldlst] - w[wbegin + 
00548                                 oldlst - 1] - werr[wbegin + oldlst - 1];
00549                         wgap[wbegin + oldlst - 1] = maxMACRO(d__1,d__2);
00550                     }
00551 /*                 Each time the eigenvalues in WORK get refined, we store */
00552 /*                 the newly found approximation with all shifts applied in W */
00553                     i__3 = oldlst;
00554                     for (j = oldfst; j <= i__3; ++j) {
00555                         w[wbegin + j - 1] = work[wbegin + j - 1] + sigma;
00556 /* L53: */
00557                     }
00558                 }
00559 /*              Process the current node. */
00560                 newfst = oldfst;
00561                 i__3 = oldlst;
00562                 for (j = oldfst; j <= i__3; ++j) {
00563                     if (j == oldlst) {
00564 /*                    we are at the right end of the cluster, this is also the */
00565 /*                    boundary of the child cluster */
00566                         newlst = j;
00567                     } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[
00568                             wbegin + j - 1], absMACRO(d__1))) {
00569 /*                    the right relative gap is big enough, the child cluster */
00570 /*                    (NEWFST,..,NEWLST) is well separated from the following */
00571                         newlst = j;
00572                     } else {
00573 /*                    inside a child cluster, the relative gap is not */
00574 /*                    big enough. */
00575                         goto L140;
00576                     }
00577 /*                 Compute size of child cluster found */
00578                     newsiz = newlst - newfst + 1;
00579 /*                 NEWFTT is the place in Z where the new RRR or the computed */
00580 /*                 eigenvector is to be stored */
00581                     if (*dol == 1 && *dou == *m) {
00582 /*                    Store representation at location of the leftmost evalue */
00583 /*                    of the cluster */
00584                         newftt = wbegin + newfst - 1;
00585                     } else {
00586                         if (wbegin + newfst - 1 < *dol) {
00587 /*                       Store representation at the left end of Z array */
00588                             newftt = *dol - 1;
00589                         } else if (wbegin + newfst - 1 > *dou) {
00590 /*                       Store representation at the right end of Z array */
00591                             newftt = *dou;
00592                         } else {
00593                             newftt = wbegin + newfst - 1;
00594                         }
00595                     }
00596                     if (newsiz > 1) {
00597 
00598 /*                    Current child is not a singleton but a cluster. */
00599 /*                    Compute and store new representation of child. */
00600 
00601 
00602 /*                    Compute left and right cluster gap. */
00603 
00604 /*                    LGAP and RGAP are not computed from WORK because */
00605 /*                    the eigenvalue approximations may stem from RRRs */
00606 /*                    different shifts. However, W hold all eigenvalues */
00607 /*                    of the unshifted matrix. Still, the entries in WGAP */
00608 /*                    have to be computed from WORK since the entries */
00609 /*                    in W might be of the same order so that gaps are not */
00610 /*                    exhibited correctly for very close eigenvalues. */
00611                         if (newfst == 1) {
00612 /* Computing MAX */
00613                             d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl;
00614                             lgap = maxMACRO(d__1,d__2);
00615                         } else {
00616                             lgap = wgap[wbegin + newfst - 2];
00617                         }
00618                         rgap = wgap[wbegin + newlst - 1];
00619 
00620 /*                    Compute left- and rightmost eigenvalue of child */
00621 /*                    to high precision in order to shift as close */
00622 /*                    as possible and obtain as large relative gaps */
00623 /*                    as possible */
00624 
00625                         for (k = 1; k <= 2; ++k) {
00626                             if (k == 1) {
00627                                 p = indexw[wbegin - 1 + newfst];
00628                             } else {
00629                                 p = indexw[wbegin - 1 + newlst];
00630                             }
00631                             offset = indexw[wbegin] - 1;
00632                             template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin 
00633                                     - 1], &p, &p, &rqtol, &rqtol, &offset, &
00634                                     work[wbegin], &wgap[wbegin], &werr[wbegin]
00635 , &work[indwrk], &iwork[iindwk], pivmin, &
00636                                     spdiam, &in, &iinfo);
00637 /* L55: */
00638                         }
00639 
00640                         if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1 
00641                                 > *dou) {
00642 /*                       if the cluster contains no desired eigenvalues */
00643 /*                       skip the computation of that branch of the rep. tree */
00644 
00645 /*                       We could skip before the refinement of the extremal */
00646 /*                       eigenvalues of the child, but then the representation */
00647 /*                       tree could be different from the one when nothing is */
00648 /*                       skipped. For this reason we skip at this place. */
00649                             idone = idone + newlst - newfst + 1;
00650                             goto L139;
00651                         }
00652 
00653 /*                    Compute RRR of child cluster. */
00654 /*                    Note that the new RRR is stored in Z */
00655 
00656 /*                    DLARRF needs LWORK = 2*N */
00657                         template_lapack_larrf(&in, &d__[ibegin], &l[ibegin], &work[indld + 
00658                                 ibegin - 1], &newfst, &newlst, &work[wbegin], 
00659                                 &wgap[wbegin], &werr[wbegin], &spdiam, &lgap, 
00660                                 &rgap, pivmin, &tau, &z__[ibegin + newftt * 
00661                                 z_dim1], &z__[ibegin + (newftt + 1) * z_dim1], 
00662                                  &work[indwrk], &iinfo);
00663                         if (iinfo == 0) {
00664 /*                       a new RRR for the cluster was found by DLARRF */
00665 /*                       update shift and store it */
00666                             ssigma = sigma + tau;
00667                             z__[iend + (newftt + 1) * z_dim1] = ssigma;
00668 /*                       WORK() are the midpoints and WERR() the semi-width */
00669 /*                       Note that the entries in W are unchanged. */
00670                             i__4 = newlst;
00671                             for (k = newfst; k <= i__4; ++k) {
00672                                 fudge = eps * 3. * (d__1 = work[wbegin + k - 
00673                                         1], absMACRO(d__1));
00674                                 work[wbegin + k - 1] -= tau;
00675                                 fudge += eps * 4. * (d__1 = work[wbegin + k - 
00676                                         1], absMACRO(d__1));
00677 /*                          Fudge errors */
00678                                 werr[wbegin + k - 1] += fudge;
00679 /*                          Gaps are not fudged. Provided that WERR is small */
00680 /*                          when eigenvalues are close, a zero gap indicates */
00681 /*                          that a new representation is needed for resolving */
00682 /*                          the cluster. A fudge could lead to a wrong decision */
00683 /*                          of judging eigenvalues 'separated' which in */
00684 /*                          reality are not. This could have a negative impact */
00685 /*                          on the orthogonality of the computed eigenvectors. */
00686 /* L116: */
00687                             }
00688                             ++nclus;
00689                             k = newcls + (nclus << 1);
00690                             iwork[k - 1] = newfst;
00691                             iwork[k] = newlst;
00692                         } else {
00693                             *info = -2;
00694                             return 0;
00695                         }
00696                     } else {
00697 
00698 /*                    Compute eigenvector of singleton */
00699 
00700                         iter = 0;
00701 
00702                         tol = template_blas_log((Treal) in) * 4. * eps;
00703 
00704                         k = newfst;
00705                         windex = wbegin + k - 1;
00706 /* Computing MAX */
00707                         i__4 = windex - 1;
00708                         windmn = maxMACRO(i__4,1);
00709 /* Computing MIN */
00710                         i__4 = windex + 1;
00711                         windpl = minMACRO(i__4,*m);
00712                         lambda = work[windex];
00713                         ++done;
00714 /*                    Check if eigenvector computation is to be skipped */
00715                         if (windex < *dol || windex > *dou) {
00716                             eskip = TRUE_;
00717                             goto L125;
00718                         } else {
00719                             eskip = FALSE_;
00720                         }
00721                         left = work[windex] - werr[windex];
00722                         right = work[windex] + werr[windex];
00723                         indeig = indexw[windex];
00724 /*                    Note that since we compute the eigenpairs for a child, */
00725 /*                    all eigenvalue approximations are w.r.t the same shift. */
00726 /*                    In this case, the entries in WORK should be used for */
00727 /*                    computing the gaps since they exhibit even very small */
00728 /*                    differences in the eigenvalues, as opposed to the */
00729 /*                    entries in W which might "look" the same. */
00730                         if (k == 1) {
00731 /*                       In the case RANGE='I' and with not much initial */
00732 /*                       accuracy in LAMBDA and VL, the formula */
00733 /*                       LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */
00734 /*                       can lead to an overestimation of the left gap and */
00735 /*                       thus to inadequately early RQI 'convergence'. */
00736 /*                       Prevent this by forcing a small left gap. */
00737 /* Computing MAX */
00738                             d__1 = absMACRO(left), d__2 = absMACRO(right);
00739                             lgap = eps * maxMACRO(d__1,d__2);
00740                         } else {
00741                             lgap = wgap[windmn];
00742                         }
00743                         if (k == im) {
00744 /*                       In the case RANGE='I' and with not much initial */
00745 /*                       accuracy in LAMBDA and VU, the formula */
00746 /*                       can lead to an overestimation of the right gap and */
00747 /*                       thus to inadequately early RQI 'convergence'. */
00748 /*                       Prevent this by forcing a small right gap. */
00749 /* Computing MAX */
00750                             d__1 = absMACRO(left), d__2 = absMACRO(right);
00751                             rgap = eps * maxMACRO(d__1,d__2);
00752                         } else {
00753                             rgap = wgap[windex];
00754                         }
00755                         gap = minMACRO(lgap,rgap);
00756                         if (k == 1 || k == im) {
00757 /*                       The eigenvector support can become wrong */
00758 /*                       because significant entries could be cut off due to a */
00759 /*                       large GAPTOL parameter in LAR1V. Prevent this. */
00760                             gaptol = 0.;
00761                         } else {
00762                             gaptol = gap * eps;
00763                         }
00764                         isupmn = in;
00765                         isupmx = 1;
00766 /*                    Update WGAP so that it holds the minimum gap */
00767 /*                    to the left or the right. This is crucial in the */
00768 /*                    case where bisection is used to ensure that the */
00769 /*                    eigenvalue is refined up to the required precision. */
00770 /*                    The correct value is restored afterwards. */
00771                         savgap = wgap[windex];
00772                         wgap[windex] = gap;
00773 /*                    We want to use the Rayleigh Quotient Correction */
00774 /*                    as often as possible since it converges quadratically */
00775 /*                    when we are close enough to the desired eigenvalue. */
00776 /*                    However, the Rayleigh Quotient can have the wrong sign */
00777 /*                    and lead us away from the desired eigenvalue. In this */
00778 /*                    case, the best we can do is to use bisection. */
00779                         usedbs = FALSE_;
00780                         usedrq = FALSE_;
00781 /*                    Bisection is initially turned off unless it is forced */
00782                         needbs = ! tryrqc;
00783 L120:
00784 /*                    Check if bisection should be used to refine eigenvalue */
00785                         if (needbs) {
00786 /*                       Take the bisection as new iterate */
00787                             usedbs = TRUE_;
00788                             itmp1 = iwork[iindr + windex];
00789                             offset = indexw[wbegin] - 1;
00790                             d__1 = eps * 2.;
00791                             template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin 
00792                                     - 1], &indeig, &indeig, &c_b5, &d__1, &
00793                                     offset, &work[wbegin], &wgap[wbegin], &
00794                                     werr[wbegin], &work[indwrk], &iwork[
00795                                     iindwk], pivmin, &spdiam, &itmp1, &iinfo);
00796                             if (iinfo != 0) {
00797                                 *info = -3;
00798                                 return 0;
00799                             }
00800                             lambda = work[windex];
00801 /*                       Reset twist index from inaccurate LAMBDA to */
00802 /*                       force computation of true MINGMA */
00803                             iwork[iindr + windex] = 0;
00804                         }
00805 /*                    Given LAMBDA, compute the eigenvector. */
00806                         L__1 = ! usedbs;
00807                         template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin], &l[
00808                                 ibegin], &work[indld + ibegin - 1], &work[
00809                                 indlld + ibegin - 1], pivmin, &gaptol, &z__[
00810                                 ibegin + windex * z_dim1], &L__1, &negcnt, &
00811                                 ztz, &mingma, &iwork[iindr + windex], &isuppz[
00812                                 (windex << 1) - 1], &nrminv, &resid, &rqcorr, 
00813                                 &work[indwrk]);
00814                         if (iter == 0) {
00815                             bstres = resid;
00816                             bstw = lambda;
00817                         } else if (resid < bstres) {
00818                             bstres = resid;
00819                             bstw = lambda;
00820                         }
00821 /* Computing MIN */
00822                         i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1];
00823                         isupmn = minMACRO(i__4,i__5);
00824 /* Computing MAX */
00825                         i__4 = isupmx, i__5 = isuppz[windex * 2];
00826                         isupmx = maxMACRO(i__4,i__5);
00827                         ++iter;
00828 /*                    sin alpha <= |resid|/gap */
00829 /*                    Note that both the residual and the gap are */
00830 /*                    proportional to the matrix, so ||T|| doesn't play */
00831 /*                    a role in the quotient */
00832 
00833 /*                    Convergence test for Rayleigh-Quotient iteration */
00834 /*                    (omitted when Bisection has been used) */
00835 
00836                         if (resid > tol * gap && absMACRO(rqcorr) > rqtol * absMACRO(
00837                                 lambda) && ! usedbs) {
00838 /*                       We need to check that the RQCORR update doesn't */
00839 /*                       move the eigenvalue away from the desired one and */
00840 /*                       towards a neighbor. -> protection with bisection */
00841                             if (indeig <= negcnt) {
00842 /*                          The wanted eigenvalue lies to the left */
00843                                 sgndef = -1.;
00844                             } else {
00845 /*                          The wanted eigenvalue lies to the right */
00846                                 sgndef = 1.;
00847                             }
00848 /*                       We only use the RQCORR if it improves the */
00849 /*                       the iterate reasonably. */
00850                             if (rqcorr * sgndef >= 0. && lambda + rqcorr <= 
00851                                     right && lambda + rqcorr >= left) {
00852                                 usedrq = TRUE_;
00853 /*                          Store new midpoint of bisection interval in WORK */
00854                                 if (sgndef == 1.) {
00855 /*                             The current LAMBDA is on the left of the true */
00856 /*                             eigenvalue */
00857                                     left = lambda;
00858 /*                             We prefer to assume that the error estimate */
00859 /*                             is correct. We could make the interval not */
00860 /*                             as a bracket but to be modified if the RQCORR */
00861 /*                             chooses to. In this case, the RIGHT side should */
00862 /*                             be modified as follows: */
00863 /*                              RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */
00864                                 } else {
00865 /*                             The current LAMBDA is on the right of the true */
00866 /*                             eigenvalue */
00867                                     right = lambda;
00868 /*                             See comment about assuming the error estimate is */
00869 /*                             correct above. */
00870 /*                              LEFT = MIN(LEFT, LAMBDA + RQCORR) */
00871                                 }
00872                                 work[windex] = (right + left) * .5;
00873 /*                          Take RQCORR since it has the correct sign and */
00874 /*                          improves the iterate reasonably */
00875                                 lambda += rqcorr;
00876 /*                          Update width of error interval */
00877                                 werr[windex] = (right - left) * .5;
00878                             } else {
00879                                 needbs = TRUE_;
00880                             }
00881                             if (right - left < rqtol * absMACRO(lambda)) {
00882 /*                             The eigenvalue is computed to bisection accuracy */
00883 /*                             compute eigenvector and stop */
00884                                 usedbs = TRUE_;
00885                                 goto L120;
00886                             } else if (iter < 10) {
00887                                 goto L120;
00888                             } else if (iter == 10) {
00889                                 needbs = TRUE_;
00890                                 goto L120;
00891                             } else {
00892                                 *info = 5;
00893                                 return 0;
00894                             }
00895                         } else {
00896                             stp2ii = FALSE_;
00897                             if (usedrq && usedbs && bstres <= resid) {
00898                                 lambda = bstw;
00899                                 stp2ii = TRUE_;
00900                             }
00901                             if (stp2ii) {
00902 /*                          improve error angle by second step */
00903                                 L__1 = ! usedbs;
00904                                 template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin]
00905 , &l[ibegin], &work[indld + ibegin - 
00906                                         1], &work[indlld + ibegin - 1], 
00907                                         pivmin, &gaptol, &z__[ibegin + windex 
00908                                         * z_dim1], &L__1, &negcnt, &ztz, &
00909                                         mingma, &iwork[iindr + windex], &
00910                                         isuppz[(windex << 1) - 1], &nrminv, &
00911                                         resid, &rqcorr, &work[indwrk]);
00912                             }
00913                             work[windex] = lambda;
00914                         }
00915 
00916 /*                    Compute FP-vector support w.r.t. whole matrix */
00917 
00918                         isuppz[(windex << 1) - 1] += oldien;
00919                         isuppz[windex * 2] += oldien;
00920                         zfrom = isuppz[(windex << 1) - 1];
00921                         zto = isuppz[windex * 2];
00922                         isupmn += oldien;
00923                         isupmx += oldien;
00924 /*                    Ensure vector is ok if support in the RQI has changed */
00925                         if (isupmn < zfrom) {
00926                             i__4 = zfrom - 1;
00927                             for (ii = isupmn; ii <= i__4; ++ii) {
00928                                 z__[ii + windex * z_dim1] = 0.;
00929 /* L122: */
00930                             }
00931                         }
00932                         if (isupmx > zto) {
00933                             i__4 = isupmx;
00934                             for (ii = zto + 1; ii <= i__4; ++ii) {
00935                                 z__[ii + windex * z_dim1] = 0.;
00936 /* L123: */
00937                             }
00938                         }
00939                         i__4 = zto - zfrom + 1;
00940                         template_blas_scal(&i__4, &nrminv, &z__[zfrom + windex * z_dim1], 
00941                                 &c__1);
00942 L125:
00943 /*                    Update W */
00944                         w[windex] = lambda + sigma;
00945 /*                    Recompute the gaps on the left and right */
00946 /*                    But only allow them to become larger and not */
00947 /*                    smaller (which can only happen through "bad" */
00948 /*                    cancellation and doesn't reflect the theory */
00949 /*                    where the initial gaps are underestimated due */
00950 /*                    to WERR being too crude.) */
00951                         if (! eskip) {
00952                             if (k > 1) {
00953 /* Computing MAX */
00954                                 d__1 = wgap[windmn], d__2 = w[windex] - werr[
00955                                         windex] - w[windmn] - werr[windmn];
00956                                 wgap[windmn] = maxMACRO(d__1,d__2);
00957                             }
00958                             if (windex < wend) {
00959 /* Computing MAX */
00960                                 d__1 = savgap, d__2 = w[windpl] - werr[windpl]
00961                                          - w[windex] - werr[windex];
00962                                 wgap[windex] = maxMACRO(d__1,d__2);
00963                             }
00964                         }
00965                         ++idone;
00966                     }
00967 /*                 here ends the code for the current child */
00968 
00969 L139:
00970 /*                 Proceed to any remaining child nodes */
00971                     newfst = j + 1;
00972 L140:
00973                     ;
00974                 }
00975 /* L150: */
00976             }
00977             ++ndepth;
00978             goto L40;
00979         }
00980         ibegin = iend + 1;
00981         wbegin = wend + 1;
00982 L170:
00983         ;
00984     }
00985 
00986     return 0;
00987 
00988 /*     End of DLARRV */
00989 
00990 } /* dlarrv_ */
00991 
00992 #endif

Generated on Wed Nov 21 09:32:01 2012 for ergo by  doxygen 1.4.7