template_lapack_larrf.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_LARRF_HEADER
00036 #define TEMPLATE_LAPACK_LARRF_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_lapack_larrf(integer *n, Treal *d__, Treal *l, 
00041         Treal *ld, integer *clstrt, integer *clend, Treal *w, 
00042         Treal *wgap, Treal *werr, Treal *spdiam, Treal *
00043         clgapl, Treal *clgapr, Treal *pivmin, Treal *sigma, 
00044         Treal *dplus, Treal *lplus, Treal *work, integer *info)
00045 {
00046     /* System generated locals */
00047     integer i__1;
00048     Treal d__1, d__2, d__3;
00049 
00050     /* Local variables */
00051     integer i__;
00052     Treal s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2, 
00053             znm2, growthbound, fail, fact, oldp;
00054     integer indx;
00055     Treal prod;
00056     integer ktry;
00057     Treal fail2, avgap, ldmax, rdmax;
00058     integer shift;
00059     logical dorrr1;
00060     Treal ldelta;
00061     logical nofail;
00062     Treal mingap, lsigma, rdelta;
00063     logical forcer;
00064     Treal rsigma, clwdth;
00065     logical sawnan1, sawnan2, tryrrr1;
00066 
00067 
00068 /*  -- LAPACK auxiliary routine (version 3.2) -- */
00069 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00070 /*     November 2006 */
00071 /* * */
00072 /*     .. Scalar Arguments .. */
00073 /*     .. */
00074 /*     .. Array Arguments .. */
00075 /*     .. */
00076 
00077 /*  Purpose */
00078 /*  ======= */
00079 
00080 /*  Given the initial representation L D L^T and its cluster of close */
00081 /*  eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */
00082 /*  W( CLEND ), DLARRF finds a new relatively robust representation */
00083 /*  L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */
00084 /*  eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */
00085 
00086 /*  Arguments */
00087 /*  ========= */
00088 
00089 /*  N       (input) INTEGER */
00090 /*          The order of the matrix (subblock, if the matrix splitted). */
00091 
00092 /*  D       (input) DOUBLE PRECISION array, dimension (N) */
00093 /*          The N diagonal elements of the diagonal matrix D. */
00094 
00095 /*  L       (input) DOUBLE PRECISION array, dimension (N-1) */
00096 /*          The (N-1) subdiagonal elements of the unit bidiagonal */
00097 /*          matrix L. */
00098 
00099 /*  LD      (input) DOUBLE PRECISION array, dimension (N-1) */
00100 /*          The (N-1) elements L(i)*D(i). */
00101 
00102 /*  CLSTRT  (input) INTEGER */
00103 /*          The index of the first eigenvalue in the cluster. */
00104 
00105 /*  CLEND   (input) INTEGER */
00106 /*          The index of the last eigenvalue in the cluster. */
00107 
00108 /*  W       (input) DOUBLE PRECISION array, dimension >=  (CLEND-CLSTRT+1) */
00109 /*          The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */
00110 /*          W( CLSTRT ) through W( CLEND ) form the cluster of relatively */
00111 /*          close eigenalues. */
00112 
00113 /*  WGAP    (input/output) DOUBLE PRECISION array, dimension >=  (CLEND-CLSTRT+1) */
00114 /*          The separation from the right neighbor eigenvalue in W. */
00115 
00116 /*  WERR    (input) DOUBLE PRECISION array, dimension >=  (CLEND-CLSTRT+1) */
00117 /*          WERR contain the semiwidth of the uncertainty */
00118 /*          interval of the corresponding eigenvalue APPROXIMATION in W */
00119 
00120 /*  SPDIAM (input) estimate of the spectral diameter obtained from the */
00121 /*          Gerschgorin intervals */
00122 
00123 /*  CLGAPL, CLGAPR (input) absolute gap on each end of the cluster. */
00124 /*          Set by the calling routine to protect against shifts too close */
00125 /*          to eigenvalues outside the cluster. */
00126 
00127 /*  PIVMIN  (input) DOUBLE PRECISION */
00128 /*          The minimum pivot allowed in the Sturm sequence. */
00129 
00130 /*  SIGMA   (output) DOUBLE PRECISION */
00131 /*          The shift used to form L(+) D(+) L(+)^T. */
00132 
00133 /*  DPLUS   (output) DOUBLE PRECISION array, dimension (N) */
00134 /*          The N diagonal elements of the diagonal matrix D(+). */
00135 
00136 /*  LPLUS   (output) DOUBLE PRECISION array, dimension (N-1) */
00137 /*          The first (N-1) elements of LPLUS contain the subdiagonal */
00138 /*          elements of the unit bidiagonal matrix L(+). */
00139 
00140 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N) */
00141 /*          Workspace. */
00142 
00143 /*  Further Details */
00144 /*  =============== */
00145 
00146 /*  Based on contributions by */
00147 /*     Beresford Parlett, University of California, Berkeley, USA */
00148 /*     Jim Demmel, University of California, Berkeley, USA */
00149 /*     Inderjit Dhillon, University of Texas, Austin, USA */
00150 /*     Osni Marques, LBNL/NERSC, USA */
00151 /*     Christof Voemel, University of California, Berkeley, USA */
00152 
00153 /*  ===================================================================== */
00154 
00155 /*     .. Parameters .. */
00156 /*     .. */
00157 /*     .. Local Scalars .. */
00158 /*     .. */
00159 /*     .. External Functions .. */
00160 /*     .. */
00161 /*     .. External Subroutines .. */
00162 /*     .. */
00163 /*     .. Intrinsic Functions .. */
00164 /*     .. */
00165 /*     .. Executable Statements .. */
00166 
00167 /* Table of constant values */
00168     
00169     integer c__1 = 1;
00170 
00171 
00172 
00173     /* Parameter adjustments */
00174     --work;
00175     --lplus;
00176     --dplus;
00177     --werr;
00178     --wgap;
00179     --w;
00180     --ld;
00181     --l;
00182     --d__;
00183 
00184     /* Initialization added by Elias to get rid of compiler warnings. */
00185     indx = 0;
00186     /* Function Body */
00187     *info = 0;
00188     fact = 2.;
00189     eps = template_lapack_lamch("Precision", (Treal)0);
00190     shift = 0;
00191     forcer = FALSE_;
00192 /*     Note that we cannot guarantee that for any of the shifts tried, */
00193 /*     the factorization has a small or even moderate element growth. */
00194 /*     There could be Ritz values at both ends of the cluster and despite */
00195 /*     backing off, there are examples where all factorizations tried */
00196 /*     (in IEEE mode, allowing zero pivots & infinities) have INFINITE */
00197 /*     element growth. */
00198 /*     For this reason, we should use PIVMIN in this subroutine so that at */
00199 /*     least the L D L^T factorization exists. It can be checked afterwards */
00200 /*     whether the element growth caused bad residuals/orthogonality. */
00201 /*     Decide whether the code should accept the best among all */
00202 /*     representations despite large element growth or signal INFO=1 */
00203     nofail = TRUE_;
00204 
00205 /*     Compute the average gap length of the cluster */
00206     clwdth = (d__1 = w[*clend] - w[*clstrt], absMACRO(d__1)) + werr[*clend] + werr[
00207             *clstrt];
00208     avgap = clwdth / (Treal) (*clend - *clstrt);
00209     mingap = minMACRO(*clgapl,*clgapr);
00210 /*     Initial values for shifts to both ends of cluster */
00211 /* Computing MIN */
00212     d__1 = w[*clstrt], d__2 = w[*clend];
00213     lsigma = minMACRO(d__1,d__2) - werr[*clstrt];
00214 /* Computing MAX */
00215     d__1 = w[*clstrt], d__2 = w[*clend];
00216     rsigma = maxMACRO(d__1,d__2) + werr[*clend];
00217 /*     Use a small fudge to make sure that we really shift to the outside */
00218     lsigma -= absMACRO(lsigma) * 4. * eps;
00219     rsigma += absMACRO(rsigma) * 4. * eps;
00220 /*     Compute upper bounds for how much to back off the initial shifts */
00221     ldmax = mingap * .25 + *pivmin * 2.;
00222     rdmax = mingap * .25 + *pivmin * 2.;
00223 /* Computing MAX */
00224     d__1 = avgap, d__2 = wgap[*clstrt];
00225     ldelta = maxMACRO(d__1,d__2) / fact;
00226 /* Computing MAX */
00227     d__1 = avgap, d__2 = wgap[*clend - 1];
00228     rdelta = maxMACRO(d__1,d__2) / fact;
00229 
00230 /*     Initialize the record of the best representation found */
00231 
00232     s = template_lapack_lamch("S", (Treal)0);
00233     smlgrowth = 1. / s;
00234     fail = (Treal) (*n - 1) * mingap / (*spdiam * eps);
00235     fail2 = (Treal) (*n - 1) * mingap / (*spdiam * template_blas_sqrt(eps));
00236     bestshift = lsigma;
00237 
00238 /*     while (KTRY <= KTRYMAX) */
00239     ktry = 0;
00240     growthbound = *spdiam * 8.;
00241 L5:
00242     sawnan1 = FALSE_;
00243     sawnan2 = FALSE_;
00244 /*     Ensure that we do not back off too much of the initial shifts */
00245     ldelta = minMACRO(ldmax,ldelta);
00246     rdelta = minMACRO(rdmax,rdelta);
00247 /*     Compute the element growth when shifting to both ends of the cluster */
00248 /*     accept the shift if there is no element growth at one of the two ends */
00249 /*     Left end */
00250     s = -lsigma;
00251     dplus[1] = d__[1] + s;
00252     if (absMACRO(dplus[1]) < *pivmin) {
00253         dplus[1] = -(*pivmin);
00254 /*        Need to set SAWNAN1 because refined RRR test should not be used */
00255 /*        in this case */
00256         sawnan1 = TRUE_;
00257     }
00258     max1 = absMACRO(dplus[1]);
00259     i__1 = *n - 1;
00260     for (i__ = 1; i__ <= i__1; ++i__) {
00261         lplus[i__] = ld[i__] / dplus[i__];
00262         s = s * lplus[i__] * l[i__] - lsigma;
00263         dplus[i__ + 1] = d__[i__ + 1] + s;
00264         if ((d__1 = dplus[i__ + 1], absMACRO(d__1)) < *pivmin) {
00265             dplus[i__ + 1] = -(*pivmin);
00266 /*           Need to set SAWNAN1 because refined RRR test should not be used */
00267 /*           in this case */
00268             sawnan1 = TRUE_;
00269         }
00270 /* Computing MAX */
00271         d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], absMACRO(d__1));
00272         max1 = maxMACRO(d__2,d__3);
00273 /* L6: */
00274     }
00275     sawnan1 = sawnan1 || template_lapack_isnan(&max1);
00276     if (forcer || ( max1 <= growthbound && ! sawnan1 ) ) {
00277         *sigma = lsigma;
00278         shift = 1;
00279         goto L100;
00280     }
00281 /*     Right end */
00282     s = -rsigma;
00283     work[1] = d__[1] + s;
00284     if (absMACRO(work[1]) < *pivmin) {
00285         work[1] = -(*pivmin);
00286 /*        Need to set SAWNAN2 because refined RRR test should not be used */
00287 /*        in this case */
00288         sawnan2 = TRUE_;
00289     }
00290     max2 = absMACRO(work[1]);
00291     i__1 = *n - 1;
00292     for (i__ = 1; i__ <= i__1; ++i__) {
00293         work[*n + i__] = ld[i__] / work[i__];
00294         s = s * work[*n + i__] * l[i__] - rsigma;
00295         work[i__ + 1] = d__[i__ + 1] + s;
00296         if ((d__1 = work[i__ + 1], absMACRO(d__1)) < *pivmin) {
00297             work[i__ + 1] = -(*pivmin);
00298 /*           Need to set SAWNAN2 because refined RRR test should not be used */
00299 /*           in this case */
00300             sawnan2 = TRUE_;
00301         }
00302 /* Computing MAX */
00303         d__2 = max2, d__3 = (d__1 = work[i__ + 1], absMACRO(d__1));
00304         max2 = maxMACRO(d__2,d__3);
00305 /* L7: */
00306     }
00307     sawnan2 = sawnan2 || template_lapack_isnan(&max2);
00308     if (forcer || ( max2 <= growthbound && ! sawnan2 ) ) {
00309         *sigma = rsigma;
00310         shift = 2;
00311         goto L100;
00312     }
00313 /*     If we are at this point, both shifts led to too much element growth */
00314 /*     Record the better of the two shifts (provided it didn't lead to NaN) */
00315     if (sawnan1 && sawnan2) {
00316 /*        both MAX1 and MAX2 are NaN */
00317         goto L50;
00318     } else {
00319         if (! sawnan1) {
00320             indx = 1;
00321             if (max1 <= smlgrowth) {
00322                 smlgrowth = max1;
00323                 bestshift = lsigma;
00324             }
00325         }
00326         if (! sawnan2) {
00327             if (sawnan1 || max2 <= max1) {
00328                 indx = 2;
00329             }
00330             if (max2 <= smlgrowth) {
00331                 smlgrowth = max2;
00332                 bestshift = rsigma;
00333             }
00334         }
00335     }
00336 /*     If we are here, both the left and the right shift led to */
00337 /*     element growth. If the element growth is moderate, then */
00338 /*     we may still accept the representation, if it passes a */
00339 /*     refined test for RRR. This test supposes that no NaN occurred. */
00340 /*     Moreover, we use the refined RRR test only for isolated clusters. */
00341     if (clwdth < mingap / 128. && minMACRO(max1,max2) < fail2 && ! sawnan1 && ! 
00342             sawnan2) {
00343         dorrr1 = TRUE_;
00344     } else {
00345         dorrr1 = FALSE_;
00346     }
00347     tryrrr1 = TRUE_;
00348     if (tryrrr1 && dorrr1) {
00349         if (indx == 1) {
00350             tmp = (d__1 = dplus[*n], absMACRO(d__1));
00351             znm2 = 1.;
00352             prod = 1.;
00353             oldp = 1.;
00354             for (i__ = *n - 1; i__ >= 1; --i__) {
00355                 if (prod <= eps) {
00356                     prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] *
00357                              work[*n + i__]) * oldp;
00358                 } else {
00359                     prod *= (d__1 = work[*n + i__], absMACRO(d__1));
00360                 }
00361                 oldp = prod;
00362 /* Computing 2nd power */
00363                 d__1 = prod;
00364                 znm2 += d__1 * d__1;
00365 /* Computing MAX */
00366                 d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, absMACRO(d__1));
00367                 tmp = maxMACRO(d__2,d__3);
00368 /* L15: */
00369             }
00370             rrr1 = tmp / (*spdiam * template_blas_sqrt(znm2));
00371             if (rrr1 <= 8.) {
00372                 *sigma = lsigma;
00373                 shift = 1;
00374                 goto L100;
00375             }
00376         } else if (indx == 2) {
00377             tmp = (d__1 = work[*n], absMACRO(d__1));
00378             znm2 = 1.;
00379             prod = 1.;
00380             oldp = 1.;
00381             for (i__ = *n - 1; i__ >= 1; --i__) {
00382                 if (prod <= eps) {
00383                     prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] * 
00384                             lplus[i__]) * oldp;
00385                 } else {
00386                     prod *= (d__1 = lplus[i__], absMACRO(d__1));
00387                 }
00388                 oldp = prod;
00389 /* Computing 2nd power */
00390                 d__1 = prod;
00391                 znm2 += d__1 * d__1;
00392 /* Computing MAX */
00393                 d__2 = tmp, d__3 = (d__1 = work[i__] * prod, absMACRO(d__1));
00394                 tmp = maxMACRO(d__2,d__3);
00395 /* L16: */
00396             }
00397             rrr2 = tmp / (*spdiam * template_blas_sqrt(znm2));
00398             if (rrr2 <= 8.) {
00399                 *sigma = rsigma;
00400                 shift = 2;
00401                 goto L100;
00402             }
00403         }
00404     }
00405 L50:
00406     if (ktry < 1) {
00407 /*        If we are here, both shifts failed also the RRR test. */
00408 /*        Back off to the outside */
00409 /* Computing MAX */
00410         d__1 = lsigma - ldelta, d__2 = lsigma - ldmax;
00411         lsigma = maxMACRO(d__1,d__2);
00412 /* Computing MIN */
00413         d__1 = rsigma + rdelta, d__2 = rsigma + rdmax;
00414         rsigma = minMACRO(d__1,d__2);
00415         ldelta *= 2.;
00416         rdelta *= 2.;
00417         ++ktry;
00418         goto L5;
00419     } else {
00420 /*        None of the representations investigated satisfied our */
00421 /*        criteria. Take the best one we found. */
00422         if (smlgrowth < fail || nofail) {
00423             lsigma = bestshift;
00424             rsigma = bestshift;
00425             forcer = TRUE_;
00426             goto L5;
00427         } else {
00428             *info = 1;
00429             return 0;
00430         }
00431     }
00432 L100:
00433     if (shift == 1) {
00434     } else if (shift == 2) {
00435 /*        store new L and D back into DPLUS, LPLUS */
00436         template_blas_copy(n, &work[1], &c__1, &dplus[1], &c__1);
00437         i__1 = *n - 1;
00438         template_blas_copy(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1);
00439     }
00440     return 0;
00441 
00442 /*     End of DLARRF */
00443 
00444 } /* dlarrf_ */
00445 
00446 #endif

Generated on Mon Sep 17 14:30:40 2012 for ergo by  doxygen 1.4.7