template_lapack_lasq4.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_LASQ4_HEADER
00036 #define TEMPLATE_LAPACK_LASQ4_HEADER
00037 
00038 template<class Treal>
00039 int template_lapack_lasq4(integer *i0, integer *n0, Treal *z__, 
00040         integer *pp, integer *n0in, Treal *dmin__, Treal *dmin1, 
00041         Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2, 
00042         Treal *tau, integer *ttype, Treal *g)
00043 {
00044     /* System generated locals */
00045     integer i__1;
00046     Treal d__1, d__2;
00047 
00048 
00049     /* Local variables */
00050     Treal s = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
00051     Treal a2, b1, b2;
00052     integer i4, nn, np;
00053     Treal gam, gap1, gap2;
00054 
00055 
00056 /*  -- LAPACK routine (version 3.2)                                    -- */
00057 
00058 /*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
00059 /*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
00060 /*  -- Berkeley                                                        -- */
00061 /*  -- November 2008                                                   -- */
00062 
00063 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
00064 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
00065 
00066 /*     .. Scalar Arguments .. */
00067 /*     .. */
00068 /*     .. Array Arguments .. */
00069 /*     .. */
00070 
00071 /*  Purpose */
00072 /*  ======= */
00073 
00074 /*  DLASQ4 computes an approximation TAU to the smallest eigenvalue */
00075 /*  using values of d from the previous transform. */
00076 
00077 /*  I0    (input) INTEGER */
00078 /*        First index. */
00079 
00080 /*  N0    (input) INTEGER */
00081 /*        Last index. */
00082 
00083 /*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N ) */
00084 /*        Z holds the qd array. */
00085 
00086 /*  PP    (input) INTEGER */
00087 /*        PP=0 for ping, PP=1 for pong. */
00088 
00089 /*  NOIN  (input) INTEGER */
00090 /*        The value of N0 at start of EIGTEST. */
00091 
00092 /*  DMIN  (input) DOUBLE PRECISION */
00093 /*        Minimum value of d. */
00094 
00095 /*  DMIN1 (input) DOUBLE PRECISION */
00096 /*        Minimum value of d, excluding D( N0 ). */
00097 
00098 /*  DMIN2 (input) DOUBLE PRECISION */
00099 /*        Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
00100 
00101 /*  DN    (input) DOUBLE PRECISION */
00102 /*        d(N) */
00103 
00104 /*  DN1   (input) DOUBLE PRECISION */
00105 /*        d(N-1) */
00106 
00107 /*  DN2   (input) DOUBLE PRECISION */
00108 /*        d(N-2) */
00109 
00110 /*  TAU   (output) DOUBLE PRECISION */
00111 /*        This is the shift. */
00112 
00113 /*  TTYPE (output) INTEGER */
00114 /*        Shift type. */
00115 
00116 /*  G     (input/output) REAL */
00117 /*        G is passed as an argument in order to save its value between */
00118 /*        calls to DLASQ4. */
00119 
00120 /*  Further Details */
00121 /*  =============== */
00122 /*  CNST1 = 9/16 */
00123 
00124 /*  ===================================================================== */
00125 
00126 /*     .. Parameters .. */
00127 /*     .. */
00128 /*     .. Local Scalars .. */
00129 /*     .. */
00130 /*     .. Intrinsic Functions .. */
00131 /*     .. */
00132 /*     .. Executable Statements .. */
00133 
00134 /*     A negative DMIN forces the shift to take that absolute value */
00135 /*     TTYPE records the type of shift. */
00136 
00137     /* Parameter adjustments */
00138     --z__;
00139 
00140     /* Function Body */
00141     if (*dmin__ <= 0.) {
00142         *tau = -(*dmin__);
00143         *ttype = -1;
00144         return 0;
00145     }
00146 
00147     nn = (*n0 << 2) + *pp;
00148     if (*n0in == *n0) {
00149 
00150 /*        No eigenvalues deflated. */
00151 
00152         if (*dmin__ == *dn || *dmin__ == *dn1) {
00153 
00154             b1 = template_blas_sqrt(z__[nn - 3]) * template_blas_sqrt(z__[nn - 5]);
00155             b2 = template_blas_sqrt(z__[nn - 7]) * template_blas_sqrt(z__[nn - 9]);
00156             a2 = z__[nn - 7] + z__[nn - 5];
00157 
00158 /*           Cases 2 and 3. */
00159 
00160             if (*dmin__ == *dn && *dmin1 == *dn1) {
00161                 gap2 = *dmin2 - a2 - *dmin2 * .25;
00162                 if (gap2 > 0. && gap2 > b2) {
00163                     gap1 = a2 - *dn - b2 / gap2 * b2;
00164                 } else {
00165                     gap1 = a2 - *dn - (b1 + b2);
00166                 }
00167                 if (gap1 > 0. && gap1 > b1) {
00168 /* Computing MAX */
00169                     d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5;
00170                     s = maxMACRO(d__1,d__2);
00171                     *ttype = -2;
00172                 } else {
00173                     s = 0.;
00174                     if (*dn > b1) {
00175                         s = *dn - b1;
00176                     }
00177                     if (a2 > b1 + b2) {
00178 /* Computing MIN */
00179                         d__1 = s, d__2 = a2 - (b1 + b2);
00180                         s = minMACRO(d__1,d__2);
00181                     }
00182 /* Computing MAX */
00183                     d__1 = s, d__2 = *dmin__ * .333;
00184                     s = maxMACRO(d__1,d__2);
00185                     *ttype = -3;
00186                 }
00187             } else {
00188 
00189 /*              Case 4. */
00190 
00191                 *ttype = -4;
00192                 s = *dmin__ * .25;
00193                 if (*dmin__ == *dn) {
00194                     gam = *dn;
00195                     a2 = 0.;
00196                     if (z__[nn - 5] > z__[nn - 7]) {
00197                         return 0;
00198                     }
00199                     b2 = z__[nn - 5] / z__[nn - 7];
00200                     np = nn - 9;
00201                 } else {
00202                     np = nn - (*pp << 1);
00203                     b2 = z__[np - 2];
00204                     gam = *dn1;
00205                     if (z__[np - 4] > z__[np - 2]) {
00206                         return 0;
00207                     }
00208                     a2 = z__[np - 4] / z__[np - 2];
00209                     if (z__[nn - 9] > z__[nn - 11]) {
00210                         return 0;
00211                     }
00212                     b2 = z__[nn - 9] / z__[nn - 11];
00213                     np = nn - 13;
00214                 }
00215 
00216 /*              Approximate contribution to norm squared from I < NN-1. */
00217 
00218                 a2 += b2;
00219                 i__1 = (*i0 << 2) - 1 + *pp;
00220                 for (i4 = np; i4 >= i__1; i4 += -4) {
00221                     if (b2 == 0.) {
00222                         goto L20;
00223                     }
00224                     b1 = b2;
00225                     if (z__[i4] > z__[i4 - 2]) {
00226                         return 0;
00227                     }
00228                     b2 *= z__[i4] / z__[i4 - 2];
00229                     a2 += b2;
00230                     if (maxMACRO(b2,b1) * 100. < a2 || .563 < a2) {
00231                         goto L20;
00232                     }
00233 /* L10: */
00234                 }
00235 L20:
00236                 a2 *= 1.05;
00237 
00238 /*              Rayleigh quotient residual bound. */
00239 
00240                 if (a2 < .563) {
00241                     s = gam * (1. - template_blas_sqrt(a2)) / (a2 + 1.);
00242                 }
00243             }
00244         } else if (*dmin__ == *dn2) {
00245 
00246 /*           Case 5. */
00247 
00248             *ttype = -5;
00249             s = *dmin__ * .25;
00250 
00251 /*           Compute contribution to norm squared from I > NN-2. */
00252 
00253             np = nn - (*pp << 1);
00254             b1 = z__[np - 2];
00255             b2 = z__[np - 6];
00256             gam = *dn2;
00257             if (z__[np - 8] > b2 || z__[np - 4] > b1) {
00258                 return 0;
00259             }
00260             a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.);
00261 
00262 /*           Approximate contribution to norm squared from I < NN-2. */
00263 
00264             if (*n0 - *i0 > 2) {
00265                 b2 = z__[nn - 13] / z__[nn - 15];
00266                 a2 += b2;
00267                 i__1 = (*i0 << 2) - 1 + *pp;
00268                 for (i4 = nn - 17; i4 >= i__1; i4 += -4) {
00269                     if (b2 == 0.) {
00270                         goto L40;
00271                     }
00272                     b1 = b2;
00273                     if (z__[i4] > z__[i4 - 2]) {
00274                         return 0;
00275                     }
00276                     b2 *= z__[i4] / z__[i4 - 2];
00277                     a2 += b2;
00278                     if (maxMACRO(b2,b1) * 100. < a2 || .563 < a2) {
00279                         goto L40;
00280                     }
00281 /* L30: */
00282                 }
00283 L40:
00284                 a2 *= 1.05;
00285             }
00286 
00287             if (a2 < .563) {
00288                 s = gam * (1. - template_blas_sqrt(a2)) / (a2 + 1.);
00289             }
00290         } else {
00291 
00292 /*           Case 6, no information to guide us. */
00293 
00294             if (*ttype == -6) {
00295                 *g += (1. - *g) * .333;
00296             } else if (*ttype == -18) {
00297                 *g = .083250000000000005;
00298             } else {
00299                 *g = .25;
00300             }
00301             s = *g * *dmin__;
00302             *ttype = -6;
00303         }
00304 
00305     } else if (*n0in == *n0 + 1) {
00306 
00307 /*        One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */
00308 
00309         if (*dmin1 == *dn1 && *dmin2 == *dn2) {
00310 
00311 /*           Cases 7 and 8. */
00312 
00313             *ttype = -7;
00314             s = *dmin1 * .333;
00315             if (z__[nn - 5] > z__[nn - 7]) {
00316                 return 0;
00317             }
00318             b1 = z__[nn - 5] / z__[nn - 7];
00319             b2 = b1;
00320             if (b2 == 0.) {
00321                 goto L60;
00322             }
00323             i__1 = (*i0 << 2) - 1 + *pp;
00324             for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
00325                 a2 = b1;
00326                 if (z__[i4] > z__[i4 - 2]) {
00327                     return 0;
00328                 }
00329                 b1 *= z__[i4] / z__[i4 - 2];
00330                 b2 += b1;
00331                 if (maxMACRO(b1,a2) * 100. < b2) {
00332                     goto L60;
00333                 }
00334 /* L50: */
00335             }
00336 L60:
00337             b2 = template_blas_sqrt(b2 * 1.05);
00338 /* Computing 2nd power */
00339             d__1 = b2;
00340             a2 = *dmin1 / (d__1 * d__1 + 1.);
00341             gap2 = *dmin2 * .5 - a2;
00342             if (gap2 > 0. && gap2 > b2 * a2) {
00343 /* Computing MAX */
00344                 d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
00345                 s = maxMACRO(d__1,d__2);
00346             } else {
00347 /* Computing MAX */
00348                 d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
00349                 s = maxMACRO(d__1,d__2);
00350                 *ttype = -8;
00351             }
00352         } else {
00353 
00354 /*           Case 9. */
00355 
00356             s = *dmin1 * .25;
00357             if (*dmin1 == *dn1) {
00358                 s = *dmin1 * .5;
00359             }
00360             *ttype = -9;
00361         }
00362 
00363     } else if (*n0in == *n0 + 2) {
00364 
00365 /*        Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */
00366 
00367 /*        Cases 10 and 11. */
00368 
00369         if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) {
00370             *ttype = -10;
00371             s = *dmin2 * .333;
00372             if (z__[nn - 5] > z__[nn - 7]) {
00373                 return 0;
00374             }
00375             b1 = z__[nn - 5] / z__[nn - 7];
00376             b2 = b1;
00377             if (b2 == 0.) {
00378                 goto L80;
00379             }
00380             i__1 = (*i0 << 2) - 1 + *pp;
00381             for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
00382                 if (z__[i4] > z__[i4 - 2]) {
00383                     return 0;
00384                 }
00385                 b1 *= z__[i4] / z__[i4 - 2];
00386                 b2 += b1;
00387                 if (b1 * 100. < b2) {
00388                     goto L80;
00389                 }
00390 /* L70: */
00391             }
00392 L80:
00393             b2 = template_blas_sqrt(b2 * 1.05);
00394 /* Computing 2nd power */
00395             d__1 = b2;
00396             a2 = *dmin2 / (d__1 * d__1 + 1.);
00397             gap2 = z__[nn - 7] + z__[nn - 9] - template_blas_sqrt(z__[nn - 11]) * template_blas_sqrt(z__[
00398                     nn - 9]) - a2;
00399             if (gap2 > 0. && gap2 > b2 * a2) {
00400 /* Computing MAX */
00401                 d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
00402                 s = maxMACRO(d__1,d__2);
00403             } else {
00404 /* Computing MAX */
00405                 d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
00406                 s = maxMACRO(d__1,d__2);
00407             }
00408         } else {
00409             s = *dmin2 * .25;
00410             *ttype = -11;
00411         }
00412     } else if (*n0in > *n0 + 2) {
00413 
00414 /*        Case 12, more than two eigenvalues deflated. No information. */
00415 
00416         s = 0.;
00417         *ttype = -12;
00418     }
00419 
00420     *tau = s;
00421     return 0;
00422 
00423 /*     End of DLASQ4 */
00424 
00425 } /* dlasq4_ */
00426 
00427 #endif

Generated on Mon Sep 17 14:32:56 2012 for ergo by  doxygen 1.4.7