template_lapack_lag2.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_LAG2_HEADER
00036 #define TEMPLATE_LAPACK_LAG2_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_lapack_lag2(const Treal *a, const integer *lda, const Treal *b, 
00041         const integer *ldb, const Treal *safmin, Treal *scale1, Treal *
00042         scale2, Treal *wr1, Treal *wr2, Treal *wi)
00043 {
00044 /*  -- LAPACK auxiliary routine (version 3.0) --   
00045        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00046        Courant Institute, Argonne National Lab, and Rice University   
00047        March 31, 1993   
00048 
00049 
00050     Purpose   
00051     =======   
00052 
00053     DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue   
00054     problem  A - w B, with scaling as necessary to avoid over-/underflow.   
00055 
00056     The scaling factor "s" results in a modified eigenvalue equation   
00057 
00058         s A - w B   
00059 
00060     where  s  is a non-negative scaling factor chosen so that  w,  w B,   
00061     and  s A  do not overflow and, if possible, do not underflow, either.   
00062 
00063     Arguments   
00064     =========   
00065 
00066     A       (input) DOUBLE PRECISION array, dimension (LDA, 2)   
00067             On entry, the 2 x 2 matrix A.  It is assumed that its 1-norm   
00068             is less than 1/SAFMIN.  Entries less than   
00069             sqrt(SAFMIN)*norm(A) are subject to being treated as zero.   
00070 
00071     LDA     (input) INTEGER   
00072             The leading dimension of the array A.  LDA >= 2.   
00073 
00074     B       (input) DOUBLE PRECISION array, dimension (LDB, 2)   
00075             On entry, the 2 x 2 upper triangular matrix B.  It is   
00076             assumed that the one-norm of B is less than 1/SAFMIN.  The   
00077             diagonals should be at least sqrt(SAFMIN) times the largest   
00078             element of B (in absolute value); if a diagonal is smaller   
00079             than that, then  +/- sqrt(SAFMIN) will be used instead of   
00080             that diagonal.   
00081 
00082     LDB     (input) INTEGER   
00083             The leading dimension of the array B.  LDB >= 2.   
00084 
00085     SAFMIN  (input) DOUBLE PRECISION   
00086             The smallest positive number s.t. 1/SAFMIN does not   
00087             overflow.  (This should always be DLAMCH('S') -- it is an   
00088             argument in order to avoid having to call DLAMCH frequently.)   
00089 
00090     SCALE1  (output) DOUBLE PRECISION   
00091             A scaling factor used to avoid over-/underflow in the   
00092             eigenvalue equation which defines the first eigenvalue.  If   
00093             the eigenvalues are complex, then the eigenvalues are   
00094             ( WR1  +/-  WI i ) / SCALE1  (which may lie outside the   
00095             exponent range of the machine), SCALE1=SCALE2, and SCALE1   
00096             will always be positive.  If the eigenvalues are real, then   
00097             the first (real) eigenvalue is  WR1 / SCALE1 , but this may   
00098             overflow or underflow, and in fact, SCALE1 may be zero or   
00099             less than the underflow threshhold if the exact eigenvalue   
00100             is sufficiently large.   
00101 
00102     SCALE2  (output) DOUBLE PRECISION   
00103             A scaling factor used to avoid over-/underflow in the   
00104             eigenvalue equation which defines the second eigenvalue.  If   
00105             the eigenvalues are complex, then SCALE2=SCALE1.  If the   
00106             eigenvalues are real, then the second (real) eigenvalue is   
00107             WR2 / SCALE2 , but this may overflow or underflow, and in   
00108             fact, SCALE2 may be zero or less than the underflow   
00109             threshhold if the exact eigenvalue is sufficiently large.   
00110 
00111     WR1     (output) DOUBLE PRECISION   
00112             If the eigenvalue is real, then WR1 is SCALE1 times the   
00113             eigenvalue closest to the (2,2) element of A B**(-1).  If the   
00114             eigenvalue is complex, then WR1=WR2 is SCALE1 times the real   
00115             part of the eigenvalues.   
00116 
00117     WR2     (output) DOUBLE PRECISION   
00118             If the eigenvalue is real, then WR2 is SCALE2 times the   
00119             other eigenvalue.  If the eigenvalue is complex, then   
00120             WR1=WR2 is SCALE1 times the real part of the eigenvalues.   
00121 
00122     WI      (output) DOUBLE PRECISION   
00123             If the eigenvalue is real, then WI is zero.  If the   
00124             eigenvalue is complex, then WI is SCALE1 times the imaginary   
00125             part of the eigenvalues.  WI will always be non-negative.   
00126 
00127     =====================================================================   
00128 
00129 
00130        Parameter adjustments */
00131     /* System generated locals */
00132     integer a_dim1, a_offset, b_dim1, b_offset;
00133     Treal d__1, d__2, d__3, d__4, d__5, d__6;
00134     /* Local variables */
00135      Treal diff, bmin, wbig, wabs, wdet, r__, binv11, binv22, 
00136             discr, anorm, bnorm, bsize, shift, c1, c2, c3, c4, c5, rtmin, 
00137             rtmax, wsize, s1, s2, a11, a12, a21, a22, b11, b12, b22, ascale, 
00138             bscale, pp, qq, ss, wscale, safmax, wsmall, as11, as12, as22, sum,
00139              abi22;
00140 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00141 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
00142 
00143     a_dim1 = *lda;
00144     a_offset = 1 + a_dim1 * 1;
00145     a -= a_offset;
00146     b_dim1 = *ldb;
00147     b_offset = 1 + b_dim1 * 1;
00148     b -= b_offset;
00149 
00150     /* Function Body */
00151     rtmin = template_blas_sqrt(*safmin);
00152     rtmax = 1. / rtmin;
00153     safmax = 1. / *safmin;
00154 
00155 /*     Scale A   
00156 
00157    Computing MAX */
00158     d__5 = (d__1 = a_ref(1, 1), absMACRO(d__1)) + (d__2 = a_ref(2, 1), absMACRO(d__2)), 
00159             d__6 = (d__3 = a_ref(1, 2), absMACRO(d__3)) + (d__4 = a_ref(2, 2), absMACRO(
00160             d__4)), d__5 = maxMACRO(d__5,d__6);
00161     anorm = maxMACRO(d__5,*safmin);
00162     ascale = 1. / anorm;
00163     a11 = ascale * a_ref(1, 1);
00164     a21 = ascale * a_ref(2, 1);
00165     a12 = ascale * a_ref(1, 2);
00166     a22 = ascale * a_ref(2, 2);
00167 
00168 /*     Perturb B if necessary to insure non-singularity */
00169 
00170     b11 = b_ref(1, 1);
00171     b12 = b_ref(1, 2);
00172     b22 = b_ref(2, 2);
00173 /* Computing MAX */
00174     d__1 = absMACRO(b11), d__2 = absMACRO(b12), d__1 = maxMACRO(d__1,d__2), d__2 = absMACRO(b22), 
00175             d__1 = maxMACRO(d__1,d__2);
00176     bmin = rtmin * maxMACRO(d__1,rtmin);
00177     if (absMACRO(b11) < bmin) {
00178         b11 = template_lapack_d_sign(&bmin, &b11);
00179     }
00180     if (absMACRO(b22) < bmin) {
00181         b22 = template_lapack_d_sign(&bmin, &b22);
00182     }
00183 
00184 /*     Scale B   
00185 
00186    Computing MAX */
00187     d__1 = absMACRO(b11), d__2 = absMACRO(b12) + absMACRO(b22), d__1 = maxMACRO(d__1,d__2);
00188     bnorm = maxMACRO(d__1,*safmin);
00189 /* Computing MAX */
00190     d__1 = absMACRO(b11), d__2 = absMACRO(b22);
00191     bsize = maxMACRO(d__1,d__2);
00192     bscale = 1. / bsize;
00193     b11 *= bscale;
00194     b12 *= bscale;
00195     b22 *= bscale;
00196 
00197 /*     Compute larger eigenvalue by method described by C. van Loan   
00198 
00199        ( AS is A shifted by -SHIFT*B ) */
00200 
00201     binv11 = 1. / b11;
00202     binv22 = 1. / b22;
00203     s1 = a11 * binv11;
00204     s2 = a22 * binv22;
00205     if (absMACRO(s1) <= absMACRO(s2)) {
00206         as12 = a12 - s1 * b12;
00207         as22 = a22 - s1 * b22;
00208         ss = a21 * (binv11 * binv22);
00209         abi22 = as22 * binv22 - ss * b12;
00210         pp = abi22 * .5;
00211         shift = s1;
00212     } else {
00213         as12 = a12 - s2 * b12;
00214         as11 = a11 - s2 * b11;
00215         ss = a21 * (binv11 * binv22);
00216         abi22 = -ss * b12;
00217         pp = (as11 * binv11 + abi22) * .5;
00218         shift = s2;
00219     }
00220     qq = ss * as12;
00221     if ((d__1 = pp * rtmin, absMACRO(d__1)) >= 1.) {
00222 /* Computing 2nd power */
00223         d__1 = rtmin * pp;
00224         discr = d__1 * d__1 + qq * *safmin;
00225         r__ = template_blas_sqrt((absMACRO(discr))) * rtmax;
00226     } else {
00227 /* Computing 2nd power */
00228         d__1 = pp;
00229         if (d__1 * d__1 + absMACRO(qq) <= *safmin) {
00230 /* Computing 2nd power */
00231             d__1 = rtmax * pp;
00232             discr = d__1 * d__1 + qq * safmax;
00233             r__ = template_blas_sqrt((absMACRO(discr))) * rtmin;
00234         } else {
00235 /* Computing 2nd power */
00236             d__1 = pp;
00237             discr = d__1 * d__1 + qq;
00238             r__ = template_blas_sqrt((absMACRO(discr)));
00239         }
00240     }
00241 
00242 /*     Note: the test of R in the following IF is to cover the case when   
00243              DISCR is small and negative and is flushed to zero during   
00244              the calculation of R.  On machines which have a consistent   
00245              flush-to-zero threshhold and handle numbers above that   
00246              threshhold correctly, it would not be necessary. */
00247 
00248     if (discr >= 0. || r__ == 0.) {
00249         sum = pp + template_lapack_d_sign(&r__, &pp);
00250         diff = pp - template_lapack_d_sign(&r__, &pp);
00251         wbig = shift + sum;
00252 
00253 /*        Compute smaller eigenvalue */
00254 
00255         wsmall = shift + diff;
00256 /* Computing MAX */
00257         d__1 = absMACRO(wsmall);
00258         if (absMACRO(wbig) * .5 > maxMACRO(d__1,*safmin)) {
00259             wdet = (a11 * a22 - a12 * a21) * (binv11 * binv22);
00260             wsmall = wdet / wbig;
00261         }
00262 
00263 /*        Choose (real) eigenvalue closest to 2,2 element of A*B**(-1)   
00264           for WR1. */
00265 
00266         if (pp > abi22) {
00267             *wr1 = minMACRO(wbig,wsmall);
00268             *wr2 = maxMACRO(wbig,wsmall);
00269         } else {
00270             *wr1 = maxMACRO(wbig,wsmall);
00271             *wr2 = minMACRO(wbig,wsmall);
00272         }
00273         *wi = 0.;
00274     } else {
00275 
00276 /*        Complex eigenvalues */
00277 
00278         *wr1 = shift + pp;
00279         *wr2 = *wr1;
00280         *wi = r__;
00281     }
00282 
00283 /*     Further scaling to avoid underflow and overflow in computing   
00284        SCALE1 and overflow in computing w*B.   
00285 
00286        This scale factor (WSCALE) is bounded from above using C1 and C2,   
00287        and from below using C3 and C4.   
00288           C1 implements the condition  s A  must never overflow.   
00289           C2 implements the condition  w B  must never overflow.   
00290           C3, with C2,   
00291              implement the condition that s A - w B must never overflow.   
00292           C4 implements the condition  s    should not underflow.   
00293           C5 implements the condition  max(s,|w|) should be at least 2. */
00294 
00295     c1 = bsize * (*safmin * maxMACRO(1.,ascale));
00296     c2 = *safmin * maxMACRO(1.,bnorm);
00297     c3 = bsize * *safmin;
00298     if (ascale <= 1. && bsize <= 1.) {
00299 /* Computing MIN */
00300         d__1 = 1., d__2 = ascale / *safmin * bsize;
00301         c4 = minMACRO(d__1,d__2);
00302     } else {
00303         c4 = 1.;
00304     }
00305     if (ascale <= 1. || bsize <= 1.) {
00306 /* Computing MIN */
00307         d__1 = 1., d__2 = ascale * bsize;
00308         c5 = minMACRO(d__1,d__2);
00309     } else {
00310         c5 = 1.;
00311     }
00312 
00313 /*     Scale first eigenvalue */
00314 
00315     wabs = absMACRO(*wr1) + absMACRO(*wi);
00316 /* Computing MAX   
00317    Computing MIN */
00318     d__3 = c4, d__4 = maxMACRO(wabs,c5) * .5;
00319     d__1 = maxMACRO(*safmin,c1), d__2 = (wabs * c2 + c3) * 1.0000100000000001, 
00320             d__1 = maxMACRO(d__1,d__2), d__2 = minMACRO(d__3,d__4);
00321     wsize = maxMACRO(d__1,d__2);
00322     if (wsize != 1.) {
00323         wscale = 1. / wsize;
00324         if (wsize > 1.) {
00325             *scale1 = maxMACRO(ascale,bsize) * wscale * minMACRO(ascale,bsize);
00326         } else {
00327             *scale1 = minMACRO(ascale,bsize) * wscale * maxMACRO(ascale,bsize);
00328         }
00329         *wr1 *= wscale;
00330         if (*wi != 0.) {
00331             *wi *= wscale;
00332             *wr2 = *wr1;
00333             *scale2 = *scale1;
00334         }
00335     } else {
00336         *scale1 = ascale * bsize;
00337         *scale2 = *scale1;
00338     }
00339 
00340 /*     Scale second eigenvalue (if real) */
00341 
00342     if (*wi == 0.) {
00343 /* Computing MAX   
00344    Computing MIN   
00345    Computing MAX */
00346         d__5 = absMACRO(*wr2);
00347         d__3 = c4, d__4 = maxMACRO(d__5,c5) * .5;
00348         d__1 = maxMACRO(*safmin,c1), d__2 = (absMACRO(*wr2) * c2 + c3) * 
00349                 1.0000100000000001, d__1 = maxMACRO(d__1,d__2), d__2 = minMACRO(d__3,
00350                 d__4);
00351         wsize = maxMACRO(d__1,d__2);
00352         if (wsize != 1.) {
00353             wscale = 1. / wsize;
00354             if (wsize > 1.) {
00355                 *scale2 = maxMACRO(ascale,bsize) * wscale * minMACRO(ascale,bsize);
00356             } else {
00357                 *scale2 = minMACRO(ascale,bsize) * wscale * maxMACRO(ascale,bsize);
00358             }
00359             *wr2 *= wscale;
00360         } else {
00361             *scale2 = ascale * bsize;
00362         }
00363     }
00364 
00365 /*     End of DLAG2 */
00366 
00367     return 0;
00368 } /* dlag2_ */
00369 
00370 #undef b_ref
00371 #undef a_ref
00372 
00373 
00374 #endif

Generated on Mon Sep 17 14:32:56 2012 for ergo by  doxygen 1.4.7