00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LARRB_HEADER 00036 #define TEMPLATE_LAPACK_LARRB_HEADER 00037 00038 template<class Treal> 00039 int template_lapack_larrb(integer *n, Treal *d__, Treal *lld, 00040 integer *ifirst, integer *ilast, Treal *rtol1, Treal *rtol2, 00041 integer *offset, Treal *w, Treal *wgap, Treal *werr, 00042 Treal *work, integer *iwork, Treal *pivmin, Treal * 00043 spdiam, integer *twist, integer *info) 00044 { 00045 /* System generated locals */ 00046 integer i__1; 00047 Treal d__1, d__2; 00048 00049 /* Local variables */ 00050 integer i__, k, r__, i1, ii, ip; 00051 Treal gap, mid, tmp, back, lgap, rgap, left; 00052 integer iter, nint, prev, next; 00053 Treal cvrgd, right, width; 00054 integer negcnt; 00055 Treal mnwdth; 00056 integer olnint, maxitr; 00057 00058 00059 /* -- LAPACK auxiliary routine (version 3.2) -- */ 00060 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ 00061 /* November 2006 */ 00062 00063 /* .. Scalar Arguments .. */ 00064 /* .. */ 00065 /* .. Array Arguments .. */ 00066 /* .. */ 00067 00068 /* Purpose */ 00069 /* ======= */ 00070 00071 /* Given the relatively robust representation(RRR) L D L^T, DLARRB */ 00072 /* does "limited" bisection to refine the eigenvalues of L D L^T, */ 00073 /* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */ 00074 /* guesses for these eigenvalues are input in W, the corresponding estimate */ 00075 /* of the error in these guesses and their gaps are input in WERR */ 00076 /* and WGAP, respectively. During bisection, intervals */ 00077 /* [left, right] are maintained by storing their mid-points and */ 00078 /* semi-widths in the arrays W and WERR respectively. */ 00079 00080 /* Arguments */ 00081 /* ========= */ 00082 00083 /* N (input) INTEGER */ 00084 /* The order of the matrix. */ 00085 00086 /* D (input) DOUBLE PRECISION array, dimension (N) */ 00087 /* The N diagonal elements of the diagonal matrix D. */ 00088 00089 /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ 00090 /* The (N-1) elements L(i)*L(i)*D(i). */ 00091 00092 /* IFIRST (input) INTEGER */ 00093 /* The index of the first eigenvalue to be computed. */ 00094 00095 /* ILAST (input) INTEGER */ 00096 /* The index of the last eigenvalue to be computed. */ 00097 00098 /* RTOL1 (input) DOUBLE PRECISION */ 00099 /* RTOL2 (input) DOUBLE PRECISION */ 00100 /* Tolerance for the convergence of the bisection intervals. */ 00101 /* An interval [LEFT,RIGHT] has converged if */ 00102 /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ 00103 /* where GAP is the (estimated) distance to the nearest */ 00104 /* eigenvalue. */ 00105 00106 /* OFFSET (input) INTEGER */ 00107 /* Offset for the arrays W, WGAP and WERR, i.e., the IFIRST-OFFSET */ 00108 /* through ILAST-OFFSET elements of these arrays are to be used. */ 00109 00110 /* W (input/output) DOUBLE PRECISION array, dimension (N) */ 00111 /* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */ 00112 /* estimates of the eigenvalues of L D L^T indexed IFIRST throug */ 00113 /* ILAST. */ 00114 /* On output, these estimates are refined. */ 00115 00116 /* WGAP (input/output) DOUBLE PRECISION array, dimension (N-1) */ 00117 /* On input, the (estimated) gaps between consecutive */ 00118 /* eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between */ 00119 /* eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST */ 00120 /* then WGAP(IFIRST-OFFSET) must be set to ZERO. */ 00121 /* On output, these gaps are refined. */ 00122 00123 /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ 00124 /* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */ 00125 /* the errors in the estimates of the corresponding elements in W. */ 00126 /* On output, these errors are refined. */ 00127 00128 /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ 00129 /* Workspace. */ 00130 00131 /* IWORK (workspace) INTEGER array, dimension (2*N) */ 00132 /* Workspace. */ 00133 00134 /* PIVMIN (input) DOUBLE PRECISION */ 00135 /* The minimum pivot in the Sturm sequence. */ 00136 00137 /* SPDIAM (input) DOUBLE PRECISION */ 00138 /* The spectral diameter of the matrix. */ 00139 00140 /* TWIST (input) INTEGER */ 00141 /* The twist index for the twisted factorization that is used */ 00142 /* for the negcount. */ 00143 /* TWIST = N: Compute negcount from L D L^T - LAMBDA I = L+ D+ L+^T */ 00144 /* TWIST = 1: Compute negcount from L D L^T - LAMBDA I = U- D- U-^T */ 00145 /* TWIST = R: Compute negcount from L D L^T - LAMBDA I = N(r) D(r) N(r) */ 00146 00147 /* INFO (output) INTEGER */ 00148 /* Error flag. */ 00149 00150 /* Further Details */ 00151 /* =============== */ 00152 00153 /* Based on contributions by */ 00154 /* Beresford Parlett, University of California, Berkeley, USA */ 00155 /* Jim Demmel, University of California, Berkeley, USA */ 00156 /* Inderjit Dhillon, University of Texas, Austin, USA */ 00157 /* Osni Marques, LBNL/NERSC, USA */ 00158 /* Christof Voemel, University of California, Berkeley, USA */ 00159 00160 /* ===================================================================== */ 00161 00162 /* .. Parameters .. */ 00163 /* .. */ 00164 /* .. Local Scalars .. */ 00165 /* .. */ 00166 /* .. External Functions .. */ 00167 00168 /* .. */ 00169 /* .. Intrinsic Functions .. */ 00170 /* .. */ 00171 /* .. Executable Statements .. */ 00172 00173 /* Parameter adjustments */ 00174 --iwork; 00175 --work; 00176 --werr; 00177 --wgap; 00178 --w; 00179 --lld; 00180 --d__; 00181 00182 /* Function Body */ 00183 *info = 0; 00184 00185 maxitr = (integer) ((template_blas_log(*spdiam + *pivmin) - template_blas_log(*pivmin)) / template_blas_log(2.)) + 00186 2; 00187 mnwdth = *pivmin * 2.; 00188 00189 r__ = *twist; 00190 if (r__ < 1 || r__ > *n) { 00191 r__ = *n; 00192 } 00193 00194 /* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */ 00195 /* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */ 00196 /* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) */ 00197 /* for an unconverged interval is set to the index of the next unconverged */ 00198 /* interval, and is -1 or 0 for a converged interval. Thus a linked */ 00199 /* list of unconverged intervals is set up. */ 00200 00201 i1 = *ifirst; 00202 /* The number of unconverged intervals */ 00203 nint = 0; 00204 /* The last unconverged interval found */ 00205 prev = 0; 00206 rgap = wgap[i1 - *offset]; 00207 i__1 = *ilast; 00208 for (i__ = i1; i__ <= i__1; ++i__) { 00209 k = i__ << 1; 00210 ii = i__ - *offset; 00211 left = w[ii] - werr[ii]; 00212 right = w[ii] + werr[ii]; 00213 lgap = rgap; 00214 rgap = wgap[ii]; 00215 gap = minMACRO(lgap,rgap); 00216 /* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */ 00217 /* Compute negcount from dstqds facto L+D+L+^T = L D L^T - LEFT */ 00218 00219 /* Do while( NEGCNT(LEFT).GT.I-1 ) */ 00220 00221 back = werr[ii]; 00222 L20: 00223 negcnt = template_lapack_laneg(n, &d__[1], &lld[1], &left, pivmin, &r__); 00224 if (negcnt > i__ - 1) { 00225 left -= back; 00226 back *= 2.; 00227 goto L20; 00228 } 00229 00230 /* Do while( NEGCNT(RIGHT).LT.I ) */ 00231 /* Compute negcount from dstqds facto L+D+L+^T = L D L^T - RIGHT */ 00232 00233 back = werr[ii]; 00234 L50: 00235 negcnt = template_lapack_laneg(n, &d__[1], &lld[1], &right, pivmin, &r__); 00236 if (negcnt < i__) { 00237 right += back; 00238 back *= 2.; 00239 goto L50; 00240 } 00241 width = (d__1 = left - right, absMACRO(d__1)) * .5; 00242 /* Computing MAX */ 00243 d__1 = absMACRO(left), d__2 = absMACRO(right); 00244 tmp = maxMACRO(d__1,d__2); 00245 /* Computing MAX */ 00246 d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp; 00247 cvrgd = maxMACRO(d__1,d__2); 00248 if (width <= cvrgd || width <= mnwdth) { 00249 /* This interval has already converged and does not need refinement. */ 00250 /* (Note that the gaps might change through refining the */ 00251 /* eigenvalues, however, they can only get bigger.) */ 00252 /* Remove it from the list. */ 00253 iwork[k - 1] = -1; 00254 /* Make sure that I1 always points to the first unconverged interval */ 00255 if (i__ == i1 && i__ < *ilast) { 00256 i1 = i__ + 1; 00257 } 00258 if (prev >= i1 && i__ <= *ilast) { 00259 iwork[(prev << 1) - 1] = i__ + 1; 00260 } 00261 } else { 00262 /* unconverged interval found */ 00263 prev = i__; 00264 ++nint; 00265 iwork[k - 1] = i__ + 1; 00266 iwork[k] = negcnt; 00267 } 00268 work[k - 1] = left; 00269 work[k] = right; 00270 /* L75: */ 00271 } 00272 00273 /* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */ 00274 /* and while (ITER.LT.MAXITR) */ 00275 00276 iter = 0; 00277 L80: 00278 prev = i1 - 1; 00279 i__ = i1; 00280 olnint = nint; 00281 i__1 = olnint; 00282 for (ip = 1; ip <= i__1; ++ip) { 00283 k = i__ << 1; 00284 ii = i__ - *offset; 00285 rgap = wgap[ii]; 00286 lgap = rgap; 00287 if (ii > 1) { 00288 lgap = wgap[ii - 1]; 00289 } 00290 gap = minMACRO(lgap,rgap); 00291 next = iwork[k - 1]; 00292 left = work[k - 1]; 00293 right = work[k]; 00294 mid = (left + right) * .5; 00295 /* semiwidth of interval */ 00296 width = right - mid; 00297 /* Computing MAX */ 00298 d__1 = absMACRO(left), d__2 = absMACRO(right); 00299 tmp = maxMACRO(d__1,d__2); 00300 /* Computing MAX */ 00301 d__1 = *rtol1 * gap, d__2 = *rtol2 * tmp; 00302 cvrgd = maxMACRO(d__1,d__2); 00303 if (width <= cvrgd || width <= mnwdth || iter == maxitr) { 00304 /* reduce number of unconverged intervals */ 00305 --nint; 00306 /* Mark interval as converged. */ 00307 iwork[k - 1] = 0; 00308 if (i1 == i__) { 00309 i1 = next; 00310 } else { 00311 /* Prev holds the last unconverged interval previously examined */ 00312 if (prev >= i1) { 00313 iwork[(prev << 1) - 1] = next; 00314 } 00315 } 00316 i__ = next; 00317 goto L100; 00318 } 00319 prev = i__; 00320 00321 /* Perform one bisection step */ 00322 00323 negcnt = template_lapack_laneg(n, &d__[1], &lld[1], &mid, pivmin, &r__); 00324 if (negcnt <= i__ - 1) { 00325 work[k - 1] = mid; 00326 } else { 00327 work[k] = mid; 00328 } 00329 i__ = next; 00330 L100: 00331 ; 00332 } 00333 ++iter; 00334 /* do another loop if there are still unconverged intervals */ 00335 /* However, in the last iteration, all intervals are accepted */ 00336 /* since this is the best we can do. */ 00337 if (nint > 0 && iter <= maxitr) { 00338 goto L80; 00339 } 00340 00341 00342 /* At this point, all the intervals have converged */ 00343 i__1 = *ilast; 00344 for (i__ = *ifirst; i__ <= i__1; ++i__) { 00345 k = i__ << 1; 00346 ii = i__ - *offset; 00347 /* All intervals marked by '0' have been refined. */ 00348 if (iwork[k - 1] == 0) { 00349 w[ii] = (work[k - 1] + work[k]) * .5; 00350 werr[ii] = work[k] - w[ii]; 00351 } 00352 /* L110: */ 00353 } 00354 00355 i__1 = *ilast; 00356 for (i__ = *ifirst + 1; i__ <= i__1; ++i__) { 00357 k = i__ << 1; 00358 ii = i__ - *offset; 00359 /* Computing MAX */ 00360 d__1 = 0., d__2 = w[ii] - werr[ii] - w[ii - 1] - werr[ii - 1]; 00361 wgap[ii - 1] = maxMACRO(d__1,d__2); 00362 /* L111: */ 00363 } 00364 return 0; 00365 00366 /* End of DLARRB */ 00367 00368 } /* dlarrb_ */ 00369 00370 #endif