00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_SYTRD_HEADER 00036 #define TEMPLATE_LAPACK_SYTRD_HEADER 00037 00038 #include "template_lapack_common.h" 00039 00040 template<class Treal> 00041 int template_lapack_sytrd(const char *uplo, const integer *n, Treal *a, const integer * 00042 lda, Treal *d__, Treal *e, Treal *tau, Treal * 00043 work, const integer *lwork, integer *info) 00044 { 00045 /* -- LAPACK routine (version 3.0) -- 00046 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00047 Courant Institute, Argonne National Lab, and Rice University 00048 June 30, 1999 00049 00050 00051 Purpose 00052 ======= 00053 00054 DSYTRD reduces a real symmetric matrix A to real symmetric 00055 tridiagonal form T by an orthogonal similarity transformation: 00056 Q**T * A * Q = T. 00057 00058 Arguments 00059 ========= 00060 00061 UPLO (input) CHARACTER*1 00062 = 'U': Upper triangle of A is stored; 00063 = 'L': Lower triangle of A is stored. 00064 00065 N (input) INTEGER 00066 The order of the matrix A. N >= 0. 00067 00068 A (input/output) DOUBLE PRECISION array, dimension (LDA,N) 00069 On entry, the symmetric matrix A. If UPLO = 'U', the leading 00070 N-by-N upper triangular part of A contains the upper 00071 triangular part of the matrix A, and the strictly lower 00072 triangular part of A is not referenced. If UPLO = 'L', the 00073 leading N-by-N lower triangular part of A contains the lower 00074 triangular part of the matrix A, and the strictly upper 00075 triangular part of A is not referenced. 00076 On exit, if UPLO = 'U', the diagonal and first superdiagonal 00077 of A are overwritten by the corresponding elements of the 00078 tridiagonal matrix T, and the elements above the first 00079 superdiagonal, with the array TAU, represent the orthogonal 00080 matrix Q as a product of elementary reflectors; if UPLO 00081 = 'L', the diagonal and first subdiagonal of A are over- 00082 written by the corresponding elements of the tridiagonal 00083 matrix T, and the elements below the first subdiagonal, with 00084 the array TAU, represent the orthogonal matrix Q as a product 00085 of elementary reflectors. See Further Details. 00086 00087 LDA (input) INTEGER 00088 The leading dimension of the array A. LDA >= max(1,N). 00089 00090 D (output) DOUBLE PRECISION array, dimension (N) 00091 The diagonal elements of the tridiagonal matrix T: 00092 D(i) = A(i,i). 00093 00094 E (output) DOUBLE PRECISION array, dimension (N-1) 00095 The off-diagonal elements of the tridiagonal matrix T: 00096 E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. 00097 00098 TAU (output) DOUBLE PRECISION array, dimension (N-1) 00099 The scalar factors of the elementary reflectors (see Further 00100 Details). 00101 00102 WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) 00103 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 00104 00105 LWORK (input) INTEGER 00106 The dimension of the array WORK. LWORK >= 1. 00107 For optimum performance LWORK >= N*NB, where NB is the 00108 optimal blocksize. 00109 00110 If LWORK = -1, then a workspace query is assumed; the routine 00111 only calculates the optimal size of the WORK array, returns 00112 this value as the first entry of the WORK array, and no error 00113 message related to LWORK is issued by XERBLA. 00114 00115 INFO (output) INTEGER 00116 = 0: successful exit 00117 < 0: if INFO = -i, the i-th argument had an illegal value 00118 00119 Further Details 00120 =============== 00121 00122 If UPLO = 'U', the matrix Q is represented as a product of elementary 00123 reflectors 00124 00125 Q = H(n-1) . . . H(2) H(1). 00126 00127 Each H(i) has the form 00128 00129 H(i) = I - tau * v * v' 00130 00131 where tau is a real scalar, and v is a real vector with 00132 v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in 00133 A(1:i-1,i+1), and tau in TAU(i). 00134 00135 If UPLO = 'L', the matrix Q is represented as a product of elementary 00136 reflectors 00137 00138 Q = H(1) H(2) . . . H(n-1). 00139 00140 Each H(i) has the form 00141 00142 H(i) = I - tau * v * v' 00143 00144 where tau is a real scalar, and v is a real vector with 00145 v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), 00146 and tau in TAU(i). 00147 00148 The contents of A on exit are illustrated by the following examples 00149 with n = 5: 00150 00151 if UPLO = 'U': if UPLO = 'L': 00152 00153 ( d e v2 v3 v4 ) ( d ) 00154 ( d e v3 v4 ) ( e d ) 00155 ( d e v4 ) ( v1 e d ) 00156 ( d e ) ( v1 v2 e d ) 00157 ( d ) ( v1 v2 v3 e d ) 00158 00159 where d and e denote diagonal and off-diagonal elements of T, and vi 00160 denotes an element of the vector defining H(i). 00161 00162 ===================================================================== 00163 00164 00165 Test the input parameters 00166 00167 Parameter adjustments */ 00168 /* Table of constant values */ 00169 integer c__1 = 1; 00170 integer c_n1 = -1; 00171 integer c__3 = 3; 00172 integer c__2 = 2; 00173 Treal c_b22 = -1.; 00174 Treal c_b23 = 1.; 00175 00176 /* System generated locals */ 00177 integer a_dim1, a_offset, i__1, i__2, i__3; 00178 /* Local variables */ 00179 integer i__, j; 00180 integer nbmin, iinfo; 00181 logical upper; 00182 integer nb, kk, nx; 00183 integer ldwork, lwkopt; 00184 logical lquery; 00185 integer iws; 00186 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00187 00188 00189 a_dim1 = *lda; 00190 a_offset = 1 + a_dim1 * 1; 00191 a -= a_offset; 00192 --d__; 00193 --e; 00194 --tau; 00195 --work; 00196 00197 /* Initialization added by Elias to get rid of compiler warnings. */ 00198 lwkopt = 0; 00199 /* Function Body */ 00200 *info = 0; 00201 upper = template_blas_lsame(uplo, "U"); 00202 lquery = *lwork == -1; 00203 if (! upper && ! template_blas_lsame(uplo, "L")) { 00204 *info = -1; 00205 } else if (*n < 0) { 00206 *info = -2; 00207 } else if (*lda < maxMACRO(1,*n)) { 00208 *info = -4; 00209 } else if (*lwork < 1 && ! lquery) { 00210 *info = -9; 00211 } 00212 00213 if (*info == 0) { 00214 00215 /* Determine the block size. */ 00216 00217 nb = template_lapack_ilaenv(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, 00218 (ftnlen)1); 00219 lwkopt = *n * nb; 00220 work[1] = (Treal) lwkopt; 00221 } 00222 00223 if (*info != 0) { 00224 i__1 = -(*info); 00225 template_blas_erbla("SYTRD ", &i__1); 00226 return 0; 00227 } else if (lquery) { 00228 return 0; 00229 } 00230 00231 /* Quick return if possible */ 00232 00233 if (*n == 0) { 00234 work[1] = 1.; 00235 return 0; 00236 } 00237 00238 nx = *n; 00239 iws = 1; 00240 if (nb > 1 && nb < *n) { 00241 00242 /* Determine when to cross over from blocked to unblocked code 00243 (last block is always handled by unblocked code). 00244 00245 Computing MAX */ 00246 i__1 = nb, i__2 = template_lapack_ilaenv(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, & 00247 c_n1, (ftnlen)6, (ftnlen)1); 00248 nx = maxMACRO(i__1,i__2); 00249 if (nx < *n) { 00250 00251 /* Determine if workspace is large enough for blocked code. */ 00252 00253 ldwork = *n; 00254 iws = ldwork * nb; 00255 if (*lwork < iws) { 00256 00257 /* Not enough workspace to use optimal NB: determine the 00258 minimum value of NB, and reduce NB or force use of 00259 unblocked code by setting NX = N. 00260 00261 Computing MAX */ 00262 i__1 = *lwork / ldwork; 00263 nb = maxMACRO(i__1,1); 00264 nbmin = template_lapack_ilaenv(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, 00265 (ftnlen)6, (ftnlen)1); 00266 if (nb < nbmin) { 00267 nx = *n; 00268 } 00269 } 00270 } else { 00271 nx = *n; 00272 } 00273 } else { 00274 nb = 1; 00275 } 00276 00277 if (upper) { 00278 00279 /* Reduce the upper triangle of A. 00280 Columns 1:kk are handled by the unblocked method. */ 00281 00282 kk = *n - (*n - nx + nb - 1) / nb * nb; 00283 i__1 = kk + 1; 00284 i__2 = -nb; 00285 for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 00286 i__2) { 00287 00288 /* Reduce columns i:i+nb-1 to tridiagonal form and form the 00289 matrix W which is needed to update the unreduced part of 00290 the matrix */ 00291 00292 i__3 = i__ + nb - 1; 00293 template_lapack_latrd(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], & 00294 work[1], &ldwork); 00295 00296 /* Update the unreduced submatrix A(1:i-1,1:i-1), using an 00297 update of the form: A := A - V*W' - W*V' */ 00298 00299 i__3 = i__ - 1; 00300 template_blas_syr2k(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(1, i__), 00301 lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda); 00302 00303 /* Copy superdiagonal elements back into A, and diagonal 00304 elements into D */ 00305 00306 i__3 = i__ + nb - 1; 00307 for (j = i__; j <= i__3; ++j) { 00308 a_ref(j - 1, j) = e[j - 1]; 00309 d__[j] = a_ref(j, j); 00310 /* L10: */ 00311 } 00312 /* L20: */ 00313 } 00314 00315 /* Use unblocked code to reduce the last or only block */ 00316 00317 template_lapack_sytd2(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo); 00318 } else { 00319 00320 /* Reduce the lower triangle of A */ 00321 00322 i__2 = *n - nx; 00323 i__1 = nb; 00324 for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { 00325 00326 /* Reduce columns i:i+nb-1 to tridiagonal form and form the 00327 matrix W which is needed to update the unreduced part of 00328 the matrix */ 00329 00330 i__3 = *n - i__ + 1; 00331 template_lapack_latrd(uplo, &i__3, &nb, &a_ref(i__, i__), lda, &e[i__], &tau[ 00332 i__], &work[1], &ldwork); 00333 00334 /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using 00335 an update of the form: A := A - V*W' - W*V' */ 00336 00337 i__3 = *n - i__ - nb + 1; 00338 template_blas_syr2k(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(i__ + nb, 00339 i__), lda, &work[nb + 1], &ldwork, &c_b23, &a_ref(i__ + 00340 nb, i__ + nb), lda); 00341 00342 /* Copy subdiagonal elements back into A, and diagonal 00343 elements into D */ 00344 00345 i__3 = i__ + nb - 1; 00346 for (j = i__; j <= i__3; ++j) { 00347 a_ref(j + 1, j) = e[j]; 00348 d__[j] = a_ref(j, j); 00349 /* L30: */ 00350 } 00351 /* L40: */ 00352 } 00353 00354 /* Use unblocked code to reduce the last or only block */ 00355 00356 i__1 = *n - i__ + 1; 00357 template_lapack_sytd2(uplo, &i__1, &a_ref(i__, i__), lda, &d__[i__], &e[i__], &tau[ 00358 i__], &iinfo); 00359 } 00360 00361 work[1] = (Treal) lwkopt; 00362 return 0; 00363 00364 /* End of DSYTRD */ 00365 00366 } /* dsytrd_ */ 00367 00368 #undef a_ref 00369 00370 00371 #endif