template_lapack_latrd.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_LATRD_HEADER
00036 #define TEMPLATE_LAPACK_LATRD_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_lapack_latrd(const char *uplo, const integer *n, const integer *nb, Treal *
00041         a, const integer *lda, Treal *e, Treal *tau, Treal *w, 
00042         const integer *ldw)
00043 {
00044 /*  -- LAPACK auxiliary routine (version 3.0) --   
00045        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00046        Courant Institute, Argonne National Lab, and Rice University   
00047        October 31, 1992   
00048 
00049 
00050     Purpose   
00051     =======   
00052 
00053     DLATRD reduces NB rows and columns of a real symmetric matrix A to   
00054     symmetric tridiagonal form by an orthogonal similarity   
00055     transformation Q' * A * Q, and returns the matrices V and W which are   
00056     needed to apply the transformation to the unreduced part of A.   
00057 
00058     If UPLO = 'U', DLATRD reduces the last NB rows and columns of a   
00059     matrix, of which the upper triangle is supplied;   
00060     if UPLO = 'L', DLATRD reduces the first NB rows and columns of a   
00061     matrix, of which the lower triangle is supplied.   
00062 
00063     This is an auxiliary routine called by DSYTRD.   
00064 
00065     Arguments   
00066     =========   
00067 
00068     UPLO    (input) CHARACTER   
00069             Specifies whether the upper or lower triangular part of the   
00070             symmetric matrix A is stored:   
00071             = 'U': Upper triangular   
00072             = 'L': Lower triangular   
00073 
00074     N       (input) INTEGER   
00075             The order of the matrix A.   
00076 
00077     NB      (input) INTEGER   
00078             The number of rows and columns to be reduced.   
00079 
00080     A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
00081             On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
00082             n-by-n upper triangular part of A contains the upper   
00083             triangular part of the matrix A, and the strictly lower   
00084             triangular part of A is not referenced.  If UPLO = 'L', the   
00085             leading n-by-n lower triangular part of A contains the lower   
00086             triangular part of the matrix A, and the strictly upper   
00087             triangular part of A is not referenced.   
00088             On exit:   
00089             if UPLO = 'U', the last NB columns have been reduced to   
00090               tridiagonal form, with the diagonal elements overwriting   
00091               the diagonal elements of A; the elements above the diagonal   
00092               with the array TAU, represent the orthogonal matrix Q as a   
00093               product of elementary reflectors;   
00094             if UPLO = 'L', the first NB columns have been reduced to   
00095               tridiagonal form, with the diagonal elements overwriting   
00096               the diagonal elements of A; the elements below the diagonal   
00097               with the array TAU, represent the  orthogonal matrix Q as a   
00098               product of elementary reflectors.   
00099             See Further Details.   
00100 
00101     LDA     (input) INTEGER   
00102             The leading dimension of the array A.  LDA >= (1,N).   
00103 
00104     E       (output) DOUBLE PRECISION array, dimension (N-1)   
00105             If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal   
00106             elements of the last NB columns of the reduced matrix;   
00107             if UPLO = 'L', E(1:nb) contains the subdiagonal elements of   
00108             the first NB columns of the reduced matrix.   
00109 
00110     TAU     (output) DOUBLE PRECISION array, dimension (N-1)   
00111             The scalar factors of the elementary reflectors, stored in   
00112             TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.   
00113             See Further Details.   
00114 
00115     W       (output) DOUBLE PRECISION array, dimension (LDW,NB)   
00116             The n-by-nb matrix W required to update the unreduced part   
00117             of A.   
00118 
00119     LDW     (input) INTEGER   
00120             The leading dimension of the array W. LDW >= max(1,N).   
00121 
00122     Further Details   
00123     ===============   
00124 
00125     If UPLO = 'U', the matrix Q is represented as a product of elementary   
00126     reflectors   
00127 
00128        Q = H(n) H(n-1) . . . H(n-nb+1).   
00129 
00130     Each H(i) has the form   
00131 
00132        H(i) = I - tau * v * v'   
00133 
00134     where tau is a real scalar, and v is a real vector with   
00135     v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),   
00136     and tau in TAU(i-1).   
00137 
00138     If UPLO = 'L', the matrix Q is represented as a product of elementary   
00139     reflectors   
00140 
00141        Q = H(1) H(2) . . . H(nb).   
00142 
00143     Each H(i) has the form   
00144 
00145        H(i) = I - tau * v * v'   
00146 
00147     where tau is a real scalar, and v is a real vector with   
00148     v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),   
00149     and tau in TAU(i).   
00150 
00151     The elements of the vectors v together form the n-by-nb matrix V   
00152     which is needed, with W, to apply the transformation to the unreduced   
00153     part of the matrix, using a symmetric rank-2k update of the form:   
00154     A := A - V*W' - W*V'.   
00155 
00156     The contents of A on exit are illustrated by the following examples   
00157     with n = 5 and nb = 2:   
00158 
00159     if UPLO = 'U':                       if UPLO = 'L':   
00160 
00161       (  a   a   a   v4  v5 )              (  d                  )   
00162       (      a   a   v4  v5 )              (  1   d              )   
00163       (          a   1   v5 )              (  v1  1   a          )   
00164       (              d   1  )              (  v1  v2  a   a      )   
00165       (                  d  )              (  v1  v2  a   a   a  )   
00166 
00167     where d denotes a diagonal element of the reduced matrix, a denotes   
00168     an element of the original matrix that is unchanged, and vi denotes   
00169     an element of the vector defining H(i).   
00170 
00171     =====================================================================   
00172 
00173 
00174        Quick return if possible   
00175 
00176        Parameter adjustments */
00177     /* Table of constant values */
00178      Treal c_b5 = -1.;
00179      Treal c_b6 = 1.;
00180      integer c__1 = 1;
00181      Treal c_b16 = 0.;
00182     
00183     /* System generated locals */
00184     integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3;
00185     /* Local variables */
00186      integer i__;
00187      Treal alpha;
00188      integer iw;
00189 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00190 #define w_ref(a_1,a_2) w[(a_2)*w_dim1 + a_1]
00191 
00192 
00193     a_dim1 = *lda;
00194     a_offset = 1 + a_dim1 * 1;
00195     a -= a_offset;
00196     --e;
00197     --tau;
00198     w_dim1 = *ldw;
00199     w_offset = 1 + w_dim1 * 1;
00200     w -= w_offset;
00201 
00202     /* Function Body */
00203     if (*n <= 0) {
00204         return 0;
00205     }
00206 
00207     if (template_blas_lsame(uplo, "U")) {
00208 
00209 /*        Reduce last NB columns of upper triangle */
00210 
00211         i__1 = *n - *nb + 1;
00212         for (i__ = *n; i__ >= i__1; --i__) {
00213             iw = i__ - *n + *nb;
00214             if (i__ < *n) {
00215 
00216 /*              Update A(1:i,i) */
00217 
00218                 i__2 = *n - i__;
00219                 template_blas_gemv("No transpose", &i__, &i__2, &c_b5, &a_ref(1, i__ + 1),
00220                          lda, &w_ref(i__, iw + 1), ldw, &c_b6, &a_ref(1, i__),
00221                          &c__1);
00222                 i__2 = *n - i__;
00223                 template_blas_gemv("No transpose", &i__, &i__2, &c_b5, &w_ref(1, iw + 1), 
00224                         ldw, &a_ref(i__, i__ + 1), lda, &c_b6, &a_ref(1, i__),
00225                          &c__1);
00226             }
00227             if (i__ > 1) {
00228 
00229 /*              Generate elementary reflector H(i) to annihilate   
00230                 A(1:i-2,i) */
00231 
00232                 i__2 = i__ - 1;
00233                 template_lapack_larfg(&i__2, &a_ref(i__ - 1, i__), &a_ref(1, i__), &c__1, &
00234                         tau[i__ - 1]);
00235                 e[i__ - 1] = a_ref(i__ - 1, i__);
00236                 a_ref(i__ - 1, i__) = 1.;
00237 
00238 /*              Compute W(1:i-1,i) */
00239 
00240                 i__2 = i__ - 1;
00241                 template_blas_symv("Upper", &i__2, &c_b6, &a[a_offset], lda, &a_ref(1, 
00242                         i__), &c__1, &c_b16, &w_ref(1, iw), &c__1);
00243                 if (i__ < *n) {
00244                     i__2 = i__ - 1;
00245                     i__3 = *n - i__;
00246                     template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &w_ref(1, iw + 1)
00247                             , ldw, &a_ref(1, i__), &c__1, &c_b16, &w_ref(i__ 
00248                             + 1, iw), &c__1);
00249                     i__2 = i__ - 1;
00250                     i__3 = *n - i__;
00251                     template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__ 
00252                             + 1), lda, &w_ref(i__ + 1, iw), &c__1, &c_b6, &
00253                             w_ref(1, iw), &c__1);
00254                     i__2 = i__ - 1;
00255                     i__3 = *n - i__;
00256                     template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &a_ref(1, i__ + 
00257                             1), lda, &a_ref(1, i__), &c__1, &c_b16, &w_ref(
00258                             i__ + 1, iw), &c__1);
00259                     i__2 = i__ - 1;
00260                     i__3 = *n - i__;
00261                     template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(1, iw 
00262                             + 1), ldw, &w_ref(i__ + 1, iw), &c__1, &c_b6, &
00263                             w_ref(1, iw), &c__1);
00264                 }
00265                 i__2 = i__ - 1;
00266                 template_blas_scal(&i__2, &tau[i__ - 1], &w_ref(1, iw), &c__1);
00267                 i__2 = i__ - 1;
00268                 alpha = tau[i__ - 1] * -.5 * template_blas_dot(&i__2, &w_ref(1, iw), &
00269                         c__1, &a_ref(1, i__), &c__1);
00270                 i__2 = i__ - 1;
00271                 template_blas_axpy(&i__2, &alpha, &a_ref(1, i__), &c__1, &w_ref(1, iw), &
00272                         c__1);
00273             }
00274 
00275 /* L10: */
00276         }
00277     } else {
00278 
00279 /*        Reduce first NB columns of lower triangle */
00280 
00281         i__1 = *nb;
00282         for (i__ = 1; i__ <= i__1; ++i__) {
00283 
00284 /*           Update A(i:n,i) */
00285 
00286             i__2 = *n - i__ + 1;
00287             i__3 = i__ - 1;
00288             template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__, 1), lda, &
00289                     w_ref(i__, 1), ldw, &c_b6, &a_ref(i__, i__), &c__1);
00290             i__2 = *n - i__ + 1;
00291             i__3 = i__ - 1;
00292             template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__, 1), ldw, &
00293                     a_ref(i__, 1), lda, &c_b6, &a_ref(i__, i__), &c__1);
00294             if (i__ < *n) {
00295 
00296 /*              Generate elementary reflector H(i) to annihilate   
00297                 A(i+2:n,i)   
00298 
00299    Computing MIN */
00300                 i__2 = i__ + 2;
00301                 i__3 = *n - i__;
00302                 template_lapack_larfg(&i__3, &a_ref(i__ + 1, i__), &a_ref(minMACRO(i__2,*n), i__)
00303                         , &c__1, &tau[i__]);
00304                 e[i__] = a_ref(i__ + 1, i__);
00305                 a_ref(i__ + 1, i__) = 1.;
00306 
00307 /*              Compute W(i+1:n,i) */
00308 
00309                 i__2 = *n - i__;
00310                 template_blas_symv("Lower", &i__2, &c_b6, &a_ref(i__ + 1, i__ + 1), lda, &
00311                         a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(i__ + 1, 
00312                         i__), &c__1);
00313                 i__2 = *n - i__;
00314                 i__3 = i__ - 1;
00315                 template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &w_ref(i__ + 1, 1), 
00316                         ldw, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1, 
00317                         i__), &c__1);
00318                 i__2 = *n - i__;
00319                 i__3 = i__ - 1;
00320                 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 1)
00321                         , lda, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1, 
00322                         i__), &c__1);
00323                 i__2 = *n - i__;
00324                 i__3 = i__ - 1;
00325                 template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &a_ref(i__ + 1, 1), 
00326                         lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1, 
00327                         i__), &c__1);
00328                 i__2 = *n - i__;
00329                 i__3 = i__ - 1;
00330                 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__ + 1, 1)
00331                         , ldw, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1, 
00332                         i__), &c__1);
00333                 i__2 = *n - i__;
00334                 template_blas_scal(&i__2, &tau[i__], &w_ref(i__ + 1, i__), &c__1);
00335                 i__2 = *n - i__;
00336                 alpha = tau[i__] * -.5 * template_blas_dot(&i__2, &w_ref(i__ + 1, i__), &
00337                         c__1, &a_ref(i__ + 1, i__), &c__1);
00338                 i__2 = *n - i__;
00339                 template_blas_axpy(&i__2, &alpha, &a_ref(i__ + 1, i__), &c__1, &w_ref(i__ 
00340                         + 1, i__), &c__1);
00341             }
00342 
00343 /* L20: */
00344         }
00345     }
00346 
00347     return 0;
00348 
00349 /*     End of DLATRD */
00350 
00351 } /* dlatrd_ */
00352 
00353 #undef w_ref
00354 #undef a_ref
00355 
00356 
00357 #endif

Generated on Wed Nov 21 09:32:01 2012 for ergo by  doxygen 1.4.7