template_lapack_sygs2.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_SYGS2_HEADER
00036 #define TEMPLATE_LAPACK_SYGS2_HEADER
00037 
00038 #include "template_lapack_common.h"
00039 
00040 template<class Treal>
00041 int template_lapack_sygs2(const integer *itype, const char *uplo, const integer *n, 
00042         Treal *a, const integer *lda, Treal *b, const integer *ldb, integer *
00043         info)
00044 {
00045 /*  -- LAPACK routine (version 3.0) --   
00046        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00047        Courant Institute, Argonne National Lab, and Rice University   
00048        February 29, 1992   
00049 
00050 
00051     Purpose   
00052     =======   
00053 
00054     DSYGS2 reduces a real symmetric-definite generalized eigenproblem   
00055     to standard form.   
00056 
00057     If ITYPE = 1, the problem is A*x = lambda*B*x,   
00058     and A is overwritten by inv(U')*A*inv(U) or inv(L)*A*inv(L')   
00059 
00060     If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or   
00061     B*A*x = lambda*x, and A is overwritten by U*A*U` or L'*A*L.   
00062 
00063     B must have been previously factorized as U'*U or L*L' by DPOTRF.   
00064 
00065     Arguments   
00066     =========   
00067 
00068     ITYPE   (input) INTEGER   
00069             = 1: compute inv(U')*A*inv(U) or inv(L)*A*inv(L');   
00070             = 2 or 3: compute U*A*U' or L'*A*L.   
00071 
00072     UPLO    (input) CHARACTER   
00073             Specifies whether the upper or lower triangular part of the   
00074             symmetric matrix A is stored, and how B has been factorized.   
00075             = 'U':  Upper triangular   
00076             = 'L':  Lower triangular   
00077 
00078     N       (input) INTEGER   
00079             The order of the matrices A and B.  N >= 0.   
00080 
00081     A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
00082             On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
00083             n by n upper triangular part of A contains the upper   
00084             triangular part of the matrix A, and the strictly lower   
00085             triangular part of A is not referenced.  If UPLO = 'L', the   
00086             leading n by n lower triangular part of A contains the lower   
00087             triangular part of the matrix A, and the strictly upper   
00088             triangular part of A is not referenced.   
00089 
00090             On exit, if INFO = 0, the transformed matrix, stored in the   
00091             same format as A.   
00092 
00093     LDA     (input) INTEGER   
00094             The leading dimension of the array A.  LDA >= max(1,N).   
00095 
00096     B       (input) DOUBLE PRECISION array, dimension (LDB,N)   
00097             The triangular factor from the Cholesky factorization of B,   
00098             as returned by DPOTRF.   
00099 
00100     LDB     (input) INTEGER   
00101             The leading dimension of the array B.  LDB >= max(1,N).   
00102 
00103     INFO    (output) INTEGER   
00104             = 0:  successful exit.   
00105             < 0:  if INFO = -i, the i-th argument had an illegal value.   
00106 
00107     =====================================================================   
00108 
00109 
00110        Test the input parameters.   
00111 
00112        Parameter adjustments */
00113     /* Table of constant values */
00114      Treal c_b6 = -1.;
00115      integer c__1 = 1;
00116      Treal c_b27 = 1.;
00117     
00118     /* System generated locals */
00119     integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
00120     Treal d__1;
00121     /* Local variables */
00122      integer k;
00123      logical upper;
00124      Treal ct;
00125      Treal akk, bkk;
00126 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00127 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
00128 
00129 
00130     a_dim1 = *lda;
00131     a_offset = 1 + a_dim1 * 1;
00132     a -= a_offset;
00133     b_dim1 = *ldb;
00134     b_offset = 1 + b_dim1 * 1;
00135     b -= b_offset;
00136 
00137     /* Function Body */
00138     *info = 0;
00139     upper = template_blas_lsame(uplo, "U");
00140     if (*itype < 1 || *itype > 3) {
00141         *info = -1;
00142     } else if (! upper && ! template_blas_lsame(uplo, "L")) {
00143         *info = -2;
00144     } else if (*n < 0) {
00145         *info = -3;
00146     } else if (*lda < maxMACRO(1,*n)) {
00147         *info = -5;
00148     } else if (*ldb < maxMACRO(1,*n)) {
00149         *info = -7;
00150     }
00151     if (*info != 0) {
00152         i__1 = -(*info);
00153         template_blas_erbla("SYGS2 ", &i__1);
00154         return 0;
00155     }
00156 
00157     if (*itype == 1) {
00158         if (upper) {
00159 
00160 /*           Compute inv(U')*A*inv(U) */
00161 
00162             i__1 = *n;
00163             for (k = 1; k <= i__1; ++k) {
00164 
00165 /*              Update the upper triangle of A(k:n,k:n) */
00166 
00167                 akk = a_ref(k, k);
00168                 bkk = b_ref(k, k);
00169 /* Computing 2nd power */
00170                 d__1 = bkk;
00171                 akk /= d__1 * d__1;
00172                 a_ref(k, k) = akk;
00173                 if (k < *n) {
00174                     i__2 = *n - k;
00175                     d__1 = 1. / bkk;
00176                     template_blas_scal(&i__2, &d__1, &a_ref(k, k + 1), lda);
00177                     ct = akk * -.5;
00178                     i__2 = *n - k;
00179                     template_blas_axpy(&i__2, &ct, &b_ref(k, k + 1), ldb, &a_ref(k, k + 1)
00180                             , lda);
00181                     i__2 = *n - k;
00182                     template_blas_syr2(uplo, &i__2, &c_b6, &a_ref(k, k + 1), 
00183                                         lda, &b_ref(k, k + 1), ldb, 
00184                                         &a_ref(k + 1, k + 1), lda);
00185                     i__2 = *n - k;
00186                     template_blas_axpy(&i__2, &ct, &b_ref(k, k + 1), ldb, &a_ref(k, k + 1)
00187                             , lda);
00188                     i__2 = *n - k;
00189                     template_blas_trsv(uplo, "Transpose", "Non-unit", &i__2, &b_ref(k + 1,
00190                              k + 1), ldb, &a_ref(k, k + 1), lda);
00191                 }
00192 /* L10: */
00193             }
00194         } else {
00195 
00196 /*           Compute inv(L)*A*inv(L') */
00197 
00198             i__1 = *n;
00199             for (k = 1; k <= i__1; ++k) {
00200 
00201 /*              Update the lower triangle of A(k:n,k:n) */
00202 
00203                 akk = a_ref(k, k);
00204                 bkk = b_ref(k, k);
00205 /* Computing 2nd power */
00206                 d__1 = bkk;
00207                 akk /= d__1 * d__1;
00208                 a_ref(k, k) = akk;
00209                 if (k < *n) {
00210                     i__2 = *n - k;
00211                     d__1 = 1. / bkk;
00212                     template_blas_scal(&i__2, &d__1, &a_ref(k + 1, k), &c__1);
00213                     ct = akk * -.5;
00214                     i__2 = *n - k;
00215                     template_blas_axpy(&i__2, &ct, &b_ref(k + 1, k), &c__1, &a_ref(k + 1, 
00216                             k), &c__1);
00217                     i__2 = *n - k;
00218                     template_blas_syr2(uplo, &i__2, &c_b6, &a_ref(k + 1, k), 
00219                                        &c__1, &b_ref(k + 1, k), 
00220                                        &c__1, &a_ref(k + 1, k + 1), 
00221                                        lda);
00222 
00223                     i__2 = *n - k;
00224                     template_blas_axpy(&i__2, &ct, &b_ref(k + 1, k), &c__1, &a_ref(k + 1, 
00225                             k), &c__1);
00226                     i__2 = *n - k;
00227                     template_blas_trsv(uplo, "No transpose", "Non-unit", &i__2, &b_ref(k 
00228                             + 1, k + 1), ldb, &a_ref(k + 1, k), &c__1);
00229                 }
00230 /* L20: */
00231             }
00232         }
00233     } else {
00234         if (upper) {
00235 
00236 /*           Compute U*A*U' */
00237 
00238             i__1 = *n;
00239             for (k = 1; k <= i__1; ++k) {
00240 
00241 /*              Update the upper triangle of A(1:k,1:k) */
00242 
00243                 akk = a_ref(k, k);
00244                 bkk = b_ref(k, k);
00245                 i__2 = k - 1;
00246                 template_blas_trmv(uplo, "No transpose", "Non-unit", &i__2, &b[b_offset], 
00247                         ldb, &a_ref(1, k), &c__1);
00248                 ct = akk * .5;
00249                 i__2 = k - 1;
00250                 template_blas_axpy(&i__2, &ct, &b_ref(1, k), &c__1, &a_ref(1, k), &c__1);
00251                 i__2 = k - 1;
00252                 template_blas_syr2(uplo, &i__2, &c_b27, &a_ref(1, k), &c__1, &b_ref(1, k),
00253                                    &c__1, &a[a_offset], lda);
00254                 i__2 = k - 1;
00255                 template_blas_axpy(&i__2, &ct, &b_ref(1, k), &c__1, &a_ref(1, k), &c__1);
00256                 i__2 = k - 1;
00257                 template_blas_scal(&i__2, &bkk, &a_ref(1, k), &c__1);
00258 /* Computing 2nd power */
00259                 d__1 = bkk;
00260                 a_ref(k, k) = akk * (d__1 * d__1);
00261 /* L30: */
00262             }
00263         } else {
00264 
00265 /*           Compute L'*A*L */
00266 
00267             i__1 = *n;
00268             for (k = 1; k <= i__1; ++k) {
00269 
00270 /*              Update the lower triangle of A(1:k,1:k) */
00271 
00272                 akk = a_ref(k, k);
00273                 bkk = b_ref(k, k);
00274                 i__2 = k - 1;
00275                 template_blas_trmv(uplo, "Transpose", "Non-unit", &i__2, &b[b_offset], 
00276                         ldb, &a_ref(k, 1), lda);
00277                 ct = akk * .5;
00278                 i__2 = k - 1;
00279                 template_blas_axpy(&i__2, &ct, &b_ref(k, 1), ldb, &a_ref(k, 1), lda);
00280                 i__2 = k - 1;
00281                 template_blas_syr2(uplo, &i__2, &c_b27, &a_ref(k, 1), lda, &b_ref(k, 1), 
00282                                    ldb, &a[a_offset], lda);
00283                 i__2 = k - 1;
00284                 template_blas_axpy(&i__2, &ct, &b_ref(k, 1), ldb, &a_ref(k, 1), lda);
00285                 i__2 = k - 1;
00286                 template_blas_scal(&i__2, &bkk, &a_ref(k, 1), lda);
00287 /* Computing 2nd power */
00288                 d__1 = bkk;
00289                 a_ref(k, k) = akk * (d__1 * d__1);
00290 /* L40: */
00291             }
00292         }
00293     }
00294     return 0;
00295 
00296 /*     End of DSYGS2 */
00297 
00298 } /* dsygs2_ */
00299 
00300 #undef b_ref
00301 #undef a_ref
00302 
00303 
00304 #endif

Generated on Mon Sep 17 14:32:56 2012 for ergo by  doxygen 1.4.7