template_lapack_sygst.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_SYGST_HEADER
00036 #define TEMPLATE_LAPACK_SYGST_HEADER
00037 
00038 #include "template_lapack_sygs2.h"
00039 
00040 template<class Treal>
00041 int template_lapack_sygst(const integer *itype, const char *uplo, const integer *n, 
00042         Treal *a, const integer *lda, Treal *b, const integer *ldb, integer *
00043         info)
00044 {
00045 /*  -- LAPACK routine (version 3.0) --   
00046        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00047        Courant Institute, Argonne National Lab, and Rice University   
00048        September 30, 1994   
00049 
00050 
00051     Purpose   
00052     =======   
00053 
00054     DSYGST reduces a real symmetric-definite generalized eigenproblem   
00055     to standard form.   
00056 
00057     If ITYPE = 1, the problem is A*x = lambda*B*x,   
00058     and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)   
00059 
00060     If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or   
00061     B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.   
00062 
00063     B must have been previously factorized as U**T*U or L*L**T by DPOTRF.   
00064 
00065     Arguments   
00066     =========   
00067 
00068     ITYPE   (input) INTEGER   
00069             = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T);   
00070             = 2 or 3: compute U*A*U**T or L**T*A*L.   
00071 
00072     UPLO    (input) CHARACTER   
00073             = 'U':  Upper triangle of A is stored and B is factored as   
00074                     U**T*U;   
00075             = 'L':  Lower triangle of A is stored and B is factored as   
00076                     L*L**T.   
00077 
00078     N       (input) INTEGER   
00079             The order of the matrices A and B.  N >= 0.   
00080 
00081     A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
00082             On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
00083             N-by-N upper triangular part of A contains the upper   
00084             triangular part of the matrix A, and the strictly lower   
00085             triangular part of A is not referenced.  If UPLO = 'L', the   
00086             leading N-by-N lower triangular part of A contains the lower   
00087             triangular part of the matrix A, and the strictly upper   
00088             triangular part of A is not referenced.   
00089 
00090             On exit, if INFO = 0, the transformed matrix, stored in the   
00091             same format as A.   
00092 
00093     LDA     (input) INTEGER   
00094             The leading dimension of the array A.  LDA >= max(1,N).   
00095 
00096     B       (input) DOUBLE PRECISION array, dimension (LDB,N)   
00097             The triangular factor from the Cholesky factorization of B,   
00098             as returned by DPOTRF.   
00099 
00100     LDB     (input) INTEGER   
00101             The leading dimension of the array B.  LDB >= max(1,N).   
00102 
00103     INFO    (output) INTEGER   
00104             = 0:  successful exit   
00105             < 0:  if INFO = -i, the i-th argument had an illegal value   
00106 
00107     =====================================================================   
00108 
00109 
00110        Test the input parameters.   
00111 
00112        Parameter adjustments */
00113     /* Table of constant values */
00114      integer c__1 = 1;
00115      integer c_n1 = -1;
00116      Treal c_b14 = 1.;
00117      Treal c_b16 = -.5;
00118      Treal c_b19 = -1.;
00119      Treal c_b52 = .5;
00120     
00121     /* System generated locals */
00122     integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
00123     /* Local variables */
00124      integer k;
00125      logical upper;
00126      integer kb;
00127      integer nb;
00128 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00129 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
00130 
00131 
00132     a_dim1 = *lda;
00133     a_offset = 1 + a_dim1 * 1;
00134     a -= a_offset;
00135     b_dim1 = *ldb;
00136     b_offset = 1 + b_dim1 * 1;
00137     b -= b_offset;
00138 
00139     /* Function Body */
00140     *info = 0;
00141     upper = template_blas_lsame(uplo, "U");
00142     if (*itype < 1 || *itype > 3) {
00143         *info = -1;
00144     } else if (! upper && ! template_blas_lsame(uplo, "L")) {
00145         *info = -2;
00146     } else if (*n < 0) {
00147         *info = -3;
00148     } else if (*lda < maxMACRO(1,*n)) {
00149         *info = -5;
00150     } else if (*ldb < maxMACRO(1,*n)) {
00151         *info = -7;
00152     }
00153     if (*info != 0) {
00154         i__1 = -(*info);
00155         template_blas_erbla("SYGST ", &i__1);
00156         return 0;
00157     }
00158 
00159 /*     Quick return if possible */
00160 
00161     if (*n == 0) {
00162         return 0;
00163     }
00164 
00165 /*     Determine the block size for this environment. */
00166 
00167     nb = template_lapack_ilaenv(&c__1, "DSYGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
00168             ftnlen)1);
00169 
00170     if (nb <= 1 || nb >= *n) {
00171 
00172 /*        Use unblocked code */
00173 
00174         template_lapack_sygs2(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
00175     } else {
00176 
00177 /*        Use blocked code */
00178 
00179         if (*itype == 1) {
00180             if (upper) {
00181 
00182 /*              Compute inv(U')*A*inv(U) */
00183 
00184                 i__1 = *n;
00185                 i__2 = nb;
00186                 for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
00187 /* Computing MIN */
00188                     i__3 = *n - k + 1;
00189                     kb = minMACRO(i__3,nb);
00190 
00191 /*                 Update the upper triangle of A(k:n,k:n) */
00192 
00193                     template_lapack_sygs2(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
00194                              ldb, info);
00195                     if (k + kb <= *n) {
00196                         i__3 = *n - k - kb + 1;
00197                         template_blas_trsm("Left", uplo, "Transpose", "Non-unit", &kb, &
00198                                 i__3, &c_b14, &b_ref(k, k), ldb, &a_ref(k, k 
00199                                 + kb), lda);
00200                         i__3 = *n - k - kb + 1;
00201                         template_blas_symm("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k),
00202                                  lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref(
00203                                 k, k + kb), lda);
00204                         i__3 = *n - k - kb + 1;
00205                         template_blas_syr2k(uplo, "Transpose", &i__3, &kb, &c_b19, &a_ref(
00206                                 k, k + kb), lda, &b_ref(k, k + kb), ldb, &
00207                                 c_b14, &a_ref(k + kb, k + kb), lda);
00208                         i__3 = *n - k - kb + 1;
00209                         template_blas_symm("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k),
00210                                  lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref(
00211                                 k, k + kb), lda);
00212                         i__3 = *n - k - kb + 1;
00213                         template_blas_trsm("Right", uplo, "No transpose", "Non-unit", &kb,
00214                                  &i__3, &c_b14, &b_ref(k + kb, k + kb), ldb, &
00215                                 a_ref(k, k + kb), lda);
00216                     }
00217 /* L10: */
00218                 }
00219             } else {
00220 
00221 /*              Compute inv(L)*A*inv(L') */
00222 
00223                 i__2 = *n;
00224                 i__1 = nb;
00225                 for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
00226 /* Computing MIN */
00227                     i__3 = *n - k + 1;
00228                     kb = minMACRO(i__3,nb);
00229 
00230 /*                 Update the lower triangle of A(k:n,k:n) */
00231 
00232                     template_lapack_sygs2(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
00233                              ldb, info);
00234                     if (k + kb <= *n) {
00235                         i__3 = *n - k - kb + 1;
00236                         template_blas_trsm("Right", uplo, "Transpose", "Non-unit", &i__3, 
00237                                 &kb, &c_b14, &b_ref(k, k), ldb, &a_ref(k + kb,
00238                                  k), lda);
00239                         i__3 = *n - k - kb + 1;
00240                         template_blas_symm("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k)
00241                                 , lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref(
00242                                 k + kb, k), lda);
00243                         i__3 = *n - k - kb + 1;
00244                         template_blas_syr2k(uplo, "No transpose", &i__3, &kb, &c_b19, &
00245                                 a_ref(k + kb, k), lda, &b_ref(k + kb, k), ldb,
00246                                  &c_b14, &a_ref(k + kb, k + kb), lda);
00247                         i__3 = *n - k - kb + 1;
00248                         template_blas_symm("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k)
00249                                 , lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref(
00250                                 k + kb, k), lda);
00251                         i__3 = *n - k - kb + 1;
00252                         template_blas_trsm("Left", uplo, "No transpose", "Non-unit", &
00253                                 i__3, &kb, &c_b14, &b_ref(k + kb, k + kb), 
00254                                 ldb, &a_ref(k + kb, k), lda);
00255                     }
00256 /* L20: */
00257                 }
00258             }
00259         } else {
00260             if (upper) {
00261 
00262 /*              Compute U*A*U' */
00263 
00264                 i__1 = *n;
00265                 i__2 = nb;
00266                 for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
00267 /* Computing MIN */
00268                     i__3 = *n - k + 1;
00269                     kb = minMACRO(i__3,nb);
00270 
00271 /*                 Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */
00272 
00273                     i__3 = k - 1;
00274                     template_blas_trmm("Left", uplo, "No transpose", "Non-unit", &i__3, &
00275                             kb, &c_b14, &b[b_offset], ldb, &a_ref(1, k), lda);
00276                     i__3 = k - 1;
00277                     template_blas_symm("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), 
00278                             lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda);
00279                     i__3 = k - 1;
00280                     template_blas_syr2k(uplo, "No transpose", &i__3, &kb, &c_b14, &a_ref(
00281                             1, k), lda, &b_ref(1, k), ldb, &c_b14, &a[
00282                             a_offset], lda);
00283                     i__3 = k - 1;
00284                     template_blas_symm("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), 
00285                             lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda);
00286                     i__3 = k - 1;
00287                     template_blas_trmm("Right", uplo, "Transpose", "Non-unit", &i__3, &kb,
00288                              &c_b14, &b_ref(k, k), ldb, &a_ref(1, k), lda);
00289                     template_lapack_sygs2(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
00290                              ldb, info);
00291 /* L30: */
00292                 }
00293             } else {
00294 
00295 /*              Compute L'*A*L */
00296 
00297                 i__2 = *n;
00298                 i__1 = nb;
00299                 for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
00300 /* Computing MIN */
00301                     i__3 = *n - k + 1;
00302                     kb = minMACRO(i__3,nb);
00303 
00304 /*                 Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */
00305 
00306                     i__3 = k - 1;
00307                     template_blas_trmm("Right", uplo, "No transpose", "Non-unit", &kb, &
00308                             i__3, &c_b14, &b[b_offset], ldb, &a_ref(k, 1), 
00309                             lda);
00310                     i__3 = k - 1;
00311                     template_blas_symm("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), 
00312                             lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda);
00313                     i__3 = k - 1;
00314                     template_blas_syr2k(uplo, "Transpose", &i__3, &kb, &c_b14, &a_ref(k, 
00315                             1), lda, &b_ref(k, 1), ldb, &c_b14, &a[a_offset], 
00316                             lda);
00317                     i__3 = k - 1;
00318                     template_blas_symm("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), 
00319                             lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda);
00320                     i__3 = k - 1;
00321                     template_blas_trmm("Left", uplo, "Transpose", "Non-unit", &kb, &i__3, 
00322                             &c_b14, &b_ref(k, k), ldb, &a_ref(k, 1), lda);
00323                     template_lapack_sygs2(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
00324                              ldb, info);
00325 /* L40: */
00326                 }
00327             }
00328         }
00329     }
00330     return 0;
00331 
00332 /*     End of DSYGST */
00333 
00334 } /* dsygst_ */
00335 
00336 #undef b_ref
00337 #undef a_ref
00338 
00339 
00340 #endif

Generated on Wed Nov 21 09:32:37 2012 for ergo by  doxygen 1.4.7