template_blas_symm.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_BLAS_SYMM_HEADER
00036 #define TEMPLATE_BLAS_SYMM_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_blas_symm(const char *side, const char *uplo, const integer *m, const integer *n, 
00041         const Treal *alpha, const Treal *a, const integer *lda, const Treal *b, 
00042         const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc)
00043 {
00044     /* System generated locals */
00045     integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
00046             i__3;
00047     /* Local variables */
00048      integer info;
00049      Treal temp1, temp2;
00050      integer i__, j, k;
00051      integer nrowa;
00052      logical upper;
00053 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00054 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
00055 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
00056 /*  Purpose   
00057     =======   
00058     DSYMM  performs one of the matrix-matrix operations   
00059        C := alpha*A*B + beta*C,   
00060     or   
00061        C := alpha*B*A + beta*C,   
00062     where alpha and beta are scalars,  A is a symmetric matrix and  B and   
00063     C are  m by n matrices.   
00064     Parameters   
00065     ==========   
00066     SIDE   - CHARACTER*1.   
00067              On entry,  SIDE  specifies whether  the  symmetric matrix  A   
00068              appears on the  left or right  in the  operation as follows:   
00069                 SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,   
00070                 SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,   
00071              Unchanged on exit.   
00072     UPLO   - CHARACTER*1.   
00073              On  entry,   UPLO  specifies  whether  the  upper  or  lower   
00074              triangular  part  of  the  symmetric  matrix   A  is  to  be   
00075              referenced as follows:   
00076                 UPLO = 'U' or 'u'   Only the upper triangular part of the   
00077                                     symmetric matrix is to be referenced.   
00078                 UPLO = 'L' or 'l'   Only the lower triangular part of the   
00079                                     symmetric matrix is to be referenced.   
00080              Unchanged on exit.   
00081     M      - INTEGER.   
00082              On entry,  M  specifies the number of rows of the matrix  C.   
00083              M  must be at least zero.   
00084              Unchanged on exit.   
00085     N      - INTEGER.   
00086              On entry, N specifies the number of columns of the matrix C.   
00087              N  must be at least zero.   
00088              Unchanged on exit.   
00089     ALPHA  - DOUBLE PRECISION.   
00090              On entry, ALPHA specifies the scalar alpha.   
00091              Unchanged on exit.   
00092     A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is   
00093              m  when  SIDE = 'L' or 'l'  and is  n otherwise.   
00094              Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of   
00095              the array  A  must contain the  symmetric matrix,  such that   
00096              when  UPLO = 'U' or 'u', the leading m by m upper triangular   
00097              part of the array  A  must contain the upper triangular part   
00098              of the  symmetric matrix and the  strictly  lower triangular   
00099              part of  A  is not referenced,  and when  UPLO = 'L' or 'l',   
00100              the leading  m by m  lower triangular part  of the  array  A   
00101              must  contain  the  lower triangular part  of the  symmetric   
00102              matrix and the  strictly upper triangular part of  A  is not   
00103              referenced.   
00104              Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of   
00105              the array  A  must contain the  symmetric matrix,  such that   
00106              when  UPLO = 'U' or 'u', the leading n by n upper triangular   
00107              part of the array  A  must contain the upper triangular part   
00108              of the  symmetric matrix and the  strictly  lower triangular   
00109              part of  A  is not referenced,  and when  UPLO = 'L' or 'l',   
00110              the leading  n by n  lower triangular part  of the  array  A   
00111              must  contain  the  lower triangular part  of the  symmetric   
00112              matrix and the  strictly upper triangular part of  A  is not   
00113              referenced.   
00114              Unchanged on exit.   
00115     LDA    - INTEGER.   
00116              On entry, LDA specifies the first dimension of A as declared   
00117              in the calling (sub) program.  When  SIDE = 'L' or 'l'  then   
00118              LDA must be at least  max( 1, m ), otherwise  LDA must be at   
00119              least  max( 1, n ).   
00120              Unchanged on exit.   
00121     B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).   
00122              Before entry, the leading  m by n part of the array  B  must   
00123              contain the matrix B.   
00124              Unchanged on exit.   
00125     LDB    - INTEGER.   
00126              On entry, LDB specifies the first dimension of B as declared   
00127              in  the  calling  (sub)  program.   LDB  must  be  at  least   
00128              max( 1, m ).   
00129              Unchanged on exit.   
00130     BETA   - DOUBLE PRECISION.   
00131              On entry,  BETA  specifies the scalar  beta.  When  BETA  is   
00132              supplied as zero then C need not be set on input.   
00133              Unchanged on exit.   
00134     C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).   
00135              Before entry, the leading  m by n  part of the array  C must   
00136              contain the matrix  C,  except when  beta  is zero, in which   
00137              case C need not be set on entry.   
00138              On exit, the array  C  is overwritten by the  m by n updated   
00139              matrix.   
00140     LDC    - INTEGER.   
00141              On entry, LDC specifies the first dimension of C as declared   
00142              in  the  calling  (sub)  program.   LDC  must  be  at  least   
00143              max( 1, m ).   
00144              Unchanged on exit.   
00145     Level 3 Blas routine.   
00146     -- Written on 8-February-1989.   
00147        Jack Dongarra, Argonne National Laboratory.   
00148        Iain Duff, AERE Harwell.   
00149        Jeremy Du Croz, Numerical Algorithms Group Ltd.   
00150        Sven Hammarling, Numerical Algorithms Group Ltd.   
00151        Set NROWA as the number of rows of A.   
00152        Parameter adjustments */
00153     a_dim1 = *lda;
00154     a_offset = 1 + a_dim1 * 1;
00155     a -= a_offset;
00156     b_dim1 = *ldb;
00157     b_offset = 1 + b_dim1 * 1;
00158     b -= b_offset;
00159     c_dim1 = *ldc;
00160     c_offset = 1 + c_dim1 * 1;
00161     c__ -= c_offset;
00162     /* Function Body */
00163     if (template_blas_lsame(side, "L")) {
00164         nrowa = *m;
00165     } else {
00166         nrowa = *n;
00167     }
00168     upper = template_blas_lsame(uplo, "U");
00169 /*     Test the input parameters. */
00170     info = 0;
00171     if (! template_blas_lsame(side, "L") && ! template_blas_lsame(side, "R")) {
00172         info = 1;
00173     } else if (! upper && ! template_blas_lsame(uplo, "L")) {
00174         info = 2;
00175     } else if (*m < 0) {
00176         info = 3;
00177     } else if (*n < 0) {
00178         info = 4;
00179     } else if (*lda < maxMACRO(1,nrowa)) {
00180         info = 7;
00181     } else if (*ldb < maxMACRO(1,*m)) {
00182         info = 9;
00183     } else if (*ldc < maxMACRO(1,*m)) {
00184         info = 12;
00185     }
00186     if (info != 0) {
00187         template_blas_erbla("SYMM  ", &info);
00188         return 0;
00189     }
00190 /*     Quick return if possible. */
00191     if (*m == 0 || *n == 0 || ( *alpha == 0. && *beta == 1. ) ) {
00192         return 0;
00193     }
00194 /*     And when  alpha.eq.zero. */
00195     if (*alpha == 0.) {
00196         if (*beta == 0.) {
00197             i__1 = *n;
00198             for (j = 1; j <= i__1; ++j) {
00199                 i__2 = *m;
00200                 for (i__ = 1; i__ <= i__2; ++i__) {
00201                     c___ref(i__, j) = 0.;
00202 /* L10: */
00203                 }
00204 /* L20: */
00205             }
00206         } else {
00207             i__1 = *n;
00208             for (j = 1; j <= i__1; ++j) {
00209                 i__2 = *m;
00210                 for (i__ = 1; i__ <= i__2; ++i__) {
00211                     c___ref(i__, j) = *beta * c___ref(i__, j);
00212 /* L30: */
00213                 }
00214 /* L40: */
00215             }
00216         }
00217         return 0;
00218     }
00219 /*     Start the operations. */
00220     if (template_blas_lsame(side, "L")) {
00221 /*        Form  C := alpha*A*B + beta*C. */
00222         if (upper) {
00223             i__1 = *n;
00224             for (j = 1; j <= i__1; ++j) {
00225                 i__2 = *m;
00226                 for (i__ = 1; i__ <= i__2; ++i__) {
00227                     temp1 = *alpha * b_ref(i__, j);
00228                     temp2 = 0.;
00229                     i__3 = i__ - 1;
00230                     for (k = 1; k <= i__3; ++k) {
00231                         c___ref(k, j) = c___ref(k, j) + temp1 * a_ref(k, i__);
00232                         temp2 += b_ref(k, j) * a_ref(k, i__);
00233 /* L50: */
00234                     }
00235                     if (*beta == 0.) {
00236                         c___ref(i__, j) = temp1 * a_ref(i__, i__) + *alpha * 
00237                                 temp2;
00238                     } else {
00239                         c___ref(i__, j) = *beta * c___ref(i__, j) + temp1 * 
00240                                 a_ref(i__, i__) + *alpha * temp2;
00241                     }
00242 /* L60: */
00243                 }
00244 /* L70: */
00245             }
00246         } else {
00247             i__1 = *n;
00248             for (j = 1; j <= i__1; ++j) {
00249                 for (i__ = *m; i__ >= 1; --i__) {
00250                     temp1 = *alpha * b_ref(i__, j);
00251                     temp2 = 0.;
00252                     i__2 = *m;
00253                     for (k = i__ + 1; k <= i__2; ++k) {
00254                         c___ref(k, j) = c___ref(k, j) + temp1 * a_ref(k, i__);
00255                         temp2 += b_ref(k, j) * a_ref(k, i__);
00256 /* L80: */
00257                     }
00258                     if (*beta == 0.) {
00259                         c___ref(i__, j) = temp1 * a_ref(i__, i__) + *alpha * 
00260                                 temp2;
00261                     } else {
00262                         c___ref(i__, j) = *beta * c___ref(i__, j) + temp1 * 
00263                                 a_ref(i__, i__) + *alpha * temp2;
00264                     }
00265 /* L90: */
00266                 }
00267 /* L100: */
00268             }
00269         }
00270     } else {
00271 /*        Form  C := alpha*B*A + beta*C. */
00272         i__1 = *n;
00273         for (j = 1; j <= i__1; ++j) {
00274             temp1 = *alpha * a_ref(j, j);
00275             if (*beta == 0.) {
00276                 i__2 = *m;
00277                 for (i__ = 1; i__ <= i__2; ++i__) {
00278                     c___ref(i__, j) = temp1 * b_ref(i__, j);
00279 /* L110: */
00280                 }
00281             } else {
00282                 i__2 = *m;
00283                 for (i__ = 1; i__ <= i__2; ++i__) {
00284                     c___ref(i__, j) = *beta * c___ref(i__, j) + temp1 * b_ref(
00285                             i__, j);
00286 /* L120: */
00287                 }
00288             }
00289             i__2 = j - 1;
00290             for (k = 1; k <= i__2; ++k) {
00291                 if (upper) {
00292                     temp1 = *alpha * a_ref(k, j);
00293                 } else {
00294                     temp1 = *alpha * a_ref(j, k);
00295                 }
00296                 i__3 = *m;
00297                 for (i__ = 1; i__ <= i__3; ++i__) {
00298                     c___ref(i__, j) = c___ref(i__, j) + temp1 * b_ref(i__, k);
00299 /* L130: */
00300                 }
00301 /* L140: */
00302             }
00303             i__2 = *n;
00304             for (k = j + 1; k <= i__2; ++k) {
00305                 if (upper) {
00306                     temp1 = *alpha * a_ref(j, k);
00307                 } else {
00308                     temp1 = *alpha * a_ref(k, j);
00309                 }
00310                 i__3 = *m;
00311                 for (i__ = 1; i__ <= i__3; ++i__) {
00312                     c___ref(i__, j) = c___ref(i__, j) + temp1 * b_ref(i__, k);
00313 /* L150: */
00314                 }
00315 /* L160: */
00316             }
00317 /* L170: */
00318         }
00319     }
00320     return 0;
00321 /*     End of DSYMM . */
00322 } /* dsymm_ */
00323 #undef c___ref
00324 #undef b_ref
00325 #undef a_ref
00326 
00327 #endif

Generated on Wed Nov 21 09:32:01 2012 for ergo by  doxygen 1.4.7