template_blas_trmm.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_BLAS_TRMM_HEADER
00036 #define TEMPLATE_BLAS_TRMM_HEADER
00037 
00038 #include "template_blas_common.h"
00039 
00040 template<class Treal>
00041 int template_blas_trmm(const char *side, const char *uplo, const char *transa, const char *diag, 
00042         const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *
00043         lda, Treal *b, const integer *ldb)
00044 {
00045     /* System generated locals */
00046     integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
00047     /* Local variables */
00048      integer info;
00049      Treal temp;
00050      integer i__, j, k;
00051      logical lside;
00052      integer nrowa;
00053      logical upper;
00054      logical nounit;
00055 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00056 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
00057 /*  Purpose   
00058     =======   
00059     DTRMM  performs one of the matrix-matrix operations   
00060        B := alpha*op( A )*B,   or   B := alpha*B*op( A ),   
00061     where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or   
00062     non-unit,  upper or lower triangular matrix  and  op( A )  is one  of   
00063        op( A ) = A   or   op( A ) = A'.   
00064     Parameters   
00065     ==========   
00066     SIDE   - CHARACTER*1.   
00067              On entry,  SIDE specifies whether  op( A ) multiplies B from   
00068              the left or right as follows:   
00069                 SIDE = 'L' or 'l'   B := alpha*op( A )*B.   
00070                 SIDE = 'R' or 'r'   B := alpha*B*op( A ).   
00071              Unchanged on exit.   
00072     UPLO   - CHARACTER*1.   
00073              On entry, UPLO specifies whether the matrix A is an upper or   
00074              lower triangular matrix as follows:   
00075                 UPLO = 'U' or 'u'   A is an upper triangular matrix.   
00076                 UPLO = 'L' or 'l'   A is a lower triangular matrix.   
00077              Unchanged on exit.   
00078     TRANSA - CHARACTER*1.   
00079              On entry, TRANSA specifies the form of op( A ) to be used in   
00080              the matrix multiplication as follows:   
00081                 TRANSA = 'N' or 'n'   op( A ) = A.   
00082                 TRANSA = 'T' or 't'   op( A ) = A'.   
00083                 TRANSA = 'C' or 'c'   op( A ) = A'.   
00084              Unchanged on exit.   
00085     DIAG   - CHARACTER*1.   
00086              On entry, DIAG specifies whether or not A is unit triangular   
00087              as follows:   
00088                 DIAG = 'U' or 'u'   A is assumed to be unit triangular.   
00089                 DIAG = 'N' or 'n'   A is not assumed to be unit   
00090                                     triangular.   
00091              Unchanged on exit.   
00092     M      - INTEGER.   
00093              On entry, M specifies the number of rows of B. M must be at   
00094              least zero.   
00095              Unchanged on exit.   
00096     N      - INTEGER.   
00097              On entry, N specifies the number of columns of B.  N must be   
00098              at least zero.   
00099              Unchanged on exit.   
00100     ALPHA  - DOUBLE PRECISION.   
00101              On entry,  ALPHA specifies the scalar  alpha. When  alpha is   
00102              zero then  A is not referenced and  B need not be set before   
00103              entry.   
00104              Unchanged on exit.   
00105     A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m   
00106              when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.   
00107              Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k   
00108              upper triangular part of the array  A must contain the upper   
00109              triangular matrix  and the strictly lower triangular part of   
00110              A is not referenced.   
00111              Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k   
00112              lower triangular part of the array  A must contain the lower   
00113              triangular matrix  and the strictly upper triangular part of   
00114              A is not referenced.   
00115              Note that when  DIAG = 'U' or 'u',  the diagonal elements of   
00116              A  are not referenced either,  but are assumed to be  unity.   
00117              Unchanged on exit.   
00118     LDA    - INTEGER.   
00119              On entry, LDA specifies the first dimension of A as declared   
00120              in the calling (sub) program.  When  SIDE = 'L' or 'l'  then   
00121              LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'   
00122              then LDA must be at least max( 1, n ).   
00123              Unchanged on exit.   
00124     B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).   
00125              Before entry,  the leading  m by n part of the array  B must   
00126              contain the matrix  B,  and  on exit  is overwritten  by the   
00127              transformed matrix.   
00128     LDB    - INTEGER.   
00129              On entry, LDB specifies the first dimension of B as declared   
00130              in  the  calling  (sub)  program.   LDB  must  be  at  least   
00131              max( 1, m ).   
00132              Unchanged on exit.   
00133     Level 3 Blas routine.   
00134     -- Written on 8-February-1989.   
00135        Jack Dongarra, Argonne National Laboratory.   
00136        Iain Duff, AERE Harwell.   
00137        Jeremy Du Croz, Numerical Algorithms Group Ltd.   
00138        Sven Hammarling, Numerical Algorithms Group Ltd.   
00139        Test the input parameters.   
00140        Parameter adjustments */
00141     a_dim1 = *lda;
00142     a_offset = 1 + a_dim1 * 1;
00143     a -= a_offset;
00144     b_dim1 = *ldb;
00145     b_offset = 1 + b_dim1 * 1;
00146     b -= b_offset;
00147     /* Function Body */
00148     lside = template_blas_lsame(side, "L");
00149     if (lside) {
00150         nrowa = *m;
00151     } else {
00152         nrowa = *n;
00153     }
00154     nounit = template_blas_lsame(diag, "N");
00155     upper = template_blas_lsame(uplo, "U");
00156     info = 0;
00157     if (! lside && ! template_blas_lsame(side, "R")) {
00158         info = 1;
00159     } else if (! upper && ! template_blas_lsame(uplo, "L")) {
00160         info = 2;
00161     } else if (! template_blas_lsame(transa, "N") && ! template_blas_lsame(transa,
00162              "T") && ! template_blas_lsame(transa, "C")) {
00163         info = 3;
00164     } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag, 
00165             "N")) {
00166         info = 4;
00167     } else if (*m < 0) {
00168         info = 5;
00169     } else if (*n < 0) {
00170         info = 6;
00171     } else if (*lda < maxMACRO(1,nrowa)) {
00172         info = 9;
00173     } else if (*ldb < maxMACRO(1,*m)) {
00174         info = 11;
00175     }
00176     if (info != 0) {
00177         template_blas_erbla("TRMM  ", &info);
00178         return 0;
00179     }
00180 /*     Quick return if possible. */
00181     if (*n == 0) {
00182         return 0;
00183     }
00184 /*     And when  alpha.eq.zero. */
00185     if (*alpha == 0.) {
00186         i__1 = *n;
00187         for (j = 1; j <= i__1; ++j) {
00188             i__2 = *m;
00189             for (i__ = 1; i__ <= i__2; ++i__) {
00190                 b_ref(i__, j) = 0.;
00191 /* L10: */
00192             }
00193 /* L20: */
00194         }
00195         return 0;
00196     }
00197 /*     Start the operations. */
00198     if (lside) {
00199         if (template_blas_lsame(transa, "N")) {
00200 /*           Form  B := alpha*A*B. */
00201             if (upper) {
00202                 i__1 = *n;
00203                 for (j = 1; j <= i__1; ++j) {
00204                     i__2 = *m;
00205                     for (k = 1; k <= i__2; ++k) {
00206                         if (b_ref(k, j) != 0.) {
00207                             temp = *alpha * b_ref(k, j);
00208                             i__3 = k - 1;
00209                             for (i__ = 1; i__ <= i__3; ++i__) {
00210                                 b_ref(i__, j) = b_ref(i__, j) + temp * a_ref(
00211                                         i__, k);
00212 /* L30: */
00213                             }
00214                             if (nounit) {
00215                                 temp *= a_ref(k, k);
00216                             }
00217                             b_ref(k, j) = temp;
00218                         }
00219 /* L40: */
00220                     }
00221 /* L50: */
00222                 }
00223             } else {
00224                 i__1 = *n;
00225                 for (j = 1; j <= i__1; ++j) {
00226                     for (k = *m; k >= 1; --k) {
00227                         if (b_ref(k, j) != 0.) {
00228                             temp = *alpha * b_ref(k, j);
00229                             b_ref(k, j) = temp;
00230                             if (nounit) {
00231                                 b_ref(k, j) = b_ref(k, j) * a_ref(k, k);
00232                             }
00233                             i__2 = *m;
00234                             for (i__ = k + 1; i__ <= i__2; ++i__) {
00235                                 b_ref(i__, j) = b_ref(i__, j) + temp * a_ref(
00236                                         i__, k);
00237 /* L60: */
00238                             }
00239                         }
00240 /* L70: */
00241                     }
00242 /* L80: */
00243                 }
00244             }
00245         } else {
00246 /*           Form  B := alpha*A'*B. */
00247             if (upper) {
00248                 i__1 = *n;
00249                 for (j = 1; j <= i__1; ++j) {
00250                     for (i__ = *m; i__ >= 1; --i__) {
00251                         temp = b_ref(i__, j);
00252                         if (nounit) {
00253                             temp *= a_ref(i__, i__);
00254                         }
00255                         i__2 = i__ - 1;
00256                         for (k = 1; k <= i__2; ++k) {
00257                             temp += a_ref(k, i__) * b_ref(k, j);
00258 /* L90: */
00259                         }
00260                         b_ref(i__, j) = *alpha * temp;
00261 /* L100: */
00262                     }
00263 /* L110: */
00264                 }
00265             } else {
00266                 i__1 = *n;
00267                 for (j = 1; j <= i__1; ++j) {
00268                     i__2 = *m;
00269                     for (i__ = 1; i__ <= i__2; ++i__) {
00270                         temp = b_ref(i__, j);
00271                         if (nounit) {
00272                             temp *= a_ref(i__, i__);
00273                         }
00274                         i__3 = *m;
00275                         for (k = i__ + 1; k <= i__3; ++k) {
00276                             temp += a_ref(k, i__) * b_ref(k, j);
00277 /* L120: */
00278                         }
00279                         b_ref(i__, j) = *alpha * temp;
00280 /* L130: */
00281                     }
00282 /* L140: */
00283                 }
00284             }
00285         }
00286     } else {
00287         if (template_blas_lsame(transa, "N")) {
00288 /*           Form  B := alpha*B*A. */
00289             if (upper) {
00290                 for (j = *n; j >= 1; --j) {
00291                     temp = *alpha;
00292                     if (nounit) {
00293                         temp *= a_ref(j, j);
00294                     }
00295                     i__1 = *m;
00296                     for (i__ = 1; i__ <= i__1; ++i__) {
00297                         b_ref(i__, j) = temp * b_ref(i__, j);
00298 /* L150: */
00299                     }
00300                     i__1 = j - 1;
00301                     for (k = 1; k <= i__1; ++k) {
00302                         if (a_ref(k, j) != 0.) {
00303                             temp = *alpha * a_ref(k, j);
00304                             i__2 = *m;
00305                             for (i__ = 1; i__ <= i__2; ++i__) {
00306                                 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
00307                                         i__, k);
00308 /* L160: */
00309                             }
00310                         }
00311 /* L170: */
00312                     }
00313 /* L180: */
00314                 }
00315             } else {
00316                 i__1 = *n;
00317                 for (j = 1; j <= i__1; ++j) {
00318                     temp = *alpha;
00319                     if (nounit) {
00320                         temp *= a_ref(j, j);
00321                     }
00322                     i__2 = *m;
00323                     for (i__ = 1; i__ <= i__2; ++i__) {
00324                         b_ref(i__, j) = temp * b_ref(i__, j);
00325 /* L190: */
00326                     }
00327                     i__2 = *n;
00328                     for (k = j + 1; k <= i__2; ++k) {
00329                         if (a_ref(k, j) != 0.) {
00330                             temp = *alpha * a_ref(k, j);
00331                             i__3 = *m;
00332                             for (i__ = 1; i__ <= i__3; ++i__) {
00333                                 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
00334                                         i__, k);
00335 /* L200: */
00336                             }
00337                         }
00338 /* L210: */
00339                     }
00340 /* L220: */
00341                 }
00342             }
00343         } else {
00344 /*           Form  B := alpha*B*A'. */
00345             if (upper) {
00346                 i__1 = *n;
00347                 for (k = 1; k <= i__1; ++k) {
00348                     i__2 = k - 1;
00349                     for (j = 1; j <= i__2; ++j) {
00350                         if (a_ref(j, k) != 0.) {
00351                             temp = *alpha * a_ref(j, k);
00352                             i__3 = *m;
00353                             for (i__ = 1; i__ <= i__3; ++i__) {
00354                                 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
00355                                         i__, k);
00356 /* L230: */
00357                             }
00358                         }
00359 /* L240: */
00360                     }
00361                     temp = *alpha;
00362                     if (nounit) {
00363                         temp *= a_ref(k, k);
00364                     }
00365                     if (temp != 1.) {
00366                         i__2 = *m;
00367                         for (i__ = 1; i__ <= i__2; ++i__) {
00368                             b_ref(i__, k) = temp * b_ref(i__, k);
00369 /* L250: */
00370                         }
00371                     }
00372 /* L260: */
00373                 }
00374             } else {
00375                 for (k = *n; k >= 1; --k) {
00376                     i__1 = *n;
00377                     for (j = k + 1; j <= i__1; ++j) {
00378                         if (a_ref(j, k) != 0.) {
00379                             temp = *alpha * a_ref(j, k);
00380                             i__2 = *m;
00381                             for (i__ = 1; i__ <= i__2; ++i__) {
00382                                 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
00383                                         i__, k);
00384 /* L270: */
00385                             }
00386                         }
00387 /* L280: */
00388                     }
00389                     temp = *alpha;
00390                     if (nounit) {
00391                         temp *= a_ref(k, k);
00392                     }
00393                     if (temp != 1.) {
00394                         i__1 = *m;
00395                         for (i__ = 1; i__ <= i__1; ++i__) {
00396                             b_ref(i__, k) = temp * b_ref(i__, k);
00397 /* L290: */
00398                         }
00399                     }
00400 /* L300: */
00401                 }
00402             }
00403         }
00404     }
00405     return 0;
00406 /*     End of DTRMM . */
00407 } /* dtrmm_ */
00408 #undef b_ref
00409 #undef a_ref
00410 
00411 #endif

Generated on Mon Sep 17 14:30:40 2012 for ergo by  doxygen 1.4.7