template_blas_trsm.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_BLAS_TRSM_HEADER
00036 #define TEMPLATE_BLAS_TRSM_HEADER
00037 
00038 #include "template_blas_common.h"
00039 
00040 template<class Treal>
00041 int template_blas_trsm(const char *side, const char *uplo, const char *transa, const char *diag, 
00042         const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *
00043         lda, Treal *b, const integer *ldb)
00044 {
00045     /* System generated locals */
00046     integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
00047     /* Local variables */
00048      integer info;
00049      Treal temp;
00050      integer i__, j, k;
00051      logical lside;
00052      integer nrowa;
00053      logical upper;
00054      logical nounit;
00055 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00056 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
00057 /*  Purpose   
00058     =======   
00059     DTRSM  solves one of the matrix equations   
00060        op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,   
00061     where alpha is a scalar, X and B are m by n matrices, A is a unit, or   
00062     non-unit,  upper or lower triangular matrix  and  op( A )  is one  of   
00063        op( A ) = A   or   op( A ) = A'.   
00064     The matrix X is overwritten on B.   
00065     Parameters   
00066     ==========   
00067     SIDE   - CHARACTER*1.   
00068              On entry, SIDE specifies whether op( A ) appears on the left   
00069              or right of X as follows:   
00070                 SIDE = 'L' or 'l'   op( A )*X = alpha*B.   
00071                 SIDE = 'R' or 'r'   X*op( A ) = alpha*B.   
00072              Unchanged on exit.   
00073     UPLO   - CHARACTER*1.   
00074              On entry, UPLO specifies whether the matrix A is an upper or   
00075              lower triangular matrix as follows:   
00076                 UPLO = 'U' or 'u'   A is an upper triangular matrix.   
00077                 UPLO = 'L' or 'l'   A is a lower triangular matrix.   
00078              Unchanged on exit.   
00079     TRANSA - CHARACTER*1.   
00080              On entry, TRANSA specifies the form of op( A ) to be used in   
00081              the matrix multiplication as follows:   
00082                 TRANSA = 'N' or 'n'   op( A ) = A.   
00083                 TRANSA = 'T' or 't'   op( A ) = A'.   
00084                 TRANSA = 'C' or 'c'   op( A ) = A'.   
00085              Unchanged on exit.   
00086     DIAG   - CHARACTER*1.   
00087              On entry, DIAG specifies whether or not A is unit triangular   
00088              as follows:   
00089                 DIAG = 'U' or 'u'   A is assumed to be unit triangular.   
00090                 DIAG = 'N' or 'n'   A is not assumed to be unit   
00091                                     triangular.   
00092              Unchanged on exit.   
00093     M      - INTEGER.   
00094              On entry, M specifies the number of rows of B. M must be at   
00095              least zero.   
00096              Unchanged on exit.   
00097     N      - INTEGER.   
00098              On entry, N specifies the number of columns of B.  N must be   
00099              at least zero.   
00100              Unchanged on exit.   
00101     ALPHA  - DOUBLE PRECISION.   
00102              On entry,  ALPHA specifies the scalar  alpha. When  alpha is   
00103              zero then  A is not referenced and  B need not be set before   
00104              entry.   
00105              Unchanged on exit.   
00106     A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m   
00107              when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.   
00108              Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k   
00109              upper triangular part of the array  A must contain the upper   
00110              triangular matrix  and the strictly lower triangular part of   
00111              A is not referenced.   
00112              Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k   
00113              lower triangular part of the array  A must contain the lower   
00114              triangular matrix  and the strictly upper triangular part of   
00115              A is not referenced.   
00116              Note that when  DIAG = 'U' or 'u',  the diagonal elements of   
00117              A  are not referenced either,  but are assumed to be  unity.   
00118              Unchanged on exit.   
00119     LDA    - INTEGER.   
00120              On entry, LDA specifies the first dimension of A as declared   
00121              in the calling (sub) program.  When  SIDE = 'L' or 'l'  then   
00122              LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'   
00123              then LDA must be at least max( 1, n ).   
00124              Unchanged on exit.   
00125     B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).   
00126              Before entry,  the leading  m by n part of the array  B must   
00127              contain  the  right-hand  side  matrix  B,  and  on exit  is   
00128              overwritten by the solution matrix  X.   
00129     LDB    - INTEGER.   
00130              On entry, LDB specifies the first dimension of B as declared   
00131              in  the  calling  (sub)  program.   LDB  must  be  at  least   
00132              max( 1, m ).   
00133              Unchanged on exit.   
00134     Level 3 Blas routine.   
00135     -- Written on 8-February-1989.   
00136        Jack Dongarra, Argonne National Laboratory.   
00137        Iain Duff, AERE Harwell.   
00138        Jeremy Du Croz, Numerical Algorithms Group Ltd.   
00139        Sven Hammarling, Numerical Algorithms Group Ltd.   
00140        Test the input parameters.   
00141        Parameter adjustments */
00142     a_dim1 = *lda;
00143     a_offset = 1 + a_dim1 * 1;
00144     a -= a_offset;
00145     b_dim1 = *ldb;
00146     b_offset = 1 + b_dim1 * 1;
00147     b -= b_offset;
00148     /* Function Body */
00149     lside = template_blas_lsame(side, "L");
00150     if (lside) {
00151         nrowa = *m;
00152     } else {
00153         nrowa = *n;
00154     }
00155     nounit = template_blas_lsame(diag, "N");
00156     upper = template_blas_lsame(uplo, "U");
00157     info = 0;
00158     if (! lside && ! template_blas_lsame(side, "R")) {
00159         info = 1;
00160     } else if (! upper && ! template_blas_lsame(uplo, "L")) {
00161         info = 2;
00162     } else if (! template_blas_lsame(transa, "N") && ! template_blas_lsame(transa,
00163              "T") && ! template_blas_lsame(transa, "C")) {
00164         info = 3;
00165     } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag, 
00166             "N")) {
00167         info = 4;
00168     } else if (*m < 0) {
00169         info = 5;
00170     } else if (*n < 0) {
00171         info = 6;
00172     } else if (*lda < maxMACRO(1,nrowa)) {
00173         info = 9;
00174     } else if (*ldb < maxMACRO(1,*m)) {
00175         info = 11;
00176     }
00177     if (info != 0) {
00178         template_blas_erbla("TRSM  ", &info);
00179         return 0;
00180     }
00181 /*     Quick return if possible. */
00182     if (*n == 0) {
00183         return 0;
00184     }
00185 /*     And when  alpha.eq.zero. */
00186     if (*alpha == 0.) {
00187         i__1 = *n;
00188         for (j = 1; j <= i__1; ++j) {
00189             i__2 = *m;
00190             for (i__ = 1; i__ <= i__2; ++i__) {
00191                 b_ref(i__, j) = 0.;
00192 /* L10: */
00193             }
00194 /* L20: */
00195         }
00196         return 0;
00197     }
00198 /*     Start the operations. */
00199     if (lside) {
00200         if (template_blas_lsame(transa, "N")) {
00201 /*           Form  B := alpha*inv( A )*B. */
00202             if (upper) {
00203                 i__1 = *n;
00204                 for (j = 1; j <= i__1; ++j) {
00205                     if (*alpha != 1.) {
00206                         i__2 = *m;
00207                         for (i__ = 1; i__ <= i__2; ++i__) {
00208                             b_ref(i__, j) = *alpha * b_ref(i__, j);
00209 /* L30: */
00210                         }
00211                     }
00212                     for (k = *m; k >= 1; --k) {
00213                         if (b_ref(k, j) != 0.) {
00214                             if (nounit) {
00215                                 b_ref(k, j) = b_ref(k, j) / a_ref(k, k);
00216                             }
00217                             i__2 = k - 1;
00218                             for (i__ = 1; i__ <= i__2; ++i__) {
00219                                 b_ref(i__, j) = b_ref(i__, j) - b_ref(k, j) * 
00220                                         a_ref(i__, k);
00221 /* L40: */
00222                             }
00223                         }
00224 /* L50: */
00225                     }
00226 /* L60: */
00227                 }
00228             } else {
00229                 i__1 = *n;
00230                 for (j = 1; j <= i__1; ++j) {
00231                     if (*alpha != 1.) {
00232                         i__2 = *m;
00233                         for (i__ = 1; i__ <= i__2; ++i__) {
00234                             b_ref(i__, j) = *alpha * b_ref(i__, j);
00235 /* L70: */
00236                         }
00237                     }
00238                     i__2 = *m;
00239                     for (k = 1; k <= i__2; ++k) {
00240                         if (b_ref(k, j) != 0.) {
00241                             if (nounit) {
00242                                 b_ref(k, j) = b_ref(k, j) / a_ref(k, k);
00243                             }
00244                             i__3 = *m;
00245                             for (i__ = k + 1; i__ <= i__3; ++i__) {
00246                                 b_ref(i__, j) = b_ref(i__, j) - b_ref(k, j) * 
00247                                         a_ref(i__, k);
00248 /* L80: */
00249                             }
00250                         }
00251 /* L90: */
00252                     }
00253 /* L100: */
00254                 }
00255             }
00256         } else {
00257 /*           Form  B := alpha*inv( A' )*B. */
00258             if (upper) {
00259                 i__1 = *n;
00260                 for (j = 1; j <= i__1; ++j) {
00261                     i__2 = *m;
00262                     for (i__ = 1; i__ <= i__2; ++i__) {
00263                         temp = *alpha * b_ref(i__, j);
00264                         i__3 = i__ - 1;
00265                         for (k = 1; k <= i__3; ++k) {
00266                             temp -= a_ref(k, i__) * b_ref(k, j);
00267 /* L110: */
00268                         }
00269                         if (nounit) {
00270                             temp /= a_ref(i__, i__);
00271                         }
00272                         b_ref(i__, j) = temp;
00273 /* L120: */
00274                     }
00275 /* L130: */
00276                 }
00277             } else {
00278                 i__1 = *n;
00279                 for (j = 1; j <= i__1; ++j) {
00280                     for (i__ = *m; i__ >= 1; --i__) {
00281                         temp = *alpha * b_ref(i__, j);
00282                         i__2 = *m;
00283                         for (k = i__ + 1; k <= i__2; ++k) {
00284                             temp -= a_ref(k, i__) * b_ref(k, j);
00285 /* L140: */
00286                         }
00287                         if (nounit) {
00288                             temp /= a_ref(i__, i__);
00289                         }
00290                         b_ref(i__, j) = temp;
00291 /* L150: */
00292                     }
00293 /* L160: */
00294                 }
00295             }
00296         }
00297     } else {
00298         if (template_blas_lsame(transa, "N")) {
00299 /*           Form  B := alpha*B*inv( A ). */
00300             if (upper) {
00301                 i__1 = *n;
00302                 for (j = 1; j <= i__1; ++j) {
00303                     if (*alpha != 1.) {
00304                         i__2 = *m;
00305                         for (i__ = 1; i__ <= i__2; ++i__) {
00306                             b_ref(i__, j) = *alpha * b_ref(i__, j);
00307 /* L170: */
00308                         }
00309                     }
00310                     i__2 = j - 1;
00311                     for (k = 1; k <= i__2; ++k) {
00312                         if (a_ref(k, j) != 0.) {
00313                             i__3 = *m;
00314                             for (i__ = 1; i__ <= i__3; ++i__) {
00315                                 b_ref(i__, j) = b_ref(i__, j) - a_ref(k, j) * 
00316                                         b_ref(i__, k);
00317 /* L180: */
00318                             }
00319                         }
00320 /* L190: */
00321                     }
00322                     if (nounit) {
00323                         temp = 1. / a_ref(j, j);
00324                         i__2 = *m;
00325                         for (i__ = 1; i__ <= i__2; ++i__) {
00326                             b_ref(i__, j) = temp * b_ref(i__, j);
00327 /* L200: */
00328                         }
00329                     }
00330 /* L210: */
00331                 }
00332             } else {
00333                 for (j = *n; j >= 1; --j) {
00334                     if (*alpha != 1.) {
00335                         i__1 = *m;
00336                         for (i__ = 1; i__ <= i__1; ++i__) {
00337                             b_ref(i__, j) = *alpha * b_ref(i__, j);
00338 /* L220: */
00339                         }
00340                     }
00341                     i__1 = *n;
00342                     for (k = j + 1; k <= i__1; ++k) {
00343                         if (a_ref(k, j) != 0.) {
00344                             i__2 = *m;
00345                             for (i__ = 1; i__ <= i__2; ++i__) {
00346                                 b_ref(i__, j) = b_ref(i__, j) - a_ref(k, j) * 
00347                                         b_ref(i__, k);
00348 /* L230: */
00349                             }
00350                         }
00351 /* L240: */
00352                     }
00353                     if (nounit) {
00354                         temp = 1. / a_ref(j, j);
00355                         i__1 = *m;
00356                         for (i__ = 1; i__ <= i__1; ++i__) {
00357                             b_ref(i__, j) = temp * b_ref(i__, j);
00358 /* L250: */
00359                         }
00360                     }
00361 /* L260: */
00362                 }
00363             }
00364         } else {
00365 /*           Form  B := alpha*B*inv( A' ). */
00366             if (upper) {
00367                 for (k = *n; k >= 1; --k) {
00368                     if (nounit) {
00369                         temp = 1. / a_ref(k, k);
00370                         i__1 = *m;
00371                         for (i__ = 1; i__ <= i__1; ++i__) {
00372                             b_ref(i__, k) = temp * b_ref(i__, k);
00373 /* L270: */
00374                         }
00375                     }
00376                     i__1 = k - 1;
00377                     for (j = 1; j <= i__1; ++j) {
00378                         if (a_ref(j, k) != 0.) {
00379                             temp = a_ref(j, k);
00380                             i__2 = *m;
00381                             for (i__ = 1; i__ <= i__2; ++i__) {
00382                                 b_ref(i__, j) = b_ref(i__, j) - temp * b_ref(
00383                                         i__, k);
00384 /* L280: */
00385                             }
00386                         }
00387 /* L290: */
00388                     }
00389                     if (*alpha != 1.) {
00390                         i__1 = *m;
00391                         for (i__ = 1; i__ <= i__1; ++i__) {
00392                             b_ref(i__, k) = *alpha * b_ref(i__, k);
00393 /* L300: */
00394                         }
00395                     }
00396 /* L310: */
00397                 }
00398             } else {
00399                 i__1 = *n;
00400                 for (k = 1; k <= i__1; ++k) {
00401                     if (nounit) {
00402                         temp = 1. / a_ref(k, k);
00403                         i__2 = *m;
00404                         for (i__ = 1; i__ <= i__2; ++i__) {
00405                             b_ref(i__, k) = temp * b_ref(i__, k);
00406 /* L320: */
00407                         }
00408                     }
00409                     i__2 = *n;
00410                     for (j = k + 1; j <= i__2; ++j) {
00411                         if (a_ref(j, k) != 0.) {
00412                             temp = a_ref(j, k);
00413                             i__3 = *m;
00414                             for (i__ = 1; i__ <= i__3; ++i__) {
00415                                 b_ref(i__, j) = b_ref(i__, j) - temp * b_ref(
00416                                         i__, k);
00417 /* L330: */
00418                             }
00419                         }
00420 /* L340: */
00421                     }
00422                     if (*alpha != 1.) {
00423                         i__2 = *m;
00424                         for (i__ = 1; i__ <= i__2; ++i__) {
00425                             b_ref(i__, k) = *alpha * b_ref(i__, k);
00426 /* L350: */
00427                         }
00428                     }
00429 /* L360: */
00430                 }
00431             }
00432         }
00433     }
00434     return 0;
00435 /*     End of DTRSM . */
00436 } /* dtrsm_ */
00437 #undef b_ref
00438 #undef a_ref
00439 
00440 #endif

Generated on Wed Nov 21 09:32:36 2012 for ergo by  doxygen 1.4.7