template_blas_tpmv.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_BLAS_TPMV_HEADER
00036 #define TEMPLATE_BLAS_TPMV_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_blas_tpmv(const char *uplo, const char *trans, const char *diag, const integer *n, 
00041         const Treal *ap, Treal *x, const integer *incx)
00042 {
00043     /* System generated locals */
00044     integer i__1, i__2;
00045     /* Local variables */
00046      integer info;
00047      Treal temp;
00048      integer i__, j, k;
00049      integer kk, ix, jx, kx;
00050      logical nounit;
00051 /*  Purpose   
00052     =======   
00053     DTPMV  performs one of the matrix-vector operations   
00054        x := A*x,   or   x := A'*x,   
00055     where x is an n element vector and  A is an n by n unit, or non-unit,   
00056     upper or lower triangular matrix, supplied in packed form.   
00057     Parameters   
00058     ==========   
00059     UPLO   - CHARACTER*1.   
00060              On entry, UPLO specifies whether the matrix is an upper or   
00061              lower triangular matrix as follows:   
00062                 UPLO = 'U' or 'u'   A is an upper triangular matrix.   
00063                 UPLO = 'L' or 'l'   A is a lower triangular matrix.   
00064              Unchanged on exit.   
00065     TRANS  - CHARACTER*1.   
00066              On entry, TRANS specifies the operation to be performed as   
00067              follows:   
00068                 TRANS = 'N' or 'n'   x := A*x.   
00069                 TRANS = 'T' or 't'   x := A'*x.   
00070                 TRANS = 'C' or 'c'   x := A'*x.   
00071              Unchanged on exit.   
00072     DIAG   - CHARACTER*1.   
00073              On entry, DIAG specifies whether or not A is unit   
00074              triangular as follows:   
00075                 DIAG = 'U' or 'u'   A is assumed to be unit triangular.   
00076                 DIAG = 'N' or 'n'   A is not assumed to be unit   
00077                                     triangular.   
00078              Unchanged on exit.   
00079     N      - INTEGER.   
00080              On entry, N specifies the order of the matrix A.   
00081              N must be at least zero.   
00082              Unchanged on exit.   
00083     AP     - DOUBLE PRECISION array of DIMENSION at least   
00084              ( ( n*( n + 1 ) )/2 ).   
00085              Before entry with  UPLO = 'U' or 'u', the array AP must   
00086              contain the upper triangular matrix packed sequentially,   
00087              column by column, so that AP( 1 ) contains a( 1, 1 ),   
00088              AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )   
00089              respectively, and so on.   
00090              Before entry with UPLO = 'L' or 'l', the array AP must   
00091              contain the lower triangular matrix packed sequentially,   
00092              column by column, so that AP( 1 ) contains a( 1, 1 ),   
00093              AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )   
00094              respectively, and so on.   
00095              Note that when  DIAG = 'U' or 'u', the diagonal elements of   
00096              A are not referenced, but are assumed to be unity.   
00097              Unchanged on exit.   
00098     X      - DOUBLE PRECISION array of dimension at least   
00099              ( 1 + ( n - 1 )*abs( INCX ) ).   
00100              Before entry, the incremented array X must contain the n   
00101              element vector x. On exit, X is overwritten with the   
00102              tranformed vector x.   
00103     INCX   - INTEGER.   
00104              On entry, INCX specifies the increment for the elements of   
00105              X. INCX must not be zero.   
00106              Unchanged on exit.   
00107     Level 2 Blas routine.   
00108     -- Written on 22-October-1986.   
00109        Jack Dongarra, Argonne National Lab.   
00110        Jeremy Du Croz, Nag Central Office.   
00111        Sven Hammarling, Nag Central Office.   
00112        Richard Hanson, Sandia National Labs.   
00113        Test the input parameters.   
00114        Parameter adjustments */
00115     --x;
00116     --ap;
00117     /* Initialization added by Elias to get rid of compiler warnings. */
00118     kx = 0;
00119     /* Function Body */
00120     info = 0;
00121     if (! template_blas_lsame(uplo, "U") && ! template_blas_lsame(uplo, "L")) {
00122         info = 1;
00123     } else if (! template_blas_lsame(trans, "N") && ! template_blas_lsame(trans, 
00124             "T") && ! template_blas_lsame(trans, "C")) {
00125         info = 2;
00126     } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag, 
00127             "N")) {
00128         info = 3;
00129     } else if (*n < 0) {
00130         info = 4;
00131     } else if (*incx == 0) {
00132         info = 7;
00133     }
00134     if (info != 0) {
00135         template_blas_erbla("TPMV  ", &info);
00136         return 0;
00137     }
00138 /*     Quick return if possible. */
00139     if (*n == 0) {
00140         return 0;
00141     }
00142     nounit = template_blas_lsame(diag, "N");
00143 /*     Set up the start point in X if the increment is not unity. This   
00144        will be  ( N - 1 )*INCX  too small for descending loops. */
00145     if (*incx <= 0) {
00146         kx = 1 - (*n - 1) * *incx;
00147     } else if (*incx != 1) {
00148         kx = 1;
00149     }
00150 /*     Start the operations. In this version the elements of AP are   
00151        accessed sequentially with one pass through AP. */
00152     if (template_blas_lsame(trans, "N")) {
00153 /*        Form  x:= A*x. */
00154         if (template_blas_lsame(uplo, "U")) {
00155             kk = 1;
00156             if (*incx == 1) {
00157                 i__1 = *n;
00158                 for (j = 1; j <= i__1; ++j) {
00159                     if (x[j] != 0.) {
00160                         temp = x[j];
00161                         k = kk;
00162                         i__2 = j - 1;
00163                         for (i__ = 1; i__ <= i__2; ++i__) {
00164                             x[i__] += temp * ap[k];
00165                             ++k;
00166 /* L10: */
00167                         }
00168                         if (nounit) {
00169                             x[j] *= ap[kk + j - 1];
00170                         }
00171                     }
00172                     kk += j;
00173 /* L20: */
00174                 }
00175             } else {
00176                 jx = kx;
00177                 i__1 = *n;
00178                 for (j = 1; j <= i__1; ++j) {
00179                     if (x[jx] != 0.) {
00180                         temp = x[jx];
00181                         ix = kx;
00182                         i__2 = kk + j - 2;
00183                         for (k = kk; k <= i__2; ++k) {
00184                             x[ix] += temp * ap[k];
00185                             ix += *incx;
00186 /* L30: */
00187                         }
00188                         if (nounit) {
00189                             x[jx] *= ap[kk + j - 1];
00190                         }
00191                     }
00192                     jx += *incx;
00193                     kk += j;
00194 /* L40: */
00195                 }
00196             }
00197         } else {
00198             kk = *n * (*n + 1) / 2;
00199             if (*incx == 1) {
00200                 for (j = *n; j >= 1; --j) {
00201                     if (x[j] != 0.) {
00202                         temp = x[j];
00203                         k = kk;
00204                         i__1 = j + 1;
00205                         for (i__ = *n; i__ >= i__1; --i__) {
00206                             x[i__] += temp * ap[k];
00207                             --k;
00208 /* L50: */
00209                         }
00210                         if (nounit) {
00211                             x[j] *= ap[kk - *n + j];
00212                         }
00213                     }
00214                     kk -= *n - j + 1;
00215 /* L60: */
00216                 }
00217             } else {
00218                 kx += (*n - 1) * *incx;
00219                 jx = kx;
00220                 for (j = *n; j >= 1; --j) {
00221                     if (x[jx] != 0.) {
00222                         temp = x[jx];
00223                         ix = kx;
00224                         i__1 = kk - (*n - (j + 1));
00225                         for (k = kk; k >= i__1; --k) {
00226                             x[ix] += temp * ap[k];
00227                             ix -= *incx;
00228 /* L70: */
00229                         }
00230                         if (nounit) {
00231                             x[jx] *= ap[kk - *n + j];
00232                         }
00233                     }
00234                     jx -= *incx;
00235                     kk -= *n - j + 1;
00236 /* L80: */
00237                 }
00238             }
00239         }
00240     } else {
00241 /*        Form  x := A'*x. */
00242         if (template_blas_lsame(uplo, "U")) {
00243             kk = *n * (*n + 1) / 2;
00244             if (*incx == 1) {
00245                 for (j = *n; j >= 1; --j) {
00246                     temp = x[j];
00247                     if (nounit) {
00248                         temp *= ap[kk];
00249                     }
00250                     k = kk - 1;
00251                     for (i__ = j - 1; i__ >= 1; --i__) {
00252                         temp += ap[k] * x[i__];
00253                         --k;
00254 /* L90: */
00255                     }
00256                     x[j] = temp;
00257                     kk -= j;
00258 /* L100: */
00259                 }
00260             } else {
00261                 jx = kx + (*n - 1) * *incx;
00262                 for (j = *n; j >= 1; --j) {
00263                     temp = x[jx];
00264                     ix = jx;
00265                     if (nounit) {
00266                         temp *= ap[kk];
00267                     }
00268                     i__1 = kk - j + 1;
00269                     for (k = kk - 1; k >= i__1; --k) {
00270                         ix -= *incx;
00271                         temp += ap[k] * x[ix];
00272 /* L110: */
00273                     }
00274                     x[jx] = temp;
00275                     jx -= *incx;
00276                     kk -= j;
00277 /* L120: */
00278                 }
00279             }
00280         } else {
00281             kk = 1;
00282             if (*incx == 1) {
00283                 i__1 = *n;
00284                 for (j = 1; j <= i__1; ++j) {
00285                     temp = x[j];
00286                     if (nounit) {
00287                         temp *= ap[kk];
00288                     }
00289                     k = kk + 1;
00290                     i__2 = *n;
00291                     for (i__ = j + 1; i__ <= i__2; ++i__) {
00292                         temp += ap[k] * x[i__];
00293                         ++k;
00294 /* L130: */
00295                     }
00296                     x[j] = temp;
00297                     kk += *n - j + 1;
00298 /* L140: */
00299                 }
00300             } else {
00301                 jx = kx;
00302                 i__1 = *n;
00303                 for (j = 1; j <= i__1; ++j) {
00304                     temp = x[jx];
00305                     ix = jx;
00306                     if (nounit) {
00307                         temp *= ap[kk];
00308                     }
00309                     i__2 = kk + *n - j;
00310                     for (k = kk + 1; k <= i__2; ++k) {
00311                         ix += *incx;
00312                         temp += ap[k] * x[ix];
00313 /* L150: */
00314                     }
00315                     x[jx] = temp;
00316                     jx += *incx;
00317                     kk += *n - j + 1;
00318 /* L160: */
00319                 }
00320             }
00321         }
00322     }
00323     return 0;
00324 /*     End of DTPMV . */
00325 } /* dtpmv_ */
00326 
00327 #endif

Generated on Wed Nov 21 09:32:36 2012 for ergo by  doxygen 1.4.7