template_blas_spmv.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_BLAS_SPMV_HEADER
00036 #define TEMPLATE_BLAS_SPMV_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_blas_spmv(const char *uplo, const integer *n, const Treal *alpha, 
00041         Treal *ap, const Treal *x, const integer *incx, const Treal *beta, 
00042         Treal *y, const integer *incy)
00043 {
00044     /* System generated locals */
00045     integer i__1, i__2;
00046     /* Local variables */
00047      integer info;
00048      Treal temp1, temp2;
00049      integer i__, j, k;
00050      integer kk, ix, iy, jx, jy, kx, ky;
00051 /*  Purpose   
00052     =======   
00053     DSPMV  performs the matrix-vector operation   
00054        y := alpha*A*x + beta*y,   
00055     where alpha and beta are scalars, x and y are n element vectors and   
00056     A is an n by n symmetric matrix, supplied in packed form.   
00057     Parameters   
00058     ==========   
00059     UPLO   - CHARACTER*1.   
00060              On entry, UPLO specifies whether the upper or lower   
00061              triangular part of the matrix A is supplied in the packed   
00062              array AP as follows:   
00063                 UPLO = 'U' or 'u'   The upper triangular part of A is   
00064                                     supplied in AP.   
00065                 UPLO = 'L' or 'l'   The lower triangular part of A is   
00066                                     supplied in AP.   
00067              Unchanged on exit.   
00068     N      - INTEGER.   
00069              On entry, N specifies the order of the matrix A.   
00070              N must be at least zero.   
00071              Unchanged on exit.   
00072     ALPHA  - DOUBLE PRECISION.   
00073              On entry, ALPHA specifies the scalar alpha.   
00074              Unchanged on exit.   
00075     AP     - DOUBLE PRECISION array of DIMENSION at least   
00076              ( ( n*( n + 1 ) )/2 ).   
00077              Before entry with UPLO = 'U' or 'u', the array AP must   
00078              contain the upper triangular part of the symmetric matrix   
00079              packed sequentially, column by column, so that AP( 1 )   
00080              contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )   
00081              and a( 2, 2 ) respectively, and so on.   
00082              Before entry with UPLO = 'L' or 'l', the array AP must   
00083              contain the lower triangular part of the symmetric matrix   
00084              packed sequentially, column by column, so that AP( 1 )   
00085              contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )   
00086              and a( 3, 1 ) respectively, and so on.   
00087              Unchanged on exit.   
00088     X      - DOUBLE PRECISION array of dimension at least   
00089              ( 1 + ( n - 1 )*abs( INCX ) ).   
00090              Before entry, the incremented array X must contain the n   
00091              element vector x.   
00092              Unchanged on exit.   
00093     INCX   - INTEGER.   
00094              On entry, INCX specifies the increment for the elements of   
00095              X. INCX must not be zero.   
00096              Unchanged on exit.   
00097     BETA   - DOUBLE PRECISION.   
00098              On entry, BETA specifies the scalar beta. When BETA is   
00099              supplied as zero then Y need not be set on input.   
00100              Unchanged on exit.   
00101     Y      - DOUBLE PRECISION array of dimension at least   
00102              ( 1 + ( n - 1 )*abs( INCY ) ).   
00103              Before entry, the incremented array Y must contain the n   
00104              element vector y. On exit, Y is overwritten by the updated   
00105              vector y.   
00106     INCY   - INTEGER.   
00107              On entry, INCY specifies the increment for the elements of   
00108              Y. INCY must not be zero.   
00109              Unchanged on exit.   
00110     Level 2 Blas routine.   
00111     -- Written on 22-October-1986.   
00112        Jack Dongarra, Argonne National Lab.   
00113        Jeremy Du Croz, Nag Central Office.   
00114        Sven Hammarling, Nag Central Office.   
00115        Richard Hanson, Sandia National Labs.   
00116        Test the input parameters.   
00117        Parameter adjustments */
00118     --y;
00119     --x;
00120     --ap;
00121     /* Function Body */
00122     info = 0;
00123     if (! template_blas_lsame(uplo, "U") && ! template_blas_lsame(uplo, "L")) {
00124         info = 1;
00125     } else if (*n < 0) {
00126         info = 2;
00127     } else if (*incx == 0) {
00128         info = 6;
00129     } else if (*incy == 0) {
00130         info = 9;
00131     }
00132     if (info != 0) {
00133         template_blas_erbla("SPMV  ", &info);
00134         return 0;
00135     }
00136 /*     Quick return if possible. */
00137     if (*n == 0 || ( *alpha == 0. && *beta == 1. ) ) {
00138         return 0;
00139     }
00140 /*     Set up the start points in  X  and  Y. */
00141     if (*incx > 0) {
00142         kx = 1;
00143     } else {
00144         kx = 1 - (*n - 1) * *incx;
00145     }
00146     if (*incy > 0) {
00147         ky = 1;
00148     } else {
00149         ky = 1 - (*n - 1) * *incy;
00150     }
00151 /*     Start the operations. In this version the elements of the array AP   
00152        are accessed sequentially with one pass through AP.   
00153        First form  y := beta*y. */
00154     if (*beta != 1.) {
00155         if (*incy == 1) {
00156             if (*beta == 0.) {
00157                 i__1 = *n;
00158                 for (i__ = 1; i__ <= i__1; ++i__) {
00159                     y[i__] = 0.;
00160 /* L10: */
00161                 }
00162             } else {
00163                 i__1 = *n;
00164                 for (i__ = 1; i__ <= i__1; ++i__) {
00165                     y[i__] = *beta * y[i__];
00166 /* L20: */
00167                 }
00168             }
00169         } else {
00170             iy = ky;
00171             if (*beta == 0.) {
00172                 i__1 = *n;
00173                 for (i__ = 1; i__ <= i__1; ++i__) {
00174                     y[iy] = 0.;
00175                     iy += *incy;
00176 /* L30: */
00177                 }
00178             } else {
00179                 i__1 = *n;
00180                 for (i__ = 1; i__ <= i__1; ++i__) {
00181                     y[iy] = *beta * y[iy];
00182                     iy += *incy;
00183 /* L40: */
00184                 }
00185             }
00186         }
00187     }
00188     if (*alpha == 0.) {
00189         return 0;
00190     }
00191     kk = 1;
00192     if (template_blas_lsame(uplo, "U")) {
00193 /*        Form  y  when AP contains the upper triangle. */
00194         if (*incx == 1 && *incy == 1) {
00195             i__1 = *n;
00196             for (j = 1; j <= i__1; ++j) {
00197                 temp1 = *alpha * x[j];
00198                 temp2 = 0.;
00199                 k = kk;
00200                 i__2 = j - 1;
00201                 for (i__ = 1; i__ <= i__2; ++i__) {
00202                     y[i__] += temp1 * ap[k];
00203                     temp2 += ap[k] * x[i__];
00204                     ++k;
00205 /* L50: */
00206                 }
00207                 y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
00208                 kk += j;
00209 /* L60: */
00210             }
00211         } else {
00212             jx = kx;
00213             jy = ky;
00214             i__1 = *n;
00215             for (j = 1; j <= i__1; ++j) {
00216                 temp1 = *alpha * x[jx];
00217                 temp2 = 0.;
00218                 ix = kx;
00219                 iy = ky;
00220                 i__2 = kk + j - 2;
00221                 for (k = kk; k <= i__2; ++k) {
00222                     y[iy] += temp1 * ap[k];
00223                     temp2 += ap[k] * x[ix];
00224                     ix += *incx;
00225                     iy += *incy;
00226 /* L70: */
00227                 }
00228                 y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
00229                 jx += *incx;
00230                 jy += *incy;
00231                 kk += j;
00232 /* L80: */
00233             }
00234         }
00235     } else {
00236 /*        Form  y  when AP contains the lower triangle. */
00237         if (*incx == 1 && *incy == 1) {
00238             i__1 = *n;
00239             for (j = 1; j <= i__1; ++j) {
00240                 temp1 = *alpha * x[j];
00241                 temp2 = 0.;
00242                 y[j] += temp1 * ap[kk];
00243                 k = kk + 1;
00244                 i__2 = *n;
00245                 for (i__ = j + 1; i__ <= i__2; ++i__) {
00246                     y[i__] += temp1 * ap[k];
00247                     temp2 += ap[k] * x[i__];
00248                     ++k;
00249 /* L90: */
00250                 }
00251                 y[j] += *alpha * temp2;
00252                 kk += *n - j + 1;
00253 /* L100: */
00254             }
00255         } else {
00256             jx = kx;
00257             jy = ky;
00258             i__1 = *n;
00259             for (j = 1; j <= i__1; ++j) {
00260                 temp1 = *alpha * x[jx];
00261                 temp2 = 0.;
00262                 y[jy] += temp1 * ap[kk];
00263                 ix = jx;
00264                 iy = jy;
00265                 i__2 = kk + *n - j;
00266                 for (k = kk + 1; k <= i__2; ++k) {
00267                     ix += *incx;
00268                     iy += *incy;
00269                     y[iy] += temp1 * ap[k];
00270                     temp2 += ap[k] * x[ix];
00271 /* L110: */
00272                 }
00273                 y[jy] += *alpha * temp2;
00274                 jx += *incx;
00275                 jy += *incy;
00276                 kk += *n - j + 1;
00277 /* L120: */
00278             }
00279         }
00280     }
00281     return 0;
00282 /*     End of DSPMV . */
00283 } /* dspmv_ */
00284 
00285 #endif

Generated on Wed Nov 21 09:32:01 2012 for ergo by  doxygen 1.4.7