template_lapack_ormqr.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_ORMQR_HEADER
00036 #define TEMPLATE_LAPACK_ORMQR_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_lapack_ormqr(char *side, char *trans, const integer *m, const integer *n, 
00041         const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *
00042         c__, const integer *ldc, Treal *work, const integer *lwork, integer *info)
00043 {
00044 /*  -- LAPACK routine (version 3.0) --   
00045        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00046        Courant Institute, Argonne National Lab, and Rice University   
00047        June 30, 1999   
00048 
00049 
00050     Purpose   
00051     =======   
00052 
00053     DORMQR overwrites the general real M-by-N matrix C with   
00054 
00055                     SIDE = 'L'     SIDE = 'R'   
00056     TRANS = 'N':      Q * C          C * Q   
00057     TRANS = 'T':      Q**T * C       C * Q**T   
00058 
00059     where Q is a real orthogonal matrix defined as the product of k   
00060     elementary reflectors   
00061 
00062           Q = H(1) H(2) . . . H(k)   
00063 
00064     as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N   
00065     if SIDE = 'R'.   
00066 
00067     Arguments   
00068     =========   
00069 
00070     SIDE    (input) CHARACTER*1   
00071             = 'L': apply Q or Q**T from the Left;   
00072             = 'R': apply Q or Q**T from the Right.   
00073 
00074     TRANS   (input) CHARACTER*1   
00075             = 'N':  No transpose, apply Q;   
00076             = 'T':  Transpose, apply Q**T.   
00077 
00078     M       (input) INTEGER   
00079             The number of rows of the matrix C. M >= 0.   
00080 
00081     N       (input) INTEGER   
00082             The number of columns of the matrix C. N >= 0.   
00083 
00084     K       (input) INTEGER   
00085             The number of elementary reflectors whose product defines   
00086             the matrix Q.   
00087             If SIDE = 'L', M >= K >= 0;   
00088             if SIDE = 'R', N >= K >= 0.   
00089 
00090     A       (input) DOUBLE PRECISION array, dimension (LDA,K)   
00091             The i-th column must contain the vector which defines the   
00092             elementary reflector H(i), for i = 1,2,...,k, as returned by   
00093             DGEQRF in the first k columns of its array argument A.   
00094             A is modified by the routine but restored on exit.   
00095 
00096     LDA     (input) INTEGER   
00097             The leading dimension of the array A.   
00098             If SIDE = 'L', LDA >= max(1,M);   
00099             if SIDE = 'R', LDA >= max(1,N).   
00100 
00101     TAU     (input) DOUBLE PRECISION array, dimension (K)   
00102             TAU(i) must contain the scalar factor of the elementary   
00103             reflector H(i), as returned by DGEQRF.   
00104 
00105     C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)   
00106             On entry, the M-by-N matrix C.   
00107             On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.   
00108 
00109     LDC     (input) INTEGER   
00110             The leading dimension of the array C. LDC >= max(1,M).   
00111 
00112     WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)   
00113             On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   
00114 
00115     LWORK   (input) INTEGER   
00116             The dimension of the array WORK.   
00117             If SIDE = 'L', LWORK >= max(1,N);   
00118             if SIDE = 'R', LWORK >= max(1,M).   
00119             For optimum performance LWORK >= N*NB if SIDE = 'L', and   
00120             LWORK >= M*NB if SIDE = 'R', where NB is the optimal   
00121             blocksize.   
00122 
00123             If LWORK = -1, then a workspace query is assumed; the routine   
00124             only calculates the optimal size of the WORK array, returns   
00125             this value as the first entry of the WORK array, and no error   
00126             message related to LWORK is issued by XERBLA.   
00127 
00128     INFO    (output) INTEGER   
00129             = 0:  successful exit   
00130             < 0:  if INFO = -i, the i-th argument had an illegal value   
00131 
00132     =====================================================================   
00133 
00134 
00135        Test the input arguments   
00136 
00137        Parameter adjustments */
00138     /* Table of constant values */
00139      integer c__1 = 1;
00140      integer c_n1 = -1;
00141      integer c__2 = 2;
00142      integer c__65 = 65;
00143     
00144     /* System generated locals */
00145     address a__1[2];
00146     integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4, 
00147             i__5;
00148     char ch__1[2];
00149     /* Local variables */
00150      logical left;
00151      integer i__;
00152      Treal t[4160]      /* was [65][64] */;
00153      integer nbmin, iinfo, i1, i2, i3;
00154      integer ib, ic, jc, nb, mi, ni;
00155      integer nq, nw;
00156      logical notran;
00157      integer ldwork, lwkopt;
00158      logical lquery;
00159      integer iws;
00160 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00161 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
00162 
00163 
00164     a_dim1 = *lda;
00165     a_offset = 1 + a_dim1 * 1;
00166     a -= a_offset;
00167     --tau;
00168     c_dim1 = *ldc;
00169     c_offset = 1 + c_dim1 * 1;
00170     c__ -= c_offset;
00171     --work;
00172 
00173     /* Initialization added by Elias to get rid of compiler warnings. */
00174     lwkopt = 0;
00175     nb = 0;
00176     /* Function Body */
00177     *info = 0;
00178     left = template_blas_lsame(side, "L");
00179     notran = template_blas_lsame(trans, "N");
00180     lquery = *lwork == -1;
00181 
00182 /*     NQ is the order of Q and NW is the minimum dimension of WORK */
00183 
00184     if (left) {
00185         nq = *m;
00186         nw = *n;
00187     } else {
00188         nq = *n;
00189         nw = *m;
00190     }
00191     if (! left && ! template_blas_lsame(side, "R")) {
00192         *info = -1;
00193     } else if (! notran && ! template_blas_lsame(trans, "T")) {
00194         *info = -2;
00195     } else if (*m < 0) {
00196         *info = -3;
00197     } else if (*n < 0) {
00198         *info = -4;
00199     } else if (*k < 0 || *k > nq) {
00200         *info = -5;
00201     } else if (*lda < maxMACRO(1,nq)) {
00202         *info = -7;
00203     } else if (*ldc < maxMACRO(1,*m)) {
00204         *info = -10;
00205     } else if (*lwork < maxMACRO(1,nw) && ! lquery) {
00206         *info = -12;
00207     }
00208 
00209     if (*info == 0) {
00210 
00211 /*        Determine the block size.  NB may be at most NBMAX, where NBMAX   
00212           is used to define the local array T.   
00213 
00214    Computing MIN   
00215    Writing concatenation */
00216         i__3[0] = 1, a__1[0] = side;
00217         i__3[1] = 1, a__1[1] = trans;
00218         template_blas_s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
00219         i__1 = 64, i__2 = template_lapack_ilaenv(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, (
00220                 ftnlen)6, (ftnlen)2);
00221         nb = minMACRO(i__1,i__2);
00222         lwkopt = maxMACRO(1,nw) * nb;
00223         work[1] = (Treal) lwkopt;
00224     }
00225 
00226     if (*info != 0) {
00227         i__1 = -(*info);
00228         template_blas_erbla("ORMQR ", &i__1);
00229         return 0;
00230     } else if (lquery) {
00231         return 0;
00232     }
00233 
00234 /*     Quick return if possible */
00235 
00236     if (*m == 0 || *n == 0 || *k == 0) {
00237         work[1] = 1.;
00238         return 0;
00239     }
00240 
00241     nbmin = 2;
00242     ldwork = nw;
00243     if (nb > 1 && nb < *k) {
00244         iws = nw * nb;
00245         if (*lwork < iws) {
00246             nb = *lwork / ldwork;
00247 /* Computing MAX   
00248    Writing concatenation */
00249             i__3[0] = 1, a__1[0] = side;
00250             i__3[1] = 1, a__1[1] = trans;
00251             template_blas_s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
00252             i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, (
00253                     ftnlen)6, (ftnlen)2);
00254             nbmin = maxMACRO(i__1,i__2);
00255         }
00256     } else {
00257         iws = nw;
00258     }
00259 
00260     if (nb < nbmin || nb >= *k) {
00261 
00262 /*        Use unblocked code */
00263 
00264         template_lapack_orm2r(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
00265                 c_offset], ldc, &work[1], &iinfo);
00266     } else {
00267 
00268 /*        Use blocked code */
00269 
00270       if ( ( left && ! notran ) || ( ! left && notran ) ) {
00271             i1 = 1;
00272             i2 = *k;
00273             i3 = nb;
00274         } else {
00275             i1 = (*k - 1) / nb * nb + 1;
00276             i2 = 1;
00277             i3 = -nb;
00278         }
00279 
00280         if (left) {
00281             ni = *n;
00282             jc = 1;
00283         } else {
00284             mi = *m;
00285             ic = 1;
00286         }
00287 
00288         i__1 = i2;
00289         i__2 = i3;
00290         for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
00291 /* Computing MIN */
00292             i__4 = nb, i__5 = *k - i__ + 1;
00293             ib = minMACRO(i__4,i__5);
00294 
00295 /*           Form the triangular factor of the block reflector   
00296              H = H(i) H(i+1) . . . H(i+ib-1) */
00297 
00298             i__4 = nq - i__ + 1;
00299             template_lapack_larft("Forward", "Columnwise", &i__4, &ib, &a_ref(i__, i__), 
00300                     lda, &tau[i__], t, &c__65);
00301             if (left) {
00302 
00303 /*              H or H' is applied to C(i:m,1:n) */
00304 
00305                 mi = *m - i__ + 1;
00306                 ic = i__;
00307             } else {
00308 
00309 /*              H or H' is applied to C(1:m,i:n) */
00310 
00311                 ni = *n - i__ + 1;
00312                 jc = i__;
00313             }
00314 
00315 /*           Apply H or H' */
00316 
00317             template_lapack_larfb(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &
00318                     a_ref(i__, i__), lda, t, &c__65, &c___ref(ic, jc), ldc, &
00319                     work[1], &ldwork);
00320 /* L10: */
00321         }
00322     }
00323     work[1] = (Treal) lwkopt;
00324     return 0;
00325 
00326 /*     End of DORMQR */
00327 
00328 } /* dormqr_ */
00329 
00330 #undef c___ref
00331 #undef a_ref
00332 
00333 
00334 #endif

Generated on Mon Sep 17 14:30:40 2012 for ergo by  doxygen 1.4.7