template_lapack_orgqr.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_ORGQR_HEADER
00036 #define TEMPLATE_LAPACK_ORGQR_HEADER
00037 
00038 #include "template_lapack_common.h"
00039 
00040 template<class Treal>
00041 int template_lapack_orgqr(
00042                           const integer *m, 
00043                           const integer *n, 
00044                           const integer *k, 
00045                           Treal * a, 
00046                           const integer *lda, 
00047                           const Treal *tau, 
00048                           Treal *work, 
00049                           const integer *lwork, 
00050                           integer *info
00051                           )
00052 {
00053 /*  -- LAPACK routine (version 3.0) --   
00054        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00055        Courant Institute, Argonne National Lab, and Rice University   
00056        June 30, 1999   
00057 
00058 
00059     Purpose   
00060     =======   
00061 
00062     DORGQR generates an M-by-N real matrix Q with orthonormal columns,   
00063     which is defined as the first N columns of a product of K elementary   
00064     reflectors of order M   
00065 
00066           Q  =  H(1) H(2) . . . H(k)   
00067 
00068     as returned by DGEQRF.   
00069 
00070     Arguments   
00071     =========   
00072 
00073     M       (input) INTEGER   
00074             The number of rows of the matrix Q. M >= 0.   
00075 
00076     N       (input) INTEGER   
00077             The number of columns of the matrix Q. M >= N >= 0.   
00078 
00079     K       (input) INTEGER   
00080             The number of elementary reflectors whose product defines the   
00081             matrix Q. N >= K >= 0.   
00082 
00083     A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
00084             On entry, the i-th column must contain the vector which   
00085             defines the elementary reflector H(i), for i = 1,2,...,k, as   
00086             returned by DGEQRF in the first k columns of its array   
00087             argument A.   
00088             On exit, the M-by-N matrix Q.   
00089 
00090     LDA     (input) INTEGER   
00091             The first dimension of the array A. LDA >= max(1,M).   
00092 
00093     TAU     (input) DOUBLE PRECISION array, dimension (K)   
00094             TAU(i) must contain the scalar factor of the elementary   
00095             reflector H(i), as returned by DGEQRF.   
00096 
00097     WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)   
00098             On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   
00099 
00100     LWORK   (input) INTEGER   
00101             The dimension of the array WORK. LWORK >= max(1,N).   
00102             For optimum performance LWORK >= N*NB, where NB is the   
00103             optimal blocksize.   
00104 
00105             If LWORK = -1, then a workspace query is assumed; the routine   
00106             only calculates the optimal size of the WORK array, returns   
00107             this value as the first entry of the WORK array, and no error   
00108             message related to LWORK is issued by XERBLA.   
00109 
00110     INFO    (output) INTEGER   
00111             = 0:  successful exit   
00112             < 0:  if INFO = -i, the i-th argument has an illegal value   
00113 
00114     =====================================================================   
00115 
00116 
00117        Test the input arguments   
00118 
00119        Parameter adjustments */
00120     /* Table of constant values */
00121      integer c__1 = 1;
00122      integer c_n1 = -1;
00123      integer c__3 = 3;
00124      integer c__2 = 2;
00125     
00126     /* System generated locals */
00127     integer a_dim1, a_offset, i__1, i__2, i__3;
00128     /* Local variables */
00129      integer i__, j, l, nbmin, iinfo;
00130      integer ib, nb, ki, kk;
00131      integer nx;
00132      integer ldwork, lwkopt;
00133      logical lquery;
00134      integer iws;
00135 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00136 
00137 
00138     a_dim1 = *lda;
00139     a_offset = 1 + a_dim1 * 1;
00140     a -= a_offset;
00141     --tau;
00142     --work;
00143 
00144     /* Initialization added by Elias to get rid of compiler warnings. */
00145     ki = 0;
00146     /* Function Body */
00147     *info = 0;
00148     nb = template_lapack_ilaenv(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
00149     lwkopt = maxMACRO(1,*n) * nb;
00150     work[1] = (Treal) lwkopt;
00151     lquery = *lwork == -1;
00152     if (*m < 0) {
00153         *info = -1;
00154     } else if (*n < 0 || *n > *m) {
00155         *info = -2;
00156     } else if (*k < 0 || *k > *n) {
00157         *info = -3;
00158     } else if (*lda < maxMACRO(1,*m)) {
00159         *info = -5;
00160     } else if (*lwork < maxMACRO(1,*n) && ! lquery) {
00161         *info = -8;
00162     }
00163     if (*info != 0) {
00164         i__1 = -(*info);
00165         template_blas_erbla("ORGQR ", &i__1);
00166         return 0;
00167     } else if (lquery) {
00168         return 0;
00169     }
00170 
00171 /*     Quick return if possible */
00172 
00173     if (*n <= 0) {
00174         work[1] = 1.;
00175         return 0;
00176     }
00177 
00178     nbmin = 2;
00179     nx = 0;
00180     iws = *n;
00181     if (nb > 1 && nb < *k) {
00182 
00183 /*        Determine when to cross over from blocked to unblocked code.   
00184 
00185    Computing MAX */
00186         i__1 = 0, i__2 = template_lapack_ilaenv(&c__3, "DORGQR", " ", m, n, k, &c_n1, (
00187                 ftnlen)6, (ftnlen)1);
00188         nx = maxMACRO(i__1,i__2);
00189         if (nx < *k) {
00190 
00191 /*           Determine if workspace is large enough for blocked code. */
00192 
00193             ldwork = *n;
00194             iws = ldwork * nb;
00195             if (*lwork < iws) {
00196 
00197 /*              Not enough workspace to use optimal NB:  reduce NB and   
00198                 determine the minimum value of NB. */
00199 
00200                 nb = *lwork / ldwork;
00201 /* Computing MAX */
00202                 i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORGQR", " ", m, n, k, &c_n1,
00203                          (ftnlen)6, (ftnlen)1);
00204                 nbmin = maxMACRO(i__1,i__2);
00205             }
00206         }
00207     }
00208 
00209     if (nb >= nbmin && nb < *k && nx < *k) {
00210 
00211 /*        Use blocked code after the last block.   
00212           The first kk columns are handled by the block method. */
00213 
00214         ki = (*k - nx - 1) / nb * nb;
00215 /* Computing MIN */
00216         i__1 = *k, i__2 = ki + nb;
00217         kk = minMACRO(i__1,i__2);
00218 
00219 /*        Set A(1:kk,kk+1:n) to zero. */
00220 
00221         i__1 = *n;
00222         for (j = kk + 1; j <= i__1; ++j) {
00223             i__2 = kk;
00224             for (i__ = 1; i__ <= i__2; ++i__) {
00225                 a_ref(i__, j) = 0.;
00226 /* L10: */
00227             }
00228 /* L20: */
00229         }
00230     } else {
00231         kk = 0;
00232     }
00233 
00234 /*     Use unblocked code for the last or only block. */
00235 
00236     if (kk < *n) {
00237         i__1 = *m - kk;
00238         i__2 = *n - kk;
00239         i__3 = *k - kk;
00240         template_lapack_org2r(&i__1, &i__2, &i__3, &a_ref(kk + 1, kk + 1), lda, &tau[kk + 1]
00241                 , &work[1], &iinfo);
00242     }
00243 
00244     if (kk > 0) {
00245 
00246 /*        Use blocked code */
00247 
00248         i__1 = -nb;
00249         for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
00250 /* Computing MIN */
00251             i__2 = nb, i__3 = *k - i__ + 1;
00252             ib = minMACRO(i__2,i__3);
00253             if (i__ + ib <= *n) {
00254 
00255 /*              Form the triangular factor of the block reflector   
00256                 H = H(i) H(i+1) . . . H(i+ib-1) */
00257 
00258                 i__2 = *m - i__ + 1;
00259                 template_lapack_larft("Forward", "Columnwise", &i__2, &ib, &a_ref(i__, i__),
00260                          lda, &tau[i__], &work[1], &ldwork);
00261 
00262 /*              Apply H to A(i:m,i+ib:n) from the left */
00263 
00264                 i__2 = *m - i__ + 1;
00265                 i__3 = *n - i__ - ib + 1;
00266                 template_lapack_larfb("Left", "No transpose", "Forward", "Columnwise", &
00267                         i__2, &i__3, &ib, &a_ref(i__, i__), lda, &work[1], &
00268                         ldwork, &a_ref(i__, i__ + ib), lda, &work[ib + 1], &
00269                         ldwork);
00270             }
00271 
00272 /*           Apply H to rows i:m of current block */
00273 
00274             i__2 = *m - i__ + 1;
00275             template_lapack_org2r(&i__2, &ib, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[
00276                     1], &iinfo);
00277 
00278 /*           Set rows 1:i-1 of current block to zero */
00279 
00280             i__2 = i__ + ib - 1;
00281             for (j = i__; j <= i__2; ++j) {
00282                 i__3 = i__ - 1;
00283                 for (l = 1; l <= i__3; ++l) {
00284                     a_ref(l, j) = 0.;
00285 /* L30: */
00286                 }
00287 /* L40: */
00288             }
00289 /* L50: */
00290         }
00291     }
00292 
00293     work[1] = (Treal) iws;
00294     return 0;
00295 
00296 /*     End of DORGQR */
00297 
00298 } /* dorgqr_ */
00299 
00300 #undef a_ref
00301 
00302 
00303 #endif

Generated on Wed Nov 21 09:32:11 2012 for ergo by  doxygen 1.4.7