template_lapack_orgql.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_ORGQL_HEADER
00036 #define TEMPLATE_LAPACK_ORGQL_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_lapack_orgql(const integer *m, const integer *n, const integer *k, Treal *
00041         a, const integer *lda, const Treal *tau, Treal *work, const integer *lwork, 
00042         integer *info)
00043 {
00044 /*  -- LAPACK routine (version 3.0) --   
00045        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00046        Courant Institute, Argonne National Lab, and Rice University   
00047        June 30, 1999   
00048 
00049 
00050     Purpose   
00051     =======   
00052 
00053     DORGQL generates an M-by-N real matrix Q with orthonormal columns,   
00054     which is defined as the last N columns of a product of K elementary   
00055     reflectors of order M   
00056 
00057           Q  =  H(k) . . . H(2) H(1)   
00058 
00059     as returned by DGEQLF.   
00060 
00061     Arguments   
00062     =========   
00063 
00064     M       (input) INTEGER   
00065             The number of rows of the matrix Q. M >= 0.   
00066 
00067     N       (input) INTEGER   
00068             The number of columns of the matrix Q. M >= N >= 0.   
00069 
00070     K       (input) INTEGER   
00071             The number of elementary reflectors whose product defines the   
00072             matrix Q. N >= K >= 0.   
00073 
00074     A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
00075             On entry, the (n-k+i)-th column must contain the vector which   
00076             defines the elementary reflector H(i), for i = 1,2,...,k, as   
00077             returned by DGEQLF in the last k columns of its array   
00078             argument A.   
00079             On exit, the M-by-N matrix Q.   
00080 
00081     LDA     (input) INTEGER   
00082             The first dimension of the array A. LDA >= max(1,M).   
00083 
00084     TAU     (input) DOUBLE PRECISION array, dimension (K)   
00085             TAU(i) must contain the scalar factor of the elementary   
00086             reflector H(i), as returned by DGEQLF.   
00087 
00088     WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)   
00089             On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   
00090 
00091     LWORK   (input) INTEGER   
00092             The dimension of the array WORK. LWORK >= max(1,N).   
00093             For optimum performance LWORK >= N*NB, where NB is the   
00094             optimal blocksize.   
00095 
00096             If LWORK = -1, then a workspace query is assumed; the routine   
00097             only calculates the optimal size of the WORK array, returns   
00098             this value as the first entry of the WORK array, and no error   
00099             message related to LWORK is issued by XERBLA.   
00100 
00101     INFO    (output) INTEGER   
00102             = 0:  successful exit   
00103             < 0:  if INFO = -i, the i-th argument has an illegal value   
00104 
00105     =====================================================================   
00106 
00107 
00108        Test the input arguments   
00109 
00110        Parameter adjustments */
00111     /* Table of constant values */
00112      integer c__1 = 1;
00113      integer c_n1 = -1;
00114      integer c__3 = 3;
00115      integer c__2 = 2;
00116     
00117     /* System generated locals */
00118     integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
00119     /* Local variables */
00120      integer i__, j, l, nbmin, iinfo;
00121      integer ib, nb, kk;
00122      integer nx;
00123      integer ldwork, lwkopt;
00124      logical lquery;
00125      integer iws;
00126 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00127 
00128 
00129     a_dim1 = *lda;
00130     a_offset = 1 + a_dim1 * 1;
00131     a -= a_offset;
00132     --tau;
00133     --work;
00134 
00135     /* Function Body */
00136     *info = 0;
00137     nb = template_lapack_ilaenv(&c__1, "DORGQL", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
00138     lwkopt = maxMACRO(1,*n) * nb;
00139     work[1] = (Treal) lwkopt;
00140     lquery = *lwork == -1;
00141     if (*m < 0) {
00142         *info = -1;
00143     } else if (*n < 0 || *n > *m) {
00144         *info = -2;
00145     } else if (*k < 0 || *k > *n) {
00146         *info = -3;
00147     } else if (*lda < maxMACRO(1,*m)) {
00148         *info = -5;
00149     } else if (*lwork < maxMACRO(1,*n) && ! lquery) {
00150         *info = -8;
00151     }
00152     if (*info != 0) {
00153         i__1 = -(*info);
00154         template_blas_erbla("ORGQL ", &i__1);
00155         return 0;
00156     } else if (lquery) {
00157         return 0;
00158     }
00159 
00160 /*     Quick return if possible */
00161 
00162     if (*n <= 0) {
00163         work[1] = 1.;
00164         return 0;
00165     }
00166 
00167     nbmin = 2;
00168     nx = 0;
00169     iws = *n;
00170     if (nb > 1 && nb < *k) {
00171 
00172 /*        Determine when to cross over from blocked to unblocked code.   
00173 
00174    Computing MAX */
00175         i__1 = 0, i__2 = template_lapack_ilaenv(&c__3, "DORGQL", " ", m, n, k, &c_n1, (
00176                 ftnlen)6, (ftnlen)1);
00177         nx = maxMACRO(i__1,i__2);
00178         if (nx < *k) {
00179 
00180 /*           Determine if workspace is large enough for blocked code. */
00181 
00182             ldwork = *n;
00183             iws = ldwork * nb;
00184             if (*lwork < iws) {
00185 
00186 /*              Not enough workspace to use optimal NB:  reduce NB and   
00187                 determine the minimum value of NB. */
00188 
00189                 nb = *lwork / ldwork;
00190 /* Computing MAX */
00191                 i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORGQL", " ", m, n, k, &c_n1,
00192                          (ftnlen)6, (ftnlen)1);
00193                 nbmin = maxMACRO(i__1,i__2);
00194             }
00195         }
00196     }
00197 
00198     if (nb >= nbmin && nb < *k && nx < *k) {
00199 
00200 /*        Use blocked code after the first block.   
00201           The last kk columns are handled by the block method.   
00202 
00203    Computing MIN */
00204         i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb;
00205         kk = minMACRO(i__1,i__2);
00206 
00207 /*        Set A(m-kk+1:m,1:n-kk) to zero. */
00208 
00209         i__1 = *n - kk;
00210         for (j = 1; j <= i__1; ++j) {
00211             i__2 = *m;
00212             for (i__ = *m - kk + 1; i__ <= i__2; ++i__) {
00213                 a_ref(i__, j) = 0.;
00214 /* L10: */
00215             }
00216 /* L20: */
00217         }
00218     } else {
00219         kk = 0;
00220     }
00221 
00222 /*     Use unblocked code for the first or only block. */
00223 
00224     i__1 = *m - kk;
00225     i__2 = *n - kk;
00226     i__3 = *k - kk;
00227     template_lapack_org2l(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo)
00228             ;
00229 
00230     if (kk > 0) {
00231 
00232 /*        Use blocked code */
00233 
00234         i__1 = *k;
00235         i__2 = nb;
00236         for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 
00237                 i__2) {
00238 /* Computing MIN */
00239             i__3 = nb, i__4 = *k - i__ + 1;
00240             ib = minMACRO(i__3,i__4);
00241             if (*n - *k + i__ > 1) {
00242 
00243 /*              Form the triangular factor of the block reflector   
00244                 H = H(i+ib-1) . . . H(i+1) H(i) */
00245 
00246                 i__3 = *m - *k + i__ + ib - 1;
00247                 template_lapack_larft("Backward", "Columnwise", &i__3, &ib, &a_ref(1, *n - *
00248                         k + i__), lda, &tau[i__], &work[1], &ldwork);
00249 
00250 /*              Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */
00251 
00252                 i__3 = *m - *k + i__ + ib - 1;
00253                 i__4 = *n - *k + i__ - 1;
00254                 template_lapack_larfb("Left", "No transpose", "Backward", "Columnwise", &
00255                         i__3, &i__4, &ib, &a_ref(1, *n - *k + i__), lda, &
00256                         work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], &
00257                         ldwork);
00258             }
00259 
00260 /*           Apply H to rows 1:m-k+i+ib-1 of current block */
00261 
00262             i__3 = *m - *k + i__ + ib - 1;
00263             template_lapack_org2l(&i__3, &ib, &ib, &a_ref(1, *n - *k + i__), lda, &tau[i__],
00264                      &work[1], &iinfo);
00265 
00266 /*           Set rows m-k+i+ib:m of current block to zero */
00267 
00268             i__3 = *n - *k + i__ + ib - 1;
00269             for (j = *n - *k + i__; j <= i__3; ++j) {
00270                 i__4 = *m;
00271                 for (l = *m - *k + i__ + ib; l <= i__4; ++l) {
00272                     a_ref(l, j) = 0.;
00273 /* L30: */
00274                 }
00275 /* L40: */
00276             }
00277 /* L50: */
00278         }
00279     }
00280 
00281     work[1] = (Treal) iws;
00282     return 0;
00283 
00284 /*     End of DORGQL */
00285 
00286 } /* dorgql_ */
00287 
00288 #undef a_ref
00289 
00290 
00291 #endif

Generated on Wed Nov 21 09:32:37 2012 for ergo by  doxygen 1.4.7