00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #ifndef TEMPLATE_LAPACK_STEIN_HEADER
00036 #define TEMPLATE_LAPACK_STEIN_HEADER
00037
00038
00039 template<class Treal>
00040 int template_lapack_stein(const integer *n, const Treal *d__, const Treal *e,
00041 const integer *m, const Treal *w, const integer *iblock, const integer *isplit,
00042 Treal *z__, const integer *ldz, Treal *work, integer *iwork,
00043 integer *ifail, integer *info)
00044 {
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141 integer c__2 = 2;
00142 integer c__1 = 1;
00143 integer c_n1 = -1;
00144
00145
00146 integer z_dim1, z_offset, i__1, i__2, i__3;
00147 Treal d__1, d__2, d__3, d__4, d__5;
00148
00149 integer jblk, nblk;
00150 integer jmax;
00151 integer i__, j;
00152 integer iseed[4], gpind, iinfo;
00153 integer b1;
00154 integer j1;
00155 Treal ortol;
00156 integer indrv1, indrv2, indrv3, indrv4, indrv5, bn;
00157 Treal xj;
00158 integer nrmchk;
00159 integer blksiz;
00160 Treal onenrm, dtpcrt, pertol, scl, eps, sep, nrm, tol;
00161 integer its;
00162 Treal xjm, ztr, eps1;
00163 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
00164
00165
00166 --d__;
00167 --e;
00168 --w;
00169 --iblock;
00170 --isplit;
00171 z_dim1 = *ldz;
00172 z_offset = 1 + z_dim1 * 1;
00173 z__ -= z_offset;
00174 --work;
00175 --iwork;
00176 --ifail;
00177
00178
00179 ortol = dtpcrt = xjm = onenrm = gpind = 0;
00180
00181 *info = 0;
00182 i__1 = *m;
00183 for (i__ = 1; i__ <= i__1; ++i__) {
00184 ifail[i__] = 0;
00185
00186 }
00187
00188 if (*n < 0) {
00189 *info = -1;
00190 } else if (*m < 0 || *m > *n) {
00191 *info = -4;
00192 } else if (*ldz < maxMACRO(1,*n)) {
00193 *info = -9;
00194 } else {
00195 i__1 = *m;
00196 for (j = 2; j <= i__1; ++j) {
00197 if (iblock[j] < iblock[j - 1]) {
00198 *info = -6;
00199 goto L30;
00200 }
00201 if (iblock[j] == iblock[j - 1] && w[j] < w[j - 1]) {
00202 *info = -5;
00203 goto L30;
00204 }
00205
00206 }
00207 L30:
00208 ;
00209 }
00210
00211 if (*info != 0) {
00212 i__1 = -(*info);
00213 template_blas_erbla("STEIN ", &i__1);
00214 return 0;
00215 }
00216
00217
00218
00219 if (*n == 0 || *m == 0) {
00220 return 0;
00221 } else if (*n == 1) {
00222 z___ref(1, 1) = 1.;
00223 return 0;
00224 }
00225
00226
00227
00228 eps = template_lapack_lamch("Precision", (Treal)0);
00229
00230
00231
00232 for (i__ = 1; i__ <= 4; ++i__) {
00233 iseed[i__ - 1] = 1;
00234
00235 }
00236
00237
00238
00239 indrv1 = 0;
00240 indrv2 = indrv1 + *n;
00241 indrv3 = indrv2 + *n;
00242 indrv4 = indrv3 + *n;
00243 indrv5 = indrv4 + *n;
00244
00245
00246
00247 j1 = 1;
00248 i__1 = iblock[*m];
00249 for (nblk = 1; nblk <= i__1; ++nblk) {
00250
00251
00252
00253 if (nblk == 1) {
00254 b1 = 1;
00255 } else {
00256 b1 = isplit[nblk - 1] + 1;
00257 }
00258 bn = isplit[nblk];
00259 blksiz = bn - b1 + 1;
00260 if (blksiz == 1) {
00261 goto L60;
00262 }
00263 gpind = b1;
00264
00265
00266
00267 onenrm = (d__1 = d__[b1], absMACRO(d__1)) + (d__2 = e[b1], absMACRO(d__2));
00268
00269 d__3 = onenrm, d__4 = (d__1 = d__[bn], absMACRO(d__1)) + (d__2 = e[bn - 1],
00270 absMACRO(d__2));
00271 onenrm = maxMACRO(d__3,d__4);
00272 i__2 = bn - 1;
00273 for (i__ = b1 + 1; i__ <= i__2; ++i__) {
00274
00275 d__4 = onenrm, d__5 = (d__1 = d__[i__], absMACRO(d__1)) + (d__2 = e[
00276 i__ - 1], absMACRO(d__2)) + (d__3 = e[i__], absMACRO(d__3));
00277 onenrm = maxMACRO(d__4,d__5);
00278
00279 }
00280 ortol = onenrm * .001;
00281
00282 dtpcrt = template_blas_sqrt(.1 / blksiz);
00283
00284
00285
00286 L60:
00287 jblk = 0;
00288 i__2 = *m;
00289 for (j = j1; j <= i__2; ++j) {
00290 if (iblock[j] != nblk) {
00291 j1 = j;
00292 goto L160;
00293 }
00294 ++jblk;
00295 xj = w[j];
00296
00297
00298
00299 if (blksiz == 1) {
00300 work[indrv1 + 1] = 1.;
00301 goto L120;
00302 }
00303
00304
00305
00306
00307 if (jblk > 1) {
00308 eps1 = (d__1 = eps * xj, absMACRO(d__1));
00309 pertol = eps1 * 10.;
00310 sep = xj - xjm;
00311 if (sep < pertol) {
00312 xj = xjm + pertol;
00313 }
00314 }
00315
00316 its = 0;
00317 nrmchk = 0;
00318
00319
00320
00321 template_lapack_larnv(&c__2, iseed, &blksiz, &work[indrv1 + 1]);
00322
00323
00324
00325 template_blas_copy(&blksiz, &d__[b1], &c__1, &work[indrv4 + 1], &c__1);
00326 i__3 = blksiz - 1;
00327 template_blas_copy(&i__3, &e[b1], &c__1, &work[indrv2 + 2], &c__1);
00328 i__3 = blksiz - 1;
00329 template_blas_copy(&i__3, &e[b1], &c__1, &work[indrv3 + 1], &c__1);
00330
00331
00332
00333 tol = 0.;
00334 template_lapack_lagtf(&blksiz, &work[indrv4 + 1], &xj, &work[indrv2 + 2], &work[
00335 indrv3 + 1], &tol, &work[indrv5 + 1], &iwork[1], &iinfo);
00336
00337
00338
00339 L70:
00340 ++its;
00341 if (its > 5) {
00342 goto L100;
00343 }
00344
00345
00346
00347
00348 d__2 = eps, d__3 = (d__1 = work[indrv4 + blksiz], absMACRO(d__1));
00349 scl = blksiz * onenrm * maxMACRO(d__2,d__3) / template_blas_asum(&blksiz, &work[
00350 indrv1 + 1], &c__1);
00351 template_blas_scal(&blksiz, &scl, &work[indrv1 + 1], &c__1);
00352
00353
00354
00355 template_lapack_lagts(&c_n1, &blksiz, &work[indrv4 + 1], &work[indrv2 + 2], &
00356 work[indrv3 + 1], &work[indrv5 + 1], &iwork[1], &work[
00357 indrv1 + 1], &tol, &iinfo);
00358
00359
00360
00361
00362 if (jblk == 1) {
00363 goto L90;
00364 }
00365 if ((d__1 = xj - xjm, absMACRO(d__1)) > ortol) {
00366 gpind = j;
00367 }
00368 if (gpind != j) {
00369 i__3 = j - 1;
00370 for (i__ = gpind; i__ <= i__3; ++i__) {
00371 ztr = -template_blas_dot(&blksiz, &work[indrv1 + 1], &c__1, &z___ref(
00372 b1, i__), &c__1);
00373 template_blas_axpy(&blksiz, &ztr, &z___ref(b1, i__), &c__1, &work[
00374 indrv1 + 1], &c__1);
00375
00376 }
00377 }
00378
00379
00380
00381 L90:
00382 jmax = template_blas_idamax(&blksiz, &work[indrv1 + 1], &c__1);
00383 nrm = (d__1 = work[indrv1 + jmax], absMACRO(d__1));
00384
00385
00386
00387
00388 if (nrm < dtpcrt) {
00389 goto L70;
00390 }
00391 ++nrmchk;
00392 if (nrmchk < 3) {
00393 goto L70;
00394 }
00395
00396 goto L110;
00397
00398
00399
00400
00401 L100:
00402 ++(*info);
00403 ifail[*info] = j;
00404
00405
00406
00407 L110:
00408 scl = 1. / template_blas_nrm2(&blksiz, &work[indrv1 + 1], &c__1);
00409 jmax = template_blas_idamax(&blksiz, &work[indrv1 + 1], &c__1);
00410 if (work[indrv1 + jmax] < 0.) {
00411 scl = -scl;
00412 }
00413 template_blas_scal(&blksiz, &scl, &work[indrv1 + 1], &c__1);
00414 L120:
00415 i__3 = *n;
00416 for (i__ = 1; i__ <= i__3; ++i__) {
00417 z___ref(i__, j) = 0.;
00418
00419 }
00420 i__3 = blksiz;
00421 for (i__ = 1; i__ <= i__3; ++i__) {
00422 z___ref(b1 + i__ - 1, j) = work[indrv1 + i__];
00423
00424 }
00425
00426
00427
00428
00429 xjm = xj;
00430
00431
00432 }
00433 L160:
00434 ;
00435 }
00436
00437 return 0;
00438
00439
00440
00441 }
00442
00443 #undef z___ref
00444
00445
00446 #endif