00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #ifndef TEMPLATE_LAPACK_GGBAL_HEADER
00036 #define TEMPLATE_LAPACK_GGBAL_HEADER
00037
00038
00039 template<class Treal>
00040 int template_lapack_ggbal(const char *job, const integer *n, Treal *a, const integer *
00041 lda, Treal *b, const integer *ldb, integer *ilo, integer *ihi,
00042 Treal *lscale, Treal *rscale, Treal *work, integer *
00043 info)
00044 {
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143 integer c__1 = 1;
00144 Treal c_b34 = 10.;
00145 Treal c_b70 = .5;
00146
00147
00148 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
00149 Treal d__1, d__2, d__3;
00150
00151 integer lcab;
00152 Treal beta, coef;
00153 integer irab, lrab;
00154 Treal basl, cmax;
00155 Treal coef2, coef5;
00156 integer i__, j, k, l, m;
00157 Treal gamma, t, alpha;
00158 Treal sfmin, sfmax;
00159 integer iflow;
00160 integer kount, jc;
00161 Treal ta, tb, tc;
00162 integer ir, it;
00163 Treal ew;
00164 integer nr;
00165 Treal pgamma;
00166 integer lsfmin, lsfmax, ip1, jp1, lm1;
00167 Treal cab, rab, ewc, cor, sum;
00168 integer nrp2, icab;
00169 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00170 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
00171
00172
00173 a_dim1 = *lda;
00174 a_offset = 1 + a_dim1 * 1;
00175 a -= a_offset;
00176 b_dim1 = *ldb;
00177 b_offset = 1 + b_dim1 * 1;
00178 b -= b_offset;
00179 --lscale;
00180 --rscale;
00181 --work;
00182
00183
00184 pgamma = 0;
00185
00186 *info = 0;
00187 if (! template_blas_lsame(job, "N") && ! template_blas_lsame(job, "P") && ! template_blas_lsame(job, "S")
00188 && ! template_blas_lsame(job, "B")) {
00189 *info = -1;
00190 } else if (*n < 0) {
00191 *info = -2;
00192 } else if (*lda < maxMACRO(1,*n)) {
00193 *info = -4;
00194 } else if (*ldb < maxMACRO(1,*n)) {
00195 *info = -5;
00196 }
00197 if (*info != 0) {
00198 i__1 = -(*info);
00199 template_blas_erbla("GGBAL ", &i__1);
00200 return 0;
00201 }
00202
00203 k = 1;
00204 l = *n;
00205
00206
00207
00208 if (*n == 0) {
00209 return 0;
00210 }
00211
00212 if (template_blas_lsame(job, "N")) {
00213 *ilo = 1;
00214 *ihi = *n;
00215 i__1 = *n;
00216 for (i__ = 1; i__ <= i__1; ++i__) {
00217 lscale[i__] = 1.;
00218 rscale[i__] = 1.;
00219
00220 }
00221 return 0;
00222 }
00223
00224 if (k == l) {
00225 *ilo = 1;
00226 *ihi = 1;
00227 lscale[1] = 1.;
00228 rscale[1] = 1.;
00229 return 0;
00230 }
00231
00232 if (template_blas_lsame(job, "S")) {
00233 goto L190;
00234 }
00235
00236 goto L30;
00237
00238
00239
00240
00241
00242 L20:
00243 l = lm1;
00244 if (l != 1) {
00245 goto L30;
00246 }
00247
00248 rscale[1] = 1.;
00249 lscale[1] = 1.;
00250 goto L190;
00251
00252 L30:
00253 lm1 = l - 1;
00254 for (i__ = l; i__ >= 1; --i__) {
00255 i__1 = lm1;
00256 for (j = 1; j <= i__1; ++j) {
00257 jp1 = j + 1;
00258 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
00259 goto L50;
00260 }
00261
00262 }
00263 j = l;
00264 goto L70;
00265
00266 L50:
00267 i__1 = l;
00268 for (j = jp1; j <= i__1; ++j) {
00269 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
00270 goto L80;
00271 }
00272
00273 }
00274 j = jp1 - 1;
00275
00276 L70:
00277 m = l;
00278 iflow = 1;
00279 goto L160;
00280 L80:
00281 ;
00282 }
00283 goto L100;
00284
00285
00286
00287 L90:
00288 ++k;
00289
00290 L100:
00291 i__1 = l;
00292 for (j = k; j <= i__1; ++j) {
00293 i__2 = lm1;
00294 for (i__ = k; i__ <= i__2; ++i__) {
00295 ip1 = i__ + 1;
00296 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
00297 goto L120;
00298 }
00299
00300 }
00301 i__ = l;
00302 goto L140;
00303 L120:
00304 i__2 = l;
00305 for (i__ = ip1; i__ <= i__2; ++i__) {
00306 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
00307 goto L150;
00308 }
00309
00310 }
00311 i__ = ip1 - 1;
00312 L140:
00313 m = k;
00314 iflow = 2;
00315 goto L160;
00316 L150:
00317 ;
00318 }
00319 goto L190;
00320
00321
00322
00323 L160:
00324 lscale[m] = (Treal) i__;
00325 if (i__ == m) {
00326 goto L170;
00327 }
00328 i__1 = *n - k + 1;
00329 template_blas_swap(&i__1, &a_ref(i__, k), lda, &a_ref(m, k), lda);
00330 i__1 = *n - k + 1;
00331 template_blas_swap(&i__1, &b_ref(i__, k), ldb, &b_ref(m, k), ldb);
00332
00333
00334
00335 L170:
00336 rscale[m] = (Treal) j;
00337 if (j == m) {
00338 goto L180;
00339 }
00340 template_blas_swap(&l, &a_ref(1, j), &c__1, &a_ref(1, m), &c__1);
00341 template_blas_swap(&l, &b_ref(1, j), &c__1, &b_ref(1, m), &c__1);
00342
00343 L180:
00344 switch (iflow) {
00345 case 1: goto L20;
00346 case 2: goto L90;
00347 }
00348
00349 L190:
00350 *ilo = k;
00351 *ihi = l;
00352
00353 if (*ilo == *ihi) {
00354 return 0;
00355 }
00356
00357 if (template_blas_lsame(job, "P")) {
00358 return 0;
00359 }
00360
00361
00362
00363 nr = *ihi - *ilo + 1;
00364 i__1 = *ihi;
00365 for (i__ = *ilo; i__ <= i__1; ++i__) {
00366 rscale[i__] = 0.;
00367 lscale[i__] = 0.;
00368
00369 work[i__] = 0.;
00370 work[i__ + *n] = 0.;
00371 work[i__ + (*n << 1)] = 0.;
00372 work[i__ + *n * 3] = 0.;
00373 work[i__ + (*n << 2)] = 0.;
00374 work[i__ + *n * 5] = 0.;
00375
00376 }
00377
00378
00379
00380 basl = template_blas_lg10(&c_b34);
00381 i__1 = *ihi;
00382 for (i__ = *ilo; i__ <= i__1; ++i__) {
00383 i__2 = *ihi;
00384 for (j = *ilo; j <= i__2; ++j) {
00385 tb = b_ref(i__, j);
00386 ta = a_ref(i__, j);
00387 if (ta == 0.) {
00388 goto L210;
00389 }
00390 d__1 = absMACRO(ta);
00391 ta = template_blas_lg10(&d__1) / basl;
00392 L210:
00393 if (tb == 0.) {
00394 goto L220;
00395 }
00396 d__1 = absMACRO(tb);
00397 tb = template_blas_lg10(&d__1) / basl;
00398 L220:
00399 work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb;
00400 work[j + *n * 5] = work[j + *n * 5] - ta - tb;
00401
00402 }
00403
00404 }
00405
00406 coef = 1. / (Treal) (nr << 1);
00407 coef2 = coef * coef;
00408 coef5 = coef2 * .5;
00409 nrp2 = nr + 2;
00410 beta = 0.;
00411 it = 1;
00412
00413
00414
00415 L250:
00416
00417 gamma = template_blas_dot(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)]
00418 , &c__1) + template_blas_dot(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + *
00419 n * 5], &c__1);
00420
00421 ew = 0.;
00422 ewc = 0.;
00423 i__1 = *ihi;
00424 for (i__ = *ilo; i__ <= i__1; ++i__) {
00425 ew += work[i__ + (*n << 2)];
00426 ewc += work[i__ + *n * 5];
00427
00428 }
00429
00430
00431 d__1 = ew;
00432
00433 d__2 = ewc;
00434
00435 d__3 = ew - ewc;
00436 gamma = coef * gamma - coef2 * (d__1 * d__1 + d__2 * d__2) - coef5 * (
00437 d__3 * d__3);
00438 if (gamma == 0.) {
00439 goto L350;
00440 }
00441 if (it != 1) {
00442 beta = gamma / pgamma;
00443 }
00444 t = coef5 * (ewc - ew * 3.);
00445 tc = coef5 * (ew - ewc * 3.);
00446
00447 template_blas_scal(&nr, &beta, &work[*ilo], &c__1);
00448 template_blas_scal(&nr, &beta, &work[*ilo + *n], &c__1);
00449
00450 template_blas_axpy(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], &
00451 c__1);
00452 template_blas_axpy(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1);
00453
00454 i__1 = *ihi;
00455 for (i__ = *ilo; i__ <= i__1; ++i__) {
00456 work[i__] += tc;
00457 work[i__ + *n] += t;
00458
00459 }
00460
00461
00462
00463 i__1 = *ihi;
00464 for (i__ = *ilo; i__ <= i__1; ++i__) {
00465 kount = 0;
00466 sum = 0.;
00467 i__2 = *ihi;
00468 for (j = *ilo; j <= i__2; ++j) {
00469 if (a_ref(i__, j) == 0.) {
00470 goto L280;
00471 }
00472 ++kount;
00473 sum += work[j];
00474 L280:
00475 if (b_ref(i__, j) == 0.) {
00476 goto L290;
00477 }
00478 ++kount;
00479 sum += work[j];
00480 L290:
00481 ;
00482 }
00483 work[i__ + (*n << 1)] = (Treal) kount * work[i__ + *n] + sum;
00484
00485 }
00486
00487 i__1 = *ihi;
00488 for (j = *ilo; j <= i__1; ++j) {
00489 kount = 0;
00490 sum = 0.;
00491 i__2 = *ihi;
00492 for (i__ = *ilo; i__ <= i__2; ++i__) {
00493 if (a_ref(i__, j) == 0.) {
00494 goto L310;
00495 }
00496 ++kount;
00497 sum += work[i__ + *n];
00498 L310:
00499 if (b_ref(i__, j) == 0.) {
00500 goto L320;
00501 }
00502 ++kount;
00503 sum += work[i__ + *n];
00504 L320:
00505 ;
00506 }
00507 work[j + *n * 3] = (Treal) kount * work[j] + sum;
00508
00509 }
00510
00511 sum = template_blas_dot(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1)
00512 + template_blas_dot(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1);
00513 alpha = gamma / sum;
00514
00515
00516
00517 cmax = 0.;
00518 i__1 = *ihi;
00519 for (i__ = *ilo; i__ <= i__1; ++i__) {
00520 cor = alpha * work[i__ + *n];
00521 if (absMACRO(cor) > cmax) {
00522 cmax = absMACRO(cor);
00523 }
00524 lscale[i__] += cor;
00525 cor = alpha * work[i__];
00526 if (absMACRO(cor) > cmax) {
00527 cmax = absMACRO(cor);
00528 }
00529 rscale[i__] += cor;
00530
00531 }
00532 if (cmax < .5) {
00533 goto L350;
00534 }
00535
00536 d__1 = -alpha;
00537 template_blas_axpy(&nr, &d__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)]
00538 , &c__1);
00539 d__1 = -alpha;
00540 template_blas_axpy(&nr, &d__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], &
00541 c__1);
00542
00543 pgamma = gamma;
00544 ++it;
00545 if (it <= nrp2) {
00546 goto L250;
00547 }
00548
00549
00550
00551 L350:
00552 sfmin = template_lapack_lamch("S", (Treal)0);
00553 sfmax = 1. / sfmin;
00554 lsfmin = (integer) (template_blas_lg10(&sfmin) / basl + 1.);
00555 lsfmax = (integer) (template_blas_lg10(&sfmax) / basl);
00556 i__1 = *ihi;
00557 for (i__ = *ilo; i__ <= i__1; ++i__) {
00558 i__2 = *n - *ilo + 1;
00559 irab = template_blas_idamax(&i__2, &a_ref(i__, *ilo), lda);
00560 rab = (d__1 = a_ref(i__, irab + *ilo - 1), absMACRO(d__1));
00561 i__2 = *n - *ilo + 1;
00562 irab = template_blas_idamax(&i__2, &b_ref(i__, *ilo), lda);
00563
00564 d__2 = rab, d__3 = (d__1 = b_ref(i__, irab + *ilo - 1), absMACRO(d__1));
00565 rab = maxMACRO(d__2,d__3);
00566 d__1 = rab + sfmin;
00567 lrab = (integer) (template_blas_lg10(&d__1) / basl + 1.);
00568 ir = (integer) (lscale[i__] + template_lapack_d_sign(&c_b70, &lscale[i__]));
00569
00570 i__2 = maxMACRO(ir,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lrab;
00571 ir = minMACRO(i__2,i__3);
00572 lscale[i__] = template_lapack_pow_di(&c_b34, &ir);
00573 icab = template_blas_idamax(ihi, &a_ref(1, i__), &c__1);
00574 cab = (d__1 = a_ref(icab, i__), absMACRO(d__1));
00575 icab = template_blas_idamax(ihi, &b_ref(1, i__), &c__1);
00576
00577 d__2 = cab, d__3 = (d__1 = b_ref(icab, i__), absMACRO(d__1));
00578 cab = maxMACRO(d__2,d__3);
00579 d__1 = cab + sfmin;
00580 lcab = (integer) (template_blas_lg10(&d__1) / basl + 1.);
00581 jc = (integer) (rscale[i__] + template_lapack_d_sign(&c_b70, &rscale[i__]));
00582
00583 i__2 = maxMACRO(jc,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lcab;
00584 jc = minMACRO(i__2,i__3);
00585 rscale[i__] = template_lapack_pow_di(&c_b34, &jc);
00586
00587 }
00588
00589
00590
00591 i__1 = *ihi;
00592 for (i__ = *ilo; i__ <= i__1; ++i__) {
00593 i__2 = *n - *ilo + 1;
00594 template_blas_scal(&i__2, &lscale[i__], &a_ref(i__, *ilo), lda);
00595 i__2 = *n - *ilo + 1;
00596 template_blas_scal(&i__2, &lscale[i__], &b_ref(i__, *ilo), ldb);
00597
00598 }
00599
00600
00601
00602 i__1 = *ihi;
00603 for (j = *ilo; j <= i__1; ++j) {
00604 template_blas_scal(ihi, &rscale[j], &a_ref(1, j), &c__1);
00605 template_blas_scal(ihi, &rscale[j], &b_ref(1, j), &c__1);
00606
00607 }
00608
00609 return 0;
00610
00611
00612
00613 }
00614
00615 #undef b_ref
00616 #undef a_ref
00617
00618
00619 #endif