c This is a test program to exercise the block tridiagonal solvers c (1) bl3dfac/bl3dsol. c (2) bl3dfacp/bl3dsolp, c (3) bl3dfac/bl3dsolmod. c In bl3dfac pivoting is done only within blocks. Multiple right hand c sides for bl3dsol are stored by blocks. c In bl3dfacp, column pivoting is done, resulting in fill-in. Multiple c right hand sides for bl3dsolp are stored with each right had side c contiguous,as required by Lapack. c The multiple right hand sides for bl3dsolmod are stored as for c Lapack, and bl3dsolp. c c Input: the number of blocks, the size of the blocks, and the c number of right hand sides. c Comparison is made with dgetrf/dgetrs from Lapack, applied to the c corresponding full system. c c The block tridiagonal matrix is given by c c diag{ F(j-1), D(j), E(j)}, j = 1,2,...,n. c c The elements of F,D,E are generated randomly, in (0,1). c The matrix is re-stored as a full matrix in A. c The right hand sides are set up so that the solution for the c jth right hand side is [j,j,...,j]^T. c So that the same systems are given to both bl3dfac/bl3dsol c and to bl3dfacp/bl3dsolp, once D,E,F are generated for use with c bl3dfac/bl3dsol, copies are made in DC,EC,FC for use with c bl3dfacp/bl3dsolp. c The right hand sides for lapack are stored in larhs; for c bl3dfac/bl3dsol they are stored in rhs; for bl3dfacp/bl3dsolp c they are stored in pivrhs; and for bl3dsolmod they are stored c in modrhs. Initially, of course, larhs, pivrhs and modrhs c are identical. c The systems are solved using Lapack, bl3dfacp/bl3dsolp, c bl3dfac/bl3dsol, and bl3dfac/bl3dsolmod, and the L-1 norm of c the errors is printed. c As a measure of how much pivoting is done, the number of c swaps is recorded for each factoring routine, as a fraction of c the size of the system, for both the block pivoting and full c pivoting codes. c RESTRICTIONS: integer maxn, maxk, maxrhs, M, MM, MMM parameter ( maxn = 200, maxk = 20, M = maxk*maxn, MM = M*M ) parameter ( maxrhs = 10, MMM = M*maxk ) double precision E(maxk * maxk * maxn), D(maxk * maxk * maxn), * F(maxk * maxk * maxn), G(maxk * maxk * maxn) double precision EC(maxk * maxk * maxn), DC(maxk * maxk * maxn), * FC(maxk * maxk * maxn) double precision A(M, M), rhs(maxrhs*maxn*maxk), rcond, anorm double precision larhs(maxrhs*maxn*maxk) double precision pivrhs(maxrhs*maxn*maxk) double precision modrhs(maxrhs*maxn*maxk) double precision temp, error, total intrinsic abs, float double precision work(4*M) integer iwork(M) integer n, k, pivot(maxn*maxk), nels, nrhs, nswaps integer j, i, p, q, info, nk character *1 norm c Timing variables. real dtime, lap, lapcon, nopiv, nopmod, piv, y, t(2) norm = '1' 1 continue read(*,*,end=7000)n, k, nrhs nk = n*k nels = nk*k call g05faf(-1.0d0, 1.0d0, nels, E) call g05faf(-1.0d0, 1.0d0, nels, D) call g05faf(-1.0d0, 1.0d0, nels, F) do 2 i = 1,nels EC(i) = E(i) FC(i) = F(i) DC(i) = D(i) 2 continue print 9200,'n = ',n,' k = ',k,' nrhs = ',nrhs, * ' system size = ',nk do 3 i = 1, n*k do 3 j = 1, n*k A(i,j) = 0.0d0 3 continue do 20 p = 1,n do 10 j = 1,k do 5 i = 1,k A(i+(p-1)*k,j + (p-1)*k) = D((j-1)*k+i+(p-1)*k*k) 5 continue 10 continue 20 continue do 40 p = 1,n-1 do 30 j = 1,k do 25 i = 1,k A(i+(p-1)*k,j + p*k) = E((j-1)*k+i+(p-1)*k*k) A(i+p*k,j + (p-1)*k) = F((j-1)*k+i+(p-1)*k*k) 25 continue 30 continue 40 continue anorm = 0.0d0 do 41 p = 1, n*k temp = 0.0d0 do 31 j = 1,n*k temp = temp + abs(A(p,j)) 31 continue if ( temp .gt. anorm ) anorm = temp 41 continue c Set up right hand sides for a solution = (1,1,1,..,1). do 90 p = 1,n do 80 q = 1, nrhs do 60 j = 1,k rhs(((p-1)*nrhs+q-1)*k+j) = 0.0d0 do 50 i = 1, nk rhs(((p-1)*nrhs+q-1)*k+j) = * rhs(((p-1)*nrhs+q-1)*k+j) + q*A((p-1)*k+j,i) 50 continue larhs(((p-1)+(q-1)*n)*k+j) = * rhs(((p-1)*nrhs+q-1)*k+j) pivrhs(((p-1)+(q-1)*n)*k+j) = * larhs(((p-1)+(q-1)*n)*k+j) modrhs(((p-1)+(q-1)*n)*k+j) = * larhs(((p-1)+(q-1)*n)*k+j) 60 continue 80 continue 90 continue y = dtime(t) call dgetrf(nk, nk, A, M, pivot, info) y = dtime(t) lap = t(1) y = dtime(t) call dgecon(norm, nk, A, M, anorm, rcond, work, iwork, info) y = dtime(t) lapcon = t(1) print *,' ' print 9100, 'norm(A) = ', anorm, ' cond(A) = ',1.d0/rcond print *,' ' c Get Lapack solution for verification: y = dtime(t) call dgetrs('N', nk, nrhs, A, M, pivot, larhs, n*k, info) y = dtime(t) lap = lap + t(1) total = 0.0d0 do 210 q = 1, nrhs error = 0.0d0 do 200 j = 1, nk error = error + abs(larhs((q-1)*nk + j) - float(q)) 200 continue total = total + error 210 continue print 9500,'Time for Lapack: ',lap*1000.0,' milliseconds' print 9500,'Time for dgecon: ',lapcon*1000.0,' milliseconds' print 9000,'Total error, dgetrs = ', total do 211 p = 1,n do 211 j = 1,k pivot((p-1)*k+j) = (p-1)*k+j 211 continue y = dtime(t) call bl3dfacp(n, k, E, D, F, G, pivot, info) y = dtime(t) piv = t(1) print *,' ' y = dtime(t) call bl3dsolp('N', n, k, nk, E, D, F, G, pivot, nrhs, pivrhs) y = dtime(t) piv = piv + t(1) nswaps = 0 do 215 p = 1, n do 212 j = 1, k-1 if ( pivot((p-1)*k+j) .ne. j) nswaps = nswaps + 1 212 continue 215 continue total = 0.0d0 do 230 q = 1, nrhs error = 0.0d0 do 225 p = 1,n do 220 j = 1, k error = error * + abs(pivrhs(((p-1)+(q-1)*n)*k+j) - float(q)) 220 continue 225 continue total = total + error 230 continue print 9500,'Time for bl3dpiv: ', piv*1000.0,' milliseconds' print 9000,'Total error, bl3dsolp = ', total print 9600,'swap ratio = ',nswaps/float(n*k) y = dtime(t) call bl3dfac(n, k, EC, DC, FC, pivot) y = dtime(t) nopiv = t(1) nopmod = nopiv nswaps = 0 do 315 p = 1, n do 312 j = 1, k-1 if ( pivot((p-1)*k+j) .ne. j) nswaps = nswaps + 1 312 continue 315 continue y = dtime(t) call bl3dsol('N', n, k, EC, DC, FC, pivot, nrhs, rhs) y = dtime(t) nopiv = nopiv + t(1) total = 0.0d0 do 330 q = 1, nrhs error = 0.0d0 do 325 p = 1,n do 320 j = 1, k error = error * + abs(rhs(((q-1)+(p-1)*nrhs)*k+j) - float(q)) 320 continue 325 continue total = total + error 330 continue print *,' ' print 9500,'Time for bl3dsol: ', nopiv*1000.0,' milliseconds' print 9000,'Total error, bl3dsol = ', total print 9600,'swap ratio = ',nswaps/float(n*k) y = dtime(t) call bl3dsolmod(n, k, nk, EC, DC, FC, pivot, nrhs, modrhs) y = dtime(t) nopmod = nopmod + t(1) total = 0.0d0 do 350 q = 1, nrhs error = 0.0d0 do 355 p = 1,n do 340 j = 1, k error = error * + abs(modrhs(((p-1)+(q-1)*n)*k+j) - float(q)) 340 continue 355 continue total = total + error 350 continue print *,' ' print 9500,'Time for bl3dmod: ',nopmod*1000.0,' milliseconds' print 9000,'Total error, bl3dsolmod = ', total print 9600,'swap ratio = ',nswaps/float(n*k) print *,' ' print *,' ' print *,' ' print *,' ' go to 1 7000 continue 8000 continue 9000 format(a25,d8.2) 9100 format(a10,d8.1,a11,d8.1) 9200 format(a4,i4,a5,i4,a8,i4,a15,i6) 9500 format(a17,f10.2,a13) 9600 format(a13,f6.2) end