C-----Take a MatrixMarket HB data and Implement the Power Method in Parallel
C-----i.e. Compute the maximal eigenvalue and its eigenvector
c-----Swap Column partition to Row partition

      PROGRAM cup3   !-------! MPI SAMPLE PROGRAM 3   (Section 16.1.2)
       implicit none
c-----MPI Declaration
      include 'mpif.h'       ! This C-like inclusion is necessary
      INTEGER MY_ID, ierr, MASTER, NPROC,NLAST, INFO, N,I,
c    +        status(MPI_STATUS_SIZE),
     + K2,K1, NDIV,MAXIT,ITER,I,NLA
      DOUBLE PRECISION TT0,TT1,TT2, WHO(2),WHO_ALL(2)
      CHARACTER FILE_IN*20
c------------------------------------------------ readmt
      character title*256, key*8, type*3, guesol*2
      integer nrow,ncol, nnz,nzmax,nmax,nrhs, job,iounit
c------------------------------------------------ readmt
c-----Matrix Declaration: NMAX for max N  /  NZMAX for MAX nnz in A
      parameter(NMAX=38744, NZMAX=1771722)
      DOUBLE PRECISION A0(NZMAX), A(NZMAX)
      INTEGER       ICOL(NZMAX),ISTAR0(NMAX+1) ! Triple (A0,IROW,ISTAR0)
      INTEGER       IROW(NZMAX),ISTART(NMAX+1) ! Note of (A,ICOL,ISTART)
      INTEGER SGATHER(NMAX+1),LGATHER(NMAX), IWK(NMAX+1) !  Work spaces
c:::::Power Method Use
      DOUBLE PRECISION Y(NMAX), Z(NMAX), MU, MU_0, TOL
C=======================================================================
      CALL MPI_INIT(ierr)                            ! Minimal CALL 1
      CALL MPI_COMM_RANK(MPI_COMM_WORLD, MY_ID,ierr) ! Minimal CALL 2
      CALL MPI_COMM_SIZE(MPI_COMM_WORLD, NPROC,ierr) ! Minimal CALL 3 
           NLAST = NPROC - 1 !  Proc ID for the last processor
           IF (NPROC.LE.1) NLAST=NPROC
           MASTER = 0        !  Master can be others say 1 if nproc>1 
      TT1 = MPI_WTIME()      !  Set the initial CPU clock
C----------------------------!---------------------------------
        FILE_IN='cup23.in'
        OPEN(15,FILE=FILE_IN,ERR=11111) ! Warning only to Label 1111
        READ(15,*) FILE_IN
        CLOSE(15)
      IF (MY_ID .EQ. 0) THEN ! Main Proc access the Screen ----
ccccc   PRINT*,'Type in the File Name of a HB data (e.g. nc261.rua)?'
       CALL MPI_GET_PROCESSOR_NAME(title,ierr,INFO)
	PRINT*,'====================cup3.f===================='
	PRINT*,'Number of processes started: ',NPROC,' ',title(1:9)
	PRINT*,'Matrix loaded from HB file : ',FILE_IN(1:12)
      TT2 = MPI_WTIME() - TT1   !! Use reduce to compute max CPU
      print'(A,G12.2)','  CPU after read =',TT2
      ENDIF 

c Let other procs know about FILE_IN (from the main MY_ID=0)
*     CALL MPI_bcast(FILE_IN,20,MPI_CHARACTER, 0, MPI_COMM_WORLD,ierr)
*     CALL MPI_barrier(MPI_COMM_WORLD, ierr)  !Synchronize for FILE_IN 

c All to fetch the sparse matrix directly
          iounit=25
      OPEN(iounit,FILE=FILE_IN,ERR=11111) ! Warning only to Label 1111
            job = 3
      CALL readmt(my_id,nmax,nzmax,job,iounit,A0,IROW,ISTAR0, Y,nrhs,
     *           guesol,nrow,ncol,nnz,title,key,type,ierr)
      close(iounit)
          N = nrow
      DO I = 1, N
       Y(I) = 0.0D0
      ENDDO

c All to Flip the matrix into a ROW storage format
      CALL COL2RW(N, nnz,A0,IROW,ISTAR0, A,ICOL,ISTART, IWK) 

      IF (MY_ID .EQ. 0) THEN ! Main Proc access the Screen ----
ccccc print'(/,3A)','Read the file = ',file_in(1:9),' (HB format)'
      print'(A,I7,A,I5)',' Matrix Size  = ', nrow, ' X ',ncol
      print*,'Matrix Title = ',title(1:60)
      print*,'Market Title = ',key,   '    GUESOL Symbol :',guesol
      print'(A,A3,A,I1,A)','  Market Type = ',type,'  (error =',ierr,')'
      ENDIF ! Output Info Done
      GOTO 22222 ! Carry on if read OK
11111 continue
          PRINT*,'File Reading Error - Check file ==>',FILE_IN
          CALL MPI_ABORT(MPI_COMM_WORLD,info,ierr) 
          STOP
22222 continue !! Generate the initial Z for starting the Power Method
           CALL nagrand(Z, N, 0.1D0,1.1D0, 0) ! Same Seed = 0
           MU   = 1.0D0 ! Initial \mu
           MU_0 = 0.0D0 ! Assume the eventual \lambda_1 is not 0
           TOL = 1.0D-6 ! Tolerance to stop the method
         MAXIT = N*2    ! Max iterations
C=======================================================================
           NDIV = N / NPROC    ! Subtask for each processor
           K1 = MY_ID*NDIV+1   ! Start of Columns for MY_ID
           K2 = (MY_ID+1)*NDIV ! End of the Columns for MY_ID
           NLA = NDIV + (N-NDIV*NPROC) !Subtask for NLAST

*______Variable Lengths and Starting Positions of Row Partitions
       LGATHER(1)=NDIV
       SGATHER(1)=0  ! Starting position (before position 1) !!!
      DO I=2, NPROC
       LGATHER(I)= NDIV
       SGATHER(I)= SGATHER(I-1)+NDIV
      ENDDO
       LGATHER(NPROC)= NLA
ccccc  IF (NPROC.GT.1) SGATHER(NPROC)= SGATHER(NLAST)+NLA

      IF (MY_ID.eq.NLAST) THEN
             K2=N !Adjust the last subtask
           NDIV=NLA
      ENDIF
      TT0 = MPI_WTIME()      !  Set the soln-only CPU clock

      DO ITER = 1, MAXIT !!____________________________________Main Loop
       
* Y=A*Z/mu in part rows - using matrix (A,ICOL,ISTART)
      CALL sparbyrw(A,ICOL,ISTART, K1,K2,N,nzmax, mu,Z,Y)

* New Z = Join(Y) = A*Z/mu in whole

      CALL MPI_ALLGATHERV(Y,NDIV,MPI_DOUBLE_PRECISION, 
     +         Z,LGATHER,SGATHER,MPI_DOUBLE_PRECISION, 
     +         MPI_COMM_WORLD,ierr) 

      CALL MUasMAX(Z, N, mu) ! Find the MAX component in ABS
       
* Check for convergence
      IF ( DABS(MU-MU_0) .LE. TOL .and. ITER.GT.1 ) THEN
          TOL = DABS(MU-MU_0)
          GOTO 33333  !! Note Y will be the eigenvector for MU
      ELSE
          MU_0 = MU 
      ENDIF

      CALL MPI_barrier(MPI_COMM_WORLD, ierr)  !Synchronize for iteration

      ENDDO        !! iter ____________________________________Main Loop 
33333 continue !! Exit the Main Loop

c_________________________________________STOP here to check CPU________
      TT2 = MPI_WTIME() - TT1   !! Use reduce to compute max CPU
      TT0 = MPI_WTIME() - TT0   !! Use reduce to compute max CPU
C=======================================================================

       who(1)=TT2
       who(2)=MY_ID
       CALL MPI_REDUCE(who,who_all,1,MPI_2DOUBLE_PRECISION,
     +   MPI_MAXLOC, MASTER, MPI_COMM_WORLD, ierr) 

       IF (MY_ID .EQ. MASTER) then
       WRITE(*,'(''Max eigenvalue='',2G20.12,''found by cup3.f after'',
     + I6,'' iterations'')') MU_0,MU, ITER
       WRITE(*,'(''ENTIRE : CPU  ='',G10.2,'' on Proc '',
     +      I2,'' using'',I3,'' Processors (Err ='',E10.2,'')'')')
     + who_all(1),INT(who_all(2)),NPROC,TOL
       ENDIF
      CALL MPI_barrier(MPI_COMM_WORLD, ierr)  !Synchronize for iteration
       who(1)=TT0
       who(2)=MY_ID
       CALL MPI_REDUCE(who,who_all,1,MPI_2DOUBLE_PRECISION,
     +   MPI_MAXLOC, MASTER, MPI_COMM_WORLD, ierr) 

       IF (MY_ID .EQ. MASTER) then
       WRITE(*,'(''Parallel CPU  ='',G10.2,'' on Proc '',
     +      I2,'' using'',I3,'' Processors (Err ='',E10.2,'')'')')
     + who_all(1),INT(who_all(2)),NPROC,TOL
       ENDIF
c================================================================== END

      CALL MPI_FINALIZE(ierr)                         ! Minimal CALL 4
       STOP
        END

c-----Multiply by rows-----------------------------------------------------
      subroutine sparbyrw(A,ICOL,ISTART, K1,K2,N,nzmax, Amu,x,y) 
      implicit none !Output y=A*x where A is in (A,ICOL,ISTART) 
      integer J,K,JK,K1,K2, N,nzmax, ICOL(nzmax),ISTART(N+1)
      double precision A(nzmax),x(N),y(N), Amu
         DO J=K1, K2     !------------------- Parallel Rows
           JK = J-K1+1
          y(JK)=0.0d0
          DO K=ISTART(j), ISTART(j+1)-1
           y(JK)=y(JK)+A(k)*x(ICOL(k))
          enddo
         enddo
       do J=1, K2-K1+1 ! Small row segment
          y(J)=y(J)/Amu
       enddo
      return
      end 
c--------------------------------------------------------------------------
      subroutine readmt(my_id,nmax,nzmax,job,iounit,a,ja,ia,rhs,nrhs,
     +                   guesol,nrow,ncol,nnz,title,key,type,ierr)
c--------------------------------------------------------------------------
c This subroutine reads  a boeing/harwell matrix / handles right hand 
c sides in full format only (no sparse right hand sides).
c Also the matrix must be in assembled forms.
c      Author: Youcef Saad - Date: Sept. 1989  and  Updated Oct 31, 1989.
c      http://www-users.cs.umn.edu/~saad/software/SPARSKIT/sparskit.html
c-Part of Utilities' Routines in sparskit package (University of Minnisota)
c Refer to the documentation in SPARSKIT on the Harwell-Boeing formats
c       for details on the format assumed by readmt.
c See the Terms of Conditions for Any Further Distribution of this routine
c--------------------------------------------------------------------------
      character title*72, key*8, type*3, ptrfmt*16, indfmt*16,
     1       valfmt*20, rhsfmt*20, rhstyp*3, guesol*2
      integer totcrd, ptrcrd, indcrd, valcrd, rhscrd, nrow, ncol,
     1     nnz, neltvl, nrhs, nmax, nzmax, nrwindx
      integer ia (nmax+1), ja (nzmax) 
      double precision a(nzmax), rhs(*) 
c-----------------------------------------------------------------------
      ierr = 0
      lenrhs = nrhs
      read (iounit,10) title, key, totcrd, ptrcrd, indcrd, valcrd, 
     1     rhscrd, type, nrow, ncol, nnz, neltvl, ptrfmt, indfmt, 
     2     valfmt, rhsfmt
 10   format (a72, a8 / 5i14 / a3, 11x, 4i14 / 2a16, 2a20)
      if (rhscrd .gt. 0) read (iounit,11) rhstyp, nrhs, nrwindx
 11   format (a3,11x,i14,i14)
c
c anything else to read ?
      if (job .le. 0) return
c     ---- check whether matrix is readable ------ 
      n = ncol
      if (ncol .gt. nmax) then
         ierr = 1
         if (my_id.eq.0)print*,' ncol > nmax =', ncol, nmax
      endif
      if (nnz .gt. nzmax) then
          ierr = ierr + 2
         if (my_id.eq.0)print*,' nnz > nzmax =', nnz, nzmax
      endif
      if (ierr .ne. 0) return
c     ---- read pointer and row numbers ---------- 
      read (iounit,ptrfmt) (ia (i), i = 1, n+1)
          if (my_id.eq.0)print*,'... Pointers read ok : ', n+1
          if (my_id.eq.0)print*,'... Reading row indices for nnz : ',
     +       nnz
      read (iounit,indfmt) (ja (i), i = 1, nnz)
          if (my_id.eq.0)print*,'... Reading row indices done'
c     --- reading values of matrix if required....
      if (job .le. 1)  return
c     --- and if available ----------------------- 
      if (valcrd .le. 0) then
	 job = 1
	 return
      endif
      if (my_id.eq.0)print*,'... Reading A starts for nnz =',nnz,
     +    ' FORMAT: ',valfmt
      read (iounit,valfmt) (a(j),j=1,nnz)
      if (my_id.eq.0)print*,'... Reading A values done'
c     --- reading rhs if required ---------------- 
      if (job .le. 2)  return
c     --- and if available ----------------------- 
      if ( rhscrd .le. 0) then
	 job = 2
	 return
      endif
c     
c     --- read right-hand-side.-------------------- 
      if (rhstyp(1:1) .eq. 'M') then 
         ierr = 4
         return
      endif
c
      guesol = rhstyp(2:3) 
      nvec = 1 
      if (guesol(1:1) .eq. 'G' .or. guesol(1:1) .eq. 'g') nvec=nvec+1
      if (guesol(2:2) .eq. 'X' .or. guesol(2:2) .eq. 'x') nvec=nvec+1
c     
      len = nrhs*nrow 
      if (len*nvec .gt. lenrhs) then
         ierr = 5
         return
      endif
c
c read right-hand-sides
      next = 1
      iend = len
      read(iounit,rhsfmt) (rhs(i), i = next, iend)
c
c read initial guesses if available
      if (guesol(1:1) .eq. 'G' .or. guesol(1:1) .eq. 'g') then
         next = next+len
         iend = iend+ len
         read(iounit,valfmt) (rhs(i), i = next, iend)
      endif
c     
c read exact solutions if available
      if (guesol(2:2) .eq. 'X' .or. guesol(2:2) .eq. 'x') then
         next = next+len
         iend = iend+ len
         read(iounit,valfmt) (rhs(i), i = next, iend)
      endif
      return
c--------- end of readmt -----------------------------------------------
      end
      subroutine nagrand(X, N, a,b, seed) !G05FAF --- NAG RANDOM NUM Gen
	INTEGER  N, seed
        DOUBLE PRECISION X(N),A,B
	EXTERNAL G05CBF,G05FAF
         CALL G05CBF(seed)      ! Like rand('seed',9)
	 CALL G05FAF(A,B, N,X)	! NAG random vector
      return
c--------- end of nagrand ----------------------------------------------
      end
      subroutine MUasMAX(X, N, Amu) ! Find the MAX component in ABS
	INTEGER  J, K, N
        DOUBLE PRECISION X(N),Amu,Bmu
          K=1
        Amu=ABS(X(K))
        DO J=2,N
         Bmu = ABS(X(J))
         IF (Bmu.GT.Amu) THEN 
           Amu=Bmu
             K=J
         ENDIF
        ENDDO
         Amu = X(K)
      return
c--------- end of MUasMAX ----------------------------------------------
      end
************************************************************************
* Feb 6 2004 from ROWC.f95  (Ke Chen)
*_____COL to ROW Sparse Storage Change________________________________
      SUBROUTINE COL2RW(N, NEA,ACOL,ICOL,IPCOL, AROW,IROW,IPROW, IWK) 
       implicit none
      INTEGER N,NEA, K
      INTEGER IPCOL(N+1),IPROW(N+1), row,rowc,rowg,pos,IC
      INTEGER ICOL(NEA), IROW(NEA), IWK(N+1)
      DOUBLE PRECISION ACOL(NEA), AROW(NEA)

cccccccccccccccccccccccccc Set Initially: IWK=IP cccccccccccc
      DO k=1,N+1
        IWK(k)=IPCOL(k) ! Used for conversion / Will be reset
      ENDDO

      IPROW(1)=1        ! Input Tuples A = (Acol,Icol,IPcol)
         rowg = 0       !  with NEA nnz's of DIM n x n
cccccccccccccccccccccccccc cccccccccccccccccccccccccccccccccc
      DO row=1,N     !! ---- Global Conversion
         rowc = 0
      DO IC = 1, N   !! ==== Search row-index by COLS
      DO pos = IWK(IC), IWK(IC+1)-1  !Col IC / Tuples B = (Arow,Irow,IProw)
         if (ICOL(pos).EQ.row) THEN
           rowc = rowc + 1  !! local rowc
           AROW( rowc + rowg ) = ACOL(pos)
           IROW( rowc + rowg ) = IC
           IWK(IC) = pos+1
           GOTO 100 !!! Done with this COL (found)
         endif 
         if (ICOL(pos).GT.row) THEN
           IWK(IC) = pos
           GOTO 100 !!! Done with this COL (not found)
         endif 
      ENDDO !pos

100   CONTINUE
      ENDDO !IC
           rowg = rowc + rowg  !! local rowc added to "rowg"

        IPROW(row+1)=rowc + IPROW(row)

      ENDDO ! row=1,N   !--- Global Conversion
cccccccccccccccccccccccccc cccccccccccccccccccccccccccccccccc
c--------- end of COL2RW -----------------------------------------------
      end
