! (C) Copyright 2005- ECMWF.
! (C) Copyright 2013- Meteo-France.
! 
! This software is licensed under the terms of the Apache Licence Version 2.0
! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
! In applying this licence, ECMWF does not waive the privileges and immunities
! granted to it by virtue of its status as an intergovernmental organisation
! nor does it submit to any jurisdiction.
!

MODULE MPL_ALLREDUCE_MOD

!**** MPL_ALLREDUCE Perform collective communication

!     Purpose.
!     --------
!     To calculate global MIN,MAX,SUM or IEOR and return result to all processes.
!     The data may be REAL*4, REAL*8,or INTEGER, one dimensional array or scalar

!**   Interface.
!     ----------
!        CALL MPL_ALLREDUCE

!        Input required arguments :
!        -------------------------
!           PSENDBUF -  buffer containing message to be collectively communicated
!                       (can be type REAL*4, REAL*8 or INTEGER) (also output)
!           CDOPER   -  Global operation to be performed : 'MAX', 'MIN', 'SUM' or 'IEOR'

!        Input optional arguments :
!        -------------------------
!           LDREPROD -  Reproducibility flag for SUMmation-operator.
!                       Meaningful only for REAL-numbers.
!                       Three modes (applicable for REAL-number only):
!                       1) Not provided at all (the default) ==> MPL_ABORT
!                       2) Provided and .TRUE. ==> Use home-written binary tree
!                          No MPI_ALLREDUCE used.
!                       3) Provided, but .FALSE. ==> let MPI_ALLREDUCE do the summation.
!           KCOMM    -  Communicator number if different from MPI_COMM_WORLD 
!                       or from that established as the default 
!                       by an MPL communicator routine
!                       the incoming data
!           CDSTRING -  Character string for ABORT messages
!                       used when KERROR is not provided

!        Output required arguments :
!        -------------------------
!           none

!        Output optional arguments :
!        -------------------------
!           KERROR   -  return error code.     If not supplied, 
!                       MPL_ALLREDUCE aborts when an error is detected.
!     Author.
!     -------
!        D.Dent, M.Hamrud, S.Saarinen     ECMWF

!     Modifications.
!     --------------
!        Original: 2001-02-02
!      F. Vana  05-Mar-2015  Support for single precision

!     ------------------------------------------------------------------

USE EC_PARKIND , ONLY : JPRD, JPIM, JPRM, JPIB
USE OML_MOD   ,ONLY : OML_MY_THREAD

USE MPL_MPIF
USE MPL_DATA_MODULE
USE MPL_STATS_MOD
USE YOMMPLSTATS
USE MPL_MESSAGE_MOD
USE MPL_SEND_MOD
USE MPL_RECV_MOD
USE MPL_WAIT_MOD
USE MPL_BROADCAST_MOD

IMPLICIT NONE

PRIVATE

LOGICAL :: LLABORT=.TRUE.

INTERFACE MPL_ALLREDUCE
MODULE PROCEDURE MPL_ALLREDUCE_REAL8, MPL_ALLREDUCE_REAL4, MPL_ALLREDUCE_INT, &
  MPL_ALLREDUCE_INT8, &
  MPL_ALLREDUCE_REAL8_SCALAR, MPL_ALLREDUCE_REAL4_SCALAR, &
  MPL_ALLREDUCE_INT_SCALAR, MPL_ALLREDUCE_INT8_SCALAR
END INTERFACE

PUBLIC MPL_ALLREDUCE

CONTAINS

SUBROUTINE MPL_ALLREDUCE_INT_SCALAR(KSENDBUF,CDOPER,LDREPROD, &
                            & KCOMM,KERROR,CDSTRING)

INTEGER(KIND=JPIM),INTENT(INOUT)     :: KSENDBUF
CHARACTER(LEN=*),INTENT(IN)    :: CDOPER
LOGICAL,INTENT(IN),OPTIONAL :: LDREPROD
INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KCOMM
INTEGER(KIND=JPIM),INTENT(OUT),OPTIONAL :: KERROR
CHARACTER(LEN=*),INTENT(IN),OPTIONAL :: CDSTRING
INTEGER(KIND=JPIM) ISENDBUF(1)

ISENDBUF(1) = KSENDBUF
CALL MPL_ALLREDUCE(ISENDBUF,CDOPER,LDREPROD,KCOMM,KERROR,CDSTRING)
KSENDBUF = ISENDBUF(1)

END SUBROUTINE MPL_ALLREDUCE_INT_SCALAR

SUBROUTINE MPL_ALLREDUCE_INT8_SCALAR(KSENDBUF,CDOPER,LDREPROD, &
                            & KCOMM,KERROR,CDSTRING)
INTEGER(KIND=JPIB),INTENT(INOUT)     :: KSENDBUF
CHARACTER(LEN=*),INTENT(IN)    :: CDOPER
LOGICAL,INTENT(IN),OPTIONAL :: LDREPROD
INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KCOMM
INTEGER(KIND=JPIM),INTENT(OUT),OPTIONAL :: KERROR
CHARACTER(LEN=*),INTENT(IN),OPTIONAL :: CDSTRING
INTEGER(KIND=JPIB) ISENDBUF(1)

ISENDBUF(1) = KSENDBUF
CALL MPL_ALLREDUCE(ISENDBUF,CDOPER,LDREPROD,KCOMM,KERROR,CDSTRING)
KSENDBUF = ISENDBUF(1)

END SUBROUTINE MPL_ALLREDUCE_INT8_SCALAR



SUBROUTINE MPL_ALLREDUCE_REAL8_SCALAR(PSENDBUF,CDOPER,LDREPROD, &
                            & KCOMM,KERROR,CDSTRING)
REAL(KIND=JPRD),INTENT(INOUT)     :: PSENDBUF
CHARACTER(LEN=*),INTENT(IN)    :: CDOPER
LOGICAL,INTENT(IN),OPTIONAL :: LDREPROD
INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KCOMM
INTEGER(KIND=JPIM),INTENT(OUT),OPTIONAL :: KERROR
CHARACTER(LEN=*),INTENT(IN),OPTIONAL :: CDSTRING
REAL(KIND=JPRD) ZSENDBUF(1)

ZSENDBUF(1) = PSENDBUF
CALL MPL_ALLREDUCE(ZSENDBUF,CDOPER,LDREPROD,KCOMM,KERROR,CDSTRING)
PSENDBUF = ZSENDBUF(1)

END SUBROUTINE MPL_ALLREDUCE_REAL8_SCALAR


SUBROUTINE MPL_ALLREDUCE_REAL4_SCALAR(PSENDBUF,CDOPER,LDREPROD, &
                            & KCOMM,KERROR,CDSTRING)
REAL(KIND=JPRM),INTENT(INOUT)     :: PSENDBUF
CHARACTER(LEN=*),INTENT(IN)    :: CDOPER
LOGICAL,INTENT(IN),OPTIONAL :: LDREPROD
INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KCOMM
INTEGER(KIND=JPIM),INTENT(OUT),OPTIONAL :: KERROR
CHARACTER(LEN=*),INTENT(IN),OPTIONAL :: CDSTRING
REAL(KIND=JPRM) ZSENDBUF(1)

ZSENDBUF(1) = PSENDBUF
CALL MPL_ALLREDUCE(ZSENDBUF,CDOPER,LDREPROD,KCOMM,KERROR,CDSTRING)
PSENDBUF = ZSENDBUF(1)

END SUBROUTINE MPL_ALLREDUCE_REAL4_SCALAR


SUBROUTINE MPL_ALLREDUCE_INT(KSENDBUF,CDOPER,LDREPROD, &
                            & KCOMM,KERROR,CDSTRING)

#ifdef USE_8_BYTE_WORDS
  USE MPI4TO8, ONLY : &
    MPI_ALLREDUCE => MPI_ALLREDUCE8
#endif

INTEGER(KIND=JPIM),INTENT(INOUT)     :: KSENDBUF(:)
CHARACTER(LEN=*),INTENT(IN)    :: CDOPER
LOGICAL,INTENT(IN),OPTIONAL :: LDREPROD
INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KCOMM
INTEGER(KIND=JPIM),INTENT(OUT),OPTIONAL :: KERROR
CHARACTER(LEN=*),INTENT(IN),OPTIONAL :: CDSTRING
INTEGER(KIND=JPIM) :: IRECVBUF(SIZE(KSENDBUF))
INTEGER(KIND=JPIM) :: ISENDCOUNT,ICOMM,IERROR,IOPER
INTEGER(KIND=JPIM) :: ITID
ITID = OML_MY_THREAD()
IERROR = 0
IF(MPL_NUMPROC < 1) CALL MPL_MESSAGE( &
  & CDMESSAGE='MPL_ALLREDUCE: MPL NOT INITIALISED ',LDABORT=LLABORT) 

IF(CDOPER(1:3) == 'MAX' .OR. CDOPER(1:3) == 'max' ) THEN
  IOPER = MPI_MAX
ELSEIF(CDOPER(1:3) == 'MIN' .OR. CDOPER(1:3) == 'min' ) THEN
  IOPER = MPI_MIN
ELSEIF(CDOPER(1:3) == 'SUM' .OR. CDOPER(1:3) == 'sum' ) THEN
  IOPER = MPI_SUM
ELSEIF(CDOPER(1:4) == 'IEOR' .OR. CDOPER(1:4) == 'ieor' ) THEN
  IOPER = MPI_BXOR
ELSEIF(CDOPER(1:4) == 'XOR' .OR. CDOPER(1:4) == 'xor' ) THEN
  IOPER = MPI_BXOR
ELSE
  CALL MPL_MESSAGE(IERROR,'MPL_ALLREDUCE: ERROR UNKNOWN OPERATOR',&
   & CDSTRING,LDABORT=LLABORT)
ENDIF

IF(PRESENT(KCOMM)) THEN
  ICOMM=KCOMM
ELSE
  ICOMM=MPL_COMM_OML(ITID)
ENDIF

ISENDCOUNT = SIZE(KSENDBUF)

IF (ISENDCOUNT > 0) THEN
#ifndef NAG
  IF( (LOC(KSENDBUF(UBOUND(KSENDBUF,1)))-LOC(KSENDBUF(LBOUND(KSENDBUF,1)))) /= 4_JPIB*(ISENDCOUNT - 1) ) THEN
    CALL MPL_MESSAGE(CDMESSAGE='MPL_ALLREDUCE: BUFFER NOT CONTIGUOUS ',LDABORT=LLABORT)
  ENDIF
ENDIF
#endif

IF ( MPL_NUMPROC > 1 ) &
CALL MPI_ALLREDUCE(KSENDBUF,IRECVBUF,ISENDCOUNT,INT(MPI_INTEGER), &
                &  IOPER,ICOMM,IERROR)

IF(LMPLSTATS) THEN
  CALL MPL_SENDSTATS(ISENDCOUNT,INT(MPI_INTEGER))
  CALL MPL_RECVSTATS(ISENDCOUNT,INT(MPI_INTEGER))
ENDIF

IF(MPL_OUTPUT > 1 )THEN
  WRITE(MPL_UNIT,'(A,5I8)') ' MPL_ALLREDUCE ',ISENDCOUNT,ICOMM,IOPER
ENDIF
IF(PRESENT(KERROR)) THEN
  KERROR=IERROR
ELSE
  IF(IERROR /= 0 ) CALL MPL_MESSAGE(IERROR,'MPL_ALLREDUCE',CDSTRING,LDABORT=LLABORT)
ENDIF

IF ( MPL_NUMPROC > 1 ) &
KSENDBUF(:) = IRECVBUF(:)

END SUBROUTINE MPL_ALLREDUCE_INT

SUBROUTINE MPL_ALLREDUCE_INT8(KSENDBUF,CDOPER,LDREPROD, &
                            & KCOMM,KERROR,CDSTRING)
INTEGER(KIND=JPIB),INTENT(INOUT)     :: KSENDBUF(:)
CHARACTER(LEN=*),INTENT(IN)    :: CDOPER
LOGICAL,INTENT(IN),OPTIONAL :: LDREPROD
INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KCOMM
INTEGER(KIND=JPIM),INTENT(OUT),OPTIONAL :: KERROR
CHARACTER(LEN=*),INTENT(IN),OPTIONAL :: CDSTRING
INTEGER(KIND=JPIB) :: IRECVBUF(SIZE(KSENDBUF))
INTEGER(KIND=JPIM) :: ISENDCOUNT,ICOMM,IERROR,IOPER
INTEGER(KIND=JPIM) :: ITID
IERROR = 0
ITID = OML_MY_THREAD()
IF(MPL_NUMPROC < 1) CALL MPL_MESSAGE( &
  & CDMESSAGE='MPL_ALLREDUCE: MPL NOT INITIALISED ',LDABORT=LLABORT) 

IF(CDOPER(1:3) == 'MAX' .OR. CDOPER(1:3) == 'max' ) THEN
  IOPER = MPI_MAX
ELSEIF(CDOPER(1:3) == 'MIN' .OR. CDOPER(1:3) == 'min' ) THEN
  IOPER = MPI_MIN
ELSEIF(CDOPER(1:3) == 'SUM' .OR. CDOPER(1:3) == 'sum' ) THEN
  IOPER = MPI_SUM
ELSEIF(CDOPER(1:4) == 'IEOR' .OR. CDOPER(1:4) == 'ieor' ) THEN
  IOPER = MPI_BXOR
ELSEIF(CDOPER(1:4) == 'XOR' .OR. CDOPER(1:4) == 'xor' ) THEN
  IOPER = MPI_BXOR
ELSE
  CALL MPL_MESSAGE(IERROR,'MPL_ALLREDUCE: ERROR UNKNOWN OPERATOR',&
   & CDSTRING,LDABORT=LLABORT)
ENDIF

IF(PRESENT(KCOMM)) THEN
  ICOMM=KCOMM
ELSE
  ICOMM=MPL_COMM_OML(ITID)
ENDIF

ISENDCOUNT = SIZE(KSENDBUF)
#ifndef NAG
IF (ISENDCOUNT > 0) THEN
  IF( (LOC(KSENDBUF(UBOUND(KSENDBUF,1)))-LOC(KSENDBUF(LBOUND(KSENDBUF,1)))) /= 8_JPIB*(ISENDCOUNT - 1) ) THEN
    CALL MPL_MESSAGE(CDMESSAGE='MPL_ALLREDUCE: BUFFER NOT CONTIGUOUS ',LDABORT=LLABORT)
  ENDIF
ENDIF
#endif

IF ( MPL_NUMPROC > 1 ) &
CALL MPI_ALLREDUCE(KSENDBUF,IRECVBUF,ISENDCOUNT,MPI_INTEGER8, &
                &  IOPER,ICOMM,IERROR)

IF(LMPLSTATS) THEN
  CALL MPL_SENDSTATS(ISENDCOUNT,INT(MPI_INTEGER8))
  CALL MPL_RECVSTATS(ISENDCOUNT,INT(MPI_INTEGER8))
ENDIF

IF(MPL_OUTPUT > 1 )THEN
  WRITE(MPL_UNIT,'(A,5I8)') ' MPL_ALLREDUCE ',ISENDCOUNT,ICOMM,IOPER
ENDIF
IF(PRESENT(KERROR)) THEN
  KERROR=IERROR
ELSE
  IF(IERROR /= 0 ) CALL MPL_MESSAGE(IERROR,'MPL_ALLREDUCE',CDSTRING,LDABORT=LLABORT)
ENDIF

IF ( MPL_NUMPROC > 1 ) &
KSENDBUF(:) = IRECVBUF(:)

END SUBROUTINE MPL_ALLREDUCE_INT8


SUBROUTINE MPL_ALLREDUCE_REAL8(PSENDBUF,CDOPER,LDREPROD, &
                            & KCOMM,KERROR,CDSTRING)


#ifdef USE_8_BYTE_WORDS
  USE MPI4TO8, ONLY : &
    MPI_ALLREDUCE => MPI_ALLREDUCE8
#endif

REAL(KIND=JPRD),INTENT(INOUT)        :: PSENDBUF(:)
CHARACTER(LEN=*),INTENT(IN)    :: CDOPER
LOGICAL,INTENT(IN),OPTIONAL :: LDREPROD
INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KCOMM
INTEGER(KIND=JPIM),INTENT(OUT),OPTIONAL :: KERROR
CHARACTER(LEN=*),INTENT(IN),OPTIONAL :: CDSTRING
REAL(KIND=JPRD)            :: ZRECVBUF(SIZE(PSENDBUF))
INTEGER(KIND=JPIM) ITAG, ICOUNT
LOGICAL LLREPRODSUM
INTEGER(KIND=JPIM) :: ISENDCOUNT,ICOMM,IERROR,IOPER
INTEGER(KIND=JPIM) :: IP2,II,IHALF,JSTAGE,ISEND,IRECV,IMSENT
INTEGER(KIND=JPIM) :: ISREQ(MPL_NUMPROC)
INTEGER(KIND=JPIM) :: ITID
IERROR = 0
ITID = OML_MY_THREAD()
LLREPRODSUM = .FALSE.

IF(MPL_NUMPROC < 1) CALL MPL_MESSAGE( &
  & CDMESSAGE='MPL_ALLREDUCE: MPL NOT INITIALISED ',LDABORT=LLABORT) 

IF(CDOPER(1:3) == 'MAX' .OR. CDOPER(1:3) == 'max' ) THEN
  IOPER = MPI_MAX
ELSEIF(CDOPER(1:3) == 'MIN' .OR. CDOPER(1:3) == 'min' ) THEN
  IOPER = MPI_MIN
ELSEIF(CDOPER(1:3) == 'SUM' .OR. CDOPER(1:3) == 'sum' ) THEN
  IOPER = MPI_SUM
  IF (PRESENT(LDREPROD)) THEN
    LLREPRODSUM = LDREPROD
  ELSE
    CALL MPL_MESSAGE(IERROR,&
     & 'MPL_ALLREDUCE: SUMMATION OPERATOR NOT REPRODUCIBLE IN REAL MODE',&
     & CDSTRING,LDABORT=LLABORT)
  ENDIF
ELSE
  CALL MPL_MESSAGE(IERROR,'MPL_ALLREDUCE: ERROR UNKNOWN OPERATOR',&
   & CDSTRING,LDABORT=LLABORT)
ENDIF

IF(PRESENT(KCOMM)) THEN
  ICOMM=KCOMM
ELSE
  ICOMM=MPL_COMM_OML(ITID)
ENDIF

ISENDCOUNT = SIZE(PSENDBUF)
#ifndef NAG
IF (ISENDCOUNT > 0) THEN
  IF( (LOC(PSENDBUF(UBOUND(PSENDBUF,1)))-LOC(PSENDBUF(LBOUND(PSENDBUF,1)))) /= 8_JPIB*(ISENDCOUNT - 1) ) THEN
    CALL MPL_MESSAGE(CDMESSAGE='MPL_ALLREDUCE: BUFFER NOT CONTIGUOUS ',LDABORT=LLABORT)
  ENDIF
ENDIF
#endif

IF (LLREPRODSUM) THEN
!-- Near reproducible summation (independent of number of threads)

  IP2=0
  DO
    IP2=IP2+1
    IF(2**IP2 >= MPL_NUMPROC) EXIT
  ENDDO

  IMSENT=0
  DO JSTAGE=IP2,1,-1
!    WRITE(0,*) 'STAGE ',JSTAGE
    ITAG  = 2001+JSTAGE
    II    = 2**JSTAGE
    IHALF = II/2
    ISEND = MPL_RANK - IHALF
    IF(ISEND > 0 .AND. MPL_RANK <= II) THEN
      IMSENT=IMSENT+1
      CALL MPL_SEND(PSENDBUF,KDEST=ISEND,KCOMM=ICOMM,KTAG=ITAG,KERROR=IERROR,&
       &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISREQ(IMSENT),CDSTRING='MPLS_SEND')
!      write(0,*) 'I SEND TO ',MPL_RANK,ISEND
    ENDIF
    IRECV=MPL_RANK + IHALF
    IF(IRECV <=MPL_NUMPROC .AND. MPL_RANK <= IHALF) THEN
      CALL MPL_RECV(ZRECVBUF,KSOURCE=IRECV,KCOMM=ICOMM,KTAG=ITAG,&
       &KERROR=IERROR,KOUNT=ICOUNT)
!      write(0,*) 'I RECV FROM ',MPL_RANK,IRECV
      PSENDBUF(:) = PSENDBUF(:) + ZRECVBUF(:)
    ENDIF
  ENDDO
  IF(IMSENT > 0) THEN
    CALL MPL_WAIT(KREQUEST=ISREQ(1:IMSENT),CDSTRING='MPLS_SEND')
  ENDIF
  IF (MPL_RANK == 1) THEN
    ZRECVBUF(:) = PSENDBUF(:)
  ENDIF
!  write(0,*) 'enter broadcast '
  CALL MPL_BROADCAST(ZRECVBUF,KTAG=ITAG,KCOMM=ICOMM,KROOT=1,KERROR=IERROR)
!  write(0,*) 'exit broadcast '

!!$  IMSENT=0
!!$  DO JSTAGE=1,IP2
!!$    ITAG = 2001+JSTAGE
!!$    WRITE(0,*) 'STAGE BACK  ',JSTAGE
!!$    II = 2**JSTAGE
!!$    IHALF = II/2
!!$    ISEND=MPL_RANK + IHALF
!!$    IF(ISEND <=MPL_NUMPROC .AND. MPL_RANK <= IHALF) THEN
!!$      IMSENT=IMSENT+1
!!$      CALL MPL_SEND(PSENDBUF,KDEST=ISEND,KCOMM=ICOMM,KTAG=ITAG,KERROR=IERROR,&
!!$       &KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=ISREQ(IMSENT),CDSTRING='MPLS_SEND')
!!$      WRITE(0,*) 'I SEND BACK TO ',MPL_RANK,ISEND
!!$    ENDIF
!!$    IRECV=MPL_RANK - IHALF
!!$    IF(IRECV > 0 .AND. MPL_RANK <= II) THEN
!!$      WRITE(0,*) 'I RECV BACK FROM ',MPL_RANK,IRECV
!!$      CALL MPL_RECV(ZRECVBUF,KSOURCE=IRECV,KCOMM=ICOMM,KTAG=ITAG,&
!!$       &KERROR=IERROR,KOUNT=ICOUNT)
!!$    ENDIF
!!$  ENDDO
!!$  IF(IMSENT > 0) THEN
!!$    CALL MPL_WAIT(KREQUEST=ISREQ(1:IMSENT),CDSTRING='MPLS_SEND')
!!$  ENDIF
  
ELSE  
  IF ( MPL_NUMPROC > 1 ) &
  CALL MPI_ALLREDUCE(PSENDBUF,ZRECVBUF,ISENDCOUNT,INT(MPI_REAL8), &
                  &  IOPER,ICOMM,IERROR)

  IF(LMPLSTATS) THEN
    CALL MPL_SENDSTATS(ISENDCOUNT,INT(MPI_REAL8))
    CALL MPL_RECVSTATS(ISENDCOUNT,INT(MPI_REAL8))
  ENDIF

ENDIF

IF(MPL_OUTPUT > 1 )THEN
  WRITE(MPL_UNIT,'(A,5I8)') ' MPL_ALLREDUCE ',ISENDCOUNT,ICOMM,IOPER
ENDIF
IF(PRESENT(KERROR)) THEN
  KERROR=IERROR
ELSE
  IF(IERROR /= 0 ) CALL MPL_MESSAGE(IERROR,'MPL_ALLREDUCE',CDSTRING,LDABORT=LLABORT)
ENDIF

IF ( MPL_NUMPROC > 1 ) &
PSENDBUF(:) = ZRECVBUF(:)

END SUBROUTINE MPL_ALLREDUCE_REAL8


SUBROUTINE MPL_ALLREDUCE_REAL4(PSENDBUF,CDOPER,LDREPROD, &
                            & KCOMM,KERROR,CDSTRING)


#ifdef USE_8_BYTE_WORDS
  USE MPI4TO8, ONLY : &
    MPI_ALLREDUCE => MPI_ALLREDUCE8
#endif

REAL(KIND=JPRM),INTENT(INOUT)        :: PSENDBUF(:)
CHARACTER(LEN=*),INTENT(IN)    :: CDOPER
LOGICAL,INTENT(IN),OPTIONAL :: LDREPROD
INTEGER(KIND=JPIM),INTENT(IN),OPTIONAL :: KCOMM
INTEGER(KIND=JPIM),INTENT(OUT),OPTIONAL :: KERROR
CHARACTER(LEN=*),INTENT(IN),OPTIONAL :: CDSTRING
REAL(KIND=JPRM)            :: ZRECVBUF(SIZE(PSENDBUF))
INTEGER(KIND=JPIM) IPROC, ITAG, ICOUNT
LOGICAL LLREPRODSUM
INTEGER(KIND=JPIM) :: ISENDCOUNT,ICOMM,IERROR,IOPER
INTEGER(KIND=JPIM) :: ITID
IERROR = 0
ITID = OML_MY_THREAD()
LLREPRODSUM = .FALSE.

IF(MPL_NUMPROC < 1) CALL MPL_MESSAGE( &
  & CDMESSAGE='MPL_ALLREDUCE: MPL NOT INITIALISED ',LDABORT=LLABORT) 

IF(CDOPER(1:3) == 'MAX' .OR. CDOPER(1:3) == 'max' ) THEN
  IOPER = MPI_MAX
ELSEIF(CDOPER(1:3) == 'MIN' .OR. CDOPER(1:3) == 'min' ) THEN
  IOPER = MPI_MIN
ELSEIF(CDOPER(1:3) == 'SUM' .OR. CDOPER(1:3) == 'sum' ) THEN
  IOPER = MPI_SUM
  IF (PRESENT(LDREPROD)) THEN
    LLREPRODSUM = LDREPROD
  ELSE
    CALL MPL_MESSAGE(IERROR,&
     & 'MPL_ALLREDUCE: SUMMATION OPERATOR NOT REPRODUCIBLE IN REAL MODE',&
     & CDSTRING,LDABORT=LLABORT)
  ENDIF
ELSE
  CALL MPL_MESSAGE(IERROR,'MPL_ALLREDUCE: ERROR UNKNOWN OPERATOR',&
   & CDSTRING,LDABORT=LLABORT)
ENDIF

IF(PRESENT(KCOMM)) THEN
  ICOMM=KCOMM
ELSE
  ICOMM=MPL_COMM_OML(ITID)
ENDIF

ISENDCOUNT = SIZE(PSENDBUF)
#ifndef NAG
IF (ISENDCOUNT > 0) THEN
  IF( (LOC(PSENDBUF(UBOUND(PSENDBUF,1)))-LOC(PSENDBUF(LBOUND(PSENDBUF,1)))) /= 4_JPIB*(ISENDCOUNT - 1) ) THEN
    CALL MPL_MESSAGE(CDMESSAGE='MPL_ALLREDUCE: BUFFER NOT CONTIGUOUS ',LDABORT=LLABORT)
  ENDIF
ENDIF
#endif

IF (LLREPRODSUM) THEN
!-- Near reproducible summation
  ITAG = 2001
  IF (MPL_RANK == 1) THEN
    DO IPROC=2,MPL_NUMPROC
      CALL MPL_RECV(ZRECVBUF,KSOURCE=IPROC,KCOMM=ICOMM,KTAG=ITAG,&
        &KERROR=IERROR,KOUNT=ICOUNT)
      IF (ICOUNT /= ISENDCOUNT) THEN
        WRITE(MPL_ERRUNIT,'(A,I10,A,I6,A,I10)')&
        & 'MPL_ALLREDUCE: RECEIVED UNEXPECTED NUMBER OF ELEMENTS ', &
        & ICOUNT,' FROM PROC ',IPROC,'. EXPECTED=',ISENDCOUNT
        CALL MPL_MESSAGE(IERROR,'MPL_ALLREDUCE',CDSTRING,LDABORT=LLABORT)
      ENDIF
      PSENDBUF(:) = PSENDBUF(:) + ZRECVBUF(:)
    ENDDO
    ZRECVBUF(:) = PSENDBUF(:)
  ELSE
    CALL MPL_SEND(PSENDBUF,KDEST=1,KCOMM=ICOMM,KTAG=ITAG,KERROR=IERROR,&
      &KMP_TYPE=JP_BLOCKING_STANDARD,CDSTRING='MPLS_SEND')
  ENDIF
  ITAG = ITAG + 1
  CALL MPL_BROADCAST(ZRECVBUF,KTAG=ITAG,KCOMM=ICOMM,KROOT=1,KERROR=IERROR)
ELSE  
  IF ( MPL_NUMPROC > 1 ) &
  CALL MPI_ALLREDUCE(PSENDBUF,ZRECVBUF,ISENDCOUNT,INT(MPI_REAL4), &
                  &  IOPER,ICOMM,IERROR)

  IF(LMPLSTATS) THEN
    CALL MPL_SENDSTATS(ISENDCOUNT,INT(MPI_REAL4))
    CALL MPL_RECVSTATS(ISENDCOUNT,INT(MPI_REAL4))
  ENDIF

ENDIF

IF(MPL_OUTPUT > 1 )THEN
  WRITE(MPL_UNIT,'(A,5I8)') ' MPL_ALLREDUCE ',ISENDCOUNT,ICOMM,IOPER
ENDIF
IF(PRESENT(KERROR)) THEN
  KERROR=IERROR
ELSE
  IF(IERROR /= 0 ) CALL MPL_MESSAGE(IERROR,'MPL_ALLREDUCE',CDSTRING,LDABORT=LLABORT)
ENDIF

IF ( MPL_NUMPROC > 1 ) &
PSENDBUF(:) = ZRECVBUF(:)

END SUBROUTINE MPL_ALLREDUCE_REAL4

END MODULE MPL_ALLREDUCE_MOD


