diff -ruN as-is/vasp.5.2/dfast.F open64/vasp.5.2/dfast.F --- as-is/vasp.5.2/dfast.F 2010-02-04 09:00:12.000000000 -0800 +++ open64/vasp.5.2/dfast.F 2012-12-17 06:19:44.000000000 -0800 @@ -22,17 +22,17 @@ ! this is done in blocks to save storage for the transformed wavefunctions INTEGER :: NBLK=256 - INTERFACE - SUBROUTINE LINCOM(MODE,CF,CPROF,CTRANS,NIN,NOUT,NPL, & - & NPRO,NPLDIM,NPROD,LDTRAN,CFA,CPROFA) - USE prec - IMPLICIT COMPLEX(q) (C) - IMPLICIT REAL(q) (A-B,D-H,O-Z) - CHARACTER (1) MODE - GDEF CPROF,CPROFA - GDEF CTRANS - END SUBROUTINE LINCOM - END INTERFACE +! INTERFACE +! SUBROUTINE LINCOM(MODE,CF,CPROF,CTRANS,NIN,NOUT,NPL, & +! & NPRO,NPLDIM,NPROD,LDTRAN,CFA,CPROFA) +! USE prec +! IMPLICIT COMPLEX(q) (C) +! IMPLICIT REAL(q) (A-B,D-H,O-Z) +! CHARACTER (1) MODE +! GDEF CPROF,CPROFA +! GDEF CTRANS +! END SUBROUTINE LINCOM +! END INTERFACE INTERFACE SUBROUTINE ORTH1(CSEL,CPTWFP,CFW,CPROJ,CPROW,NBANDS, & diff -ruN as-is/vasp.5.2/fock.F open64/vasp.5.2/fock.F --- as-is/vasp.5.2/fock.F 2010-02-04 11:13:46.000000000 -0800 +++ open64/vasp.5.2/fock.F 2012-12-17 02:47:33.000000000 -0800 @@ -208,33 +208,6 @@ TYPE (wavefun1) :: WQ ! wavefunction END TYPE fock_handle - INTERFACE - SUBROUTINE FOCK_QDER(GRID, LMDIM, LATT_CUR, W, & - NONLR_S, NONLR_D, NONL_S, NONL_D, IDIR, NK, ISP, NPOS, NSTRIPN, & - CH, P, CQIJ) - - USE mgrid - USE lattice - USE pseudo - USE wave - USE nonl_high - IMPLICIT NONE - - ! passed variables - TYPE (grid_3d) GRID - INTEGER LMDIM - TYPE (latt) LATT_CUR - TYPE (wavespin) W - TYPE (nonlr_struct) NONLR_S, NONLR_D - TYPE (nonl_struct) NONL_S, NONL_D - TYPE (potcar) P(:) - OVERLAP CQIJ (LMDIM,LMDIM,W%WDES%NIONS,W%WDES%NCDIJ) - INTEGER IDIR, NK, ISP, NPOS, NSTRIPN - COMPLEX(q) :: CH - END SUBROUTINE FOCK_QDER - - - END INTERFACE CONTAINS !********************************************************************** ! diff -ruN as-is/vasp.5.2/hamil.F open64/vasp.5.2/hamil.F --- as-is/vasp.5.2/hamil.F 2010-02-04 09:00:14.000000000 -0800 +++ open64/vasp.5.2/hamil.F 2012-11-30 02:36:34.000000000 -0800 @@ -117,26 +117,24 @@ END INTERFACE INTERFACE - SUBROUTINE PW_CHARGE_TRACE(WDES1, CHARGE, NDIM, CR1, CR2) + SUBROUTINE PW_CHARGE_TRACE(WDES1, CHARGE, CR1, CR2) USE prec USE mgrid USE wave TYPE (grid_3d) GRID TYPE (wavedes1) WDES1 - INTEGER NDIM GDEF CHARGE COMPLEX(q) :: CR1,CR2 END SUBROUTINE PW_CHARGE_TRACE END INTERFACE INTERFACE - SUBROUTINE PW_CHARGE_TRACE_NO_CONJG(WDES1, CHARGE, NDIM, CR1, CR2) + SUBROUTINE PW_CHARGE_TRACE_NO_CONJG(WDES1, CHARGE, CR1, CR2) USE prec USE mgrid USE wave TYPE (grid_3d) GRID TYPE (wavedes1) WDES1 - INTEGER NDIM GDEF CHARGE COMPLEX(q) :: CR1,CR2 END SUBROUTINE PW_CHARGE_TRACE_NO_CONJG diff -ruN as-is/vasp.5.2/Makefile open64/vasp.5.2/Makefile --- as-is/vasp.5.2/Makefile 2011-07-18 04:21:10.000000000 -0700 +++ open64/vasp.5.2/Makefile 2012-12-17 23:30:53.000000000 -0800 @@ -15,8 +15,8 @@ #----------------------------------------------------------------------- # fortran compiler and linker #----------------------------------------------------------------------- -FC = ifort -CC = icc +FC = openf90 +CC = opencc # fortran linker FCL = $(FC) @@ -56,18 +56,18 @@ # -assume byterecl set record length for unformatted files in bytes #----------------------------------------------------------------------- -FFLAGS = -FR -cm -w95 -lowercase -w -safe_cray_ptr -fno-alias -assume byterecl -fpp +FFLAGS = -fno-second-underscore -freeform -ftpp -woff1278,1279,1615 -r8 #----------------------------------------------------------------------- # optimization #----------------------------------------------------------------------- -OFLAG = -O3 -ip -xSSE4.2 +OFLAG = -O3 -OPT:Olimit=0 -march=bdver1 -mavx -mfma4 OFLAG_HIGH = $(OFLAG) -OFLAG_MED = -O2 -ip -xSSE4.2 -ftz -OFLAG_LOW = -O1 -ip -ftz -OFLAG_NOOPT = -O0 -ip -ftz -DEBUG = -g -traceback +OFLAG_MED = -O2 -march=bdver1 -mavx -mfma4 +OFLAG_LOW = -O1 -march=bdver1 +OFLAG_NOOPT = -O0 -march=bdver1 +DEBUG = -g INLINE = $(OFLAG) @@ -75,7 +75,7 @@ # the following lines specify the position of BLAS and LAPACK #----------------------------------------------------------------------- -LIB_DMY = ../../vasp.5.lib/libdmy.a +LIB_DMY = ../../vasp.5.lib.open64/libdmy.a # GOTO is a partial replacement for BLAS by Kazushige Goto # (see http://www.cs.utexas.edu/users/kgoto/signup_first.html) @@ -83,10 +83,13 @@ LIB_GOTO = -L/opt/goto/lib -lgoto # Use the mkl Intel libraries for BLAS and LAPACK (www.intel.com) -LIB_MKL = -L/opt/apps/intel/11.1/mkl/lib/em64t -lmkl_lapack -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,-rpath,/opt/apps/intel/11.1/mkl/lib/em64t +LIB_MKL = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core + +# Use the AMD ACML library for BLAS and LAPACK +LIB_ACML = -L${HOME}/opt/acml/5.2.0/open64_64_fma4/lib -lacml -lfortran # Use here $(LIB_MKL) with or without $(GOTO) -BLAS = $(LIB_MKL) ../../vasp.5.lib/linpack_double.o +BLAS = $(LIB_ACML) ../../vasp.5.lib.open64/linpack_double.o # LAPACK, simplest use vasp.5.lib/lapack_double LAPACK = @@ -119,7 +122,7 @@ # suggested above, you can use the following line #----------------------------------------------------------------------- -FC = ifort +FC = openf90 FCL = $(FC) #----------------------------------------------------------------------- @@ -138,10 +141,10 @@ # routines #----------------------------------------------------------------------- -CPP = -DMPI -DHOST=\"Altix\ ICE\" \ - -Dkind8 -DwNGZhalf -DNGZhalf -DCACHE_SIZE=262144 \ - -DscaLAPACK -DSGI_OPT -Davoidalloc -Duse_collective \ - -DZGEMM_3M +CPP = -DMPI -DHOST=\"Dell\ Open64\" \ + -Dkind8 -DNGZhalf -DCACHE_SIZE=262144 \ + -DSGI_OPT -Davoidalloc -Duse_collective \ + -DscaLAPACK #----------------------------------------------------------------------- # location of SCALAPACK @@ -149,7 +152,7 @@ # Intel Cluster MKL with support for Intel MPI is used here. #----------------------------------------------------------------------- -SCALAPACK = -lmkl_scalapack_lp64 +SCALAPACK = ${HOME}/opt/scalapack/mvapich2/2.0.2/lib/libscalapack.a #----------------------------------------------------------------------- # libraries for mpi @@ -170,13 +173,11 @@ #BLACS = -lmkl_blacs_lp64 # MVAPICH2 -MPI_DIR = /opt/apps/intel11_1/mvapich2/1.6 +MPI_DIR = $(HOME)/opt/mvapich2/open64/1.8.1-ofa INC_MPI = -I$(MPI_DIR)/include -LIB_MPI = $(MPI_DIR)/lib/libmpich.a \ - -L/opt/ofed/lib64 -libverbs -libumad -lpthread -lrdmacm -lrt \ - -Wl,-rpath,/opt/ofed/lib64 /opt/apps/limic2/0.5.4/lib/liblimic2.a -BLACS = -lmkl_blacs_intelmpi_lp64 - +LIB_MPI = $(MPI_DIR)/lib/libmpich.a $(MPI_DIR)/lib/libmpl.a \ + -libverbs -libumad -lpthread -lrdmacm -lrt + # MPT #MPI_DIR = /usr #INC_MPI = -I$(MPI_DIR)/include @@ -185,14 +186,14 @@ #----------------------------------------------------------------------- -LIB = $(LIB_DMY) $(BLAS) $(SCALAPACK) $(BLACS) $(LIB_MPI) ../../vasp.5.lib/linpack_double.o +LIB = $(LIB_DMY) $(BLAS) $(SCALAPACK) $(BLACS) $(LIB_MPI) #../../vasp.5.lib/linpack_double.o # FFT: use fftmpi.o with fft3dlib of Juergen Furthmueller or FFTW3 # NOTE: if you use FFTW3, don't forget to copy /include/fftw3.F to # the current directory! # Juergen Furthmueller's FFT implementation (part of VASP) -#FFT3D = fftmpi.o fftmpi_map.o fft3dlib.o +FFT3D = fftmpi.o fftmpi_map.o fft3dfurth.o fft3dlib.o # FFTW 3.x #FFT3D = fftmpiw.o fftmpi_map.o fft3dlib.o /store/chem/martinh/lib/FFTW/3.1.2/x86_64/lib/libfftw3.a @@ -202,7 +203,7 @@ # # % cd /interfaces/fftw3xf # % make -f makefile libem64t -FFT3D = fftmpi.o fftmpi_map.o fftw3d.o fft3dlib.o /work/01760/martinh/opt/mkl_wrappers/fftw3/intel/libfftw3xf_intel.a +#FFT3D = fftmpi.o fftmpi_map.o fftw3d.o fft3dlib.o $(HOME)/martinh/opt/mkl/lib/em64t/libfftw3xf_intel.a #----------------------------------------------------------------------- # general rules and compile lines @@ -249,9 +250,9 @@ # General rules and compile lines #----------------------------------------------------------------------- OBJ_HIGH = -OBJ_MED = +OBJ_MED = relativistic.o LDApU.o paw_base.o OBJ_LOW = fftmpiw.o fftmpi.o fftw3d.o wave_high.o -OBJ_NOOPT = +OBJ_NOOPT = nmr.o vasp: $(SOURCE) $(FFT3D) $(INC) main.o rm -f vasp diff -ruN as-is/vasp.5.2/subrot_cluster.F open64/vasp.5.2/subrot_cluster.F --- as-is/vasp.5.2/subrot_cluster.F 2010-02-04 09:00:16.000000000 -0800 +++ open64/vasp.5.2/subrot_cluster.F 2012-12-17 03:56:30.000000000 -0800 @@ -487,9 +487,6 @@ END SUBROUTINE SUBROT_DEG_ALL -END MODULE subrot_cluster - - !************************* SUBROT_DEG_CLUSTERS ************************* ! ! perform a sub space rotation in the space spanned by cluster using @@ -503,7 +500,6 @@ CW_RED, CPROJ_RED, DEG_CLUSTER_, LCONJG, LCOMMULATIVE) USE wave_mpi USE wave - USE subrot_cluster TYPE (wavedes) WDES TYPE (eigenf_cluster),TARGET :: DEG_CLUSTER_ @@ -555,3 +551,6 @@ END SUBROUTINE SUBROT_DEG_CLUSTERS +END MODULE subrot_cluster + + diff -ruN as-is/vasp.5.2/subrot_lr.F open64/vasp.5.2/subrot_lr.F --- as-is/vasp.5.2/subrot_lr.F 2010-02-04 09:00:16.000000000 -0800 +++ open64/vasp.5.2/subrot_lr.F 2012-12-17 04:34:14.000000000 -0800 @@ -77,6 +77,8 @@ TYPE (wavefuna) WNONL ! array to hold non local part D * wave function character GDEF,ALLOCATABLE,TARGET:: CHAM(:,:),COVL(:,:) + COMPLEX(q), POINTER :: CW_RED(:,:), CW0_RED(:,:), CWXI_RED(:,:) + GDEF , POINTER :: CPROJ_RED(:,:), CPROJ0_RED(:,:), CPROJXI_RED(:,:) ! redistributed plane wave coefficients INTEGER NCPU @@ -248,12 +250,18 @@ IF (PRESENT(DEG_CLUSTER) .AND. PRESENT(RESOLVE_DEG)) THEN IF (ASSOCIATED(DEG_CLUSTER(NK,ISP)%DEG_CLUSTER)) THEN + CW0_RED => W0%CW(:,:,NK,ISP) + CPROJ0_RED => W0%CPROJ(:,:,NK,ISP) + CW_RED => W%CW(:,:,NK,ISP) + CPROJ_RED => W%CPROJ(:,:,NK,ISP) + CWXI_RED => WXI%CW(:,:,NK,ISP) + CPROJXI_RED => WXI%CPROJ(:,:,NK,ISP) CALL SUBROT_DEG_CLUSTERS(W0%WDES, WDES1%NPL_RED, WDES1%NPRO_RED, WDES1%NRPLWV_RED, WDES1%NPROD_RED, & - W0%CW(1,1,NK,ISP), W0%CPROJ(1,1,NK,ISP), DEG_CLUSTER(NK,ISP)%DEG_CLUSTER, .FALSE., .FALSE.) + CW0_RED, CPROJ0_RED, DEG_CLUSTER(NK,ISP)%DEG_CLUSTER, .FALSE., .FALSE.) CALL SUBROT_DEG_CLUSTERS(W0%WDES, WDES1%NPL_RED, WDES1%NPRO_RED, WDES1%NRPLWV_RED, WDES1%NPROD_RED, & - W%CW(1,1,NK,ISP), W%CPROJ(1,1,NK,ISP), DEG_CLUSTER(NK,ISP)%DEG_CLUSTER, .FALSE., .FALSE.) + CW_RED, CPROJ_RED, DEG_CLUSTER(NK,ISP)%DEG_CLUSTER, .FALSE., .FALSE.) CALL SUBROT_DEG_CLUSTERS(W0%WDES, WDES1%NPL_RED, WDES1%NPRO_RED, WDES1%NRPLWV_RED, WDES1%NPROD_RED, & - WXI%CW(1,1,NK,ISP), WXI%CPROJ(1,1,NK,ISP), DEG_CLUSTER(NK,ISP)%DEG_CLUSTER, .FALSE., .FALSE.) + CWXI_RED, CPROJXI_RED, DEG_CLUSTER(NK,ISP)%DEG_CLUSTER, .FALSE., .FALSE.) ENDIF ENDIF diff -ruN as-is/vasp.5.2/us.F open64/vasp.5.2/us.F --- as-is/vasp.5.2/us.F 2010-02-04 09:00:18.000000000 -0800 +++ open64/vasp.5.2/us.F 2012-12-17 02:35:08.000000000 -0800 @@ -26,6 +26,39 @@ #ifndef PGF90 INTERFACE + SUBROUTINE SETDIJ_(WDES, GRIDC_,GRIDUS,C_TO_US,LATT_CUR,P,T_INFO, LOVERL, & + LMDIM,CDIJ,CQIJ, CVTOT_, LDIAGONAL_TERMS, IRDMAA,IRDMAX, DISPL) + USE prec + USE pseudo + USE poscar + USE mpimy + USE mgrid + USE lattice + USE wave + USE asa + USE paw + USE constant + IMPLICIT COMPLEX(q) (C) + IMPLICIT REAL(q) (A-B,D-H,O-Z) + + TYPE (type_info) T_INFO + TYPE (potcar) P(T_INFO%NTYP) + TYPE (grid_3d) GRIDC_,GRIDUS + TYPE (transit) C_TO_US ! index table between GRIDC and GRIDUS + TYPE (latt) LATT_CUR + TYPE (wavedes) WDES + + INTEGER IRDMAX ! allocation required for augmentation + INTEGER IRDMAA ! actual maximum augmentation index + OVERLAP CDIJ(LMDIM,LMDIM,WDES%NIONS,WDES%NCDIJ), & + CQIJ(LMDIM,LMDIM,WDES%NIONS,WDES%NCDIJ) + COMPLEX(q) CVTOT_(GRIDC_%MPLWV,WDES%NCDIJ) + LOGICAL LOVERL + REAL(q) DISPL(3,T_INFO%NIONS) + LOGICAL LDIAGONAL_TERMS + ! add atomic reference diagonal terms + END SUBROUTINE + SUBROUTINE SETDIJ(WDES, GRIDC_,GRIDUS,C_TO_US,LATT_CUR,P,T_INFO, LOVERL, & LMDIM,CDIJ,CQIJ, CVTOT_, IRDMAA,IRDMAX) USE prec @@ -89,6 +122,43 @@ COMPLEX(q) CHDEN(GRID_SOFT%MPLWV,WDES%NCDIJ) LOGICAL LOVERL END SUBROUTINE + + SUBROUTINE AUGMENTATION_CHARGE( & + WDES, GRIDC_, GRIDUS, C_TO_US, & + LATT_CUR, P, T_INFO, SYMM, LOVERL, & + LMDIM, CRHODE, CHTOT_, IRDMAX, DISPL) + USE prec + USE base + USE charge + USE pseudo + USE poscar + USE mpimy + USE mgrid + USE lattice + USE wave + USE asa + USE paw + USE constant + + IMPLICIT COMPLEX(q) (C) + IMPLICIT REAL(q) (A-B,D-H,O-Z) + + TYPE (type_info) T_INFO + TYPE (potcar) P(T_INFO%NTYP) + TYPE (grid_3d) GRIDC_,GRIDUS + TYPE (transit) C_TO_US + TYPE (latt) LATT_CUR + TYPE (wavedes) WDES + TYPE (symmetry) SYMM + + INTEGER IRDMAX ! allocation required for augmentation + INTEGER LMDIM + OVERLAP CRHODE(LMDIM,LMDIM,WDES%NIONS,WDES%NCDIJ) + RGRID CHTOT_(DIMREAL(GRIDC_%MPLWV),WDES%NCDIJ) + LOGICAL LOVERL,LADDITIONAL + REAL(q) DISPL(3,T_INFO%NIONS) + END SUBROUTINE + END INTERFACE #endif INTERFACE