diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index fb8cfe959a..91f251e7c9 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -443,6 +443,10 @@ units="-" description="Method to use for exchanging halos" possible_values="`mpas_dmpar', `mpas_halo'"/> + diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F index 787e7719a1..6c19ed7931 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F +++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F @@ -395,18 +395,14 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t nullify(tend) call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1) - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') if (associated(tend)) then - !$acc enter data copyin(tend) else call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) - !$acc enter data copyin(tend_scalars) ! Ensure the integer pointed to by idx_ptr is copied to the gpu device call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr) idx = idx_ptr end if - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]') !$acc parallel default(present) if (associated(tend)) then @@ -426,13 +422,6 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t end if !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') - if (associated(tend)) then - !$acc exit data delete(tend) - else - !$acc exit data delete(tend_scalars) - end if - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]') end subroutine mpas_atm_get_bdy_tend @@ -533,9 +522,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del ! query the field as a scalar constituent ! if (associated(tend) .and. associated(state)) then - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc enter data copyin(tend, state) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang vector collapse(2) @@ -546,9 +532,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del end do !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc exit data delete(tend, state) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') else call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) call mpas_pool_get_array(lbc, 'lbc_scalars', state_scalars, 2) @@ -556,10 +539,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del idx=idx_ptr ! Avoid non-array pointer for OpenACC - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc enter data copyin(tend_scalars, state_scalars) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang vector collapse(2) do i=1, horizDim+1 @@ -569,9 +548,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del end do !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc exit data delete(tend_scalars, state_scalars) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') end if end subroutine mpas_atm_get_bdy_state_2d @@ -652,10 +628,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1) call mpas_pool_get_array(lbc, 'lbc_'//trim(field), state, 2) - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc enter data copyin(tend, state) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang vector collapse(3) do i=1, horizDim+1 @@ -667,10 +639,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, end do !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc exit data delete(tend, state) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - end subroutine mpas_atm_get_bdy_state_3d diff --git a/src/core_atmosphere/dynamics/mpas_atm_iau.F b/src/core_atmosphere/dynamics/mpas_atm_iau.F index 654fd3ae82..7459de89b4 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_iau.F +++ b/src/core_atmosphere/dynamics/mpas_atm_iau.F @@ -5,6 +5,15 @@ ! Additional copyright and license information can be found in the LICENSE file ! distributed with this code, or at http://mpas-dev.github.com/license.html ! + +#ifdef MPAS_OPENACC +#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) +#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) +#else +#define MPAS_ACC_TIMER_START(X) +#define MPAS_ACC_TIMER_STOP(X) +#endif + module mpas_atm_iau use mpas_derived_types @@ -13,9 +22,10 @@ module mpas_atm_iau use mpas_dmpar use mpas_constants use mpas_log, only : mpas_log_write + use mpas_timer + + !public :: atm_compute_iau_coef, atm_add_tend_anal_incr - !public :: atm_compute_iau_coef, atm_add_tend_anal_incr - contains !================================================================================================== @@ -76,6 +86,39 @@ real (kind=RKIND) function atm_iau_coef(configs, itimestep, dt) result(wgt_iau) end if end function atm_iau_coef + +!================================================================================================== + subroutine update_d2h_pre_add_tend_anal_incr(configs,structs) +!================================================================================================== + + implicit none + + type (mpas_pool_type), intent(in) :: configs + type (mpas_pool_type), intent(inout) :: structs + + type (mpas_pool_type), pointer :: tend + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: diag + + real (kind=RKIND), dimension(:,:), pointer :: rho_edge, rho_zz, theta_m + real(kind=RKIND),dimension(:,:,:), pointer :: scalars, tend_scalars + + call mpas_pool_get_subpool(structs, 'tend', tend) + call mpas_pool_get_subpool(structs, 'state', state) + call mpas_pool_get_subpool(structs, 'diag', diag) + + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + call mpas_pool_get_array(state, 'theta_m', theta_m, 1) + call mpas_pool_get_array(state, 'scalars', scalars, 1) + call mpas_pool_get_array(state, 'rho_zz', rho_zz, 2) + call mpas_pool_get_array(diag , 'rho_edge', rho_edge) + !$acc update self(theta_m, scalars, rho_zz, rho_edge) + + call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars) + !$acc update self(tend_scalars) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + + end subroutine update_d2h_pre_add_tend_anal_incr !================================================================================================== subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, tend_rtheta, tend_rho) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 5591ffe6d4..4ac8ca3145 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -28,6 +28,7 @@ module atm_time_integration #ifdef DO_PHYSICS use mpas_atmphys_driver_microphysics + use mpas_atmphys_interface, only: pre_microphysics, post_microphysics use mpas_atmphys_todynamics use mpas_atmphys_utilities #endif @@ -72,6 +73,7 @@ module atm_time_integration !$acc declare create(s_max_arr, s_min_arr) !$acc declare create(flux_array, flux_upwind_tmp_arr) !$acc declare create(flux_tmp_arr, wdtn_arr) + !$acc declare create(rho_zz_int) real (kind=RKIND), dimension(:,:), allocatable :: ru_driving_tend ! regional_MPAS addition real (kind=RKIND), dimension(:,:), allocatable :: rt_driving_tend ! regional_MPAS addition @@ -295,6 +297,8 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 + real (kind=RKIND), dimension(:), pointer :: u_init, v_init, qv_init + real (kind=RKIND), dimension(:,:), pointer :: t_init real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2 real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s2 real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_cs @@ -320,6 +324,7 @@ subroutine mpas_atm_dynamics_init(domain) nullify(mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) + MPAS_ACC_TIMER_START('mpas_dynamics_init [ACC_data_xfer]') call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) !$acc enter data copyin(dvEdge) @@ -496,24 +501,971 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) !$acc enter data copyin(meshScalingDel4) + + call mpas_pool_get_array(mesh, 'u_init', u_init) + !$acc enter data copyin(u_init) + call mpas_pool_get_array(mesh, 'v_init', v_init) + !$acc enter data copyin(v_init) + call mpas_pool_get_array(mesh, 't_init', t_init) + !$acc enter data copyin(t_init) + call mpas_pool_get_array(mesh, 'qv_init', qv_init) + !$acc enter data copyin(qv_init) + + call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2) + !$acc enter data copyin(deformation_coef_c2) + + call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2) + !$acc enter data copyin(deformation_coef_s2) + + call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs) + !$acc enter data copyin(deformation_coef_cs) + + call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c) + !$acc enter data copyin(deformation_coef_c) + + call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s) + !$acc enter data copyin(deformation_coef_s) + + MPAS_ACC_TIMER_STOP('mpas_dynamics_init [ACC_data_xfer]') +#endif + + end subroutine mpas_atm_dynamics_init + + subroutine mpas_atm_pre_compute_solve_diagnostics(block) + + implicit none + + type (block_type), intent(inout) :: block + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: tend_physics + real (kind=RKIND), dimension(:,:), pointer :: rthdynten + + real (kind=RKIND), dimension(:,:), pointer :: h_edge, v, vorticity, ke, pv_edge, & + pv_vertex, pv_cell, gradPVn, gradPVt, divergence + real (kind=RKIND), dimension(:,:), pointer :: u, h + + real (kind=RKIND), dimension(:,:), pointer :: zz + real (kind=RKIND), dimension(:,:,:), pointer :: zb_cell + real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell + real (kind=RKIND), dimension(:), pointer :: fzm + real (kind=RKIND), dimension(:), pointer :: fzp + real (kind=RKIND), dimension(:,:,:), pointer :: zb + real (kind=RKIND), dimension(:,:,:), pointer :: zb3 + + + real (kind=RKIND), dimension(:), pointer :: dvEdge + integer, dimension(:,:), pointer :: cellsOnCell + integer, dimension(:,:), pointer :: cellsOnEdge + integer, dimension(:,:), pointer :: advCellsForEdge + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nAdvCellsForEdge + integer, dimension(:), pointer :: nEdgesOnCell + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs_3rd + real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign + real (kind=RKIND), dimension(:), pointer :: invAreaCell + integer, dimension(:), pointer :: bdyMaskCell + integer, dimension(:), pointer :: bdyMaskEdge + real (kind=RKIND), dimension(:), pointer :: specZoneMaskEdge + real (kind=RKIND), dimension(:), pointer :: invDvEdge + real (kind=RKIND), dimension(:), pointer :: dcEdge + real (kind=RKIND), dimension(:), pointer :: invDcEdge + integer, dimension(:,:), pointer :: edgesOnEdge + integer, dimension(:,:), pointer :: edgesOnVertex + real (kind=RKIND), dimension(:,:), pointer :: edgesOnVertex_sign + integer, dimension(:), pointer :: nEdgesOnEdge + real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + integer, dimension(:,:), pointer :: cellsOnVertex + integer, dimension(:,:), pointer :: verticesOnCell + integer, dimension(:,:), pointer :: verticesOnEdge + real (kind=RKIND), dimension(:), pointer :: invAreaTriangle + integer, dimension(:,:), pointer :: kiteForCell + real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex + real (kind=RKIND), dimension(:), pointer :: fEdge + real (kind=RKIND), dimension(:), pointer :: fVertex + + nullify(mesh) + call mpas_pool_get_subpool(block % structs, 'mesh', mesh) + nullify(state) + call mpas_pool_get_subpool(block % structs, 'state', state) + nullify(diag) + call mpas_pool_get_subpool(block % structs, 'diag', diag) + + MPAS_ACC_TIMER_START('first_compute_solve_diagnostics [ACC_data_xfer]') + call mpas_pool_get_array(state, 'rho_zz', h, 1) + !$acc enter data create(h) + call mpas_pool_get_array(state, 'u', u, 1) + !$acc enter data copyin(u) + + call mpas_pool_get_array(diag, 'v', v) + !$acc enter data copyin(v) + call mpas_pool_get_array(diag, 'rho_edge', h_edge) + !$acc enter data copyin(h_edge) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc enter data copyin(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc enter data copyin(divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc enter data copyin(ke) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc enter data copyin(pv_edge) + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc enter data copyin(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc enter data copyin(pv_cell) + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc enter data copyin(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc enter data copyin(gradPVt) + + ! Required by atm_init_coupled_diagnostics + call mpas_pool_get_array(mesh, 'zz', zz) + !$acc enter data copyin(zz) + + call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) + !$acc enter data copyin(zb_cell) + + call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) + !$acc enter data copyin(zb3_cell) + + call mpas_pool_get_array(mesh, 'fzm', fzm) + !$acc enter data copyin(fzm) + + call mpas_pool_get_array(mesh, 'fzp', fzp) + !$acc enter data copyin(fzp) + + call mpas_pool_get_array(mesh, 'zb', zb) + !$acc enter data copyin(zb) + + call mpas_pool_get_array(mesh, 'zb3', zb3) + !$acc enter data copyin(zb3) + + ! Required by atm_compute_solve_diagnostics + call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + !$acc enter data copyin(dvEdge) + + call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + !$acc enter data copyin(cellsOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) + !$acc enter data copyin(edgesOnCell) + + call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) + !$acc enter data copyin(nEdgesOnCell) + + call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) + !$acc enter data copyin(edgesOnCell_sign) + + call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) + !$acc enter data copyin(invAreaCell) + + call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) + !$acc enter data copyin(invDvEdge) + + call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + !$acc enter data copyin(dcEdge) + + call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) + !$acc enter data copyin(invDcEdge) + + call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + !$acc enter data copyin(edgesOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) + !$acc enter data copyin(edgesOnVertex) + + call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) + !$acc enter data copyin(edgesOnVertex_sign) + + call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + !$acc enter data copyin(nEdgesOnEdge) + + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + !$acc enter data copyin(weightsOnEdge) + + call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) + !$acc enter data copyin(verticesOnCell) + + call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) + !$acc enter data copyin(verticesOnEdge) + + call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) + !$acc enter data copyin(invAreaTriangle) + + call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) + !$acc enter data copyin(kiteForCell) + + call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + !$acc enter data copyin(kiteAreasOnVertex) + + call mpas_pool_get_array(mesh, 'fVertex', fVertex) + !$acc enter data copyin(fVertex) + + MPAS_ACC_TIMER_STOP('first_compute_solve_diagnostics [ACC_data_xfer]') +#endif + + end subroutine mpas_atm_pre_compute_solve_diagnostics + + + subroutine mpas_atm_post_compute_solve_diagnostics(block) + + implicit none + + type (block_type), intent(inout) :: block + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: tend_physics + real (kind=RKIND), dimension(:,:), pointer :: rthdynten + + real (kind=RKIND), dimension(:,:), pointer :: h_edge, v, vorticity, ke, pv_edge, & + pv_vertex, pv_cell, gradPVn, gradPVt, divergence + real (kind=RKIND), dimension(:,:), pointer :: u, h + + real (kind=RKIND), dimension(:,:), pointer :: zz + real (kind=RKIND), dimension(:,:,:), pointer :: zb_cell + real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell + real (kind=RKIND), dimension(:), pointer :: fzm + real (kind=RKIND), dimension(:), pointer :: fzp + real (kind=RKIND), dimension(:,:,:), pointer :: zb + real (kind=RKIND), dimension(:,:,:), pointer :: zb3 + + + real (kind=RKIND), dimension(:), pointer :: dvEdge + integer, dimension(:,:), pointer :: cellsOnCell + integer, dimension(:,:), pointer :: cellsOnEdge + integer, dimension(:,:), pointer :: advCellsForEdge + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nAdvCellsForEdge + integer, dimension(:), pointer :: nEdgesOnCell + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs_3rd + real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign + real (kind=RKIND), dimension(:), pointer :: invAreaCell + integer, dimension(:), pointer :: bdyMaskCell + integer, dimension(:), pointer :: bdyMaskEdge + real (kind=RKIND), dimension(:), pointer :: specZoneMaskEdge + real (kind=RKIND), dimension(:), pointer :: invDvEdge + real (kind=RKIND), dimension(:), pointer :: dcEdge + real (kind=RKIND), dimension(:), pointer :: invDcEdge + integer, dimension(:,:), pointer :: edgesOnEdge + integer, dimension(:,:), pointer :: edgesOnVertex + real (kind=RKIND), dimension(:,:), pointer :: edgesOnVertex_sign + integer, dimension(:), pointer :: nEdgesOnEdge + real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + integer, dimension(:,:), pointer :: cellsOnVertex + integer, dimension(:,:), pointer :: verticesOnCell + integer, dimension(:,:), pointer :: verticesOnEdge + real (kind=RKIND), dimension(:), pointer :: invAreaTriangle + integer, dimension(:,:), pointer :: kiteForCell + real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex + real (kind=RKIND), dimension(:), pointer :: fEdge + real (kind=RKIND), dimension(:), pointer :: fVertex + + nullify(mesh) + call mpas_pool_get_subpool(block % structs, 'mesh', mesh) + nullify(state) + call mpas_pool_get_subpool(block % structs, 'state', state) + nullify(diag) + call mpas_pool_get_subpool(block % structs, 'diag', diag) + + MPAS_ACC_TIMER_START('first_compute_solve_diagnostics [ACC_data_xfer]') + + call mpas_pool_get_array(state, 'rho_zz', h, 1) + !$acc exit data copyout(h) + call mpas_pool_get_array(state, 'u', u, 1) + !$acc exit data copyout(u) + + call mpas_pool_get_array(diag, 'v', v) + !$acc exit data copyout(v) + call mpas_pool_get_array(diag, 'rho_edge', h_edge) + !$acc exit data copyout(h_edge) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc exit data copyout(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc exit data copyout(divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc exit data copyout(ke) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc exit data copyout(pv_edge) + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc exit data copyout(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc exit data copyout(pv_cell) + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc exit data copyout(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc exit data copyout(gradPVt) + + ! Required by atm_init_coupled_diagnostics + call mpas_pool_get_array(mesh, 'zz', zz) + !$acc exit data delete(zz) + + call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) + !$acc exit data delete(zb_cell) + + call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) + !$acc exit data delete(zb3_cell) + + call mpas_pool_get_array(mesh, 'fzm', fzm) + !$acc exit data delete(fzm) + + call mpas_pool_get_array(mesh, 'fzp', fzp) + !$acc exit data delete(fzp) + + call mpas_pool_get_array(mesh, 'zb', zb) + !$acc exit data delete(zb) + + call mpas_pool_get_array(mesh, 'zb3', zb3) + !$acc exit data delete(zb3) + + + call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + !$acc exit data delete(dvEdge) + + call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + !$acc exit data delete(cellsOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) + !$acc exit data delete(edgesOnCell) + + call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) + !$acc exit data delete(nEdgesOnCell) + + call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) + !$acc exit data delete(edgesOnCell_sign) + + call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) + !$acc exit data delete(invAreaCell) + + call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) + !$acc exit data delete(invDvEdge) + + call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + !$acc exit data delete(dcEdge) + + call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) + !$acc exit data delete(invDcEdge) + + call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + !$acc exit data delete(edgesOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) + !$acc exit data delete(edgesOnVertex) + + call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) + !$acc exit data delete(edgesOnVertex_sign) + + call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + !$acc exit data delete(nEdgesOnEdge) + + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + !$acc exit data delete(weightsOnEdge) + + call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) + !$acc exit data delete(verticesOnCell) + + call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) + !$acc exit data delete(verticesOnEdge) + + call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) + !$acc exit data delete(invAreaTriangle) + + call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) + !$acc exit data delete(kiteForCell) + + call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + !$acc exit data delete(kiteAreasOnVertex) + + call mpas_pool_get_array(mesh, 'fVertex', fVertex) + !$acc exit data delete(fVertex) + + MPAS_ACC_TIMER_STOP('first_compute_solve_diagnostics [ACC_data_xfer]') +#endif + + end subroutine mpas_atm_post_compute_solve_diagnostics + + subroutine mpas_atm_pre_dynamics(domain) + + implicit none + + type (domain_type), intent(inout) :: domain + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: tend + type (mpas_pool_type), pointer :: tend_physics + type (mpas_pool_type), pointer :: lbc + + logical, pointer :: config_apply_lbcs_ptr + logical :: config_apply_lbcs + + real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p + real (kind=RKIND), dimension(:,:), pointer :: ru_save + real (kind=RKIND), dimension(:,:), pointer :: rw, rw_p + real (kind=RKIND), dimension(:,:), pointer :: rw_save + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p + real (kind=RKIND), dimension(:,:), pointer :: exner, exner_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_base, rho_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_p, rho_pp, rho, theta, theta_base + real (kind=RKIND), dimension(:,:), pointer :: rho_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split + real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp + real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, pressure, v + real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, h_divergence, ke + real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri + real (kind=RKIND), dimension(:), pointer :: cofrz + real (kind=RKIND), dimension(:,:), pointer :: gradPVn, gradPVt + + + real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 + real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 + real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2 + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2 + real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 + real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + + integer, pointer :: nCells_ptr + integer :: nCells + real (kind=RKIND), dimension(:,:), pointer :: uReconstructZonal, uReconstructMeridional, uReconstructX, uReconstructY, uReconstructZ + + real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend + real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler + real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy + real(kind=RKIND), dimension(:,:,:), pointer :: scalar_tend_save + + real (kind=RKIND), dimension(:,:), pointer :: rthdynten, divergence, vorticity + + real (kind=RKIND), dimension(:,:), pointer :: lbc_u, lbc_w, lbc_ru, lbc_rho_edge, lbc_rho, lbc_rtheta_m, lbc_rho_zz, lbc_theta + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_u, lbc_tend_w, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_rho + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_rtheta_m, lbc_tend_rho_zz, lbc_tend_theta + + real (kind=RKIND), dimension(:,:,:), pointer :: lbc_scalars, lbc_tend_scalars + + nullify(mesh) + nullify(state) + nullify(diag) + nullify(tend) + nullify(tend_physics) + nullify(lbc) + call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) + call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) + call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) + call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc) + + call mpas_pool_get_config(domain % blocklist % configs, 'config_apply_lbcs', config_apply_lbcs_ptr) + config_apply_lbcs = config_apply_lbcs_ptr + + MPAS_ACC_TIMER_START('atm_srk3 [ACC_data_xfer]') + call mpas_pool_get_array(diag, 'ru', ru) + !$acc enter data copyin(ru) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'ru_p', ru_p) + !$acc enter data copyin(ru_p) + call mpas_pool_get_array(diag, 'ru_save', ru_save) + !$acc enter data copyin(ru_save) + call mpas_pool_get_array(diag, 'rw', rw) + !$acc enter data copyin(rw) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rw_p', rw_p) + !$acc enter data copyin(rw_p) + call mpas_pool_get_array(diag, 'rw_save', rw_save) + !$acc enter data copyin(rw_save) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc enter data copyin(rtheta_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save) + !$acc enter data copyin(rtheta_p_save) + call mpas_pool_get_array(diag, 'exner', exner) + !$acc enter data copyin(exner) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'exner_base', exner_base) + !$acc enter data copyin(exner_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) + !$acc enter data copyin(rtheta_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_base', rho_base) + !$acc enter data copyin(rho_base) + call mpas_pool_get_array(diag, 'rho', rho) + !$acc enter data copyin(rho) + call mpas_pool_get_array(diag, 'theta', theta) + !$acc enter data copyin(theta) + call mpas_pool_get_array(diag, 'theta_base', theta_base) + !$acc enter data copyin(theta_base) + call mpas_pool_get_array(diag, 'rho_p', rho_p) + !$acc enter data copyin(rho_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save) + !$acc enter data copyin(rho_p_save) + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + !$acc enter data copyin(rho_pp) + call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) + !$acc enter data copyin(rho_zz_old_split) + call mpas_pool_get_array(diag, 'cqw', cqw) + !$acc enter data copyin(cqw) + call mpas_pool_get_array(diag, 'cqu', cqu) + !$acc enter data copyin(cqu) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + !$acc enter data copyin(pressure_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure_base', pressure_base) + !$acc enter data copyin(pressure_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure', pressure) + !$acc enter data copyin(pressure) + call mpas_pool_get_array(diag, 'v', v) + !$acc enter data copyin(v) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc enter data copyin(rtheta_pp) + call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old) + !$acc enter data copyin(rtheta_pp_old) + call mpas_pool_get_array(diag, 'kdiff', kdiff) + !$acc enter data copyin(kdiff) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc enter data copyin(pv_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc enter data copyin(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc enter data copyin(pv_cell) + call mpas_pool_get_array(diag, 'rho_edge', rho_edge) + !$acc enter data copyin(rho_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'h_divergence', h_divergence) + !$acc enter data copyin(h_divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc enter data copyin(ke) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc enter data copyin(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc enter data copyin(gradPVt) + + call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) + !$acc enter data copyin(alpha_tri) + call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri) + !$acc enter data copyin(gamma_tri) + call mpas_pool_get_array(diag, 'a_tri', a_tri) + !$acc enter data copyin(a_tri) + call mpas_pool_get_array(diag, 'cofwr', cofwr) + !$acc enter data copyin(cofwr) + call mpas_pool_get_array(diag, 'cofwz', cofwz) + !$acc enter data copyin(cofwz) + call mpas_pool_get_array(diag, 'coftz', coftz) + !$acc enter data copyin(coftz) + call mpas_pool_get_array(diag, 'cofwt', cofwt) + !$acc enter data copyin(cofwt) + call mpas_pool_get_array(diag, 'cofrz', cofrz) + !$acc enter data copyin(cofrz) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc enter data copyin(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc enter data copyin(divergence) + call mpas_pool_get_array(diag, 'ruAvg', ruAvg) + !$acc enter data copyin(ruAvg) + call mpas_pool_get_array(diag, 'ruAvg_split', ruAvg_split) + !$acc enter data copyin(ruAvg_split) + call mpas_pool_get_array(diag, 'wwAvg', wwAvg) + !$acc enter data copyin(wwAvg) + call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split) + !$acc enter data copyin(wwAvg_split) + + call mpas_pool_get_dimension(mesh, 'nCellsSolve', nCells_ptr) + nCells = nCells_ptr + call mpas_pool_get_array(diag, 'uReconstructX', uReconstructX) + !$acc enter data create(uReconstructX(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructY', uReconstructY) + !$acc enter data create(uReconstructY(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ) + !$acc enter data create(uReconstructZ(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal) + !$acc enter data copyin(uReconstructZonal(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional) + !$acc enter data copyin(uReconstructMeridional(:,1:nCells)) + + call mpas_pool_get_array(state, 'u', u_1, 1) + !$acc enter data copyin(u_1) + call mpas_pool_get_array(state, 'u', u_2, 2) + !$acc enter data copyin(u_2) + call mpas_pool_get_array(state, 'w', w_1, 1) + !$acc enter data copyin(w_1) + call mpas_pool_get_array(state, 'w', w_2, 2) + !$acc enter data copyin(w_2) + call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) + !$acc enter data copyin(theta_m_1) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) + !$acc enter data copyin(theta_m_2) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) + !$acc enter data copyin(rho_zz_1) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) + !$acc enter data copyin(rho_zz_2) + call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + !$acc enter data copyin(scalars_1) + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc enter data copyin(scalars_2) - call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2) - !$acc enter data copyin(deformation_coef_c2) - call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2) - !$acc enter data copyin(deformation_coef_s2) + call mpas_pool_get_array(tend, 'u', tend_ru) + !$acc enter data copyin(tend_ru) + call mpas_pool_get_array(tend, 'rho_zz', tend_rho) + !$acc enter data copyin(tend_rho) + call mpas_pool_get_array(tend, 'theta_m', tend_rt) + !$acc enter data copyin(tend_rt) + call mpas_pool_get_array(tend, 'w', tend_rw) + !$acc enter data copyin(tend_rw) + call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) + !$acc enter data copyin(rt_diabatic_tend) + call mpas_pool_get_array(tend, 'u_euler', tend_u_euler) + !$acc enter data copyin(tend_u_euler) + call mpas_pool_get_array(tend, 'theta_euler', tend_theta_euler) + !$acc enter data copyin(tend_theta_euler) + call mpas_pool_get_array(tend, 'w_euler', tend_w_euler) + !$acc enter data copyin(tend_w_euler) + call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf) + !$acc enter data copyin(tend_w_pgf) + call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) + !$acc enter data copyin(tend_w_buoy) + call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save) + !$acc enter data copyin(scalar_tend_save) - call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs) - !$acc enter data copyin(deformation_coef_cs) - call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c) - !$acc enter data copyin(deformation_coef_c) + if(config_apply_lbcs) then + call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) + !$acc enter data copyin(lbc_u) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) + !$acc enter data copyin(lbc_w) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) + !$acc enter data copyin(lbc_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) + !$acc enter data copyin(lbc_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) + !$acc enter data copyin(lbc_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) + !$acc enter data copyin(lbc_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) + !$acc enter data copyin(lbc_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) + !$acc enter data copyin(lbc_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) + !$acc enter data copyin(lbc_scalars) + + + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + !$acc enter data copyin(lbc_tend_u) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + !$acc enter data copyin(lbc_tend_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + !$acc enter data copyin(lbc_tend_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + !$acc enter data copyin(lbc_tend_w) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + !$acc enter data copyin(lbc_tend_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + !$acc enter data copyin(lbc_tend_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + !$acc enter data copyin(lbc_tend_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + !$acc enter data copyin(lbc_tend_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + !$acc enter data copyin(lbc_tend_scalars) + end if - call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s) - !$acc enter data copyin(deformation_coef_s) + call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc enter data copyin(rthdynten) + + MPAS_ACC_TIMER_STOP('atm_srk3 [ACC_data_xfer]') #endif - end subroutine mpas_atm_dynamics_init + end subroutine mpas_atm_pre_dynamics + + + subroutine mpas_atm_post_dynamics(domain) + + implicit none + + type (domain_type), intent(inout) :: domain + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: tend + type (mpas_pool_type), pointer :: tend_physics + type (mpas_pool_type), pointer :: lbc + + logical, pointer :: config_apply_lbcs_ptr + logical :: config_apply_lbcs + + real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p + real (kind=RKIND), dimension(:,:), pointer :: ru_save + real (kind=RKIND), dimension(:,:), pointer :: rw, rw_p + real (kind=RKIND), dimension(:,:), pointer :: rw_save + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p + real (kind=RKIND), dimension(:,:), pointer :: exner, exner_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_base, rho_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_p, rho_pp, rho, theta, theta_base + real (kind=RKIND), dimension(:,:), pointer :: rho_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split + real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp + real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, pressure, v + real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, h_divergence, ke + real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri + real (kind=RKIND), dimension(:), pointer :: cofrz + real (kind=RKIND), dimension(:,:), pointer :: gradPVn, gradPVt + + + real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 + real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 + real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2 + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2 + real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 + real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + + integer, pointer :: nCells_ptr + integer :: nCells + real (kind=RKIND), dimension(:,:), pointer :: uReconstructZonal, uReconstructMeridional, uReconstructX, uReconstructY, uReconstructZ + + real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend + real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler + real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy + real(kind=RKIND), dimension(:,:,:), pointer :: scalar_tend_save + + real (kind=RKIND), dimension(:,:), pointer :: rthdynten, divergence, vorticity + + real (kind=RKIND), dimension(:,:), pointer :: lbc_u, lbc_w, lbc_ru, lbc_rho_edge, lbc_rho, lbc_rtheta_m, lbc_rho_zz, lbc_theta + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_u, lbc_tend_w, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_rho + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_rtheta_m, lbc_tend_rho_zz, lbc_tend_theta + + real (kind=RKIND), dimension(:,:,:), pointer :: lbc_scalars, lbc_tend_scalars + + nullify(mesh) + nullify(state) + nullify(diag) + nullify(tend) + nullify(tend_physics) + nullify(lbc) + call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) + call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) + call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) + call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc) + + call mpas_pool_get_config(domain % blocklist % configs, 'config_apply_lbcs', config_apply_lbcs_ptr) + config_apply_lbcs = config_apply_lbcs_ptr + + MPAS_ACC_TIMER_START('atm_srk3 [ACC_data_xfer]') + call mpas_pool_get_array(diag, 'ru', ru) + !$acc exit data copyout(ru) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'ru_p', ru_p) + !$acc exit data copyout(ru_p) + call mpas_pool_get_array(diag, 'ru_save', ru_save) + !$acc exit data delete(ru_save) + call mpas_pool_get_array(diag, 'rw', rw) + !$acc exit data copyout(rw) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rw_p', rw_p) + !$acc exit data copyout(rw_p) + call mpas_pool_get_array(diag, 'rw_save', rw_save) + !$acc exit data delete(rw_save) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc exit data copyout(rtheta_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save) + !$acc exit data delete(rtheta_p_save) + call mpas_pool_get_array(diag, 'exner', exner) + !$acc exit data copyout(exner) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'exner_base', exner_base) + !$acc exit data copyout(exner_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) + !$acc exit data copyout(rtheta_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_base', rho_base) + !$acc exit data copyout(rho_base) + call mpas_pool_get_array(diag, 'rho', rho) + !$acc exit data copyout(rho) + call mpas_pool_get_array(diag, 'theta', theta) + !$acc exit data copyout(theta) + call mpas_pool_get_array(diag, 'theta_base', theta_base) + !$acc exit data copyout(theta_base) + call mpas_pool_get_array(diag, 'rho_p', rho_p) + !$acc exit data copyout(rho_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save) + !$acc exit data delete(rho_p_save) + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + !$acc exit data copyout(rho_pp) + call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) + !$acc exit data delete(rho_zz_old_split) + call mpas_pool_get_array(diag, 'cqw', cqw) + !$acc exit data delete(cqw) + call mpas_pool_get_array(diag, 'cqu', cqu) + !$acc exit data copyout(cqu) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + !$acc exit data copyout(pressure_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure_base', pressure_base) + !$acc exit data copyout(pressure_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure', pressure) + !$acc exit data copyout(pressure) + call mpas_pool_get_array(diag, 'v', v) + !$acc exit data copyout(v) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc exit data copyout(rtheta_pp) + call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old) + !$acc exit data copyout(rtheta_pp_old) + call mpas_pool_get_array(diag, 'kdiff', kdiff) + !$acc exit data copyout(kdiff) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc exit data copyout(pv_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc exit data copyout(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc exit data delete(pv_cell) + call mpas_pool_get_array(diag, 'rho_edge', rho_edge) + !$acc exit data copyout(rho_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'h_divergence', h_divergence) + !$acc exit data copyout(h_divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc exit data copyout(ke) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc exit data delete(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc exit data delete(gradPVt) + + call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) + !$acc exit data delete(alpha_tri) + call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri) + !$acc exit data delete(gamma_tri) + call mpas_pool_get_array(diag, 'a_tri', a_tri) + !$acc exit data delete(a_tri) + call mpas_pool_get_array(diag, 'cofwr', cofwr) + !$acc exit data delete(cofwr) + call mpas_pool_get_array(diag, 'cofwz', cofwz) + !$acc exit data delete(cofwz) + call mpas_pool_get_array(diag, 'coftz', coftz) + !$acc exit data delete(coftz) + call mpas_pool_get_array(diag, 'cofwt', cofwt) + !$acc exit data delete(cofwt) + call mpas_pool_get_array(diag, 'cofrz', cofrz) + !$acc exit data delete(cofrz) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc exit data copyout(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc exit data copyout(divergence) + call mpas_pool_get_array(diag, 'ruAvg', ruAvg) + !$acc exit data copyout(ruAvg) + call mpas_pool_get_array(diag, 'ruAvg_split', ruAvg_split) + !$acc exit data copyout(ruAvg_split) + call mpas_pool_get_array(diag, 'wwAvg', wwAvg) + !$acc exit data copyout(wwAvg) + call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split) + !$acc exit data copyout(wwAvg_split) + + call mpas_pool_get_dimension(mesh, 'nCellsSolve', nCells_ptr) + nCells = nCells_ptr + call mpas_pool_get_array(diag, 'uReconstructX', uReconstructX) + !$acc exit data copyout(uReconstructX(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructY', uReconstructY) + !$acc exit data copyout(uReconstructY(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ) + !$acc exit data copyout(uReconstructZ(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal) + !$acc exit data copyout(uReconstructZonal(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional) + !$acc exit data copyout(uReconstructMeridional(:,1:nCells)) + + call mpas_pool_get_array(state, 'u', u_1, 1) + !$acc exit data copyout(u_1) + call mpas_pool_get_array(state, 'u', u_2, 2) + !$acc exit data copyout(u_2) + call mpas_pool_get_array(state, 'w', w_1, 1) + !$acc exit data copyout(w_1) + call mpas_pool_get_array(state, 'w', w_2, 2) + !$acc exit data copyout(w_2) + call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) + !$acc exit data copyout(theta_m_1) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) + !$acc exit data copyout(theta_m_2) ! Delete gives incorrect results + call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) + !$acc exit data copyout(rho_zz_1) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) + !$acc exit data copyout(rho_zz_2) + call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + !$acc exit data copyout(scalars_1) + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc exit data copyout(scalars_2) ! Delete gives incorrect results + + + call mpas_pool_get_array(tend, 'u', tend_ru) + !$acc exit data copyout(tend_ru) + call mpas_pool_get_array(tend, 'rho_zz', tend_rho) + !$acc exit data copyout(tend_rho) + call mpas_pool_get_array(tend, 'theta_m', tend_rt) + !$acc exit data copyout(tend_rt) + call mpas_pool_get_array(tend, 'w', tend_rw) + !$acc exit data copyout(tend_rw) + call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) + !$acc exit data copyout(rt_diabatic_tend) + call mpas_pool_get_array(tend, 'u_euler', tend_u_euler) + !$acc exit data copyout(tend_u_euler) + call mpas_pool_get_array(tend, 'theta_euler', tend_theta_euler) + !$acc exit data copyout(tend_theta_euler) + call mpas_pool_get_array(tend, 'w_euler', tend_w_euler) + !$acc exit data copyout(tend_w_euler) + call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf) + !$acc exit data copyout(tend_w_pgf) + call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) + !$acc exit data copyout(tend_w_buoy) + call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save) + !$acc exit data copyout(scalar_tend_save) + + if(config_apply_lbcs) then + call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) + !$acc exit data delete(lbc_u) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) + !$acc exit data delete(lbc_w) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) + !$acc exit data delete(lbc_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) + !$acc exit data delete(lbc_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) + !$acc exit data delete(lbc_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) + !$acc exit data delete(lbc_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) + !$acc exit data delete(lbc_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) + !$acc exit data delete(lbc_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) + !$acc exit data delete(lbc_scalars) + + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + !$acc exit data delete(lbc_tend_u) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + !$acc exit data delete(lbc_tend_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + !$acc exit data delete(lbc_tend_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + !$acc exit data delete(lbc_tend_w) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + !$acc exit data delete(lbc_tend_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + !$acc exit data delete(lbc_tend_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + !$acc exit data delete(lbc_tend_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + !$acc exit data delete(lbc_tend_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + !$acc exit data delete(lbc_tend_scalars) + end if + + call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc exit data copyout(rthdynten) + MPAS_ACC_TIMER_STOP('atm_srk3 [ACC_data_xfer]') +#endif + + end subroutine mpas_atm_post_dynamics !---------------------------------------------------------------------------- @@ -865,12 +1817,14 @@ subroutine atm_timestep(domain, dt, nowTime, itimestep, exchange_halo_group) config_apply_lbcs = config_apply_lbcs_ptr + call mpas_atm_pre_dynamics(domain) if (trim(config_time_integration) == 'SRK3') then call atm_srk3(domain, dt, itimestep, exchange_halo_group) else call mpas_log_write('Unknown time integration option '//trim(config_time_integration), messageType=MPAS_LOG_ERR) call mpas_log_write('Currently, only ''SRK3'' is supported.', messageType=MPAS_LOG_CRIT) end if + call mpas_atm_post_dynamics(domain) call mpas_set_timeInterval(dtInterval, dt=dt) currTime = nowTime + dtInterval @@ -941,6 +1895,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) logical, pointer :: config_scalar_advection logical, pointer :: config_positive_definite logical, pointer :: config_monotonic + logical, pointer :: config_gpu_aware_mpi character (len=StrKIND), pointer :: config_microp_scheme character (len=StrKIND), pointer :: config_convection_scheme @@ -964,6 +1919,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) real (kind=RKIND), dimension(:,:,:), pointer :: scalars, scalars_1, scalars_2 real (kind=RKIND), dimension(:,:), pointer :: rqvdynten, rthdynten, theta_m + real (kind=RKIND), dimension(:,:), pointer :: pressure_p, rtheta_p, exner, tend_u + real (kind=RKIND), dimension(:,:), pointer :: rho_pp, rtheta_pp, ru_p, rw_p, pv_edge, rho_edge real (kind=RKIND) :: theta_local, fac_m #ifndef MPAS_CAM_DYCORE @@ -983,6 +1940,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_config(block % configs, 'config_scalar_advection', config_scalar_advection) call mpas_pool_get_config(block % configs, 'config_positive_definite', config_positive_definite) call mpas_pool_get_config(block % configs, 'config_monotonic', config_monotonic) + call mpas_pool_get_config(block % configs, 'config_gpu_aware_mpi', config_gpu_aware_mpi) call mpas_pool_get_config(block % configs, 'config_IAU_option', config_IAU_option) ! config variables for dynamics-transport splitting, WCS 18 November 2014 call mpas_pool_get_config(block % configs, 'config_split_dynamics_transport', config_split_dynamics_transport) @@ -1131,7 +2089,15 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! ! Communicate halos for theta_m, scalars, pressure_p, and rtheta_p ! - call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'theta_m', theta_m, 1) + call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p', config_gpu_aware_mpi) + !$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_rk_integration_setup') @@ -1166,6 +2132,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_timer_stop('atm_compute_moist_coefficients') #ifdef DO_PHYSICS + call pre_physics_get_tend(block % configs, state, diag, tend) call mpas_timer_start('physics_get_tend') rk_step = 1 dynamics_substep = 1 @@ -1174,6 +2141,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) tend_ru_physics, tend_rtheta_physics, tend_rho_physics, & exchange_halo_group ) call mpas_timer_stop('physics_get_tend') + call post_physics_get_tend(block % configs, state, diag, tend) #else #ifndef MPAS_CAM_DYCORE ! @@ -1189,10 +2157,13 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! IAU - Incremental Analysis Update ! if (trim(config_IAU_option) /= 'off') then + call update_d2h_pre_add_tend_anal_incr(block % configs, block % structs) call atm_add_tend_anal_incr(block % configs, block % structs, itimestep, dt, & tend_ru_physics, tend_rtheta_physics, tend_rho_physics) end if + !$acc enter data copyin(tend_rtheta_physics,tend_rho_physics,tend_ru_physics) + DYNAMICS_SUBSTEPS : do dynamics_substep = 1, dynamics_split @@ -1212,8 +2183,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !$OMP END PARALLEL DO call mpas_timer_stop('atm_compute_vert_imp_coefs') - call exchange_halo_group(domain, 'dynamics:exner') - + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(diag, 'exner', exner) + !$acc update self(exner) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:exner', config_gpu_aware_mpi) + !$acc update device(exner) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! BEGIN Runge-Kutta loop @@ -1292,7 +2267,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !*********************************** ! tend_u - call exchange_halo_group(domain, 'dynamics:tend_u') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(tend, 'u', tend_u) + !$acc update self(tend_u) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:tend_u', config_gpu_aware_mpi) + !$acc update device(tend_u) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('small_step_prep') @@ -1368,7 +2348,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) do small_step = 1, number_sub_steps(rk_step) - call exchange_halo_group(domain, 'dynamics:rho_pp') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + !$acc update self(rho_pp) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:rho_pp', config_gpu_aware_mpi) + !$acc update device(rho_pp) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_advance_acoustic_step') @@ -1390,8 +2375,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! rtheta_pp ! This is the only communications needed during the acoustic steps because we solve for u on all edges of owned cells - - call exchange_halo_group(domain, 'dynamics:rtheta_pp') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc update self(rtheta_pp) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:rtheta_pp', config_gpu_aware_mpi) + !$acc update device(rtheta_pp) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! complete update of horizontal momentum by including 3d divergence damping at the end of the acoustic step @@ -1411,7 +2400,15 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! ! Communicate halos for rw_p[1,2], ru_p[1,2], rho_pp[1,2], rtheta_pp[2] ! - call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(diag, 'ru_p', ru_p) + call mpas_pool_get_array(diag, 'rw_p', rw_p) + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp', config_gpu_aware_mpi) + !$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_recover_large_step_variables') @@ -1446,7 +2443,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_atm_get_bdy_state(clock, block, nVertLevels, nEdges, 'u', time_dyn_step, ru_driving_values) ! do this inline at present - it is simple enough - !$acc enter data copyin(u) !$acc parallel default(present) !$acc loop gang worker do iEdge = 1, nEdgesSolve @@ -1458,12 +2454,10 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end if end do !$acc end parallel - !$acc exit data copyout(u) call mpas_atm_get_bdy_state(clock, block, nVertLevels, nEdges, 'ru', time_dyn_step, ru_driving_values) call mpas_pool_get_array(diag, 'ru', u) ! do this inline at present - it is simple enough - !$acc enter data copyin(u) !$acc parallel default(present) !$acc loop gang worker do iEdge = 1, nEdges @@ -1475,7 +2469,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end if end do !$acc end parallel - !$acc exit data copyout(u) deallocate(ru_driving_values) @@ -1483,12 +2476,17 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !------------------------------------------------------------------- + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'u', u, 2) + !$acc update self(u) if (.not. config_gpu_aware_mpi) ! u if (config_apply_lbcs) then - call exchange_halo_group(domain, 'dynamics:u_123') + call exchange_halo_group(domain, 'dynamics:u_123', config_gpu_aware_mpi) else - call exchange_halo_group(domain, 'dynamics:u_3') + call exchange_halo_group(domain, 'dynamics:u_3', config_gpu_aware_mpi) end if + !$acc update device(u) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! scalar advection: RK3 scheme of Skamarock and Gassmann (2011). ! PD or monotonicity constraints applied only on the final Runge-Kutta substep. @@ -1496,11 +2494,16 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & - config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) + config_time_integration_order, config_split_dynamics_transport, config_gpu_aware_mpi, exchange_halo_group) if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS scalar transport - call exchange_halo_group(domain, 'dynamics:scalars') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -1552,17 +2555,27 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_timer_stop('atm_compute_solve_diagnostics') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'w', w, 2) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + call mpas_pool_get_array(diag, 'rho_edge', rho_edge) + !$acc update self(w,pv_edge,rho_edge) if (.not. config_gpu_aware_mpi) if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2], scalars[1,2] ! - call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) else ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2] ! - call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge') + call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge', config_gpu_aware_mpi) end if + !$acc update device(w,pv_edge,rho_edge) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! set the zero-gradient condition on w for regional_MPAS @@ -1575,8 +2588,13 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end do !$OMP END PARALLEL DO + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') ! w halo values needs resetting after regional boundary update - call exchange_halo_group(domain, 'dynamics:w') + call mpas_pool_get_array(state, 'w', w, 2) + !$acc update self(w) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:w', config_gpu_aware_mpi) + !$acc update device(w) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') end if ! end of regional_MPAS addition @@ -1587,7 +2605,14 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! ! Communicate halos for theta_m[1,2], pressure_p[1,2], and rtheta_p[1,2] ! - call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'theta_m', theta_m, 2) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc update self(theta_m,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p', config_gpu_aware_mpi) + !$acc update device(theta_m,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! ! Note: A halo exchange for 'exner' here as well as after the call @@ -1624,6 +2649,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) deallocate(qtot) ! we are finished with these now + !$acc exit data delete(tend_rtheta_physics,tend_rho_physics,tend_ru_physics) #ifndef MPAS_CAM_DYCORE call mpas_deallocate_scratch_field(tend_rtheta_physicsField) call mpas_deallocate_scratch_field(tend_rho_physicsField) @@ -1647,12 +2673,17 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & - config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) + config_time_integration_order, config_split_dynamics_transport, config_gpu_aware_mpi, exchange_halo_group) if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS scalar transport + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') ! need to fill halo for horizontal filter - call exchange_halo_group(domain, 'dynamics:scalars') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -1678,7 +2709,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !------------------------------------------------------------------------------------------------------------------------ if (rk_step < 3) then - call exchange_halo_group(domain, 'dynamics:scalars') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') end if end do RK3_SPLIT_TRANSPORT @@ -1700,7 +2736,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) uReconstructY, & uReconstructZ, & uReconstructZonal, & - uReconstructMeridional & + uReconstructMeridional, & + lACC = .true. & ) @@ -1710,16 +2747,24 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! #ifdef DO_PHYSICS + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + !$acc update self(scalars_1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') if(config_convection_scheme == 'cu_grell_freitas' .or. & config_convection_scheme == 'cu_ntiedtke') then + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') call mpas_pool_get_array(tend_physics, 'rqvdynten', rqvdynten) call mpas_pool_get_array(state, 'theta_m', theta_m, 2) + !$acc update self(theta_m) call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc update self(rthdynten) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') !NOTE: The calculation of the tendency due to horizontal and vertical advection for the water vapor mixing ratio !requires that the subroutine atm_advance_scalars_mono was called on the third Runge Kutta step, so that a halo @@ -1744,8 +2789,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) where ( scalars_2(:,:,:) < 0.0) & scalars_2(:,:,:) = 0.0 + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + !$acc update device(scalars_2, rthdynten) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') !call microphysics schemes: if (trim(config_microp_scheme) /= 'off') then + call pre_microphysics( block % configs, state, diag, 2) call mpas_timer_start('microphysics') !$OMP PARALLEL DO do thread=1,nThreads @@ -1754,6 +2803,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end do !$OMP END PARALLEL DO call mpas_timer_stop('microphysics') + call post_microphysics( block % configs, state, diag, tend, 2) end if ! @@ -1791,7 +2841,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) if (config_apply_lbcs) then ! adjust boundary values for regional_MPAS scalar transport - call exchange_halo_group(domain, 'dynamics:scalars') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -1844,7 +2899,7 @@ end subroutine atm_srk3 ! !----------------------------------------------------------------------- subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & - config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) + config_time_integration_order, config_split_dynamics_transport, config_gpu_aware_mpi, exchange_halo_group) implicit none @@ -1857,6 +2912,7 @@ subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_mono logical, intent(in) :: config_positive_definite integer, intent(in) :: config_time_integration_order logical, intent(in) :: config_split_dynamics_transport + logical, intent(in) :: config_gpu_aware_mpi procedure (halo_exchange_routine) :: exchange_halo_group ! Local variables @@ -1988,7 +3044,7 @@ subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_mono edgeThreadStart(thread), edgeThreadEnd(thread), & cellSolveThreadStart(thread), cellSolveThreadEnd(thread), & scalar_old_arr, scalar_new_arr, s_max_arr, s_min_arr, wdtn_arr, & - flux_array, flux_upwind_tmp_arr, flux_tmp_arr, & + flux_array, flux_upwind_tmp_arr, flux_tmp_arr, config_gpu_aware_mpi, & exchange_halo_group, & advance_density=config_split_dynamics_transport, rho_zz_int=rho_zz_int) end if @@ -2068,12 +3124,6 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & call mpas_pool_get_array(state, 'scalars', scalars_1, 1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]') - !$acc enter data create(ru_save, u_2, rw_save, rtheta_p_save, rho_p_save, & - !$acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) & - !$acc copyin(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, & - !$acc rho_zz_1, scalars_1) - MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]') !$acc kernels theta_m_2(:,cellEnd+1) = 0.0_RKIND @@ -2121,12 +3171,6 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]') - !$acc exit data copyout(ru_save, rw_save, rtheta_p_save, rho_p_save, u_2, & - !$acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) & - !$acc delete(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, & - !$acc rho_zz_1, scalars_1) - MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]') end subroutine atm_rk_integration_setup @@ -2177,11 +3221,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & moist_start = moist_start_ptr moist_end = moist_end_ptr - MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc enter data create(cqw, cqu) & - !$acc copyin(scalars) - MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang worker ! do iCell = cellSolveStart,cellSolveEnd @@ -2230,10 +3269,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc exit data copyout(cqw, cqu) & - !$acc delete(scalars) - MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') end subroutine atm_compute_moist_coefficients @@ -2371,11 +3406,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, real (kind=RKIND) :: dtseps, c2, qtotal, rcv real (kind=RKIND), dimension( nVertLevels ) :: b_tri, c_tri - MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - !$acc enter data copyin(cqw, p, t, rb, rtb, rt, pb) - !$acc enter data create(cofrz, cofwr, cofwz, coftz, cofwt, a_tri, b_tri, & - !$acc c_tri, alpha_tri, gamma_tri) - MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') + !$acc enter data create(b_tri, c_tri) ! set coefficients rcv = rgas/(cp-rgas) @@ -2461,11 +3492,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, end do ! loop over cells !$acc end parallel - MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - !$acc exit data copyout(cofrz, cofwr, cofwz, coftz, cofwt, a_tri, b_tri, & - !$acc c_tri, alpha_tri, gamma_tri) - !$acc exit data delete(cqw, p, t, rb, rtb, rt, pb) - MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') + !$acc exit data delete(b_tri, c_tri) end subroutine atm_compute_vert_imp_coefs_work @@ -2569,9 +3596,6 @@ subroutine atm_set_smlstep_pert_variables_work(nCells, nEdges, & integer :: iCell, iEdge, i, k real (kind=RKIND) :: flux - MPAS_ACC_TIMER_START('atm_set_smlstep_pert_variables [ACC_data_xfer]') - !$acc enter data copyin(u_tend, w_tend) - MPAS_ACC_TIMER_STOP('atm_set_smlstep_pert_variables [ACC_data_xfer]') ! we solve for omega instead of w (see Klemp et al MWR 2007), ! so here we change the w_p tendency to an omega_p tendency @@ -2604,10 +3628,6 @@ subroutine atm_set_smlstep_pert_variables_work(nCells, nEdges, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_set_smlstep_pert_variables [ACC_data_xfer]') - !$acc exit data delete(u_tend) - !$acc exit data copyout(w_tend) - MPAS_ACC_TIMER_STOP('atm_set_smlstep_pert_variables [ACC_data_xfer]') end subroutine atm_set_smlstep_pert_variables_work @@ -2847,17 +3867,6 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart c2 = cp * rcv rdts = 1./dts - MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]') - !$acc enter data copyin(exner,cqu,cofwt,coftz,cofrz,cofwr,cofwz, & - !$acc a_tri,alpha_tri,gamma_tri,rho_zz,theta_m,w, & - !$acc tend_ru,tend_rho,tend_rt,tend_rw,rw,rw_save) - !$acc enter data create(rtheta_pp_old) - if(small_step == 1) then - !$acc enter data create(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p) - else - !$acc enter data copyin(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p) - end if - MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]') if(small_step /= 1) then ! not needed on first small step @@ -3086,13 +4095,6 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart end do ! end of loop over cells !$acc end parallel - MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]') - !$acc exit data delete(exner,cqu,cofwt,coftz,cofrz,cofwr,cofwz, & - !$acc a_tri,alpha_tri,gamma_tri,rho_zz,theta_m,w, & - !$acc tend_ru,tend_rho,tend_rt,tend_rw,rw,rw_save) - !$acc exit data copyout(rtheta_pp_old,ru_p,ruAvg,rho_pp, & - !$acc rtheta_pp,wwAvg,rw_p) - MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]') end subroutine atm_advance_acoustic_step_work @@ -3144,9 +4146,6 @@ subroutine atm_divergence_damping_3d( state, diag, mesh, configs, dts, edgeStart nCellsSolve = nCellsSolve_ptr nVertLevels = nVertLevels_ptr - MPAS_ACC_TIMER_START('atm_divergence_damping_3d [ACC_data_xfer]') - !$acc enter data copyin(ru_p, rtheta_pp, rtheta_pp_old, theta_m) - MPAS_ACC_TIMER_STOP('atm_divergence_damping_3d [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -3179,10 +4178,6 @@ subroutine atm_divergence_damping_3d( state, diag, mesh, configs, dts, edgeStart end do ! end loop over edges !$acc end parallel - MPAS_ACC_TIMER_START('atm_divergence_damping_3d [ACC_data_xfer]') - !$acc exit data copyout(ru_p) & - !$acc delete(rtheta_pp, rtheta_pp_old, theta_m) - MPAS_ACC_TIMER_STOP('atm_divergence_damping_3d [ACC_data_xfer]') end subroutine atm_divergence_damping_3d @@ -3373,17 +4368,6 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE integer :: i, iCell, iEdge, k, cell1, cell2 real (kind=RKIND) :: invNs, rcv, p0, flux - MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]') - !$acc enter data copyin(rho_p_save,rho_pp,rho_base,rw_save,rw_p, & - !$acc rtheta_p_save,rtheta_pp,rtheta_base, & - !$acc ru_save,ru_p,wwAvg,ruAvg) & - !$acc create(rho_zz,rho_p,rw,w,rtheta_p,theta_m, & - !$acc ru,u) - if (rk_step == 3) then - !$acc enter data copyin(rt_diabatic_tend,exner_base) & - !$acc create(exner,pressure_p) - end if - MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]') rcv = rgas/(cp-rgas) p0 = 1.0e+05 ! this should come from somewhere else... @@ -3529,17 +4513,6 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]') - !$acc exit data delete(rho_p_save,rho_pp,rho_base,rw_save,rw_p, & - !$acc rtheta_p_save,rtheta_pp,rtheta_base, & - !$acc ru_save,ru_p) & - !$acc copyout(rho_zz,rho_p,rw,w,rtheta_p,theta_m, & - !$acc ru,u,wwAvg,ruAvg) - if (rk_step == 3) then - !$acc exit data delete(rt_diabatic_tend,exner_base) & - !$acc copyout(exner,pressure_p) - end if - MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]') end subroutine atm_recover_large_step_variables_work @@ -3774,10 +4747,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & weight_time_old = 1. - weight_time_new - MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') - !$acc enter data copyin(uhAvg, scalar_new) - MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') - !$acc parallel async !$acc loop gang worker private(scalar_weight2, ica) do iEdge=edgeStart,edgeEnd @@ -3872,12 +4841,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & ! MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') -#ifndef DO_PHYSICS - !$acc enter data create(scalar_tend_save) -#else - !$acc enter data copyin(scalar_tend_save) -#endif - !$acc enter data copyin(scalar_old, fnm, fnp, rdnw, wwAvg, rho_zz_old, rho_zz_new) !$acc enter data create(scalar_tend_column) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') @@ -3960,9 +4923,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') - !$acc exit data copyout(scalar_new) - !$acc exit data delete(scalar_tend_column, uhAvg, wwAvg, scalar_old, fnm, fnp, & - !$acc rdnw, rho_zz_old, rho_zz_new, scalar_tend_save) + !$acc exit data delete(scalar_tend_column) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') end subroutine atm_advance_scalars_work @@ -3983,7 +4944,7 @@ subroutine atm_advance_scalars_mono(field_name, block, tend, state, diag, mesh, cellStart, cellEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, & scalar_old, scalar_new, s_max, s_min, wdtn, flux_arr, & - flux_upwind_tmp, flux_tmp, exchange_halo_group, advance_density, rho_zz_int) + flux_upwind_tmp, flux_tmp, config_gpu_aware_mpi, exchange_halo_group, advance_density, rho_zz_int) implicit none @@ -4004,6 +4965,7 @@ subroutine atm_advance_scalars_mono(field_name, block, tend, state, diag, mesh, real (kind=RKIND), dimension(:,:), intent(inout) :: wdtn real (kind=RKIND), dimension(:,:), intent(inout) :: flux_arr real (kind=RKIND), dimension(:,:), intent(inout) :: flux_upwind_tmp, flux_tmp + logical, intent(in) :: config_gpu_aware_mpi procedure (halo_exchange_routine) :: exchange_halo_group logical, intent(in), optional :: advance_density real (kind=RKIND), dimension(:,:), intent(inout), optional :: rho_zz_int @@ -4082,7 +5044,7 @@ subroutine atm_advance_scalars_mono(field_name, block, tend, state, diag, mesh, edgesOnCell, edgesOnCell_sign, nEdgesOnCell, fnm, fnp, rdnw, nAdvCellsForEdge, & advCellsForEdge, adv_coefs, adv_coefs_3rd, scalar_old, scalar_new, s_max, s_min, & wdtn, scale_arr, flux_arr, flux_upwind_tmp, flux_tmp, & - bdyMaskCell, bdyMaskEdge, & + bdyMaskCell, bdyMaskEdge, config_gpu_aware_mpi, & exchange_halo_group, advance_density, rho_zz_int) call mpas_deallocate_scratch_field(scale) @@ -4130,7 +5092,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge edgesOnCell, edgesOnCell_sign, nEdgesOnCell, fnm, fnp, rdnw, nAdvCellsForEdge, & advCellsForEdge, adv_coefs, adv_coefs_3rd, scalar_old, scalar_new, s_max, s_min, & wdtn, scale_arr, flux_arr, flux_upwind_tmp, flux_tmp, & - bdyMaskCell, bdyMaskEdge, & + bdyMaskCell, bdyMaskEdge, config_gpu_aware_mpi, & exchange_halo_group, advance_density, rho_zz_int) use mpas_atm_dimensions, only : nVertLevels @@ -4146,6 +5108,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge real (kind=RKIND), intent(in) :: dt integer, intent(in) :: cellStart, cellEnd, edgeStart, edgeEnd integer, intent(in) :: cellSolveStart, cellSolveEnd + logical, intent(in) :: config_gpu_aware_mpi procedure (halo_exchange_routine) :: exchange_halo_group logical, intent(in), optional :: advance_density real (kind=RKIND), dimension(:,:), intent(inout), optional :: rho_zz_int @@ -4220,22 +5183,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge ! The transport will maintain this positive definite solution and optionally, shape preservation (monotonicity). - MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc data present(nEdgesOnCell, edgesOnCell, edgesOnCell_sign, & - !$acc invAreaCell, cellsOnCell, cellsOnEdge, nAdvCellsForEdge, & - !$acc advCellsForEdge, adv_coefs, adv_coefs_3rd, dvEdge, bdyMaskCell) - -#ifdef DO_PHYSICS - !$acc enter data copyin(scalar_tend) -#else - !$acc enter data create(scalar_tend) -#endif - if (local_advance_density) then - !$acc enter data copyin(rho_zz_int) - end if - !$acc enter data copyin(scalars_old, rho_zz_old, rdnw, uhAvg, wwAvg) - MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc parallel !$acc loop gang worker @@ -4258,19 +5205,17 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc exit data copyout(scalar_tend) - - !$acc update self(scalars_old) + !$acc update self(scalars_old) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$OMP BARRIER !$OMP MASTER - call exchange_halo_group(block % domain, 'dynamics:'//trim(field_name)//'_old') + call exchange_halo_group(block % domain, 'dynamics:'//trim(field_name)//'_old', config_gpu_aware_mpi) !$OMP END MASTER !$OMP BARRIER MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update device(scalars_old) + !$acc update device(scalars_old) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') ! @@ -4322,13 +5267,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end if - MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - if (.not. local_advance_density) then - !$acc enter data copyin(rho_zz_new) - end if - !$acc enter data copyin(scalars_new, fnm, fnp) !$acc enter data create(scale_arr) - MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') do iScalar = 1, num_scalars @@ -4673,17 +5612,17 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge ! MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update self(scale_arr) + !$acc update self(scale_arr) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$OMP BARRIER !$OMP MASTER - call exchange_halo_group(block % domain, 'dynamics:scale') + call exchange_halo_group(block % domain, 'dynamics:scale', config_gpu_aware_mpi) !$OMP END MASTER !$OMP BARRIER MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update device(scale_arr) + !$acc update device(scale_arr) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$acc parallel @@ -4831,18 +5770,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end do ! loop over scalars - MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - if (local_advance_density) then - !$acc exit data copyout(rho_zz_int) - else - !$acc exit data delete(rho_zz_new) - end if - !$acc exit data copyout(scalars_new) - !$acc exit data delete(scalars_old, scale_arr, rho_zz_old, wwAvg, & - !$acc uhAvg, fnm, fnp, rdnw) - - !$acc end data - MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') + !$acc exit data delete(scale_arr) end subroutine atm_advance_scalars_mono_work @@ -5373,57 +6301,14 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') - if (perturbation_coriolis) then - !$acc enter data copyin(u_init, v_init) - end if if (les_model_opt /= LES_MODEL_NONE) then - !$acc enter data copyin(exner, pressure_b, bn2) + !$acc enter data copyin(bn2) end if !$acc enter data copyin(ustm, hfx, qfx) - if (rk_step == 1) then - !$acc enter data create(tend_w_euler) - !$acc enter data create(tend_u_euler) - !$acc enter data create(tend_theta_euler) - !$acc enter data create(tend_rho) - - !$acc enter data copyin(tend_rho_physics) - !$acc enter data copyin(rb, rr_save) - !$acc enter data copyin(divergence, vorticity) - !$acc enter data copyin(v) - else - !$acc enter data copyin(tend_w_euler) - !$acc enter data copyin(tend_u_euler) - !$acc enter data copyin(tend_theta_euler) - !$acc enter data copyin(tend_rho) - end if - !$acc enter data create(tend_u) - !$acc enter data copyin(cqu, pp, u, w, pv_edge, rho_edge, ke) - !$acc enter data create(h_divergence) - !$acc enter data copyin(ru, rw) !$acc enter data create(rayleigh_damp_coef) - !$acc enter data copyin(tend_ru_physics) - !$acc enter data create(tend_w) - !$acc enter data copyin(rho_zz) - !$acc enter data create(tend_theta) - !$acc enter data copyin(theta_m) - !$acc enter data copyin(ru_save, theta_m_save) - !$acc enter data copyin(cqw) - !$acc enter data copyin(tend_rtheta_physics) - !$acc enter data copyin(rw_save, rt_diabatic_tend) - !$acc enter data create(rthdynten) - !$acc enter data copyin(t_init) - if (les_model_opt /= LES_MODEL_NONE) then - !$acc enter data copyin(ur_cell, vr_cell) - else -#ifdef CURVATURE - !$acc enter data copyin(ur_cell, vr_cell) -#endif - end if !$acc enter data create(eddy_visc_horz) !$acc enter data create(eddy_visc_vert) !$acc enter data create(prandtl_3d_inv) - !$acc enter data copyin(scalars) - !$acc enter data copyin(tend_scalars) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -6058,58 +6943,14 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') - if (perturbation_coriolis) then - !$acc exit data delete(u_init, v_init) - end if if (les_model_opt /= LES_MODEL_NONE) then - !$acc exit data delete(exner, pressure_b) !$acc exit data copyout(bn2) end if !$acc exit data delete(ustm, hfx, qfx) - if (rk_step == 1) then - !$acc exit data copyout(tend_w_euler) - !$acc exit data copyout(tend_u_euler) - !$acc exit data copyout(tend_theta_euler) - !$acc exit data copyout(tend_rho) - - !$acc exit data delete(tend_rho_physics) - !$acc exit data delete(rb, rr_save) - !$acc exit data delete(divergence, vorticity) - !$acc exit data delete(v) - else - !$acc exit data delete(tend_w_euler) - !$acc exit data delete(tend_u_euler) - !$acc exit data delete(tend_theta_euler) - !$acc exit data delete(tend_rho) - end if - !$acc exit data copyout(tend_u) - !$acc exit data delete(cqu, pp, u, w, pv_edge, rho_edge, ke) - !$acc exit data copyout(h_divergence) - !$acc exit data delete(ru, rw) !$acc exit data delete(rayleigh_damp_coef) - !$acc exit data delete(tend_ru_physics) - !$acc exit data copyout(tend_w) - !$acc exit data delete(rho_zz) - !$acc exit data copyout(tend_theta) - !$acc exit data delete(theta_m) - !$acc exit data delete(ru_save, theta_m_save) - !$acc exit data delete(cqw) - !$acc exit data delete(tend_rtheta_physics) - !$acc exit data delete(rw_save, rt_diabatic_tend) - !$acc exit data copyout(rthdynten) - !$acc exit data delete(t_init) - if (les_model_opt /= LES_MODEL_NONE) then - !$acc exit data delete(ur_cell, vr_cell) - else -#ifdef CURVATURE - !$acc exit data delete(ur_cell, vr_cell) -#endif - end if !$acc exit data delete(eddy_visc_horz) !$acc exit data delete(eddy_visc_vert) !$acc exit data delete(prandtl_3d_inv) - !$acc exit data delete(scalars) - !$acc exit data copyout(tend_scalars) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work @@ -6278,26 +7119,10 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & logical :: reconstruct_v - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data copyin(cellsOnEdge,dcEdge,dvEdge, & - !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & - !$acc nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,invAreaCell, & - !$acc invAreaTriangle,edgesOnVertex, & - !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, & - !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, & - !$acc fVertex, & - !$acc verticesOnEdge, & - !$acc invDvEdge,invDcEdge) - !$acc enter data copyin(u,h) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') ! ! Compute height on cell edges at velocity locations ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(h_edge,vorticity,divergence) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang do iEdge=edgeStart,edgeEnd @@ -6382,9 +7207,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ! Replace 2.0 with 2 in exponentiation to avoid outside chance that ! compiler will actually allow "float raised to float" operation - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(ke) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang do iCell=cellStart,cellEnd @@ -6479,14 +7301,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & if(rk_step /= 3) reconstruct_v = .false. end if - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - if (reconstruct_v) then - !$acc enter data create(v) - else - !$acc enter data copyin(v) - end if - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') - if (reconstruct_v) then !$acc parallel default(present) !$acc loop gang @@ -6514,9 +7328,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ! Avoid dividing h_vertex by areaTriangle and move areaTriangle into ! numerator for the pv_vertex calculation - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(pv_vertex) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop collapse(2) do iVertex = vertexStart,vertexEnd @@ -6540,9 +7351,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Compute pv at the edges ! ( this computes pv_edge at all edges bounding real cells ) ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(pv_edge) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop collapse(2) do iEdge = edgeStart,edgeEnd @@ -6560,9 +7368,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ( this computes pv_cell for all real cells ) ! only needed for APVM upwinding ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(pv_cell) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang do iCell=cellStart,cellEnd @@ -6601,9 +7406,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Merged loops for calculating gradPVt, gradPVn and pv_edge ! Also precomputed inverses of dvEdge and dcEdge to avoid repeated divisions ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(gradPVt,gradPVn) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') r = config_apvm_upwinding * dt !$acc parallel default(present) !$acc loop gang @@ -6620,31 +7422,10 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc exit data delete(pv_cell,gradPVt,gradPVn) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') end if ! apvm upwinding - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc exit data delete(cellsOnEdge,dcEdge,dvEdge, & - !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & - !$acc nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,invAreaCell, & - !$acc invAreaTriangle,edgesOnVertex, & - !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, & - !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, & - !$acc verticesOnEdge, & - !$acc fVertex,invDvEdge,invDcEdge) - !$acc exit data delete(u,h) - !$acc exit data copyout(h_edge,vorticity,divergence, & - !$acc ke, & - !$acc v, & - !$acc pv_vertex, & - !$acc pv_edge) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') - end subroutine atm_compute_solve_diagnostics_work @@ -6733,17 +7514,13 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) MPAS_ACC_TIMER_START('atm_init_coupled_diagnostics [ACC_data_xfer]') - ! copyin invariant fields - !$acc enter data copyin(cellsOnEdge,nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, & - !$acc zb_cell,zb3_cell) ! copyin the data that is only on the right-hand side - !$acc enter data copyin(scalars(index_qv,:,:),u,w,rho,theta, & + !$acc enter data copyin(scalars(index_qv,:,:),w,rho,theta, & !$acc rho_base,theta_base) ! copyin the data that will be modified in this routine - !$acc enter data create(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & + !$acc enter data create(theta_m,ru,rw,rho_p,rtheta_base, & !$acc rtheta_p,exner,exner_base,pressure_p, & !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') @@ -6867,17 +7644,12 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & !$acc end parallel MPAS_ACC_TIMER_START('atm_init_coupled_diagnostics [ACC_data_xfer]') - ! delete invariant fields - !$acc exit data delete(cellsOnEdge,nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, & - !$acc zb_cell,zb3_cell) - ! delete the data that is only on the right-hand side - !$acc exit data delete(scalars(index_qv,:,:),u,w,rho,theta, & + !$acc exit data delete(scalars(index_qv,:,:),w,rho,theta, & !$acc rho_base,theta_base) ! copyout the data that will be modified in this routine - !$acc exit data copyout(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & + !$acc exit data copyout(theta_m,ru,rw,rho_p,rtheta_base, & !$acc rtheta_p,exner,exner_base,pressure_p, & !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') @@ -6944,13 +7716,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) - MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - !$acc enter data create(ru_save, u_1, rtheta_p_save, theta_m_1, rho_p_save, rw_save, & - !$acc w_1, rho_zz_1) & - !$acc copyin(ru, u_2, rtheta_p, rho_p, theta_m_2, rho_zz_2, rw, & - !$acc w_2, ruAvg, wwAvg, ruAvg_split, wwAvg_split, rho_zz_old_split) - MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - ! Interim fix for the atm_compute_dyn_tend_work subroutine accessing uninitialized values ! in garbage cells of theta_m !$acc kernels @@ -7055,13 +7820,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su !$acc end parallel end if - MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - !$acc exit data copyout(ru_save, u_1, rtheta_p_save, rho_p_save, rw_save, & - !$acc w_1, theta_m_1, rho_zz_1, ruAvg, wwAvg, ruAvg_split, & - !$acc wwAvg_split) & - !$acc delete(ru, u_2, rtheta_p, rho_p, theta_m_2, rho_zz_2, rw, & - !$acc w_2, rho_zz_old_split) - MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') end subroutine atm_rk_dynamics_substep_finish @@ -7116,9 +7874,6 @@ subroutine atm_zero_gradient_w_bdy_work( w, bdyMaskCell, nearestRelaxationCell, integer :: iCell, k - MPAS_ACC_TIMER_START('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') - !$acc enter data copyin(w) - MPAS_ACC_TIMER_STOP('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -7134,9 +7889,6 @@ subroutine atm_zero_gradient_w_bdy_work( w, bdyMaskCell, nearestRelaxationCell, end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') - !$acc exit data copyout(w) - MPAS_ACC_TIMER_STOP('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') end subroutine atm_zero_gradient_w_bdy_work @@ -7177,11 +7929,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel call mpas_pool_get_array(mesh, 'bdyMaskEdge', bdyMaskEdge) call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc enter data copyin(tend_ru,tend_rho,tend_rt,tend_rw, & - !$acc rt_diabatic_tend) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd @@ -7208,11 +7955,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel end if end do !$acc end parallel - - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc exit data copyout(tend_ru,tend_rho,tend_rt, & - !$acc tend_rw,rt_diabatic_tend) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') end subroutine atm_bdy_adjust_dynamics_speczone_tend @@ -7298,10 +8040,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me divdamp_coef = divdamp_coef_ptr vertexDegree = vertexDegree_ptr - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') - !$acc enter data copyin(tend_rho, tend_rt, rho_zz, theta_m, tend_ru, ru) !$acc enter data create(divergence1, divergence2, vorticity1, vorticity2) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') ! First, Rayleigh damping terms for ru, rtheta_m and rho_zz !$acc parallel default(present) @@ -7446,11 +8185,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do ! end of loop over edges !$acc end parallel - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') - !$acc exit data copyout(tend_rho, tend_rt, tend_ru) - !$acc exit data delete(rho_zz, theta_m, ru, & - !$acc divergence1, divergence2, vorticity1, vorticity2) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') + !$acc exit data delete(divergence1, divergence2, vorticity1, vorticity2) end subroutine atm_bdy_adjust_dynamics_relaxzone_tend @@ -7484,10 +8219,6 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, & call mpas_pool_get_array(state, 'theta_m', theta_m, 2) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) - - MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]') - !$acc enter data copyin(rtheta_base, theta_m, rtheta_p) - MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -7502,11 +8233,6 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]') - !$acc exit data copyout(theta_m, rtheta_p) & - !$acc delete(rtheta_base) - MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]') - end subroutine atm_bdy_reset_speczone_values !------------------------------------------------------------------------- @@ -7595,10 +8321,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, integer :: iCell, iEdge, iScalar, i, k, cell1, cell2 !--- - MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') - !$acc enter data create(scalars_tmp) & - !$acc copyin(scalars_new) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') + !$acc enter data create(scalars_tmp) !$acc parallel default(present) !$acc loop gang worker @@ -7680,10 +8403,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') - !$acc exit data delete(scalars_tmp) & - !$acc copyout(scalars_new) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') + !$acc exit data delete(scalars_tmp) end subroutine atm_bdy_adjust_scalars_work @@ -7753,10 +8473,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, & !--- - MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc enter data copyin(scalars_new) - MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd ! threaded over cells @@ -7777,10 +8493,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, & end do ! updates now in temp storage !$acc end parallel - - MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc exit data copyout(scalars_new) - MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]') end subroutine atm_bdy_set_scalars_work @@ -7850,16 +8562,6 @@ subroutine summarize_timestep(domain) nVertLevels = nVertLevels_ptr num_scalars = num_scalars_ptr - MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - if (config_print_detailed_minmax_vel) then - !$acc enter data copyin(w,u,v) - else if (config_print_global_minmax_vel) then - !$acc enter data copyin(w,u) - end if - if (config_print_global_minmax_sca) then - !$acc enter data copyin(scalars) - end if - MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') if (config_print_detailed_minmax_vel) then call mpas_log_write('') @@ -8218,17 +8920,6 @@ subroutine summarize_timestep(domain) end if - MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - if (config_print_detailed_minmax_vel) then - !$acc exit data delete(w,u,v) - else if (config_print_global_minmax_vel) then - !$acc exit data delete(w,u) - end if - if (config_print_global_minmax_sca) then - !$acc exit data delete(scalars) - end if - MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') - end subroutine summarize_timestep end module atm_time_integration diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index 35c4034815..7a79527910 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -30,7 +30,8 @@ function atm_core_init(domain, startTimeStamp) result(ierr) use mpas_atm_dimensions, only : mpas_atm_set_dims use mpas_atm_diagnostics_manager, only : mpas_atm_diag_setup use mpas_atm_threading, only : mpas_atm_threading_init - use atm_time_integration, only : mpas_atm_dynamics_init + use atm_time_integration, only : mpas_atm_dynamics_init, & + mpas_atm_pre_dynamics, mpas_atm_post_dynamics use mpas_timer, only : mpas_timer_start, mpas_timer_stop use mpas_attlist, only : mpas_modify_att use mpas_string_utils, only : mpas_string_replace @@ -507,6 +508,7 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) call mpas_pool_get_dimension(block % dimensions, 'edgeSolveThreadStart', edgeSolveThreadStart) call mpas_pool_get_dimension(block % dimensions, 'edgeSolveThreadEnd', edgeSolveThreadEnd) + call mpas_atm_pre_compute_solve_diagnostics(block) !$OMP PARALLEL DO do thread=1,nThreads if (.not. config_do_restart .or. (config_do_restart .and. config_do_DAcycling)) then @@ -525,6 +527,7 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) edgeThreadStart(thread), edgeThreadEnd(thread)) end do !$OMP END PARALLEL DO + call mpas_atm_post_compute_solve_diagnostics(block) deallocate(ke_vertex) deallocate(ke_edge) @@ -538,13 +541,18 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ) call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal) call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional) + call mpas_reconstruct_2d_h2d(mesh, u, uReconstructX, uReconstructY, uReconstructZ, & + uReconstructZonal, uReconstructMeridional) call mpas_reconstruct(mesh, u, & uReconstructX, & uReconstructY, & uReconstructZ, & uReconstructZonal, & - uReconstructMeridional & + uReconstructMeridional, & + lACC = .true. & ) + call mpas_reconstruct_2d_d2h(mesh, u, uReconstructX, uReconstructY, uReconstructZ, & + uReconstructZonal, uReconstructMeridional) #ifdef DO_PHYSICS !proceed with initialization of physics parameterization if moist_physics is set to true: diff --git a/src/core_atmosphere/mpas_atm_halos.F b/src/core_atmosphere/mpas_atm_halos.F index 983c529673..d1e0478688 100644 --- a/src/core_atmosphere/mpas_atm_halos.F +++ b/src/core_atmosphere/mpas_atm_halos.F @@ -48,18 +48,23 @@ subroutine atm_build_halo_groups(domain, ierr) ! Local variables character(len=StrKIND), pointer :: config_halo_exch_method + logical, pointer :: config_gpu_aware_mpi ! ! Determine from the namelist option config_halo_exch_method which halo exchange method to employ ! call mpas_pool_get_config(domain % blocklist % configs, 'config_halo_exch_method', config_halo_exch_method) + call mpas_pool_get_config(domain % blocklist % configs, 'config_gpu_aware_mpi', config_gpu_aware_mpi) if (trim(config_halo_exch_method) == 'mpas_dmpar') then call mpas_log_write('') call mpas_log_write('*** Using ''mpas_dmpar'' routines for exchanging halos') call mpas_log_write('') + if (config_gpu_aware_mpi) then + call mpas_log_write('GPU-aware MPI is not presently supported with config_halo_exch_method = mpas_dmpar',MPAS_LOG_CRIT) + end if ! ! Set up halo exchange groups used during atmosphere core initialization ! diff --git a/src/core_atmosphere/physics/mpas_atmphys_interface.F b/src/core_atmosphere/physics/mpas_atmphys_interface.F index 67d744bd78..680594e71b 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_interface.F +++ b/src/core_atmosphere/physics/mpas_atmphys_interface.F @@ -6,17 +6,29 @@ ! distributed with this code, or at http://mpas-dev.github.com/license.html ! !================================================================================================================= + +#ifdef MPAS_OPENACC +#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) +#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) +#else +#define MPAS_ACC_TIMER_START(X) +#define MPAS_ACC_TIMER_STOP(X) +#endif + module mpas_atmphys_interface use mpas_kind_types use mpas_pool_routines use mpas_atmphys_constants use mpas_atmphys_vars + use mpas_timer implicit none private public:: allocate_forall_physics, & deallocate_forall_physics, & + pre_microphysics, & + post_microphysics, & MPAS_to_physics, & microphysics_from_MPAS, & microphysics_to_MPAS @@ -546,6 +558,40 @@ subroutine MPAS_to_physics(configs,mesh,state,time_lev,diag,diag_physics,its,ite end subroutine MPAS_to_physics +!================================================================================================================= + subroutine pre_microphysics(configs,state,diag,time_lev) +!================================================================================================================= + +!input variables: + type(mpas_pool_type),intent(in):: configs + type(mpas_pool_type),intent(in):: state + type(mpas_pool_type),intent(in):: diag + + integer:: time_lev + +!local pointers: + real(kind=RKIND),dimension(:,:),pointer :: exner,pressure_b,w + real(kind=RKIND),dimension(:,:),pointer :: rho_zz,theta_m,pressure_p + real(kind=RKIND),dimension(:,:,:),pointer:: scalars + + + MPAS_ACC_TIMER_START('pre_microphysics [ACC_data_xfer]') + call mpas_pool_get_array(diag,'exner' ,exner ) + call mpas_pool_get_array(diag,'pressure_base',pressure_b) + call mpas_pool_get_array(diag,'pressure_p' ,pressure_p) + + call mpas_pool_get_array(state,'rho_zz' ,rho_zz ,time_lev) + call mpas_pool_get_array(state,'theta_m',theta_m,time_lev) + call mpas_pool_get_array(state,'w' ,w ,time_lev) + !$acc update host(exner, pressure_b, pressure_p, rho_zz, theta_m, w) + + call mpas_pool_get_array(state,'scalars',scalars,time_lev) + !$acc update host(scalars) + + MPAS_ACC_TIMER_STOP('pre_microphysics [ACC_data_xfer]') + +end subroutine pre_microphysics + !================================================================================================================= subroutine microphysics_from_MPAS(configs,mesh,state,time_lev,diag,diag_physics,tend_physics,its,ite) !================================================================================================================= @@ -600,7 +646,7 @@ subroutine microphysics_from_MPAS(configs,mesh,state,time_lev,diag,diag_physics, call mpas_pool_get_dimension(state,'index_qv',index_qv) call mpas_pool_get_dimension(state,'index_qc',index_qc) call mpas_pool_get_dimension(state,'index_qr',index_qr) - call mpas_pool_get_array(state,'scalars',scalars,time_lev) + call mpas_pool_get_array(state,'scalars',scalars,time_lev) qv => scalars(index_qv,:,:) qc => scalars(index_qc,:,:) qr => scalars(index_qr,:,:) @@ -1089,6 +1135,48 @@ subroutine microphysics_to_MPAS(configs,mesh,state,time_lev,diag,diag_physics,te end subroutine microphysics_to_MPAS + !================================================================================================================= + subroutine post_microphysics(configs,state,diag,tend,time_lev) +!================================================================================================================= + +!input variables: + type(mpas_pool_type),intent(in):: configs + type(mpas_pool_type),intent(in):: state + type(mpas_pool_type),intent(in):: diag + type(mpas_pool_type),intent(inout):: tend + + + integer:: time_lev + +!local pointers: + real(kind=RKIND),dimension(:,:),pointer :: exner,exner_b,pressure_b,rtheta_p,rtheta_b + real(kind=RKIND),dimension(:,:),pointer :: rho_zz,theta_m,pressure_p + real(kind=RKIND),dimension(:,:,:),pointer:: scalars + real(kind=RKIND),dimension(:,:),pointer :: rt_diabatic_tend + + call mpas_pool_get_array(diag,'exner' ,exner ) + call mpas_pool_get_array(diag,'exner_base' ,exner_b ) + call mpas_pool_get_array(diag,'pressure_base',pressure_b) + call mpas_pool_get_array(diag,'pressure_p' ,pressure_p) + call mpas_pool_get_array(diag,'rtheta_base' ,rtheta_b ) + call mpas_pool_get_array(diag,'rtheta_p' ,rtheta_p ) + + call mpas_pool_get_array(state,'rho_zz' ,rho_zz ,time_lev) + call mpas_pool_get_array(state,'theta_m',theta_m,time_lev) + + call mpas_pool_get_array(state,'scalars',scalars,time_lev) + + call mpas_pool_get_array(tend,'rt_diabatic_tend',rt_diabatic_tend) + + + MPAS_ACC_TIMER_START('post_microphysics [ACC_data_xfer]') + !$acc update device(exner, exner_b, pressure_b, pressure_p, rtheta_b) + !$acc update device(rtheta_p, rho_zz, theta_m, scalars) + !$acc update device(rt_diabatic_tend) + MPAS_ACC_TIMER_STOP('post_microphysics [ACC_data_xfer]') + +end subroutine post_microphysics + !================================================================================================================= end module mpas_atmphys_interface !================================================================================================================= diff --git a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F index cebf566cc4..d80fd09e5b 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F +++ b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F @@ -6,6 +6,15 @@ ! distributed with this code, or at http://mpas-dev.github.com/license.html ! !================================================================================================================= + +#ifdef MPAS_OPENACC +#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) +#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) +#else +#define MPAS_ACC_TIMER_START(X) +#define MPAS_ACC_TIMER_STOP(X) +#endif + module mpas_atmphys_todynamics use mpas_kind_types use mpas_pool_routines @@ -13,10 +22,11 @@ module mpas_atmphys_todynamics use mpas_atm_dimensions use mpas_atmphys_constants, only: R_d,R_v,degrad + use mpas_timer implicit none private - public:: physics_get_tend + public:: physics_get_tend, pre_physics_get_tend, post_physics_get_tend !Interface between the physics parameterizations and the non-hydrostatic dynamical core. @@ -47,6 +57,40 @@ module mpas_atmphys_todynamics contains + +!================================================================================================================= + subroutine pre_physics_get_tend(configs,state,diag,tend) +!================================================================================================================= + +!input variables: + type(mpas_pool_type),intent(in):: configs + type(mpas_pool_type),intent(in):: state + type(mpas_pool_type),intent(in):: diag + type(mpas_pool_type),intent(in):: tend + +!local variables: + real(kind=RKIND),dimension(:,:),pointer:: mass ! time level 2 rho_zz + real(kind=RKIND),dimension(:,:),pointer:: mass_edge ! diag rho_edge + real(kind=RKIND),dimension(:,:),pointer:: theta_m ! time level 1 + real(kind=RKIND),dimension(:,:,:),pointer:: scalars + + real(kind=RKIND),dimension(:,:),pointer:: tend_u_phys + real(kind=RKIND),dimension(:,:,:),pointer:: tend_scalars + + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + call mpas_pool_get_array(state,'theta_m' ,theta_m,1) + call mpas_pool_get_array(state,'scalars' ,scalars,1) + call mpas_pool_get_array(state,'rho_zz' ,mass,2 ) + call mpas_pool_get_array(diag ,'rho_edge',mass_edge) + call mpas_pool_get_array(diag ,'tend_u_phys',tend_u_phys) + + !$acc update self(theta_m, scalars, mass, mass_edge) + + call mpas_pool_get_array(tend,'scalars_tend',tend_scalars) + !$acc update self(tend_scalars) ! Probably not needed + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + + end subroutine pre_physics_get_tend !================================================================================================================= subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_step,dynamics_substep, & @@ -232,6 +276,26 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s end subroutine physics_get_tend + !================================================================================================================= + subroutine post_physics_get_tend(configs,state,diag,tend) +!================================================================================================================= + +!input variables: + type(mpas_pool_type),intent(in):: configs + type(mpas_pool_type),intent(in):: state + type(mpas_pool_type),intent(in):: diag + type(mpas_pool_type),intent(in):: tend + +!local variables: + real(kind=RKIND),dimension(:,:,:),pointer:: tend_scalars + + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + call mpas_pool_get_array(tend,'scalars_tend',tend_scalars) + !$acc update device(tend_scalars) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + + end subroutine post_physics_get_tend + !================================================================================================================= subroutine physics_get_tend_work( & block,mesh,nCells,nEdges,nCellsSolve,nEdgesSolve,rk_step,dynamics_substep, & diff --git a/src/framework/Makefile b/src/framework/Makefile index 0e5f792b5e..0f57454310 100644 --- a/src/framework/Makefile +++ b/src/framework/Makefile @@ -112,7 +112,7 @@ mpas_c_interfacing.o: xml_stream_parser.o: xml_stream_parser.c $(CC) $(CFLAGS) $(CPPFLAGS) $(CPPINCLUDES) -I../external/ezxml -c xml_stream_parser.c -mpas_halo.o: mpas_derived_types.o mpas_pool_routines.o mpas_log.o +mpas_halo.o: mpas_derived_types.o mpas_pool_routines.o mpas_log.o mpas_timer.o mpas_ptscotch_interface.o : mpas_derived_types.o mpas_log.o diff --git a/src/framework/mpas_dmpar.F b/src/framework/mpas_dmpar.F index 42ed757d01..2b33bcec47 100644 --- a/src/framework/mpas_dmpar.F +++ b/src/framework/mpas_dmpar.F @@ -7448,19 +7448,28 @@ end subroutine mpas_dmpar_exch_group_end_halo_exch!}}} !> exchange is complete. ! !----------------------------------------------------------------------- - subroutine mpas_dmpar_exch_group_full_halo_exch(domain, groupName, iErr)!{{{ + subroutine mpas_dmpar_exch_group_full_halo_exch(domain, groupName, withGPUAwareMPI, iErr)!{{{ type (domain_type), intent(inout) :: domain character (len=*), intent(in) :: groupName + logical, optional, intent(in) :: withGPUAwareMPI integer, optional, intent(out) :: iErr type (mpas_exchange_group), pointer :: exchGroupPtr integer :: nLen + logical :: useGPUAwareMPI if ( present(iErr) ) then iErr = MPAS_DMPAR_NOERR end if + useGPUAwareMPI = .false. + if (present(withGPUAwareMPI)) then + if (withGPUAwareMPI) then + call mpas_log_write(' GPU-aware MPI not implemented in this module', MPAS_LOG_CRIT) + end if + end if + nLen = len_trim(groupName) DMPAR_DEBUG_WRITE(' -- Trying to perform a full exchange for group ' // trim(groupName)) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index 4ab8817c23..8877d6330d 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -280,6 +280,30 @@ subroutine mpas_halo_exch_group_complete(domain, groupName, iErr) call refactor_lists(domain, groupName, iErr) + ! Logic to return early if there are no neighbors to send to + if ( newGroup% nGroupSendNeighbors <=0 ) then + return + end if + + + ! Always copy in the main data member first + !$acc enter data copyin(newGroup) + ! Then the data in the members of the type + !$acc enter data copyin(newGroup % recvBuf(:), newGroup % sendBuf(:)) + ! !$acc enter data copyin(newGroup % sendBuf(:)) + !$acc enter data copyin(newGroup % fields(:)) + do i = 1, newGroup % nFields + !$acc enter data copyin(newGroup % fields(i)) + !$acc enter data copyin(newGroup % fields(i) % nSendLists(:,:)) + !$acc enter data copyin(newGroup % fields(i) % packOffsets(:)) + !$acc enter data copyin(newGroup % fields(i) % sendListSrc(:,:,:)) + !$acc enter data copyin(newGroup % fields(i) % sendListDst(:,:,:)) + !$acc enter data copyin(newGroup % fields(i) % nRecvLists(:,:)) + !$acc enter data copyin(newGroup % fields(i) % unpackOffsets(:)) + !$acc enter data copyin(newGroup % fields(i) % recvListSrc(:,:,:)) + !$acc enter data copyin(newGroup % fields(i) % recvListDst(:,:,:)) + end do + end subroutine mpas_halo_exch_group_complete @@ -349,15 +373,26 @@ subroutine mpas_halo_exch_group_destroy(domain, groupName, iErr) deallocate(cursor % fields(i) % compactHaloInfo) deallocate(cursor % fields(i) % compactSendLists) deallocate(cursor % fields(i) % compactRecvLists) + !$acc exit data delete(cursor % fields(i) % nSendLists(:,:)) deallocate(cursor % fields(i) % nSendLists) + !$acc exit data delete(cursor % fields(i) % sendListSrc(:,:,:)) deallocate(cursor % fields(i) % sendListSrc) + !$acc exit data delete(cursor % fields(i) % sendListDst(:,:,:)) deallocate(cursor % fields(i) % sendListDst) + !$acc exit data delete(cursor % fields(i) % packOffsets(:)) deallocate(cursor % fields(i) % packOffsets) + !$acc exit data delete(cursor % fields(i) % nRecvLists(:,:)) deallocate(cursor % fields(i) % nRecvLists) + !$acc exit data delete(cursor % fields(i) % recvListSrc(:,:,:)) deallocate(cursor % fields(i) % recvListSrc) + !$acc exit data delete(cursor % fields(i) % recvListDst(:,:,:)) deallocate(cursor % fields(i) % recvListDst) + !$acc exit data delete(cursor % fields(i) % unpackOffsets(:)) deallocate(cursor % fields(i) % unpackOffsets) + !$acc exit data delete(cursor % fields(i)) end do + ! Use finalize here in-case the copyins in ..._complete increment the reference counter + !$acc exit data finalize delete(cursor % fields(:)) deallocate(cursor % fields) deallocate(cursor % groupPackOffsets) deallocate(cursor % groupSendNeighbors) @@ -368,10 +403,14 @@ subroutine mpas_halo_exch_group_destroy(domain, groupName, iErr) deallocate(cursor % groupToFieldRecvIdx) deallocate(cursor % groupRecvOffsets) deallocate(cursor % groupRecvCounts) + !$acc exit data delete(cursor % sendBuf(:)) deallocate(cursor % sendBuf) + !$acc exit data delete(cursor % recvBuf(:)) deallocate(cursor % recvBuf) deallocate(cursor % sendRequests) deallocate(cursor % recvRequests) + ! Finalize here as well, just in-case + !$acc exit data finalize delete(cursor) deallocate(cursor) end subroutine mpas_halo_exch_group_destroy @@ -495,7 +534,7 @@ end subroutine mpas_halo_exch_group_add_field !> exchange group. ! !----------------------------------------------------------------------- - subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) + subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMPI, iErr) #ifdef MPAS_USE_MPI_F08 use mpi_f08, only : MPI_Datatype, MPI_Comm @@ -508,6 +547,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) use mpas_derived_types, only : domain_type, mpas_halo_group, MPAS_HALO_REAL, MPAS_LOG_CRIT use mpas_pool_routines, only : mpas_pool_get_array use mpas_log, only : mpas_log_write + use mpas_timer, only : mpas_timer_start, mpas_timer_stop ! Parameters #ifdef MPAS_USE_MPI_F08 @@ -527,6 +567,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Arguments type (domain_type), intent(inout) :: domain character (len=*), intent(in) :: groupName + logical, optional, intent(in) :: withGPUAwareMPI integer, optional, intent(out) :: iErr ! Local variables @@ -542,6 +583,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) integer :: comm #endif integer :: mpi_ierr + logical:: useGPUAwareMPI type (mpas_halo_group), pointer :: group integer, dimension(:), pointer :: compactHaloInfo integer, dimension(:), pointer :: compactSendLists @@ -554,12 +596,17 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) integer :: maxNRecvList integer, dimension(:,:,:), CONTIGUOUS pointer :: recvListSrc, recvListDst integer, dimension(:), CONTIGUOUS pointer :: unpackOffsets - + if (present(iErr)) then iErr = 0 end if + useGPUAwareMPI = .false. + if (present(withGPUAwareMPI)) then + useGPUAwareMPI = withGPUAwareMPI + end if + ! ! Find this halo exhange group in the list of groups ! @@ -577,6 +624,12 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) messageType=MPAS_LOG_CRIT) end if + ! Logic to return early if there are no neighbors to send to + if ( group% nGroupSendNeighbors <= 0 ) then + return + end if + + call mpas_timer_start('full_halo_exch') ! ! Get the rank of this task and the MPI communicator to use from the first field in ! the group; all fields should be using the same communicator, so this should not @@ -589,6 +642,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) #endif rank = group % fields(1) % compactHaloInfo(8) + !$acc data present(group % recvBuf(:), group % sendBuf(:)) if(useGPUAwareMPI) ! ! Initiate non-blocking MPI receives for all neighbors @@ -598,9 +652,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) bufstart = group % groupRecvOffsets(i) bufend = group % groupRecvOffsets(i) + group % groupRecvCounts(i) - 1 !TO DO: how do we determine appropriate type here? + !$acc host_data use_device(group % recvBuf) if(useGPUAwareMPI) call MPI_Irecv(group % recvBuf(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, & group % groupRecvNeighbors(i), group % groupRecvNeighbors(i), comm, & group % recvRequests(i), mpi_ierr) + !$acc end host_data else group % recvRequests(i) = MPI_REQUEST_NULL end if @@ -638,12 +694,16 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) case (1) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r1arr, timeLevel=group % fields(i) % timeLevel) - + ! ! Pack send buffer for all neighbors for current field ! + call mpas_timer_start('packing_halo_exch') + !$acc parallel default(present) attach(group % fields(i) % r1arr) if(useGPUAwareMPI) + !$acc loop gang collapse(2) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos + !$acc loop vector do j = 1, maxNSendList if (j <= nSendLists(iHalo,iEndp)) then group % sendBuf(packOffsets(iEndp) + sendListDst(j,iHalo,iEndp)) = & @@ -652,20 +712,31 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - + !$acc end parallel + call mpas_timer_stop('packing_halo_exch') + ! ! Packing code for 2-d real-valued fields ! case (2) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r2arr, timeLevel=group % fields(i) % timeLevel) - + ! ! Pack send buffer for all neighbors for current field - ! + ! + ! Use data regions for specificity and so the reference or attachment counters are easier to make sense of + ! Present should also cause an attach action. OpenACC Spec2.7 Section 2.7.2 describes 'attach action' + ! !$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) + + + call mpas_timer_start('packing_halo_exch') + !$acc parallel default(present) attach(group % fields(i) % r2arr) if(useGPUAwareMPI) + !$acc loop gang collapse(3) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos do j = 1, maxNSendList + !$acc loop vector do i1 = 1, dim1 if (j <= nSendLists(iHalo,iEndp)) then group % sendBuf(packOffsets(iEndp) + dim1 * (sendListDst(j,iHalo,iEndp) - 1) + i1) = & @@ -675,21 +746,27 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - + !$acc end parallel + call mpas_timer_stop('packing_halo_exch') + ! ! Packing code for 3-d real-valued fields ! case (3) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & - group % fields(i) % r3arr, group % fields(i) % timeLevel) - + group % fields(i) % r3arr, group % fields(i) % timeLevel) + ! ! Pack send buffer for all neighbors for current field ! + call mpas_timer_start('packing_halo_exch') + !$acc parallel default(present) attach(group % fields(i) % r3arr) if(useGPUAwareMPI) + !$acc loop gang collapse(4) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos do j = 1, maxNSendList do i2 = 1, dim2 + !$acc loop vector do i1 = 1, dim1 if (j <= nSendLists(iHalo,iEndp)) then group % sendBuf(packOffsets(iEndp) + dim1*dim2*(sendListDst(j,iHalo,iEndp) - 1) & @@ -701,6 +778,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do + !$acc end parallel + call mpas_timer_stop('packing_halo_exch') end select end if @@ -714,9 +793,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) bufstart = group % groupSendOffsets(i) bufend = group % groupSendOffsets(i) + group % groupSendCounts(i) - 1 !TO DO: how do we determine appropriate type here? + !$acc host_data use_device(group % sendBuf) if(useGPUAwareMPI) call MPI_Isend(group % sendBuf(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, & group % groupSendNeighbors(i), rank, comm, & group % sendRequests(i), mpi_ierr) + !$acc end host_data else group % sendRequests(i) = MPI_REQUEST_NULL end if @@ -771,7 +852,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! + call mpas_timer_start('unpacking_halo_exch') + !$acc parallel default(present) attach(group % fields(i) % r1arr) if(useGPUAwareMPI) + !$acc loop gang do iHalo = 1, nHalos + !$acc loop vector do j = 1, maxNRecvList if (j <= nRecvLists(iHalo,iEndp)) then group % fields(i) % r1arr(recvListDst(j,iHalo,iEndp)) = & @@ -779,6 +864,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end if end do end do + !$acc end parallel + call mpas_timer_stop('unpacking_halo_exch') ! ! Unpacking code for 2-d real-valued fields @@ -787,8 +874,13 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! + call mpas_timer_start('unpacking_halo_exch') + !$acc parallel default(present) attach(group % fields(i) % r2arr) if(useGPUAwareMPI) + !$acc loop gang do iHalo = 1, nHalos + !$acc loop worker do j = 1, maxNRecvList + !$acc loop vector do i1 = 1, dim1 if (j <= nRecvLists(iHalo,iEndp)) then group % fields(i) % r2arr(i1, recvListDst(j,iHalo,iEndp)) = & @@ -797,6 +889,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do + !$acc end parallel + call mpas_timer_stop('unpacking_halo_exch') ! ! Unpacking code for 3-d real-valued fields @@ -805,8 +899,12 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! + call mpas_timer_start('unpacking_halo_exch') + !$acc parallel default(present) attach(group % fields(i) % r3arr) if(useGPUAwareMPI) + !$acc loop gang collapse(2) do iHalo = 1, nHalos do j = 1, maxNRecvList + !$acc loop vector collapse(2) do i2 = 1, dim2 do i1 = 1, dim1 if (j <= nRecvLists(iHalo,iEndp)) then @@ -818,12 +916,16 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do + !$acc end parallel + call mpas_timer_stop('unpacking_halo_exch') end select end if end do end do - + ! For the present(group % recvBuf(:), group % sendBuf(:)) + !$acc end data + ! ! Nullify array pointers - not necessary for correctness, but helpful when debugging ! to not leave pointers to what might later be incorrect targets @@ -843,6 +945,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! call MPI_Waitall(group % nGroupSendNeighbors, group % sendRequests, MPI_STATUSES_IGNORE, mpi_ierr) + call mpas_timer_stop('full_halo_exch') + end subroutine mpas_halo_exch_group_full_halo_exch diff --git a/src/framework/mpas_halo_interface.inc b/src/framework/mpas_halo_interface.inc index 8f0934fbb0..b1dd9a9c99 100644 --- a/src/framework/mpas_halo_interface.inc +++ b/src/framework/mpas_halo_interface.inc @@ -3,12 +3,13 @@ ! in a named group ! abstract interface - subroutine halo_exchange_routine(domain, halo_group, ierr) + subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr) use mpas_derived_types, only : domain_type type (domain_type), intent(inout) :: domain character(len=*), intent(in) :: halo_group + logical, intent(in), optional :: withGPUAwareMPI integer, intent(out), optional :: ierr end subroutine halo_exchange_routine diff --git a/src/operators/mpas_vector_reconstruction.F b/src/operators/mpas_vector_reconstruction.F index 605da9cd6d..88d87474ab 100644 --- a/src/operators/mpas_vector_reconstruction.F +++ b/src/operators/mpas_vector_reconstruction.F @@ -202,7 +202,8 @@ end subroutine mpas_init_reconstruct!}}} !> Input: grid meta data and vector component data residing at cell edges !> Output: reconstructed vector field (measured in X,Y,Z) located at cell centers !----------------------------------------------------------------------- - subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, uReconstructZonal, uReconstructMeridional, includeHalos)!{{{ + subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, & + uReconstructZonal, uReconstructMeridional, includeHalos, lACC)!{{{ implicit none @@ -214,9 +215,11 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZonal !< Output: Zonal Component of velocity reconstructed to cell centers real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructMeridional !< Output: Meridional Component of velocity reconstructed to cell centers logical, optional, intent(in) :: includeHalos !< Input: Optional logical that allows reconstruction over halo regions + logical, optional, intent(in) :: lACC !< Input: Optional logical that controls execution on the GPU with OpenACC ! temporary arrays needed in the compute procedure logical :: includeHalosLocal + logical :: lACCLocal integer, pointer :: nCells_ptr, nVertLevels_ptr integer :: nCells, nVertLevels integer, dimension(:,:), pointer :: edgesOnCell @@ -236,6 +239,12 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon includeHalosLocal = .false. end if + if ( present(lACC) ) then + lACCLocal = lACC + else + lACCLocal = .false. + end if + ! stored arrays used during compute procedure call mpas_pool_get_array(meshPool, 'coeffs_reconstruct', coeffs_reconstruct) @@ -258,19 +267,9 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon call mpas_pool_get_config(meshPool, 'on_a_sphere', on_a_sphere) - MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') - ! Only use sections needed, nCells may be all cells or only non-halo cells - !$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & - !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) - !$acc enter data copyin(u(:,:)) - !$acc enter data create(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), & - !$acc uReconstructZ(:,1:nCells),uReconstructZonal(:,1:nCells), & - !$acc uReconstructMeridional(:,1:nCells)) - MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]') - ! loop over cell centers !$omp do schedule(runtime) - !$acc parallel default(present) + !$acc parallel default(present) if(lACCLocal) !$acc loop gang do iCell = 1, nCells ! initialize the reconstructed vectors @@ -305,7 +304,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon if (on_a_sphere) then !$omp do schedule(runtime) - !$acc parallel default(present) + !$acc parallel default(present) if(lACCLocal) !$acc loop gang do iCell = 1, nCells clat = cos(latCell(iCell)) @@ -325,7 +324,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$omp end do else !$omp do schedule(runtime) - !$acc parallel default(present) + !$acc parallel default(present) if(lACCLocal) !$acc loop gang vector collapse(2) do iCell = 1, nCells do k = 1, nVertLevels @@ -337,6 +336,109 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$omp end do end if + end subroutine mpas_reconstruct_2d!}}} + + + subroutine mpas_reconstruct_2d_h2d(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, uReconstructZonal, uReconstructMeridional, includeHalos)!{{{ + + implicit none + + type (mpas_pool_type), intent(in) :: meshPool !< Input: Mesh information + real (kind=RKIND), dimension(:,:), intent(in) :: u !< Input: Velocity field on edges + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructX !< Output: X Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructY !< Output: Y Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZ !< Output: Z Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZonal !< Output: Zonal Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructMeridional !< Output: Meridional Component of velocity reconstructed to cell centers + logical, optional, intent(in) :: includeHalos !< Input: Optional logical that allows reconstruction over halo regions + + logical :: includeHalosLocal + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nEdgesOnCell + integer :: nCells + integer, pointer :: nCells_ptr + real(kind=RKIND), dimension(:), pointer :: latCell, lonCell + real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct + + if ( present(includeHalos) ) then + includeHalosLocal = includeHalos + else + includeHalosLocal = .false. + end if + + ! stored arrays used during compute procedure + call mpas_pool_get_array(meshPool, 'coeffs_reconstruct', coeffs_reconstruct) + + ! temporary variables + call mpas_pool_get_array(meshPool, 'nEdgesOnCell', nEdgesOnCell) + call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell) + call mpas_pool_get_array(meshPool, 'latCell', latCell) + call mpas_pool_get_array(meshPool, 'lonCell', lonCell) + + if ( includeHalosLocal ) then + call mpas_pool_get_dimension(meshPool, 'nCells', nCells_ptr) + else + call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCells_ptr) + end if + nCells = nCells_ptr + + MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') + ! Only use sections needed, nCells may be all cells or only non-halo cells + !$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & + !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) + !$acc enter data copyin(u(:,:)) + !$acc enter data create(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), & + !$acc uReconstructZ(:,1:nCells),uReconstructZonal(:,1:nCells), & + !$acc uReconstructMeridional(:,1:nCells)) + MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]') + + end subroutine mpas_reconstruct_2d_h2d + + + + subroutine mpas_reconstruct_2d_d2h(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, uReconstructZonal, uReconstructMeridional, includeHalos)!{{{ + + implicit none + + type (mpas_pool_type), intent(in) :: meshPool !< Input: Mesh information + real (kind=RKIND), dimension(:,:), intent(in) :: u !< Input: Velocity field on edges + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructX !< Output: X Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructY !< Output: Y Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZ !< Output: Z Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZonal !< Output: Zonal Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructMeridional !< Output: Meridional Component of velocity reconstructed to cell centers + logical, optional, intent(in) :: includeHalos !< Input: Optional logical that allows reconstruction over halo regions + + logical :: includeHalosLocal + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nEdgesOnCell + integer :: nCells + integer, pointer :: nCells_ptr + real(kind=RKIND), dimension(:), pointer :: latCell, lonCell + real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct + + if ( present(includeHalos) ) then + includeHalosLocal = includeHalos + else + includeHalosLocal = .false. + end if + + ! stored arrays used during compute procedure + call mpas_pool_get_array(meshPool, 'coeffs_reconstruct', coeffs_reconstruct) + + ! temporary variables + call mpas_pool_get_array(meshPool, 'nEdgesOnCell', nEdgesOnCell) + call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell) + call mpas_pool_get_array(meshPool, 'latCell', latCell) + call mpas_pool_get_array(meshPool, 'lonCell', lonCell) + + if ( includeHalosLocal ) then + call mpas_pool_get_dimension(meshPool, 'nCells', nCells_ptr) + else + call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCells_ptr) + end if + nCells = nCells_ptr + MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') !$acc exit data delete(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) @@ -346,7 +448,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$acc uReconstructMeridional(:,1:nCells)) MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]') - end subroutine mpas_reconstruct_2d!}}} + end subroutine mpas_reconstruct_2d_d2h !***********************************************************************