Evaluate the variation of the Energy = Force... F_j=-(D_U W(U))_j = -DW(u_h)*testfunc_j NOTE: set callEvalaux TRUE if not called before for the same dofs_in !!
!CALL par_AllReduce(F_MHD3D%X1,'SUM') !<< possible alternative !CALL par_Reduce(F_MHD3D%X1(:,offset_modes(iRank)+1:offset_modes(iRank+1)),'SUM',iRank) !<<<< possible alternative !CALL par_AllReduce(F_MHD3D%X2,'SUM') !<< possible alternative !CALL par_Reduce(F_MHD3D%X2(:,offset_modes(iRank)+1:offset_modes(iRank+1)),'SUM',iRank) !<< possible alternative !CALL par_AllReduce(F_MHD3D%LA,'SUM') !<< possible alternative !CALL par_Reduce(F_MHD3D%LA(:,offset_modes(iRank)+1:offset_modes(iRank+1)),'SUM',iRank) !<< possible alternative
| Type | Intent | Optional | Attributes | Name | ||
|---|---|---|---|---|---|---|
| class(t_sol_var_MHD3D), | intent(in) | :: | dofs_in |
input solution |
||
| logical, | intent(in) | :: | callEvalAux |
set True if evalAux was not called on dofs_in |
||
| integer, | intent(inout) | :: | JacCheck |
if 1 on input: abort if detJ<0. if 2 on input, no abort, unchanged if detJ>0 ,return -1 if detJ<=0 |
||
| class(t_sol_var_MHD3D), | intent(inout) | :: | F_MHD3D |
variation of the energy projected onto the basis functions of dofs_in |
||
| logical, | intent(in), | optional | :: | noBC |
SUBROUTINE EvalForce(dofs_in,callEvalAux,JacCheck,F_MHD3D,noBC) ! MODULES USE MODgvec_Globals, ONLY : nRanks USE MODgvec_MPI, ONLY : par_IReduce,par_IBcast,par_Wait,req1,req2,req3,par_Barrier,par_BCast USE MODgvec_MHD3D_Vars, ONLY : X1_base,X2_base,LA_base,hmap,mu_0,PrecondType USE MODgvec_MHD3D_Vars, ONLY : X1_BC_type,X2_BC_type,LA_BC_type USE MODgvec_sol_var_MHD3D, ONLY : t_sol_var_MHD3D IMPLICIT NONE !----------------------------------------------------------------------------------------------------------------------------------- ! INPUT VARIABLES CLASS(t_sol_var_MHD3D), INTENT(IN ) :: dofs_in !! input solution LOGICAL , INTENT(IN ) :: callEvalAux !! set True if evalAux was not called on dofs_in INTEGER , INTENT(INOUT) :: JacCheck !! if 1 on input: abort if detJ<0. !! if 2 on input, no abort, unchanged if detJ>0 ,return -1 if detJ<=0 LOGICAL, OPTIONAL , INTENT(IN) :: noBC !----------------------------------------------------------------------------------------------------------------------------------- ! OUTPUT VARIABLES CLASS(t_sol_var_MHD3D), INTENT(INOUT) :: F_MHD3D !! variation of the energy projected onto the basis functions of dofs_in !----------------------------------------------------------------------------------------------------------------------------------- ! LOCAL VARIABLES INTEGER :: ibase,nBase,iMode,modes,iGP,i_mn,Deg,iElem,modes_str,modes_end,iRank,offset_modes(0:nRanks) REAL(wp) :: w_GP_IP,p_mu_0 REAL(wp) :: F_X1_GP_IP(nGP_str:nGP_end,1:X1_base%f%modes) REAL(wp) :: F_X1ds_GP_IP(nGP_str:nGP_end,1:X1_base%f%modes) REAL(wp) :: F_X2_GP_IP(nGP_str:nGP_end,1:X2_base%f%modes) REAL(wp) :: F_X2ds_GP_IP(nGP_str:nGP_end,1:X2_base%f%modes) REAL(wp) :: F_LA_GP_IP(nGP_str:nGP_end,1:LA_base%f%modes) REAL(wp) :: dW(1:mn_IP,nGP_str:nGP_end) != p+1/2*B^2=p(s)+|Phi'(s)|^2 (b^alpha *g_{alpha,beta} *b^beta)/(2 *detJ^2) REAL(wp),DIMENSION(1:mn_IP,nGP_str:nGP_end) :: btt_sJ,btz_sJ,bzz_sJ, & != b^theta*b^theta/detJ, b^theta*b^zeta/detJ,b^zeta*b^zeta/detJ coefY,coefY_thet,coefY_zeta,coefY_s !=================================================================================================================================== ! SWRITE(UNIT_stdOut,'(4X,A)',ADVANCE='NO')'COMPUTE FORCE...' #if MPIDEBUG==1 WRITE(UNIT_stdOut,'(4X,A,I4)')'COMPUTE FORCE...',myRank CALL par_Barrier(beforeScreenOut="DEBUG ENTER FORCE") #endif __PERFON('EvalForce') IF(callEvalAux) THEN CALL EvalAux(dofs_in,JacCheck) END IF IF(JacCheck.LE.-1) THEN CALL abort(__STAMP__, & 'bad Jacobian was found when you call EvalAux before!!!', IntInfo=JacCheck ) END IF __PERFON('buildPrecond') IF(PrecondType.GT.0) CALL BuildPrecond() __PERFOFF('buildPrecond') !additional auxiliary variables for X1 and X2 force __PERFON('loop_prepare') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(iGP,i_mn,p_mu_0) DO iGP=nGP_str,nGP_end p_mu_0=mu_0*pres_GP(iGP) DO i_mn=1,mn_IP dW( i_mn,iGP)= 0.5_wp*bbcov_sJ(i_mn,iGP) *sdetJ(i_mn,iGP) + p_mu_0 !=1/(2)*B^2+p btt_sJ(i_mn,iGP)= 0.5_wp*b_thet( i_mn,iGP)*b_thet(i_mn,iGP)*sdetJ(i_mn,iGP) btz_sJ(i_mn,iGP)= 0.5_wp*b_thet( i_mn,iGP)*b_zeta(i_mn,iGP)*sdetJ(i_mn,iGP) bzz_sJ(i_mn,iGP)= 0.5_wp*b_zeta( i_mn,iGP)*b_zeta(i_mn,iGP)*sdetJ(i_mn,iGP) END DO !i_mn END DO !iGP !$OMP END PARALLEL DO __PERFOFF('loop_prepare') nBase = X1_Base%s%nBase modes = X1_Base%f%modes modes_str = X1_Base%f%modes_str modes_end = X1_Base%f%modes_end offset_modes = X1_Base%f%offset_modes deg = X1_base%s%deg __PERFON('EvalForce_modes1') __PERFON('loop_prep_coefs') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) DEFAULT(NONE) & !$OMP PRIVATE(iGP,i_mn) & !$OMP SHARED(nGP_str,nGP_end,mn_IP,dW,J_h,J_p,dX2_dthet,dX2_ds,btt_sJ,bzz_sJ,btz_sJ, & !$OMP coefY,coefY_thet,coefY_zeta,coefY_s, & !$OMP Jh_dq1,g_t1,gtt_dq1,g_z1,gzz_dq1,gtz_dq1) DO iGP=nGP_str,nGP_end DO i_mn=1,mn_IP ! ADDED TO F_X1_GP_IP(iGP,iMode): ! -dW( i_mn,iGP)*J_h(i_mn,iGP)*dX2_ds( i_mn,iGP)*Y1_thet & ![deltaJ]_Y1 ! +dW( i_mn,iGP)*J_p(i_mn,iGP)*Jh_dq1(i_mn,iGP)*Y1 & ![deltaJ]_Y1 ! -btt_sJ(i_mn,iGP)*2.0_wp*g_t1( i_mn,iGP)*Y1_thet & ![delta g_tt]_Y1 ! -btt_sJ(i_mn,iGP)* gtt_dq1( i_mn,iGP)*Y1 & ![delta g_tt]_Y1 ! -bzz_sJ(i_mn,iGP)*2.0_wp*g_z1( i_mn,iGP)*Y1_zeta & ![delta g_zz]_Y1 ! -bzz_sJ(i_mn,iGP)* gzz_dq1( i_mn,iGP)*Y1 & ![delta g_zz]_Y1 ! -btz_sJ(i_mn,iGP)*2.0_wp*g_t1( i_mn,iGP)*Y1_zeta & !2*[delta g_tz]_y1 ! -btz_sJ(i_mn,iGP)*2.0_wp*g_z1( i_mn,iGP)*Y1_thet & !2*[delta g_tz]_y1 ! -btz_sJ(i_mn,iGP)*2.0_wp*gtz_dq1( i_mn,iGP)*Y1 & !2*[delta g_tz]_y1 coefY (i_mn,iGP)=( dW( i_mn,iGP)*J_p(i_mn,iGP)*Jh_dq1(i_mn,iGP) & ![deltaJ]_Y1 -btt_sJ(i_mn,iGP)* gtt_dq1(i_mn,iGP) & ![delta g_tt]_Y1 -bzz_sJ(i_mn,iGP)* gzz_dq1(i_mn,iGP) & ![delta g_zz]_Y1 -btz_sJ(i_mn,iGP)*2.0_wp*gtz_dq1(i_mn,iGP) ) !2*[delta g_tz]_y1 coefY_thet(i_mn,iGP)=(-dW(i_mn,iGP)*J_h(i_mn,iGP)*dX2_ds(i_mn,iGP) & ![deltaJ]_Y1 +2.0_wp*(-btt_sJ(i_mn,iGP)*g_t1(i_mn,iGP) & ![delta g_tt]_Y1 -btz_sJ(i_mn,iGP)*g_z1(i_mn,iGP) ) ) !2*[delta g_tz]_y1 coefY_zeta(i_mn,iGP)=( 2.0_wp*(-bzz_sJ(i_mn,iGP)*g_z1(i_mn,iGP) & ![delta g_zz]_Y1 -btz_sJ(i_mn,iGP)*g_t1(i_mn,iGP) ) ) !2*[delta g_tz]_y1 coefY_s (i_mn,iGP)=(dW(i_mn,iGP)*J_h(i_mn,iGP)*dX2_dthet( i_mn,iGP)) END DO !i_mn END DO !iGP !$OMP END PARALLEL DO __PERFOFF('loop_prep_coefs') __PERFON('fbase') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(iGP,w_GP_IP) DO iGP=nGP_str,nGP_end w_GP_IP=w_GP(iGP)*dthet_dzeta CALL X1_base%f%projectIPtoDOF(.FALSE.,w_GP_IP, 0,coefY( :,iGP),F_X1_GP_IP( iGP,:)) CALL X1_base%f%projectIPtoDOF(.TRUE. ,w_GP_IP,DERIV_THET,coefY_thet(:,iGP),F_X1_GP_IP( iGP,:))!d/dthet CALL X1_base%f%projectIPtoDOF(.TRUE. ,w_GP_IP,DERIV_ZETA,coefY_zeta(:,iGP),F_X1_GP_IP( iGP,:))!d/dzeta CALL X1_base%f%projectIPtoDOF(.FALSE.,w_GP_IP, 0,coefY_s( :,iGP),F_X1ds_GP_IP(iGP,:)) END DO !iGP !$OMP END PARALLEL DO __PERFOFF('fbase') __PERFON('sbase') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(iMode,iElem,iGP,iBase) DO iMode=1,modes F_MHD3D%X1(:,iMode)=0.0_wp DO iElem=nElems_str,nElems_end iGP=(iElem-1)*(degGP+1)+1 iBase=X1_base%s%base_offset(iElem) F_MHD3D%X1(iBase:iBase+deg,iMode) = F_MHD3D%X1(iBase:iBase+deg,iMode) & + MATMUL(F_X1_GP_IP( iGP:iGP+degGP,iMode),X1_base%s%base_GP( 0:degGP,0:deg,iElem)) & + MATMUL(F_X1ds_GP_IP(iGP:iGP+degGP,iMode),X1_base%s%base_ds_GP(0:degGP,0:deg,iElem)) END DO !iElem END DO !iMode !$OMP END PARALLEL DO __PERFOFF('sbase') #if MPIDEBUG==1 CALL par_Barrier(beforeScreenOut="DEBUG BEFORE FIRST REDUCE") #endif !. add up all the pieces of X1 calculated by the different MPI tasks __PERFON('reduce_solution_X1') !!!CALL par_AllReduce(F_MHD3D%X1,'SUM') !<< possible alternative DO iRank=0,nRanks-1 !<<<< IF(offset_modes(iRank+1)-offset_modes(iRank).GT.0) & !!!CALL par_Reduce(F_MHD3D%X1(:,offset_modes(iRank)+1:offset_modes(iRank+1)),'SUM',iRank) !<<<< possible alternative CALL par_IReduce(F_MHD3D%X1(1:nBase,offset_modes(iRank)+1:offset_modes(iRank+1)),'SUM',iRank,req1(iRank)) !<<<< I-reduce different mode ranges to different ranks END DO __PERFOFF('reduce_solution_X1') __PERFOFF('EvalForce_modes1') __PERFON('EvalForce_modes2') __PERFON('loop_prep_coefs') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) DEFAULT(NONE) & !$OMP PRIVATE(iGP,i_mn) & !$OMP SHARED(nGP_str,nGP_end,mn_IP,dW,J_h,J_p,dX1_dthet,dX1_ds,btt_sJ,bzz_sJ,btz_sJ, & !$OMP coefY,coefY_thet,coefY_zeta,coefY_s, & !$OMP Jh_dq2,g_t2,gtt_dq2,g_z2,gzz_dq2,gtz_dq2) DO iGP=nGP_str,nGP_end DO i_mn=1,mn_IP ! ADDED TO F_X2_GP_IP(iGP,iMode): ! +dW( i_mn,iGP)*J_h(i_mn,iGP)*dX1_ds(i_mn,iGP)*Y2_thet & ! [deltaJ]_Y2 ! +dW( i_mn,iGP)*J_p(i_mn,iGP)*Jh_dq2 *Y2 & ! [deltaJ]_Y2 ! -btt_sJ(i_mn,iGP)*2.0_wp*g_t2 *Y2_thet & ! [delta g_tt]_Y2 ! -btt_sJ(i_mn,iGP)* gtt_dq2 *Y2 & ! [delta g_tt]_Y2 ! -bzz_sJ(i_mn,iGP)*2.0_wp*g_z2 *Y2_zeta & ! [delta g_zz]_Y2 ! -bzz_sJ(i_mn,iGP)* gzz_dq2 *Y2 & ! [delta g_zz]_Y2 ! -btz_sJ(i_mn,iGP)*2.0_wp*g_t2 *Y2_zeta & ! 2*[delta g_tz]_Y1 ! -btz_sJ(i_mn,iGP)*2.0_wp*g_z2 *Y2_thet & ! 2*[delta g_tz]_Y1 ! -btz_sJ(i_mn,iGP)*2.0_wp*gtz_dq2 *Y2 & ! 2*[delta g_tz]_Y1 coefY (i_mn,iGP)=( dW(i_mn,iGP)*J_p(i_mn,iGP)*Jh_dq2(i_mn,iGP) & ! [deltaJ]_Y2 -btt_sJ(i_mn,iGP)* gtt_dq2(i_mn,iGP) & ! [delta g_tt]_Y2 -bzz_sJ(i_mn,iGP)* gzz_dq2(i_mn,iGP) & ! [delta g_zz]_Y2 -btz_sJ(i_mn,iGP)*2.0_wp*gtz_dq2(i_mn,iGP) ) ! 2*[delta g_tz]_Y1 coefY_thet(i_mn,iGP)=( dW(i_mn,iGP)*J_h(i_mn,iGP)*dX1_ds(i_mn,iGP) & ! [deltaJ]_Y2 +2.0_wp*(-btt_sJ(i_mn,iGP)*g_t2(i_mn,iGP) & ! [delta g_tt]_Y2 -btz_sJ(i_mn,iGP)*g_z2(i_mn,iGP) ) ) ! 2*[delta g_tz]_Y1 coefY_zeta(i_mn,iGP)=( 2.0_wp*(-bzz_sJ(i_mn,iGP)*g_z2(i_mn,iGP) & ! [delta g_zz]_Y2 -btz_sJ(i_mn,iGP)*g_t2(i_mn,iGP) ) ) ! 2*[delta g_tz]_Y1 coefY_s (i_mn,iGP)=(-dW(i_mn,iGP)*J_h(i_mn,iGP)*dX1_dthet( i_mn,iGP)) END DO !i_mn END DO !iGP !$OMP END PARALLEL DO __PERFOFF('loop_prep_coefs') nBase = X2_base%s%nBase modes = X2_base%f%modes modes_str = X2_base%f%modes_str modes_end = X2_base%f%modes_end offset_modes = X2_Base%f%offset_modes deg = X2_base%s%deg __PERFON('fbase') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(iGP,w_GP_IP) DO iGP=nGP_str,nGP_end w_GP_IP = w_GP(iGP)*dthet_dzeta CALL X2_base%f%projectIPtoDOF(.FALSE.,w_GP_IP, 0,coefY( :,iGP),F_X2_GP_IP( iGP,:)) CALL X2_base%f%projectIPtoDOF(.TRUE. ,w_GP_IP,DERIV_THET,coefY_thet(:,iGP),F_X2_GP_IP( iGP,:))!d/dthet CALL X2_base%f%projectIPtoDOF(.TRUE. ,w_GP_IP,DERIV_ZETA,coefY_zeta(:,iGP),F_X2_GP_IP( iGP,:))!d/dzeta CALL X2_base%f%projectIPtoDOF(.FALSE.,w_GP_IP, 0,coefY_s( :,iGP),F_X2ds_GP_IP(iGP,:)) END DO !iGP !$OMP END PARALLEL DO __PERFOFF('fbase') __PERFON('sbase') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(iMode,iElem,iGP,iBase) DO iMode=1,modes F_MHD3D%X2(:,iMode)=0.0_wp DO iElem=nElems_str,nElems_end iGP=(iElem-1)*(degGP+1)+1 ibase=X2_base%s%base_offset(iElem) F_MHD3D%X2(iBase:iBase+deg,iMode) = F_MHD3D%X2(iBase:iBase+deg,iMode) & + MATMUL(F_X2_GP_IP( iGP:iGP+degGP,iMode),X2_base%s%base_GP( 0:degGP,0:deg,iElem)) & + MATMUL(F_X2ds_GP_IP(iGP:iGP+degGP,iMode),X2_base%s%base_ds_GP(0:degGP,0:deg,iElem)) END DO !iElem END DO !iMode !$OMP END PARALLEL DO __PERFOFF('sbase') #if MPIDEBUG==1 CALL par_Barrier(beforeScreenOut="DEBUG BEFORE X2 REDUCE") #endif __PERFON('reduce_solution_X2') !. add up all the pieces of X2 calculated by the different MPI tasks !!!CALL par_AllReduce(F_MHD3D%X2,'SUM') !<< possible alternative DO iRank=0,nRanks-1 IF(offset_modes(iRank+1)-offset_modes(iRank).GT.0) & !!!CALL par_Reduce(F_MHD3D%X2(:,offset_modes(iRank)+1:offset_modes(iRank+1)),'SUM',iRank) !<< possible alternative CALL par_IReduce(F_MHD3D%X2(1:nBase,offset_modes(iRank)+1:offset_modes(iRank+1)),'SUM',iRank,req2(iRank)) !<<<< I-reduce different mode ranges to different ranks END DO __PERFOFF('reduce_solution_X2') __PERFOFF('EvalForce_modes2') __PERFON('EvalForce_modes3') nBase = LA_base%s%nBase modes = LA_base%f%modes modes_str = LA_base%f%modes_str modes_end = LA_base%f%modes_end offset_modes = LA_Base%f%offset_modes deg = LA_base%s%deg __PERFON('fbase') ! coefY_zeta(i_mn,iGP)= w_GP_IP*sJ_bcov_thet(i_mn,iGP) ! coefY_thet(i_mn,iGP)=-w_GP_IP*sJ_bcov_zeta(i_mn,iGP) !$OMP PARALLEL DO SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(iGP,w_GP_IP) DO iGP=nGP_str,nGP_end w_GP_IP=PhiPrime_GP(iGP)*w_GP(iGP)*dthet_dzeta CALL LA_base%f%projectIPtoDOF(.FALSE., w_GP_IP,DERIV_ZETA, sJ_bcov_thet(:,iGP),F_LA_GP_IP(iGP,:)) !d/dzeta CALL LA_base%f%projectIPtoDOF(.TRUE. ,-w_GP_IP,DERIV_THET, sJ_bcov_zeta(:,iGP),F_LA_GP_IP(iGP,:)) !d/dthet END DO !iGP !$OMP END PARALLEL DO __PERFOFF('fbase') __PERFON('sbase') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) DEFAULT(SHARED) PRIVATE(iMode,iElem,iGP,iBase) DO iMode=1,modes F_MHD3D%LA(:,iMode)=0.0_wp DO iElem=nElems_str,nElems_end iGP=(iElem-1)*(degGP+1)+1 ibase=LA_base%s%base_offset(iElem) F_MHD3D%LA(iBase:iBase+deg,iMode) = F_MHD3D%LA(iBase:iBase+deg,iMode) & + MATMUL(F_LA_GP_IP(iGP:iGP+degGP,iMode),LA_base%s%base_GP(0:degGP,0:deg,iElem)) END DO !iElem END DO !iMode !$OMP END PARALLEL DO __PERFOFF('sbase') #if MPIDEBUG==1 CALL par_Barrier(beforeScreenOut="DEBUG BEFORE LA REDUCE") #endif __PERFON('reduce_solution_LA') !. Add up all the pieces of LA calculated by the different MPI tasks !!!CALL par_AllReduce(F_MHD3D%LA,'SUM') !<< possible alternative DO iRank=0,nRanks-1 IF(offset_modes(iRank+1)-offset_modes(iRank).GT.0) & !!!CALL par_Reduce(F_MHD3D%LA(:,offset_modes(iRank)+1:offset_modes(iRank+1)),'SUM',iRank) !<< possible alternative CALL par_IReduce(F_MHD3D%LA(1:nBase,offset_modes(iRank)+1:offset_modes(iRank+1)),'SUM',iRank,req3(iRank)) !<<<< I-reduce different mode ranges to different ranks END DO __PERFOFF('reduce_solution_LA') __PERFOFF('EvalForce_modes3') __PERFON('EvalForce_modes1_finalize') nBase = X1_base%s%nbase modes = X1_base%f%modes modes_str = X1_base%f%modes_str modes_end = X1_base%f%modes_end offset_modes = X1_Base%f%offset_modes __PERFON('reduce_solution_X1') CALL par_Wait(req1(0:nRanks-1)) #if MPIDEBUG==1 CALL par_Barrier(beforeScreenOut="DEBUG AFTER FINISH REDUCE") #endif __PERFOFF('reduce_solution_X1') __PERFON('apply_precond') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) DEFAULT(NONE) SHARED(modes_str,modes_end,F_MHD3D,X1_BC_type,X1_base) PRIVATE(iMode) DO iMode=modes_str,modes_end CALL X1_base%s%applyBCtoRHS(F_MHD3D%X1(:,iMode),X1_BC_type(:,iMode)) END DO !iMode !$OMP END PARALLEL DO IF(PrecondType.GT.0)THEN SELECT TYPE(precond_X1); TYPE IS(sll_t_spline_matrix_banded) !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) PRIVATE(iMode) & !$OMP DEFAULT(SHARED) DO iMode=modes_str,modes_end !<<<< CALL ApplyPrecond(nBase,precond_X1(iMode),F_MHD3D%X1(:,iMode)) END DO !iMode !$OMP END PARALLEL DO END SELECT !TYPE(precond_X1) END IF !PrecondType.GT.0 __PERFOFF('apply_precond') IF(PrecondType.LE.0)THEN !apply strong BC to X1 if no Precond CALL ApplyBC_Fstrong(1,F_MHD3D) END IF __PERFON('Bcast_solution_X1') DO iRank=0,nRanks-1 IF(offset_modes(iRank+1)-offset_modes(iRank).GT.0) & CALL par_Bcast(F_MHD3D%X1(1:nBase,offset_modes(iRank)+1:offset_modes(iRank+1)),iRank) !<<<< broadcast different mode ranges to different ranks !CALL par_IBcast(F_MHD3D%X1(:,offset_modes(iRank)+1:offset_modes(iRank+1)),iRank,req1(iRank)) !<<<< broadcast different mode ranges to different ranks END DO __PERFOFF('Bcast_solution_X1') __PERFOFF('EvalForce_modes1_finalize') __PERFON('EvalForce_modes2_finalize') nBase = X2_base%s%nBase modes = X2_base%f%modes modes_str = X2_base%f%modes_str modes_end = X2_base%f%modes_end offset_modes = X2_Base%f%offset_modes __PERFON('reduce_solution_X2') CALL par_Wait(req2(0:nRanks-1)) #if MPIDEBUG==1 CALL par_Barrier(beforeScreenOut="DEBUG AFTER FINISH REDUCE X2") #endif __PERFOFF('reduce_solution_X2') __PERFON('apply_precond') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) PRIVATE(iMode) & !$OMP DEFAULT(SHARED) DO iMode=modes_str,modes_end CALL X2_base%s%applyBCtoRHS(F_MHD3D%X2(:,iMode),X2_BC_type(:,iMode)) END DO !iMode !$OMP END PARALLEL DO IF(PrecondType.GT.0)THEN SELECT TYPE(precond_X2); TYPE IS(sll_t_spline_matrix_banded) !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) PRIVATE(iMode) & !$OMP DEFAULT(SHARED) DO iMode=modes_str,modes_end CALL ApplyPrecond(nBase,precond_X2(iMode),F_MHD3D%X2(:,iMode)) END DO !iMode !$OMP END PARALLEL DO END SELECT !TYPE(precond_X2) END IF !PrecondType.GT.0 __PERFOFF('apply_precond') IF(PrecondType.LE.0)THEN !apply strong BC to X2 if no Precond CALL ApplyBC_Fstrong(2,F_MHD3D) END IF __PERFON('Bcast_solution_X2') DO iRank=0,nRanks-1 IF(offset_modes(iRank+1)-offset_modes(iRank).GT.0) & CALL par_Bcast(F_MHD3D%X2(1:nBase,offset_modes(iRank)+1:offset_modes(iRank+1)),iRank) !<<<< reduce different mode ranges to different ranks !CALL par_IBcast(F_MHD3D%X2(:,offset_modes(iRank)+1:offset_modes(iRank+1)),iRank,req2(iRank)) !<<<< reduce different mode ranges to different ranks END DO __PERFOFF('Bcast_solution_X2') __PERFOFF('EvalForce_modes2_finalize') __PERFON('EvalForce_modes3_finalize') nBase = LA_base%s%nBase modes = LA_base%f%modes modes_str = LA_base%f%modes_str modes_end = LA_base%f%modes_end offset_modes = LA_Base%f%offset_modes __PERFON('reduce_solution_LA') CALL par_Wait(req3(0:nRanks-1)) #if MPIDEBUG==1 CALL par_Barrier(beforeScreenOut="DEBUG AFTER FINISH REDUCE LA") #endif __PERFOFF('reduce_solution_LA') __PERFON('apply_precond') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) PRIVATE(iMode) & !$OMP DEFAULT(SHARED) DO iMode=modes_str,modes_end CALL LA_base%s%applyBCtoRHS(F_MHD3D%LA(:,iMode),LA_BC_type(:,iMode)) END DO !iMode !$OMP END PARALLEL DO IF(PrecondType.GT.0)THEN SELECT TYPE(precond_LA); TYPE IS(sll_t_spline_matrix_banded) !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) PRIVATE(iMode) & !$OMP DEFAULT(SHARED) DO iMode=modes_str,modes_end CALL ApplyPrecond(nBase,precond_LA(iMode),F_MHD3D%LA(:,iMode)) END DO !iMode !$OMP END PARALLEL DO END SELECT !TYPE(precond_LA) ELSE !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) PRIVATE(iMode) & !$OMP DEFAULT(SHARED) DO iMode=modes_str,modes_end CALL LA_base%s%mass%solve_inplace(1,F_MHD3D%LA(:,iMode)) END DO !iMode !$OMP END PARALLEL DO END IF !PrecondType.GT.0 __PERFOFF('apply_precond') IF(PrecondType.LE.0)THEN !apply strong BC to LA if no Precond CALL ApplyBC_Fstrong(3,F_MHD3D) END IF #if MPIDEBUG==1 WRITE(UNIT_stdout,*)'DEBUG',myRank, offset_modes(myRank),offset_modes(myRank+1) #endif __PERFON('Bcast_solution_LA') DO iRank=0,nRanks-1 IF(offset_modes(iRank+1)-offset_modes(iRank).GT.0) & ! CALL par_Bcast(F_MHD3D%LA(:,offset_modes(iRank)+1:offset_modes(iRank+1)),iRank) !<< possible alternative CALL par_IBcast(F_MHD3D%LA(1:nBase,offset_modes(iRank)+1:offset_modes(iRank+1)),iRank,req3(iRank)) !<<<< reduce different mode ranges to different ranks END DO ! CALL par_Wait(req1(0:nRanks-1)) ! CALL par_Wait(req2(0:nRanks-1)) #if MPIDEBUG==1 CALL par_Barrier(beforeScreenOut="DEBUG BEFORE FINISH LA BCAST") #endif CALL par_Wait(req3(0:nRanks-1)) #if MPIDEBUG==1 CALL par_Barrier(beforeScreenOut="DEBUG AFTER FINISH BCASTS") #endif __PERFOFF('Bcast_solution_LA') __PERFOFF('EvalForce_modes3_finalize') IF(PRESENT(noBC))THEN IF(noBC)THEN __PERFOFF('EvalForce') RETURN !DEBUG END IF END IF __PERFOFF('EvalForce') ! SWRITE(UNIT_stdOut,'(A,3E21.11)')'... DONE: Norm of force |X1|,|X2|,|LA|: ',SQRT(F_MHD3D%norm_2()) END SUBROUTINE EvalForce