Build preconditioner matrices for X1,X2,LA and factorize, for all modes
the matrix is only radially dependent, and has the form
K_ij = int(s,0,1) d/ds sbase_i(s) (s) + |Phi'(s)|^2 (-m^2
NOTE: Jac_dq3, gij_dq3 are not yet used, but are non-zero for a general hmap!
SUBROUTINE BuildPrecond() ! MODULES USE MODgvec_MPI, ONLY : par_AllReduce USE MODgvec_MHD3D_Vars, ONLY : X1_base,X2_base,LA_base USE MODgvec_MHD3D_Vars, ONLY : X1_BC_Type,X2_BC_Type,LA_BC_type IMPLICIT NONE !----------------------------------------------------------------------------------------------------------------------------------- ! INPUT VARIABLES !----------------------------------------------------------------------------------------------------------------------------------- ! OUTPUT VARIABLES !----------------------------------------------------------------------------------------------------------------------------------- ! LOCAL VARIABLES INTEGER :: ibase,nBase,iMode,modes_str,modes_end,iGP,Deg,iElem,i,j INTEGER :: nD,tBC REAL(wp) :: smn_IP,smn_IP_w_GP,norm_mn ! REAL(wp),DIMENSION(nGP_str:nGP_end) :: DX1_tt, DX1_tz, DX1_zz, DX1, DX1_ss ! REAL(wp),DIMENSION(nGP_str:nGP_end) :: DX2_tt, DX2_tz, DX2_zz, DX2, DX2_ss ! REAL(wp),DIMENSION(nGP_str:nGP_end) :: DLA_tt, DLA_tz, DLA_zz REAL(wp),ALLOCATABLE :: D_mn(:),P_BCaxis(:,:), P_BCedge(:,:) !only needed on MPIroot !=================================================================================================================================== ! WRITE(*,*)'BUILD PRECONDITIONER MATRICES' __PERFON('loop_1') !WHEN COMM CHANGED TO GATHER, ZEROING NOT NEEDED ANYMORE D_buf=0.0_wp smn_IP=1.0_wp/REAL(mn_IP,wp) !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) DEFAULT(SHARED) & !$OMP PRIVATE(iGP,smn_IP_w_GP) loop_nGP: DO iGP=nGP_str,nGP_end !<<<< !dont forget to average !additional variables smn_IP_w_GP=smn_IP*w_GP(iGP) !include gauss weight here! !averaged quantities !X1 DX1_ss(iGP) =smn_IP_w_GP*SUM(bbcov_sJ(:,iGP)*( sJ_p(:,iGP)*dX2_dthet(:,iGP) )**2 ) DX1( iGP) =smn_IP_w_GP*SUM((sJ_h(:,iGP)*Jh_dq1(:,iGP))*(bbcov_sJ(:,iGP)*(sJ_h(:,iGP)*Jh_dq1(:,iGP)) & -sdetJ(:,iGP)*( b_thet(:,iGP)*( b_thet(:,iGP)*gtt_dq1(:,iGP) & +2.0_wp*b_zeta(:,iGP)*gtz_dq1(:,iGP) & ) & +b_zeta(:,iGP)* b_zeta(:,iGP)*gzz_dq1(:,iGP) & ) & ) & ) DX1_tt(iGP) =smn_IP_w_GP*SUM( bbcov_sJ(:,iGP)*( sJ_p(:,iGP)*dX2_ds( :,iGP) )**2 & +b_thet(:,iGP)*sdetJ(:,iGP)*( (2.0_wp*(sJ_p(:,iGP)*dX2_ds( :,iGP) ) & *( b_thet(:,iGP)*g_t1(:,iGP) & +b_zeta(:,iGP)*g_z1(:,iGP) & ) & ) & +b_thet(:,iGP)*Gh11(:,iGP) & ) & ) DX1_tz(iGP) =smn_IP_w_GP*SUM(b_zeta(:,iGP)*sdetJ(:,iGP)*( (2.0_wp*(sJ_p(:,iGP)*dX2_ds( :,iGP)) & *( b_thet(:,iGP)*g_t1(:,iGP) & +b_zeta(:,iGP)*g_z1(:,iGP) & )& ) & +b_thet(:,iGP)*2.0_wp*Gh11(:,iGP) & ) & ) DX1_zz(iGP) =smn_IP_w_GP*SUM(b_zeta(:,iGP)*b_zeta(:,iGP)*sdetJ(:,iGP)*Gh11(:,iGP)) !X2 DX2_ss(iGP) =smn_IP_w_GP*SUM(bbcov_sJ(:,iGP)*( sJ_p(:,iGP)*dX1_dthet(:,iGP) )**2 ) DX2( iGP) =smn_IP_w_GP*SUM((sJ_h(:,iGP)*Jh_dq2(:,iGP))*(bbcov_sJ(:,iGP)*( sJ_h(:,iGP)*Jh_dq2(:,iGP)) & -sdetJ(:,iGP)*( b_thet(:,iGP)*( b_thet(:,iGP)*gtt_dq2(:,iGP) & +2.0_wp*b_zeta(:,iGP)*gtz_dq2(:,iGP) & ) & +b_zeta(:,iGP)* b_zeta(:,iGP)*gzz_dq2(:,iGP) & ) & )& ) DX2_tt(iGP) =smn_IP_w_GP*SUM( bbcov_sJ(:,iGP)*( sJ_p(:,iGP)*dX1_ds( :,iGP) )**2 & +b_thet(:,iGP)*sdetJ(:,iGP)*(-(2.0_wp*(sJ_p(:,iGP)*dX1_ds( :,iGP) ) & *( b_thet(:,iGP)*g_t2(:,iGP) & +b_zeta(:,iGP)*g_z2(:,iGP) & ) & ) & +b_thet(:,iGP)*Gh22(:,iGP) & ) & ) DX2_tz(iGP) =smn_IP_w_GP*SUM(b_zeta(:,iGP)*sdetJ(:,iGP)*(-(2.0_wp*(sJ_p(:,iGP)*dX1_ds( :,iGP) ) & *( b_thet(:,iGP)*g_t2(:,iGP) & +b_zeta(:,iGP)*g_z2(:,iGP) & ) & ) & +b_thet(:,iGP)*2.0_wp*Gh22(:,iGP) & ) & ) DX2_zz(iGP) =smn_IP_w_GP*SUM(b_zeta(:,iGP)*b_zeta(:,iGP)*sdetJ(:,iGP)*Gh22(:,iGP)) !LA DLA_tt(iGP) = smn_IP_w_GP*phiPrime2_GP(iGP)*SUM(g_zz(:,iGP)*sdetJ(:,iGP)) DLA_tz(iGP) = -2.0_wp*smn_IP_w_GP*phiPrime2_GP(iGP)*SUM(g_tz(:,iGP)*sdetJ(:,iGP)) DLA_zz(iGP) = smn_IP_w_GP*phiPrime2_GP(iGP)*SUM(g_tt(:,iGP)*sdetJ(:,iGP)) END DO loop_nGP !iGP !$OMP END PARALLEL DO __PERFOFF('loop_1') __PERFON('par_Reduce_D_buf') !gather all D** (already stored contiguously in D_buf) !THIS SHOULD BE A ALLGATHERV of (nGP_str:nGP_end,:)<=>(1:nGP,:) , NOT A ALLREDUCE (BUT CHEAP ANYWAYS)! CALL par_AllReduce(D_buf,'SUM') __PERFOFF('par_Reduce_D_buf') ALLOCATE(D_mn(1:nGP)) SELECT TYPE(precond_X1); TYPE IS(sll_t_spline_matrix_banded) nBase = X1_Base%s%nBase modes_str = X1_Base%f%modes_str modes_end = X1_Base%f%modes_end deg = X1_base%s%deg ALLOCATE(P_BCaxis(1:deg+1,1:2*deg+1),P_BCedge(nBase-deg:nBase,nBase-2*deg:nBase)) !CHECK =0 IF(MPIroot)THEN IF(SUM(ABS(DX1_ss(1:nGP))).LT.REAL(nGP,wp)*1.0E-10) & WRITE(UNIT_stdout,*)'WARNING: very small DX1_ss: m,n,SUM(|DX1_ss|)= ',SUM(ABS(DX1_ss(1:nGP))) END IF __PERFON('modes_loop_1') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) & !$OMP PRIVATE(iMode,iGP,D_mn,iElem,i,j,iBase,tBC,nD,norm_mn) & !$OMP FIRSTPRIVATE(P_BCaxis,P_BCedge) & !$OMP DEFAULT(SHARED) DO iMode=modes_str,modes_end !<<< norm_mn=1.0_wp/X1_base%f%snorm_base(iMode) CALL precond_X1(iMode)%reset() !set all values to zero D_mn(1:nGP)= (DX1(1:nGP)+(X1_Base%f%Xmn(1,iMode)**2) *DX1_tt(1:nGP) & -(X1_Base%f%Xmn(1,iMode)*X1_Base%f%Xmn(2,iMode)) *DX1_tz(1:nGP) & !correct sign of theta,zeta derivative! + (X1_Base%f%Xmn(2,iMode)**2) *DX1_zz(1:nGP) ) iGP=1 DO iElem=1,nElems iBase=X1_base%s%base_offset(iElem) DO i=0,deg DO j=0,deg CALL precond_X1(iMode)%add_element(iBase+i,iBase+j, & (SUM( X1_base%s%base_ds_GP(0:degGP,i,iElem) & *DX1_ss(iGP:iGP+degGP) & *X1_base%s%base_ds_GP(0:degGP,j,iElem) & + X1_base%s%base_GP(0:degGP,i,iElem) & *D_mn(iGP:iGP+degGP) & *X1_base%s%base_GP(0:degGP,j,iElem) & )*norm_mn) ) END DO !j=0,deg END DO !i=0,deg iGP=iGP+(degGP+1) END DO !iElem=1,nElems !ACCOUNT FOR BOUNDARY CONDITIONS! P_BCaxis=0.0_wp; P_BCedge=0.0_wp tBC = X1_BC_Type(BC_AXIS,iMode) nD = X1_base%s%nDOF_BC(tBC) IF(nD.GT.0)THEN !save 1:deg rows DO i=1,deg+1; DO j=1,MIN(deg+i,nBase) P_BCaxis(i,j)=precond_X1(iMode)%get_element(i,j) END DO; END DO !j,i P_BCaxis(:,1:MIN(2*deg+1,nBase)) =MATMUL(X1_base%s%R_axis(:,:,tBC),P_BCaxis(:,1:MIN(2*deg+1,nBase))) !also sets rows 1:nD =0 P_BCaxis(1:nD,1:deg+1) =X1_base%s%A_axis(1:nD,:,tBC) DO i=1,deg+1; DO j=1,MIN(deg+i,nBase) CALL Precond_X1(iMode)%set_element( i,j,P_BCaxis(i,j)) END DO; END DO !j,i END IF !nDOF_BCaxis>0 tBC = X1_BC_Type(BC_EDGE,iMode) nD = X1_base%s%nDOF_BC(tBC) IF(nD.GT.0)THEN !save nBase-deg:nBase rows DO i=nBase-deg,nBase; DO j=MAX(1,i-deg),nBase P_BCedge(i,j)=precond_X1(iMode)%get_element(i,j) END DO; END DO !j,i P_BCedge(:,MAX(1,nBase-2*deg):nBase) =MATMUL(X1_base%s%R_edge(:,:,tBC),P_BCedge(:,MAX(1,nBase-2*deg):nBase)) !also sets rows nBase-nD+1:nBase =0 P_BCedge(nBase-nD+1:nBase,nBase-deg:nBase)=X1_base%s%A_edge(nBase-nD+1:nBase,:,tBC) DO i=nBase-deg,nBase; DO j=MAX(1,i-deg),nBase CALL Precond_X1(iMode)%set_element( i,j,P_BCedge(i,j) ) END DO; END DO !j,i END IF !nDOF_BCedge>0 END DO !iMode !$OMP END PARALLEL DO __PERFOFF('modes_loop_1') DEALLOCATE(P_BCaxis,P_BCedge) END SELECT !TYPE X1 SELECT TYPE(precond_X2); TYPE IS(sll_t_spline_matrix_banded) nBase = X2_Base%s%nBase modes_str = X2_Base%f%modes_str modes_end = X2_Base%f%modes_end deg = X2_base%s%deg ALLOCATE(P_BCaxis(1:deg+1,1:2*deg+1),P_BCedge(nBase-deg:nBase,nBase-2*deg:nBase)) IF(MPIroot)THEN !CHECK =0 IF(SUM(ABS(DX2_ss(1:nGP))).LT.REAL(nGP,wp)*1.0E-10) & WRITE(UNIT_stdout,*)'WARNING: very small DX2_ss: m,n,SUM(|DX2_ss|)= ',SUM(ABS(DX2_ss(1:nGP))) END IF __PERFON('modes_loop_2') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) & !$OMP PRIVATE(iMode,iGP,D_mn,iElem,i,j,iBase,tBC,nD,norm_mn) & !$OMP FIRSTPRIVATE(P_BCaxis,P_BCedge) & !$OMP DEFAULT(SHARED) DO iMode=modes_str,modes_end !<<< norm_mn=1.0_wp/X2_base%f%snorm_base(iMode) CALL precond_X2(iMode)%reset() !set all values to zero D_mn(1:nGP)= (DX2(1:nGP)+(X2_Base%f%Xmn(1,iMode)**2) *DX2_tt(1:nGP) & -(X2_Base%f%Xmn(1,iMode)*X2_Base%f%Xmn(2,iMode)) *DX2_tz(1:nGP) & !correct sign of theta,zeta derivative! + (X2_Base%f%Xmn(2,iMode)**2) *DX2_zz(1:nGP) ) iGP=1 DO iElem=1,nElems iBase=X2_base%s%base_offset(iElem) DO i=0,deg DO j=0,deg CALL precond_X2(iMode)%add_element(iBase+i,iBase+j, & (SUM( X2_base%s%base_ds_GP(0:degGP,i,iElem) & *DX2_ss(iGP:iGP+degGP) & *X2_base%s%base_ds_GP(0:degGP,j,iElem) & + X2_base%s%base_GP(0:degGP,i,iElem) & *D_mn(iGP:iGP+degGP) & *X2_base%s%base_GP(0:degGP,j,iElem) & )*norm_mn) ) END DO !j=0,deg END DO !i=0,deg iGP=iGP+(degGP+1) END DO !iElem=1,nElems !ACCOUNT FOR BOUNDARY CONDITIONS! P_BCaxis=0.0_wp; P_BCedge=0.0_wp tBC = X2_BC_Type(BC_AXIS,iMode) nD = X2_base%s%nDOF_BC(tBC) IF(nD.GT.0)THEN !save 1:deg rows DO i=1,deg+1; DO j=1,MIN(deg+i,nBase) P_BCaxis(i,j)=precond_X2(iMode)%get_element(i,j) END DO; END DO !j,i P_BCaxis(:,1:MIN(2*deg+1,nBase))=MATMUL(X2_base%s%R_axis(:,:,tBC),P_BCaxis(:,1:MIN(2*deg+1,nBase))) !also sets rows 1:nD =0 P_BCaxis(1:nD,1:deg+1) =X2_base%s%A_axis(1:nD,:,tBC) DO i=1,deg+1; DO j=1,MIN(deg+i,nBase) CALL Precond_X2(iMode)%set_element( i,j,P_BCaxis(i,j)) END DO; END DO !j,i END IF !nDOF_BCaxis>0 tBC = X2_BC_Type(BC_EDGE,iMode) nD = X2_base%s%nDOF_BC(tBC) IF(nD.GT.0)THEN !save nBase-deg:nBase rows DO i=nBase-deg,nBase; DO j=MAX(1,i-deg),nBase P_BCedge(i,j)=precond_X2(iMode)%get_element(i,j) END DO; END DO !j,i P_BCedge(:,MAX(1,nBase-2*deg):nBase) =MATMUL(X2_base%s%R_edge(:,:,tBC),P_BCedge(:,MAX(1,nBase-2*deg):nBase)) !also sets rows nBase-nD+1:nBase =0 P_BCedge(nBase-nD+1:nBase,nBase-deg:nBase)=X2_base%s%A_edge(nBase-nD+1:nBase,:,tBC) DO i=nBase-deg,nBase; DO j=MAX(1,i-deg),nBase CALL Precond_X2(iMode)%set_element( i,j,P_BCedge(i,j) ) END DO; END DO !j,i END IF !nDOF_BCedge>0 END DO !iMode !$OMP END PARALLEL DO __PERFOFF('modes_loop_2') DEALLOCATE(P_BCaxis,P_BCedge) END SELECT !TYPE X2 SELECT TYPE(precond_LA); TYPE IS(sll_t_spline_matrix_banded) nBase = LA_Base%s%nBase modes_str = LA_Base%f%modes_str modes_end = LA_Base%f%modes_end deg = LA_base%s%deg ALLOCATE(P_BCaxis(1:deg+1,1:2*deg+1),P_BCedge(nBase-deg:nBase,nBase-2*deg:nBase)) __PERFON('modes_loop_3') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) & !$OMP PRIVATE(iMode,iGP,D_mn,iElem,i,j,iBase,tBC,nD,norm_mn) & !$OMP FIRSTPRIVATE(P_BCaxis,P_BCedge) & !$OMP DEFAULT(SHARED) DO iMode=modes_str,modes_end !<<< norm_mn=1.0_wp/LA_base%f%snorm_base(iMode) CALL precond_LA(iMode)%reset() !set all values to zero IF(LA_base%f%zero_odd_even(iMode) .NE. MN_ZERO) THEN !MN_ZERO should not exist D_mn(1:nGP)=( ( (LA_Base%f%Xmn(1,iMode)**2) *DLA_tt(1:nGP) & -(LA_Base%f%Xmn(1,iMode)*LA_Base%f%Xmn(2,iMode)) *DLA_tz(1:nGP) & !correct sign of theta,zeta derivative! + (LA_Base%f%Xmn(2,iMode)**2) *DLA_zz(1:nGP) ))*norm_mn !CHECK =0 IF(SUM(ABS(D_mn(1:nGP))).LT.REAL(nGP,wp)*1.0E-10) WRITE(UNIT_stdout,*)'WARNING: small DLA: m,n,SUM(|DLA_mn|)= ', & LA_Base%f%Xmn(1,iMode),LA_Base%f%Xmn(2,iMode),SUM(D_mn(1:nGP)) iGP=1 DO iElem=1,nElems iBase=LA_base%s%base_offset(iElem) DO i=0,deg DO j=0,deg CALL precond_LA(iMode)%add_element(iBase+i,iBase+j, & (SUM( LA_base%s%base_GP(0:degGP,i,iElem) & *D_mn(iGP:iGP+degGP) & *LA_base%s%base_GP(0:degGP,j,iElem))) ) END DO !j=0,deg END DO !i=0,deg iGP=iGP+(degGP+1) END DO !iElem=1,nElems ELSE !safety, unit matrix, if MN_ZERO exists DO iBase=1,nBase CALL precond_LA(iMode)%set_element(iBase,iBase,1.0_wp) END DO END IF !MN_ZERO does not exists !ACCOUNT FOR BOUNDARY CONDITIONS! P_BCaxis=0.0_wp; P_BCedge=0.0_wp tBC = LA_BC_Type(BC_AXIS,iMode) nD = LA_base%s%nDOF_BC(tBC) IF(nD.GT.0)THEN !save 1:deg rows DO i=1,deg+1; DO j=1,MIN(deg+i,nBase) P_BCaxis(i,j)=precond_LA(iMode)%get_element(i,j) END DO; END DO !j,i P_BCaxis(:,1:MIN(2*deg+1,nBase)) =MATMUL(LA_base%s%R_axis(:,:,tBC),P_BCaxis(:,1:MIN(2*deg+1,nBase))) !also sets rows 1:nD =0 P_BCaxis(1:nD,1:deg+1) =LA_base%s%A_axis(1:nD,:,tBC) DO i=1,deg+1; DO j=1,MIN(deg+i,nBase) CALL Precond_LA(iMode)%set_element( i,j,P_BCaxis(i,j)) END DO; END DO !j,i END IF !nDOF_BCaxis>0 tBC = LA_BC_Type(BC_EDGE,iMode) nD = LA_base%s%nDOF_BC(tBC) IF(nD.GT.0)THEN !save nBase-deg:nBase rows DO i=nBase-deg,nBase; DO j=MAX(1,i-deg),nBase P_BCedge(i,j)=precond_LA(iMode)%get_element(i,j) END DO; END DO !j,i P_BCedge(:,MAX(1,nBase-2*deg):nBase) =MATMUL(LA_base%s%R_edge(:,:,tBC),P_BCedge(:,MAX(1,nBase-2*deg):nBase)) !also sets rows nBase-nD+1:nBase =0 P_BCedge(nBase-nD+1:nBase,nBase-deg:nBase)=LA_base%s%A_edge(nBase-nD+1:nBase,:,tBC) DO i=nBase-deg,nBase; DO j=MAX(1,i-deg),nBase CALL Precond_LA(iMode)%set_element( i,j,P_BCedge(i,j) ) END DO; END DO !j,i END IF !nDOF_BCedge>0 END DO !iMode !$OMP END PARALLEL DO __PERFOFF('modes_loop_3') DEALLOCATE(P_BCaxis,P_BCedge) END SELECT !TYPE LA DEALLOCATE(D_mn) ! WRITE(*,*)' ---> FACTORIZE PRECONDITIONER MATRICES ...' SELECT TYPE(precond_X1); TYPE IS(sll_t_spline_matrix_banded) __PERFON('modes_loop_4') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) PRIVATE(iMode) & !$OMP DEFAULT(SHARED) DO iMode=X1_Base%f%modes_str,X1_Base%f%modes_end !<<< CALL precond_X1(iMode)%factorize() END DO !iMode !$OMP END PARALLEL DO __PERFOFF('modes_loop_4') END SELECT !TYPE X1 SELECT TYPE(precond_X2); TYPE IS(sll_t_spline_matrix_banded) __PERFON('modes_loop_5') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) PRIVATE(iMode) & !$OMP DEFAULT(SHARED) DO iMode=X2_base%f%modes_str,X2_Base%f%modes_end !<<< CALL precond_X2(iMode)%factorize() END DO !iMode !$OMP END PARALLEL DO __PERFOFF('modes_loop_5') END SELECT !TYPE X2 SELECT TYPE(precond_LA); TYPE IS(sll_t_spline_matrix_banded) __PERFON('modes_loop_6') !$OMP PARALLEL DO & !$OMP SCHEDULE(STATIC) PRIVATE(iMode) & !$OMP DEFAULT(SHARED) DO iMode=LA_base%f%modes_str,LA_Base%f%modes_end !<<< CALL precond_LA(iMode)%factorize() END DO !iMode !$OMP END PARALLEL DO __PERFOFF('modes_loop_6') END SELECT !TYPE LA END SUBROUTINE BuildPrecond