20 #ifndef SELDON_FILE_SUPERLU_CXX
22 #include "SuperLU.hxx"
26 #ifndef SELDON_WITH_SUPERLU_DIST
27 void SetComplexOne(superlu::doublecomplex& one)
39 void LUextract(superlu::SuperMatrix *L, superlu::SuperMatrix *U,
40 T *Lval,
int *Lrow,
long *Lcol,
41 T *Uval,
int *Urow,
long *Ucol,
int *snnzL,
46 int fsupc, istart, nsupr;
47 long lastl = 0, lastu = 0;
53 #ifdef SELDON_WITH_SUPERLU_MT
54 superlu::SCPformat *Lstore;
55 superlu::NCPformat *Ustore;
56 Lstore =
static_cast<superlu::SCPformat*
>(L->Store);
57 Ustore =
static_cast<superlu::NCPformat*
>(U->Store);
59 superlu::SCformat *Lstore;
60 superlu::NCformat *Ustore;
61 Lstore =
static_cast<superlu::SCformat*
>(L->Store);
62 Ustore =
static_cast<superlu::NCformat*
>(U->Store);
69 for (k = 0; k <= Lstore->nsuper; ++k) {
71 fsupc = L_FST_SUPC(k);
72 istart = L_SUB_START(fsupc);
73 nsupr = L_SUB_START(fsupc+1) - istart;
77 for (j = fsupc; j < L_FST_SUPC(k+1); ++j) {
78 SNptr = &(
static_cast<T*
>(Lstore->nzval))[L_NZ_START(j)];
81 for (i = U_NZ_START(j); i < U_NZ_START(j+1); ++i) {
82 Uval[lastu] = (
static_cast<T*
>(Ustore->nzval))[i];
83 Urow[lastu++] = U_SUB(i);
85 for (i = 0; i < upper; ++i) {
86 Uval[lastu] = SNptr[i];
87 Urow[lastu++] = L_SUB(istart+i);
93 Lrow[lastl++] = L_SUB(istart + upper - 1);
94 for (i = upper; i < nsupr; ++i) {
95 Lval[lastl] = SNptr[i];
96 Lrow[lastl++] = L_SUB(istart+i);
123 #ifndef SELDON_WITH_SUPERLU_DIST
125 permc_spec = superlu::COLAMD;
129 #ifdef SELDON_WITH_SUPERLU_MT
131 diag_pivot_thresh = 0.01;
136 superlu::set_default_options(&options);
142 superlu::set_default_options_dist(&options);
144 options.ParSymbFact = superlu::YES;
145 options.ColPerm = superlu::PARMETIS;
146 options.IterRefine = superlu::NOREFINE;
151 display_info =
false;
170 #ifndef SELDON_WITH_SUPERLU_DIST
172 superlu::Destroy_SuperNode_Matrix(&L);
173 superlu::Destroy_CompCol_Matrix(&U);
174 if (permc_spec != superlu::MY_PERMC)
180 superlu::StatFree(&stat);
184 superlu::PStatFree(&stat);
187 superlu::Destroy_CompRowLoc_Matrix_dist(&A);
189 superlu::superlu_gridexit(&grid);
203 #ifndef SELDON_WITH_SUPERLU_DIST
204 panel_size = superlu::sp_ienv(1);
205 relax = superlu::sp_ienv(2);
207 #ifdef SELDON_WITH_SUPERLU_MT
208 fact = superlu::EQUILIBRATE;
209 refact = superlu::NO;
211 superlu::StatAlloc(n, nprocs, panel_size, relax, &stat);
212 superlu::StatInit(n, nprocs, &stat);
214 superlu::StatInit(&stat);
220 superlu::PStatInit(&stat);
230 #ifdef SELDON_WITH_SUPERLU_MT
240 display_info =
false;
255 if (
sizeof(int_t) == 8)
301 permc_spec = (colperm_t) type;
308 permc_spec = superlu::MY_PERMC;
309 perm_c.Reallocate(permut.GetM());
310 for (
int i = 0; i < permut.GetM(); i++)
311 perm_c(i) = permut(i);
313 perm_r.Reallocate(perm_c.GetM());
324 #ifdef SELDON_WITH_SUPERLU_DIST
325 superlu::dScalePermstructFree(&ScalePermstruct);
326 superlu::dDestroy_LU(
n, &grid, &LUstruct);
327 superlu::dLUstructFree(&LUstruct);
334 #ifndef SELDON_WITH_SUPERLU_DIST
345 template<
class Prop,
class Allocator>
351 #ifdef SELDON_WITH_SUPERLU_MT
352 Lstore =
static_cast<superlu::SCPformat*
>(L.Store);
353 Ustore =
static_cast<superlu::NCPformat*
>(U.Store);
355 Lstore =
static_cast<superlu::SCformat*
>(L.Store);
356 Ustore =
static_cast<superlu::NCformat*
>(U.Store);
359 int Lnnz = Lstore->nnz;
360 int Unnz = Ustore->nnz;
375 LUextract(&L, &U, Lval.GetData(), Lrow.GetData(), Lcol.GetData(),
376 Uval.GetData(), Urow.GetData(), Ucol.GetData(), &Lsnnz, &Usnnz);
378 Lmat.SetData(m, n, Lval, Lcol, Lrow);
379 Umat.SetData(m, n, Uval, Ucol, Urow);
391 Sort(row_perm_orig, row_perm);
392 Sort(col_perm_orig, col_perm);
394 ApplyInversePermutation(Lmat, row_perm, col_perm);
395 ApplyInversePermutation(Umat, row_perm, col_perm);
412 template<
class Prop,
class Allocator>
423 GetLU(Lmat_col, Umat_col, permuted);
425 Copy(Lmat_col, Lmat);
427 Copy(Umat_col, Umat);
435 size_t taille =
sizeof(int)*(
perm_r.GetM()+perm_c.GetM());
438 #ifdef SELDON_WITH_SUPERLU_MT
439 superlu::superlu_memusage_t mem_usage;
440 int_t panel_size = superlu::sp_ienv(1);
441 superlu::superlu_dQuerySpace(nprocs,
const_cast<superlu::SuperMatrix*
>(&
L),
442 const_cast<superlu::SuperMatrix*
>(&U),
443 panel_size, &mem_usage);
445 superlu::mem_usage_t mem_usage;
446 superlu::dQuerySpace(
const_cast<superlu::SuperMatrix*
>(&
L),
447 const_cast<superlu::SuperMatrix*
>(&U), &mem_usage);
450 taille += mem_usage.total_needed;
458 template<
class T0,
class Prop,
class Storage,
class Allocator>
471 ConvertToCSC(mat, prop, Ptr, IndRow, Val,
false);
475 FactorizeCSC(Ptr, IndRow, Val,
false);
485 int_t panel_size, relax;
487 Init(Ptr.GetM()-1, panel_size, relax);
489 superlu::SuperMatrix AA;
490 int_t nnz = IndRow.GetM();
491 superlu::dCreate_CompCol_Matrix(&AA, n, n, nnz, Val.GetData(),
492 reinterpret_cast<int_t*
>(IndRow.GetData()),
493 reinterpret_cast<int_t*
>(Ptr.GetData()),
495 superlu::SLU_D, superlu::SLU_GE);
498 options.ColPerm = permc_spec;
499 if (permc_spec != superlu::MY_PERMC)
501 perm_r.Reallocate(n);
502 perm_c.Reallocate(n);
506 superlu::get_perm_c(permc_spec, &AA, perm_c.GetData());
509 #ifdef SELDON_WITH_SUPERLU_MT
510 superlu::SuperMatrix AC;
511 superlu::trans_t trans = superlu::NOTRANS;
513 superlu::pdgstrf_init(nprocs, fact, trans, refact, panel_size, relax,
514 diag_pivot_thresh, usepr, drop_tol,
515 perm_c.GetData(), perm_r.GetData(),
516 NULL, lwork, &AA, &AC, &options, &stat);
519 superlu::pdgstrf(&options, &AC, perm_r.GetData(), &L, &U, &stat, &info);
522 superlu::pxgstrf_finalize(&options, &AC);
524 if (info_facto == 0 && display_info)
526 superlu::PrintStat(&stat);
530 superlu::SuperMatrix A;
533 sp_preorder(&options, &AA, perm_c.GetData(), etree.GetData(), &A);
536 superlu::dgstrf(&options, &A, relax, panel_size, etree.GetData(),
537 NULL, lwork, perm_c.GetData(), perm_r.GetData(), &L, &U,
538 &Glu, &stat, &info_facto);
540 if (info_facto == 0 && display_info)
542 superlu::mem_usage_t mem_usage;
543 Lstore = (superlu::SCformat *) L.Store;
544 Ustore = (superlu::NCformat *) U.Store;
545 cout <<
"Number of nonzeros in factor L = " << Lstore->nnz << endl;
546 cout <<
"Number of nonzeros in factor U = " << Ustore->nnz << endl;
547 cout <<
"Number of nonzeros in L+U = "
548 << Lstore->nnz + Ustore->nnz << endl;
549 superlu::dQuerySpace(&L, &U, &mem_usage);
550 cout <<
"Memory used for factorization in MB: "
551 << mem_usage.total_needed / (1024. * 1024.) << endl;
554 superlu::Destroy_CompCol_Permuted(&A);
558 superlu::Destroy_CompCol_Matrix(&AA);
560 Ptr.Nullify(); IndRow.Nullify(); Val.Nullify();
565 template<
class Allocator2>
568 Solve(SeldonNoTrans, x);
573 template<
class Allocator2>
577 Solve(TransA, x.GetData(), 1);
583 double* x_ptr,
int nrhs_)
585 superlu::trans_t trans;
586 if (TransA.NoTrans())
587 trans = superlu::NOTRANS;
589 trans = superlu::TRANS;
591 int_t nb_rhs = nrhs_, info;
593 superlu::dCreate_Dense_Matrix(&B,
n, nb_rhs, x_ptr,
n,
594 superlu::SLU_DN, superlu::SLU_D, superlu::SLU_GE);
596 #ifdef SELDON_WITH_SUPERLU_MT
597 superlu::dgstrs(trans, &
L, &U,
perm_r.GetData(),
598 perm_c.GetData(), &B, &
stat, &info);
600 superlu::dgstrs(trans, &
L, &U, perm_c.GetData(),
604 superlu::Destroy_SuperMatrix_Store(&B);
609 template<
class Allocator2>
612 Solve(SeldonNoTrans, x);
617 template<
class Allocator2>
621 Solve(TransA, x.GetData(), x.GetN());
638 superlu::superlu_dist_mem_usage_t mem_usage;
639 superlu::dQuerySpace_dist(
n,
const_cast<superlu::dLUstruct_t*
>(&LUstruct),
640 const_cast<superlu::gridinfo_t*
>(&grid),
641 const_cast<superlu::SuperLUStat_t*
>(&
stat), &mem_usage);
643 size += mem_usage.total;
651 template<
class T0,
class Prop,
class Storage,
class Allocator>
652 void MatrixSuperLU<double>::
653 FactorizeMatrix(Matrix<T0, Prop, Storage, Allocator> & mat,
656 Vector<long> Ptr; Vector<int> IndRow;
657 Vector<double> Val; General prop;
658 ConvertToCSC(mat, prop, Ptr, IndRow, Val,
false);
668 Vector<double>& Val,
bool sym)
670 Vector<int> glob_num(1);
672 MPI_Comm comm = MPI_COMM_SELF;
673 FactorizeDistributedMatrix(comm, Ptr, IndRow, Val,
674 glob_num, sym,
false);
679 template<
class Allocator2>
682 Solve(SeldonNoTrans, x);
687 template<
class Allocator2>
689 Vector<double, VectFull, Allocator2>& x)
691 Vector<int> glob_num(1);
693 MPI_Comm comm = MPI_COMM_SELF;
694 SolveDistributed(comm, TransA, x, glob_num);
699 double* x_ptr,
int nrhs_)
701 Vector<int> glob_num(1);
703 MPI_Comm comm = MPI_COMM_SELF;
704 SolveDistributed(comm, TransA, x_ptr, nrhs_, glob_num);
709 template<
class Allocator2>
712 Solve(SeldonNoTrans, x);
717 template<
class Allocator2>
719 Matrix<double, General, ColMajor, Allocator2>& x)
721 Vector<int> glob_num(1);
723 MPI_Comm comm = MPI_COMM_SELF;
724 SolveDistributed(comm, TransA, x, glob_num);
728 void MatrixSuperLU<double>::
729 FactorizeDistributedMatrix(MPI_Comm& comm_facto, Vector<long>& Ptr,
730 Vector<int>& IndRow, Vector<double>& Val,
731 const Vector<int>& glob_number,
732 bool sym,
bool keep_matrix)
734 Vector<int> PtrInt(Ptr.GetM());
735 for (
int i = 0; i < Ptr.GetM(); i++)
738 FactorizeParallel(comm_facto, PtrInt, IndRow, Val,
739 glob_number, sym, keep_matrix);
743 void MatrixSuperLU<double>::
744 FactorizeDistributedMatrix(MPI_Comm& comm_facto, Vector<int64_t>& Ptr,
745 Vector<int64_t>& IndRow, Vector<double>& Val,
746 const Vector<int>& glob_number,
747 bool sym,
bool keep_matrix)
749 FactorizeParallel(comm_facto, Ptr, IndRow, Val,
750 glob_number, sym, keep_matrix);
755 void MatrixSuperLU<double>::
756 FactorizeParallel(MPI_Comm& comm_facto,
757 Vector<Tint>&, Vector<Tint>&,
759 const Vector<int>& glob_num,
760 bool sym,
bool keep_matrix)
762 cout <<
"Not available for this type of integers " << endl;
763 cout <<
"Size of Tint = " <<
sizeof(Tint) << endl;
768 void MatrixSuperLU<double>::
769 FactorizeParallel(MPI_Comm& comm_facto,
770 Vector<superlu_int_t>& Ptr, Vector<superlu_int_t>& Row,
771 Vector<double>& Val,
const Vector<int>& glob_num,
772 bool sym,
bool keep_matrix)
774 #ifdef SELDON_WITH_SUPERLU_DOUBLE
780 cout <<
"Problem with option TRANS of SuperLU" << endl;
786 int m_loc_ = Ptr.GetM()-1;
787 int N = m_loc_; nloc = m_loc_;
788 MPI_Allreduce(&m_loc_, &N, 1, MPI_INTEGER, MPI_SUM, comm_facto);
789 int_t m_loc = m_loc_;
792 int_t panel_size, relax;
793 Init(N, panel_size, relax);
794 superlu::dScalePermstructInit(N, N, &ScalePermstruct);
795 superlu::dLUstructInit(
n, &LUstruct);
798 MPI_Comm_size(comm_facto, &nprow);
802 superlu::superlu_gridinit(comm_facto, nprow, npcol, &grid);
806 int_t fst_row = glob_num(0);
807 int_t nnz_loc = Row.GetM();
810 dCreate_CompRowLoc_Matrix_dist(&A,
n,
n, nnz_loc, m_loc, fst_row,
811 Val.GetData(), Row.GetData(), Ptr.GetData(),
812 superlu::SLU_NR_loc, superlu::SLU_D,
817 options.Trans = superlu::NOTRANS;
818 options.Fact = superlu::DOFACT;
820 pdgssvx(&
options, &A, &ScalePermstruct,
821 NULL, m_loc, nrhs, &grid,
822 &LUstruct, &SOLVEstruct, NULL, &
stat, &info_facto);
824 Ptr.Nullify(); Row.Nullify(); Val.Nullify();
826 cout <<
"Recompile Seldon with SELDON_WITH_SUPERLU_DOUBLE" << endl;
827 cout <<
"Double and complex<double> cannot be handled simultaneously" << endl;
833 template<
class Allocator2>
834 void MatrixSuperLU<double>::
835 SolveDistributed(MPI_Comm& comm_facto,
836 const SeldonTranspose& TransA,
837 Vector<double, VectFull, Allocator2>& x,
838 const IVect& glob_num)
840 SolveDistributed(comm_facto, TransA, x.GetData(), 1, glob_num);
844 template<
class Allocator2>
845 void MatrixSuperLU<double>::
846 SolveDistributed(MPI_Comm& comm_facto,
847 const SeldonTranspose& TransA,
848 Matrix<double, General, ColMajor, Allocator2>& x,
849 const IVect& glob_num)
851 SolveDistributed(comm_facto, TransA, x.GetData(), x.GetN(), glob_num);
855 void MatrixSuperLU<double>::SolveDistributed(MPI_Comm& comm_facto,
856 const SeldonTranspose& TransA,
857 double* x_ptr,
int nrhs_,
858 const IVect& glob_num)
860 #ifdef SELDON_WITH_SUPERLU_DOUBLE
861 options.Fact = superlu::FACTORED;
863 if (TransA.NoTrans())
864 options.Trans = superlu::TRANS;
866 options.Trans = superlu::NOTRANS;
868 options.Trans = superlu::NOTRANS;
869 Vector<double> berr(nrhs_);
870 for (
int k = 0; k < nrhs_; k++)
873 int_t nrhs = 1; int_t info;
876 pdgssvx(&
options, &A, &ScalePermstruct, x_ptr,
877 n, nrhs, &grid, &LUstruct, &SOLVEstruct,
878 berr.GetData(), &
stat, &info);
891 void MatrixSuperLU<complex<double> >::Clear()
893 #ifdef SELDON_WITH_SUPERLU_DIST
894 superlu::zScalePermstructFree(&ScalePermstruct);
895 superlu::zDestroy_LU(n, &grid, &LUstruct);
896 superlu::zLUstructFree(&LUstruct);
899 MatrixSuperLU_Base<complex<double> >::Clear();
903 #ifndef SELDON_WITH_SUPERLU_DIST
914 template<
class Prop,
class Allocator>
915 void MatrixSuperLU<complex<double> >
920 #ifdef SELDON_WITH_SUPERLU_MT
921 Lstore =
static_cast<superlu::SCPformat*
>(L.Store);
922 Ustore =
static_cast<superlu::NCPformat*
>(U.Store);
924 Lstore =
static_cast<superlu::SCformat*
>(L.Store);
925 Ustore =
static_cast<superlu::NCformat*
>(U.Store);
928 int Lnnz = Lstore->nnz;
929 int Unnz = Ustore->nnz;
944 LUextract(&L, &U,
reinterpret_cast<superlu::doublecomplex*
>(Lval.GetData()),
945 Lrow.GetData(), Lcol.GetData(),
946 reinterpret_cast<superlu::doublecomplex*
>(Uval.GetData()),
947 Urow.GetData(), Ucol.GetData(), &Lsnnz, &Usnnz);
949 Lmat.SetData(m, n, Lval, Lcol, Lrow);
950 Umat.SetData(m, n, Uval, Ucol, Urow);
962 Sort(row_perm_orig, row_perm);
963 Sort(col_perm_orig, col_perm);
965 ApplyInversePermutation(Lmat, row_perm, col_perm);
966 ApplyInversePermutation(Umat, row_perm, col_perm);
984 template<
class Prop,
class Allocator>
995 GetLU(Lmat_col, Umat_col, permuted);
997 Copy(Lmat_col, Lmat);
999 Copy(Umat_col, Umat);
1007 size_t taille =
sizeof(int)*(perm_r.GetM()+perm_c.GetM());
1010 #ifdef SELDON_WITH_SUPERLU_MT
1011 superlu::superlu_memusage_t mem_usage;
1012 int_t panel_size = superlu::sp_ienv(1);
1013 superlu::superlu_zQuerySpace(nprocs,
const_cast<superlu::SuperMatrix*
>(&L),
1014 const_cast<superlu::SuperMatrix*
>(&U),
1015 panel_size, &mem_usage);
1017 #ifdef SELDON_WITH_SUPERLU_DIST
1018 superlu::superlu_dist_mem_usage_t mem_usage;
1019 superlu::zQuerySpace(
const_cast<superlu::SuperMatrix*
>(&L),
1020 const_cast<superlu::SuperMatrix*
>(&U), &mem_usage);
1023 #ifdef SELDON_WITH_SUPERLU_DIST
1024 superlu::superlu_dist_mem_usage_t mem_usage;
1025 superlu::zQuerySpace(
const_cast<superlu::SuperMatrix*
>(&L),
1026 const_cast<superlu::SuperMatrix*
>(&U), &mem_usage);
1029 #ifdef SELDON_WITH_SUPERLU_SEQ
1030 superlu::mem_usage_t mem_usage;
1031 superlu::zQuerySpace(
const_cast<superlu::SuperMatrix*
>(&L),
1032 const_cast<superlu::SuperMatrix*
>(&U), &mem_usage);
1035 taille += mem_usage.total_needed;
1043 template<
class T0,
class Prop,
class Storage,
class Allocator>
1056 ConvertToCSC(mat, prop, Ptr, IndRow, Val,
false);
1060 FactorizeCSC(Ptr, IndRow, Val,
false);
1066 Vector<complex<double> >& Val,
bool sym)
1069 int_t panel_size, relax;
1071 Init(Ptr.GetM()-1, panel_size, relax);
1073 superlu::SuperMatrix AA;
1074 int_t nnz = IndRow.GetM();
1076 zCreate_CompCol_Matrix(&AA, n, n, nnz,
1077 reinterpret_cast<superlu::doublecomplex*
>(Val.GetData()),
1078 reinterpret_cast<int_t*
>(IndRow.GetData()),
1079 reinterpret_cast<int_t*
>(Ptr.GetData()),
1080 superlu::SLU_NC, superlu::SLU_Z, superlu::SLU_GE);
1083 options.ColPerm = permc_spec;
1084 if (permc_spec != superlu::MY_PERMC)
1086 perm_r.Reallocate(n);
1087 perm_c.Reallocate(n);
1091 superlu::get_perm_c(permc_spec, &AA, perm_c.GetData());
1094 #ifdef SELDON_WITH_SUPERLU_MT
1095 superlu::SuperMatrix AC;
1096 superlu::trans_t trans = superlu::NOTRANS;
1097 perm_r.Reallocate(n);
1098 perm_c.Reallocate(n);
1101 pzgstrf_init(nprocs, fact, trans, refact, panel_size, relax,
1102 diag_pivot_thresh, usepr, drop_tol,
1103 perm_c.GetData(), perm_r.GetData(),
1104 NULL, lwork, &AA, &AC, &options, &stat);
1108 pzgstrf(&options, &AC, perm_r.GetData(), &L, &U, &stat, &info);
1111 superlu::pxgstrf_finalize(&options, &AC);
1113 if (info_facto == 0 && display_info)
1115 cout <<
"Memory used for factorization in MiB: "
1116 << this->GetMemorySize() / (1024. * 1024.) << endl;
1120 superlu::SuperMatrix A;
1122 Vector<int> etree(n);
1123 superlu::sp_preorder(&options, &AA, perm_c.GetData(), etree.GetData(), &A);
1126 superlu::zgstrf(&options, &A, relax, panel_size, etree.GetData(),
1127 NULL, lwork, perm_c.GetData(), perm_r.GetData(), &L, &U,
1128 &Glu, &stat, &info_facto);
1130 if (info_facto == 0 && display_info)
1132 superlu::mem_usage_t mem_usage;
1133 Lstore = (superlu::SCformat *) L.Store;
1134 Ustore = (superlu::NCformat *) U.Store;
1135 cout <<
"Number of nonzeros in factor L = " << Lstore->nnz<<endl;
1136 cout <<
"Number of nonzeros in factor U = " << Ustore->nnz<<endl;
1137 cout <<
"Number of nonzeros in L+U = "
1138 << Lstore->nnz + Ustore->nnz<<endl;
1139 superlu::zQuerySpace(&L, &U, &mem_usage);
1140 cout <<
"Memory used for factorization in MiB: "
1141 << mem_usage.total_needed / (1024. * 1024.) << endl;
1144 superlu::Destroy_CompCol_Permuted(&A);
1149 superlu::Destroy_CompCol_Matrix(&AA);
1151 Ptr.Nullify(); IndRow.Nullify(); Val.Nullify();
1156 template<
class Allocator2>
1157 void MatrixSuperLU<complex<double> >::
1160 Solve(SeldonNoTrans, x);
1165 template<
class Allocator2>
1170 Solve(TransA, x.GetData(), 1);
1178 superlu::trans_t trans = superlu::NOTRANS;
1180 trans = superlu::TRANS;
1182 int_t nb_rhs = nrhs_, info;
1184 zCreate_Dense_Matrix(&B, n, nb_rhs,
1185 reinterpret_cast<superlu::doublecomplex*
>(x_ptr),
1186 n, superlu::SLU_DN, superlu::SLU_Z, superlu::SLU_GE);
1188 #ifdef SELDON_WITH_SUPERLU_MT
1189 superlu::zgstrs(trans, &L, &U, perm_r.GetData(),
1190 perm_c.GetData(), &B, &stat, &info);
1192 superlu::zgstrs(trans, &L, &U, perm_c.GetData(),
1193 perm_r.GetData(), &B, &stat, &info);
1196 superlu::Destroy_SuperMatrix_Store(&B);
1201 template<
class Allocator2>
1205 Solve(SeldonNoTrans, x);
1210 template<
class Allocator2>
1215 Solve(TransA, x.GetData(), x.GetN());
1230 #ifndef SELDON_WITH_SUPERLU_DOUBLE
1233 superlu::superlu_dist_mem_usage_t mem_usage;
1234 superlu::zQuerySpace_dist(n,
const_cast<superlu::zLUstruct_t*
>(&LUstruct),
1235 const_cast<superlu::gridinfo_t*
>(&grid),
1236 const_cast<superlu::SuperLUStat_t*
>(&stat), &mem_usage);
1238 size += mem_usage.total;
1247 template<
class T0,
class Prop,
class Storage,
class Allocator>
1248 void MatrixSuperLU<complex<double> >::
1249 FactorizeMatrix(Matrix<T0, Prop, Storage, Allocator> & mat,
1252 Vector<long> Ptr; Vector<int> IndRow;
1253 Vector<complex<double> > Val; General prop;
1254 ConvertToCSC(mat, prop, Ptr, IndRow, Val,
false);
1262 void MatrixSuperLU<complex<double> >
1263 ::FactorizeCSC(Vector<long>& Ptr, Vector<int>& IndRow,
1264 Vector<complex<double> >& Val,
bool sym)
1266 Vector<int> glob_num(1);
1268 MPI_Comm comm = MPI_COMM_SELF;
1269 FactorizeDistributedMatrix(comm, Ptr, IndRow, Val,
1270 glob_num, sym,
false);
1275 template<
class Allocator2>
1276 void MatrixSuperLU<complex<double> >::
1277 Solve(Vector<complex<double>, VectFull, Allocator2>& x)
1279 Solve(SeldonNoTrans, x);
1284 template<
class Allocator2>
1285 void MatrixSuperLU<complex<double> >::
1286 Solve(
const SeldonTranspose& TransA,
1287 Vector<complex<double>, VectFull, Allocator2>& x)
1289 Vector<int> glob_num(1);
1291 MPI_Comm comm = MPI_COMM_SELF;
1292 SolveDistributed(comm, TransA, x, glob_num);
1296 void MatrixSuperLU<complex<double> >
1297 ::Solve(
const SeldonTranspose& TransA,
1298 complex<double>* x_ptr,
int nrhs_)
1300 Vector<int> glob_num(1);
1302 MPI_Comm comm = MPI_COMM_SELF;
1303 SolveDistributed(comm, TransA, x_ptr, nrhs_, glob_num);
1308 template<
class Allocator2>
1309 void MatrixSuperLU<complex<double> >::
1310 Solve(Matrix<complex<double>, General, ColMajor, Allocator2>& x)
1312 Solve(SeldonNoTrans, x);
1317 template<
class Allocator2>
1318 void MatrixSuperLU<complex<double> >::
1319 Solve(
const SeldonTranspose& TransA,
1320 Matrix<complex<double>, General, ColMajor, Allocator2>& x)
1322 Vector<int> glob_num(1);
1324 MPI_Comm comm = MPI_COMM_SELF;
1325 SolveDistributed(comm, TransA, x, glob_num);
1329 void MatrixSuperLU<complex<double> >::
1330 FactorizeDistributedMatrix(MPI_Comm& comm_facto, Vector<long>& Ptr,
1331 Vector<int>& IndRow, Vector<complex<double> >& Val,
1332 const Vector<int>& glob_number,
1333 bool sym,
bool keep_matrix)
1335 Vector<int> PtrInt(Ptr.GetM());
1336 for (
int i = 0; i < Ptr.GetM(); i++)
1339 FactorizeParallel(comm_facto, PtrInt, IndRow, Val,
1340 glob_number, sym, keep_matrix);
1344 void MatrixSuperLU<complex<double> >::
1345 FactorizeDistributedMatrix(MPI_Comm& comm_facto, Vector<int64_t>& Ptr,
1346 Vector<int64_t>& IndRow, Vector<complex<double> >& Val,
1347 const Vector<int>& glob_number,
1348 bool sym,
bool keep_matrix)
1350 FactorizeParallel(comm_facto, Ptr, IndRow, Val,
1351 glob_number, sym, keep_matrix);
1355 template<
class T
int>
1356 void MatrixSuperLU<complex<double> >::
1357 FactorizeParallel(MPI_Comm& comm_facto,
1358 Vector<Tint>&, Vector<Tint>&,
1359 Vector<complex<double> >&,
1360 const Vector<int>& glob_num,
1361 bool sym,
bool keep_matrix)
1363 cout <<
"Not available for this type of integers " << endl;
1364 cout <<
"Size of Tint = " <<
sizeof(Tint) << endl;
1369 void MatrixSuperLU<complex<double> >::
1370 FactorizeParallel(MPI_Comm& comm_facto,
1371 Vector<superlu_int_t>& Ptr, Vector<superlu_int_t>& Row,
1372 Vector<complex<double> >& Val,
1373 const Vector<int>& glob_num,
1374 bool sym,
bool keep_matrix)
1376 #ifndef SELDON_WITH_SUPERLU_DOUBLE
1382 cout <<
"Problem with option TRANS of SuperLU" << endl;
1388 int m_loc_ = Ptr.GetM()-1;
1389 int N = m_loc_; nloc = m_loc_;
1390 MPI_Allreduce(&m_loc_, &N, 1, MPI_INTEGER, MPI_SUM, comm_facto);
1391 int_t m_loc = m_loc_;
1394 int_t panel_size, relax;
1395 Init(N, panel_size, relax);
1396 superlu::zScalePermstructInit(N, N, &ScalePermstruct);
1397 superlu::zLUstructInit(n, &LUstruct);
1400 MPI_Comm_size(comm_facto, &nprow);
1404 superlu::superlu_gridinit(comm_facto, nprow, npcol, &grid);
1408 int_t fst_row = glob_num(0);
1409 int_t nnz_loc = Row.GetM();
1412 zCreate_CompRowLoc_Matrix_dist(&A, n, n, nnz_loc, m_loc, fst_row,
1413 reinterpret_cast<superlu::doublecomplex*
>
1415 reinterpret_cast<int_t*
>(Row.GetData()),
1416 reinterpret_cast<int_t*
>(Ptr.GetData()),
1417 superlu::SLU_NR_loc, superlu::SLU_Z,
1422 options.Trans = superlu::NOTRANS;
1423 options.Fact = superlu::DOFACT;
1425 pzgssvx(&options, &A, &ScalePermstruct,
1426 NULL, m_loc, nrhs, &grid,
1427 &LUstruct, &SOLVEstruct, NULL, &stat, &info_facto);
1429 Ptr.Nullify(); Row.Nullify(); Val.Nullify();
1431 cout <<
"Recompile Seldon without SELDON_WITH_SUPERLU_DOUBLE" << endl;
1432 cout <<
"Double and complex<double> cannot be handled simultaneously" << endl;
1438 template<
class Allocator2>
1439 void MatrixSuperLU<complex<double> >::
1440 SolveDistributed(MPI_Comm& comm_facto,
1441 const SeldonTranspose& TransA,
1442 Vector<complex<double>, VectFull, Allocator2>& x,
1443 const Vector<int>& glob_num)
1445 SolveDistributed(comm_facto, TransA, x.GetData(), 1, glob_num);
1449 template<
class Allocator2>
1450 void MatrixSuperLU<complex<double> >::
1451 SolveDistributed(MPI_Comm& comm_facto,
1452 const SeldonTranspose& TransA,
1453 Matrix<complex<double>, General, ColMajor, Allocator2>& x,
1454 const Vector<int>& glob_num)
1456 SolveDistributed(comm_facto, TransA, x.GetData(), x.GetN(), glob_num);
1460 void MatrixSuperLU<complex<double> >::
1461 SolveDistributed(MPI_Comm& comm_facto,
1462 const SeldonTranspose& TransA,
1463 complex<double>* x_ptr,
int nrhs_,
1464 const IVect& glob_num)
1466 #ifndef SELDON_WITH_SUPERLU_DOUBLE
1467 options.Fact = superlu::FACTORED;
1469 if (TransA.NoTrans())
1470 options.Trans = superlu::TRANS;
1472 options.Trans = superlu::NOTRANS;
1475 options.Trans = superlu::NOTRANS;
1476 Vector<double> berr(nrhs_);
1477 for (
int k = 0; k < nrhs_; k++)
1480 int_t nrhs = 1; int_t info;
1483 pzgssvx(&options, &A, &ScalePermstruct,
1484 reinterpret_cast<superlu::doublecomplex*
>(x_ptr),
1485 n, nrhs, &grid, &LUstruct, &SOLVEstruct,
1486 berr.GetData(), &stat, &info);
1501 template<
class MatrixSparse,
class T>
1504 mat_lu.FactorizeMatrix(A, keep_matrix);
1509 template<
class MatrixSparse,
class T>
1511 bool keep_matrix, complex<T>& x)
1513 throw WrongArgument(
"GetLU(Matrix<complex<T> >& A, MatrixSuperLU<T>& mat_lu, bool)",
1514 "The LU matrix must be complex");
1519 template<
class MatrixSparse,
class T>
1522 throw WrongArgument(
"GetLU(Matrix<T>& A, MatrixSuperLU<complex<T> >& mat_lu, bool)",
1523 "The sparse matrix must be complex");
1528 template<
class T0,
class Prop,
class Storage,
class Allocator,
class T>
1538 GetLU(A, mat_lu, keep_matrix, x);
1543 template<
class T,
class Allocator>
1552 template<
class T,
class Allocator>
1556 mat_lu.Solve(TransA, x);
1561 template<
class T,
class Prop,
class Allocator>
1565 mat_lu.Solve(SeldonNoTrans, x);
1571 template<
class T,
class Prop,
class Allocator>
1575 mat_lu.Solve(TransA, x);
1580 template<
class Allocator>
1586 for (
int i = 0; i < x.GetM(); i++)
1588 y(i, 0) = real(x(i));
1589 y(i, 1) = imag(x(i));
1594 for (
int i = 0; i < x.GetM(); i++)
1595 x(i) = complex<double>(y(i, 0), y(i, 1));
1600 template<
class Allocator>
1607 for (
int i = 0; i < x.GetM(); i++)
1609 y(i, 0) = real(x(i));
1610 y(i, 1) = imag(x(i));
1613 SolveLU(TransA, mat_lu, y);
1615 for (
int i = 0; i < x.GetM(); i++)
1616 x(i) = complex<double>(y(i, 0), y(i, 1));
1622 template<
class Allocator>
1626 throw WrongArgument(
"SolveLU(MatrixSuperLU<complex<double> >, Vector<double>)",
1627 "The result should be a complex vector");
1632 template<
class Allocator>
1637 throw WrongArgument(
"SolveLU(MatrixSuperLU<complex<double> >, Vector<double>)",
1638 "The result should be a complex vector");
1643 #define SELDON_FILE_SUPERLU_CXX