//ff-c++-LIBRARY-dep: htool [mkl|blas] mpi pthread bemtool boost [metis]
//ff-c++-cpp-dep:
// for def  M_PI under windows in <cmath>
#define _USE_MATH_DEFINES

#define BOOST_NO_CXX17_IF_CONSTEXPR
#include <ff++.hpp>
#include <problem.hpp>
#include <AFunction_ext.hpp>
#include <lgfem.hpp>
#include <R3.hpp>

// include the bemtool library .... path define in where library
//#include <bemtool/operator/block_op.hpp>
#include <bemtool/tools.hpp>
#include <bemtool/fem/dof.hpp>
#include <bemtool/operator/operator.hpp>
#include <bemtool/miscellaneous/htool_wrap.hpp>
#include "PlotStream.hpp"

#include "common.hpp"

extern FILE *ThePlotStream;

using namespace std;
using namespace htool;
using namespace bemtool;

#include <type_traits>
#include "common_bem.hpp"
#include "bem.hpp"

template< class K >
AnyType AddIncrement(Stack stack, const AnyType &a) {
    K m = GetAny< K >(a);
    m->increment( );
    Add2StackOfPtr2FreeRC(stack, m);
    if (mpirank==0 && verbosity > 1) cout << "AddIncrement:: increment + Add2StackOfPtr2FreeRC " << endl;
    return a;
}

template<class Type, class K>
AnyType To(Stack stack,Expression emat,Expression einter,int init)
{
    ffassert(einter);
    HMatrixVirt<K>** Hmat = GetAny<HMatrixVirt<K>** >((*einter)(stack));
    ffassert(Hmat && *Hmat);
    HMatrixVirt<K>& H = **Hmat;
    if(std::is_same<Type, KNM<K>>::value) {
        Matrix<K> mdense = H.get_local_dense();
        KNM<K>* M = GetAny<KNM<K>*>((*emat)(stack));
        for (int i=0; i< H.nb_rows(); i++)
            for (int j=0; j< H.nb_cols(); j++)
                (*M)(i,j) = 0;
        for (int i=0; i< mdense.nb_rows(); i++)
            for (int j=0; j< mdense.nb_cols(); j++)
                (*M)(H.get_permt(i+H.get_local_offset()),H.get_perms(j)) = mdense(i,j);

        return M;
    }
    else {
        ffassert(0);
    }
}

template<class Type, class K, int init>
AnyType To(Stack stack,Expression emat,Expression einter)
{ return To<Type, K>(stack,emat,einter,init);}

template<class V, class K>
class Prod {
public:
    const HMatrixVirt<K>* h;
    const V u;
    Prod(HMatrixVirt<K>** v, V w) : h(*v), u(w) {}
    
    void prod(V x) const {int mu = this->u->n/h->nb_cols(); h->mvprod_global(*(this->u), *x, mu);};
    
    static V mv(V Ax, Prod<V, K> A) {
        *Ax = K();
        A.prod(Ax);
        return Ax;
    }
    static V init(V Ax, Prod<V, K> A) {
        Ax->init(A.u->n);
        return mv(Ax, A);
    }
    
};


// post treatment for HMatrix

template<class K>
std::map<std::string, std::string>* get_infos(HMatrixVirt<K>** const& H) {
    return new std::map<std::string, std::string>((*H)->get_infos());
}

string* get_info(std::map<std::string, std::string>* const& infos, string* const& s){
    return new string((*infos)[*s]);
}

ostream & operator << (ostream &out, const std::map<std::string, std::string> & infos)
{
    for (std::map<std::string,std::string>::const_iterator it = infos.begin() ; it != infos.end() ; ++it){
        out<<it->first<<"\t"<<it->second<<std::endl;
    }
    out << std::endl;
    return out;
}

template<class A>
struct PrintPinfos {
    using first_argument_type  = ostream*;
    using second_argument_type = A;
    using result_type          = ostream*;
    static ostream* f(ostream* const  & a,const A & b)  {  *a << *b;
        return a;
    }
};

template<class K>
class plotHMatrix : public OneOperator {
public:
    
    class Op : public E_F0info {
    public:
        Expression a;
        
        static const int n_name_param = 2;
        static basicAC_F0::name_and_type name_param[] ;
        Expression nargs[n_name_param];
        bool arg(int i,Stack stack,bool a) const{ return nargs[i] ? GetAny<bool>( (*nargs[i])(stack) ): a;}
        long argl(int i,Stack stack,long a) const{ return nargs[i] ? GetAny<long>( (*nargs[i])(stack) ): a;}
        
    public:
        Op(const basicAC_F0 &  args,Expression aa) : a(aa) {
            args.SetNameParam(n_name_param,name_param,nargs);
        }
        
        AnyType operator()(Stack stack) const{
            
            bool wait = arg(0,stack,false);
            long dim = argl(1,stack,2);
            
            HMatrixVirt<K>** H =GetAny<HMatrixVirt<K>** >((*a)(stack));
            
            PlotStream theplot(ThePlotStream);
            
            if (mpirank == 0 && ThePlotStream) {
                theplot.SendNewPlot();
                theplot << 3L;
                theplot <= wait;
                theplot << 17L;
                theplot <= dim;
                theplot << 4L;
                theplot <= true;
                theplot.SendEndArgPlot();
                theplot.SendMeshes();
                theplot << 0L;
                theplot.SendPlots();
                theplot << 1L;
                theplot << 31L;
            }
            
            if (!H || !(*H)) {
                if (mpirank == 0&& ThePlotStream) {
                    theplot << 0;
                    theplot << 0;
                    theplot << 0L;
                    theplot << 0L;
                }
            }
            else {
                std::vector<const HMatrix<K> *> dmats = (*H)->get_dense_blocks();
                std::vector<const HMatrix<K> *> lrmats = (*H)->get_low_rank_blocks();
                int nbdense = dmats.size();
                int nblr = lrmats.size();
                
                int sizeworld = (*H)->get_sizeworld();
                int rankworld = (*H)->get_rankworld();
                
                int nbdenseworld[sizeworld];
                int nblrworld[sizeworld];
                MPI_Allgather(&nbdense, 1, MPI_INT, nbdenseworld, 1, MPI_INT, (*H)->get_comm());
                MPI_Allgather(&nblr, 1, MPI_INT, nblrworld, 1, MPI_INT, (*H)->get_comm());
                int nbdenseg = 0;
                int nblrg = 0;
                for (int i=0; i<sizeworld; i++) {
                    nbdenseg += nbdenseworld[i];
                    nblrg += nblrworld[i];
                }
                int* buf = new int[4*(mpirank==0?nbdenseg:nbdense) + 5*(mpirank==0?nblrg:nblr)];
                
                for (int i=0;i<nbdense;i++) {
                    const HMatrix<K>& l = *(dmats[i]);
                    buf[4*i] = l.get_target_cluster().get_offset();
                    buf[4*i+1] = l.get_source_cluster().get_offset();
                    buf[4*i+2] = l.get_target_cluster().get_size();
                    buf[4*i+3] = l.get_source_cluster().get_size();
                }
                
                int displs[sizeworld];
                int recvcounts[sizeworld];
                displs[0] = 0;
                
                for (int i=0; i<sizeworld; i++) {
                    recvcounts[i] = 4*nbdenseworld[i];
                    if (i > 0)    displs[i] = displs[i-1] + recvcounts[i-1];
                }
                MPI_Gatherv(rankworld==0?MPI_IN_PLACE:buf, recvcounts[rankworld], MPI_INT, buf, recvcounts, displs, MPI_INT, 0, (*H)->get_comm());
                
                int* buflr = buf + 4*(mpirank==0?nbdenseg:nbdense);
                double* bufcomp = new double[mpirank==0?nblrg:nblr];
                
                for (int i=0;i<nblr;i++) {
                    const HMatrix<K>& l = *lrmats[i];
                    buflr[5*i] = l.get_target_cluster().get_offset();
                    buflr[5*i+1] = l.get_source_cluster().get_offset();
                    buflr[5*i+2] = l.get_target_cluster().get_size();
                    buflr[5*i+3] = l.get_source_cluster().get_size();
                    buflr[5*i+4] = l.get_low_rank_data()->rank_of();
                    bufcomp[i] = l.get_low_rank_data()->space_saving();
                }
                
                for (int i=0; i<sizeworld; i++) {
                    recvcounts[i] = 5*nblrworld[i];
                    if (i > 0)    displs[i] = displs[i-1] + recvcounts[i-1];
                }
                
                MPI_Gatherv(rankworld==0?MPI_IN_PLACE:buflr, recvcounts[rankworld], MPI_INT, buflr, recvcounts, displs, MPI_INT, 0, (*H)->get_comm());
                
                for (int i=0; i<sizeworld; i++) {
                    recvcounts[i] = nblrworld[i];
                    if (i > 0)    displs[i] = displs[i-1] + recvcounts[i-1];
                }
                
                MPI_Gatherv(rankworld==0?MPI_IN_PLACE:bufcomp, recvcounts[rankworld], MPI_DOUBLE, bufcomp, recvcounts, displs, MPI_DOUBLE, 0, (*H)->get_comm());
                
                if (mpirank == 0 && ThePlotStream ) {
                    
                    int si = (*H)->nb_rows();
                    int sj = (*H)->nb_cols();
                    
                    theplot << si;
                    theplot << sj;
                    theplot << (long)nbdenseg;
                    theplot << (long)nblrg;
                    
                    for (int i=0;i<nbdenseg;i++) {
                        theplot << buf[4*i];
                        theplot << buf[4*i+1];
                        theplot << buf[4*i+2];
                        theplot << buf[4*i+3];
                    }
                    
                    for (int i=0;i<nblrg;i++) {
                        theplot << buflr[5*i];
                        theplot << buflr[5*i+1];
                        theplot << buflr[5*i+2];
                        theplot << buflr[5*i+3];
                        theplot << buflr[5*i+4];
                        theplot << bufcomp[i];
                    }
                    
                    theplot.SendEndPlot();
                    
                }
                delete [] buf;
                delete [] bufcomp;
                
            }
            
            return 0L;
        }
    };
    
    plotHMatrix() : OneOperator(atype<long>(),atype<HMatrixVirt<K> **>()) {}
    
    E_F0 * code(const basicAC_F0 & args) const
    {
        return  new Op(args,t[0]->CastTo(args[0]));
    }
};

template<class K>
basicAC_F0::name_and_type  plotHMatrix<K>::Op::name_param[]= {
    {  "wait", &typeid(bool)},
    {  "dim", &typeid(long)}
};

template<class T, class U, class K, char trans>
class HMatrixInv {
public:
    const T t;
    const U u;
    
    struct HMatVirt: CGMatVirt<int,K> {
        const T tt;
        
        HMatVirt(T ttt) : tt(ttt), CGMatVirt<int,K>((*ttt)->nb_rows()) {}
        K*  addmatmul(K* x,K* Ax) const { (*tt)->mvprod_global(x, Ax); return Ax;}
    };
    
    struct HMatVirtPrec: CGMatVirt<int,K> {
        const T tt;
        std::vector<K> invdiag;
        
        HMatVirtPrec(T ttt) : tt(ttt), CGMatVirt<int,K>((*ttt)->nb_rows()), invdiag((*ttt)->nb_rows(),0) {
            std::vector<const HMatrix<K>*> diagblocks = (*tt)->get_diagonal_blocks();
            std::vector<K> tmp((*ttt)->nb_rows(),0);
            for (int j=0;j<diagblocks.size();j++){
                const HMatrix<K>& submat = *(diagblocks[j]);
                const Matrix<K>& subdensemat= *submat.get_dense_data();
                int local_nr = submat.get_target_cluster().get_size();
                int local_nc = submat.get_source_cluster().get_size();
                int offset_i = submat.get_target_cluster().get_offset();
                int offset_j = submat.get_source_cluster().get_offset();
                for (int i=offset_i;i<offset_i+std::min(local_nr,local_nc);i++){
                    tmp[i] = 1./subdensemat(i-offset_i,i-offset_i);
                }
            }
            (*tt)->cluster_to_target_permutation(tmp.data(),invdiag.data());
            MPI_Allreduce(MPI_IN_PLACE, &(invdiag[0]), (*ttt)->nb_rows(), wrapper_mpi<K>::mpi_type(), MPI_SUM, (*tt)->get_comm());
        }
        
        K*  addmatmul(K* x,K* Ax) const {
            for (int i=0; i<(*tt)->nb_rows(); i++)
                Ax[i] = invdiag[i] * x[i];
            return Ax;
        }
    };
    
    HMatrixInv(T v, U w) : t(v), u(w) {}
    
    void solve(U out) const {
        HMatVirt A(t);
        HMatVirtPrec P(t);
        double eps =1e-6;
        int niterx=3000;
        bool res=fgmres(A,P,1,(K*)*u,(K*)*out,eps,niterx,niterx,(mpirank==0)*verbosity);
        //bool res=fgmres(A,P,1,(K*)*u,(K*)*out,eps,niterx,200,(mpirank==0)*verbosity);
    }
    
    static U inv(U Ax, HMatrixInv<T, U, K, trans> A) {
        A.solve(Ax);
        return Ax;
    }
    static U init(U Ax, HMatrixInv<T, U, K, trans> A) {
        Ax->init(A.u->n);
        return inv(Ax, A);
    }
};

template<class K>
class CompressMat : public OneOperator {
        public:
        class Op : public E_F0info {
                public:
                Expression a,b,c,d;

                static const int n_name_param = 7;
                static basicAC_F0::name_and_type name_param[] ;
                Expression nargs[n_name_param];
                long argl(int i,Stack stack,long a) const{ return nargs[i] ? GetAny<long>( (*nargs[i])(stack) ): a;}
                string* args(int i,Stack stack,string* a) const{ return nargs[i] ? GetAny<string*>( (*nargs[i])(stack) ): a;}
                double arg(int i,Stack stack,double a) const{ return nargs[i] ? GetAny<double>( (*nargs[i])(stack) ): a;}
                pcommworld argc(int i,Stack stack,pcommworld a ) const{ return nargs[i] ? GetAny<pcommworld>( (*nargs[i])(stack) ): a;}

                Op(const basicAC_F0 &  args,Expression aa,Expression bb,Expression cc, Expression dd) : a(aa),b(bb),c(cc),d(dd) {
                        args.SetNameParam(n_name_param,name_param,nargs);
                }
        };

        CompressMat() : OneOperator(atype<const typename CompressMat<K>::Op *>(),
                                    atype<KNM<K>*>(),
                                    atype<KN<double>*>(),
                                    atype<KN<double>*>(),
                                    atype<KN<double>*>()) {}

        E_F0 * code(const basicAC_F0 & args) const {
          return  new Op(args,t[0]->CastTo(args[0]),
                              t[1]->CastTo(args[1]),
                              t[2]->CastTo(args[2]),
                              t[3]->CastTo(args[3]));
        }
};

template<class K>
basicAC_F0::name_and_type  CompressMat<K>::Op::name_param[]= {
  {  "eps", &typeid(double)},
  {  "commworld", &typeid(pcommworld)},
  {  "eta", &typeid(double)},
  {  "minclustersize", &typeid(long)},
  {  "mintargetdepth", &typeid(long)},
  {  "minsourcedepth", &typeid(long)},
  {  "compressor", &typeid(string*)},
};

template<class K>
class MyMatrix: public VirtualGenerator<K>{
        const KNM<K> &M;

public:
        MyMatrix(const KNM<K> &mat):M(mat) {}

        K get_coef(const int& i, const int& j)const {return M(i,j);}
        void copy_submatrix(int m, int n, const int *const rows, const int *const cols, K *ptr) const {
            std::fill_n(ptr,m*n,0);
            for (int i=0;i<m;i++){
                for (int j=0;j<n;j++){
                    ptr[i+m*j]=M(rows[i],cols[j]);
                }

            }
        }
};

template<class K, int init>
AnyType SetCompressMat(Stack stack,Expression emat,Expression einter)
{ return SetCompressMat<K>(stack,emat,einter,init);}

template<class K>
AnyType SetCompressMat(Stack stack,Expression emat,Expression einter,int init)
{
  HMatrixVirt<K>** Hmat =GetAny<HMatrixVirt<K>** >((*emat)(stack));
  const typename CompressMat<K>::Op * mi(dynamic_cast<const typename CompressMat<K>::Op *>(einter));

  double epsilon=mi->arg(0,stack,ff_htoolEpsilon);
  pcommworld pcomm=mi->argc(1,stack,nullptr);
  double eta=mi->arg(2,stack,ff_htoolEta);
  int minclustersize=mi->argl(3,stack,ff_htoolMinclustersize);
  int mintargetdepth=mi->argl(4,stack,ff_htoolMintargetdepth);
  int minsourcedepth=mi->argl(5,stack,ff_htoolMinsourcedepth);
  string* pcompressor=mi->args(6,stack,0);

  string compressor = pcompressor ? *pcompressor : "partialACA";

  MPI_Comm comm = pcomm ? *(MPI_Comm*)pcomm : MPI_COMM_WORLD;

  ffassert(einter);
  KNM<K> * pM = GetAny< KNM<K> * >((* mi->a)(stack));
  KNM<K> & M = *pM;

  KN<double> * px = GetAny< KN<double> * >((* mi->b)(stack));
  KN<double> & xx = *px;

  KN<double> * py = GetAny< KN<double> * >((* mi->c)(stack));
  KN<double> & yy = *py;

  KN<double> * pz = GetAny< KN<double> * >((* mi->d)(stack));
  KN<double> & zz = *pz;

  MyMatrix<K> A(M);

  ffassert(xx.n == M.N());
  ffassert(yy.n == M.N());
  ffassert(zz.n == M.N());

  std::vector<double> p(3*xx.n);
  for (int i=0; i<xx.n; i++) {
    p[3*i+0] = xx[i];
    p[3*i+1] = yy[i];
    p[3*i+2] = zz[i];
  }
  
  int sizeWorld,rankWorld;
  MPI_Comm_size(comm, &sizeWorld);
  MPI_Comm_rank(comm, &rankWorld);
  htool::ClusterTreeBuilder<double> cluster_builder;
  std::shared_ptr<Cluster<double>> t;
  cluster_builder.set_minclustersize(minclustersize);
  t = std::make_shared<htool::Cluster<double>>(cluster_builder.create_cluster_tree(xx.n,3,p.data(),2,sizeWorld));

  //cout << M.N() << " " << xx.M() << " " << yy.n << " " << zz.n<< endl;
  if (init) delete *Hmat;
    
  auto hmatrix_builder = htool::HMatrixTreeBuilder<K, double>(*t, *t, epsilon, eta, 'N','N', -1, rankWorld,rankWorld);
  std::shared_ptr<htool::VirtualLowRankGenerator<K,double>> LowRankGenerator = nullptr;

  if ( compressor == "" || compressor == "partialACA")
    LowRankGenerator = std::make_shared<htool::partialACA<K>>();
   else if (compressor == "fullACA")
    LowRankGenerator = std::make_shared<htool::fullACA<K>>();
   else if (compressor == "SVD")
    LowRankGenerator = std::make_shared<htool::SVD<K>>();
   else {
       cerr << "Error: unknown htool compressor \""+compressor+"\"" << endl;
       ffassert(0);
   }
  hmatrix_builder.set_low_rank_generator(LowRankGenerator);
  hmatrix_builder.set_minimal_target_depth(mintargetdepth);
  hmatrix_builder.set_minimal_source_depth(minsourcedepth);
  *Hmat = new HMatrixImpl<K>(A, t,t,hmatrix_builder,comm);

  return Hmat;
}

template<class K>
void addHmat() {
   // Dcl_Type<HMatrixVirt<K>**>(Initialize<HMatrixVirt<K>*>, Delete<HMatrixVirt<K>*>);
    Dcl_TypeandPtr<HMatrixVirt<K>*>(0,0,::InitializePtr<HMatrixVirt<K>*>,::DeletePtr<HMatrixVirt<K>*>);
    //atype<HMatrix<LR ,K>**>()->Add("(","",new OneOperator2_<string*, HMatrix<LR ,K>**, string*>(get_infos<LR,K>));
    
    Add<HMatrixVirt<K>**>("infos",".",new OneOperator1_<std::map<std::string, std::string>*, HMatrixVirt<K>**>(get_infos));
    
    Dcl_Type<Prod<KN<K>*, K>>();
    TheOperators->Add("*", new OneOperator2<Prod<KN<K>*, K>, HMatrixVirt<K>**, KN<K>*>(Build));
    TheOperators->Add("=", new OneOperator2<KN<K>*, KN<K>*, Prod<KN<K>*, K>>(Prod<KN<K>*, K>::mv));
    TheOperators->Add("<-", new OneOperator2<KN<K>*, KN<K>*, Prod<KN<K>*, K>>(Prod<KN<K>*, K>::init));
    
    addInv<HMatrixVirt<K>*, HMatrixInv, KN<K>, K>();
    
    Global.Add("display","(",new plotHMatrix<K>);
    
    // to dense:
    TheOperators->Add("=",
                      new OneOperator2_<KNM<K>*, KNM<K>*, HMatrixVirt<K>**,E_F_StackF0F0>(To<KNM<K>, K, 1>));
    TheOperators->Add("<-",
                      new OneOperator2_<KNM<K>*, KNM<K>*, HMatrixVirt<K>**,E_F_StackF0F0>(To<KNM<K>, K, 0>));
    Dcl_Type<const typename CompressMat<K>::Op *>();
    //Add<const typename assembleHMatrix<LR, K>::Op *>("<-","(", new assembleHMatrix<LR, K>);

    TheOperators->Add("=",
    new OneOperator2_<HMatrixVirt<K>**,HMatrixVirt<K>**,const typename CompressMat<K>::Op*,E_F_StackF0F0>(SetCompressMat<K, 1>));
    TheOperators->Add("<-",
    new OneOperator2_<HMatrixVirt<K>**,HMatrixVirt<K>**,const typename CompressMat<K>::Op*,E_F_StackF0F0>(SetCompressMat<K, 0>));

    Global.Add("compress","(",new CompressMat<K>);
}

template<class R, class MMesh, class v_fes1, class v_fes2>
struct OpHMatrixtoBEMForm
: public OneOperator
{
    typedef typename Call_FormBilinear<v_fes1,v_fes2>::const_iterator const_iterator;
    int init;
    class Op : public E_F0mps {
    public:
        Call_FormBilinear<v_fes1,v_fes2> *b;
        Expression a;
        int init;
        AnyType operator()(Stack s)  const ;
        
        Op(Expression aa,Expression  bb,int initt)
        : b(new Call_FormBilinear<v_fes1,v_fes2>(* dynamic_cast<const Call_FormBilinear<v_fes1,v_fes2> *>(bb))),a(aa),init(initt)
        { 
            assert(b && b->nargs);

            // Check the nbitem of inconnu and test in BemFormBilinear
            checkNbItemFEspacesInconnuAndTest(b->largs,b->N,b->M);
        }
        operator aType () const { return atype<HMatrixVirt<R> **>();}
        
    };
    
    E_F0 * code(const basicAC_F0 & args) const
    {
        Expression p=args[1];
        Call_FormBilinear<v_fes1,v_fes2> *t( new Call_FormBilinear<v_fes1,v_fes2>(* dynamic_cast<const Call_FormBilinear<v_fes1,v_fes2> *>(p))) ;
        return  new Op(to<HMatrixVirt<R> **>(args[0]),args[1],init);}
  
     OpHMatrixtoBEMForm(int initt=0) :
     OneOperator(atype<HMatrixVirt<R> **>(),atype<HMatrixVirt<R> **>(),atype<const Call_FormBilinear<v_fes1,v_fes2>*>()),
        init(initt)
        {}
    
};

// the operator
template<class R,class MMesh, class v_fes1,class v_fes2>
AnyType OpHMatrixtoBEMForm<R,MMesh,v_fes1,v_fes2>::Op::operator()(Stack stack)  const
{
    typedef typename v_fes1::pfes pfes1;
    typedef typename v_fes2::pfes pfes2;
    typedef typename v_fes1::FESpace FESpace1;
    typedef typename v_fes2::FESpace FESpace2;
    typedef typename FESpace1::Mesh SMesh;
    typedef typename FESpace2::Mesh TMesh;
    typedef typename SMesh::RdHat SRdHat;
    typedef typename TMesh::RdHat TRdHat;
    
    
    typedef typename std::conditional<SMesh::RdHat::d==1,Mesh1D,Mesh2D>::type MeshBemtool;
    typedef typename std::conditional<SMesh::RdHat::d==1,P0_1D,P0_2D>::type P0;
    typedef typename std::conditional<SMesh::RdHat::d==1,P1_1D,P1_2D>::type P1;
    typedef typename std::conditional<SMesh::RdHat::d==1,P2_1D,P2_2D>::type P2;

    assert(b && b->nargs);
    const list<C_F0> & largs=b->largs;
    
    // FE space
    pfes1  * pUh= GetAny<pfes1 *>((*b->euh)(stack));
    FESpace1 * Uh = **pUh;
    
    pfes2  * pVh= GetAny<pfes2 *>((*b->evh)(stack));
    FESpace2 * Vh = **pVh;

    int NUh =Uh->N;
    int NVh =Vh->N;

    ffassert(Vh);
    ffassert(Uh);
    
    int n=Uh->NbOfDF;
    int m=Vh->NbOfDF;
    
    // VFBEM =1 kernel VF   =2 Potential VF
    int VFBEM = typeVFBEM(largs,stack);
    if (mpirank == 0 && verbosity>5)
        cout << "test VFBEM type (1 kernel / 2 potential) "  << VFBEM << endl;
    
 
    HMatrixVirt<R>** Hmat =GetAny<HMatrixVirt<R>** >((*a)(stack));
    
    // info about HMatrix and type solver
    Data_Bem_Solver ds;
    ds.factorize=0;
    ds.initmat=true;
    SetEnd_Data_Bem_Solver<R>(stack,ds, b->nargs,OpCall_FormBilinear_np::n_name_param);  // LIST_NAME_PARM_HMAT
    WhereStackOfPtr2Free(stack)=new StackOfPtr2Free(stack);

    bool samemesh = (void*)&Uh->Th == (void*)&Vh->Th;  // same Fem2D::Mesh     +++ pot or kernel
    if (VFBEM==1)
        ffassert (samemesh);
     if(init)
        *Hmat =0;
      *Hmat =0;
    if( *Hmat)
        delete *Hmat;
    *Hmat =0;

    creationHMatrixtoBEMForm<R,MMesh,FESpace1,FESpace2>( Uh, Vh, VFBEM, largs, stack, ds, Hmat);

    return Hmat;
}

bool C_args::IsBemBilinearOperator() const {
for (const_iterator i=largs.begin(); i != largs.end();i++) {
    C_F0  c= *i;
    aType r=c.left();
    if  ( r!= atype<const class BemFormBilinear *>() )
         return false;
}
return true;}

// bem + fem
bool C_args::IsMixedBilinearOperator() const {
if ( this->IsBilinearOperator() && this->IsBemBilinearOperator()  ) return true;
    
return false;}






EquationEnum whatEquationEnum(BemKernel *K,int i) {
    int nk=2; // restriction, max 2 kernels for combined
    int type[2]={-1,-1};
    EquationEnum equation[3] = {LA,HE,YU};
    double wavenumRe, wavenumImag;
        
    for (int i=0;i<nk;i++) {
      wavenumRe=K->wavenum[i].real(); wavenumImag=K->wavenum[i].imag();
      if(wavenumRe==0 && wavenumImag==0) type[i] = 0;
      else if(wavenumRe>0 && wavenumImag==0) type[i] = 1;
      else if(wavenumRe==0 && wavenumImag>0) type[i] = 2;
    }
    if(type[0]-type[1]) {
      cerr << "Error, the combined kernels must be the same equation " << equation[type[0]] << " " << equation[type[1]] << endl;
      throw(ErrorExec("exit",1));}
    
    const EquationEnum equEnum = equation[0];
    return equEnum;
    
}
//  Begin Array of HMatrix[int]
template< class A >
inline AnyType DestroyKNmat(Stack, const AnyType &x) {
  KN< A > *a = GetAny< KN< A > * >(x);
  for (int i = 0; i < a->N( ); i++)
    if ((*a)[i]) delete (*a)[i];
  a->destroy( );
  return Nothing;
}

template< class RR, class A, class B >
RR *get_elementp_(const A &a, const B &b) {
  if (b < 0 || a->N( ) <= b) {
    cerr << " Out of bound  0 <=" << b << " < " << a->N( ) << " array type = " << typeid(A).name( )
         << endl;
    ExecError("Out of bound in operator []");
  }
  return &((*a)[b]);
}

template< class R >
R *set_initinit(R *const &a, const long &n) {
  SHOWVERB(cout << " set_init " << typeid(R).name( ) << " " << n << endl);
  a->init(n);
  for (int i = 0; i < n; i++) (*a)[i] = 0;
  return a;
}

template<class R>
void ArrayofHmat()
{
    typedef HMatrixVirt< R > *Mat;
    typedef Mat *PMat;
    typedef KN< Mat > AMat;

    Dcl_Type< AMat * >(0, ::DestroyKNmat< Mat >);
    // to declare HMatrix[int]
    map_type_of_map[make_pair(atype< long >( ), atype< PMat >( )->right())] = atype< AMat * >( );
    atype<  AMat * >( )->Add(
      "[", "",
      new OneOperator2_<  PMat, AMat *, long >(get_elementp_< Mat, AMat *, long >));

    TheOperators->Add("<-", new OneOperator2_< AMat*, AMat* , long >(&set_initinit));
  

    // resize mars 2006 v2.4-1
    Dcl_Type< Resize< AMat > >( );
    Add< AMat *>("resize", ".",
                         new OneOperator1< Resize< AMat >, AMat * >(to_Resize));
    Add< Resize< AMat > >(
      "(", "", new OneOperator2_< AMat*, Resize< AMat >, long >(resizeandclean1));

}
//  End Array of HMatrix[int]

template<class R, class v_fes1, class v_fes2>
class OpHMatrixUser : public OneOperator
{
    public:
        typedef typename v_fes1::pfes pfes1;
        typedef typename v_fes2::pfes pfes2;
        class Op : public E_F0info {
            public:
                Expression g, uh1, uh2;
                static const int n_name_param = 8;
                static basicAC_F0::name_and_type name_param[] ;
                Expression nargs[n_name_param];
                long argl(int i,Stack stack,long a) const{ return nargs[i] ? GetAny<long>( (*nargs[i])(stack) ): a;}
                string* args(int i,Stack stack,string* a) const{ return nargs[i] ? GetAny<string*>( (*nargs[i])(stack) ): a;}
                double arg(int i,Stack stack,double a) const{ return nargs[i] ? GetAny<double>( (*nargs[i])(stack) ): a;}
                pcommworld argc(int i,Stack stack,pcommworld a ) const{ return nargs[i] ? GetAny<pcommworld>( (*nargs[i])(stack) ): a;}
                Op(const basicAC_F0 &  args, Expression  bb, Expression  cc, Expression  dd) : g(bb),uh1(cc), uh2(dd) {
                    args.SetNameParam(n_name_param,name_param,nargs);
                }
        };

        E_F0 * code(const basicAC_F0 & args) const {
            return  new Op(args,t[0]->CastTo(args[0]),t[1]->CastTo(args[1]),t[2]->CastTo(args[2]));
        }

        OpHMatrixUser() :
        OneOperator(atype<const typename OpHMatrixUser<R,v_fes1,v_fes2>::Op*>(),atype<VirtualGenerator<R>**>(),atype<pfes1*>(),atype<pfes2*>()) {}
};

template<class K, class v_fes1, class v_fes2>
basicAC_F0::name_and_type  OpHMatrixUser<K,v_fes1,v_fes2>::Op::name_param[]= {
  {  "eps", &typeid(double)},
  {  "commworld", &typeid(pcommworld)},
  {  "eta", &typeid(double)},
  {  "minclustersize", &typeid(long)},
  {  "mintargetdepth", &typeid(long)},
  {  "minsourcedepth", &typeid(long)},
  {  "compressor", &typeid(string*)},
  {  "initialclustering", &typeid(string*)}
};

template<class R, class v_fes1,class v_fes2, int init>
AnyType SetOpHMatrixUser(Stack stack,Expression emat, Expression eop)
{
    typedef typename v_fes1::pfes pfes1;
    typedef typename v_fes2::pfes pfes2;
    typedef typename v_fes1::FESpace FESpace1;
    typedef typename v_fes2::FESpace FESpace2;
    typedef typename FESpace1::Mesh SMesh;
    typedef typename FESpace2::Mesh TMesh;
    typedef typename SMesh::RdHat SRdHat;
    typedef typename TMesh::RdHat TRdHat;

    typedef typename OpHMatrixUser<R,v_fes1,v_fes2>::Op UOp;
    const UOp * op(dynamic_cast<const UOp *>(eop));
    pfes1  * pUh= GetAny<pfes1 *>((*op->uh1)(stack));
    FESpace1 * Uh = **pUh;
    int NUh =Uh->N;
    pfes2  * pVh= GetAny<pfes2 *>((*op->uh2)(stack));
    FESpace2 * Vh = **pVh;
    int NVh =Vh->N;
    ffassert(Vh);
    ffassert(Uh);

    int m=Uh->NbOfDF;
    int n=Vh->NbOfDF;

    HMatrixVirt<R>** Hmat =GetAny<HMatrixVirt<R>** >((*emat)(stack));

    Data_Bem_Solver ds;
    ds.factorize=0;
    ds.initmat=true;

    ds.epsilon = op->arg(0,stack,ds.epsilon);
    ds.commworld = op->argc(1,stack,ds.commworld);
    ds.eta = op->arg(2,stack,ds.eta);
    ds.minclustersize = op->argl(3,stack,ds.minclustersize);
    ds.mintargetdepth = op->argl(4,stack,ds.mintargetdepth);
    ds.minsourcedepth = op->argl(5,stack,ds.minsourcedepth);
    ds.compressor = *(op->args(6,stack,&ds.compressor));
    ds.initialclustering = *(op->args(7,stack,&ds.initialclustering));

    const SMesh & ThU =Uh->Th;
    const TMesh & ThV =Vh->Th;
    bool samemesh = (void*)&Uh->Th == (void*)&Vh->Th;

     if(init)
        *Hmat =0;
      *Hmat =0;
    if( *Hmat)
            delete *Hmat;

    *Hmat =0;

    vector<double> pt(3*n);
    vector<double> ps(3*m);
    Fem2D::R3 pp;
    bemtool::R3 p;
    SRdHat pbs;
    TRdHat pbt;
    pbs[0] = 1./(SRdHat::d+1);
    pbs[1] = 1./(SRdHat::d+1);
    if (SRdHat::d == 3) pbs[2] = 1./(SRdHat::d+1);
    pbt[0] = 1./(TRdHat::d+1);
    pbt[1] = 1./(TRdHat::d+1);
    if (TRdHat::d == 3) pbt[2] = 1./(TRdHat::d+1);

    int Snbv = Uh->TFE[0]->ndfonVertex;
    int Snbe = Uh->TFE[0]->ndfonEdge;
    int Snbt = Uh->TFE[0]->ndfonFace;
    bool SP0 = SRdHat::d == 1 ? (Snbv == 0) && (Snbe == 1) && (Snbt == 0) : (Snbv == 0) && (Snbe == 0) && (Snbt == 1);
    bool SP1 = (Snbv == 1) && (Snbe == 0) && (Snbt == 0);

    int Tnbv = Vh->TFE[0]->ndfonVertex;
    int Tnbe = Vh->TFE[0]->ndfonEdge;
    int Tnbt = Vh->TFE[0]->ndfonFace;
    bool TP0 = TRdHat::d == 1 ? (Tnbv == 0) && (Tnbe == 1) && (Tnbt == 0) : (Tnbv == 0) && (Tnbe == 0) && (Tnbt == 1);
    bool TP1 = (Tnbv == 1) && (Tnbe == 0) && (Tnbt == 0);

    for (int i=0; i<m; i++) {
        if (SP1)
            pp = ThU.vertices[i];
        else if (SP0)
            pp = ThU[i](pbs);
        else {
            if (mpirank == 0) std::cerr << "ff-Htool error: only P0 and P1 discretizations are available for now." << std::endl;
            ffassert(0);
        }
        ps[3*i+0] = pp.x;
        ps[3*i+1] = pp.y;
        ps[3*i+2] = pp.z;
    }

    if(!samemesh) {
        for (int i=0; i<n; i++) {
            if (TP1)
                pp = ThV.vertices[i];
            else if (TP0)
                pp = ThV[i](pbt);
            else {
                if (mpirank == 0) std::cerr << "ff-Htool error: only P0 and P1 discretizations are available for now." << std::endl;
                ffassert(0);
            }
            pt[3*i+0] = pp.x;
            pt[3*i+1] = pp.y;
            pt[3*i+2] = pp.z;
        }
    }
    else{
        pt=ps;
    }

    VirtualGenerator<R>** generator = GetAny<VirtualGenerator<R>**>((*op->g)(stack));

    MPI_Comm comm = ds.commworld ? *(MPI_Comm*)ds.commworld : MPI_COMM_WORLD;
    std::shared_ptr<Cluster<double>> t, s;
    s = build_clustering(m, Uh, ps, ds, comm);
    t = build_clustering(n, Vh, pt, ds, comm);
    buildHmat(Hmat, *generator, ds, t, s, pt, ps, comm);

    return Hmat;
}

template<class R, class v_fes1, class v_fes2>
void AddHMatrixUser() {
    typedef typename OpHMatrixUser< R, v_fes1, v_fes2 >::Op OpT;
    Dcl_Type<const OpT*>();
    Global.Add("Build","(", new OpHMatrixUser< R, v_fes1, v_fes2 >);
    Add<const OpT*>("<-","(", new OpHMatrixUser< R, v_fes1, v_fes2 >);
    TheOperators->Add("<-", new OneOperator2_<HMatrixVirt<R>**,HMatrixVirt<R>**,const OpT*,E_F_StackF0F0>(SetOpHMatrixUser<R,v_fes1,v_fes2,0>));
    TheOperators->Add("=", new OneOperator2_<HMatrixVirt<R>**,HMatrixVirt<R>**,const OpT*,E_F_StackF0F0>(SetOpHMatrixUser<R,v_fes1,v_fes2,1>));
}

static void Init_Bem() {
    if(mpirank == 0 && verbosity > 0) cout << "\nInit_Bem\n";

    map_type[typeid(const BemFormBilinear *).name( )] = new TypeFormBEM;
    
    map_type[typeid(const BemKFormBilinear *).name( )] = new ForEachType< BemKFormBilinear >;
    map_type[typeid(const BemPFormBilinear *).name( )] = new ForEachType< BemPFormBilinear >;
    
    basicForEachType *t_BEM = atype< const C_args * >( ); //atype< const BemFormBilinear * >( );
    basicForEachType *t_fbem = atype< const BemFormBilinear * >( );
    aType t_C_args = map_type[typeid(const C_args *).name( )];
    atype< const C_args * >( )->AddCast(new OneOperatorCode< C_args >(t_C_args, t_fbem) );    // bad
    
    typedef  const BemKernel fkernel;
    typedef  const BemPotential fpotential;
    // new type for bem
    typedef const BemKernel *pBemKernel;
    typedef const BemPotential *pBemPotential;
    
    
   // Dcl_Type< fkernel * >( );  // a bem kernel
   // Dcl_Type< fpotential * >( ); // a bem potential
    Dcl_Type< const OP_MakeBemKernelFunc::Op * >( );
    Dcl_Type< const OP_MakeBemPotentialFunc::Op * >( );
    
    Dcl_TypeandPtr< pBemKernel >(0, 0, ::InitializePtr< pBemKernel >, ::DestroyPtr< pBemKernel >,
                                 AddIncrement< pBemKernel >, NotReturnOfthisType);
    // pBemPotential initialize
    Dcl_TypeandPtr< pBemPotential >(0, 0, ::InitializePtr< pBemPotential >, ::DestroyPtr< pBemPotential >,
                                    AddIncrement< pBemPotential >, NotReturnOfthisType);
 
    
    zzzfff->Add("BemKernel", atype< pBemKernel * >( ));
    zzzfff->Add("BemPotential", atype< pBemPotential * >( ));
    
     // pBemKernel initialize
    atype<pBemKernel>()->AddCast( new E_F1_funcT<pBemKernel,pBemKernel*>(UnRef<pBemKernel>));
    // BemPotential
    atype<pBemPotential>()->AddCast( new E_F1_funcT<pBemPotential,pBemPotential*>(UnRef<pBemPotential>));
   
    
    // simplified type/function to define varf bem
    Dcl_Type< const FoperatorKBEM * >( );
    Dcl_Type< const FoperatorPBEM * >( );
    Dcl_Type<std::map<std::string, std::string>*>( );
        
    TheOperators->Add("<<",new OneBinaryOperator<PrintPinfos<std::map<std::string, std::string>*>>);
    Add<std::map<std::string, std::string>*>("[","",new OneOperator2_<string*, std::map<std::string, std::string>*, string*>(get_info));

    addHmat<double>();
    addHmat<std::complex<double>>();

    //BemKernel
    TheOperators->Add("<-", new OneOperatorCode< OP_MakeBemKernel >);
    //BemPotential
    TheOperators->Add("<-", new OneOperatorCode< OP_MakeBemPotential >);
   
    zzzfff->Add("HMatrix", atype<HMatrixVirt<double> **>());
    map_type_of_map[make_pair(atype<HMatrixVirt<double>**>(), atype<double*>())] = atype<HMatrixVirt<double>**>();
    map_type_of_map[make_pair(atype<HMatrixVirt<double>**>(), atype<Complex*>())] = atype<HMatrixVirt<std::complex<double> >**>();
    // bem integration space/target space must be review Axel 08/2020    
    TheOperators->Add("<-", new OpHMatrixtoBEMForm< std::complex<double>, MeshS, v_fesS, v_fesS > (1) );
    TheOperators->Add("=", new OpHMatrixtoBEMForm< std::complex<double>, MeshS, v_fesS, v_fesS > );
    TheOperators->Add("<-", new OpHMatrixtoBEMForm< std::complex<double>, MeshS, v_fesS, v_fes3 > (1) );
    TheOperators->Add("=", new OpHMatrixtoBEMForm< std::complex<double>, MeshS, v_fesS, v_fes3 > );
    TheOperators->Add("<-", new OpHMatrixtoBEMForm< std::complex<double>, MeshL, v_fesL, v_fesL > (1) );
    TheOperators->Add("=", new OpHMatrixtoBEMForm< std::complex<double>, MeshL, v_fesL, v_fesL > );
       
    TheOperators->Add("<-", new OpHMatrixtoBEMForm< std::complex<double>, MeshL, v_fesL, v_fes > (1) );
    TheOperators->Add("=", new OpHMatrixtoBEMForm< std::complex<double>, MeshL, v_fesL, v_fes > );
    TheOperators->Add("<-", new OpHMatrixtoBEMForm< std::complex<double>, MeshL, v_fesL, v_fesS > (1) );
    TheOperators->Add("=", new OpHMatrixtoBEMForm< std::complex<double>, MeshL, v_fesL, v_fesS > );

    TheOperators->Add("<-", new OpHMatrixtoBEMForm< std::complex<double>, MeshS, v_fesS, v_fes > (1));
    TheOperators->Add("=", new OpHMatrixtoBEMForm< std::complex<double>, MeshS, v_fesS, v_fes > );
    TheOperators->Add("<-", new OpHMatrixtoBEMForm< std::complex<double>, MeshS, v_fesS, v_fesL > (1));
    TheOperators->Add("=", new OpHMatrixtoBEMForm< std::complex<double>, MeshS, v_fesS, v_fesL > );

    // operation on BemKernel
    Dcl_Type<listBemKernel> ();
    TheOperators->Add("+",new OneBinaryOperator_st< Op_addBemKernel<listBemKernel,pBemKernel,pBemKernel> >);
    //TheOperators->Add("+",new OneBinaryOperator_st< Op_addBemKernel<listBemKernel,listBemKernel,pBemKernel> >); // no need is the combinaison is only with 2 kernels

    TheOperators->Add("=",new OneBinaryOperator_st< Op_setBemKernel<false,pBemKernel*,pBemKernel*,pBemKernel> >);
    TheOperators->Add("<-", new OneBinaryOperator_st< Op_setBemKernel<true,pBemKernel*,pBemKernel*,pBemKernel> >);

    TheOperators->Add("=",new OneBinaryOperator_st< Op_setCombBemKernel<false,pBemKernel*,pBemKernel*,listBemKernel> >);
    TheOperators->Add("<-", new OneBinaryOperator_st< Op_setCombBemKernel<true,pBemKernel*,pBemKernel*,listBemKernel> >);

    TheOperators->Add("*",new OneBinaryOperator_st< Op_coeffBemKernel1<pBemKernel,Complex,pBemKernel> >);
    
    Dcl_Type< const CBemDomainOfIntegration * >( );
    Dcl_Type< const CPartBemDI * >( );
    
    Add< const CPartBemDI * >("(", "", new OneOperatorCode< CBemDomainOfIntegration >);
    
    Add< const CBemDomainOfIntegration * >("(", "", new OneOperatorCode< BemKFormBilinear >);
    Add< const CDomainOfIntegration * >("(", "", new OneOperatorCode< BemPFormBilinear >);
        
    Global.Add("BEM","(",new FormalKBEMcode);
    Global.Add("BEM","(",new FormalKBEMcodeArray);
    Global.Add("POT","(",new FormalPBEMcode);
    Global.Add("POT","(",new FormalPBEMcodeArray);
 //   Global.Add("Kernel","(",new FormalBemKernel);
    Add< pBemKernel >("<--","(",new FormalBemKernel);
//    Global.Add("Potential","(",new FormalBemPotential);
    Add< pBemPotential >("<--","(",new FormalBemPotential);

    Global.Add("int2dx2d","(",new OneOperatorCode<CPartBemDI2d2d>);
    Global.Add("int1dx1d","(",new OneOperatorCode<CPartBemDI1d1d>);
    Global.Add("int1dx2d","(",new OneOperatorCode<CPartBemDI1d2d>);
    Global.Add("int2dx1d","(",new OneOperatorCode<CPartBemDI2d1d>);

    Global.New("htoolEta",CPValue<double>(ff_htoolEta));
    Global.New("htoolEpsilon",CPValue<double>(ff_htoolEpsilon));
    Global.New("htoolMinclustersize",CPValue<long>(ff_htoolMinclustersize));
    Global.New("htoolMintargetdepth",CPValue<long>(ff_htoolMintargetdepth));
    Global.New("htoolMinsourcedepth",CPValue<long>(ff_htoolMinsourcedepth));
    ArrayofHmat<double>();
    ArrayofHmat<complex<double>>();

    // Build HMatrix from custom user generator
    Dcl_TypeandPtr< VirtualGenerator<double>* >(0, 0,::InitializePtr< VirtualGenerator<double>* >, ::DeletePtr< VirtualGenerator<double>*> );
    Dcl_TypeandPtr< VirtualGenerator<std::complex<double>>* >(0, 0,::InitializePtr< VirtualGenerator<std::complex<double>>* >, ::DeletePtr< VirtualGenerator<std::complex<double>>*> );
    zzzfff->Add("Generator", atype<VirtualGenerator<double>** >( ));
    map_type_of_map[make_pair(atype<VirtualGenerator<double>**>(), atype<double*>())] = atype<VirtualGenerator<double>**>();
    map_type_of_map[make_pair(atype<VirtualGenerator<double>**>(), atype<Complex*>())] = atype<VirtualGenerator<std::complex<double> >**>();

    AddHMatrixUser<double,v_fesL, v_fesL>();
    AddHMatrixUser<double,v_fesS, v_fesS>();
    AddHMatrixUser<double,v_fes3, v_fes3>();
    AddHMatrixUser<std::complex<double>,v_fesL, v_fesL>();
    AddHMatrixUser<std::complex<double>,v_fesS, v_fesS>();
    AddHMatrixUser<std::complex<double>,v_fes3, v_fes3>();
    AddHMatrixUser<double,v_fesL, v_fesS>();
    AddHMatrixUser<double,v_fesS, v_fes3>();
    AddHMatrixUser<std::complex<double>,v_fesL, v_fesS>();
    AddHMatrixUser<std::complex<double>,v_fesS, v_fes3>();
}

LOADFUNC(Init_Bem)

