Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Update RNS double to fully support moduli beyond 26 bits #383

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion autotune/winograd.C
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ int main () {
typedef FIELD Field;
Field F(17);
typedef Field::Element Element ;
size_t n=512, nmax=4000, prec=512, nbest=0, count=0;
size_t n=512, nmax=10000, prec=512, nbest=0, count=0;
TTimer chrono;
bool bound=false;

Expand Down
3 changes: 2 additions & 1 deletion benchmarks/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ endif

PERFPUBLISHERFILE=benchmarks-report.xml

FFLA_BENCH = benchmark-fgemm benchmark-fgemm-rns benchmark-wino benchmark-ftrsm benchmark-fgesv benchmark-ftrsv benchmark-ftrtri benchmark-inverse benchmark-fsytrf benchmark-fsyrk benchmark-lqup benchmark-fsyr2k benchmark-pluq benchmark-charpoly benchmark-charpoly-mp benchmark-fgemm-mp benchmark-fgemv-mp benchmark-ftrsm-mp benchmark-lqup-mp benchmark-checkers benchmark-fadd-lvl2 benchmark-fdot benchmark-fgemv benchmark-quasisep benchmark-sss benchmark-storage-transpose benchmark-qscomp
FFLA_BENCH = benchmark-fgemm benchmark-fgemm-rns benchmark-wino benchmark-ftrsm benchmark-fgesv benchmark-ftrsv benchmark-ftrtri benchmark-inverse benchmark-fsytrf benchmark-fsyrk benchmark-lqup benchmark-fsyr2k benchmark-pluq benchmark-charpoly benchmark-charpoly-mp benchmark-fgemm-mp benchmark-fgemv-mp benchmark-ftrsm-mp benchmark-lqup-mp benchmark-checkers benchmark-fadd-lvl2 benchmark-fdot benchmark-fgemv benchmark-quasisep benchmark-sss benchmark-storage-transpose benchmark-qscomp benchmark-rns-double

BLAS_BENCH = benchmark-sgemm$(EXEEXT) benchmark-dgemm benchmark-dtrsm
LAPA_BENCH = benchmark-dtrtri benchmark-dgetri benchmark-dgetrf benchmark-dsytrf
Expand Down Expand Up @@ -87,6 +87,7 @@ benchmark_fadd_lvl2_SOURCES = benchmark-fadd-lvl2.C
benchmark_fdot_SOURCES = benchmark-fdot.C
benchmark_fgemv_SOURCES = benchmark-fgemv.C
benchmark_storage_transpose_SOURCES = benchmark-storage-transpose.C
benchmark_rns_double_SOURCES = benchmark-rns-double.C

benchmark_sgemm_CXXFLAGS = $(AM_CXXFLAGS) -D__SGEMM__

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmark-dtrsm.C
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ int main(int argc, char** argv) {
// -----------
// Standard output for benchmark - Alexis Breust 2014/11/14
std::cout << "Time: " << time / double(iter)
<< " Gfops: " << (2.*double(n)/1000.*double(n)/1000.*double(n)/1000.0) / time * double(iter) / 3.;
<< " Gfops: " << (1.*double(n)/1000.*double(n)/1000.*double(n)/1000.0) / time * double(iter) ;
FFLAS::writeCommandString(std::cout, as) << std::endl;

return 0;
Expand Down
14 changes: 7 additions & 7 deletions benchmarks/benchmark-fgemm-mp.C
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
// everywhere in the call stack
#define __FFLASFFPACK_OPENBLAS_NT_ALREADY_SET 1

#define INTEGER_NO_RNS 1
//#define INTEGER_NO_RNS 1
#define PROFILE_FGEMM_MP

#if not defined(MG_DEFAULT)
#define MG_DEFAULT MG_ACTIVE
Expand Down Expand Up @@ -88,7 +89,7 @@ static Argument as[] = {
};

template<typename Ints>
int tmain(){
int tmain(){
srand( (int)seed);
srand48(seed);
Givaro::Integer::seeding(seed);
Expand Down Expand Up @@ -183,17 +184,16 @@ int tmain(){
fmpz_mat_clear(BB);
#endif
//END FLINT CODE //
using FFLAS::CuttingStrategy::Recursive;
using FFLAS::StrategyParameter::TwoDAdaptive;
//using FFLAS::CuttingStrategy::Recursive;
//using FFLAS::StrategyParameter::TwoDAdaptive;
// RNS MUL_LA
chrono.clear();chrono.start();
// PAR_BLOCK{
// FFLAS::fgemm(F,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc, SPLITTER(NUM_THREADS,Recursive,TwoDAdaptive) );
// }
{
FFLAS::fgemm(F,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,FFLAS::ParSeqHelper::Sequential());
FFLAS::fgemm(F,FFLAS::FflasNoTrans,FFLAS::FflasNoTrans,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,FFLAS::ParSeqHelper::Sequential());
}

chrono.stop();
time+=chrono.realtime();

Expand Down Expand Up @@ -234,7 +234,7 @@ int tmain(){
int main(int argc, char** argv){

#ifdef __FFLASFFPACK_OPENBLAS_NUM_THREADS
openblas_set_num_threads(__FFLASFFPACK_OPENBLAS_NUM_THREADS);
openblas_set_num_threads(__FFLASFFPACK_OPENBLAS_NUM_THREADS);
#endif

FFLAS::parseArguments(argc,argv,as);
Expand Down
18 changes: 12 additions & 6 deletions benchmarks/benchmark-fgemm.C
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
//#define WINO_PARALLEL_TMPS
//#define __FFLASFFPACK_FORCE_SEQ
//#define PFGEMM_WINO_SEQ 32
//#define CLASSIC_SEQ
#define CLASSIC_HYBRID
#define CLASSIC_SEQ
//#define CLASSIC_HYBRID
//#define WINO_SEQ
//#define FFT_PROFILER
//#define PROFILE_FGEMM_MP
Expand Down Expand Up @@ -82,15 +82,18 @@ int main(int argc, char** argv) {
// typedef Givaro::Modular<Givaro::Integer> Field;
// typedef Givaro::Modular<int64_t> Field;
typedef Givaro::Modular<double> Field;
// typedef Givaro::Modular<float> Field;
//typedef Givaro::Modular<float> Field;
// typedef Givaro::ModularBalanced<float> Field;
// typedef Givaro::ModularBalanced<double> Field;
//typedef Givaro::ModularBalanced<double> Field;
// typedef Givaro::ModularBalanced<int64_t> Field;
// typedef Givaro::Modular<Givaro::Integer> Field;
//typedef Givaro::DoubleDomain Field;


typedef Field::Element Element;

Field F(q);
if (q > F.maxCardinality()) return 1;
//if (q > F.maxCardinality()) return 1;

Timer chrono, TimFreivalds;
double timev=0.0;
Expand All @@ -99,6 +102,8 @@ int main(int argc, char** argv) {
Element * A, * B, * C;

Field::RandIter G(F);
//G.setBitsize(20);

A = fflas_new(F,m,k,Alignment::CACHE_PAGESIZE);
//#pragma omp parallel for collapse(2) schedule(runtime)
PAR_BLOCK { pfrand(F,G, m,k,A,m/size_t(NBK)); }
Expand All @@ -112,6 +117,7 @@ int main(int argc, char** argv) {
PAR_BLOCK { pfzero(F, m,n,C,m/NBK); }



for (size_t i=0;i<=iter;++i){

chrono.clear();
Expand Down Expand Up @@ -205,7 +211,7 @@ int main(int argc, char** argv) {
}
else{

MMHelper<Field,MMHelperAlgo::Winograd>//,
MMHelper<Field,MMHelperAlgo::Winograd>//,
//typename FieldTraits<Field>::value,
//ParSeqHelper::Sequential>
WH (F, nbw, ParSeqHelper::Sequential());
Expand Down
10 changes: 5 additions & 5 deletions benchmarks/benchmark-ftrsm.C
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ int main(int argc, char** argv) {
FFLAS::ReadMatrix (file1.c_str(),F,m,m,A);
}
else{
A = FFLAS::fflas_new (F,m,m,Alignment::CACHE_PAGESIZE);
A = FFLAS::fflas_new (F,m,m,Alignment::CACHE_PAGESIZE);
PAR_BLOCK{ FFLAS::pfrand(F,G,m,m,A,m/NBK); }

for (size_t k=0;k<(size_t)m;++k)
Expand All @@ -102,10 +102,10 @@ for (size_t i=0;i<=iter;++i){
if (i) chrono.start();

if (!p){
FFLAS::ParSeqHelper::Sequential H;
FFLAS::ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasLower,
FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
m,n, F.one, A, m, B, n, H);
FFLAS::ParSeqHelper::Sequential H;
FFLAS::ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasLower,
FFLAS::FflasNoTrans, FFLAS::FflasNonUnit,
m,n, F.one, A, m, B, n, H);
}
else{
FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Block,FFLAS::StrategyParameter::Threads> PSH(t);
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/benchmark-pluq.C
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ int main(int argc, char** argv) {
if (slab)
R = FFPACK::LUdivine (F, diag, FFLAS::FflasNoTrans, m, n, A, n, P, Q);
else
R = FFPACK::PLUQ(F, diag, m, n, A, n, P, Q);
R = FFPACK::PLUQ(F, diag, m, n, A, n, P, Q);
}
if (i) {chrono.stop(); time[i-1]=chrono.realtime();}

Expand Down
Loading
Loading