C++ Mathematical Expression Library (ExprTk) http://www.partow.net/programming/exprtk/index.html

This commit is contained in:
Arash Partow 2016-08-27 13:31:09 +10:00
parent 288644de2d
commit 379317db93
1 changed file with 397 additions and 252 deletions

View File

@ -4221,14 +4221,15 @@ namespace exprtk
namespace loop_unroll namespace loop_unroll
{ {
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
const std::size_t loop_batch_size = 8; const std::size_t global_loop_batch_size = 16;
#else #else
const std::size_t loop_batch_size = 4; const std::size_t global_loop_batch_size = 4;
#endif #endif
struct details struct details
{ {
details(const std::size_t& vsize) details(const std::size_t& vsize,
const std::size_t loop_batch_size = global_loop_batch_size)
: batch_size(loop_batch_size), : batch_size(loop_batch_size),
remainder (vsize % batch_size), remainder (vsize % batch_size),
upper_bound(static_cast<int>(vsize - (remainder ? loop_batch_size : 0))) upper_bound(static_cast<int>(vsize - (remainder ? loop_batch_size : 0)))
@ -7406,34 +7407,49 @@ namespace exprtk
char* s1 = const_cast<char*>(str1_base_ptr_->base() + str1_r0); char* s1 = const_cast<char*>(str1_base_ptr_->base() + str1_r0);
loop_unroll::details lud(max_size); loop_unroll::details lud(max_size);
int i = 0; const char* upper_bound = s0 + lud.upper_bound;
for (; i < lud.upper_bound; i += lud.batch_size) while (s0 < upper_bound)
{ {
std::swap(s0[i ], s1[i ]); #define exprtk_loop(N) \
std::swap(s0[i + 1], s1[i + 1]); std::swap(s0[N], s1[N]); \
std::swap(s0[i + 2], s1[i + 2]);
std::swap(s0[i + 3], s1[i + 3]); exprtk_loop( 0) exprtk_loop( 1)
exprtk_loop( 2) exprtk_loop( 3)
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
std::swap(s0[i + 4], s1[i + 4]); exprtk_loop( 4) exprtk_loop( 5)
std::swap(s0[i + 5], s1[i + 5]); exprtk_loop( 6) exprtk_loop( 7)
std::swap(s0[i + 6], s1[i + 6]); exprtk_loop( 8) exprtk_loop( 9)
std::swap(s0[i + 7], s1[i + 7]); exprtk_loop(10) exprtk_loop(11)
exprtk_loop(12) exprtk_loop(13)
exprtk_loop(14) exprtk_loop(15)
#endif #endif
s0 += lud.batch_size;
s1 += lud.batch_size;
} }
int i = 0;
switch (lud.remainder) switch (lud.remainder)
{ {
#define case_stmt(N) \
case N : { std::swap(s0[i],s1[i]); ++i; } \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 7 : { std::swap(s0[i],s1[i]); ++i; } case_stmt(15) case_stmt(14)
case 6 : { std::swap(s0[i],s1[i]); ++i; } case_stmt(13) case_stmt(12)
case 5 : { std::swap(s0[i],s1[i]); ++i; } case_stmt(11) case_stmt(10)
case 4 : { std::swap(s0[i],s1[i]); ++i; } case_stmt( 9) case_stmt( 8)
case_stmt( 7) case_stmt( 6)
case_stmt( 5) case_stmt( 4)
#endif #endif
case 3 : { std::swap(s0[i],s1[i]); ++i; } case_stmt( 3) case_stmt( 2)
case 2 : { std::swap(s0[i],s1[i]); ++i; } case_stmt( 1)
case 1 : { std::swap(s0[i],s1[i]); ++i; }
} }
#undef exprtk_loop
#undef case_stmt
} }
} }
@ -8687,35 +8703,47 @@ namespace exprtk
T* vec = vec_node_ptr_->ref().data(); T* vec = vec_node_ptr_->ref().data();
loop_unroll::details lud(vec_size_); loop_unroll::details lud(vec_size_);
int i = 0; const T* upper_bound = vec + lud.upper_bound;
for (; i < lud.upper_bound; i += lud.batch_size) while (vec < upper_bound)
{ {
vec[i ] = v; #define exprtk_loop(N) \
vec[i + 1] = v; vec[N] = v; \
vec[i + 2] = v;
vec[i + 3] = v; exprtk_loop( 0) exprtk_loop( 1)
exprtk_loop( 2) exprtk_loop( 3)
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
vec[i + 4] = v; exprtk_loop( 4) exprtk_loop( 5)
vec[i + 5] = v; exprtk_loop( 6) exprtk_loop( 7)
vec[i + 6] = v; exprtk_loop( 8) exprtk_loop( 9)
vec[i + 7] = v; exprtk_loop(10) exprtk_loop(11)
exprtk_loop(12) exprtk_loop(13)
exprtk_loop(14) exprtk_loop(15)
#endif #endif
vec += lud.batch_size;
} }
switch (lud.remainder) switch (lud.remainder)
{ {
#define case_stmt(N) \
case N : *vec++ = v; \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 7 : vec[i++] = v; case_stmt(15) case_stmt(14)
case 6 : vec[i++] = v; case_stmt(13) case_stmt(12)
case 5 : vec[i++] = v; case_stmt(11) case_stmt(10)
case 4 : vec[i++] = v; case_stmt( 9) case_stmt( 8)
case_stmt( 7) case_stmt( 6)
case_stmt( 5) case_stmt( 4)
#endif #endif
case 3 : vec[i++] = v; case_stmt( 3) case_stmt( 2)
case 2 : vec[i++] = v; case_stmt( 1)
case 1 : vec[i++] = v;
} }
#undef exprtk_loop
#undef case_stmt
return vec_node_ptr_->value(); return vec_node_ptr_->value();
} }
else else
@ -8804,35 +8832,48 @@ namespace exprtk
T* vec1 = vec1_node_ptr_->ref().data(); T* vec1 = vec1_node_ptr_->ref().data();
loop_unroll::details lud(vec_size_); loop_unroll::details lud(vec_size_);
int i = 0; const T* upper_bound = vec0 + lud.upper_bound;
for (; i < lud.upper_bound; i += lud.batch_size) while (vec0 < upper_bound)
{ {
vec0[i ] = vec1[i ]; #define exprtk_loop(N) \
vec0[i + 1] = vec1[i + 1]; vec0[N] = vec1[N]; \
vec0[i + 2] = vec1[i + 2];
vec0[i + 3] = vec1[i + 3]; exprtk_loop( 0) exprtk_loop( 1)
exprtk_loop( 2) exprtk_loop( 3)
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
vec0[i + 4] = vec1[i + 4]; exprtk_loop( 4) exprtk_loop( 5)
vec0[i + 5] = vec1[i + 5]; exprtk_loop( 6) exprtk_loop( 7)
vec0[i + 6] = vec1[i + 6]; exprtk_loop( 8) exprtk_loop( 9)
vec0[i + 7] = vec1[i + 7]; exprtk_loop(10) exprtk_loop(11)
exprtk_loop(12) exprtk_loop(13)
exprtk_loop(14) exprtk_loop(15)
#endif #endif
vec0 += lud.batch_size;
vec1 += lud.batch_size;
} }
switch (lud.remainder) switch (lud.remainder)
{ {
#define case_stmt(N) \
case N : *vec0++ = *vec1++; \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 7 : { vec0[i] = vec1[i]; ++i; } case_stmt(15) case_stmt(14)
case 6 : { vec0[i] = vec1[i]; ++i; } case_stmt(13) case_stmt(12)
case 5 : { vec0[i] = vec1[i]; ++i; } case_stmt(11) case_stmt(10)
case 4 : { vec0[i] = vec1[i]; ++i; } case_stmt( 9) case_stmt( 8)
case_stmt( 7) case_stmt( 6)
case_stmt( 5) case_stmt( 4)
#endif #endif
case 3 : { vec0[i] = vec1[i]; ++i; } case_stmt( 3) case_stmt( 2)
case 2 : { vec0[i] = vec1[i]; ++i; } case_stmt( 1)
case 1 : { vec0[i] = vec1[i]; ++i; }
} }
#undef exprtk_loop
#undef case_stmt
return vec0_node_ptr_->value(); return vec0_node_ptr_->value();
} }
else else
@ -8973,35 +9014,47 @@ namespace exprtk
T* vec = vec_node_ptr_->ref().data(); T* vec = vec_node_ptr_->ref().data();
loop_unroll::details lud(vec_size_); loop_unroll::details lud(vec_size_);
int i = 0; const T* upper_bound = vec + lud.upper_bound;
for (; i < lud.upper_bound; i += lud.batch_size) while (vec < upper_bound)
{ {
Operation::assign(vec[i ],v); #define exprtk_loop(N) \
Operation::assign(vec[i + 1],v); Operation::assign(vec[N],v); \
Operation::assign(vec[i + 2],v);
Operation::assign(vec[i + 3],v); exprtk_loop( 0) exprtk_loop( 1)
exprtk_loop( 2) exprtk_loop( 3)
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
Operation::assign(vec[i + 4],v); exprtk_loop( 4) exprtk_loop( 5)
Operation::assign(vec[i + 5],v); exprtk_loop( 6) exprtk_loop( 7)
Operation::assign(vec[i + 6],v); exprtk_loop( 8) exprtk_loop( 9)
Operation::assign(vec[i + 7],v); exprtk_loop(10) exprtk_loop(11)
exprtk_loop(12) exprtk_loop(13)
exprtk_loop(14) exprtk_loop(15)
#endif #endif
vec += lud.batch_size;
} }
switch (lud.remainder) switch (lud.remainder)
{ {
#define case_stmt(N) \
case N : Operation::assign(*vec++,v); \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 7 : Operation::assign(vec[i++],v); case_stmt(15) case_stmt(14)
case 6 : Operation::assign(vec[i++],v); case_stmt(13) case_stmt(12)
case 5 : Operation::assign(vec[i++],v); case_stmt(11) case_stmt(10)
case 4 : Operation::assign(vec[i++],v); case_stmt( 9) case_stmt( 8)
case_stmt( 7) case_stmt( 6)
case_stmt( 5) case_stmt( 4)
#endif #endif
case 3 : Operation::assign(vec[i++],v); case_stmt( 3) case_stmt( 2)
case 2 : Operation::assign(vec[i++],v); case_stmt( 1)
case 1 : Operation::assign(vec[i++],v);
} }
#undef exprtk_loop
#undef case_stmt
return vec_node_ptr_->value(); return vec_node_ptr_->value();
} }
else else
@ -9091,35 +9144,49 @@ namespace exprtk
T* vec1 = vec1_node_ptr_->ref().data(); T* vec1 = vec1_node_ptr_->ref().data();
loop_unroll::details lud(vec_size_); loop_unroll::details lud(vec_size_);
int i = 0; const T* upper_bound = vec0 + lud.upper_bound;
#define exprtk_loop(N) \ while (vec0 < upper_bound)
vec0[i + N] = Operation::process(vec0[i + N],vec1[i + N]); \
for (; i < lud.upper_bound; i += lud.batch_size)
{ {
exprtk_loop(0) exprtk_loop(1) #define exprtk_loop(N) \
exprtk_loop(2) exprtk_loop(3) vec0[N] = Operation::process(vec0[N],vec1[N]); \
exprtk_loop( 0) exprtk_loop( 1)
exprtk_loop( 2) exprtk_loop( 3)
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
exprtk_loop(4) exprtk_loop(5) exprtk_loop( 4) exprtk_loop( 5)
exprtk_loop(6) exprtk_loop(7) exprtk_loop( 6) exprtk_loop( 7)
exprtk_loop( 8) exprtk_loop( 9)
exprtk_loop(10) exprtk_loop(11)
exprtk_loop(12) exprtk_loop(13)
exprtk_loop(14) exprtk_loop(15)
#endif #endif
vec0 += lud.batch_size;
vec1 += lud.batch_size;
} }
int i = 0;
switch (lud.remainder) switch (lud.remainder)
{ {
#define case_stmt(N) \
case N : { vec0[i] = Operation::process(vec0[i],vec1[i]); ++i; } \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 7 : { exprtk_loop(0) ++i; } case_stmt(15) case_stmt(14)
case 6 : { exprtk_loop(0) ++i; } case_stmt(13) case_stmt(12)
case 5 : { exprtk_loop(0) ++i; } case_stmt(11) case_stmt(10)
case 4 : { exprtk_loop(0) ++i; } case_stmt( 9) case_stmt( 8)
case_stmt( 7) case_stmt( 6)
case_stmt( 5) case_stmt( 4)
#endif #endif
case 3 : { exprtk_loop(0) ++i; } case_stmt( 3) case_stmt( 2)
case 2 : { exprtk_loop(0) ++i; } case_stmt( 1)
case 1 : { exprtk_loop(0) ++i; }
} }
#undef exprtk_loop #undef exprtk_loop
#undef case_stmt
return vec0_node_ptr_->value(); return vec0_node_ptr_->value();
} }
@ -9516,37 +9583,52 @@ namespace exprtk
T* vec2 = (*temp_).data(); T* vec2 = (*temp_).data();
loop_unroll::details lud(vec_size_); loop_unroll::details lud(vec_size_);
int i = 0; const T* upper_bound = vec2 + lud.upper_bound;
#define exprtk_loop(N) \ while (vec2 < upper_bound)
vec2[i + N] = Operation::process(vec0[i + N],vec1[i + N]); \
for (; i < lud.upper_bound; i += lud.batch_size)
{ {
exprtk_loop(0) exprtk_loop(1) #define exprtk_loop(N) \
exprtk_loop(2) exprtk_loop(3) vec2[N] = Operation::process(vec0[N],vec1[N]); \
exprtk_loop( 0) exprtk_loop( 1)
exprtk_loop( 2) exprtk_loop( 3)
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
exprtk_loop(4) exprtk_loop(5) exprtk_loop( 4) exprtk_loop( 5)
exprtk_loop(6) exprtk_loop(7) exprtk_loop( 6) exprtk_loop( 7)
exprtk_loop( 8) exprtk_loop( 9)
exprtk_loop(10) exprtk_loop(11)
exprtk_loop(12) exprtk_loop(13)
exprtk_loop(14) exprtk_loop(15)
#endif #endif
vec0 += lud.batch_size;
vec1 += lud.batch_size;
vec2 += lud.batch_size;
} }
int i = 0;
switch (lud.remainder) switch (lud.remainder)
{ {
#define case_stmt(N) \
case N : { vec2[i] = Operation::process(vec0[i],vec1[i]); ++i; } \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 7 : { exprtk_loop(0); ++i; } case_stmt(15) case_stmt(14)
case 6 : { exprtk_loop(0); ++i; } case_stmt(13) case_stmt(12)
case 5 : { exprtk_loop(0); ++i; } case_stmt(11) case_stmt(10)
case 4 : { exprtk_loop(0); ++i; } case_stmt( 9) case_stmt( 8)
case_stmt( 7) case_stmt( 6)
case_stmt( 5) case_stmt( 4)
#endif #endif
case 3 : { exprtk_loop(0); ++i; } case_stmt( 3) case_stmt( 2)
case 2 : { exprtk_loop(0); ++i; } case_stmt( 1)
case 1 : { exprtk_loop(0); ++i; }
} }
#undef exprtk_loop #undef exprtk_loop
#undef case_stmt
return vec2[0]; return ((*temp_).data())[0];
} }
else else
return std::numeric_limits<T>::quiet_NaN(); return std::numeric_limits<T>::quiet_NaN();
@ -9646,37 +9728,51 @@ namespace exprtk
T* vec1 = (*temp_).data(); T* vec1 = (*temp_).data();
loop_unroll::details lud(vec_size_); loop_unroll::details lud(vec_size_);
int i = 0; const T* upper_bound = vec0 + lud.upper_bound;
#define exprtk_loop(N) \ while (vec0 < upper_bound)
vec1[i + N] = Operation::process(vec0[i + N],v); \
for (; i < lud.upper_bound; i += lud.batch_size)
{ {
exprtk_loop(0) exprtk_loop(1) #define exprtk_loop(N) \
exprtk_loop(2) exprtk_loop(3) vec1[N] = Operation::process(vec0[N],v); \
exprtk_loop( 0) exprtk_loop( 1)
exprtk_loop( 2) exprtk_loop( 3)
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
exprtk_loop(4) exprtk_loop(5) exprtk_loop( 4) exprtk_loop( 5)
exprtk_loop(6) exprtk_loop(7) exprtk_loop( 6) exprtk_loop( 7)
exprtk_loop( 8) exprtk_loop( 9)
exprtk_loop(10) exprtk_loop(11)
exprtk_loop(12) exprtk_loop(13)
exprtk_loop(14) exprtk_loop(15)
#endif #endif
vec0 += lud.batch_size;
vec1 += lud.batch_size;
} }
int i = 0;
switch (lud.remainder) switch (lud.remainder)
{ {
#define case_stmt(N) \
case N : { vec1[i] = Operation::process(vec0[i],v); ++i; } \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 7 : { exprtk_loop(0); ++i; } case_stmt(15) case_stmt(14)
case 6 : { exprtk_loop(0); ++i; } case_stmt(13) case_stmt(12)
case 5 : { exprtk_loop(0); ++i; } case_stmt(11) case_stmt(10)
case 4 : { exprtk_loop(0); ++i; } case_stmt( 9) case_stmt( 8)
case_stmt( 7) case_stmt( 6)
case_stmt( 5) case_stmt( 4)
#endif #endif
case 3 : { exprtk_loop(0); ++i; } case_stmt( 3) case_stmt( 2)
case 2 : { exprtk_loop(0); ++i; } case_stmt( 1)
case 1 : { exprtk_loop(0); ++i; }
} }
#undef exprtk_loop #undef exprtk_loop
#undef case_stmt
return vec1[0]; return (vec0_node_ptr_->ref().data())[0];
} }
else else
return std::numeric_limits<T>::quiet_NaN(); return std::numeric_limits<T>::quiet_NaN();
@ -9774,37 +9870,51 @@ namespace exprtk
T* vec2 = (*temp_).data(); T* vec2 = (*temp_).data();
loop_unroll::details lud(vec_size_); loop_unroll::details lud(vec_size_);
int i = 0; const T* upper_bound = vec1 + lud.upper_bound;
#define exprtk_loop(N) \ while (vec1 < upper_bound)
vec2[i + N] = Operation::process(v,vec1[i + N]); \
for (; i < lud.upper_bound; i += lud.batch_size)
{ {
exprtk_loop(0) exprtk_loop(1) #define exprtk_loop(N) \
exprtk_loop(2) exprtk_loop(3) vec2[N] = Operation::process(v,vec1[N]); \
exprtk_loop( 0) exprtk_loop( 1)
exprtk_loop( 2) exprtk_loop( 3)
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
exprtk_loop(4) exprtk_loop(5) exprtk_loop( 4) exprtk_loop( 5)
exprtk_loop(6) exprtk_loop(7) exprtk_loop( 6) exprtk_loop( 7)
exprtk_loop( 8) exprtk_loop( 9)
exprtk_loop(10) exprtk_loop(11)
exprtk_loop(12) exprtk_loop(13)
exprtk_loop(14) exprtk_loop(15)
#endif #endif
vec1 += lud.batch_size;
vec2 += lud.batch_size;
} }
int i = 0;
switch (lud.remainder) switch (lud.remainder)
{ {
#define case_stmt(N) \
case N : { vec2[i] = Operation::process(v,vec1[i]); ++i; } \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 7 : { exprtk_loop(0); ++i; } case_stmt(15) case_stmt(14)
case 6 : { exprtk_loop(0); ++i; } case_stmt(13) case_stmt(12)
case 5 : { exprtk_loop(0); ++i; } case_stmt(11) case_stmt(10)
case 4 : { exprtk_loop(0); ++i; } case_stmt( 9) case_stmt( 8)
case_stmt( 7) case_stmt( 6)
case_stmt( 5) case_stmt( 4)
#endif #endif
case 3 : { exprtk_loop(0); ++i; } case_stmt( 3) case_stmt( 2)
case 2 : { exprtk_loop(0); ++i; } case_stmt( 1)
case 1 : { exprtk_loop(0); ++i; }
} }
#undef exprtk_loop #undef exprtk_loop
#undef case_stmt
return vec2[0]; return ((*temp_).data())[0];
} }
else else
return std::numeric_limits<T>::quiet_NaN(); return std::numeric_limits<T>::quiet_NaN();
@ -9899,36 +10009,51 @@ namespace exprtk
T* vec1 = (*temp_).data(); T* vec1 = (*temp_).data();
loop_unroll::details lud(vec_size_); loop_unroll::details lud(vec_size_);
int i = 0; const T* upper_bound = vec0 + lud.upper_bound;
for (; i < lud.upper_bound; i += lud.batch_size) while (vec0 < upper_bound)
{ {
vec1[i ] = Operation::process(vec0[i ]); #define exprtk_loop(N) \
vec1[i + 1] = Operation::process(vec0[i + 1]); vec1[N] = Operation::process(vec0[N]); \
vec1[i + 2] = Operation::process(vec0[i + 2]);
vec1[i + 3] = Operation::process(vec0[i + 3]); exprtk_loop( 0) exprtk_loop( 1)
exprtk_loop( 2) exprtk_loop( 3)
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
vec1[i + 4] = Operation::process(vec0[i + 4]); exprtk_loop( 4) exprtk_loop( 5)
vec1[i + 5] = Operation::process(vec0[i + 5]); exprtk_loop( 6) exprtk_loop( 7)
vec1[i + 6] = Operation::process(vec0[i + 6]); exprtk_loop( 8) exprtk_loop( 9)
vec1[i + 7] = Operation::process(vec0[i + 7]); exprtk_loop(10) exprtk_loop(11)
exprtk_loop(12) exprtk_loop(13)
exprtk_loop(14) exprtk_loop(15)
#endif #endif
vec0 += lud.batch_size;
vec1 += lud.batch_size;
} }
int i = 0;
switch (lud.remainder) switch (lud.remainder)
{ {
#define case_stmt(N) \
case N : { vec1[i] = Operation::process(vec0[i]); ++i; } \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 7 : { vec1[i] = Operation::process(vec0[i]); ++i; } case_stmt(15) case_stmt(14)
case 6 : { vec1[i] = Operation::process(vec0[i]); ++i; } case_stmt(13) case_stmt(12)
case 5 : { vec1[i] = Operation::process(vec0[i]); ++i; } case_stmt(11) case_stmt(10)
case 4 : { vec1[i] = Operation::process(vec0[i]); ++i; } case_stmt( 9) case_stmt( 8)
case_stmt( 7) case_stmt( 6)
case_stmt( 5) case_stmt( 4)
#endif #endif
case 3 : { vec1[i] = Operation::process(vec0[i]); ++i; } case_stmt( 3) case_stmt( 2)
case 2 : { vec1[i] = Operation::process(vec0[i]); ++i; } case_stmt( 1)
case 1 : { vec1[i] = Operation::process(vec0[i]); ++i; }
} }
return vec1[0]; #undef exprtk_loop
#undef case_stmt
return ((*temp_).data())[0];
} }
else else
return std::numeric_limits<T>::quiet_NaN(); return std::numeric_limits<T>::quiet_NaN();
@ -11816,61 +11941,87 @@ namespace exprtk
const std::size_t vec_size = v->vec()->ref().size(); const std::size_t vec_size = v->vec()->ref().size();
loop_unroll::details lud(vec_size); loop_unroll::details lud(vec_size);
int i = 0;
if (vec_size <= static_cast<std::size_t>(lud.batch_size)) if (vec_size <= static_cast<std::size_t>(lud.batch_size))
{ {
T result = T(0); T result = T(0);
int i = 0;
switch (vec_size) switch (vec_size)
{ {
#define case_stmt(N) \
case N : result += vec[i++]; \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 8 : result += vec[i++]; case_stmt(16) case_stmt(15)
case 7 : result += vec[i++]; case_stmt(14) case_stmt(13)
case 6 : result += vec[i++]; case_stmt(12) case_stmt(11)
case 5 : result += vec[i++]; case_stmt(10) case_stmt( 9)
case_stmt( 8) case_stmt( 7)
case_stmt( 6) case_stmt( 5)
#endif #endif
case 4 : result += vec[i++]; case_stmt( 4) case_stmt( 3)
case 3 : result += vec[i++]; case_stmt( 2) case_stmt( 1)
case 2 : result += vec[i++];
case 1 : result += vec[i++];
} }
#undef case_stmt
return result; return result;
} }
T r[8] = { T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0) }; T r[] = {
T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0),
T(0), T(0), T(0), T(0), T(0), T(0), T(0), T(0)
};
for (; i < lud.upper_bound; i += lud.batch_size) const T* upper_bound = vec + lud.upper_bound;
while (vec < upper_bound)
{ {
r[0] += vec[i ]; #define exprtk_loop(N) \
r[1] += vec[i + 1]; r[N] += vec[N]; \
r[2] += vec[i + 2];
r[3] += vec[i + 3]; exprtk_loop( 0) exprtk_loop( 1)
exprtk_loop( 2) exprtk_loop( 3)
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
r[4] += vec[i + 4]; exprtk_loop( 4) exprtk_loop( 5)
r[5] += vec[i + 5]; exprtk_loop( 6) exprtk_loop( 7)
r[6] += vec[i + 6]; exprtk_loop( 8) exprtk_loop( 9)
r[7] += vec[i + 7]; exprtk_loop(10) exprtk_loop(11)
exprtk_loop(12) exprtk_loop(13)
exprtk_loop(14) exprtk_loop(15)
#endif #endif
vec += lud.batch_size;
} }
int i = 0;
switch (lud.remainder) switch (lud.remainder)
{ {
#define case_stmt(N) \
case N : r[0] += vec[i++]; \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 7 : r[0] += vec[i++]; case_stmt(15) case_stmt(14)
case 6 : r[0] += vec[i++]; case_stmt(13) case_stmt(12)
case 5 : r[0] += vec[i++]; case_stmt(11) case_stmt(10)
case 4 : r[0] += vec[i++]; case_stmt( 9) case_stmt( 8)
case_stmt( 7) case_stmt( 6)
case_stmt( 5) case_stmt( 4)
#endif #endif
case 3 : r[0] += vec[i++]; case_stmt( 3) case_stmt( 2)
case 2 : r[0] += vec[i++]; case_stmt( 1)
case 1 : r[0] += vec[i++];
} }
return (r[0] + r[1] + r[2] + r[3]) #undef exprtk_loop
#undef case_stmt
return (r[ 0] + r[ 1] + r[ 2] + r[ 3])
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
+ (r[4] + r[5] + r[6] + r[7]) + (r[ 4] + r[ 5] + r[ 6] + r[ 7])
+ (r[ 8] + r[ 9] + r[10] + r[11])
+ (r[12] + r[13] + r[14] + r[15])
#endif #endif
; ;
} }
@ -11887,61 +12038,87 @@ namespace exprtk
const std::size_t vec_size = v->vec()->ref().size(); const std::size_t vec_size = v->vec()->ref().size();
loop_unroll::details lud(vec_size); loop_unroll::details lud(vec_size);
int i = 0;
if (vec_size <= static_cast<std::size_t>(lud.batch_size)) if (vec_size <= static_cast<std::size_t>(lud.batch_size))
{ {
T result = T(1); T result = T(1);
int i = 0;
switch (vec_size) switch (vec_size)
{ {
#define case_stmt(N) \
case N : result *= vec[i++]; \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 8 : result *= vec[i++]; case_stmt(16) case_stmt(15)
case 7 : result *= vec[i++]; case_stmt(14) case_stmt(13)
case 6 : result *= vec[i++]; case_stmt(12) case_stmt(11)
case 5 : result *= vec[i++]; case_stmt(10) case_stmt( 9)
case_stmt( 8) case_stmt( 7)
case_stmt( 6) case_stmt( 5)
#endif #endif
case 4 : result *= vec[i++]; case_stmt( 4) case_stmt( 3)
case 3 : result *= vec[i++]; case_stmt( 2) case_stmt( 1)
case 2 : result *= vec[i++];
case 1 : result *= vec[i++];
} }
#undef case_stmt
return result; return result;
} }
T r[8] = { T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1) }; T r[] = {
T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1),
T(1), T(1), T(1), T(1), T(1), T(1), T(1), T(1)
};
for (; i < lud.upper_bound; i += lud.batch_size) const T* upper_bound = vec + lud.upper_bound;
while (vec < upper_bound)
{ {
r[0] *= (vec[i ]); #define exprtk_loop(N) \
r[1] *= (vec[i + 1]); r[N] *= vec[N]; \
r[2] *= (vec[i + 2]);
r[3] *= (vec[i + 3]); exprtk_loop( 0) exprtk_loop( 1)
exprtk_loop( 2) exprtk_loop( 3)
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
r[4] *= (vec[i + 4]); exprtk_loop( 4) exprtk_loop( 5)
r[5] *= (vec[i + 5]); exprtk_loop( 6) exprtk_loop( 7)
r[6] *= (vec[i + 6]); exprtk_loop( 8) exprtk_loop( 9)
r[7] *= (vec[i + 7]); exprtk_loop(10) exprtk_loop(11)
exprtk_loop(12) exprtk_loop(13)
exprtk_loop(14) exprtk_loop(15)
#endif #endif
vec += lud.batch_size;
} }
int i = 0;
switch (lud.remainder) switch (lud.remainder)
{ {
#define case_stmt(N) \
case N : r[0] *= vec[i++]; \
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
case 7 : r[0] *= (vec[i++]); case_stmt(15) case_stmt(14)
case 6 : r[0] *= (vec[i++]); case_stmt(13) case_stmt(12)
case 5 : r[0] *= (vec[i++]); case_stmt(11) case_stmt(10)
case 4 : r[0] *= (vec[i++]); case_stmt( 9) case_stmt( 8)
case_stmt( 7) case_stmt( 6)
case_stmt( 5) case_stmt( 4)
#endif #endif
case 3 : r[0] *= (vec[i++]); case_stmt( 3) case_stmt( 2)
case 2 : r[0] *= (vec[i++]); case_stmt( 1)
case 1 : r[0] *= (vec[i++]);
} }
return (r[0] * r[1] * r[2] * r[3]) #undef exprtk_loop
#undef case_stmt
return (r[ 0] * r[ 1] * r[ 2] * r[ 3])
#ifndef exprtk_disable_superscalar_unroll #ifndef exprtk_disable_superscalar_unroll
* (r[4] * r[5] * r[6] * r[7]) + (r[ 4] * r[ 5] * r[ 6] * r[ 7])
+ (r[ 8] * r[ 9] * r[10] * r[11])
+ (r[12] * r[13] * r[14] * r[15])
#endif #endif
; ;
} }
@ -11954,42 +12131,9 @@ namespace exprtk
static inline T process(const ivector_ptr v) static inline T process(const ivector_ptr v)
{ {
const T* vec = v->vec()->ref().data();
const std::size_t vec_size = v->vec()->ref().size(); const std::size_t vec_size = v->vec()->ref().size();
T result = T(0); return vec_add_op<T>::process(v) / vec_size;
loop_unroll::details lud(vec_size);
int i = 0;
for (; i < lud.upper_bound; i += lud.batch_size)
{
result += vec[i ];
result += vec[i + 1];
result += vec[i + 2];
result += vec[i + 3];
#ifndef exprtk_disable_superscalar_unroll
result += vec[i + 4];
result += vec[i + 5];
result += vec[i + 6];
result += vec[i + 7];
#endif
}
switch (lud.remainder)
{
#ifndef exprtk_disable_superscalar_unroll
case 7 : result += vec[i++];
case 6 : result += vec[i++];
case 5 : result += vec[i++];
case 4 : result += vec[i++];
#endif
case 3 : result += vec[i++];
case 2 : result += vec[i++];
case 1 : result += vec[i++];
}
return result / vec_size;
} }
}; };
@ -12032,6 +12176,7 @@ namespace exprtk
for (std::size_t i = 1; i < vec_size; ++i) for (std::size_t i = 1; i < vec_size; ++i)
{ {
T v_i = vec[i]; T v_i = vec[i];
if (v_i > result) if (v_i > result)
result = v_i; result = v_i;
} }