Désoptimisation et comment faire un benchmark ?
Hello,
J'essaie d'écrire un code simple de benchmarking, mais mon cas semble trop simple et est complètement optimisé par le compilateur...
Voici le code
Code:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
| #include <chrono>
#include <iostream>
/// Abstract base of a small hierarchy that carries an explicit type tag, so
/// that the dynamic type can be tested either through RTTI or through the tag.
struct A {
    /// Tags identifying the concrete derived types.
    enum class Type { B, C, };

    /// Stores `_t` as the tag of the object being constructed.
    A(Type _t) : type(_t) {}

    /// Pure virtual destructor: keeps A abstract and makes the hierarchy
    /// polymorphic. Its body is defined right after the class, because
    /// standard C++ does not allow a pure-specifier and a definition in the
    /// same declaration (`= 0 {}` is a compiler extension).
    virtual ~A() = 0;

    /// Tag initialized by the constructor.
    Type type;

    /// Returns true when the stored tag equals `T::StaticType`.
    /// Only compares two enum values; no RTTI is involved.
    template<class T>
    bool is() const { return T::StaticType == type; }

    /// Returns true when `dynamic_cast` of `this` to `const T*` succeeds.
    template<class T>
    bool is_using_dynamic_cast() const { return dynamic_cast<const T*>(this) != nullptr; }

    /// Unchecked downcast to `T*` via `static_cast`; no verification is done
    /// here, so the caller has to test the type first.
    template<class T>
    T* as() { return static_cast<T*>(this); }
};

// A pure virtual destructor still needs a definition, because every derived
// destructor calls it. `inline` keeps the definition header-safe.
inline A::~A() = default;
/// Concrete type tagged `Type::B`; the only type here that exposes value().
struct B : public A {
    /// Tag that `A::is<B>()` compares against the stored `type`.
    static const Type StaticType = Type::B;

    /// Constructs the A base with the `Type::B` tag.
    B()
        : A(Type::B)
    {
    }

    /// Always returns 1.
    size_t value() const
    {
        return 1;
    }
};
/// Concrete type tagged `Type::C`; it adds no members beyond its tag.
struct C : public A {
    /// Tag that `A::is<C>()` compares against the stored `type`.
    static const Type StaticType = Type::C;

    /// Constructs the A base with the `Type::C` tag.
    C()
        : A(Type::C)
    {
    }
};
// Benchmark driver: times five ways of checking that `a` points to a B, each
// over 100000000 iterations, and prints the elapsed time in nanoseconds.
//
// NOTE(review): in every section `n` is accumulated but never read after the
// loop, and `a` is a local that always points to the same B. An optimizer is
// therefore free to remove part or all of the measured work -- presumably what
// happens in the enum section; confirm by inspecting the generated assembly.
int main() {
B b;
// `a` is never reassigned, so every type check below succeeds on every iteration.
A* a = &b;
// Section 1: dynamic_cast written directly in the loop.
{
size_t n = 0;
const auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < 100000000; ++i)
{
// This `b` (a B*) shadows the outer object `b` inside the if.
if (auto b = dynamic_cast<B*>(a))
{
n += b->value();
}
}
const auto end = std::chrono::high_resolution_clock::now();
const auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
std::cout << elapsed << " ns with dynamic_cast" << std::endl;
}
// Section 2: compare the std::type_info objects with operator==.
{
size_t n = 0;
const auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < 100000000; ++i)
{
if (typeid(*a) == typeid(B))
{
n += static_cast<B*>(a)->value();
}
}
const auto end = std::chrono::high_resolution_clock::now();
const auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
std::cout << elapsed << " ns with typeid" << std::endl;
}
// Section 3: compare the addresses of the std::type_info objects.
// NOTE(review): the language does not guarantee that two typeid expressions
// on the same type refer to the same type_info object, so this comparison
// is not portable -- confirm before relying on it.
{
size_t n = 0;
const auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < 100000000; ++i)
{
if (&typeid(*a) == &typeid(B))
{
n += static_cast<B*>(a)->value();
}
}
const auto end = std::chrono::high_resolution_clock::now();
const auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
std::cout << elapsed << " ns with &typeid" << std::endl;
}
// Section 4: dynamic_cast hidden behind A::is_using_dynamic_cast<B>().
{
size_t n = 0;
const auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < 100000000; ++i)
{
if (a->is_using_dynamic_cast<B>())
{
n += a->as<B>()->value();
}
}
const auto end = std::chrono::high_resolution_clock::now();
const auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
std::cout << elapsed << " ns with is_using_dynamic_cast (internal dynamic_cast)" << std::endl;
}
// Section 5: enum tag comparison through A::is<B>(); no RTTI is involved.
{
size_t n = 0;
const auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < 100000000; ++i)
{
if (a->is<B>())
{
n += a->as<B>()->value();
}
}
const auto end = std::chrono::high_resolution_clock::now();
const auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
std::cout << elapsed << " ns with is (enum)" << std::endl;
}
return 0;
} |
Rien de fabuleux, mais voici quelques sorties :
Code:
1 2 3 4 5
| 1479042506 ns with dynamic_cast
541149801 ns with typeid
386570206 ns with &typeid
1355755663 ns with is_using_dynamic_cast (internal dynamic_cast)
256 ns with is (enum) |
Code:
1 2 3 4 5
| 1479745740 ns with dynamic_cast
536910174 ns with typeid
385768924 ns with &typeid
1392601325 ns with is_using_dynamic_cast (internal dynamic_cast)
0 ns with is (enum) |
Code:
1 2 3 4 5
| 1473431996 ns with dynamic_cast
554813324 ns with typeid
384597977 ns with &typeid
1270466484 ns with is_using_dynamic_cast (internal dynamic_cast)
0 ns with is (enum) |
On peut voir que le dernier cas, celui qui m'intéresse particulièrement, est un peu... rapide ?
Vu que ça implique du template, et que le main est simple, je pense que c'est optimisé et calculé pendant la compilation...
Du coup, j'ai tenté avec une sorte de classe intermédiaire :
Code:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
| #include <chrono>
#include <iostream>
/// Abstract base of a small hierarchy that carries an explicit type tag, so
/// that the dynamic type can be tested either through RTTI or through the tag.
struct A {
    /// Tags identifying the concrete derived types.
    enum class Type { B, C, };

    /// Stores `_t` as the tag of the object being constructed.
    A(Type _t) : type(_t) {}

    /// Pure virtual destructor: keeps A abstract and makes the hierarchy
    /// polymorphic. Its body is defined right after the class, because
    /// standard C++ does not allow a pure-specifier and a definition in the
    /// same declaration (`= 0 {}` is a compiler extension).
    virtual ~A() = 0;

    /// Tag initialized by the constructor.
    Type type;

    /// Returns true when the stored tag equals `T::StaticType`.
    /// Only compares two enum values; no RTTI is involved.
    template<class T>
    bool is() const { return T::StaticType == type; }

    /// Returns true when `dynamic_cast` of `this` to `const T*` succeeds.
    template<class T>
    bool is_using_dynamic_cast() const { return dynamic_cast<const T*>(this) != nullptr; }

    /// Unchecked downcast to `T*` via `static_cast`; no verification is done
    /// here, so the caller has to test the type first.
    template<class T>
    T* as() { return static_cast<T*>(this); }
};

// A pure virtual destructor still needs a definition, because every derived
// destructor calls it. `inline` keeps the definition header-safe.
inline A::~A() = default;
/// Concrete type tagged `Type::B`; the only type here that exposes value().
struct B : public A {
    /// Tag that `A::is<B>()` compares against the stored `type`.
    static const Type StaticType = Type::B;

    /// Constructs the A base with the `Type::B` tag.
    B()
        : A(Type::B)
    {
    }

    /// Always returns 1.
    size_t value() const
    {
        return 1;
    }
};
/// Concrete type tagged `Type::C`; it adds no members beyond its tag.
struct C : public A {
    /// Tag that `A::is<C>()` compares against the stored `type`.
    static const Type StaticType = Type::C;

    /// Constructs the A base with the `Type::C` tag.
    C()
        : A(Type::C)
    {
    }
};
#include <functional>
#include <utility>
/// Wraps a predicate over `const A*` in a std::function, so that every call
/// goes through a type-erased indirection.
class Checker {
public:
    /// Stores `_f` as the predicate.
    /// The argument is taken by value and moved into the member, so the
    /// callable is not copied a second time.
    Checker(std::function<bool(const A*)> _f) : func(std::move(_f)) {}

    /// Calls the stored predicate with `a` and returns its result.
    bool operator()(const A* a) const { return func(a); }

private:
    /// The wrapped predicate.
    std::function<bool(const A*)> func;
};
// Benchmark driver: same five type checks as before, but each one is wrapped in
// a Checker, so every iteration calls the predicate through std::function.
// Each section runs 100000000 iterations and prints the elapsed nanoseconds.
//
// NOTE(review): `n` is still never read after the loops. The timings also
// include the cost of the std::function call itself -- presumably that
// indirection is what stops the optimizer from removing the loops; confirm by
// inspecting the generated assembly.
int main() {
B b;
// `a` is never reassigned, so every predicate below returns true on every iteration.
A* a = &b;
// In each lambda the parameter `a` shadows the outer `a`; the [&] capture is
// not used by any of the lambda bodies.
// Section 1: dynamic_cast inside the predicate.
{
size_t n = 0;
Checker check([&](const A* a) { return dynamic_cast<const B*>(a) != nullptr; });
const auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < 100000000; ++i)
{
if (check(a))
{
n += static_cast<B*>(a)->value();
}
}
const auto end = std::chrono::high_resolution_clock::now();
const auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
std::cout << elapsed << " ns with dynamic_cast" << std::endl;
}
// Section 2: compare the std::type_info objects with operator==.
{
size_t n = 0;
Checker check([&](const A* a) { return typeid(*a) == typeid(B); });
const auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < 100000000; ++i)
{
if (check(a))
{
n += static_cast<B*>(a)->value();
}
}
const auto end = std::chrono::high_resolution_clock::now();
const auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
std::cout << elapsed << " ns with typeid" << std::endl;
}
// Section 3: compare the addresses of the std::type_info objects.
// NOTE(review): the language does not guarantee that two typeid expressions
// on the same type refer to the same type_info object, so this comparison
// is not portable -- confirm before relying on it.
{
size_t n = 0;
Checker check([&](const A* a) { return &typeid(*a) == &typeid(B); });
const auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < 100000000; ++i)
{
if (check(a))
{
n += static_cast<B*>(a)->value();
}
}
const auto end = std::chrono::high_resolution_clock::now();
const auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
std::cout << elapsed << " ns with &typeid" << std::endl;
}
// Section 4: dynamic_cast hidden behind A::is_using_dynamic_cast<B>().
{
size_t n = 0;
Checker check([&](const A* a) { return a->is_using_dynamic_cast<B>(); });
const auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < 100000000; ++i)
{
if (check(a))
{
n += a->as<B>()->value();
}
}
const auto end = std::chrono::high_resolution_clock::now();
const auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
std::cout << elapsed << " ns with is_using_dynamic_cast (internal dynamic_cast)" << std::endl;
}
// Section 5: enum tag comparison through A::is<B>(); no RTTI is involved.
{
size_t n = 0;
Checker check([&](const A* a) { return a->is<B>(); });
const auto start = std::chrono::high_resolution_clock::now();
for (size_t i = 0; i < 100000000; ++i)
{
if (check(a))
{
n += a->as<B>()->value();
}
}
const auto end = std::chrono::high_resolution_clock::now();
const auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
std::cout << elapsed << " ns with is (enum)" << std::endl;
}
return 0;
} |
La sortie semble plus normale:
Code:
1 2 3 4 5
| 1193004782 ns with dynamic_cast
561807262 ns with typeid
449779071 ns with &typeid
1097718778 ns with is_using_dynamic_cast (internal dynamic_cast)
141872748 ns with is (enum) |
Code:
1 2 3 4 5
| 1713590819 ns with dynamic_cast
723503932 ns with typeid
553266825 ns with &typeid
1423687229 ns with is_using_dynamic_cast (internal dynamic_cast)
154689676 ns with is (enum) |
Et les résultats vont dans le sens que j'attends, mais est-ce vraiment probant selon vous ?
Merci