Salut tout le monde!

Ca fait plaisir de revenir dans le coin lol

Alors mon problème du jour, c'est le suivant. Je m'amuse à faire un formatter Newick générique (un format d'arbre qui plaît beaucoup aux biologistes) en essayant d'utiliser les features de C++20 (depuis le temps que je voulais tâter du concept!).

Malheureusement, après avoir bien embếté la communauté anglophone avec des exemples minimalistes, j'ai atteint la limite de ce que j'arrive à empiler sans que le compilateur ne me bougonne. Cela marchait relativement bien avant que je n'ajoute des classes de politique à l'affaire, et maintenant ça ne veut plus compiler avec GCC. Sauriez-vous pourquoi? Le code ci-dessous est un peu long, mais compile online avec Clang14: https://gcc.godbolt.org/z/cq954nx8W

Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
 
 
#include<concepts>
#include<regex>
#include<string>
#include<random>
#include<cassert>
#include<iostream>
 
namespace quetzal
{
  namespace format
  {
    ///
    /// @brief Tag
    ///
    struct parenthesis {};
    ///
    /// @brief Tag
    ///
    struct square_bracket {};
    ///
    /// @brief Check if the string is balanced for open/close symbols (parenthesis,brackets)
    ///
    ///
    /// @note Since parenthesis checking is a context-free grammar, it requires a stack.
    ///       Regex can not accomplish that since they do not have memory.
    ///
    bool check_if_balanced(const std::string & input, const char & open='(', const char & close=')')
    {
      int count = 0;
      for(const auto & ch : input)
      {
        if (ch == open ) count++;
        if (ch == close ) count--;
        // if a parenthesis is closed without being opened return false
        if(count < 0)
        return false;
      }
      // in the end the test is passed only if count is zero
      return count == 0;
    }
    ///
    /// @brief Do not change anything
    ///
    struct identity
    {
      static std::string edit(const std::string& s)
      {
        return s;
      }
    };
    ///
    /// @brief Class template
    ///
    template<class tag>
    struct is_balanced
    {};
    ///
    /// @brief Specialization for parenthesis
    ///
    template<> struct is_balanced<parenthesis>
    {
      static bool check(const std::string& s)
      {
        return check_if_balanced(s, '(', ')' );
      }
    };
    ///
    /// @brief Specialization for square bracket
    ///
    template<> struct is_balanced<square_bracket>
    {
      static bool check(const std::string& s)
      {
        return check_if_balanced(s, '[', ']');
      }
    };
 
    namespace newick
    {
      ///
      /// @brief Node names can be any character except blanks, colons, semicolons, parentheses, and square brackets.
      ///
      // static inline constexpr std::vector<std::string> forbidden_labels = {" ",",",";","(",")","()",")(","[","]","[]","]["};
      ///
      /// @brief Underscore characters in unquoted labels are converted to blanks.
      ///
      /// @detail Because you may want to include a blank in a name, it is assumed
      ///         that an underscore character ("_") stands for a blank; any of
      ///         these in a name will be converted to a blank when it is read in.
      ///
      //static inline constexpr char blank = '_';
      ///
      /// @brief Template class.
      ///
      template<unsigned int N>
      struct remove_comments_of_depth
      {};
      ///
      /// @brief Do not remove anything
      ///
      template<> struct remove_comments_of_depth<0> : identity
      {};
      ///
      /// @brief Remove all comments substrings contained between square brackets
      ///
      /// @note Assumes that text is well formatted so there are no such things like [[]] or unclosed bracket
      ///
      template<> struct remove_comments_of_depth<1>
      {
        static std::string edit(const std::string& s)
        {
          return std::regex_replace(s, std::regex(R"(\[[^()]*\])"), "");
        }
      };
      ///
      /// @brief Remove all comments substrings contained between square brackets of depth 2
      ///
      /// @note Assumes that text is well formatted so there are no such things like [[]] or unclosed bracket
      ///
      template<> struct remove_comments_of_depth<2>
      {
        static std::string edit(const std::string& s)
        {
          std::string buffer;
          int counter = 0;
          for (const auto & ch : s)
          {
            if (ch == '[' ) counter++;
            if (ch == ']' ) counter--;
            if ( ch == '[' && counter == 2) continue;  // do nothing, that was the opening
            if ( ch == ']' && counter == 1) continue; // do nothing, that was the closing
            if ( !( counter >=2 || (counter == 1 && ch == ']') ) ) buffer.append(std::string(1, ch));
          }
          return buffer;
        }
      };
      ///
      /// @brief Allow nested comments.
      ///
      struct PAUP
      {
        // return empty string
        static inline std::string root_branch_length() { return "";}
        // do nothing
        static inline std::string treat_comments(std::string & s)
        {
          return s;
        }
      };
      ///
      /// @brief Set a root node branch length to zero. Remove all nested comments.
      ///
      struct TreeAlign
      {
        // Set explicit null branch length for root node
        static inline std::string root_branch_length() { return ":0.0";}
        // Remove comments that are nested, keep comments of depth 1
        static inline std::string treat_comments(const std::string &s)
        {
          return remove_comments_of_depth<2>::edit(s);
        }
      };
      ///
      /// @brief Requires that an unrooted tree begin with a trifurcation; it will not "uproot" a rooted tree.
      ///
      struct PHYLIP
      {
        // Branch length for root node is not explicit.
        static inline std::string root_branch_length(){return "";}
        // Remove comments that are nested, keep comments of depth 1
        static inline std::string treat_comments(std::string & s)
        {
          // Allow comments of depth 1, but does not allow nested comments.
          return remove_comments_of_depth<2>::edit(s);
        }
      };
      ///
      /// @brief Concept for label name
      ///
      template<class F, class... Args>
      concept Formattable = std::invocable<F, Args...> &&
      std::convertible_to<std::invoke_result_t<F, Args...>, std::string>;
 
      ///
      /// @brief Generic algorithm to generate the Newick formula of a tree.
      ///
      template<class T, std::predicate<T> P1 , std::predicate<T> P2, Formattable<T> F1, Formattable<T> F2, class Policy=PAUP>
      class Formatter : public Policy
      {
      public:
        ///
        /// @brief Type of node being formatted
        ///
        using node_type = T;
        ///
        /// @brief Type of formula being generated
        ///
        using formula_type = std::string;
        ///
        /// @brief Type of formula being generated
        ///
        using policy_type = Policy;
 
      private:
        ///
        /// @brief End character.
        ///
        static inline constexpr char _end = ';';
        ///
        /// @brief The string formula to be updated.
        ///
        mutable formula_type _formula;
        ///
        /// @brief Functor inspecting if the node being visited has a parent.
        ///
        P1 _has_parent;
 
        ///
        /// @brief Functor inspecting if the node being visited has children.
        ///
        P2 _has_children;
 
        ///
        /// @brief Retrieve the name of the node being visited.
        ///
        /// @detail A name can be any string of printable characters except blanks,
        ///         colons, semicolons, parentheses, and square brackets.
        ///
        /// @remark Return type must be convertible to std::string
        ///
        F1 _label;
 
        ///
        /// @brief Retrieve the branch length immediately above the node being visited.
        ///
        /// @details Branch lengths can be incorporated into a tree by putting a
        ///          real number, with or without decimal point, after a node and
        ///          preceded by a colon. This represents the length of the branch
        ///          immediately above that node (that is, distance to parent node)
        /// @remark Return type must be convertible to std::string
        ///
        F2 _branch_length;
 
        ///
        /// @brief Operation called in the general DFS algorithm to open a parenthesis if node has children to be visited.
        ///
        /// @param node the node currently visited
        ///
        void _pre_order(const node_type & node) const
        {
          if(std::invoke(_has_children, node))
          {
            _formula += "(";
          }
        }
 
        ///
        /// @brief Operation called in the general DFS algorithm to add a comma between visited nodes.
        ///
        void _in_order() const
        {
          _formula += ",";
        }
 
        ///
        /// @brief Operation called in the general DFS algorithm to open a parenthesis if node has children to be visited.
        ///
        /// @param node the node currently visited
        ///
        void _post_order(const node_type & node) const
        {
          if(std::invoke(_has_children, node))
          {
            // Remove comma
            _formula.pop_back();
            _formula += ")";
          }
 
          if(_has_parent(node))
          {
            _formula += std::invoke(_label, node);
            auto branch = std::invoke(_branch_length, node);
            if( branch != "")
            {
              _formula += ":";
              _formula += branch;
            }
          }else{
            _formula += std::invoke(_label, node);
            _formula += policy_type::root_branch_length();
          }
        }
 
      public:
 
        ///
        /// @brief Constructor
        ///
        Formatter(P1 &&has_parent, P2 &&has_children, F1 &&label, F2 &&branch_length):
        _has_parent(std::forward<P1>(has_parent)),
        _has_children(std::forward<P2>(has_children)),
        _label(std::forward<F1>(label)),
        _branch_length(std::forward<F2>(branch_length))
        {
        }
 
        ///
        /// @brief Operation called in the general DFS algorithm to open a parenthesis if node has children to be visited.
        ///
        /// @param node the node currently visited
        ///
        auto pre_order()
        {
          return [this](const node_type & node){this->_pre_order(node);};
        }
        ///
        /// @brief Operation called in the general DFS algorithm to add a comma between visited nodes.
        ///
        auto in_order()
        {
          return [this](){this->_in_order();};
        }
        ///
        /// @brief Operation to be passed to a generic DFS algorithm to open a parenthesis if node has children to be visited.
        ///
        /// @param node the node currently visited
        ///
        auto post_order()
        {
          return [this](const node_type & node){this->_post_order(node);};
        }
        ///
        /// @brief Clear the formula buffer.
        ///
        void clear()
        {
          _formula.clear();
        }
        ///
        /// @brief Retrieve the formatted string of the given node in the specified format
        ///
        formula_type get() const
        {
          // that or expose orders
          // root.visit_by_generic_DFS(preorder, inorder, postorder);
 
          _formula.push_back(this->_end);
 
          _formula = policy_type::treat_comments(_formula);
 
          if(is_balanced<parenthesis>::check(_formula) == false)
          {
            throw std::runtime_error(std::string("Failed: formula parenthesis are not balanced:") + _formula);
          }
 
          if(is_balanced<square_bracket>::check(_formula) == false)
          {
            throw std::runtime_error(std::string("Failed: formula square brackets are not balanced:") + _formula);
          }
 
          return _formula;
        }
      }; // end structrure Newick
 
      // Replacement for `std::function<T(U)>::argument_type`
      template<typename T> struct single_function_argument;
      template<typename Ret, typename Arg> struct single_function_argument<std::function<Ret(Arg)>> { using type = Arg; };
 
      // type alias for passed "P1"'s function argument type
      template<typename P1>
      using single_function_argument_t = typename single_function_argument<decltype(std::function{ std::declval<P1>() }) >::type;
 
      // Deduction guide: type T is deduced from P1
      template<class P1, class P2, class F1, class F2, class Policy=PAUP>
      Formatter(P1 &&, P2 &&, F1 &&, F2 &&) -> Formatter <single_function_argument_t<P1>, P1, P2, F1, F2, Policy>;
 
      ///
      /// @brief to use for template `lambda [](const auto& s) {}` e.g. `make_formatter<Node>``
      ///
      template<class P1, class P2, class F1, class F2, class Policy=PAUP>
      auto make_formatter(P1 &&has_parent, P2 &&has_children, F1 &&label, F2 && branch_length)
      {
        // Use Class template argument deduction (CTAD)
        return Formatter<single_function_argument_t<P1>, P1, P2, F1, F2, Policy>(
          std::forward<P1>(has_parent),
          std::forward<P2>(has_children),
          std::forward<F1>(label),
          std::forward<F2>(branch_length)
        );
      }
 
      ///
      /// @brief Can still specify type manually if you want, to use for template `lambda [](const auto& s) {}` e.g. `make_formatter<Node>``
      ///
      template<class T, std::predicate<T> P1, std::predicate<T> P2, Formattable F1, Formattable F2, class Policy>
      auto make_formatter(P1 &&has_parent, P2 &&has_children, F1 &&label, F2 && branch_length)
      {
        // Use Class template argument deduction (CTAD)
        return Formatter<T, P1, P2, F1, F2, Policy>(
          std::forward<P1>(has_parent),
          std::forward<P2>(has_children),
          std::forward<F1>(label),
          std::forward<F2>(branch_length)
        );
      }
    } // end namespace newick
  } // end namespace format
} // end namespace quetzal
 
// Simplistic tree for testing
struct Node
{
  Node *parent;
  Node *left;
  Node *right;
  char data;
 
  template<class Op1, class Op2, class Op3>
  void depth_first_search(Op1 pre_order, Op2 in_order, Op3 post_order){
    pre_order(*this);
    if(this->left != nullptr && this->right != nullptr)
    {
      this->left->depth_first_search(pre_order, in_order, post_order);
      in_order();
      this->right->depth_first_search(pre_order, in_order, post_order);
      in_order();
      post_order(*this);
    }
  }
} ;
 
int main()
{
  namespace newick = quetzal::format::newick;
 
  /* Topology :
  *             a
  *           /  \
  *          /    c
  *         /      \\
  *        b       d e
   */
 
   Node a; a.data = 'a';
   Node b; b.data = 'b';
   Node c; c.data = 'c';
   Node d; d.data = 'd';
   Node e; e.data = 'e';
 
   a.left = &b ; b.parent = &a;
   a.right = &c; c.parent = &a;
   c.left = &d ; d.parent = &c;
   c.right = &e; e.parent = &c;
 
  // Simplest case
 
  // Interface Quetzal formatter with non-quetzal tree types
  std::predicate<Node> auto has_parent = [](const Node& n){return n.parent != nullptr;};
  std::predicate<Node> auto has_children = [](const Node& n){return n.left != nullptr && n.right != nullptr;};
 
  // No data to format, just the topology
  newick::Formattable<Node> auto no_label = [](const Node& ){return "";};
  newick::Formattable<Node> auto no_branch_length = [](const Node&){return "";};
 
  // Create a formatter
  auto formatter_1 = newick::make_formatter(has_parent, has_children, no_label, no_branch_length);
  // Expose its interface to your data-specific DFS algorithm
  a.depth_first_search(formatter_1.pre_order(), formatter_1.in_order(), formatter_1.post_order());
 
  // Retrieving the string
  std::string s = formatter_1.get();
  assert(s == "(,(,));");
 
  // Non-trivial data acquisition and formatting
 
  // Get a seed for the random number engine
  std::random_device rd;
  // Standard mersenne_twister_engine seeded with rd()
  std::mt19937 gen(rd());
  // Arbitrary banch length distribution
  std::uniform_real_distribution<> dis(0.0, 2.0);
  // Random data generation
  newick::Formattable<Node> auto branch_length = [&gen,&dis](const Node&){return std::to_string(dis(gen));};
  // More sophisticated label formatting
  newick::Formattable<Node> auto label = [](const Node& n ){return std::string(1, n.data) + "[my[comment]]";};
  // Call the formatter
  auto formatter_2 = newick::make_formatter(has_parent, has_children, label, branch_length);
  a.depth_first_search(formatter_2.pre_order(), formatter_2.in_order(), formatter_2.post_order());
  std::cout << formatter_2.get() << std::endl;
  return 0;
}
Comme je disais, cela compile avec Clang 14, but not gcc 10.3, et l'output est suspicieux: https://gcc.godbolt.org/z/cq954nx8W

Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
 
<source>: In instantiation of 'auto quetzal::format::newick::make_formatter(P1&&, P2&&, F1&&, F2&&) [with P1 = main()::<lambda(const Node&)>&; P2 = main()::<lambda(const Node&)>&; F1 = main()::<lambda(const Node&)>&; F2 = main()::<lambda(const Node&)>&; Policy = quetzal::format::newick::PAUP]':
<source>:467:97:   required from here
<source>:385:16: error: cannot bind non-const lvalue reference of type 'main()::<lambda(const Node&)>&' to an rvalue of type 'main()::<lambda(const Node&)>'
  385 |         return Formatter<single_function_argument_t<P1>, P1, P2, F1, F2, Policy>(
      |                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  386 |           std::forward<P1>(has_parent),
      |           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  387 |           std::forward<P2>(has_children),
      |           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  388 |           std::forward<F1>(label),
      |           ~~~~~~~~~~~~~~~~~~~~~~~~
  389 |           std::forward<F2>(branch_length)
      |           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  390 |         );
      |         ~       
<source>: In instantiation of 'auto quetzal::format::newick::make_formatter(P1&&, P2&&, F1&&, F2&&) [with P1 = main()::<lambda(const Node&)>&; P2 = main()::<lambda(const Node&)>&; F1 = main()::<lambda(const Node&)>&; F2 = main()::<lambda(const Node&)>&; Policy = quetzal::format::newick::PAUP]':
<source>:488:91:   required from here
<source>:385:16: error: cannot bind non-const lvalue reference of type 'main()::<lambda(const Node&)>&' to an rvalue of type 'main()::<lambda(const Node&)>'