summaryrefslogtreecommitdiffstats
path: root/meta/recipes-support/re2c/re2c/CVE-2018-21232-2.patch
blob: 820a6decbca5c68ab7a14e7ee7eab4581d631fe8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
From 7b5643476bd99c994c4f51b8143f942982d85521 Mon Sep 17 00:00:00 2001
From: Ulya Trofimovich <skvadrik@gmail.com>
Date: Wed, 22 Apr 2020 22:37:24 +0100
Subject: [PATCH] Rewrite recursion into iteration (fixed tags computation).

This is to avoid stack overflow on large RE (especially on instrumented
builds that have larger stack frames, like AddressSanitizer).

Partial fix for #219 "overflow-1.re test fails on system with small stack".

Upstream-Status: Backport [https://github.com/skvadrik/re2c/commit/7b5643476bd99c994c4f51b8143f942982d85521]

CVE: CVE-2018-21232

Signed-off-by: Davide Gardenal <davide.gardenal@huawei.com>
---
diff --git a/src/re/tag.cc b/src/re/tag.cc
--- a/src/re/tag.cc	(revision e58939b34bb4c37cd990f82dc286f21cb405743e)
+++ b/src/re/tag.cc	(date 1646986908580)
@@ -6,7 +6,7 @@
 {
 
 const size_t Tag::RIGHTMOST = std::numeric_limits<size_t>::max();
-const size_t Tag::VARDIST = std::numeric_limits<size_t>::max();
+const uint32_t Tag::VARDIST = std::numeric_limits<uint32_t>::max();
 const size_t Tag::FICTIVE = Tag::RIGHTMOST - 1;
 
 } // namespace re2c


diff --git a/src/re/tag.h b/src/re/tag.h
--- a/src/re/tag.h	(revision e58939b34bb4c37cd990f82dc286f21cb405743e)
+++ b/src/re/tag.h	(date 1646986922376)
@@ -19,7 +19,7 @@
 struct Tag
 {
 	static const size_t RIGHTMOST;
-	static const size_t VARDIST;
+    static const uint32_t VARDIST;
 	static const size_t FICTIVE;
 
 	const std::string *name;


diff --git a/src/re/fixed_tags.cc b/src/re/fixed_tags.cc
--- a/src/re/fixed_tags.cc	(revision e58939b34bb4c37cd990f82dc286f21cb405743e)
+++ b/src/re/fixed_tags.cc	(date 1646991137317)
@@ -7,78 +7,131 @@
 #include "src/re/tag.h"
 
 namespace re2c {
+namespace {
 
 /* note [fixed and variable tags]
  *
- * If distance between two tags is constant (equal for all strings that
- * match the given regexp), then lexer only needs to track one of them:
- * the second tag equals the first tag plus static offset.
+ * If distance between two tags is constant (equal for all strings that match
+ * the given regexp), then lexer only needs to track one of them: the second
+ * tag equals the first tag plus static offset.
  *
- * However, this optimization is applied only to tags in top-level
- * concatenation, because other tags may be uninitialized and we don't
- * want to mess with conditional calculation of fixed tags.
- *
+ * This optimization is applied only to tags in top-level concatenation,
+ * because in other cases the base tag may be NULL, and the calculation of
+ * the fixed tag value is not as simple as subtracting a fixed offset.
  * Furthermore, fixed tags are fobidden with generic API because it cannot
- * express fixed offsets.
- *
- * Tags with history also cannot be fixed.
+ * express fixed offsets. M-tags (with history) also cannot be fixed.
  *
  * Another special case is fictive tags (those that exist only to impose
- * hierarchical laws of POSIX disambiguation). We treat them as fixed
- * in order to suppress code generation.
+ * hierarchical laws of POSIX disambiguation). We treat them as fixed in order
+ * to suppress code generation.
  */
 
-static void find_fixed_tags(RE *re, std::vector<Tag> &tags,
-	size_t &dist, size_t &base, bool toplevel)
+struct StackItem {
+    RE       *re;       // current sub-RE
+    uint32_t  dist;     // distance backup for alternative, unused for other RE
+    uint8_t   succ;     // index of the next successor to be visited
+    bool      toplevel; // if this sub-RE is in top-level concatenation
+};
+
+static void find_fixed_tags(RESpec &spec, std::vector<StackItem> &stack, RE *re0)
 {
-	switch (re->type) {
-		case RE::NIL: break;
-		case RE::SYM:
-			if (dist != Tag::VARDIST) ++dist;
-			break;
-		case RE::ALT: {
-			size_t d1 = dist, d2 = dist;
-			find_fixed_tags(re->alt.re1, tags, d1, base, false);
-			find_fixed_tags(re->alt.re2, tags, d2, base, false);
-			dist = (d1 == d2) ? d1 : Tag::VARDIST;
-			break;
-		}
-		case RE::CAT:
-			find_fixed_tags(re->cat.re2, tags, dist, base, toplevel);
-			find_fixed_tags(re->cat.re1, tags, dist, base, toplevel);
-			break;
-		case RE::ITER:
-			find_fixed_tags(re->iter.re, tags, dist, base, false);
-			dist = Tag::VARDIST;
-			break;
-		case RE::TAG: {
-			// see note [fixed and variable tags]
-			Tag &tag = tags[re->tag.idx];
-			if (fictive(tag)) {
-				tag.base = tag.dist = 0;
-			} else if (toplevel && dist != Tag::VARDIST && !history(tag)) {
-				tag.base = base;
-				tag.dist = dist;
-			} else if (toplevel) {
-				base = re->tag.idx;
-				dist = 0;
-			}
-			if (trailing(tag)) dist = 0;
-			break;
-		}
-	}
+    static const uint32_t VARDIST = Tag::VARDIST;
+    bool toplevel = spec.opts->input_api != INPUT_CUSTOM;
+
+    // base tag, initially the fake "rightmost tag" (the end of RE)
+    size_t base = Tag::RIGHTMOST;
+
+    // the distance to the nearest top-level tag to the right (base tag)
+    uint32_t dist = 0;
+
+    const StackItem i0 = {re0, VARDIST, 0, toplevel};
+    stack.push_back(i0);
+
+    while (!stack.empty()) {
+        const StackItem i = stack.back();
+        stack.pop_back();
+        RE *re = i.re;
+
+        if (re->type == RE::SYM) {
+            if (dist != VARDIST) ++dist;
+        }
+        else if (re->type == RE::ALT) {
+            if (i.succ == 0) {
+                // save the current distance on stack (from the alternative end
+                // to base) and recurse into the left sub-RE
+                StackItem k = {re, dist, 1, i.toplevel};
+                stack.push_back(k);
+                StackItem j = {re->alt.re1, VARDIST, 0, false};
+                stack.push_back(j);
+            }
+            else if (i.succ == 1) {
+                // save the current distance on stack (from the left sub-RE to
+                // base), reset distance to the distance popped from stack (from
+                // the alternative end to base), recurse into the right sub-RE
+                StackItem k = {re, dist, 2, i.toplevel};
+                stack.push_back(k);
+                StackItem j = {re->alt.re2, VARDIST, 0, false};
+                stack.push_back(j);
+                dist = i.dist;
+            }
+            else {
+                // both sub-RE visited, compare the distance on stack (from the
+                // left sub-RE to base) to the current distance (from the right
+                // sub-RE to base), if not equal set variable distance
+                dist = (i.dist == dist) ? i.dist : VARDIST;
+            }
+        }
+        else if (re->type == RE::ITER) {
+            if (i.succ == 0) {
+                // recurse into the sub-RE
+                StackItem k = {re, VARDIST, 1, i.toplevel};
+                stack.push_back(k);
+                StackItem j = {re->iter.re, VARDIST, 0, false};
+                stack.push_back(j);
+            }
+            else {
+                // sub-RE visited, assume unknown number of iterations
+                // TODO: find precise distance for fixed repetition counter
+                dist = VARDIST;
+            }
+        }
+        else if (re->type == RE::CAT) {
+            // the right sub-RE is pushed on stack after the left sub-RE and
+            // visited earlier (because distance is computed from right to left)
+            StackItem j1 = {re->cat.re1, VARDIST, 0, i.toplevel};
+            stack.push_back(j1);
+            StackItem j2 = {re->cat.re2, VARDIST, 0, i.toplevel};
+            stack.push_back(j2);
+        }
+        else if (re->type == RE::TAG) {
+            // see note [fixed and variable tags]
+            Tag &tag = spec.tags[re->tag.idx];
+            if (fictive(tag)) {
+                tag.base = tag.dist = 0;
+            }
+            else if (i.toplevel && dist != VARDIST && !history(tag)) {
+                tag.base = base;
+                tag.dist = dist;
+            }
+            else if (i.toplevel) {
+                base = re->tag.idx;
+                dist = 0;
+            }
+            if (trailing(tag)) {
+                dist = 0;
+            }
+        }
+    }
 }
+
+} // anonymous namespace
 
-void find_fixed_tags(RESpec &spec)
-{
-	const bool generic = spec.opts->input_api == INPUT_CUSTOM;
-	std::vector<RE*>::iterator
-		i = spec.res.begin(),
-		e = spec.res.end();
-	for (; i != e; ++i) {
-		size_t base = Tag::RIGHTMOST, dist = 0;
-		find_fixed_tags(*i, spec.tags, dist, base, !generic);
-	}
-}
+    void find_fixed_tags(RESpec &spec)
+    {
+        std::vector<StackItem> stack;
+        for (std::vector<RE*>::iterator i = spec.res.begin(); i != spec.res.end(); ++i) {
+            find_fixed_tags(spec, stack, *i);
+        }
+    }
 
-} // namespace re2c
+} // namespace re2c
\ No newline at end of file