summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/go/go-1.14/CVE-2022-41715.patch
blob: fac0ebe94ce45ab5201b0d5981bdda8d331cb585 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
From e9017c2416ad0ef642f5e0c2eab2dbf3cba4d997 Mon Sep 17 00:00:00 2001
From: Russ Cox <rsc@golang.org>
Date: Wed, 28 Sep 2022 11:18:51 -0400
Subject: [PATCH] [release-branch.go1.18] regexp: limit size of parsed regexps

Set a 128 MB limit on the amount of space used by []syntax.Inst
in the compiled form corresponding to a given regexp.

Also set a 128 MB limit on the rune storage in the *syntax.Regexp
tree itself.

Thanks to Adam Korczynski (ADA Logics) and OSS-Fuzz for reporting this issue.

Fixes CVE-2022-41715.
Updates #55949.
Fixes #55950.

Change-Id: Ia656baed81564436368cf950e1c5409752f28e1b
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1592136
TryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com>
Reviewed-by: Damien Neil <dneil@google.com>
Run-TryBot: Roland Shoemaker <bracewell@google.com>
Reviewed-by: Julie Qiu <julieqiu@google.com>
Reviewed-on: https://go-review.googlesource.com/c/go/+/438501
Run-TryBot: Carlos Amedee <carlos@golang.org>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>

Upstream-Status: Backport [https://github.com/golang/go/commit/e9017c2416ad0ef642f5e0c2eab2dbf3cba4d997]
CVE: CVE-2022-41715
Signed-off-by: Hitendra Prajapati <hprajapati@mvista.com>

---
 src/regexp/syntax/parse.go      | 145 ++++++++++++++++++++++++++++++--
 src/regexp/syntax/parse_test.go |  13 +--
 2 files changed, 148 insertions(+), 10 deletions(-)

diff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go
index 55bd20d..60491d5 100644
--- a/src/regexp/syntax/parse.go
+++ b/src/regexp/syntax/parse.go
@@ -90,15 +90,49 @@ const (
 // until we've allocated at least maxHeight Regexp structures.
 const maxHeight = 1000
 
+// maxSize is the maximum size of a compiled regexp in Insts.
+// It too is somewhat arbitrarily chosen, but the idea is to be large enough
+// to allow significant regexps while at the same time small enough that
+// the compiled form will not take up too much memory.
+// 128 MB is enough for a 3.3 million Inst structures, which roughly
+// corresponds to a 3.3 MB regexp.
+const (
+	maxSize  = 128 << 20 / instSize
+	instSize = 5 * 8 // byte, 2 uint32, slice is 5 64-bit words
+)
+
+// maxRunes is the maximum number of runes allowed in a regexp tree
+// counting the runes in all the nodes.
+// Ignoring character classes p.numRunes is always less than the length of the regexp.
+// Character classes can make it much larger: each \pL adds 1292 runes.
+// 128 MB is enough for 32M runes, which is over 26k \pL instances.
+// Note that repetitions do not make copies of the rune slices,
+// so \pL{1000} is only one rune slice, not 1000.
+// We could keep a cache of character classes we've seen,
+// so that all the \pL we see use the same rune list,
+// but that doesn't remove the problem entirely:
+// consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()].
+// And because the Rune slice is exposed directly in the Regexp,
+// there is not an opportunity to change the representation to allow
+// partial sharing between different character classes.
+// So the limit is the best we can do.
+const (
+	maxRunes = 128 << 20 / runeSize
+	runeSize = 4 // rune is int32
+)
+
 type parser struct {
 	flags       Flags     // parse mode flags
 	stack       []*Regexp // stack of parsed expressions
 	free        *Regexp
 	numCap      int // number of capturing groups seen
 	wholeRegexp string
-	tmpClass    []rune          // temporary char class work space
-	numRegexp   int             // number of regexps allocated
-	height      map[*Regexp]int // regexp height for height limit check
+	tmpClass    []rune            // temporary char class work space
+	numRegexp   int               // number of regexps allocated
+	numRunes    int               // number of runes in char classes
+	repeats     int64             // product of all repetitions seen
+	height      map[*Regexp]int   // regexp height, for height limit check
+	size        map[*Regexp]int64 // regexp compiled size, for size limit check
 }
 
 func (p *parser) newRegexp(op Op) *Regexp {
@@ -122,6 +156,104 @@ func (p *parser) reuse(re *Regexp) {
 	p.free = re
 }
 
+func (p *parser) checkLimits(re *Regexp) {
+	if p.numRunes > maxRunes {
+		panic(ErrInternalError)
+	}
+	p.checkSize(re)
+	p.checkHeight(re)
+}
+
+func (p *parser) checkSize(re *Regexp) {
+	if p.size == nil {
+		// We haven't started tracking size yet.
+		// Do a relatively cheap check to see if we need to start.
+		// Maintain the product of all the repeats we've seen
+		// and don't track if the total number of regexp nodes
+		// we've seen times the repeat product is in budget.
+		if p.repeats == 0 {
+			p.repeats = 1
+		}
+		if re.Op == OpRepeat {
+			n := re.Max
+			if n == -1 {
+				n = re.Min
+			}
+			if n <= 0 {
+				n = 1
+			}
+			if int64(n) > maxSize/p.repeats {
+				p.repeats = maxSize
+			} else {
+				p.repeats *= int64(n)
+			}
+		}
+		if int64(p.numRegexp) < maxSize/p.repeats {
+			return
+		}
+
+		// We need to start tracking size.
+		// Make the map and belatedly populate it
+		// with info about everything we've constructed so far.
+		p.size = make(map[*Regexp]int64)
+		for _, re := range p.stack {
+			p.checkSize(re)
+		}
+	}
+
+	if p.calcSize(re, true) > maxSize {
+		panic(ErrInternalError)
+	}
+}
+
+func (p *parser) calcSize(re *Regexp, force bool) int64 {
+	if !force {
+		if size, ok := p.size[re]; ok {
+			return size
+		}
+	}
+
+	var size int64
+	switch re.Op {
+	case OpLiteral:
+		size = int64(len(re.Rune))
+	case OpCapture, OpStar:
+		// star can be 1+ or 2+; assume 2 pessimistically
+		size = 2 + p.calcSize(re.Sub[0], false)
+	case OpPlus, OpQuest:
+		size = 1 + p.calcSize(re.Sub[0], false)
+	case OpConcat:
+		for _, sub := range re.Sub {
+			size += p.calcSize(sub, false)
+		}
+	case OpAlternate:
+		for _, sub := range re.Sub {
+			size += p.calcSize(sub, false)
+		}
+		if len(re.Sub) > 1 {
+			size += int64(len(re.Sub)) - 1
+		}
+	case OpRepeat:
+		sub := p.calcSize(re.Sub[0], false)
+		if re.Max == -1 {
+			if re.Min == 0 {
+				size = 2 + sub // x*
+			} else {
+				size = 1 + int64(re.Min)*sub // xxx+
+			}
+			break
+		}
+		// x{2,5} = xx(x(x(x)?)?)?
+		size = int64(re.Max)*sub + int64(re.Max-re.Min)
+	}
+
+	if size < 1 {
+		size = 1
+	}
+	p.size[re] = size
+	return size
+}
+
 func (p *parser) checkHeight(re *Regexp) {
 	if p.numRegexp < maxHeight {
 		return
@@ -158,6 +290,7 @@ func (p *parser) calcHeight(re *Regexp, force bool) int {
 
 // push pushes the regexp re onto the parse stack and returns the regexp.
 func (p *parser) push(re *Regexp) *Regexp {
+	p.numRunes += len(re.Rune)
 	if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
 		// Single rune.
 		if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
@@ -189,7 +322,7 @@ func (p *parser) push(re *Regexp) *Regexp {
 	}
 
 	p.stack = append(p.stack, re)
-	p.checkHeight(re)
+	p.checkLimits(re)
 	return re
 }
 
@@ -305,7 +438,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
 	re.Sub = re.Sub0[:1]
 	re.Sub[0] = sub
 	p.stack[n-1] = re
-	p.checkHeight(re)
+	p.checkLimits(re)
 
 	if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
 		return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
@@ -509,6 +642,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp {
 
 			for j := start; j < i; j++ {
 				sub[j] = p.removeLeadingString(sub[j], len(str))
+				p.checkLimits(sub[j])
 			}
 			suffix := p.collapse(sub[start:i], OpAlternate) // recurse
 
@@ -566,6 +700,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp {
 			for j := start; j < i; j++ {
 				reuse := j != start // prefix came from sub[start]
 				sub[j] = p.removeLeadingRegexp(sub[j], reuse)
+				p.checkLimits(sub[j])
 			}
 			suffix := p.collapse(sub[start:i], OpAlternate) // recurse
 
diff --git a/src/regexp/syntax/parse_test.go b/src/regexp/syntax/parse_test.go
index 1ef6d8a..67e3c56 100644
--- a/src/regexp/syntax/parse_test.go
+++ b/src/regexp/syntax/parse_test.go
@@ -484,12 +484,15 @@ var invalidRegexps = []string{
 	`(?P<>a)`,
 	`[a-Z]`,
 	`(?i)[a-Z]`,
-	`a{100000}`,
-	`a{100000,}`,
-	"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
-	strings.Repeat("(", 1000) + strings.Repeat(")", 1000),
-	strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000),
 	`\Q\E*`,
+	`a{100000}`,  // too much repetition
+	`a{100000,}`, // too much repetition
+	"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",    // too much repetition
+	strings.Repeat("(", 1000) + strings.Repeat(")", 1000),    // too deep
+	strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000), // too deep
+	"(" + strings.Repeat("(xx?)", 1000) + "){1000}",          // too long
+	strings.Repeat("(xx?){1000}", 1000),                      // too long
+	strings.Repeat(`\pL`, 27000),                             // too many runes
 }
 
 var onlyPerl = []string{
-- 
2.25.1