meta-python/recipes-devtools/python/python3-django/CVE-2023-43665.patch

From b269a0063e9b10a6c88c92b24d1b92c7421950de Mon Sep 17 00:00:00 2001
From: Natalia <124304+nessita@users.noreply.github.com>
Date: Wed, 29 Nov 2023 12:20:01 +0000
Subject: [PATCH 1/2] Fixed CVE-2023-43665 -- Mitigated potential DoS in
 django.utils.text.Truncator when truncating HTML text.

Thanks Wenchao Li of Alibaba Group for the report.

CVE: CVE-2023-43665

Upstream-Status: Backport [https://github.com/django/django/commit/ccdade1a0262537868d7ca64374de3d957ca50c5]

Signed-off-by: Narpat Mali <narpat.mali@windriver.com>
---
 django/utils/text.py            | 18 ++++++++++++++++-
 docs/ref/templates/builtins.txt | 20 +++++++++++++++++++
 docs/releases/2.2.28.txt        | 20 +++++++++++++++++++
 tests/utils_tests/test_text.py  | 35 ++++++++++++++++++++++++---------
 4 files changed, 83 insertions(+), 10 deletions(-)

diff --git a/django/utils/text.py b/django/utils/text.py
index 1fae7b2..06a377b 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -57,7 +57,14 @@ def wrap(text, width):
 class Truncator(SimpleLazyObject):
     """
     An object used to truncate text, either by characters or words.
+
+    When truncating HTML text (either chars or words), input will be limited to
+    at most `MAX_LENGTH_HTML` characters.
     """
+
+    # 5 million characters are approximately 4000 text pages or 3 web pages.
+    MAX_LENGTH_HTML = 5_000_000
+
     def __init__(self, text):
         super().__init__(lambda: str(text))

@@ -154,6 +161,11 @@ class Truncator(SimpleLazyObject):
         if words and length <= 0:
             return ''

+        size_limited = False
+        if len(text) > self.MAX_LENGTH_HTML:
+            text = text[: self.MAX_LENGTH_HTML]
+            size_limited = True
+
         html4_singlets = (
             'br', 'col', 'link', 'base', 'img',
             'param', 'area', 'hr', 'input'
@@ -203,10 +215,14 @@ class Truncator(SimpleLazyObject):
                 # Add it to the start of the open tags list
                 open_tags.insert(0, tagname)

+        truncate_text = self.add_truncation_text("", truncate)
+
         if current_len <= length:
+            if size_limited and truncate_text:
+                text += truncate_text
             return text
+
         out = text[:end_text_pos]
-        truncate_text = self.add_truncation_text('', truncate)
         if truncate_text:
             out += truncate_text
         # Close any tags still open
diff --git a/docs/ref/templates/builtins.txt b/docs/ref/templates/builtins.txt
index c4b0fa3..4faab38 100644
--- a/docs/ref/templates/builtins.txt
+++ b/docs/ref/templates/builtins.txt
@@ -2318,6 +2318,16 @@ If ``value`` is ``"<p>Joel is a slug</p>"``, the output will be

 Newlines in the HTML content will be preserved.

+.. admonition:: Size of input string
+
+    Processing large, potentially malformed HTML strings can be
+    resource-intensive and impact service performance. ``truncatechars_html``
+    limits input to the first five million characters.
+
+.. versionchanged:: 2.2.28
+
+    In older versions, strings over five million characters were processed.
+
 .. templatefilter:: truncatewords

 ``truncatewords``
@@ -2356,6 +2366,16 @@ If ``value`` is ``"<p>Joel is a slug</p>"``, the output will be

 Newlines in the HTML content will be preserved.

+.. admonition:: Size of input string
+
+    Processing large, potentially malformed HTML strings can be
+    resource-intensive and impact service performance. ``truncatewords_html``
+    limits input to the first five million characters.
+
+.. versionchanged:: 2.2.28
+
+    In older versions, strings over five million characters were processed.
+
 .. templatefilter:: unordered_list

 ``unordered_list``
diff --git a/docs/releases/2.2.28.txt b/docs/releases/2.2.28.txt
index 40eb230..6a38e9c 100644
--- a/docs/releases/2.2.28.txt
+++ b/docs/releases/2.2.28.txt
@@ -56,3 +56,23 @@ CVE-2023-41164: Potential denial of service vulnerability in ``django.utils.enco
 ``django.utils.encoding.uri_to_iri()`` was subject to potential denial of
 service attack via certain inputs with a very large number of Unicode
 characters.
+
+The fix for CVE-2023-43665, described below, was backported to Django 2.2.28.
+
+CVE-2023-43665: Denial-of-service possibility in ``django.utils.text.Truncator``
+================================================================================
+
+Following the fix for :cve:`2019-14232`, the regular expressions used in the
+implementation of ``django.utils.text.Truncator``'s ``chars()`` and ``words()``
+methods (with ``html=True``) were revised and improved. However, these regular
+expressions still exhibited linear backtracking complexity, so when given a
+very long, potentially malformed HTML input, the evaluation would still be
+slow, leading to a potential denial of service vulnerability.
+
+The ``chars()`` and ``words()`` methods are used to implement the
+:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template
+filters, which were thus also vulnerable.
+
+The input processed by ``Truncator``, when operating in HTML mode, has been
+limited to the first five million characters in order to avoid potential
+performance and memory issues.
diff --git a/tests/utils_tests/test_text.py b/tests/utils_tests/test_text.py
index 27e440b..cb3063d 100644
--- a/tests/utils_tests/test_text.py
+++ b/tests/utils_tests/test_text.py
@@ -1,5 +1,6 @@
 import json
 import sys
+from unittest.mock import patch

 from django.core.exceptions import SuspiciousFileOperation
 from django.test import SimpleTestCase
@@ -87,11 +88,17 @@ class TestUtilsText(SimpleTestCase):
         # lazy strings are handled correctly
         self.assertEqual(text.Truncator(lazystr('The quick brown fox')).chars(10), 'The quick…')

-    def test_truncate_chars_html(self):
+    @patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000)
+    def test_truncate_chars_html_size_limit(self):
+        max_len = text.Truncator.MAX_LENGTH_HTML
+        bigger_len = text.Truncator.MAX_LENGTH_HTML + 1
+        valid_html = "<p>Joel is a slug</p>"  # 14 chars
         perf_test_values = [
-            (('</a' + '\t' * 50000) + '//>', None),
-            ('&' * 50000, '&' * 9 + '…'),
-            ('_X<<<<<<<<<<<>', None),
+            ("</a" + "\t" * (max_len - 6) + "//>", None),
+            ("</p" + "\t" * bigger_len + "//>", "</p" + "\t" * 6 + "…"),
+            ("&" * bigger_len, "&" * 9 + "…"),
+            ("_X<<<<<<<<<<<>", None),
+            (valid_html * bigger_len, "<p>Joel is a…</p>"),  # 10 chars
         ]
         for value, expected in perf_test_values:
             with self.subTest(value=value):
@@ -149,15 +156,25 @@ class TestUtilsText(SimpleTestCase):
         truncator = text.Truncator('<p>I &lt;3 python, what about you?</p>')
         self.assertEqual('<p>I &lt;3 python,…</p>', truncator.words(3, html=True))

+    @patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000)
+    def test_truncate_words_html_size_limit(self):
+        max_len = text.Truncator.MAX_LENGTH_HTML
+        bigger_len = text.Truncator.MAX_LENGTH_HTML + 1
+        valid_html = "<p>Joel is a slug</p>"  # 4 words
         perf_test_values = [
-            ('</a' + '\t' * 50000) + '//>',
-            '&' * 50000,
-            '_X<<<<<<<<<<<>',
+            ("</a" + "\t" * (max_len - 6) + "//>", None),
+            ("</p" + "\t" * bigger_len + "//>", "</p" + "\t" * (max_len - 3) + "…"),
+            ("&" * max_len, None),  # no change
+            ("&" * bigger_len, "&" * max_len + "…"),
+            ("_X<<<<<<<<<<<>", None),
+            (valid_html * bigger_len, valid_html * 12 + "<p>Joel is…</p>"),  # 50 words
         ]
-        for value in perf_test_values:
+        for value, expected in perf_test_values:
             with self.subTest(value=value):
                 truncator = text.Truncator(value)
-                self.assertEqual(value, truncator.words(50, html=True))
+                self.assertEqual(
+                    expected if expected else value, truncator.words(50, html=True)
+                )

     def test_wrap(self):
         digits = '1234 67 9'
--
2.40.0