aboutsummaryrefslogtreecommitdiffstats
path: root/meta-python/recipes-devtools/python/python3-lxml/CVE-2022-2309.patch
blob: 5ec55dfd2a33a235a3904d9045fbe281e3dacd19 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
From 86368e9cf70a0ad23cccd5ee32de847149af0c6f Mon Sep 17 00:00:00 2001
From: Stefan Behnel <stefan_ml@behnel.de>
Date: Fri, 1 Jul 2022 21:06:10 +0200
Subject: [PATCH] Fix a crash when incorrect parser input occurs together with
 usages of iterwalk() on trees generated by the same parser.

CVE: CVE-2022-2309

Upstream-Status: Backport
[https://github.com/lxml/lxml/commit/86368e9cf70a0ad23cccd5ee32de847149af0c6f]

Signed-off-by: Yue Tao <yue.tao@windriver.com>

---
 src/lxml/apihelpers.pxi      |  7 ++++---
 src/lxml/iterparse.pxi       | 11 ++++++-----
 src/lxml/tests/test_etree.py | 20 ++++++++++++++++++++
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index c1662762..9fae9fb1 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node):
     while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
         c_ns = c_node.nsDef
         while c_ns is not NULL:
-            prefix = funicodeOrNone(c_ns.prefix)
-            if prefix not in nsmap:
-                nsmap[prefix] = funicodeOrNone(c_ns.href)
+            if c_ns.prefix or c_ns.href:
+                prefix = funicodeOrNone(c_ns.prefix)
+                if prefix not in nsmap:
+                    nsmap[prefix] = funicodeOrNone(c_ns.href)
             c_ns = c_ns.next
         c_node = c_node.parent
     return nsmap
diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi
index 138c23a6..a7299da6 100644
--- a/src/lxml/iterparse.pxi
+++ b/src/lxml/iterparse.pxi
@@ -420,7 +420,7 @@ cdef int _countNsDefs(xmlNode* c_node):
     count = 0
     c_ns = c_node.nsDef
     while c_ns is not NULL:
-        count += 1
+        count += (c_ns.href is not NULL)
         c_ns = c_ns.next
     return count
 
@@ -431,9 +431,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
     count = 0
     c_ns = c_node.nsDef
     while c_ns is not NULL:
-        ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '',
-                    funicode(c_ns.href))
-        event_list.append( (u"start-ns", ns_tuple) )
-        count += 1
+        if c_ns.href:
+            ns_tuple = (funicodeOrEmpty(c_ns.prefix),
+                        funicode(c_ns.href))
+            event_list.append( (u"start-ns", ns_tuple) )
+            count += 1
         c_ns = c_ns.next
     return count
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index e5f08469..285313f6 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -1460,6 +1460,26 @@ class ETreeOnlyTestCase(HelperTestCase):
             [1,2,1,4],
             counts)
 
+    def test_walk_after_parse_failure(self):
+        # This used to be an issue because libxml2 can leak empty namespaces
+        # between failed parser runs.  iterwalk() failed to handle such a tree.
+        try:
+            etree.XML('''<anot xmlns="1">''')
+        except etree.XMLSyntaxError:
+            pass
+        else:
+            assert False, "invalid input did not fail to parse"
+
+        et = etree.XML('''<root>  </root>''')
+        try:
+            ns = next(etree.iterwalk(et, events=('start-ns',)))
+        except StopIteration:
+            # This would be the expected result, because there was no namespace
+            pass
+        else:
+            # This is a bug in libxml2
+            assert not ns, repr(ns)
+
     def test_itertext_comment_pi(self):
         # https://bugs.launchpad.net/lxml/+bug/1844674
         XML = self.etree.XML
-- 
2.17.1