티스토리 수익 글 보기

티스토리 수익 글 보기

[6.0.x] Fixed CVE-2025-64460 — Corrected quadratic inner text accumulation in XML serializer. · django/django@1dbd07a · GitHub
Skip to content

Commit 1dbd07a

Browse files
shaibnessita
authored and committed
[6.0.x] Fixed CVE-2025-64460 — Corrected quadratic inner text accumulation in XML serializer.
Previously, `getInnerText()` recursively used `list.extend()` on strings, which added each character from child nodes as a separate list element. On deeply nested XML content, this caused the overall deserialization work to grow quadratically with input size, potentially allowing disproportionate CPU consumption for crafted XML. The fix separates collection of inner texts from joining them, so that each subtree is joined only once, reducing the complexity to linear in the size of the input. These changes also include a mitigation for an xml.dom.minidom performance issue. Thanks Seokchan Yoon (https://ch4n3.kr/) for the report. Co-authored-by: Jacob Walls <jacobtylerwalls@gmail.com> Co-authored-by: Natalia <124304+nessita@users.noreply.github.com> Backport of 50efb71 from main.
1 parent 56aea00 commit 1dbd07a

File tree

6 files changed

+119
-6
lines changed

6 files changed

+119
-6
lines changed

django/core/serializers/xml_serializer.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
"""
44

55
import json
6-
from xml.dom import pulldom
6+
from contextlib import contextmanager
7+
from xml.dom import minidom, pulldom
78
from xml.sax import handler
89
from xml.sax.expatreader import ExpatParser as _ExpatParser
910

@@ -15,6 +16,25 @@
1516
from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
1617

1718

19+
@contextmanager
20+
def fast_cache_clearing():
21+
"""Workaround for performance issues in minidom document checks.
22+
23+
Speeds up repeated DOM operations by skipping unnecessary full traversal
24+
of the DOM tree.
25+
"""
26+
module_helper_was_lambda = False
27+
if original_fn := getattr(minidom, "_in_document", None):
28+
module_helper_was_lambda = original_fn.__name__ == "<lambda>"
29+
if not module_helper_was_lambda:
30+
minidom._in_document = lambda node: bool(node.ownerDocument)
31+
try:
32+
yield
33+
finally:
34+
if original_fn and not module_helper_was_lambda:
35+
minidom._in_document = original_fn
36+
37+
1838
class Serializer(base.Serializer):
1939
"""Serialize a QuerySet to XML."""
2040

@@ -210,7 +230,8 @@ def _make_parser(self):
210230
def __next__(self):
211231
for event, node in self.event_stream:
212232
if event == "START_ELEMENT" and node.nodeName == "object":
213-
self.event_stream.expandNode(node)
233+
with fast_cache_clearing():
234+
self.event_stream.expandNode(node)
214235
return self._handle_object(node)
215236
raise StopIteration
216237

@@ -397,20 +418,26 @@ def _get_model_from_node(self, node, attr):
397418

398419
def getInnerText(node):
399420
"""Get all the inner text of a DOM node (recursively)."""
421+
inner_text_list = getInnerTextList(node)
422+
return "".join(inner_text_list)
423+
424+
425+
def getInnerTextList(node):
426+
"""Return a list of the inner texts of a DOM node (recursively)."""
400427
# inspired by
401428
# https://mail.python.org/pipermail/xml-sig/2005-March/011022.html
402-
inner_text = []
429+
result = []
403430
for child in node.childNodes:
404431
if (
405432
child.nodeType == child.TEXT_NODE
406433
or child.nodeType == child.CDATA_SECTION_NODE
407434
):
408-
inner_text.append(child.data)
435+
result.append(child.data)
409436
elif child.nodeType == child.ELEMENT_NODE:
410-
inner_text.extend(getInnerText(child))
437+
result.extend(getInnerTextList(child))
411438
else:
412439
pass
413-
return "".join(inner_text)
440+
return result
414441

415442

416443
# Below code based on Christian Heimes' defusedxml

docs/releases/4.2.27.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
1515
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
1616
PostgreSQL.
1717

18+
CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
19+
=================================================================================
20+
21+
:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
22+
denial-of-service attack due to quadratic time complexity when deserializing
23+
crafted documents containing many nested invalid elements. The internal helper
24+
``django.core.serializers.xml_serializer.getInnerText()`` previously
25+
accumulated inner text inefficiently during recursion. It now collects text per
26+
element, avoiding excessive resource usage.
27+
1828
Bugfixes
1929
========
2030

docs/releases/5.1.15.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
1515
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
1616
PostgreSQL.
1717

18+
CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
19+
=================================================================================
20+
21+
:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
22+
denial-of-service attack due to quadratic time complexity when deserializing
23+
crafted documents containing many nested invalid elements. The internal helper
24+
``django.core.serializers.xml_serializer.getInnerText()`` previously
25+
accumulated inner text inefficiently during recursion. It now collects text per
26+
element, avoiding excessive resource usage.
27+
1828
Bugfixes
1929
========
2030

docs/releases/5.2.9.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
1515
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
1616
PostgreSQL.
1717

18+
CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
19+
=================================================================================
20+
21+
:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
22+
denial-of-service attack due to quadratic time complexity when deserializing
23+
crafted documents containing many nested invalid elements. The internal helper
24+
``django.core.serializers.xml_serializer.getInnerText()`` previously
25+
accumulated inner text inefficiently during recursion. It now collects text per
26+
element, avoiding excessive resource usage.
27+
1828
Bugfixes
1929
========
2030

docs/topics/serialization.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,8 @@ Identifier Information
173173
.. _jsonl: https://jsonlines.org/
174174
.. _PyYAML: https://pyyaml.org/
175175

176+
.. _serialization-formats-xml:
177+
176178
XML
177179
---
178180

tests/serializers/test_deserialization.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
import json
2+
import time
23
import unittest
34

45
from django.core.serializers.base import DeserializationError, DeserializedObject
56
from django.core.serializers.json import Deserializer as JsonDeserializer
67
from django.core.serializers.jsonl import Deserializer as JsonlDeserializer
78
from django.core.serializers.python import Deserializer
9+
from django.core.serializers.xml_serializer import Deserializer as XMLDeserializer
10+
from django.db import models
811
from django.test import SimpleTestCase
12+
from django.test.utils import garbage_collect
913

1014
from .models import Author
1115

@@ -133,3 +137,53 @@ def test_yaml_bytes_input(self):
133137

134138
self.assertEqual(first_item.object, self.jane)
135139
self.assertEqual(second_item.object, self.joe)
140+
141+
def test_crafted_xml_performance(self):
142+
"""The time to process invalid inputs is not quadratic."""
143+
144+
def build_crafted_xml(depth, leaf_text_len):
145+
nested_open = "<nested>" * depth
146+
nested_close = "</nested>" * depth
147+
leaf = "x" * leaf_text_len
148+
field_content = f"{nested_open}{leaf}{nested_close}"
149+
return f"""
150+
<django-objects version="1.0">
151+
<object model="contenttypes.contenttype" pk="1">
152+
<field name="app_label">{field_content}</field>
153+
<field name="model">m</field>
154+
</object>
155+
</django-objects>
156+
"""
157+
158+
def deserialize(crafted_xml):
159+
iterator = XMLDeserializer(crafted_xml)
160+
garbage_collect()
161+
162+
start_time = time.perf_counter()
163+
result = list(iterator)
164+
end_time = time.perf_counter()
165+
166+
self.assertEqual(len(result), 1)
167+
self.assertIsInstance(result[0].object, models.Model)
168+
return end_time - start_time
169+
170+
def assertFactor(label, params, factor=2):
171+
factors = []
172+
prev_time = None
173+
for depth, length in params:
174+
crafted_xml = build_crafted_xml(depth, length)
175+
elapsed = deserialize(crafted_xml)
176+
if prev_time is not None:
177+
factors.append(elapsed / prev_time)
178+
prev_time = elapsed
179+
180+
with self.subTest(label):
181+
# Assert based on the average factor to reduce test flakiness.
182+
self.assertLessEqual(sum(factors) / len(factors), factor)
183+
184+
assertFactor(
185+
"varying depth, varying length",
186+
[(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
187+
2,
188+
)
189+
assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)

0 commit comments

Comments
 (0)