test_ir_builder_security.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. """Security-focused tests for native DOCX IR drawing asset handling."""
  2. from __future__ import annotations
  3. import pytest
  4. from lightrag.parser.docx.ir_builder import NativeDocxIRBuilder
  5. def _build_ir(content: str):
  6. return NativeDocxIRBuilder().normalize(
  7. [
  8. {
  9. "uuid": "p1",
  10. "uuid_end": "p1",
  11. "heading": "Section",
  12. "content": content,
  13. "parent_headings": [],
  14. "level": 1,
  15. }
  16. ],
  17. document_name="doc.docx",
  18. asset_dir_name="doc.blocks.assets",
  19. )
  20. @pytest.mark.offline
  21. def test_native_docx_ir_accepts_safe_local_drawing_asset() -> None:
  22. ir = _build_ir(
  23. '<drawing id="1" name="fig" format="png" ' 'path="doc.blocks.assets/fig.png" />'
  24. )
  25. drawing = ir.blocks[0].drawings[0]
  26. assert drawing.asset_ref == "fig.png"
  27. assert drawing.path_override is None
  28. assert len(ir.assets) == 1
  29. assert ir.assets[0].ref == "fig.png"
  30. assert ir.assets[0].suggested_name == "fig.png"
  31. assert ir.assets[0].source is None
  32. @pytest.mark.offline
  33. @pytest.mark.parametrize(
  34. "path",
  35. [
  36. "doc.blocks.assets/../secret.png",
  37. "doc.blocks.assets//tmp/secret.png",
  38. r"doc.blocks.assets/..\secret.png",
  39. ],
  40. )
  41. def test_native_docx_ir_rejects_unsafe_local_drawing_asset(path: str) -> None:
  42. ir = _build_ir(f'<drawing id="1" name="fig" format="png" path="{path}" />')
  43. drawing = ir.blocks[0].drawings[0]
  44. assert drawing.asset_ref == ""
  45. assert drawing.path_override is None
  46. assert ir.assets == []
  47. @pytest.mark.offline
  48. def test_native_docx_ir_preserves_non_asset_external_drawing_path() -> None:
  49. ir = _build_ir(
  50. '<drawing id="1" name="fig" format="gif" ' 'path="../images/legacy.gif" />'
  51. )
  52. drawing = ir.blocks[0].drawings[0]
  53. assert drawing.asset_ref == ""
  54. assert drawing.path_override == "../images/legacy.gif"
  55. assert ir.assets == []