test_atomic_write_nano.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
"""Atomicity coverage for the NanoVectorDB save path.

``NanoVectorDB.save()`` is third-party and writes via plain
``open(self.storage_file, "w") + json.dump``. ``NanoVectorDBStorage`` wraps
that call by swapping ``storage_file`` to a per-writer tmp under
``atomic_write``. The contract we have to preserve is:

1. On success: the destination file is updated and no tmp residue remains.
2. On failure: the destination is unchanged and ``client.storage_file`` is
   restored to the real path (so subsequent reads don't keep pointing at
   the tmp).
"""
  11. import json
  12. import os
  13. from unittest.mock import patch
  14. import pytest
  15. nano_vectordb = pytest.importorskip("nano_vectordb") # noqa: F841
  16. from lightrag.file_atomic import atomic_write # noqa: E402
  17. from nano_vectordb import NanoVectorDB # noqa: E402
  18. def _make_client(tmp_path) -> tuple[NanoVectorDB, str]:
  19. target = str(tmp_path / "vdb_test.json")
  20. client = NanoVectorDB(4, storage_file=target)
  21. return client, target
  22. def _save_atomic(client: NanoVectorDB, target: str) -> None:
  23. """Mirrors the production callback's save lambda — kept inline so the
  24. test exercises the exact swap-and-restore pattern."""
  25. def _do(tmp: str) -> None:
  26. original = client.storage_file
  27. client.storage_file = tmp
  28. try:
  29. client.save()
  30. finally:
  31. client.storage_file = original
  32. atomic_write(target, _do)
  33. @pytest.mark.offline
  34. def test_nano_save_atomic_publishes_file_and_restores_storage_file(tmp_path):
  35. client, target = _make_client(tmp_path)
  36. _save_atomic(client, target)
  37. assert os.path.exists(target)
  38. assert client.storage_file == target
  39. # Sanity check the payload is valid JSON written by NanoVectorDB.
  40. json.load(open(target))
  41. leftovers = [p for p in os.listdir(tmp_path) if ".tmp." in p]
  42. assert leftovers == [], f"Unexpected tmp residue: {leftovers}"
  43. @pytest.mark.offline
  44. def test_nano_save_atomic_replace_crash_preserves_prior(tmp_path):
  45. client, target = _make_client(tmp_path)
  46. _save_atomic(client, target)
  47. original_payload = open(target).read()
  48. with patch(
  49. "lightrag.file_atomic.os.replace",
  50. side_effect=OSError("simulated crash"),
  51. ):
  52. with pytest.raises(OSError, match="simulated crash"):
  53. _save_atomic(client, target)
  54. # Destination unchanged.
  55. assert open(target).read() == original_payload
  56. # storage_file must have been restored even though the outer atomic_write
  57. # raised — otherwise NanoVectorDB would silently start writing to a path
  58. # that no longer exists.
  59. assert client.storage_file == target
  60. leftovers = [p for p in os.listdir(tmp_path) if ".tmp." in p]
  61. assert leftovers == [], f"Python-exception path must clean tmp, got {leftovers}"
  62. @pytest.mark.offline
  63. def test_nano_save_failure_inside_save_restores_storage_file(tmp_path):
  64. """If ``NanoVectorDB.save()`` itself raises (e.g. encoding failure), the
  65. inner ``finally`` must restore ``storage_file`` before the exception
  66. bubbles up through ``atomic_write``."""
  67. client, target = _make_client(tmp_path)
  68. # Patch the third-party save to blow up.
  69. with patch.object(NanoVectorDB, "save", side_effect=RuntimeError("boom")):
  70. with pytest.raises(RuntimeError, match="boom"):
  71. _save_atomic(client, target)
  72. assert (
  73. client.storage_file == target
  74. ), "Inner save failure must still restore storage_file to the real path"
  75. leftovers = [p for p in os.listdir(tmp_path) if ".tmp." in p]
  76. assert leftovers == [], f"save failure must clean tmp, got {leftovers}"