test_parse_docling_sidecar.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
"""Integration tests for ``parse_docling`` with the unified sidecar pipeline.

Stubs :class:`DoclingRawClient.download_into` so no real docling-serve is
contacted; the focus is on:

- happy path: cache miss → fake bundle written → sidecar emitted with all
  expected files at the spec-compliant locations
- cache hit: a pre-existing valid ``*.docling_raw/`` + manifest causes
  ``DoclingRawClient.download_into`` NOT to be called
- ``LIGHTRAG_FORCE_REPARSE_DOCLING=true`` forces a re-download even when
  the manifest is valid
- source content swap → cache miss
- options_signature change (``DOCLING_OCR_LANG`` toggle) → cache miss
- endpoint change (``DOCLING_ENDPOINT`` switch) → cache miss
- adapter sees zero blocks → parse fails loudly (no half-baked sidecar)
"""
  14. from __future__ import annotations
  15. import asyncio
  16. import json
  17. from datetime import datetime, timezone
  18. from pathlib import Path
  19. from typing import Any
  20. import numpy as np
  21. import pytest
  22. from lightrag import LightRAG
  23. from lightrag.constants import FULL_DOCS_FORMAT_LIGHTRAG
  24. from lightrag.parser.external import (
  25. Manifest,
  26. ManifestFile,
  27. compute_size_and_hash,
  28. write_manifest,
  29. )
  30. from lightrag.parser.external.docling.cache import (
  31. compute_options_signature,
  32. snapshot_tunable_env,
  33. )
  34. from lightrag.parser.external.docling.client import FIXED_CONSTANTS
  35. from lightrag.utils import EmbeddingFunc, Tokenizer
  36. class _SimpleTokenizerImpl:
  37. def encode(self, content: str) -> list[int]:
  38. return [ord(ch) for ch in content]
  39. def decode(self, tokens: list[int]) -> str:
  40. return "".join(chr(t) for t in tokens)
  41. async def _mock_embedding(texts: list[str]) -> np.ndarray:
  42. return np.random.rand(len(texts), 32)
  43. async def _mock_llm(prompt: Any, **kwargs: Any) -> str:
  44. return '{"name":"x","summary":"s","detail_description":"d"}'
  45. def _new_rag(tmp_path: Path) -> LightRAG:
  46. return LightRAG(
  47. working_dir=str(tmp_path),
  48. workspace=f"test-docling-sidecar-{tmp_path.name}",
  49. llm_model_func=_mock_llm,
  50. embedding_func=EmbeddingFunc(
  51. embedding_dim=32,
  52. max_token_size=4096,
  53. func=_mock_embedding,
  54. ),
  55. tokenizer=Tokenizer("mock-tokenizer", _SimpleTokenizerImpl()),
  56. vlm_process_enable=False,
  57. )
# Synthetic DoclingDocument payload that the fake download writes as
# ``demo.json``. It holds one section header ("Intro") whose children are a
# paragraph, a table, a picture and a formula. There is deliberately no text
# item with label "title".
_FAKE_DOCLING_JSON: dict[str, Any] = {
    "schema_name": "DoclingDocument",
    "version": "1.10.0",
    "origin": {"filename": "demo.pdf", "mimetype": "application/pdf"},
    # The body references only the section header; everything else hangs
    # off that header through its own ``children`` list.
    "body": {
        "self_ref": "#/body",
        "children": [
            {"$ref": "#/texts/0"},
        ],
        "content_layer": "body",
        "label": "unspecified",
    },
    "groups": [],
    "texts": [
        {
            # Section header that parents the remaining items.
            "self_ref": "#/texts/0",
            "label": "section_header",
            "text": "Intro",
            "orig": "Intro",
            "level": 1,
            "content_layer": "body",
            "children": [
                {"$ref": "#/texts/1"},
                {"$ref": "#/tables/0"},
                {"$ref": "#/pictures/0"},
                {"$ref": "#/texts/2"},
            ],
            "prov": [
                {
                    "page_no": 1,
                    # Bottom-left origin, hence t (100) is larger than b (80).
                    "bbox": {
                        "l": 10.0,
                        "t": 100.0,
                        "r": 200.0,
                        "b": 80.0,
                        "coord_origin": "BOTTOMLEFT",
                    },
                    "charspan": [0, 5],
                }
            ],
        },
        {
            # Plain paragraph with provenance on page 1.
            "self_ref": "#/texts/1",
            "label": "text",
            "text": "Body paragraph.",
            "orig": "Body paragraph.",
            "content_layer": "body",
            "prov": [
                {
                    "page_no": 1,
                    "bbox": {
                        "l": 10.0,
                        "t": 60.0,
                        "r": 200.0,
                        "b": 40.0,
                        "coord_origin": "BOTTOMLEFT",
                    },
                    "charspan": [0, 15],
                }
            ],
        },
        {
            # Formula item; carries no provenance entries.
            "self_ref": "#/texts/2",
            "label": "formula",
            "text": "E = mc^2",
            "orig": "E = mc^2",
            "content_layer": "body",
            "prov": [],
        },
    ],
    "tables": [
        {
            # 2x2 table: one header-like row and one data row.
            "self_ref": "#/tables/0",
            "label": "table",
            "content_layer": "body",
            "data": {
                "num_rows": 2,
                "num_cols": 2,
                "grid": [
                    [{"text": "h1"}, {"text": "h2"}],
                    [{"text": "a"}, {"text": "b"}],
                ],
            },
            "prov": [],
        }
    ],
    "pictures": [
        {
            # The URI is the same relative path at which the fake download
            # writes its placeholder PNG inside the raw bundle.
            "self_ref": "#/pictures/0",
            "label": "picture",
            "content_layer": "body",
            "image": {"uri": "artifacts/img_000000.png", "mimetype": "image/png"},
            "prov": [],
        }
    ],
    "key_value_items": [],
    "form_items": [],
    "pages": {"1": {"size": {"width": 612.0, "height": 792.0}, "page_no": 1}},
}
  157. def _install_fake_download(monkeypatch: pytest.MonkeyPatch) -> dict[str, int]:
  158. """Replace ``DoclingRawClient.download_into`` with a recorder that
  159. writes a synthetic raw bundle and a valid manifest."""
  160. import lightrag.parser.external.docling.client as client_mod
  161. counters = {"calls": 0}
  162. async def _fake_download(self, raw_dir: Path, source_file_path: Path, **_kwargs):
  163. counters["calls"] += 1
  164. raw_dir.mkdir(parents=True, exist_ok=True)
  165. main_json = raw_dir / "demo.json"
  166. main_json.write_text(json.dumps(_FAKE_DOCLING_JSON), encoding="utf-8")
  167. (raw_dir / "demo.md").write_text("# fake md", encoding="utf-8")
  168. art = raw_dir / "artifacts"
  169. art.mkdir(exist_ok=True)
  170. (art / "img_000000.png").write_bytes(b"\x89PNG fake")
  171. src_size, src_hash = compute_size_and_hash(source_file_path)
  172. crit_size, crit_hash = compute_size_and_hash(main_json)
  173. others = [
  174. ManifestFile(path="demo.md", size=(raw_dir / "demo.md").stat().st_size),
  175. ManifestFile(
  176. path="artifacts/img_000000.png",
  177. size=(art / "img_000000.png").stat().st_size,
  178. ),
  179. ]
  180. options_signature = compute_options_signature(
  181. tunable_env=snapshot_tunable_env(),
  182. fixed_constants=FIXED_CONSTANTS,
  183. )
  184. manifest = Manifest(
  185. engine="docling",
  186. source_content_hash=src_hash,
  187. source_size_bytes=src_size,
  188. source_filename_at_parse=source_file_path.name,
  189. critical_file=ManifestFile(
  190. path="demo.json", size=crit_size, sha256=crit_hash
  191. ),
  192. files=others,
  193. total_size_bytes=crit_size + sum(f.size for f in others),
  194. task_id=f"fake-{counters['calls']}",
  195. endpoint_signature="http://docling.test",
  196. options_signature=options_signature,
  197. extras={"fixed_constants": dict(FIXED_CONSTANTS)},
  198. downloaded_at=datetime.now(timezone.utc).isoformat(timespec="seconds"),
  199. )
  200. write_manifest(raw_dir, manifest)
  201. return manifest
  202. monkeypatch.setattr(client_mod.DoclingRawClient, "download_into", _fake_download)
  203. return counters
  204. def _stub_pipeline(monkeypatch: pytest.MonkeyPatch, rag: LightRAG, src: Path) -> None:
  205. """Common pipeline-level stubs: avoid moving the source file and pin
  206. the file resolver to the synthetic path."""
  207. async def _noop_archive(_p: str) -> None:
  208. return None
  209. import lightrag.pipeline as pipeline_module
  210. monkeypatch.setattr(
  211. pipeline_module,
  212. "archive_docx_source_after_full_docs_sync",
  213. _noop_archive,
  214. )
  215. monkeypatch.setattr(rag, "_resolve_source_file_for_parser", lambda _p: str(src))
  216. def _seed_doc_status(rag: LightRAG, doc_id: str) -> Any:
  217. return rag.doc_status.upsert(
  218. {
  219. doc_id: {
  220. "status": "PARSING",
  221. "content_summary": "",
  222. "content_length": 0,
  223. "chunks_count": 0,
  224. "chunks_list": [],
  225. "created_at": "2026-05-18T00:00:00+00:00",
  226. "updated_at": "2026-05-18T00:00:00+00:00",
  227. "file_path": "demo.pdf",
  228. "track_id": "trk",
  229. "content_hash": "",
  230. "metadata": {},
  231. }
  232. }
  233. )
  234. # ---------------------------------------------------------------------------
  235. # Tests
  236. # ---------------------------------------------------------------------------
  237. @pytest.mark.offline
  238. def test_parse_docling_emits_compliant_sidecar(
  239. tmp_path: Path, monkeypatch: pytest.MonkeyPatch
  240. ) -> None:
  241. async def _run() -> None:
  242. monkeypatch.setenv("DOCLING_ENDPOINT", "http://docling.test")
  243. counters = _install_fake_download(monkeypatch)
  244. input_dir = tmp_path / "inputs" / "ws"
  245. input_dir.mkdir(parents=True)
  246. src = input_dir / "demo.pdf"
  247. src.write_bytes(b"PDFPDF" * 256)
  248. rag = _new_rag(tmp_path)
  249. await rag.initialize_storages()
  250. try:
  251. _stub_pipeline(monkeypatch, rag, src)
  252. doc_id = "doc-abcdef0123456789abcdef0123456789"
  253. await _seed_doc_status(rag, doc_id)
  254. parsed = await rag.parse_docling(
  255. doc_id=doc_id,
  256. file_path="demo.pdf",
  257. content_data={},
  258. )
  259. assert counters["calls"] == 1
  260. parsed_dir = Path(parsed["blocks_path"]).parent
  261. assert parsed["parse_format"] == FULL_DOCS_FORMAT_LIGHTRAG
  262. assert parsed_dir.name == "demo.pdf.parsed"
  263. files = {p.name for p in parsed_dir.iterdir() if p.is_file()}
  264. assert "demo.blocks.jsonl" in files
  265. assert "demo.tables.json" in files
  266. assert "demo.drawings.json" in files
  267. assert "demo.equations.json" in files
  268. assert (parsed_dir / "demo.blocks.assets").is_dir()
  269. assert (parsed_dir / "demo.blocks.assets" / "img_000000.png").is_file()
  270. blocks_raw = (parsed_dir / "demo.blocks.jsonl").read_text()
  271. lines = blocks_raw.splitlines()
  272. meta = json.loads(lines[0])
  273. rows = [json.loads(line) for line in lines[1:]]
  274. assert meta["parse_engine"] == "docling"
  275. assert meta["bbox_attributes"] == {"origin": "LEFTBOTTOM"}
  276. assert "max" not in meta["bbox_attributes"]
  277. assert "page_sizes" not in meta["bbox_attributes"]
  278. assert meta["table_file"] is True
  279. assert meta["drawing_file"] is True
  280. assert meta["equation_file"] is True
  281. # No label="title" in the fixture (matches the typical PDF case
  282. # where docling produces only section_headers) → doc_title falls
  283. # back to the document stem.
  284. assert meta["doc_title"] == "demo"
  285. contents = " ".join(row.get("content", "") for row in rows)
  286. assert '<table id="tb-' in contents
  287. assert "<drawing" in contents
  288. assert "<equation" in contents
  289. # Raw bundle preserved next to sidecar
  290. raw_dir = parsed_dir.parent / "demo.pdf.docling_raw"
  291. assert (raw_dir / "_manifest.json").is_file()
  292. assert (raw_dir / "demo.json").is_file()
  293. assert (raw_dir / "demo.md").is_file()
  294. assert (raw_dir / "artifacts" / "img_000000.png").is_file()
  295. # Drawing path correctly resolved
  296. drawings = json.loads((parsed_dir / "demo.drawings.json").read_text())[
  297. "drawings"
  298. ]
  299. (drawing_id, drawing_item) = next(iter(drawings.items()))
  300. assert drawing_id.startswith("im-")
  301. assert drawing_item["path"] == "demo.blocks.assets/img_000000.png"
  302. # Table self_ref propagated
  303. tables = json.loads((parsed_dir / "demo.tables.json").read_text())["tables"]
  304. (_, table_item) = next(iter(tables.items()))
  305. assert table_item.get("self_ref") == "#/tables/0"
  306. finally:
  307. await rag.finalize_storages()
  308. asyncio.new_event_loop().run_until_complete(_run())
  309. @pytest.mark.offline
  310. def test_parse_docling_cache_hit_skips_download(
  311. tmp_path: Path, monkeypatch: pytest.MonkeyPatch
  312. ) -> None:
  313. async def _run() -> None:
  314. monkeypatch.setenv("DOCLING_ENDPOINT", "http://docling.test")
  315. counters = _install_fake_download(monkeypatch)
  316. input_dir = tmp_path / "inputs" / "ws"
  317. input_dir.mkdir(parents=True)
  318. src = input_dir / "demo.pdf"
  319. src.write_bytes(b"PDFPDF" * 256)
  320. rag = _new_rag(tmp_path)
  321. await rag.initialize_storages()
  322. try:
  323. _stub_pipeline(monkeypatch, rag, src)
  324. doc_id = "doc-abcdef0123456789abcdef0123456789"
  325. await _seed_doc_status(rag, doc_id)
  326. await rag.parse_docling(
  327. doc_id=doc_id,
  328. file_path="demo.pdf",
  329. content_data={},
  330. )
  331. assert counters["calls"] == 1
  332. await rag.parse_docling(
  333. doc_id=doc_id,
  334. file_path="demo.pdf",
  335. content_data={},
  336. )
  337. assert counters["calls"] == 1, "cache hit must not re-download"
  338. monkeypatch.setenv("LIGHTRAG_FORCE_REPARSE_DOCLING", "true")
  339. await rag.parse_docling(
  340. doc_id=doc_id,
  341. file_path="demo.pdf",
  342. content_data={},
  343. )
  344. assert counters["calls"] == 2
  345. finally:
  346. await rag.finalize_storages()
  347. asyncio.new_event_loop().run_until_complete(_run())
  348. @pytest.mark.offline
  349. def test_parse_docling_cache_invalidates_on_source_change(
  350. tmp_path: Path, monkeypatch: pytest.MonkeyPatch
  351. ) -> None:
  352. async def _run() -> None:
  353. monkeypatch.setenv("DOCLING_ENDPOINT", "http://docling.test")
  354. counters = _install_fake_download(monkeypatch)
  355. input_dir = tmp_path / "inputs" / "ws"
  356. input_dir.mkdir(parents=True)
  357. src = input_dir / "demo.pdf"
  358. src.write_bytes(b"PDFPDF" * 256)
  359. rag = _new_rag(tmp_path)
  360. await rag.initialize_storages()
  361. try:
  362. _stub_pipeline(monkeypatch, rag, src)
  363. doc_id = "doc-abcdef0123456789abcdef0123456789"
  364. await _seed_doc_status(rag, doc_id)
  365. await rag.parse_docling(
  366. doc_id=doc_id,
  367. file_path="demo.pdf",
  368. content_data={},
  369. )
  370. assert counters["calls"] == 1
  371. data = src.read_bytes()
  372. src.write_bytes(b"\x00" + data[1:])
  373. await rag.parse_docling(
  374. doc_id=doc_id,
  375. file_path="demo.pdf",
  376. content_data={},
  377. )
  378. assert counters["calls"] == 2
  379. finally:
  380. await rag.finalize_storages()
  381. asyncio.new_event_loop().run_until_complete(_run())
  382. @pytest.mark.offline
  383. def test_parse_docling_options_signature_invalidates_cache(
  384. tmp_path: Path, monkeypatch: pytest.MonkeyPatch
  385. ) -> None:
  386. async def _run() -> None:
  387. monkeypatch.setenv("DOCLING_ENDPOINT", "http://docling.test")
  388. counters = _install_fake_download(monkeypatch)
  389. input_dir = tmp_path / "inputs" / "ws"
  390. input_dir.mkdir(parents=True)
  391. src = input_dir / "demo.pdf"
  392. src.write_bytes(b"PDFPDF" * 256)
  393. rag = _new_rag(tmp_path)
  394. await rag.initialize_storages()
  395. try:
  396. _stub_pipeline(monkeypatch, rag, src)
  397. doc_id = "doc-abcdef0123456789abcdef0123456789"
  398. await _seed_doc_status(rag, doc_id)
  399. await rag.parse_docling(
  400. doc_id=doc_id,
  401. file_path="demo.pdf",
  402. content_data={},
  403. )
  404. assert counters["calls"] == 1
  405. # Flip an env var that participates in the options signature
  406. monkeypatch.setenv("DOCLING_OCR_LANG", "en,zh")
  407. await rag.parse_docling(
  408. doc_id=doc_id,
  409. file_path="demo.pdf",
  410. content_data={},
  411. )
  412. assert (
  413. counters["calls"] == 2
  414. ), "DOCLING_OCR_LANG change must invalidate the bundle cache"
  415. finally:
  416. await rag.finalize_storages()
  417. asyncio.new_event_loop().run_until_complete(_run())
  418. @pytest.mark.offline
  419. def test_parse_docling_endpoint_signature_invalidates_cache(
  420. tmp_path: Path, monkeypatch: pytest.MonkeyPatch
  421. ) -> None:
  422. async def _run() -> None:
  423. monkeypatch.setenv("DOCLING_ENDPOINT", "http://docling.test")
  424. counters = _install_fake_download(monkeypatch)
  425. input_dir = tmp_path / "inputs" / "ws"
  426. input_dir.mkdir(parents=True)
  427. src = input_dir / "demo.pdf"
  428. src.write_bytes(b"PDFPDF" * 256)
  429. rag = _new_rag(tmp_path)
  430. await rag.initialize_storages()
  431. try:
  432. _stub_pipeline(monkeypatch, rag, src)
  433. doc_id = "doc-abcdef0123456789abcdef0123456789"
  434. await _seed_doc_status(rag, doc_id)
  435. await rag.parse_docling(
  436. doc_id=doc_id,
  437. file_path="demo.pdf",
  438. content_data={},
  439. )
  440. assert counters["calls"] == 1
  441. # Pointing at a different docling-serve instance must not silently
  442. # reuse a bundle that was produced by the previous one.
  443. monkeypatch.setenv("DOCLING_ENDPOINT", "http://docling-other.test")
  444. await rag.parse_docling(
  445. doc_id=doc_id,
  446. file_path="demo.pdf",
  447. content_data={},
  448. )
  449. assert (
  450. counters["calls"] == 2
  451. ), "DOCLING_ENDPOINT change must invalidate the bundle cache"
  452. finally:
  453. await rag.finalize_storages()
  454. asyncio.new_event_loop().run_until_complete(_run())
  455. @pytest.mark.offline
  456. def test_parse_docling_zero_blocks_raises(
  457. tmp_path: Path, monkeypatch: pytest.MonkeyPatch
  458. ) -> None:
  459. """When the docling bundle yields no body blocks (e.g. everything was
  460. classified as furniture/background) ``parse_docling`` must fail loudly
  461. so the document is marked failed — never persist a half-baked sidecar.
  462. """
  463. async def _run() -> None:
  464. monkeypatch.setenv("DOCLING_ENDPOINT", "http://docling.test")
  465. # Install a fake download that writes a valid bundle whose body has
  466. # no children — the adapter then produces zero IR blocks.
  467. import lightrag.parser.external.docling.client as client_mod
  468. empty_json: dict[str, Any] = {
  469. "schema_name": "DoclingDocument",
  470. "version": "1.10.0",
  471. "origin": {"filename": "demo.pdf", "mimetype": "application/pdf"},
  472. "body": {
  473. "self_ref": "#/body",
  474. "children": [],
  475. "content_layer": "body",
  476. "label": "unspecified",
  477. },
  478. "groups": [],
  479. "texts": [],
  480. "tables": [],
  481. "pictures": [],
  482. "key_value_items": [],
  483. "form_items": [],
  484. "pages": {},
  485. }
  486. async def _fake_download(
  487. self, raw_dir: Path, source_file_path: Path, **_kwargs
  488. ):
  489. raw_dir.mkdir(parents=True, exist_ok=True)
  490. main_json = raw_dir / "demo.json"
  491. main_json.write_text(json.dumps(empty_json), encoding="utf-8")
  492. (raw_dir / "demo.md").write_text("# empty", encoding="utf-8")
  493. src_size, src_hash = compute_size_and_hash(source_file_path)
  494. crit_size, crit_hash = compute_size_and_hash(main_json)
  495. others = [
  496. ManifestFile(path="demo.md", size=(raw_dir / "demo.md").stat().st_size),
  497. ]
  498. options_signature = compute_options_signature(
  499. tunable_env=snapshot_tunable_env(),
  500. fixed_constants=FIXED_CONSTANTS,
  501. )
  502. manifest = Manifest(
  503. engine="docling",
  504. source_content_hash=src_hash,
  505. source_size_bytes=src_size,
  506. source_filename_at_parse=source_file_path.name,
  507. critical_file=ManifestFile(
  508. path="demo.json", size=crit_size, sha256=crit_hash
  509. ),
  510. files=others,
  511. total_size_bytes=crit_size + sum(f.size for f in others),
  512. task_id="fake-empty",
  513. endpoint_signature="http://docling.test",
  514. options_signature=options_signature,
  515. extras={"fixed_constants": dict(FIXED_CONSTANTS)},
  516. downloaded_at=datetime.now(timezone.utc).isoformat(timespec="seconds"),
  517. )
  518. write_manifest(raw_dir, manifest)
  519. return manifest
  520. monkeypatch.setattr(
  521. client_mod.DoclingRawClient, "download_into", _fake_download
  522. )
  523. input_dir = tmp_path / "inputs" / "ws"
  524. input_dir.mkdir(parents=True)
  525. src = input_dir / "demo.pdf"
  526. src.write_bytes(b"PDFPDF" * 256)
  527. rag = _new_rag(tmp_path)
  528. await rag.initialize_storages()
  529. try:
  530. _stub_pipeline(monkeypatch, rag, src)
  531. doc_id = "doc-abcdef0123456789abcdef0123456789"
  532. await _seed_doc_status(rag, doc_id)
  533. with pytest.raises(ValueError, match="zero blocks"):
  534. await rag.parse_docling(
  535. doc_id=doc_id,
  536. file_path="demo.pdf",
  537. content_data={},
  538. )
  539. # Sidecar must NOT have been emitted: ``write_sidecar`` is reached
  540. # only after the zero-blocks check, so no ``*.blocks.jsonl`` may
  541. # exist anywhere under the workspace.
  542. blocks_files = list(tmp_path.rglob("*.blocks.jsonl"))
  543. assert (
  544. not blocks_files
  545. ), f"sidecar emitted despite zero-blocks failure: {blocks_files}"
  546. finally:
  547. await rag.finalize_storages()
  548. asyncio.new_event_loop().run_until_complete(_run())