"""Placeholder token rendering for spec-shaped multimodal tags. Adapters populate :attr:`IRBlock.content_template` with ``{{TBL:k}}``, ``{{IMG:k}}``, ``{{EQ:k}}`` and ``{{EQI:k}}`` tokens. The writer assigns ``tb-`` / ``im-`` / ``eq-`` ids, then calls :func:`render_template` to substitute the spec-shaped XML-style tags described in ``LightRAGSidecarFormat-zh.md`` §3.3. """ from __future__ import annotations import json import re from typing import Callable _TOKEN_RE = re.compile(r"\{\{(TBL|IMG|EQ|EQI):([A-Za-z0-9_\-]+)\}\}") def xml_attr_escape(value: str) -> str: """Escape an attribute value for an XML-style tag attribute.""" return ( str(value) .replace("&", "&") .replace("<", "<") .replace(">", ">") .replace('"', """) ) def caption_attr(caption: str) -> str: """Render a leading-space ``caption="..."`` attribute; empty when absent. Matches the existing native_docx adapter convention exactly so consumers that grep for ``caption="``-prefixed substrings keep working. """ return f' caption="{xml_attr_escape(caption)}"' if caption else "" def render_table_tag(table_id: str, fmt: str, body: str) -> str: """``body
`` per spec §3.3. ``body`` is the table content; for ``json`` it is the JSON array, for ``html`` it is the raw ``...
`` HTML inside (the outer wrapper is added here). """ return ( f'{body}
' ) def render_drawing_tag( drawing_id: str, fmt: str, caption: str, path: str, src: str, ) -> str: """````.""" return ( f'' ) def render_equation_tag( eq_id: str | None, latex: str, caption: str = "", ) -> str: """Block equation: ``latex``. Inline equation (``eq_id is None``): ``latex`` — no id, never written to ``equations.json``. Caption is preserved for both forms (spec §3.3 allows ``caption`` on ````). """ if eq_id is None: return f'{latex}' return ( f'{latex}' ) def render_template( template: str, *, table_renderer: Callable[[str], str], drawing_renderer: Callable[[str], str], equation_renderer: Callable[[str], str], inline_equation_renderer: Callable[[str], str], ) -> str: """Replace ``{{TBL:k}}`` / ``{{IMG:k}}`` / ``{{EQ:k}}`` / ``{{EQI:k}}``. Each renderer takes the placeholder *key* (the ``k`` portion) and returns the rendered XML-style tag. """ def _replace(match: "re.Match[str]") -> str: kind, key = match.group(1), match.group(2) if kind == "TBL": return table_renderer(key) if kind == "IMG": return drawing_renderer(key) if kind == "EQ": return equation_renderer(key) return inline_equation_renderer(key) return _TOKEN_RE.sub(_replace, template) def table_body_for_rows(rows: list[list[str]]) -> str: """Encode rows as the JSON body that lives inside ````.""" return json.dumps(rows, ensure_ascii=False)