addon_params.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. """Addon parameters: observable mapping + normalization helper.
  2. ``addon_params`` is a free-form configuration dict on :class:`LightRAG` that
  3. controls things like summary language and entity-type prompt overrides. The
  4. module exposes:
  5. - :class:`ObservableAddonParams` — a ``dict`` subclass that calls a callback
  6. whenever the contents change so the LightRAG runtime can invalidate cached
  7. derived state.
  8. - :func:`default_addon_params` — environment-driven defaults.
  9. - :func:`normalize_addon_params` — converts an arbitrary input into a plain
  10. ``dict`` with the env-driven defaults backfilled.
  11. """
  12. from __future__ import annotations
  13. from typing import Any, Callable, Mapping
  14. from lightrag.constants import DEFAULT_SUMMARY_LANGUAGE
  15. from lightrag.utils import get_env_value, logger
  16. # Keys that used to live in addon_params but have been superseded by
  17. # per-document ``process_options``. We log once when callers still pass them
  18. # so existing configs surface their drift without breaking.
  19. _DEPRECATED_ADDON_PARAM_KEYS: tuple[str, ...] = ("enable_multimodal_pipeline",)
  20. _warned_deprecated_keys: set[str] = set()
  21. def _emit_deprecated_addon_warnings(params: Mapping[str, Any]) -> None:
  22. for key in _DEPRECATED_ADDON_PARAM_KEYS:
  23. if key in params and key not in _warned_deprecated_keys:
  24. logger.warning(
  25. f"addon_params['{key}'] is deprecated and ignored; per-document "
  26. f"behaviour is now controlled by filename-hint process_options "
  27. f"(see docs/FileProcessingConfiguration-zh.md)."
  28. )
  29. _warned_deprecated_keys.add(key)
  30. def default_addon_params() -> dict[str, Any]:
  31. # Lazy import to avoid the parser_routing → utils → … cycle that
  32. # would otherwise form when parser_routing imports back into this
  33. # module via ``LightRAG`` construction paths.
  34. from lightrag.parser.routing import default_chunker_config
  35. return {
  36. "language": get_env_value("SUMMARY_LANGUAGE", DEFAULT_SUMMARY_LANGUAGE, str),
  37. "entity_type_prompt_file": get_env_value("ENTITY_TYPE_PROMPT_FILE", "", str),
  38. # Per-strategy chunker parameters; mutate at runtime (e.g.
  39. # ``rag.addon_params["chunker"]["recursive_character"]["separators"]
  40. # = [...]``) to change defaults applied to subsequently
  41. # enqueued documents. Per-document snapshots are persisted to
  42. # ``full_docs[doc_id]["chunk_options"]`` at enqueue time and
  43. # are not affected by later runtime mutations.
  44. "chunker": default_chunker_config(),
  45. }
  46. def normalize_addon_params(addon_params: Mapping[str, Any] | None) -> dict[str, Any]:
  47. """Coerce ``addon_params`` to a plain dict with env defaults backfilled."""
  48. from lightrag.parser.routing import default_chunker_config
  49. if addon_params is None:
  50. normalized = default_addon_params()
  51. elif isinstance(addon_params, Mapping):
  52. _emit_deprecated_addon_warnings(addon_params)
  53. normalized = {
  54. k: v
  55. for k, v in addon_params.items()
  56. if k not in _DEPRECATED_ADDON_PARAM_KEYS
  57. }
  58. else:
  59. raise TypeError(
  60. "addon_params must be a Mapping or None, got "
  61. f"{type(addon_params).__name__}"
  62. )
  63. # When the caller supplies addon_params explicitly, the dataclass
  64. # default_factory is skipped — fall back to environment variables so
  65. # ENTITY_TYPE_PROMPT_FILE / SUMMARY_LANGUAGE / chunker still apply.
  66. normalized.setdefault(
  67. "language", get_env_value("SUMMARY_LANGUAGE", DEFAULT_SUMMARY_LANGUAGE, str)
  68. )
  69. normalized.setdefault(
  70. "entity_type_prompt_file",
  71. get_env_value("ENTITY_TYPE_PROMPT_FILE", "", str),
  72. )
  73. # Build the chunker default lazily — `default_chunker_config()` reads env
  74. # vars (e.g. CHUNK_R_SEPARATORS via json.loads) and would raise on a
  75. # malformed value, which would prevent an explicit caller-supplied
  76. # `chunker` from bypassing a broken environment.
  77. if "chunker" not in normalized:
  78. normalized["chunker"] = default_chunker_config()
  79. return normalized
  80. class ObservableAddonParams(dict[str, Any]):
  81. def __init__(
  82. self,
  83. *args: Any,
  84. on_change: Callable[[], None] | None = None,
  85. **kwargs: Any,
  86. ) -> None:
  87. super().__init__(*args, **kwargs)
  88. self._on_change = on_change
  89. def _changed(self) -> None:
  90. if self._on_change is not None:
  91. self._on_change()
  92. def __setitem__(self, key: str, value: Any) -> None:
  93. super().__setitem__(key, value)
  94. self._changed()
  95. def __delitem__(self, key: str) -> None:
  96. super().__delitem__(key)
  97. self._changed()
  98. def clear(self) -> None:
  99. if self:
  100. super().clear()
  101. self._changed()
  102. def pop(self, key: str, default: Any = ...):
  103. existed = key in self
  104. if default is ...:
  105. value = super().pop(key)
  106. self._changed()
  107. else:
  108. value = super().pop(key, default)
  109. if existed:
  110. self._changed()
  111. return value
  112. def popitem(self) -> tuple[str, Any]:
  113. item = super().popitem()
  114. self._changed()
  115. return item
  116. def setdefault(self, key: str, default: Any = None) -> Any:
  117. if key in self:
  118. return self[key]
  119. value = super().setdefault(key, default)
  120. self._changed()
  121. return value
  122. def update(self, *args: Any, **kwargs: Any) -> None:
  123. if not args and not kwargs:
  124. return
  125. super().update(*args, **kwargs)
  126. self._changed()
  127. def __ior__(self, other: Mapping[str, Any]):
  128. super().__ior__(other)
  129. self._changed()
  130. return self