__init__.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. """Adapters for external document parsing services.
  2. Each subpackage under ``parser/external/`` integrates one external parser
  3. (docling, mineru, ...) by handling:
  4. - request/upload/poll choreography against the parser's HTTP API,
  5. - on-disk caching of the raw bundle under ``<base>.<engine>_raw/``,
  6. - normalization into LightRAG IR (``IRDoc``) for the sidecar writer.
  7. Shared cross-engine helpers (size/hash, atomic manifest IO, safe zip
  8. extraction, env coercion) live at this package root in private modules
  9. prefixed ``_``. Engine-specific cache validation, manifest construction,
  10. and IR adaptation live in each subpackage.
  11. """
  12. from lightrag.parser.external._common import (
  13. clear_dir_contents,
  14. compute_size_and_hash,
  15. env_bool,
  16. env_int,
  17. env_json,
  18. raw_dir_for_parsed_dir,
  19. )
  20. from lightrag.parser.external._manifest import (
  21. MANIFEST_FILENAME,
  22. MANIFEST_VERSION,
  23. Manifest,
  24. ManifestFile,
  25. load_manifest,
  26. manifest_path,
  27. write_manifest,
  28. )
  29. from lightrag.parser.external._zip import safe_extract_zip
  30. __all__ = [
  31. "MANIFEST_FILENAME",
  32. "MANIFEST_VERSION",
  33. "Manifest",
  34. "ManifestFile",
  35. "clear_dir_contents",
  36. "compute_size_and_hash",
  37. "env_bool",
  38. "env_int",
  39. "env_json",
  40. "load_manifest",
  41. "manifest_path",
  42. "raw_dir_for_parsed_dir",
  43. "safe_extract_zip",
  44. "write_manifest",
  45. ]