sshuair09 hace 3 semanas
padre
commit
e738dff6f2
Se han modificado 100 ficheros con 21018 adiciones y 57 borrados
  1. 257 0
      .clinerules/01-basic.md
  2. 69 0
      .dockerignore
  3. 2 0
      .gitattributes
  4. 163 0
      .github/CONTRIBUTING.md
  5. 61 0
      .github/ISSUE_TEMPLATE/bug_report.yml
  6. 1 0
      .github/ISSUE_TEMPLATE/config.yml
  7. 26 0
      .github/ISSUE_TEMPLATE/feature_request.yml
  8. 26 0
      .github/ISSUE_TEMPLATE/question.yml
  9. 206 0
      .github/dependabot.yml
  10. 32 0
      .github/pull_request_template.md
  11. 58 0
      .github/workflows/copilot-setup-steps.yml
  12. 113 0
      .github/workflows/docker-build-lite.yml
  13. 109 0
      .github/workflows/docker-build-manual.yml
  14. 120 0
      .github/workflows/docker-publish.yml
  15. 134 0
      .github/workflows/linting.yaml
  16. 101 0
      .github/workflows/pypi-publish.yml
  17. 27 0
      .github/workflows/stale.yaml
  18. 61 0
      .github/workflows/tests.yml
  19. 89 0
      .gitignore
  20. 28 0
      .pre-commit-config.yaml
  21. 330 0
      AGENTS.md
  22. 1 0
      CLAUDE.md
  23. 110 0
      Dockerfile
  24. 111 0
      Dockerfile.lite
  25. 43 0
      Dockerfile.postgres
  26. 21 0
      LICENSE
  27. 5 0
      MANIFEST.in
  28. 99 0
      Makefile
  29. 436 0
      README-zh.md
  30. BIN
      README.assets/b2aaf634151b4706892693ffb43d9093.png
  31. BIN
      README.assets/iShot_2025-03-23_12.40.08.png
  32. 402 57
      README.md
  33. 18 0
      SECURITY.md
  34. BIN
      assets/LiteWrite.png
  35. BIN
      assets/logo.png
  36. 52 0
      config.ini.example
  37. 77 0
      docker-build-push.sh
  38. 244 0
      docker-compose-full.yml
  39. 37 0
      docker-compose.podman.yml
  40. 27 0
      docker-compose.yml
  41. 233 0
      docs/AdvancedFeatures.md
  42. 4 0
      docs/Algorithm.md
  43. 179 0
      docs/AsymmetricEmbedding.md
  44. 344 0
      docs/DockerDeployment.md
  45. 853 0
      docs/FileProcessingPipeline-zh.md
  46. 853 0
      docs/FileProcessingPipeline.md
  47. 216 0
      docs/FrontendBuildGuide.md
  48. 303 0
      docs/InteractiveSetup.md
  49. 1155 0
      docs/LightRAG-API-Server-zh.md
  50. BIN
      docs/LightRAG-API-Server.assets/image-20250323122538997.png
  51. BIN
      docs/LightRAG-API-Server.assets/image-20250323122754387.png
  52. BIN
      docs/LightRAG-API-Server.assets/image-20250323123011220.png
  53. BIN
      docs/LightRAG-API-Server.assets/image-20250323194750379.png
  54. 1155 0
      docs/LightRAG-API-Server.md
  55. 399 0
      docs/LightRAGSidecarFormat-zh.md
  56. 399 0
      docs/LightRAGSidecarFormat.md
  57. 197 0
      docs/MilvusConfigurationGuide.md
  58. 382 0
      docs/MultiSiteDeployment.md
  59. 316 0
      docs/OfflineDeployment.md
  60. 404 0
      docs/ParagraphSemanticChunking-zh.md
  61. 404 0
      docs/ParagraphSemanticChunking.md
  62. 129 0
      docs/ParserDebugCLI-zh.md
  63. 129 0
      docs/ParserDebugCLI.md
  64. 1222 0
      docs/ProgramingWithCore.md
  65. 235 0
      docs/Reproduce.md
  66. 376 0
      docs/RoleSpecificLLMConfiguration-zh.md
  67. 376 0
      docs/RoleSpecificLLMConfiguration.md
  68. 170 0
      docs/UV_LOCK_GUIDE.md
  69. 1033 0
      env.docker-compose-full
  70. 989 0
      env.example
  71. 57 0
      examples/generate_query.py
  72. 34 0
      examples/graph_visual_with_html.py
  73. 186 0
      examples/graph_visual_with_neo4j.py
  74. 166 0
      examples/graph_visual_with_opensearch.py
  75. 115 0
      examples/insert_custom_kg.py
  76. 296 0
      examples/lightrag_ag2_multiagent_demo.py
  77. 125 0
      examples/lightrag_azure_openai_demo.py
  78. 122 0
      examples/lightrag_gemini_demo.py
  79. 178 0
      examples/lightrag_gemini_postgres_demo.py
  80. 131 0
      examples/lightrag_gemini_workspace_demo.py
  81. 219 0
      examples/lightrag_ollama_demo.py
  82. 229 0
      examples/lightrag_openai_compatible_demo.py
  83. 187 0
      examples/lightrag_openai_demo.py
  84. 108 0
      examples/lightrag_openai_mongodb_graph_demo.py
  85. 178 0
      examples/lightrag_openai_opensearch_graph_demo.py
  86. 180 0
      examples/lightrag_vllm_demo.py
  87. 113 0
      examples/milvus_kwargs_configuration_demo.py
  88. 355 0
      examples/opensearch_storage_demo.py
  89. 234 0
      examples/rerank_example.py
  90. 114 0
      examples/unofficial-sample/copy_llm_cache_to_another_storage.py
  91. 56 0
      examples/unofficial-sample/lightrag_bedrock_demo.py
  92. 354 0
      examples/unofficial-sample/lightrag_cloudflare_demo.py
  93. 235 0
      examples/unofficial-sample/lightrag_embedding_prefixes.py
  94. 79 0
      examples/unofficial-sample/lightrag_hf_demo.py
  95. 139 0
      examples/unofficial-sample/lightrag_llamaindex_direct_demo.py
  96. 141 0
      examples/unofficial-sample/lightrag_llamaindex_litellm_demo.py
  97. 152 0
      examples/unofficial-sample/lightrag_llamaindex_litellm_opik_demo.py
  98. 107 0
      examples/unofficial-sample/lightrag_lmdeploy_demo.py
  99. 168 0
      examples/unofficial-sample/lightrag_nvidia_demo.py
  100. 109 0
      examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py

+ 257 - 0
.clinerules/01-basic.md

@@ -0,0 +1,257 @@
+# LightRAG Project Intelligence (.clinerules)
+
+## Project Overview
+LightRAG is a mature, production-ready Retrieval-Augmented Generation (RAG) system with comprehensive knowledge graph capabilities. The system has evolved from experimental to production-ready status with extensive functionality across all major components.
+
+## Current System State (August 15, 2025)
+- **Status**: Production Ready - Stable and Mature
+- **Configuration**: Gemini 2.5 Flash + BAAI/bge-m3 embeddings via custom endpoints
+- **Storage**: Default in-memory with file persistence (JsonKVStorage, NetworkXStorage, NanoVectorDBStorage)
+- **Language**: Chinese for summaries
+- **Workspace**: `space1` for data isolation
+- **Authentication**: JWT-based with admin/user accounts
+
+## Critical Implementation Patterns
+
+### 1. Embedding Format Compatibility (CRITICAL)
+**Pattern**: Always handle both base64 and raw array embedding formats
+**Location**: `lightrag/llm/openai.py` - `openai_embed` function
+**Issue**: Custom OpenAI-compatible endpoints return embeddings as raw arrays, not base64 strings
+**Solution**:
+```python
+np.array(dp.embedding, dtype=np.float32) if isinstance(dp.embedding, list)
+else np.frombuffer(base64.b64decode(dp.embedding), dtype=np.float32)
+```
+**Impact**: Document processing fails completely without this dual format support
+
+### 2. Async Pattern Consistency (CRITICAL)
+**Pattern**: Always await coroutines before calling methods on the result
+**Common Error**: `coroutine.method()` instead of `(await coroutine).method()`
+**Locations**: MongoDB implementations, Neo4j operations
+**Example**: `await self._data.list_indexes()` then `await cursor.to_list()`
+
+### 3. Storage Layer Data Compatibility (CRITICAL)
+**Pattern**: Always filter deprecated/incompatible fields during deserialization
+**Common Fields to Remove**: `content`, `_id` (MongoDB), database-specific fields
+**Implementation**: `data.pop('field_name', None)` before creating dataclass objects
+**Locations**: All storage implementations (JSON, Redis, MongoDB, PostgreSQL)
+
+### 4. Lock Key Generation (CRITICAL)
+**Pattern**: Always sort relationship pairs for consistent lock keys
+**Implementation**: `sorted_key_parts = sorted([src, tgt])` then `f"{sorted_key_parts[0]}-{sorted_key_parts[1]}"`
+**Impact**: Prevents deadlocks in concurrent relationship processing
+
+### 5. Event Loop Management (CRITICAL)
+**Pattern**: Handle event loop mismatches during shutdown gracefully
+**Implementation**: Timeout + specific RuntimeError handling for "attached to a different loop"
+**Location**: Neo4j storage finalization
+**Impact**: Prevents application shutdown failures
+
+### 6. Async Generator Lock Management (CRITICAL)
+**Pattern**: Never hold locks across async generator yields - create snapshots instead
+**Issue**: Holding locks while yielding causes deadlock when consumers need the same lock
+**Location**: `lightrag/tools/migrate_llm_cache.py` - `stream_default_caches_json`
+**Solution**: Create snapshot of data while holding lock, release lock, then iterate over snapshot
+```python
+# WRONG - Deadlock prone:
+async with storage._storage_lock:
+    for key, value in storage._data.items():
+        batch[key] = value
+        if len(batch) >= batch_size:
+            yield batch  # Lock still held!
+
+# CORRECT - Snapshot approach:
+async with storage._storage_lock:
+    matching_items = [(k, v) for k, v in storage._data.items() if condition]
+# Lock released here
+for key, value in matching_items:
+    batch[key] = value
+    if len(batch) >= batch_size:
+        yield batch  # No lock held
+```
+**Impact**: Prevents deadlocks in Json→Json migrations and similar scenarios where source/target share locks
+**Applicable To**: Any async generator that needs to access shared resources while yielding
+
+## Architecture Patterns
+
+### 1. Dependency Injection
+**Pattern**: Pass configuration through object constructors, not direct imports
+**Example**: OllamaAPI receives configuration through LightRAG object
+**Benefit**: Better testability and modularity
+
+### 2. Memory Bank Documentation
+**Pattern**: Maintain comprehensive memory bank for development continuity
+**Structure**: Core files (projectbrief.md, activeContext.md, progress.md, etc.)
+**Purpose**: Essential for context preservation across development sessions
+
+### 3. Configuration Management
+**Pattern**: Centralize defaults in constants.py, use environment variables for runtime config
+**Implementation**: Default values in constants, override via .env file
+**Benefit**: Consistent configuration across components
+
+## Development Workflow Patterns
+
+### 1. Frontend Development (CRITICAL)
+**Package Manager**: **ALWAYS USE BUN** - Never use npm or yarn unless Bun is unavailable
+**Commands**:
+- `bun install` - Install dependencies
+- `bun run dev` - Start development server
+- `bun run build` - Build for production
+- `bun run lint` - Run linting
+- `bun test` - Run tests
+- `bun run preview` - Preview production build
+
+**Pattern**: All frontend operations must use Bun commands
+**Fallback**: Only use npm/yarn if Bun installation fails
+**Testing**: Use `bun test` for all frontend testing
+
+### 2. Bug Fix Approach
+1. **Identify root cause** - Don't just fix symptoms
+2. **Implement robust solution** - Handle edge cases and format variations
+3. **Maintain backward compatibility** - Preserve existing functionality
+4. **Add comprehensive error handling** - Graceful degradation
+5. **Document the fix** - Update memory bank with technical details
+
+### 3. Feature Implementation
+1. **Follow existing patterns** - Maintain architectural consistency
+2. **Use dependency injection** - Avoid direct imports between modules
+3. **Implement comprehensive error handling** - Handle all failure modes
+4. **Add proper logging** - Debug and warning messages
+5. **Update documentation** - Memory bank and code comments
+6. **Comment Language** - Use English for comments and documentation
+
+### 4. Performance Optimization
+1. **Profile before optimizing** - Identify actual bottlenecks
+2. **Maintain algorithmic correctness** - Don't sacrifice functionality for speed
+3. **Use appropriate data structures** - Match structure to access patterns
+4. **Implement caching strategically** - Cache expensive operations
+5. **Monitor memory usage** - Prevent memory leaks
+
+### 5. Testing Workflow (CRITICAL)
+**Pattern**: All tests must use pytest markers for proper CI/CD execution
+**Test Categories**:
+- **Offline Tests**: Use `@pytest.mark.offline` - No external dependencies (runs in CI)
+- **Integration Tests**: Use `@pytest.mark.integration` - Requires databases/APIs (skipped by default)
+
+**Commands**:
+- `pytest tests/ -m offline -v` - CI default (~3 seconds for 21 tests)
+- `pytest tests/ --run-integration -v` - Full test suite (all 46 tests)
+
+**Best Practices**:
+1. **Prefer offline tests** - Use mocks for LLM, embeddings, databases
+2. **Mock external dependencies** - AsyncMock for async functions
+3. **Test isolation** - Each test should be independent
+4. **Documentation** - Add docstrings explaining purpose and scope
+
+**Configuration**:
+- `tests/pytest.ini` - Marker definitions and test discovery
+- `tests/conftest.py` - Fixtures and custom options
+- `.github/workflows/tests.yml` - CI/CD workflow (Python 3.10/3.11/3.12)
+
+**Documentation**: See `memory-bank/testing-guidelines.md` for complete testing guidelines
+
+**Impact**: Ensures all tests run reliably in CI without external services while maintaining comprehensive integration test coverage for local development
+
+## Technology Stack Intelligence
+
+### 1. LLM Integration
+- **Primary**: Gemini 2.5 Flash via custom endpoint
+- **Embedding**: BAAI/bge-m3 via custom endpoint
+- **Reranking**: BAAI/bge-reranker-v2-m3
+- **Pattern**: Always handle multiple provider formats
+
+### 2. Storage Backends
+- **Default**: In-memory with file persistence
+- **Production Options**: PostgreSQL, MongoDB, Redis, Neo4j
+- **Pattern**: Abstract storage interface with multiple implementations
+
+### 3. API Architecture
+- **Framework**: FastAPI with Gunicorn for production
+- **Authentication**: JWT-based with role support
+- **Compatibility**: Ollama-compatible endpoints for easy integration
+
+### 4. Frontend
+- **Framework**: React with TypeScript
+- **Package Manager**: **BUN (REQUIRED)** - Always use Bun for all frontend operations
+- **Build Tool**: Vite with Bun runtime
+- **Visualization**: Sigma.js for graph rendering
+- **State Management**: React hooks with context
+- **Internationalization**: i18next for multi-language support
+
+## Common Pitfalls and Solutions
+
+### 1. Embedding Format Issues
+**Pitfall**: Assuming all endpoints return base64-encoded embeddings
+**Solution**: Always check format and handle both base64 and raw arrays
+
+### 2. Async/Await Patterns
+**Pitfall**: Calling methods on coroutines instead of awaited results
+**Solution**: Always await coroutines before accessing their methods
+
+### 3. Data Model Evolution
+**Pitfall**: Breaking changes when removing fields from dataclasses
+**Solution**: Filter deprecated fields during deserialization, don't break storage
+
+### 4. Concurrency Issues
+**Pitfall**: Inconsistent lock key generation causing deadlocks
+**Solution**: Always sort keys for deterministic lock ordering
+
+### 5. Event Loop Management
+**Pitfall**: Event loop mismatches during shutdown
+**Solution**: Implement timeout and specific error handling for loop issues
+
+## Performance Considerations
+
+### 1. Query Context Building
+- **Algorithm**: Linear gradient weighted polling for fair resource allocation
+- **Optimization**: Round-robin merging to eliminate mode bias
+- **Pattern**: Smart chunk selection based on cross-entity occurrence
+
+### 2. Graph Operations
+- **Optimization**: Batch operations where possible
+- **Pattern**: Use appropriate indexing for large datasets
+- **Consideration**: Memory usage with large graphs
+
+### 3. LLM Request Management
+- **Pattern**: Priority-based queue for request ordering
+- **Optimization**: Connection pooling and retry mechanisms
+- **Consideration**: Rate limiting and cost management
+
+## Security Patterns
+
+### 1. Authentication
+- **Implementation**: JWT tokens with role-based access
+- **Pattern**: Stateless authentication with configurable expiration
+- **Security**: Proper token validation and refresh mechanisms
+
+### 2. API Security
+- **Pattern**: Input validation and sanitization
+- **Implementation**: FastAPI dependency injection for auth
+- **Consideration**: Rate limiting and abuse prevention
+
+## Maintenance Guidelines
+
+### 1. Memory Bank Updates
+- **Trigger**: After significant changes or bug fixes
+- **Pattern**: Update activeContext.md and progress.md
+- **Purpose**: Maintain development continuity
+
+### 2. Configuration Management
+- **Pattern**: Environment-based configuration with sensible defaults
+- **Implementation**: .env files with example templates
+- **Consideration**: Security for production deployments
+
+### 3. Error Handling
+- **Pattern**: Comprehensive logging with appropriate levels
+- **Implementation**: Graceful degradation where possible
+- **Consideration**: User-friendly error messages
+
+## Project Evolution Notes
+
+The project has evolved from experimental to production-ready status. Key milestones:
+- **Early 2025**: Basic RAG implementation
+- **Mid 2025**: Multiple storage backends and LLM providers
+- **July 2025**: Major query optimization and algorithm improvements
+- **August 2025**: Production-ready stable state
+
+The system now supports enterprise-level deployments with comprehensive functionality across all components.

+ 69 - 0
.dockerignore

@@ -0,0 +1,69 @@
+# Python-related files and directories
+__pycache__
+.cache
+
+# Virtual environment directories
+*.venv
+
+# Env
+env/
+*.env*
+.env_example
+
+# Distribution / build files
+site
+dist/
+build/
+.eggs/
+*.egg-info/
+*.tgz
+*.tar.gz
+
+# Exclude files and folders
+*.yml
+.dockerignore
+Dockerfile
+Makefile
+
+# Exclude other projects
+/tests
+/scripts
+/data
+/dickens
+/reproduce
+/output_complete
+/rag_storage
+/inputs
+
+# Python version manager file
+.python-version
+
+# Reports
+*.coverage/
+*.log
+log/
+*.logfire
+
+# Cache
+.cache/
+.mypy_cache
+.pytest_cache
+.ruff_cache
+.gradio
+.logfire
+temp/
+
+# MacOS-related files
+.DS_Store
+
+# VS Code settings (local configuration files)
+.vscode
+
+# Local notes file
+TODO.md
+
+# Exclude Git-related files
+.git
+.github
+.gitignore
+.pre-commit-config.yaml

+ 2 - 0
.gitattributes

@@ -0,0 +1,2 @@
+lightrag/api/webui/** binary
+lightrag/api/webui/** linguist-generated

+ 163 - 0
.github/CONTRIBUTING.md

@@ -0,0 +1,163 @@
+# Contributing to LightRAG
+
+Thank you for your interest in contributing! This guide covers everything you need to get started.
+
+## Table of Contents
+
+- [Ways to Contribute](#ways-to-contribute)
+- [Development Setup](#development-setup)
+- [Code Style](#code-style)
+- [Running Tests](#running-tests)
+- [Submitting a Pull Request](#submitting-a-pull-request)
+- [Reporting Bugs](#reporting-bugs)
+- [Requesting Features](#requesting-features)
+
+---
+
+## Ways to Contribute
+
+- **Bug reports** — open an [issue](https://github.com/HKUDS/LightRAG/issues) using the Bug Report template
+- **Feature requests** — open an [issue](https://github.com/HKUDS/LightRAG/issues) using the Feature Request template
+- **Documentation** — fix typos, clarify explanations, or add examples
+- **Code** — fix bugs, implement features, or add storage/LLM backends
+- **Testing** — add test coverage for untested code paths
+
+---
+
+## Development Setup
+
+```bash
+# Clone the repository
+git clone https://github.com/HKUDS/LightRAG.git
+cd LightRAG
+
+# Install in development mode (requires uv)
+uv sync
+source .venv/bin/activate        # Linux/macOS
+# .venv\Scripts\activate         # Windows
+
+# Install with optional extras as needed
+uv sync --extra api              # FastAPI server
+uv sync --extra test             # Test dependencies
+uv sync --extra offline-storage  # Storage backends
+uv sync --extra offline-llm      # Additional LLM providers
+
+# Set up pre-commit hooks (run once)
+pip install pre-commit
+pre-commit install
+```
+
+---
+
+## Code Style
+
+This project uses [Ruff](https://docs.astral.sh/ruff/) for formatting and linting, enforced via [pre-commit](https://pre-commit.com/).
+
+### Automatic fixing
+
+Running `pre-commit run --all-files` will automatically fix most style issues:
+
+```bash
+# Fix all files
+pre-commit run --all-files
+
+# Fix only staged files (faster during development)
+pre-commit run
+```
+
+### What is checked
+
+| Hook | What it does |
+|------|-------------|
+| `trailing-whitespace` | Removes trailing whitespace |
+| `end-of-file-fixer` | Ensures files end with a newline |
+| `requirements-txt-fixer` | Keeps `requirements.txt` entries sorted |
+| `ruff-format` | Formats Python code (Black-compatible) |
+| `ruff` | Fixes Python lint errors |
+
+### CI check
+
+The same checks run automatically on every pull request. If the CI check fails, run `pre-commit run --all-files` locally, commit the fixes, and push again.
+
+### Language conventions
+
+- **Python code and comments**: English
+- **Frontend (WebUI)**: uses i18next for internationalization — add translation keys rather than hardcoding strings
+
+---
+
+## Running Tests
+
+```bash
+# Run offline tests (no external services required)
+python -m pytest tests
+
+# Run integration tests (requires configured external services)
+python -m pytest tests --run-integration
+
+# Run a specific test file
+python -m pytest tests/chunker/test_chunking.py
+
+# Keep test artifacts for debugging
+python -m pytest tests --keep-artifacts
+```
+
+Alternatively, set the environment variable `LIGHTRAG_RUN_INTEGRATION=true` instead of passing `--run-integration`.
+
+---
+
+## Submitting a Pull Request
+
+1. **Fork** the repository and create a branch from `main`:
+   ```bash
+   git checkout -b fix/your-descriptive-branch-name
+   ```
+
+2. **Make your changes** and ensure:
+   - Pre-commit checks pass: `pre-commit run --all-files`
+   - Relevant tests pass: `python -m pytest tests`
+   - New behavior is covered by tests where applicable
+
+3. **Commit** with a clear message describing *why* the change was made:
+   ```bash
+   git commit -m "fix: handle permission-only encrypted PDFs without password"
+   ```
+
+4. **Push** and open a pull request against `main`. Fill out the pull request template completely.
+
+5. **Respond to review feedback** — a maintainer will review your PR and may request changes.
+
+### Pull request checklist
+
+- [ ] Changes tested locally
+- [ ] Pre-commit checks pass (`pre-commit run --all-files`)
+- [ ] Unit/integration tests added or updated where applicable
+- [ ] Documentation updated if behavior changes
+- [ ] PR description explains the *why*, not just the *what*
+
+---
+
+## Reporting Bugs
+
+Please use the [Bug Report issue template](https://github.com/HKUDS/LightRAG/issues/new?template=bug_report.yml). Include:
+
+- LightRAG version and Python version
+- Storage backend and LLM provider being used
+- Minimal reproducible example
+- Full error traceback
+
+---
+
+## Requesting Features
+
+Please use the [Feature Request issue template](https://github.com/HKUDS/LightRAG/issues/new?template=feature_request.yml). Describe:
+
+- The problem you're trying to solve
+- Your proposed solution
+- Any alternatives you've considered
+
+---
+
+## Questions
+
+For usage questions, check the [Discussions](https://github.com/HKUDS/LightRAG/discussions) tab or open a [Question issue](https://github.com/HKUDS/LightRAG/issues/new?template=question.yml).

+ 61 - 0
.github/ISSUE_TEMPLATE/bug_report.yml

@@ -0,0 +1,61 @@
+name: Bug Report
+description: File a bug report
+title: "[Bug]:"
+labels: ["bug", "triage"]
+
+body:
+  - type: checkboxes
+    id: existingcheck
+    attributes:
+      label: Do you need to file an issue?
+      description: Please help us manage our time by avoiding duplicates and common bugs with the steps below.
+      options:
+        - label: I have searched the existing issues and this bug is not already filed.
+        - label: I believe this is a legitimate bug, not just a question or feature request.
+  - type: textarea
+    id: description
+    attributes:
+      label: Describe the bug
+      description: A clear and concise description of what the bug is.
+      placeholder: What went wrong?
+  - type: textarea
+    id: reproduce
+    attributes:
+      label: Steps to reproduce
+      description: Steps to reproduce the behavior.
+      placeholder: How can we replicate the issue?
+  - type: textarea
+    id: expected_behavior
+    attributes:
+      label: Expected Behavior
+      description: A clear and concise description of what you expected to happen.
+      placeholder: What should have happened?
+  - type: textarea
+    id: configused
+    attributes:
+      label: LightRAG Config Used
+      description: The LightRAG configuration used for the run.
+      placeholder: The settings content or LightRAG configuration
+      value: |
+        # Paste your config here
+  - type: textarea
+    id: screenshotslogs
+    attributes:
+      label: Logs and screenshots
+      description: If applicable, add screenshots and logs to help explain your problem.
+      placeholder: Add logs and screenshots here
+  - type: textarea
+    id: additional_information
+    attributes:
+      label: Additional Information
+      description: |
+        - LightRAG Version: e.g., v0.1.1
+        - Operating System: e.g., Windows 10, Ubuntu 20.04
+        - Python Version: e.g., 3.8
+        - Related Issues: e.g., #1
+        - Any other relevant information.
+      value: |
+        - LightRAG Version:
+        - Operating System:
+        - Python Version:
+        - Related Issues:

+ 1 - 0
.github/ISSUE_TEMPLATE/config.yml

@@ -0,0 +1 @@
+blank_issues_enabled: false

+ 26 - 0
.github/ISSUE_TEMPLATE/feature_request.yml

@@ -0,0 +1,26 @@
+name: Feature Request
+description: File a feature request
+labels: ["enhancement"]
+title: "[Feature Request]:"
+
+body:
+  - type: checkboxes
+    id: existingcheck
+    attributes:
+      label: Do you need to file a feature request?
+      description: Please help us manage our time by avoiding duplicates and common feature requests with the steps below.
+      options:
+        - label: I have searched the existing feature requests and this feature request is not already filed.
+        - label: I believe this is a legitimate feature request, not just a question or bug.
+  - type: textarea
+    id: feature_request_description
+    attributes:
+      label: Feature Request Description
+      description: A clear and concise description of the feature request you would like.
+      placeholder: What does this feature request add or improve?
+  - type: textarea
+    id: additional_context
+    attributes:
+      label: Additional Context
+      description: Add any other context or screenshots about the feature request here.
+      placeholder: Any additional information

+ 26 - 0
.github/ISSUE_TEMPLATE/question.yml

@@ -0,0 +1,26 @@
+name: Question
+description: Ask a general question
+labels: ["question"]
+title: "[Question]:"
+
+body:
+  - type: checkboxes
+    id: existingcheck
+    attributes:
+      label: Do you need to ask a question?
+      description: Please help us manage our time by avoiding duplicates and common questions with the steps below.
+      options:
+        - label: I have searched the existing questions and discussions and this question is not already answered.
+        - label: I believe this is a legitimate question, not just a bug or feature request.
+  - type: textarea
+    id: question
+    attributes:
+      label: Your Question
+      description: A clear and concise description of your question.
+      placeholder: What is your question?
+  - type: textarea
+    id: context
+    attributes:
+      label: Additional Context
+      description: Provide any additional context or details that might help us understand your question better.
+      placeholder: Add any relevant information here

+ 206 - 0
.github/dependabot.yml

@@ -0,0 +1,206 @@
+# Keep GitHub Actions up to date with GitHub's Dependabot...
+# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot
+# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem
+version: 2
+updates:
+  # ============================================================
+  # GitHub Actions
+  # PR Strategy:
+  #   - All updates (major/minor/patch): Grouped into a single PR
+  # ============================================================
+  - package-ecosystem: github-actions
+    directory: /
+    groups:
+      github-actions:
+        patterns:
+          - "*"  # Group all Actions updates into a single larger pull request
+    schedule:
+      interval: weekly
+      day: monday
+      time: "02:00"
+      timezone: "Asia/Shanghai"
+    labels:
+      - "dependencies"
+      - "github-actions"
+    open-pull-requests-limit: 2
+
+  # ============================================================
+  # Python (pip) Dependencies
+  # PR Strategy:
+  #   - Major updates: Individual PR per package (except numpy which is ignored)
+  #   - Minor updates: Grouped by category (llm-providers, storage, etc.)
+  #   - Patch updates: Grouped by category
+  # ============================================================
+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      day: "wednesday"
+      time: "02:00"
+      timezone: "Asia/Shanghai"
+    cooldown:
+      default-days: 5
+      semver-major-days: 30
+      semver-minor-days: 7
+      semver-patch-days: 3
+    groups:
+      # Core dependencies - LLM providers and embeddings
+      llm-providers:
+        patterns:
+          - "openai"
+          - "anthropic"
+          - "google-*"
+          - "boto3"
+          - "botocore"
+          - "ollama"
+        update-types:
+          - "minor"
+          - "patch"
+      # Storage backends
+      storage:
+        patterns:
+          - "neo4j"
+          - "pymongo"
+          - "redis"
+          - "psycopg*"
+          - "asyncpg"
+          - "milvus*"
+          - "qdrant*"
+        update-types:
+          - "minor"
+          - "patch"
+      # Data processing and ML
+      data-processing:
+        patterns:
+          - "numpy"
+          - "scipy"
+          - "pandas"
+          - "tiktoken"
+          - "transformers"
+          - "torch*"
+        update-types:
+          - "minor"
+          - "patch"
+      # Web framework and API
+      web-framework:
+        patterns:
+          - "fastapi"
+          - "uvicorn"
+          - "gunicorn"
+          - "starlette"
+          - "pydantic*"
+        update-types:
+          - "minor"
+          - "patch"
+      # Development and testing tools
+      dev-tools:
+        patterns:
+          - "pytest*"
+          - "ruff"
+          - "pre-commit"
+          - "black"
+          - "mypy"
+        update-types:
+          - "minor"
+          - "patch"
+      # Minor and patch updates for everything else
+      python-minor-patch:
+        patterns:
+          - "*"
+        update-types:
+          - "minor"
+          - "patch"
+    ignore:
+      - dependency-name: "numpy"
+        update-types:
+          - "version-update:semver-major"
+    labels:
+      - "dependencies"
+      - "python"
+    open-pull-requests-limit: 5
+
+  # ============================================================
+  # Frontend (bun) Dependencies
+  # PR Strategy:
+  #   - Major updates: Individual PR per package
+  #   - Minor updates: Grouped by category (react, ui-components, etc.)
+  #   - Patch updates: Grouped by category
+  # ============================================================
+  - package-ecosystem: "bun"
+    directory: "/lightrag_webui"
+    schedule:
+      interval: "weekly"
+      day: "friday"
+      time: "02:00"
+      timezone: "Asia/Shanghai"
+    cooldown:
+      default-days: 5
+      semver-major-days: 30
+      semver-minor-days: 7
+      semver-patch-days: 3
+    groups:
+      # React ecosystem
+      react:
+        patterns:
+          - "react"
+          - "react-dom"
+          - "react-router*"
+          - "@types/react*"
+        update-types:
+          - "minor"
+          - "patch"
+      # UI components and styling
+      ui-components:
+        patterns:
+          - "@radix-ui/*"
+          - "tailwind*"
+          - "@tailwindcss/*"
+          - "lucide-react"
+          - "class-variance-authority"
+          - "clsx"
+        update-types:
+          - "minor"
+          - "patch"
+      # Graph visualization
+      graph-viz:
+        patterns:
+          - "sigma"
+          - "@sigma/*"
+          - "graphology*"
+        update-types:
+          - "minor"
+          - "patch"
+      # Build tools and dev dependencies
+      build-tools:
+        patterns:
+          - "vite"
+          - "@vitejs/*"
+          - "typescript"
+          - "eslint*"
+          - "@eslint/*"
+          - "typescript-eslint"
+          - "prettier"
+          - "prettier-*"
+          - "@types/bun"
+        update-types:
+          - "minor"
+          - "patch"
+      # Content rendering libraries (math, diagrams, etc.)
+      content-rendering:
+        patterns:
+          - "katex"
+          - "mermaid"
+        update-types:
+          - "minor"
+          - "patch"
+      # All other minor and patch updates
+      frontend-minor-patch:
+        patterns:
+          - "*"
+        update-types:
+          - "minor"
+          - "patch"
+    labels:
+      - "dependencies"
+      - "frontend"
+    open-pull-requests-limit: 5

+ 32 - 0
.github/pull_request_template.md

@@ -0,0 +1,32 @@
+<!--
+Thanks for contributing to LightRAG!
+
+Please ensure your pull request is ready for review before submitting.
+
+About this template
+
+This template helps contributors provide a clear and concise description of their changes. Feel free to adjust it as needed.
+-->
+
+## Description
+
+[Briefly describe the changes made in this pull request.]
+
+## Related Issues
+
+[Reference any related issues or tasks addressed by this pull request.]
+
+## Changes Made
+
+[List the specific changes made in this pull request.]
+
+## Checklist
+
+- [ ] Changes tested locally
+- [ ] Code reviewed
+- [ ] Documentation updated (if necessary)
+- [ ] Unit tests added (if applicable)
+
+## Additional Notes
+
+[Add any additional notes or context for the reviewer(s).]

+ 58 - 0
.github/workflows/copilot-setup-steps.yml

@@ -0,0 +1,58 @@
+name: "Copilot Setup Steps"
+
+# Automatically run the setup steps when they are changed to allow for easy validation, and
+# allow manual testing through the repository's "Actions" tab
+on:
+  workflow_dispatch:
+  push:
+    paths:
+      - .github/workflows/copilot-setup-steps.yml
+  pull_request:
+    paths:
+      - .github/workflows/copilot-setup-steps.yml
+
+jobs:
+  # The job MUST be called `copilot-setup-steps` or it will not be picked up by Copilot.
+  copilot-setup-steps:
+    runs-on: ubuntu-latest
+
+    # Timeout after 30 minutes (maximum is 59)
+    timeout-minutes: 30
+
+    # You can define any steps you want, and they will run before the agent starts.
+    # If you do not check out your code, Copilot will do this for you.
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.11'
+
+      - name: Cache pip packages
+        uses: actions/cache@v5
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-copilot-${{ hashFiles('**/pyproject.toml') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-copilot-
+            ${{ runner.os }}-pip-
+
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[api]"
+          pip install pytest pytest-asyncio httpx
+
+      - name: Create minimal frontend stub for Copilot agent
+        run: |
+          mkdir -p lightrag/api/webui
+          echo '<!DOCTYPE html><html><head><title>LightRAG - Copilot Agent</title></head><body><h1>Copilot Agent Mode</h1></body></html>' > lightrag/api/webui/index.html
+          echo "Created minimal frontend stub for Copilot agent environment"
+
+      - name: Verify installation
+        run: |
+          python --version
+          pip list | grep lightrag
+          lightrag-server --help || echo "Note: Server requires .env configuration to run"

+ 113 - 0
.github/workflows/docker-build-lite.yml

@@ -0,0 +1,113 @@
+name: Build Lite Docker Image
+
+on:
+  workflow_dispatch:
+    inputs:
+      _notes_:
+        description: '⚠️ Create lite Docker images only after non-trivial version releases.'
+        required: false
+        type: boolean
+        default: false
+
+permissions:
+  contents: read
+  id-token: write
+  packages: write
+
+jobs:
+  build-and-push-lite:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.x"
+
+      - name: Get latest tag
+        id: get_tag
+        run: |
+          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
+          if [ -z "$LATEST_TAG" ]; then
+            LATEST_TAG="sha-$(git rev-parse --short HEAD)"
+            echo "No tags found, using commit SHA: $LATEST_TAG"
+          else
+            echo "Latest tag found: $LATEST_TAG"
+          fi
+          PACKAGE_VERSION="${LATEST_TAG#v}"
+          echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
+          echo "package_version=$PACKAGE_VERSION" >> $GITHUB_OUTPUT
+
+      - name: Prepare lite tag
+        id: lite_tag
+        run: |
+          LITE_TAG="${{ steps.get_tag.outputs.tag }}-lite"
+          echo "Lite image tag: $LITE_TAG"
+          echo "lite_tag=$LITE_TAG" >> $GITHUB_OUTPUT
+
+      - name: Update version definitions
+        run: |
+          python scripts/release/set_version.py --core-version "${{ steps.get_tag.outputs.package_version }}"
+          grep '__version__ = ' lightrag/_version.py
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v4
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v4
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Install cosign
+        uses: sigstore/cosign-installer@6f9f17788090df1f26f669e9d70d6ae9567deba6  # v4.1.2
+
+      - name: Extract metadata for Docker
+        id: meta
+        uses: docker/metadata-action@v6
+        with:
+          images: ghcr.io/${{ github.repository }}
+          tags: |
+            type=raw,value=${{ steps.lite_tag.outputs.lite_tag }}
+            type=raw,value=lite
+
+      - name: Build and push lite Docker image
+        id: build-and-push
+        uses: docker/build-push-action@v7
+        with:
+          context: .
+          file: ./Dockerfile.lite
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=min
+
+      - name: Sign lite Docker image
+        if: steps.build-and-push.outputs.digest != ''
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+          TAGS: ${{ steps.meta.outputs.tags }}
+        run: |
+          set -euo pipefail
+          echo "Signing manifest digest: $DIGEST"
+          while IFS= read -r tag; do
+            if [ -z "$tag" ]; then
+              continue
+            fi
+            echo "Signing ${tag}@${DIGEST}"
+            cosign sign --yes "${tag}@${DIGEST}"
+          done <<< "$TAGS"
+
+      - name: Output image details
+        run: |
+          echo "Lite Docker image built and pushed successfully!"
+          echo "Image tag: ghcr.io/${{ github.repository }}:${{ steps.lite_tag.outputs.lite_tag }}"
+          echo "Signed manifest digest: ${{ steps.build-and-push.outputs.digest }}"
+          echo "Base Git tag used: ${{ steps.get_tag.outputs.tag }}"

+ 109 - 0
.github/workflows/docker-build-manual.yml

@@ -0,0 +1,109 @@
+name: Build Test Docker Image manually
+
+on:
+  workflow_dispatch:
+    inputs:
+      _notes_:
+        description: '⚠️ Please create a new git tag before building the docker image.'
+        required: false
+        type: boolean
+        default: false
+
+permissions:
+  contents: read
+  id-token: write
+  packages: write
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0  # Fetch all history for tags
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.x"
+
+      - name: Get latest tag
+        id: get_tag
+        run: |
+          # Get the latest tag, fallback to commit SHA if no tags exist
+          LATEST_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
+          if [ -z "$LATEST_TAG" ]; then
+            LATEST_TAG="sha-$(git rev-parse --short HEAD)"
+            echo "No tags found, using commit SHA: $LATEST_TAG"
+          else
+            echo "Latest tag found: $LATEST_TAG"
+          fi
+          PACKAGE_VERSION="${LATEST_TAG#v}"
+          echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
+          echo "image_tag=$LATEST_TAG" >> $GITHUB_OUTPUT
+          echo "package_version=$PACKAGE_VERSION" >> $GITHUB_OUTPUT
+
+      - name: Update version definitions
+        run: |
+          python scripts/release/set_version.py --core-version "${{ steps.get_tag.outputs.package_version }}"
+          echo "Updated version definitions with ${{ steps.get_tag.outputs.package_version }}"
+          grep '__version__ = ' lightrag/_version.py
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v4
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v4
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Install cosign
+        uses: sigstore/cosign-installer@6f9f17788090df1f26f669e9d70d6ae9567deba6  # v4.1.2
+
+      - name: Extract metadata for Docker
+        id: meta
+        uses: docker/metadata-action@v6
+        with:
+          images: ghcr.io/${{ github.repository }}
+          tags: |
+            type=raw,value=${{ steps.get_tag.outputs.tag }}
+
+      - name: Build and push Docker image
+        id: build-and-push
+        uses: docker/build-push-action@v7
+        with:
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      - name: Sign Docker image
+        if: steps.build-and-push.outputs.digest != ''
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+          TAGS: ${{ steps.meta.outputs.tags }}
+        run: |
+          set -euo pipefail
+          echo "Signing manifest digest: $DIGEST"
+          while IFS= read -r tag; do
+            if [ -z "$tag" ]; then
+              continue
+            fi
+            echo "Signing ${tag}@${DIGEST}"
+            cosign sign --yes "${tag}@${DIGEST}"
+          done <<< "$TAGS"
+
+      - name: Output image details
+        run: |
+          echo "Docker image built and pushed successfully!"
+          echo "Image tags:"
+          echo "  - ghcr.io/${{ github.repository }}:${{ steps.get_tag.outputs.tag }}"
+          echo "Signed manifest digest: ${{ steps.build-and-push.outputs.digest }}"
+          echo "Latest Git tag used: ${{ steps.get_tag.outputs.tag }}"

+ 120 - 0
.github/workflows/docker-publish.yml

@@ -0,0 +1,120 @@
+name: Build Latest Docker Image on Release
+
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  id-token: write
+  packages: write
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0  # Fetch all history for tags
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.x"
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v4
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v4
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Install cosign
+        uses: sigstore/cosign-installer@6f9f17788090df1f26f669e9d70d6ae9567deba6  # v4.1.2
+
+      - name: Get latest tag
+        id: get_tag
+        run: |
+          if [ "${{ github.event_name }}" = "release" ] && [ -n "${{ github.event.release.tag_name }}" ]; then
+            TAG="${{ github.event.release.tag_name }}"
+          else
+            TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
+          fi
+          if [ -z "$TAG" ]; then
+            echo "No git tag found for docker publish"
+            exit 1
+          fi
+          PACKAGE_VERSION="${TAG#v}"
+          echo "Found tag: $TAG"
+          echo "tag=$TAG" >> $GITHUB_OUTPUT
+          echo "package_version=$PACKAGE_VERSION" >> $GITHUB_OUTPUT
+
+      - name: Check if pre-release
+        id: check_prerelease
+        run: |
+          TAG="${{ steps.get_tag.outputs.tag }}"
+          if [[ "$TAG" == *"rc"* ]] || [[ "$TAG" == *"dev"* ]]; then
+            echo "is_prerelease=true" >> $GITHUB_OUTPUT
+            echo "This is a pre-release version: $TAG"
+          else
+            echo "is_prerelease=false" >> $GITHUB_OUTPUT
+            echo "This is a stable release: $TAG"
+          fi
+
+      - name: Update version definitions
+        run: |
+          python scripts/release/set_version.py --core-version "${{ steps.get_tag.outputs.package_version }}"
+          echo "Updated version definitions with ${{ steps.get_tag.outputs.package_version }}"
+          grep '__version__ = ' lightrag/_version.py
+
+      - name: Extract metadata for Docker
+        id: meta
+        uses: docker/metadata-action@v6
+        with:
+          images: ghcr.io/${{ github.repository }}
+          tags: |
+            type=raw,value=${{ steps.get_tag.outputs.tag }}
+            type=raw,value=latest,enable=${{ steps.check_prerelease.outputs.is_prerelease == 'false' }}
+
+      - name: Build and push Docker image
+        id: build-and-push
+        uses: docker/build-push-action@v7
+        with:
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      - name: Sign Docker image
+        if: steps.build-and-push.outputs.digest != ''
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+          TAGS: ${{ steps.meta.outputs.tags }}
+        run: |
+          set -euo pipefail
+          echo "Signing manifest digest: $DIGEST"
+          while IFS= read -r tag; do
+            if [ -z "$tag" ]; then
+              continue
+            fi
+            echo "Signing ${tag}@${DIGEST}"
+            cosign sign --yes "${tag}@${DIGEST}"
+          done <<< "$TAGS"
+
+      - name: Output image details
+        run: |
+          echo "Docker image built and pushed successfully!"
+          echo "Image tags:"
+          echo "  - ghcr.io/${{ github.repository }}:${{ steps.get_tag.outputs.tag }}"
+          echo "  - ghcr.io/${{ github.repository }}:latest"
+          echo "Signed manifest digest: ${{ steps.build-and-push.outputs.digest }}"
+          echo "Latest Git tag used: ${{ steps.get_tag.outputs.tag }}"

+ 134 - 0
.github/workflows/linting.yaml

@@ -0,0 +1,134 @@
+name: Linting and Formatting
+
+on:
+    push:
+        branches: [ main, dev ]
+    pull_request:
+        branches: [ main, dev ]
+        types: [opened, synchronize, reopened, ready_for_review]
+
+permissions:
+    pull-requests: write
+
+jobs:
+    lint-and-format:
+        name: Linting and Formatting
+        if: ${{ github.event_name != 'pull_request' || !github.event.pull_request.draft }}
+        runs-on: ubuntu-latest
+
+        steps:
+            - name: Checkout code
+              uses: actions/checkout@v6
+
+            - name: Set up Python
+              uses: actions/setup-python@v6
+              with:
+                python-version: '3.x'
+
+            - name: Install dependencies
+              run: |
+                python -m pip install --upgrade pip
+                pip install pre-commit
+
+            - name: Run pre-commit
+              id: pre-commit
+              run: pre-commit run --all-files --show-diff-on-failure
+
+            - name: Post fix instructions on failure
+              if: failure() && steps.pre-commit.outcome == 'failure'
+              run: |
+                cat >> "$GITHUB_STEP_SUMMARY" << 'EOF'
+                ## ❌ Linting / Formatting checks failed
+
+                Pre-commit found issues in your code. Fix them locally and push again:
+
+                ```bash
+                # Install pre-commit (one-time setup)
+                pip install pre-commit
+                pre-commit install
+
+                # Auto-fix all issues
+                pre-commit run --all-files
+
+                # Commit the fixes
+                git add -u
+                git commit -m "fix: apply pre-commit formatting fixes"
+                git push
+                ```
+
+                ### What was checked
+
+                | Hook | Tool | What it fixes |
+                |------|------|---------------|
+                | `trailing-whitespace` | pre-commit-hooks | Removes trailing whitespace |
+                | `end-of-file-fixer` | pre-commit-hooks | Ensures files end with a newline |
+                | `requirements-txt-fixer` | pre-commit-hooks | Sorts requirements.txt entries |
+                | `ruff-format` | Ruff | Auto-formats Python code (like Black) |
+                | `ruff` | Ruff | Fixes Python lint errors (`--fix`) |
+
+                > See the diff above for the exact changes needed.
+                EOF
+
+            - name: Comment on PR with fix instructions
+              if: failure() && steps.pre-commit.outcome == 'failure' && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+              uses: actions/github-script@v9
+              with:
+                script: |
+                  const body = `## ❌ Linting / Formatting checks failed
+
+                  Pre-commit found issues in your code. Run the following locally, then push again:
+
+                  \`\`\`bash
+                  # Install pre-commit (one-time setup)
+                  pip install pre-commit
+                  pre-commit install
+
+                  # Auto-fix all issues
+                  pre-commit run --all-files
+
+                  # Commit the fixes
+                  git add -u
+                  git commit -m "fix: apply pre-commit formatting fixes"
+                  git push
+                  \`\`\`
+
+                  <details>
+                  <summary>What was checked</summary>
+
+                  | Hook | Tool | What it fixes |
+                  |------|------|---------------|
+                  | \`trailing-whitespace\` | pre-commit-hooks | Removes trailing whitespace |
+                  | \`end-of-file-fixer\` | pre-commit-hooks | Ensures files end with a newline |
+                  | \`requirements-txt-fixer\` | pre-commit-hooks | Sorts requirements.txt entries |
+                  | \`ruff-format\` | Ruff | Auto-formats Python code (like Black) |
+                  | \`ruff\` | Ruff | Fixes Python lint errors (\`--fix\`) |
+
+                  </details>
+
+                  > See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for the exact diff of required changes.`;
+
+                  // Find existing bot comment to avoid duplicates
+                  const { data: comments } = await github.rest.issues.listComments({
+                    owner: context.repo.owner,
+                    repo: context.repo.repo,
+                    issue_number: context.issue.number,
+                  });
+                  const existing = comments.find(c =>
+                    c.user.type === 'Bot' && c.body.includes('Linting / Formatting checks failed')
+                  );
+
+                  if (existing) {
+                    await github.rest.issues.updateComment({
+                      owner: context.repo.owner,
+                      repo: context.repo.repo,
+                      comment_id: existing.id,
+                      body,
+                    });
+                  } else {
+                    await github.rest.issues.createComment({
+                      owner: context.repo.owner,
+                      repo: context.repo.repo,
+                      issue_number: context.issue.number,
+                      body,
+                    });
+                  }

+ 101 - 0
.github/workflows/pypi-publish.yml

@@ -0,0 +1,101 @@
+name: Upload LightRAG-hku Package
+
+on:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  release-build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0  # Fetch all history for tags
+
+      # Build frontend WebUI
+      - name: Setup Bun
+        uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      - name: Build Frontend WebUI
+        run: |
+          cd lightrag_webui
+          bun install --frozen-lockfile
+          bun run build
+          cd ..
+
+      - name: Verify Frontend Build
+        run: |
+          if [ ! -f "lightrag/api/webui/index.html" ]; then
+            echo "❌ Error: Frontend build failed - index.html not found"
+            exit 1
+          fi
+          echo "✅ Frontend build verified"
+          echo "Frontend files:"
+          ls -lh lightrag/api/webui/ | head -10
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: "3.x"
+
+      - name: Resolve release version
+        id: get_version
+        run: |
+          if [ "${{ github.event_name }}" = "release" ] && [ -n "${{ github.event.release.tag_name }}" ]; then
+            TAG="${{ github.event.release.tag_name }}"
+          else
+            TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
+          fi
+          if [ -z "$TAG" ]; then
+            echo "No git tag found for release build"
+            exit 1
+          fi
+          PACKAGE_VERSION="${TAG#v}"
+          echo "Found tag: $TAG"
+          echo "Package version: $PACKAGE_VERSION"
+          echo "version=$TAG" >> $GITHUB_OUTPUT
+          echo "package_version=$PACKAGE_VERSION" >> $GITHUB_OUTPUT
+
+      - name: Update version definitions
+        run: |
+          python scripts/release/set_version.py --core-version "${{ steps.get_version.outputs.package_version }}"
+          grep '__version__ = ' lightrag/_version.py
+
+      - name: Build release distributions
+        run: |
+          python -m pip install build
+          python -m build
+
+      - name: Upload distributions
+        uses: actions/upload-artifact@v7
+        with:
+          name: release-dists
+          path: dist/
+
+  pypi-publish:
+    runs-on: ubuntu-latest
+    needs:
+      - release-build
+    permissions:
+      id-token: write
+
+    environment:
+      name: pypi
+
+    steps:
+      - name: Retrieve release distributions
+        uses: actions/download-artifact@v8
+        with:
+          name: release-dists
+          path: dist/
+
+      - name: Publish release distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: dist/

+ 27 - 0
.github/workflows/stale.yaml

@@ -0,0 +1,27 @@
+# .github/workflows/stale.yaml
+name: Mark stale issues and pull requests
+
+on:
+  schedule:
+    - cron: '30 22 * * *' # run at 22:30 UTC (06:30 UTC+08) every day; GitHub Actions cron is evaluated in UTC
+
+permissions:
+  issues: write
+  pull-requests: write
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@v10
+        with:
+          days-before-stale: 90 # 90 days
+          days-before-close: 7 # 7 days after marked as stale
+          stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.'
+          close-issue-message: 'This issue has been automatically closed because it has not had recent activity. Please open a new issue if you still have this problem.'
+          stale-pr-message: 'This pull request has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs.'
+          close-pr-message: 'This pull request has been automatically closed because it has not had recent activity.'
+          # If there are specific labels, exempt them from being marked as stale, for example:
+          exempt-issue-labels: 'enhancement,tracked'
+          # exempt-pr-labels: 'bug,enhancement,help wanted'
+          repo-token: ${{ secrets.GITHUB_TOKEN }} # token provided by GitHub

+ 61 - 0
.github/workflows/tests.yml

@@ -0,0 +1,61 @@
+name: Offline Unit Tests
+
+on:
+  push:
+    branches: [ main, dev ]
+  pull_request:
+    branches: [ main, dev ]
+    types: [opened, synchronize, reopened, ready_for_review]
+
+jobs:
+  offline-tests:
+    name: Offline Tests
+    if: ${{ github.event_name != 'pull_request' || !github.event.pull_request.draft }}
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        python-version: ['3.12', '3.14']
+
+    steps:
+    - uses: actions/checkout@v6
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v6
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Cache pip packages
+      uses: actions/cache@v5
+      with:
+        path: ~/.cache/pip
+        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements*.txt', '**/pyproject.toml') }}
+        restore-keys: |
+          ${{ runner.os }}-pip-
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        # Install the offline-storage and offline-llm extras too — the offline
+        # test suite contains mock-based unit tests that construct real
+        # data classes from these libraries (qdrant-client, pymongo,
+        # anthropic, boto3, etc.). Without them, importorskip falls back
+        # and the tests are silently skipped.
+        pip install -e ".[api,offline-storage,offline-llm]"
+        pip install pytest pytest-asyncio
+
+    - name: Run offline tests
+      run: |
+        # Run only tests marked as 'offline' (no external dependencies)
+        # Integration tests requiring databases/APIs are skipped by default
+        pytest tests/ -m offline -v --tb=short
+
+    - name: Upload test results
+      if: always()
+      uses: actions/upload-artifact@v7
+      with:
+        name: test-results-py${{ matrix.python-version }}
+        path: |
+          .pytest_cache/
+          test-results.xml
+        retention-days: 7

+ 89 - 0
.gitignore

@@ -0,0 +1,89 @@
+# Python-related files
+__pycache__/
+*.py[cod]
+*.egg-info/
+.eggs/
+*.tgz
+*.tar.gz
+*.ini
+
+# Virtual Environment
+.venv/
+venv/
+
+# Environment Variable Files
+.env
+.env.backup.*
+
+# Generated Docker Compose files (output of setup wizard)
+docker-compose.*.yml
+!docker-compose.podman.yml
+
+# Build / Distribution
+dist/
+build/
+site/
+
+# Logs / Reports
+*.log
+*.log.*
+*.logfire
+*.coverage/
+log/
+
+# Caches
+.cache/
+.mypy_cache/
+.pytest_cache/
+.ruff_cache/
+.gradio/
+.history/
+temp/
+
+# IDE / Editor Files
+.idea/
+.vscode/
+.vscode/settings.json
+
+# Framework-specific files
+local_neo4jWorkDir/
+neo4jWorkDir/
+
+# Data & Storage
+inputs/
+output/
+rag_storage/
+data/
+
+# User customized prompt directory
+prompts/entity_type/
+
+# Evaluation results
+lightrag/evaluation/results/
+
+# Miscellaneous
+.DS_Store
+TODO.md
+ignore_this.txt
+*.ignore.*
+
+# Project-specific files
+/dickens*/
+/book.txt
+/ag2_demo_workdir/
+
+# Frontend build output (built during PyPI release)
+/lightrag/api/webui/
+
+# temporary test files in project root
+/test_*
+
+# AI Agent files
+memory-bank
+.claude/
+
+# Google Jules
+.jules/
+
+# native_parser/docx CLI output (audit JSONL + image dir)
+/parse_output/

+ 28 - 0
.pre-commit-config.yaml

@@ -0,0 +1,28 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: trailing-whitespace
+        exclude: ^lightrag/api/webui/
+      - id: end-of-file-fixer
+        exclude: ^lightrag/api/webui/
+      - id: requirements-txt-fixer
+        exclude: ^lightrag/api/webui/
+
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.6.4
+    hooks:
+      - id: ruff-format
+        exclude: ^lightrag/api/webui/
+      - id: ruff
+        args: [--fix, --ignore=E402]
+        exclude: ^lightrag/api/webui/
+
+
+  - repo: https://github.com/mgedmin/check-manifest
+    rev: "0.49"
+    hooks:
+      - id: check-manifest
+        stages: [manual]
+        exclude: ^lightrag/api/webui/

+ 330 - 0
AGENTS.md

@@ -0,0 +1,330 @@
+# Repository Guidelines
+
+## Project Overview
+
+LightRAG is a Retrieval-Augmented Generation (RAG) framework that uses graph-based knowledge representation for enhanced information retrieval. The system extracts entities and relationships from documents, builds a knowledge graph, and uses multiple retrieval modes (`local`, `global`, `hybrid`, `mix`, `naive`) for queries.
+
+## Project Structure
+
+Top-level directories:
+
+- **lightrag/**: Core Python package — see *Module Layout* below.
+- **lightrag_webui/**: React 19 + TypeScript client (Bun + Vite + Tailwind). UI components in `src/`.
+- **scripts/**: `test.sh` (preferred test runner), `setup/` interactive environment wizard (use `make env-*` rather than calling `setup.sh` directly — see *Configuration > Setup Wizard Outputs*), and release tooling.
+- **tests/**: Pytest coverage, organized into subdirectories that mirror `lightrag/` (see *Testing* below for layout). Working datasets stay in `inputs/`, `rag_storage/`, and `temp/`; deployment collateral lives in `docs/`, `k8s-deploy/`, and compose files.
+
+### Module Layout (`lightrag/`)
+
+- **lightrag.py**: Main orchestrator class (`LightRAG`) — assembled from mixins (see *LightRAG class composition*). Hosts `ainsert_custom_kg`, `_insert_done`, `_process_extract_entities`, `_refresh_addon_params_cache`, and `addon_params` accessors. Critical: always call `await rag.initialize_storages()` after instantiation.
+- **pipeline.py**: `_PipelineMixin` — owns the document ingestion pipeline (`apipeline_enqueue_documents`, `apipeline_process_enqueue_documents`, `apipeline_process_error_documents`), the `parse_native` / `parse_mineru` / `parse_docling` parser dispatchers, multimodal analysis, validation, and the worker scaffolding.
+- **utils_pipeline.py**: Pure helpers shared by the pipeline mixin and other entry points: doc-status field access, document identity (source key, content hash), parsed-artifact path resolution, parser payload normalization, multimodal entity augmentation, and `make_lightrag_doc_content`.
+- **llm_roles.py**: `RoleSpec` / `RoleLLMConfig` / `_RoleLLMState` / `ROLES` registry plus `_RoleLLMMixin` — role normalization, builder registration, wrapper rebuild, runtime config update, queue cleanup, sanitized config export, queue status reporting. Route role-specific behavior here rather than into provider modules.
+- **storage_migrations.py**: `_StorageMigrationMixin` — `check_and_migrate_data`, `_migrate_entity_relation_data`, `_migrate_chunk_tracking_storage`.
+- **addon_params.py**: `ObservableAddonParams` plus `default_addon_params` / `normalize_addon_params` helpers.
+- **operate.py**: Core extraction and query operations including entity/relation extraction, chunking, and multi-mode retrieval logic.
+- **base.py**: Abstract base classes for storage backends (`BaseKVStorage`, `BaseVectorStorage`, `BaseGraphStorage`, `BaseDocStatusStorage`).
+- **kg/**: Storage implementations (JSON, NetworkX, Neo4j, PostgreSQL, MongoDB, Redis, Milvus, Qdrant, Faiss, Memgraph, OpenSearch, NanoVectorDB). The backend registry (`STORAGE_IMPLEMENTATIONS` / `STORAGES`) lives in `kg/__init__.py`; `kg/factory.py::get_storage_class()` resolves backend classes from configuration.
+- **llm/**: LLM and embedding provider bindings (OpenAI, Ollama, Azure, Gemini, Bedrock, Anthropic, etc.). All async with caching support.
+- **parser/**: Unified parsing layer. `parser/routing.py` resolves engine and filename hints for `legacy`, `native`, `mineru`, and `docling` flows; `parser/debug.py` provides an offline LightRAG stub for the `parser/cli.py` debug entry point (`python -m lightrag.parser.cli`). Native format parsers live as sibling sub-packages under `parser/` (currently `parser/docx/`); external HTTP-based adapters live under `parser/external/` (`mineru`, `docling`) with shared helpers in `parser/external/_common.py`, `_manifest.py`, `_zip.py`.
+- **chunker/**: Chunking strategies (token-size, recursive character, semantic vector, paragraph semantic).
+- **api/**: FastAPI service (`lightrag_server.py`) with REST endpoints and Ollama-compatible API; routers under `routers/`, static Swagger assets, packaged WebUI output, and Gunicorn launcher.
+
+## Core Architecture
+
+### LightRAG class composition
+
+`LightRAG` is assembled from focused mixins (split out of the previously monolithic `lightrag.py`):
+
+```
+LightRAG → _RoleLLMMixin → _StorageMigrationMixin → _PipelineMixin → object
+```
+
+The `@final` decorator on `LightRAG` is preserved — the mixin layering is an internal implementation detail, not an external subclassing surface. The public API (`ainsert`, `aquery`, `ainsert_custom_kg`, `initialize_storages`, etc.) is unchanged. `ainsert_custom_kg` and its internal construction logic, `_insert_done`, `_process_extract_entities`, `_refresh_addon_params_cache`, and the `addon_params` property accessors stay on `LightRAG` itself because they cut across multiple flows or depend on prompt-profile state.
+
+### Storage Layer
+
+LightRAG uses 4 storage types with pluggable backends:
+- **KV_STORAGE**: LLM response cache, text chunks, document info
+- **VECTOR_STORAGE**: Entity/relation/chunk embeddings
+- **GRAPH_STORAGE**: Entity-relation graph structure
+- **DOC_STATUS_STORAGE**: Document processing status tracking
+
+Each `LightRAG` instance can pass a `workspace` parameter for data isolation. Implementation differs per storage type:
+- **File-based**: subdirectories under `working_dir`.
+- **Collection-based**: collection name prefixes.
+- **Relational DB**: workspace column filtering.
+- **Qdrant**: payload-based partitioning.
+
+### Pipeline concurrency contract
+
+The document ingestion pipeline coordinates concurrent writers through `pipeline_status` (a per-workspace shared dict in `lightrag.kg.shared_storage`). These fields are mutated under `get_namespace_lock("pipeline_status", workspace=...)`:
+
+- **`busy`**: any pipeline-busy state. Set by both the processing loop AND destructive jobs (clear / per-doc delete). On its own, `busy=True` does NOT block enqueue — see `destructive_busy` for the exclusive subset.
+- **`destructive_busy`**: the busy job is `/documents/clear` or `/documents/{doc_id}` (delete). These DROP storages and remove input files; a concurrent enqueue accepted in this window would write to storage being torn down and silently lose the document. Reservation and the enqueue last-line guard reject when this is True.
+- **`scanning`**: a `/documents/scan` task is running (whole lifecycle: classification + processing). Used by the `/scan` endpoint to refuse overlapping scans. Does NOT on its own block uploads/inserts.
+- **`scanning_exclusive`**: True only during the scan task's classification phase, when `run_scanning_process` is reading `doc_status` to classify files (PROCESSED → archive, FAILED-without-`full_docs` → retry-as-new, etc.) and possibly deleting stale stubs. Reservation and the enqueue last-line guard reject when this is set. Cleared before the scan transitions to its processing phase, allowing concurrent uploads to land while scan-driven processing finishes.
+- **`pending_enqueues`**: count of `/upload`, `/text`, `/texts` endpoints that have reserved a slot (via `_reserve_enqueue_slot`) but whose bg task has not yet completed. Only the scan endpoint reads this — to refuse starting while uploads are mid-flight.
+- **`request_pending`**: a nudge to the running processing loop. Set by either (a) `apipeline_process_enqueue_documents` when called while `busy=True` or (b) `apipeline_enqueue_documents` after writing to `doc_status` while `busy=True`. The loop checks it after each batch and re-queries `doc_status` if set.
+
+Mutual-exclusion rules (all checked atomically inside the lock):
+
+| Operation | Refuses if | Writes |
+|---|---|---|
+| `_reserve_enqueue_slot` | `scanning_exclusive` or `destructive_busy` | `pending_enqueues++` |
+| `apipeline_enqueue_documents` (last-line guard) | (`scanning_exclusive` and not `from_scan`) or `destructive_busy` | — |
+| Scan endpoint reservation | `busy or scanning or pending_enqueues > 0` | `scanning = True` |
+| `apipeline_process_enqueue_documents` entry | (already busy → set `request_pending`, return) | `busy = True` (NOT `destructive_busy`) |
+| `clear_documents` / `delete_document` (synchronous reservation) | `busy or scanning or pending_enqueues > 0` | `busy = True`, `destructive_busy = True` |
+
+The contract permits **concurrent enqueue + processing**: a freshly-uploaded doc lands in `doc_status` while the loop is mid-batch, the loop sees `request_pending` after the current batch, re-queries `doc_status`, and picks up the new PENDING row.
+
+For the rest — write ordering of `full_docs` vs `doc_status`, the workspace-scoped `enqueue_serialize` lock around dedup-and-upsert, and the `from_scan=True` bypass — see the docstrings on `apipeline_enqueue_documents` and `apipeline_process_enqueue_documents` in `lightrag/pipeline.py`.
+
+### Query Modes
+
+- **local**: Context-dependent retrieval focused on specific entities
+- **global**: Community/summary-based broad knowledge retrieval
+- **hybrid**: Combines local and global
+- **naive**: Direct vector search without graph
+- **mix**: Integrates KG and vector retrieval (recommended with reranker)
+
+## Development Commands
+
+### Setup
+```bash
+# Install with uv
+uv sync
+source .venv/bin/activate  # Or: .venv\Scripts\activate on Windows
+
+# Install with API support
+uv sync --extra api
+
+# Install specific extras
+uv sync --extra offline-storage  # Storage backends
+uv sync --extra offline-llm      # LLM providers
+uv sync --extra test             # Testing dependencies
+```
+
+### API Server
+```bash
+# Copy and configure environment
+cp env.example .env  # Edit with your LLM/embedding configs
+
+# Build WebUI
+cd lightrag_webui
+bun install --frozen-lockfile
+bun run build
+cd ..
+
+# Run server
+lightrag-server                                           # Production
+uvicorn lightrag.api.lightrag_server:app --reload        # Development
+lightrag-gunicorn                                         # Multi-worker (gunicorn)
+```
+
+### WebUI
+```bash
+cd lightrag_webui
+bun install --frozen-lockfile      # Install dependencies
+bun run dev                        # Dev server (Node + Vite)
+bun run dev:bun                    # Dev server (Bun native)
+bun run build                      # Production build
+bun run preview                    # Preview production build
+bun run lint                       # ESLint over *.ts/tsx/js/jsx
+
+# Testing — Bun built-in runner (NOT Vitest/Jest)
+bun test                           # All tests
+bun test --watch                   # Watch mode
+bun test --coverage                # With coverage report
+bun test src/api/lightrag.test.ts  # Single test file
+```
+
+### Testing
+
+- Use mock-based tests for external services (Redis, httpx, etc.) — do not depend on live services in unit tests.
+- Add regression tests for every bug fix.
+- Run the full test suite (or relevant subset) and report pass counts before declaring done.
+- Backend tests use pytest; frontend unit tests use Bun's built-in runner — see *WebUI* above.
+
+```bash
+# Preferred for fresh shells and automation; resolves PYTHON, venv, uv, .venv, venv, python, python3
+./scripts/test.sh tests
+
+# Run specific test file
+./scripts/test.sh tests/kg/test_graph_storage.py
+
+# Run with custom workers
+./scripts/test.sh tests --test-workers 4
+```
+
+- `tests/`: main test suite, mirrors feature folders. Place new tests under the subdirectory matching the module under test:
+  - `tests/api/{auth,config,routes}/` for FastAPI server tests (auth/token, config loading, route handlers); top-level `tests/api/` for app-wide concerns (path prefixes, Ollama-compatible endpoint).
+  - `tests/chunker/`, `tests/evaluation/`, `tests/extraction/` for the like-named modules.
+  - `tests/kg/<backend>_impl/` for backend-specific storage tests, mirroring the `lightrag/kg/<backend>_impl.py` file naming. The `_impl` suffix on every subdirectory keeps the layout uniform and avoids `sys.path` shadowing on names that overlap with top-level PyPI/stdlib packages (`faiss`, `json`, `neo4j`, `networkx`, `redis`) when a test is launched directly via `python tests/kg/...`. Current backends: `faiss_impl/`, `json_impl/`, `memgraph_impl/`, `milvus_impl/`, `mongo_impl/`, `nano_impl/`, `neo4j_impl/`, `networkx_impl/`, `opensearch_impl/`, `postgres_impl/`, `qdrant_impl/`, `redis_impl/`. `tests/kg/` root holds cross-backend tests (`test_graph_storage`, `test_batch_graph_operations`, `test_unified_lock_safety`, `test_file_atomic`).
+  - `tests/llm/<provider>_impl/` for provider-specific behavior, same `_impl` convention: `bedrock_impl/`, `gemini_impl/`, `ollama_impl/`, `openai_impl/`, `voyageai_impl/`, `zhipu_impl/`. `tests/llm/` root holds cross-provider concerns (embedding, VLM, cache, role).
+  - `tests/parser/`, `tests/parser/docx/`, `tests/parser/external/{mineru,docling}/` for parser implementations.
+  - `tests/pipeline/` for ingestion pipeline and doc-status behavior (including `test_pipeline_*`, `test_doc_status_*`, `test_multimodal_*`, `test_graph_keyed_locks`).
+  - `tests/sidecar/`, `tests/setup/`, `tests/workspace/` for the like-named cross-cutting concerns.
+  - When adding a new backend or LLM provider, create a new subdirectory plus an empty `__init__.py` rather than dropping the file in the parent directory root.
+- Markers (see `tests/pytest.ini`): `offline`, `integration`, `requires_db`, `requires_api`. Integration tests are skipped by default via `-m "not integration"`.
+- Integration env vars: `LIGHTRAG_RUN_INTEGRATION=true`, `LIGHTRAG_KEEP_ARTIFACTS=true`, `LIGHTRAG_TEST_WORKERS=4`, plus storage-specific connection strings.
+
+### Linting
+```bash
+ruff check .
+```
+
+## Key Implementation Patterns
+
+### LightRAG Initialization (Critical)
+
+The most common error is forgetting to initialize storages (manifests as `AttributeError: __aenter__` or `KeyError: 'history_messages'`):
+
+```python
+import asyncio
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
+
+async def main():
+    rag = LightRAG(
+        working_dir="./rag_storage",
+        llm_model_func=gpt_4o_mini_complete,
+        embedding_func=openai_embed
+    )
+
+    # REQUIRED: Initialize storage backends
+    await rag.initialize_storages()
+
+    # Now safe to use
+    await rag.ainsert("Your text here")
+    result = await rag.aquery("Your question", param=QueryParam(mode="hybrid"))
+
+    # Cleanup
+    await rag.finalize_storages()
+
+asyncio.run(main())
+```
+
+### Custom Embedding Functions
+
+Use `@wrap_embedding_func_with_attrs` decorator and call `.func` when wrapping (already-decorated functions cannot be wrapped again — access the underlying via `.func`):
+
+```python
+from lightrag.utils import wrap_embedding_func_with_attrs
+
+@wrap_embedding_func_with_attrs(embedding_dim=1536, max_token_size=8192)
+async def custom_embed(texts: list[str]) -> np.ndarray:
+    # Call underlying function, not wrapped version
+    return await openai_embed.func(texts, model="text-embedding-3-large")
+
+# Wrong: EmbeddingFunc(func=openai_embed)
+# Right: EmbeddingFunc(func=openai_embed.func)
+```
+
+> **Pitfall — switching embedding models**: when changing the embedding model you MUST clear the data directory (optionally keeping `kv_store_llm_response_cache.json` for LLM cache). Existing vectors will not match the new model's space.
+
+### Storage Configuration
+
+Configure via environment variables or constructor params:
+
+```python
+# Environment-based (recommended for production)
+# See env.example for full list
+
+# Constructor-based
+rag = LightRAG(
+    working_dir="./storage",
+    workspace="project_name",  # For data isolation
+    kv_storage="PGKVStorage",
+    vector_storage="PGVectorStorage",
+    graph_storage="Neo4JStorage",
+    doc_status_storage="PGDocStatusStorage",
+    vector_db_storage_cls_kwargs={
+        "cosine_better_than_threshold": 0.2
+    }
+)
+```
+
+### Document Insertion
+
+```python
+# Single document
+await rag.ainsert("Text content")
+
+# Batch insertion
+await rag.ainsert(["Text 1", "Text 2", ...])
+
+# With custom IDs
+await rag.ainsert("Text", ids=["doc-123"])
+
+# With file paths (for citation)
+await rag.ainsert(["Text 1", "Text 2"], file_paths=["doc1.pdf", "doc2.pdf"])
+
+# Configure batch size
+rag = LightRAG(..., max_parallel_insert=4)  # Default: 2, max recommended: 10
+```
+
+### Query Configuration
+
+```python
+from lightrag import QueryParam
+
+result = await rag.aquery(
+    "Your question",
+    param=QueryParam(
+        mode="mix",                    # Recommended with reranker
+        top_k=60,                      # KG entities/relations to retrieve
+        chunk_top_k=20,                # Text chunks to retrieve
+        max_entity_tokens=6000,
+        max_relation_tokens=8000,
+        max_total_tokens=30000,
+        enable_rerank=True,
+        user_prompt="Additional instructions for LLM",
+        stream=False
+    )
+)
+```
+
+## Frontend Debugging via Playwright
+
+For WebUI bugs whose symptoms only surface in the rendered DOM — layout/overflow/scrollbar issues, transient flashes, third-party libraries attaching helpers to `<body>` outside React's tree, or end-to-end verification of a fix — drive the running dev server (`http://localhost:5173`) with the `document-skills:webapp-testing` skill instead of reasoning from source alone. Seed state directly via `localStorage` (persist key `settings-storage`, schema in `lightrag_webui/src/stores/settings.ts`) to skip live LLM calls. Use `wait_until="domcontentloaded"` plus a selector wait — Vite dev's long-lived polling makes `networkidle` time out.
+
+## Configuration
+
+### .env Configuration
+Primary configuration file for API server. Generate it with `make env-base` or copy `env.example` manually. Key sections:
+- Server settings (HOST, PORT, CORS)
+- Storage backends (connection strings via environment variables)
+- Query parameters (TOP_K, MAX_TOTAL_TOKENS, etc.)
+- Reranking configuration (RERANK_BINDING, RERANK_MODEL)
+- Authentication (AUTH_ACCOUNTS, LIGHTRAG_API_KEY)
+
+See `env.example` for comprehensive template.
+
+### Setup Wizard Outputs
+- Keep `.env` host-usable. Container-only hostnames and staged SSL paths belong in the wizard-managed compose layer, not persisted back into `.env`.
+- Treat `docker-compose.final.yml` as generated output assembled from `scripts/setup/templates/*.yml`.
+- For setup workflow changes, prefer `make env-*` targets over direct `scripts/setup/setup.sh` calls.
+
+## Code Style
+
+### Language
+Comments, backend code, and log messages in English. Frontend uses i18next for multi-language support.
+
+### Python
+- Follow PEP 8 with 4-space indentation
+- Use type annotations
+- Prefer dataclasses for state management
+- Use `lightrag.utils.logger` instead of print
+- Async/await patterns throughout
+
+### TypeScript / React (incl. WebUI ESLint)
+- Functional components with hooks; PascalCase for components
+- 2-space indentation, single quotes (enforced by `@stylistic` rules)
+- Tailwind utility-first styling
+- ESLint stack: TypeScript-ESLint + React Hooks plugin + Prettier; `@typescript-eslint/no-explicit-any` is disabled (allowed)
+
+## Commit and Pull Request Guidance
+
+- If this repo is a fork of `HKUDS/LightRAG`, target `HKUDS/LightRAG` when creating PRs, not the fork's own repo.
+- PR descriptions should include: summary, motivation, linked issues if applicable, what's changed, what's broken and how it works.

+ 1 - 0
CLAUDE.md

@@ -0,0 +1 @@
+Strictly follow the rules in ./AGENTS.md

+ 110 - 0
Dockerfile

@@ -0,0 +1,110 @@
+# syntax=docker/dockerfile:1
+
+# Frontend build stage
+# Build frontend assets on the native build platform to avoid
+# cross-architecture emulation issues during multi-platform builds.
+FROM --platform=$BUILDPLATFORM oven/bun:1 AS frontend-builder
+
+WORKDIR /app
+
+# Copy frontend source code
+COPY lightrag_webui/ ./lightrag_webui/
+
+# Build frontend assets for inclusion in the API package
+RUN --mount=type=cache,target=/root/.bun/install/cache \
+    cd lightrag_webui \
+    && bun install --frozen-lockfile \
+    && bun run build
+
+# Python build stage - using uv for faster package installation
+FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV UV_SYSTEM_PYTHON=1
+ENV UV_COMPILE_BYTECODE=1
+
+WORKDIR /app
+
+# Install system deps (Rust is required by some wheels)
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        curl \
+        build-essential \
+        pkg-config \
+    && rm -rf /var/lib/apt/lists/* \
+    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+ENV PATH="/root/.cargo/bin:/root/.local/bin:${PATH}"
+
+# Ensure shared data directory exists for uv caches
+RUN mkdir -p /root/.local/share/uv
+
+# Copy project metadata only (sources are copied after the dependency layer)
+COPY pyproject.toml .
+COPY setup.py .
+COPY uv.lock .
+
+# Install base, API, and offline extras without the project to improve caching
+RUN --mount=type=cache,target=/root/.local/share/uv \
+    uv sync --frozen --no-dev --extra api --extra offline --no-install-project --no-editable
+
+# Copy project sources after dependency layer
+COPY lightrag/ ./lightrag/
+
+# Include pre-built frontend assets from the previous stage
+COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui
+
+# Sync project in non-editable mode and ensure pip is available for runtime installs
+RUN --mount=type=cache,target=/root/.local/share/uv \
+    uv sync --frozen --no-dev --extra api --extra offline --no-editable \
+    && /app/.venv/bin/python -m ensurepip --upgrade
+
+# Prepare offline cache directory and pre-populate tiktoken data
+# Use uv run to execute commands from the virtual environment
+RUN mkdir -p /app/data/tiktoken \
+    && uv run lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \
+    if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi
+
+# Final stage
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install uv for package management
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
+
+ENV UV_SYSTEM_PYTHON=1
+
+# Copy installed packages and application code
+COPY --from=builder /root/.local /root/.local
+COPY --from=builder /app/.venv /app/.venv
+COPY --from=builder /app/lightrag ./lightrag
+COPY pyproject.toml .
+COPY setup.py .
+COPY uv.lock .
+
+# Ensure the installed scripts are on PATH
+ENV PATH=/app/.venv/bin:/root/.local/bin:$PATH
+
+# Install dependencies with uv sync (uses locked versions from uv.lock)
+# And ensure pip is available for runtime installs
+RUN --mount=type=cache,target=/root/.local/share/uv \
+    uv sync --frozen --no-dev --extra api --extra offline --no-editable \
+    && /app/.venv/bin/python -m ensurepip --upgrade
+
+# Create persistent data directories AFTER package installation
+RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/prompts /app/data/tiktoken
+
+# Copy offline cache into the newly created directory
+COPY --from=builder /app/data/tiktoken /app/data/tiktoken
+
+# Point to the prepared cache
+ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken
+ENV WORKING_DIR=/app/data/rag_storage
+ENV INPUT_DIR=/app/data/inputs
+ENV PROMPT_DIR=/app/data/prompts
+
+# Expose API port
+EXPOSE 9621
+
+ENTRYPOINT ["python", "-m", "lightrag.api.lightrag_server"]

+ 111 - 0
Dockerfile.lite

@@ -0,0 +1,111 @@
+# syntax=docker/dockerfile:1
+
+# Frontend build stage
+# Build frontend assets on the native build platform to avoid
+# cross-architecture emulation issues during multi-platform builds.
+FROM --platform=$BUILDPLATFORM oven/bun:1 AS frontend-builder
+
+WORKDIR /app
+
+# Copy frontend source code
+COPY lightrag_webui/ ./lightrag_webui/
+
+# Build frontend assets for inclusion in the API package
+RUN --mount=type=cache,target=/root/.bun/install/cache \
+    cd lightrag_webui \
+    && bun install --frozen-lockfile \
+    && bun run build
+
+# Python build stage - using uv for package installation
+FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV UV_SYSTEM_PYTHON=1
+ENV UV_COMPILE_BYTECODE=1
+
+WORKDIR /app
+
+# Install system dependencies required by some wheels
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        curl \
+        build-essential \
+        pkg-config \
+    && rm -rf /var/lib/apt/lists/* \
+    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+ENV PATH="/root/.cargo/bin:/root/.local/bin:${PATH}"
+
+# Ensure shared data directory exists for uv caches
+RUN mkdir -p /root/.local/share/uv
+
+# Copy project metadata only (sources are copied after the dependency layer)
+COPY pyproject.toml .
+COPY setup.py .
+COPY uv.lock .
+
+# Install project dependencies (base + API extras) without the project to improve caching
+RUN --mount=type=cache,target=/root/.local/share/uv \
+    uv sync --frozen --no-dev --extra api --no-install-project --no-editable
+
+# Copy project sources after dependency layer
+COPY lightrag/ ./lightrag/
+
+# Include pre-built frontend assets from the previous stage
+COPY --from=frontend-builder /app/lightrag/api/webui ./lightrag/api/webui
+
+# Sync project in non-editable mode and ensure pip is available for runtime installs
+RUN --mount=type=cache,target=/root/.local/share/uv \
+    uv sync --frozen --no-dev --extra api --no-editable \
+    && /app/.venv/bin/python -m ensurepip --upgrade
+
+# Prepare tiktoken cache directory and pre-populate tokenizer data
+# Ignore exit code 2 which indicates assets already cached
+RUN mkdir -p /app/data/tiktoken \
+    && uv run lightrag-download-cache --cache-dir /app/data/tiktoken || status=$?; \
+    if [ -n "${status:-}" ] && [ "$status" -ne 0 ] && [ "$status" -ne 2 ]; then exit "$status"; fi
+
+# Final stage
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install uv for package management
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
+
+ENV UV_SYSTEM_PYTHON=1
+
+# Copy installed packages and application code
+COPY --from=builder /root/.local /root/.local
+COPY --from=builder /app/.venv /app/.venv
+COPY --from=builder /app/lightrag ./lightrag
+COPY pyproject.toml .
+COPY setup.py .
+COPY uv.lock .
+
+# Ensure the installed scripts are on PATH
+ENV PATH=/app/.venv/bin:/root/.local/bin:$PATH
+
+# Sync dependencies inside the final image using uv
+# And ensure pip is available for runtime installs
+RUN --mount=type=cache,target=/root/.local/share/uv \
+    uv sync --frozen --no-dev --extra api --no-editable \
+    && /app/.venv/bin/python -m ensurepip --upgrade
+
+# Create persistent data directories
+RUN mkdir -p /app/data/rag_storage /app/data/inputs /app/data/prompts /app/data/tiktoken
+
+# Copy cached tokenizer assets prepared in the builder stage
+COPY --from=builder /app/data/tiktoken /app/data/tiktoken
+
+# Docker data directories
+ENV TIKTOKEN_CACHE_DIR=/app/data/tiktoken
+ENV WORKING_DIR=/app/data/rag_storage
+ENV INPUT_DIR=/app/data/inputs
+ENV PROMPT_DIR=/app/data/prompts
+
+# Expose API port
+EXPOSE 9621
+
+# Set entrypoint
+ENTRYPOINT ["python", "-m", "lightrag.api.lightrag_server"]

+ 43 - 0
Dockerfile.postgres

@@ -0,0 +1,43 @@
+# Build stage: compile Apache AGE against PostgreSQL 18 on top of pgvector
+FROM pgvector/pgvector:pg18-trixie AS build
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends --no-install-suggests \
+       ca-certificates \
+       git \
+       bison \
+       build-essential \
+       flex \
+       postgresql-server-dev-18
+
+RUN git clone --depth 1 --branch release/PG18/1.7.0 https://github.com/apache/age.git /usr/src/age \
+    && cd /usr/src/age \
+    && make \
+    && make install
+
+# Final stage: Create a final image by copying the files created in the build stage
+FROM pgvector/pgvector:pg18-trixie
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends --no-install-suggests \
+    locales
+
+RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen \
+    && locale-gen \
+    && update-locale LANG=en_US.UTF-8
+
+ENV LANG=en_US.UTF-8
+ENV LC_COLLATE=en_US.UTF-8
+ENV LC_CTYPE=en_US.UTF-8
+
+COPY --from=build /usr/lib/postgresql/18/lib/age.so /usr/lib/postgresql/18/lib/
+COPY --from=build /usr/share/postgresql/18/extension/age--1.7.0.sql /usr/share/postgresql/18/extension/
+COPY --from=build /usr/share/postgresql/18/extension/age.control /usr/share/postgresql/18/extension/
+
+RUN printf '%s\n' \
+    'CREATE EXTENSION IF NOT EXISTS vector;' \
+    'CREATE EXTENSION IF NOT EXISTS age CASCADE;' \
+    > /docker-entrypoint-initdb.d/00-create-extensions.sql
+
+# Note: the AGE extension must be loaded via shared_preload_libraries
+CMD ["postgres", "-c", "shared_preload_libraries=age"]

+ 21 - 0
LICENSE

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 LightRAG Team
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 5 - 0
MANIFEST.in

@@ -0,0 +1,5 @@
+include requirements.txt
+include lightrag/api/requirements.txt
+recursive-include lightrag/api/webui *
+recursive-include lightrag/api/static *
+recursive-include prompts/samples *

+ 99 - 0
Makefile

@@ -0,0 +1,99 @@
+# Run every recipe with bash instead of make's default /bin/sh.
+SHELL := /bin/bash
+# Setup wizard script invoked by the env-* targets.
+SETUP_SCRIPT := scripts/setup/setup.sh
+# Interpreter used to run SETUP_SCRIPT. Resolution order: the first existing
+# path among /opt/homebrew/bin/bash, /usr/local/bin/bash, /opt/local/bin/bash;
+# otherwise whatever `command -v bash` reports; otherwise the bare name `bash`.
+# Assigned with ?= so it can be overridden from the command line or environment.
+# NOTE(review): presumably this prefers a package-manager bash over an older
+# system bash on macOS — confirm against scripts/setup/setup.sh requirements.
+SETUP_BASH ?= $(or $(firstword $(wildcard /opt/homebrew/bin/bash /usr/local/bin/bash /opt/local/bin/bash)),$(shell command -v bash 2>/dev/null),bash)
+# Extra flags appended to every setup script invocation (e.g. SETUP_OPTS=--debug).
+SETUP_OPTS ?=
+# ANSI escape sequences interpolated into the printf format strings of the
+# help and dev recipes.
+COLOR_RESET := \033[0m
+COLOR_BOLD := \033[1m
+COLOR_BLUE := \033[34m
+COLOR_GREEN := \033[32m
+COLOR_YELLOW := \033[33m
+
+# When NO_COLOR is exactly 1, blank all escape sequences so output is plain text.
+ifeq ($(NO_COLOR),1)
+COLOR_RESET :=
+COLOR_BOLD :=
+COLOR_BLUE :=
+COLOR_GREEN :=
+COLOR_YELLOW :=
+endif
+
+# These targets name actions, not files: always run their recipes even if a
+# file with the same name exists.
+.PHONY: help dev configure env-base env-storage env-server env-validate env-backup env-security-check env-base-rewrite env-storage-rewrite env base storage server validate backup security security-check base-rewrite storage-rewrite
+
+# Print the available setup targets, the typical workflow, usage examples, and
+# the location of the generated compose output. Headings and target names are
+# wrapped in the COLOR_* escape sequences (empty when NO_COLOR=1).
+# The Examples list is printed as one contiguous group: only its last entry
+# ends with a blank line, which separates it from the next section.
+help:
+	@printf "$(COLOR_BOLD)Interactive setup targets$(COLOR_RESET)\n"
+	@printf "  $(COLOR_GREEN)make dev$(COLOR_RESET)                    Bootstrap local dev+test+offline env with uv + bun\n"
+	@printf "  $(COLOR_GREEN)make env-base$(COLOR_RESET)               Configure LLM, embedding, and reranker (run first)\n"
+	@printf "  $(COLOR_GREEN)make env-storage$(COLOR_RESET)            Configure storage backends and databases\n"
+	@printf "  $(COLOR_GREEN)make env-server$(COLOR_RESET)             Configure server, security, and SSL\n"
+	@printf "  $(COLOR_GREEN)make env-validate$(COLOR_RESET)           Validate existing .env\n"
+	@printf "  $(COLOR_GREEN)make env-security-check$(COLOR_RESET)     Audit existing .env for security risks\n"
+	@printf "  $(COLOR_GREEN)make env-backup$(COLOR_RESET)             Backup current .env\n"
+	@printf "  $(COLOR_GREEN)make env-base-rewrite$(COLOR_RESET)       Force-regenerate wizard-managed compose services during base setup\n"
+	@printf "  $(COLOR_GREEN)make env-storage-rewrite$(COLOR_RESET)    Force-regenerate wizard-managed compose services during storage setup\n"
+	@printf "  $(COLOR_GREEN)make base$(COLOR_RESET)                   Short form of make env-base (all env prefix can be stripped)\n"
+	@printf "\n"
+	@printf "$(COLOR_BOLD)Typical workflow$(COLOR_RESET)\n"
+	@printf "  1. make dev            # install backend/test deps and build frontend\n"
+	@printf "  2. make env-base       # set LLM/embedding/reranker\n"
+	@printf "  3. make env-storage    # set storage backends (optional)\n"
+	@printf "  4. make env-server     # set port/security/SSL (optional)\n\n"
+	@printf "$(COLOR_BOLD)Examples$(COLOR_RESET)\n"
+	@printf "  make dev\n"
+	@printf "  make env-base\n"
+	@printf "  make env-storage SETUP_OPTS=--debug\n"
+	@printf "  make env-server\n"
+	@printf "  make env-storage-rewrite\n"
+	@printf "  make env-security-check\n\n"
+	@printf "$(COLOR_BOLD)Compose Output$(COLOR_RESET)\n"
+	@printf "  Bundled service images are defined in scripts/setup/templates/*.yml.\n"
+	@printf "  Compose file output: docker-compose.final.yml\n"
+
+# Bootstrap a local development environment.
+# 1. Require `uv` and `bun` on PATH; if either is missing, print install hints
+#    and exit with status 1.
+# 2. Sync Python dependencies with the `test` and `offline` extras via uv.
+# 3. Install frontend dependencies from the frozen lockfile and build the
+#    assets inside lightrag_webui/.
+# 4. Print the suggested next steps.
+dev:
+	@if ! command -v uv >/dev/null 2>&1; then \
+		printf "$(COLOR_YELLOW)uv is required for make dev.$(COLOR_RESET)\n"; \
+		printf "Install uv first: https://docs.astral.sh/uv/getting-started/installation/\n"; \
+		printf "Unix/macOS: curl -LsSf https://astral.sh/uv/install.sh | sh\n"; \
+		printf "Windows: powershell -c \"irm https://astral.sh/uv/install.ps1 | iex\"\n"; \
+		exit 1; \
+	fi
+	@if ! command -v bun >/dev/null 2>&1; then \
+		printf "$(COLOR_YELLOW)bun is required for make dev.$(COLOR_RESET)\n"; \
+		printf "Install Bun first: https://bun.sh/docs/installation\n"; \
+		printf "macOS/Linux: curl -fsSL https://bun.sh/install | bash\n"; \
+		printf "Windows: powershell -c \"irm bun.sh/install.ps1 | iex\"\n"; \
+		exit 1; \
+	fi
+	@printf "$(COLOR_BLUE)Syncing backend and test dependencies with uv...$(COLOR_RESET)\n"
+	@uv sync --extra test --extra offline
+	@printf "$(COLOR_BLUE)Installing frontend dependencies with Bun...$(COLOR_RESET)\n"
+	@cd lightrag_webui && bun install --frozen-lockfile
+	@printf "$(COLOR_BLUE)Building frontend assets...$(COLOR_RESET)\n"
+	@cd lightrag_webui && bun run build
+	@printf "$(COLOR_GREEN)Development environment is ready.$(COLOR_RESET)\n"
+	@printf "Next steps:\n"
+	@printf "  source .venv/bin/activate\n"
+	@printf "  make env-base\n"
+	@printf "  lightrag-server\n"
+
+# Setup wizard targets. Each rule runs $(SETUP_SCRIPT) under $(SETUP_BASH) with
+# one mode flag plus any user-supplied $(SETUP_OPTS). Every name listed on a
+# rule line is an alias for the same recipe.
+
+# Base setup: runs the wizard with --base.
+env-base env base configure:
+	@$(SETUP_BASH) $(SETUP_SCRIPT) --base $(SETUP_OPTS)
+
+# Storage setup: runs the wizard with --storage.
+env-storage storage:
+	@$(SETUP_BASH) $(SETUP_SCRIPT) --storage $(SETUP_OPTS)
+
+# Base setup with --rewrite-compose added.
+env-base-rewrite base-rewrite:
+	@$(SETUP_BASH) $(SETUP_SCRIPT) --base --rewrite-compose $(SETUP_OPTS)
+
+# Storage setup with --rewrite-compose added.
+env-storage-rewrite storage-rewrite:
+	@$(SETUP_BASH) $(SETUP_SCRIPT) --storage --rewrite-compose $(SETUP_OPTS)
+
+# Server setup: runs the wizard with --server.
+env-server server:
+	@$(SETUP_BASH) $(SETUP_SCRIPT) --server $(SETUP_OPTS)
+
+# Validation: runs the wizard with --validate.
+env-validate validate:
+	@$(SETUP_BASH) $(SETUP_SCRIPT) --validate $(SETUP_OPTS)
+
+# Security audit: runs the wizard with --security-check.
+env-security-check security security-check:
+	@$(SETUP_BASH) $(SETUP_SCRIPT) --security-check $(SETUP_OPTS)
+
+# Backup: runs the wizard with --backup.
+env-backup backup:
+	@$(SETUP_BASH) $(SETUP_SCRIPT) --backup $(SETUP_OPTS)

+ 436 - 0
README-zh.md

@@ -0,0 +1,436 @@
+<div align="center">
+
+<div style="margin: 20px 0;">
+  <img src="./assets/logo.png" width="120" height="120" alt="LightRAG Logo" style="border-radius: 20px; box-shadow: 0 8px 32px rgba(0, 217, 255, 0.3);">
+</div>
+
+# 🚀 LightRAG: 简单且快速的检索增强生成(RAG)框架
+
+<div align="center">
+    <a href="https://trendshift.io/repositories/13043" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13043" alt="HKUDS%2FLightRAG | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+</div>
+
+<div align="center">
+  <div style="width: 100%; height: 2px; margin: 20px 0; background: linear-gradient(90deg, transparent, #00d9ff, transparent);"></div>
+</div>
+
+<div align="center">
+  <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; padding: 25px; text-align: center;">
+    <p>
+      <a href='https://github.com/HKUDS/LightRAG'><img src='https://img.shields.io/badge/🔥项目-主页-00d9ff?style=for-the-badge&logo=github&logoColor=white&labelColor=1a1a2e'></a>
+      <a href='https://arxiv.org/abs/2410.05779'><img src='https://img.shields.io/badge/📄arXiv-2410.05779-ff6b6b?style=for-the-badge&logo=arxiv&logoColor=white&labelColor=1a1a2e'></a>
+      <a href="https://github.com/HKUDS/LightRAG/stargazers"><img src='https://img.shields.io/github/stars/HKUDS/LightRAG?color=00d9ff&style=for-the-badge&logo=star&logoColor=white&labelColor=1a1a2e' /></a>
+    </p>
+    <p>
+      <img src="https://img.shields.io/badge/🐍Python-3.10-4ecdc4?style=for-the-badge&logo=python&logoColor=white&labelColor=1a1a2e">
+      <a href="https://pypi.org/project/lightrag-hku/"><img src="https://img.shields.io/pypi/v/lightrag-hku.svg?style=for-the-badge&logo=pypi&logoColor=white&labelColor=1a1a2e&color=ff6b6b"></a>
+    </p>
+    <p>
+      <a href="https://discord.gg/yF2MmDJyGJ"><img src="https://img.shields.io/badge/💬Discord-社区-7289da?style=for-the-badge&logo=discord&logoColor=white&labelColor=1a1a2e"></a>
+      <a href="https://github.com/HKUDS/LightRAG/issues/285"><img src="https://img.shields.io/badge/💬微信群-交流-07c160?style=for-the-badge&logo=wechat&logoColor=white&labelColor=1a1a2e"></a>
+    </p>
+    <p>
+      <a href="README-zh.md"><img src="https://img.shields.io/badge/🇨🇳中文版-1a1a2e?style=for-the-badge"></a>
+      <a href="README.md"><img src="https://img.shields.io/badge/🇺🇸English-1a1a2e?style=for-the-badge"></a>
+    </p>
+    <p>
+      <a href="https://pepy.tech/projects/lightrag-hku"><img src="https://static.pepy.tech/personalized-badge/lightrag-hku?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads"></a>
+    </p>
+  </div>
+</div>
+
+</div>
+
+<div align="center" style="margin: 30px 0;">
+  <img src="https://user-images.githubusercontent.com/74038190/212284100-561aa473-3905-4a80-b561-0d28506553ee.gif" width="800">
+</div>
+
+<div align="center" style="margin: 30px 0;">
+    <img src="./README.assets/b2aaf634151b4706892693ffb43d9093.png" width="800" alt="LightRAG Diagram">
+</div>
+
+---
+
+<div align="center">
+  <table>
+    <tr>
+      <td style="vertical-align: middle;">
+        <img src="./assets/LiteWrite.png"
+             width="56"
+             height="56"
+             alt="LiteWrite"
+             style="border-radius: 12px;" />
+      </td>
+      <td style="vertical-align: middle; padding-left: 12px;">
+        <a href="https://litewrite.ai">
+          <img src="https://img.shields.io/badge/🚀%20LiteWrite-AI%20原生%20LaTeX%20编辑器-ff6b6b?style=for-the-badge&logoColor=white&labelColor=1a1a2e">
+        </a>
+      </td>
+    </tr>
+  </table>
+</div>
+
+---
+
+## 🎉 新闻
+- [2026.05]🎯[新功能]:**将 RagAnything 合并至 LightRAG**🎉。支持通过 **MinerU / Docling** 服务进行多模态内容解析与提取。
+- [2026.05]🎯[新功能]:引入四种可选的文本分块策略:`Fix`(固定)、`Recursive`(递归)、`Vector`(向量)和 `Paragraph`(段落语义)。
+- [2026.05]🎯[新功能]:**支持按角色配置 LLM**,提供四个独立角色:EXTRACT、QUERY、KEYWORDS 和 VLM,每个角色拥有独立的 LLM 设置。
+- [2026.03]🎯[新功能]: 集成了 **OpenSearch** 作为统一存储后端,为 LightRAG 的全部四种存储类型提供全面支持。
+- [2026.03]🎯[新功能]: 推出交互式安装向导,支持通过 Docker 在本地部署 Embedding、Reranking 及存储后端服务。
+- [2025.11]🎯[新功能]: 集成了 **RAGAS 评估**和 **Langfuse 追踪**。更新了 API 以在查询结果中返回召回上下文,支持上下文精度指标。
+- [2025.10]🎯[可扩展性增强]: 消除了处理瓶颈,以高效支持**大规模数据集**。
+- [2025.09]🎯[新功能]: 显著提升了 Qwen3-30B-A3B 等**开源 LLM** 的知识图谱提取准确性。
+- [2025.08]🎯[新功能]: 现已支持 **Reranker**,显著提升混合查询性能(已设为默认查询模式)。
+- [2025.08]🎯[新功能]: 添加了**文档删除**功能,并支持自动重新生成知识图谱,以确保最佳查询性能。
+- [2025.06]🎯[新发布]: 我们的团队发布了 [RAG-Anything](https://github.com/HKUDS/RAG-Anything) —— 一个用于无缝处理文本、图像、表格和方程式的**全功能多模态 RAG** 系统。
+- [2025.06]🎯[新功能]: LightRAG 现已集成 [RAG-Anything](https://github.com/HKUDS/RAG-Anything),支持全面的多模态数据处理,实现对 PDF、图像、Office 文档、表格和公式等多种格式的无缝文档解析和 RAG 能力。详见[多模态文档处理部分](https://github.com/HKUDS/LightRAG/?tab=readme-ov-file#multimodal-document-processing-rag-anything-integration)。
+- [2025.03]🎯[新功能]: LightRAG 现已支持引用功能,实现了准确的源归因和增强的文档可追溯性。
+- [2025.02]🎯[新功能]: 现在您可以使用 MongoDB 作为一体化存储解决方案,实现统一的数据管理。
+- [2025.02]🎯[新发布]: 我们的团队发布了 [VideoRAG](https://github.com/HKUDS/VideoRAG) —— 一个用于理解超长上下文视频的 RAG 系统。
+- [2025.01]🎯[新发布]: 我们的团队发布了 [MiniRAG](https://github.com/HKUDS/MiniRAG),使用小型模型简化 RAG。
+- [2025.01]🎯现在您可以使用 PostgreSQL 作为一体化存储解决方案进行数据管理。
+- [2024.11]🎯[新资源]: LightRAG 的综合指南现已在 [LearnOpenCV](https://learnopencv.com/lightrag) 上发布 —— 探索深入的教程和最佳实践。非常感谢博客作者的杰出贡献!
+- [2024.11]🎯[新功能]: 推出 LightRAG WebUI —— 一个允许您通过直观的 Web 界面插入、查询和可视化 LightRAG 知识的仪表板。
+- [2024.11]🎯[新功能]: 现在您可以[使用 Neo4J 进行存储](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) —— 开启图数据库支持。
+- [2024.10]🎯[新功能]: 我们添加了 [LightRAG 介绍视频](https://youtu.be/oageL-1I0GE) 的链接 —— 演示 LightRAG 的各项功能。感谢作者的杰出贡献!
+- [2024.10]🎯[新频道]: 我们创建了一个 [Discord 频道](https://discord.gg/yF2MmDJyGJ)!💬 欢迎加入我们的社区进行分享、讨论和协作! 🎉🎉
+
+<details>
+  <summary style="font-size: 1.4em; font-weight: bold; cursor: pointer; display: list-item;">
+    算法流程图
+  </summary>
+
+![LightRAG索引流程图](https://learnopencv.com/wp-content/uploads/2024/11/LightRAG-VectorDB-Json-KV-Store-Indexing-Flowchart-scaled.jpg)
+*图1:LightRAG索引流程图 - 图片来源:[Source](https://learnopencv.com/lightrag/)*
+![LightRAG检索和查询流程图](https://learnopencv.com/wp-content/uploads/2024/11/LightRAG-Querying-Flowchart-Dual-Level-Retrieval-Generation-Knowledge-Graphs-scaled.jpg)
+*图2:LightRAG检索和查询流程图 - 图片来源:[Source](https://learnopencv.com/lightrag/)*
+
+</details>
+
+## 安装
+
+**💡 使用 uv 进行包管理**: 本项目使用 [uv](https://docs.astral.sh/uv/) 进行快速可靠的 Python 包管理。首先安装 uv: `curl -LsSf https://astral.sh/uv/install.sh | sh` (Unix/macOS) 或 `powershell -c "irm https://astral.sh/uv/install.ps1 | iex"` (Windows)
+
+> **注意**:如果您愿意,也可以使用 pip,但为了获得更好的性能和更可靠的依赖管理,建议使用 uv。
+>
+> **📦 离线部署**: 对于离线或隔离环境,请参阅[离线部署指南](./docs/OfflineDeployment.md),了解预安装所有依赖项和缓存文件的说明。
+
+### 安装LightRAG服务器
+
+LightRAG服务器旨在提供Web UI和API支持。Web UI便于文档索引、知识图谱探索和简单的RAG查询界面。LightRAG服务器还提供兼容Ollama的接口,旨在将LightRAG模拟为Ollama聊天模型。这使得AI聊天机器人(如Open WebUI)可以轻松访问LightRAG。
+
+* 从PyPI安装
+
+```bash
+### 使用 uv 安装 LightRAG 服务器(作为工具,推荐)
+uv tool install "lightrag-hku[api]"
+
+### 或使用 pip
+# python -m venv .venv
+# source .venv/bin/activate  # Windows: .venv\Scripts\activate
+# pip install "lightrag-hku[api]"
+
+### 构建前端代码
+cd lightrag_webui
+bun install --frozen-lockfile
+bun run build
+cd ..
+
+# 配置 env 文件
+# 从 GitHub 仓库的根目录上下载 env.example 文件
+# 或从本地检出的源代码中获取 env.example 文件
+cp env.example .env  # 使用你的LLM和Embedding模型访问参数更新.env文件
+# 启动API-WebUI服务
+lightrag-server
+```
+
+* 从源代码安装
+
+```bash
+git clone https://github.com/HKUDS/LightRAG.git
+cd LightRAG
+
+# 一键初始化开发环境(推荐)
+make dev
+source .venv/bin/activate  # 激活虚拟环境 (Linux/macOS)
+# Windows 系统: .venv\Scripts\activate
+
+# make dev 会安装测试工具链以及完整的离线依赖栈
+# (API、存储后端与各类 Provider 集成),并构建前端;不会生成 .env。
+# 启动服务前请先运行 make env-base,或手动从 env.example 复制并配置 .env。
+
+# 使用 uv 的等价手动步骤
+# 注意: uv sync 会自动在 .venv/ 目录创建虚拟环境
+uv sync --extra test --extra offline
+source .venv/bin/activate  # 激活虚拟环境 (Linux/macOS)
+# Windows 系统: .venv\Scripts\activate
+
+### 或使用 pip 和虚拟环境
+# python -m venv .venv
+# source .venv/bin/activate  # Windows: .venv\Scripts\activate
+# pip install -e ".[test,offline]"
+
+# 构建前端代码
+cd lightrag_webui
+bun install --frozen-lockfile
+bun run build
+cd ..
+
+# 配置 env 文件
+make env-base  # 或: cp env.example .env 后手动修改
+# 启动API-WebUI服务
+lightrag-server
+```
+
+* 使用 Docker Compose 启动 LightRAG 服务器
+
+```bash
+git clone https://github.com/HKUDS/LightRAG.git
+cd LightRAG
+cp env.example .env  # 使用你的LLM和Embedding模型访问参数更新.env文件
+# modify LLM and Embedding settings in .env
+docker compose up
+```
+
+> 在此获取LightRAG docker镜像历史版本: [LightRAG Docker Images]( https://github.com/HKUDS/LightRAG/pkgs/container/lightrag)
+>
+> 由 GitHub Actions 发布到 GHCR 的官方镜像已使用 GitHub OIDC 和 Sigstore Cosign 进行签名。校验方式请参阅 [docs/DockerDeployment.md](./docs/DockerDeployment.md#verify-official-ghcr-images-with-cosign)。
+
+### 使用 Setup 工具创建 .env 文件
+
+除了手动编辑 `env.example` 之外,您还可以使用交互式向导生成配置好的 `.env`,并在需要时生成 `docker-compose.final.yml`:
+
+```bash
+make env-base           # 必跑第一步:配置 LLM、Embedding、Reranker
+make env-storage        # 可选:配置存储后端和数据库服务
+make env-server         # 可选:配置服务端口、鉴权和 SSL
+make env-base-rewrite   # 可选:强制重建向导托管的 compose 服务块
+make env-storage-rewrite # 可选:强制重建向导托管的 compose 服务块
+make env-security-check # 可选:审计当前 .env 中的安全风险
+```
+
+每个目标的详细说明请参阅 [docs/InteractiveSetup.md](./docs/InteractiveSetup.md)。
+这些 setup 向导只负责更新配置;如需在部署前审计当前 `.env` 的安全风险,请额外运行
+`make env-security-check`。
+默认情况下,重新运行 setup 会保留未变化的向导托管 compose 服务块;只有在需要按模板强制重建这些托管块时,才使用
+`*-rewrite` 目标。
+
+### 安装LightRAG Core
+
+* 从源代码安装(推荐)
+
+```bash
+cd LightRAG
+# 注意: uv sync 会自动在 .venv/ 目录创建虚拟环境
+uv sync
+source .venv/bin/activate  # 激活虚拟环境 (Linux/macOS)
+# Windows 系统: .venv\Scripts\activate
+
+# 或: pip install -e .
+```
+
+* 从PyPI安装
+
+```bash
+uv pip install lightrag-hku
+# 或: pip install lightrag-hku
+```
+
+## 快速开始
+
+### LightRAG的LLM及配套技术栈要求
+
+LightRAG对大型语言模型(LLM)的能力要求远高于传统RAG,因为它需要LLM执行文档中的实体关系抽取任务。配置合适的Embedding和Reranker模型对提高查询表现也至关重要。
+
+- **LLM选型**:
+  - 推荐选用参数量至少为32B的LLM。
+  - 上下文长度至少为32KB,推荐达到64KB。
+  - 在文档索引阶段不建议选择推理模型。
+  - 在查询阶段建议选择比索引阶段能力更强的模型,以达到更高的查询效果。
+- **Embedding模型**:
+  - 高性能的Embedding模型对RAG至关重要。
+  - 推荐使用主流的多语言Embedding模型,例如:BAAI/bge-m3 和 text-embedding-3-large。
+  - **重要提示**:在文档索引前必须确定使用的Embedding模型,且在文档查询阶段必须沿用与索引阶段相同的模型。有些存储(例如PostgreSQL)在首次建立数表的时候需要确定向量维度,因此更换Embedding模型后需要删除向量相关库表,以便让LightRAG重建新的库表。
+- **Reranker模型配置**:
+  - 配置Reranker模型能够显著提升LightRAG的检索效果。
+  - 启用Reranker模型后,推荐将“mix模式”设为默认查询模式。
+  - 推荐选用主流的Reranker模型,例如:BAAI/bge-reranker-v2-m3 或 Jina 等服务商提供的模型。
+
+### 使用LightRAG服务器
+
+LightRAG 服务器旨在提供 Web UI 和 API 支持,同时提供了全面的知识图谱可视化功能,支持各种重力布局、节点查询、子图过滤等。有关LightRAG服务器的更多信息,请参阅[LightRAG服务器](./docs/LightRAG-API-Server-zh.md)。
+
+![iShot_2025-03-23_12.40.08](./README.assets/iShot_2025-03-23_12.40.08.png)
+
+
+### 使用LightRAG Core
+
+LightRAG核心功能的示例代码请参见`examples`目录。您还可参照[视频](https://www.youtube.com/watch?v=g21royNJ4fw)完成环境配置。若已持有OpenAI API密钥,可以通过以下命令运行演示代码:
+
+```bash
+### you should run the demo code with project folder
+cd LightRAG
+### provide your API-KEY for OpenAI
+export OPENAI_API_KEY="sk-...your_openai_key..."
+### download the demo document of "A Christmas Carol" by Charles Dickens
+curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt
+### run the demo code
+python examples/lightrag_openai_demo.py
+```
+
+如需流式响应示例的实现代码,请参阅 `examples/lightrag_openai_compatible_demo.py`。运行前,请确保根据需求修改示例代码中的LLM及嵌入模型配置。
+
+**注意1**:在运行demo程序的时候需要注意,不同的测试程序可能使用的是不同的embedding模型,更换不同的embedding模型的时候需要清空数据目录(`./dickens`),否则程序执行会出错。如果你想保留LLM缓存,可以在清除数据目录时保留`kv_store_llm_response_cache.json`文件。
+
+**注意2**:官方支持的示例代码仅为 `lightrag_openai_demo.py` 和 `lightrag_openai_compatible_demo.py` 两个文件。其他示例文件均为社区贡献内容,尚未经过完整测试与优化。
+
+## 使用LightRAG Core进行编程
+
+完整的 Core API 参考 —— 包括初始化参数、`QueryParam`、各 LLM/Embedding 接入示例(OpenAI、Ollama、Azure、Gemini、HuggingFace、LlamaIndex)、Rerank 注入、插入操作、实体/关系管理、删除与合并 —— 详见 **[docs/ProgramingWithCore.md](./docs/ProgramingWithCore.md)**(英文)。
+
+> ⚠️ **如果您希望将LightRAG集成到您的项目中,建议您使用LightRAG Server提供的REST API**。LightRAG Core通常用于嵌入式应用,或供希望进行研究与评估的学者使用。
+
+### 高级功能
+
+LightRAG 提供 Token 用量追踪、知识图谱数据导出、LLM 缓存管理、Langfuse 可观测性集成和基于 RAGAS 的评估框架。详见 **[docs/AdvancedFeatures.md](./docs/AdvancedFeatures.md)**(英文)。
+
+### 多模态文档处理
+
+LightRAG Server 已内置多模态文档流水线,支持 PDF、Office 文档、图像、表格和公式。解析通过外置 MinerU 或 Docling 服务完成,多模态索引在 LightRAG 流水线内执行。详见 **[docs/AdvancedFeatures.md](./docs/AdvancedFeatures.md)**(英文)。
+
+## 重现论文结果
+
+LightRAG 在农业、计算机科学、法律和混合等领域均显著优于 NaiveRAG、RQ-RAG、HyDE 和 GraphRAG。完整评估方法论、提示词和复现步骤详见 **[docs/Reproduce.md](./docs/Reproduce.md)**(英文)。
+
+### 总体性能表
+
+||**农业**||**计算机科学**||**法律**||**混合**||
+|----------------------|---------------|------------|------|------------|---------|------------|-------|------------|
+||NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|
+|**全面性**|32.4%|**67.6%**|38.4%|**61.6%**|16.4%|**83.6%**|38.8%|**61.2%**|
+|**多样性**|23.6%|**76.4%**|38.0%|**62.0%**|13.6%|**86.4%**|32.4%|**67.6%**|
+|**赋能性**|32.4%|**67.6%**|38.8%|**61.2%**|16.4%|**83.6%**|42.8%|**57.2%**|
+|**总体**|32.4%|**67.6%**|38.8%|**61.2%**|15.2%|**84.8%**|40.0%|**60.0%**|
+||RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|
+|**全面性**|31.6%|**68.4%**|38.8%|**61.2%**|15.2%|**84.8%**|39.2%|**60.8%**|
+|**多样性**|29.2%|**70.8%**|39.2%|**60.8%**|11.6%|**88.4%**|30.8%|**69.2%**|
+|**赋能性**|31.6%|**68.4%**|36.4%|**63.6%**|15.2%|**84.8%**|42.4%|**57.6%**|
+|**总体**|32.4%|**67.6%**|38.0%|**62.0%**|14.4%|**85.6%**|40.0%|**60.0%**|
+||HyDE|**LightRAG**|HyDE|**LightRAG**|HyDE|**LightRAG**|HyDE|**LightRAG**|
+|**全面性**|26.0%|**74.0%**|41.6%|**58.4%**|26.8%|**73.2%**|40.4%|**59.6%**|
+|**多样性**|24.0%|**76.0%**|38.8%|**61.2%**|20.0%|**80.0%**|32.4%|**67.6%**|
+|**赋能性**|25.2%|**74.8%**|40.8%|**59.2%**|26.0%|**74.0%**|46.0%|**54.0%**|
+|**总体**|24.8%|**75.2%**|41.6%|**58.4%**|26.4%|**73.6%**|42.4%|**57.6%**|
+||GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|
+|**全面性**|45.6%|**54.4%**|48.4%|**51.6%**|48.4%|**51.6%**|**50.4%**|49.6%|
+|**多样性**|22.8%|**77.2%**|40.8%|**59.2%**|26.4%|**73.6%**|36.0%|**64.0%**|
+|**赋能性**|41.2%|**58.8%**|45.2%|**54.8%**|43.6%|**56.4%**|**50.8%**|49.2%|
+|**总体**|45.2%|**54.8%**|48.0%|**52.0%**|47.2%|**52.8%**|**50.4%**|49.6%|
+
+
+## 🔗 相关项目
+
+*生态与扩展*
+
+<div align="center">
+  <table>
+    <tr>
+      <td align="center">
+        <a href="https://github.com/HKUDS/RAG-Anything">
+          <div style="width: 100px; height: 100px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2); display: flex; align-items: center; justify-content: center; margin-bottom: 10px;">
+            <span style="font-size: 32px;">📸</span>
+          </div>
+          <b>RAG-Anything</b><br>
+          <sub>多模态 RAG</sub>
+        </a>
+      </td>
+      <td align="center">
+        <a href="https://github.com/HKUDS/VideoRAG">
+          <div style="width: 100px; height: 100px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2); display: flex; align-items: center; justify-content: center; margin-bottom: 10px;">
+            <span style="font-size: 32px;">🎥</span>
+          </div>
+          <b>VideoRAG</b><br>
+          <sub>极端长上下文视频 RAG</sub>
+        </a>
+      </td>
+      <td align="center">
+        <a href="https://github.com/HKUDS/MiniRAG">
+          <div style="width: 100px; height: 100px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2); display: flex; align-items: center; justify-content: center; margin-bottom: 10px;">
+            <span style="font-size: 32px;">✨</span>
+          </div>
+          <b>MiniRAG</b><br>
+          <sub>极简 RAG</sub>
+        </a>
+      </td>
+    </tr>
+  </table>
+</div>
+
+---
+
+## ⭐ Star 历史
+
+[![Star History Chart](https://api.star-history.com/svg?repos=HKUDS/LightRAG&type=Date)](https://star-history.com/#HKUDS/LightRAG&Date)
+
+## 🤝 贡献
+
+<div align="center">
+  我们欢迎各种形式的贡献——Bug 修复、新功能、文档改进等。<br>
+  提交 Pull Request 前,请阅读 <a href=".github/CONTRIBUTING.md"><strong>贡献指南</strong></a>。
+</div>
+
+<br>
+
+<div align="center">
+  我们感谢所有贡献者做出的宝贵贡献。
+</div>
+
+<div align="center">
+  <a href="https://github.com/HKUDS/LightRAG/graphs/contributors">
+    <img src="https://contrib.rocks/image?repo=HKUDS/LightRAG" style="border-radius: 15px; box-shadow: 0 0 20px rgba(0, 217, 255, 0.3);" />
+  </a>
+</div>
+
+
+## 📖 引用
+
+```bibtex
+@article{guo2024lightrag,
+title={LightRAG: Simple and Fast Retrieval-Augmented Generation},
+author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang},
+year={2024},
+eprint={2410.05779},
+archivePrefix={arXiv},
+primaryClass={cs.IR}
+}
+```
+
+---
+
+<div align="center" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; padding: 30px; margin: 30px 0;">
+  <div>
+    <img src="https://user-images.githubusercontent.com/74038190/212284100-561aa473-3905-4a80-b561-0d28506553ee.gif" width="500">
+  </div>
+  <div style="margin-top: 20px;">
+    <a href="https://github.com/HKUDS/LightRAG" style="text-decoration: none;">
+      <img src="https://img.shields.io/badge/⭐%20在%20GitHub%20上点亮星星-1a1a2e?style=for-the-badge&logo=github&logoColor=white">
+    </a>
+    <a href="https://github.com/HKUDS/LightRAG/issues" style="text-decoration: none;">
+      <img src="https://img.shields.io/badge/🐛%20报告问题-ff6b6b?style=for-the-badge&logo=github&logoColor=white">
+    </a>
+    <a href="https://github.com/HKUDS/LightRAG/discussions" style="text-decoration: none;">
+      <img src="https://img.shields.io/badge/💬%20讨论-4ecdc4?style=for-the-badge&logo=github&logoColor=white">
+    </a>
+  </div>
+</div>
+
+<div align="center">
+  <div style="width: 100%; max-width: 600px; margin: 20px auto; padding: 20px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2);">
+    <div style="display: flex; justify-content: center; align-items: center; gap: 15px;">
+      <span style="font-size: 24px;">⭐</span>
+      <span style="color: #00d9ff; font-size: 18px;">感谢您访问 LightRAG!</span>
+      <span style="font-size: 24px;">⭐</span>
+    </div>
+  </div>
+</div>

BIN
README.assets/b2aaf634151b4706892693ffb43d9093.png


BIN
README.assets/iShot_2025-03-23_12.40.08.png


+ 402 - 57
README.md

@@ -1,92 +1,437 @@
-# LightRAG-cn
+<div align="center">
+
+<div style="margin: 20px 0;">
+  <img src="./assets/logo.png" width="120" height="120" alt="LightRAG Logo" style="border-radius: 20px; box-shadow: 0 8px 32px rgba(0, 217, 255, 0.3);">
+</div>
+
+# 🚀 LightRAG: Simple and Fast Retrieval-Augmented Generation
+
+<div align="center">
+    <a href="https://trendshift.io/repositories/13043" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13043" alt="HKUDS%2FLightRAG | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+</div>
+<p>
+</p>
+<div align="center">
+  <div style="width: 100%; height: 2px; margin: 20px 0; background: linear-gradient(90deg, transparent, #00d9ff, transparent);"></div>
+</div>
+
+<div align="center">
+  <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; padding: 25px; text-align: center;">
+    <p>
+      <a href='https://github.com/HKUDS/LightRAG'><img src='https://img.shields.io/badge/🔥Project-Page-00d9ff?style=for-the-badge&logo=github&logoColor=white&labelColor=1a1a2e'></a>
+      <a href='https://arxiv.org/abs/2410.05779'><img src='https://img.shields.io/badge/📄arXiv-2410.05779-ff6b6b?style=for-the-badge&logo=arxiv&logoColor=white&labelColor=1a1a2e'></a>
+      <a href="https://github.com/HKUDS/LightRAG/stargazers"><img src='https://img.shields.io/github/stars/HKUDS/LightRAG?color=00d9ff&style=for-the-badge&logo=star&logoColor=white&labelColor=1a1a2e' /></a>
+    </p>
+    <p>
+      <img src="https://img.shields.io/badge/🐍Python-3.10-4ecdc4?style=for-the-badge&logo=python&logoColor=white&labelColor=1a1a2e">
+      <a href="https://pypi.org/project/lightrag-hku/"><img src="https://img.shields.io/pypi/v/lightrag-hku.svg?style=for-the-badge&logo=pypi&logoColor=white&labelColor=1a1a2e&color=ff6b6b"></a>
+    </p>
+    <p>
+      <a href="https://discord.gg/yF2MmDJyGJ"><img src="https://img.shields.io/badge/💬Discord-Community-7289da?style=for-the-badge&logo=discord&logoColor=white&labelColor=1a1a2e"></a>
+      <a href="https://github.com/HKUDS/LightRAG/issues/285"><img src="https://img.shields.io/badge/💬WeChat-Group-07c160?style=for-the-badge&logo=wechat&logoColor=white&labelColor=1a1a2e"></a>
+    </p>
+    <p>
+      <a href="README-zh.md"><img src="https://img.shields.io/badge/🇨🇳中文版-1a1a2e?style=for-the-badge"></a>
+      <a href="README.md"><img src="https://img.shields.io/badge/🇺🇸English-1a1a2e?style=for-the-badge"></a>
+    </p>
+    <p>
+      <a href="https://pepy.tech/projects/lightrag-hku"><img src="https://static.pepy.tech/personalized-badge/lightrag-hku?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=downloads"></a>
+    </p>
+  </div>
+</div>
+
+</div>
+
+<div align="center" style="margin: 30px 0;">
+  <img src="https://user-images.githubusercontent.com/74038190/212284100-561aa473-3905-4a80-b561-0d28506553ee.gif" width="800">
+</div>
+
+<div align="center" style="margin: 30px 0;">
+    <img src="./README.assets/b2aaf634151b4706892693ffb43d9093.png" width="800" alt="LightRAG Diagram">
+</div>
+
+---
+
+<div align="center">
+  <table>
+    <tr>
+      <td style="vertical-align: middle;">
+        <img src="./assets/LiteWrite.png"
+             width="56"
+             height="56"
+             alt="LiteWrite"
+             style="border-radius: 12px;" />
+      </td>
+      <td style="vertical-align: middle; padding-left: 12px;">
+        <a href="https://litewrite.ai">
+          <img src="https://img.shields.io/badge/🚀%20LiteWrite-AI%20Native%20LaTeX%20Editor-ff6b6b?style=for-the-badge&logoColor=white&labelColor=1a1a2e">
+        </a>
+      </td>
+    </tr>
+  </table>
+</div>
+
+---
+
+## 🎉 News
+- [2026.05]🎯[New Feature]: **Merge RagAnything into LightRAG**🎉. Multimodal content parsing and extraction via **MinerU / Docling** services.
+- [2026.05]🎯[New Feature]: Introducing four selectable text chunking strategies: `Fix`, `Recursive`, `Vector`, and `Paragraph`.
+- [2026.05]🎯[New Feature]: **Role-specific LLM configuration** support, 4 distinct roles: EXTRACT, QUERY, KEYWORDS, and VLM, with independent LLM settings.
+- [2026.03]🎯[New Feature]: Integrated **OpenSearch** as a unified storage backend, providing comprehensive support for all four LightRAG storage types.
+- [2026.03]🎯[New Feature]: Introduced a setup wizard. Support for local deployment of embedding, reranking, and storage backends via Docker.
+- [2025.11]🎯[New Feature]: Integrated **RAGAS for Evaluation** and **Langfuse for Tracing**. Updated the API to return retrieved contexts alongside query results to support context precision metrics.
+- [2025.10]🎯[Scalability Enhancement]: Eliminated processing bottlenecks to support **Large-Scale Datasets Efficiently**.
+- [2025.09]🎯[New Feature] Enhances knowledge graph extraction accuracy for **Open-Sourced LLMs** such as Qwen3-30B-A3B.
+- [2025.08]🎯[New Feature] **Reranker** is now supported, significantly boosting performance for mixed queries (set as default query mode).
+- [2025.08]🎯[New Feature] Added **Document Deletion** with automatic KG regeneration to ensure optimal query performance.
+- [2025.06]🎯[New Release] Our team has released [RAG-Anything](https://github.com/HKUDS/RAG-Anything) — an **All-in-One Multimodal RAG** system for seamless processing of text, images, tables, and equations.
+- [2025.06]🎯[New Feature] LightRAG now supports comprehensive multimodal data handling through [RAG-Anything](https://github.com/HKUDS/RAG-Anything) integration, enabling seamless document parsing and RAG capabilities across diverse formats including PDFs, images, Office documents, tables, and formulas. Please refer to the new [multimodal section](https://github.com/HKUDS/LightRAG/?tab=readme-ov-file#multimodal-document-processing-rag-anything-integration) for details.
+- [2025.03]🎯[New Feature] LightRAG now supports citation functionality, enabling proper source attribution and enhanced document traceability.
+- [2025.02]🎯[New Feature] You can now use MongoDB as an all-in-one storage solution for unified data management.
+- [2025.02]🎯[New Release] Our team has released [VideoRAG](https://github.com/HKUDS/VideoRAG) — a RAG system for understanding extremely long-context videos.
+- [2025.01]🎯[New Release] Our team has released [MiniRAG](https://github.com/HKUDS/MiniRAG) making RAG simpler with small models.
+- [2025.01]🎯You can now use PostgreSQL as an all-in-one storage solution for data management.
+- [2024.11]🎯[New Resource] A comprehensive guide to LightRAG is now available on [LearnOpenCV](https://learnopencv.com/lightrag) — explore in-depth tutorials and best practices. Many thanks to the blog author for this excellent contribution!
+- [2024.11]🎯[New Feature] Introducing the LightRAG WebUI — an interface that allows you to insert, query, and visualize LightRAG knowledge through an intuitive web-based dashboard.
+- [2024.11]🎯[New Feature] You can now [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) — enabling graph database support.
+- [2024.10]🎯[New Feature] We've added a link to a [LightRAG Introduction Video](https://youtu.be/oageL-1I0GE) — a walkthrough of LightRAG's capabilities. Thanks to the author for this excellent contribution!
+- [2024.10]🎯[New Channel] We have created a [Discord channel](https://discord.gg/yF2MmDJyGJ)!💬 Welcome to join our community for sharing, discussions, and collaboration! 🎉🎉
+
+<details>
+  <summary style="font-size: 1.4em; font-weight: bold; cursor: pointer; display: list-item;">
+    Algorithm Flowchart
+  </summary>
+
+![LightRAG Indexing Flowchart](https://learnopencv.com/wp-content/uploads/2024/11/LightRAG-VectorDB-Json-KV-Store-Indexing-Flowchart-scaled.jpg)
+*Figure 1: LightRAG Indexing Flowchart - Img Caption : [Source](https://learnopencv.com/lightrag/)*
+![LightRAG Retrieval and Querying Flowchart](https://learnopencv.com/wp-content/uploads/2024/11/LightRAG-Querying-Flowchart-Dual-Level-Retrieval-Generation-Knowledge-Graphs-scaled.jpg)
+*Figure 2: LightRAG Retrieval and Querying Flowchart - Img Caption : [Source](https://learnopencv.com/lightrag/)*
+
+</details>
 
+## Installation
+
+**💡 Using uv for Package Management**: This project uses [uv](https://docs.astral.sh/uv/) for fast and reliable Python package management. Install uv first: `curl -LsSf https://astral.sh/uv/install.sh | sh` (Unix/macOS) or `powershell -c "irm https://astral.sh/uv/install.ps1 | iex"` (Windows)
 
+> **Note**: You can also use pip if you prefer, but uv is recommended for better performance and more reliable dependency management.
+>
+> **📦 Offline Deployment**: For offline or air-gapped environments, see the [Offline Deployment Guide](./docs/OfflineDeployment.md) for instructions on pre-installing all dependencies and cache files.
 
-## Getting started
+### Install LightRAG Server
 
-To make it easy for you to get started with GitLab, here's a list of recommended next steps.
+The LightRAG Server is designed to provide Web UI and API support. The Web UI facilitates document indexing, knowledge graph exploration, and a simple RAG query interface. LightRAG Server also provides an Ollama-compatible interface, aiming to emulate LightRAG as an Ollama chat model. This allows AI chat bots, such as Open WebUI, to access LightRAG easily.
 
-Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)!
+* Install from PyPI
 
-## Add your files
+```bash
+### Install LightRAG Server as tool using uv (recommended)
+uv tool install "lightrag-hku[api]"
 
-- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files
-- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command:
+### Or using pip
+# python -m venv .venv
+# source .venv/bin/activate  # Windows: .venv\Scripts\activate
+# pip install "lightrag-hku[api]"
 
+### Build front-end artifacts
+cd lightrag_webui
+bun install --frozen-lockfile
+bun run build
+cd ..
+
+# Setup env file
+# Obtain the env.example file by downloading it from the GitHub repository root
+# or by copying it from a local source checkout.
+cp env.example .env  # Update the .env with your LLM and embedding configurations
+# Launch the server
+lightrag-server
 ```
-cd existing_repo
-git remote add origin https://www.gitcc.com/nianshilan/lightrag-cn.git
-git branch -M main
-git push -uf origin main
+
+* Installation from Source
+
+```bash
+git clone https://github.com/HKUDS/LightRAG.git
+cd LightRAG
+
+# Bootstrap the development environment (recommended)
+make dev
+source .venv/bin/activate  # Activate the virtual environment (Linux/macOS)
+# Or on Windows: .venv\Scripts\activate
+
+# make dev installs the test toolchain plus the full offline stack
+# (API, storage backends, and provider integrations), then builds the frontend.
+# Run make env-base or copy env.example to .env before starting the server.
+
+# Equivalent manual steps with uv
+# Note: uv sync automatically creates a virtual environment in .venv/
+uv sync --extra test --extra offline
+source .venv/bin/activate  # Activate the virtual environment (Linux/macOS)
+# Or on Windows: .venv\Scripts\activate
+
+### Or using pip with virtual environment
+# python -m venv .venv
+# source .venv/bin/activate  # Windows: .venv\Scripts\activate
+# pip install -e ".[test,offline]"
+
+# Build front-end artifacts
+cd lightrag_webui
+bun install --frozen-lockfile
+bun run build
+cd ..
+
+# setup env file
+make env-base  # Or: cp env.example .env and update it manually
+# Launch API-WebUI server
+lightrag-server
 ```
 
-## Integrate with your tools
+* Launching the LightRAG Server with Docker Compose
 
-- [ ] [Set up project integrations](https://www.gitcc.com/nianshilan/lightrag-cn/-/settings/integrations)
+```bash
+git clone https://github.com/HKUDS/LightRAG.git
+cd LightRAG
+cp env.example .env  # Update the .env with your LLM and embedding configurations
+# modify LLM and Embedding settings in .env
+docker compose up
+```
 
-## Collaborate with your team
+> Historical versions of LightRAG docker images can be found here: [LightRAG Docker Images](https://github.com/HKUDS/LightRAG/pkgs/container/lightrag)
+>
+> Official GHCR images published by GitHub Actions are signed with Sigstore Cosign using GitHub OIDC. See [docs/DockerDeployment.md](./docs/DockerDeployment.md#verify-official-ghcr-images-with-cosign) for verification commands.
 
-- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/)
-- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html)
-- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically)
-- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/)
-- [ ] [Set auto-merge](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html)
+### Create .env File With Setup Tool
 
-## Test and Deploy
+Instead of editing `env.example` by hand, use the interactive setup wizard to generate a configured `.env` and, when needed, `docker-compose.final.yml`:
 
-Use the built-in continuous integration in GitLab.
+```bash
+make env-base           # Required first step: LLM, embedding, reranker
+make env-storage        # Optional: storage backends and database services
+make env-server         # Optional: server port, auth, and SSL
+make env-base-rewrite   # Optional: force-regenerate wizard-managed compose services
+make env-storage-rewrite # Optional: force-regenerate wizard-managed compose services
+make env-security-check # Optional: audit the current .env for security risks
+```
 
-- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html)
-- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing(SAST)](https://docs.gitlab.com/ee/user/application_security/sast/)
-- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html)
-- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/)
-- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html)
+For a full description of every target, see [docs/InteractiveSetup.md](./docs/InteractiveSetup.md).
+The setup wizards update configuration only; run `make env-security-check` separately to audit the
+current `.env` for security risks before deployment.
+By default, rerunning the setup preserves unchanged wizard-managed compose service blocks; use a
+`*-rewrite` target only when you need to rebuild those managed blocks from the bundled templates.
 
-***
+### Install LightRAG Core
 
-# Editing this README
+* Install from source (Recommended)
 
-When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thank you to [makeareadme.com](https://www.makeareadme.com/) for this template.
+```bash
+cd LightRAG
+# Note: uv sync automatically creates a virtual environment in .venv/
+uv sync
+source .venv/bin/activate  # Activate the virtual environment (Linux/macOS)
+# Or on Windows: .venv\Scripts\activate
 
-## Suggestions for a good README
-Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information.
+# Or: pip install -e .
+```
 
-## Name
-Choose a self-explaining name for your project.
+* Install from PyPI
 
-## Description
-Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors.
+```bash
+uv pip install lightrag-hku
+# Or: pip install lightrag-hku
+```
 
-## Badges
-On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge.
+## Quick Start
 
-## Visuals
-Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method.
+### LLM and Technology Stack Requirements for LightRAG
 
-## Installation
-Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection.
+LightRAG's demands on the capabilities of Large Language Models (LLMs) are significantly higher than those of traditional RAG, as it requires the LLM to perform entity-relationship extraction tasks from documents. Configuring appropriate Embedding and Reranker models is also crucial for improving query performance.
+
+- **LLM Selection**:
+  - It is recommended to use an LLM with at least 32 billion parameters.
+  - The context length should be at least 32KB, with 64KB being recommended.
+  - It is not recommended to choose reasoning models during the document indexing stage.
+  - During the query stage, it is recommended to choose models with stronger capabilities than those used in the indexing stage to achieve better query results.
+- **Embedding Model**:
+  - A high-performance Embedding model is essential for RAG.
+  - We recommend using mainstream multilingual Embedding models, such as: `BAAI/bge-m3` and `text-embedding-3-large`.
+  - **Important Note**: The Embedding model must be determined before document indexing, and the same model must be used during the document query phase. For certain storage solutions (e.g., PostgreSQL), the vector dimension must be defined upon initial table creation. Therefore, when changing embedding models, it is necessary to delete the existing vector-related tables and allow LightRAG to recreate them with the new dimensions.
+- **Reranker Model Configuration**:
+  - Configuring a Reranker model can significantly enhance LightRAG's retrieval performance.
+  - When a Reranker model is enabled, it is recommended to set the "mix mode" as the default query mode.
+  - We recommend using mainstream Reranker models, such as: `BAAI/bge-reranker-v2-m3` or models provided by services like Jina.
 
-## Usage
-Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README.
+### Quick Start for LightRAG Server
 
-## Support
-Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc.
+The LightRAG Server is designed to provide Web UI and API support. The LightRAG Server offers a comprehensive knowledge graph visualization feature. It supports various gravity layouts, node queries, subgraph filtering, and more. For more information about LightRAG Server, please refer to [LightRAG Server](./docs/LightRAG-API-Server.md).
 
-## Roadmap
-If you have ideas for releases in the future, it is a good idea to list them in the README.
+![iShot_2025-03-23_12.40.08](./README.assets/iShot_2025-03-23_12.40.08.png)
 
-## Contributing
-State if you are open to contributions and what your requirements are for accepting them.
 
-For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self.
+### Quick Start for LightRAG core
 
-You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser.
+To get started with LightRAG core, refer to the sample code available in the `examples` folder. Additionally, a [video demo](https://www.youtube.com/watch?v=g21royNJ4fw) is provided to guide you through the local setup process. If you already possess an OpenAI API key, you can run the demo right away:
+
+```bash
+### run the demo code from the project folder
+cd LightRAG
+### provide your API-KEY for OpenAI
+export OPENAI_API_KEY="sk-...your_openai_key..."
+### download the demo document of "A Christmas Carol" by Charles Dickens
+curl https://raw.githubusercontent.com/gusye1234/nano-graphrag/main/tests/mock_data.txt > ./book.txt
+### run the demo code
+python examples/lightrag_openai_demo.py
+```
 
-## Authors and acknowledgment
-Show your appreciation to those who have contributed to the project.
+For a streaming response implementation example, please see `examples/lightrag_openai_compatible_demo.py`. Prior to execution, ensure you modify the sample code's LLM and embedding configurations accordingly.
+
+**Note 1**: When running the demo program, please be aware that different test scripts may use different embedding models. If you switch to a different embedding model, you must clear the data directory (`./dickens`); otherwise, the program may encounter errors. If you wish to retain the LLM cache, you can preserve the `kv_store_llm_response_cache.json` file while clearing the data directory.
+
+**Note 2**: Only `lightrag_openai_demo.py` and `lightrag_openai_compatible_demo.py` are officially supported code samples. Other sample files are community contributions that haven't undergone full testing and optimization.
+
+## Programming with LightRAG Core
+
+For the complete Core API reference — including init parameters, `QueryParam`, LLM/embedding provider examples (OpenAI, Ollama, Azure, Gemini, HuggingFace, LlamaIndex), reranker injection, insert operations, entity/relation management, and delete/merge — see **[docs/ProgramingWithCore.md](./docs/ProgramingWithCore.md)**.
+
+> ⚠️ **If you would like to integrate LightRAG into your project, we recommend utilizing the REST API provided by the LightRAG Server**. LightRAG Core is typically intended for embedded applications or for researchers who wish to conduct studies and evaluations.
+
+### Advanced Features
 
-## License
-For open source projects, say how it is licensed.
+LightRAG provides additional capabilities including token usage tracking, knowledge graph data export, LLM cache management, Langfuse observability integration, and RAGAS-based evaluation. See **[docs/AdvancedFeatures.md](./docs/AdvancedFeatures.md)**.
+
+### Multimodal Document Processing
+
+LightRAG Server includes a multimodal document pipeline for PDFs, Office documents, images, tables, and formulas. Parsing is handled through external MinerU or Docling services, while multimodal indexing runs in the LightRAG pipeline. For setup details, see **[docs/AdvancedFeatures.md](./docs/AdvancedFeatures.md)**.
+
+## Replicating Findings in the Paper
+
+LightRAG consistently outperforms NaiveRAG, RQ-RAG, HyDE, and GraphRAG across agriculture, computer science, legal, and mixed domains. For the full evaluation methodology, prompts, and reproduce steps, see **[docs/Reproduce.md](./docs/Reproduce.md)**.
+
+**Overall Performance Table**
+
+||**Agriculture**||**CS**||**Legal**||**Mix**||
+|----------------------|---------------|------------|------|------------|---------|------------|-------|------------|
+||NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|
+|**Comprehensiveness**|32.4%|**67.6%**|38.4%|**61.6%**|16.4%|**83.6%**|38.8%|**61.2%**|
+|**Diversity**|23.6%|**76.4%**|38.0%|**62.0%**|13.6%|**86.4%**|32.4%|**67.6%**|
+|**Empowerment**|32.4%|**67.6%**|38.8%|**61.2%**|16.4%|**83.6%**|42.8%|**57.2%**|
+|**Overall**|32.4%|**67.6%**|38.8%|**61.2%**|15.2%|**84.8%**|40.0%|**60.0%**|
+||RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|
+|**Comprehensiveness**|31.6%|**68.4%**|38.8%|**61.2%**|15.2%|**84.8%**|39.2%|**60.8%**|
+|**Diversity**|29.2%|**70.8%**|39.2%|**60.8%**|11.6%|**88.4%**|30.8%|**69.2%**|
+|**Empowerment**|31.6%|**68.4%**|36.4%|**63.6%**|15.2%|**84.8%**|42.4%|**57.6%**|
+|**Overall**|32.4%|**67.6%**|38.0%|**62.0%**|14.4%|**85.6%**|40.0%|**60.0%**|
+||HyDE|**LightRAG**|HyDE|**LightRAG**|HyDE|**LightRAG**|HyDE|**LightRAG**|
+|**Comprehensiveness**|26.0%|**74.0%**|41.6%|**58.4%**|26.8%|**73.2%**|40.4%|**59.6%**|
+|**Diversity**|24.0%|**76.0%**|38.8%|**61.2%**|20.0%|**80.0%**|32.4%|**67.6%**|
+|**Empowerment**|25.2%|**74.8%**|40.8%|**59.2%**|26.0%|**74.0%**|46.0%|**54.0%**|
+|**Overall**|24.8%|**75.2%**|41.6%|**58.4%**|26.4%|**73.6%**|42.4%|**57.6%**|
+||GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|
+|**Comprehensiveness**|45.6%|**54.4%**|48.4%|**51.6%**|48.4%|**51.6%**|**50.4%**|49.6%|
+|**Diversity**|22.8%|**77.2%**|40.8%|**59.2%**|26.4%|**73.6%**|36.0%|**64.0%**|
+|**Empowerment**|41.2%|**58.8%**|45.2%|**54.8%**|43.6%|**56.4%**|**50.8%**|49.2%|
+|**Overall**|45.2%|**54.8%**|48.0%|**52.0%**|47.2%|**52.8%**|**50.4%**|49.6%|
+
+
+## 🔗 Related Projects
+
+*Ecosystem & Extensions*
+
+<div align="center">
+  <table>
+    <tr>
+      <td align="center">
+        <a href="https://github.com/HKUDS/RAG-Anything">
+          <div style="width: 100px; height: 100px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2); display: flex; align-items: center; justify-content: center; margin-bottom: 10px;">
+            <span style="font-size: 32px;">📸</span>
+          </div>
+          <b>RAG-Anything</b><br>
+          <sub>Multimodal RAG</sub>
+        </a>
+      </td>
+      <td align="center">
+        <a href="https://github.com/HKUDS/VideoRAG">
+          <div style="width: 100px; height: 100px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2); display: flex; align-items: center; justify-content: center; margin-bottom: 10px;">
+            <span style="font-size: 32px;">🎥</span>
+          </div>
+          <b>VideoRAG</b><br>
+          <sub>Extreme Long-Context Video RAG</sub>
+        </a>
+      </td>
+      <td align="center">
+        <a href="https://github.com/HKUDS/MiniRAG">
+          <div style="width: 100px; height: 100px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2); display: flex; align-items: center; justify-content: center; margin-bottom: 10px;">
+            <span style="font-size: 32px;">✨</span>
+          </div>
+          <b>MiniRAG</b><br>
+          <sub>Extremely Simple RAG</sub>
+        </a>
+      </td>
+    </tr>
+  </table>
+</div>
+
+---
+
+## ⭐ Star History
+
+[![Star History Chart](https://api.star-history.com/svg?repos=HKUDS/LightRAG&type=Date)](https://star-history.com/#HKUDS/LightRAG&Date)
+
+## 🤝 Contribution
+
+<div align="center">
+  We welcome contributions of all kinds — bug fixes, new features, documentation improvements, and more.<br>
+  Please read our <a href=".github/CONTRIBUTING.md"><strong>Contributing Guide</strong></a> before submitting a pull request.
+</div>
+
+<br>
+
+<div align="center">
+  We thank all our contributors for their valuable contributions.
+</div>
+
+<div align="center">
+  <a href="https://github.com/HKUDS/LightRAG/graphs/contributors">
+    <img src="https://contrib.rocks/image?repo=HKUDS/LightRAG" style="border-radius: 15px; box-shadow: 0 0 20px rgba(0, 217, 255, 0.3);" />
+  </a>
+</div>
+
+
+## 📖 Citation
+
+```bibtex
+@article{guo2024lightrag,
+title={LightRAG: Simple and Fast Retrieval-Augmented Generation},
+author={Zirui Guo and Lianghao Xia and Yanhua Yu and Tu Ao and Chao Huang},
+year={2024},
+eprint={2410.05779},
+archivePrefix={arXiv},
+primaryClass={cs.IR}
+}
+```
 
-## Project status
-If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers.
+---
+
+<div align="center" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; padding: 30px; margin: 30px 0;">
+  <div>
+    <img src="https://user-images.githubusercontent.com/74038190/212284100-561aa473-3905-4a80-b561-0d28506553ee.gif" width="500">
+  </div>
+  <div style="margin-top: 20px;">
+    <a href="https://github.com/HKUDS/LightRAG" style="text-decoration: none;">
+      <img src="https://img.shields.io/badge/⭐%20Star%20us%20on%20GitHub-1a1a2e?style=for-the-badge&logo=github&logoColor=white">
+    </a>
+    <a href="https://github.com/HKUDS/LightRAG/issues" style="text-decoration: none;">
+      <img src="https://img.shields.io/badge/🐛%20Report%20Issues-ff6b6b?style=for-the-badge&logo=github&logoColor=white">
+    </a>
+    <a href="https://github.com/HKUDS/LightRAG/discussions" style="text-decoration: none;">
+      <img src="https://img.shields.io/badge/💬%20Discussions-4ecdc4?style=for-the-badge&logo=github&logoColor=white">
+    </a>
+  </div>
+</div>
+
+<div align="center">
+  <div style="width: 100%; max-width: 600px; margin: 20px auto; padding: 20px; background: linear-gradient(135deg, rgba(0, 217, 255, 0.1) 0%, rgba(0, 217, 255, 0.05) 100%); border-radius: 15px; border: 1px solid rgba(0, 217, 255, 0.2);">
+    <div style="display: flex; justify-content: center; align-items: center; gap: 15px;">
+      <span style="font-size: 24px;">⭐</span>
+      <span style="color: #00d9ff; font-size: 18px;">Thank you for visiting LightRAG!</span>
+      <span style="font-size: 24px;">⭐</span>
+    </div>
+  </div>
+</div>

+ 18 - 0
SECURITY.md

@@ -0,0 +1,18 @@
+# Reporting Security Issues
+
+The LightRAG team and community take security bugs seriously. We appreciate your efforts to responsibly disclose your findings, and will make every effort to acknowledge your contributions.
+
+To report a security issue, please use GitHub Security Advisories: [Report a Vulnerability](https://github.com/HKUDS/LightRAG/security/advisories/new)
+
+The LightRAG team will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.
+
+Report security bugs in third-party modules to the person or team maintaining the module.
+
+### Supported Versions
+
+The following versions are currently supported with security updates.
+
+| Version | Supported          |
+| ------- | ------------------ |
+| 1.2.x   | :x:                |
+| 1.3.x   | :white_check_mark: |

BIN
assets/LiteWrite.png


BIN
assets/logo.png


+ 52 - 0
config.ini.example

@@ -0,0 +1,52 @@
+; DEPRECATION WARNING:
+; `config.ini` support will be removed in a future release.
+; Please move your configuration to `.env` or environment variables.
+; This file is kept only as a temporary compatibility example.
+
+[neo4j]
+uri = neo4j+s://xxxxxxxx.databases.neo4j.io
+username = neo4j
+password = your-password
+connection_pool_size = 100
+connection_timeout = 30.0
+connection_acquisition_timeout = 30.0
+max_transaction_retry_time = 30.0
+max_connection_lifetime = 300.0
+liveness_check_timeout = 30.0
+keep_alive = true
+
+[mongodb]
+uri = mongodb+srv://name:password@your-cluster-address
+database = lightrag
+
+[redis]
+uri=redis://localhost:6379/1
+
+[qdrant]
+uri = http://localhost:16333
+
+[postgres]
+host = localhost
+port = 5432
+user = your_username
+password = your_password
+database = your_database
+# workspace = default
+max_connections = 12
+# vector_index_type options: HNSW, IVFFLAT or VCHORDRQ
+vector_index_type = HNSW
+hnsw_m = 16
+hnsw_ef = 64
+ivfflat_lists = 100
+vchordrq_build_options =
+vchordrq_probes =
+vchordrq_epsilon = 1.9
+
+[memgraph]
+uri = bolt://localhost:7687
+
+[milvus]
+uri = http://localhost:19530
+db_name = lightrag
+# user = root
+# password = your_password
+# token = your_token

+ 77 - 0
docker-build-push.sh

@@ -0,0 +1,77 @@
+#!/bin/bash
+set -e
+
+# Configuration
+IMAGE_NAME="ghcr.io/hkuds/lightrag"
+DOCKERFILE="Dockerfile"
+TAG="latest"
+
+# Get version from git tags
+VERSION=$(git describe --tags --abbrev=0 2>/dev/null || echo "dev")
+
+echo "=================================="
+echo "  Multi-Architecture Docker Build"
+echo "=================================="
+echo "Image: ${IMAGE_NAME}:${TAG}"
+echo "Version: ${VERSION}"
+echo "Platforms: linux/amd64, linux/arm64"
+echo "=================================="
+echo ""
+
+# Check Docker login status (skip if CR_PAT is set for CI/CD)
+if [ -z "$CR_PAT" ]; then
+    if ! docker info 2>/dev/null | grep -q "Username"; then
+        echo "⚠️  Warning: Not logged in to Docker registry"
+        echo "Please login first: docker login ghcr.io"
+        echo "Or set CR_PAT environment variable for automated login"
+        echo ""
+        read -p "Continue anyway? (y/n) " -n 1 -r
+        echo
+        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+            exit 1
+        fi
+    fi
+else
+    echo "Using CR_PAT environment variable for authentication"
+fi
+
+# Check if buildx builder exists, create if not
+if ! docker buildx ls | grep -q "desktop-linux"; then
+    echo "Creating buildx builder..."
+    docker buildx create --name desktop-linux --use
+    docker buildx inspect --bootstrap
+else
+    echo "Using existing buildx builder: desktop-linux"
+    docker buildx use desktop-linux
+fi
+
+echo ""
+echo "Building and pushing multi-architecture image..."
+echo ""
+
+# Build and push
+docker buildx build \
+  --platform linux/amd64,linux/arm64 \
+  --file ${DOCKERFILE} \
+  --tag ${IMAGE_NAME}:${TAG} \
+  --tag ${IMAGE_NAME}:${VERSION} \
+  --push \
+  .
+
+echo ""
+echo "✓ Build and push complete!"
+echo ""
+echo "Images pushed:"
+echo "  - ${IMAGE_NAME}:${TAG}"
+echo "  - ${IMAGE_NAME}:${VERSION}"
+echo ""
+echo "Verifying multi-architecture manifest..."
+echo ""
+
+# Verify
+docker buildx imagetools inspect ${IMAGE_NAME}:${TAG}
+
+echo ""
+echo "✓ Verification complete!"
+echo ""
+echo "Pull with: docker pull ${IMAGE_NAME}:${TAG}"

+ 244 - 0
docker-compose-full.yml

@@ -0,0 +1,244 @@
+# Full Docker Compose Deployment Sample Generated by Setup Wizard: `make base` and `make storage`
+# This Sample File requires NVIDIA GPU for Milvus and VLLM services.
+# Copy `env.docker-compose-full` to `.env` before starting this compose file.
+# You can customize your setup using the Setup Wizard; for detailed instructions, please refer to docs/InteractiveSetup.md
+services:
+  lightrag:
+    image: ghcr.io/hkuds/lightrag:latest
+    build:
+      context: .
+      dockerfile: Dockerfile
+      tags:
+        - ghcr.io/hkuds/lightrag:latest
+    ports:
+      - "${HOST:-0.0.0.0}:${PORT:-9621}:9621"
+    volumes:
+      - ./data/rag_storage:/app/data/rag_storage
+      - ./data/inputs:/app/data/inputs
+      - ./config.ini:/app/config.ini
+      - ./data/prompts:/app/data/prompts
+      - ./.env:/app/.env
+    deploy:
+      restart_policy:
+        condition: on-failure
+        max_attempts: 10
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      HOST: "0.0.0.0"
+      PORT: "9621"
+      EMBEDDING_BINDING_HOST: "http://vllm-embed:8001/v1"
+      RERANK_BINDING_HOST: "http://vllm-rerank:8000/rerank"
+      POSTGRES_HOST: "postgres"
+      POSTGRES_PORT: "5432"
+      NEO4J_URI: "neo4j://neo4j:7687"
+      WORKING_DIR: "/app/data/rag_storage"
+      MILVUS_URI: "http://milvus:19530"
+      INPUT_DIR: "/app/data/inputs"
+      PROMPT_DIR: "/app/data/prompts"
+      MEMGRAPH_URI: "bolt://host.docker.internal:7687"
+      REDIS_URI: "redis://host.docker.internal:6379"
+      QDRANT_URL: "http://host.docker.internal:6333"
+      OPENSEARCH_HOSTS: "host.docker.internal:9200"
+      MONGO_URI: "mongodb://root:root@host.docker.internal:27017/"
+    depends_on:
+      vllm-embed:
+        condition: service_healthy
+      vllm-rerank:
+        condition: service_healthy
+      postgres:
+        condition: service_healthy
+      neo4j:
+        condition: service_healthy
+      milvus:
+        condition: service_healthy
+
+  vllm-embed:
+    image: vllm/vllm-openai:latest
+    runtime: nvidia
+    command: >
+      --model ${VLLM_EMBED_MODEL:-BAAI/bge-m3}
+      --port ${VLLM_EMBED_PORT:-8001}
+      --dtype float16
+      --api-key ${VLLM_EMBED_API_KEY}
+      ${VLLM_EMBED_EXTRA_ARGS:-}
+    environment:
+      NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all}
+      NVIDIA_DRIVER_CAPABILITIES: ${NVIDIA_DRIVER_CAPABILITIES:-compute,utility}
+    ports:
+      - "${VLLM_EMBED_PORT:-8001}:${VLLM_EMBED_PORT:-8001}"
+    volumes:
+      - vllm_embed_cache:/root/.cache/huggingface
+    ipc: host
+    healthcheck:
+      test:
+        - CMD-SHELL
+        - 'PORT_HEX="$(printf ''%04X'' ${VLLM_EMBED_PORT:-8001})"; cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "'
+      interval: 5s
+      timeout: 3s
+      retries: 120
+      start_period: 10s
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    restart: unless-stopped
+
+  vllm-rerank:
+    image: vllm/vllm-openai:latest
+    runtime: nvidia
+    command: >
+      --model ${VLLM_RERANK_MODEL:-BAAI/bge-reranker-v2-m3}
+      --port ${VLLM_RERANK_PORT:-8000}
+      --dtype float16
+      --api-key ${VLLM_RERANK_API_KEY}
+      ${VLLM_RERANK_EXTRA_ARGS:-}
+    environment:
+      NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all}
+      NVIDIA_DRIVER_CAPABILITIES: ${NVIDIA_DRIVER_CAPABILITIES:-compute,utility}
+    ports:
+      - "${VLLM_RERANK_PORT:-8000}:${VLLM_RERANK_PORT:-8000}"
+    volumes:
+      - vllm_rerank_cache:/root/.cache/huggingface
+    ipc: host
+    healthcheck:
+      test:
+        - CMD-SHELL
+        - 'PORT_HEX="$(printf ''%04X'' ${VLLM_RERANK_PORT:-8000})"; cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "'
+      interval: 5s
+      timeout: 3s
+      retries: 120
+      start_period: 10s
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    restart: unless-stopped
+
+  postgres:
+    # this image does not support PGGraphStorage
+    image: pgvector/pgvector:pg18
+    # ports:
+    #   - "5432:5432"
+    volumes:
+      - postgres_data:/var/lib/postgresql
+    healthcheck:
+      test:
+        - CMD-SHELL
+        - 'PORT_HEX="$(printf ''%04X'' 5432)"; cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "'
+      interval: 5s
+      timeout: 3s
+      retries: 120
+      start_period: 10s
+    restart: unless-stopped
+    environment:
+      POSTGRES_USER: "rag"
+      POSTGRES_PASSWORD: "rag"
+      POSTGRES_DB: "rag"
+
+  neo4j:
+    image: neo4j:5-community
+    # ports:
+    #   - "7474:7474"
+    #   - "${NEO4J_BOLT_PORT:-7687}:7687"
+    volumes:
+      - neo4j_data:/data
+    healthcheck:
+      test:
+        - CMD-SHELL
+        - 'PORT_HEX="$(printf ''%04X'' 7687)"; cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "'
+      interval: 10s
+      timeout: 3s
+      retries: 120
+      start_period: 10s
+    restart: unless-stopped
+    environment:
+      NEO4J_AUTH: ${NEO4J_USERNAME:?missing}/${NEO4J_PASSWORD:?missing}
+      NEO4J_dbms_default__database: "neo4j"
+
+  milvus:
+    image: milvusdb/milvus:v2.6.11-gpu
+    command: ["milvus", "run", "standalone"]
+    security_opt:
+      - seccomp:unconfined
+    environment:
+      ETCD_ENDPOINTS: milvus-etcd:2379
+      MINIO_ADDRESS: milvus-minio:9000
+      MINIO_ACCESS_KEY_ID: "${MINIO_ACCESS_KEY_ID:?missing}"
+      MINIO_SECRET_ACCESS_KEY: "${MINIO_SECRET_ACCESS_KEY:?missing}"
+    # ports:
+    #   - "19530:19530"
+    #   - "9091:9091"
+    volumes:
+      - milvus_data:/var/lib/milvus
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities: ["gpu"]
+    healthcheck:
+      test:
+        - CMD-SHELL
+        - 'PORT_HEX="$(printf ''%04X'' 19530)"; cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "'
+      interval: 10s
+      timeout: 3s
+      retries: 120
+      start_period: 10s
+    depends_on:
+      milvus-etcd:
+        condition: service_healthy
+      milvus-minio:
+        condition: service_healthy
+    restart: unless-stopped
+
+  milvus-etcd:
+    image: quay.io/coreos/etcd:v3.5.25
+    environment:
+      ETCD_AUTO_COMPACTION_MODE: revision
+      ETCD_AUTO_COMPACTION_RETENTION: "1000"
+      ETCD_QUOTA_BACKEND_BYTES: "4294967296"
+      ETCD_SNAPSHOT_COUNT: "50000"
+    volumes:
+      - milvus-etcd_data:/etcd
+    command: >
+      etcd
+      -advertise-client-urls=http://0.0.0.0:2379
+      -listen-client-urls=http://0.0.0.0:2379
+      -data-dir /etcd
+    healthcheck:
+      test: ["CMD", "etcdctl", "endpoint", "health"]
+      interval: 20s
+      timeout: 20s
+      retries: 3
+    restart: unless-stopped
+
+  milvus-minio:
+    image: minio/minio:RELEASE.2025-09-07T16-13-09Z
+    environment:
+      MINIO_ROOT_USER: "${MINIO_ACCESS_KEY_ID:?missing}"
+      MINIO_ROOT_PASSWORD: "${MINIO_SECRET_ACCESS_KEY:?missing}"
+    volumes:
+      - milvus-minio_data:/minio_data
+    command: minio server /minio_data --console-address ":9001"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
+      interval: 30s
+      timeout: 20s
+      retries: 3
+    restart: unless-stopped
+
+volumes:
+  vllm_embed_cache:
+  vllm_rerank_cache:
+  postgres_data:
+  neo4j_data:
+  milvus_data:
+  milvus-etcd_data:
+  milvus-minio_data:

+ 37 - 0
docker-compose.podman.yml

@@ -0,0 +1,37 @@
+# Podman-compatible compose file for LightRAG
+#
+# Usage:
+#   podman-compose -f docker-compose.podman.yml up -d
+#
+# Key differences from docker-compose.yml:
+#   - Uses top-level `restart` instead of `deploy.restart_policy`
+#     (Podman does not support the deploy block for restart policies)
+#   - No `extra_hosts` with `host-gateway` (Podman fails on this special
+#     value; Podman auto-provides host.containers.internal for host access)
+#   - When connecting to host services (e.g. LLM, embedding, rerank),
+#     use `host.containers.internal` instead of `host.docker.internal`
+#     in your .env binding host configuration
+
+services:
+  lightrag:
+    image: ghcr.io/hkuds/lightrag:latest
+    build:
+      context: .
+      dockerfile: Dockerfile
+      tags:
+        - ghcr.io/hkuds/lightrag:latest
+    ports:
+      - "${HOST:-0.0.0.0}:${PORT:-9621}:9621"
+    volumes:
+      - ./data/rag_storage:/app/data/rag_storage
+      - ./data/inputs:/app/data/inputs
+      - ./data/prompts:/app/data/prompts
+      - ./config.ini:/app/config.ini
+      - ./.env:/app/.env
+    restart: on-failure:10
+    environment:
+      WORKING_DIR: "/app/data/rag_storage"
+      INPUT_DIR: "/app/data/inputs"
+      PROMPT_DIR: "/app/data/prompts"
+      HOST: "0.0.0.0"
+      PORT: "9621"

+ 27 - 0
docker-compose.yml

@@ -0,0 +1,27 @@
+services:
+  lightrag:
+    image: ghcr.io/hkuds/lightrag:latest
+    build:
+      context: .
+      dockerfile: Dockerfile
+      tags:
+        - ghcr.io/hkuds/lightrag:latest
+    ports:
+      - "${HOST:-0.0.0.0}:${PORT:-9621}:9621"
+    volumes:
+      - ./data/rag_storage:/app/data/rag_storage
+      - ./data/inputs:/app/data/inputs
+      - ./data/prompts:/app/data/prompts
+      - ./.env:/app/.env
+    deploy:
+      restart_policy:
+        condition: on-failure
+        max_attempts: 10
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      WORKING_DIR: "/app/data/rag_storage"
+      INPUT_DIR: "/app/data/inputs"
+      PROMPT_DIR: "/app/data/prompts"
+      HOST: "0.0.0.0"
+      PORT: "9621"

+ 233 - 0
docs/AdvancedFeatures.md

@@ -0,0 +1,233 @@
+# Advanced Features
+
+## Multimodal Document Processing
+
+LightRAG Server includes a multimodal document pipeline for text, images, tables, and equations. Document parsing is handled through external MinerU or Docling services configured by endpoint, so the server no longer needs to install or import the `raganything` package locally.
+
+**Status:** the multimodal post-process hook is currently a placeholder; image, table, and equation processors are planned but not yet wired up. Ingestion via external MinerU/Docling parsers and native text indexing already work today.
+
+**Planned Capabilities:**
+- End-to-End Multimodal Pipeline: complete workflow from document ingestion to multimodal query answering
+- Universal Document Support: PDFs, Office documents (DOC/DOCX/PPT/PPTX/XLS/XLSX), images, and diverse file formats
+- Specialized Content Analysis: dedicated processors for images, tables, mathematical equations
+- Multimodal Knowledge Graph: automatic entity extraction and cross-modal relationship discovery
+- Hybrid Intelligent Retrieval: advanced search spanning textual and multimodal content
+
+### Quick Start
+
+Configure parser routing and external parser service endpoints in `.env`:
+
+```bash
+LIGHTRAG_PARSER=pdf:mineru,docx:docling,pptx:docling,xlsx:docling,*:legacy
+MINERU_API_MODE=local
+MINERU_LOCAL_ENDPOINT=http://localhost:8000
+DOCLING_ENDPOINT=http://localhost:5001/v1/convert/file/async
+```
+
+Then upload documents through LightRAG Server. `LIGHTRAG_PARSER` rules match suffixes such as `pdf`, may be separated with commas or semicolons, and are evaluated from left to right. If a rule enables MinerU or Docling, the matching endpoint must be configured before server startup. Per-file hints such as `paper.[mineru].pdf` and `memo.[native].docx` override the default rules. Parsed multimodal sidecars are written by the pipeline and consumed by the normal indexing flow. See [File Processing Configuration](./FileProcessingConfiguration-zh.md) for detailed routing rules and examples.
+
+---
+
+## Token Usage Tracking
+
+**Overview and Usage**
+
+LightRAG provides a `TokenTracker` tool to monitor token consumption reported by supported LLM providers. This feature is useful for controlling API costs and optimizing performance.
+
+`TokenTracker` does not automatically inject itself into LLM calls. Pass it to the provider binding directly, bind it through `llm_model_kwargs`, or capture it in your custom LLM function.
+
+**Method 1: Track direct LLM calls**
+
+```python
+from lightrag.llm.openai import openai_complete_if_cache
+from lightrag.utils import TokenTracker
+
+token_tracker = TokenTracker()
+
+with token_tracker:
+    result1 = await openai_complete_if_cache(
+        "gpt-4o-mini",
+        "your question 1",
+        token_tracker=token_tracker,
+    )
+    result2 = await openai_complete_if_cache(
+        "gpt-4o-mini",
+        "your question 2",
+        token_tracker=token_tracker,
+    )
+```
+
+The context manager resets the tracker when entering the block and prints usage when leaving it. The `token_tracker=token_tracker` argument is still required.
+
+**Method 2: Track LightRAG calls**
+
+```python
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import gpt_4o_mini_complete
+from lightrag.utils import TokenTracker
+
+token_tracker = TokenTracker()
+
+rag = LightRAG(
+    working_dir="./rag_storage",
+    llm_model_func=gpt_4o_mini_complete,
+    llm_model_kwargs={"token_tracker": token_tracker},
+    embedding_func=embedding_func,
+)
+
+await rag.initialize_storages()
+
+token_tracker.reset()
+await rag.ainsert(["document one", "document two"])
+await rag.aquery("your question 1", param=QueryParam(mode="naive"))
+await rag.aquery("your question 2", param=QueryParam(mode="mix"))
+
+print("Token usage:", token_tracker.get_usage())
+```
+
+`llm_model_kwargs={"token_tracker": token_tracker}` is passed to the default role LLM wrappers used by extraction, keyword generation, querying, and VLM calls. If you configure role-specific LLM kwargs, put `token_tracker` in the relevant role kwargs as well, or use the closure pattern below.
+
+**Robust custom wrapper pattern**
+
+```python
+from lightrag import LightRAG
+from lightrag.llm.gemini import gemini_complete_if_cache
+from lightrag.utils import TokenTracker
+
+
+def make_llm_func(token_tracker: TokenTracker):
+    async def _llm_model_func(
+        prompt,
+        system_prompt=None,
+        history_messages=None,
+        **kwargs,
+    ):
+        return await gemini_complete_if_cache(
+            "gemini-2.5-flash-lite",
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            token_tracker=token_tracker,
+            **kwargs,
+        )
+
+    return _llm_model_func
+
+
+token_tracker = TokenTracker()
+
+rag = LightRAG(
+    working_dir="./rag_storage",
+    llm_model_func=make_llm_func(token_tracker),
+    embedding_func=embedding_func,
+)
+
+await rag.initialize_storages()
+
+token_tracker.reset()
+await rag.ainsert(["document one", "document two"])
+
+print("Token usage:", token_tracker.get_usage())
+```
+
+**Usage Tips:**
+- Use context managers for direct LLM sessions when you want automatic reset and final printing
+- For segmented statistics, call `reset()` before each indexing or query phase
+- LLM cache hits do not create new provider calls, so token usage does not increase for cached responses
+- Regular checking of token usage helps detect abnormal consumption early
+
+---
+
+## Data Export Functions
+
+LightRAG allows you to export your knowledge graph data in various formats for analysis, sharing, and backup.
+
+**Basic Usage**
+
+```python
+# Basic CSV export (default format)
+rag.export_data("knowledge_graph.csv")
+
+# Specify any format
+rag.export_data("output.xlsx", file_format="excel")
+```
+
+**Supported File Formats**
+
+```python
+rag.export_data("graph_data.csv", file_format="csv")
+rag.export_data("graph_data.xlsx", file_format="excel")
+rag.export_data("graph_data.md", file_format="md")
+rag.export_data("graph_data.txt", file_format="txt")
+```
+
+**Additional Options**
+
+Include vector embeddings in the export (optional):
+
+```python
+rag.export_data("complete_data.csv", include_vector_data=True)
+```
+
+All exports include entity information (names, IDs, metadata), relation data (connections between entities), and relationship information from the vector database.
+
+---
+
+## Cache Management
+
+**Clear Cache**
+
+`aclear_cache()` clears all cached entries in `llm_response_cache`. It does not support selective cleanup by mode or cache type.
+
+```python
+# Asynchronous
+await rag.aclear_cache()
+
+# Synchronous
+rag.clear_cache()
+```
+
+For selective cleanup of query-related caches, use the `lightrag.tools.clean_llm_query_cache` tool and see the guide in [lightrag/tools/README_CLEAN_LLM_QUERY_CACHE.md](../lightrag/tools/README_CLEAN_LLM_QUERY_CACHE.md). It manages query caches and keywords caches for `mix`, `hybrid`, `local`, and `global` modes. It does **not** clean extraction caches such as `default:extract:*` and `default:summary:*`.
+
+---
+
+## Langfuse Observability Integration
+
+Langfuse provides a drop-in replacement for the OpenAI client that automatically tracks all LLM interactions, enabling developers to monitor, debug, and optimize their RAG systems.
+
+### Installation
+
+```bash
+pip install "lightrag-hku[observability]"
+# Or from source:
+pip install -e ".[observability]"
+```
+
+### Configuration
+
+Add to `.env` file:
+
+```
+## Langfuse Observability (Optional)
+LANGFUSE_SECRET_KEY=""
+LANGFUSE_PUBLIC_KEY=""
+LANGFUSE_HOST="https://cloud.langfuse.com"  # or your self-hosted instance
+LANGFUSE_ENABLE_TRACE=true
+```
+
+### Features
+
+Once installed and configured, Langfuse automatically traces all OpenAI LLM calls. Dashboard features include:
+- **Tracing**: View complete LLM call chains
+- **Analytics**: Token usage, latency, cost metrics
+- **Debugging**: Inspect prompts and responses
+- **Evaluation**: Compare model outputs
+- **Monitoring**: Real-time alerting
+
+> **Note**: LightRAG currently only integrates OpenAI-compatible API calls with Langfuse. APIs such as Ollama, Azure, and AWS Bedrock are not yet supported for Langfuse observability.
+
+---
+
+## RAGAS-based Evaluation
+
+**RAGAS** (Retrieval Augmented Generation Assessment) is a framework for reference-free evaluation of RAG systems using LLMs. LightRAG provides an evaluation script based on RAGAS. For detailed information, see [RAGAS-based Evaluation Framework](../lightrag/evaluation/README_EVALUASTION_RAGAS.md).

+ 4 - 0
docs/Algorithm.md

@@ -0,0 +1,4 @@
+![LightRAG Indexing Flowchart](https://learnopencv.com/wp-content/uploads/2024/11/LightRAG-VectorDB-Json-KV-Store-Indexing-Flowchart-scaled.jpg)
+*Figure 1: LightRAG Indexing Flowchart ([Source](https://learnopencv.com/lightrag/))*
+
+![LightRAG Retrieval and Querying Flowchart](https://learnopencv.com/wp-content/uploads/2024/11/LightRAG-Querying-Flowchart-Dual-Level-Retrieval-Generation-Knowledge-Graphs-scaled.jpg)
+*Figure 2: LightRAG Retrieval and Querying Flowchart ([Source](https://learnopencv.com/lightrag/))*

+ 179 - 0
docs/AsymmetricEmbedding.md

@@ -0,0 +1,179 @@
+# Asymmetric Embedding Configuration
+
+LightRAG keeps embedding behavior symmetric by default. Query/document asymmetric
+embedding is enabled only when `EMBEDDING_ASYMMETRIC=true` is explicitly set.
+
+This avoids accidental retrieval changes when prefix variables are present in an
+environment but the user did not intentionally enable asymmetric embeddings.
+
+Before enabling asymmetric embeddings for any model, check the model's current
+model card or provider documentation. Do not infer the right behavior from the
+API binding alone: an `openai`-compatible endpoint can serve instruction-free
+models, prefix-based models, or provider-specific models behind the same API
+shape.
+
+## Reindexing Requirement
+
+Changing asymmetric embedding settings changes the vectors produced for stored
+documents and for future queries. After enabling, disabling, or changing any of
+these settings, clear the existing LightRAG data for the workspace and re-index
+the source files:
+
+- `EMBEDDING_ASYMMETRIC`
+- `EMBEDDING_QUERY_PREFIX`
+- `EMBEDDING_DOCUMENT_PREFIX`
+- Provider task behavior such as Jina `task`, Gemini `task_type`, or VoyageAI
+  `input_type`
+
+Do not reuse an existing vector store across asymmetric embedding configuration
+changes. Mixing vectors generated with different query/document behavior can
+make retrieval quality unpredictable.
+
+## Binding Types
+
+LightRAG distinguishes two asymmetric embedding styles:
+
+| Style | Bindings | How asymmetric behavior is applied |
+| --- | --- | --- |
+| Provider task parameters | `jina`, `gemini`, `voyageai` | LightRAG passes query/document context to the provider-specific `task`, `task_type`, or `input_type` parameter. |
+| Text task prefixes | `openai`, `azure_openai`, `ollama` | LightRAG prepends configured text prefixes before calling the embedding API. Use this only when the model card explicitly requires separate query/document prefixes. |
+
+Other server embedding bindings do not currently support
+`EMBEDDING_ASYMMETRIC=true`.
+
+## Default: Symmetric Embeddings
+
+When `EMBEDDING_ASYMMETRIC` is unset, LightRAG does not enable asymmetric
+embedding behavior, even if prefix variables exist:
+
+```env
+# EMBEDDING_ASYMMETRIC is unset
+EMBEDDING_QUERY_PREFIX="search_query: "
+EMBEDDING_DOCUMENT_PREFIX="search_document: "
+```
+
+The prefixes are ignored and a warning is logged.
+
+The same is true when the flag is explicitly false:
+
+```env
+EMBEDDING_ASYMMETRIC=false
+```
+
+## Instruction-Free Models: Keep Symmetric
+
+Some embedding models are instruction-free, sometimes described as using
+implicit intent. They are trained to handle query/document matching from the raw
+text itself and do not require query/document prefixes or provider task
+parameters. For these models, do not set `EMBEDDING_ASYMMETRIC=true`; leave it
+unset or set it to `false`, and do not configure `EMBEDDING_QUERY_PREFIX` or
+`EMBEDDING_DOCUMENT_PREFIX`.
+
+Common examples that should normally stay in symmetric mode:
+
+| Model family | Example model IDs | Notes |
+| --- | --- | --- |
+| BGE-M3 | `BAAI/bge-m3` | Use plain text input. Do not add `search_query:` / `search_document:` unless the specific serving wrapper's model card says otherwise. |
+| OpenAI Text Embedding 3 | `text-embedding-3-small`, `text-embedding-3-large` | The OpenAI embeddings API uses text input plus the model name; it does not expose a query/document task parameter. |
+| Mistral Embed | `mistral-embed` | Use the provider's plain embedding input. Do not invent task prefixes. |
+| Alibaba GTE base models | `gte-large`, `gte-large-zh` | Base GTE models use plain text for normal retrieval. This does not apply to newer `instruct` variants such as `gte-Qwen2-1.5B-instruct`; check that model card. |
+| Jina Embeddings v2 | `jina-embeddings-v2-base-en`, `jina-embeddings-v2-base-zh` | Jina v2 is plain-text input. Jina v3/v4 are different and use the `task` parameter for retrieval tasks. |
+
+If a model is instruction-free, enabling LightRAG's asymmetric mode can make the
+input different from what the model was trained or documented to expect. That can
+reduce retrieval quality even though the server starts successfully.
+
+## Provider Task Parameter Bindings
+
+Use this mode for providers that expose separate query/document embedding tasks.
+Do not configure prefix variables for these bindings.
+
+Jina example:
+
+```env
+EMBEDDING_BINDING=jina
+EMBEDDING_ASYMMETRIC=true
+EMBEDDING_MODEL=jina-embeddings-v4
+```
+
+Gemini example:
+
+```env
+EMBEDDING_BINDING=gemini
+EMBEDDING_ASYMMETRIC=true
+EMBEDDING_MODEL=gemini-embedding-001
+```
+
+VoyageAI example:
+
+```env
+EMBEDDING_BINDING=voyageai
+EMBEDDING_ASYMMETRIC=true
+EMBEDDING_MODEL=voyage-3
+```
+
+If `EMBEDDING_QUERY_PREFIX` or `EMBEDDING_DOCUMENT_PREFIX` is also configured
+for these bindings, LightRAG logs a warning and ignores the prefixes.
+
+## Text Task Prefix Bindings
+
+Use this mode for embedding models that expect task instructions in the input
+text, such as models whose card documents prefixes like `search_query:`,
+`search_document:`, `query:`, or `passage:`. Do not enable this mode just
+because the model is served through `openai`, `azure_openai`, or `ollama`.
+
+Both prefix variables must be explicitly configured:
+
+```env
+EMBEDDING_ASYMMETRIC=true
+EMBEDDING_QUERY_PREFIX="search_query: "
+EMBEDDING_DOCUMENT_PREFIX="search_document: "
+```
+
+If one side should intentionally have no prefix, use the sentinel `NO_PREFIX`:
+
+```env
+EMBEDDING_ASYMMETRIC=true
+EMBEDDING_QUERY_PREFIX="search_query: "
+EMBEDDING_DOCUMENT_PREFIX=NO_PREFIX
+```
+
+`NO_PREFIX` is converted to an empty string internally. It is different from an
+unset variable: it means the side was reviewed and intentionally left without a
+prefix.
+
+At least one side must have a non-empty prefix. This is invalid:
+
+```env
+EMBEDDING_ASYMMETRIC=true
+EMBEDDING_QUERY_PREFIX=NO_PREFIX
+EMBEDDING_DOCUMENT_PREFIX=NO_PREFIX
+```
+
+## Invalid Empty Prefixes
+
+Do not use an empty environment value for an intentional empty prefix:
+
+```env
+EMBEDDING_DOCUMENT_PREFIX=
+```
+
+Use `NO_PREFIX` instead. Empty values are rejected because shell, `.env`, and
+Docker Compose handling can make empty strings indistinguishable from accidental
+missing configuration.
+
+## Validation Summary
+
+| Configuration | Result |
+| --- | --- |
+| `EMBEDDING_ASYMMETRIC` unset | Symmetric mode; prefixes ignored with a warning. |
+| `EMBEDDING_ASYMMETRIC=false` | Symmetric mode; prefixes ignored with a warning. |
+| Instruction-free model such as `BAAI/bge-m3`, `text-embedding-3-small`, `mistral-embed`, base GTE, or Jina v2 | Keep symmetric mode; do not configure prefixes or provider tasks unless the model card says to. |
+| `EMBEDDING_ASYMMETRIC=true` with `jina`/`gemini`/`voyageai` | Provider task mode; prefixes ignored with a warning. |
+| `EMBEDDING_ASYMMETRIC=true` with `openai`/`azure_openai`/`ollama` and both prefix variables configured | Prefix mode. |
+| Prefix mode with a missing prefix variable | Startup error; use a real prefix or `NO_PREFIX`. |
+| Prefix mode with both sides `NO_PREFIX` | Startup error; no asymmetric behavior would occur. |
+| Prefix variable set to an empty value | Startup error; use `NO_PREFIX`. |
+
+Any valid change from one asymmetric embedding configuration to another still
+requires clearing the workspace data and re-indexing the source files.

+ 344 - 0
docs/DockerDeployment.md

@@ -0,0 +1,344 @@
+# LightRAG Docker Deployment
+
+A lightweight Knowledge Graph Retrieval-Augmented Generation system with multiple LLM backend support.
+
+## 🚀 Preparation
+
+### Clone the repository:
+
+```bash
+# Linux/MacOS
+git clone https://github.com/HKUDS/LightRAG.git
+cd LightRAG
+```
+```powershell
+# Windows PowerShell
+git clone https://github.com/HKUDS/LightRAG.git
+cd LightRAG
+```
+
+### Configure your environment:
+
+```bash
+# Linux/MacOS
+cp env.example .env
+# Edit .env with your preferred configuration
+```
+```powershell
+# Windows PowerShell
+Copy-Item env.example .env
+# Edit .env with your preferred configuration
+```
+
+LightRAG can be configured using environment variables in the `.env` file:
+
+**Server Configuration**
+
+- `HOST`: Server host (default: 0.0.0.0)
+- `PORT`: Server port (default: 9621)
+
+**LLM Configuration**
+
+- `LLM_BINDING`: LLM backend to use (lollms/ollama/openai)
+- `LLM_BINDING_HOST`: LLM server host URL
+- `LLM_MODEL`: Model name to use
+
+**Embedding Configuration**
+
+- `EMBEDDING_BINDING`: Embedding backend (lollms/ollama/openai)
+- `EMBEDDING_BINDING_HOST`: Embedding server host URL
+- `EMBEDDING_MODEL`: Embedding model name
+- `EMBEDDING_ASYMMETRIC`: Explicitly enable query/document asymmetric embeddings
+- `EMBEDDING_DOCUMENT_PREFIX`: Document prefix for prefix-based asymmetric embeddings (or `NO_PREFIX`)
+- `EMBEDDING_QUERY_PREFIX`: Query prefix for prefix-based asymmetric embeddings (or `NO_PREFIX`)
+
+See [Asymmetric Embedding Configuration](./AsymmetricEmbedding.md) for prefix
+validation rules and provider-specific behavior.
+
+**RAG Configuration**
+
+- `MAX_ASYNC`: Maximum async operations
+- `MAX_TOKENS`: Maximum token size
+- `EMBEDDING_DIM`: Embedding dimensions
+
+## 🐳 Docker Deployment
+
+Docker instructions work the same on all platforms with Docker Desktop installed.
+
+### Build Optimization
+
+The Dockerfile uses BuildKit cache mounts to significantly improve build performance:
+
+- **Automatic cache management**: BuildKit is automatically enabled via `# syntax=docker/dockerfile:1` directive
+- **Faster rebuilds**: Only downloads changed dependencies when `uv.lock` or `bun.lock` files are modified
+- **Efficient package caching**: UV and Bun package downloads are cached across builds
+- **No manual configuration needed**: Works out of the box in Docker Compose and GitHub Actions
+
+### Start LightRAG server:
+
+```bash
+docker compose up -d
+```
+
+If you used the interactive setup, start the generated stack with:
+
+```bash
+docker compose -f docker-compose.final.yml up -d
+```
+
+The interactive setup keeps `.env` host-usable. Container-only hostnames such as `postgres` or `host.docker.internal`, along with staged SSL paths under `/app/data/certs/`, are injected into the generated `docker-compose.final.yml` for the `lightrag` service instead of being persisted back into `.env`.
+On reruns, unchanged wizard-managed service blocks in `docker-compose.final.yml` are preserved by
+default. To repair or fully regenerate those managed blocks from the bundled templates, rerun the
+matching setup target with `make env-base-rewrite` or `make env-storage-rewrite`.
+
+If the generated stack includes local Milvus, compose resolves `MINIO_ACCESS_KEY_ID` and
+`MINIO_SECRET_ACCESS_KEY` at startup from the repo `.env` or exported shell environment. The
+generated compose file does not snapshot those values, and `docker compose` exits immediately if
+either variable is missing.
+
+Before exposing the generated stack beyond localhost, run:
+
+```bash
+make env-security-check
+```
+
+That command audits the current `.env` for missing authentication, unsafe whitelist settings, weak
+JWT secrets, and other setup-level security risks without rewriting any files.
+
+LightRAG Server uses the following paths for data storage:
+
+```
+data/
+├── rag_storage/    # RAG data persistence
+└── inputs/         # Input documents
+```
+
+### Optional: local vLLM embedding and reranker
+
+To run embedding and/or reranking locally with vLLM, run `make env-base` and answer `yes` when prompted to run the embedding model and rerank service locally via Docker.
+That configures the embedding service to use `BAAI/bge-m3` on port 8001 with a local vLLM server, and can also add a `vllm-rerank` service on port 8000.
+
+Alternatively, rerun `make env-base` later and enable only the rerank Docker prompt to add the `vllm-rerank` service automatically.
+vLLM provides a `v1/rerank` endpoint that works with the `cohere` binding.
+
+Example `docker-compose.override.yml` for GPU hosts (embedding + reranker):
+
+```yaml
+services:
+  vllm-embed:
+    image: vllm/vllm-openai:latest
+    runtime: nvidia
+    command: >
+      --model BAAI/bge-m3
+      --port 8001
+      --dtype float16
+    ports:
+      - "8001:8001"
+    volumes:
+      - ./data/hf-cache:/root/.cache/huggingface
+    ipc: host
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+  vllm-rerank:
+    image: vllm/vllm-openai:latest
+    runtime: nvidia
+    command: >
+      --model BAAI/bge-reranker-v2-m3
+      --port 8000
+      --dtype float16
+    ports:
+      - "8000:8000"
+    volumes:
+      - ./data/hf-cache:/root/.cache/huggingface
+    ipc: host
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+```
+
+For CPU-only hosts, use the official CPU image instead:
+
+```yaml
+services:
+  vllm-embed:
+    image: vllm/vllm-openai-cpu:latest
+    command: >
+      --model BAAI/bge-m3
+      --port 8001
+      --dtype float32
+    ports:
+      - "8001:8001"
+    volumes:
+      - ./data/hf-cache:/root/.cache/huggingface
+
+  vllm-rerank:
+    image: vllm/vllm-openai-cpu:latest
+    command: >
+      --model BAAI/bge-reranker-v2-m3
+      --port 8000
+      --dtype float32
+    ports:
+      - "8000:8000"
+    volumes:
+      - ./data/hf-cache:/root/.cache/huggingface
+```
+
+Add the embedding and rerank config to `.env`:
+
+```bash
+EMBEDDING_BINDING=openai
+EMBEDDING_MODEL=BAAI/bge-m3
+EMBEDDING_DIM=1024
+EMBEDDING_BINDING_HOST=http://localhost:8001/v1
+EMBEDDING_BINDING_API_KEY=local-key
+VLLM_EMBED_DEVICE=cpu
+
+RERANK_BINDING=cohere
+RERANK_MODEL=BAAI/bge-reranker-v2-m3
+RERANK_BINDING_HOST=http://localhost:8000/rerank
+RERANK_BINDING_API_KEY=local-key
+VLLM_RERANK_DEVICE=cpu
+```
+
+If LightRAG runs in Docker while vLLM runs on the host, the generated compose file rewrites those endpoints to:
+
+```bash
+EMBEDDING_BINDING_HOST=http://host.docker.internal:8001/v1
+RERANK_BINDING_HOST=http://host.docker.internal:8000/rerank
+```
+
+For GPU, set:
+
+```bash
+VLLM_EMBED_DEVICE=cuda
+VLLM_RERANK_DEVICE=cuda
+```
+
+Ensure the NVIDIA Container Toolkit is installed and the host has CUDA drivers available.
+The setup wizard uses the CPU image by default for `cpu` device and the GPU image for `cuda` device.
+When rerunning `make env-base`, an existing `VLLM_EMBED_DEVICE` / `VLLM_RERANK_DEVICE` value is
+preserved instead of being overwritten by a fresh GPU auto-detection result.
+Those templates already pin the matching vLLM `--dtype` (`float32` on CPU, `float16` on CUDA), so no separate `VLLM_*_DTYPE` environment variables are needed.
+
+### SSL certificates
+
+The setup wizard stages TLS certificate files under `./data/certs/` before generating the compose file.
+This keeps generated host mounts under the same `./data` root used by the default Docker deployment.
+
+### PostgreSQL image
+
+The interactive setup defaults PostgreSQL to `gzdaniel/postgres-for-rag:pg18-age-pgvector`. This image bundles both Apache AGE and pgvector so the generated stack works with `PGGraphStorage` and `PGVectorStorage` without extra extension setup.
+
+The image no longer ships fixed credentials; on first start it creates the user, password, and database from the `POSTGRES_USER` / `POSTGRES_PASSWORD` / `POSTGRES_DB` environment variables. The setup wizard prompts for these values (defaulting to `rag` / `rag` / `lightrag`) and injects them into the generated `docker-compose.final.yml`, so you can choose any user, password, and database name.
+
+**Important Note**: If you do not need `PGGraphStorage` (for example, PostgreSQL is used only for vector storage), you may replace the image above with the latest official pgvector image `pgvector/pgvector:pg18`. Please note that data file formats are incompatible across different PostgreSQL major versions; once this Docker image is deployed, you cannot roll back to an image based on an earlier PostgreSQL major version.
+
+#### Build the PostgreSQL image
+
+The default PostgreSQL image can be rebuilt from the repository root with `Dockerfile.postgres`. The Dockerfile starts from `pgvector/pgvector:pg18-trixie`, builds Apache AGE for PostgreSQL 18, and installs an init script that creates both the `vector` and `age` extensions for new databases.
+
+For local development or single-host deployment:
+
+```bash
+docker build -f Dockerfile.postgres \
+  -t gzdaniel/postgres-for-rag:pg18-age-pgvector \
+  .
+```
+
+To publish the image to your own registry, use a private tag:
+
+```bash
+docker build -f Dockerfile.postgres \
+  -t registry.example.com/postgres-for-rag:pg18-age-pgvector \
+  .
+docker push registry.example.com/postgres-for-rag:pg18-age-pgvector
+```
+
+For a multi-architecture image:
+
+```bash
+docker buildx build \
+  --platform linux/amd64,linux/arm64 \
+  -f Dockerfile.postgres \
+  -t registry.example.com/postgres-for-rag:pg18-age-pgvector \
+  --push \
+  .
+```
+
+After building a custom tag, update the `postgres.image` value in the generated `docker-compose.final.yml` to point at that tag. On later setup wizard reruns, existing wizard-managed service images are preserved.
+
+### Updates
+
+To update the Docker container:
+```bash
+docker compose pull
+docker compose down
+docker compose up
+```
+
+### Offline deployment
+
+Software packages requiring `transformers`, `torch`, or `cuda` are not preinstalled in the Docker images. Consequently, document extraction tools such as Docling, as well as local LLM backends such as Hugging Face and LMDeploy, cannot be used in an offline environment. These high-compute-resource-demanding services should not be integrated into LightRAG. Docling will be decoupled and deployed as a standalone service.
+
+## 📦 Build Docker Images
+
+### For local development and testing
+
+```bash
+# Build and run with Docker Compose (BuildKit automatically enabled)
+docker compose up --build
+
+# Or explicitly enable BuildKit if needed
+DOCKER_BUILDKIT=1 docker compose up --build
+```
+
+**Note**: BuildKit is automatically enabled by the `# syntax=docker/dockerfile:1` directive in the Dockerfile, ensuring optimal caching performance.
+
+### For production release
+
+**Multi-architecture build and push**:
+
+```bash
+# Use the provided build script
+./docker-build-push.sh
+```
+
+**The build script will**:
+
+- Check Docker registry login status
+- Create/use buildx builder automatically
+- Build for both AMD64 and ARM64 architectures
+- Push to GitHub Container Registry (ghcr.io)
+- Verify the multi-architecture manifest
+
+**Prerequisites**:
+
+Before building multi-architecture images, ensure you have:
+
+- Docker 20.10+ with Buildx support
+- Sufficient disk space (20GB+ recommended for offline image)
+- Registry access credentials (if pushing images)
+
+### Verify official GHCR images with Cosign
+
+Official LightRAG images published to GitHub Container Registry by GitHub Actions are signed with Sigstore Cosign using GitHub OIDC keyless signing.
+
+Install `cosign`, then verify the image tag you want to run:
+
+```bash
+cosign verify ghcr.io/hkuds/lightrag:<tag> \
+  --certificate-identity-regexp '^https://github.com/HKUDS/LightRAG/.github/workflows/(docker-publish|docker-build-manual|docker-build-lite)\.yml@refs/.+$' \
+  --certificate-oidc-issuer https://token.actions.githubusercontent.com
+```
+
+Replace `<tag>` with the version tag you want to validate, for example a release tag, `latest`, `<tag>-lite`, or `lite`.

+ 853 - 0
docs/FileProcessingPipeline-zh.md

@@ -0,0 +1,853 @@
+# 文件处理流水线工作方式说明
+
+从版本 v1.5.0 (目前在dev分支)开始,LightRAG的文件处理流水线进行了重大的升级:
+
+* 支持多种文件内容抽取引擎:legacy、native、mineru、docling
+* 支持多种文本块分块方法:Fix、Recursive、Vector、Paragraph
+* 支持对个别文件关闭实体关系抽取
+
+LightRAG Server引入了一个文件处理的中间格式: `LightRAG Document` 。该格式支持表格和图片等多模态数据,同时包含文章的章节段落元数据,方便日后进行内容溯源。
+
+本文以 **LightRAG Server** 的部署与使用视角组织:先给出快速开始可直接套用的配置,再展开内容抽取与分块的配置语法、存储 / 目录布局、去重、并发以及续跑规则。直接通过 Python 代码调用 `LightRAG` 类的开发者请翻到[第八章 Python SDK 调用](#八python-sdk-调用)。
+
+## 一、快速开始
+
+### 保持旧版文件处理行为
+
+所有文件按旧版的文档解析和分块策略处理。不配置 `LIGHTRAG_PARSER` 或把它配置为如下值:
+
+```bash
+LIGHTRAG_PARSER=*:legacy-F
+```
+
+### 推荐起步文件处理行为
+
+不依赖外部文档解析服务,不依赖`VLM`视觉模型。使用新版原生的 `Native` 解析 `docx` 文档,开启表格(t)和公式(e)的模态分析,搭配`P`分块策略;其余文档使用老版本的内容解析器,搭配效果更好的`R`分块策略。
+
+```bash
+LIGHTRAG_PARSER=*:native-teP,*:legacy-R
+```
+
+### 开启多模态处理能力
+
+开启多模态处理能力需要依赖 `MinerU` 文件解析服务和 `VLM` 视觉识别模型。使用 `Native` 解析 `docx` 文件,使用 `MinerU` 解析 `pdf`、`office` 和各种图片文件。以上文件都开启图片(i)、表格(t)和公式(e)的模态分析,并搭配`P`分块策略。其余文档回退到老版本的内容解析器并搭配`R`分块策略。
+
+```bash
+LIGHTRAG_PARSER=*:native-iteP,*:mineru-iteP,*:legacy-R
+VLM_PROCESS_ENABLE=true
+VLM_LLM_MODEL=kimi-k2.6
+MINERU_API_MODE=local
+MINERU_LOCAL_ENDPOINT=http://localhost:8000
+```
+
+> `P`分块策略是LightRAG原生的分块策略,详情请参阅[Paragraph Semantic 分块策略](ParagraphSemanticChunking-zh.md)。VLM的配置请参阅[基于角色的 LLM/VLM 配置指南](RoleSpecificLLMConfiguration-zh.md)。
+
+## 二、内容抽取与处理选项配置
+
+LightRAG 的文件处理配置由两部分合成:内容抽取引擎决定原始文件如何被解析,处理选项决定解析后是否执行多模态分析、使用哪种分块方式,以及是否构建知识图谱。通常先用环境变量 `LIGHTRAG_PARSER` 按文件后缀设置默认规则,再用文件名中的 `[hint]` 覆盖单个文件。引擎和选项可以写在同一个配置片段里,例如 `docx:native-iet` 或 `report.[native-R!].docx`。
+
+为了向后兼容,在未修改配置的情况下,升级后的文件内容提取方式会维持原来的 `legacy` 行为。如需启用新的内容处理引擎,请按本节说明配置。
+
+### 2.1 配置语法总览
+
+完整配置模型如下:
+
+```text
+LIGHTRAG_PARSER=后缀:引擎-选项,后缀:引擎,*:legacy-R
+filename.[ENGINE].ext
+filename.[ENGINE-OPTIONS].ext
+filename.[-OPTIONS].ext
+```
+
+- `LIGHTRAG_PARSER` 是默认规则表,按文件后缀匹配,例如 `pdf:mineru`、`docx:native-iet`。
+- 文件名 `[hint]` 是单文件覆盖规则,例如 `paper.[mineru].pdf`、`memo.[native-R!].docx`。
+- `ENGINE` 是内容抽取引擎:`legacy`、`native`、`mineru` 或 `docling`。
+- `OPTIONS` 是处理选项字符组合,例如 `iet`、`R!`、`P`。选项最终写入 `process_options`,由后续流水线阶段读取。
+- `ENGINE-OPTIONS` 中的连字符只用于分隔引擎和选项,不属于选项本身。
+- 仅指定处理选项时必须写成 `[-OPTIONS]`,例如 `[-!]`。无横线的 `[abc]` 会被严格解释为引擎名并报错,不会回退为选项串。
+
+常见组合示例:
+
+```bash
+LIGHTRAG_PARSER=pdf:mineru-R,docx:native-ietP,*:legacy-R
+MINERU_API_MODE=local
+MINERU_LOCAL_ENDPOINT=http://localhost:8000
+DOCLING_ENDPOINT=http://localhost:5001
+```
+
+```text
+my-proposal.[native-iet].docx   # 使用 native 引擎,开启图、表、公式分析
+my-memo.[native-R!].docx        # 使用 native 引擎,递归语义分块,禁止知识图谱构建
+my-proposal.[-!].docx           # 使用默认引擎,仅禁止知识图谱构建
+my-proposal.[mineru].docx       # 使用 MinerU 引擎,处理选项全部默认
+```
+
+### 2.2 默认规则:`LIGHTRAG_PARSER`
+
+`LIGHTRAG_PARSER` 用来为不同文件后缀配置默认内容抽取引擎,也可以在引擎后追加该规则的默认处理选项:
+
+```text
+后缀:引擎,后缀:引擎,*:legacy
+后缀:引擎;后缀:引擎;*:legacy
+后缀:引擎-选项
+```
+
+- 左侧匹配的是文件后缀,不是完整文件名;应写 `pdf:mineru`,不要写 `*.pdf:mineru`。
+- 规则可以使用英文逗号 `,` 或分号 `;` 分隔。
+- 规则按从左到右的顺序检查;优先规则放在前面,通配符规则通常放在最后。
+- 引擎名之后的 `-选项` 部分作为该规则匹配文件的默认 `process_options`。例如 `LIGHTRAG_PARSER=docx:native-iet` 表示所有 `.docx` 默认采用 `native` 引擎,并开启图像、表格、公式分析。
+
+### 2.3 单文件覆盖:文件名 hint
+
+文件名中可以使用中括号临时指定单个文件的处理方式:
+
+```text
+paper.[mineru-R].pdf
+slides.[docling].pptx
+memo.[native-P].docx
+notes.[-R].md
+```
+
+中括号内的内容支持三种形式:
+
+```text
+[ENGINE]              # 仅指定引擎,处理选项使用默认或 LIGHTRAG_PARSER 提供的默认
+[ENGINE-OPTIONS]      # 同时指定引擎和处理选项
+[-OPTIONS]            # 仅指定处理选项,引擎仍按 LIGHTRAG_PARSER / 默认规则解析
+```
+
+解析 hint 时,无横线内容必须整体匹配引擎名(`mineru` / `native` / `docling` / `legacy`);带横线且横线前有内容时,横线前是引擎、横线后是选项;以横线开头时表示仅指定选项。旧式 `[OPTIONS]` 写法不再合法,例如 `[iet]` 应改为 `[-iet]`。
+
+### 2.4 内容抽取引擎
+
+| 引擎 | 说明 | 支持的文件格式(后缀) |
+| --- | --- | --- |
+| `legacy` | 旧版提取方式,在加入流水线前集中提取内容 | `txt` `md` `mdx` `pdf` `docx` `pptx` `xlsx` `rtf` `odt` `tex` `epub` `html` `htm` `csv` `json` `xml` `yaml` `yml` `log` `conf` `ini` `properties` `sql` `bat` `sh` `c` `h` `cpp` `hpp` `py` `java` `js` `ts` `swift` `go` `rb` `php` `css` `scss` `less` |
+| `native` | 内置智能结构化内容抽取器 | `docx` |
+| `mineru` | 外部 MinerU 内容提取引擎 | `pdf` `doc` `docx` `ppt` `pptx` `xls` `xlsx` `png` `jpg` `jpeg` `jp2` `webp` `gif` `bmp` |
+| `docling` | 外部 Docling 内容提取引擎 | `pdf` `docx` `pptx` `xlsx` `md` `html` `xhtml` `png` `jpg` `jpeg` `tiff` `webp` `bmp` |
+
+`mineru` 和 `docling` 是外部内容提取引擎,启用相关规则前必须先把服务跑起来,再在 LightRAG 配置对应 endpoint/token。
+
+LightRAG 在本地会缓存 `mineru` 和 `docling` 引擎的解析结果。重复上传相同的文件通常不会重新调用引擎解析文档。如果需要删除解析缓存,必须在文档管理界面删除文件弹窗中点击“同时删除文件”选项。修改 `mineru` 和 `docling` 引擎的端点地址和有效提取参数也会导致缓存失效,下次上传相同文件的时候会重新调用引擎解析文件内容。
+
+#### MinerU 配置方法与本地部署
+
+MinerU 客户端支持两种模式,二选一:
+
+- `local`:自建 MinerU 服务(推荐用官方 Docker Compose 部署),LightRAG 通过 HTTP 调用本地容器。
+- `official`:直连 MinerU 官方精准 API v4,需要在 [mineru.net](https://mineru.net) 申请 token。
+
+**本地化部署(Docker Compose)**
+
+从 [opendatalab/MinerU](https://github.com/opendatalab/MinerU) 克隆官方仓库到本地,进入仓库内的 docker 部署目录后,先构建镜像:
+
+```bash
+docker compose -f compose.yaml build
+```
+
+然后启动 API 服务(带 `--profile api` 才会启用 HTTP API 容器,默认监听 8000 端口):
+
+```bash
+docker compose -f compose.yaml --profile api up -d
+```
+
+镜像构建细节、GPU 驱动准备、模型权重位置等请参考官方 README:<https://github.com/opendatalab/MinerU>。
+
+**LightRAG 侧 env 配置**
+
+Local 模式(自建 mineru-api):
+
+```bash
+MINERU_API_MODE=local
+MINERU_LOCAL_ENDPOINT=http://localhost:8000
+```
+
+Official 模式(MinerU 云端 API):
+
+```bash
+MINERU_API_MODE=official
+MINERU_API_TOKEN=<your_token>
+# MINERU_OFFICIAL_ENDPOINT=https://mineru.net   # 默认值,通常无需修改
+```
+
+其余高级开关(`MINERU_MODEL_VERSION`、`MINERU_LANGUAGE`、`MINERU_ENABLE_TABLE` / `MINERU_ENABLE_FORMULA`、`MINERU_PAGE_RANGES`、`MINERU_LOCAL_BACKEND` / `MINERU_LOCAL_PARSE_METHOD`、`MINERU_POLL_INTERVAL_SECONDS` / `MINERU_MAX_POLLS`、`MINERU_ENGINE_VERSION`、`LIGHTRAG_FORCE_REPARSE_MINERU` 等)请参考仓库根目录 `env.example` 模板的 MinerU 小节。需要特别注意 `MINERU_PAGE_RANGES` 在两种模式下语义不同:`official` 支持完整列表(如 `1-3,5,7-9`),`local` 仅支持单页(`3`)或简单范围(`1-10`),不接受逗号列表。
+
+#### Docling 配置方法
+
+`docling` 内容提取引擎需要外部的 [docling-serve](https://github.com/DS4SD/docling-serve) 服务(v1 异步 API)。最少配置:
+
+```bash
+DOCLING_ENDPOINT=http://localhost:5001
+```
+
+`DOCLING_ENDPOINT` 只填 base URL(**不**带 `/v1/convert/file/async`)。目前LightRAG固定使用 Docling 的 standard 流水线处理文件。用户可以通过以下环境变量来控制 Docling 流水线的行为:
+
+| Env | 默认 | 含义 |
+| --- | --- | --- |
+| `DOCLING_DO_OCR` | `true` | OCR 总开关 |
+| `DOCLING_FORCE_OCR` | `true` | 强制对每页 OCR(扫描件必须开,非扫描件开启通常也有助于提高版面识别质量) |
+| `DOCLING_OCR_ENGINE` | `auto` | OCR 引擎选择(不建议修改) |
+| `DOCLING_OCR_PRESET` | `auto` | OCR 引擎 preset(不建议修改) |
+| `DOCLING_OCR_LANG` | (空) | 按照OCR引擎要求设置(不建议修改) |
+| `DOCLING_DO_FORMULA_ENRICHMENT` | `false` | 是否识别文档中的公式并按 LaTeX 格式输出;启用前需要确保Docling后台下载了公式识别模型(见后面说明) |
+
+未配置 `DOCLING_OCR_ENGINE` / `DOCLING_OCR_PRESET` 时等同于 `auto`;未配置 `DOCLING_OCR_LANG` 时不向 docling-serve 传递语言列表,由 OCR 引擎使用自身默认值。解析缓存按这些有效参数计算签名,因此“未配置”和“显式填写默认值”不会导致缓存失效。
+
+轮询预算 2 个 env(docling-serve 是 server-side long-poll,客户端不再额外 sleep):
+
+| Env | 默认 | 含义 |
+| --- | --- | --- |
+| `DOCLING_POLL_INTERVAL_SECONDS` | `5` | 等待解析结果的轮询间隔时间 |
+| `DOCLING_MAX_POLLS` | `240` | 最大轮询轮次,超过抛 `TimeoutError`;<br />默认等待时间 ≈ 5 x 240(约20 分钟) |
+
+Bundle 缓存 3 个 env:
+
+| Env | 默认 | 含义 |
+| --- | --- | --- |
+| `DOCLING_ENGINE_VERSION` | (空) | Docling引擎版本;版本变化会导致解析缓存失效 |
+| `LIGHTRAG_FORCE_REPARSE_DOCLING` | `false` | 设为 `true`/`1` 时不启用解析缓存 |
+| `DOCLING_BBOX_ATTRIBUTES` | `{"origin":"LEFTBOTTOM"}` | Docling 版面默认坐标系 |
+
+**`DOCLING_DO_FORMULA_ENRICHMENT` 启用前提**:docling-serve 侧需就绪 code-formula 模型权重。adapter 双轨兼容 —— 启用时 `text` 字段为 LaTeX,关闭或权重缺失导致 `text == orig` 时自动按普通文本处理,不写 `equations.json`。因此默认 `false` 是保守值,部署侧确认模型就绪后再开启。
+
+#### Docling本地部署(启用 LaTeX 公式识别)
+
+下面以 Docker 部署 docling-serve 为例,给出从镜像下载到模型挂载的完整步骤,部署完成后将 `DOCLING_DO_FORMULA_ENRICHMENT=true` 写入 LightRAG 的 `.env` 即可启用 LaTeX 公式识别。
+
+> **重要提示**:以下步骤基于显卡支持 CUDA 13 的环境。如果显卡较老旧、不支持 CUDA 13,需要把命令与 compose 文件中的镜像名 `docling-serve-cu130:main` 替换为对应 CUDA 版本的标签。可选镜像列表参见 [docling-serve Packages](https://github.com/orgs/docling-project/packages?repo_name=docling-serve)。
+
+**1. 下载镜像**
+
+```bash
+docker pull ghcr.io/docling-project/docling-serve-cu130:main
+```
+
+**2. 下载模型**
+
+```bash
+# 创建 docling 工作目录
+mkdir docling
+cd docling
+
+# 创建模型挂载目录
+mkdir models
+
+# 把容器内的原有模型拷贝到 models 目录
+docker run --rm -it \
+  -v "$(pwd)/models:/opt/app-root/src/models" \
+  ghcr.io/docling-project/docling-serve-cu130:main \
+  cp -r /opt/app-root/src/.cache/docling/models /opt/app-root/src/
+
+# 下载公式识别模型
+docker run --rm \
+  -v "$(pwd)/models:/opt/app-root/src/models" \
+  -e DOCLING_SERVE_ARTIFACTS_PATH="/opt/app-root/src/models" \
+  ghcr.io/docling-project/docling-serve-cu130:main \
+  docling-tools models download-hf-repo docling-project/CodeFormulaV2 -o models
+```
+
+**3. 创建 `docker-compose.yaml` 文件**
+
+在上一步的 `docling` 目录下创建 `docker-compose.yaml`,内容如下:
+
+```yaml
+services:
+  docling-serve:
+    image: ghcr.io/docling-project/docling-serve-cu130:main
+    container_name: docling-serve
+    ports:
+      - "5001:5001"
+    environment:
+      DOCLING_SERVE_ENABLE_UI: "true"
+      NVIDIA_VISIBLE_DEVICES: "all"
+      DOCLING_SERVE_ARTIFACTS_PATH: "/opt/app-root/src/models"
+    # deploy:  # This section is for compatibility with Swarm
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: all
+    #           capabilities: [gpu]
+    runtime: nvidia
+    restart: always
+    volumes:
+      - ./models:/opt/app-root/src/models
+```
+
+随后在该目录执行 `docker compose up -d` 启动服务。容器就绪后,在 LightRAG 的 `.env` 中设置:
+
+```bash
+DOCLING_ENDPOINT=http://localhost:5001
+DOCLING_DO_FORMULA_ENRICHMENT=true
+```
+
+即可让 LightRAG 通过本地 docling-serve 识别文档中的公式并以 LaTeX 形式输出。
+
+### 2.5 文件处理选项
+
+处理选项控制单个文件在多模态分析、知识图谱构建和文本分块上的行为。所有选项都是可选的;缺省值见下表。同一文件最多指定一种分块方式(F/R/V/P),其它选项可任意组合。
+
+| 选项 | 类型 | 默认 | 含义 |
+| --- | --- | --- | --- |
+| `i` | 多模态 | 关闭 | 启用图像分析(VLM) |
+| `t` | 多模态 | 关闭 | 启用表格分析(VLM) |
+| `e` | 多模态 | 关闭 | 启用公式分析(VLM) |
+| `!` | 流水线 | 关闭 | 禁止实体/关系抽取,不构建知识图谱(仅保留 chunks 向量索引,naive / mix 检索仍可用) |
+| `F` | 分块 | 默认 | Fix/固定长度分块:遗留方法, 按固定Token长度或按分隔符机械分割(按分隔符分割时文本块不会出现重叠) |
+| `R` | 分块 | - | Recursive/递归字符分块(RecursiveCharacterTextSplitter@LangChain):接收一个分隔符列表(默认是 `["\n\n","\n","。","!","?",";",","," ",""]`,按从语义最强到最弱排列)。优先按段落(双换行符)切分;如果切出的块依然超过 Token 限制,逐级降级使用单换行符 → 中文句末标点(`。!?`)→ 中文句中标点(`;,`)→ 空格 → 逐字符切分。**默认 cascade 包含中文标点**,使中文 / 中英混合文档能在语义边界切分。英文 `.?!` 故意排除(字面量匹配会误切 `0.95` / `e.g.`)。 |
+| `V` | 分块 | - | Vector/向量语义分块(SemanticChunker@LangChain):首先按句子拆分文本(默认句子切分正则同时识别英文 `.?!` 与中文 `。?!`,使中文 / 中英混合文档能正确切句),计算相邻句子的 Embedding,然后根据指定的阈值策略(如百分位 percentile、标准差 standard_deviation 或四分位距 interquartile)寻找语义断层进行切分。`SemanticChunker` 本身没有 chunk size 上限——任何超过 `chunk_token_size` 的语义块在落库前会自动通过 R 二次切分(保留 V 的非重叠语义)。此分块策略不会出现文本块重叠的情况。 |
+| `P` | 分块 | - | Paragraph/段落语义分块(native);优先按标题分割,严格避免上一标题底部内容与下一个标题内容混合破坏语义。适合对能够准确识别标题且标题结构清晰的文档进行分块。同一标题下的超长正文 fallback 到 R 时允许按 `CHUNK_P_OVERLAP_SIZE` 保留重叠;相邻大表格之间的桥接文字也可按该预算重复进入前后表格块。此分块方法只能运用在保存在 sidecar 目录的 `lightrag` 内容。如果 `lightrag` 内容不存在,将退化为使用 `R` 方法进行文本分块。此分块方法出现文本块重叠的情况远少于 `R策略` 和 `F策略`。 |
+
+> 多模态全局开关 `addon_params["enable_multimodal_pipeline"]` 已废弃,相关行为统一由文件级 `i/t/e` 选项控制。详见[附录 A](#附录-a从旧版升级的注意事项)。
+
+#### 选项生效阶段
+
+处理选项的不同字符在流水线的不同阶段生效:
+
+| 选项 | 作用阶段 | 说明 |
+| :-: | --- | --- |
+| i/t/e | Analyzing多模态分析 | 决定是否对 sidecar 中的图像 / 表格 / 公式调用 VLM 做摘要分析。**抽取阶段不受影响**:内容提取引擎按文档实际内容输出 `drawings.json` / `tables.json` / `equations.json` sidecar 文件。这样后续仅修改 `i`/`t`/`e` 选项触发"再分析"即可补做 VLM,无须重新解析原始文件。 |
+| ! | Extraction实体关系抽取 | 跳过实体/关系抽取与图谱写入;chunks 仍写入向量库以保留 naive / mix 检索能力。 |
+| F/R/V/P | Chunking文本分块 | 决定使用哪种分块策略;对解析阶段输出无影响。 |
+
+> 模态可用性以"sidecar 文件是否存在"为唯一信号,内容提取引擎不需要在 meta 中声明能力。某文档若没有任何图像/表格/公式,对应 sidecar 不会写入;用户即使开启了 `i/t/e`,对应模态也只会被静默跳过,但 `analyze_multimodal` 会在该篇文档落一行 INFO 级日志(`[analyze_multimodal] sidecar e:equations empty: doc—id ...`),便于排查"VLM 为何没跑"。这种情况不会报错。
+
+### 2.6 校验、优先级与回退
+
+- 启动时会严格校验 `LIGHTRAG_PARSER`:未知内容提取引擎、错误后缀写法、显式使用不支持的后缀、外部引擎缺少 endpoint、处理选项中的非法字符都会导致启动失败。
+- **通配符规则匹配某后缀时**,引擎需通过两道可用性检查(见 `parser_routing._engine_is_usable`):(a) 该引擎能力表支持此后缀;(b) 若是外部引擎(`mineru` / `docling`),对应 endpoint/token 环境变量已配置。任一检查不过,本规则跳过,继续匹配下一条规则。例如 `*:mineru;html:docling` 中:MinerU 不支持 `html` 后缀(条件 a 不过),`html` 继续命中 `docling`;如果 `MINERU_API_MODE=local` 但未设置 `MINERU_LOCAL_ENDPOINT`,所有 PDF 也会跳过 `*:mineru` 落到下一条规则(条件 b 不过)。这一行为对 `LIGHTRAG_PARSER` 规则匹配和文件名 hint 引擎选择都生效。
+- 文件名 hint 的优先级高于 `LIGHTRAG_PARSER`。如果 hint 指定的引擎不支持该后缀,系统会回退到默认规则继续选择可用引擎。
+- 如果文件名 hint 提供了非空选项串,则以 hint 为准;否则使用 `LIGHTRAG_PARSER` 规则中匹配项的默认选项;都没有则使用全部默认。
+- 如果所有规则都不可用,文件内容提取方式会回退到 `legacy`;如果 `legacy` 也不支持对应的文件后缀,会向系统添加一个错误条目,上传文件保留在 `INPUT` 目录。
+- F/R/V/P至多出现一个;同一选项重复时只生效一次但不报错。
+- 大小写敏感:分块选项 F/R/V/P必须大写;其它选项 i/t/e小写。
+- 中括号内出现非法字符时,整个 hint 失效,引擎按默认规则解析,选项按 `LIGHTRAG_PARSER` 默认或全部默认;同时落日志 warning。
+- `P` 仅对 `native` 抽取出的 LightRAG Document 结构化结果有效;对 `legacy` 路径或非结构化输出会自动降级到 `R` 并记录 warning。
+
+## 三、分块器参数配置(chunk_options)
+
+### 3.1 process_options vs chunk_options 的职责
+
+`process_options` 选**用哪种**分块策略(F/R/V/P),`chunk_options` 决定那一路分块器**用哪些参数**。两者职责正交:前者是单字符 selector,后者是结构化字典。
+
+```
+env vars                                                  (启动期一次性读取)
+   │
+   ▼
+addon_params["chunker"]                                   (LightRAG 实例字段,由 env 与 legacy 兜底填入)
+   │
+   ▼  resolve_chunk_options(addon_params, split_by_character=…, split_by_character_only=…)
+   │
+full_docs[doc_id]["chunk_options"]                       (入队时冻结,每文件独立快照)
+   │
+   ▼
+chunker(tokenizer, content, chunk_token_size, **strategy_kwargs)   (分块时按 selector 派发)
+```
+
+- **env vars** 在 `LightRAG.__init__` 阶段(由 `default_chunker_config()` 读取 strategy 特定 env,再由 `_apply_chunk_size_overlay` 兜底 legacy env)灌进 `addon_params["chunker"]`。
+- **`addon_params["chunker"]`** 是 `ObservableAddonParams` 字段;Server 部署只需通过 env / 重启即可让新值生效。若需要在 Python 进程内运行时改它(不重启)以及 per-file 覆盖,请见[第八章 Python SDK 调用](#八python-sdk-调用)。
+- **`full_docs.chunk_options`** 在 `apipeline_enqueue_documents` 入队时冻结:默认由 `resolve_chunk_options(self.addon_params, ...)` 现场拼装;若调用方传入 `chunk_options` 参数则原样持久化(SDK 用法,见 §8.4)。
+- **分块器调用**从 `full_docs.chunk_options` 取对应子字典,按 `process_options.chunking` selector 派发到 F/R/V/P。
+
+### 3.2 环境变量
+
+下表所有变量在 `LightRAG` 实例化时一次性读入 `addon_params["chunker"]`:strategy 特定 env 由 `default_chunker_config()` 读取,legacy env (`CHUNK_SIZE` / `CHUNK_OVERLAP_SIZE`) 由 `_apply_chunk_size_overlay` 在 strategy env 与 legacy 构造字段都没填的槽位上兜底。修改 env 后需要重启服务(或新建 `LightRAG` 实例)才生效;已入队的文档持有冻结快照不受影响。
+
+| 变量 | 默认 | 类型 | 作用域 |
+|---|---|---|---|
+| `CHUNK_SIZE` | `1200` | int | legacy 顶层 `chunk_token_size` 兜底;优先级低于 strategy 特定 env 与 SDK 路径设置的 `addon_params["chunker"]["chunk_token_size"]` |
+| `CHUNK_OVERLAP_SIZE` | `100` | int | legacy overlap 兜底;当某 strategy 既无特定 env (`CHUNK_F_OVERLAP_SIZE` / `CHUNK_R_OVERLAP_SIZE` / `CHUNK_P_OVERLAP_SIZE`) 又无 SDK 路径的 `LightRAG(chunk_overlap_token_size=…)` 时填入 |
+| `CHUNK_F_SIZE` | 未设 | int | F strategy 特定 `chunk_token_size`;高于顶层 legacy 兜底(`CHUNK_SIZE` 与 SDK 路径的 `LightRAG(chunk_token_size=…)`)。未设时 F 沿用顶层解析结果 |
+| `CHUNK_F_OVERLAP_SIZE` | 未设 | int | F strategy 特定 overlap;高于 legacy 构造字段与 `CHUNK_OVERLAP_SIZE` |
+| `CHUNK_F_SPLIT_BY_CHARACTER` | (未设 = `null`) | str? | F 预切分隔符;`null` / 空串 = 仅按 token 窗 |
+| `CHUNK_F_SPLIT_BY_CHARACTER_ONLY` | `false` | bool | F 严格模式:不二次按 token 切,超长抛错 |
+| `CHUNK_R_SIZE` | 未设 | int | R strategy 特定 `chunk_token_size`;高于顶层 legacy 兜底(`CHUNK_SIZE` 与 SDK 路径的 `LightRAG(chunk_token_size=…)`)。未设时 R 沿用顶层解析结果 |
+| `CHUNK_R_OVERLAP_SIZE` | 未设 | int | R strategy 特定 overlap;高于 legacy 构造字段与 `CHUNK_OVERLAP_SIZE` |
+| `CHUNK_R_SEPARATORS` | `["\n\n","\n","。","!","?",";",","," ",""]` | JSON 数组字符串 | R 分隔符级联,按从语义最强到最弱排列。默认包含中文句末(`。!?`)和句中(`;,`)标点,使中文 / 中英混合文档能在语义边界切分。英文 `.?!` 故意排除(字面量匹配会误切数字与缩写) |
+| `CHUNK_V_SIZE` | 未设 | int | V strategy 特定 `chunk_token_size`(hard cap,超过时自动通过 R 二次切分);高于顶层 legacy 兜底。未设时 V 沿用顶层解析结果 |
+| `CHUNK_V_BREAKPOINT_THRESHOLD_TYPE` | `percentile` | str | V 阈值类型;可选 `percentile` / `standard_deviation` / `interquartile` / `gradient` |
+| `CHUNK_V_BREAKPOINT_THRESHOLD_AMOUNT` | (未设 = `null`) | float? | V 阈值大小;`null` 让 LangChain 按类型自选默认(如 percentile=95) |
+| `CHUNK_V_BUFFER_SIZE` | `1` | int | V 句子缓冲窗,距离计算时合并的相邻句数 |
+| `CHUNK_V_SENTENCE_SPLIT_REGEX` | `(?<=[.?!])\s+\|(?<=[。?!])` | str | V 的句子切分正则,喂给 LangChain `SemanticChunker`。默认同时识别英文 `.?!`(要求后接空白,避免误切 `0.95`)和中文 `。?!`(不要求空白,适应中文连写)。env 值为原始正则字符串,无需 JSON 引号 |
+| `CHUNK_P_SIZE` | `2000`(`DEFAULT_CHUNK_P_SIZE`) | int | P strategy 特定 `chunk_token_size`。与 R/V 不同,未设时 P **不**沿用顶层 `CHUNK_SIZE` / `LightRAG(chunk_token_size=…)`——段落语义合并需要比全局默认更大的上限才能将相关段落保留在一起,因此槽位始终携带 `DEFAULT_CHUNK_P_SIZE`(2000) |
+| `CHUNK_P_OVERLAP_SIZE` | 未设 | int | P strategy 特定 overlap;高于 legacy 构造字段与 `CHUNK_OVERLAP_SIZE`。用于同一 JSONL content 行内长正文 fallback 到 R 时的文本重叠,以及相邻大表格之间桥接文字复制到前后表格块的单侧预算 |
+
+P 的内部比例常量是算法刻度,会随 `chunk_token_size` 自动按比例推导。P 始终使用独立于全局链的 `chunk_token_size`——即使 `CHUNK_P_SIZE` 未设,P 也会回退到 `DEFAULT_CHUNK_P_SIZE`(2000)而**不**沿用全局 `CHUNK_SIZE`,因为段落语义合并需要比全局默认更大的上限才能将相关段落保留在一起。需要按部署调整时通过 `CHUNK_P_SIZE` 覆盖该默认。`CHUNK_P_OVERLAP_SIZE` 只影响 P 内部普通文本 fallback 与表格桥接上下文,不会让表格行级切片互相重叠。`CHUNK_F_SIZE` / `CHUNK_R_SIZE` / `CHUNK_V_SIZE` 行为不同——未设时**仍会**沿用顶层 `chunk_token_size`(F 即默认全局窗口,R 偏向较小目标利于句段切分,V 的该值是 hard cap(超过的语义块会通过 R 二次切分),通常希望放大以减少过度拆分)。
+
+### 3.3 优先级链
+
+每个分块槽位的最终值按 specificity-ordered 链解析(高 → 低):
+
+1. **`addon_params["chunker"]` 显式值** —— 通过 SDK 路径运行时设置或在构造时显式写入的字段值(见 §8.3)。Server-only 部署通常不会出现这一档。最直接,赢一切。
+2. **strategy 特定 env** —— 如 `CHUNK_F_SIZE` / `CHUNK_R_SIZE` / `CHUNK_V_SIZE`(各策略 `chunk_token_size`)、`CHUNK_F_OVERLAP_SIZE` / `CHUNK_R_OVERLAP_SIZE` / `CHUNK_P_OVERLAP_SIZE`(overlap)、`CHUNK_P_SIZE`(P 专属)。未设对应 size env 时,F/R/V 沿用顶层 `chunk_token_size`。仅当槽位未被 ① 显式占用时填入。
+3. **legacy 构造字段** —— `LightRAG(chunk_token_size=…, chunk_overlap_token_size=…)`,仅 SDK 路径生效,详见 §8.2。strategy 无关,"粗粒度缺省",只填仍空的槽位。
+4. **legacy env** —— `CHUNK_SIZE` / `CHUNK_OVERLAP_SIZE`。最终回退。
+
+举例:`CHUNK_R_OVERLAP_SIZE=42` + `LightRAG(chunk_overlap_token_size=2)` → R 子字典 `chunk_overlap_token_size=42`(strategy env 胜出),F / P 子字典 `chunk_overlap_token_size=2`(无 F / P 特定 env,legacy 构造字段填入)。
+
+**P 的 `chunk_token_size` 特例**:P 的 `chunk_token_size` 槽位**不**走完整的四档链。当 ① 未显式提供时,直接按 `CHUNK_P_SIZE` env > `DEFAULT_CHUNK_P_SIZE`(2000)解析,**跳过** ③ legacy 构造字段 `LightRAG(chunk_token_size=…)` 与 ④ legacy env `CHUNK_SIZE`。理由参见 §3.2 `CHUNK_P_SIZE` 行。
+
+三层语义保证:
+
+1. **复现性**:env 改了,重启后老文档仍按入队那一刻的快照分块,结果不变。
+2. **续跑一致性**:续跑分支 B(内容已抽取,按当前 `process_options` 重做分块)读的也是 `full_docs.chunk_options`,避免 env 漂移破坏一致性。
+3. **per-file 个性化**:调用方可以为每个文件传不同的 `chunk_options`(典型用法:管理 UI 单独配置某个文件的 separators 或 V 阈值)。这是 SDK 路径的入参语义,详见 §8.4。
+
+### 3.4 字段结构
+
+`addon_params["chunker"]`(实例字段)保留全部四种策略的子字典作为运行时基线;`full_docs[doc_id]["chunk_options"]` 是**精简快照**——入队时只保留 `process_options` 选中的那一路策略子字典(缺省 F),其它策略的参数会被丢弃,因为处理阶段不会读它们。重新解析时 `process_options` 与 `chunk_options` 一同改写,避免旧策略的参数残留。
+
+**`addon_params["chunker"]` 全量基线**(运行时可由 SDK 修改,影响后续入队):
+
+```jsonc
+{
+  "chunk_token_size": 1200,                                   // 通用 token 上限
+  "fixed_token": {                                            // F 专属
+    "chunk_token_size": 1200,                                 // 可选;不写沿用顶层 chunk_token_size(可由 CHUNK_F_SIZE 种子化)
+    "chunk_overlap_token_size": 100,
+    "split_by_character": null,
+    "split_by_character_only": false
+  },
+  "recursive_character": {                                    // R 专属
+    "chunk_token_size": 1200,                                 // 可选;不写沿用顶层 chunk_token_size
+    "chunk_overlap_token_size": 100,
+    "separators": ["\n\n", "\n", "。", "!", "?", ";", ",", " ", ""]   // 默认 cascade 含中文标点
+  },
+  "semantic_vector": {                                        // V 专属
+    "chunk_token_size": 1200,                                 // 可选 hard cap;超过时通过 R 二次切分
+    "breakpoint_threshold_type": "percentile",                // percentile | standard_deviation | interquartile | gradient
+    "breakpoint_threshold_amount": null,                      // null = LangChain 默认
+    "buffer_size": 1,
+    "sentence_split_regex": "(?<=[.?!])\\s+|(?<=[。?!])"      // 默认正则兼容中英文句末标点
+  },
+  "paragraph_semantic": {                                     // P 专属
+    "chunk_token_size": 2000,                                 // 不写则按 CHUNK_P_SIZE 或 DEFAULT_CHUNK_P_SIZE(2000)解析;
+                                                              // **不**继承通用 chunk_token_size
+    "chunk_overlap_token_size": 100                           // 不写沿用 legacy overlap 解析链
+  }
+}
+```
+
+**`full_docs[doc_id]["chunk_options"]` 精简快照**(按 selector 投影;下例为 `process_options="R"`):
+
+```jsonc
+{
+  "chunk_token_size": 1200,                                   // 通用 token 上限(保留为顶层 fallback)
+  "recursive_character": {                                    // 唯一保留的策略子字典
+    "chunk_overlap_token_size": 100,
+    "separators": ["\n\n", "\n", "。", "!", "?", ";", ",", " ", ""]
+  }
+}
+```
+
+selector → 子字典映射:F → `fixed_token`,R → `recursive_character`,V → `semantic_vector`,P → `paragraph_semantic`;无 selector 默认 F。各子字典与对应分块器函数的 keyword-only 参数一一对应;新增参数时无需改 dispatcher,只在 chunker 函数添加 kwarg 即可。
+
+### 3.5 缺失兼容
+
+老文档入队时还没有 `chunk_options` 字段;分块时 dispatcher 会按当前 `process_options` 调用 `resolve_chunk_options(self.addon_params, process_options=…)` 兜底拼装一份精简快照。建议在升级后通过 reprocess 一次让老文档拿到精简的 `chunk_options` 快照(且与当前 `process_options` 对齐)。
+
+## 四、存储与目录布局
+
+### 4.1 `full_docs` 字段
+
+文件入队和抽取结果会写入 `full_docs`:
+
+| 字段 | 说明 |
+| --- | --- |
+| `file_path` | 文件名 basename(不含目录),**保留用户提供的原始名(含中括号 hint)**,例如 `abc.[native-iet].docx` 原样写入。未提供有效来源时保存为 `unknown_source`。文件名 hint 不会被剥离,方便管理 UI 直接展示用户原本的命名意图。 |
+| `canonical_basename` | 去掉处理提示 hint 后的规范化 basename(例如 `abc.docx`)。文件名查重以此字段为索引 key,保证 `abc.docx` 与 `abc.[native-iet].docx` 视为同一逻辑文档。 |
+| `source_path` | 入队时提供的原始路径(仅当含目录分隔符或绝对路径时才写入),供 `native` / `mineru` / `docling` 解析器定位真实文件位置。 |
+| `parse_format` | 内容格式:`pending_parse`, `raw`, `lightrag`。 |
+| `content` | `raw` 时保存抽取文本;`pending_parse` 时为空字符串;`lightrag` 时存储以 `{{LRdoc}}` 开头的**完整合并文本**(拼接 `.blocks.jsonl` 中所有 `type=="content"` 行的 body 段),分块阶段 `parse_native` 会剥离前缀后再交给 chunking_func,与 `raw` 走完全相同的代码路径。 |
+| `content_hash` | 内容 MD5,用于跨文件名查重。`parse_format=raw` 取 `sanitize_text_for_encoding` 后文本的 hash;`parse_format=lightrag` 取 `*.blocks.jsonl` 文件 hash;`parse_format=pending_parse` 不写入,待抽取完成后补上。 |
+| `lightrag_document_path` | `parse_format=lightrag` 时保存结构化 LightRAG Document 的路径;新记录优先保存为相对 `INPUT_DIR` 的路径,例如 `__parsed__/report.docx.parsed/report.blocks.jsonl`。注意路径中的子目录与 blocks 文件名都使用规范化 basename(不含 hint)。 |
+| `parse_engine` | 实际完成抽取的引擎:`legacy`, `native`, `mineru`, `docling`。对于待抽取文件,也可暂存目标引擎。 |
+| `process_options` | 入队时记录的原始处理选项串(不含引擎名和分隔 `-`),例如 `"iet"`、`"R!"`、`""`。下游各阶段以此字段为权威源,决定是否启用图像/表格/公式分析(`i/t/e`)、是否禁止知识图谱构建(`!`)以及分块方式(`F/R/V/P`)。空字符串等价于全部默认值。 |
+| `chunk_options` | 入队时**冻结**的分块器参数快照(精简字典:只保留 `process_options` 选中的那一路策略子字典,其它策略丢弃)。由 SDK 路径调用方传入或由 `resolve_chunk_options(self.addon_params, process_options=…)` 从实例字段(含 env 默认)兜底(见 §3.1)。`process_options` 选哪种分块策略(F/R/V/P),`chunk_options` 决定那一路分块器使用哪些参数。下游 `process_single_document` 在分块前从此字段读取专属 kwargs;持久化保证 env 变化、续跑、重启后老文档行为可复现。重新解析时与 `process_options` 一同改写。 |
+
+`pending_parse` 表示文件已经入队,但还没有完成抽取。抽取成功后会改写为 `raw` 或 `lightrag`,并补齐 `content_hash`。抽取失败时保留 `pending_parse` 和空 `content`,便于后续排查和重试。
+
+> `doc_status` 中也同步保存原始 `file_path`(含 hint)、`canonical_basename` 与 `content_hash`,作为 `get_doc_by_file_basename` / `get_doc_by_content_hash` 的查重索引来源。`get_doc_by_file_basename` 内部把传入参数先经 `canonicalize_parser_hinted_basename` 规范化后再与 `canonical_basename` 比对,因此 `abc.docx` 与 `abc.[native-iet].docx` 总是命中同一文档。
+> `process_options` 同时镜像写入 `doc_status.metadata["process_options"]`,便于管理 UI 直接展示当前文件的处理策略。
+
+### 4.2 `__parsed__` 目录结构
+
+`__parsed__` 是输入目录旁的归档与分析结果目录。它同时保存已经处理过的原始文档,以及结构化解析产生的 LightRAG Document(`lightrag` 格式)文件和图片等资源。
+
+- 原始文件归档:`legacy` 本地抽取成功并入队后,原文件会移动到同级 `__parsed__` 目录;`native` / `mineru` / `docling` 会先保留原文件供 pipeline 解析,解析成功并写入 `full_docs` 后再移动到 `__parsed__`。**归档时保留原始文件名(含 `[hint]`)**,例如 `report.[native-iet].docx` 归档为 `__parsed__/report.[native-iet].docx`,便于追溯用户最初的命名与处理选项。
+- 分析结果目录:结构化解析结果会写入以**规范化文件名**(去掉 `[hint]`)加 `.parsed` 后缀命名的子目录,避免与归档原文件同名冲突,并保证当文件名 hint 或处理选项变化时同一逻辑文档继续指向同一目录。例如 `report.docx`、`report.[native].docx`、`report.[native-iet].docx` 的分析结果都写入 `__parsed__/report.docx.parsed/`。
+- 分析结果文件:LightRAG Document blocks 文件以及 sidecar 都使用规范化文件名的主干命名,例如 `__parsed__/report.docx.parsed/report.blocks.jsonl`;同一目录下还可能包含 `report.tables.json`、`report.drawings.json`、`report.equations.json` 和 `report.blocks.assets/` 图片资源目录。**sidecar 是否生成由文档内容决定**:解析器只在文档实际包含表格/图片/公式时写出对应文件。这是模态可用性的唯一信号 —— 引擎不需要在 meta 中声明能力。`i`/`t`/`e` 选项只决定下一阶段是否对已存在的 sidecar 调用 VLM 做摘要分析。
+- 解析失败时,原文件不会移动,便于修复配置后重新处理。
+- `/documents/scan` 扫描到同名且已 `PROCESSED` 的文件时,该输入文件会被视为已处理并移动到 `__parsed__`,不会作为新文档入队。
+- `/documents/scan` 同一次扫描中发现多个规范化后同名的文件时,会优先保留带支持引擎 hint 的文件以尊重用户的引擎选择;如果没有任何变体带 hint,则按排序处理第一个文件。其余变体会输出 warning 并移动到 `__parsed__`,避免同批文件互相覆盖。例如 `abc.docx` 和 `abc.[native].docx` 同时存在时只会处理 `abc.[native].docx`。
+- 扫描或解析过程中发现内容 hash 重复时,该输入文件同样会移动到 `__parsed__`;本次 `doc_status` 保留为 `FAILED duplicate` 以便追踪。
+- 移动文件只作用于当前输入文件,不会覆盖或移动既有文档源文件。若目标目录已存在同名文件,系统会自动追加 `_001`、`_002` 等编号,例如 `report.pdf` 会依次归档为 `report_001.pdf`、`report_002.pdf`。若分析结果目录名已被普通文件占用,也会追加编号,例如 `report.docx.parsed_001/`。
+
+### 4.3 MinerU 原始产物目录 `<base>.mineru_raw/`
+
+`mineru` 引擎在解析过程中会把 MinerU 服务返回的完整产物(`content_list.json` + 可选的 `full.md` / `middle.json` / `layout.pdf` / `images/` 等)落到 `__parsed__/<规范文件名>.mineru_raw/` 目录下,并写入 `_manifest.json` 作为完整性校验文件。
+
+设计目的:
+
+- **避免重复上传**。再次解析同一文件时,先用源文件的内容 hash + 文件大小校验 `_manifest.json`,命中即跳过 MinerU 服务调用,直接从本地 `content_list.json` 走 adapter → SidecarWriter 流程。
+- **保留诊断信息**。MinerU 解析出错或者下游 sidecar 字段异常时,可以直接到 `*.mineru_raw/` 比对原始 content_list 与图片资源。
+- **支持对象溯源**。MinerU 生成的 `drawings.json` / `tables.json` / `equations.json` 会在 `self_ref` 中保存 `content_list.json#/N`,用于回查对应的 MinerU 原始对象及其 `page_idx` / `bbox` 等定位信息。
+- **上传文件名去 hint**。源文件名包含 `[mineru-...]` / `[-iet]` 等处理 hint 时,调用 MinerU API 使用去 hint 后的规范文件名,避免 MinerU 返回的 raw bundle 内部文件名携带 hint。
+
+生命周期:
+
+| 操作 | 行为 |
+|---|---|
+| 首次解析 | 下载所有产物 → 原子写入 `_manifest.json`。 |
+| 重复解析(cache 命中) | 不调用 MinerU 服务;不重写产物;走 adapter+Writer 重生成 sidecar(适用于 adapter 升级场景)。 |
+| 重复解析(cache miss) | 清空目录内所有文件后重新下载并写入 manifest。 |
+| `DELETE /documents` 且 `delete_file=True` | `*.parsed/` 与 `*.mineru_raw/` 与原始文件一并删除。 |
+| `DELETE /documents` 且 `delete_file=False` | 保留所有产物,仅删 doc_status 与 KG 数据。 |
+| `clear_documents` / `__parsed__` 整体清理 | 自然一并清除。 |
+| scan 周期 | 不主动 GC 孤儿 `*.mineru_raw/`(用户显式删除时才清,避免误删调试现场)。 |
+
+强制重新解析(绕过 cache):设置 `LIGHTRAG_FORCE_REPARSE_MINERU=true`。
+
+并发安全:LightRAG 强制要求同一 workspace 下 `canonical_basename` 唯一(上传/入队时返回 HTTP 409),加上流水线对单个文档的串行化处理,因此 `*.mineru_raw/` 不会出现并发写入冲突,无需额外锁。
+
+`_manifest.json` 失效条件(任一触发即 cache miss):
+
+- 源文件大小或 sha256 与 manifest 记录不符;
+- `MINERU_ENGINE_VERSION` 环境变量与 manifest 记录的 `engine_version` 都非空且不一致;
+- 当前 `MINERU_API_MODE` 与 manifest 记录的 `api_mode` 都非空且不一致;
+- 当前 mode 对应 endpoint(`MINERU_OFFICIAL_ENDPOINT` / `MINERU_LOCAL_ENDPOINT`)与 manifest 记录的 `endpoint_signature` 都非空且不一致;
+- `content_list.json` 大小或 sha256 与 manifest 不符;
+- 任一记录的非关键文件(图片、`middle.json` 等)大小与 manifest 不符。
+
+> 关于 `engine_version` / `endpoint_signature` 的"任一侧为空即跳过"语义:当 manifest 写入时该字段为空(例如首次解析时未配置 `MINERU_ENGINE_VERSION`),或当前环境变量未设置时,该项不参与失效判断。如果首次解析时未设置版本环境变量,事后再补上并不会自动让历史缓存失效——这类场景需要手动设置 `LIGHTRAG_FORCE_REPARSE_MINERU=true` 触发重新解析。
+
+### 4.4 Docling 原始产物目录 `<base>.docling_raw/`
+
+`docling` 引擎在解析过程中会把 docling-serve 返回的 zip 产物(DoclingDocument JSON、Markdown 和引用图片)解压到 `__parsed__/<规范文件名>.docling_raw/` 目录下,并写入 `_manifest.json` 作为完整性校验文件。二次解析时会直接读取该目录的 `.json` 文件交给 `DoclingIRBuilder`,不再调用 docling-serve 服务。
+
+目录布局:
+
+```text
+__parsed__/<base>.docling_raw/
+├── _manifest.json
+├── <base>.json        # DoclingDocument JSON(含 pages[].image base64)
+├── <base>.md          # Markdown 形态,供人工检查
+└── artifacts/
+    └── image_*.png    # pictures[*].image.uri 指向的图片资源
+```
+
+设计目的:
+
+- **避免重复上传/转换**。再次解析同一文件时,先用源文件 hash + 文件大小校验 `_manifest.json`,命中即跳过对 docling-serve 的上传 / 轮询 / 下载,直接从本地 `.json` 走 DoclingIRBuilder → SidecarWriter 流程。
+- **保留诊断信息**。docling-serve 解析出错或下游 sidecar 字段异常时,可以直接到 `*.docling_raw/` 比对原始 DoclingDocument JSON、Markdown 与 `artifacts/` 图片。
+
+生命周期:
+
+| 操作 | 行为 |
+|---|---|
+| 首次解析 | `POST /v1/convert/file/async` 上传 → 长轮询 `/v1/status/poll/{task_id}?wait=N` → `GET /v1/result/{task_id}` 下载 zip → 安全解压(拒绝绝对路径与 `..`)→ 原子写入 `_manifest.json`。 |
+| 重复解析(cache 命中) | 不调用 docling-serve;不重写产物;走 adapter+Writer 重生成 sidecar(适用于 adapter 升级场景)。 |
+| 重复解析(cache miss) | 清空目录内所有文件后重新上传 / 下载 / 写入 manifest。 |
+| `DELETE /documents` 且 `delete_file=True` | `*.parsed/` 与 `*.docling_raw/` 与原始文件一并删除。 |
+| `DELETE /documents` 且 `delete_file=False` | 保留所有产物,仅删 doc_status 与 KG 数据。 |
+| `clear_documents` / `__parsed__` 整体清理 | 自然一并清除。 |
+| scan 周期 | 不主动 GC 孤儿 `*.docling_raw/`(用户显式删除时才清,避免误删调试现场)。 |
+
+强制重新解析(绕过 cache):设置 `LIGHTRAG_FORCE_REPARSE_DOCLING=true`。
+
+并发安全:与 MinerU 路径一致 —— LightRAG 强制要求同一 workspace 下 `canonical_basename` 唯一(上传 / 入队时返回 HTTP 409),加上流水线对单个文档的串行化处理,因此 `*.docling_raw/` 不会出现并发写入冲突,无需额外锁。
+
+`_manifest.json` 失效条件(任一触发即 cache miss):
+
+- 源文件大小或 sha256 与 manifest 记录不符;
+- `DOCLING_ENDPOINT` 与 manifest 记录的 `endpoint_signature` 不一致;
+- `DOCLING_ENGINE_VERSION` 设置且与 manifest 记录的 `engine_version` 不一致;
+- `options_signature` 不一致 —— 任一 OCR / 公式 / pipeline 字段变化都会触发,覆盖范围包括:
+  - 可调 env:`DOCLING_DO_OCR` / `DOCLING_FORCE_OCR` / `DOCLING_OCR_ENGINE` / `DOCLING_OCR_PRESET` / `DOCLING_OCR_LANG` / `DOCLING_DO_FORMULA_ENRICHMENT`;
+  - 固化常量:`pipeline` / `target_type` / `to_formats` / `image_export_mode`(写入 signature 是为了防止未来值变更后老 bundle 被误复用);
+- 主 JSON 缺失、大小或 sha256 不一致;
+- `artifacts/` 内任一图片缺失或大小不一致;
+- `LIGHTRAG_FORCE_REPARSE_DOCLING=true`。
+
+> `engine_version` / `endpoint_signature` 的"任一侧为空即跳过"语义与 MinerU §4.3 一致:manifest 写入时该字段为空(首次未配置 `DOCLING_ENGINE_VERSION`)或当前环境变量未设置时,该项不参与失效判断;事后补上版本号不会自动让历史缓存失效,需要 `LIGHTRAG_FORCE_REPARSE_DOCLING=true` 触发。
+
+## 五、文档重复判定规则
+
+文件上传、文件解析入队和文本接口会按照「文件名 + 内容 hash」两道关卡判断是否重复,命中任一即视为重复并写入一条 `FAILED` 记录,不会覆盖已有的 `full_docs`。`/documents/scan` 目录扫描也使用同一套索引,但为了便于自动重试未完成文件,对文件名重复有单独的归档与重处理规则。
+
+### 5.1 文件名(basename)查重
+
+- 判断粒度为 basename,不包含目录路径和 workspace 路径。例如 `/data/a.pdf`、`inputs/a.pdf` 和 `a.pdf` 都视为同一个文件名 `a.pdf`。
+- 文件名查重以 `canonical_basename` 为索引:将文件名末尾的支持引擎处理提示 hint 剥离后再比对,因此 `abc.docx`、`abc.[native].docx`、`abc.[native-iet].docx` 之间互相视为同名;不支持的 hint 不会被剥离,例如 `abc.[draft].docx` 仍按原文件名处理。
+- 对普通上传、文本接口和核心入队 API,只要 `doc_status` 中已经存在同名文件记录,无论该记录当前处于 `PENDING`、`PARSING`、`ANALYZING`、`PROCESSING`、`FAILED` 还是 `PROCESSED`,同名文件都会被视为重复。
+- 对 `/documents/scan` 目录扫描:
+  - 同一次扫描中如果有多个文件规范化后同名,优先处理带支持引擎 hint 的文件;若无任何 hint 变体,则处理排序后的第一个文件,其余文件会归档到 `__parsed__` 并跳过。
+  - 如果同名记录已经是 `PROCESSED`,当前扫描到的文件视为已处理文件,系统会输出 warning,将该输入文件移动到同级 `__parsed__` 目录,并跳过入队。
+  - 如果同名记录不是 `PROCESSED`,扫描文件**不**仅因文件名相同而跳过,但**也不**会重新提取/覆盖既有记录。具体路径取决于既有记录的形态(与下文"为什么 scan 仍是独占写者"一节列举的分类规则一致):
+    - 同名非 PROCESSED 且 `full_docs` 存在 → **resume 路径**:doc_status 现状保留,源文件留在 `INPUT/`,由处理循环按状态查询接走(不重新提取、不覆盖既有状态)。
+    - 同名 `FAILED` 且 `full_docs` 缺失 → 视为 `apipeline_enqueue_error_documents` 写下的提取错误 stub:scan 删掉这条 stub 后**把当前文件按新文件重新入队**。这是唯一会重新提取的子分支,目的是让"修好源文件再 scan 一次"自动生效。
+- 普通上传和核心入队 API 中,同名文件即使内容已经变化,也需要先删除旧文档记录后再重新上传或入队;扫描路径上述两种自动恢复仅用于目录扫描场景。
+- 文本接口必须提供有效的 `file_source`,并按 `file_source` 的 basename 判断重复;缺少有效 `file_source` 时直接返回 400。
+- SDK 路径调用 `insert` / `ainsert` / `apipeline_enqueue_documents` 时不传 `file_paths` 是被允许的,相关行为详见 §8.4。这类无来源文档的 `file_path` 保存为 `unknown_source`。
+- 空字符串、`no-file-path` 和 `unknown_source` 都会被视为未知来源;它们不会阻止新的无来源文本入队,也不会作为同名文件互相去重。
+
+存储后端通过 `get_doc_by_file_basename` 提供 basename 直查能力,内部按 `canonical_basename` 字段比对(传入参数会先经 `canonicalize_parser_hinted_basename` 规范化)。`JsonDocStatusStorage` 已经实现了内存级遍历;其它后端目前回落到默认实现(扫描全部状态后比对 `canonical_basename`),将在后续 PR 中补齐原生索引。
+
+### 5.2 内容 hash 查重
+
+- 文件名不同但抽取后的内容完全相同的文档同样视为重复。这里的 hash 是按配置的抽取引擎得到最终文本或 LightRAG Document 后计算的内容 hash,不是原始文件字节 hash。
+- `full_docs` 与 `doc_status` 会按内容格式写入或补齐 `content_hash` 字段:
+  - `parse_format=raw`:取经过 `sanitize_text_for_encoding` 之后的文本 MD5。
+  - `parse_format=lightrag`:取 `lightrag_document_path` 解析出的 `*.blocks.jsonl` 文件 MD5。相对路径按 `INPUT_DIR` 解析。
+  - `parse_format=pending_parse`:暂不写入 hash,等到真正完成解析后由后续步骤补上(避免按空内容误判)。
+- `legacy` 路径会在本地提取文本后、入队时进行内容 hash 查重;命中重复时,本次记录写为 `FAILED duplicate`,不会生成新的 `full_docs`、chunks 或图数据。
+- `native` / `mineru` / `docling` 路径会先以 `pending_parse` 入队;真正完成解析并补齐 `content_hash` 后,如果发现其它文档已有相同 hash,本次记录会在进入分析、切块、实体抽取和图写入前停止。
+- 重复记录会在 `metadata.duplicate_kind` 中标记为 `filename` 或 `content_hash`,便于排查。内容 hash 重复还会记录 `metadata.is_duplicate=true`、`metadata.original_doc_id` 和 `metadata.original_track_id`;解析后才发现的重复会删除本次临时写入的 `full_docs`。
+- 相关 warning 会尽量减少重复噪音:扫描发现已 `PROCESSED` 的同名文件时会写入日志和 pipeline status;入队阶段重复使用 LightRAG 层的 `Duplicate document detected (...)` 日志;解析完成后才发现的内容重复使用 `Duplicate content skipped after parsing`,并写入 pipeline status。扫描归档不会额外输出 `[File Extraction]Duplicate skipped`。
+- 存储后端通过 `get_doc_by_content_hash` 进行 hash 直查;命名约定与 `get_doc_by_file_basename` 一致。
+
+> 入队批次内(同一次 `apipeline_enqueue_documents` 调用)也会做 basename 与 content_hash 去重,命中时把后续条目直接写为 `FAILED` 并标记 `existing_status=batch_duplicate`。其中 basename 去重只对有效文件名生效;`unknown_source`、`no-file-path` 和空来源只参与内容 hash 去重。
+>
+> **跨调用并发去重**也由 workspace 级串行锁保证(详见 [§6.7 enqueue 串行锁(防并发去重穿透)](#67-enqueue-串行锁防并发去重穿透)):两次相同内容、不同文件名的并发入队不会双双穿透 `content_hash` 检查。
+
+## 六、流水线并发与重入约束
+
+为防止 `scan` / `upload` / `insert` 与运行中的流水线相互覆盖 `doc_status` / `full_docs` 记录,所有写入入口在 `pipeline_status` 共享字典上协调。同一 workspace 下的 `pipeline_status_lock` 保证下表所有 transition 都在锁内原子完成。
+
+### 6.1 `pipeline_status` 字段
+
+| 字段 | 语义 |
+| --- | --- |
+| `busy` | 流水线繁忙的笼统标志。处理循环和破坏性作业(clear/delete)都会设它。**仅有 `busy=True`(处理循环)不阻塞 enqueue**——循环按 batch 拉取 `doc_status` 快照处理,每批结束后通过 `request_pending` 检查是否还有新工作。 |
+| `destructive_busy` | `busy` 的破坏性子集:`/documents/clear` 或 `/documents/{doc_id}`(删除)正在 drop 存储 / 删源文件。reservation 和 enqueue last-line guard 都会拒绝——并发 enqueue 会写入正被 drop 的存储,已接受的文档会静默丢失。处理循环不会设此字段。 |
+| `scanning` | `/documents/scan` 后台任务运行中(整个生命周期:分类阶段 + 处理阶段)。仅 `/scan` 端点用它拒绝重叠 scan,本身**不**阻塞 upload/insert。 |
+| `scanning_exclusive` | `scanning` 的独占子集:只在 scan 的**分类阶段**为 True——run_scanning_process 在读 doc_status 分类(已处理 / 续跑 / 删 stub / 归档),不能与并发写者交错。reservation 和 enqueue last-line guard 都会拒绝。分类完成后会立即清旗,scan 进入处理阶段后允许并发 upload。 |
+| `pending_enqueues` | 已通过 `_reserve_enqueue_slot` 但 bg task 未完成的 upload/insert 数。仅给 scan 端点参考——决定是否能拿独占。bg task 在 `finally` 里释放 slot。 |
+| `request_pending` | 让运行中的处理循环再扫一轮的信号。enqueue 在 `busy=True` 时写完 `doc_status` 后置位;处理循环每个 batch 结束后检查并重新拉快照。 |
+
+### 6.2 入口行为
+
+| 入口 | 条件 | 行为 |
+| --- | --- | --- |
+| `/documents/upload` / `/documents/text` / `/documents/texts` | `scanning_exclusive=True` 或 `destructive_busy=True` | 抛 HTTP 409,不写文件、不调入队 |
+| 同上 | 否则(含纯 `busy=True`、scan 处理阶段 `scanning=True` 但 `scanning_exclusive=False`) | 锁内 `pending_enqueues++` 预留 slot → 严格名字预检 → 保存文件 → schedule bg task;bg task 在 `finally` 释放 slot |
+| `/documents/scan` | `busy=True` 或 `scanning=True` 或 `pending_enqueues>0` | 落 warning 后立即返回 `scanning_skipped_pipeline_busy`,不 schedule 后台任务 |
+| 同上 | 全部 idle | 锁内设 `scanning=True` 后 schedule,task 结束在 `finally` 清旗 |
+| `/documents/clear` / `/documents/delete_document` | `busy=True` 或 `scanning=True` 或 `pending_enqueues>0` | 端点同步返回 `status="busy"`,不 schedule 后台任务 |
+| 同上 | 全部 idle | 端点**同步**在锁内设 `busy=True` + `destructive_busy=True`(`delete_document` 在返回 `deletion_started` 之前),bg task 的 finally 一并清旗 |
+| `apipeline_enqueue_documents` 内部 (last-line guard) | `scanning_exclusive=True` 且 `from_scan=False`,或 `destructive_busy=True` | 抛 `RuntimeError("Cannot enqueue while scan is classifying / clearing or deleting")` |
+| 同上 | 任何其它情况(含纯 `busy=True`、scan 处理阶段) | 正常入队;写完 `doc_status` 后若 `busy=True` 自动 nudge `request_pending=True` |
+
+`from_scan=True` 是 scan 后台任务自身入队时的旁路:scan 已持有 `scanning` 旗标,必须允许它把扫到的文件入队。
+
+### 6.3 为什么 `busy` 不再阻塞 enqueue
+
+旧版本里 `busy=True` 一律拒绝任何新入队,理由是"修改 `doc_status` 会与流水线工作线程交错"。但实际上:
+
+1. **写入顺序保证一致性**:`apipeline_enqueue_documents` 总是先 upsert `full_docs`、再 upsert `doc_status`。处理循环开头的 consistency check 仅删除"`doc_status` 行没有对应 `full_docs`"的孤儿——这种状态在并发 enqueue 中不可能出现。
+2. **批次级快照**:处理循环每个 batch 拉一次 `get_docs_by_statuses` 快照,新写入的 `PENDING` 行不会破坏当前 batch;下一轮通过 `request_pending` 重拉快照即可看到新工作。
+3. **`request_pending` 设计本就为此**:旧版同时存在 `request_pending` 字段——它就是为"运行中又有新工作"设计的,但被 busy 守护堵死了。
+
+新契约把这个机制启用起来后,**用户在长批次处理过程中仍可继续上传新文档**,bg task 写完 `doc_status` 后由运行中的循环自动接管。
+
+### 6.4 为什么 scan 仍是独占写者
+
+scan 不仅 enqueue 自己扫到的新文件,还会读 `doc_status` 决定每个文件去向:
+
+- 同名 `PROCESSED` 行 → 归档源文件、跳过入队。
+- 同名非 PROCESSED 且 `full_docs` 存在 → resume 路径,源文件**保留在 `INPUT/`**,不归档(pending-parse 解析器仍可能需要它),由处理循环按状态查询接走。
+- 同名 `FAILED` 且 `full_docs` 缺失 → 识别为之前 `apipeline_enqueue_error_documents` 写下的提取错误 stub(一致性检查会保留这种行供人工 review),scan 自动删除该 stub 并把当前文件按新文件重新入队,让用户"修好源文件再 scan 一次"能直接生效。
+
+这些"读—决策—写"组合不能与其它写者交错,否则分类决策会基于过期视图。所以 scan 必须独占,且 scan 端点会在 `busy` / `scanning` / `pending_enqueues>0` 任一存在时拒绝。
+
+### 6.5 严格名字预检(upload 路径)
+
+upload 通过 reservation 后、保存文件前必须双道检查:
+
+1. **INPUT 目录扫描**:把要保存的 basename 经 `canonicalize_parser_hinted_basename` 规范化,遍历 INPUT 目录里现有任何同 canonical 变体(含 hint / 不含 hint),命中即 409。
+2. **doc_status 查重**:用规范化 basename 调 `get_existing_doc_by_file_basename`,命中即 409。
+
+两道都过 → 保存文件 → schedule bg task → bg task 调 `apipeline_enqueue_documents` 写库 + 调 `apipeline_process_enqueue_documents` 触发处理。
+
+> 旧版本曾允许 upload 在已有同名记录时悄悄写入 FAILED 重复条目;新规则改为 fail-fast,不在 doc_status 留下任何重复痕迹。如需替换同名文档,请先调用 `/documents/{doc_id}` 的删除接口。
+
+### 6.6 多 reservation 并发的协调
+
+两个 upload 同时进来时(scan 此时拿不到独占):
+
+1. A `_reserve_enqueue_slot` → `pending_enqueues=1`,写文件,schedule bg task A,返回 success。
+2. B `_reserve_enqueue_slot` → `pending_enqueues=2`,写文件,schedule bg task B,返回 success。
+3. bg task A `apipeline_enqueue_documents` → 写 `doc_status` → 调 `apipeline_process_enqueue_documents` → 设 `busy=True` 处理 A 的文档。
+4. bg task B `apipeline_enqueue_documents` → last-line guard 看到 `scanning_exclusive=False` 且 `destructive_busy=False`,正常写入;写完后看到 `busy=True`,自动设 `request_pending=True`。
+5. bg task B 调 `apipeline_process_enqueue_documents` → 看到 `busy=True`,设 `request_pending=True` 立即返回。
+6. A 的处理循环跑完当前 batch,看到 `request_pending=True`,重拉快照,把 B 的 `PENDING` 行接上处理。
+7. 全部完成后 `busy=False`、`pending_enqueues=0`。
+
+任何一个 bg task 都不会因为 busy 被误拒——因为 enqueue 不再检查 busy;处理循环也不会重复处理同一份 batch——`request_pending` 只在 batch 间生效,且每次重拉前清零。
+
+### 6.7 enqueue 串行锁(防并发去重穿透)
+
+`apipeline_enqueue_documents` 内部"读 doc_status 做去重 → 写 `full_docs` / `doc_status`"这一段在 workspace 级 `enqueue_serialize` 锁内串行执行。原因:放开 busy/scan-processing 阶段允许并发 enqueue 之后,两次相同内容、不同文件名的入队(典型场景:scan 处理阶段的 enqueue 与 upload 同时进来)若在没有锁的情况下并发执行——
+
+1. A 读 `doc_status` 查 `content_hash`:未命中。
+2. B 读 `doc_status` 查 `content_hash`:仍未命中(A 还没 upsert)。
+3. A upsert `full_docs` + `doc_status`。
+4. B upsert `full_docs` + `doc_status`。
+
+结果:同 `content_hash` 的两条 `PENDING` 都进入流水线后续处理,原本应当被识别为 `duplicate_kind=content_hash` 的那条**没**被识别。
+
+加上串行锁后第二次 enqueue 一定能在去重读时看到第一次已 upsert 的行,正常走"无新唯一文档"的早返回路径并把本次记为 `duplicate_kind=content_hash` 的 FAILED 行。锁的作用范围**只覆盖**:
+
+- `filter_keys`(按 doc_id 排除已存在)
+- 文件名 / 内容 hash 去重读
+- 重复 FAILED 行的 upsert
+- `full_docs.upsert` + `doc_status.upsert`
+
+锁**不**覆盖 `request_pending` nudge(在锁外,只取一下 `pipeline_status_lock`),也**不**阻塞处理循环的 `get_docs_by_statuses` 读(处理循环走的是 `doc_status` 自身的并发读,与 enqueue 写是 KV 级原子,不抢同一把锁)。锁顺序:`enqueue_serialize → pipeline_status_lock`,无死锁路径。
+
+### 6.8 流水线并发参数
+
+`pipeline_status` 相关的锁解决的是"谁能写"的正确性问题,本节这一组参数解决的是"同时跑几个 worker"的吞吐量问题。流水线分为 3 个阶段,每个阶段的 worker 池数量独立可调:
+
+```
+          ┌─ q_native  ──► [native parser  × N1] ─┐
+PENDING ─►├─ q_mineru  ──► [mineru parser  × N2] ─┼─► q_analyze ─►[analyzer × N4] ─► q_process ─►[processor × N5]
+          └─ q_docling ──► [docling parser × N3] ─┘
+```
+
+入队时 `resolve_stored_document_parser_engine` 根据每个文档的 `parse_engine`(来自 `LIGHTRAG_PARSER` 默认值或文件 hint)把它放入对应解析队列;3 个解析队列**完全互不阻塞**——mineru 占满不会拖慢 docling 或 native。解析完成后统一进入 `q_analyze`(多模态分析),再进入 `q_process`(实体/关系抽取 + 入库)。
+
+| 环境变量 | 默认值 | 作用 | 调优建议 |
+| --- | --- | --- | --- |
+| `MAX_PARALLEL_PARSE_NATIVE` | `5` | N1: native 解析(docx / pdf / txt 等纯本地处理)并发 worker 数 | 纯 CPU、内存占用低,可按 CPU 核数提高 |
+| `MAX_PARALLEL_PARSE_MINERU` | `1` | N2: MinerU 解析并发 worker 数 | MinerU 占用 GPU/CPU 显著,**默认串行最稳**。本地部署且显存充足时可设 2-3;走 MinerU 官方云端服务时可适当提高(受云端配额限制) |
+| `MAX_PARALLEL_PARSE_DOCLING` | `1` | N3: Docling 解析并发 worker 数 | Docling 同样资源敏感,**默认串行最稳**。本地部署且 CPU/GPU 充足时可设 2-3 |
+| `MAX_PARALLEL_ANALYZE` | `5` | N4: 多模态分析(VLM 图片 / 表格描述)并发 worker 数 | 直接消耗 VLM 配额。建议 ≤ VLM 服务并发上限 |
+| `MAX_PARALLEL_INSERT` | `2` | N5: 实体 / 关系抽取 + 入库阶段并发文档数 | 推荐 `MAX_ASYNC / 3`,区间 2~10。该阶段每个文档会触发多次 LLM 调用,过高会撞 LLM 限流。同时该值还作为 `asyncio.Semaphore` 用于二次约束(worker 数和信号量值一致) |
+| `QUEUE_SIZE_DEFAULT` | `100` | parse / analyze 阶段间的有界队列容量 | 一般无需调整。仅在少数超大批量任务(成千上万份文档)场景下可适当提高,避免 enqueue 端反压;内存紧张时可调低 |
+| `QUEUE_SIZE_INSERT` | `4` | analyze → process 阶段间的队列容量 | process 是流水线中最慢、最耗内存的阶段,队列特意做小,给上游提供反压防止内存堆积 |
+
+**几个要点:**
+
+1. **解析阶段按引擎隔离**,所以混用 native/mineru/docling 时不必担心一种引擎慢拖累另一种。
+2. **mineru / docling 默认串行(=1)**:实测两者资源占用高,并行收益不稳定(容易 OOM / 显存竞争 / 失败重试)。如果你部署了多 GPU 或专门的解析服务器,可手动调高。
+3. **`MAX_PARALLEL_INSERT` 兼任 worker 池大小和信号量上限**:流水线创建 `Semaphore(max_parallel_insert)`,每个 process worker 在抽取入库前还要拿一次信号量。所以哪怕你把 worker 数手动改大,实际并发上限仍由这个值决定——直接调它就够了。
+4. **queue size 与背压**:`QUEUE_SIZE_INSERT=4` 这个偏小的默认值是有意为之——process 阶段慢且占内存,让 analyze 阶段在队列写满时阻塞、再反压到 parse 阶段,避免一次性把成千上万份解析结果堆在内存里。
+5. **改后生效方式**:所有参数通过 `.env`(或环境变量)传入,仅在 `LightRAG` 实例构造时读取一次;改完需要重启服务。
+
+**典型调优场景:**
+
+- 大量 PDF + 本地 MinerU 单 GPU:`MAX_PARALLEL_PARSE_MINERU=1`、`MAX_PARALLEL_ANALYZE=5`、`MAX_PARALLEL_INSERT=2`(默认即可)。
+- 大量 PDF + MinerU 云端服务:`MAX_PARALLEL_PARSE_MINERU=3~5`(视云端配额),其它保持默认。
+- 纯 docx / txt(仅走 native):`MAX_PARALLEL_PARSE_NATIVE=10`、`MAX_PARALLEL_INSERT` 按 `MAX_ASYNC/3` 推算。
+- LLM 限流明显:先降 `MAX_PARALLEL_INSERT`(process 阶段每文档多次 LLM 调用),再降 `MAX_PARALLEL_ANALYZE`(VLM 是独立配额)。
+
+## 七、流水线启动时的续跑规则
+
+每次 `apipeline_process_enqueue_documents` 起步时,会拉取所有处于 `PARSING` / `ANALYZING` / `PROCESSING` / `PENDING` / `FAILED` 状态的文档继续处理。续跑路径**根据"内容是否已抽取"分流**,保证同一个文档无论之前进度如何,按当前 `process_options` 续跑都有幂等结果。
+
+续跑规则只对 `doc_id` 已经存在于 `doc_status` 的文档生效。新文件入队则需要先经过 §五"文档重复判定规则"与 §六"流水线并发与重入约束"中的文件查重逻辑,避免新文件挤掉旧的已经成功提取内容的文件记录。
+
+### 7.1 判断"内容已抽取"
+
+读 `full_docs[doc_id]`:
+
+| `parse_format` | 判定 |
+| --- | --- |
+| `lightrag` 且 `lightrag_document_path` 文件存在 | ✅ 已抽取 |
+| `raw` 且 `content` 非空 | ✅ 已抽取 |
+| 其它(含 `pending_parse`、记录缺失) | ❌ 未抽取 |
+
+### 7.2 分支 A:未抽取
+
+走完整流水线(`parse_native` / `parse_mineru` / `parse_docling` → `analyze_multimodal` → 分块 → 实体抽取),按 `full_docs.process_options` 决定每一阶段的行为。这是"首次入队"的常规流。
+
+### 7.3 分支 B:已抽取
+
+**一律跳过解析**(不重新调 `parse_*`),从 ANALYZING 阶段重启,并清光旧 chunks / entities 后按当前 `process_options` 重做:
+
+| 子步骤 | 行为 |
+| --- | --- |
+| 引擎对比 | 若 `process_options` 隐含的引擎 ≠ `full_docs.parse_engine`,**仅 warn**,不重新解析。已抽取的内容是不可变事实,重新跑不同引擎会产生不一致。要切换引擎请先 delete 整个文档再重传。 |
+| 旧 chunks / 实体 / 关系清理 | 读 `status_doc.chunks_list` 收集旧 chunk id 集,调 `_purge_doc_chunks_and_kg(doc_id, chunk_ids)`:从 `chunks_vdb` / `text_chunks` 删除 chunk 行;按 `entity_chunks` / `relation_chunks` 反查受影响的实体 / 关系,对失去全部源的条目直接从图谱与向量库删除,对仍有其它文档贡献的条目调 `rebuild_knowledge_from_chunks` 用剩余 chunks 重建;最后删除 `full_entities` / `full_relations` 中本 doc 的索引行。purge 完成后 `status_doc.chunks_list = []` / `chunks_count = 0` 重置,避免后续 state-machine upsert 写回旧 ID。 |
+| `analyze_multimodal` | 对已启用的模态,每次运行都会重新计算 sidecar item 分析并覆盖已有的 `llm_analyze_result`。由于 LLM cache 的存在,重复计算通常会保持语义字段不变,只会重写 `analyze_time` 等运行时字段;只有在 cache miss(例如更换了模型或提示词)时,保存内容才可能与上次不同。 |
+| 重新分块 | 按新 `process_options.chunking` 选策略,参数从 `full_docs.chunk_options` 读取(入队快照,不会因续跑被覆盖;env 改动后老文档仍按入队那一刻的参数分块)。LightRAG Document path 在 `process_options=P` 时走 paragraph_semantic,否则按 selector 分发到 F/R/V。 |
+| 实体抽取 / KG-skip | 按新 `process_options.skip_kg` 决定 |
+
+> 这条规则保证:用户改 `i/t/e` 重传同名文档(先删旧 doc 再上传带新 hint 的文件)时,多模态分析能增量补齐;改 `F/R/V/P` 时 chunks 与图谱重建;改 `!` 时停掉或恢复 KG 构建。引擎变更被视为"重大变更",统一由 delete + 重传完成,不在续跑路径里隐式发生。
+
+## 八、Python SDK 调用
+
+本章针对**直接 import `LightRAG` 类**进行集成的开发者,覆盖 Server 部署不会用到的运行时 API、构造期参数和已移除的旧接口。Server 用户通常无须阅读本章。
+
+### 8.1 适用对象
+
+```python
+from lightrag import LightRAG
+rag = LightRAG(working_dir="./rag_storage", ...)
+await rag.initialize_storages()
+await rag.ainsert("text", file_paths="doc.pdf")
+```
+
+这种调用方式在以下方面与 Server 路径不同:可在不重启进程的情况下改 `addon_params["chunker"]`,可向 `apipeline_enqueue_documents` 传入 per-file `chunk_options`,可在 `ainsert` 调用时动态覆盖 F 策略的预切分参数。
+
+### 8.2 LightRAG 构造期参数
+
+`LightRAG(chunk_token_size=…, chunk_overlap_token_size=…)` 是 §3.3 优先级链中的**第 3 档**:"legacy 构造字段"。strategy 无关、粗粒度缺省,只填仍空的槽位:
+
+- 优先级低于 `addon_params["chunker"]` 显式值(§8.3)和 strategy 特定 env(§3.2)。
+- 优先级高于 legacy env `CHUNK_SIZE` / `CHUNK_OVERLAP_SIZE`。
+- 实例字段 `self.chunk_token_size` / `self.chunk_overlap_token_size` 在 `__post_init__` 之后总会被回填为 `int`,方便仍读这两个字段的旧路径(如 `pipeline.py` 中 `chunk_opts.get("chunk_token_size") or self.chunk_token_size` 兜底)继续工作。
+
+### 8.3 运行时改 `addon_params["chunker"]`
+
+`addon_params["chunker"]` 是 `ObservableAddonParams` 字段,可以**运行时改**:
+
+```python
+rag.addon_params["chunker"]["recursive_character"]["separators"] = ["##", "\n", " "]
+```
+
+改完后,**后续入队**的文档拿到新默认;已入队文档保留入队时的快照不变(参见 §3.3 三层语义保证)。这是 §3.3 优先级链的第 1 档:"`addon_params["chunker"]` 显式值",赢一切。
+
+Server 部署没有这个能力 —— 改 env 后必须重启服务才生效。
+
+### 8.4 `apipeline_enqueue_documents(chunk_options=…)`
+
+`apipeline_enqueue_documents` 接受可选的 `chunk_options` 参数,调用方传入 `dict` / `list[dict]` 会按当前文档的 `process_options` 投影为精简快照(只保留对应策略子字典 + 顶层 `chunk_token_size`)后持久化到 `full_docs[doc_id]["chunk_options"]`;不传则由 `resolve_chunk_options(self.addon_params, process_options=…)` 现场拼装一份。调用方可以放心传入全量字典——其它策略子字典会被 dispatcher 丢弃,不会污染存储。
+
+典型用法:
+
+```python
+await rag.apipeline_enqueue_documents(
+    input=["text A", "text B"],
+    file_paths=["a.[native-R].txt", "b.txt"],
+    process_options=["R", ""],
+    chunk_options=[
+        {"chunk_token_size": 800, "recursive_character": {"separators": ["\n\n", "\n"]}},
+        {"chunk_token_size": 1500},
+    ],
+)
+```
+
+per-file 个性化的典型场景:管理 UI 单独配置某个文件的 separators 或 V 阈值;将来上传 API 也可在 form / hint 中接收覆盖。
+
+**不传 `file_paths` 的兼容**:核心 API `insert` / `ainsert` / `apipeline_enqueue_documents` 仍兼容未传 `file_paths` 的调用;这类文档的 `file_path` 会保存为 `unknown_source`,不会参与文件名查重,文档 ID 继续按文本内容生成。
+
+`apipeline_enqueue_documents` 自身的并发约束(last-line guard、`from_scan=True` 旁路)见 §6.2 入口行为表。
+
+### 8.5 `ainsert(split_by_character=…, split_by_character_only=…)`
+
+`LightRAG.ainsert(split_by_character=…, split_by_character_only=…)` 的运行时参数在入队时由 `resolve_chunk_options` 覆写到 `chunk_options.fixed_token`:
+
+- `split_by_character` 非 `None` 即覆盖 env 默认;
+- `split_by_character_only=True` 即覆盖(`False` 是签名默认值,与"未指定"无法区分,所以 env 默认胜出)。
+
+仅对 F 策略生效;其它策略的子字典不受影响。
+
+### 8.6 已移除的 SDK 入参:`reprocess_existing_non_processed`
+
+旧 `apipeline_enqueue_documents` 的 `reprocess_existing_non_processed=True` 行为会在 scan 时直接删除非 PROCESSED 的旧记录并重建,与 §五 / §六 的规则相冲突,已整段移除。替代路径:
+
+- 自动续跑:scan 按 §6.4 的分类规则处理同名文件(归档 / 续跑 / 删 stub 后重入队),由 §七 续跑规则在处理循环里统一接管。
+- 强制刷新:先调 `/documents/{doc_id}` 删旧文档,再上传同名新文件。

+ 853 - 0
docs/FileProcessingPipeline.md

@@ -0,0 +1,853 @@
+# File Processing Pipeline Specification
+
+Starting from version v1.5.0 (currently on the dev branch), LightRAG's file processing pipeline has received a major upgrade:
+
+* Supports multiple file content extraction engines: legacy, native, mineru, docling
+* Supports multiple text chunking methods: Fix, Recursive, Vector, Paragraph
+* Supports disabling entity-relation extraction for individual files
+
+LightRAG Server introduces an intermediate file-processing format: `LightRAG Document`. This format supports multimodal data such as tables and images, and also includes the document's section/paragraph metadata, which is convenient for content traceability later.
+
+This document is organized from the perspective of **LightRAG Server** deployment and use: the quick-start configuration that can be applied directly is given first, followed by configuration syntax for content extraction and chunking, storage / directory layout, deduplication, concurrency, and resume rules. Developers who call the `LightRAG` class directly via Python should jump to [Chapter 8: Python SDK Invocation](#8-python-sdk-invocation).
+
+## 1. Quick Start
+
+### Keep the legacy file-processing behavior
+
+All files are processed using the legacy document parsing and chunking strategy. Either leave `LIGHTRAG_PARSER` unconfigured, or set it to the following value:
+
+```bash
+LIGHTRAG_PARSER=*:legacy-F
+```
+
+### Recommended starting file-processing behavior
+
+No reliance on external document parsing services or on `VLM` vision models. Use the new built-in `Native` engine to parse `docx` documents with table (t) and equation (e) modality analysis enabled, paired with the `P` chunking strategy; other documents use the legacy content extractor paired with the more effective `R` chunking strategy.
+
+```bash
+LIGHTRAG_PARSER=*:native-teP,*:legacy-R
+```
+
+### Enable multimodal processing capability
+
+Enabling multimodal processing requires the `MinerU` file parsing service and a `VLM` vision recognition model. Use `Native` to parse `docx` files; use `MinerU` to parse `pdf`, `office`, and various image files. All of the above files have image (i), table (t), and equation (e) modality analysis enabled and are paired with the `P` chunking strategy. Other documents fall back to the legacy content extractor paired with the `R` chunking strategy.
+
+```bash
+LIGHTRAG_PARSER=*:native-iteP,*:mineru-iteP,*:legacy-R
+VLM_PROCESS_ENABLE=true
+VLM_LLM_MODEL=kimi-k2.6
+MINERU_API_MODE=local
+MINERU_LOCAL_ENDPOINT=http://localhost:8000
+```
+
+> `P` is LightRAG's native chunking strategy; see [Paragraph Semantic Chunking](ParagraphSemanticChunking.md) for details. For VLM configuration, see [Role-based LLM/VLM Configuration Guide](RoleSpecificLLMConfiguration.md).
+
+## 2. Content Extraction and Processing Option Configuration
+
+LightRAG's file processing configuration is composed of two parts: the content extraction engine determines how the original file is parsed, and the processing options determine whether multimodal analysis is performed after parsing, which chunking method to use, and whether to build a knowledge graph. Typically, the environment variable `LIGHTRAG_PARSER` is first used to set default rules by file extension, and then a `[hint]` in the filename overrides individual files. Engine and options can be written in the same configuration fragment, for example `docx:native-iet` or `report.[native-R!].docx`.
+
+For backward compatibility, when the configuration is not modified, the upgraded file content extraction behavior remains the original `legacy` behavior. To enable the new content processing engines, configure as described in this section.
+
+### 2.1 Configuration Syntax Overview
+
+The complete configuration model is as follows:
+
+```text
+LIGHTRAG_PARSER=ext:engine-options,ext:engine,*:legacy-R
+filename.[ENGINE].ext
+filename.[ENGINE-OPTIONS].ext
+filename.[-OPTIONS].ext
+```
+
+- `LIGHTRAG_PARSER` is the default rule table, matched by file extension, e.g., `pdf:mineru`, `docx:native-iet`.
+- The `[hint]` in a filename is a single-file override rule, e.g., `paper.[mineru].pdf`, `memo.[native-R!].docx`.
+- `ENGINE` is the content extraction engine: `legacy`, `native`, `mineru`, or `docling`.
+- `OPTIONS` is a string combination of processing options, e.g., `iet`, `R!`, `P`. The options are ultimately written into `process_options` and read by subsequent pipeline stages.
+- The hyphen in `ENGINE-OPTIONS` is only used to separate the engine from the options; it is not part of the options themselves.
+- When only processing options are specified, it must be written as `[-OPTIONS]`, e.g., `[-!]`. `[abc]` without a hyphen is strictly interpreted as an engine name and will raise an error; it will not fall back to being interpreted as options.
+
+Common combination examples:
+
+```bash
+LIGHTRAG_PARSER=pdf:mineru-R,docx:native-ietP,*:legacy-R
+MINERU_API_MODE=local
+MINERU_LOCAL_ENDPOINT=http://localhost:8000
+DOCLING_ENDPOINT=http://localhost:5001
+```
+
+```text
+my-proposal.[native-iet].docx   # Use the native engine, enable image/table/equation analysis
+my-memo.[native-R!].docx        # Use the native engine, recursive character chunking, disable knowledge graph construction
+my-proposal.[-!].docx           # Use the default engine, only disable knowledge graph construction
+my-proposal.[mineru].docx       # Use the MinerU engine, all processing options default
+```
+
+### 2.2 Default Rules: `LIGHTRAG_PARSER`
+
+`LIGHTRAG_PARSER` is used to configure the default content extraction engine for different file extensions; default processing options for the rule can also be appended after the engine:
+
+```text
+ext:engine,ext:engine,*:legacy
+ext:engine;ext:engine;*:legacy
+ext:engine-options
+```
+
+- The left side matches the file extension, not the full filename; write `pdf:mineru`, not `*.pdf:mineru`.
+- Rules can be separated by either a comma `,` or a semicolon `;`.
+- Rules are checked left to right; priority rules go in front, with the wildcard rule typically at the end.
+- The `-options` suffix after the engine serves as the default `process_options` for files matched by this rule. For example, `LIGHTRAG_PARSER=docx:native-iet` means all `.docx` files default to the `native` engine with image, table, and equation analysis enabled.
+
+### 2.3 Single-File Override: filename hints
+
+Square brackets in the filename can be used to temporarily specify how a single file is processed:
+
+```text
+paper.[mineru-R].pdf
+slides.[docling].pptx
+memo.[native-P].docx
+notes.[-R].md
+```
+
+The content inside the square brackets supports three forms:
+
+```text
+[ENGINE]              # Specify only the engine; processing options use the default or what LIGHTRAG_PARSER provides
+[ENGINE-OPTIONS]      # Specify both engine and processing options
+[-OPTIONS]            # Specify only processing options; the engine still follows LIGHTRAG_PARSER / default rules
+```
+
+When parsing the hint, content without a hyphen must match an engine name exactly (`mineru` / `native` / `docling` / `legacy`); when there is content before a hyphen, the part before the hyphen is the engine and the part after is the options; when starting with a hyphen, it specifies only options. The legacy `[OPTIONS]` syntax is no longer valid; for example, `[iet]` must now be written as `[-iet]`.
+
+### 2.4 Content Extraction Engines
+
+| Engine | Description | Supported file formats (extensions) |
+| --- | --- | --- |
+| `legacy` | Legacy extraction; content is centrally extracted before joining the pipeline | `txt` `md` `mdx` `pdf` `docx` `pptx` `xlsx` `rtf` `odt` `tex` `epub` `html` `htm` `csv` `json` `xml` `yaml` `yml` `log` `conf` `ini` `properties` `sql` `bat` `sh` `c` `h` `cpp` `hpp` `py` `java` `js` `ts` `swift` `go` `rb` `php` `css` `scss` `less` |
+| `native` | Built-in intelligent structured content extractor | `docx` |
+| `mineru` | External MinerU content extraction engine | `pdf` `doc` `docx` `ppt` `pptx` `xls` `xlsx` `png` `jpg` `jpeg` `jp2` `webp` `gif` `bmp` |
+| `docling` | External Docling content extraction engine | `pdf` `docx` `pptx` `xlsx` `md` `html` `xhtml` `png` `jpg` `jpeg` `tiff` `webp` `bmp` |
+
+`mineru` and `docling` are external content extraction engines; before enabling related rules, the services must be running first, and the corresponding endpoint/token must be configured in LightRAG.
+
+LightRAG caches the parsing results of the `mineru` and `docling` engines locally. Re-uploading the same file usually does not trigger the engine to re-parse the document. To delete the parse cache, you must click the "also delete file" option in the delete-file dialog of the document management interface. Modifying the endpoint addresses and effective extraction parameters of the `mineru` / `docling` engines will also invalidate the cache, causing the engine to re-parse the file content on the next upload of the same file.
+
+#### MinerU Configuration and Local Deployment
+
+The MinerU client supports two modes; choose one:
+
+- `local`: self-hosted MinerU service (the official Docker Compose deployment is recommended); LightRAG calls the local container via HTTP.
+- `official`: directly connects to the MinerU official precise API v4; you need to apply for a token at [mineru.net](https://mineru.net).
+
+**Local deployment (Docker Compose)**
+
+Clone the official [opendatalab/MinerU](https://github.com/opendatalab/MinerU) repository to your local machine, enter the docker deployment directory inside the repository, and first build the image:
+
+```bash
+docker compose -f compose.yaml build
+```
+
+Then start the API service (`--profile api` is required to enable the HTTP API container; the default listening port is 8000):
+
+```bash
+docker compose -f compose.yaml --profile api up -d
+```
+
+For image build details, GPU driver setup, model weight locations, etc., refer to the official README: <https://github.com/opendatalab/MinerU>.
+
+**LightRAG-side env configuration**
+
+Local mode (self-hosted mineru-api):
+
+```bash
+MINERU_API_MODE=local
+MINERU_LOCAL_ENDPOINT=http://localhost:8000
+```
+
+Official mode (MinerU cloud API):
+
+```bash
+MINERU_API_MODE=official
+MINERU_API_TOKEN=<your_token>
+# MINERU_OFFICIAL_ENDPOINT=https://mineru.net   # Default value, usually no need to change
+```
+
+For the remaining advanced switches (`MINERU_MODEL_VERSION`, `MINERU_LANGUAGE`, `MINERU_ENABLE_TABLE` / `MINERU_ENABLE_FORMULA`, `MINERU_PAGE_RANGES`, `MINERU_LOCAL_BACKEND` / `MINERU_LOCAL_PARSE_METHOD`, `MINERU_POLL_INTERVAL_SECONDS` / `MINERU_MAX_POLLS`, `MINERU_ENGINE_VERSION`, `LIGHTRAG_FORCE_REPARSE_MINERU`, etc.), refer to the MinerU section of the `env.example` template at the repository root. Note that `MINERU_PAGE_RANGES` has different semantics in the two modes: `official` supports a complete list (e.g., `1-3,5,7-9`), while `local` only supports a single page (`3`) or a simple range (`1-10`); it does not accept comma-separated lists.
+
+#### Docling Configuration
+
+The `docling` content extraction engine requires an external [docling-serve](https://github.com/DS4SD/docling-serve) service (v1 async API). Minimal configuration:
+
+```bash
+DOCLING_ENDPOINT=http://localhost:5001
+```
+
+`DOCLING_ENDPOINT` is just the base URL (**without** `/v1/convert/file/async`). Currently LightRAG uses Docling's standard pipeline to process files. Users can control the behavior of the Docling pipeline through the following environment variables:
+
+| Env | Default | Meaning |
+| --- | --- | --- |
+| `DOCLING_DO_OCR` | `true` | OCR master switch |
+| `DOCLING_FORCE_OCR` | `true` | Force OCR per page (mandatory for scanned documents; enabling it for non-scanned documents usually also helps improve layout recognition quality) |
+| `DOCLING_OCR_ENGINE` | `auto` | OCR engine selection (not recommended to change) |
+| `DOCLING_OCR_PRESET` | `auto` | OCR engine preset (not recommended to change) |
+| `DOCLING_OCR_LANG` | (empty) | Set per OCR engine requirements (not recommended to change) |
+| `DOCLING_DO_FORMULA_ENRICHMENT` | `false` | Whether to recognize equations in the document and output them in LaTeX format; before enabling, ensure that Docling has downloaded the equation recognition model on the backend (see explanation below) |
+
+When `DOCLING_OCR_ENGINE` / `DOCLING_OCR_PRESET` are not configured, they are equivalent to `auto`; when `DOCLING_OCR_LANG` is not configured, no language list is passed to docling-serve, and the OCR engine uses its own default. The parse cache signature is computed from these effective parameters, so switching between "not configured" and "explicitly set to the default value" does not invalidate the cache.
+
+Two polling-budget envs (docling-serve uses server-side long-poll; the client does not sleep extra):
+
+| Env | Default | Meaning |
+| --- | --- | --- |
+| `DOCLING_POLL_INTERVAL_SECONDS` | `5` | Poll interval for awaiting parse results |
+| `DOCLING_MAX_POLLS` | `240` | Maximum poll iterations; raises `TimeoutError` when exceeded;<br />default wait time ≈ 5 × 240 (about 20 minutes) |
+
+Three bundle-cache envs:
+
+| Env | Default | Meaning |
+| --- | --- | --- |
+| `DOCLING_ENGINE_VERSION` | (empty) | Docling engine version; version changes invalidate the parse cache |
+| `LIGHTRAG_FORCE_REPARSE_DOCLING` | `false` | When set to `true`/`1`, the parse cache is not used |
+| `DOCLING_BBOX_ATTRIBUTES` | `{"origin":"LEFTBOTTOM"}` | Default coordinate system for Docling layout |
+
+**Prerequisites for `DOCLING_DO_FORMULA_ENRICHMENT`**: the docling-serve side must have the code-formula model weights ready. The adapter is dual-track compatible — when enabled, the `text` field is LaTeX; when disabled, or when missing weights cause `text == orig`, it falls back to plain text and does not write `equations.json`. Therefore the default of `false` is conservative; turn it on only after confirming the model is ready on the deployment side.
+
+#### Docling Local Deployment (enabling LaTeX equation recognition)
+
+The following uses a Docker-based docling-serve deployment as an example, giving the complete steps from image download to model mounting. After deployment completes, write `DOCLING_DO_FORMULA_ENRICHMENT=true` into LightRAG's `.env` to enable LaTeX equation recognition.
+
+> **Important**: the steps below are based on an environment where the GPU supports CUDA 13. If your GPU is older and does not support CUDA 13, replace the image name `docling-serve-cu130:main` in the command and compose file with the tag corresponding to your CUDA version. For the list of available images, see [docling-serve Packages](https://github.com/orgs/docling-project/packages?repo_name=docling-serve).
+
+**1. Pull the image**
+
+```bash
+docker pull ghcr.io/docling-project/docling-serve-cu130:main
+```
+
+**2. Download models**
+
+```bash
+# Create the docling working directory
+mkdir docling
+cd docling
+
+# Create the model mount directory
+mkdir models
+
+# Copy the existing models inside the container into the models directory
+docker run --rm -it \
+  -v "$(pwd)/models:/opt/app-root/src/models" \
+  ghcr.io/docling-project/docling-serve-cu130:main \
+  cp -r /opt/app-root/src/.cache/docling/models /opt/app-root/src/
+
+# Download the equation recognition model
+docker run --rm \
+  -v "$(pwd)/models:/opt/app-root/src/models" \
+  -e DOCLING_SERVE_ARTIFACTS_PATH="/opt/app-root/src/models" \
+  ghcr.io/docling-project/docling-serve-cu130:main \
+  docling-tools models download-hf-repo docling-project/CodeFormulaV2 -o models
+```
+
+**3. Create `docker-compose.yaml`**
+
+Create `docker-compose.yaml` in the `docling` directory from the previous step, with the following contents:
+
+```yaml
+services:
+  docling-serve:
+    image: ghcr.io/docling-project/docling-serve-cu130:main
+    container_name: docling-serve
+    ports:
+      - "5001:5001"
+    environment:
+      DOCLING_SERVE_ENABLE_UI: "true"
+      NVIDIA_VISIBLE_DEVICES: "all"
+      DOCLING_SERVE_ARTIFACTS_PATH: "/opt/app-root/src/models"
+    # deploy:  # This section is for compatibility with Swarm
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: all
+    #           capabilities: [gpu]
+    runtime: nvidia
+    restart: always
+    volumes:
+      - ./models:/opt/app-root/src/models
+```
+
+Then execute `docker compose up -d` in that directory to start the service. After the container is ready, set the following in LightRAG's `.env`:
+
+```bash
+DOCLING_ENDPOINT=http://localhost:5001
+DOCLING_DO_FORMULA_ENRICHMENT=true
+```
+
+This enables LightRAG to recognize equations in documents via the local docling-serve and output them in LaTeX form.
+
+### 2.5 File Processing Options
+
+Processing options control the behavior of a single file with respect to multimodal analysis, knowledge graph construction, and text chunking. All options are optional; defaults are shown in the table below. At most one chunking method (F/R/V/P) is specified per file; the other options can be combined arbitrarily.
+
+| Option | Type | Default | Meaning |
+| --- | --- | --- | --- |
+| `i` | Multimodal | Off | Enable image analysis (VLM) |
+| `t` | Multimodal | Off | Enable table analysis (VLM) |
+| `e` | Multimodal | Off | Enable equation analysis (VLM) |
+| `!` | Pipeline | Off | Disable entity/relation extraction; do not build the knowledge graph (only the chunks vector index is kept; naive / mix retrieval still works) |
+| `F` | Chunking | Default | Fix / fixed-length chunking: legacy method, splits mechanically by fixed token length or by separator (no chunk overlap when splitting by separator) |
+| `R` | Chunking | - | Recursive / recursive character chunking (RecursiveCharacterTextSplitter@LangChain): takes a list of separators (default `["\n\n","\n","。","!","?",";",","," ",""]`, ordered from strongest to weakest semantic boundary). Splits by paragraph (double newline) first; if a chunk is still over the token limit, falls back stepwise to single newline → Chinese sentence-ending punctuation (`。!?`) → Chinese mid-sentence punctuation (`;,`) → space → per-character split. **The default cascade includes Chinese punctuation**, letting Chinese / mixed Chinese-English documents split at semantic boundaries. English `.?!` is deliberately excluded (literal matching would mis-split `0.95` / `e.g.`). |
+| `V` | Chunking | - | Vector / semantic vector chunking (SemanticChunker@LangChain): first splits text into sentences (the default sentence splitting regex recognizes both English `.?!` and Chinese `。?!`, allowing correct sentence splitting in Chinese / mixed Chinese-English documents), computes embeddings of adjacent sentences, then finds semantic breakpoints based on the specified threshold strategy (e.g., percentile, standard_deviation, or interquartile) for splitting. `SemanticChunker` itself has no chunk size cap — any semantic chunk that exceeds `chunk_token_size` is automatically split again by R before persistence (preserving V's non-overlap semantics). This chunking strategy never produces overlapping chunks. |
+| `P` | Chunking | - | Paragraph / paragraph semantic chunking (native); splits by heading first and strictly avoids mixing content from the bottom of the previous heading with content from the next heading, which would break semantics. Suited for chunking documents that can accurately identify headings with a clear heading structure. When the body under the same heading is too long and falls back to R, overlap can be preserved according to `CHUNK_P_OVERLAP_SIZE`; bridging text between adjacent large tables can also be repeated into the surrounding table chunks within that budget. This chunking method can only be applied to `lightrag` content stored in the sidecar directory. If `lightrag` content does not exist, it degrades to chunking with `R`. This chunking method produces far fewer overlapping chunks than the `R` or `F` strategies. |
+
+> The global multimodal switch `addon_params["enable_multimodal_pipeline"]` is deprecated; the related behavior is now uniformly controlled by the file-level `i/t/e` options. See [Appendix A](#appendix-a-notes-on-upgrading-from-legacy).
+
+#### Option effective stages
+
+Different characters of processing options take effect at different stages of the pipeline:
+
+| Option | Stage | Description |
+| :-: | --- | --- |
+| i/t/e | Analyzing (multimodal analysis) | Determines whether VLM summarization analysis is invoked on the images / tables / equations in the sidecar. **The extraction stage is unaffected**: the content extraction engine outputs `drawings.json` / `tables.json` / `equations.json` sidecar files based on what the document actually contains. As a result, changing the `i`/`t`/`e` options later and triggering "re-analysis" is enough to run the VLM analysis, without re-parsing the original file. |
+| ! | Extraction (entity-relation extraction) | Skips entity/relation extraction and graph writing; chunks are still written to the vector store to retain naive / mix retrieval capabilities. |
+| F/R/V/P | Chunking (text chunking) | Determines which chunking strategy to use; does not affect the output of the parsing stage. |
+
+> Modality availability is signaled solely by "whether the sidecar file exists"; the content extraction engine does not need to declare its capabilities in meta. If a given document contains no images/tables/equations, the corresponding sidecar is not written; even if the user has enabled `i/t/e`, the corresponding modality is silently skipped, but `analyze_multimodal` logs an INFO-level line for that document (`[analyze_multimodal] sidecar e:equations empty: doc-id ...`), making it easy to diagnose "why didn't the VLM run". This is not an error.
+
+### 2.6 Validation, Priority, and Fallback
+
+- `LIGHTRAG_PARSER` is strictly validated at startup: unknown content extraction engines, malformed extension syntax, explicitly using an unsupported extension, external engines missing endpoint, and illegal characters in processing options all cause startup to fail.
+- **When a wildcard rule matches a certain extension**, the engine must pass two usability checks (see `parser_routing._engine_is_usable`): (a) the engine's capability table supports that extension; (b) if it is an external engine (`mineru` / `docling`), the corresponding endpoint/token environment variable is configured. If either check fails, the rule is skipped and the next rule is matched. For example, in `*:mineru;html:docling`: MinerU does not support the `html` extension (condition a fails), so `html` continues to match `docling`; if `MINERU_API_MODE=local` but `MINERU_LOCAL_ENDPOINT` is not set, all PDFs also skip `*:mineru` and fall to the next rule (condition b fails). This behavior applies to both `LIGHTRAG_PARSER` rule matching and filename hint engine selection.
+- Filename hints have higher priority than `LIGHTRAG_PARSER`. If the engine specified in a hint does not support that extension, the system falls back to the default rules to continue selecting an available engine.
+- If the filename hint provides a non-empty options string, the hint takes precedence; otherwise the default options of the matching item in `LIGHTRAG_PARSER` are used; if neither is provided, all defaults are used.
+- If no rule is available, the file content extraction falls back to `legacy`; if `legacy` also does not support the file extension, an error entry is added to the system and the uploaded file remains in the `INPUT` directory.
+- At most one of F/R/V/P may appear; repeating the same option has effect only once but does not raise an error.
+- Case-sensitive: the chunking options F/R/V/P must be uppercase; other options i/t/e must be lowercase.
+- If illegal characters appear inside the square brackets, the entire hint is invalidated, the engine follows the default rules, and the options fall back to `LIGHTRAG_PARSER` defaults or all defaults; a warning is also logged.
+- `P` is only effective for structured `LightRAG Document` results extracted by `native`; for the `legacy` path or unstructured output, it automatically degrades to `R` and logs a warning.
+
+## 3. Chunker Parameter Configuration (chunk_options)
+
+### 3.1 Responsibilities of process_options vs chunk_options
+
+`process_options` selects **which** chunking strategy (F/R/V/P), while `chunk_options` decides **which parameters** that chunker uses. The two responsibilities are orthogonal: the former is a single-character selector, the latter is a structured dictionary.
+
+```
+env vars                                                  (read once at startup)
+   │
+   ▼
+addon_params["chunker"]                                   (LightRAG instance field, filled by env with legacy fallback)
+   │
+   ▼  resolve_chunk_options(addon_params, split_by_character=…, split_by_character_only=…)
+   │
+full_docs[doc_id]["chunk_options"]                       (frozen at enqueue time, an independent snapshot per file)
+   │
+   ▼
+chunker(tokenizer, content, chunk_token_size, **strategy_kwargs)   (dispatched by selector during chunking)
+```
+
+- **env vars** are loaded into `addon_params["chunker"]` during the `LightRAG.__init__` stage (strategy-specific env is read by `default_chunker_config()`, then `_apply_chunk_size_overlay` fills in legacy env as a fallback).
+- **`addon_params["chunker"]`** is an `ObservableAddonParams` field; for Server deployments, you only need env / restart for the new values to take effect. To change it at runtime within the Python process (without restarting) and to do per-file overrides, see [Chapter 8: Python SDK Invocation](#8-python-sdk-invocation).
+- **`full_docs.chunk_options`** is frozen at `apipeline_enqueue_documents` enqueue time: by default it is assembled by `resolve_chunk_options(self.addon_params, ...)` on the spot; if the caller passes a `chunk_options` argument, it is reduced to the slim snapshot for the strategy selected by `process_options` (see §3.4) and then persisted (SDK usage, see §8.4).
+- **The chunker invocation** takes the corresponding sub-dictionary from `full_docs.chunk_options` and dispatches to F/R/V/P by the `process_options.chunking` selector.
+
+### 3.2 Environment Variables
+
+All variables in the table below are read into `addon_params["chunker"]` once when `LightRAG` is instantiated: strategy-specific env is read by `default_chunker_config()`, while legacy env (`CHUNK_SIZE` / `CHUNK_OVERLAP_SIZE`) is filled in by `_apply_chunk_size_overlay` into slots that neither strategy env nor legacy constructor fields filled. After modifying env, the service must be restarted (or a new `LightRAG` instance created) for it to take effect; documents already enqueued hold the frozen snapshot and are unaffected.
+
+| Variable | Default | Type | Scope |
+|---|---|---|---|
+| `CHUNK_SIZE` | `1200` | int | Legacy top-level `chunk_token_size` fallback; lower priority than strategy-specific env and the SDK path setting of `addon_params["chunker"]["chunk_token_size"]` |
+| `CHUNK_OVERLAP_SIZE` | `100` | int | Legacy overlap fallback; filled when a strategy has neither a specific env (`CHUNK_F_OVERLAP_SIZE` / `CHUNK_R_OVERLAP_SIZE` / `CHUNK_P_OVERLAP_SIZE`) nor the SDK path's `LightRAG(chunk_overlap_token_size=…)` |
+| `CHUNK_F_SIZE` | unset | int | F strategy-specific `chunk_token_size`; higher than the top-level legacy fallback (`CHUNK_SIZE` and the SDK path's `LightRAG(chunk_token_size=…)`). When unset, F inherits the top-level resolved value. |
+| `CHUNK_F_OVERLAP_SIZE` | unset | int | F strategy-specific overlap; higher than the legacy constructor field and `CHUNK_OVERLAP_SIZE` |
+| `CHUNK_F_SPLIT_BY_CHARACTER` | (unset = `null`) | str? | F pre-split separator; `null` / empty string = split by token window only |
+| `CHUNK_F_SPLIT_BY_CHARACTER_ONLY` | `false` | bool | F strict mode: no secondary token split; raise error when oversized |
+| `CHUNK_R_SIZE` | unset | int | R strategy-specific `chunk_token_size`; higher than top-level legacy fallback (`CHUNK_SIZE` and the SDK path's `LightRAG(chunk_token_size=…)`). When unset, R inherits the top-level resolved value. |
+| `CHUNK_R_OVERLAP_SIZE` | unset | int | R strategy-specific overlap; higher than the legacy constructor field and `CHUNK_OVERLAP_SIZE` |
+| `CHUNK_R_SEPARATORS` | `["\n\n","\n","。","!","?",";",","," ",""]` | JSON array string | R separator cascade, ordered from strongest to weakest semantic boundary. The default includes Chinese sentence-ending (`。!?`) and mid-sentence (`;,`) punctuation, letting Chinese / mixed Chinese-English documents split at semantic boundaries. English `.?!` is deliberately excluded (literal matching would mis-split numbers and abbreviations). |
+| `CHUNK_V_SIZE` | unset | int | V strategy-specific `chunk_token_size` (hard cap, automatically re-split through R when exceeded); higher than the top-level legacy fallback. When unset, V inherits the top-level resolved value. |
+| `CHUNK_V_BREAKPOINT_THRESHOLD_TYPE` | `percentile` | str | V threshold type; can be `percentile` / `standard_deviation` / `interquartile` / `gradient` |
+| `CHUNK_V_BREAKPOINT_THRESHOLD_AMOUNT` | (unset = `null`) | float? | V threshold magnitude; `null` lets LangChain pick the default by type (e.g., percentile=95) |
+| `CHUNK_V_BUFFER_SIZE` | `1` | int | V sentence buffer window; the number of adjacent sentences to merge during distance computation |
+| `CHUNK_V_SENTENCE_SPLIT_REGEX` | `(?<=[.?!])\s+\|(?<=[。?!])` | str | V's sentence splitting regex, fed to LangChain's `SemanticChunker`. The default recognizes both English `.?!` (requiring trailing whitespace to avoid mis-splitting `0.95`) and Chinese `。?!` (no whitespace required, fitting Chinese continuous writing). The env value is the raw regex string; no JSON quoting needed. |
+| `CHUNK_P_SIZE` | `2000` (`DEFAULT_CHUNK_P_SIZE`) | int | P strategy-specific `chunk_token_size`. Unlike F/R/V, P does NOT inherit the top-level `CHUNK_SIZE` / `LightRAG(chunk_token_size=…)` when unset — paragraph-semantic merging needs more headroom than the global default to keep related paragraphs together, so the slot always carries `DEFAULT_CHUNK_P_SIZE` (2000) instead. |
+| `CHUNK_P_OVERLAP_SIZE` | unset | int | P strategy-specific overlap; higher than the legacy constructor field and `CHUNK_OVERLAP_SIZE`. Used for text overlap when long body text within the same JSONL content line falls back to R, and as the per-side budget for bridging text copied into the adjacent large-table chunks. |
+
+P's internal ratio constants are algorithmic scales and are automatically derived in proportion to `chunk_token_size`. P always uses an independent `chunk_token_size` decoupled from the global chain — even when `CHUNK_P_SIZE` is unset, P falls back to `DEFAULT_CHUNK_P_SIZE` (2000) rather than the global `CHUNK_SIZE`, because paragraph-semantic merging needs more headroom than the global default to keep related paragraphs together. Use `CHUNK_P_SIZE` to override that default per deployment. `CHUNK_P_OVERLAP_SIZE` only affects P's internal plain-text fallback and table bridging context; it does not let table row-level slices overlap each other. `CHUNK_F_SIZE` / `CHUNK_R_SIZE` / `CHUNK_V_SIZE` work differently — when unset they DO fall back to the top-level `chunk_token_size` (F is the default global window, R prefers a smaller target to better split sentences, while V — whose size is a hard cap enforced by re-splitting through R — typically wants to be enlarged to reduce over-splitting).
+
+### 3.3 Priority Chain
+
+The final value of each chunking slot is resolved by a specificity-ordered chain (high → low):
+
+1. **`addon_params["chunker"]` explicit value** — field values explicitly written at construction time or set at runtime via the SDK path (see §8.3). Server-only deployments usually don't hit this tier. Most direct; wins everything.
+2. **Strategy-specific env** — `CHUNK_F_SIZE` / `CHUNK_R_SIZE` / `CHUNK_V_SIZE` (per-strategy `chunk_token_size`), `CHUNK_F_OVERLAP_SIZE` / `CHUNK_R_OVERLAP_SIZE` / `CHUNK_P_OVERLAP_SIZE` (overlap), `CHUNK_P_SIZE` (P-specific). When the corresponding size env is unset, F/R/V inherit the top-level `chunk_token_size`. Filled only when the slot is not already occupied by ①.
+3. **Legacy constructor fields** — `LightRAG(chunk_token_size=…, chunk_overlap_token_size=…)`; only effective on the SDK path, see §8.2. Strategy-agnostic, "coarse-grained default", fills only the slots still empty.
+4. **Legacy env** — `CHUNK_SIZE` / `CHUNK_OVERLAP_SIZE`. Final fallback.
+
+Example: `CHUNK_R_OVERLAP_SIZE=42` + `LightRAG(chunk_overlap_token_size=2)` → R sub-dictionary `chunk_overlap_token_size=42` (strategy env wins), F / P sub-dictionary `chunk_overlap_token_size=2` (no F / P-specific env; the legacy constructor field is filled in).
+
+**Special case for P's `chunk_token_size`**: the P `chunk_token_size` slot does NOT walk the full four-tier chain. When ① is not explicitly provided, it resolves directly via `CHUNK_P_SIZE` env > `DEFAULT_CHUNK_P_SIZE` (2000), **skipping** ③ legacy constructor field `LightRAG(chunk_token_size=…)` and ④ legacy env `CHUNK_SIZE`. See the `CHUNK_P_SIZE` row in §3.2 for the rationale.
+
+Three layers of semantic guarantee:
+
+1. **Reproducibility**: change env, restart — old documents still chunk by the snapshot from the moment they were enqueued; results unchanged.
+2. **Resume consistency**: resume branch B (content already extracted, redo chunking by current `process_options`) also reads `full_docs.chunk_options`, preventing env drift from breaking consistency.
+3. **Per-file personalization**: callers can pass different `chunk_options` for each file (typical usage: a management UI configures separators or V threshold individually for a certain file). These are the input semantics on the SDK path; see §8.4.
+
+### 3.4 Field Structure
+
+`addon_params["chunker"]` (instance field) keeps the sub-dictionaries of all four strategies as the runtime baseline; `full_docs[doc_id]["chunk_options"]` is a **slim snapshot** — at enqueue time, only the strategy sub-dictionary selected by `process_options` is kept (default F), and the parameters of other strategies are discarded, because the processing stage will not read them. When re-parsing, `process_options` and `chunk_options` are rewritten together, avoiding residue of old-strategy parameters.
+
+**`addon_params["chunker"]` full baseline** (modifiable at runtime via SDK, affecting subsequent enqueues):
+
+```jsonc
+{
+  "chunk_token_size": 1200,                                   // common token cap
+  "fixed_token": {                                            // F-specific
+    "chunk_token_size": 1200,                                 // optional; when omitted, inherits the top-level chunk_token_size (seedable via CHUNK_F_SIZE)
+    "chunk_overlap_token_size": 100,
+    "split_by_character": null,
+    "split_by_character_only": false
+  },
+  "recursive_character": {                                    // R-specific
+    "chunk_token_size": 1200,                                 // optional; when omitted, inherits the top-level chunk_token_size
+    "chunk_overlap_token_size": 100,
+    "separators": ["\n\n", "\n", "。", "!", "?", ";", ",", " ", ""]   // default cascade includes Chinese punctuation
+  },
+  "semantic_vector": {                                        // V-specific
+    "chunk_token_size": 1200,                                 // optional hard cap; re-split through R when exceeded
+    "breakpoint_threshold_type": "percentile",                // percentile | standard_deviation | interquartile | gradient
+    "breakpoint_threshold_amount": null,                      // null = LangChain default
+    "buffer_size": 1,
+    "sentence_split_regex": "(?<=[.?!])\\s+|(?<=[。?!])"      // default regex handles both English and Chinese sentence-ending punctuation
+  },
+  "paragraph_semantic": {                                     // P-specific
+    "chunk_token_size": 2000,                                 // when omitted, resolves from CHUNK_P_SIZE or DEFAULT_CHUNK_P_SIZE (2000);
+                                                              // does NOT inherit the common chunk_token_size
+    "chunk_overlap_token_size": 100                           // when omitted, inherits the legacy overlap resolution chain
+  }
+}
+```
+
+**`full_docs[doc_id]["chunk_options"]` slim snapshot** (projected by selector; example below is for `process_options="R"`):
+
+```jsonc
+{
+  "chunk_token_size": 1200,                                   // common token cap (kept as a top-level fallback)
+  "recursive_character": {                                    // the only retained strategy sub-dictionary
+    "chunk_overlap_token_size": 100,
+    "separators": ["\n\n", "\n", "。", "!", "?", ";", ",", " ", ""]
+  }
+}
+```
+
+selector → sub-dictionary mapping: F → `fixed_token`, R → `recursive_character`, V → `semantic_vector`, P → `paragraph_semantic`; without a selector, F is the default. Each sub-dictionary corresponds one-to-one with the keyword-only parameters of the corresponding chunker function; when adding new parameters, no dispatcher change is needed, just add a kwarg to the chunker function.
+
+### 3.5 Backward Compatibility for Missing Fields
+
+Documents enqueued before this feature was introduced do not have a `chunk_options` field; during chunking, the dispatcher calls `resolve_chunk_options(self.addon_params, process_options=…)` with the current `process_options` to build a slim snapshot as a fallback. After upgrading, it is recommended to reprocess such documents once so that they receive a persisted slim `chunk_options` snapshot (aligned with the current `process_options`).
+
+## 4. Storage and Directory Layout
+
+### 4.1 `full_docs` Fields
+
+File enqueue and extraction results are written into `full_docs`:
+
+| Field | Description |
+| --- | --- |
+| `file_path` | Basename of the filename (without directory), **preserves the original name provided by the user (including the square-bracket hint)**, e.g., `abc.[native-iet].docx` is written as-is. When no valid source is provided, it is saved as `unknown_source`. The filename hint is not stripped, so the management UI can directly show the user's original naming intent. |
+| `canonical_basename` | The canonicalized basename with the processing hint stripped (e.g., `abc.docx`). Filename deduplication uses this field as the index key, ensuring `abc.docx` and `abc.[native-iet].docx` are treated as the same logical document. |
+| `source_path` | The original path provided at enqueue time (written only when it contains a directory separator or is an absolute path), used by the `native` / `mineru` / `docling` parsers to locate the actual file. |
+| `parse_format` | Content format: `pending_parse`, `raw`, `lightrag`. |
+| `content` | When `raw`, holds the extracted text; when `pending_parse`, it is an empty string; when `lightrag`, holds the **complete merged text** starting with `{{LRdoc}}` (concatenated body segments of all `type=="content"` lines in `.blocks.jsonl`). During chunking, `parse_native` strips the prefix and hands it to the chunking_func, going through exactly the same code path as `raw`. |
+| `content_hash` | MD5 of the content, used for cross-filename deduplication. For `parse_format=raw`, takes the hash of text after `sanitize_text_for_encoding`; for `parse_format=lightrag`, takes the hash of the `*.blocks.jsonl` file; for `parse_format=pending_parse`, not written, filled in after extraction completes. |
+| `lightrag_document_path` | When `parse_format=lightrag`, saves the path to the structured LightRAG Document; new records prefer to save the path relative to `INPUT_DIR`, e.g., `__parsed__/report.docx.parsed/report.blocks.jsonl`. Note that the subdirectories and the blocks filename in the path both use the canonicalized basename (without hint). |
+| `parse_engine` | The engine that actually completed extraction: `legacy`, `native`, `mineru`, `docling`. For files awaiting extraction, can also temporarily store the target engine. |
+| `process_options` | The original processing options string recorded at enqueue time (without engine name and the separator `-`), e.g., `"iet"`, `"R!"`, `""`. Downstream stages take this field as the authoritative source for deciding whether to enable image / table / equation analysis (`i/t/e`), whether to disable knowledge graph construction (`!`), and the chunking method (`F/R/V/P`). An empty string is equivalent to all defaults. |
+| `chunk_options` | The **frozen** snapshot of chunker parameters at enqueue time (slim dictionary: only the strategy sub-dictionary selected by `process_options` is retained, others discarded). Passed in by the SDK-path caller or assembled by `resolve_chunk_options(self.addon_params, process_options=…)` from instance fields (containing env defaults) as a fallback (see §3.1). `process_options` chooses which chunking strategy (F/R/V/P); `chunk_options` decides which parameters that chunker uses. The downstream `process_single_document` reads strategy-specific kwargs from this field before chunking; persistence guarantees that old documents behave reproducibly across env changes, resumes, and restarts. Rewritten together with `process_options` when re-parsing. |
+
+`pending_parse` indicates the file has been enqueued but extraction is not yet complete. After successful extraction, it is rewritten to `raw` or `lightrag`, and `content_hash` is filled in. On extraction failure, `pending_parse` and the empty `content` are kept, making subsequent troubleshooting and retry easier.
+
+> The original `file_path` (with hint), `canonical_basename`, and `content_hash` are also synchronized into `doc_status`, serving as the deduplication index sources for `get_doc_by_file_basename` / `get_doc_by_content_hash`. `get_doc_by_file_basename` internally canonicalizes the input through `canonicalize_parser_hinted_basename` before comparing against `canonical_basename`, so `abc.docx` and `abc.[native-iet].docx` always hit the same document.
+> `process_options` is also mirrored into `doc_status.metadata["process_options"]`, making it convenient for the management UI to directly display the current file's processing policy.
+
+### 4.2 `__parsed__` Directory Structure
+
+`__parsed__` is the archival and analysis-result directory next to the input directory. It stores both the already-processed original documents and the `LightRAG Document` (lightrag format) files and image assets produced by structured parsing.
+
+- Original file archival: after `legacy` local extraction succeeds and enqueueing finishes, the original file is moved into the sibling `__parsed__` directory; `native` / `mineru` / `docling` keep the original file first for the pipeline to parse, and only move it to `__parsed__` after successful parsing and writing to `full_docs`. **When archived, the original filename (including `[hint]`) is preserved**, e.g., `report.[native-iet].docx` is archived as `__parsed__/report.[native-iet].docx`, making it easy to trace the user's original name and processing options.
+- Analysis result directory: structured parsing results are written into a subdirectory named with the **canonicalized filename** (with `[hint]` removed) plus the `.parsed` suffix, avoiding name conflicts with the archived original file and ensuring that the same logical document continues to point to the same directory when the filename hint or processing options change. For example, the analysis results of `report.docx`, `report.[native].docx`, and `report.[native-iet].docx` are all written into `__parsed__/report.docx.parsed/`.
+- Analysis result files: the LightRAG Document blocks file and sidecars are named with the canonicalized filename stem, e.g., `__parsed__/report.docx.parsed/report.blocks.jsonl`; the same directory may also contain `report.tables.json`, `report.drawings.json`, `report.equations.json`, and the `report.blocks.assets/` image asset directory. **Whether a sidecar is generated is determined by the document content**: the parser only writes the corresponding file when the document actually contains tables / images / equations. This is the only signal of modality availability — the engine does not need to declare capabilities in meta. The `i`/`t`/`e` options only determine whether the next stage invokes the VLM for summarization analysis on already-existing sidecars.
+- When parsing fails, the original file is not moved, making it easy to fix the configuration and re-process.
+- When `/documents/scan` encounters a file with the same name that is already `PROCESSED`, the input file is treated as already processed and moved to `__parsed__`, not enqueued as a new document.
+- When `/documents/scan` finds multiple files that share the same canonicalized name in the same scan, it prefers the file with a supported engine hint to respect the user's engine selection; if no variant has a hint, it processes the first file in sorted order. Other variants emit warnings and are moved to `__parsed__`, avoiding files in the same batch overwriting each other. For example, if both `abc.docx` and `abc.[native].docx` exist, only `abc.[native].docx` is processed.
+- When duplicate content hashes are found during scanning or parsing, the input file is likewise moved to `__parsed__`; this `doc_status` entry is kept as `FAILED duplicate` for tracking.
+- File moves only act on the current input file and do not overwrite or move existing document source files. If a file with the same name already exists at the destination, the system automatically appends `_001`, `_002`, etc., e.g., `report.pdf` is archived as `report_001.pdf`, `report_002.pdf`. If the analysis result directory name is already taken by a regular file, a number is also appended, e.g., `report.docx.parsed_001/`.
+
+### 4.3 MinerU Raw Artifacts Directory `<base>.mineru_raw/`
+
+The `mineru` engine writes the complete artifacts returned by the MinerU service (`content_list.json` + optional `full.md` / `middle.json` / `layout.pdf` / `images/`, etc.) into the `__parsed__/<canonical filename>.mineru_raw/` directory during parsing, and writes `_manifest.json` as the integrity validation file.
+
+Design goals:
+
+- **Avoid duplicate uploads**. When parsing the same file again, the source file's content hash + size is first validated against `_manifest.json`; on hit, the MinerU service call is skipped and the local `content_list.json` is fed directly through adapter → SidecarWriter.
+- **Preserve diagnostic information**. When MinerU parses incorrectly or downstream sidecar fields are abnormal, you can go straight to `*.mineru_raw/` to compare the original content_list and image assets.
+- **Support object traceability**. The `drawings.json` / `tables.json` / `equations.json` generated by MinerU save `content_list.json#/N` in `self_ref`, used for looking up the corresponding MinerU original object and its `page_idx` / `bbox`, etc.
+- **De-hint uploaded filenames**. When the source filename contains processing hints like `[mineru-...]` / `[-iet]`, the MinerU API is called with the canonicalized filename (hint removed), to avoid hint-bearing filenames inside the raw bundle returned by MinerU.
+
+Lifecycle:
+
+| Operation | Behavior |
+|---|---|
+| First parse | Download all artifacts → atomically write `_manifest.json`. |
+| Re-parse (cache hit) | Do not call the MinerU service; do not rewrite artifacts; rerun adapter+Writer to regenerate sidecar (for adapter upgrade scenarios). |
+| Re-parse (cache miss) | Clear all files in the directory, then re-download and write manifest. |
+| `DELETE /documents` with `delete_file=True` | `*.parsed/`, `*.mineru_raw/`, and the original file are all deleted together. |
+| `DELETE /documents` with `delete_file=False` | All artifacts are preserved; only doc_status and KG data are deleted. |
+| `clear_documents` / a full sweep of `__parsed__` | Naturally cleared together. |
+| scan cycle | Does not actively GC orphan `*.mineru_raw/` (only cleared on explicit deletion by the user, to avoid accidentally removing artifacts still needed for debugging). |
+
+Force re-parse (bypass cache): set `LIGHTRAG_FORCE_REPARSE_MINERU=true`.
+
+Concurrency safety: LightRAG mandates `canonical_basename` uniqueness within the same workspace (HTTP 409 on upload/enqueue), and combined with the pipeline's serialization per document, `*.mineru_raw/` has no concurrent write conflicts and needs no extra locks.
+
+`_manifest.json` invalidation conditions (any triggers a cache miss):
+
+- Source file size or sha256 does not match manifest;
+- `MINERU_ENGINE_VERSION` environment variable and the `engine_version` recorded in manifest are both non-empty but inconsistent;
+- Current `MINERU_API_MODE` and the `api_mode` recorded in manifest are both non-empty but inconsistent;
+- Endpoint for the current mode (`MINERU_OFFICIAL_ENDPOINT` / `MINERU_LOCAL_ENDPOINT`) and the `endpoint_signature` recorded in manifest are both non-empty but inconsistent;
+- `content_list.json` size or sha256 does not match manifest;
+- Size of any recorded non-critical file (images, `middle.json`, etc.) does not match manifest.
+
+> About the "either side empty → skip" semantics of `engine_version` / `endpoint_signature`: when the field was empty at manifest-write time (e.g., `MINERU_ENGINE_VERSION` was not configured at first parse), or when the current environment variable is not set, the check is skipped for that item. If the version env was not set at first parse, setting it later does not automatically invalidate the historical cache — this scenario requires manually setting `LIGHTRAG_FORCE_REPARSE_MINERU=true` to trigger re-parsing.
+
+### 4.4 Docling Raw Artifacts Directory `<base>.docling_raw/`
+
+The `docling` engine extracts the zip artifact returned by docling-serve (DoclingDocument JSON, Markdown, and referenced images) into the `__parsed__/<canonical filename>.docling_raw/` directory during parsing, and writes `_manifest.json` as the integrity validation file. On a subsequent parse, the IR builder reads the `.json` file in that directory and feeds it to `DoclingIRBuilder`, no longer calling docling-serve.
+
+Directory layout:
+
+```text
+__parsed__/<base>.docling_raw/
+├── _manifest.json
+├── <base>.json        # DoclingDocument JSON (contains pages[].image base64)
+├── <base>.md          # Markdown form, for human inspection
+└── artifacts/
+    └── image_*.png    # image assets referenced by pictures[*].image.uri
+```
+
+Design goals:
+
+- **Avoid duplicate uploads/conversions**. When parsing the same file again, the source file's hash + size is first validated against `_manifest.json`; on hit, the upload / poll / download against docling-serve is skipped, and the local `.json` is fed directly through DoclingIRBuilder → SidecarWriter.
+- **Preserve diagnostic information**. When docling-serve parses incorrectly or downstream sidecar fields are abnormal, you can go straight to `*.docling_raw/` to compare the original DoclingDocument JSON, Markdown, and `artifacts/` images.
+
+Lifecycle:
+
+| Operation | Behavior |
+|---|---|
+| First parse | `POST /v1/convert/file/async` upload → long-poll `/v1/status/poll/{task_id}?wait=N` → `GET /v1/result/{task_id}` download zip → safe extraction (rejecting absolute paths and `..`) → atomically write `_manifest.json`. |
+| Re-parse (cache hit) | Do not call docling-serve; do not rewrite artifacts; rerun adapter+Writer to regenerate sidecar (for adapter upgrade scenarios). |
+| Re-parse (cache miss) | Clear all files in the directory, then re-upload / download / write manifest. |
+| `DELETE /documents` with `delete_file=True` | `*.parsed/`, `*.docling_raw/`, and the original file are all deleted together. |
+| `DELETE /documents` with `delete_file=False` | All artifacts are preserved; only doc_status and KG data are deleted. |
+| `clear_documents` / a full sweep of `__parsed__` | Naturally cleared together. |
+| scan cycle | Does not actively GC orphan `*.docling_raw/` (only cleared on explicit deletion by the user, to avoid accidentally removing artifacts still needed for debugging). |
+
+Force re-parse (bypass cache): set `LIGHTRAG_FORCE_REPARSE_DOCLING=true`.
+
+Concurrency safety: identical to the MinerU path — LightRAG mandates `canonical_basename` uniqueness within the same workspace (HTTP 409 on upload / enqueue), and combined with the pipeline's serialization per document, `*.docling_raw/` has no concurrent write conflicts and needs no extra locks.
+
+`_manifest.json` invalidation conditions (any triggers a cache miss):
+
+- Source file size or sha256 does not match manifest;
+- `DOCLING_ENDPOINT` does not match the `endpoint_signature` recorded in manifest;
+- `DOCLING_ENGINE_VERSION` is set and does not match the `engine_version` recorded in manifest;
+- `options_signature` does not match — any OCR / equation / pipeline field change triggers it, covering:
+  - Tunable env: `DOCLING_DO_OCR` / `DOCLING_FORCE_OCR` / `DOCLING_OCR_ENGINE` / `DOCLING_OCR_PRESET` / `DOCLING_OCR_LANG` / `DOCLING_DO_FORMULA_ENRICHMENT`;
+  - Hard-coded constants: `pipeline` / `target_type` / `to_formats` / `image_export_mode` (written into the signature to prevent old bundles from being mistakenly reused if these values change in the future);
+- Main JSON missing, size, or sha256 does not match;
+- Any image in `artifacts/` missing or size mismatch;
+- `LIGHTRAG_FORCE_REPARSE_DOCLING=true`.
+
+> The "either side empty → skip" semantics of `engine_version` / `endpoint_signature` is the same as MinerU §4.3: when the field was empty at manifest-write time (first parse without `DOCLING_ENGINE_VERSION` configured) or when the current environment variable is not set, the check is skipped for that item; adding the version number later does not automatically invalidate the historical cache; `LIGHTRAG_FORCE_REPARSE_DOCLING=true` is needed to trigger re-parsing.
+
+## 5. Document Duplicate Detection Rules
+
+File upload, file-parse enqueue, and the text APIs check duplicates against two gates: "filename + content hash". Hitting either is considered a duplicate, and a `FAILED` record is written without overwriting the existing `full_docs`. `/documents/scan` directory scanning uses the same set of indexes, but in order to facilitate automatic retry of unfinished files, it has separate archive and re-process rules for duplicate filenames.
+
+### 5.1 Filename (basename) Deduplication
+
+- The granularity of the check is basename, excluding directory path and workspace path. For example, `/data/a.pdf`, `inputs/a.pdf`, and `a.pdf` are all considered the same filename `a.pdf`.
+- Filename deduplication uses `canonical_basename` as the index: the supported-engine processing hint at the end of the filename is stripped before comparison, so `abc.docx`, `abc.[native].docx`, and `abc.[native-iet].docx` are considered the same name. Unsupported hints are not stripped; e.g., `abc.[draft].docx` is still treated by its original filename.
+- For ordinary upload, text APIs, and core enqueue APIs, as long as a file with the same name already exists in `doc_status` — whether that record is currently `PENDING`, `PARSING`, `ANALYZING`, `PROCESSING`, `FAILED`, or `PROCESSED` — the same-name file is considered a duplicate.
+- For `/documents/scan` directory scan:
+  - If multiple files in the same scan share the same canonicalized name, the file with a supported engine hint is processed first; if no variant has a hint, the first file after sorting is processed, and the rest are archived to `__parsed__` and skipped.
+  - If the same-name record is already `PROCESSED`, the file just scanned is treated as already processed; the system emits a warning, moves the input file to the sibling `__parsed__` directory, and skips enqueueing.
+  - If the same-name record is not `PROCESSED`, the scanned file is **not** skipped simply because of the same name, but **also** does not re-extract / overwrite the existing record. The specific path depends on the form of the existing record (consistent with the classification rules listed below in the "Why is scan still the exclusive writer" section):
+    - Same name non-PROCESSED with `full_docs` present → **resume path**: doc_status is preserved as-is, the source file remains in `INPUT/`, and the processing loop picks it up by status query (no re-extract, no overwrite of existing status).
+    - Same name `FAILED` with `full_docs` missing → recognized as an extraction-error stub written by `apipeline_enqueue_error_documents`: scan deletes the stub and **enqueues the current file as a new file**. This is the only sub-branch that re-extracts; the purpose is to make "fix the source file, scan again" automatically take effect.
+- For ordinary upload and core enqueue APIs, a file with the same name — even if its content has changed — must have its old document record deleted before re-upload or re-enqueue; the two automatic recoveries above only apply to the directory-scan path.
+- The text APIs must provide a valid `file_source`, and duplicates are checked by the basename of `file_source`; lacking a valid `file_source` returns 400 directly.
+- SDK-path calls to `insert` / `ainsert` / `apipeline_enqueue_documents` may omit `file_paths`; the related behavior is detailed in §8.4. Such source-less documents have `file_path` saved as `unknown_source`.
+- Empty strings, `no-file-path`, and `unknown_source` are all considered unknown sources; they do not block new source-less text from being enqueued, nor do they deduplicate each other as same-named files.
+
+The storage backend provides basename direct lookup via `get_doc_by_file_basename`, internally comparing against the `canonical_basename` field (the input parameter is first canonicalized through `canonicalize_parser_hinted_basename`). `JsonDocStatusStorage` already implements an in-memory traversal; other backends currently fall back to the default implementation (scanning all states and comparing `canonical_basename`), to be augmented with native indexes in subsequent PRs.
+
+### 5.2 Content Hash Deduplication
+
+- Documents with different filenames but identical extracted content are also considered duplicates. The hash here is the content hash of the final text or LightRAG Document obtained by the configured extraction engine; it is not the hash of the original file bytes.
+- `full_docs` and `doc_status` write or fill in the `content_hash` field according to the content format:
+  - `parse_format=raw`: the MD5 of the text after `sanitize_text_for_encoding`.
+  - `parse_format=lightrag`: the MD5 of the `*.blocks.jsonl` file parsed out of `lightrag_document_path`. Relative paths are resolved against `INPUT_DIR`.
+  - `parse_format=pending_parse`: no hash is written yet; it is filled in by subsequent steps after parsing actually completes (to avoid mistakenly judging by empty content).
+- The `legacy` path deduplicates content hashes after locally extracting text and during enqueue; on hit, this record is written as `FAILED duplicate`, and no new `full_docs`, chunks, or graph data are generated.
+- The `native` / `mineru` / `docling` paths first enqueue with `pending_parse`; after parsing completes and `content_hash` is filled in, if another document already has the same hash, this record is stopped before entering analysis, chunking, entity extraction, and graph writing.
+- Duplicate records are marked as `filename` or `content_hash` in `metadata.duplicate_kind` for diagnosis. Content-hash duplicates also record `metadata.is_duplicate=true`, `metadata.original_doc_id`, and `metadata.original_track_id`; duplicates discovered only after parsing also have the temporarily-written `full_docs` deleted.
+- Related warnings minimize repetitive noise: when scanning discovers a same-name file already `PROCESSED`, a log and pipeline status are written; duplicates at the enqueue stage use the LightRAG layer's `Duplicate document detected (...)` log; content duplicates only discovered after parsing use `Duplicate content skipped after parsing` and write a pipeline status. Scan archiving does not emit the extra `[File Extraction]Duplicate skipped`.
+- The storage backend provides hash direct lookup via `get_doc_by_content_hash`; the naming convention is the same as `get_doc_by_file_basename`.
+
+> Within an enqueue batch (the same `apipeline_enqueue_documents` call), basename and content_hash dedup are also performed; on hit, subsequent entries are written as `FAILED` directly and marked with `existing_status=batch_duplicate`. Basename dedup only applies to valid filenames; `unknown_source`, `no-file-path`, and empty sources only participate in content-hash dedup.
+>
+> **Cross-call concurrent dedup** is also guaranteed by the workspace-level serialization lock (see [§6.7 enqueue serialization lock (preventing concurrent dedup leakage)](#67-enqueue-serialization-lock-preventing-concurrent-dedup-leakage)): two concurrent enqueues of identical content with different filenames will not both leak past the `content_hash` check.
+
+## 6. Pipeline Concurrency and Reentry Constraints
+
+To prevent `scan` / `upload` / `insert` from overwriting `doc_status` / `full_docs` records of an in-flight pipeline, all write entry points coordinate via the `pipeline_status` shared dictionary. The `pipeline_status_lock` per workspace ensures that all transitions in the table below are completed atomically within the lock.
+
+### 6.1 `pipeline_status` Fields
+
+| Field | Semantics |
+| --- | --- |
+| `busy` | Generic pipeline-busy flag. Both the processing loop and destructive jobs (clear/delete) set it. **`busy=True` (processing loop) alone does not block enqueue** — the loop pulls a `doc_status` snapshot per batch and checks `request_pending` between batches for any newly arrived work. |
+| `destructive_busy` | A destructive subset of `busy`: `/documents/clear` or `/documents/delete_document` is dropping storages / removing source files. Both slot reservation and the enqueue last-line guard reject new enqueues while it is set — a concurrent enqueue would write to storage being torn down, and accepted documents would be silently lost. The processing loop does not set this field. |
+| `scanning` | The `/documents/scan` background task is running (entire lifecycle: classification stage + processing stage). Only the `/scan` endpoint uses it to reject overlapping scans; it does **not** itself block upload/insert. |
+| `scanning_exclusive` | An exclusive subset of `scanning`: True only during scan's **classification phase** — `run_scanning_process` is reading `doc_status` to classify (already processed / resume / delete stub / archive) and cannot interleave with concurrent writers. Both slot reservation and the enqueue last-line guard reject upload/insert enqueues while it is set. After classification, the flag is cleared immediately, and concurrent uploads are allowed once scan enters the processing phase. |
+| `pending_enqueues` | The number of upload/insert calls that have passed `_reserve_enqueue_slot` but whose bg task has not completed. Checked by the `/documents/scan` endpoint to decide whether it may take the exclusive lock, and by the clear/delete endpoints before they start (see §6.2). The bg task releases the slot in `finally`. |
+| `request_pending` | A signal nudging the running processing loop to scan another round. Enqueue sets it after writing to `doc_status` when `busy=True`; the processing loop checks it after each batch and re-pulls the snapshot. |
+
+### 6.2 Entry Point Behavior
+
+| Entry point | Condition | Behavior |
+| --- | --- | --- |
+| `/documents/upload` / `/documents/text` / `/documents/texts` | `scanning_exclusive=True` or `destructive_busy=True` | Throw HTTP 409; do not write file, do not call enqueue |
+| Same as above | Otherwise (including pure `busy=True`, scan-processing-phase `scanning=True` but `scanning_exclusive=False`) | Within the lock: `pending_enqueues++` reserves a slot → strict name precheck → save file → schedule bg task; the bg task releases the slot in `finally` |
+| `/documents/scan` | `busy=True` or `scanning=True` or `pending_enqueues>0` | Emit a warning and immediately return `scanning_skipped_pipeline_busy`; do not schedule a background task |
+| Same as above | All idle | Within the lock, set `scanning=True` then schedule; the task clears the flag in `finally` upon completion |
+| `/documents/clear` / `/documents/delete_document` | `busy=True` or `scanning=True` or `pending_enqueues>0` | The endpoint synchronously returns `status="busy"` and does not schedule a background task |
+| Same as above | All idle | The endpoint **synchronously** within the lock sets `busy=True` + `destructive_busy=True` (before `delete_document` returns `deletion_started`), and the bg task's finally clears both flags |
+| `apipeline_enqueue_documents` internal (last-line guard) | `scanning_exclusive=True` and `from_scan=False`, or `destructive_busy=True` | Throw `RuntimeError("Cannot enqueue while scan is classifying / clearing or deleting")` |
+| Same as above | Anything else (including pure `busy=True`, scan processing phase) | Enqueue normally; after writing `doc_status`, if `busy=True`, automatically nudge `request_pending=True` |
+
+`from_scan=True` is a bypass for scan's own background-task enqueue: scan already holds the `scanning` flag, so it must be allowed to enqueue the files it has scanned.
+
+### 6.3 Why `busy` no longer blocks enqueue
+
+In the old version, `busy=True` always rejected any new enqueue, on the reasoning that "modifying `doc_status` would interleave with the pipeline worker thread." However, in practice:
+
+1. **Write order guarantees consistency**: `apipeline_enqueue_documents` always upserts `full_docs` first, then upserts `doc_status`. The consistency check at the start of the processing loop only deletes "orphan `doc_status` rows that have no corresponding `full_docs`" — a state that cannot occur with concurrent enqueue.
+2. **Batch-level snapshots**: each processing-loop batch pulls a `get_docs_by_statuses` snapshot once; newly written `PENDING` rows don't disturb the current batch, and the next round re-pulls the snapshot via `request_pending` to see the new work.
+3. **`request_pending` is designed for this**: the old version already had the `request_pending` field — it was designed for "new work arrives while running" — but was gated by busy.
+
+With this mechanism enabled in the new contract, **users can continue to upload new documents during long batch processing**: once the bg task has written `doc_status`, the new rows are automatically picked up by the running loop.
+
+### 6.4 Why scan is still the exclusive writer
+
+scan not only enqueues the new files it finds, but also reads `doc_status` to decide what to do with each file:
+
+- Same-name `PROCESSED` row → archive source file, skip enqueue.
+- Same-name non-PROCESSED with `full_docs` present → resume path; the source file **stays in `INPUT/`**, not archived (the pending-parse parser may still need it); the processing loop picks it up by status query.
+- Same-name `FAILED` with `full_docs` missing → recognized as an extraction-error stub previously written by `apipeline_enqueue_error_documents` (consistency check preserves such rows for human review); scan automatically deletes that stub and enqueues the current file as a new file, so that "fix the source file, scan again" takes effect directly.
+
+These "read–decide–write" combinations cannot interleave with other writers; otherwise classification decisions would be based on a stale view. So scan must be exclusive, and the scan endpoint will reject when any of `busy` / `scanning` / `pending_enqueues>0` is present.
+
+### 6.5 Strict name precheck (upload path)
+
+After upload passes the reservation but before saving the file, a two-pass check is required:
+
+1. **INPUT directory scan**: canonicalize the basename to be saved via `canonicalize_parser_hinted_basename`, traverse the INPUT directory for any existing same-canonical variant (with hint / without hint); 409 on hit.
+2. **doc_status check**: call `get_existing_doc_by_file_basename` with the canonicalized basename; 409 on hit.
+
+Both pass → save the file → schedule the bg task → bg task calls `apipeline_enqueue_documents` to write the store + calls `apipeline_process_enqueue_documents` to trigger processing.
+
+> The old version once allowed upload to silently write a FAILED duplicate entry when a same-name record existed; the new rule is fail-fast, leaving no duplicate traces in doc_status. To replace a same-name document, call the `/documents/{doc_id}` delete API first.
+
+### 6.6 Coordination of Multiple Concurrent Reservations
+
+When two uploads arrive simultaneously (scan cannot acquire exclusivity at this time):
+
+1. A `_reserve_enqueue_slot` → `pending_enqueues=1`, write file, schedule bg task A, return success.
+2. B `_reserve_enqueue_slot` → `pending_enqueues=2`, write file, schedule bg task B, return success.
+3. bg task A `apipeline_enqueue_documents` → writes `doc_status` → calls `apipeline_process_enqueue_documents` → sets `busy=True` to process A's document.
+4. bg task B `apipeline_enqueue_documents` → sees `scanning_exclusive=False` and `destructive_busy=False`, writes normally; after writing, sees `busy=True`, automatically sets `request_pending=True`.
+5. bg task B calls `apipeline_process_enqueue_documents` → sees `busy=True`, sets `request_pending=True` and returns immediately.
+6. A's processing loop finishes the current batch, sees `request_pending=True`, re-pulls the snapshot, and picks up B's `PENDING` row.
+7. After all is complete: `busy=False`, `pending_enqueues=0`.
+
+No bg task will be falsely rejected due to busy — because enqueue no longer checks busy; the processing loop will not process the same batch repeatedly — because `request_pending` only takes effect between batches and is cleared before each re-pull.
+
+### 6.7 enqueue Serialization Lock (Preventing Concurrent Dedup Leakage)
+
+Inside `apipeline_enqueue_documents`, "read doc_status to dedupe → write `full_docs` / `doc_status`" runs serially under the workspace-level `enqueue_serialize` lock. Reason: now that concurrent enqueue is allowed during the busy/scan-processing phases, two enqueues with identical content but different filenames (typical scenario: a scan-processing-phase enqueue and an upload arriving together) would, without the lock, race as follows —
+
+1. A reads `doc_status` to check `content_hash`: miss.
+2. B reads `doc_status` to check `content_hash`: still miss (A hasn't upserted yet).
+3. A upserts `full_docs` + `doc_status`.
+4. B upserts `full_docs` + `doc_status`.
+
+Result: both `PENDING` rows with the same `content_hash` enter the downstream pipeline, and the row that should have been identified as `duplicate_kind=content_hash` was **not** identified.
+
+With the serialization lock, the second enqueue's dedup read is guaranteed to see the row already upserted by the first, taking the normal "no new unique document" early-return path and writing this run as a `duplicate_kind=content_hash` FAILED row. The lock only covers:
+
+- `filter_keys` (exclude existing by doc_id)
+- Filename / content hash dedup reads
+- Upsert of duplicate FAILED rows
+- `full_docs.upsert` + `doc_status.upsert`
+
+The lock does **not** cover the `request_pending` nudge (outside the lock; only briefly takes `pipeline_status_lock`), and does **not** block the `get_docs_by_statuses` read of the processing loop (which goes through `doc_status`'s own concurrent reads — atomic at the KV level with respect to the enqueue writes, and not contending for the same lock). Lock order: `enqueue_serialize → pipeline_status_lock`; no deadlock path.
+
+### 6.8 Pipeline Concurrency Parameters
+
+The locks around `pipeline_status` solve the correctness problem of "who can write"; this section's set of parameters solves the throughput problem of "how many workers run concurrently". The pipeline is divided into 3 stages, each with an independently tunable worker pool:
+
+```
+          ┌─ q_native  ──► [native parser  × N1] ─┐
+PENDING ─►├─ q_mineru  ──► [mineru parser  × N2] ─┼─► q_analyze ─►[analyzer × N4] ─► q_process ─►[processor × N5]
+          └─ q_docling ──► [docling parser × N3] ─┘
+```
+
+At enqueue time, `resolve_stored_document_parser_engine` puts each document into the corresponding parse queue based on its `parser_engine` (from `LIGHTRAG_PARSER` defaults or the filename hint); the three parse queues are **completely non-blocking** with respect to each other — mineru saturation does not slow down docling or native. After parsing, they enter `q_analyze` (multimodal analysis) uniformly, and then enter `q_process` (entity/relation extraction + ingest).
+
+| Environment variable | Default | Effect | Tuning advice |
+| --- | --- | --- | --- |
+| `MAX_PARALLEL_PARSE_NATIVE` | `5` | N1: number of concurrent workers for native parsing (docx / pdf / txt and other pure local processing) | Pure CPU, low memory usage; can be raised to CPU core count |
+| `MAX_PARALLEL_PARSE_MINERU` | `1` | N2: number of concurrent workers for MinerU parsing | MinerU has significant GPU/CPU usage; **the default of serial is most stable**. With local deployment and ample VRAM, you can set 2–3; when going through MinerU's official cloud service, you can raise it appropriately (subject to cloud quotas). |
+| `MAX_PARALLEL_PARSE_DOCLING` | `1` | N3: number of concurrent workers for Docling parsing | Docling is similarly resource-sensitive; **the default of serial is most stable**. With local deployment and ample CPU/GPU, you can set 2–3. |
+| `MAX_PARALLEL_ANALYZE` | `5` | N4: number of concurrent workers for multimodal analysis (VLM image / table description) | Directly consumes the VLM quota. Recommended ≤ VLM service concurrency cap. |
+| `MAX_PARALLEL_INSERT` | `2` | N5: number of concurrent documents at the entity / relation extraction + ingest stage | Recommended `MAX_ASYNC / 3`, in the range 2–10. This stage triggers multiple LLM calls per document; setting it too high will hit LLM rate limits. This value also sizes the `asyncio.Semaphore` that acts as an additional constraint (the worker count and the semaphore value are the same). |
+| `QUEUE_SIZE_DEFAULT` | `100` | Bounded queue capacity between the parse / analyze stages | Generally no need to tune. For very large batches (thousands or more), can be raised to avoid backpressure at the enqueue side; lower it when memory is tight. |
+| `QUEUE_SIZE_INSERT` | `4` | Queue capacity between the analyze and process stages | The process stage is the slowest and most memory-hungry in the pipeline; the queue is deliberately small to apply backpressure upstream and prevent memory bloat. |
+
+**Several key points:**
+
+1. **Parsing stage is isolated per engine**, so when mixing native/mineru/docling, you don't have to worry about a slow engine dragging another down.
+2. **mineru / docling default to serial (=1)**: in practice both have high resource usage, and concurrency benefits are unstable (prone to OOM / VRAM contention / failure retry). With multi-GPU or a dedicated parser server, you can raise them manually.
+3. **`MAX_PARALLEL_INSERT` doubles as worker pool size and semaphore cap**: the pipeline creates a `Semaphore(max_parallel_insert)`, and each process worker also takes the semaphore before extraction and ingest. So even if you manually raise the worker count, the actual concurrency cap is still bounded by this value — just tune it directly.
+4. **Queue size and backpressure**: the small default `QUEUE_SIZE_INSERT=4` is intentional — the process stage is slow and memory-hungry; when the queue fills, analyze blocks, and backpressure reaches the parse stage, preventing thousands of parsing results from piling up in memory at once.
+5. **How changes take effect**: all parameters are passed in via `.env` (or environment variables), read once at `LightRAG` construction; restart the service after changing them.
+
+**Typical tuning scenarios:**
+
+- Large batch of PDFs + local MinerU on a single GPU: `MAX_PARALLEL_PARSE_MINERU=1`, `MAX_PARALLEL_ANALYZE=5`, `MAX_PARALLEL_INSERT=2` (defaults are fine).
+- Large batch of PDFs + MinerU cloud service: `MAX_PARALLEL_PARSE_MINERU=3~5` (depending on cloud quota), others at defaults.
+- Pure docx / txt (only native): `MAX_PARALLEL_PARSE_NATIVE=10`; `MAX_PARALLEL_INSERT` derived from `MAX_ASYNC/3`.
+- Heavy LLM rate-limiting: first lower `MAX_PARALLEL_INSERT` (the process stage makes multiple LLM calls per document), then lower `MAX_PARALLEL_ANALYZE` (VLM is a separate quota).
+
+## 7. Pipeline Resume Rules at Startup
+
+Each time `apipeline_process_enqueue_documents` starts up, it pulls all documents in `PARSING` / `ANALYZING` / `PROCESSING` / `PENDING` / `FAILED` to continue processing. The resume path **branches by "whether content has been extracted"**, ensuring that any document, regardless of its previous progress, has an idempotent result when resumed under the current `process_options`.
+
+The resume rule only applies to documents whose `doc_id` already exists in `doc_status`. New files joining the queue must first pass the file dedup logic described in "Concurrency and Reentry Constraints", so that they cannot squeeze out the records of files whose content has already been successfully extracted.
+
+### 7.1 Determining "Content Has Been Extracted"
+
+Read `full_docs[doc_id]`:
+
+| `parse_format` | Verdict |
+| --- | --- |
+| `lightrag` and `lightrag_document_path` file exists | ✅ extracted |
+| `raw` and `content` is non-empty | ✅ extracted |
+| Other (including `pending_parse`, missing record) | ❌ not extracted |
+
+### 7.2 Branch A: Not Extracted
+
+Go through the full pipeline (`parse_native` / `parse_mineru` / `parse_docling` → `analyze_multimodal` → chunking → entity extraction), with each stage's behavior determined by `full_docs.process_options`. This is the normal flow of a "first-time enqueue".
+
+### 7.3 Branch B: Already Extracted
+
+**Always skip parsing** (do not call `parse_*` again), restart from the ANALYZING stage, clear old chunks / entities, and redo per the current `process_options`:
+
+| Sub-step | Behavior |
+| --- | --- |
+| Engine comparison | If the engine implied by `process_options` ≠ `full_docs.parse_engine`, **only warn**, do not re-parse. The extracted content is an immutable fact; re-running a different engine would produce inconsistency. To switch engines, delete the whole document and re-upload it. |
+| Old chunks / entities / relations cleanup | Read `status_doc.chunks_list` to collect old chunk id set, call `_purge_doc_chunks_and_kg(doc_id, chunk_ids)`: delete chunk rows from `chunks_vdb` / `text_chunks`; reverse-lookup affected entities / relations by `entity_chunks` / `relation_chunks`, directly remove entries that have lost all sources from the graph and vector store, and call `rebuild_knowledge_from_chunks` to rebuild with the remaining chunks for entries still contributed by other documents; finally delete the index rows of this doc in `full_entities` / `full_relations`. After purge completes, `status_doc.chunks_list = []` / `chunks_count = 0` are reset to avoid the subsequent state-machine upsert writing back old IDs. |
+| `analyze_multimodal` | For enabled modalities, every run recomputes the sidecar item analysis and overwrites the existing `llm_analyze_result`. The LLM analysis cache still applies: a cache hit reuses the previous provider response, so semantic fields usually stay the same and only runtime fields such as `analyze_time` are rewritten. Cache misses, for example after changing the model or prompt, can produce different saved content. |
+| Re-chunk | Pick the strategy by the new `process_options.chunking`, with parameters read from `full_docs.chunk_options` (the enqueue snapshot; not overwritten by resume; env changes do not affect old documents, which still chunk with the parameters captured at enqueue time). The LightRAG Document path uses paragraph_semantic when `process_options=P`, and otherwise dispatches to F/R/V by selector. |
+| Entity extraction / KG-skip | Determined by the new `process_options.skip_kg` |
+
+> This rule guarantees: when users change `i/t/e` and re-upload the same-named document (delete the old doc first, then upload the file with the new hint), multimodal analysis is incrementally filled in; when changing `F/R/V/P`, chunks and graph are rebuilt; when changing `!`, KG construction is stopped or restored. Engine changes are considered a "major change", uniformly handled by delete + re-upload, not implicitly happening on the resume path.
+
+## 8. Python SDK Invocation
+
+This chapter targets developers who **directly import the `LightRAG` class** for integration, covering runtime APIs, constructor parameters, and removed legacy interfaces that Server deployments don't use. Server users usually don't need to read this chapter.
+
+### 8.1 Audience
+
+```python
+from lightrag import LightRAG
+rag = LightRAG(working_dir="./rag_storage", ...)
+await rag.initialize_storages()
+await rag.ainsert("text", file_paths="doc.pdf")
+```
+
+The following behaviors of this invocation style differ from the Server path: you can change `addon_params["chunker"]` without restarting the process, you can pass per-file `chunk_options` into `apipeline_enqueue_documents`, and you can dynamically override the F strategy's pre-split parameters in an `ainsert` call.
+
+### 8.2 LightRAG Constructor Parameters
+
+`LightRAG(chunk_token_size=…, chunk_overlap_token_size=…)` is **tier 3** in §3.3's priority chain: "legacy constructor field". It is a strategy-agnostic, coarse-grained default that fills only the slots that are still empty:
+
+- Lower priority than `addon_params["chunker"]` explicit values (§8.3) and strategy-specific env (§3.2).
+- Higher priority than the legacy env `CHUNK_SIZE` / `CHUNK_OVERLAP_SIZE`.
+- The instance fields `self.chunk_token_size` / `self.chunk_overlap_token_size` are always back-filled to `int` after `__post_init__`, so legacy paths still reading these two fields (e.g., the `chunk_opts.get("chunk_token_size") or self.chunk_token_size` fallback in `pipeline.py`) continue to work.
+
+### 8.3 Modifying `addon_params["chunker"]` at Runtime
+
+`addon_params["chunker"]` is an `ObservableAddonParams` field; it can be **modified at runtime**:
+
+```python
+rag.addon_params["chunker"]["recursive_character"]["separators"] = ["##", "\n", " "]
+```
+
+After modification, **subsequent enqueues** get the new defaults; already-enqueued documents keep the snapshot from their enqueue moment (see the three layers of semantic guarantee in §3.3). This is tier 1 of §3.3's priority chain: "`addon_params["chunker"]` explicit value", which takes precedence over every other tier.
+
+Server deployments do not have this capability — after changing env, the service must be restarted for it to take effect.
+
+### 8.4 `apipeline_enqueue_documents(chunk_options=…)`
+
+`apipeline_enqueue_documents` accepts an optional `chunk_options` argument. When the caller passes a `dict` / `list[dict]`, it is projected by the current document's `process_options` into a slim snapshot (keeping only the corresponding strategy sub-dictionary + top-level `chunk_token_size`) before being persisted to `full_docs[doc_id]["chunk_options"]`; when not passed, `resolve_chunk_options(self.addon_params, process_options=…)` assembles one on the spot. Callers can safely pass the full dictionary — the other strategies' sub-dictionaries will be discarded by the dispatcher and won't pollute the store.
+
+Typical usage:
+
+```python
+await rag.apipeline_enqueue_documents(
+    input=["text A", "text B"],
+    file_paths=["a.[native-R].txt", "b.txt"],
+    process_options=["R", ""],
+    chunk_options=[
+        {"chunk_token_size": 800, "recursive_character": {"separators": ["\n\n", "\n"]}},
+        {"chunk_token_size": 1500},
+    ],
+)
+```
+
+Typical scenarios for per-file personalization: a management UI configures separators or V threshold individually for a certain file; in the future, upload APIs may also accept overrides in form / hint.
+
+**Compatibility for not passing `file_paths`**: the core APIs `insert` / `ainsert` / `apipeline_enqueue_documents` still support invocations without `file_paths`; the `file_path` of such documents is saved as `unknown_source`, does not participate in filename dedup, and the document ID continues to be generated from text content.
+
+For `apipeline_enqueue_documents`'s own concurrency constraints (last-line guard, `from_scan=True` bypass), see the entry-point behavior table in §6.2.
+
+### 8.5 `ainsert(split_by_character=…, split_by_character_only=…)`
+
+The runtime parameters of `LightRAG.ainsert(split_by_character=…, split_by_character_only=…)` are applied as overrides to `chunk_options.fixed_token` by `resolve_chunk_options` at enqueue time:
+
+- A non-`None` `split_by_character` overrides the env default;
+- `split_by_character_only=True` overrides the env default (`False` is the signature default and is indistinguishable from "not specified", so in that case the env default wins).
+
+Only effective for the F strategy; other strategies' sub-dictionaries are unaffected.
+
+### 8.6 Removed SDK Parameter: `reprocess_existing_non_processed`
+
+The legacy `apipeline_enqueue_documents` behavior of `reprocess_existing_non_processed=True` would directly delete non-PROCESSED old records and rebuild them during scan, which conflicts with the rules in §5 / §6; it has been entirely removed. Replacement paths:
+
+- Automatic resume: scan handles same-named files per the classification rules in §6.4 (archive / resume / delete stub then re-enqueue), uniformly picked up by the resume rules in §7 inside the processing loop.
+- Forced refresh: first call `/documents/{doc_id}` to delete the old document, then upload the same-named new file.

+ 216 - 0
docs/FrontendBuildGuide.md

@@ -0,0 +1,216 @@
+# Frontend Build Guide
+
+## Overview
+
+The LightRAG project includes a React-based WebUI frontend. This guide explains how frontend building works in different scenarios.
+
+## Key Principle
+
+- **Git Repository**: Frontend build results are **NOT** included (kept clean)
+- **PyPI Package**: Frontend build results **ARE** included (ready to use)
+- **Build Tool**: **Bun** is recommended, but **Node.js/npm** is fully supported as a fallback
+
+## Installation Scenarios
+
+### 1. End Users (From PyPI) ✨
+
+**Command:**
+```bash
+pip install lightrag-hku[api]
+```
+
+**What happens:**
+- Frontend is already built and included in the package
+- No additional steps needed
+- Web interface works immediately
+
+---
+
+### 2. Development Mode (Recommended for Contributors) 🔧
+
+**Command:**
+```bash
+# Clone the repository
+git clone https://github.com/HKUDS/LightRAG.git
+cd LightRAG
+
+# Install in editable mode (no frontend build required yet)
+pip install -e ".[api]"
+
+# Build frontend when needed (can be done anytime)
+cd lightrag_webui
+bun install --frozen-lockfile
+bun run build
+cd ..
+```
+
+**Advantages:**
+- Install first, build later (flexible workflow)
+- Changes take effect immediately (symlink mode)
+- Frontend can be rebuilt anytime without reinstalling
+
+**How it works:**
+- Creates symlinks to source directory
+- Frontend build output goes to `lightrag/api/webui/`
+- Changes are immediately visible in installed package
+
+---
+
+### 3. Normal Installation (Testing Package Build) 📦
+
+**Command:**
+```bash
+# Clone the repository
+git clone https://github.com/HKUDS/LightRAG.git
+cd LightRAG
+
+# ⚠️ MUST build frontend FIRST
+cd lightrag_webui
+bun install --frozen-lockfile
+bun run build
+cd ..
+
+# Now install
+pip install ".[api]"
+```
+
+**What happens:**
+- Frontend files are **copied** to site-packages
+- Post-build modifications won't affect installed package
+- Requires rebuild + reinstall to update
+
+**When to use:**
+- Testing complete installation process
+- Verifying package configuration
+- Simulating PyPI user experience
+
+---
+
+### 4. Creating Distribution Package 🚀
+
+**Command:**
+```bash
+# Build frontend first
+cd lightrag_webui
+bun install --frozen-lockfile --production
+bun run build
+cd ..
+
+# Create distribution packages
+python -m build
+
+# Output: dist/lightrag_hku-*.whl and dist/lightrag_hku-*.tar.gz
+```
+
+**What happens:**
+- `setup.py` checks if frontend is built
+- If missing, installation fails with helpful error message
+- Generated package includes all frontend files
+
+---
+
+## GitHub Actions (Automated Release)
+
+When creating a release on GitHub:
+
+1. **Automatically builds frontend** using Bun
+2. **Verifies** build completed successfully
+3. **Creates Python package** with frontend included
+4. **Publishes to PyPI** using existing trusted publisher setup
+
+**No manual intervention required!**
+
+---
+
+## Quick Reference
+
+| Scenario | Command | Frontend Required | Can Build After |
+|----------|---------|-------------------|-----------------|
+| From PyPI | `pip install lightrag-hku[api]` | Included | No (already installed) |
+| Development | `pip install -e ".[api]"` | No | ✅ Yes (anytime) |
+| Normal Install | `pip install ".[api]"` | ✅ Yes (before) | No (must reinstall) |
+| Create Package | `python -m build` | ✅ Yes (before) | N/A |
+
+---
+
+## Bun Installation
+
+If you don't have Bun installed:
+
+```bash
+# macOS/Linux
+curl -fsSL https://bun.sh/install | bash
+
+# Windows
+powershell -c "irm bun.sh/install.ps1 | iex"
+```
+
+Official documentation: https://bun.sh
+
+---
+
+## File Structure
+
+```
+LightRAG/
+├── lightrag_webui/          # Frontend source code
+│   ├── src/                 # React components
+│   ├── package.json         # Dependencies
+│   └── vite.config.ts       # Build configuration
+│       └── outDir: ../lightrag/api/webui  # Build output
+│
+├── lightrag/
+│   └── api/
+│       └── webui/           # Frontend build output (gitignored)
+│           ├── index.html   # Built files (after running bun run build)
+│           └── assets/      # Built assets
+│
+├── setup.py                 # Build checks
+├── pyproject.toml           # Package configuration
+└── .gitignore               # Excludes lightrag/api/webui/* (except .gitkeep)
+```
+
+---
+
+## Troubleshooting
+
+### Q: I installed in development mode but the web interface doesn't work
+
+**A:** Build the frontend:
+```bash
+cd lightrag_webui && bun run build
+```
+
+### Q: I built the frontend but it's not in my installed package
+
+**A:** You probably ran `pip install .` before building, so the files copied into site-packages predate the build. Either:
+- Use `pip install -e ".[api]"` for development
+- Or reinstall: `pip uninstall lightrag-hku && pip install ".[api]"`
+
+### Q: Where are the built frontend files?
+
+**A:** In `lightrag/api/webui/` after running `bun run build`
+
+### Q: Can I use npm or yarn instead of Bun?
+
+**A:** Yes. The build scripts (`dev`, `build`, `preview`, `lint`) are runtime-agnostic and work with both Bun and Node.js/npm:
+```bash
+npm install
+npm run build
+```
+Bun is recommended for speed, but npm is fully supported. Tests (`bun test`) still require Bun.
+
+### Q: Build fails with `Cannot find package '@/lib'`
+
+**A:** This was caused by `vite.config.ts` using a TypeScript path alias (`@/`) that only Bun could resolve at config load time. Update to the latest version where this is fixed with a relative import.
+
+---
+
+## Summary
+
+- ✅ **PyPI users**: No action needed, frontend included
+- ✅ **Developers**: Use `pip install -e ".[api]"`, build frontend when needed
+- ✅ **CI/CD**: Automatic build in GitHub Actions
+- ✅ **Git**: Frontend build output never committed
+
+For questions or issues, please open a GitHub issue.

+ 303 - 0
docs/InteractiveSetup.md

@@ -0,0 +1,303 @@
+# Interactive Setup Guide
+
+Use the interactive setup wizard when you want LightRAG to guide you through the configuration instead of editing `.env` by hand.
+
+The wizard is exposed through `make` targets:
+
+- `make env-base`
+- `make env-storage`
+- `make env-server`
+- `make env-validate`
+- `make env-security-check`
+- `make env-backup`
+- `make env-base-rewrite`
+- `make env-storage-rewrite`
+
+You do not need to call the underlying shell script directly.
+
+## What This Wizard Is For
+
+The setup wizard helps you configure LightRAG in three parts:
+
+- `env-base` sets up the LLM, embedding model, and optional reranker.
+- `env-storage` adds or changes storage backends such as PostgreSQL, Neo4j, Redis, Milvus, Qdrant, MongoDB, or Memgraph.
+- `env-server` sets server host and port, WebUI labels, authentication, API keys, and SSL.
+
+You can rerun each step later. The wizard loads your existing `.env` and shows current values as defaults, so you only need to change what is different.
+
+## Before You Start
+
+- Run commands from the repository root.
+- The `make env-*` targets automatically choose a compatible Bash 4+ interpreter.
+- Use the documented `make env-*` targets rather than invoking the setup script yourself.
+- `make env-base` is the normal starting point because it creates the initial `.env`.
+- `make env-storage` and `make env-server` require an existing `.env`.
+- If you choose any wizard-managed Docker service, the wizard also prepares LightRAG for the Docker startup path.
+
+## Choose Your Setup Path
+
+Use this quick guide to decide what to run:
+
+- I want the fastest first run with remote model providers: `make env-base`
+- I want embedding or reranking to run locally in Docker: `make env-base`
+- I already configured models and now want databases: `make env-storage`
+- I already configured models and now want auth, API keys, or SSL: `make env-server`
+- I want to check whether my current setup is valid: `make env-validate`
+- I want to audit my current setup before exposing it: `make env-security-check`
+- I want a standalone backup without changing configuration: `make env-backup`
+- I need to repair the generated compose services from the bundled templates: `make env-base-rewrite` or `make env-storage-rewrite`
+
+## Scenario 1: First-Time Local Setup
+
+Use this when you want LightRAG running with the least amount of setup and you already have remote model endpoints or API keys.
+
+**Command**
+
+```bash
+make env-base
+```
+
+**What the wizard asks**
+
+- LLM provider, model, endpoint, and API key
+- Whether the embedding model should run locally via Docker
+- If embedding stays remote: embedding provider, model, dimension, endpoint, and API key
+- Whether reranking should be enabled
+- If reranking is enabled: whether the rerank service should run locally via Docker
+- If reranking stays remote: rerank provider, model, endpoint, and API key
+
+**What gets written**
+
+- `.env`
+- `docker-compose.final.yml` only if you enabled wizard-managed Docker services
+
+**What to do next**
+
+- If you did not enable wizard-managed Docker services:
+
+```bash
+lightrag-server
+```
+
+- If you enabled wizard-managed Docker services:
+
+```bash
+docker compose -f docker-compose.final.yml up -d
+```
+
+## Scenario 2: Local Setup With Docker-Hosted Embedding or Rerank
+
+Use this when you want LightRAG to run local inference services for embedding and/or reranking through Docker.
+
+**Command**
+
+```bash
+make env-base
+```
+
+**Recommended answers**
+
+- Answer `yes` to `Run embedding model locally via Docker (vLLM)?` if you want local embeddings
+- Answer `yes` to `Enable reranking?` and then `yes` to `Run rerank service locally via Docker?` if you want local reranking
+
+**What the wizard asks after you enable local services**
+
+- Embedding model name for local vLLM
+- Rerank model name for local vLLM
+- Remote LLM details if your main LLM is still external
+
+**What gets written**
+
+- `.env`
+- `docker-compose.final.yml` with the selected local services
+
+**What to do next**
+
+```bash
+docker compose -f docker-compose.final.yml up -d
+```
+
+This starts the generated Docker-based LightRAG stack together with the selected local services.
+
+## Scenario 3: Add Storage After The Base Setup
+
+Use this when you already have `.env` from `make env-base` and now want to switch from default local-file storage to database-backed storage.
+
+**Command**
+
+```bash
+make env-storage
+```
+
+**Prerequisite**
+
+- `.env` must already exist
+
+**What the wizard asks**
+
+- KV storage backend
+- Vector storage backend
+- Graph storage backend
+- Doc-status storage backend
+- For each required database, whether it should run locally via Docker
+- For each required database, the needed connection details such as host, URI, port, user, password, database name, or device type
+
+**Important rule**
+
+- `MongoVectorDBStorage` requires Atlas Search / Vector Search support.
+- If you choose the wizard-managed Docker MongoDB service, the wizard now provisions MongoDB Atlas Local, so `MongoVectorDBStorage` can run against the local Docker deployment. The generated host-side `MONGO_URI` uses `?directConnection=true`.
+- If you do not use the wizard-managed Docker MongoDB service, provide an external Atlas-capable MongoDB endpoint for `MONGO_URI`, such as a `mongodb+srv://` Atlas cluster URI or an Atlas Local `mongodb://...?...directConnection=true` URI.
+- For external `mongodb://...?...directConnection=true` URIs, the wizard can only validate the URI format. It cannot determine statically whether the target deployment actually provides Atlas Search / Vector Search support.
+
+**What gets written**
+
+- `.env`
+- `docker-compose.final.yml` if you selected wizard-managed storage services
+
+**What to do next**
+
+- If you selected Docker-managed storage services:
+
+```bash
+docker compose -f docker-compose.final.yml up -d
+```
+
+- If you pointed LightRAG at external databases, make sure those services are reachable before starting LightRAG.
+
+## Scenario 4: Harden A Deployment With Auth And SSL
+
+Use this when you already have `.env` and need to prepare the server for shared or external use.
+
+**Commands**
+
+```bash
+make env-server
+make env-security-check
+```
+
+**Prerequisite**
+
+- `.env` must already exist
+
+**What `env-server` asks**
+
+- Server host and port
+- WebUI title and description
+- Summary language
+- Whether to configure authentication and API key settings
+- Auth accounts, JWT secret, token lifetime, API key, and whitelist paths
+- Whether to enable SSL/TLS
+- SSL certificate file path and SSL key file path
+
+**What gets written**
+
+- `.env`
+- `docker-compose.final.yml` may be updated if your current setup already uses wizard-managed Docker services
+
+**What to do next**
+
+- Run `make env-security-check`
+- If the stack uses Docker, recreate the LightRAG service with your compose file
+- If the stack runs on the host, restart `lightrag-server`
+
+For broader deployment guidance, see [DockerDeployment.md](./DockerDeployment.md).
+
+## Validate, Audit, And Backup
+
+These commands do not walk you through a full setup flow, but they are part of normal operations.
+
+### Validate The Current Configuration
+
+```bash
+make env-validate
+```
+
+Use this when you want to confirm that the current `.env` is internally consistent. It reports problems such as missing required values, malformed auth settings, invalid URIs, invalid ports, or missing SSL files.
+
+### Audit Security Before Exposure
+
+```bash
+make env-security-check
+```
+
+Use this before exposing LightRAG beyond localhost. It reports risky setups such as missing authentication, weak or missing JWT secrets, unsafe whitelist settings, or unresolved sensitive placeholders.
+
+### Create A Standalone Backup
+
+```bash
+make env-backup
+```
+
+Use this when you want a manual backup without running any setup flow.
+
+## Outputs And What They Mean
+
+### `.env`
+
+The wizard writes `.env` in the repository root. This file becomes the current runtime configuration produced by the latest wizard run.
+
+In practice, this means:
+
+- rerunning the wizard updates `.env`
+- existing values are reused as defaults on later runs
+- you should treat `.env` as the active configuration for the workflow you most recently configured
+- before `env-base`, `env-storage`, or `env-server` writes `.env`, the wizard automatically creates a timestamped backup of the existing file when one is present
+
+### `docker-compose.final.yml`
+
+The wizard creates or updates `docker-compose.final.yml` only when you choose wizard-managed Docker services or when an existing wizard-generated compose setup needs to stay aligned with new server settings.
+
+When one of the setup flows is about to replace or remove an existing generated compose file, it automatically creates a timestamped backup first.
+
+For MongoDB-backed storage, the wizard-managed Docker path uses MongoDB Atlas Local rather than MongoDB Community Edition so local Atlas Search / Vector Search workflows are available.
+
+Use this file when starting the generated Docker stack:
+
+```bash
+docker compose -f docker-compose.final.yml up -d
+```
+
+The base `docker-compose.yml` remains the general project compose file. The generated `docker-compose.final.yml` is the wizard-managed output.
+
+## Troubleshooting And Advanced Notes
+
+- If `make env-storage` or `make env-server` says `.env` is missing, run `make env-base` first.
+- You do not need to run `make env-backup` before rerunning `env-base`, `env-storage`, or `env-server`; those flows already back up the existing `.env`, and they also back up the generated compose file before changing it.
+- If you need to fully rebuild wizard-managed compose services from the current bundled templates, use `make env-base-rewrite` or `make env-storage-rewrite`.
+- If you switch between host-oriented and Docker-oriented workflows, rerun the relevant setup step instead of trying to manually merge old settings.
+- If the generated stack includes local Milvus, make sure `MINIO_ACCESS_KEY_ID` and `MINIO_SECRET_ACCESS_KEY` are available before running `docker compose -f docker-compose.final.yml up -d`.
+- For Docker deployment details beyond the interactive wizard, see [DockerDeployment.md](./DockerDeployment.md).
+
+## Typical Command Sequences
+
+### Remote models, local server
+
+```bash
+make env-base
+lightrag-server
+```
+
+### Remote LLM, local embedding and rerank in Docker
+
+```bash
+make env-base
+docker compose -f docker-compose.final.yml up -d
+```
+
+### Add storage after the base setup
+
+```bash
+make env-base
+make env-storage
+docker compose -f docker-compose.final.yml up -d
+```
+
+### Add security and SSL before exposure
+
+```bash
+make env-base
+make env-storage
+make env-server
+make env-security-check
+docker compose -f docker-compose.final.yml up -d
+```

+ 1155 - 0
docs/LightRAG-API-Server-zh.md

@@ -0,0 +1,1155 @@
+# LightRAG 服务器和 WebUI
+
+LightRAG 服务器旨在提供 Web 界面和 API 支持。Web 界面便于进行文档索引、知识图谱探索和简单的 RAG 查询。LightRAG 服务器还提供了与 Ollama 兼容的接口,旨在将 LightRAG 模拟为 Ollama 聊天模型。这使得 AI 聊天机器人(如 Open WebUI)可以轻松访问 LightRAG。
+
+![image-20250323122538997](./LightRAG-API-Server.assets/image-20250323122538997.png)
+
+![image-20250323122754387](./LightRAG-API-Server.assets/image-20250323122754387.png)
+
+![image-20250323123011220](./LightRAG-API-Server.assets/image-20250323123011220.png)
+
+## 从 v1.4.16 升级到 v1.5.0rc2
+
+v1.5.0rc2 引入了新的文件处理流水线、解析器路由、多模态分析、基于角色的 LLM/VLM 配置、JSON 实体抽取以及若干 provider / storage 变更。升级生产实例前,请先阅读 [v1.5.0rc2 发布说明](https://github.com/HKUDS/LightRAG/releases/tag/v1.5.0rc2)。
+
+- 如果希望升级服务器但保持旧版文件处理行为,请设置:
+
+```bash
+LIGHTRAG_PARSER=*:legacy-F
+```
+
+- `ENTITY_TYPES` 已不再支持。请改用 `ENTITY_TYPE_PROMPT_FILE`,并把 YAML profile 放在 `PROMPT_DIR/entity_type` 下(`PROMPT_DIR` 默认是 `./prompts`)。参考模板位于 `prompts/samples/entity_type_prompt.sample.yml`。
+- 如果使用 OpenSearch 存储且集群版本低于 OpenSearch 3.3.0,请先升级 OpenSearch,再启用 v1.5 存储路径并校验已有索引。新部署建议使用 OpenSearch 3.3.0 或更高版本。
+- 更换 embedding 模型、向量维度、非对称 embedding 行为或 query/document 前缀会改变向量语义。请清空受影响的 LightRAG workspace/向量数据并重新索引源文件。
+- 修改解析器路由(`LIGHTRAG_PARSER`)或文件名 hint 只影响新上传文件。若要把已有文档切换到另一个解析引擎,请先删除该文档再重新上传。
+- 修改 chunker 配置(`CHUNK_*`)会影响服务器重启后入队的文档。若希望旧文档的 `chunk_options` 快照也采用新配置,请重新处理这些文档。
+- 启用多模态选项(`i/t/e`)需要已有解析 sidecar,并设置 `VLM_PROCESS_ENABLE=true`。已有文档可通过重新处理在可用 sidecar 上补跑 VLM 分析;但切换解析引擎仍需要删除并重新上传。
+
+## 入门指南
+
+### 安装
+
+* 从 PyPI 安装
+
+```bash
+### 使用 uv 安装 LightRAG 服务器(作为工具,推荐)
+uv tool install "lightrag-hku[api]"
+
+### 或使用 pip
+# python -m venv .venv
+# source .venv/bin/activate  # Windows: .venv\Scripts\activate
+# pip install "lightrag-hku[api]"
+```
+
+* 从源代码安装
+
+```bash
+# 克隆仓库
+git clone https://github.com/HKUDS/lightrag.git
+
+# 进入仓库目录
+cd lightrag
+
+# 一键初始化开发环境(推荐)
+make dev
+source .venv/bin/activate  # 激活虚拟环境 (Linux/macOS)
+# Windows 系统: .venv\Scripts\activate
+
+# make dev 会安装测试工具链以及完整的离线依赖栈
+# (API、存储后端与各类 Provider 集成),并构建前端;不会生成 .env。
+# 启动服务前请先运行 make env-base,或手动从 env.example 复制并配置 .env。
+
+# 使用 uv 的等价手动步骤
+# 注意: uv sync 会自动在 .venv/ 目录创建虚拟环境
+uv sync --extra test --extra offline
+source .venv/bin/activate  # 激活虚拟环境 (Linux/macOS)
+# Windows 系统: .venv\Scripts\activate
+
+# 或使用 pip 与虚拟环境
+# python -m venv .venv
+# source .venv/bin/activate  # Windows: .venv\Scripts\activate
+# pip install -e ".[test,offline]"
+
+# 构建前端代码
+cd lightrag_webui
+bun install --frozen-lockfile
+bun run build
+cd ..
+```
+
+### 启动 LightRAG 服务器前的准备
+
+LightRAG 需要同时集成 LLM(大型语言模型)和嵌入模型以有效执行文档索引和查询操作。在首次部署 LightRAG 服务器之前,必须配置 LLM 和嵌入模型的设置。
+
+LightRAG 支持以下 LLM 后端:
+
+* ollama
+* lollms
+* openai 或 openai 兼容
+* azure_openai
+* bedrock
+* gemini
+
+LightRAG 支持以下 embedding 后端:
+
+* lollms
+* ollama
+* openai 或 openai 兼容
+* azure_openai
+* bedrock
+* jina
+* gemini
+* voyageai
+
+建议使用环境变量来配置 LightRAG 服务器。项目根目录中有一个名为 `env.example` 的示例环境变量文件。请将此文件复制到启动目录并重命名为 `.env`。之后,您可以在 `.env` 文件中修改与 LLM 和嵌入模型相关的参数。需要注意的是,LightRAG 服务器每次启动时都会将 `.env` 中的环境变量加载到系统环境变量中。**LightRAG 服务器会优先使用系统环境变量中的设置**。
+
+> 由于安装了 Python 扩展的 VS Code 可能会在集成终端中自动加载 .env 文件,请在每次修改 .env 文件后打开新的终端会话。
+
+如果需要为实体抽取、关键词抽取、最终回答或多模态分析配置不同的 LLM/VLM,请参考 [基于角色的 LLM/VLM 配置指南](./RoleSpecificLLMConfiguration-zh.md)。
+
+以下是 LLM 和嵌入模型的一些常见设置示例:
+
+* OpenAI LLM + Ollama 嵌入
+
+```
+LLM_BINDING=openai
+LLM_MODEL=gpt-4o
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+
+EMBEDDING_BINDING=ollama
+EMBEDDING_BINDING_HOST=http://localhost:11434
+EMBEDDING_MODEL=bge-m3:latest
+EMBEDDING_DIM=1024
+# EMBEDDING_BINDING_API_KEY=your_api_key
+```
+
+> 如果改为使用 Google Gemini, 设置 `LLM_BINDING=gemini`, 选择模型 `LLM_MODEL=gemini-flash-latest`, 并设置访问密钥 `LLM_BINDING_API_KEY` (或 `GEMINI_API_KEY`).
+
+* Ollama LLM + Ollama 嵌入
+
+```
+LLM_BINDING=ollama
+LLM_MODEL=mistral-nemo:latest
+LLM_BINDING_HOST=http://localhost:11434
+# LLM_BINDING_API_KEY=your_api_key
+###  Ollama 服务器上下文 token 数(必须大于 MAX_TOTAL_TOKENS+2000)
+OLLAMA_LLM_NUM_CTX=8192
+
+EMBEDDING_BINDING=ollama
+EMBEDDING_BINDING_HOST=http://localhost:11434
+EMBEDDING_MODEL=bge-m3:latest
+EMBEDDING_DIM=1024
+# EMBEDDING_BINDING_API_KEY=your_api_key
+```
+
+> **重要提示**:在文档索引前必须确定使用的 Embedding 模型和非对称嵌入配置,且在查询阶段必须沿用相同设置。有些存储(例如 PostgreSQL)在首次建立表时需要确定向量维度。更换 Embedding 模型、向量维度、`EMBEDDING_ASYMMETRIC`、query/document 前缀或 provider task 行为后,必须清空现有 LightRAG workspace/向量数据并重新索引源文件。
+
+#### 非对称嵌入配置
+
+LightRAG 默认使用对称嵌入。只有显式设置 `EMBEDDING_ASYMMETRIC=true` 时,才会开启 query/document 非对称嵌入。
+
+- `jina`、`gemini`、`voyageai` 等 provider task 型绑定通过 provider 参数(`task` / `task_type` / `input_type`)区分 query/document,不应配置 query/document 前缀。
+- `openai`、`azure_openai`、`ollama` 等前缀型绑定必须同时配置 `EMBEDDING_QUERY_PREFIX` 和 `EMBEDDING_DOCUMENT_PREFIX`。如果某一侧明确不需要前缀,请使用 `NO_PREFIX`。
+- 任何非对称嵌入配置的有效变更,都需要清空已有数据并重新索引文件。
+
+完整校验规则和示例请参阅 [Asymmetric Embedding Configuration](./AsymmetricEmbedding.md)。
+
+### 使用 Setup 工具创建 .env 文件
+
+除了手动编辑 `env.example` 之外,您还可以使用交互式向导生成配置好的 `.env`,并在需要时生成 `docker-compose.final.yml`:
+
+```bash
+make env-base           # 必跑第一步:配置 LLM、Embedding、Reranker
+make env-storage        # 可选:配置存储后端和数据库服务
+make env-server         # 可选:配置服务端口、鉴权和 SSL
+make env-security-check # 可选:审计当前 .env 中的安全风险
+```
+
+每个目标的详细说明请参阅 [docs/InteractiveSetup.md](./InteractiveSetup.md)。
+这些 setup 向导只负责更新配置;如需在部署前审计当前 `.env` 的安全风险,请额外运行
+`make env-security-check`。
+
+### 启动 LightRAG 服务器
+
+LightRAG 服务器支持两种运行模式:
+* 简单高效的 Uvicorn 模式
+
+```
+lightrag-server
+```
+* 多进程 Gunicorn + Uvicorn 模式(生产模式,不支持 Windows 环境)
+
+```
+lightrag-gunicorn --workers 4
+```
+
+启动LightRAG的时候,当前工作目录必须含有`.env`配置文件。**要求将.env文件置于启动目录中是经过特意设计的**。 这样做的目的是支持用户同时启动多个LightRAG实例,并为不同实例配置不同的.env文件。**修改.env文件后,您需要重新打开终端以使新设置生效**。 这是因为每次启动时,LightRAG Server会将.env文件中的环境变量加载至系统环境变量,且系统环境变量的设置具有更高优先级。
+
+启动时可以通过命令行参数覆盖`.env`文件中的配置。常用的命令行参数包括:
+
+- `--host`:服务器监听地址(默认:0.0.0.0)
+- `--port`:服务器监听端口(默认:9621)
+- `--timeout`:LLM 请求超时时间(默认:150 秒)
+- `--log-level`:日志级别(默认:INFO)
+- `--working-dir`:数据库持久化目录(默认:./rag_storage)
+- `--input-dir`:上传文件存放目录(默认:./inputs)
+- `--workspace`: 工作空间名称,用于逻辑上隔离多个LightRAG实例之间的数据(默认:空)
+- `--api-prefix`:对浏览器暴露的反向代理路径前缀,也可通过 `LIGHTRAG_API_PREFIX` 配置
+- `--rerank-binding`:Rerank provider(`null`、`cohere`、`jina` 或 `aliyun`)
+
+### 路径前缀和多站点 WebUI
+
+当一台主机通过反向代理承载多个 LightRAG 实例,并由代理剥离站点前缀后再转发给后端时,请设置 `LIGHTRAG_API_PREFIX` 或 `--api-prefix`:
+
+```bash
+LIGHTRAG_API_PREFIX=/site01
+lightrag-server --port 9621
+```
+
+后端会把该值作为 FastAPI 的 `root_path`,并把同一个运行时前缀注入 WebUI。WebUI 在服务端内部始终挂载到 `/webui`,因此同一份前端构建产物可以服务任意前缀。完整的 Nginx、Docker 和 Kubernetes 示例请参阅 [Single-Server Multi-Site Deployment](./MultiSiteDeployment.md)。
+
+### 使用 Docker 启动 LightRAG 服务器
+
+使用 Docker Compose 是部署和运行 LightRAG Server 最便捷的方式。
+
+- 创建一个项目目录。
+- 将 LightRAG 仓库中的 `docker-compose.yml` 文件复制到您的项目目录中。
+- 准备 `.env` 文件:复制示例文件 [`env.example`](../env.example) 创建自定义的 `.env` 文件,并根据您的具体需求配置 LLM 和嵌入参数。
+- 通过以下命令启动 LightRAG 服务器:
+
+```shell
+docker compose up
+# 如果希望启动后让程序退到后台运行,需要在命令的最后添加 -d 参数
+```
+
+> 可以通过以下链接获取官方的docker compose文件:[docker-compose.yml]( https://raw.githubusercontent.com/HKUDS/LightRAG/refs/heads/main/docker-compose.yml) 。如需获取LightRAG的历史版本镜像,可以访问以下链接: [LightRAG Docker Images]( https://github.com/HKUDS/LightRAG/pkgs/container/lightrag). 如需获取更多关于docker部署的信息,请参阅 [DockerDeployment.md](./DockerDeployment.md).
+
+### 渐进式配置示例
+
+如果您是 LightRAG 新用户,建议从最小可运行配置开始,确认上一阶段正常后再逐步开启更多能力:
+
+1. 使用托管 LLM 和 Embedding 模型完成最小 Docker 启动
+2. 增加 Reranking 以提升查询质量
+3. 使用 MinerU 官方 API 和视觉模型开启多模态解析
+4. 迁移到 GPU 加速、Docker 托管数据库的准生产部署
+
+完整的 `env.example` 仍然是配置项总参考,并且会被 `make env-*` setup 向导使用。下面的片段只展示每一步最关键的配置。
+
+#### 1. 最小 Docker 启动
+
+如果您只想先把 WebUI 和 API 跑起来,并暂时不引入外部数据库、解析服务或本地模型服务,可以在 `docker-compose.yml` 旁边创建如下最小 `.env`:
+
+```bash
+###########################
+### Server Configuration
+###########################
+PORT=9621
+WEBUI_TITLE='My First LightRAG KB'
+WEBUI_DESCRIPTION='Simple and Fast Graph Based RAG System'
+OLLAMA_EMULATING_MODEL_TAG=latest
+
+########################################
+### Document processing configuration
+########################################
+SUMMARY_LANGUAGE=English
+ENTITY_EXTRACTION_USE_JSON=true
+LIGHTRAG_PARSER=*:native-teP,*:legacy-R
+VLM_PROCESS_ENABLE=false
+
+###########################################################################
+### LLM Configuration
+###########################################################################
+LLM_BINDING=openai
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+LLM_MODEL=gpt-5-mini
+
+KEYWORD_LLM_MODEL=gpt-5-nano
+QUERY_LLM_MODEL=gpt-5
+
+#######################################################################################
+### Embedding Configuration (do not change after the first file is processed)
+#######################################################################################
+EMBEDDING_BINDING=openai
+EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+EMBEDDING_BINDING_API_KEY=your_api_key
+EMBEDDING_MODEL=text-embedding-3-large
+EMBEDDING_DIM=3072
+EMBEDDING_TOKEN_LIMIT=8192
+EMBEDDING_SEND_DIM=false
+EMBEDDING_USE_BASE64=true
+
+############################
+### Data storage selection
+############################
+LIGHTRAG_KV_STORAGE=JsonKVStorage
+LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
+LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
+LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
+```
+
+如有需要,请将模型 ID 替换为您自己的 provider 账号可用的模型。上传文档前,先启动并验证服务:
+
+```bash
+docker compose up -d
+curl http://localhost:9621/health
+```
+
+然后打开 WebUI:`http://localhost:9621/webui`,上传一个小型文本或 DOCX 文件,等待索引完成后使用 `hybrid` 或 `mix` 模式查询。
+
+#### 2. 增加 Reranking
+
+Reranking 是查询阶段能力。启用、关闭或更换 reranker 通常不需要重新索引已有文档。
+
+使用 Cohere 官方托管 rerank 服务:
+
+```bash
+RERANK_BINDING=cohere
+RERANK_MODEL=rerank-v3.5
+RERANK_BINDING_HOST=https://api.cohere.com/v2/rerank
+RERANK_BINDING_API_KEY=your_cohere_api_key
+```
+
+使用本地 vLLM 部署、并暴露 Cohere-compatible API 的 reranker:
+
+```bash
+RERANK_BINDING=cohere
+RERANK_MODEL=BAAI/bge-reranker-v2-m3
+RERANK_BINDING_HOST=http://localhost:8000/rerank
+RERANK_BINDING_API_KEY=your_rerank_api_key_here
+```
+
+如果 LightRAG 自身运行在 Docker 容器中,而 reranker 运行在宿主机,请使用 `host.docker.internal` 等容器可访问地址,不要直接使用 `localhost`。如果 reranker 由 setup 向导生成,向导会自动把 Compose 内部服务地址注入到 `docker-compose.final.yml`。
+
+#### 3. 使用 MinerU 官方 API 开启多模态解析
+
+建议在基础文档流程已经正常后再开启该能力。使用 MinerU 官方 API 可以避免本地部署解析服务,但必须在 LightRAG 服务器启动前配置 `MINERU_API_TOKEN`。VLM 角色也必须使用支持图片输入的 provider/model。
+
+```bash
+LIGHTRAG_PARSER=*:native-iteP,*:mineru-iteP,*:legacy-R
+
+VLM_PROCESS_ENABLE=true
+VLM_LLM_MODEL=gpt-5-mini
+
+MINERU_API_MODE=official
+MINERU_API_TOKEN=your_mineru_api_token
+MINERU_OFFICIAL_ENDPOINT=https://mineru.net
+MINERU_MODEL_VERSION=vlm
+MINERU_IS_OCR=false
+```
+
+该路由会优先对支持的 DOCX 文件使用内置 `native` 解析器,对 PDF、图片等其他 MinerU 支持的文件使用 MinerU,最后回退到 `legacy`。`i`、`t`、`e` 选项会在解析器产出对应 sidecar 时,对图片、表格和公式运行 VLM 分析。
+
+使用 official 模式时,Docker 不需要访问宿主机上的 MinerU 回环地址;容器只需要能够访问 `MINERU_OFFICIAL_ENDPOINT`。
+
+#### 4. GPU All-In-One 风格部署
+
+对于本地 GPU 加速部署,建议使用 setup 向导生成 `.env` 和 `docker-compose.final.yml`,不要手写每个服务块:
+
+```bash
+make env-base
+```
+
+推荐选择:
+
+- 主 LLM 使用托管 provider 或 OpenAI-compatible provider。
+- 对 `Run embedding model locally via Docker (vLLM)?` 回答 `yes`。
+- Embedding device 选择 `cuda`。
+- 启用 reranking,对 `Run rerank service locally via Docker?` 回答 `yes`,rerank device 选择 `cuda`。
+
+然后配置存储:
+
+```bash
+make env-storage
+```
+
+推荐存储选择:
+
+- `LIGHTRAG_KV_STORAGE=PGKVStorage`
+- `LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage`
+- `LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage`
+- `LIGHTRAG_GRAPH_STORAGE=MemgraphStorage`
+- PostgreSQL、Milvus 和 Memgraph 均选择本地 Docker 运行。
+- 如果主机具备 NVIDIA GPU 支持且已安装 NVIDIA Container Toolkit,Milvus device 可选择 `cuda`。
+
+最后配置服务端对外设置并验证:
+
+```bash
+make env-server
+make env-validate
+make env-security-check
+docker compose -f docker-compose.final.yml up -d
+```
+
+对外暴露前,请在 `make env-server` 中配置认证、API key 和 SSL。生成的 `.env` 会保持宿主机可用;容器专用服务名和 Docker 专用覆盖项会写入 `docker-compose.final.yml`。
+
+处理生产数据前请注意:
+
+- 首次上传前确定 Embedding 模型、向量维度和非对称嵌入设置。之后修改这些配置需要清空对应 workspace/向量数据并重新索引文档。
+- 首次上传前确定存储后端。当前不支持在不同存储实现之间直接迁移。
+- 修改 `LIGHTRAG_PARSER` 只影响新上传文件。如需让已有文档使用新的解析路由,请删除后重新上传。
+
+### Nginx 反向代理配置
+
+在 LightRAG 服务器前使用 Nginx 作为反向代理时,需要为 `/documents/upload` 端点配置 `client_max_body_size` 以处理大文件上传。如果不进行此配置,Nginx 将拒绝大于 1MB(默认限制)的文件,并在请求到达 LightRAG 之前返回 `413 Request Entity Too Large` 错误。
+
+**推荐配置:**
+
+```nginx
+server {
+    listen 80;
+    server_name your-domain.com;
+
+    # 全局默认:8MB 用于 LLM 长上下文查询
+    client_max_body_size 8M;
+
+    # 上传端点:100MB 用于大文件上传
+    location /documents/upload {
+        client_max_body_size 100M;
+
+        proxy_pass http://localhost:9621;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+
+        # 大文件上传需要更长超时时间
+        proxy_read_timeout 300s;
+        proxy_send_timeout 300s;
+    }
+
+    # 流式端点:LLM 响应流式传输
+    location ~ ^/(query/stream|api/chat|api/generate) {
+        gzip off;  # 禁用流式响应的压缩
+
+        proxy_pass http://localhost:9621;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+
+        # LLM 生成需要较长超时
+        proxy_read_timeout 300s;
+    }
+
+    # 其他端点
+    location / {
+        proxy_pass http://localhost:9621;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+}
+```
+
+**关键要点:**
+
+1. **全局限制(8MB)**:足以处理具有长对话历史和上下文的 LLM 查询(128K tokens ≈ 512KB + JSON 开销)。
+2. **上传端点(100MB)**:必须匹配或超过 `.env` 文件中的 `MAX_UPLOAD_SIZE`。默认 `MAX_UPLOAD_SIZE` 为 100MB。
+3. **流式端点**:为流式端点禁用 gzip 压缩(`gzip off`)以确保实时响应传输。LightRAG 自动设置 `X-Accel-Buffering: no` 头以禁用响应缓冲。
+4. **超时设置**:大文件上传和 LLM 生成需要更长的超时时间;相应调整 `proxy_read_timeout` 和 `proxy_send_timeout`。
+5. **大小验证层**:
+   - Nginx 首先验证 `Content-Length` 头
+   - LightRAG 在上传过程中执行流式验证
+   - 在两层设置适当的限制可确保更好的错误消息和安全性
+
+### 离线部署
+
+官方的 LightRAG Docker 镜像完全兼容离线或隔离网络环境。如需搭建自己的离线部署环境,请参考 [离线部署指南](./OfflineDeployment.md)。
+
+### 启动多个 LightRAG 实例
+
+有两种方式可以启动多个LightRAG实例。第一种方式是为每个实例配置一个完全独立的工作环境。此时需要为每个实例创建一个独立的工作目录,然后在这个工作目录上放置一个当前实例专用的`.env`配置文件。不同实例的配置文件中的服务器监听端口不能重复,然后在工作目录上执行 lightrag-server 启动服务即可。
+
+第二种方式是所有实例共享一套相同的`.env`配置文件,然后通过命令行参数来为每个实例指定不同的服务器监听端口和工作空间。你可以在同一个工作目录中通过不同的命令行参数启动多个LightRAG实例。例如:
+
+```
+# 启动实例1
+lightrag-server --port 9621 --workspace space1
+
+# 启动实例2
+lightrag-server --port 9622 --workspace space2
+```
+
+工作空间的作用是实现不同实例之间的数据隔离。因此不同实例之间的`workspace`参数必须不同,否则会导致数据混乱,数据将会被破坏。
+
+通过 Docker Compose 启动多个 LightRAG 实例时,只需在 `docker-compose.yml` 中为每个容器指定不同的 `WORKSPACE` 和 `PORT` 环境变量即可。即使所有实例共享同一个 `.env` 文件,Compose 中定义的容器环境变量也会优先覆盖 `.env` 文件中的同名设置,从而确保每个实例拥有独立的配置。
+
+### LightRAG 实例间的数据隔离
+
+每个实例配置一个独立的工作目录和专用`.env`配置文件通常能够保证内存数据库中的本地持久化文件保存在各自的工作目录,实现数据的相互隔离。LightRAG默认存储全部都是内存数据库,通过这种方式进行数据隔离是没有问题的。但是如果使用的是外部数据库,如果不同实例访问的是同一个数据库实例,就需要通过配置工作空间来实现数据隔离,否则不同实例的数据将会出现冲突并被破坏。
+
+命令行的 workspace 参数和`.env`文件中的环境变量`WORKSPACE` 都可以用于指定当前实例的工作空间名字,命令行参数的优先级别更高。下面是不同类型的存储实现工作空间的方式:
+
+- **对于本地基于文件的数据库,数据隔离通过工作空间子目录实现:** JsonKVStorage, JsonDocStatusStorage, NetworkXStorage, NanoVectorDBStorage, FaissVectorDBStorage。
+- **对于将数据存储在集合(collection)中的数据库,通过在集合名称前添加工作空间前缀来实现:** RedisKVStorage, RedisDocStatusStorage, MilvusVectorDBStorage, QdrantVectorDBStorage, MongoKVStorage, MongoDocStatusStorage, MongoVectorDBStorage, MongoGraphStorage, PGGraphStorage。
+- **对于关系型数据库,数据隔离通过向表中添加 `workspace` 字段进行数据的逻辑隔离:** PGKVStorage, PGVectorStorage, PGDocStatusStorage。
+
+- **对于Neo4j图数据库,通过label来实现数据的逻辑隔离:** Neo4JStorage
+- **对于OpenSearch,通过索引名称前缀实现数据隔离:** OpenSearchKVStorage、OpenSearchDocStatusStorage、OpenSearchGraphStorage、OpenSearchVectorDBStorage
+
+为了保持对遗留数据的兼容,在未配置工作空间时PostgreSQL的默认工作空间为`default`,Neo4j的默认工作空间为`base`。对于所有的外部存储,系统都提供了专用的工作空间环境变量,用于覆盖公共的 `WORKSPACE`环境变量配置。这些适用于指定存储类型的工作空间环境变量为:`REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`, `OPENSEARCH_WORKSPACE`。
+
+### Gunicorn + Uvicorn 的多工作进程
+
+LightRAG 服务器可以在 `Gunicorn + Uvicorn` 预加载模式下运行。Gunicorn 的多工作进程(多进程)功能可以防止文档索引任务阻塞 RAG 查询。CPU 密集型文档提取工具应作为外置服务部署,避免阻塞 API 进程。
+
+虽然 LightRAG 服务器使用一个工作进程来处理文档索引流程,但通过 Uvicorn 的异步任务支持,可以并行处理多个文件。文档索引速度的瓶颈主要在于 LLM。如果您的 LLM 支持高并发,您可以通过增加 LLM 的并发级别来加速文档索引。以下是几个与并发处理相关的环境变量及其默认值:
+
+```
+### 工作进程数,数字不大于 (2 x 核心数) + 1
+WORKERS=2
+### 一批中并行处理的文件数
+MAX_PARALLEL_INSERT=2
+# LLM 的最大并发请求数
+MAX_ASYNC=4
+```
+
+在 macOS 上,Gunicorn 多工作进程模式还要求 Objective-C fork safety 覆盖变量必须在 Python 进程启动前就存在。不要依赖 `.env` 设置这个变量; `.env` 会在 Python 启动后才加载,对 Objective-C 运行时来说已经太晚:
+
+```shell
+export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
+lightrag-gunicorn --workers 2
+```
+
+### 将 LightRAG 安装为 Linux 服务
+
+从示例文件 `lightrag.service.example` 创建您的服务文件 `lightrag.service`。修改服务文件中的服务启动定义:
+
+```text
+# Set Environment to your Python virtual environment
+Environment="PATH=/home/netman/lightrag-xyj/venv/bin"
+WorkingDirectory=/home/netman/lightrag-xyj
+# ExecStart=/home/netman/lightrag-xyj/venv/bin/lightrag-server
+ExecStart=/home/netman/lightrag-xyj/venv/bin/lightrag-gunicorn
+```
+
+> ExecStart命令必须是 lightrag-gunicorn 或 lightrag-server 中的一个,不能使用其它脚本包裹它们,因为停止服务时要求主进程必须是这两个进程之一。
+
+安装 LightRAG 服务。如果您的系统是 Ubuntu,以下命令将生效:
+
+```shell
+sudo cp lightrag.service /etc/systemd/system/
+sudo systemctl daemon-reload
+sudo systemctl start lightrag.service
+sudo systemctl status lightrag.service
+sudo systemctl enable lightrag.service
+```
+
+## Ollama 模拟
+
+我们为 LightRAG 提供了 Ollama 兼容接口,旨在将 LightRAG 模拟为 Ollama 聊天模型。这使得支持 Ollama 的 AI 聊天前端(如 Open WebUI)可以轻松访问 LightRAG。
+
+### 将 Open WebUI 连接到 LightRAG
+
+启动 lightrag-server 后,您可以在 Open WebUI 管理面板中添加 Ollama 类型的连接。然后,一个名为 `lightrag:latest` 的模型将出现在 Open WebUI 的模型管理界面中。用户随后可以通过聊天界面向 LightRAG 发送查询。对于这种用例,最好将 LightRAG 安装为服务。
+
+Open WebUI 使用 LLM 来执行会话标题和会话关键词生成任务。因此,Ollama 聊天补全 API 会检测并将 OpenWebUI 会话相关请求直接转发给底层 LLM。Open WebUI 的截图:
+
+![image-20250323194750379](./LightRAG-API-Server.assets/image-20250323194750379.png)
+
+### 在聊天中选择查询模式
+
+如果您从 LightRAG 的 Ollama 接口发送消息(查询),默认查询模式是 `hybrid`。您可以通过发送带有查询前缀的消息来选择查询模式。
+
+查询字符串中的查询前缀可以决定使用哪种 LightRAG 查询模式来生成响应。支持的前缀包括:
+
+```
+/local
+/global
+/hybrid
+/naive
+/mix
+
+/bypass
+/context
+/localcontext
+/globalcontext
+/hybridcontext
+/naivecontext
+/mixcontext
+```
+
+例如,聊天消息 "/mix 唐僧有几个徒弟" 将触发 LightRAG 的 `mix` 模式查询。没有查询前缀的聊天消息默认会触发 `hybrid` 模式查询。
+
+"/bypass" 不是 LightRAG 查询模式,它会告诉 API 服务器将查询连同聊天历史直接传递给底层 LLM。因此用户可以使用 LLM 基于聊天历史回答问题。如果您使用 Open WebUI 作为前端,您可以直接切换到普通 LLM 模型,而不是使用 /bypass 前缀。
+
+"/context" 也不是 LightRAG 查询模式,它会告诉 LightRAG 只返回为 LLM 准备的上下文信息。您可以检查上下文是否符合您的需求,或者自行处理上下文。
+
+### 在聊天中添加用户提示词
+
+使用LightRAG进行内容查询时,应避免将搜索过程与无关的输出处理相结合,这会显著影响查询效果。用户提示(user prompt)正是为解决这一问题而设计 -- 它不参与RAG检索阶段,而是在查询完成后指导大语言模型(LLM)如何处理检索结果。我们可以在查询前缀末尾添加方括号,从而向LLM传递用户提示词:
+
+```
+/[使用mermaid格式画图] 请画出 Scrooge 的人物关系图谱
+/mix[使用mermaid格式画图] 请画出 Scrooge 的人物关系图谱
+```
+
+## API 密钥和认证
+
+默认情况下,LightRAG 服务器可以在没有任何认证的情况下访问。我们可以使用 API 密钥或账户凭证配置服务器以确保其安全。
+
+* API 密钥
+
+```
+LIGHTRAG_API_KEY=your-secure-api-key-here
+WHITELIST_PATHS=/health,/api/*
+```
+
+> 健康检查和 Ollama 模拟端点默认不进行 API 密钥检查。为了安全原因,如果不需要提供Ollama服务,应该把`/api/*`从WHITELIST_PATHS中移除。
+
+API Key使用的请求头是 `X-API-Key` 。以下是使用API访问LightRAG Server的一个例子:
+
+```
+curl -X 'POST' \
+  'http://localhost:9621/documents/scan' \
+  -H 'accept: application/json' \
+  -H 'X-API-Key: your-secure-api-key-here' \
+  -d ''
+```
+
+* 账户凭证(Web 界面需要登录后才能访问)
+
+LightRAG API 服务器使用基于 HS256 算法的 JWT 认证。要启用安全访问控制,需要以下环境变量:
+
+```bash
+# JWT 认证
+AUTH_ACCOUNTS='admin:{bcrypt}$2b$12$replace-with-generated-hash,user1:pass456'
+TOKEN_SECRET='your-key'
+TOKEN_EXPIRE_HOURS=4
+```
+
+没有前缀的密码会被当作明文。要使用 bcrypt,请在生成出的哈希前加上 `{bcrypt}`。最方便的方式是直接运行:
+
+```bash
+lightrag-hash-password --username admin
+```
+
+该命令会安全提示输入密码,并输出可直接粘贴到 `.env` 的 `admin:{bcrypt}...` 条目。
+
+> 目前仅支持配置一个管理员账户和密码。尚未开发和实现完整的账户系统。
+
+如果未配置账户凭证,Web 界面将以访客身份访问系统。因此,即使仅配置了 API 密钥,所有 API 仍然可以通过访客账户访问,这仍然不安全。因此,要保护 API,需要同时配置这两种认证方法。
+
+## Azure OpenAI 后端配置
+
+可以使用以下 Azure CLI 命令创建 Azure OpenAI API(您需要先从 [https://docs.microsoft.com/en-us/cli/azure/install-azure-cli](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) 安装 Azure CLI):
+
+```bash
+# 根据需要更改资源组名称、位置和 OpenAI 资源名称
+RESOURCE_GROUP_NAME=LightRAG
+LOCATION=swedencentral
+RESOURCE_NAME=LightRAG-OpenAI
+
+az login
+az group create --name $RESOURCE_GROUP_NAME --location $LOCATION
+az cognitiveservices account create --name $RESOURCE_NAME --resource-group $RESOURCE_GROUP_NAME  --kind OpenAI --sku S0 --location $LOCATION
+az cognitiveservices account deployment create --resource-group $RESOURCE_GROUP_NAME  --model-format OpenAI --name $RESOURCE_NAME --deployment-name gpt-4o --model-name gpt-4o --model-version "2024-08-06"  --sku-capacity 100 --sku-name "Standard"
+az cognitiveservices account deployment create --resource-group $RESOURCE_GROUP_NAME  --model-format OpenAI --name $RESOURCE_NAME --deployment-name text-embedding-3-large --model-name text-embedding-3-large --model-version "1"  --sku-capacity 80 --sku-name "Standard"
+az cognitiveservices account show --name $RESOURCE_NAME --resource-group $RESOURCE_GROUP_NAME --query "properties.endpoint"
+az cognitiveservices account keys list --name $RESOURCE_NAME -g $RESOURCE_GROUP_NAME
+```
+
+最后两个命令的输出将分别提供 OpenAI API 的端点和密钥。您可以使用这些值在 `.env` 文件中设置环境变量。
+
+```
+# .env 中的 Azure OpenAI 配置
+LLM_BINDING=azure_openai
+LLM_BINDING_HOST=your-azure-endpoint
+LLM_MODEL=your-model-deployment-name
+LLM_BINDING_API_KEY=your-azure-api-key
+### API Version可选,默认为最新版本
+AZURE_OPENAI_API_VERSION=2024-08-01-preview
+
+### 如果使用 Azure OpenAI 进行嵌入
+EMBEDDING_BINDING=azure_openai
+EMBEDDING_MODEL=your-embedding-deployment-name
+```
+
+## LightRAG 服务器详细配置
+
+API 服务器可以通过两种方式配置(优先级从高到低):
+
+* 命令行参数
+* 环境变量或 .env 文件
+
+大多数配置都有默认设置,详细信息请查看示例文件:`env.example`。存储配置也应通过环境变量或 `.env` 文件设置。
+
+### 支持的 LLM 和嵌入后端
+
+LightRAG 支持绑定到各种 LLM 后端:
+
+* ollama
+* openai (含 openai 兼容)
+* azure_openai
+* lollms
+* bedrock
+* gemini
+
+LightRAG 支持绑定到各种嵌入后端:
+
+* lollms
+* ollama
+* openai (含 openai 兼容)
+* azure_openai
+* bedrock
+* jina
+* gemini
+* voyageai
+
+使用环境变量 `LLM_BINDING` 或 CLI 参数 `--llm-binding` 选择 LLM 后端类型。使用环境变量 `EMBEDDING_BINDING` 或 CLI 参数 `--embedding-binding` 选择嵌入后端类型。
+
+Bedrock 会忽略 `LLM_BINDING_API_KEY` 和 `EMBEDDING_BINDING_API_KEY`。请通过 AWS credential chain 使用 SigV4 凭据;如果要使用 Bedrock API key / bearer token,请在启动前显式设置进程级环境变量 `AWS_BEARER_TOKEN_BEDROCK`:
+
+```bash
+LLM_BINDING=bedrock
+LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+LLM_MODEL=us.amazon.nova-lite-v1:0
+AWS_REGION=us-west-2
+# 使用 AWS credential chain,或设置 AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY,
+# 或在启动服务器前设置 AWS_BEARER_TOKEN_BEDROCK。
+```
+
+非对称嵌入需要显式开启。仅当所选嵌入后端支持 provider task 参数或任务前缀时,才设置 `EMBEDDING_ASYMMETRIC=true`。修改这些设置前请先阅读 [Asymmetric Embedding Configuration](./AsymmetricEmbedding.md),因为任何变更后都必须清空已有数据并重新索引文件。
+
+LLM 和 Embedding 配置例子请查看项目根目录的 `env.example` 文件。OpenAI 和 Ollama 兼容 LLM 接口所支持的完整配置选项可以通过以下命令查看:
+
+```
+lightrag-server --llm-binding openai --help
+lightrag-server --llm-binding ollama --help
+lightrag-server --llm-binding gemini --help
+lightrag-server --embedding-binding ollama --help
+lightrag-server --embedding-binding gemini --help
+```
+
+> 请使用 openai 兼容方式访问 OpenRouter、vLLM 或 SGLang 部署的 LLM。可以通过 `OPENAI_LLM_EXTRA_BODY` 环境变量给 OpenRouter、vLLM 或 SGLang 推理框架传递额外的参数,实现推理模式的关闭或者其它个性化控制。
+
+设置 `max_tokens` 参数旨在**防止在实体关系提取阶段出现LLM 响应输出过长或无休止的循环输出的问题**。设置 `max_tokens` 参数的目的是在超时发生之前截断 LLM 输出,从而防止文档提取失败。这解决了某些包含大量实体和关系的文本块(例如表格或引文)可能导致 LLM 产生过长甚至无限循环输出的问题。此设置对于本地部署的小参数模型尤为重要。`max_tokens` 值可以通过以下公式计算:
+
+```
+# For vLLM/SGLang deployed models, or most OpenAI-compatible API providers
+OPENAI_LLM_MAX_TOKENS=9000
+
+# For Ollama deployed models
+OLLAMA_LLM_NUM_PREDICT=9000
+
+# For OpenAI o1-mini or newer models
+OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+```
+
+### 基于角色的 LLM/VLM 配置
+
+服务器可以为不同处理阶段使用不同模型,而不改变客户端 API。当前支持四个角色:
+
+| 角色 | 用途 |
+| --- | --- |
+| `EXTRACT` | 实体/关系抽取以及实体/关系描述合并摘要 |
+| `KEYWORD` | 查询检索前的关键词生成 |
+| `QUERY` | 最终回答、bypass 查询以及 Ollama 兼容聊天响应 |
+| `VLM` | 图片、表格、公式等 sidecar 项目的多模态分析 |
+
+如果某个角色未单独配置,会继承基础 `LLM_*` 设置。同 provider 的最小示例:
+
+```bash
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+
+EXTRACT_LLM_MODEL=gpt-5-mini
+KEYWORD_LLM_MODEL=gpt-5-nano
+QUERY_LLM_MODEL=gpt-5
+VLM_LLM_MODEL=gpt-5-mini
+```
+
+跨 provider 规则、`QUERY_OPENAI_LLM_REASONING_EFFORT` 等 provider 专属选项、角色级 Bedrock SigV4 凭据以及队列行为,请参阅 [基于角色的 LLM/VLM 配置指南](./RoleSpecificLLMConfiguration-zh.md)。
+
+### 多模态分析配置
+
+解析器可以产出图片/绘图、表格和公式 sidecar。VLM 分析只会在两个条件同时满足时运行:
+
+- 文档的 `process_options` 包含对应模态标记:`i` 表示图片,`t` 表示表格,`e` 表示公式。
+- `VLM_PROCESS_ENABLE=true`,且实际生效的 VLM binding 支持图片输入。
+
+当前支持视觉输入的 provider 包括 `openai`、`azure_openai`、`gemini`、`bedrock`、`ollama` 和 `anthropic`;`lollms` 不能用于 VLM。典型配置:
+
+```bash
+VLM_PROCESS_ENABLE=true
+VLM_LLM_BINDING=openai
+VLM_LLM_MODEL=gpt-4o
+VLM_LLM_BINDING_HOST=https://api.openai.com/v1
+VLM_LLM_BINDING_API_KEY=your_vlm_api_key
+VLM_MAX_IMAGE_BYTES=5242880
+SURROUNDING_LEADING_MAX_TOKENS=2000
+SURROUNDING_TRAILING_MAX_TOKENS=2000
+```
+
+周边上下文预算控制在 VLM 和抽取 prompt 中为一个多模态项目注入多少附近文本。解析器与单文件选项示例见 [文档和块处理逻辑说明](#文档和块处理逻辑说明)。
+
+### 实体提取配置
+
+实体抽取使用基础 LLM 或 `EXTRACT` 角色 LLM。重要的服务端选项包括:
+
+- `ENABLE_LLM_CACHE_FOR_EXTRACT`:启用实体抽取 LLM 缓存(默认:`true`)。这对测试环境和重新处理很有用。
+- `ENTITY_EXTRACTION_USE_JSON`:要求实体抽取输出 JSON 结构。v1.5 推荐开启以提高可靠性,但会增加一定延迟。
+- `ENTITY_TYPE_PROMPT_FILE`:实体类型指导和示例的 YAML profile 文件名。该值只能是文件名,文件从 `PROMPT_DIR/entity_type` 加载,不要传绝对路径。
+- `MAX_EXTRACT_INPUT_TOKENS`:单次抽取输入上下文的最大 token 预算。
+- `MAX_EXTRACTION_RECORDS`:单次响应中实体和关系记录总数上限。
+- `MAX_EXTRACTION_ENTITIES`:单次响应中实体记录数上限。
+
+示例:
+
+```bash
+ENTITY_EXTRACTION_USE_JSON=true
+ENTITY_TYPE_PROMPT_FILE=entity_type_prompt.yml
+PROMPT_DIR=/opt/lightrag/prompts
+MAX_EXTRACT_INPUT_TOKENS=20480
+MAX_EXTRACTION_RECORDS=100
+MAX_EXTRACTION_ENTITIES=40
+ENABLE_LLM_CACHE_FOR_EXTRACT=true
+```
+
+如果旧 `.env` 中仍包含 `ENTITY_TYPES`,请在启动前移除。该变量已被 prompt profile 替代,服务器会对此进行快速失败校验。
+
+### 支持的存储类型
+
+LightRAG 使用 4 种类型的存储用于不同目的:
+
+* KV_STORAGE:llm 响应缓存、文本块、文档信息
+* VECTOR_STORAGE:实体向量、关系向量、块向量
+* GRAPH_STORAGE:实体关系图
+* DOC_STATUS_STORAGE:文档索引状态
+
+每种存储类型都有多种存储实现方式。LightRAG Server 默认的存储实现为内存数据库,数据通过文件持久化保存到 WORKING_DIR 目录。LightRAG 还支持 PostgreSQL、MongoDB、FAISS、Milvus、Qdrant、Neo4j、Memgraph、Redis 和 OpenSearch 等存储实现方式。详细的存储支持方式请参考根目录下的 `README.md` 文件中关于存储的相关内容。
+
+**Milvus 索引配置:** LightRAG 现在可通过环境变量支持对 Milvus 向量存储的可配置索引类型(AUTOINDEX、HNSW、HNSW_SQ、IVF_FLAT 等)。HNSW_SQ 需要 Milvus 2.6.8 或更高版本,并能显著节省内存。有关完整的配置选项,请参阅主 README.md 文件中的“使用 Milvus 进行向量存储”部分。
+
+您可以通过环境变量选择存储实现。例如,在首次启动 API 服务器之前,您可以将以下环境变量设置为特定的存储实现名称:
+
+```
+LIGHTRAG_KV_STORAGE=PGKVStorage
+LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
+LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
+LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
+```
+
+在向 LightRAG 添加文档后,您不能更改存储实现选择。目前尚不支持从一个存储实现迁移到另一个存储实现。更多配置信息请阅读示例 `env.example` 文件。
+
+### 在不同存储类型之间迁移LLM缓存
+
+当 LightRAG 更换存储实现方式的时候,可以将 LLM 缓存从旧的存储迁移到新的存储。之后在新的存储上重新上传文件时,将利用原有存储的 LLM 缓存大幅度加快文件处理的速度。LLM 缓存迁移工具的使用方法请参考 [README_MIGRATE_LLM_CACHE.md](../lightrag/tools/README_MIGRATE_LLM_CACHE.md)
+
+### LightRAG API 服务器命令行选项
+
+| 参数 | 默认值 | 描述 |
+| --- | --- | --- |
+| `--host` | `0.0.0.0` | 服务器监听主机 |
+| `--port` | `9621` | 服务器端口 |
+| `--working-dir` | `./rag_storage` | RAG 存储工作目录 |
+| `--input-dir` | `./inputs` | 上传/输入文档目录 |
+| `--timeout` | `150` | Gunicorn worker timeout 以及 fallback 请求超时 |
+| `--max-async` | `4` | 最大并发 LLM 操作数 |
+| `--log-level` | `INFO` | 日志级别(`DEBUG`、`INFO`、`WARNING`、`ERROR`、`CRITICAL`) |
+| `--verbose` | `False` | 详细调试输出,配合 debug 日志生效 |
+| `--key` | `None` | 用于认证的 API key |
+| `--ssl` | `False` | 启用 HTTPS |
+| `--ssl-certfile` | `None` | SSL 证书文件路径,启用 `--ssl` 时必需 |
+| `--ssl-keyfile` | `None` | SSL 私钥文件路径,启用 `--ssl` 时必需 |
+| `--workspace` | `""` | 用于存储隔离的默认 workspace |
+| `--api-prefix` | `""` | 反向代理路径前缀,也可通过 `LIGHTRAG_API_PREFIX` 配置 |
+| `--workers` | `1` | Gunicorn worker 数量 |
+| `--llm-binding` | `ollama` | LLM 绑定类型(`lollms`、`ollama`、`openai`、`openai-ollama`、`azure_openai`、`bedrock`、`gemini`) |
+| `--embedding-binding` | `ollama` | Embedding 绑定类型(`lollms`、`ollama`、`openai`、`azure_openai`、`bedrock`、`jina`、`gemini`、`voyageai`) |
+| `--rerank-binding` | `null` | Rerank 绑定类型(`null`、`cohere`、`jina`、`aliyun`) |
+
+### Reranking 配置
+
+Reranking 查询召回的块可以显著提高检索质量,它通过基于优化的相关性评分模型对文档重新排序。LightRAG 目前支持以下 rerank 提供商:
+
+- **Cohere / vLLM**:提供与 Cohere AI 的 `v2/rerank` 端点的完整 API 集成。由于 vLLM 提供了与 Cohere 兼容的 reranker API,因此也支持所有通过 vLLM 部署的 reranker 模型。
+- **Jina AI**:提供与所有 Jina rerank 模型的完全实现兼容性。
+- **阿里云**:具有旨在支持阿里云 rerank API 格式的自定义实现。
+
+Rerank 提供商通过 `.env` 文件进行配置。以下是使用 vLLM 本地部署的 rerank 模型的示例配置:
+
+```
+RERANK_BINDING=cohere
+RERANK_MODEL=BAAI/bge-reranker-v2-m3
+RERANK_BINDING_HOST=http://localhost:8000/rerank
+RERANK_BINDING_API_KEY=your_rerank_api_key_here
+```
+
+以下是使用阿里云提供的 Reranker 服务的示例配置:
+
+```
+RERANK_BINDING=aliyun
+RERANK_MODEL=gte-rerank-v2
+RERANK_BINDING_HOST=https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank
+RERANK_BINDING_API_KEY=your_rerank_api_key_here
+```
+
+Reranker 调用有独立的并发和超时控制:
+
+```bash
+MAX_ASYNC_RERANK=4
+RERANK_TIMEOUT=30
+```
+
+`MAX_ASYNC_RERANK` 未设置时回退到 `MAX_ASYNC`。`RERANK_TIMEOUT` 有独立默认值,因为 reranker 请求通常比 LLM 生成请求短。更完整的 reranker 配置示例,包括 Cohere-compatible chunking 选项以及 Jina/阿里云 endpoint,请参阅 `env.example` 文件。
+
+### 启用 Reranking
+
+可以按查询启用或禁用 Reranking。
+
+`/query` 和 `/query/stream` API 端点包含一个 `enable_rerank` 参数,默认设置为 `true`,用于控制当前查询是否激活 reranking。要将 `enable_rerank` 参数的默认值更改为 `false`,请设置以下环境变量:
+
+```
+RERANK_BY_DEFAULT=False
+```
+
+### 在参考文件中包含文本块内容
+
+默认情况下,`/query` 和 `/query/stream` 端点返回的引用内容仅包括 `reference_id` 和 `file_path`。为了评估、调试或引用展示的需要,你可以要求在返回的引用内容中包含实际检索到的文本块内容。
+
+参数 `include_chunk_content`(默认值:`false`)控制返回的引用内容中是否包含召回文本块的原文内容。这对于以下情形非常有用:
+
+- **RAG 评估**: RAGAS 这类评估系统需要获取召回的原文才能工作
+- **Debugging**: 检查和验证生成答案时到底使用了哪些原文
+- **Citation Display**: 向用户展示回答引用了哪些原文
+- **Transparency**: 为RAG检索提供一个可以观察的过程
+
+**重要**: `content` 字段是一个**字符串数组**,其中每个字符串代表来自同一文件的分块(chunk)。由于单个文件可能对应多个分块,因此内容以列表形式返回,以保留分块边界。
+
+**API请求示例:**
+
+```json
+{
+  "query": "What is LightRAG?",
+  "mode": "mix",
+  "include_references": true,
+  "include_chunk_content": true
+}
+```
+
+**响应示例(含文本块内容):**
+
+```json
+{
+  "response": "LightRAG is a graph-based RAG system...",
+  "references": [
+    {
+      "reference_id": "1",
+      "file_path": "/documents/intro.md",
+      "content": [
+        "LightRAG is a retrieval-augmented generation system that combines knowledge graphs with vector similarity search...",
+        "The system uses a dual-indexing approach with both vector embeddings and graph structures for enhanced retrieval..."
+      ]
+    },
+    {
+      "reference_id": "2",
+      "file_path": "/documents/features.md",
+      "content": [
+        "The system provides multiple query modes including local, global, hybrid, and mix modes..."
+      ]
+    }
+  ]
+}
+```
+
+**说明**:
+- 此参数仅在配合 `include_references=true` 参数时生效。如果请求中没有启用引用参数,`include_chunk_content=true` 设置不会生效。
+- **破坏性变化**: 之前版本返回的 `content` 是一个拼接在一起的字符串。现在返回的是一个字符串数组,每个字符串代表一个分块的内容。这是为了保留分块边界,避免在合并时丢失信息。如果需要将所有分块合并为一个字符串,可使用 `"\n\n".join(content)` 等方法。
+
+### .env 文件示例
+
+下面示例适合作为已有部署的调优参考。首次运行建议优先阅读[渐进式配置示例](#渐进式配置示例),而不是直接手动复制完整 `env.example`。
+
+```bash
+### Server Configuration
+# HOST=0.0.0.0
+PORT=9621
+WORKERS=2
+# LIGHTRAG_API_PREFIX=/site01
+
+### Settings for document indexing
+ENABLE_LLM_CACHE_FOR_EXTRACT=true
+ENTITY_EXTRACTION_USE_JSON=true
+# ENTITY_TYPE_PROMPT_FILE=entity_type_prompt.yml
+# MAX_EXTRACT_INPUT_TOKENS=20480
+# MAX_EXTRACTION_RECORDS=100
+# MAX_EXTRACTION_ENTITIES=40
+SUMMARY_LANGUAGE=Chinese
+MAX_PARALLEL_INSERT=2
+LIGHTRAG_PARSER=*:native-teP,*:legacy-R
+# CHUNK_R_SEPARATORS=["\n\n","\n","。","!","?",";",","," ",""]
+# CHUNK_P_SIZE=2000
+
+### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
+TIMEOUT=150
+MAX_ASYNC=4
+
+LLM_BINDING=openai
+LLM_MODEL=gpt-4o-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your-api-key
+KEYWORD_LLM_MODEL=gpt-4o-mini
+QUERY_LLM_MODEL=gpt-4o
+
+### Optional VLM configuration for documents using i/t/e process options
+VLM_PROCESS_ENABLE=false
+# VLM_LLM_MODEL=gpt-4o
+# VLM_MAX_IMAGE_BYTES=5242880
+# SURROUNDING_LEADING_MAX_TOKENS=2000
+# SURROUNDING_TRAILING_MAX_TOKENS=2000
+
+### Optional reranker configuration
+RERANK_BINDING=null
+# MAX_ASYNC_RERANK=4
+# RERANK_TIMEOUT=30
+
+### Embedding Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
+# see also env.ollama-binding-options.example for fine tuning ollama
+EMBEDDING_MODEL=bge-m3:latest
+EMBEDDING_DIM=1024
+EMBEDDING_BINDING=ollama
+EMBEDDING_BINDING_HOST=http://localhost:11434
+# 可选:前缀型模型的非对称嵌入配置
+# EMBEDDING_ASYMMETRIC=true
+# EMBEDDING_QUERY_PREFIX="search_query: "
+# EMBEDDING_DOCUMENT_PREFIX="search_document: "
+# 如果某一侧明确不需要前缀,请使用 NO_PREFIX。
+
+### For JWT Auth
+# AUTH_ACCOUNTS='admin:{bcrypt}$2b$12$replace-with-generated-hash,user1:pass456'
+# TOKEN_SECRET=your-key-for-LightRAG-API-Server-xxx
+# TOKEN_EXPIRE_HOURS=48
+
+# LIGHTRAG_API_KEY=your-secure-api-key-here-123
+# WHITELIST_PATHS=/api/*
+# WHITELIST_PATHS=/health,/api/*
+```
+
+## 文档和块处理逻辑说明
+
+v1.5 引入了分阶段文档流水线。文件会先经过内容抽取引擎,然后进入可选的多模态分析、文本分块,最后执行实体/关系抽取;如果该文件禁用了知识图谱构建,则跳过实体/关系抽取和图写入。
+
+### 快速配置示例
+
+保持 v1.4 兼容行为:
+
+```bash
+LIGHTRAG_PARSER=*:legacy-F
+```
+
+不依赖外部解析服务的推荐起点:
+
+```bash
+LIGHTRAG_PARSER=*:native-teP,*:legacy-R
+```
+
+该配置会对支持的文件使用内置 `native` 解析器,为这些文件启用表格/公式 sidecar 分析选项,并尽可能使用段落语义分块;其他文件回退到 legacy 抽取和递归分块。
+
+使用 MinerU 官方 API 和 VLM 的完整多模态配置:
+
+```bash
+LIGHTRAG_PARSER=*:native-iteP,*:mineru-iteP,*:legacy-R
+VLM_PROCESS_ENABLE=true
+VLM_LLM_MODEL=gpt-4o
+MINERU_API_MODE=official
+MINERU_API_TOKEN=your_mineru_api_token
+MINERU_OFFICIAL_ENDPOINT=https://mineru.net
+MINERU_MODEL_VERSION=vlm
+MINERU_IS_OCR=false
+```
+
+如果将文件路由到 `docling`,请配置 `DOCLING_ENDPOINT=http://localhost:5001`。
+
+### 解析引擎和路由
+
+`LIGHTRAG_PARSER` 按文件扩展名定义默认抽取规则。规则从左到右匹配,可以用逗号或分号分隔:
+
+```bash
+LIGHTRAG_PARSER=pdf:mineru-R,docx:native-ietP,*:legacy-R
+```
+
+支持的引擎:
+
+| 引擎 | 用途 |
+| --- | --- |
+| `legacy` | 原有抽取行为,适合兼容旧部署和简单文本类文件。 |
+| `native` | 内置结构化解析器,目前重点支持 `.docx` 和 LightRAG Document sidecar。 |
+| `mineru` | 外部 MinerU 解析器,适用于 PDF、Office 文件和图片。需要配置 `MINERU_API_MODE` 以及 `MINERU_LOCAL_ENDPOINT` 或 `MINERU_API_TOKEN`。 |
+| `docling` | 外部 docling-serve 解析器,适用于 PDF、Office 文件、Markdown/HTML 和图片。需要配置 `DOCLING_ENDPOINT`。 |
+
+文件名 hint 可以覆盖单个上传文件的默认规则:
+
+```text
+paper.[mineru-iteP].pdf
+memo.[native-R!].docx
+notes.[-R].md
+```
+
+`/documents/upload` 和 `/documents/scan` 会读取文件名 hint 和 `LIGHTRAG_PARSER`。`/documents/text` 与 `/documents/texts` 插入的是调用方已经提供的纯文本,在当前服务端路径中使用固定分块。
+
+### 处理选项
+
+处理选项可以在引擎后用连字符追加,也可以在文件名 hint 中单独写成 `[-OPTIONS]`。
+
+| 选项 | 含义 |
+| --- | --- |
+| `i` | 对存在的图片/绘图 sidecar 运行 VLM 分析 |
+| `t` | 对存在的表格 sidecar 运行 VLM 分析 |
+| `e` | 对存在的公式 sidecar 运行 VLM 分析 |
+| `!` | 跳过实体/关系抽取和图写入;仍会保存 chunk 向量 |
+| `F` | 固定 token 分块,即 legacy 分块方式 |
+| `R` | 递归字符分块,支持可配置分隔符级联 |
+| `V` | 语义向量分块;超长 chunk 会再用 `R` 切分 |
+| `P` | 面向结构化 LightRAG Document 内容的段落语义分块;缺少结构化内容时自动回退到 `R` |
+
+每个文件最多选择 `F`、`R`、`V`、`P` 中的一种。分块参数通过 `CHUNK_SIZE`、`CHUNK_OVERLAP_SIZE` 以及策略专属变量配置,例如 `CHUNK_R_SEPARATORS`、`CHUNK_V_BREAKPOINT_THRESHOLD_TYPE`、`CHUNK_P_SIZE`、`CHUNK_P_OVERLAP_SIZE`。这些值在服务器启动时读取,并在文档入队时作为该文档的 `chunk_options` 快照保存。
+
+完整路由语法、支持扩展名、解析缓存行为、chunker 配置、并发规则以及 Python SDK 差异,请参阅 [文件处理流水线规格](./FileProcessingPipeline-zh.md)。`P` 策略细节请参阅 [段落语义分块](./ParagraphSemanticChunking-zh.md)。如需在索引前调试解析输出,请参阅 [解析器调试 CLI](./ParserDebugCLI-zh.md)。
+
+### 流水线并发
+
+`MAX_PARALLEL_INSERT` 控制并行处理的文件数量。`MAX_ASYNC` 控制并发 LLM 调用,包括抽取、合并、查询关键词生成和最终回答生成。解析压力较大的部署可以使用可选的分阶段流水线变量,例如 `MAX_PARALLEL_PARSE_NATIVE`、`MAX_PARALLEL_PARSE_MINERU`、`MAX_PARALLEL_PARSE_DOCLING` 和 `MAX_PARALLEL_ANALYZE`。
+
+当处理循环 busy 时,上传和文本插入仍可被接受;运行中的循环会被通知并拾取新 pending 文档。`/documents/clear`、单文档删除等破坏性任务,以及 `/documents/scan` 的分类阶段仍会拒绝并发入队,以保护存储一致性。失败文件可通过 WebUI 重新处理,也可以触发 `/documents/scan`。
+
+## API 端点
+
+所有支持的后端(`lollms`、`ollama`、`openai` / OpenAI-compatible、`azure_openai`、`bedrock` 和 `gemini`)都暴露相同的 LightRAG REST API。当 API 服务器运行时,访问:
+
+- Swagger UI:http://localhost:9621/docs
+- ReDoc:http://localhost:9621/redoc
+
+您可以使用提供的 curl 命令或通过 Swagger UI 界面测试 API 端点。确保:
+
+1. 启动相应的后端服务,或确认托管 provider 的凭据可用
+2. 启动 RAG 服务器
+3. 使用文档管理端点上传一些文档
+4. 使用查询端点查询系统
+5. 如果在输入目录中放入新文件,触发文档扫描
+
+`/health` 端点会返回运行状态和关键配置,包括角色 LLM 配置、LLM/embedding/rerank 队列状态、workspace/storage workspace 映射、VLM 是否启用、rerank 是否启用,以及流水线 busy/scanning/destructive 状态。
+
+## 异步文档索引与进度跟踪
+
+LightRAG采用异步文档索引机制,便于前端监控和查询文档处理进度。用户通过指定端点上传文件或插入文本时,系统将返回唯一的跟踪ID,以便实时监控处理进度。
+
+**支持生成跟踪ID的API端点:**
+
+* `/documents/upload`
+* `/documents/text`
+* `/documents/texts`
+
+**文档处理状态查询端点:**
+* `/documents/track_status/{track_id}`
+
+该端点提供全面的状态信息,包括:
+* 文档处理状态(待处理/处理中/已处理/失败)
+* 内容摘要和元数据
+* 处理失败时的错误信息
+* 创建和更新时间戳

BIN
docs/LightRAG-API-Server.assets/image-20250323122538997.png


BIN
docs/LightRAG-API-Server.assets/image-20250323122754387.png


BIN
docs/LightRAG-API-Server.assets/image-20250323123011220.png


BIN
docs/LightRAG-API-Server.assets/image-20250323194750379.png


+ 1155 - 0
docs/LightRAG-API-Server.md

@@ -0,0 +1,1155 @@
+# LightRAG Server and WebUI
+
+The LightRAG Server is designed to provide a Web UI and API support. The Web UI facilitates document indexing, knowledge graph exploration, and a simple RAG query interface. LightRAG Server also provides an Ollama-compatible interface, aiming to emulate LightRAG as an Ollama chat model. This allows AI chat bots, such as Open WebUI, to access LightRAG easily.
+
+![image-20250323122538997](./LightRAG-API-Server.assets/image-20250323122538997.png)
+
+![image-20250323122754387](./LightRAG-API-Server.assets/image-20250323122754387.png)
+
+![image-20250323123011220](./LightRAG-API-Server.assets/image-20250323123011220.png)
+
+## Upgrading from v1.4.16 to v1.5.0rc2
+
+The v1.5.0rc2 release adds the new file-processing pipeline, parser routing, multimodal analysis, role-specific LLM/VLM configuration, JSON entity extraction, and several provider/storage changes. Review the [v1.5.0rc2 release notes](https://github.com/HKUDS/LightRAG/releases/tag/v1.5.0rc2) before upgrading a production instance.
+
+- To keep the old file-processing behavior while upgrading the server, set:
+
+```bash
+LIGHTRAG_PARSER=*:legacy-F
+```
+
+- `ENTITY_TYPES` is no longer supported. Use `ENTITY_TYPE_PROMPT_FILE` instead, with a YAML profile stored under `PROMPT_DIR/entity_type` (`PROMPT_DIR` defaults to `./prompts`). A sample template is available at `prompts/samples/entity_type_prompt.sample.yml`.
+- If you use OpenSearch storage and the cluster is older than OpenSearch 3.3.0, upgrade OpenSearch before enabling the v1.5 storage path and validate existing indices. For new deployments, use OpenSearch 3.3.0 or later.
+- Changing the embedding model, embedding dimension, asymmetric embedding behavior, or query/document prefixes changes vector semantics. Clear the affected LightRAG workspace/vector data and re-index source files.
+- Changing parser routing (`LIGHTRAG_PARSER`) or filename hints affects newly uploaded files. To switch an existing document to another parser engine, delete that document and upload it again.
+- Changing chunker settings (`CHUNK_*`) affects documents enqueued after the server restarts. Reprocess older documents if you want their stored `chunk_options` snapshot to match the new settings.
+- Enabling multimodal options (`i/t/e`) requires parsed sidecars plus `VLM_PROCESS_ENABLE=true`. Existing documents can be reprocessed to run VLM analysis on available sidecars; switching extraction engines still requires delete + re-upload.
+
+## Getting Started
+
+### Installation
+
+* Install from PyPI
+
+```bash
+### Install LightRAG Server as tool using uv (recommended)
+uv tool install "lightrag-hku[api]"
+
+### Or using pip
+# python -m venv .venv
+# source .venv/bin/activate  # Windows: .venv\Scripts\activate
+# pip install "lightrag-hku[api]"
+```
+
+* Installation from Source
+
+```bash
+# Clone the repository
+git clone https://github.com/HKUDS/lightrag.git
+
+# Change to the repository directory
+cd lightrag
+
+# Bootstrap the development environment (recommended)
+make dev
+source .venv/bin/activate  # Activate the virtual environment (Linux/macOS)
+# Or on Windows: .venv\Scripts\activate
+
+# make dev installs the test toolchain plus the full offline stack
+# (API, storage backends, and provider integrations), then builds the frontend.
+# Run make env-base or copy env.example to .env before starting the server.
+
+# Equivalent manual steps with uv
+# Note: uv sync automatically creates a virtual environment in .venv/
+uv sync --extra test --extra offline
+source .venv/bin/activate  # Activate the virtual environment (Linux/macOS)
+# Or on Windows: .venv\Scripts\activate
+
+# Or using pip with virtual environment
+# python -m venv .venv
+# source .venv/bin/activate  # Windows: .venv\Scripts\activate
+# pip install -e ".[test,offline]"
+
+# Build front-end artifacts
+cd lightrag_webui
+bun install --frozen-lockfile
+bun run build
+cd ..
+```
+
+### Before Starting LightRAG Server
+
+LightRAG necessitates the integration of both an LLM (Large Language Model) and an Embedding Model to effectively execute document indexing and querying operations. Prior to the initial deployment of the LightRAG server, it is essential to configure the settings for both the LLM and the Embedding Model.
+
+LightRAG supports these LLM backends:
+
+* ollama
+* lollms
+* openai or openai compatible
+* azure_openai
+* bedrock
+* gemini
+
+LightRAG supports these embedding backends:
+
+* lollms
+* ollama
+* openai or openai compatible
+* azure_openai
+* bedrock
+* jina
+* gemini
+* voyageai
+
+It is recommended to use environment variables to configure the LightRAG Server. There is an example environment variable file named `env.example` in the root directory of the project. Please copy this file to the startup directory and rename it to `.env`. After that, you can modify the parameters related to the LLM and Embedding models in the `.env` file. It is important to note that the LightRAG Server will load the environment variables from `.env` into the system environment variables each time it starts. **LightRAG Server will prioritize the settings in the system environment variables over those in the `.env` file**.
+
+> Since VS Code with the Python extension may automatically load the .env file in the integrated terminal, please open a new terminal session after each modification to the .env file.
+
+If you need to configure different LLMs/VLMs for entity extraction, keyword extraction, final answers, or multimodal analysis, see the [Role-Specific LLM/VLM Configuration Guide](./RoleSpecificLLMConfiguration.md).
+
+Here are some examples of common settings for LLM and Embedding models:
+
+* OpenAI LLM + Ollama Embedding:
+
+```
+LLM_BINDING=openai
+LLM_MODEL=gpt-4o
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+
+EMBEDDING_BINDING=ollama
+EMBEDDING_BINDING_HOST=http://localhost:11434
+EMBEDDING_MODEL=bge-m3:latest
+EMBEDDING_DIM=1024
+# EMBEDDING_BINDING_API_KEY=your_api_key
+```
+
+> When targeting Google Gemini, set `LLM_BINDING=gemini`, choose a model such as `LLM_MODEL=gemini-flash-latest`, and provide your Gemini key via `LLM_BINDING_API_KEY` (or `GEMINI_API_KEY`).
+
+* Ollama LLM + Ollama Embedding:
+
+```
+LLM_BINDING=ollama
+LLM_MODEL=mistral-nemo:latest
+LLM_BINDING_HOST=http://localhost:11434
+# LLM_BINDING_API_KEY=your_api_key
+###  Ollama Server context length (Must be larger than MAX_TOTAL_TOKENS+2000)
+OLLAMA_LLM_NUM_CTX=16384
+
+EMBEDDING_BINDING=ollama
+EMBEDDING_BINDING_HOST=http://localhost:11434
+EMBEDDING_MODEL=bge-m3:latest
+EMBEDDING_DIM=1024
+# EMBEDDING_BINDING_API_KEY=your_api_key
+```
+
+> **Important Note**: The embedding model and asymmetric embedding configuration must be determined before document indexing, and the same settings must be used during the query phase. For certain storage solutions (e.g., PostgreSQL), the vector dimension must be defined upon initial table creation. When changing the embedding model, embedding dimension, `EMBEDDING_ASYMMETRIC`, query/document prefixes, or provider task behavior, clear the existing LightRAG workspace/vector data and re-index the source files.
+
+#### Asymmetric Embedding Configuration
+
+LightRAG uses symmetric embeddings by default. Query/document asymmetric embeddings are enabled only when `EMBEDDING_ASYMMETRIC=true` is explicitly set.
+
+- Provider task bindings such as `jina`, `gemini`, and `voyageai` use provider parameters (`task` / `task_type` / `input_type`) and should not use query/document prefixes.
+- Prefix-based bindings such as `openai`, `azure_openai`, and `ollama` require both `EMBEDDING_QUERY_PREFIX` and `EMBEDDING_DOCUMENT_PREFIX`. Use `NO_PREFIX` for a side that should intentionally have no prefix.
+- Any valid change to asymmetric embedding settings requires clearing existing data and re-indexing files.
+
+For the full validation rules and examples, see [Asymmetric Embedding Configuration](./AsymmetricEmbedding.md).
+
+### Create .env File With Setup Tool
+
+Instead of editing `env.example` by hand, you can use the interactive setup wizard to generate a configured `.env` and, when needed, `docker-compose.final.yml`:
+
+```bash
+make env-base           # Required first step: LLM, embedding, reranker
+make env-storage        # Optional: storage backends and database services
+make env-server         # Optional: server port, auth, and SSL
+make env-security-check # Optional: audit the current .env for security risks
+```
+
+For a full description of every target and what each flow does, see [docs/InteractiveSetup.md](./InteractiveSetup.md).
+The setup wizards update configuration only; run `make env-security-check` separately to audit the
+current `.env` for security risks before deployment.
+
+### Starting LightRAG Server
+
+The LightRAG Server supports two operational modes:
+* The simple and efficient Uvicorn mode:
+
+```
+lightrag-server
+```
+* The multiprocess Gunicorn + Uvicorn mode (production mode, not supported on Windows environments):
+
+```
+lightrag-gunicorn --workers 4
+```
+
+When starting LightRAG, the current working directory must contain the `.env` configuration file. **It is intentionally designed that the `.env` file must be placed in the startup directory**. The purpose of this is to allow users to launch multiple LightRAG instances simultaneously and configure different `.env` files for different instances. **After modifying the `.env` file, you need to reopen the terminal for the new settings to take effect.** This is because each time LightRAG Server starts, it loads the environment variables from the `.env` file into the system environment variables, and system environment variables have higher precedence.
+
+During startup, configurations in the `.env` file can be overridden by command-line parameters. Common command-line parameters include:
+
+- `--host`: Server listening address (default: 0.0.0.0)
+- `--port`: Server listening port (default: 9621)
+- `--timeout`: LLM request timeout (default: 150 seconds)
+- `--log-level`: Log level (default: INFO)
+- `--working-dir`: Database persistence directory (default: ./rag_storage)
+- `--input-dir`: Directory for uploaded files (default: ./inputs)
+- `--workspace`: Workspace name, used to logically isolate data between multiple LightRAG instances (default: empty)
+- `--api-prefix`: Reverse-proxy path prefix exposed to browsers, also configurable with `LIGHTRAG_API_PREFIX`
+- `--rerank-binding`: Rerank provider (`null`, `cohere`, `jina`, or `aliyun`)
+
+### Path Prefix and Multi-Site WebUI
+
+Set `LIGHTRAG_API_PREFIX` or `--api-prefix` when one host serves multiple LightRAG instances behind a reverse proxy that strips a site prefix before forwarding to the backend:
+
+```bash
+LIGHTRAG_API_PREFIX=/site01
+lightrag-server --port 9621
+```
+
+The backend passes this value to FastAPI as `root_path` and injects the same runtime prefix into the WebUI. The WebUI is always mounted at `/webui` inside the server, so one frontend build can serve any prefix. See [Single-Server Multi-Site Deployment](./MultiSiteDeployment.md) for full Nginx, Docker, and Kubernetes examples.
+
+### Launching LightRAG Server with Docker
+
+Using Docker Compose is the most convenient way to deploy and run the LightRAG Server.
+
+- Create a project directory.
+- Copy the `docker-compose.yml` file from the LightRAG repository into your project directory.
+- Prepare the `.env` file: Duplicate the sample file [`env.example`](../env.example) to create a customized `.env` file, and configure the LLM and embedding parameters according to your specific requirements.
+- Start the LightRAG Server with the following command:
+
+```shell
+docker compose up
+# If you want the program to run in the background after startup, add the -d parameter at the end of the command.
+```
+
+You can get the official docker compose file from here: [docker-compose.yml](https://raw.githubusercontent.com/HKUDS/LightRAG/refs/heads/main/docker-compose.yml). For historical versions of LightRAG docker images, visit this link: [LightRAG Docker Images](https://github.com/HKUDS/LightRAG/pkgs/container/lightrag). For more details about docker deployment, please refer to [DockerDeployment.md](./DockerDeployment.md).
+
+### Progressive Setup Recipes
+
+If you are new to LightRAG, start with the smallest working configuration and add capabilities only after the previous step is healthy:
+
+1. Minimal Docker run with hosted LLM and embedding models
+2. Add reranking to improve query quality
+3. Add multimodal parsing with MinerU and a vision-capable model
+4. Move to a GPU-backed, Docker-managed deployment with database storage
+
+The full `env.example` file remains the complete configuration reference and is used by the `make env-*` setup wizard. The snippets below intentionally show only the values that matter for each step.
+
+#### 1. Minimal Docker Run
+
+Use this path when you want the WebUI and API running first, with no external database, parser service, or local model service. Create `.env` next to `docker-compose.yml` with a minimal OpenAI-compatible configuration:
+
+```bash
+###########################
+### Server Configuration
+###########################
+PORT=9621
+WEBUI_TITLE='My First LightRAG KB'
+WEBUI_DESCRIPTION='Simple and Fast Graph Based RAG System'
+OLLAMA_EMULATING_MODEL_TAG=latest
+
+########################################
+### Document processing configuration
+########################################
+SUMMARY_LANGUAGE=English
+ENTITY_EXTRACTION_USE_JSON=true
+LIGHTRAG_PARSER=*:native-teP,*:legacy-R
+VLM_PROCESS_ENABLE=false
+
+###########################################################################
+### LLM Configuration
+###########################################################################
+LLM_BINDING=openai
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+LLM_MODEL=gpt-5-mini
+
+KEYWORD_LLM_MODEL=gpt-5-nano
+QUERY_LLM_MODEL=gpt-5
+
+#######################################################################################
+### Embedding Configuration (do not change after the first file is processed)
+#######################################################################################
+EMBEDDING_BINDING=openai
+EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+EMBEDDING_BINDING_API_KEY=your_api_key
+EMBEDDING_MODEL=text-embedding-3-large
+EMBEDDING_DIM=3072
+EMBEDDING_TOKEN_LIMIT=8192
+EMBEDDING_SEND_DIM=false
+EMBEDDING_USE_BASE64=true
+
+############################
+### Data storage selection
+############################
+LIGHTRAG_KV_STORAGE=JsonKVStorage
+LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
+LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
+LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
+```
+
+Replace the model IDs with models available in your provider account when needed. Start the service and verify it before uploading documents:
+
+```bash
+docker compose up -d
+curl http://localhost:9621/health
+```
+
+Then open the WebUI at `http://localhost:9621/webui`, upload a small text or DOCX file, wait for indexing to finish, and run a `hybrid` or `mix` query.
+
+#### 2. Add Reranking
+
+Reranking is a query-time improvement. Enabling, disabling, or changing the reranker usually does not require re-indexing existing documents.
+
+For Cohere's official hosted rerank service:
+
+```bash
+RERANK_BINDING=cohere
+RERANK_MODEL=rerank-v3.5
+RERANK_BINDING_HOST=https://api.cohere.com/v2/rerank
+RERANK_BINDING_API_KEY=your_cohere_api_key
+```
+
+For a local vLLM reranker that exposes a Cohere-compatible API:
+
+```bash
+RERANK_BINDING=cohere
+RERANK_MODEL=BAAI/bge-reranker-v2-m3
+RERANK_BINDING_HOST=http://localhost:8000/rerank
+RERANK_BINDING_API_KEY=your_rerank_api_key_here
+```
+
+If LightRAG itself runs inside Docker and the reranker runs on the host, use a host-reachable address such as `host.docker.internal` instead of `localhost`. If the setup wizard creates the vLLM service, it injects the internal Compose service URL into `docker-compose.final.yml` for you.
+
+#### 3. Add Multimodal Parsing With MinerU Official API
+
+Use this after the basic document flow works. The MinerU official API avoids running a local parser service, but `MINERU_API_TOKEN` must be configured before the LightRAG server starts. The VLM role must use a provider/model that supports image input.
+
+```bash
+LIGHTRAG_PARSER=*:native-iteP,*:mineru-iteP,*:legacy-R
+
+VLM_PROCESS_ENABLE=true
+VLM_LLM_MODEL=gpt-5-mini
+
+MINERU_API_MODE=official
+MINERU_API_TOKEN=your_mineru_api_token
+MINERU_OFFICIAL_ENDPOINT=https://mineru.net
+MINERU_MODEL_VERSION=vlm
+MINERU_IS_OCR=false
+```
+
+This routing uses the built-in `native` parser for supported DOCX files, MinerU for other MinerU-supported files such as PDFs and images, and `legacy` as the fallback. The `i`, `t`, and `e` options enable VLM analysis for image, table, and equation sidecars when the parser produces them.
+
+For official mode, Docker does not need a host-loopback MinerU endpoint. The container only needs outbound network access to `MINERU_OFFICIAL_ENDPOINT`.
+
+#### 4. GPU All-In-One Style Deployment
+
+For a local GPU-backed deployment, let the wizard generate `.env` and `docker-compose.final.yml` instead of hand-writing every service block:
+
+```bash
+make env-base
+```
+
+Recommended answers:
+
+- Configure the main LLM as a hosted or OpenAI-compatible provider.
+- Answer `yes` to `Run embedding model locally via Docker (vLLM)?`.
+- Choose `cuda` for the embedding device.
+- Enable reranking, answer `yes` to `Run rerank service locally via Docker?`, and choose `cuda` for the rerank device.
+
+Then configure storage:
+
+```bash
+make env-storage
+```
+
+Recommended storage choices:
+
+- `LIGHTRAG_KV_STORAGE=PGKVStorage`
+- `LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage`
+- `LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage`
+- `LIGHTRAG_GRAPH_STORAGE=MemgraphStorage`
+- Answer `yes` to run PostgreSQL, Milvus, and Memgraph locally via Docker.
+- Choose `cuda` for Milvus if your host has NVIDIA GPU support and the NVIDIA Container Toolkit is installed.
+
+Finally configure server-facing settings and validate the result:
+
+```bash
+make env-server
+make env-validate
+make env-security-check
+docker compose -f docker-compose.final.yml up -d
+```
+
+Before exposing this deployment, configure authentication, API keys, and SSL in `make env-server`. The generated `.env` stays host-usable; container-only service names and Docker-specific overrides are written into `docker-compose.final.yml`.
+
+Important rules before processing production data:
+
+- Choose the embedding model, embedding dimension, and asymmetric embedding settings before the first upload. Changing them later requires clearing the affected workspace/vector data and re-indexing documents.
+- Choose storage backends before the first upload. Direct migration between storage implementations is not supported.
+- Changing `LIGHTRAG_PARSER` affects only newly uploaded files. Delete and upload an existing document again if you want it processed by a different parser route.
+
+### Nginx Reverse Proxy Configuration
+
+When using Nginx as a reverse proxy in front of LightRAG Server, you need to configure `client_max_body_size` for the `/documents/upload` endpoint to handle large file uploads. Without this configuration, Nginx will reject files larger than 1MB (the default limit) with a `413 Request Entity Too Large` error before the request reaches LightRAG.
+
+**Recommended Configuration:**
+
+```nginx
+server {
+    listen 80;
+    server_name your-domain.com;
+
+    # Global default: 8MB for LLM queries with long context
+    client_max_body_size 8M;
+
+    # Upload endpoint: 100MB for large file uploads
+    location /documents/upload {
+        client_max_body_size 100M;
+
+        proxy_pass http://localhost:9621;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+
+        # Increase timeouts for large file uploads
+        proxy_read_timeout 300s;
+        proxy_send_timeout 300s;
+    }
+
+    # Streaming endpoints: LLM response streaming
+    location ~ ^/(query/stream|api/chat|api/generate) {
+        gzip off;  # Disable compression for streaming responses
+
+        proxy_pass http://localhost:9621;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+
+        # Long timeout for LLM generation
+        proxy_read_timeout 300s;
+    }
+
+    # Other endpoints
+    location / {
+        proxy_pass http://localhost:9621;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+}
+```
+
+**Key Points:**
+
+1. **Global Limit (8MB)**: Sufficient for LLM queries with long conversation history and context (128K tokens ≈ 512KB + JSON overhead).
+2. **Upload Endpoint (100MB)**: Must match or exceed `MAX_UPLOAD_SIZE` in your `.env` file. The default `MAX_UPLOAD_SIZE` is 100MB.
+3. **Streaming Endpoints**: Disable gzip compression (`gzip off`) for streaming endpoints to ensure real-time response delivery. LightRAG automatically sets `X-Accel-Buffering: no` header to disable response buffering.
+4. **Timeout Settings**: Large file uploads and LLM generation require longer timeouts; adjust `proxy_read_timeout` and `proxy_send_timeout` accordingly.
+5. **Size Validation Layers**:
+   - Nginx validates the `Content-Length` header first
+   - LightRAG performs streaming validation during upload
+   - Setting appropriate limits at both layers ensures better error messages and security
+
+### Offline Deployment
+
+Official LightRAG Docker images are fully compatible with offline or air-gapped environments. If you want to build your own offline environment, please refer to the [Offline Deployment Guide](./OfflineDeployment.md).
+
+### Starting Multiple LightRAG Instances
+
+There are two ways to start multiple LightRAG instances. The first way is to configure a completely independent working environment for each instance. This requires creating a separate working directory for each instance and placing a dedicated `.env` configuration file in that directory. The server listening ports in the configuration files of different instances cannot be the same. Then, you can start the service by running `lightrag-server` in the working directory.
+
+The second way is for all instances to share the same set of `.env` configuration files, and then use command-line arguments to specify different server listening ports and workspaces for each instance. You can start multiple LightRAG instances in the same working directory with different command-line arguments. For example:
+
+```
+# Start instance 1
+lightrag-server --port 9621 --workspace space1
+
+# Start instance 2
+lightrag-server --port 9622 --workspace space2
+```
+
+The purpose of a workspace is to achieve data isolation between different instances. Therefore, the `workspace` parameter must be different for different instances; otherwise, it will lead to data confusion and corruption.
+
+When launching multiple LightRAG instances via Docker Compose, simply specify unique `WORKSPACE` and `PORT` environment variables for each container within your `docker-compose.yml`. Even if all instances share a common `.env` file, the container-specific environment variables defined in Compose will take precedence, ensuring independent configurations for each instance.
+
+### Data Isolation Between LightRAG Instances
+
+Configuring an independent working directory and a dedicated `.env` configuration file for each instance can generally ensure that locally persisted files in the in-memory database are saved in their respective working directories, achieving data isolation. By default, LightRAG uses all in-memory databases, and this method of data isolation is sufficient. However, if you are using an external database, and different instances access the same database instance, you need to use workspaces to achieve data isolation; otherwise, the data of different instances will conflict and be destroyed.
+
+The command-line `workspace` argument and the `WORKSPACE` environment variable in the `.env` file can both be used to specify the workspace name for the current instance, with the command-line argument having higher priority. Here is how workspaces are implemented for different types of storage:
+
+- **For local file-based databases, data isolation is achieved through workspace subdirectories:** `JsonKVStorage`, `JsonDocStatusStorage`, `NetworkXStorage`, `NanoVectorDBStorage`, `FaissVectorDBStorage`.
+- **For databases that store data in collections, it's done by adding a workspace prefix to the collection name:** `RedisKVStorage`, `RedisDocStatusStorage`, `MilvusVectorDBStorage`, `MongoKVStorage`, `MongoDocStatusStorage`, `MongoVectorDBStorage`, `MongoGraphStorage`, `PGGraphStorage`.
+- **For Qdrant vector database, data isolation is achieved through payload-based partitioning (Qdrant's recommended multitenancy approach):** `QdrantVectorDBStorage` uses shared collections with payload filtering for unlimited workspace scalability.
+- **For relational databases, data isolation is achieved by adding a `workspace` field to the tables for logical data separation:** `PGKVStorage`, `PGVectorStorage`, `PGDocStatusStorage`.
+- **For graph databases, logical data isolation is achieved through labels:** `Neo4JStorage`, `MemgraphStorage`
+- **For OpenSearch, data isolation is achieved through index name prefixes:** `OpenSearchKVStorage`, `OpenSearchDocStatusStorage`, `OpenSearchGraphStorage`, `OpenSearchVectorDBStorage`
+
+To maintain compatibility with legacy data, the default workspace for PostgreSQL is `default` and for Neo4j is `base` when no workspace is configured. For all external storages, the system provides dedicated workspace environment variables to override the common `WORKSPACE` environment variable configuration. These storage-specific workspace environment variables are: `REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`, `MEMGRAPH_WORKSPACE`, `OPENSEARCH_WORKSPACE`.
+
+### Multiple workers for Gunicorn + Uvicorn
+
+The LightRAG Server can operate in the `Gunicorn + Uvicorn` preload mode. Gunicorn's multiple worker (multiprocess) capability prevents document indexing tasks from blocking RAG queries. CPU-heavy document extraction tools should be deployed as external services so they do not block the API process.
+
+Though LightRAG Server uses one worker to process the document indexing pipeline, with the async task support of Uvicorn, multiple files can be processed in parallel. The bottleneck of document indexing speed mainly lies with the LLM. If your LLM supports high concurrency, you can accelerate document indexing by increasing the concurrency level of the LLM. Below are several environment variables related to concurrent processing, along with their default values:
+
+```
+### Number of worker processes, not greater than (2 x number_of_cores) + 1
+WORKERS=2
+### Number of parallel files to process in one batch
+MAX_PARALLEL_INSERT=2
+### Max concurrent requests to the LLM
+MAX_ASYNC=4
+```
+
+On macOS, Gunicorn multi-worker mode also requires the Objective-C fork-safety override to be present before the Python process starts. Do not rely on `.env` for this variable; `.env` is loaded after Python startup and is too late for the Objective-C runtime:
+
+```shell
+export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
+lightrag-gunicorn --workers 2
+```
+
+### Install LightRAG as a Linux Service
+
+Create your service file `lightrag.service` from the sample file `lightrag.service.example`. Modify the start options in the service file:
+
+```text
+# Set Environment to your Python virtual environment
+Environment="PATH=/home/netman/lightrag-xyj/venv/bin"
+WorkingDirectory=/home/netman/lightrag-xyj
+# ExecStart=/home/netman/lightrag-xyj/venv/bin/lightrag-server
+ExecStart=/home/netman/lightrag-xyj/venv/bin/lightrag-gunicorn
+```
+
+> The ExecStart command must be either `lightrag-gunicorn` or `lightrag-server`; no wrapper scripts are allowed. This is because service termination requires the main process to be one of these two executables.
+
+Install LightRAG service. If your system is Ubuntu, the following commands will work:
+
+```shell
+sudo cp lightrag.service /etc/systemd/system/
+sudo systemctl daemon-reload
+sudo systemctl start lightrag.service
+sudo systemctl status lightrag.service
+sudo systemctl enable lightrag.service
+```
+
+## Ollama Emulation
+
+We provide Ollama-compatible interfaces for LightRAG, aiming to emulate LightRAG as an Ollama chat model. This allows AI chat frontends supporting Ollama, such as Open WebUI, to access LightRAG easily.
+
+### Connect Open WebUI to LightRAG
+
+After starting the lightrag-server, you can add an Ollama-type connection in the Open WebUI admin panel. And then a model named `lightrag:latest` will appear in Open WebUI's model management interface. Users can then send queries to LightRAG through the chat interface. You should install LightRAG as a service for this use case.
+
+Open WebUI uses an LLM to generate session titles and session keywords, so the Ollama chat completion API detects Open WebUI session-related requests and forwards them directly to the underlying LLM. Screenshot from Open WebUI:
+
+![image-20250323194750379](./LightRAG-API-Server.assets/image-20250323194750379.png)
+
+### Choose Query mode in chat
+
+The default query mode is `hybrid` if you send a message (query) from the Ollama interface of LightRAG. You can select query mode by sending a message with a query prefix.
+
+A query prefix in the query string can determine which LightRAG query mode is used to generate the response for the query. The supported prefixes include:
+
+```
+/local
+/global
+/hybrid
+/naive
+/mix
+
+/bypass
+/context
+/localcontext
+/globalcontext
+/hybridcontext
+/naivecontext
+/mixcontext
+```
+
+For example, the chat message `/mix What's LightRAG?` will trigger a mix mode query for LightRAG. A chat message without a query prefix will trigger a hybrid mode query by default.
+
+`/bypass` is not a LightRAG query mode; it will tell the API Server to pass the query directly to the underlying LLM, including the chat history. So the user can use the LLM to answer questions based on the chat history. If you are using Open WebUI as a front end, you can just switch the model to a normal LLM instead of using the `/bypass` prefix.
+
+`/context` is also not a LightRAG query mode; it will tell LightRAG to return only the context information prepared for the LLM. You can check the context if it's what you want, or process the context by yourself.
+
+### Add user prompt in chat
+
+When using LightRAG for content queries, avoid combining the search process with unrelated output processing, as this significantly impacts query effectiveness. User prompt is specifically designed to address this issue — it does not participate in the RAG retrieval phase, but rather guides the LLM on how to process the retrieved results after the query is completed. We can append square brackets to the query prefix to provide the LLM with the user prompt:
+
+```
+/[Use mermaid format for diagrams] Please draw a character relationship diagram for Scrooge
+/mix[Use mermaid format for diagrams] Please draw a character relationship diagram for Scrooge
+```
+
+## API Key and Authentication
+
+By default, the LightRAG Server can be accessed without any authentication. We can configure the server with an API Key or account credentials to secure it.
+
+* API Key:
+
+```
+LIGHTRAG_API_KEY=your-secure-api-key-here
+WHITELIST_PATHS=/health,/api/*
+```
+
+> Health check and Ollama emulation endpoints are excluded from API Key check by default. For security reasons, remove `/api/*` from `WHITELIST_PATHS` if the Ollama service is not required.
+
+The API key is passed using the request header `X-API-Key`. Below is an example of accessing the LightRAG Server via API:
+
+```
+curl -X 'POST' \
+  'http://localhost:9621/documents/scan' \
+  -H 'accept: application/json' \
+  -H 'X-API-Key: your-secure-api-key-here' \
+  -d ''
+```
+
+* Account credentials (the Web UI requires login before access can be granted):
+
+LightRAG API Server implements JWT-based authentication using the HS256 algorithm. To enable secure access control, the following environment variables are required:
+
+```bash
+# For jwt auth
+AUTH_ACCOUNTS='admin:{bcrypt}$2b$12$replace-with-generated-hash,user1:pass456'
+TOKEN_SECRET='your-key'
+TOKEN_EXPIRE_HOURS=4
+```
+
+Passwords without a prefix are treated as plaintext. To store a bcrypt password, prefix the generated hash with `{bcrypt}`. The easiest way to generate a value that can be pasted directly into `AUTH_ACCOUNTS` is:
+
+```bash
+lightrag-hash-password --username admin
+```
+
+The command prompts for the password and prints an `admin:{bcrypt}...` entry ready to paste into `.env`.
+
+> Currently, only the configuration of an administrator account and password is supported. A comprehensive account system is yet to be developed and implemented.
+
+If Account credentials are not configured, the Web UI will access the system as a Guest. Therefore, even if only an API Key is configured, all APIs can still be accessed through the Guest account, which remains insecure. Hence, to safeguard the API, it is necessary to configure both authentication methods simultaneously.
+
+## For Azure OpenAI Backend
+
+Azure OpenAI API can be created using the following commands in Azure CLI (you need to install Azure CLI first from [https://docs.microsoft.com/en-us/cli/azure/install-azure-cli](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli)):
+
+```bash
+# Change the resource group name, location, and OpenAI resource name as needed
+RESOURCE_GROUP_NAME=LightRAG
+LOCATION=swedencentral
+RESOURCE_NAME=LightRAG-OpenAI
+
+az login
+az group create --name $RESOURCE_GROUP_NAME --location $LOCATION
+az cognitiveservices account create --name $RESOURCE_NAME --resource-group $RESOURCE_GROUP_NAME  --kind OpenAI --sku S0 --location $LOCATION
+az cognitiveservices account deployment create --resource-group $RESOURCE_GROUP_NAME  --model-format OpenAI --name $RESOURCE_NAME --deployment-name gpt-4o --model-name gpt-4o --model-version "2024-08-06"  --sku-capacity 100 --sku-name "Standard"
+az cognitiveservices account deployment create --resource-group $RESOURCE_GROUP_NAME  --model-format OpenAI --name $RESOURCE_NAME --deployment-name text-embedding-3-large --model-name text-embedding-3-large --model-version "1"  --sku-capacity 80 --sku-name "Standard"
+az cognitiveservices account show --name $RESOURCE_NAME --resource-group $RESOURCE_GROUP_NAME --query "properties.endpoint"
+az cognitiveservices account keys list --name $RESOURCE_NAME -g $RESOURCE_GROUP_NAME
+```
+
+The output of the last two commands will give you the endpoint and the key for the OpenAI API. You can use these values to set the environment variables in the `.env` file.
+
+```
+# Azure OpenAI Configuration in .env:
+LLM_BINDING=azure_openai
+LLM_BINDING_HOST=your-azure-endpoint
+LLM_MODEL=your-model-deployment-name
+LLM_BINDING_API_KEY=your-azure-api-key
+### API version is optional, defaults to latest version
+AZURE_OPENAI_API_VERSION=2024-08-01-preview
+
+### If using Azure OpenAI for embeddings
+EMBEDDING_BINDING=azure_openai
+EMBEDDING_MODEL=your-embedding-deployment-name
+```
+
+## LightRAG Server Configuration in Detail
+
+The API Server can be configured in two ways (highest priority first):
+
+* Command line arguments
+* Environment variables or .env file
+
+Most of the configurations come with default settings; check out the details in the sample file: `.env.example`. Storage configuration should also be set through environment variables or the `.env` file.
+
+### LLM and Embedding Backend Supported
+
+LightRAG supports binding to various LLM backends:
+
+* ollama
+* openai (including openai compatible)
+* azure_openai
+* lollms
+* bedrock
+* gemini
+
+LightRAG supports binding to various Embedding backends:
+
+* lollms
+* ollama
+* openai (including openai compatible)
+* azure_openai
+* bedrock
+* jina
+* gemini
+* voyageai
+
+Use the environment variable `LLM_BINDING` or the CLI argument `--llm-binding` to select the LLM backend type. Use the environment variable `EMBEDDING_BINDING` or the CLI argument `--embedding-binding` to select the Embedding backend type.
+
+Bedrock ignores `LLM_BINDING_API_KEY` and `EMBEDDING_BINDING_API_KEY`. Use SigV4 credentials through the AWS credential chain, or set the process-level `AWS_BEARER_TOKEN_BEDROCK` environment variable before startup for Bedrock API key / bearer-token auth:
+
+```bash
+LLM_BINDING=bedrock
+LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+LLM_MODEL=us.amazon.nova-lite-v1:0
+AWS_REGION=us-west-2
+# Use the AWS credential chain, or set AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY,
+# or set AWS_BEARER_TOKEN_BEDROCK before starting the server.
+```
+
+Asymmetric embedding is explicit opt-in. Set `EMBEDDING_ASYMMETRIC=true` only when the selected embedding backend supports either provider task parameters or task prefixes. See [Asymmetric Embedding Configuration](./AsymmetricEmbedding.md) before changing these settings, because existing data must be cleared and files re-indexed after any change.
+
+For LLM and embedding configuration examples, please refer to the `env.example` file in the project's root directory. To view the complete list of configurable options for OpenAI and Ollama-compatible LLM interfaces, use the following commands:
+
+```
+lightrag-server --llm-binding openai --help
+lightrag-server --llm-binding ollama --help
+lightrag-server --llm-binding gemini --help
+lightrag-server --embedding-binding ollama --help
+lightrag-server --embedding-binding gemini --help
+```
+
+> Please use OpenAI-compatible method to access LLMs deployed by OpenRouter or vLLM/SGLang. You can pass additional parameters to OpenRouter or vLLM/SGLang through the `OPENAI_LLM_EXTRA_BODY` environment variable to disable reasoning mode or achieve other personalized controls.
+
+Set `max_tokens` to **prevent excessively long output or endless output loops** from the Large Language Model (LLM) during the entity relationship extraction phase. The purpose of setting the max_tokens parameter is to truncate LLM output before timeouts occur, thereby preventing document extraction failures. This addresses issues where certain text blocks (e.g., tables or citations) containing numerous entities and relationships can lead to overly long or even endless loop outputs from LLMs. This setting is particularly crucial for locally deployed, smaller-parameter models. The max tokens value can be calculated with this formula: `LLM_TIMEOUT * llm_output_tokens/second` (e.g. `180s * 50 tokens/s = 9000`).
+
+```
+# For vLLM/SGLang deployed models, or most OpenAI-compatible API providers
+OPENAI_LLM_MAX_TOKENS=9000
+
+# For Ollama deployed models
+OLLAMA_LLM_NUM_PREDICT=9000
+
+# For OpenAI o1-mini or newer models
+OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+```
+
+### Role-Specific LLM/VLM Configuration
+
+The server can use different models for different stages without changing client APIs. Four roles are supported:
+
+| Role | Purpose |
+| --- | --- |
+| `EXTRACT` | Entity/relation extraction and merge summaries |
+| `KEYWORD` | Query keyword generation before retrieval |
+| `QUERY` | Final answers, bypass queries, and Ollama-compatible chat responses |
+| `VLM` | Multimodal analysis for images, tables, equations, and similar sidecar items |
+
+If a role is not configured, it inherits the base `LLM_*` settings. Minimal same-provider example:
+
+```bash
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+
+EXTRACT_LLM_MODEL=gpt-5-mini
+KEYWORD_LLM_MODEL=gpt-5-nano
+QUERY_LLM_MODEL=gpt-5
+VLM_LLM_MODEL=gpt-5-mini
+```
+
+For cross-provider rules, provider-specific options such as `QUERY_OPENAI_LLM_REASONING_EFFORT`, role-level Bedrock SigV4 credentials, and queue behavior, see [Role-Specific LLM/VLM Configuration Guide](./RoleSpecificLLMConfiguration.md).
+
+### Multimodal Analysis Configuration
+
+The parser can produce sidecars for drawings/images, tables, and equations. VLM analysis only runs when both conditions are true:
+
+- The document's `process_options` contains the matching modality flag: `i` for images, `t` for tables, or `e` for equations.
+- `VLM_PROCESS_ENABLE=true` and the effective VLM binding supports image input.
+
+Current vision-capable providers are `openai`, `azure_openai`, `gemini`, `bedrock`, `ollama`, and `anthropic`; `lollms` is rejected for VLM use. Typical configuration:
+
+```bash
+VLM_PROCESS_ENABLE=true
+VLM_LLM_BINDING=openai
+VLM_LLM_MODEL=gpt-4o
+VLM_LLM_BINDING_HOST=https://api.openai.com/v1
+VLM_LLM_BINDING_API_KEY=your_vlm_api_key
+VLM_MAX_IMAGE_BYTES=5242880
+SURROUNDING_LEADING_MAX_TOKENS=2000
+SURROUNDING_TRAILING_MAX_TOKENS=2000
+```
+
+The surrounding-context budgets control how much nearby text is included in VLM and extraction prompts for a multimodal item. Parser and per-file option examples are in [Document and Chunk Processing](#document-and-chunk-processing).
+
+### Entity Extraction Configuration
+
+Entity extraction is controlled by the base or `EXTRACT` role LLM. Important server-side options:
+
+- `ENABLE_LLM_CACHE_FOR_EXTRACT`: enable LLM cache for entity extraction (default: `true`). This is useful in test environments and during reprocessing.
+- `ENTITY_EXTRACTION_USE_JSON`: request JSON-structured extraction output. In v1.5 this is recommended for reliability, but it can increase latency.
+- `ENTITY_TYPE_PROMPT_FILE`: file-name-only YAML profile for entity type guidance and examples. The file is loaded from `PROMPT_DIR/entity_type`; do not pass an absolute path here.
+- `MAX_EXTRACT_INPUT_TOKENS`: maximum token budget for one extraction input context.
+- `MAX_EXTRACTION_RECORDS`: per-response cap for total entity and relationship records.
+- `MAX_EXTRACTION_ENTITIES`: per-response cap for entity records.
+
+Example:
+
+```bash
+ENTITY_EXTRACTION_USE_JSON=true
+ENTITY_TYPE_PROMPT_FILE=entity_type_prompt.yml
+PROMPT_DIR=/opt/lightrag/prompts
+MAX_EXTRACT_INPUT_TOKENS=20480
+MAX_EXTRACTION_RECORDS=100
+MAX_EXTRACTION_ENTITIES=40
+ENABLE_LLM_CACHE_FOR_EXTRACT=true
+```
+
+If an old `.env` still contains `ENTITY_TYPES`, remove it before startup. The server fails fast because this variable has been replaced by prompt profiles.
+
+### Storage Types Supported
+
+LightRAG uses 4 types of storage for different purposes:
+
+* KV_STORAGE: LLM response cache, text chunks, document information
+* VECTOR_STORAGE: entities vectors, relation vectors, chunks vectors
+* GRAPH_STORAGE: entity relation graph
+* DOC_STATUS_STORAGE: document indexing status
+
+LightRAG Server offers various storage implementations, with the default being an in-memory database that persists data to the WORKING_DIR directory. Additionally, LightRAG supports a wide range of storage solutions including PostgreSQL, MongoDB, FAISS, Milvus, Qdrant, Neo4j, Memgraph, Redis, and OpenSearch. For detailed information on supported storage options, please refer to the storage section in the README.md file located in the root directory.
+
+**Milvus Index Configuration:** LightRAG now supports configurable index types for Milvus vector storage (AUTOINDEX, HNSW, HNSW_SQ, IVF_FLAT, etc.) through environment variables. HNSW_SQ requires Milvus 2.6.8+ and provides significant memory savings. See the "Using Milvus for Vector Storage" section in the main README.md for complete configuration options.
+
+You can select the storage implementation by configuring environment variables. For instance, prior to the initial launch of the API server, you can set the following environment variable to specify your desired storage implementation:
+
+```
+LIGHTRAG_KV_STORAGE=PGKVStorage
+LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
+LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
+LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
+```
+
+You cannot change storage implementation selection after adding documents to LightRAG. Data migration from one storage implementation to another is not supported yet. For further information, please read the sample `.env.example` file.
+
+### LLM Cache Migration Between Storage Types
+
+When switching the storage implementation in LightRAG, the LLM cache can be migrated from the existing storage to the new one. Subsequently, when re-uploading files to the new storage, the pre-existing LLM cache will significantly accelerate file processing. For detailed instructions on using the LLM cache migration tool, please refer to [README_MIGRATE_LLM_CACHE.md](../lightrag/tools/README_MIGRATE_LLM_CACHE.md)
+
+### LightRAG API Server Command Line Options
+
+| Parameter | Default | Description |
+| --- | --- | --- |
+| `--host` | `0.0.0.0` | Server host |
+| `--port` | `9621` | Server port |
+| `--working-dir` | `./rag_storage` | Working directory for RAG storage |
+| `--input-dir` | `./inputs` | Directory containing uploaded/input documents |
+| `--timeout` | `150` | Gunicorn worker timeout and fallback request timeout |
+| `--max-async` | `4` | Maximum concurrent LLM operations |
+| `--log-level` | `INFO` | Logging level (`DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`) |
+| `--verbose` | `False` | Verbose debug output, effective with debug logging |
+| `--key` | `None` | API key for authentication |
+| `--ssl` | `False` | Enable HTTPS |
+| `--ssl-certfile` | `None` | Path to SSL certificate file, required if `--ssl` is enabled |
+| `--ssl-keyfile` | `None` | Path to SSL private key file, required if `--ssl` is enabled |
+| `--workspace` | `""` | Default workspace for storage isolation |
+| `--api-prefix` | `""` | Reverse-proxy path prefix, also configurable with `LIGHTRAG_API_PREFIX` |
+| `--workers` | `1` | Gunicorn worker count |
+| `--llm-binding` | `ollama` | LLM binding type (`lollms`, `ollama`, `openai`, `openai-ollama`, `azure_openai`, `bedrock`, `gemini`) |
+| `--embedding-binding` | `ollama` | Embedding binding type (`lollms`, `ollama`, `openai`, `azure_openai`, `bedrock`, `jina`, `gemini`, `voyageai`) |
+| `--rerank-binding` | `null` | Rerank binding type (`null`, `cohere`, `jina`, `aliyun`) |
+
+### Reranking Configuration
+
+Reranking query-recalled chunks can significantly enhance retrieval quality by re-ordering documents based on an optimized relevance scoring model. LightRAG currently supports the following rerank providers:
+
+- **Cohere / vLLM**: Offers full API integration with Cohere AI's `v2/rerank` endpoint. As vLLM provides a Cohere-compatible reranker API, all reranker models deployed via vLLM are also supported.
+- **Jina AI**: Provides complete implementation compatibility with all Jina rerank models.
+- **Aliyun**: Features a custom implementation designed to support Aliyun's rerank API format.
+
+The rerank provider is configured via the `.env` file. Below is an example configuration for a rerank model deployed locally using vLLM:
+
+```
+RERANK_BINDING=cohere
+RERANK_MODEL=BAAI/bge-reranker-v2-m3
+RERANK_BINDING_HOST=http://localhost:8000/rerank
+RERANK_BINDING_API_KEY=your_rerank_api_key_here
+```
+
+Here is an example configuration for utilizing the Reranker service provided by Aliyun:
+
+```
+RERANK_BINDING=aliyun
+RERANK_MODEL=gte-rerank-v2
+RERANK_BINDING_HOST=https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank
+RERANK_BINDING_API_KEY=your_rerank_api_key_here
+```
+
+Reranker calls have their own concurrency and timeout controls:
+
+```bash
+MAX_ASYNC_RERANK=4
+RERANK_TIMEOUT=30
+```
+
+`MAX_ASYNC_RERANK` falls back to `MAX_ASYNC` when unset. `RERANK_TIMEOUT` has an independent default because reranker requests are usually shorter than LLM generation requests. For comprehensive reranker configuration examples, including Cohere-compatible chunking options and Jina/Aliyun endpoints, refer to the `env.example` file.
+
+### Enable Reranking
+
+Reranking can be enabled or disabled on a per-query basis.
+
+The `/query` and `/query/stream` API endpoints include an `enable_rerank` parameter, which is set to `true` by default, controlling whether reranking is active for the current query. To change the default value of the `enable_rerank` parameter to `false`, set the following environment variable:
+
+```
+RERANK_BY_DEFAULT=False
+```
+
+### Include Chunk Content in References
+
+By default, the `/query` and `/query/stream` endpoints return references with only `reference_id` and `file_path`. For evaluation, debugging, or citation purposes, you can request the actual retrieved chunk content to be included in references.
+
+The `include_chunk_content` parameter (default: `false`) controls whether the actual text content of retrieved chunks is included in the response references. This is particularly useful for:
+
+- **RAG Evaluation**: Testing systems like RAGAS that need access to retrieved contexts
+- **Debugging**: Verifying what content was actually used to generate the answer
+- **Citation Display**: Showing users the exact text passages that support the response
+- **Transparency**: Providing full visibility into the RAG retrieval process
+
+**Important**: The `content` field is an **array of strings**, where each string represents a chunk from the same file. A single file may correspond to multiple chunks, so the content is returned as a list to preserve chunk boundaries.
+
+**Example API Request:**
+
+```json
+{
+  "query": "What is LightRAG?",
+  "mode": "mix",
+  "include_references": true,
+  "include_chunk_content": true
+}
+```
+
+**Example Response (with chunk content):**
+
+```json
+{
+  "response": "LightRAG is a graph-based RAG system...",
+  "references": [
+    {
+      "reference_id": "1",
+      "file_path": "/documents/intro.md",
+      "content": [
+        "LightRAG is a retrieval-augmented generation system that combines knowledge graphs with vector similarity search...",
+        "The system uses a dual-indexing approach with both vector embeddings and graph structures for enhanced retrieval..."
+      ]
+    },
+    {
+      "reference_id": "2",
+      "file_path": "/documents/features.md",
+      "content": [
+        "The system provides multiple query modes including local, global, hybrid, and mix modes..."
+      ]
+    }
+  ]
+}
+```
+
+**Notes**:
+- This parameter only works when `include_references=true`. Setting `include_chunk_content=true` without including references has no effect.
+- **Breaking Change**: Prior versions returned `content` as a single concatenated string. Now it returns an array of strings to preserve individual chunk boundaries. If you need a single string, join the array elements with your preferred separator (e.g., `"\n\n".join(content)`).
+
+### .env Examples
+
+The examples below are reference snippets for tuning existing deployments. For a first run, follow [Progressive Setup Recipes](#progressive-setup-recipes) instead of copying the entire `env.example` file by hand.
+
+```bash
+### Server Configuration
+# HOST=0.0.0.0
+PORT=9621
+WORKERS=2
+# LIGHTRAG_API_PREFIX=/site01
+
+### Settings for document indexing
+ENABLE_LLM_CACHE_FOR_EXTRACT=true
+ENTITY_EXTRACTION_USE_JSON=true
+# ENTITY_TYPE_PROMPT_FILE=entity_type_prompt.yml
+# MAX_EXTRACT_INPUT_TOKENS=20480
+# MAX_EXTRACTION_RECORDS=100
+# MAX_EXTRACTION_ENTITIES=40
+SUMMARY_LANGUAGE=Chinese
+MAX_PARALLEL_INSERT=2
+LIGHTRAG_PARSER=*:native-teP,*:legacy-R
+# CHUNK_R_SEPARATORS=["\n\n","\n","。","!","?",";",","," ",""]
+# CHUNK_P_SIZE=2000
+
+### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
+TIMEOUT=150
+MAX_ASYNC=4
+
+LLM_BINDING=openai
+LLM_MODEL=gpt-4o-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your-api-key
+KEYWORD_LLM_MODEL=gpt-4o-mini
+QUERY_LLM_MODEL=gpt-4o
+
+### Optional VLM configuration for documents using i/t/e process options
+VLM_PROCESS_ENABLE=false
+# VLM_LLM_MODEL=gpt-4o
+# VLM_MAX_IMAGE_BYTES=5242880
+# SURROUNDING_LEADING_MAX_TOKENS=2000
+# SURROUNDING_TRAILING_MAX_TOKENS=2000
+
+### Optional reranker configuration
+RERANK_BINDING=null
+# MAX_ASYNC_RERANK=4
+# RERANK_TIMEOUT=30
+
+### Embedding Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
+# see also env.ollama-binding-options.example for fine tuning ollama
+EMBEDDING_MODEL=bge-m3:latest
+EMBEDDING_DIM=1024
+EMBEDDING_BINDING=ollama
+EMBEDDING_BINDING_HOST=http://localhost:11434
+# Optional asymmetric embedding for prefix-based models:
+# EMBEDDING_ASYMMETRIC=true
+# EMBEDDING_QUERY_PREFIX="search_query: "
+# EMBEDDING_DOCUMENT_PREFIX="search_document: "
+# Use NO_PREFIX for a side that should intentionally have no prefix.
+
+### For JWT Auth
+# AUTH_ACCOUNTS='admin:{bcrypt}$2b$12$replace-with-generated-hash,user1:pass456'
+# TOKEN_SECRET=your-key-for-LightRAG-API-Server-xxx
+# TOKEN_EXPIRE_HOURS=48
+
+# LIGHTRAG_API_KEY=your-secure-api-key-here-123
+# WHITELIST_PATHS=/api/*
+# WHITELIST_PATHS=/health,/api/*
+```
+
+## Document and Chunk Processing
+
+v1.5 introduces a staged document pipeline. Files first go through a content extraction engine, optional multimodal analysis, text chunking, and then entity/relation extraction unless the file disables knowledge graph construction.
+
+### Quick Recipes
+
+Keep v1.4-compatible behavior:
+
+```bash
+LIGHTRAG_PARSER=*:legacy-F
+```
+
+Recommended starting point without external parser services:
+
+```bash
+LIGHTRAG_PARSER=*:native-teP,*:legacy-R
+```
+
+This uses the built-in `native` parser for supported files, enables table/equation sidecar analysis options for those files, uses paragraph semantic chunking where possible, and falls back to legacy extraction plus recursive chunking for other files.
+
+Full multimodal setup with the MinerU official API and a VLM:
+
+```bash
+LIGHTRAG_PARSER=*:native-iteP,*:mineru-iteP,*:legacy-R
+VLM_PROCESS_ENABLE=true
+VLM_LLM_MODEL=gpt-4o
+MINERU_API_MODE=official
+MINERU_API_TOKEN=your_mineru_api_token
+MINERU_OFFICIAL_ENDPOINT=https://mineru.net
+MINERU_MODEL_VERSION=vlm
+MINERU_IS_OCR=false
+```
+
+Use `DOCLING_ENDPOINT=http://localhost:5001` when routing files to `docling`.
+
+### Parser Engines and Routing
+
+`LIGHTRAG_PARSER` defines default extraction rules by file extension. Rules are matched left to right and can be separated by commas or semicolons:
+
+```bash
+LIGHTRAG_PARSER=pdf:mineru-R,docx:native-ietP,*:legacy-R
+```
+
+Supported engines:
+
+| Engine | Use case |
+| --- | --- |
+| `legacy` | Original extraction behavior. Good for compatibility and simple text-like files. |
+| `native` | Built-in structured parser, currently focused on `.docx` and LightRAG Document sidecars. |
+| `mineru` | External MinerU parser for PDFs, Office files, and images. Requires `MINERU_API_MODE` plus `MINERU_LOCAL_ENDPOINT` or `MINERU_API_TOKEN`. |
+| `docling` | External docling-serve parser for PDFs, Office files, Markdown/HTML, and images. Requires `DOCLING_ENDPOINT`. |
+
+Filename hints override the default rule for one uploaded file:
+
+```text
+paper.[mineru-iteP].pdf
+memo.[native-R!].docx
+notes.[-R].md
+```
+
+The `/documents/upload` and `/documents/scan` paths honor filename hints and `LIGHTRAG_PARSER`. The `/documents/text` and `/documents/texts` endpoints insert already-provided text and currently use fixed chunking on the server path.
+
+### Processing Options
+
+Processing options are appended after the engine with a hyphen, or supplied alone in a filename hint with `[-OPTIONS]`.
+
+| Option | Meaning |
+| --- | --- |
+| `i` | Run VLM analysis for image/drawing sidecars when present |
+| `t` | Run VLM analysis for table sidecars when present |
+| `e` | Run VLM analysis for equation sidecars when present |
+| `!` | Skip entity/relation extraction and graph writes; chunk vectors are still stored |
+| `F` | Fixed token chunking, the legacy chunking method |
+| `R` | Recursive character chunking with configurable separator cascade |
+| `V` | Semantic vector chunking; oversize chunks are re-split by `R` |
+| `P` | Paragraph semantic chunking for structured LightRAG Document content; falls back to `R` when structured content is unavailable |
+
+At most one of `F`, `R`, `V`, and `P` should be selected for a file. Chunker parameters are configured with `CHUNK_SIZE`, `CHUNK_OVERLAP_SIZE`, and strategy-specific variables such as `CHUNK_R_SEPARATORS`, `CHUNK_V_BREAKPOINT_THRESHOLD_TYPE`, `CHUNK_P_SIZE`, and `CHUNK_P_OVERLAP_SIZE`. These values are read at server startup and stored as a per-document `chunk_options` snapshot when a document is enqueued.
+
+For the full routing syntax, supported extensions, parser cache behavior, chunker configuration, concurrency rules, and Python SDK differences, see [File Processing Pipeline Specification](./FileProcessingPipeline.md). For the `P` strategy details, see [Paragraph Semantic Chunking](./ParagraphSemanticChunking.md). To debug parser output before indexing a file, see [Parser Debug CLI](./ParserDebugCLI.md).
+
+### Pipeline Concurrency
+
+`MAX_PARALLEL_INSERT` controls how many files are processed in parallel. `MAX_ASYNC` controls concurrent LLM calls, including extraction, merging, query keyword generation, and final answer generation. Optional staged-pipeline variables such as `MAX_PARALLEL_PARSE_NATIVE`, `MAX_PARALLEL_PARSE_MINERU`, `MAX_PARALLEL_PARSE_DOCLING`, and `MAX_PARALLEL_ANALYZE` can be used for parser-heavy deployments.
+
+Uploads and text inserts can be accepted while the processing loop is busy; the running loop is nudged to pick up the new pending work. Destructive jobs such as document clear/delete and the classification phase of `/documents/scan` still reject concurrent enqueues to protect storage consistency. Failed files can be reprocessed from the WebUI or by triggering `/documents/scan`.
+
+## API Endpoints
+
+All supported backends (`lollms`, `ollama`, `openai` / OpenAI-compatible, `azure_openai`, `bedrock`, and `gemini`) expose the same LightRAG REST API surface. When the API Server is running, visit:
+
+- Swagger UI: http://localhost:9621/docs
+- ReDoc: http://localhost:9621/redoc
+
+You can test the API endpoints using the provided curl commands or through the Swagger UI interface. Make sure to:
+
+1. Start the appropriate backend service or confirm the hosted provider credentials
+2. Start the RAG server
+3. Upload some documents using the document management endpoints
+4. Query the system using the query endpoints
+5. Trigger document scan if new files are put into the inputs directory
+
+The `/health` endpoint reports operational state and selected configuration, including role LLM configuration, LLM/embedding/rerank queue status, workspace/storage workspace mapping, VLM enablement, rerank enablement, and pipeline busy/scanning/destructive status.
+
+## Asynchronous Document Indexing with Progress Tracking
+
+LightRAG implements asynchronous document indexing to enable frontend monitoring and querying of document processing progress. Upon uploading files or inserting text through designated endpoints, a unique Track ID is returned to facilitate real-time progress monitoring.
+
+**API Endpoints Supporting Track ID Generation:**
+
+* `/documents/upload`
+* `/documents/text`
+* `/documents/texts`
+
+**Document Processing Status Query Endpoint:**
+* `/documents/track_status/{track_id}`
+
+This endpoint provides comprehensive status information including:
+* Document processing status (pending/processing/processed/failed)
+* Content summary and metadata
+* Error messages if processing failed
+* Timestamps for creation and updates

+ 399 - 0
docs/LightRAGSidecarFormat-zh.md

@@ -0,0 +1,399 @@
+# LightRAG Sidecar 文件格式说明
+
+本文介绍内容解析引擎输出的**LightRAG Sidecar**文件格式。LightRAG 在使用native/mineru/docling这些支持多模态内容的解析引擎提取文件内容的时候,会把"正文 + 多模态对象 + 解析元数据"拆开写到一个 `*.parsed/` 目录中,目录内的每个 JSON / JSONL 文件统称为 **sidecar** 文件。Sidecar 是后续流水线(多模态分析 → 多模态 chunk 构造 → 实体抽取 → 文档删除时的缓存清理)唯一可靠的依据。Sidecar的文件格式是LightRAG内置的通用文件交换格式,新的多模态内容提取引擎都需要遵循这个格式。公开**LightRAG Sidecar**文件格式的目的是给社区开发者编写自己的内容解析引擎提供方便。
+
+## 一、概述
+
+| 关注点 | 文件 | 存放内容 | 说明 |
+|---|---|---|---|
+| 主文件 | `<doc>.blocks.jsonl` | 存放 Block 正文 | 所有 Block 的 content字段内容拼接后形成完整的原文 |
+| 图形对象 | `<doc>.drawings.json` | 文件中抽取出来的图形对象 | 送VLM进行分析后回填分析结果 |
+| 表格对象 | `<doc>.tables.json` | 文件中抽取出来的表格对象 | 送LLM进行分析后回填分析结果 |
+| 公式对象 | `<doc>.equations.json` | 文件中抽取出来的公式对象 | 送LLM进行分析后回填分析结果 |
+| 原始图像资源 | `<doc>.blocks.assets/` | 文件中抽取出来的图片原始文件 | 送VLM进行图片分析 |
+
+Sidecar 的设计意图:
+
+- 解析阶段 内容提取引擎(native/mineru/docling) **只**负责生成 `blockid / heading / content / surrounding` 等"客观"字段;
+- 多模态分析阶段 (`analyze_multimodal`) 由 LightRAG 写入分析结果 `llm_analyze_result` 字典,可能是首次追加,也可能覆盖已有结果;解析器不应预先填充该字段
+
+## 二、目录布局
+
+```
+inputs/space1/__parsed__/<规范文件名>.parsed/
+├── <规范文件名>.blocks.jsonl        正文块序列 + 文档级 meta(首行)
+├── <规范文件名>.drawings.json       图形 sidecar(dict 容器,键 = 图形 id)
+├── <规范文件名>.tables.json         表格 sidecar
+├── <规范文件名>.equations.json      公式 sidecar
+└── <规范文件名>.blocks.assets/      原始资源目录(drawings.json中的图片文件放这里)
+    ├── image1.wmf
+    ├── image2.wmf
+    ├── image3.wmf
+    ├── image4.png
+    ├── image5.png
+    ├── image6.png
+    └── image7.emf
+```
+
+## 三、blocks.jsonl
+
+`blocks.jsonl` 是按行序列化的 JSON,**第一行 `type="meta"`**,其余每行是一个内容块 `type="content"`。
+
+### 3.1 meta 行实例
+
+```json
+{
+  "type": "meta",
+  "format": "lightrag",
+  "version": "1.0",
+  "document_name": "m012-manual.docx",
+  "document_format": "docx",
+  "document_hash": "sha256:4840...3f9543d9db0822d2d59",
+  "table_file": true,
+  "equation_file": true,
+  "drawing_file": true,
+  "asset_dir": true,
+  "split_option": { "fixlevel": 0 },
+  "blocks": 39,
+  "doc_id": "doc-f1bee60173d067d88595c00e7d9b0ce5",
+  "parse_engine": "native",
+  "parse_time": "2026-05-13T18:42:25.943490+00:00",
+  "doc_title": "m012-manual"
+}
+```
+
+| 字段 | 类型 | 说明 |
+|---|---|---|
+| `type` | `"meta"` | 行类型,固定值,校验位 |
+| `format` | `"lightrag"` | sidecar 大版本族标识 |
+| `version` | `str` | sidecar schema 版本 |
+| `document_name` | `str` | 规范文件名(含后缀,不含处理指示) |
+| `document_format` | `str` | 文件格式(目前以文件后缀表示) |
+| `document_hash` | `"sha256:<hex>"` | sidecar 正文指纹,定义为 `SHA-256(merged_text)`,其中 `merged_text` 是所有非空 content 行的 `content` 字段按 `"\n\n"` 拼接后的字符串。供外部消费者快速判断两份 `.parsed/` 是否同源(不必逐行比对 body),并作为 sidecar 文件的自描述内容校验位。注意:LightRAG 入库流水线本身不读此字段,跨文档去重由 `doc_status.content_hash` 单独承担 |
+| `table_file` / `equation_file` / `drawing_file` | `bool` | 是否存在对应 sidecar 文件(为真时对应文件必然存在) |
+| `asset_dir` | `bool` | 是否存在`blocks.assets`资源目录 |
+| `split_option` | `object` | 文件提取时的分块参数。此字段留给文件提取引擎自己记录和使用 |
+| `blocks` | `int` | content 行数(不含 meta) |
+| `doc_id` | `"doc-<md5>"` | 文档全局 id。sidecar item id(`im-/tb-/eq-`)使用 `doc_id` 去掉 `doc-` 前缀后的哈希部分,以缩短嵌入正文中的占位标签 |
+| `parse_engine` | `str` | 解析引擎`native/mineru/docling/legacy` |
+| `parse_time` | `str` | 解析完成时间; 格式:ISO-8601 UTC |
+| `doc_title` | `str` | 文档标题(通常为首个 H1);可选 |
+| `doc_summary` | `str` | 文档摘要;可选 |
+| `doc_attributes` | `object` | 文章扩展属性对象;可选 |
+| `bbox_attributes` | `object` | bbox position 全局属性;详见[§八](#八、positions) |
+
+> LightRAG要求同一个workspace(知识库)内的文件名(document_name)必须唯一。
+
+### 3.2 content 行
+
+每个 content 行是一个原始文档"块"的最小可寻址单位,至少包含:
+
+```json
+{
+  "type": "content",
+  "blockid": "462c6364584a7ba4bdae6853f85ac429",
+  "format": "plain_text",
+  "content": "1 产品用途和功能\nMI012模块用于支撑供氧抗荷调节器的供氧抗荷控制功能...",
+  "heading": "1 产品用途和功能",
+  "parent_headings": [],
+  "level": 1,
+  "session_type": "body",
+  "table_slice": "none",
+  "positions": [
+    {
+      "type": "paraid",
+      "range": ["5EA4577A", "6555DDCB"]
+    }
+  ]
+}
+```
+
+| 字段 | 含义 |
+|---|---|
+| `type` | `"content"` |
+| `blockid` | 全局唯一的Block ID |
+| `format` | 内容形态,目前固定为 `"plain_text"` |
+| `content` | 文本内容;**公式和图片以占位标签出现,表格以带table标签的JSON或HTML格式出现**(见 3.3) |
+| `heading` | content所在章节的最高层级标题;heading真实存在时,应该同时出现在content的开头;如果heading之后紧接着下一个层级的heading,则把下一个层级的heading当作正文看待。这样做的目的是需要保证所有 Block 的 content字段内容拼接后形成完整的原文。 |
+| `parent_headings` | 字符串数组: 自顶向下的祖先标题列表,不含当前 `heading` |
+| `level` | 整数: `heading` 在文档大纲中的层级(`1` = H1 / 一级标题,0表示无标题) |
+| `session_type` | Block所处区域:`body` `preface` `TOC` `references` `appendix` |
+| `table_slice` | 可选保留字段;表示Block是否仅包括表格片段。目前分析引擎不会拆分长表格。因此本字段固定为 `"none"`(表示表格不会被分片) |
+| `table_header` | 可选保留字段;在当前块为表格片段的时候,保存识别出来的表格头。目前不存在 |
+| `positions` | `position` 对象数组:标识文本块的版面位置;文本块来自版面的多个位置的时候,则会出现多个`position` 对象。参见[§八](#八、positions) |
+
+> - blockid计算方式:`md5(doc_id + ":" + block_index + ":" + heading + ":" + content)`。文档经过分块策略处理得到的 chunk 将保存 blockid 用于溯源 chunk 在 sidecar 中的位置。
+> - 不关心文档章节结构的分块策略 `F` `R` `V` 使用的就是 content 字段拼接后的内容进行分块。因此需要保证所有 Block 的 content字段合并在一起能够构成完整的文档内容,不会缺少内容,不会出现重叠的内容。
+
+### 3.3 content 内嵌占位标签
+
+为了让 P 分块策略在不破坏多模态对象的前提下对正文做切分,`content` 文本里使用如下三种 XML 风格的占位标签:
+
+| 标签 | 含义 | 标签属性 |
+|---|---|---|
+| `<table id="tb-…" format="json">…</table>` | 表格占位,包体是表格原始 JSON / HTML | `id` 指向 `tables.json` 里对应 item;`format` ∈ `json` / `html` |
+| `<drawing id="im-…" format="png" path="…" src="…" caption="…" />` | 自闭合图形占位 | `id` 指向 `drawings.json`;`path` 相对 `*.parsed/` 目录;`src` 是原文档里的引用名 |
+| `<equation id="eq-…" format="latex" caption="…">…</equation>` | 公式占位 | 行内公式同样用 `<equation format="latex">` 但**不**带 `id`,不会进 sidecar; 仅块公式(独占一行或多行)时携带 `id` |
+
+在实体关系抽取的时候喂给大模型的文本会把 `id / path / src` 等内部属性剥掉,但会保留关键属性(`format / caption`)。目的是避免抽取出文章不可见的实体,给抽取结果注入过多的噪声。
+
+### 3.4 blockid 与 chunk sidecar.refs 的对应
+
+各种分块策略在sidecar文件存在时,其输出的每个 chunk 都会带上 `sidecar = {"type": "block", "id": <主来源 blockid>, "refs": [{"type": "block", "id": <blockid>}, …]}`,其中:
+
+- 未合并的 chunk → `sidecar.refs` 只有一个元素,等于该 chunk 来自的 blocks.jsonl 行的 `blockid`;
+- Stage D 合并后的 chunk → `refs` 顺序保留所有来源 `blockid`(去重);
+- hard fallback split 后的子 chunk → 共享父 chunk 的 `sidecar`。
+
+这条链路是文档级追溯(chunk ↔ block ↔ 原段落 paraId)的基础。
+
+## 四、drawings.json
+
+顶层是 `{"version": "1.0", "drawings": { <id>: <item>, … }}` 形态的 dict 容器,**键 = `id` 字段**,便于按 id 查找。每个 item 形如:
+
+```json
+{
+  "id": "im-f1bee60173d067d88595c00e7d9b0ce5-0004",
+  "blockid": "2f52b70839d13a936d97955916820147",
+  "heading": "2.3 结构尺寸及重量",
+  "format": "png",
+  "path": "m012-manual.blocks.assets/image4.png",
+  "src": "",
+  "caption": "",
+  "footnotes": [],
+  "extras": {
+    "ocr_texts": "图内第一段 OCR 文本\n\n图内第二段 OCR 文本",
+    "ocr_texts_count": 2
+  },
+  "surrounding": {
+    "leading": "2.3 结构尺寸及重量\n尺寸及重量要求如下:\na) 外廓尺寸长度为:<drawing …",
+    "trailing": "\n图1 外廓尺寸示意\nb) 重量不大于0.85kg。\nc) 测试结果:实测电路噪声Vpp=1.526mV…"
+  },
+  "llm_analyze_result": {
+    "name": "产品外廓尺寸工程图纸",
+    "type": "Illustration",
+    "description": "该图纸为产品的外廓尺寸示意图,展示了一个电子设备或电源模块的三视图设计…",
+    "analyze_time": 1778697752,
+    "status": "success",
+    "message": ""
+  },
+  "llm_cache_list": [
+    "default:analysis:fcf4c4f88227ee1c1bf0ed4394039e37"
+  ]
+}
+```
+
+| 字段 | 说明 |
+|---|---|
+| `id` | `im-<doc_hash>-<NNNN>` 形式(`doc_hash` 为 `doc_id` 去掉 `doc-` 前缀后的 32 位 md5) |
+| `blockid` | 指向产生该图形的 content 行 |
+| `heading` | 所在章节标题 |
+| `format` | 原始扩展名(去点):`png` / `jpeg` / `gif` / `webp` / `wmf` / `emf` / … |
+| `path` | 相对 `*.parsed/` 目录的资源路径,**永远**指向 `*.blocks.assets/` 内文件 |
+| `src` | 原文档里图形的引用别名(多数情况下为空) |
+| `caption` | 可见标题(解析器可能留空) |
+| `footnotes` | 脚注字符串列表 |
+| `surrounding` | 上下文对象:参见[§七](#七、surrounding) |
+| `self_ref` | 字符串:可选;解析引擎原始输出中的对象引用(如 Docling JSON Pointer `#/pictures/3`,或 MinerU `content_list.json#/23`),用于溯源时回查原始解析产物中的对应对象(页面位置、原始结构等)。native 等不提供此字段时不输出 |
+| `extras` | 对象:可选;引擎专属的旁路字段(如图片中包含的OCR文字等)。不属于 spec 校验范围,下游消费者不应依赖具体键。 |
+| `llm_analyze_result` | 模态分析结果对象:详见 [§九](#九、`llm_analyze_result`) (后续会注入到多模态文本块) |
+| `llm_cache_list` | 模态分析LLM缓存数组(后续会注入到多模态文本块) |
+
+`extras` 中常见的 drawing 专属键:
+
+| 键 | 说明 |
+|---|---|
+| `ocr_texts` | 字符串:可选;图形对象内部 OCR 文本,多个段落用空行(`\n\n`)拼接。仅当解析引擎显式把 OCR 文本挂在该 drawing 的 children 下时输出;caption / footnote 不进入此字段。 |
+| `ocr_texts_count` | 整数:可选;写入 `ocr_texts` 的非空 OCR 段落数量。 |
+
+**只有图形支持的 raster 格式(png / jpeg / gif / webp)才会进入 VLM 分析**;其他格式(wmf / emf / svg 等)写 `llm_analyze_result.status="skipped"`,下游不生成多模态 chunk,文档继续处理。图片大小超过环境变量`VLM_MAX_IMAGE_BYTES`规定的大小后,图片同样不会进入VLM分析。
+
+> 图片的大小、DPI等信息统一放进 `extras` 对象;不要在 item 顶层引入未声明的字段(比如 `image` / `img_path` 等)。tables / equations 也遵循同样的 `extras` 约定。`self_ref` 是 spec 顶层声明的可选字段,不属于 extras 范围。
+
+## 五、tables.json
+
+顶层是 `{"version": "1.0", "tables": { <id>: <item>, ... }}` 形态的 dict 容器,**键 = `id` 字段**,便于按 id 查找。每个 item 形如:
+
+```json
+{
+  "id": "tb-f1bee60173d067d88595c00e7d9b0ce5-0007",
+  "blockid": "3f33897b5e105d254addc655f1efbf8c",
+  "heading": "2.4.4 温度-湿度-高度(随系统进行)",
+  "dimension": [16, 8],
+  "format": "json",
+  "content": "[[\"试验步骤\", \"温度(℃)\", \"高度(m)\", \"相对湿度\", \"时间(min)\", \"辅助冷却\", \"系统电源\", \"功能、性能检查\"],…",
+  "caption": "",
+  "footnotes": [],
+  "table_header": "[[\"试验步骤\", \"温度(℃)\", \"高度(m)\", \"相对湿度\", \"时间(min)\", \"辅助冷却\", \"系统电源\", \"功能、性能检查\"]]",
+  "surrounding": {
+    "leading": "2.4.4 温度-湿度-高度(随系统进行)\n产品应能承受执行任务期间的温度、湿度、高度环境综合作用…",
+    "trailing": "\n注:以上步骤重复10个循环。a成品及附件达到温度稳定或240min,以长者为准;b成品及附件达到温度稳定或120min,以长者为准。…"
+  },
+  "llm_analyze_result": {
+    "name": "文档管理元数据表",
+    "description": "这是一份文档管理信息表,用于记录技术文档的基本元数据和版本控制信息 …",
+    "analyze_time": 1778697759,
+    "status": "success",
+    "message": ""
+  },
+  "llm_cache_list": [
+    "default:analysis:b316aacd40fdca0cb56430870bb89a62"
+  ]
+}
+```
+
+tables.json 文件的 `blockid` `heading` `surrounding` `llm_analyze_result` 字段与drawings.json相同。不同或新添加的字段说明如下:
+
+| 字段 | 说明 |
+|---|---|
+| `id` | `tb-<doc_hash>-<NNNN>` 形式(`doc_hash` 为 `doc_id` 去掉 `doc-` 前缀后的 32 位 md5) |
+| `dimension` | 整数数组:`[num_rows, num_cols]`,包含表头行 |
+| `format` | `"json"` (二维数组) 或 `"html"` (负载 `<table>…</table>` 片段,含起止标签) |
+| `content` | 字符串:表格正文,按 `format` 决定结构;这是后续多模态 chunk 真正使用的字符串。 |
+| `table_header` | 字符串:可选;识别出来的作为表格头的行内容 |
+| `self_ref` | 可选;解析引擎原始输出中的对象引用(如 Docling JSON Pointer `#/tables/2`,或 MinerU `content_list.json#/31`),用于溯源时回查原始解析产物 |
+
+在模态分析阶段,如果`content`字段长度超过大模型的上下文长度时,表格内容会被机械地截断后再喂给模型。
+
+## 六、equations.json
+
+顶层是 `{"version": "1.0", "equations": { <id>: <item>, ... }}` 形态的 dict 容器,**键 = `id` 字段**,便于按 id 查找。每个 item 形如:
+
+```json
+{
+  "id": "eq-f1bee60173d067d88595c00e7d9b0ce5-0001",
+  "blockid": "2f52b70839d13a936d97955916820147",
+  "heading": "2.3 结构尺寸及重量",
+  "format": "latex",
+  "content": "C=2∗\\frac{P∗T}{\\left( {V}_{H}^{2}−{V}_{L}^{2} \\right)∗η}",
+  "caption": "",
+  "footnotes": [],
+  "surrounding": {
+    "leading": "2.3 结构尺寸及重量\n尺寸及重量要求如下:\n …",
+    "trailing": "\n其中P为供电异常时维持的功率28W,T为期望储能时间,V<sub>H</sub>为电容放电前…"
+  },
+  "llm_analyze_result": {
+    "name": "电容储能时间计算公式",
+    "description": "该公式用于计算在电源异常情况下维持系统正常工作所需的电容储能值 …",
+    "analyze_time": 1778697783,
+    "status": "success",
+    "message": "",
+    "equation": "C=2\\cdot\\frac{P\\cdot T}{(V_{H}^{2}-V_{L}^{2})\\cdot\\eta}"
+  },
+  "llm_cache_list": [
+    "default:analysis:fcf4c4f88227ee1c1bf0ed4394039e37"
+  ]
+}
+```
+
+equations.json 文件的 `blockid` `heading` `surrounding` `llm_analyze_result` 字段与drawings.json相同。不同或新添加的字段说明如下:
+
+| 字段 | 说明 |
+|---|---|
+| `id` | `eq-<doc_hash>-<NNNN>` 形式(`doc_hash` 为 `doc_id` 去掉 `doc-` 前缀后的 32 位 md5) |
+| `format` | 固定为 `"latex"` |
+| `content` | 字符串:是**原始** LaTeX(可能包含 Unicode 运算符、外层 `\[ \]`),不包含两头的`$`分割符;模态分析阶段直接读这里 |
+| `self_ref` | 可选;解析引擎原始输出中的对象引用(如 Docling JSON Pointer `#/texts/15`,或 MinerU `content_list.json#/45`),用于溯源时回查原始解析产物 |
+| `llm_analyze_result.equation` | 字符串:是大模型输出的**规范化**后的 LaTeX公式(外层 `$ / \[ \] / equation` 环境,Unicode 转 LaTeX,不包含两头的`$`分割符),这是后续多模态 chunk 真正使用的字符串; |
+
+在模态分析阶段,如果`content`字段长度超过大模型的上下文长度时,公式内容会被机械地截断后再喂给模型。行内公式(与正文连续的 `<equation format="latex">…</equation>`)**不会**保存到 equations.json 文件,它仅会在 blocks 文本里以无 `id` 形式留存。这样做的目的是避免给抽取结果注入过多的噪音。
+
+## 七、surrounding
+
+`surrounding.leading` 和 `surrounding.trailing` 是 sidecar item 的可分析上下文窗口,目的是提供图片、表格和公式所在段落的上下文信息,提高多模态分析的质量。**surrounding内容由LightRAG在分析阶段自动注入,不需要在文档解析引擎中主动写入sidecar文件中**。以下是surrounding内容的生成逻辑:
+
+- 取自同一 `blockid` 对应的 content 行文本,以多模态占位标签的位置为切分点;
+- 每一侧的 token 上限由环境变量 `SURROUNDING_LEADING_MAX_TOKENS` / `SURROUNDING_TRAILING_MAX_TOKENS` 控制(缺省 `2000`,可独立调整);按 tokenizer 截断,倾向保留靠近目标的句子;
+- 文本中保留**同行其他**多模态对象的占位标签,这让模型能感知"图 1 之后还有公式 1"这种上下文;但解析器内部标识符(`id` / `path` / `src` / `refid`)已被 `strip_internal_multimodal_markup_for_extraction` 剥离 —— 与 chunk content 实体抽取前的清理一致,避免噪声进入 VLM/LLM prompt。具体清理规则:
+  - `<drawing id="im-…" path="…" src="…" caption="Fig 1" />` → `<drawing caption="Fig 1" />`;**没有 caption 的 drawing 整段移除**(标签不再携带任何对模型可见的信息);
+  - `<table id="tb-…" format="json" caption="…">rows</table>` → `<table format="json" caption="…">rows</table>`;
+  - `<equation id="eq-…" format="latex">body</equation>` → `<equation format="latex">body</equation>`;
+  - `<cite type="table" refid="tb-…">表 1</cite>` → `<cite type="table">表 1</cite>`;`<cite type="equation" refid="eq-…">公式 2</cite>` → `<cite type="equation">公式 2</cite>`。仅删 `refid` 属性,保留 `<cite type="…">…</cite>` 包装 —— 让 VLM/LLM 能识别"这是对其他表/公式的引用"而非普通的文本,同时屏蔽 LLM 看不到的解析器内部 id;
+    - 例外:`tables.json` 类型的 surrounding 在 strip 之前先走 `remove_table_tags`,把所有 `<cite type="table">` 整段移除(分析目标表时不希望被对其他表的悬挂引用干扰);
+- 清理发生在 token 预算截断**之前**:token 数按"LLM 实际看到的内容"统计,且截断点不会落到未清理的 `id="…"` 属性中间,避免标签结构残缺;
+- 当目标对象本身位于 block 起点 / 终点时,对应一侧为 `""` 而不是 `"n/a"`(提示词组装时再把空字符串显示为 `n/a`);
+- `enrich_sidecars_with_surrounding` 是幂等的:每次 `analyze_multimodal` 入口都会重新计算并覆盖 `surrounding`,因此修改 `SURROUNDING_LEADING_MAX_TOKENS` / `SURROUNDING_TRAILING_MAX_TOKENS` 后无需手动清理 sidecar,重新执行多模态分析即可按新预算重写。
+
+## 八、positions
+
+`positions`是一个对象数组,用于标识`blockid`的内容来自文件中的哪一段文字,用于内容溯源的时候能够在原始文件中找到和显示对应的内容。当`blockid`的内容是由版面的多个栏目合并而成时,会出现多个`position` 对象,每个`position` 对象对应1个版面方框或栏目。为了适应不同的文档格式的内容定位方式,系统提供了以下几种`position` 对象类型。
+
+`position` 对象有多种类型,对象的`type`字段决定了其类型:
+
+* paraid
+
+适用于docx格式文件;按`段落id`(paraid)定位内容。`range`字段指定起止`段落id`;`charspan`为可选字段,指定内容从段落的第m个字符开始到第n个字符结束。不提供`charspan`表示`blockid`为起止段落的全部内容。示例:
+
+```
+"positions": [
+{
+    "type": "paraid",
+    "range": ["5EA4577A", "6555DDCB"],
+    "charspan": [10,999]
+}]
+```
+
+* bbox
+
+适用于与PDF格式类似的文件,通过页面矩形位置来标定内容来源的原始位置。bbox支持以下字段:
+
+```
+origin: 矩形坐标相对于页面哪个位置(可选字段,默认为LEFTTOP,另一个可选值为LEFTBOTTOM)
+max: 页面布局的长和宽的最大值,坐标按此值归一化以便能准确显示位置(可选字段,为空表示坐标按图片的点阵计算)
+anchor: 页码, 页码为字符串,支持罗马数字等非阿拉伯数字页码
+range: 矩形坐标数组 [h1,w1,h2,w2],例如 [174, 155, 818, 333]
+charspan: 内容从标定段落的第m个字符开始到第n个字符结束(可选字段)
+```
+
+`blocks.jsonl`文件的`meta`行的`bbox_attributes`字段保存的是bbox的全局设置,避免每个`content`行的`positions`对象中重复保存相同的内容。以下是一个典型的`positions`对象示例:
+
+```
+"positions": [
+{
+    "type": "bbox",
+    "anchor": "ii",
+    "range": [174, 155, 818, 333],
+    "charspan": [10, 999]
+}]
+```
+
+* heading
+
+适用于与Markdown格式类似的文件,按标题定位内容。`anchor`是起始标题(标题重复时的处理方式参照 Markdown anchor 规范);`charspan`为可选字段,指定内容从段落的第m个字符开始到第n个字符结束。不提供`charspan`表示`blockid`为起止段落的全部内容。
+
+```
+"positions": [
+{
+    "type": "heading",
+    "anchor": "ii",
+    "range": [174, 155, 818, 333],
+    "charspan": [10, 999]
+}]
+```
+
+* absolute
+
+适用于text格式类似的文件,按字符绝对位置定位。`charspan`指定内容从段落的第m个字符开始到第n个字符结束。
+
+```
+"positions": [
+{
+    "charspan": [10, 999]
+}]
+```
+
+## 九、`llm_analyze_result`
+
+| `status` | 触发场景 | 字段说明 |
+|---|---|---|
+| `success` | 模型成功返回合法 JSON 且必需字段齐全 | 图形:`name / type / description`;表格:`name / description`;公式:`name / description / equation` |
+| `skipped` | 预期跳过多模态分析:图片格式不支持、像素 < `VLM_MIN_IMAGE_PIXEL`(默认 32px)、大于 `VLM_MAX_IMAGE_BYTES`(默认 5 MB)、未启用VLM | `message` 写跳过原因 |
+| `failure` | 必需字段缺失、JSON 修复后仍不合法、VLM/EXTRACT role 未配置而对应模态被启用、模型调用异常 | `message` 写诊断 |
+
+补充:
+
+- `analyze_time` 是 epoch 秒,每个 status 都有;
+- `message` 在 `status="success"` 时**恒为空串**,便于过滤;
+- 对已启用模态的 item,每次 `analyze_multimodal` 都会重新计算,并用本次结果覆盖已有的 `llm_analyze_result`(无论原先是 `success`、`skipped` 还是 `failure`)。这样修正 VLM/EXTRACT 配置后可以直接重试,无须手动清理旧 sidecar 结果。LLM 调用仍会走 analysis cache:如果 cache key 命中,不会再次请求 provider,语义字段通常保持一致,但 `analyze_time` 等运行时字段会被重写。只有 cache miss,例如有效 role 模型 / binding / host、prompt 输入或图片元数据变化后,保存内容才可能与上次不同。
+
+图形 `type` 受 12 项枚举约束(见 [`IMAGE_TYPE_ENUM`](../lightrag/prompt_multimodal.py):`Photo / Illustration / Screenshot / Icon / Chart / Table / Infographic / Flowchart / Chat Log / Wireframe / Texture / Other`);模型若返回枚举外的值,会被规整成 `Other` 而不是失败。

+ 399 - 0
docs/LightRAGSidecarFormat.md

@@ -0,0 +1,399 @@
+# LightRAG Sidecar File Format Specification
+
+This document describes the **LightRAG Sidecar** file format that content parsing engines output. When LightRAG uses multimodal-capable content parsing engines such as native/mineru/docling to extract file content, it splits "body text + multimodal objects + parsing metadata" into a `*.parsed/` directory. Each JSON / JSONL file in that directory is collectively called a **sidecar** file. Sidecars are the only reliable source of truth for the subsequent pipeline (multimodal analysis → multimodal chunk construction → entity extraction → cache cleanup on document deletion). The sidecar format is LightRAG's built-in universal file interchange format; new multimodal content extraction engines must follow this format. The purpose of publicly documenting the **LightRAG Sidecar** format is to make it convenient for community developers to write their own content parsing engines.
+
+## 1. Overview
+
+| Concern | File | Contents | Notes |
+|---|---|---|---|
+| Main file | `<doc>.blocks.jsonl` | Stores block body | Concatenating the `content` fields of all blocks reconstructs the complete original text |
+| Drawing objects | `<doc>.drawings.json` | Drawing objects extracted from the file | Sent to a VLM for analysis; analysis results are written back |
+| Table objects | `<doc>.tables.json` | Table objects extracted from the file | Sent to an LLM for analysis; analysis results are written back |
+| Equation objects | `<doc>.equations.json` | Equation objects extracted from the file | Sent to an LLM for analysis; analysis results are written back |
+| Original image assets | `<doc>.blocks.assets/` | Original image files extracted from the document | Sent to a VLM for image analysis |
+
+Design intent of sidecars:
+
+- During the parsing stage, the content extraction engine (native/mineru/docling) is **only** responsible for generating "objective" fields such as `blockid / heading / content / surrounding`;
+- During the multimodal analysis stage (`analyze_multimodal`), the analysis result dict `llm_analyze_result` is written by LightRAG and may be appended or overwritten; parsers should not pre-populate it.
+
+## 2. Directory Layout
+
+```
+inputs/space1/__parsed__/<canonical filename>.parsed/
+├── <canonical filename>.blocks.jsonl        body block sequence + document-level meta (first line)
+├── <canonical filename>.drawings.json       drawing sidecar (dict container, key = drawing id)
+├── <canonical filename>.tables.json         table sidecar
+├── <canonical filename>.equations.json      equation sidecar
+└── <canonical filename>.blocks.assets/      original asset directory (image files referenced by drawings.json live here)
+    ├── image1.wmf
+    ├── image2.wmf
+    ├── image3.wmf
+    ├── image4.png
+    ├── image5.png
+    ├── image6.png
+    └── image7.emf
+```
+
+## 3. blocks.jsonl
+
+`blocks.jsonl` is JSON serialized line by line. The **first line has `type="meta"`**; every subsequent line is a content block with `type="content"`.
+
+### 3.1 meta line example
+
+```json
+{
+  "type": "meta",
+  "format": "lightrag",
+  "version": "1.0",
+  "document_name": "m012-manual.docx",
+  "document_format": "docx",
+  "document_hash": "sha256:4840...3f9543d9db0822d2d59",
+  "table_file": true,
+  "equation_file": true,
+  "drawing_file": true,
+  "asset_dir": true,
+  "split_option": { "fixlevel": 0 },
+  "blocks": 39,
+  "doc_id": "doc-f1bee60173d067d88595c00e7d9b0ce5",
+  "parse_engine": "native",
+  "parse_time": "2026-05-13T18:42:25.943490+00:00",
+  "doc_title": "m012-manual"
+}
+```
+
+| Field | Type | Description |
+|---|---|---|
+| `type` | `"meta"` | Line type, fixed value, sanity check |
+| `format` | `"lightrag"` | Sidecar major version family identifier |
+| `version` | `str` | Sidecar schema version |
+| `document_name` | `str` | Canonical filename (with extension, without processing hints) |
+| `document_format` | `str` | File format (currently expressed as the file extension) |
+| `document_hash` | `"sha256:<hex>"` | Sidecar body fingerprint, defined as `SHA-256(merged_text)`, where `merged_text` is the concatenation of all non-empty content lines' `content` fields joined by `"\n\n"`. Used by external consumers to quickly determine whether two `.parsed/` directories share the same source (without line-by-line body comparison), and serves as a self-describing content checksum for the sidecar file. Note: the LightRAG ingestion pipeline itself does not read this field; cross-document deduplication is handled separately by `doc_status.content_hash`. |
+| `table_file` / `equation_file` / `drawing_file` | `bool` | Whether the corresponding sidecar files exist (when true, the corresponding file must exist) |
+| `asset_dir` | `bool` | Whether the `blocks.assets` asset directory exists |
+| `split_option` | `object` | Chunking parameters used during file extraction. This field is reserved for the extraction engine itself to record and use |
+| `blocks` | `int` | Number of content lines (excluding meta) |
+| `doc_id` | `"doc-<md5>"` | Global document ID. Sidecar item IDs (`im-/tb-/eq-`) use the hash portion of `doc_id` with the `doc-` prefix removed, in order to shorten the placeholder tags embedded in body text. |
+| `parse_engine` | `str` | Parsing engine `native/mineru/docling/legacy` |
+| `parse_time` | `str` | Parse completion time; format: ISO-8601 UTC |
+| `doc_title` | `str` | Document title (usually the first H1); optional |
+| `doc_summary` | `str` | Document summary; optional |
+| `doc_attributes` | `object` | Document extended attributes object; optional |
+| `bbox_attributes` | `object` | Global bbox position attributes; see [§8](#8-positions) |
+
+> LightRAG requires that filenames (`document_name`) be unique within the same workspace (knowledge base).
+
+### 3.2 content line
+
+Each content line is the minimum addressable unit of an original document "block" and contains at least:
+
+```json
+{
+  "type": "content",
+  "blockid": "462c6364584a7ba4bdae6853f85ac429",
+  "format": "plain_text",
+  "content": "1 Product Purpose and Functions\nThe MI012 module is used to support the oxygen-supply and anti-gravity control function of the oxygen-supply and anti-gravity regulator...",
+  "heading": "1 Product Purpose and Functions",
+  "parent_headings": [],
+  "level": 1,
+  "session_type": "body",
+  "table_slice": "none",
+  "positions": [
+    {
+      "type": "paraid",
+      "range": ["5EA4577A", "6555DDCB"]
+    }
+  ]
+}
+```
+
+| Field | Meaning |
+|---|---|
+| `type` | `"content"` |
+| `blockid` | Globally unique Block ID |
+| `format` | Content form, currently fixed to `"plain_text"` |
+| `content` | Text content; **equations and images appear as placeholder tags here, tables appear as JSON or HTML wrapped in table tags** (see §3.3) |
+| `heading` | The top-most-level heading of the section containing this content. When `heading` is real, it should also appear at the beginning of `content`; if a heading is immediately followed by a heading at the next level, the next-level heading should be treated as body text. The goal is to ensure that concatenating the `content` fields of all blocks reconstructs the complete original text. |
+| `parent_headings` | String array: the top-down list of ancestor headings, excluding the current `heading` |
+| `level` | Integer: the level of `heading` in the document outline (`1` = H1 / first-level heading; `0` means no heading) |
+| `session_type` | The region the block belongs to: `body` `preface` `TOC` `references` `appendix` |
+| `table_slice` | Optional reserved field; indicates whether the block contains only a slice of a table. The current analysis engines do not split long tables, so this field is fixed to `"none"` (meaning the table will not be sliced) |
+| `table_header` | Optional reserved field; when the current block is a table slice, this holds the recognized table header. Currently unused. |
+| `positions` | Array of `position` objects: identifies the layout position of the text block; when the text block comes from multiple positions in the layout, multiple `position` objects appear. See [§8](#8-positions) |
+
+> - blockid computation: `md5(doc_id + ":" + block_index + ":" + heading + ":" + content)`. Chunks produced by chunking strategies record the blockid for tracing the chunk back to its location in the sidecar.
+> - The chunking strategies `F` / `R` / `V` that ignore document section structure operate on the concatenated `content` fields. Therefore, concatenating the `content` fields of all blocks must form the complete document content — no content missing, no content overlapping.
+
+### 3.3 Inline placeholder tags inside content
+
+To let the P chunking strategy split body text without breaking multimodal objects, three XML-style placeholder tags are used inside `content`:
+
+| Tag | Meaning | Tag attributes |
+|---|---|---|
+| `<table id="tb-…" format="json">…</table>` | Table placeholder; the body is the raw table JSON / HTML | `id` points to the corresponding item in `tables.json`; `format` ∈ `json` / `html` |
+| `<drawing id="im-…" format="png" path="…" src="…" caption="…" />` | Self-closing drawing placeholder | `id` points to `drawings.json`; `path` is relative to the `*.parsed/` directory; `src` is the reference name in the original document |
+| `<equation id="eq-…" format="latex" caption="…">…</equation>` | Equation placeholder | Inline equations also use `<equation format="latex">`, but **without** `id`, and are not written to the sidecar; only block equations (occupying one or more entire lines) carry an `id` |
+
+When the text is fed to the LLM during entity/relation extraction, internal attributes such as `id / path / src` are stripped, but key attributes (`format / caption`) are preserved. The goal is to avoid extracting entities that are invisible in the article and injecting too much noise into the extraction results.
+
+### 3.4 Correspondence between blockid and chunk sidecar.refs
+
+When a sidecar file exists, the chunking strategies attach `sidecar = {"type": "block", "id": <primary source blockid>, "refs": [{"type": "block", "id": <blockid>}, …]}` to each output chunk, where:
+
+- Unmerged chunk → `sidecar.refs` has only one element, equal to the `blockid` of the blocks.jsonl line the chunk came from;
+- Chunk merged in Stage D → `refs` preserves the order of all source `blockid`s (deduplicated);
+- Sub-chunks after hard fallback split → share the parent chunk's `sidecar`.
+
+This linkage is the basis for document-level traceability (chunk ↔ block ↔ original paragraph paraId).
+
+## 4. drawings.json
+
+The top level is a dict container of the form `{"version": "1.0", "drawings": { <id>: <item>, … }}`, **keyed by the `id` field** for lookup by id. Each item looks like:
+
+```json
+{
+  "id": "im-f1bee60173d067d88595c00e7d9b0ce5-0004",
+  "blockid": "2f52b70839d13a936d97955916820147",
+  "heading": "2.3 Structural Dimensions and Weight",
+  "format": "png",
+  "path": "m012-manual.blocks.assets/image4.png",
+  "src": "",
+  "caption": "",
+  "footnotes": [],
+  "extras": {
+    "ocr_texts": "First OCR paragraph inside the image\n\nSecond OCR paragraph inside the image",
+    "ocr_texts_count": 2
+  },
+  "surrounding": {
+    "leading": "2.3 Structural Dimensions and Weight\nDimensional and weight requirements are as follows:\na) Outer dimensions length: <drawing …",
+    "trailing": "\nFigure 1  Outer dimension schematic\nb) Weight does not exceed 0.85 kg.\nc) Test result: measured circuit noise Vpp=1.526 mV…"
+  },
+  "llm_analyze_result": {
+    "name": "Product outer-dimension engineering drawing",
+    "type": "Illustration",
+    "description": "This drawing is a schematic of the product's outer dimensions, presenting three views of an electronic device or power module design…",
+    "analyze_time": 1778697752,
+    "status": "success",
+    "message": ""
+  },
+  "llm_cache_list": [
+    "default:analysis:fcf4c4f88227ee1c1bf0ed4394039e37"
+  ]
+}
+```
+
+| Field | Description |
+|---|---|
+| `id` | Form `im-<doc_hash>-<NNNN>` (`doc_hash` is the 32-character md5 portion of `doc_id` with the `doc-` prefix removed) |
+| `blockid` | Points to the content line that produced this drawing |
+| `heading` | The section heading the drawing belongs to |
+| `format` | Original extension (no dot): `png` / `jpeg` / `gif` / `webp` / `wmf` / `emf` / … |
+| `path` | Resource path relative to the `*.parsed/` directory; **always** points to a file inside `*.blocks.assets/` |
+| `src` | The reference alias of the drawing in the original document (empty in most cases) |
+| `caption` | Visible caption (the parser may leave it empty) |
+| `footnotes` | List of footnote strings |
+| `surrounding` | Context object: see [§7](#7-surrounding) |
+| `self_ref` | String, optional; an object reference from the original parsing engine output (e.g., Docling JSON Pointer `#/pictures/3`, or MinerU `content_list.json#/23`), used to look up the original object in the parsing artifacts (page position, original structure, etc.) when tracing back. Not output by `native` and other engines that do not provide this field. |
+| `extras` | Object, optional; engine-specific bypass fields (such as OCR text contained inside the image, etc.). Not part of spec validation; downstream consumers should not rely on specific keys. |
+| `llm_analyze_result` | Modal analysis result object: see [§9](#9-llm_analyze_result) (will later be injected into the multimodal text block) |
+| `llm_cache_list` | LLM cache list for modal analysis (will later be injected into the multimodal text block) |
+
+Common drawing-specific keys inside `extras`:
+
+| Key | Description |
+|---|---|
+| `ocr_texts` | String, optional; OCR text inside the drawing object, with multiple paragraphs concatenated by blank lines (`\n\n`). Only written when the parsing engine explicitly attaches OCR text under this drawing's children; caption / footnote do not enter this field. |
+| `ocr_texts_count` | Integer, optional; number of non-empty OCR paragraphs written into `ocr_texts`. |
+
+**Only raster formats supported by drawings (png / jpeg / gif / webp) enter VLM analysis**; other formats (wmf / emf / svg, etc.) get `llm_analyze_result.status="skipped"`, no multimodal chunk is generated downstream, and document processing continues. Images larger than the size specified by the environment variable `VLM_MAX_IMAGE_BYTES` likewise will not enter VLM analysis.
+
+> Information such as image size and DPI is uniformly placed in the `extras` object; do not introduce undeclared fields (like `image` / `img_path`, etc.) at the item top level. tables / equations follow the same `extras` convention. `self_ref` is a top-level optional field declared by the spec and does not belong to `extras`.
+
+## 5. tables.json
+
+The top level is a dict container of the form `{"version": "1.0", "tables": { <id>: <item>, ... }}`, **keyed by the `id` field** for lookup by id. Each item looks like:
+
+```json
+{
+  "id": "tb-f1bee60173d067d88595c00e7d9b0ce5-0007",
+  "blockid": "3f33897b5e105d254addc655f1efbf8c",
+  "heading": "2.4.4 Temperature-Humidity-Altitude (run with the system)",
+  "dimension": [16, 8],
+  "format": "json",
+  "content": "[[\"Step\", \"Temperature (°C)\", \"Altitude (m)\", \"Relative humidity\", \"Time (min)\", \"Auxiliary cooling\", \"System power\", \"Functional/performance check\"],…",
+  "caption": "",
+  "footnotes": [],
+  "table_header": "[[\"Step\", \"Temperature (°C)\", \"Altitude (m)\", \"Relative humidity\", \"Time (min)\", \"Auxiliary cooling\", \"System power\", \"Functional/performance check\"]]",
+  "surrounding": {
+    "leading": "2.4.4 Temperature-Humidity-Altitude (run with the system)\nThe product shall withstand the combined temperature, humidity, and altitude environment during mission execution…",
+    "trailing": "\nNote: the above steps are repeated for 10 cycles. a) Finished product and accessories reach thermal stability or 240 min, whichever is longer; b) Finished product and accessories reach thermal stability or 120 min, whichever is longer.…"
+  },
+  "llm_analyze_result": {
+    "name": "Document management metadata table",
+    "description": "This is a document management information table used to record basic metadata and version control information for a technical document …",
+    "analyze_time": 1778697759,
+    "status": "success",
+    "message": ""
+  },
+  "llm_cache_list": [
+    "default:analysis:b316aacd40fdca0cb56430870bb89a62"
+  ]
+}
+```
+
+The `blockid` / `heading` / `surrounding` / `llm_analyze_result` fields of tables.json have the same meaning as in drawings.json. Different or newly added fields are described below:
+
+| Field | Description |
+|---|---|
+| `id` | Form `tb-<doc_hash>-<NNNN>` (`doc_hash` is the 32-character md5 portion of `doc_id` with the `doc-` prefix removed) |
+| `dimension` | Integer array: `[num_rows, num_cols]`, including header rows |
+| `format` | `"json"` (2D array) or `"html"` (payload `<table>…</table>` fragment including the opening and closing tags) |
+| `content` | String: the table body, structured according to `format`; this is the string actually used by the downstream multimodal chunk. |
+| `table_header` | String, optional; the recognized row(s) treated as the table header |
+| `self_ref` | Optional; object reference from the original parsing engine output (e.g., Docling JSON Pointer `#/tables/2`, or MinerU `content_list.json#/31`), used to look up the original artifact when tracing back |
+
+During the modal analysis stage, when the length of the `content` field exceeds the LLM's context window, the table content is mechanically truncated before being fed to the model.
+
+## 6. equations.json
+
+The top level is a dict container of the form `{"version": "1.0", "equations": { <id>: <item>, ... }}`, **keyed by the `id` field** for lookup by id. Each item looks like:
+
+```json
+{
+  "id": "eq-f1bee60173d067d88595c00e7d9b0ce5-0001",
+  "blockid": "2f52b70839d13a936d97955916820147",
+  "heading": "2.3 Structural Dimensions and Weight",
+  "format": "latex",
+  "content": "C=2∗\\frac{P∗T}{\\left( {V}_{H}^{2}−{V}_{L}^{2} \\right)∗η}",
+  "caption": "",
+  "footnotes": [],
+  "surrounding": {
+    "leading": "2.3 Structural Dimensions and Weight\nDimensional and weight requirements are as follows:\n …",
+    "trailing": "\nwhere P is the power maintained during power abnormalities 28 W, T is the desired energy-storage time, V<sub>H</sub> is before capacitor discharge…"
+  },
+  "llm_analyze_result": {
+    "name": "Capacitor energy-storage time calculation formula",
+    "description": "This formula calculates the capacitor energy storage value required to maintain normal system operation during power abnormality …",
+    "analyze_time": 1778697783,
+    "status": "success",
+    "message": "",
+    "equation": "C=2\\cdot\\frac{P\\cdot T}{(V_{H}^{2}-V_{L}^{2})\\cdot\\eta}"
+  },
+  "llm_cache_list": [
+    "default:analysis:fcf4c4f88227ee1c1bf0ed4394039e37"
+  ]
+}
+```
+
+The `blockid` / `heading` / `surrounding` / `llm_analyze_result` fields of equations.json have the same meaning as in drawings.json. Different or newly added fields are described below:
+
+| Field | Description |
+|---|---|
+| `id` | Form `eq-<doc_hash>-<NNNN>` (`doc_hash` is the 32-character md5 portion of `doc_id` with the `doc-` prefix removed) |
+| `format` | Fixed to `"latex"` |
+| `content` | String: the **raw** LaTeX (possibly containing Unicode operators, outer `\[ \]`); does not include the leading/trailing `$` delimiters; read directly by the modal analysis stage |
+| `self_ref` | Optional; object reference from the original parsing engine output (e.g., Docling JSON Pointer `#/texts/15`, or MinerU `content_list.json#/45`), used to look up the original artifact when tracing back |
+| `llm_analyze_result.equation` | String: the **canonicalized** LaTeX equation output by the LLM (outer `$ / \[ \] / equation` environment stripped, Unicode converted to LaTeX, no leading/trailing `$` delimiters); this is the string actually used by the downstream multimodal chunk. |
+
+During the modal analysis stage, when the length of the `content` field exceeds the LLM's context window, the content is mechanically truncated before being fed to the model. Inline equations (those continuous with the body, as `<equation format="latex">…</equation>`) **are not** saved to equations.json; they remain only in the blocks text without an `id`. The goal is to avoid injecting too much noise into the extraction results.
+
+## 7. surrounding
+
+`surrounding.leading` and `surrounding.trailing` are the analyzable context windows of a sidecar item; their purpose is to provide contextual information about the paragraph containing the image, table, or equation, improving the quality of multimodal analysis. **The surrounding content is automatically injected by LightRAG during the analysis stage; it does not need to be actively written into the sidecar by the document parsing engine.** The generation logic of the surrounding content is as follows:
+
+- Taken from the text of the content line with the same `blockid`, split at the position of the multimodal placeholder tag;
+- The token limit on each side is controlled by the environment variables `SURROUNDING_LEADING_MAX_TOKENS` / `SURROUNDING_TRAILING_MAX_TOKENS` (default `2000`, can be tuned independently); truncated by tokenizer, preferring to retain sentences close to the target;
+- The text preserves placeholder tags of **other multimodal objects on the same line**, allowing the model to perceive context such as "after Figure 1 there is also Equation 1"; but internal parser identifiers (`id` / `path` / `src` / `refid`) have been stripped by `strip_internal_multimodal_markup_for_extraction` — consistent with chunk content cleanup before entity extraction, to avoid noise entering the VLM/LLM prompt. Specific cleanup rules:
+  - `<drawing id="im-…" path="…" src="…" caption="Fig 1" />` → `<drawing caption="Fig 1" />`; **drawings without a caption are removed entirely** (the tag carries no model-visible information anymore);
+  - `<table id="tb-…" format="json" caption="…">rows</table>` → `<table format="json" caption="…">rows</table>`;
+  - `<equation id="eq-…" format="latex">body</equation>` → `<equation format="latex">body</equation>`;
+  - `<cite type="table" refid="tb-…">Table 1</cite>` → `<cite type="table">Table 1</cite>`; `<cite type="equation" refid="eq-…">Equation 2</cite>` → `<cite type="equation">Equation 2</cite>`. Only the `refid` attribute is removed; the `<cite type="…">…</cite>` wrapper is preserved — letting the VLM/LLM recognize "this is a reference to another table/equation" rather than ordinary text, while hiding the parser-internal id that the LLM cannot see.
+    - Exception: surrounding of the `tables.json` type first goes through `remove_table_tags` before stripping, removing all `<cite type="table">` blocks entirely (when analyzing the target table, we don't want to be distracted by dangling references to other tables);
+- Cleanup happens **before** token-budget truncation: the token count is computed on "what the LLM actually sees", and truncation does not land inside an uncleaned `id="…"` attribute, avoiding broken tag structure;
+- When the target object itself sits at the start / end of the block, the corresponding side is `""` instead of `"n/a"` (when assembling the prompt, the empty string is later displayed as `n/a`);
+- `enrich_sidecars_with_surrounding` is idempotent: each `analyze_multimodal` entry point recomputes and overwrites `surrounding`, so after changing `SURROUNDING_LEADING_MAX_TOKENS` / `SURROUNDING_TRAILING_MAX_TOKENS` there is no need to manually clean the sidecar — just re-run multimodal analysis and `surrounding` will be rewritten under the new budget.
+
+## 8. positions
+
+`positions` is an array of objects that identifies which piece of text in the file the `blockid` content comes from, allowing the original content to be located and displayed in the source file during content traceability. When the content of a `blockid` is composed of several columns from the layout, multiple `position` objects appear, with each `position` object corresponding to one layout box or column. To accommodate different document formats' content positioning approaches, the system supports the following types of `position` object.
+
+`position` objects have multiple types, and the `type` field determines its type:
+
+* paraid
+
+Applicable to docx-format files; locates content by `paragraph id` (paraid). The `range` field specifies the start and end `paragraph id`s; `charspan` is an optional field specifying that the content starts at character m and ends at character n of the paragraph. When `charspan` is not provided, the `blockid` covers the entire content of the start and end paragraphs. Example:
+
+```
+"positions": [
+{
+    "type": "paraid",
+    "range": ["5EA4577A", "6555DDCB"],
+    "charspan": [10,999]
+}]
+```
+
+* bbox
+
+Applicable to PDF-like files; identifies the original position of the content via a rectangle on the page. bbox supports the following fields:
+
+```
+origin: Which position the rectangle coordinates are relative to on the page (optional, defaults to LEFTTOP; another option is LEFTBOTTOM)
+max: Maximum length and width of the page layout; coordinates are normalized by this value for accurate position display (optional; empty means coordinates are computed by the image's pixel grid)
+anchor: Page number, as a string, supporting non-Arabic page numbers such as Roman numerals
+range: Rectangle coordinate array [h1, w1, h2, w2], e.g., [174, 155, 818, 333]
+charspan: Content starts at character m and ends at character n of the anchored paragraph (optional)
+```
+
+The `bbox_attributes` field of the `meta` line in `blocks.jsonl` holds global bbox settings, avoiding repeating the same content in every `content` line's `positions` object. A typical `positions` object example:
+
+```
+"positions": [
+{
+    "type": "bbox",
+    "anchor": "ii",
+    "range": [174, 155, 818, 333],
+    "charspan": [10, 999]
+}]
+```
+
+* heading
+
+Applicable to Markdown-like files; locates content by heading. `anchor` is the starting heading (for handling duplicated headings, refer to the Markdown anchor specification); `charspan` is an optional field specifying that the content starts at character m and ends at character n of the paragraph. When `charspan` is not provided, the `blockid` covers the entire content of the start and end paragraphs.
+
+```
+"positions": [
+{
+    "type": "heading",
+    "anchor": "ii",
+    "range": [174, 155, 818, 333],
+    "charspan": [10, 999]
+}]
+```
+
+* absolute
+
+Applicable to text-like files; locates content by absolute character position. `charspan` specifies that the content starts at character m and ends at character n.
+
+```
+"positions": [
+{
+    "charspan": [10, 999]
+}]
+```
+
+## 9. `llm_analyze_result`
+
+| `status` | Trigger scenario | Field description |
+|---|---|---|
+| `success` | The model returns valid JSON and all required fields are present | Drawing: `name / type / description`; Table: `name / description`; Equation: `name / description / equation` |
+| `skipped` | Multimodal analysis was deliberately skipped: image format unsupported, pixels < `VLM_MIN_IMAGE_PIXEL` (default 32 px), larger than `VLM_MAX_IMAGE_BYTES` (default 5 MB), or VLM not enabled | `message` records the skip reason |
+| `failure` | Required fields missing, JSON still invalid after repair, the VLM/EXTRACT role is not configured while the corresponding modality is enabled, or the model invocation throws an exception | `message` records the diagnostic |
+
+Additional notes:
+
+- `analyze_time` is epoch seconds and is present for every status;
+- `message` is **always an empty string** when `status="success"`, making filtering convenient;
+- Items for enabled modalities are recomputed on each `analyze_multimodal` run, and the current run overwrites any prior `llm_analyze_result` (`success`, `skipped`, or `failure`). This allows operators to fix VLM/EXTRACT configuration and retry without manually clearing stale sidecar results. LLM calls still use the analysis cache: if the cache key matches, the provider is not called and semantic fields usually remain the same, though runtime fields such as `analyze_time` are rewritten. A cache miss, for example after changing the effective role model/binding/host, prompt inputs, or image metadata, can produce different saved content.
+
+Drawing `type` is constrained to a 12-value enum (see [`IMAGE_TYPE_ENUM`](../lightrag/prompt_multimodal.py): `Photo / Illustration / Screenshot / Icon / Chart / Table / Infographic / Flowchart / Chat Log / Wireframe / Texture / Other`); values returned by the model outside the enum are normalized to `Other` rather than failing.

+ 197 - 0
docs/MilvusConfigurationGuide.md

@@ -0,0 +1,197 @@
+# Milvus Configuration via vector_db_storage_cls_kwargs
+
+## Overview
+
+Milvus index parameters can be configured through `vector_db_storage_cls_kwargs`, which is the **recommended approach** for framework integration scenarios (e.g., when using RAGAnything or other frameworks built on top of LightRAG).
+
+## Why Use vector_db_storage_cls_kwargs?
+
+- ✅ **Framework Integration**: Allows configuration to be passed through framework layers without environment variable changes
+- ✅ **Programmatic Configuration**: Set parameters in code rather than relying on environment variables
+- ✅ **Dynamic Configuration**: Different configurations for different RAG instances
+- ✅ **Clean API**: All parameters passed in one place during initialization
+
+## Supported Parameters
+
+All 11 MilvusIndexConfig parameters can be configured via `vector_db_storage_cls_kwargs`:
+
+### Base Configuration
+- `index_type`: Index type (AUTOINDEX, HNSW, HNSW_SQ, IVF_FLAT, etc.)
+- `metric_type`: Distance metric (COSINE, L2, IP)
+
+### HNSW Parameters
+- `hnsw_m`: Number of connections per layer (2-2048, default: 16)
+- `hnsw_ef_construction`: Size of dynamic candidate list during construction (default: 360)
+- `hnsw_ef`: Size of dynamic candidate list during search (default: 200)
+
+### HNSW_SQ Parameters (requires Milvus 2.6.8+)
+- `sq_type`: Quantization type (SQ4U, SQ6, SQ8, BF16, FP16, default: SQ8)
+- `sq_refine`: Enable refinement (default: False)
+- `sq_refine_type`: Refinement type (SQ6, SQ8, BF16, FP16, FP32, default: FP32)
+- `sq_refine_k`: Number of candidates to refine (default: 10)
+
+### IVF Parameters
+- `ivf_nlist`: Number of cluster units (1-65536, default: 1024)
+- `ivf_nprobe`: Number of units to query (default: 16)
+
+## Configuration Priority
+
+Configuration is resolved in the following order:
+1. **Parameters passed via vector_db_storage_cls_kwargs** (highest priority)
+2. Environment variables (MILVUS_INDEX_TYPE, etc.)
+3. Default values
+
+## Usage Examples
+
+### Basic Configuration
+
+```python
+from lightrag import LightRAG
+
+rag = LightRAG(
+    working_dir="./demo",
+    vector_storage="MilvusVectorDBStorage",
+    vector_db_storage_cls_kwargs={
+        "cosine_better_than_threshold": 0.2,
+        "index_type": "HNSW",
+        "metric_type": "COSINE",
+        "hnsw_m": 32,
+        "hnsw_ef_construction": 256,
+        "hnsw_ef": 150,
+    }
+)
+```
+
+### RAGAnything Framework Integration
+
+```python
+# In RAGAnything framework code:
+def create_lightrag_instance(user_config):
+    """Create LightRAG instance with user-provided Milvus configuration"""
+
+    # User configuration from RAGAnything
+    milvus_config = {
+        "cosine_better_than_threshold": user_config.get("threshold", 0.2),
+        "index_type": user_config.get("index_type", "HNSW"),
+        "hnsw_m": user_config.get("hnsw_m", 32),
+        # ... other parameters
+    }
+
+    # Pass configuration to LightRAG
+    rag = LightRAG(
+        working_dir=user_config["working_dir"],
+        vector_storage="MilvusVectorDBStorage",
+        vector_db_storage_cls_kwargs=milvus_config,
+    )
+
+    return rag
+```
+
+### Advanced Configuration with HNSW_SQ
+
+```python
+rag = LightRAG(
+    working_dir="./demo",
+    vector_storage="MilvusVectorDBStorage",
+    vector_db_storage_cls_kwargs={
+        "cosine_better_than_threshold": 0.2,
+        "index_type": "HNSW_SQ",  # Requires Milvus 2.6.8+
+        "metric_type": "COSINE",
+        "hnsw_m": 48,
+        "hnsw_ef_construction": 400,
+        "hnsw_ef": 200,
+        "sq_type": "SQ8",
+        "sq_refine": True,
+        "sq_refine_type": "FP32",
+        "sq_refine_k": 20,
+    }
+)
+```
+
+### IVF Configuration
+
+```python
+rag = LightRAG(
+    working_dir="./demo",
+    vector_storage="MilvusVectorDBStorage",
+    vector_db_storage_cls_kwargs={
+        "cosine_better_than_threshold": 0.2,
+        "index_type": "IVF_FLAT",
+        "metric_type": "L2",
+        "ivf_nlist": 2048,
+        "ivf_nprobe": 32,
+    }
+)
+```
+
+## Implementation Details
+
+### How It Works
+
+1. When `MilvusVectorDBStorage.__post_init__()` is called:
+   ```python
+   kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
+   index_config_keys = MilvusIndexConfig.get_config_field_names()
+   index_config_params = {
+       k: v for k, v in kwargs.items() if k in index_config_keys
+   }
+   self.index_config = MilvusIndexConfig(**index_config_params)
+   ```
+
+2. `MilvusIndexConfig.get_config_field_names()` dynamically extracts all valid parameter names from the dataclass
+3. Only valid Milvus index parameters are extracted from kwargs
+4. Parameters are passed to `MilvusIndexConfig` which applies defaults and validates them
+5. Environment variables are used as fallback for any parameters not provided in kwargs
+
+### Automatic Synchronization
+
+The implementation uses `MilvusIndexConfig.get_config_field_names()` to dynamically extract valid parameters. This means:
+- ✅ New parameters added to `MilvusIndexConfig` are **automatically recognized**
+- ✅ No need to maintain duplicate parameter lists
+- ✅ Single source of truth for configuration parameters
+
+## Testing
+
+The configuration via `vector_db_storage_cls_kwargs` is thoroughly tested:
+
+```bash
+# Run all kwargs bridge tests
+python -m pytest tests/kg/milvus_impl/test_milvus_kwargs_bridge.py -v
+
+# Test RAGAnything integration scenario specifically
+python -m pytest tests/kg/milvus_impl/test_milvus_kwargs_bridge.py::TestMilvusKwargsParameterBridge::test_raganything_framework_integration_scenario -v
+
+# Test all parameters support
+python -m pytest tests/kg/milvus_impl/test_milvus_kwargs_bridge.py::TestMilvusKwargsParameterBridge::test_all_milvus_parameters_supported_via_kwargs -v
+```
+
+## Examples
+
+See `examples/milvus_kwargs_configuration_demo.py` for a complete working example.
+
+## Backward Compatibility
+
+- ✅ **100% backward compatible** with existing code
+- ✅ Environment variable configuration still works
+- ✅ All existing tests pass
+
+## FAQ
+
+### Q: Can I mix kwargs and environment variables?
+**A:** Yes! Parameters in `vector_db_storage_cls_kwargs` take priority over environment variables.
+
+### Q: What happens to non-Milvus parameters in kwargs?
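+**A:** They are ignored when the index configuration is built: only valid MilvusIndexConfig parameters are extracted. This allows other storage settings (such as `cosine_better_than_threshold`) and framework-specific parameters to be passed alongside the Milvus index configuration.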
+
+### Q: Do I need to set environment variables?
+**A:** No! When using `vector_db_storage_cls_kwargs`, environment variables are optional. They serve as fallback values.
+
+### Q: Is this approach recommended for RAGAnything?
+**A:** Yes! This is the **recommended approach** for any framework that builds on top of LightRAG, as it allows clean configuration passing through framework layers.
+
+## References
+
+- Test Suite: `tests/kg/milvus_impl/test_milvus_kwargs_bridge.py`
+- Implementation: `lightrag/kg/milvus_impl.py` (lines 1237-1272)
+- Example: `examples/milvus_kwargs_configuration_demo.py`
+- MilvusIndexConfig: `lightrag/kg/milvus_impl.py` (lines 75-303)

+ 382 - 0
docs/MultiSiteDeployment.md

@@ -0,0 +1,382 @@
+# Single-Server Multi-Site Deployment
+
+This document explains how to run multiple isolated LightRAG instances behind one host using a reverse proxy (nginx, Traefik, Kubernetes Ingress, …), with **one shared WebUI build** reused by every instance.
+
+> Looking for the basic single-instance Docker setup? See [DockerDeployment.md](./DockerDeployment.md). For frontend build
+> mechanics in general, see [FrontendBuildGuide.md](./FrontendBuildGuide.md).
+
+---
+
+## TL;DR
+
+- Set `LIGHTRAG_API_PREFIX` per-instance, on the **backend only**. The WebUI is always mounted at `/webui` (not configurable).
+- Build the WebUI **once**. The same artifacts work under any reverse-proxy prefix.
+- Point your reverse proxy at each backend, stripping the site prefix before forwarding.
+
+```bash
+# One image, two containers, two prefixes — no rebuild.
+docker run -e LIGHTRAG_API_PREFIX=/site01 -p 9621:9621 lightrag:latest
+docker run -e LIGHTRAG_API_PREFIX=/site02 -p 9622:9621 lightrag:latest
+```
+
+---
+
+## Why "build once, deploy many"
+
+Earlier versions of LightRAG baked the site prefix into the JavaScript bundle at build time (via `VITE_API_PREFIX` / `VITE_WEBUI_PREFIX`). Every site that used a different prefix needed its own WebUI build, and reusing a single Docker image across sites required a rebuild step at deploy time. Since the runtime-config-injection refactor:
+
+- **Asset URLs** in `index.html` are emitted as relative paths (`./assets/index-abc.js`). The browser resolves them against the current document URL, so they work under any mount point.
+- **API base URL** and **in-app links** read their prefix from `window.__LIGHTRAG_CONFIG__`, which the FastAPI server injects into `index.html` on each response based on its own `LIGHTRAG_API_PREFIX`.
+
+The result: a single `lightrag/api/webui/` directory (or Docker image) is reusable across any number of sites with no per-site build artifact.
+
+---
+
+## How runtime prefix injection works
+
+Each request for `index.html` goes through `SmartStaticFiles` in `lightrag/api/lightrag_server.py`, which:
+
+1. Reads the static `index.html` produced by `bun run build`.
+2. Looks for the placeholder comment `<!-- __LIGHTRAG_RUNTIME_CONFIG__ -->`.
+3. Replaces it with
+   `<script>window.__LIGHTRAG_CONFIG__ = {"apiPrefix":"…","webuiPrefix":"…"}</script>`,
+   computed from the configured `LIGHTRAG_API_PREFIX` (the in-app `/webui` mount is hardcoded server-side).
+
+Sequence — browser request to a site-prefixed instance:
+
+```
+Browser            nginx                  uvicorn         SmartStaticFiles
+  │                  │                       │                    │
+  │ GET /site01/webui/                       │                    │
+  │─────────────────►│                       │                    │
+  │                  │ GET /webui/  (strips /site01)              │
+  │                  │──────────────────────►│                    │
+  │                  │                       │ get_response("")   │
+  │                  │                       │───────────────────►│
+  │                  │                       │                    │ inject
+  │                  │                       │                    │ window.__LIGHTRAG_CONFIG__
+  │                  │                       │                    │ = { apiPrefix: "/site01",
+  │                  │                       │                    │ webuiPrefix: "/site01/webui/" }
+  │                  │                       │◄───────────────────│
+  │                  │◄──────────────────────│                    │
+  │◄─────────────────│                       │                    │
+  │ index.html with injected runtime config
+```
+
+The SPA reads the injected config via `src/lib/runtimeConfig.ts` and uses
+it for `axios.baseURL`, `fetch()` template strings, the API-docs iframe,
+and in-app links.
+
+---
+
+## One backend variable, that's it
+
+| Variable | Default | Meaning |
+| --- | --- | --- |
+| `LIGHTRAG_API_PREFIX` | `""` | Reverse-proxy mount prefix. The backend accepts both strip and verbatim forwarding — pick whichever fits your proxy stack. Passed to FastAPI as `root_path`. |
+
+The WebUI is always mounted at `/webui` server-side. `window.__LIGHTRAG_CONFIG__.webuiPrefix` is computed as `LIGHTRAG_API_PREFIX + "/webui/"` and injected for the SPA — you do **not** set it yourself.
+
+There are no longer any frontend `VITE_API_PREFIX` / `VITE_WEBUI_PREFIX` variables. Setting them has no effect (they are ignored by the build).
+
+### Forwarding modes: strip and verbatim both work
+
+After setting `LIGHTRAG_API_PREFIX=/site01`, the backend resolves all routes correctly under either forwarding style:
+
+- **Strip** — proxy removes the prefix, backend sees `/webui/` and `/documents/foo`. The nginx example below uses this style.
+- **Verbatim** — proxy forwards the request unchanged, backend sees `/site01/webui/` and `/site01/documents/foo`. The Vite dev flow ([Scenario 2](#scenario-2--simulate-a-site-prefix)) and any non-rewriting proxy use this style.
+
+A small ASGI middleware in `create_app` prepends `root_path` to `scope["path"]` whenever the path does not already include it, so plain Routes and Mount sub-apps (the WebUI's `StaticFiles`) both resolve identically in either mode. You do not need to standardize on one — both coexist on the same backend without configuration toggles.
+
+---
+
+## End-to-end example: two sites behind one nginx
+
+### Instance configuration
+
+`site01.env`:
+```bash
+HOST=0.0.0.0
+PORT=9621
+LIGHTRAG_API_PREFIX=/site01
+WORKING_DIR=/data/site01/storage
+INPUT_DIR=/data/site01/inputs
+LIGHTRAG_API_KEY=site01-secret
+# … LLM / embedding config …
+```
+
+`site02.env`:
+```bash
+HOST=0.0.0.0
+PORT=9621
+LIGHTRAG_API_PREFIX=/site02
+WORKING_DIR=/data/site02/storage
+INPUT_DIR=/data/site02/inputs
+LIGHTRAG_API_KEY=site02-secret
+# … LLM / embedding config …
+```
+
+### docker-compose.yml (one image, two services)
+
+```yaml
+services:
+  site01:
+    image: ghcr.io/hkuds/lightrag:latest
+    env_file: site01.env
+    volumes:
+      - ./data/site01:/data/site01
+    ports:
+      - "127.0.0.1:9621:9621"
+
+  site02:
+    image: ghcr.io/hkuds/lightrag:latest
+    env_file: site02.env
+    volumes:
+      - ./data/site02:/data/site02
+    ports:
+      - "127.0.0.1:9622:9621"
+```
+
+### nginx config
+
+```nginx
+server {
+    listen 443 ssl http2;
+    server_name host.example.com;
+
+    # site01: strips /site01/ before forwarding
+    location /site01/ {
+        proxy_pass http://127.0.0.1:9621/;
+        proxy_set_header X-Forwarded-Prefix /site01;
+        proxy_set_header Host $host;
+        proxy_http_version 1.1;
+        proxy_set_header Connection "";
+    }
+
+    # site02: strips /site02/ before forwarding
+    location /site02/ {
+        proxy_pass http://127.0.0.1:9622/;
+        proxy_set_header X-Forwarded-Prefix /site02;
+        proxy_set_header Host $host;
+        proxy_http_version 1.1;
+        proxy_set_header Connection "";
+    }
+}
+```
+
+Browsing `https://host.example.com/site01/webui/` shows site01's WebUI; `https://host.example.com/site02/webui/` shows site02's. The same Docker image serves both — no per-site build artifact, no rebuild on prefix changes.
+
+### What each layer sees
+
+| Layer | site01 GET /webui/ |
+| --- | --- |
+| Browser address bar | `https://host.example.com/site01/webui/` |
+| nginx receives | `/site01/webui/` |
+| nginx forwards | `/webui/` |
+| FastAPI `root_path` | `/site01` |
+| `app.mount` resolves | `/webui/` |
+| Injected `apiPrefix` | `/site01` |
+| Injected `webuiPrefix` | `/site01/webui/` |
+| Asset URLs in HTML | `./assets/index-abc.js` (resolves to `https://host.example.com/site01/webui/assets/index-abc.js`) |
+
+---
+
+## Single-image Docker recipe
+
+The `Dockerfile` builds the WebUI once, with no prefix:
+
+```dockerfile
+FROM oven/bun:1 AS webui-build
+WORKDIR /src/lightrag_webui
+COPY lightrag_webui/package.json lightrag_webui/bun.lock ./
+RUN bun install --frozen-lockfile
+COPY lightrag_webui/ ./
+COPY lightrag/api/webui/.gitkeep /src/lightrag/api/webui/.gitkeep
+RUN bun run build
+
+FROM python:3.11-slim
+COPY --from=webui-build /src/lightrag/api/webui /app/lightrag/api/webui
+# … rest of the image …
+```
+
+Run any number of containers from the same image, each with its own prefix:
+
+```bash
+# Plain single-instance, no prefix.
+docker run --rm -p 9621:9621 lightrag:latest
+
+# Same image, different prefixes — runtime decides.
+docker run --rm -e LIGHTRAG_API_PREFIX=/site01 -p 9621:9621 lightrag:latest
+docker run --rm -e LIGHTRAG_API_PREFIX=/site02 -p 9622:9621 lightrag:latest
+```
+
+### Kubernetes Ingress equivalent
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: lightrag-multisite
+  annotations:
+    nginx.ingress.kubernetes.io/rewrite-target: /$2
+spec:
+  rules:
+  - host: host.example.com
+    http:
+      paths:
+      - path: /site01(/|$)(.*)
+        pathType: ImplementationSpecific
+        backend:
+          service:
+            name: lightrag-site01
+            port: { number: 9621 }
+      - path: /site02(/|$)(.*)
+        pathType: ImplementationSpecific
+        backend:
+          service:
+            name: lightrag-site02
+            port: { number: 9621 }
+```
+
+Backends still set `LIGHTRAG_API_PREFIX=/site01` / `=/site02`.
+
+---
+
+## Local development with `bun run dev`
+
+> **Always open `http://localhost:5173/` — root path, no `/webui`, no `/site01` — regardless of which scenario below you're in.**
+>
+> Vite's dev server serves the SPA at its own root (`/`) no matter what prefix you configure. `VITE_DEV_API_PREFIX` only affects how the SPA composes API URLs *after* the page is loaded, and which paths the dev proxy intercepts; it does **not** change the URL you type in the address bar. Trying to access `localhost:5173/site01/webui/` works (Vite's SPA fallback returns the same `index.html`), but it's not the canonical entry point and only differs cosmetically in the address bar.
+>
+> This is the deliberate consequence of `base: './'` in [`vite.config.ts`](../lightrag_webui/vite.config.ts) — the same setting that makes one production build reusable across any number of reverse-proxy mount points. Tying the dev URL to a prefix would force the build to bake the prefix back in.
+
+The dev server mirrors production injection: it uses Vite's `transformIndexHtml` hook to inject the same runtime config into `index.html` that the FastAPI server injects at request time, so the SPA reads `window.__LIGHTRAG_CONFIG__` in dev exactly the way it does in prod. Only **two** environment variables matter:
+
+| Variable | Purpose | Where it lives |
+| --- | --- | --- |
+| `VITE_BACKEND_URL` | Where the dev server forwards proxied API calls. | `lightrag_webui/.env*` |
+| `VITE_DEV_API_PREFIX` | Prefix to **simulate** (matches the backend `LIGHTRAG_API_PREFIX`). Empty → no prefix. | `lightrag_webui/.env*` |
+
+`VITE_DEV_API_PREFIX` injects `apiPrefix` into `window.__LIGHTRAG_CONFIG__` in the browser, mirroring the backend behavior. It also serves as a prefix for `VITE_API_ENDPOINTS`, ensuring correct access to backend APIs. The matching `webuiPrefix` is derived as `${VITE_DEV_API_PREFIX}/webui/` automatically — you don't need a separate variable for it.
+
+Two scenarios — the second with two upstream variants (2A and 2B) — cover everything you'll hit:
+
+### Scenario 1 — single-instance dev (no prefix, no proxy)
+
+The default. Don't set anything beyond the existing `.env.development`.
+
+```
+Browser ──► localhost:5173 (Vite) ──► localhost:9621 (backend, no prefix)
+```
+
+```bash
+# lightrag_webui/.env.development (already in repo as sample)
+VITE_BACKEND_URL=http://localhost:9621
+VITE_API_PROXY=true
+VITE_API_ENDPOINTS=/api,/documents,/graphs,/graph,/health,/query,/docs,/redoc,/openapi.json,/login,/auth-status,/static
+# VITE_DEV_API_PREFIX=          ← leave empty
+```
+
+Run:
+```bash
+lightrag-server                  # in one terminal, no LIGHTRAG_API_PREFIX
+cd lightrag_webui && bun run dev # in another; open http://localhost:5173/
+```
+
+### Scenario 2 — simulate a site prefix
+
+You want the SPA to run under `/site01` (or whatever production prefix). Set `VITE_DEV_API_PREFIX=/site01`. Vite injects the matching `window.__LIGHTRAG_CONFIG__` and registers prefixed proxy keys; SPA requests like `fetch("/site01/documents/foo")` are forwarded verbatim to whatever `VITE_BACKEND_URL` points at. The upstream — local backend or production nginx — is responsible for understanding the prefix.
+
+```
+Browser ──► localhost:5173 (Vite + HMR)
+                │
+                │  Vite proxy forwards /site01/* verbatim, no rewrite
+                ▼
+            VITE_BACKEND_URL  ──►  upstream that knows /site01
+```
+
+`.env.local` (gitignored — your personal dev config):
+```bash
+VITE_BACKEND_URL=…                             # see "Where to point VITE_BACKEND_URL" below
+VITE_API_PROXY=true
+VITE_API_ENDPOINTS=/api,/documents,/graphs,/graph,/health,/query,/docs,/redoc,/openapi.json,/login,/auth-status,/static
+VITE_DEV_API_PREFIX=/site01
+```
+
+Run `bun run dev` and open **`http://localhost:5173/`**. HMR is purely local — the browser only talks to `localhost:5173` for SPA assets, no WebSocket-upgrade config needed on any upstream.
+
+#### Where to point `VITE_BACKEND_URL`
+
+Two options, picked by where the prefix-aware upstream lives. The Vite-side configuration is identical; only this one variable changes.
+
+**A. Local backend with `LIGHTRAG_API_PREFIX=/site01`** (no nginx anywhere) — the simplest setup, two processes on your laptop. Vite's proxy itself plays the role of the reverse proxy.
+
+```bash
+VITE_BACKEND_URL=http://localhost:9621
+```
+```bash
+# Terminal 1
+LIGHTRAG_API_PREFIX=/site01 lightrag-server
+# Terminal 2
+cd lightrag_webui && bun run dev
+```
+
+The backend's FastAPI `root_path=/site01` accepts the prefixed form natively (Starlette's `get_route_path()` strips `root_path` from the request path before matching), so no extra rewriting is needed on either side.
+
+**B. Real (remote) backend reached through its production nginx** — useful when the actual backend has data / configs that are painful to reproduce locally. nginx already strips `/site01/` before forwarding to the backend; the dev frontend benefits without changing anything in production.
+
+```bash
+VITE_BACKEND_URL=https://prod.example.com      # or http://10.0.0.5 — the nginx URL
+```
+
+The production nginx and backend stay exactly as they are. The flow becomes:
+
+```
+SPA fetch /site01/documents/foo
+  → Vite forwards to https://prod.example.com/site01/documents/foo
+  → nginx matches /site01/, strips it, forwards /documents/foo to backend
+  → backend serves it
+```
+
+#### Why `VITE_BACKEND_URL` does **not** include `/site01`
+
+Vite forwards the request path **verbatim** (no rewrite). The browser already emits `/site01/documents/foo`, so the URL Vite sends upstream is `${VITE_BACKEND_URL}/site01/documents/foo`. If you set `VITE_BACKEND_URL=https://prod.example.com/site01` you would get `https://prod.example.com/site01/site01/documents/foo` — a duplicated prefix that both nginx and the backend reject. Always point `VITE_BACKEND_URL` at the upstream **root**.
+
+#### Common pitfalls (mostly relevant to option B)
+
+- **HTTPS upstream + self-signed cert**: Vite's proxy rejects by default. Set `proxy: { ..., secure: false }` in `vite.config.ts` to skip cert validation when targeting a staging proxy with a non-public cert.
+- **Auth required**: if the upstream requires `LIGHTRAG_API_KEY`, log in via the dev SPA exactly as you would in prod — the auth token flows through the proxy unchanged.
+- **CORS errors**: shouldn't happen because the browser sees same-origin requests to `localhost:5173`. If they appear, check that `changeOrigin: true` is in effect (it is, by default in `vite.config.ts`).
+
+### Quick decision matrix
+
+| Scenario | `VITE_BACKEND_URL` | `VITE_DEV_API_PREFIX` | Upstream the dev proxy talks to | Open in browser |
+| --- | --- | --- | --- | --- |
+| 1. Default single-instance dev | `http://localhost:9621` | unset | local backend, no prefix | `http://localhost:5173/` |
+| 2A. Simulate a prefix locally (no nginx) | `http://localhost:9621` | `/site01` | local backend with `LIGHTRAG_API_PREFIX=/site01` | `http://localhost:5173/` |
+| 2B. Hit a real backend through its production nginx | `https://prod.example.com` | `/site01` | remote nginx that already strips `/site01/` | `http://localhost:5173/` |
+
+Rows 2A and 2B share **everything except `VITE_BACKEND_URL`** — the choice is purely "is the prefix-aware upstream on my laptop or in production?".
+
+**The "Open in browser" column is always `http://localhost:5173/` — that is the entry point in every dev scenario.** What changes between rows is where the API traffic ultimately lands; the SPA itself is always served from the dev server's root.
+
+---
+
+## Troubleshooting
+
+### Asset URLs 404 when accessing the WebUI
+
+The base URL must end with `/`. Accessing `/site01/webui` (no trailing slash) makes the browser resolve `./assets/foo.js` against `/site01/`, which 404s. The server already redirects the no-slash form to the
+slash form; if assets still 404, verify that this redirect makes it back through nginx to the browser with the site prefix intact (check `X-Forwarded-Prefix` and that nginx uses `proxy_pass http://…/` with the trailing slash).
+
+### `apiPrefix` is empty in `window.__LIGHTRAG_CONFIG__` after deploy
+
+View the page source. If you see the literal placeholder `<!-- __LIGHTRAG_RUNTIME_CONFIG__ -->` instead of an injected `<script>` tag, the request did not go through `SmartStaticFiles` — double-check that `lightrag/api/webui/index.html` exists in the running container and that the WebUI mount succeeded (the server logs `WebUI assets mounted at <path>` at startup).
+
+### `bun run dev` proxy returns 404 with `VITE_DEV_API_PREFIX` set
+
+Confirm the backend is also running with the matching `LIGHTRAG_API_PREFIX`. The dev proxy forwards prefixed paths verbatim; if the backend has no prefix configured, it does not register routes under that path.
+
+### I want to disable the WebUI entirely
+
+Don't build the frontend — `lightrag/api/webui/index.html` will not exist and the server will skip the WebUI mount, redirecting `/` and the WebUI path to `/docs` instead. The runtime-config injection is purely opt-in via the existence of the build artifact.

+ 316 - 0
docs/OfflineDeployment.md

@@ -0,0 +1,316 @@
+# LightRAG Offline Deployment Guide
+
+This guide provides comprehensive instructions for deploying LightRAG in offline environments where internet access is limited or unavailable.
+
+If you deploy LightRAG using Docker, there is no need to refer to this document, as the LightRAG Docker image is pre-configured for offline operation.
+
+> Packages that require `transformers`, `torch`, or `cuda` are not included in the offline dependency groups. Consequently, document extraction tools such as Docling, as well as local LLM backends such as Hugging Face and LMDeploy, are outside the scope of offline installation support. Such compute-intensive services should not be integrated into LightRAG directly; Docling will be decoupled and deployed as a standalone service.
+
+## Table of Contents
+
+- [Overview](#overview)
+- [Quick Start](#quick-start)
+- [Layered Dependencies](#layered-dependencies)
+- [Tiktoken Cache Management](#tiktoken-cache-management)
+- [Complete Offline Deployment Workflow](#complete-offline-deployment-workflow)
+- [Troubleshooting](#troubleshooting)
+
+## Overview
+
+LightRAG uses dynamic package installation (`pipmaster`) for optional features based on file types and configurations. In offline environments, these dynamic installations will fail. This guide shows you how to pre-install all necessary dependencies and cache files.
+
+### What Gets Dynamically Installed?
+
+LightRAG dynamically installs packages for:
+
+- **Storage Backends**: `redis`, `neo4j`, `pymilvus`, `pymongo`, `asyncpg`, `qdrant-client`
+- **LLM Providers**: `openai`, `anthropic`, `ollama`, `zhipuai`, `aioboto3`, `voyageai`, `llama-index`, `lmdeploy`, `transformers`, `torch`
+- **Tiktoken Models**: BPE encoding models downloaded from OpenAI CDN
+
+**Note**: Document processing dependencies (`pypdf`, `python-docx`, `python-pptx`, `openpyxl`) are now pre-installed with the `api` extras group and no longer require dynamic installation.
+
+## Quick Start
+
+### Option 1: Using pip with Offline Extras
+
+```bash
+# Online environment: Install all offline dependencies
+pip install lightrag-hku[offline]
+
+# Download tiktoken cache
+lightrag-download-cache
+
+# Create offline package
+pip download lightrag-hku[offline] -d ./offline-packages
+tar -czf lightrag-offline.tar.gz ./offline-packages ~/.tiktoken_cache
+
+# Transfer to offline server
+scp lightrag-offline.tar.gz user@offline-server:/path/to/
+
+# Offline environment: Install
+tar -xzf lightrag-offline.tar.gz
+pip install --no-index --find-links=./offline-packages lightrag-hku[offline]
+export TIKTOKEN_CACHE_DIR=~/.tiktoken_cache
+```
+
+### Option 2: Using Requirements Files
+
+```bash
+# Online environment: Download packages
+pip download -r requirements-offline.txt -d ./packages
+
+# Transfer to offline server
+tar -czf packages.tar.gz ./packages
+scp packages.tar.gz user@offline-server:/path/to/
+
+# Offline environment: Install
+tar -xzf packages.tar.gz
+pip install --no-index --find-links=./packages -r requirements-offline.txt
+```
+
+## Layered Dependencies
+
+LightRAG provides flexible dependency groups for different use cases:
+
+### Available Dependency Groups
+
+| Group | Description | Use Case |
+| ----- | ----------- | -------- |
+| `api` | API server + document processing | FastAPI server with PDF, DOCX, PPTX, XLSX support |
+| `offline-storage` | Storage backends | Redis, Neo4j, MongoDB, PostgreSQL, etc. |
+| `offline-llm` | LLM providers | OpenAI, Anthropic, Ollama, etc. |
+| `offline` | Complete offline package | API + Storage + LLM (all features) |
+
+**Note**: Document processing (PDF, DOCX, PPTX, XLSX) is included in the `api` extras group. The previous `offline-docs` group has been merged into `api` for better integration.
+
+> Software packages requiring `transformers`, `torch`, or `cuda` will not be included in the offline dependency group.
+
+### Installation Examples
+
+```bash
+# Install API with document processing
+pip install lightrag-hku[api]
+
+# Install API and storage backends
+pip install lightrag-hku[api,offline-storage]
+
+# Install all offline dependencies (recommended for offline deployment)
+pip install lightrag-hku[offline]
+```
+
+### Using Individual Requirements Files
+
+```bash
+# Storage backends only
+pip install -r requirements-offline-storage.txt
+
+# LLM providers only
+pip install -r requirements-offline-llm.txt
+
+# All offline dependencies
+pip install -r requirements-offline.txt
+```
+
+## Tiktoken Cache Management
+
+Tiktoken downloads BPE encoding models on first use. In offline environments, you must pre-download these models.
+
+### Using the CLI Command
+
+After installing LightRAG, use the built-in command:
+
+```bash
+# Download to default location (see output for exact path)
+lightrag-download-cache
+
+# Download to specific directory
+lightrag-download-cache --cache-dir ./tiktoken_cache
+
+# Download specific models only
+lightrag-download-cache --models gpt-4o-mini gpt-4
+```
+
+### Default Models Downloaded
+
+- `gpt-4o-mini` (LightRAG default)
+- `gpt-4o`
+- `gpt-4`
+- `gpt-3.5-turbo`
+- `text-embedding-ada-002`
+- `text-embedding-3-small`
+- `text-embedding-3-large`
+
+### Setting Cache Location in Offline Environment
+
+```bash
+# Option 1: Environment variable (temporary)
+export TIKTOKEN_CACHE_DIR=/path/to/tiktoken_cache
+
+# Option 2: Add to ~/.bashrc or ~/.zshrc (persistent)
+echo 'export TIKTOKEN_CACHE_DIR=~/.tiktoken_cache' >> ~/.bashrc
+source ~/.bashrc
+
+# Option 3: Copy to default location
+cp -r /path/to/tiktoken_cache ~/.tiktoken_cache/
+```
+
+## Complete Offline Deployment Workflow
+
+### Step 1: Prepare in Online Environment
+
+```bash
+# 1. Install LightRAG with offline dependencies
+pip install lightrag-hku[offline]
+
+# 2. Download tiktoken cache
+lightrag-download-cache --cache-dir ./offline_cache/tiktoken
+
+# 3. Download all Python packages
+pip download lightrag-hku[offline] -d ./offline_cache/packages
+
+# 4. Create archive for transfer
+tar -czf lightrag-offline-complete.tar.gz ./offline_cache
+
+# 5. Verify contents
+tar -tzf lightrag-offline-complete.tar.gz | head -20
+```
+
+### Step 2: Transfer to Offline Environment
+
+```bash
+# Using scp
+scp lightrag-offline-complete.tar.gz user@offline-server:/tmp/
+
+# Or using USB/physical media
+# Copy lightrag-offline-complete.tar.gz to USB drive
+```
+
+### Step 3: Install in Offline Environment
+
+```bash
+# 1. Extract archive
+cd /tmp
+tar -xzf lightrag-offline-complete.tar.gz
+
+# 2. Install Python packages
+pip install --no-index \
+    --find-links=/tmp/offline_cache/packages \
+    lightrag-hku[offline]
+
+# 3. Set up tiktoken cache
+mkdir -p ~/.tiktoken_cache
+cp -r /tmp/offline_cache/tiktoken/* ~/.tiktoken_cache/
+export TIKTOKEN_CACHE_DIR=~/.tiktoken_cache
+
+# 4. Add to shell profile for persistence
+echo 'export TIKTOKEN_CACHE_DIR=~/.tiktoken_cache' >> ~/.bashrc
+```
+
+### Step 4: Verify Installation
+
+```bash
+# Test Python import
+python -c "from lightrag import LightRAG; print('✓ LightRAG imported')"
+
+# Test tiktoken
+python -c "from lightrag.utils import TiktokenTokenizer; t = TiktokenTokenizer(); print('✓ Tiktoken working')"
+
+# Test optional dependencies (if installed)
+python -c "import redis; print('✓ Redis available')"
+```
+
+## Troubleshooting
+
+### Issue: Tiktoken fails with network error
+
+**Problem**: `Unable to load tokenizer for model gpt-4o-mini`
+
+**Solution**:
+```bash
+# Ensure TIKTOKEN_CACHE_DIR is set
+echo $TIKTOKEN_CACHE_DIR
+
+# Verify cache files exist
+ls -la ~/.tiktoken_cache/
+
+# If empty, you need to download cache in online environment first
+```
+
+### Issue: Dynamic package installation fails
+
+**Problem**: `Error installing package xxx`
+
+**Solution**:
+```bash
+# Pre-install the specific package you need
+# For API with document processing:
+pip install lightrag-hku[api]
+
+# For storage backends:
+pip install lightrag-hku[offline-storage]
+
+# For LLM providers:
+pip install lightrag-hku[offline-llm]
+```
+
+### Issue: Missing dependencies at runtime
+
+**Problem**: `ModuleNotFoundError: No module named 'xxx'`
+
+**Solution**:
+```bash
+# Check what you have installed
+pip list | grep -i xxx
+
+# Install missing component
+pip install lightrag-hku[offline]  # Install all offline deps
+```
+
+### Issue: Permission denied on tiktoken cache
+
+**Problem**: `PermissionError: [Errno 13] Permission denied`
+
+**Solution**:
+```bash
+# Ensure cache directory has correct permissions
+chmod 755 ~/.tiktoken_cache
+chmod 644 ~/.tiktoken_cache/*
+
+# Or use a user-writable directory
+export TIKTOKEN_CACHE_DIR=~/my_tiktoken_cache
+mkdir -p ~/my_tiktoken_cache
+```
+
+## Best Practices
+
+1. **Test in Online Environment First**: Always test your complete setup in an online environment before going offline.
+
+2. **Keep Cache Updated**: Periodically update your offline cache when new models are released.
+
+3. **Document Your Setup**: Keep notes on which optional dependencies you actually need.
+
+4. **Version Pinning**: Consider pinning specific versions in production:
+   ```bash
+   pip freeze > requirements-production.txt
+   ```
+
+5. **Minimal Installation**: Only install what you need:
+   ```bash
+   # If you only need API with document processing
+   pip install lightrag-hku[api]
+   # Then manually add specific LLM: pip install openai
+   ```
+
+## Additional Resources
+
+- [LightRAG GitHub Repository](https://github.com/HKUDS/LightRAG)
+- [Docker Deployment Guide](./DockerDeployment.md)
+- [API Server Documentation](./LightRAG-API-Server.md)
+
+## Support
+
+If you encounter issues not covered in this guide:
+
+1. Check the [GitHub Issues](https://github.com/HKUDS/LightRAG/issues)
+2. Review the [project documentation](../README.md)
+3. Create a new issue with your offline deployment details

+ 404 - 0
docs/ParagraphSemanticChunking-zh.md

@@ -0,0 +1,404 @@
+# Paragraph Semantic 分块策略
+
+## 1. 适用场景与策略选择
+
+### 1.1 P 策略要解决什么问题
+
+Paragraph Semantic Chunking(下文简称 **P 策略**)面向 DOCX 等具有清晰章节结构的文档。其核心目标是:**让分块边界尽可能对齐文档原生的语义边界**(标题、段落、表格行),而不是仅由 token 长度计数决定切点。
+
+P 策略主要解决以下四类问题:
+
+1. **表格语境断裂**:大表被拆分后,首尾切片容易脱离前置说明、后置解释或中间桥接文字,召回时无法独立理解。
+2. **层级信息利用不足**:仅看相邻段落的方法无法利用父标题路径、同级条款之间的关系。
+3. **细碎章节尺寸失衡**:规章、标准、合同等文档常包含大量 100~300 token 的细碎条款,若不合并则块过短、语义稀薄;若仅按相邻长度合并又会跨主题污染。
+4. **长块二次拆分破坏结构**:章节过长时,常规字符切分会忽略表格行边界和标题层级。
+
+P 策略仅对 `native` 抽取引擎生成的 `.blocks.jsonl` 结构化产物有效;对非结构化输入会自动降级为 R 策略(见 §10)。
+
+### 1.2 P / R / V 三种策略对比
+
+| 维度 | R 策略(Recursive) | V 策略(SemanticVector) | P 策略(ParagraphSemantic) |
+|---|---|---|---|
+| 切分依据 | 字符分隔符级联(段落 → 换行 → 中文标点 → 空格 → 字符)+ token 预算 | 句子级 embedding 距离阈值(百分位 / 标准差 / 四分位距 / 梯度)寻找语义断层 | DOCX outline level 与 `parent_headings` + 表格行边界 + 锚点 + 层级感知合并 |
+| 块大小控制 | `chunk_token_size` 硬上限 | `chunk_token_size` 仅为 advisory ceiling,超限时通过 R 二次切分 | `target_max` 硬上限 + `target_ideal` 软目标 + 表格阈值 + 尾部吸收阈值多重协同 |
+| 表格处理 | 不感知表格,可能在表格中间切断 | 不感知表格 | 表格小于 `table_max` 保持完整;大表按 JSON 行数组 / HTML `<tr>` 行边界切片,并重新包裹为合法 `<table>` |
+| 表格上下文 | 依赖窗口偶然覆盖 | 依赖 embedding 距离 | 首切片粘连前置说明、末切片粘连后置解释、连续大表桥接文字双向重叠 |
+| 块间重叠 | 全局 `chunk_overlap_token_size` | 不会出现重叠 | 章节边界不会重叠;同章节长正文 fallback 到 R 时按 `CHUNK_P_OVERLAP_SIZE` 重叠;连续大表桥接文字可同时进入前后两个表格块 |
+| heading 元数据 | 通常无 | 通常无 | 继承或提升 heading;拆分后追加 `[part n]` 后缀;保留 `parent_headings` 和 `level` |
+| 嵌入计算开销 | 无 | 高(需对每个句子计算 embedding) | 无 |
+| 依赖输入 | 任意文本 | 任意文本 + Embedding 模型 | 必须有 `.blocks.jsonl` sidecar(即 `native` 引擎抽取结果),否则降级为 R |
+
+### 1.3 怎么选
+
+| 场景 | 推荐 | 理由 |
+|---|---|---|
+| DOCX 且章节层级清晰、含大表格、含细碎条款 | **P** | 充分利用标题层级与表格行边界,块边界最贴合语义;避免跨主题污染 |
+| 文档以散文 / 评论 / 长篇正文为主,没有明确章节结构 | **V** | 按语义相似度切分能在话题切换点形成自然边界,比字符切分更稳定 |
+| 输入是纯文本、Markdown、代码、日志,或追求最低算力开销 | **R** | 无嵌入开销,分隔符级联对中英文混合文本足够稳定 |
+| 通用配置(不确定文件类型) | **R** | P 在无 sidecar 时自动降级到 R;V 在无 Embedding 模型时也降级到 R |
+| 标题样式混乱、正文中大量伪标题的文档 | **R** 或 **V** | P 依赖 native parser 正确识别标题,标题错乱会导致基础块边界偏移 |
+| 单行超大表格或不可解析表格 | 任意 | 三种策略最终都会走字符级 fallback;P 仍保留表格上下文粘连优势 |
+
+### 1.4 P 策略的代价
+
+- 必须搭配 `native` 引擎:在 `LIGHTRAG_PARSER` 中显式声明,例如 `docx:native-P`;否则即使写了 `P`,也会因为缺少 `.blocks.jsonl` 退化到 R。
+- 仅支持 DOCX:其他格式没有 `.blocks.jsonl` 产物。
+- 算法路径多、阈值多:调试时需要先确认输入 sidecar 是否正确,再看各阶段输出。
+
+## 2. 工作原理总览
+
+P 策略以 native parser 在 `fixlevel=0` 模式下产生的 `.blocks.jsonl` 为输入,**每个 `type == "content"` 行被视为一个标题级基础块**,然后在该基础上执行表格切片、长块拆分和层级合并:
+
+```text
+DOCX
+  ↓  native parser (fixlevel=0)
+.blocks.jsonl + sidecar (.tables.json / .equations.json / .drawings.json / .blocks.assets/)
+  ↓  Stage B:超大表格按行边界切片并赋予 first/middle/last 角色
+  ↓  Stage B.1:连续大表之间桥接文字双向重叠
+  ↓  Stage C:锚点驱动的长文本块再切分
+  ↓  Stage D:层级感知的双相位合并
+  ↓  Stage E:[part n] 行级来源追溯编号
+最终 chunk 列表
+```
+
+**P 策略的关键不变量**:
+
+1. **章节边界不会重叠**:不同 `.blocks.jsonl` 内容行之间的文本绝不会被复制到对方块里,避免“张冠李戴”。
+2. **章节内长正文可重叠**:同一个内容行内拆分的多个片段允许按 `chunk_overlap_token_size` 保留 R 风格 overlap,减少长正文中途切断。
+3. **表格之间桥接文字可双向重叠**:唯一的跨段落复制场景,专门服务连续大表的上下文保留。
+4. **表格行不互相重叠**:行级切片本身是非重叠的,与 R 的 overlap 概念不同。
+
+## 3. 输入与输出
+
+### 3.1 输入
+
+`chunking_by_paragraph_semantic()` 接收以下输入:
+
+| 参数 | 来源 | 说明 |
+|---|---|---|
+| `content` | `full_docs[doc_id].content` | 拼接后的合并文本,用于 sidecar 缺失时降级 |
+| `blocks_path` | `full_docs[doc_id].lightrag_document_path` | `.blocks.jsonl` 路径,是 P 策略的主输入 |
+| `chunk_token_size` | `chunk_options.chunk_token_size` / `CHUNK_P_SIZE` | 目标硬上限 N,默认 `2000` |
+| `chunk_overlap_token_size` | `CHUNK_P_OVERLAP_SIZE` / `chunk_overlap_token_size` | 同一内容行内长正文 fallback 与表格桥接预算的上限,默认 `100` |
+| `tokenizer` | LightRAG 已解析好的 tokenizer | 所有 token 计数与文本 overlap 截取的基准 |
+
+P 策略**不接收** `split_by_character` / `split_by_character_only`,因为正常路径由标题和段落结构驱动。
+
+### 3.2 `.blocks.jsonl` 约定
+
+P 策略只处理 `type == "content"` 行。每个内容行通常包含:
+
+- `content`:该标题下的正文文本,可能包含普通段落、`<table ...>...</table>` 标签、`<equation ... />` 公式、`<drawing ... />` 图形。
+- `heading`:当前标题。
+- `parent_headings`:父级标题链。
+- `level`:标题级别(1~9,对应原始 outline level 0~8)。
+- `positions`:原始段落定位(用于追溯)。
+
+native parser 的 `fixlevel=0` 模式保证「一条标题下的正文作为一个基础块」,不在解析阶段做 token 阈值拆分。表格保持完整插入到 `content` 中。
+
+### 3.3 输出
+
+最终输出为有序 chunk 列表,每个元素:
+
+```python
+{
+    "tokens": int,                    # 真实 token 数(合并后会复测)
+    "content": str,                   # 块文本(可能包含 <table> 标签)
+    "chunk_order_index": int,         # 块顺序索引
+    "heading": str,                   # 拆分后追加 [part n] 后缀
+    "parent_headings": list[str],     # 父级标题链,不追加后缀
+    "level": int,                     # 标题层级
+}
+```
+
+实现内部还会临时使用 `paragraphs`、`table_chunk_role`、`uuid`、`uuid_end`、`type` 等字段辅助拆分和合并,但**不会进入最终输出**。
+
+### 3.4 `[part n]` 后缀规则
+
+- 同一个原始 `.blocks.jsonl` 内容行被拆成多个片段时,所有片段的 `heading` 字段追加 `[part 1]`、`[part 2]` …
+- 未发生拆分的内容行保持原 heading 不变。
+- `parent_headings` 不追加后缀。
+- 编号在每个原始内容行内**独立重置**。
+- 旧的 `[表格片段N]` 后缀已统一由 `[part n]` 替代。
+
+## 4. 关键阈值
+
+P 策略的阈值不是固定常量,而是按 `chunk_token_size`(记为 N)动态推导:
+
+| 名称 | 计算式 | N = 2000 时取值 | 技术含义 |
+|---|---|---:|---|
+| `target_max` | N | 2000 | 文本块硬上限 |
+| `target_ideal` | 0.75 × N | 1500 | 文本块理想目标,达到此值后停止参与普通同级合并 |
+| `table_max` | 0.625 × N | 1250 | 表格触发切片阈值 |
+| `table_ideal` | 0.375 × N | 750 | 表格切片理想大小 |
+| `table_min_last` | 0.32 × `table_max` | 400 | 表格末片回吞阈值(小于此值且能合并则回吞至前一切片) |
+| `small_tail_threshold` | 0.125 × N | 250 | 尾部碎块吸收阈值 |
+| `max_anchor_candidate_length` | 固定 | 100 字符 | 长块拆分锚点候选段落长度上限 |
+
+比例约束关系:`table_max < target_ideal < target_max`、`table_ideal < table_max`。这些比例源自审计模式经验值(`大块 8000、小表 5000、理想表 3000、表格尾块 1600`),现按 `chunk_token_size` 等比缩放。
+
+## 5. Stage A:标题级基础块
+
+标题识别由 native parser 完成,**P chunker 自身不扫描 docx body、也不判断标题样式**。
+
+native parser 在 `fixlevel=0` 模式下:
+
+1. 读取 `styles.xml`,按 `<w:basedOn>` 建立样式继承链,回溯有效 `<w:outlineLvl>`。
+2. 遍历 `document.xml` 段落,沿继承链解析大纲级别;原始 outline level 0~8 映射为内部 `level` 1~9。
+3. 维护 `current_heading_stack`,遇新标题时清理不浅于当前 level 的旧标题,计算 `parent_headings`。
+4. 将表格、公式、图形分别提取为单行标签(`<table id="..." format="json">...</table>` 等),写入对应 sidecar。
+5. 所有可识别标题均触发基础块边界,**不**执行 token 阈值拆分。
+
+P chunker 直接读取 `.blocks.jsonl`,每个 content 行作为后续 Stage B/C 的独立处理单元。这意味着 `[part n]` 编号按每个原始 content 行独立重置。
+
+## 6. Stage B:超大表格行边界切片
+
+Stage B 只处理 token 数超过 `table_max` 的表格。其目标**不是单纯拆表**,而是在行边界优先拆分的基础上保留表格边界上下文。
+
+### 6.1 行边界优先切片
+
+- `format="json"`:按 JSON 顶层行数组切片。
+- `format="html"`:按 `<tr>...</tr>` 行切片。
+- 未显式标注但内容可嗅探为 JSON / HTML 的表格同样按上述规则处理。
+
+切片前预扣 `<table {attrs}></table>` 外壳 token 开销,使重新包裹后的切片尽量不超过 `table_max`。每个切片重新包裹为合法的 `<table>` 标签,便于下游解析。
+
+### 6.2 行级递归二次切片
+
+若某个行子集重新包裹后仍超过 `table_max`,则在该行子集内继续细分。**只有切片已经收敛到单行、且该单行自身超过限制时,才退化为字符级切分**。该机制使可被行边界表达的表格内容尽量保留合法表格结构。
+
+### 6.3 末片回吞
+
+若表格末片 token 数低于 `table_min_last`,且与前一切片合并后不超过 `table_max`,则将末片回吞至前一切片,减少无效短表格块。
+
+### 6.4 表格切片角色与物理粘连
+
+每个表格切片被赋予内部字段 `table_chunk_role`,并按角色决定与周围段落的粘连方式:
+
+| 角色 | 含义 | 粘连策略 |
+|---|---|---|
+| `first` | 原始表格的首切片 | 追加到当前累积块尾部,使表格**前置说明**与首切片进入同一块 |
+| `middle` | 原始表格的中间切片 | 独立输出,避免与无关正文合并 |
+| `last` | 原始表格的末切片 | 作为新累积块起点,使**后置解释**自动追加到末切片之后 |
+| `none` | 非表格切片或未拆分的完整表格 | 按普通文本块处理 |
+
+`table_chunk_role` 是内部字段,最终输出不会保留,**但在 Stage D 中继续作为合并约束使用**(见 §9.1)。
+
+## 7. Stage B.1:连续大表桥接文字双向重叠
+
+当同一原始内容行中出现「大表 A、短桥接文字、大表 B」的模式,且两张表均被拆分时,桥接文字按上下文预算进行双向分配:
+
+1. 将桥接文字按 token 编码。
+2. 计算左侧预算 `prev_budget = min(chunk_overlap_token_size, target_max - 左侧末切片当前 token 数)`。
+3. 计算右侧预算 `next_budget = min(chunk_overlap_token_size, target_max - 右侧首切片当前 token 数)`。
+4. **若桥接文字长度同时不超过两侧预算**:左右两个表格边界块都包含**完整桥接文字**。
+5. **若桥接文字较长**:前缀进入左侧末切片块,后缀进入右侧首切片块;超出两侧预算的中间段独立成为普通文本块。
+
+单侧预算还会被限制到不超过 `chunk_token_size / 2`,避免桥接文字主导整个块。
+
+这与普通相邻 chunk overlap 的差异:
+
+- 普通 overlap 按前后顺序复制字符或 token,与边界类型无关。
+- B.1 机制以表格切片角色为触发条件,把桥接文字同时作为左表后文上下文和右表前文上下文,避免桥接说明只归属一侧表格或被单独切散后难以召回。
+
+## 8. Stage C:锚点驱动的长文本块再切分
+
+Stage C 处理 Stage B 后仍超过 `target_max` 的内容块。
+
+### 8.1 短段落锚点
+
+把内容按段落恢复,选择满足以下条件的段落作为候选锚点:
+
+- 段落不是表格(不以 `<table` 开头)。
+- 段落文本长度不超过 `max_anchor_candidate_length`(100 字符)。
+- 段落不是该块的第一个段落(避免递归无法收敛)。
+
+### 8.2 均衡选锚
+
+根据目标子块数量计算理想切分位置,从候选锚点中选择距离理想位置最近的锚点。被选中的锚点**晋升为后续子块的新 `heading`**,原 heading 写入该子块的 `parent_headings`。
+
+### 8.3 无锚点降级
+
+若不存在合格锚点:
+
+1. **表格优先**:若块内仍存在超限表格,优先调用 Stage B 的行边界切片。
+2. **贪心打包**:其余文本按段落贪心打包到接近 `target_max`。
+3. **递归字符切分**:单一过长普通文本段落降级到 R 策略(`chunking_by_recursive_character`),使用 `chunk_overlap_token_size` 保持相邻文本片段的连续性。
+
+无锚点 fallback 路径保证算法**不会丢弃内容**,并尽量遵守用户配置的块大小上限。
+
+## 9. Stage D:层级感知的双相位合并
+
+Stage D 解决细碎章节场景下「块过碎」和「跨主题污染」的矛盾。核心思想是**自深层级向浅层级处理**,先合并同级小块,再允许浅层块吸收深层块,同时引入尺寸约束、表格切片角色约束和标题路径约束。
+
+### 9.1 D.0 合并约束(每次合并都要满足)
+
+1. **尺寸约束**:合并后的真实文本 token 数不超过 `target_max`;已达到 `target_ideal` 的块原则上不继续参与普通同级合并。
+2. **角色约束**:`middle` 表格切片锁定独立;`first`、`last` 按方向参与合并,防止表格边界上下文被错误吞并。
+3. **层级约束**:同级合并在相同 `level` 之间发生;跨级吸收只允许浅层吸收深层,**禁止深层反向吸收浅层**。
+4. **父标题路径一致性约束**:相邻块的 `parent_headings` 一致,或处于同一父标题路径所限定的连续范围内。这是避免跨主题污染的关键。
+
+### 9.2 D.1 Phase A:同级合并
+
+针对当前 level 的相邻块,若两者均低于 `target_ideal`,且满足上述约束,则合并为一个块。
+
+表格切片角色的方向规则:
+
+| 块角色 | 可向后吸收下一块 | 可被前一块吸收 |
+|---|:-:|:-:|
+| `none` | 是 | 是 |
+| `first` | 是 | 否 |
+| `middle` | 否 | 否 |
+| `last` | 否 | 是 |
+
+### 9.3 D.2 尾部整批吸收
+
+若一个已达到 `target_ideal` 的块后面紧跟一串同级小块,且该串小块总 token 数低于 `small_tail_threshold`、合并后真实 token 数不超过 `target_max`,则**一次性吸收**该串小块。遇到 `middle` 表格切片时停止。
+
+### 9.4 D.3 Phase B:跨级吸收
+
+对于 Phase A 后仍未饱和的小块,尝试跨级合并,但仅允许浅层吸收深层:
+
+- 当前块比后一块更浅时,当前块可向后吸收后一块。
+- 当前块比前一块更深时,前一浅层块可吸收当前块。
+- 反方向合并被禁止。
+- 跨级阶段允许 `last` 角色向后吸收;`middle` 仍不参与合并。
+
+### 9.5 D.4 合并后真实 token 复测
+
+由于合并时会插入换行连接符,逐块 token 数相加可能低估合并结果。**每次提交合并前,都要对拼接后的真实文本重新计算 token 数**,确认不超过 `target_max` 后再提交。
+
+合并后保留主块的 `heading`。如果多个 part 片段被合并,最终 heading 保留主块的 part 后缀,**不会**额外拼接多个 part 标签。
+
+## 10. Fallback 与降级路径
+
+P 策略有多层降级保护:
+
+| 触发条件 | 降级行为 |
+|---|---|
+| `blocks_path` 缺失、不可读、无有效 content 行 | 整体降级到 `chunking_by_recursive_character()`,传入解析出的 `chunk_overlap_token_size` |
+| Stage B 中表格无法识别 JSON / HTML 结构 | 该表格调用 R 策略字符切分 |
+| Stage B 中单行表格自身超过 `table_max` | 该单行调用 R 策略字符切分 |
+| Stage C 中长块没有合格短段落锚点 | 表格优先 → 贪心打包 → 单段落超长再降级 R 字符切分 |
+
+**重要**:整体 fallback 后不再具备标题层级、表格角色和桥接文字双向重叠能力;但能保证文档仍产生检索块,不因结构化 sidecar 缺失而被静默丢弃。
+
+## 11. 配置项
+
+| 配置 | 默认 | 说明 |
+|---|---|---|
+| `CHUNK_P_SIZE` | `2000`(未设时使用 `DEFAULT_CHUNK_P_SIZE`,**不**沿用 `CHUNK_SIZE`) | P 专用 `chunk_token_size`;段落语义合并需要比全局默认更大的上限,因此独立默认而非回退到 `CHUNK_SIZE` |
+| `CHUNK_P_OVERLAP_SIZE` | 未设(沿用 `CHUNK_OVERLAP_SIZE`) | P 专用 overlap;只影响同一内容行内长正文 fallback 和表格桥接预算,**不**让表格行级切片互相重叠 |
+| `CHUNK_OVERLAP_SIZE` / `LightRAG(chunk_overlap_token_size=…)` | `100` | 未设 P 专用 overlap 时的全局兜底 |
+
+配置语法、优先级链、`addon_params["chunker"]` 运行时改值等详见 [FileProcessingConfiguration-zh.md](FileProcessingConfiguration-zh.md) §3。
+
+启用 P 的典型 `LIGHTRAG_PARSER` 写法:
+
+```bash
+LIGHTRAG_PARSER=docx:native-P,*:legacy-R
+CHUNK_P_SIZE=2000
+CHUNK_P_OVERLAP_SIZE=100
+```
+
+或在单文件覆盖:
+
+```text
+my-proposal.[native-P].docx
+```
+
+## 12. 分块效果检验
+
+### 12.1 检查 sidecar 是否生成
+
+确认 native parser 是否成功产生 `.blocks.jsonl`:
+
+```bash
+ls -l INPUT/__parsed__/<doc>.docx.parsed/<doc>.blocks.jsonl
+```
+
+若文件不存在或为空,P 策略会整体降级为 R,不会获得 P 的任何收益。常见原因:
+
+- 未配置 `LIGHTRAG_PARSER=docx:native-...`。
+- 解析失败(看 `pipeline_status` 错误条目)。
+- 文档不是 DOCX(其他格式不支持 P)。
+
+### 12.2 检查 blocks.jsonl 内容
+
+每行一个 JSON,过滤 `type == "content"` 后查看 heading / level / parent_headings 是否符合预期:
+
+```bash
+jq -c 'select(.type=="content") | {level, heading, parent_headings}' \
+   INPUT/__parsed__/<doc>.docx.parsed/<doc>.blocks.jsonl | head
+```
+
+若 heading 大量为空或 level 异常,说明 native parser 没正确识别标题样式 —— 此时 P 策略的层级合并和锚点提升都会失效。
+
+### 12.3 检查最终 chunks
+
+查看 `text_chunks` 存储中的 chunk 元数据:
+
+```bash
+jq '.[] | {heading, level, tokens, parent_headings}' \
+   rag_storage/kv_store_text_chunks.json | head -30
+```
+
+应观察到:
+
+- 大表前后块的 heading 通常对应 `[part 1]` / `[part n]`(说明 Stage B 拆分发生)。
+- 细碎条款被合并到接近 `target_ideal` 的块(说明 Stage D 生效)。
+- `parent_headings` 在不同章节切换处发生跳变,同章节内保持稳定。
+
+### 12.4 块尺寸分布检验
+
+理想分布:大多数 chunk 落在 `[target_ideal, target_max]` 区间(即 N=2000 时约 1500~2000 token);明显偏小的块通常是 `middle` 表格切片(锁定独立)或紧靠章节边界的尾块。
+
+若出现大量低于 `small_tail_threshold` 的尾块,可能是:
+
+- 父标题路径一致性约束过严(不同 `parent_headings` 的相邻小块无法合并)。
+- 大量 `middle` 表格切片堆积(表格本身就很大)。
+
+## 13. 错误调试
+
+### 13.1 P 没生效,输出与 R 一致
+
+按以下顺序排查:
+
+1. `full_docs[doc_id].process_options` 是否包含 `P`?
+2. `full_docs[doc_id].parse_format` 是否为 `lightrag`?若为 `raw`,说明走的是 legacy 路径,P 会自动降级到 R。
+3. `lightrag_document_path` 指向的 `.blocks.jsonl` 是否存在、是否非空?
+4. 日志中是否有 `paragraph_semantic ... fallback to recursive_character` 字样?
+
+### 13.2 表格被切散、前后说明分离
+
+- 检查表格是否真的被识别为 `<table format="json">` 或 `<table format="html">`(看 `.blocks.jsonl`)。未识别格式的表格只能走字符切分,无法启动 Stage B 的角色机制。
+- 检查表格 token 数是否真的超过 `table_max`。低于阈值的表格保持完整,不会触发首/中/末切片。
+- 若是连续大表,确认两张表之间的桥接文字是否在**同一 content 行**内 —— 跨 content 行的桥接不参与 B.1 双向重叠。
+
+### 13.3 细碎条款没有被合并
+
+- 检查相邻条款的 `parent_headings` 是否一致:父标题路径一致性约束会阻止跨主题合并。
+- 检查 `level` 是否一致:同级合并要求相同 `level`,跨级吸收只允许浅吸深。
+- 检查中间是否插入了 `middle` 表格切片:会阻断尾部整批吸收。
+
+### 13.4 出现单个超过 `target_max` 的块
+
+正常情况下 Stage D 的真实 token 复测会拒绝超限合并,但以下场景仍可能出现超限块:
+
+- 单行表格自身超过 `target_max`,无锚点可拆,最终走 R 字符切分但单 chunk 仍超限。
+- 注意:即使出现上述情况,`enforce_chunk_token_limit_before_embedding` 也会在 embedding 前做最后的硬切分,下游不会真把超限 chunk 嵌入向量库。
+
+### 13.5 `[part n]` 后缀异常
+
+- 同一原始 content 行拆出多片但只看到一个 `[part 1]`:检查是否在 Stage D 中被合并 —— 合并后保留主块的 part 后缀,不拼接多个。
+- 出现旧式 `[表格片段N]` 后缀:说明使用了旧版 chunker 输出的数据,新版统一为 `[part n]`,需要重新分块。
+
+### 13.6 日志关键字
+
+P 策略相关日志关键字(用于 `grep` 排查):
+
+- `paragraph_semantic` — 模块入口
+- `fallback to recursive_character` — 整体或单段落降级
+- `table_chunk_role` — 表格角色相关
+- `bridge` — Stage B.1 桥接文字处理
+- `anchor` — Stage C 锚点选择

+ 404 - 0
docs/ParagraphSemanticChunking.md

@@ -0,0 +1,404 @@
+# Paragraph Semantic Chunking Strategy
+
+## 1. Use Cases and Strategy Selection
+
+### 1.1 What the P Strategy Solves
+
+Paragraph Semantic Chunking (hereafter the **P strategy**) targets documents with clear sectional structure such as DOCX. Its core goal is to **align chunk boundaries with the document's native semantic boundaries** (headings, paragraphs, table rows) as much as possible, rather than determining cut points solely from token-length counting.
+
+The P strategy is mainly designed to address the following four categories of problems:
+
+1. **Table context fragmentation**: When a large table is split, its head and tail slices easily become detached from the preceding description, following explanation, or intermediate bridging text, making them impossible to understand independently during recall.
+2. **Insufficient utilization of hierarchical information**: Methods that only look at neighboring paragraphs cannot leverage parent heading paths or relationships between sibling clauses.
+3. **Imbalanced sizes of fine-grained sections**: Regulations, standards, contracts, etc., often contain many fine-grained clauses of 100–300 tokens. Without merging, chunks become too short and semantically thin; merging by adjacent length alone causes cross-topic pollution.
+4. **Long-chunk re-splitting breaks structure**: When sections are excessively long, ordinary character splitting ignores table row boundaries and heading hierarchy.
+
+The P strategy is effective only for the `.blocks.jsonl` structured artifacts produced by the `native` extraction engine; for unstructured inputs, it automatically falls back to the R strategy (see §10).
+
+### 1.2 Comparison of P / R / V Strategies
+
+| Dimension | R Strategy (Recursive) | V Strategy (SemanticVector) | P Strategy (ParagraphSemantic) |
+|---|---|---|---|
+| Splitting basis | Cascading character separators (paragraph → newline → Chinese punctuation → whitespace → character) + token budget | Sentence-level embedding distance thresholds (percentile / standard deviation / IQR / gradient) to locate semantic breaks | DOCX outline level with `parent_headings` + table row boundaries + anchors + hierarchy-aware merging |
+| Chunk size control | `chunk_token_size` hard cap | `chunk_token_size` is merely an advisory ceiling; when exceeded, secondary splitting via R | `target_max` hard cap + `target_ideal` soft target + table threshold + tail-absorption threshold working in concert |
+| Table handling | Table-unaware; may cut in the middle of a table | Table-unaware | Tables smaller than `table_max` are kept intact; large tables are sliced by JSON row array / HTML `<tr>` row boundaries and re-wrapped as valid `<table>` |
+| Table context | Relies on incidental window coverage | Relies on embedding distance | First slice glues to preceding description, last slice glues to following explanation; bidirectional overlap of bridging text between consecutive large tables |
+| Inter-chunk overlap | Global `chunk_overlap_token_size` | No overlap | No overlap across section boundaries; within the same section, long body falls back to R with overlap by `CHUNK_P_OVERLAP_SIZE`; bridging text between consecutive large tables may enter both the preceding and following table chunks |
+| Heading metadata | Usually none | Usually none | Inherits or promotes heading; appends `[part n]` suffix after splitting; preserves `parent_headings` and `level` |
+| Embedding compute cost | None | High (must compute embedding per sentence) | None |
+| Input requirements | Any text | Any text + Embedding model | Must have a `.blocks.jsonl` sidecar (i.e., result of the `native` engine); otherwise falls back to R |
+
+### 1.3 How to Choose
+
+| Scenario | Recommended | Rationale |
+|---|---|---|
+| DOCX with clear sectional hierarchy, large tables, fine-grained clauses | **P** | Fully leverages heading hierarchy and table row boundaries; chunk boundaries best match semantics; avoids cross-topic pollution |
+| Documents dominated by prose / commentary / long body without clear sectional structure | **V** | Splitting by semantic similarity forms natural boundaries at topic shifts, more stable than character splitting |
+| Inputs are plain text, Markdown, code, logs, or you want minimum compute overhead | **R** | No embedding overhead; cascading separators are stable enough for mixed Chinese-English text |
+| General configuration (uncertain about file types) | **R** | P automatically falls back to R when no sidecar is present; V also falls back to R when no Embedding model is available |
+| Documents with chaotic heading styles and many pseudo-headings in body | **R** or **V** | P depends on the native parser correctly identifying headings; messy headings cause basic chunk boundaries to shift |
+| Single-row giant tables or unparsable tables | Any | All three strategies eventually fall back to character-level splitting; P still retains the advantage of table context gluing |
+
+### 1.4 Costs of the P Strategy
+
+- Must be paired with the `native` engine: explicitly declared in `LIGHTRAG_PARSER`, e.g., `docx:native-P`; otherwise, even if `P` is written, it falls back to R due to the missing `.blocks.jsonl`.
+- DOCX only: other formats have no `.blocks.jsonl` artifact.
+- Many algorithmic paths and thresholds: debugging requires first verifying the input sidecar, then inspecting the outputs of each stage.
+
+## 2. Overview of How It Works
+
+The P strategy takes as input the `.blocks.jsonl` produced by the native parser in `fixlevel=0` mode. **Each `type == "content"` line is treated as one heading-level basic chunk**, then table slicing, long-chunk splitting, and hierarchical merging are performed on top:
+
+```text
+DOCX
+  ↓  native parser (fixlevel=0)
+.blocks.jsonl + sidecars (.tables.json / .equations.json / .drawings.json / .blocks.assets/)
+  ↓  Stage B: slice oversized tables along row boundaries and assign first/middle/last roles
+  ↓  Stage B.1: bidirectional overlap of bridging text between consecutive large tables
+  ↓  Stage C: anchor-driven re-splitting of long text chunks
+  ↓  Stage D: hierarchy-aware two-phase merging
+  ↓  Stage E: [part n] line-level provenance numbering
+Final chunk list
+```
+
+**Key invariants of the P strategy**:
+
+1. **No overlap across section boundaries**: Text belonging to one `.blocks.jsonl` content line is never copied into a chunk derived from a different content line, avoiding "misattribution".
+2. **Long body within a section may overlap**: Multiple slices from within the same content line may keep R-style overlap controlled by `chunk_overlap_token_size`, reducing mid-sentence cuts in long bodies.
+3. **Bridging text between tables may overlap bidirectionally**: The only cross-paragraph copying scenario, specifically serving context preservation for consecutive large tables.
+4. **Table rows do not overlap each other**: Row-level slicing itself is non-overlapping, different from R's overlap concept.
+
+## 3. Input and Output
+
+### 3.1 Input
+
+`chunking_by_paragraph_semantic()` receives the following inputs:
+
+| Parameter | Source | Description |
+|---|---|---|
+| `content` | `full_docs[doc_id].content` | Concatenated merged text, used for fallback when sidecar is missing |
+| `blocks_path` | `full_docs[doc_id].lightrag_document_path` | Path to `.blocks.jsonl`, the primary input for the P strategy |
+| `chunk_token_size` | `chunk_options.chunk_token_size` / `CHUNK_P_SIZE` | Target hard cap N; defaults to `2000` |
+| `chunk_overlap_token_size` | `CHUNK_P_OVERLAP_SIZE` / `chunk_overlap_token_size` | Upper bound for long-body fallback overlap within the same content line and for the table bridging budget; defaults to `100` |
+| `tokenizer` | The tokenizer already resolved by LightRAG | Basis for all token counting and text overlap truncation |
+
+The P strategy **does not accept** `split_by_character` / `split_by_character_only`, because the normal path is driven by heading and paragraph structure.
+
+### 3.2 `.blocks.jsonl` Convention
+
+The P strategy only processes `type == "content"` lines. Each content line typically contains:
+
+- `content`: The body text under the heading, possibly including ordinary paragraphs, `<table ...>...</table>` tags, `<equation ... />` formulas, `<drawing ... />` graphics.
+- `heading`: The current heading.
+- `parent_headings`: The chain of parent headings.
+- `level`: Heading level (1–9, corresponding to the original outline levels 0–8).
+- `positions`: Original paragraph positioning (used for traceability).
+
+The native parser's `fixlevel=0` mode guarantees that "the body under a heading becomes one basic chunk" without performing token-threshold splitting during parsing. Tables are inserted into `content` while staying intact.
+
+### 3.3 Output
+
+The final output is an ordered list of chunks, where each element is:
+
+```python
+{
+    "tokens": int,                    # Actual token count (re-measured after merging)
+    "content": str,                   # Chunk text (may contain <table> tags)
+    "chunk_order_index": int,         # Chunk ordering index
+    "heading": str,                   # Suffix [part n] appended after splitting
+    "parent_headings": list[str],     # Parent heading chain; no suffix appended
+    "level": int,                     # Heading level
+}
+```
+
+Internally, the implementation also temporarily uses fields such as `paragraphs`, `table_chunk_role`, `uuid`, `uuid_end`, `type` to assist splitting and merging, but **these do not appear in the final output**.
+
+### 3.4 `[part n]` Suffix Rules
+
+- When the same original `.blocks.jsonl` content line is split into multiple slices, the `heading` field of every slice gets `[part 1]`, `[part 2]` … appended.
+- Content lines that are not split keep the original heading unchanged.
+- `parent_headings` does not get any suffix.
+- Numbering is **reset independently within each original content line**.
+- The legacy `[表格片段N]` ("table fragment N") suffix is uniformly replaced by `[part n]`.
+
+## 4. Key Thresholds
+
+P strategy thresholds are not fixed constants; they are dynamically derived from `chunk_token_size` (denoted N):
+
+| Name | Formula | Value when N = 2000 | Technical meaning |
+|---|---|---:|---|
+| `target_max` | N | 2000 | Hard upper bound for text chunks |
+| `target_ideal` | 0.75 × N | 1500 | Ideal target for text chunks; chunks at or above this value stop participating in ordinary peer merging |
+| `table_max` | 0.625 × N | 1250 | Threshold that triggers table slicing |
+| `table_ideal` | 0.375 × N | 750 | Ideal size for a table slice |
+| `table_min_last` | 0.32 × `table_max` | 400 | Last-slice swallow-back threshold (if the last slice is smaller and can be merged, swallow it back into the previous slice) |
+| `small_tail_threshold` | 0.125 × N | 250 | Threshold for tail fragment absorption |
+| `max_anchor_candidate_length` | Fixed | 100 chars | Upper bound on paragraph length for candidate anchors in long-chunk splitting |
+
+Proportional constraint relationships: `table_max < target_ideal < target_max`, `table_ideal < table_max`. These ratios originate from empirical values in the audit mode (`large chunk 8000, small table 5000, ideal table 3000, table tail 1600`) and are now proportionally scaled by `chunk_token_size`.
+
+## 5. Stage A: Heading-Level Basic Chunks
+
+Heading recognition is performed by the native parser; **the P chunker itself does not scan the docx body nor judge heading styles**.
+
+In `fixlevel=0` mode, the native parser:
+
+1. Reads `styles.xml`, builds a style inheritance chain via `<w:basedOn>`, and traces back the effective `<w:outlineLvl>`.
+2. Iterates over the paragraphs of `document.xml`, resolving outline levels along the inheritance chain; original outline levels 0–8 are mapped to internal `level` 1–9.
+3. Maintains `current_heading_stack`, clearing old headings no shallower than the current level when a new heading is encountered, and computing `parent_headings`.
+4. Extracts tables, formulas, and drawings into single-line tags (`<table id="..." format="json">...</table>` etc.) and writes them to the corresponding sidecars.
+5. All recognizable headings trigger a basic chunk boundary; **no** token-threshold splitting is performed.
+
+The P chunker directly reads `.blocks.jsonl`, treating each content line as an independent unit of processing for subsequent Stages B/C. This implies that `[part n]` numbering is reset independently per original content line.
+
+## 6. Stage B: Row-Boundary Slicing for Oversized Tables
+
+Stage B only processes tables whose token count exceeds `table_max`. Its goal is **not merely to split the table** but to preserve table boundary context based on row-boundary-priority splitting.
+
+### 6.1 Row-Boundary-Priority Slicing
+
+- `format="json"`: Slice by the top-level JSON row array.
+- `format="html"`: Slice by `<tr>...</tr>` rows.
+- Tables not explicitly tagged but sniffable as JSON / HTML are handled by the same rules.
+
+Before slicing, the `<table {attrs}></table>` wrapper token cost is pre-deducted so that each re-wrapped slice stays under `table_max` as much as possible. Each slice is re-wrapped as a valid `<table>` tag for ease of downstream parsing.
+
+### 6.2 Row-Level Recursive Re-Slicing
+
+If a row subset, after re-wrapping, still exceeds `table_max`, further subdivision is performed within that row subset. **Only when slicing has converged to a single row that itself exceeds the limit does it degrade to character-level splitting**. This mechanism keeps as much valid table structure as possible for table content expressible by row boundaries.
+
+### 6.3 Last-Slice Swallow-Back
+
+If the token count of the last table slice falls below `table_min_last` and the result of merging with the previous slice does not exceed `table_max`, the last slice is swallowed back into the previous slice, reducing useless short table chunks.
+
+### 6.4 Table Slice Roles and Physical Gluing
+
+Each table slice is assigned an internal field `table_chunk_role`, and gluing to surrounding paragraphs is decided by role:
+
+| Role | Meaning | Gluing strategy |
+|---|---|---|
+| `first` | First slice of the original table | Appended to the tail of the current accumulating chunk so that the table's **preceding description** enters the same chunk as the first slice |
+| `middle` | Middle slice of the original table | Output independently to avoid merging with unrelated body |
+| `last` | Last slice of the original table | Used as the starting point of a new accumulating chunk so that the **following explanation** is automatically appended after the last slice |
+| `none` | Non-table slice or untouched intact table | Treated as ordinary text chunks |
+
+`table_chunk_role` is an internal field that does not survive in the final output, **but in Stage D it continues to serve as a merging constraint** (see §9.1).
+
+## 7. Stage B.1: Bidirectional Overlap of Bridging Text Between Consecutive Large Tables
+
+When the pattern "large table A, short bridging text, large table B" appears in the same original content line and both tables are split, the bridging text is distributed bidirectionally according to a context budget:
+
+1. Encode the bridging text into tokens.
+2. Compute the left budget `prev_budget = min(chunk_overlap_token_size, target_max - current token count of the left last slice)`.
+3. Compute the right budget `next_budget = min(chunk_overlap_token_size, target_max - current token count of the right first slice)`.
+4. **If the bridging text fits within both the left and right budgets**: Both the left and right table boundary chunks contain the **complete bridging text**.
+5. **If the bridging text is longer**: The prefix enters the left last-slice chunk, the suffix enters the right first-slice chunk; the middle portion that exceeds both budgets becomes an independent ordinary text chunk.
+
+Each one-sided budget is additionally capped at `chunk_token_size / 2` to prevent the bridging text from dominating an entire chunk.
+
+The difference from ordinary adjacent chunk overlap:
+
+- Ordinary overlap copies characters or tokens by forward/backward order, regardless of boundary type.
+- The B.1 mechanism is triggered by table slice roles, treating bridging text as both the post-text context of the left table and the pre-text context of the right table, avoiding the bridging description being assigned to only one side or being split off and hard to recall.
+
+## 8. Stage C: Anchor-Driven Re-Splitting of Long Text Chunks
+
+Stage C processes content chunks that still exceed `target_max` after Stage B.
+
+### 8.1 Short-Paragraph Anchors
+
+Restore content into paragraphs, then select paragraphs that satisfy all of the following as candidate anchors:
+
+- The paragraph is not a table (does not start with `<table`).
+- The paragraph text length does not exceed `max_anchor_candidate_length` (100 chars).
+- The paragraph is not the first paragraph of the chunk (to avoid non-convergent recursion).
+
+### 8.2 Balanced Anchor Selection
+
+Based on the target sub-chunk count, ideal split positions are computed, and the anchor closest to each ideal position is chosen from candidates. The chosen anchor is **promoted to the new `heading` of the following sub-chunk**, while the original heading is written into that sub-chunk's `parent_headings`.
+
+### 8.3 No-Anchor Fallback
+
+If no qualifying anchor exists:
+
+1. **Table first**: If oversized tables still exist within the chunk, prioritize Stage B's row-boundary slicing.
+2. **Greedy packing**: Greedily pack the remaining text by paragraph, approaching `target_max`.
+3. **Recursive character splitting**: A single excessively long ordinary text paragraph falls back to the R strategy (`chunking_by_recursive_character`), using `chunk_overlap_token_size` to keep continuity between adjacent text slices.
+
+The no-anchor fallback path guarantees the algorithm **does not discard content** and tries to respect the user-configured chunk size cap.
+
+## 9. Stage D: Hierarchy-Aware Two-Phase Merging
+
+Stage D resolves the tension between "chunks too small" and "cross-topic pollution" in fine-grained section scenarios. The core idea is to **process from deeper levels to shallower levels**, first merging small chunks at the same level, then allowing shallow chunks to absorb deep chunks, while introducing size constraints, table slice role constraints, and heading path constraints.
+
+### 9.1 D.0 Merging Constraints (every merge must satisfy)
+
+1. **Size constraint**: The actual text token count after merging does not exceed `target_max`; chunks that have reached `target_ideal` in principle do not continue to participate in ordinary peer merging.
+2. **Role constraint**: `middle` table slices are locked as independent; `first` and `last` participate in merging directionally to prevent table boundary context from being incorrectly swallowed.
+3. **Level constraint**: Peer merging happens between equal `level`; cross-level absorption only allows shallow absorbing deep, **disallowing deep absorbing shallow in reverse**.
+4. **Parent heading path consistency constraint**: Adjacent chunks have identical `parent_headings`, or are within a contiguous range constrained by the same parent heading path. This is key to avoiding cross-topic pollution.
+
+### 9.2 D.1 Phase A: Peer Merging
+
+For adjacent chunks at the current level, if both are below `target_ideal` and satisfy the above constraints, merge them into one chunk.
+
+Directional rules of table slice roles:
+
+| Chunk role | Can forward-absorb next chunk | Can be absorbed by previous chunk |
+|---|:-:|:-:|
+| `none` | Yes | Yes |
+| `first` | Yes | No |
+| `middle` | No | No |
+| `last` | No | Yes |
+
+### 9.3 D.2 Batched Tail Absorption
+
+If a chunk that has reached `target_ideal` is followed by a string of peer small chunks, and the total token count of that string is below `small_tail_threshold` and the actual merged token count does not exceed `target_max`, then **absorb that string in one shot**. Stop when encountering a `middle` table slice.
+
+### 9.4 D.3 Phase B: Cross-Level Absorption
+
+For small chunks still unsaturated after Phase A, attempt cross-level merging, but only allow shallow absorbing deep:
+
+- When the current chunk is shallower than the next, the current chunk may forward-absorb the next.
+- When the current chunk is deeper than the previous, the previous shallower chunk may absorb the current.
+- Reverse merging is forbidden.
+- In the cross-level phase, the `last` role is allowed to forward-absorb; `middle` still does not participate in merging.
+
+### 9.5 D.4 Post-Merge Actual Token Re-Measurement
+
+Because merging inserts newline connectors, chunk-by-chunk token summation may underestimate the merged result. **Before committing each merge, the actual concatenated text must be re-tokenized**, and the merge is committed only after confirming it does not exceed `target_max`.
+
+After merging, the main chunk's `heading` is retained. If multiple part slices are merged, the final heading keeps only the main chunk's part suffix; the part tags of the absorbed slices are **never** appended to it.
+
+## 10. Fallback and Degradation Paths
+
+The P strategy has multiple layers of fallback protection:
+
+| Trigger | Degradation behavior |
+|---|---|
+| `blocks_path` missing, unreadable, or no valid content line | Fall back entirely to `chunking_by_recursive_character()`, passing in the parsed `chunk_overlap_token_size` |
+| Stage B cannot identify the JSON / HTML structure of a table | That table uses the R strategy's character splitting |
+| Stage B finds a single-row table itself exceeding `table_max` | That single row uses the R strategy's character splitting |
+| Stage C finds a long chunk with no qualifying short-paragraph anchor | Table first → greedy packing → fall back to R character splitting if a single paragraph is too long |
+
+**Important**: After the overall fallback, capabilities such as heading hierarchy, table roles, and bidirectional bridging-text overlap are no longer available; however, it still ensures the document produces retrieval chunks and is not silently dropped due to a missing structured sidecar.
+
+## 11. Configuration
+
+| Configuration | Default | Description |
+|---|---|---|
+| `CHUNK_P_SIZE` | `2000` (when unset, uses `DEFAULT_CHUNK_P_SIZE`; does **not** fall back to `CHUNK_SIZE`) | P-specific `chunk_token_size`; paragraph semantic merging requires a higher cap than the global default, hence an independent default rather than falling back to `CHUNK_SIZE` |
+| `CHUNK_P_OVERLAP_SIZE` | Unset (falls back to `CHUNK_OVERLAP_SIZE`) | P-specific overlap; only affects long-body fallback within the same content line and the table bridging budget. **Does not** cause table row-level slices to overlap |
+| `CHUNK_OVERLAP_SIZE` / `LightRAG(chunk_overlap_token_size=…)` | `100` | Global fallback when no P-specific overlap is set |
+
+For configuration syntax, the priority chain, and runtime overrides via `addon_params["chunker"]`, see [FileProcessingConfiguration.md](FileProcessingConfiguration.md) §3.
+
+A typical `LIGHTRAG_PARSER` setup that enables P:
+
+```bash
+LIGHTRAG_PARSER=docx:native-P,*:legacy-R
+CHUNK_P_SIZE=2000
+CHUNK_P_OVERLAP_SIZE=100
+```
+
+Or override per single file:
+
+```text
+my-proposal.[native-P].docx
+```
+
+## 12. Validating Chunking Results
+
+### 12.1 Check Whether the Sidecar Was Generated
+
+Confirm whether the native parser successfully produced `.blocks.jsonl`:
+
+```bash
+ls -l INPUT/__parsed__/<doc>.docx.parsed/<doc>.blocks.jsonl
+```
+
+If the file is missing or empty, the P strategy falls back to R entirely and gains none of P's benefits. Common causes:
+
+- `LIGHTRAG_PARSER=docx:native-...` was not configured.
+- Parsing failed (see error entries in `pipeline_status`).
+- The document is not a DOCX (other formats do not support P).
+
+### 12.2 Inspect the Contents of blocks.jsonl
+
+Each line is a JSON object; filter for `type == "content"` and check whether `heading` / `level` / `parent_headings` match expectations:
+
+```bash
+jq -c 'select(.type=="content") | {level, heading, parent_headings}' \
+   INPUT/__parsed__/<doc>.docx.parsed/<doc>.blocks.jsonl | head
+```
+
+If most headings are empty or levels are abnormal, the native parser did not correctly recognize heading styles — in which case P's hierarchical merging and anchor promotion will both fail.
+
+### 12.3 Inspect the Final Chunks
+
+View chunk metadata in the `text_chunks` storage:
+
+```bash
+jq '.[] | {heading, level, tokens, parent_headings}' \
+   rag_storage/kv_store_text_chunks.json | head -30
+```
+
+You should observe:
+
+- Headings of chunks around large tables typically correspond to `[part 1]` / `[part n]` (indicating Stage B splitting occurred).
+- Fine-grained clauses are merged into chunks close to `target_ideal` (indicating Stage D took effect).
+- `parent_headings` jumps at boundaries between different sections and stays stable within the same section.
+
+### 12.4 Chunk Size Distribution Check
+
+Ideal distribution: most chunks fall in the range `[target_ideal, target_max]` (i.e., approximately 1500–2000 tokens when N=2000); chunks noticeably smaller are usually `middle` table slices (locked as independent) or tail chunks at section boundaries.
+
+If many tail chunks below `small_tail_threshold` appear, possible causes include:
+
+- The parent heading path consistency constraint is too strict (adjacent small chunks with different `parent_headings` cannot merge).
+- Many `middle` table slices pile up (the table itself is very large).
+
+## 13. Troubleshooting
+
+### 13.1 P Did Not Take Effect; Output Matches R
+
+Investigate in this order:
+
+1. Does `full_docs[doc_id].process_options` contain `P`?
+2. Is `full_docs[doc_id].parse_format` equal to `lightrag`? If `raw`, it is on the legacy path and P automatically falls back to R.
+3. Does the `.blocks.jsonl` pointed to by `lightrag_document_path` exist and is it non-empty?
+4. Are there `paragraph_semantic ... fallback to recursive_character` messages in the logs?
+
+### 13.2 Tables Are Scattered; Preceding and Following Explanations Are Detached
+
+- Check whether the table is truly recognized as `<table format="json">` or `<table format="html">` (see `.blocks.jsonl`). Tables with unrecognized format can only undergo character splitting and cannot trigger Stage B's role mechanism.
+- Check whether the table's token count actually exceeds `table_max`. Tables below the threshold remain intact and never trigger first/middle/last slicing.
+- For consecutive large tables, confirm whether the bridging text between the two tables resides in the **same content line** — bridging across content lines does not participate in B.1 bidirectional overlap.
+
+### 13.3 Fine-Grained Clauses Are Not Merged
+
+- Check whether the `parent_headings` of adjacent clauses are identical: the parent heading path consistency constraint prevents cross-topic merging.
+- Check whether `level` is the same: peer merging requires equal `level`; cross-level absorption only allows shallow absorbing deep.
+- Check whether a `middle` table slice is inserted in the middle: this blocks batched tail absorption.
+
+### 13.4 A Single Chunk Exceeds `target_max`
+
+Normally, Stage D's actual token re-measurement rejects oversized merges, but oversized chunks may still occur in the following scenarios:
+
+- A single-row table itself exceeds `target_max` with no anchor to split on; eventually it goes through R character splitting but a single chunk still exceeds the limit.
+- Even in such cases, `enforce_chunk_token_limit_before_embedding` performs a final hard cut before embedding as a safeguard, so downstream never actually embeds an oversized chunk into the vector store.
+
+### 13.5 Abnormal `[part n]` Suffixes
+
+- Multiple slices come from the same original content line, but only one `[part 1]` is seen: check whether they were merged in Stage D — after merging, the main chunk's part suffix is retained and multiple part tags are not concatenated.
+- Legacy `[表格片段N]` suffix appears: this indicates data output by an older chunker; the new version standardizes on `[part n]`, and re-chunking is required.
+
+### 13.6 Log Keywords
+
+P-strategy-related log keywords (for `grep`-based troubleshooting):
+
+- `paragraph_semantic` — module entry
+- `fallback to recursive_character` — overall or single-paragraph degradation
+- `table_chunk_role` — table role-related
+- `bridge` — Stage B.1 bridging text handling
+- `anchor` — Stage C anchor selection

+ 129 - 0
docs/ParserDebugCLI-zh.md

@@ -0,0 +1,129 @@
+# Parser CLI Debugger 使用指南
+
+本工具用于本地调试 LightRAG 的三个内容解析引擎(`native` / `mineru` / `docling`),针对**单个文件**触发 `LightRAG.parse_<engine>` 生产代码路径,并把解析产物(sidecar 与 raw 缓存)输出到一个**扁平目录布局**——与生产入库目录相比,区别仅在于:
+
+- **无 `__parsed__/` 中间层**:产物直接落在指定父目录下,便于查看;
+- **源文件不会被归档**:源文件保留在原位置(生产路径会把源文件移到 `<INPUT_DIR>/__parsed__/`);
+- **raw 缓存只看目录是否存在**:`mineru` / `docling` 的 raw 目录非空即视为有效,跳过 `_manifest.json` 校验。
+
+其余流程(IR 构建、sidecar 写入、对 `full_docs` 的同步逻辑)与生产入库完全一致,便于排查解析阶段问题。
+
+## 命令格式
+
+```bash
+python -m lightrag.parser.cli <input_file> \
+    --engine {native|mineru|docling} \
+    [-o <sidecar_parent_dir>] \
+    [--doc-id <doc-id>] \
+    [--force-reparse] \
+    [--preview N]
+```
+
+| 参数 | 说明 |
+|---|---|
+| `input_file` | 待解析的源文件路径(位置参数,必填)。文件必须实际存在。 |
+| `--engine` | 必填:`native`(仅 `.docx`,本地解析)/ `mineru`(PDF/办公文档,调 MinerU 服务)/ `docling`(PDF/办公文档,调 docling-serve)。 |
+| `-o / --sidecar-parent-dir` | sidecar 与 raw 目录的父目录,默认 = 源文件所在目录。 |
+| `--doc-id` | 自定义文档 ID,默认 `doc-<md5(源文件绝对路径)>`(同一文件多次跑结果稳定)。 |
+| `--force-reparse` | 仅对 `mineru` / `docling` 生效:清空 raw 目录、强制重新下载与解析。默认行为是 raw 目录非空即复用。 |
+| `--preview N` | 解析完成后打印前 N 个 block 的预览(headings + 内容片段),默认 5;`0` 关闭。 |
+
+## 输出目录布局
+
+以输入 `./inputs/workspace/sample.pdf` + 默认 sidecar 父目录(即 `./inputs/workspace/`)为例:
+
+```
+./inputs/workspace/
+├── sample.pdf                       # 原文件,不动
+├── sample.pdf.parsed/               # ← sidecar 输出
+│   ├── sample.blocks.jsonl          # JSONL:首行 meta,后续每行一个 block
+│   ├── sample.blocks.assets/        # native 抽取的图片/媒体资产(若有)
+│   ├── sample.tables.json           # 表格 sidecar(若 IR 含 tables)
+│   ├── sample.drawings.json         # 图纸/图片 sidecar(若 IR 含 drawings)
+│   └── sample.equations.json        # 公式 sidecar(若 IR 含 equations)
+└── sample.pdf.<engine>_raw/         # ← mineru / docling 的 raw 缓存(native 无此目录)
+    ├── _manifest.json               # 由引擎下载流程写入;CLI 缓存校验不读
+    └── <bundle files>               # 引擎特定 raw 产物(content_list.json / *.json / 资产等)
+```
+
+`native` 引擎不产生 raw 目录(解析是本地的,无外部服务参与)。
+
+## 典型用例
+
+### A. 本地解析 `.docx`(零网络依赖)
+
+```bash
+python -m lightrag.parser.cli ./inputs/workspace/sample.docx --engine native
+# 产出:./inputs/workspace/sample.docx.parsed/  (含 blocks.jsonl + assets)
+```
+
+### B. 用 MinerU 解析 PDF(首次会下载 raw)
+
+```bash
+# 第一次:下载 raw bundle + 生成 sidecar
+python -m lightrag.parser.cli ./inputs/workspace/sample.pdf --engine mineru
+# 第二次(无任何修改):raw 目录非空 → 直接复用 → 仅重建 sidecar,速度快
+python -m lightrag.parser.cli ./inputs/workspace/sample.pdf --engine mineru
+# 日志会显示: [parse_mineru] raw cache hit doc_id=... raw_dir=.../sample.pdf.mineru_raw
+```
+
+### C. 用 Docling 解析 PDF + 复用已有 raw 目录
+
+```bash
+# 已有 ./inputs/workspace/sample.pdf.docling_raw/ (含 docling 产物的 JSON 等文件)
+python -m lightrag.parser.cli ./inputs/workspace/sample.pdf --engine docling
+# CLI 不查 manifest,只要 raw 目录非空就跳过 docling-serve 调用
+```
+
+> 注:这是旧 `python -m lightrag.parser.external.docling` 调试入口「从已有 raw 重建 sidecar」场景的等价替代——只需把 raw 目录放到约定位置(`<sidecar_parent>/<source>.docling_raw/`)即可触发缓存命中分支。
+
+### D. 输出到自定义目录
+
+```bash
+python -m lightrag.parser.cli ./inputs/workspace/sample.docx \
+    --engine native -o /tmp/debug_sidecar
+# 产出:/tmp/debug_sidecar/sample.docx.parsed/
+# 原文件 ./inputs/workspace/sample.docx 不会被移动
+```
+
+### E. 强制重新解析(清空 raw 后重新下载)
+
+```bash
+python -m lightrag.parser.cli ./inputs/workspace/sample.pdf \
+    --engine docling --force-reparse
+# raw 目录被清空 → 重新调 docling-serve 下载 → 重新生成 sidecar
+```
+
+## 环境变量
+
+`mineru` / `docling` 引擎在 **缓存未命中**(首次解析或 `--force-reparse`)时会调用外部服务,所需环境变量与生产入库一致:
+
+- **MinerU**:`MINERU_API_MODE`(`local` / `official`)、`MINERU_API_TOKEN`、`MINERU_LOCAL_ENDPOINT` 或 `MINERU_OFFICIAL_ENDPOINT`,可选 `MINERU_ENGINE_VERSION` / `MINERU_MODEL_VERSION` / `MINERU_POLL_INTERVAL_SECONDS` / `MINERU_MAX_POLLS`。
+- **Docling**:`DOCLING_ENDPOINT`,可选 `DOCLING_ENGINE_VERSION` / `DOCLING_DO_OCR` / `DOCLING_FORCE_OCR` / `DOCLING_OCR_ENGINE` / `DOCLING_OCR_PRESET` / `DOCLING_OCR_LANG` / `DOCLING_DO_FORMULA_ENRICHMENT` / `DOCLING_POLL_INTERVAL_SECONDS` / `DOCLING_MAX_POLLS`。
+
+详见 [FileProcessingConfiguration-zh.md](./FileProcessingConfiguration-zh.md)。
+
+**缓存命中**时(raw 目录已存在且非空,且未传 `--force-reparse`)无需任何外部服务环境变量——可用于离线复现解析输出。
+
+## 常见排障
+
+| 现象 | 处理 |
+|---|---|
+| `error: input file does not exist: ...` | 检查 `input_file` 路径,必须是已存在的文件(不是 raw 目录)。 |
+| raw 目录存在但 sidecar 内容仍是旧的 | 默认会**复用** raw 重建 sidecar。如果 raw 本身就过期或被替换,加 `--force-reparse` 清空重下。 |
+| MinerU 报 `MINERU_API_TOKEN` 缺失 / Docling 连接 `DOCLING_ENDPOINT` 失败 | 缓存未命中触发了外部服务调用——核对对应环境变量;或确认 raw 目录是否非空(命中缓存时无需服务)。 |
+| 源文件被意外移动 | 不应发生:CLI 已 mock 归档函数。若复现请提 issue(可能是 pipeline 内增加了新的归档调用点)。 |
+| `parse_docling` 报 `produced zero blocks` | docling raw 中的主 JSON 内容不可解析或为空。检查 raw 目录的 `*.json` 是否合法。 |
+
+## 与 `LightRAG.parse_*` 生产路径的等价性
+
+本 CLI 直接调用生产代码路径 `LightRAG.parse_native` / `parse_mineru` / `parse_docling`(通过 `lightrag/parser/debug.py` 的轻量 RAG 替身),因此:
+
+- sidecar 字段、命名、内容格式与生产入库完全一致;
+- IR 构建器、`write_sidecar` 调用、`_persist_parsed_full_docs` 行为完全一致;
+- 三处差异均由 CLI 内的 `monkey-patch` 实现,**不修改任何生产代码**:
+  1. `parsed_artifact_dir_for_source` → 返回扁平路径(无 `__parsed__/`);
+  2. `is_bundle_valid` → 「raw 非空即有效」;
+  3. `archive_docx_source_after_full_docs_sync` → no-op,保留源文件。
+
+可与 `tests/parser/docx/golden/native_docx/` 下的 golden fixture 对比验证(CLI 不冻结时间戳,比对时排除 `created_at` 等时间字段即可)。

+ 129 - 0
docs/ParserDebugCLI.md

@@ -0,0 +1,129 @@
+# Parser CLI Debugger Guide
+
+This tool is used to locally debug LightRAG's three content parsing engines (`native` / `mineru` / `docling`). It triggers the `LightRAG.parse_<engine>` production code path for a **single file** and outputs the parsing artifacts (sidecar and raw cache) into a **flat directory layout**. Compared with the production ingestion directory, the only differences are:
+
+- **No `__parsed__/` intermediate layer**: artifacts land directly under the specified parent directory for easy inspection;
+- **The source file is not archived**: the source file stays at its original location (the production path moves the source file to `<INPUT_DIR>/__parsed__/`);
+- **Raw cache validity only checks that the directory is non-empty**: any existing, non-empty `mineru` / `docling` raw directory is considered valid, skipping `_manifest.json` validation.
+
+The rest of the flow (IR construction, sidecar writing, `full_docs` synchronization logic) is identical to production ingestion, making it convenient for troubleshooting parsing-stage issues.
+
+## Command Format
+
+```bash
+python -m lightrag.parser.cli <input_file> \
+    --engine {native|mineru|docling} \
+    [-o <sidecar_parent_dir>] \
+    [--doc-id <doc-id>] \
+    [--force-reparse] \
+    [--preview N]
+```
+
+| Argument | Description |
+|---|---|
+| `input_file` | Path to the source file to parse (positional argument, required). The file must actually exist. |
+| `--engine` | Required: `native` (only `.docx`, local parsing) / `mineru` (PDF/Office documents, calls MinerU service) / `docling` (PDF/Office documents, calls docling-serve). |
+| `-o / --sidecar-parent-dir` | Parent directory of the sidecar and raw directories. Defaults to the directory containing the source file. |
+| `--doc-id` | Custom document ID. Defaults to `doc-<md5(absolute path of source file)>` (stable across multiple runs on the same file). |
+| `--force-reparse` | Effective only for `mineru` / `docling`: clears the raw directory and forces re-download and re-parse. By default, a non-empty raw directory is reused. |
+| `--preview N` | After parsing completes, prints a preview of the first N blocks (headings + content snippets). Default 5; `0` disables it. |
+
+## Output Directory Layout
+
+Taking input `./inputs/workspace/sample.pdf` + the default sidecar parent directory (i.e., `./inputs/workspace/`) as an example:
+
+```
+./inputs/workspace/
+├── sample.pdf                       # original file, untouched
+├── sample.pdf.parsed/               # ← sidecar output
+│   ├── sample.blocks.jsonl          # JSONL: first line is meta, each subsequent line is a block
+│   ├── sample.blocks.assets/        # image/media assets extracted by native (if any)
+│   ├── sample.tables.json           # table sidecar (if IR contains tables)
+│   ├── sample.drawings.json         # drawing/image sidecar (if IR contains drawings)
+│   └── sample.equations.json        # equation sidecar (if IR contains equations)
+└── sample.pdf.<engine>_raw/         # ← raw cache for mineru / docling (native has no such directory)
+    ├── _manifest.json               # written by the engine download flow; not read by CLI cache validation
+    └── <bundle files>               # engine-specific raw artifacts (content_list.json / *.json / assets, etc.)
+```
+
+The `native` engine does not produce a raw directory (parsing is local, with no external service involved).
+
+## Typical Use Cases
+
+### A. Locally parse a `.docx` (zero network dependency)
+
+```bash
+python -m lightrag.parser.cli ./inputs/workspace/sample.docx --engine native
+# Output: ./inputs/workspace/sample.docx.parsed/  (contains blocks.jsonl + assets)
+```
+
+### B. Parse a PDF with MinerU (raw will be downloaded on first run)
+
+```bash
+# First run: download raw bundle + generate sidecar
+python -m lightrag.parser.cli ./inputs/workspace/sample.pdf --engine mineru
+# Second run (no changes): raw directory non-empty → reused directly → only regenerate sidecar, fast
+python -m lightrag.parser.cli ./inputs/workspace/sample.pdf --engine mineru
+# The log will show: [parse_mineru] raw cache hit doc_id=... raw_dir=.../sample.pdf.mineru_raw
+```
+
+### C. Parse a PDF with Docling + reuse an existing raw directory
+
+```bash
+# Existing ./inputs/workspace/sample.pdf.docling_raw/ (contains docling's JSON output, etc.)
+python -m lightrag.parser.cli ./inputs/workspace/sample.pdf --engine docling
+# The CLI does not check the manifest; as long as the raw directory is non-empty, the docling-serve call is skipped
+```
+
+> Note: this is the equivalent replacement for the "rebuild sidecar from an existing raw directory" scenario that used to live in the legacy `python -m lightrag.parser.external.docling` debug entry point — just place the raw directory at the agreed location (`<sidecar_parent>/<source>.docling_raw/`) to trigger the cache-hit branch.
+
+### D. Output to a custom directory
+
+```bash
+python -m lightrag.parser.cli ./inputs/workspace/sample.docx \
+    --engine native -o /tmp/debug_sidecar
+# Output: /tmp/debug_sidecar/sample.docx.parsed/
+# The source file ./inputs/workspace/sample.docx is not moved
+```
+
+### E. Force re-parse (clear raw and re-download)
+
+```bash
+python -m lightrag.parser.cli ./inputs/workspace/sample.pdf \
+    --engine docling --force-reparse
+# raw directory is cleared → docling-serve is called again to download → sidecar regenerated
+```
+
+## Environment Variables
+
+The `mineru` / `docling` engines call external services when the **cache misses** (first parse or `--force-reparse`); the required environment variables are identical to production ingestion:
+
+- **MinerU**: `MINERU_API_MODE` (`local` / `official`), `MINERU_API_TOKEN`, `MINERU_LOCAL_ENDPOINT` or `MINERU_OFFICIAL_ENDPOINT`, optional `MINERU_ENGINE_VERSION` / `MINERU_MODEL_VERSION` / `MINERU_POLL_INTERVAL_SECONDS` / `MINERU_MAX_POLLS`.
+- **Docling**: `DOCLING_ENDPOINT`, optional `DOCLING_ENGINE_VERSION` / `DOCLING_DO_OCR` / `DOCLING_FORCE_OCR` / `DOCLING_OCR_ENGINE` / `DOCLING_OCR_PRESET` / `DOCLING_OCR_LANG` / `DOCLING_DO_FORMULA_ENRICHMENT` / `DOCLING_POLL_INTERVAL_SECONDS` / `DOCLING_MAX_POLLS`.
+
+See [FileProcessingConfiguration.md](./FileProcessingConfiguration.md) for details.
+
+When the **cache is hit** (the raw directory already exists and is non-empty, and `--force-reparse` is not passed), no external service environment variables are needed — this can be used to offline-reproduce parsing output.
+
+## Common Troubleshooting
+
+| Symptom | Action |
+|---|---|
+| `error: input file does not exist: ...` | Check the `input_file` path; it must be an existing file (not a raw directory). |
+| Raw directory exists but sidecar content is still stale | The default behavior is to **reuse** raw and regenerate sidecar. If the raw itself is outdated or has been replaced, add `--force-reparse` to clear and re-download. |
+| MinerU reports `MINERU_API_TOKEN` missing / Docling fails to connect to `DOCLING_ENDPOINT` | A cache miss triggered an external service call — verify the corresponding environment variables; or confirm whether the raw directory is non-empty (no service needed when the cache hits). |
+| Source file is unexpectedly moved | Should not happen: the CLI has mocked the archive function. If reproducible, please file an issue (a new archive call site may have been added in the pipeline). |
+| `parse_docling` reports `produced zero blocks` | The main JSON content in docling raw is unparseable or empty. Check whether the `*.json` files in the raw directory are valid. |
+
+## Equivalence with the `LightRAG.parse_*` Production Path
+
+This CLI directly calls the production code paths `LightRAG.parse_native` / `parse_mineru` / `parse_docling` (via the lightweight RAG stand-in in `lightrag/parser/debug.py`), so:
+
+- The sidecar fields, naming, and content format are identical to production ingestion;
+- The IR builders, `write_sidecar` calls, and `_persist_parsed_full_docs` behavior are identical;
+- All three differences are implemented via `monkey-patch` inside the CLI — **no production code is modified**:
+  1. `parsed_artifact_dir_for_source` → returns the flat path (no `__parsed__/`);
+  2. `is_bundle_valid` → "raw is valid if non-empty";
+  3. `archive_docx_source_after_full_docs_sync` → no-op, source file preserved.
+
+Results can be cross-validated against golden fixtures under `tests/parser/docx/golden/native_docx/` (the CLI does not freeze timestamps; just exclude time fields such as `created_at` when comparing).

+ 1222 - 0
docs/ProgramingWithCore.md

@@ -0,0 +1,1222 @@
+# Programming With LightRAG Core
+
+> If you want to integrate LightRAG into your project, we recommend using the REST API provided by the LightRAG Server. LightRAG Core is intended for embedded applications or researchers conducting studies and evaluations.
+
+## A Simple Program
+
+```python
+import os
+import asyncio
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import gpt_4o_mini_complete, gpt_4o_complete, openai_embed
+from lightrag.utils import setup_logger
+
+setup_logger("lightrag", level="INFO")
+
+WORKING_DIR = "./rag_storage"
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        embedding_func=openai_embed,
+        llm_model_func=gpt_4o_mini_complete,
+    )
+    # IMPORTANT: Storage initialization is required before use!
+    await rag.initialize_storages()  # Initialize storage backends
+    return rag
+
+async def main():
+    try:
+        # Initialize RAG instance
+        rag = await initialize_rag()
+        await rag.ainsert("Your text")
+
+        # Perform hybrid search
+        mode = "hybrid"
+        print(
+          await rag.aquery(
+              "What are the top themes in this story?",
+              param=QueryParam(mode=mode)
+          )
+        )
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+    finally:
+        if rag:
+            await rag.finalize_storages()
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+Notes:
+- Export your `OPENAI_API_KEY` environment variable before running.
+- All data is persisted to `WORKING_DIR`.
+
+**Important:**
+
+**LightRAG requires explicit initialization before use.** You must call `await rag.initialize_storages()` after creating a LightRAG instance, otherwise you will encounter errors.
+
+
+## LightRAG Init Parameters
+
+**Parameters**
+
+| **Parameter** | **Type** | **Explanation** | **Default** |
+| -------------- | ---------- | ----------------- | ------------- |
+| **working_dir** | `str` | Directory where the cache will be stored | `lightrag_cache+timestamp` |
+| **workspace** | `str` | Workspace name for data isolation between different LightRAG instances | |
+| **kv_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`,`PGKVStorage`,`RedisKVStorage`,`MongoKVStorage`,`OpenSearchKVStorage` | `JsonKVStorage` |
+| **vector_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`,`PGVectorStorage`,`MilvusVectorDBStorage`,`ChromaVectorDBStorage`,`FaissVectorDBStorage`,`MongoVectorDBStorage`,`QdrantVectorDBStorage`,`OpenSearchVectorDBStorage` | `NanoVectorDBStorage` |
+| **graph_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`,`Neo4JStorage`,`PGGraphStorage`,`AGEStorage`,`MemgraphStorage`,`OpenSearchGraphStorage` | `NetworkXStorage` |
+| **doc_status_storage** | `str` | Storage type for document processing status. Supported types: `JsonDocStatusStorage`,`PGDocStatusStorage`,`MongoDocStatusStorage`,`OpenSearchDocStatusStorage` | `JsonDocStatusStorage` |
+| **chunk_token_size** | `int` | Maximum token size per chunk when splitting documents | `1200` |
+| **chunk_overlap_token_size** | `int` | Overlap token size between two chunks when splitting documents | `100` |
+| **tokenizer** | `Tokenizer` | The function used to convert text into tokens (numbers) and back using .encode() and .decode() functions following `TokenizerInterface` protocol. If you don't specify one, it will use the default Tiktoken tokenizer. | `TiktokenTokenizer` |
+| **tiktoken_model_name** | `str` | If you're using the default Tiktoken tokenizer, this is the name of the specific Tiktoken model to use. This setting is ignored if you provide your own tokenizer. | `gpt-4o-mini` |
+| **entity_extract_max_gleaning** | `int` | Number of loops in the entity extraction process, appending history messages | `1` |
+| **node_embedding_algorithm** | `str` | Algorithm for node embedding (currently not used) | `node2vec` |
+| **node2vec_params** | `dict` | Parameters for node embedding | `{"dimensions": 1536,"num_walks": 10,"walk_length": 40,"window_size": 2,"iterations": 3,"random_seed": 3,}` |
+| **embedding_func** | `EmbeddingFunc` | Function to generate embedding vectors from text | `openai_embed` |
+| **embedding_batch_num** | `int` | Maximum batch size for embedding processes (multiple texts sent per batch) | `32` |
+| **embedding_func_max_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
+| **llm_model_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
+| **llm_model_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
+| **summary_context_size** | `int` | Maximum number of tokens sent to the LLM when generating summaries for entity/relation merging | `10000`(configured by env var SUMMARY_CONTEXT_SIZE) |
+| **summary_max_tokens** | `int` | Maximum token size for entity/relation description | `500`(configured by env var SUMMARY_MAX_TOKENS) |
+| **llm_model_max_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4`(default value changed by env var MAX_ASYNC) |
+| **llm_model_kwargs** | `dict` | Additional parameters for LLM generation | |
+| **vector_db_storage_cls_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval | cosine_better_than_threshold: 0.2(default value changed by env var COSINE_THRESHOLD) |
+| **enable_llm_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
+| **enable_llm_cache_for_entity_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` |
+| **addon_params** | `dict` | Runtime knobs for extraction prompts and chunking. See [addon_params](#addon_params). | Env-backed defaults from `SUMMARY_LANGUAGE`, `ENTITY_TYPE_PROMPT_FILE`, and `CHUNK_*` |
+| **embedding_cache_config** | `dict` | Configuration for question-answer caching. Contains three parameters: `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers. `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM. `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
+
+
+## addon_params
+
+`addon_params` is a live configuration mapping on each `LightRAG` instance. LightRAG currently reads the fields below; unknown custom keys may remain in the dict, but core LightRAG behavior does not use them.
+
+### Supported Fields
+
+| Field | Value | Purpose |
+|---|---|---|
+| `language` | Non-empty string. Defaults to `SUMMARY_LANGUAGE`, then `English`. | Output language used in entity and relationship extraction, entity/relation summaries, keyword extraction, and multimodal analysis prompts. |
+| `entity_type_prompt_file` | `.yml` or `.yaml` file name only. Loaded from `${PROMPT_DIR:-./prompts}/entity_type`. | Loads an entity extraction prompt profile. The profile can define `entity_types_guidance`, `entity_extraction_examples`, and `entity_extraction_json_examples`. The active extraction mode must have matching examples: text mode needs `entity_extraction_examples`; JSON mode needs `entity_extraction_json_examples`. |
+| `entity_types_guidance` | Non-empty string. | Inline entity type guidance injected into extraction prompts. This overrides both the prompt profile file and the built-in default guidance. |
+| `chunker` | Dict with F/R/V/P chunking settings. | Runtime baseline for chunker parameters. Each document gets a slim `chunk_options` snapshot at enqueue time; later edits affect only future enqueues. |
+
+Compact `chunker` shape:
+
+```jsonc
+{
+  "chunk_token_size": 1200,
+  "fixed_token": {
+    "chunk_token_size": 1200,
+    "chunk_overlap_token_size": 100,
+    "split_by_character": null,
+    "split_by_character_only": false
+  },
+  "recursive_character": {
+    "chunk_token_size": 1200,
+    "chunk_overlap_token_size": 100,
+    "separators": ["\n\n", "\n", "。", "!", "?", ";", ",", " ", ""]
+  },
+  "semantic_vector": {
+    "chunk_token_size": 1200,
+    "breakpoint_threshold_type": "percentile",
+    "breakpoint_threshold_amount": null,
+    "buffer_size": 1,
+    "sentence_split_regex": "(?<=[.?!])\\s+|(?<=[。?!])"
+  },
+  "paragraph_semantic": {
+    "chunk_token_size": 2000,
+    "chunk_overlap_token_size": 100
+  }
+}
+```
+
+### Initialization
+
+When you create a `LightRAG` object, `addon_params` is normalized before storage initialization:
+
+- If `addon_params` is omitted, LightRAG builds defaults from `SUMMARY_LANGUAGE`, `ENTITY_TYPE_PROMPT_FILE`, and the chunker-related `CHUNK_*` environment variables.
+- If you pass a partial dict, missing `language`, `entity_type_prompt_file`, and `chunker` values are still backfilled from the same env-backed defaults.
+- `entity_type_prompt_file` and `entity_types_guidance` are resolved into a cached entity extraction prompt profile during construction.
+- `chunk_token_size` and `chunk_overlap_token_size` constructor arguments are overlaid into `addon_params["chunker"]` only for slots that were not already set by explicit `addon_params` or strategy-specific env vars.
+
+Example:
+
+```python
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=llm_model_func,
+    embedding_func=embedding_func,
+    addon_params={
+        "language": "Chinese",
+        "entity_type_prompt_file": "entity_type_prompt.sample.yml",
+        "entity_types_guidance": "- Paper: academic papers, reports, and preprints",
+        "chunker": {
+            "chunk_token_size": 1000,
+            "recursive_character": {
+                "separators": ["\n\n", "\n", "。", "!", "?", " "]
+            }
+        },
+    },
+)
+await rag.initialize_storages()
+```
+
+### Updating After Creation
+
+`rag.addon_params` is an observable mapping. Top-level updates mark the derived prompt cache dirty; the cache is refreshed the next time LightRAG builds runtime config for extraction or query work.
+
+Update one field:
+
+```python
+rag.addon_params["language"] = "Chinese"
+rag.addon_params["entity_types_guidance"] = "- Dataset: structured research data"
+```
+
+Replace the whole mapping:
+
+```python
+rag.addon_params = {
+    "language": "German",
+    "entity_type_prompt_file": "domain_profile.yml",
+}
+```
+
+Replacing `rag.addon_params` creates a new observable mapping. If you kept an old reference, discard it and re-read `rag.addon_params` before making more changes.
+
+Change F-strategy fixed-token splitting defaults for future documents:
+
+```python
+rag.addon_params["chunker"]["fixed_token"]["split_by_character"] = "\n\n"
+rag.addon_params["chunker"]["fixed_token"]["split_by_character_only"] = True
+```
+
+`split_by_character` pre-splits text by the given separator before token-window chunking. When `split_by_character_only` is `True`, an oversized segment raises an error instead of being split again by token size.
+
+Change R-strategy recursive splitting defaults for future documents:
+
+```python
+rag.addon_params["chunker"]["recursive_character"]["separators"] = [
+    "\n\n",
+    "\n",
+    "###",
+    "。",
+    "!",
+    "?",
+    " ",
+]
+```
+
+Nested `chunker` edits are read when future documents are enqueued. Documents already enqueued keep their persisted `chunk_options` snapshot.
+
+### Notes and Precedence
+
+- Entity type guidance precedence is: `addon_params["entity_types_guidance"]` > `entity_type_prompt_file` profile > built-in default guidance.
+- Chunker precedence is: explicit `addon_params["chunker"]` values > strategy-specific `CHUNK_*` env vars > legacy constructor fields (`chunk_token_size`, `chunk_overlap_token_size`) > legacy env vars (`CHUNK_SIZE`, `CHUNK_OVERLAP_SIZE`).
+- Per-strategy `chunk_token_size`: every strategy reads `chunk_token_size` from its own sub-dict first and falls back to the top-level `chunk_token_size` when its sub-dict doesn't set one. F, R, and V can each seed their sub-dict value from a dedicated env var (`CHUNK_F_SIZE` / `CHUNK_R_SIZE` / `CHUNK_V_SIZE`) or set it explicitly in `addon_params`; when neither is set they inherit the top-level value.
+- `paragraph_semantic.chunk_token_size` is the exception: unlike F/R/V it never inherits the top-level `chunk_token_size`; if not explicit it uses `CHUNK_P_SIZE`, then the built-in default `2000`.
+- `enable_multimodal_pipeline` is deprecated and ignored if passed in `addon_params`. Use per-document `process_options` such as `i`, `t`, and `e` to control multimodal processing.
+
+
+## QueryParam
+
+Use `QueryParam` to control the behavior of your query:
+
+```python
+class QueryParam:
+    """Configuration parameters for query execution in LightRAG."""
+
+    mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = "global"
+    """Specifies the retrieval mode:
+    - "local": Focuses on context-dependent information.
+    - "global": Utilizes global knowledge.
+    - "hybrid": Combines local and global retrieval methods.
+    - "naive": Performs a basic search without advanced techniques.
+    - "mix": Integrates knowledge graph and vector retrieval.
+    """
+
+    only_need_context: bool = False
+    """If True, only returns the retrieved context without generating a response."""
+
+    only_need_prompt: bool = False
+    """If True, only returns the generated prompt without producing a response."""
+
+    response_type: str = "Multiple Paragraphs"
+    """Defines the response format. Examples: 'Multiple Paragraphs', 'Single Paragraph', 'Bullet Points'."""
+
+    stream: bool = False
+    """If True, enables streaming output for real-time responses."""
+
+    top_k: int = int(os.getenv("TOP_K", "60"))
+    """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""
+
+    chunk_top_k: int = int(os.getenv("CHUNK_TOP_K", "20"))
+    """Number of text chunks to retrieve initially from vector search and keep after reranking.
+    If None, defaults to top_k value.
+    """
+
+    max_entity_tokens: int = int(os.getenv("MAX_ENTITY_TOKENS", "6000"))
+    """Maximum number of tokens allocated for entity context in unified token control system."""
+
+    max_relation_tokens: int = int(os.getenv("MAX_RELATION_TOKENS", "8000"))
+    """Maximum number of tokens allocated for relationship context in unified token control system."""
+
+    max_total_tokens: int = int(os.getenv("MAX_TOTAL_TOKENS", "30000"))
+    """Maximum total tokens budget for the entire query context (entities + relations + chunks + system prompt)."""
+
+    # History messages are only sent to LLM for context, not used for retrieval
+    conversation_history: list[dict[str, str]] = field(default_factory=list)
+    """Stores past conversation history to maintain context.
+    Format: [{"role": "user/assistant", "content": "message"}].
+    """
+
+    user_prompt: str | None = None
+    """User-provided prompt for the query.
+    Additional instructions for the LLM. If provided, this will be injected into the prompt template.
+    Its purpose is to let the user customize the way the LLM generates the response.
+    """
+
+    enable_rerank: bool = True
+    """Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued.
+    Default is True to enable reranking when rerank model is available.
+    """
+```
+
+> The default value of `top_k` can be changed by the environment variable `TOP_K`.
+
+
+## LLM and Embedding Injection
+
+LightRAG requires LLM and Embedding models for document indexing and querying. During initialization, inject the relevant model functions into LightRAG.
+
+### Model Selection Requirements
+
+- **LLM**: at least 32B parameters and a 32K-token context window (64K recommended). Avoid reasoning models during indexing; use stronger models at query time.
+- **Embedding**: must be consistent across indexing and querying. Recommended: `BAAI/bge-m3`, `text-embedding-3-large`. Changing models requires clearing vector storage.
+- **Reranker**: significantly improves retrieval. When enabled, set query mode to `mix`. Recommended: `BAAI/bge-reranker-v2-m3`, Jina rerankers.
+
+#### Using OpenAI-like APIs
+
+LightRAG supports OpenAI-like chat/embeddings APIs:
+
+```python
+import os
+import numpy as np
+from lightrag.utils import wrap_embedding_func_with_attrs
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+
+async def llm_model_func(
+    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
+) -> str:
+    return await openai_complete_if_cache(
+        "solar-mini",
+        prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        api_key=os.getenv("UPSTAGE_API_KEY"),
+        base_url="https://api.upstage.ai/v1/solar",
+        **kwargs
+    )
+
+@wrap_embedding_func_with_attrs(embedding_dim=4096, max_token_size=8192, model_name="solar-embedding-1-large-query")
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    return await openai_embed.func(
+        texts,
+        model="solar-embedding-1-large-query",
+        api_key=os.getenv("UPSTAGE_API_KEY"),
+        base_url="https://api.upstage.ai/v1/solar"
+    )
+
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        embedding_func=embedding_func  # Pass the decorated function directly
+    )
+    await rag.initialize_storages()
+    return rag
+```
+
+> **Important Note on Embedding Function Wrapping:**
+>
+> `EmbeddingFunc` cannot be nested. Functions decorated with `@wrap_embedding_func_with_attrs` (such as `openai_embed`, `ollama_embed`, etc.) cannot be wrapped again using `EmbeddingFunc()`. This is why we call `xxx_embed.func` (the underlying unwrapped function) instead of `xxx_embed` directly when creating custom embedding functions.
+
+#### Using Hugging Face Models
+
+See `lightrag_hf_demo.py`
+
+```python
+from functools import partial
+from transformers import AutoTokenizer, AutoModel
+
+# Pre-load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+embed_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+
+# Initialize LightRAG with Hugging Face model
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=hf_model_complete,  # Use Hugging Face model for text generation
+    llm_model_name='meta-llama/Llama-3.1-8B-Instruct',  # Model name from Hugging Face
+    # Use Hugging Face embedding function
+    embedding_func=EmbeddingFunc(
+        embedding_dim=384,
+        max_token_size=2048,
+        model_name="sentence-transformers/all-MiniLM-L6-v2",
+        func=partial(
+            hf_embed.func,  # Use .func to access the unwrapped function
+            tokenizer=tokenizer,
+            embed_model=embed_model
+        )
+    ),
+)
+```
+
+#### Using Ollama Models
+
+Pull the model you plan to use and an embedding model, for example `nomic-embed-text`:
+
+```python
+import numpy as np
+from lightrag.utils import wrap_embedding_func_with_attrs
+from lightrag.llm.ollama import ollama_model_complete, ollama_embed
+
+@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=8192, model_name="nomic-embed-text")
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    return await ollama_embed.func(texts, embed_model="nomic-embed-text")
+
+# Initialize LightRAG with Ollama model
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=ollama_model_complete,
+    llm_model_name='your_model_name',
+    embedding_func=embedding_func,
+)
+```
+
+#### Increasing context size
+
+LightRAG requires at least 32k context tokens. Ollama defaults to 8k. Two approaches:
+
+*Approach 1: Edit Modelfile*
+
+```bash
+ollama pull qwen2
+ollama show --modelfile qwen2 > Modelfile
+# Add this line to Modelfile:
+# PARAMETER num_ctx 32768
+ollama create -f Modelfile qwen2m
+```
+
+*Approach 2: Set `num_ctx` via `llm_model_kwargs`*
+
+```python
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=ollama_model_complete,
+    llm_model_name='your_model_name',
+    llm_model_kwargs={"options": {"num_ctx": 32768}},
+    embedding_func=embedding_func,
+)
+```
+
+> **Important Note on Embedding Function Wrapping:**
+>
+> `EmbeddingFunc` cannot be nested. Use `xxx_embed.func` to access the underlying unwrapped function.
+
+**Low RAM GPUs**
+
+For low-RAM GPUs (e.g. 6GB), select a small model and tune the context window. For example, `gemma2:2b` with `num_ctx=26000` can find ~197 entities and 19 relations on `book.txt`.
+
+#### LlamaIndex
+
+LightRAG supports integration with LlamaIndex (`llm/llama_index_impl.py`):
+
+```python
+import asyncio
+from lightrag import LightRAG
+from lightrag.llm.llama_index_impl import llama_index_complete_if_cache, llama_index_embed
+from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.llms.openai import OpenAI
+from lightrag.utils import setup_logger
+
+setup_logger("lightrag", level="INFO")
+
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir="your/path",
+        llm_model_func=llama_index_complete_if_cache,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=1536,
+            max_token_size=2048,
+            model_name=embed_model,
+            func=partial(llama_index_embed.func, embed_model=embed_model)
+        ),
+    )
+    await rag.initialize_storages()
+    return rag
+```
+
+**Further reading:**
+- [LlamaIndex Documentation](https://developers.llamaindex.ai/python/framework/)
+- [Direct OpenAI Example](examples/unofficial-sample/lightrag_llamaindex_direct_demo.py)
+- [LiteLLM Proxy Example](examples/unofficial-sample/lightrag_llamaindex_litellm_demo.py)
+- [LiteLLM Proxy with Opik Example](examples/unofficial-sample/lightrag_llamaindex_litellm_opik_demo.py)
+
+#### Using Azure OpenAI Models
+
+```python
+import os
+import numpy as np
+from lightrag.utils import wrap_embedding_func_with_attrs
+from lightrag.llm.azure_openai import azure_openai_complete_if_cache, azure_openai_embed
+
+async def llm_model_func(
+    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
+) -> str:
+    return await azure_openai_complete_if_cache(
+        prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+        api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
+        deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
+        **kwargs
+    )
+
+@wrap_embedding_func_with_attrs(
+    embedding_dim=1536,
+    max_token_size=8192,
+    model_name=os.getenv("AZURE_OPENAI_EMBEDDING_MODEL")
+)
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    return await azure_openai_embed.func(
+        texts,
+        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
+        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+        api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
+        deployment_name=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
+    )
+
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=llm_model_func,
+    embedding_func=embedding_func
+)
+```
+
+#### Using Google Gemini Models
+
+```python
+import os
+import numpy as np
+from lightrag.utils import wrap_embedding_func_with_attrs
+from lightrag.llm.gemini import gemini_model_complete, gemini_embed
+
+async def llm_model_func(
+    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
+) -> str:
+    return await gemini_model_complete(
+        prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        api_key=os.getenv("GEMINI_API_KEY"),
+        model_name="gemini-2.0-flash",
+        **kwargs
+    )
+
+@wrap_embedding_func_with_attrs(
+    embedding_dim=768,
+    max_token_size=2048,
+    model_name="models/text-embedding-004"
+)
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    return await gemini_embed.func(
+        texts,
+        api_key=os.getenv("GEMINI_API_KEY"),
+        model="models/text-embedding-004"
+    )
+
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=llm_model_func,
+    llm_model_name="gemini-2.0-flash",
+    embedding_func=embedding_func
+)
+```
+
+### Rerank Function Injection
+
+To enhance retrieval quality, documents can be re-ranked based on a more effective relevance scoring model. The `rerank.py` file provides three Reranker provider driver functions:
+
+- **Cohere / vLLM**: `cohere_rerank`
+- **Jina AI**: `jina_rerank`
+- **Aliyun**: `ali_rerank`
+
+Inject one of these functions into the `rerank_model_func` attribute of the LightRAG object. For detailed usage, refer to `examples/rerank_example.py`.
+
+### User Prompt vs. Query
+
+When using LightRAG for content queries, avoid combining the search process with unrelated output processing, as this significantly impacts query effectiveness. The `user_prompt` parameter in `QueryParam` does not participate in the RAG retrieval phase — it guides the LLM on how to process the retrieved results after the query is completed.
+
+```python
+query_param = QueryParam(
+    mode="hybrid",
+    user_prompt="For diagrams, use mermaid format with English/Pinyin node names and Chinese display labels",
+)
+
+response_default = rag.query(
+    "Please draw a character relationship diagram for Scrooge",
+    param=query_param
+)
+print(response_default)
+```
+
+
+## Storage Backends
+
+### Storage Types
+
+LightRAG uses 4 types of storage for different purposes:
+
+| Storage Type | Purpose |
+|---|---|
+| **KV_STORAGE** | LLM response cache, text chunks, document information |
+| **VECTOR_STORAGE** | Entity/relation/chunk embedding vectors |
+| **GRAPH_STORAGE** | Entity-relation graph structure |
+| **DOC_STATUS_STORAGE** | Document indexing status |
+
+### Supported Implementations
+
+**KV_STORAGE**
+```
+JsonKVStorage        JsonFile (default)
+PGKVStorage          Postgres
+RedisKVStorage       Redis
+MongoKVStorage       MongoDB
+OpenSearchKVStorage  OpenSearch
+```
+
+**GRAPH_STORAGE**
+```
+NetworkXStorage          NetworkX (default)
+Neo4JStorage             Neo4J
+PGGraphStorage           PostgreSQL with AGE plugin
+MemgraphStorage          Memgraph
+OpenSearchGraphStorage   OpenSearch
+```
+
+> Testing has shown that Neo4J delivers superior performance in production environments compared to PostgreSQL with AGE plugin.
+
+**VECTOR_STORAGE**
+```
+NanoVectorDBStorage         NanoVector (default)
+PGVectorStorage             Postgres
+MilvusVectorDBStorage       Milvus
+FaissVectorDBStorage        Faiss
+QdrantVectorDBStorage       Qdrant
+MongoVectorDBStorage        MongoDB
+OpenSearchVectorDBStorage   OpenSearch
+```
+
+**DOC_STATUS_STORAGE**
+```
+JsonDocStatusStorage        JsonFile (default)
+PGDocStatusStorage          Postgres
+MongoDocStatusStorage       MongoDB
+OpenSearchDocStatusStorage  OpenSearch
+```
+
+Example connection configurations for each storage type can be found in the repository's `env.example` file. The database instance in the connection string must be created beforehand — LightRAG only creates tables within the instance, not the instance itself.
+
+### Backend-Specific Setup
+
+#### Using Neo4J Storage
+
+For production level scenarios you will most likely want to leverage an enterprise solution for KG storage. Running Neo4J in Docker is recommended for seamless local testing. See: https://hub.docker.com/_/neo4j
+
+```bash
+export NEO4J_URI="neo4j://localhost:7687"
+export NEO4J_USERNAME="neo4j"
+export NEO4J_PASSWORD="password"
+export NEO4J_DATABASE="neo4j"  # Required for community edition
+```
+
+```python
+from lightrag.utils import setup_logger
+
+setup_logger("lightrag", level="INFO")
+
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=gpt_4o_mini_complete,
+        graph_storage="Neo4JStorage",
+    )
+    await rag.initialize_storages()
+    return rag
+```
+
+See `test_neo4j.py` for a working example.
+
+#### Using PostgreSQL Storage
+
+PostgreSQL can provide a one-stop solution as KV store, VectorDB (pgvector), and GraphDB (Apache AGE). PostgreSQL version 16.6 or higher is supported.
+
+- PostgreSQL is lightweight; the whole binary distribution, including all necessary plugins, can be zipped to about 40MB. For Windows, see the [Windows Release](https://github.com/ShanGor/apache-age-windows/releases/tag/PG17%2Fv1.5.0-rc0); on Linux/Mac, PostgreSQL and its plugins are easy to install.
+- If you prefer Docker, start with this image to avoid hiccups: https://hub.docker.com/r/gzdaniel/postgres-for-rag. The latest image no longer ships hardcoded credentials; on first start it creates the user, password, and database from the `POSTGRES_USER` / `POSTGRES_PASSWORD` / `POSTGRES_DB` environment variables (these are set automatically when you deploy via the `scripts/setup/setup.sh` wizard, so you can pick any values).
+- How to start: see [examples/lightrag_gemini_postgres_demo.py](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_gemini_postgres_demo.py)
+- For high-performance graph database requirements, Neo4j is recommended as Apache AGE's performance is not as competitive.
+
+#### Using Faiss Storage
+
+Before using Faiss, manually install `faiss-cpu` or `faiss-gpu`:
+
+```bash
+pip install faiss-cpu
+```
+
+```python
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    model = SentenceTransformer('all-MiniLM-L6-v2')
+    embeddings = model.encode(texts, convert_to_numpy=True)
+    return embeddings
+
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=llm_model_func,
+    embedding_func=EmbeddingFunc(
+        embedding_dim=384,
+        max_token_size=2048,
+        model_name="all-MiniLM-L6-v2",
+        func=embedding_func,
+    ),
+    vector_storage="FaissVectorDBStorage",
+    vector_db_storage_cls_kwargs={
+        "cosine_better_than_threshold": 0.3
+    }
+)
+```
+
+#### Using Memgraph for Storage
+
+Memgraph is a high-performance, in-memory graph database compatible with the Neo4j Bolt protocol. See: https://memgraph.com/download
+
+```bash
+export MEMGRAPH_URI="bolt://localhost:7687"
+```
+
+```python
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=gpt_4o_mini_complete,
+        graph_storage="MemgraphStorage",
+    )
+    await rag.initialize_storages()
+    return rag
+```
+
+#### Using Milvus for Vector Storage
+
+Milvus is a high-performance, scalable vector database for production-level vector storage. For full configuration options including index types (HNSW, HNSW_SQ, IVF, DISKANN, etc.) and metric types, see [docs/MilvusConfigurationGuide.md](./MilvusConfigurationGuide.md).
+
+**Quick setup via environment variables:**
+
+```bash
+MILVUS_URI=http://localhost:19530
+MILVUS_DB_NAME=lightrag
+LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
+```
+
+**Quick setup via Python SDK:**
+
+```python
+rag = LightRAG(
+    working_dir="./rag_storage",
+    llm_model_func=...,
+    embedding_func=...,
+    vector_storage="MilvusVectorDBStorage",
+    vector_db_storage_cls_kwargs={
+        "milvus_uri": "http://localhost:19530",
+        "milvus_db_name": "lightrag",
+        "cosine_better_than_threshold": 0.2,
+    },
+)
+```
+
+#### Using MongoDB Storage
+
+MongoDB provides a one-stop storage solution for LightRAG with native KV storage and vector storage. LightRAG uses MongoDB collections to implement a simple graph storage.
+
+`MongoVectorDBStorage` requires a MongoDB deployment with Atlas Search / Vector Search support (e.g., MongoDB Atlas or Atlas local). The setup wizard's bundled local Docker MongoDB service is MongoDB Community Edition — it can be used for KV/graph/doc-status storage but **not** for `MongoVectorDBStorage`.
+
+#### Using Redis Storage
+
+LightRAG supports Redis as KV storage. Configure persistence and memory usage carefully. Recommended Redis configuration:
+
+```
+save 900 1
+save 300 10
+save 60 1000
+stop-writes-on-bgsave-error yes
+maxmemory 4gb
+maxmemory-policy noeviction
+maxclients 500
+```
+
+When the interactive setup manages a local Redis container, it stages a user-editable config at `./data/config/redis.conf` and mounts it into the container. Setup preserves that file on reruns so local Redis tuning can be adjusted without losing manual edits.
+
+#### Using OpenSearch Storage
+
+OpenSearch provides a unified storage solution for all four LightRAG storage types (KV, Vector, Graph, DocStatus). It offers native k-NN vector search, full-text search, and horizontal scalability without cloud-only restrictions.
+
+**Requirements**: OpenSearch 3.x or higher with k-NN plugin enabled.
+
+Install with Docker (without plugins):
+```bash
+docker run -d -p 9200:9200 -e "discovery.type=single-node" \
+  -e "OPENSEARCH_INITIAL_ADMIN_PASSWORD=<custom-admin-password>" \
+  opensearchproject/opensearch:latest
+```
+
+Install with Docker Compose (Recommended, with plugins):
+```bash
+curl -O https://raw.githubusercontent.com/opensearch-project/opensearch-build/main/docker/release/dockercomposefiles/docker-compose-3.x.yml
+OPENSEARCH_INITIAL_ADMIN_PASSWORD=<custom-admin-password> docker-compose -f docker-compose-3.x.yml up -d
+```
+
+**Configuration** (see `env.example` for full list):
+```bash
+export OPENSEARCH_HOSTS=localhost:9200
+export OPENSEARCH_USER=admin
+export OPENSEARCH_PASSWORD=<custom-admin-password>
+export OPENSEARCH_USE_SSL=true
+export OPENSEARCH_VERIFY_CERTS=false
+```
+
+**Usage**:
+```python
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=your_llm_func,
+    embedding_func=your_embed_func,
+    kv_storage="OpenSearchKVStorage",
+    doc_status_storage="OpenSearchDocStatusStorage",
+    graph_storage="OpenSearchGraphStorage",
+    vector_storage="OpenSearchVectorDBStorage",
+)
+```
+
+**Graph Traversal**: When the OpenSearch SQL plugin with PPL support is available, graph queries use server-side BFS via the `graphlookup` command for optimal performance. Otherwise, it falls back to client-side batched BFS. Auto-detected at startup, or force via `OPENSEARCH_USE_PPL_GRAPHLOOKUP=true|false`.
+
+**Integration Testing**:
+
+1. Start OpenSearch using Docker Compose:
+```bash
+OPENSEARCH_INITIAL_ADMIN_PASSWORD=<custom-admin-password> docker-compose -f docker-compose-3.x.yml up -d
+```
+
+2. Verify the cluster is running:
+```bash
+curl -sk -u admin:<custom-admin-password> https://localhost:9200
+curl -sk -u admin:<custom-admin-password> "https://localhost:9200/_cat/plugins?v"
+```
+
+3. Run unit tests (no OpenSearch required — uses mocks):
+```bash
+python -m pytest tests/kg/opensearch_impl/test_opensearch_storage.py -v
+```
+
+4. Run the OpenSearch storage demo:
+```bash
+export OPENSEARCH_HOSTS=localhost:9200
+export OPENSEARCH_USER=admin
+export OPENSEARCH_PASSWORD=<custom-admin-password>
+export OPENSEARCH_USE_SSL=true
+export OPENSEARCH_VERIFY_CERTS=false
+python examples/opensearch_storage_demo.py
+```
+
+5. Run the full OpenAI + OpenSearch demo (requires `OPENAI_API_KEY`):
+```bash
+export OPENAI_API_KEY=your-api-key
+python examples/lightrag_openai_opensearch_graph_demo.py
+```
+
+6. Visualize the knowledge graph via LightRAG WebUI:
+```bash
+LIGHTRAG_KV_STORAGE=OpenSearchKVStorage \
+LIGHTRAG_DOC_STATUS_STORAGE=OpenSearchDocStatusStorage \
+LIGHTRAG_GRAPH_STORAGE=OpenSearchGraphStorage \
+LIGHTRAG_VECTOR_STORAGE=OpenSearchVectorDBStorage \
+LLM_BINDING=openai \
+EMBEDDING_BINDING=openai \
+EMBEDDING_MODEL=text-embedding-3-large \
+EMBEDDING_DIM=3072 \
+OPENAI_API_KEY=your-api-key \
+lightrag-server
+```
+
+
+## Data Isolation Between LightRAG Instances
+
+The `workspace` parameter ensures data isolation between different LightRAG instances. Once initialized, the `workspace` is immutable.
+
+| Storage Type | Isolation Method |
+|---|---|
+| `JsonKVStorage`, `JsonDocStatusStorage`, `NetworkXStorage`, `NanoVectorDBStorage`, `FaissVectorDBStorage` | Workspace subdirectories |
+| `RedisKVStorage`, `MilvusVectorDBStorage`, `MongoKVStorage`, `MongoVectorDBStorage`, `MongoGraphStorage`, `PGGraphStorage` | Workspace prefix on collection name |
+| `QdrantVectorDBStorage` | Payload-based partitioning (Qdrant multitenancy) |
+| `PGKVStorage`, `PGVectorStorage`, `PGDocStatusStorage` | `workspace` field in tables |
+| `Neo4JStorage` | Labels |
+| `OpenSearch*` | Index name prefixes |
+
+**Legacy compatibility**: Default workspace for PostgreSQL non-graph storage is `default`; for PostgreSQL AGE graph storage is null; for Neo4j graph storage is `base`.
+
+Storage-specific workspace environment variables override the common `WORKSPACE` variable: `REDIS_WORKSPACE`, `MILVUS_WORKSPACE`, `QDRANT_WORKSPACE`, `MONGODB_WORKSPACE`, `POSTGRES_WORKSPACE`, `NEO4J_WORKSPACE`, `OPENSEARCH_WORKSPACE`.
+
+For a practical demonstration of managing multiple isolated knowledge bases, see [Workspace Demo](../examples/lightrag_gemini_workspace_demo.py).
+
+
+## Insert
+
+* Basic Insert
+
+```python
+rag.insert("Text")
+```
+
+* Batch Insert
+
+```python
+# Basic Batch Insert
+rag.insert(["TEXT1", "TEXT2", ...])
+
+# Batch Insert with custom batch size
+rag = LightRAG(
+    ...,
+    working_dir=WORKING_DIR,
+    max_parallel_insert=4
+)
+rag.insert(["TEXT1", "TEXT2", "TEXT3", ...])  # Processed in batches of 4
+```
+
+The `max_parallel_insert` parameter determines the number of documents processed concurrently. Default is **2**. Recommended to keep **below 10**, as the bottleneck typically lies with the LLM.
+
+* Insert with ID
+
+The number of documents and IDs must be the same.
+
+```python
+# Single text with ID
+rag.insert("TEXT1", ids=["ID_FOR_TEXT1"])
+
+# Multiple texts with IDs
+rag.insert(["TEXT1", "TEXT2", ...], ids=["ID_FOR_TEXT1", "ID_FOR_TEXT2", ...])
+```
+
+* Insert using Pipeline
+
+`apipeline_enqueue_documents` and `apipeline_process_enqueue_documents` allow incremental insertion of documents in the background while the main thread continues executing.
+
+```python
+rag = LightRAG(...)
+await rag.apipeline_enqueue_documents(input)
+# Your routine in loop
+await rag.apipeline_process_enqueue_documents()
+```
+
+* Insert Multi-file Type Support
+
+The `textract` library supports reading TXT, DOCX, PPTX, CSV, and PDF:
+
+```python
+import textract
+
+file_path = 'TEXT.pdf'
+text_content = textract.process(file_path)
+rag.insert(text_content.decode('utf-8'))
+```
+
+* Citation Functionality
+
+By providing file paths, the system ensures sources can be traced back to their original documents:
+
+```python
+documents = ["Document content 1", "Document content 2"]
+file_paths = ["path/to/doc1.txt", "path/to/doc2.txt"]
+
+rag.insert(documents, file_paths=file_paths)
+```
+
+
+## Edit Entities and Relations
+
+LightRAG supports comprehensive knowledge graph management: create, edit, and delete entities and relationships.
+
+* Create Entities and Relations
+
+```python
+# Create entity
+entity = rag.create_entity("Google", {
+    "description": "Google is a multinational technology company specializing in internet-related services and products.",
+    "entity_type": "company"
+})
+
+product = rag.create_entity("Gmail", {
+    "description": "Gmail is an email service developed by Google.",
+    "entity_type": "product"
+})
+
+# Create relation
+relation = rag.create_relation("Google", "Gmail", {
+    "description": "Google develops and operates Gmail.",
+    "keywords": "develops operates service",
+    "weight": 2.0
+})
+```
+
+* Edit Entities and Relations
+
+```python
+# Edit entity attributes
+updated_entity = rag.edit_entity("Google", {
+    "description": "Google is a subsidiary of Alphabet Inc., founded in 1998.",
+    "entity_type": "tech_company"
+})
+
+# Rename entity (with all its relationships properly migrated)
+renamed_entity = rag.edit_entity("Gmail", {
+    "entity_name": "Google Mail",
+    "description": "Google Mail (formerly Gmail) is an email service."
+})
+
+# Edit relation
+updated_relation = rag.edit_relation("Google", "Google Mail", {
+    "description": "Google created and maintains Google Mail service.",
+    "keywords": "creates maintains email service",
+    "weight": 3.0
+})
+```
+
+All operations are available in both synchronous and asynchronous versions. Async versions have the prefix "a" (e.g., `acreate_entity`, `aedit_relation`).
+
+* Insert Custom KG
+
+```python
+custom_kg = {
+    "chunks": [
+        {
+            "content": "Alice and Bob are collaborating on quantum computing research.",
+            "source_id": "doc-1",
+            "file_path": "test_file",
+        }
+    ],
+    "entities": [
+        {
+            "entity_name": "Alice",
+            "entity_type": "person",
+            "description": "Alice is a researcher specializing in quantum physics.",
+            "source_id": "doc-1",
+            "file_path": "test_file"
+        },
+        {
+            "entity_name": "Bob",
+            "entity_type": "person",
+            "description": "Bob is a mathematician.",
+            "source_id": "doc-1",
+            "file_path": "test_file"
+        },
+        {
+            "entity_name": "Quantum Computing",
+            "entity_type": "technology",
+            "description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
+            "source_id": "doc-1",
+            "file_path": "test_file"
+        }
+    ],
+    "relationships": [
+        {
+            "src_id": "Alice",
+            "tgt_id": "Bob",
+            "description": "Alice and Bob are research partners.",
+            "keywords": "collaboration research",
+            "weight": 1.0,
+            "source_id": "doc-1",
+            "file_path": "test_file"
+        },
+        {
+            "src_id": "Alice",
+            "tgt_id": "Quantum Computing",
+            "description": "Alice conducts research on quantum computing.",
+            "keywords": "research expertise",
+            "weight": 1.0,
+            "source_id": "doc-1",
+            "file_path": "test_file"
+        },
+        {
+            "src_id": "Bob",
+            "tgt_id": "Quantum Computing",
+            "description": "Bob researches quantum computing.",
+            "keywords": "research application",
+            "weight": 1.0,
+            "source_id": "doc-1",
+            "file_path": "test_file"
+        }
+    ]
+}
+
+rag.insert_custom_kg(custom_kg)
+```
+
+* Other Entity and Relation Operations
+  - **create_entity**: Creates a new entity with specified attributes
+  - **edit_entity**: Updates an existing entity's attributes or renames it
+  - **create_relation**: Creates a new relation between existing entities
+  - **edit_relation**: Updates an existing relation's attributes
+
+These operations maintain data consistency across both the graph database and vector database components.
+
+
+## Delete Functions
+
+LightRAG provides comprehensive deletion capabilities.
+
+### Delete Entities
+
+```python
+# Synchronous
+rag.delete_by_entity("Google")
+
+# Asynchronous
+await rag.adelete_by_entity("Google")
+```
+
+When deleting an entity:
+- Removes the entity node from the knowledge graph
+- Deletes all associated relationships
+- Removes related embedding vectors from the vector database
+- Maintains knowledge graph integrity
+
+### Delete Relations
+
+```python
+# Synchronous
+rag.delete_by_relation("Google", "Gmail")
+
+# Asynchronous
+await rag.adelete_by_relation("Google", "Gmail")
+```
+
+When deleting a relationship:
+- Removes the specified relationship edge
+- Deletes the relationship's embedding vector
+- Preserves both entity nodes and their other relationships
+
+### Delete by Document ID
+
+```python
+# Asynchronous only (complex reconstruction process)
+await rag.adelete_by_doc_id("doc-12345")
+```
+
+The deletion process:
+1. Delete all text chunks related to the document
+2. Identify and delete entities/relationships that belong only to this document
+3. Rebuild entities/relationships that still exist in other documents
+4. Update all related vector indexes
+5. Clean up document status records
+
+**Important Reminders:**
+1. All deletion operations are **irreversible** — use with caution
+2. Deleting large amounts of data may take time, especially deletion by document ID
+3. Deletion operations automatically maintain consistency between the graph and vector databases
+4. Consider backing up data before performing important deletions
+
+
+## Entity Merging
+
+**Merge Entities and Their Relationships**
+
+```python
+# Basic merge
+rag.merge_entities(
+    source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"],
+    target_entity="AI Technology"
+)
+
+# With custom merge strategy
+rag.merge_entities(
+    source_entities=["John Smith", "Dr. Smith", "J. Smith"],
+    target_entity="John Smith",
+    merge_strategy={
+        "description": "concatenate",  # Combine all descriptions
+        "entity_type": "keep_first",   # Keep the type from the first entity
+        "source_id": "join_unique"     # Combine all unique source IDs
+    }
+)
+
+# With custom target entity data
+rag.merge_entities(
+    source_entities=["New York", "NYC", "Big Apple"],
+    target_entity="New York City",
+    target_entity_data={
+        "entity_type": "LOCATION",
+        "description": "New York City is the most populous city in the United States.",
+    }
+)
+
+# Advanced: combining both strategy and custom data
+rag.merge_entities(
+    source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"],
+    target_entity="Microsoft",
+    merge_strategy={
+        "description": "concatenate",
+        "source_id": "join_unique"
+    },
+    target_entity_data={
+        "entity_type": "ORGANIZATION",
+    }
+)
+```
+
+When merging entities:
+- All relationships from source entities are redirected to the target entity
+- Duplicate relationships are intelligently merged
+- Self-relationships (loops) are prevented
+- Source entities are removed after merging
+- Relationship weights and attributes are preserved
+
+
+## Troubleshooting
+
+### Common Initialization Errors
+
+1. **`AttributeError: __aenter__`**
+   - **Cause**: Storage backends not initialized
+   - **Solution**: Call `await rag.initialize_storages()` after creating the LightRAG instance
+
+2. **`KeyError: 'history_messages'`**
+   - **Cause**: Pipeline status not initialized
+   - **Solution**: Call `await rag.initialize_storages()` after creating the LightRAG instance
+
+3. **Both errors in sequence**
+   - **Solution**: Always follow this pattern:
+   ```python
+   rag = LightRAG(...)
+   await rag.initialize_storages()
+   ```
+
+### Model Switching Issues
+
+When switching between different embedding models, you must clear the data directory to avoid errors. The only file you may want to preserve is `kv_store_llm_response_cache.json` if you wish to retain the LLM cache.

+ 235 - 0
docs/Reproduce.md

@@ -0,0 +1,235 @@
+# Reproducing the Evaluation Results
+
+## Dataset
+
+The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain).
+
+## Generate Query
+
+LightRAG uses the following prompt to generate high-level queries, with the corresponding code in `examples/generate_query.py`.
+
+**Prompt**
+
+```
+Given the following description of a dataset:
+
+{description}
+
+Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset.
+
+Output the results in the following structure:
+- User 1: [user description]
+    - Task 1: [task description]
+        - Question 1:
+        - Question 2:
+        - Question 3:
+        - Question 4:
+        - Question 5:
+    - Task 2: [task description]
+        ...
+    - Task 5: [task description]
+- User 2: [user description]
+    ...
+- User 5: [user description]
+    ...
+```
+
+## Batch Eval
+
+To evaluate the performance of two RAG systems on high-level queries, LightRAG uses the following prompt, with the specific code available in `reproduce/batch_eval.py`.
+
+**Prompt**
+
+```
+---Role---
+You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+---Goal---
+You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
+
+- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
+- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
+- **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?
+
+For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.
+
+Here is the question:
+{query}
+
+Here are the two answers:
+
+**Answer 1:**
+{answer1}
+
+**Answer 2:**
+{answer2}
+
+Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.
+
+Output your evaluation in the following JSON format:
+
+{{
+    "Comprehensiveness": {{
+        "Winner": "[Answer 1 or Answer 2]",
+        "Explanation": "[Provide explanation here]"
+    }},
+    "Empowerment": {{
+        "Winner": "[Answer 1 or Answer 2]",
+        "Explanation": "[Provide explanation here]"
+    }},
+    "Overall Winner": {{
+        "Winner": "[Answer 1 or Answer 2]",
+        "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
+    }}
+}}
+```
+
+## Overall Performance Table
+
+||**Agriculture**||**CS**||**Legal**||**Mix**||
+|----------------------|---------------|------------|------|------------|---------|------------|-------|------------|
+||NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|NaiveRAG|**LightRAG**|
+|**Comprehensiveness**|32.4%|**67.6%**|38.4%|**61.6%**|16.4%|**83.6%**|38.8%|**61.2%**|
+|**Diversity**|23.6%|**76.4%**|38.0%|**62.0%**|13.6%|**86.4%**|32.4%|**67.6%**|
+|**Empowerment**|32.4%|**67.6%**|38.8%|**61.2%**|16.4%|**83.6%**|42.8%|**57.2%**|
+|**Overall**|32.4%|**67.6%**|38.8%|**61.2%**|15.2%|**84.8%**|40.0%|**60.0%**|
+||RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|RQ-RAG|**LightRAG**|
+|**Comprehensiveness**|31.6%|**68.4%**|38.8%|**61.2%**|15.2%|**84.8%**|39.2%|**60.8%**|
+|**Diversity**|29.2%|**70.8%**|39.2%|**60.8%**|11.6%|**88.4%**|30.8%|**69.2%**|
+|**Empowerment**|31.6%|**68.4%**|36.4%|**63.6%**|15.2%|**84.8%**|42.4%|**57.6%**|
+|**Overall**|32.4%|**67.6%**|38.0%|**62.0%**|14.4%|**85.6%**|40.0%|**60.0%**|
+||HyDE|**LightRAG**|HyDE|**LightRAG**|HyDE|**LightRAG**|HyDE|**LightRAG**|
+|**Comprehensiveness**|26.0%|**74.0%**|41.6%|**58.4%**|26.8%|**73.2%**|40.4%|**59.6%**|
+|**Diversity**|24.0%|**76.0%**|38.8%|**61.2%**|20.0%|**80.0%**|32.4%|**67.6%**|
+|**Empowerment**|25.2%|**74.8%**|40.8%|**59.2%**|26.0%|**74.0%**|46.0%|**54.0%**|
+|**Overall**|24.8%|**75.2%**|41.6%|**58.4%**|26.4%|**73.6%**|42.4%|**57.6%**|
+||GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|GraphRAG|**LightRAG**|
+|**Comprehensiveness**|45.6%|**54.4%**|48.4%|**51.6%**|48.4%|**51.6%**|**50.4%**|49.6%|
+|**Diversity**|22.8%|**77.2%**|40.8%|**59.2%**|26.4%|**73.6%**|36.0%|**64.0%**|
+|**Empowerment**|41.2%|**58.8%**|45.2%|**54.8%**|43.6%|**56.4%**|**50.8%**|49.2%|
+|**Overall**|45.2%|**54.8%**|48.0%|**52.0%**|47.2%|**52.8%**|**50.4%**|49.6%|
+
+## Reproduce
+
+All the code can be found in the `./reproduce` directory.
+
+### Step-0 Extract Unique Contexts
+
+First, extract unique contexts from the datasets.
+
+**Code**
+
+```python
+def extract_unique_contexts(input_directory, output_directory):
+
+    os.makedirs(output_directory, exist_ok=True)
+
+    jsonl_files = glob.glob(os.path.join(input_directory, '*.jsonl'))
+    print(f"Found {len(jsonl_files)} JSONL files.")
+
+    for file_path in jsonl_files:
+        filename = os.path.basename(file_path)
+        name, ext = os.path.splitext(filename)
+        output_filename = f"{name}_unique_contexts.json"
+        output_path = os.path.join(output_directory, output_filename)
+
+        unique_contexts_dict = {}
+
+        print(f"Processing file: {filename}")
+
+        try:
+            with open(file_path, 'r', encoding='utf-8') as infile:
+                for line_number, line in enumerate(infile, start=1):
+                    line = line.strip()
+                    if not line:
+                        continue
+                    try:
+                        json_obj = json.loads(line)
+                        context = json_obj.get('context')
+                        if context and context not in unique_contexts_dict:
+                            unique_contexts_dict[context] = None
+                    except json.JSONDecodeError as e:
+                        print(f"JSON decoding error in file {filename} at line {line_number}: {e}")
+        except FileNotFoundError:
+            print(f"File not found: {filename}")
+            continue
+        except Exception as e:
+            print(f"An error occurred while processing file {filename}: {e}")
+            continue
+
+        unique_contexts_list = list(unique_contexts_dict.keys())
+        print(f"There are {len(unique_contexts_list)} unique `context` entries in the file {filename}.")
+
+        try:
+            with open(output_path, 'w', encoding='utf-8') as outfile:
+                json.dump(unique_contexts_list, outfile, ensure_ascii=False, indent=4)
+            print(f"Unique `context` entries have been saved to: {output_filename}")
+        except Exception as e:
+            print(f"An error occurred while saving to the file {output_filename}: {e}")
+
+    print("All files have been processed.")
+```
+
+### Step-1 Insert Contexts
+
+Insert the extracted contexts into the LightRAG system.
+
+**Code**
+
+```python
+def insert_text(rag, file_path):
+    with open(file_path, mode='r', encoding='utf-8') as f:
+        unique_contexts = json.load(f)
+
+    retries = 0
+    max_retries = 3
+    while retries < max_retries:
+        try:
+            rag.insert(unique_contexts)
+            break
+        except Exception as e:
+            retries += 1
+            print(f"Insertion failed, retrying ({retries}/{max_retries}), error: {e}")
+            time.sleep(10)
+    if retries == max_retries:
+        print("Insertion failed after exceeding the maximum number of retries")
+```
+
+### Step-2 Generate Queries
+
+Extract tokens from the first and second half of each context, then combine them as dataset descriptions to generate queries.
+
+**Code**
+
+```python
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+
+def get_summary(context, tot_tokens=2000):
+    tokens = tokenizer.tokenize(context)
+    half_tokens = tot_tokens // 2
+
+    start_tokens = tokens[1000:1000 + half_tokens]
+    end_tokens = tokens[-(1000 + half_tokens):-1000]
+
+    summary_tokens = start_tokens + end_tokens
+    summary = tokenizer.convert_tokens_to_string(summary_tokens)
+
+    return summary
+```
+
+### Step-3 Query
+
+Extract and query LightRAG with the queries generated in Step-2.
+
+**Code**
+
+```python
+def extract_queries(file_path):
+    with open(file_path, 'r', encoding='utf-8') as f:
+        data = f.read()
+
+    data = data.replace('**', '')
+
+    queries = re.findall(r'- Question \d+: (.+)', data)
+
+    return queries
+```

+ 376 - 0
docs/RoleSpecificLLMConfiguration-zh.md

@@ -0,0 +1,376 @@
+# 基于角色的 LLM/VLM 配置指南
+
+LightRAG 支持为不同处理阶段配置不同的 LLM 或 VLM。这个机制适合把低成本模型用于抽取,把更强模型用于最终回答,或为多模态分析单独指定视觉语言模型。
+
+## 角色说明
+
+当前支持四个角色:
+
+| 角色 | 用途 |
+| --- | --- |
+| `EXTRACT` | 实体/关系抽取,以及实体/关系描述摘要。 |
+| `KEYWORD` | 查询关键词抽取,用于检索前的 high-level / low-level keyword 生成。 |
+| `QUERY` | 最终问答、普通查询、bypass 查询,以及 Ollama-compatible API 的查询路径。 |
+| `VLM` | 多模态分析阶段,用于图片、表格、公式等内容的 VLM 分析。 |
+
+如果某个角色没有专门配置,LightRAG 会使用基础 `LLM_*` 配置。
+
+## 基础 LLM 配置
+
+基础配置定义默认 LLM provider、模型、服务地址、认证信息和并发控制:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+
+# 所有 LLM 请求的默认超时时间
+LLM_TIMEOUT=180
+
+# 所有 LLM 调用的默认最大并发数
+MAX_ASYNC=4
+```
+
+常用字段:
+
+| 变量 | 说明 |
+| --- | --- |
+| `LLM_BINDING` | 基础 LLM provider。支持 `openai`、`ollama`、`lollms`、`azure_openai`、`bedrock`、`gemini`。 |
+| `LLM_MODEL` | 基础模型名。对 Azure OpenAI 通常使用 deployment 名称。 |
+| `LLM_BINDING_HOST` | 基础 provider endpoint。对于 SDK 默认 endpoint,可使用对应 sentinel,例如 `DEFAULT_GEMINI_ENDPOINT` 或 `DEFAULT_BEDROCK_ENDPOINT`。 |
+| `LLM_BINDING_API_KEY` | 基础 API key。Bedrock 不使用这个字段。 |
+| `LLM_TIMEOUT` | 基础 LLM timeout。角色未设置 timeout 时继承它。 |
+| `MAX_ASYNC` | 基础 LLM 最大并发。角色未设置 `{ROLE}_MAX_ASYNC_LLM` 时继承它。 |
+
+## 角色覆盖变量
+
+每个角色都可以覆盖 binding、模型、endpoint、API key、并发和 timeout:
+
+```env
+QUERY_LLM_BINDING=openai
+QUERY_LLM_MODEL=gpt-5
+QUERY_LLM_BINDING_HOST=https://api.openai.com/v1
+QUERY_LLM_BINDING_API_KEY=your_query_api_key
+QUERY_MAX_ASYNC_LLM=2
+QUERY_LLM_TIMEOUT=240
+```
+
+变量格式:
+
+| 变量 | 说明 |
+| --- | --- |
+| `{ROLE}_LLM_BINDING` | 覆盖角色 provider。`ROLE` 可为 `EXTRACT`、`KEYWORD`、`QUERY`、`VLM`。 |
+| `{ROLE}_LLM_MODEL` | 覆盖角色模型名。 |
+| `{ROLE}_LLM_BINDING_HOST` | 覆盖角色 endpoint。 |
+| `{ROLE}_LLM_BINDING_API_KEY` | 覆盖角色 API key。Bedrock 不支持。 |
+| `{ROLE}_MAX_ASYNC_LLM` | 覆盖角色最大并发。未设置时继承 `MAX_ASYNC`。 |
+| `{ROLE}_LLM_TIMEOUT` | 覆盖角色 timeout。未设置时继承 `LLM_TIMEOUT`。 |
+
+## Provider 参数覆盖
+
+provider 细项使用下面的格式:
+
+```env
+{ROLE}_{PROVIDER_PREFIX}_{FIELD}
+```
+
+例如:
+
+```env
+# 只覆盖 QUERY 角色的 OpenAI reasoning effort
+QUERY_OPENAI_LLM_REASONING_EFFORT=medium
+
+# 只覆盖 EXTRACT 角色的 Bedrock 生成参数
+EXTRACT_BEDROCK_LLM_TEMPERATURE=0.0
+EXTRACT_BEDROCK_LLM_MAX_TOKENS=2048
+
+# 只覆盖 VLM 角色的 Gemini 生成参数
+VLM_GEMINI_LLM_MAX_OUTPUT_TOKENS=4096
+VLM_GEMINI_LLM_TEMPERATURE=0.2
+```
+
+常见 provider 前缀:
+
+| Provider | 基础参数前缀 | 角色参数示例 |
+| --- | --- | --- |
+| `openai` / `azure_openai` | `OPENAI_LLM_*` | `QUERY_OPENAI_LLM_REASONING_EFFORT` |
+| `ollama` | `OLLAMA_LLM_*` | `EXTRACT_OLLAMA_LLM_NUM_PREDICT` |
+| `lollms` | 使用 Ollama 兼容参数集合 | `QUERY_OLLAMA_LLM_TEMPERATURE` |
+| `bedrock` | `BEDROCK_LLM_*` | `EXTRACT_BEDROCK_LLM_MAX_TOKENS` |
+| `gemini` | `GEMINI_LLM_*` | `VLM_GEMINI_LLM_THINKING_CONFIG` |
+
+## 继承规则
+
+### 同一个 provider 内覆盖
+
+如果角色没有设置 `{ROLE}_LLM_BINDING`,或设置成与基础 `LLM_BINDING` 相同,角色会继承基础配置:
+
+- 未设置 `{ROLE}_LLM_MODEL` 时继承 `LLM_MODEL`。
+- 未设置 `{ROLE}_LLM_BINDING_HOST` 时继承 `LLM_BINDING_HOST`。
+- 未设置 `{ROLE}_LLM_BINDING_API_KEY` 时继承 `LLM_BINDING_API_KEY`。
+- 未设置 `{ROLE}_LLM_TIMEOUT` 时继承 `LLM_TIMEOUT`。
+- 未设置 `{ROLE}_MAX_ASYNC_LLM` 时继承 `MAX_ASYNC`。
+- provider 参数先继承基础 provider options,再叠加角色专属 provider options。
+
+因此,同一个 provider 下只想换模型时,只需要写模型名:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+OPENAI_LLM_REASONING_EFFORT=minimal
+
+# QUERY 继承 host、API key、timeout、并发和 OPENAI_LLM_REASONING_EFFORT
+QUERY_LLM_MODEL=gpt-5
+```
+
+### 跨 provider 覆盖
+
+如果角色的 `{ROLE}_LLM_BINDING` 与基础 `LLM_BINDING` 不同,就是跨 provider 配置。当前规则是:
+
+- 必须设置 `{ROLE}_LLM_MODEL`。
+- 非 Bedrock provider 必须设置 `{ROLE}_LLM_BINDING_API_KEY`。
+- 如果没有设置 `{ROLE}_LLM_BINDING_HOST`,LightRAG 会尝试使用该 provider 的默认 host。
+- provider 参数不继承基础 provider options,而是从空配置开始,只叠加角色专属 provider options。
+
+示例:基础使用 Ollama,本地抽取;最终回答改用 OpenAI:
+
+```env
+LLM_BINDING=ollama
+LLM_MODEL=qwen3.5:9b
+LLM_BINDING_HOST=http://localhost:11434
+OLLAMA_LLM_NUM_CTX=32768
+
+QUERY_LLM_BINDING=openai
+QUERY_LLM_MODEL=gpt-5-mini
+QUERY_LLM_BINDING_HOST=https://api.openai.com/v1
+QUERY_LLM_BINDING_API_KEY=your_openai_api_key
+QUERY_OPENAI_LLM_REASONING_EFFORT=minimal
+```
+
+跨 provider 时建议显式设置 `{ROLE}_LLM_BINDING_HOST`,避免默认 host 与基础 provider 的 endpoint 混淆。
+
+### Bedrock 认证规则
+
+Bedrock 不使用 `LLM_BINDING_API_KEY`,也不支持 `{ROLE}_LLM_BINDING_API_KEY`。可用认证方式:
+
+- 全局 SigV4:`AWS_ACCESS_KEY_ID`、`AWS_SECRET_ACCESS_KEY`、`AWS_SESSION_TOKEN`、`AWS_REGION`。
+- 角色级 SigV4:`{ROLE}_AWS_ACCESS_KEY_ID`、`{ROLE}_AWS_SECRET_ACCESS_KEY`、`{ROLE}_AWS_SESSION_TOKEN`、`{ROLE}_AWS_REGION`。
+- 进程级 bearer token:`AWS_BEARER_TOKEN_BEDROCK`。这是 AWS SDK 进程级设置,不能按角色覆盖。
+
+角色级 Bedrock 示例:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_openai_api_key
+
+EXTRACT_LLM_BINDING=bedrock
+EXTRACT_LLM_MODEL=us.amazon.nova-lite-v1:0
+EXTRACT_LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+EXTRACT_AWS_REGION=us-west-2
+EXTRACT_AWS_ACCESS_KEY_ID=your_extract_access_key
+EXTRACT_AWS_SECRET_ACCESS_KEY=your_extract_secret_key
+EXTRACT_AWS_SESSION_TOKEN=your_optional_session_token
+EXTRACT_BEDROCK_LLM_TEMPERATURE=0.0
+EXTRACT_BEDROCK_LLM_MAX_TOKENS=2048
+```
+
+## Provider 行为对照
+
+| Provider | 角色级 host/base_url | 角色级 API key | 认证限制 |
+| --- | --- | --- | --- |
+| `openai` | 支持,通过 `{ROLE}_LLM_BINDING_HOST` 传给 OpenAI-compatible client。 | 支持 `{ROLE}_LLM_BINDING_API_KEY`,未设置时同 provider 继承基础 `LLM_BINDING_API_KEY`。 | 当前主要是 API key / Bearer 模式。 |
+| `ollama` | 支持,通过 `{ROLE}_LLM_BINDING_HOST` 传给 Ollama client。 | 支持 `{ROLE}_LLM_BINDING_API_KEY`,未设置时同 provider 继承基础 key;底层未收到 key 时会再回退 `OLLAMA_API_KEY`。 | Bearer header。 |
+| `lollms` | 支持,通过 `{ROLE}_LLM_BINDING_HOST` 作为 `base_url`。 | 支持 `{ROLE}_LLM_BINDING_API_KEY`,未设置时同 provider 继承基础 key。 | Bearer header。 |
+| `azure_openai` | 支持,通过 `{ROLE}_LLM_BINDING_HOST` 作为 Azure endpoint。 | 支持 `{ROLE}_LLM_BINDING_API_KEY`,未设置时同 provider 继承基础 key,也可能回退 `AZURE_OPENAI_API_KEY`。 | `AZURE_OPENAI_API_VERSION` 是全局环境变量,不支持角色级覆盖。 |
+| `bedrock` | 支持,通过 `{ROLE}_LLM_BINDING_HOST` 作为 `endpoint_url`;`DEFAULT_BEDROCK_ENDPOINT` 表示交给 AWS SDK 选择。 | 不支持 generic API key。 | 使用全局或角色级 SigV4。`AWS_BEARER_TOKEN_BEDROCK` 是进程级,不能按角色覆盖。 |
+| `gemini` | 支持,通过 `{ROLE}_LLM_BINDING_HOST` 传给 Google GenAI client;`DEFAULT_GEMINI_ENDPOINT` 表示使用 SDK 默认 endpoint。 | AI Studio 模式支持 `{ROLE}_LLM_BINDING_API_KEY`。 | Vertex AI 由 `GOOGLE_GENAI_USE_VERTEXAI`、`GOOGLE_CLOUD_PROJECT`、`GOOGLE_CLOUD_LOCATION`、`GOOGLE_APPLICATION_CREDENTIALS` 控制,都是进程级设置。 |
+
+## 推荐配置模式
+
+### 1. 同 provider 只更换模型
+
+适合用同一个 OpenAI key 和 endpoint,但让最终回答使用更强模型:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+OPENAI_LLM_REASONING_EFFORT=minimal
+
+QUERY_LLM_MODEL=gpt-5
+QUERY_MAX_ASYNC_LLM=2
+```
+
+`QUERY` 会继承基础 host、API key 和 `OPENAI_LLM_REASONING_EFFORT`。
+
+### 2. 同 provider 更换模型并调整参数
+
+适合基础模型用于抽取,最终回答使用更高 reasoning effort:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+OPENAI_LLM_REASONING_EFFORT=minimal
+OPENAI_LLM_MAX_COMPLETION_TOKENS=4096
+
+QUERY_LLM_MODEL=gpt-5
+QUERY_OPENAI_LLM_REASONING_EFFORT=medium
+QUERY_OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+QUERY_LLM_TIMEOUT=240
+```
+
+### 3. 同 provider 使用不同 endpoint 和 API key
+
+适合所有角色都走 `openai` binding,但其中一些角色访问 OpenAI 官方接口,另一些角色访问本地 vLLM、SGLang 或 OpenRouter 等 OpenAI-compatible endpoint。下面的例子中:
+
+- `EXTRACT` 使用 OpenAI 官方 `gpt-5-mini`。
+- `QUERY` 使用 OpenAI 官方 `gpt-5.4`,并使用单独的 OpenAI key。
+- `KEYWORD` 使用本地 vLLM 部署的 `Qwen3.5-35B-A3B`。
+
+```env
+###########################################################################
+# Base LLM fallback. Keep it aligned with EXTRACT so unspecified roles still
+# have a valid OpenAI configuration.
+###########################################################################
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_extract_openai_api_key
+LLM_TIMEOUT=180
+MAX_ASYNC=4
+
+###########################################################################
+# IMPORTANT:
+# Do not set global OPENAI_LLM_REASONING_EFFORT here if any same-provider role
+# points to a local OpenAI-compatible server that does not support it.
+# Use role-specific OPENAI options instead.
+###########################################################################
+# OPENAI_LLM_REASONING_EFFORT=none
+
+###########################################################################
+# EXTRACT: OpenAI official API, gpt-5-mini
+###########################################################################
+EXTRACT_LLM_BINDING=openai
+EXTRACT_LLM_MODEL=gpt-5-mini
+EXTRACT_LLM_BINDING_HOST=https://api.openai.com/v1
+EXTRACT_LLM_BINDING_API_KEY=your_extract_openai_api_key
+EXTRACT_OPENAI_LLM_REASONING_EFFORT=low
+EXTRACT_OPENAI_LLM_MAX_COMPLETION_TOKENS=4096
+EXTRACT_MAX_ASYNC_LLM=4
+EXTRACT_LLM_TIMEOUT=180
+
+###########################################################################
+# QUERY: OpenAI official API, gpt-5.4, separate API key
+###########################################################################
+QUERY_LLM_BINDING=openai
+QUERY_LLM_MODEL=gpt-5.4
+QUERY_LLM_BINDING_HOST=https://api.openai.com/v1
+QUERY_LLM_BINDING_API_KEY=your_query_openai_api_key
+QUERY_OPENAI_LLM_REASONING_EFFORT=medium
+QUERY_OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+QUERY_MAX_ASYNC_LLM=2
+QUERY_LLM_TIMEOUT=240
+
+###########################################################################
+# KEYWORD: local vLLM OpenAI-compatible endpoint, Qwen3.5-35B-A3B
+###########################################################################
+KEYWORD_LLM_BINDING=openai
+KEYWORD_LLM_MODEL=Qwen3.5-35B-A3B
+KEYWORD_LLM_BINDING_HOST=http://localhost:8000/v1
+# If vLLM was started with --api-key, use the same value here.
+# If vLLM has no auth, still set a non-empty dummy value to avoid falling
+# back to the official OpenAI key.
+KEYWORD_LLM_BINDING_API_KEY=local-vllm-api-key
+KEYWORD_OPENAI_LLM_MAX_TOKENS=2048
+# Optional for Qwen-style models served by vLLM when you want to disable thinking.
+KEYWORD_OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
+KEYWORD_MAX_ASYNC_LLM=4
+KEYWORD_LLM_TIMEOUT=180
+```
+
+这个模式不是跨 provider,因为三个角色的 binding 都是 `openai`。LightRAG 会分别把每个角色的 `*_LLM_BINDING_HOST` 和 `*_LLM_BINDING_API_KEY` 传给 OpenAI-compatible client。
+
+注意:同 provider 的 provider options 会继承基础 `OPENAI_LLM_*`。如果本地 vLLM 不支持 OpenAI 官方参数,例如 `reasoning_effort`,不要设置全局 `OPENAI_LLM_REASONING_EFFORT`;改用 `EXTRACT_OPENAI_LLM_REASONING_EFFORT`、`QUERY_OPENAI_LLM_REASONING_EFFORT` 这类角色级变量。
+
+### 4. 某个角色跨 provider
+
+适合基础使用 OpenAI 官方模型,只有关键词抽取使用本地 Ollama:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_openai_api_key
+OPENAI_LLM_REASONING_EFFORT=medium
+
+KEYWORD_LLM_BINDING=ollama
+KEYWORD_LLM_MODEL=qwen3.5:9b
+KEYWORD_LLM_BINDING_HOST=http://localhost:11434
+KEYWORD_LLM_BINDING_API_KEY=ollama-local-key
+KEYWORD_OLLAMA_LLM_NUM_CTX=32768
+```
+
+跨 provider 时,Ollama 参数不会继承 OpenAI 参数。`KEYWORD_LLM_BINDING_API_KEY` 对本地 Ollama 通常可以使用占位值;当前跨 provider 校验会要求非 Bedrock 角色显式提供角色级 API key。
+
+### 5. 为 VLM 单独指定多模态模型
+
+适合文本任务使用便宜模型,多模态分析使用视觉语言模型:
+
+```env
+VLM_PROCESS_ENABLE=true
+
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+
+VLM_LLM_BINDING=openai
+VLM_LLM_MODEL=gpt-4o
+VLM_OPENAI_LLM_MAX_TOKENS=4096
+VLM_MAX_ASYNC_LLM=2
+VLM_LLM_TIMEOUT=240
+```
+
+如果 VLM 使用同一个 provider 和 key,可以省略 `VLM_LLM_BINDING_HOST` 与 `VLM_LLM_BINDING_API_KEY`。
+
+`VLM_PROCESS_ENABLE` 是多模态分析的总开关:设为 `false` 时,pipeline 会对每个多模态 item 输出 warning 并跳过,不调用 VLM;设为 `true` 时,生效的 VLM binding(设置了 `VLM_LLM_BINDING` 时取该值,否则取 `LLM_BINDING`)必须支持图片输入。当前支持视觉输入的 provider 包括:`openai`、`azure_openai`、`gemini`、`bedrock`、`ollama`、`anthropic`。`lollms` 无法接收图片输入,会在启动时直接报错。
+
+### 6. Bedrock 角色级 SigV4 凭证
+
+适合只有某个角色访问 Bedrock,并使用独立 IAM/STS 凭证:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_openai_api_key
+
+QUERY_LLM_BINDING=bedrock
+QUERY_LLM_MODEL=us.amazon.nova-lite-v1:0
+QUERY_LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+QUERY_AWS_REGION=us-east-1
+QUERY_AWS_ACCESS_KEY_ID=your_query_access_key
+QUERY_AWS_SECRET_ACCESS_KEY=your_query_secret_key
+QUERY_AWS_SESSION_TOKEN=your_optional_session_token
+QUERY_BEDROCK_LLM_MAX_TOKENS=4096
+QUERY_BEDROCK_LLM_TEMPERATURE=0.2
+```
+
+不要设置 `QUERY_LLM_BINDING_API_KEY`,Bedrock 会拒绝该配置。
+
+## 注意事项
+
+- 同 provider 下,`OPENAI_LLM_REASONING_EFFORT`、`OPENAI_LLM_MAX_TOKENS`、`OLLAMA_LLM_NUM_CTX`、`GEMINI_LLM_THINKING_CONFIG` 等 provider 参数会自动继承。
+- 当前没有干净的角色级“取消继承某个 provider 参数”的语义。如果某个同 provider 角色模型不支持基础参数,需要为该角色显式覆盖为可用值,或将它配置成跨 provider,并且只设置该角色支持的 provider 参数。
+- `azure_openai` 的 `AZURE_OPENAI_DEPLOYMENT` 和 `AZURE_OPENAI_API_VERSION` 是全局环境变量。若设置了 `AZURE_OPENAI_DEPLOYMENT`,它可能优先于角色模型名。
+- Gemini Vertex AI 模式由进程级 Google 环境变量控制,不能在同一个 LightRAG 进程里让某些角色使用 Vertex AI、另一些角色使用 AI Studio API key。
+- `LLM_BINDING_HOST` 在 Docker/Compose 中通常需要使用容器可访问地址,例如 `host.docker.internal`,角色级 host 也遵循相同原则。
+- 修改 `.env` 后请重启 LightRAG Server。部分 IDE 终端会预加载 `.env`,建议打开新的终端会话确认环境变量生效。

+ 376 - 0
docs/RoleSpecificLLMConfiguration.md

@@ -0,0 +1,376 @@
+# Role-Specific LLM/VLM Configuration Guide
+
+LightRAG supports configuring different LLMs or VLMs for different processing stages. This mechanism is useful when using a lower-cost model for extraction, a stronger model for final answers, or a dedicated vision-language model for multimodal analysis.
+
+## Role Overview
+
+Four roles are currently supported:
+
+| Role | Purpose |
+| --- | --- |
+| `EXTRACT` | Entity/relation extraction and entity/relation description summarization. |
+| `KEYWORD` | Query keyword extraction for high-level / low-level keyword generation before retrieval. |
+| `QUERY` | Final QA, regular queries, bypass queries, and the query path of the Ollama-compatible API. |
+| `VLM` | Multimodal analysis stage for VLM analysis of images, tables, formulas, and similar content. |
+
+If a role has no dedicated configuration, LightRAG uses the base `LLM_*` configuration.
+
+## Base LLM Configuration
+
+The base configuration defines the default LLM provider, model, service endpoint, authentication information, and concurrency control:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+
+# Default timeout for all LLM requests
+LLM_TIMEOUT=180
+
+# Default maximum concurrency for all LLM calls
+MAX_ASYNC=4
+```
+
+Common fields:
+
+| Variable | Description |
+| --- | --- |
+| `LLM_BINDING` | Base LLM provider. Supported values are `openai`, `ollama`, `lollms`, `azure_openai`, `bedrock`, and `gemini`. |
+| `LLM_MODEL` | Base model name. For Azure OpenAI, this is usually the deployment name. |
+| `LLM_BINDING_HOST` | Base provider endpoint. For SDK default endpoints, use the corresponding sentinel, such as `DEFAULT_GEMINI_ENDPOINT` or `DEFAULT_BEDROCK_ENDPOINT`. |
+| `LLM_BINDING_API_KEY` | Base API key. Bedrock does not use this field. |
+| `LLM_TIMEOUT` | Base LLM timeout. A role inherits it when no role timeout is set. |
+| `MAX_ASYNC` | Base maximum LLM concurrency. A role inherits it when `{ROLE}_MAX_ASYNC_LLM` is not set. |
+
+## Role Override Variables
+
+Each role can override the binding, model, endpoint, API key, concurrency, and timeout:
+
+```env
+QUERY_LLM_BINDING=openai
+QUERY_LLM_MODEL=gpt-5
+QUERY_LLM_BINDING_HOST=https://api.openai.com/v1
+QUERY_LLM_BINDING_API_KEY=your_query_api_key
+QUERY_MAX_ASYNC_LLM=2
+QUERY_LLM_TIMEOUT=240
+```
+
+Variable format:
+
+| Variable | Description |
+| --- | --- |
+| `{ROLE}_LLM_BINDING` | Overrides the role provider. `ROLE` can be `EXTRACT`, `KEYWORD`, `QUERY`, or `VLM`. |
+| `{ROLE}_LLM_MODEL` | Overrides the role model name. |
+| `{ROLE}_LLM_BINDING_HOST` | Overrides the role endpoint. |
+| `{ROLE}_LLM_BINDING_API_KEY` | Overrides the role API key. Bedrock does not support it. |
+| `{ROLE}_MAX_ASYNC_LLM` | Overrides the role maximum concurrency. Inherits `MAX_ASYNC` when unset. |
+| `{ROLE}_LLM_TIMEOUT` | Overrides the role timeout. Inherits `LLM_TIMEOUT` when unset. |
+
+## Provider Option Overrides
+
+Provider-specific options use the following format:
+
+```env
+{ROLE}_{PROVIDER_PREFIX}_{FIELD}
+```
+
+Examples:
+
+```env
+# Override only the OpenAI reasoning effort for the QUERY role
+QUERY_OPENAI_LLM_REASONING_EFFORT=medium
+
+# Override only Bedrock generation parameters for the EXTRACT role
+EXTRACT_BEDROCK_LLM_TEMPERATURE=0.0
+EXTRACT_BEDROCK_LLM_MAX_TOKENS=2048
+
+# Override only Gemini generation parameters for the VLM role
+VLM_GEMINI_LLM_MAX_OUTPUT_TOKENS=4096
+VLM_GEMINI_LLM_TEMPERATURE=0.2
+```
+
+Common provider prefixes:
+
+| Provider | Base option prefix | Role option example |
+| --- | --- | --- |
+| `openai` / `azure_openai` | `OPENAI_LLM_*` | `QUERY_OPENAI_LLM_REASONING_EFFORT` |
+| `ollama` | `OLLAMA_LLM_*` | `EXTRACT_OLLAMA_LLM_NUM_PREDICT` |
+| `lollms` | Uses the Ollama-compatible option set | `QUERY_OLLAMA_LLM_TEMPERATURE` |
+| `bedrock` | `BEDROCK_LLM_*` | `EXTRACT_BEDROCK_LLM_MAX_TOKENS` |
+| `gemini` | `GEMINI_LLM_*` | `VLM_GEMINI_LLM_THINKING_CONFIG` |
+
+## Inheritance Rules
+
+### Overrides Within the Same Provider
+
+If a role does not set `{ROLE}_LLM_BINDING`, or sets it to the same value as the base `LLM_BINDING`, the role inherits the base configuration:
+
+- Inherits `LLM_MODEL` when `{ROLE}_LLM_MODEL` is not set.
+- Inherits `LLM_BINDING_HOST` when `{ROLE}_LLM_BINDING_HOST` is not set.
+- Inherits `LLM_BINDING_API_KEY` when `{ROLE}_LLM_BINDING_API_KEY` is not set.
+- Inherits `LLM_TIMEOUT` when `{ROLE}_LLM_TIMEOUT` is not set.
+- Inherits `MAX_ASYNC` when `{ROLE}_MAX_ASYNC_LLM` is not set.
+- Provider options first inherit the base provider options, then apply role-specific provider options.
+
+Therefore, when you only want to change the model within the same provider, you only need to set the model name:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+OPENAI_LLM_REASONING_EFFORT=minimal
+
+# QUERY inherits host, API key, timeout, concurrency, and OPENAI_LLM_REASONING_EFFORT
+QUERY_LLM_MODEL=gpt-5
+```
+
+### Cross-Provider Overrides
+
+If a role's `{ROLE}_LLM_BINDING` differs from the base `LLM_BINDING`, it is a cross-provider configuration. The current rules are:
+
+- `{ROLE}_LLM_MODEL` must be set.
+- Non-Bedrock providers must set `{ROLE}_LLM_BINDING_API_KEY`.
+- If `{ROLE}_LLM_BINDING_HOST` is not set, LightRAG tries to use that provider's default host.
+- Provider options do not inherit base provider options. They start empty and only apply role-specific provider options.
+
+Example: use Ollama as the base for local extraction, then use OpenAI for final answers:
+
+```env
+LLM_BINDING=ollama
+LLM_MODEL=qwen3.5:9b
+LLM_BINDING_HOST=http://localhost:11434
+OLLAMA_LLM_NUM_CTX=32768
+
+QUERY_LLM_BINDING=openai
+QUERY_LLM_MODEL=gpt-5-mini
+QUERY_LLM_BINDING_HOST=https://api.openai.com/v1
+QUERY_LLM_BINDING_API_KEY=your_openai_api_key
+QUERY_OPENAI_LLM_REASONING_EFFORT=minimal
+```
+
+For cross-provider configurations, explicitly setting `{ROLE}_LLM_BINDING_HOST` is recommended to avoid confusion between the default host and the base provider endpoint.
+
+### Bedrock Authentication Rules
+
+Bedrock does not use `LLM_BINDING_API_KEY` and does not support `{ROLE}_LLM_BINDING_API_KEY`. Available authentication methods are:
+
+- Global SigV4: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_SESSION_TOKEN`, and `AWS_REGION`.
+- Role-level SigV4: `{ROLE}_AWS_ACCESS_KEY_ID`, `{ROLE}_AWS_SECRET_ACCESS_KEY`, `{ROLE}_AWS_SESSION_TOKEN`, and `{ROLE}_AWS_REGION`.
+- Process-level bearer token: `AWS_BEARER_TOKEN_BEDROCK`. This is an AWS SDK process-level setting and cannot be overridden per role.
+
+Role-level Bedrock example:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_openai_api_key
+
+EXTRACT_LLM_BINDING=bedrock
+EXTRACT_LLM_MODEL=us.amazon.nova-lite-v1:0
+EXTRACT_LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+EXTRACT_AWS_REGION=us-west-2
+EXTRACT_AWS_ACCESS_KEY_ID=your_extract_access_key
+EXTRACT_AWS_SECRET_ACCESS_KEY=your_extract_secret_key
+EXTRACT_AWS_SESSION_TOKEN=your_optional_session_token
+EXTRACT_BEDROCK_LLM_TEMPERATURE=0.0
+EXTRACT_BEDROCK_LLM_MAX_TOKENS=2048
+```
+
+## Provider Behavior Matrix
+
+| Provider | Role-level host/base_url | Role-level API key | Authentication limitations |
+| --- | --- | --- | --- |
+| `openai` | Supported, passed to the OpenAI-compatible client through `{ROLE}_LLM_BINDING_HOST`. | Supports `{ROLE}_LLM_BINDING_API_KEY`; when unset within the same provider, it inherits the base `LLM_BINDING_API_KEY`. | Currently mainly API key / Bearer mode. |
+| `ollama` | Supported, passed to the Ollama client through `{ROLE}_LLM_BINDING_HOST`. | Supports `{ROLE}_LLM_BINDING_API_KEY`; when unset within the same provider, it inherits the base key. If no key reaches the lower layer, it falls back to `OLLAMA_API_KEY`. | Bearer header. |
+| `lollms` | Supported, using `{ROLE}_LLM_BINDING_HOST` as `base_url`. | Supports `{ROLE}_LLM_BINDING_API_KEY`; when unset within the same provider, it inherits the base key. | Bearer header. |
+| `azure_openai` | Supported, using `{ROLE}_LLM_BINDING_HOST` as the Azure endpoint. | Supports `{ROLE}_LLM_BINDING_API_KEY`; when unset within the same provider, it inherits the base key and may also fall back to `AZURE_OPENAI_API_KEY`. | `AZURE_OPENAI_API_VERSION` is a global environment variable and does not support role-level overrides. |
+| `bedrock` | Supported, using `{ROLE}_LLM_BINDING_HOST` as `endpoint_url`; `DEFAULT_BEDROCK_ENDPOINT` means letting the AWS SDK choose. | Generic API keys are not supported. | Uses global or role-level SigV4. `AWS_BEARER_TOKEN_BEDROCK` is process-level and cannot be overridden per role. |
+| `gemini` | Supported, passed to the Google GenAI client through `{ROLE}_LLM_BINDING_HOST`; `DEFAULT_GEMINI_ENDPOINT` means using the SDK default endpoint. | AI Studio mode supports `{ROLE}_LLM_BINDING_API_KEY`. | Vertex AI is controlled by `GOOGLE_GENAI_USE_VERTEXAI`, `GOOGLE_CLOUD_PROJECT`, `GOOGLE_CLOUD_LOCATION`, and `GOOGLE_APPLICATION_CREDENTIALS`; all are process-level settings. |
+
+## Recommended Configuration Patterns
+
+### 1. Same Provider, Only Change the Model
+
+Suitable when using the same OpenAI key and endpoint, but using a stronger model for final answers:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+OPENAI_LLM_REASONING_EFFORT=minimal
+
+QUERY_LLM_MODEL=gpt-5
+QUERY_MAX_ASYNC_LLM=2
+```
+
+`QUERY` inherits the base host, API key, and `OPENAI_LLM_REASONING_EFFORT`.
+
+### 2. Same Provider, Change the Model and Tune Options
+
+Suitable when the base model is used for extraction and final answers use a higher reasoning effort:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+OPENAI_LLM_REASONING_EFFORT=minimal
+OPENAI_LLM_MAX_COMPLETION_TOKENS=4096
+
+QUERY_LLM_MODEL=gpt-5
+QUERY_OPENAI_LLM_REASONING_EFFORT=medium
+QUERY_OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+QUERY_LLM_TIMEOUT=240
+```
+
+### 3. Same Provider with Different Endpoints and API Keys
+
+Suitable when all roles use the `openai` binding, but some roles access the official OpenAI API while others access a local vLLM, SGLang, OpenRouter, or another OpenAI-compatible endpoint. In the example below:
+
+- `EXTRACT` uses the official OpenAI `gpt-5-mini`.
+- `QUERY` uses the official OpenAI `gpt-5.4` with a separate OpenAI key.
+- `KEYWORD` uses `Qwen3.5-35B-A3B` deployed by local vLLM.
+
+```env
+###########################################################################
+# Base LLM fallback. Keep it aligned with EXTRACT so unspecified roles still
+# have a valid OpenAI configuration.
+###########################################################################
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_extract_openai_api_key
+LLM_TIMEOUT=180
+MAX_ASYNC=4
+
+###########################################################################
+# IMPORTANT:
+# Do not set global OPENAI_LLM_REASONING_EFFORT here if any same-provider role
+# points to a local OpenAI-compatible server that does not support it.
+# Use role-specific OPENAI options instead.
+###########################################################################
+# OPENAI_LLM_REASONING_EFFORT=none
+
+###########################################################################
+# EXTRACT: OpenAI official API, gpt-5-mini
+###########################################################################
+EXTRACT_LLM_BINDING=openai
+EXTRACT_LLM_MODEL=gpt-5-mini
+EXTRACT_LLM_BINDING_HOST=https://api.openai.com/v1
+EXTRACT_LLM_BINDING_API_KEY=your_extract_openai_api_key
+EXTRACT_OPENAI_LLM_REASONING_EFFORT=low
+EXTRACT_OPENAI_LLM_MAX_COMPLETION_TOKENS=4096
+EXTRACT_MAX_ASYNC_LLM=4
+EXTRACT_LLM_TIMEOUT=180
+
+###########################################################################
+# QUERY: OpenAI official API, gpt-5.4, separate API key
+###########################################################################
+QUERY_LLM_BINDING=openai
+QUERY_LLM_MODEL=gpt-5.4
+QUERY_LLM_BINDING_HOST=https://api.openai.com/v1
+QUERY_LLM_BINDING_API_KEY=your_query_openai_api_key
+QUERY_OPENAI_LLM_REASONING_EFFORT=medium
+QUERY_OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+QUERY_MAX_ASYNC_LLM=2
+QUERY_LLM_TIMEOUT=240
+
+###########################################################################
+# KEYWORD: local vLLM OpenAI-compatible endpoint, Qwen3.5-35B-A3B
+###########################################################################
+KEYWORD_LLM_BINDING=openai
+KEYWORD_LLM_MODEL=Qwen3.5-35B-A3B
+KEYWORD_LLM_BINDING_HOST=http://localhost:8000/v1
+# If vLLM was started with --api-key, use the same value here.
+# If vLLM has no auth, still set a non-empty dummy value to avoid falling
+# back to the official OpenAI key.
+KEYWORD_LLM_BINDING_API_KEY=local-vllm-api-key
+KEYWORD_OPENAI_LLM_MAX_TOKENS=2048
+# Optional for Qwen-style models served by vLLM when you want to disable thinking.
+KEYWORD_OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
+KEYWORD_MAX_ASYNC_LLM=4
+KEYWORD_LLM_TIMEOUT=180
+```
+
+This pattern is not cross-provider because all three roles use the `openai` binding. LightRAG passes each role's `*_LLM_BINDING_HOST` and `*_LLM_BINDING_API_KEY` to the OpenAI-compatible client separately.
+
+Note: provider options within the same provider inherit the base `OPENAI_LLM_*`. If the local vLLM server does not support official OpenAI parameters such as `reasoning_effort`, do not set the global `OPENAI_LLM_REASONING_EFFORT`; use role-level variables such as `EXTRACT_OPENAI_LLM_REASONING_EFFORT` and `QUERY_OPENAI_LLM_REASONING_EFFORT` instead.
+
+### 4. One Role Crosses Provider
+
+Suitable when the base uses an official OpenAI model and only keyword extraction uses local Ollama:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_openai_api_key
+OPENAI_LLM_REASONING_EFFORT=medium
+
+KEYWORD_LLM_BINDING=ollama
+KEYWORD_LLM_MODEL=qwen3.5:9b
+KEYWORD_LLM_BINDING_HOST=http://localhost:11434
+KEYWORD_LLM_BINDING_API_KEY=ollama-local-key
+KEYWORD_OLLAMA_LLM_NUM_CTX=32768
+```
+
+For cross-provider configurations, Ollama options do not inherit OpenAI options. For local Ollama, `KEYWORD_LLM_BINDING_API_KEY` can usually use a placeholder value; the current cross-provider validation requires non-Bedrock roles to explicitly provide a role-level API key.
+
+### 5. Specify a Dedicated Multimodal Model for VLM
+
+Suitable when text tasks use a cheaper model and multimodal analysis uses a vision-language model:
+
+```env
+VLM_PROCESS_ENABLE=true
+
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+
+VLM_LLM_BINDING=openai
+VLM_LLM_MODEL=gpt-4o
+VLM_OPENAI_LLM_MAX_TOKENS=4096
+VLM_MAX_ASYNC_LLM=2
+VLM_LLM_TIMEOUT=240
+```
+
+If VLM uses the same provider and key, `VLM_LLM_BINDING_HOST` and `VLM_LLM_BINDING_API_KEY` can be omitted.
+
+`VLM_PROCESS_ENABLE` is the master switch for multimodal analysis. When `false`, the pipeline emits a warning and skips every multimodal item without invoking the VLM. When `true`, the effective VLM binding (`VLM_LLM_BINDING` if set, otherwise `LLM_BINDING`) must support image inputs. The following providers are vision-capable: `openai`, `azure_openai`, `gemini`, `bedrock`, `ollama`, `anthropic`. `lollms` is rejected at startup because it cannot accept image inputs.
+
+### 6. Bedrock Role-Level SigV4 Credentials
+
+Suitable when only one role accesses Bedrock and uses independent IAM/STS credentials:
+
+```env
+LLM_BINDING=openai
+LLM_MODEL=gpt-5-mini
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_openai_api_key
+
+QUERY_LLM_BINDING=bedrock
+QUERY_LLM_MODEL=us.amazon.nova-lite-v1:0
+QUERY_LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+QUERY_AWS_REGION=us-east-1
+QUERY_AWS_ACCESS_KEY_ID=your_query_access_key
+QUERY_AWS_SECRET_ACCESS_KEY=your_query_secret_key
+QUERY_AWS_SESSION_TOKEN=your_optional_session_token
+QUERY_BEDROCK_LLM_MAX_TOKENS=4096
+QUERY_BEDROCK_LLM_TEMPERATURE=0.2
+```
+
+Do not set `QUERY_LLM_BINDING_API_KEY`; Bedrock rejects that configuration.
+
+## Caveats
+
+- Within the same provider, provider options such as `OPENAI_LLM_REASONING_EFFORT`, `OPENAI_LLM_MAX_TOKENS`, `OLLAMA_LLM_NUM_CTX`, and `GEMINI_LLM_THINKING_CONFIG` are inherited automatically.
+- There is currently no clean role-level semantic for "unsetting an inherited provider option". If a model in a same-provider role does not support a base option, explicitly override that option for the role with a supported value, or configure the role as cross-provider and set only the role-specific provider options it supports.
+- `AZURE_OPENAI_DEPLOYMENT` and `AZURE_OPENAI_API_VERSION` for `azure_openai` are global environment variables. If `AZURE_OPENAI_DEPLOYMENT` is set, it may take precedence over the role model name.
+- Gemini Vertex AI mode is controlled by process-level Google environment variables. Within a single LightRAG process, you cannot have some roles use Vertex AI while other roles use AI Studio API keys.
+- In Docker/Compose, `LLM_BINDING_HOST` usually needs to use a container-reachable address such as `host.docker.internal`; role-level hosts follow the same principle.
+- Restart LightRAG Server after modifying `.env`. Some IDE terminals preload `.env`, so opening a new terminal session is recommended to confirm that environment variables take effect.

+ 170 - 0
docs/UV_LOCK_GUIDE.md

@@ -0,0 +1,170 @@
+# uv.lock Update Guide
+
+## What is uv.lock?
+
+`uv.lock` is uv's lock file. It captures the exact version of every dependency, including transitive ones, much like:
+- Node.js `package-lock.json`
+- Rust `Cargo.lock`
+- Python Poetry `poetry.lock`
+
+Keeping `uv.lock` in version control guarantees that everyone installs the same dependency set.
+
+## When does uv.lock change?
+
+### Situations where it does *not* change automatically
+
+- Running `uv sync --frozen`
+- Building Docker images that call `uv sync --frozen`
+- Editing source code without touching dependency metadata
+
+### Situations where it will change
+
+1. **`uv lock` or `uv lock --upgrade`**
+
+   ```bash
+   uv lock                # Resolve according to current constraints
+   uv lock --upgrade      # Re-resolve and upgrade to the newest compatible releases
+   ```
+
+   Use these commands after modifying `pyproject.toml`, when you want fresh dependency versions, or if the lock file was deleted or corrupted.
+
+2. **`uv add`**
+
+   ```bash
+   uv add requests           # Adds the dependency and updates both files
+   uv add --dev pytest       # Adds a dev dependency
+   ```
+
+   `uv add` edits `pyproject.toml` and refreshes `uv.lock` in one step.
+
+3. **`uv remove`**
+
+   ```bash
+   uv remove requests
+   ```
+
+   This removes the dependency from `pyproject.toml` and rewrites `uv.lock`.
+
+4. **`uv sync` without `--frozen`**
+
+   ```bash
+   uv sync
+   ```
+
+   Normally this only installs what is already locked. However, if `pyproject.toml` and `uv.lock` disagree or the lock file is missing, uv will regenerate and update `uv.lock`. In CI and production builds you should prefer `uv sync --frozen` to prevent unintended updates.
+
+## Example workflows
+
+### Scenario 1: Add a new dependency
+
+```bash
+# Recommended: let uv handle both files
+uv add fastapi
+git add pyproject.toml uv.lock
+git commit -m "Add fastapi dependency"
+
+# Manual alternative
+# 1. Edit pyproject.toml
+# 2. Regenerate the lock file
+uv lock
+git add pyproject.toml uv.lock
+git commit -m "Add fastapi dependency"
+```
+
+### Scenario 2: Relax or tighten a version constraint
+
+```bash
+# 1. Edit the requirement in pyproject.toml,
+#    e.g. openai>=1.0.0,<2.0.0 -> openai>=1.5.0,<2.0.0
+
+# 2. Re-resolve the lock file
+uv lock
+
+# 3. Commit both files
+git add pyproject.toml uv.lock
+git commit -m "Update openai to >=1.5.0"
+```
+
+### Scenario 3: Upgrade everything to the newest compatible versions
+
+```bash
+uv lock --upgrade
+git diff uv.lock
+git add uv.lock
+git commit -m "Upgrade dependencies to latest compatible versions"
+```
+
+### Scenario 4: Teammate syncing the project
+
+```bash
+git pull               # Fetch latest code and lock file
+uv sync --frozen       # Install exactly what uv.lock specifies
+```
+
+## Using uv.lock in Docker
+
+```dockerfile
+RUN uv sync --frozen --no-dev --extra api
+```
+
+`--frozen` guarantees reproducible builds because uv will refuse to deviate from the locked versions.
+`--extra api` installs the optional dependencies required by the API server.
+
+## Generating a lock file that includes offline dependencies
+
+If you need `uv.lock` to capture the optional offline stacks, regenerate it with the relevant extras enabled:
+
+```bash
+uv lock --extra api --extra offline
+```
+
+This command resolves the base project requirements plus both the `api` and `offline` optional dependency sets, ensuring downstream `uv sync --frozen --extra api --extra offline` installs work without further resolution.
+
+## Frequently asked questions
+
+- **`uv.lock` is almost 1 MB. Does that matter?**
+  No. The file is only read during dependency resolution.
+
+- **Should we commit `uv.lock`?**
+  Yes. Commit it so collaborators and CI jobs share the same dependency graph.
+
+- **Deleted the lock file by accident?**
+  Run `uv lock` to regenerate it from `pyproject.toml`.
+
+- **Can `uv.lock` and `requirements.txt` coexist?**
+  They can, but maintaining both is redundant. Prefer relying on `uv.lock` alone whenever possible.
+
+- **How do I inspect locked versions?**
+  ```bash
+  uv tree
+  grep -A5 'name = "openai"' uv.lock
+  ```
+
+## Best practices
+
+### Recommended
+
+1. Commit `uv.lock` alongside `pyproject.toml`.
+2. Use `uv sync --frozen` in CI, Docker, and other reproducible environments.
+3. Use plain `uv sync` during local development if you want uv to reconcile the lock for you.
+4. Run `uv lock --upgrade` periodically to pick up the latest compatible releases.
+5. Regenerate the lock file immediately after changing dependency constraints.
+
+### Avoid
+
+1. Running `uv sync` without `--frozen` in CI or production pipelines.
+2. Editing `uv.lock` by hand—uv will overwrite manual edits.
+3. Ignoring lock file diffs in code reviews—unexpected dependency changes can break builds.
+
+## Summary
+
+| Command               | Updates `uv.lock` | Typical use                               |
+|-----------------------|-------------------|-------------------------------------------|
+| `uv lock`             | ✅ Yes            | After editing constraints                 |
+| `uv lock --upgrade`   | ✅ Yes            | Upgrade to the newest compatible versions |
+| `uv add <pkg>`        | ✅ Yes            | Add a dependency                          |
+| `uv remove <pkg>`     | ✅ Yes            | Remove a dependency                       |
+| `uv sync`             | ⚠️ Maybe          | Local development; can regenerate the lock |
+| `uv sync --frozen`    | ❌ No             | CI/CD, Docker, reproducible builds        |
+
+Remember: `uv.lock` only changes when you run a command that tells it to. Keep it in sync with your project and commit it whenever it changes.

+ 1033 - 0
env.docker-compose-full

@@ -0,0 +1,1033 @@
+### All configurable environment variables must appear in this sample file, either active or commented out
+### Setup tool `make env-*` uses this file to generate final .env file
+
+### Target environment of this env file: host/compose (compose is for Docker or Kubernetes)
+LIGHTRAG_RUNTIME_TARGET=compose
+
+###########################
+### Server Configuration
+###########################
+HOST=0.0.0.0
+PORT=9621
+WEBUI_TITLE='My Graph KB'
+WEBUI_DESCRIPTION='Simple and Fast Graph Based RAG System'
+# WORKERS=2
+### gunicorn worker timeout(as default LLM request timeout if LLM_TIMEOUT is not set)
+# TIMEOUT=150
+# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
+
+### Path Prefix Configuration (Optional)
+### Used to host multiple LightRAG instances on one host behind a reverse
+### proxy that routes by site prefix. Leave unset (or empty) for a
+### single-instance deployment.
+###
+### - LIGHTRAG_API_PREFIX  : reverse-proxy prefix the upstream proxy strips
+###                          before forwarding (passed to FastAPI as root_path).
+###
+### See docs/MultiSiteDeployment.md for end-to-end examples.
+# LIGHTRAG_API_PREFIX=/site01
+
+### Optional SSL Configuration
+### Docker note: generated compose files mount staged certs at /app/data/certs/ inside the container
+# SSL=true
+# SSL_CERTFILE=/path/to/cert.pem
+# SSL_KEYFILE=/path/to/key.pem
+
+### Directory Configuration (defaults to current working directory)
+### Default value is: ./inputs ./rag_storage
+# INPUT_DIR=<absolute_path_for_doc_input_dir>
+# WORKING_DIR=<absolute_path_for_working_dir>
+
+### Tiktoken cache directory (Store cached files in this folder for offline deployment)
+# TIKTOKEN_CACHE_DIR=/app/data/tiktoken
+
+### Ollama Emulating Model and Tag
+# OLLAMA_EMULATING_MODEL_NAME=lightrag
+OLLAMA_EMULATING_MODEL_TAG=latest
+
+### Max nodes for graph retrieval (Ensure WebUI local settings are also updated, which is limited to this value)
+# MAX_GRAPH_NODES=1000
+
+### Logging level
+# LOG_LEVEL=INFO
+# VERBOSE=False
+# LOG_MAX_BYTES=10485760
+# LOG_BACKUP_COUNT=5
+### Logfile location (defaults to current working directory)
+# LOG_DIR=/path/to/log/directory
+# LIGHTRAG_PERFORMANCE_TIMING_LOGS=false
+
+#####################################
+### Login and API-Key Configuration
+#####################################
+# AUTH_ACCOUNTS='admin:admin123,user1:{bcrypt}$2b$12$S8Yu.gCbuAbNTJFB.231gegTwr5pgrFxc8H9kXQ4/sduFBHkhM8Ka'
+# TOKEN_SECRET=lightrag-jwt-default-secret-key!
+# JWT_ALGORITHM=HS256
+# TOKEN_EXPIRE_HOURS=48
+# GUEST_TOKEN_EXPIRE_HOURS=24
+
+### Token Auto-Renewal Configuration (Sliding Window Expiration)
+### Enable automatic token renewal to prevent active users from being logged out
+### When enabled, tokens will be automatically renewed when remaining time < threshold
+# TOKEN_AUTO_RENEW=true
+### Token renewal threshold (0.0 - 1.0)
+### Renew token when remaining time < (total time * threshold)
+### Default: 0.5 (renew when 50% time remaining)
+### Examples:
+###   0.5 = renew when 24h token has 12h left
+###   0.25 = renew when 24h token has 6h left
+# TOKEN_RENEW_THRESHOLD=0.5
+### Note: Token renewal is automatically skipped for certain endpoints:
+###   - /health: Health check endpoint (no authentication required)
+###   - /documents/paginated: Frequently polled by client (5-30s interval)
+###   - /documents/pipeline_status: Very frequently polled by client (2s interval)
+###   - Rate limit: Minimum 60 seconds between renewals for same user
+
+### API-Key to access LightRAG Server API
+### Use this key in HTTP requests with the 'X-API-Key' header
+### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
+# LIGHTRAG_API_KEY=your-secure-api-key-here
+# WHITELIST_PATHS=/health,/api/*
+
+######################################################################################
+### Query Configuration
+###
+### How to control the context length sent to LLM:
+###    MAX_ENTITY_TOKENS + MAX_RELATION_TOKENS < MAX_TOTAL_TOKENS
+###    Chunk_Tokens = MAX_TOTAL_TOKENS - Actual_Entity_Tokens - Actual_Relation_Tokens
+######################################################################################
+# LLM response cache for query (Not valid for streaming response)
+ENABLE_LLM_CACHE=true
+# COSINE_THRESHOLD=0.2
+### Number of entities or relations retrieved from KG
+# TOP_K=40
+### Maximum number of chunks for naive vector search
+# CHUNK_TOP_K=20
+### control the actual entities sent to LLM
+# MAX_ENTITY_TOKENS=6000
+### control the actual relations sent to LLM
+# MAX_RELATION_TOKENS=8000
+### control the maximum tokens sent to LLM (include entities, relations and chunks)
+# MAX_TOTAL_TOKENS=30000
+
+### chunk selection strategies
+###     VECTOR: Pick KG chunks by vector similarity; the chunks delivered to the LLM align more closely with naive retrieval
+###     WEIGHT: Pick KG chunks by entity and chunk weight; the chunks delivered to the LLM are more purely KG-related
+###     If reranking is enabled, the impact of chunk selection strategies will be diminished.
+# KG_CHUNK_PICK_METHOD=VECTOR
+
+### maximum number of related chunks per source entity or relation
+###     The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
+###     Higher values increase re-ranking time
+# RELATED_CHUNK_NUMBER=5
+
+#########################################################
+### Reranking configuration
+### RERANK_BINDING type: null, cohere, jina, aliyun
+### For rerank model deployed by vLLM use cohere binding
+### If LightRAG deployed in Docker:
+###    uses host.docker.internal instead of localhost in RERANK_BINDING_HOST
+#########################################################
+RERANK_BINDING=cohere
+# RERANK_BINDING=null
+RERANK_MODEL=BAAI/bge-reranker-v2-m3
+RERANK_BINDING_HOST=http://localhost:8000/rerank
+RERANK_BINDING_API_KEY=3f5abc937e4263cdefc4f77df4cb0c37
+
+### rerank score chunk filter(set to 0.0 to keep all chunks, 0.6 or above if LLM is not strong enough)
+# MIN_RERANK_SCORE=0.0
+### Enable rerank by default in query params when RERANK_BINDING is not null
+# RERANK_BY_DEFAULT=True
+
+### Rerank concurrency and timeout (independent from base LLM settings)
+### MAX_ASYNC_RERANK falls back to MAX_ASYNC when unset.
+### RERANK_TIMEOUT has its own default (30s) since reranker calls are
+### typically much shorter than full LLM generation.
+# MAX_ASYNC_RERANK=4
+# RERANK_TIMEOUT=30
+
+### Cohere AI
+# # RERANK_MODEL=rerank-v3.5
+# # RERANK_BINDING_HOST=https://api.cohere.com/v2/rerank
+# # RERANK_BINDING_API_KEY=your_rerank_api_key_here
+### Cohere rerank chunking configuration (useful for models with token limits like ColBERT)
+# RERANK_ENABLE_CHUNKING=true
+# RERANK_MAX_TOKENS_PER_DOC=480
+
+### Aliyun Dashscope
+# # RERANK_MODEL=gte-rerank-v2
+# # RERANK_BINDING_HOST=https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank
+# # RERANK_BINDING_API_KEY=your_rerank_api_key_here
+
+### Jina AI
+# # RERANK_MODEL=jina-reranker-v2-base-multilingual
+# # RERANK_BINDING_HOST=https://api.jina.ai/v1/rerank
+# # RERANK_BINDING_API_KEY=your_rerank_api_key_here
+
+### For local deployment Embedding and Reranker with vLLM (OpenAI-compatible API)
+### Wizard metadata used to preserve the chosen deployment provider across setup reruns
+LIGHTRAG_SETUP_EMBEDDING_PROVIDER=vllm
+LIGHTRAG_SETUP_RERANK_PROVIDER=vllm
+VLLM_EMBED_MODEL=BAAI/bge-m3
+VLLM_EMBED_PORT=8001
+VLLM_EMBED_DEVICE=cuda
+### VLLM_EMBED_API_KEY is passed as --api-key to vLLM; synced to EMBEDDING_BINDING_API_KEY; auto-generated if blank
+VLLM_EMBED_API_KEY=7f6904c8185e908a1e0bdf9f69cd3ccc
+# VLLM_EMBED_EXTRA_ARGS=
+VLLM_RERANK_MODEL=BAAI/bge-reranker-v2-m3
+VLLM_RERANK_PORT=8000
+VLLM_RERANK_DEVICE=cuda
+### VLLM_RERANK_API_KEY is passed as --api-key to vLLM; synced to RERANK_BINDING_API_KEY; auto-generated if blank
+VLLM_RERANK_API_KEY=3f5abc937e4263cdefc4f77df4cb0c37
+### Use float16 for GPU mode. CPU mode uses the official vLLM CPU image.
+# VLLM_USE_CPU=1
+### Set to 1 for CPU mode, unset for GPU mode
+# CUDA_VISIBLE_DEVICES=-1
+### Set to -1 to disable CUDA (CPU mode), or specific GPU IDs for GPU mode
+# NVIDIA_VISIBLE_DEVICES=0
+### Optional Docker runtime equivalent; generated GPU compose honors either variable.
+# VLLM_RERANK_EXTRA_ARGS=
+
+########################################
+### Document processing configuration
+########################################
+### Document processing output language: English, Chinese, French, German ...
+SUMMARY_LANGUAGE=Chinese
+# SUMMARY_LANGUAGE=English
+
+### Enable JSON-structured output for entity extraction
+### Default behavior: JSON output is disabled when ENTITY_EXTRACTION_USE_JSON is unset
+### JSON output incurs higher latency but delivers improved reliability
+ENTITY_EXTRACTION_USE_JSON=true
+
+### Optional external YAML profile for entity type guidance and extraction examples
+### Profiles are loaded from PROMPT_DIR/entity_type (PROMPT_DIR defaults to ./prompts).
+### A reference template is shipped at prompts/samples/entity_type_prompt.sample.yml;
+# ENTITY_TYPE_PROMPT_FILE=entity_type_prompt.yml
+# PROMPT_DIR=<absolute_path_for_prompt_dir>
+
+### Multimodal parsing/analyze integration
+### Optional parser routing rules. Example for VLM & MinerU enabled configuration:
+###     LIGHTRAG_PARSER=*:native-iteP,*:mineru-iteP,*:legacy-R
+### Rules may be separated with commas or semicolons. Rules match file suffixes
+### (pdf), not full names (*.pdf), and are checked left-to-right.
+### If mineru/docling appears in LIGHTRAG_PARSER, the corresponding endpoint
+### below must be configured before server startup.
+### See docs/FileProcessingPipeline.md for detail
+LIGHTRAG_PARSER=*:native-teP,*:legacy-R
+
+### Async parser service protocol (optional)
+### Configure these when using remote MinerU/Docling async services
+
+### ---- MinerU shared parameters (both local and official modes) ----
+### MinerU API protocol. Choose one active mode.
+### - official: MinerU precision API v4. Requires MINERU_API_TOKEN.
+### - local: self-hosted mineru-api / mineru-router base URL.
+MINERU_API_MODE=local
+# MINERU_POLL_INTERVAL_SECONDS=2
+# MINERU_MAX_POLLS=180
+# MINERU_LANGUAGE=ch
+# MINERU_ENABLE_TABLE=true
+# MINERU_ENABLE_FORMULA=true
+# MINERU_PAGE_RANGES=
+### MINERU_PAGE_RANGES semantics differ by mode:
+### - official: forwarded verbatim, supports e.g. "1-3,5,7-9".
+### - local:    only a single page ("3") or simple range ("1-10"); comma
+###             lists are rejected at startup.
+### When switching modes, double-check this constraint.
+
+### ---- MinerU local-only (MINERU_API_MODE=local) ----
+MINERU_LOCAL_ENDPOINT=http://127.0.0.1:8000
+### MINERU_LOCAL_BACKEND: which mineru-api backend handles the parse.
+###   Accepted values (per mineru-api POST /tasks form parameter `backend`):
+###     hybrid-auto-engine - pipeline + VLM combo with auto-selected local
+###                          engine (mineru-api's default). GPU required.
+###     pipeline           - CPU-friendly traditional pipeline; no VLM step.
+###     vlm-auto-engine    - VLM with auto-selected local inference engine
+###                          (sglang-engine / vllm-engine if GPU is available);
+###                          requires the matching engine extra preinstalled
+###                          on the mineru-api side, plus model weights.
+###   We ship `hybrid-auto-engine` -- requires the target mineru-api
+###   deployment to have a GPU plus the matching inference engine
+###   (sglang / vllm) and model weights installed. Switch to `pipeline`
+###   for CPU-only deployments without those dependencies.
+MINERU_LOCAL_BACKEND=hybrid-auto-engine
+### MINERU_LOCAL_PARSE_METHOD: parsing strategy for the pipeline component.
+###   Accepted values:
+###     auto - auto-detect embedded text-layer vs OCR per page (default).
+###     txt  - extract text from the embedded text layer only; fastest,
+###            but yields empty output on scanned PDFs without a text layer.
+###     ocr  - force OCR on every page regardless of text-layer quality;
+###            slowest, reliable on scanned or low-quality PDFs.
+###   Only consumed when MINERU_LOCAL_BACKEND is `pipeline` or
+###   `hybrid-auto-engine` (the pipeline arm of the hybrid pipeline).
+###   Pure VLM backends (`vlm-auto-engine`, `vlm-http-client`) ignore this
+###   parameter -- the VLM model handles layout/OCR natively.
+MINERU_LOCAL_PARSE_METHOD=auto
+### MINERU_LOCAL_IMAGE_ANALYSIS: enable VLM image/chart analysis pass for
+###   better caption and footnote recognition.
+###   Only consumed by `vlm-auto-engine`, `vlm-http-client`,
+###   `hybrid-auto-engine`, `hybrid-http-client`. The `pipeline` backend
+###   silently drops this flag -- its `_process_pipeline` does not accept
+###   the kwarg, so setting `false` under pipeline does NOT speed parsing
+###   up; pipeline never invokes the VLM image pass to begin with.
+###   Disable (`false`) on VLM / hybrid backends to skip the extra VLM
+###   round, trading image / chart semantic descriptions for faster parsing
+###   and lower GPU cost.
+MINERU_LOCAL_IMAGE_ANALYSIS=true
+# MINERU_LOCAL_START_PAGE_ID=0
+# MINERU_LOCAL_END_PAGE_ID=99999
+
+### ---- MinerU official-only (MINERU_API_MODE=official) ----
+# MINERU_API_TOKEN=your-api-key
+# MINERU_OFFICIAL_ENDPOINT=https://mineru.net
+# MINERU_MODEL_VERSION=vlm
+# MINERU_IS_OCR=false
+
+### Force re-upload of file to MinerU on every retry after failure
+### Disables caching of result outcomes
+# LIGHTRAG_FORCE_REPARSE_MINERU=false
+
+### Docling parser (docling-serve v1 / async API).
+###
+### Endpoint: base URL only — the client appends /v1/convert/file/async,
+###     /v1/status/poll/{task_id}?wait=<DOCLING_POLL_INTERVAL_SECONDS>,
+###     /v1/result/{task_id} itself.
+### Pipeline shape (pipeline=standard, target_type=zip,
+###     to_formats=[json,md], image_export_mode=referenced) is fixed in
+###     code so the sidecar flow stays self-consistent — flipping any of
+###     these would break the adapter and is therefore not exposed as env.
+###
+### OCR tunables:
+### - DOCLING_DO_OCR: master switch; when false the engine relies only on
+###     text-layer extraction.
+### - DOCLING_FORCE_OCR: when true, OCR every page regardless of text-layer
+###     quality (slower, useful for scanned PDFs with bad text layers).
+### - DOCLING_OCR_ENGINE: explicit engine selection (DEPRECATED in the
+###     docling-serve OpenAPI but still honored for older deployments).
+### - DOCLING_OCR_PRESET: recommended replacement for DOCLING_OCR_ENGINE.
+### - DOCLING_OCR_LANG: JSON array (e.g. ["en","zh"]) or comma-separated
+###     list. Empty (default) lets the OCR engine pick its default.
+### - DOCLING_DO_FORMULA_ENRICHMENT: when true, the code-formula model runs
+###     and `texts[*].label="formula"` items carry LaTeX in `text`. Default
+###     false because the model may not be present on every deployment;
+###     adapter falls back to plain-text formulas when disabled.
+###
+### Polling budget (server-side long-poll; client does NOT add extra sleep):
+### - DOCLING_POLL_INTERVAL_SECONDS: ``?wait=N`` value sent to
+###     /v1/status/poll/{task_id}. Larger N = fewer round trips per parse;
+###     bound by your reverse-proxy idle timeout. Default 5.
+### - DOCLING_MAX_POLLS: max polling rounds before raising TimeoutError.
+###     Worst-case wall-clock budget ≈
+###     DOCLING_POLL_INTERVAL_SECONDS × DOCLING_MAX_POLLS. Default 240
+###     (≈ 20 minutes at wait=5s); raise for very large PDFs.
+###
+### Bundle cache controls:
+### - DOCLING_ENGINE_VERSION: recorded in <base>.docling_raw/_manifest.json.
+###     Mismatch with the recorded value forces a cache miss → re-download.
+###     Leave empty to skip this check.
+### - LIGHTRAG_FORCE_REPARSE_DOCLING: when truthy ("1"/"true"), bypass the
+###     docling raw cache and re-upload on every parse_docling call.
+### - DOCLING_BBOX_ATTRIBUTES: override the doc-level bbox_attributes
+###     written into <base>.blocks.jsonl meta. Default
+###     {"origin":"LEFTBOTTOM"} matches docling's default coordinate system.
+DOCLING_ENDPOINT=http://localhost:5001
+DOCLING_DO_OCR=true
+DOCLING_FORCE_OCR=true
+DOCLING_DO_FORMULA_ENRICHMENT=false
+# DOCLING_OCR_ENGINE=auto
+# DOCLING_OCR_PRESET=auto
+# DOCLING_OCR_LANG=
+# DOCLING_POLL_INTERVAL_SECONDS=5
+# DOCLING_MAX_POLLS=240
+# DOCLING_BBOX_ATTRIBUTES={"origin":"LEFTBOTTOM"}
+### Force re-upload of file to Docling on every retry after failure
+### Disables caching of result outcomes
+# LIGHTRAG_FORCE_REPARSE_DOCLING=false
+
+### File upload size limit (in bytes)
+### Default: 104857600 (100MB)
+### Set to 0 or None for unlimited upload size
+### Examples:
+###   52428800  = 50MB
+###   104857600 = 100MB (default)
+###   209715200 = 200MB
+### Note: If using Nginx as reverse proxy, also configure client_max_body_size
+# MAX_UPLOAD_SIZE=104857600
+
+### Chunk size for document splitting, 500~1500 is recommended
+# CHUNK_SIZE=1200
+# CHUNK_OVERLAP_SIZE=100
+
+### Fixed-token chunker (process_options=F, default) settings
+###     CHUNK_F_OVERLAP_SIZE: token overlap; falls back to CHUNK_OVERLAP_SIZE when unset
+###     CHUNK_F_SPLIT_BY_CHARACTER: optional separator string; pre-segment before token windowing
+###     CHUNK_F_SPLIT_BY_CHARACTER_ONLY: when true, raise on oversize segment instead of token re-split
+# CHUNK_F_OVERLAP_SIZE=100
+# CHUNK_F_SPLIT_BY_CHARACTER=
+# CHUNK_F_SPLIT_BY_CHARACTER_ONLY=false
+
+### Recursive character chunker (process_options=R) settings
+###     CHUNK_R_SIZE: per-strategy chunk_token_size override; falls back to CHUNK_SIZE when unset
+###     CHUNK_R_OVERLAP_SIZE: token overlap between adjacent chunks; falls back to CHUNK_OVERLAP_SIZE when unset
+###     CHUNK_R_SEPARATORS: JSON array of cascaded separators tried by RecursiveCharacterTextSplitter.
+###       Default includes CJK sentence-ending punctuation so Chinese / mixed-language
+###       documents split at semantic boundaries.  Order: paragraph (\n\n) > line (\n) >
+###       Chinese sentence-end (。!?) > Chinese semi-clause (;,) > space > char.
+###       English ".?!" are intentionally omitted (literal match would split "0.95" /
+###       "e.g."); the English path falls through space / char as before.
+# CHUNK_R_SIZE=1200
+# CHUNK_R_OVERLAP_SIZE=100
+# CHUNK_R_SEPARATORS=["\n\n","\n","。","!","?",";",","," ",""]
+
+### Semantic vector chunker (process_options=V) settings
+###     CHUNK_V_SIZE: per-strategy chunk_token_size hard cap (oversized pieces are
+###       re-split via R before being emitted); falls back to CHUNK_SIZE when unset
+###     CHUNK_V_BREAKPOINT_THRESHOLD_TYPE: percentile | standard_deviation | interquartile | gradient
+###     CHUNK_V_BREAKPOINT_THRESHOLD_AMOUNT: leave empty to use the LangChain per-type default (e.g. 95 for percentile)
+###     CHUNK_V_BUFFER_SIZE: number of adjacent sentences combined when computing distances
+###     CHUNK_V_SENTENCE_SPLIT_REGEX: regex fed to LangChain SemanticChunker for the
+###       initial sentence split.  Default extends the upstream English-only pattern
+###       with CJK sentence-end punctuation (。?!).  Override if you need a
+###       different language mix.  Note: env value is the raw regex string, no JSON
+###       quoting.
+# CHUNK_V_SIZE=1200
+# CHUNK_V_BREAKPOINT_THRESHOLD_TYPE=percentile
+# CHUNK_V_BREAKPOINT_THRESHOLD_AMOUNT=
+# CHUNK_V_BUFFER_SIZE=1
+# CHUNK_V_SENTENCE_SPLIT_REGEX=(?<=[.?!])\s+|(?<=[。?!])
+
+### Paragraph semantic chunker (process_options=P) settings
+###     CHUNK_P_SIZE: per-strategy chunk_token_size override; defaults to 2000 when unset
+###       (does NOT fall back to CHUNK_SIZE — paragraph-semantic merging needs more
+###       headroom than the global default to keep related paragraphs together).
+###     CHUNK_P_OVERLAP_SIZE: overlap for prose fallback and table-bridge context;
+###                           falls back to CHUNK_OVERLAP_SIZE when unset
+# CHUNK_P_SIZE=2000
+# CHUNK_P_OVERLAP_SIZE=100
+
+### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
+# FORCE_LLM_SUMMARY_ON_MERGE=8
+### Max description token size to trigger LLM summary
+# SUMMARY_MAX_TOKENS=1200
+### Recommended LLM summary output length in tokens
+# SUMMARY_LENGTH_RECOMMENDED=600
+### Maximum context size sent to LLM for description summary
+# SUMMARY_CONTEXT_SIZE=12000
+### Maximum token size allowed for entity extraction input context
+# MAX_EXTRACT_INPUT_TOKENS=20480
+
+### Multimodal surrounding-context budget (per-half token cap for the
+### `leading` / `trailing` text injected into VLM and extract prompts).
+### Computed at analyze_multimodal entry; the two halves are independent
+### so deployments can bias context forward or backward as needed.
+# SURROUNDING_LEADING_MAX_TOKENS=2000
+# SURROUNDING_TRAILING_MAX_TOKENS=2000
+
+### Per-response cap on total entity+relationship rows/records emitted by the LLM
+# MAX_EXTRACTION_RECORDS=100
+### Per-response cap on entity rows/objects emitted by the LLM
+# MAX_EXTRACTION_ENTITIES=40
+
+### control the maximum chunk_ids stored in vector and graph db
+# MAX_SOURCE_IDS_PER_ENTITY=300
+# MAX_SOURCE_IDS_PER_RELATION=300
+### control chunk_ids limitation method: FIFO, KEEP
+###    FIFO: First in first out
+###    KEEP: Keep oldest (less merge action and faster)
+# SOURCE_IDS_LIMIT_METHOD=FIFO
+
+### Maximum number of file paths stored in entity/relation file_path field
+### For display only; does not affect query performance
+# MAX_FILE_PATHS=100
+
+### PDF decryption password for protected PDF files
+# PDF_DECRYPT_PASSWORD=your_pdf_password_here
+
+### LLM cache for entity/relation extraction is enabled by default
+### Disabling it will prevent graph reconstruction after document deletion
+ENABLE_LLM_CACHE_FOR_EXTRACT=true
+
+########################################
+### Pipeline Concurrency Configuration
+########################################
+### Number of parallel processing documents(between 2~10, MAX_ASYNC/3 is recommended)
+MAX_PARALLEL_INSERT=2
+### Optional per-stage document pipeline concurrency
+# MAX_PARALLEL_PARSE_NATIVE=5
+# MAX_PARALLEL_PARSE_MINERU=1
+# MAX_PARALLEL_PARSE_DOCLING=1
+# MAX_PARALLEL_ANALYZE=5
+### Optional queue sizes for staged pipeline workers
+# QUEUE_SIZE_DEFAULT=100
+# QUEUE_SIZE_INSERT=4
+### Max concurrency requests for Embedding
+# EMBEDDING_FUNC_MAX_ASYNC=8
+### Number of chunks sent to Embedding in a single request
+# EMBEDDING_BATCH_NUM=10
+
+###########################################################################
+### Global LLM Configuration
+###   LLM_BINDING type: openai, ollama, lollms, azure_openai, bedrock, gemini
+###   LLM_BINDING_HOST: Service endpoint (left empty if using the provider SDK default endpoint)
+###   LLM_BINDING_API_KEY: api key
+### If LightRAG deployed in Docker:
+###    uses host.docker.internal instead of localhost in LLM_BINDING_HOST
+###########################################################################
+### LLM request timeout setting for all LLMs (0 means no timeout for Ollama)
+# LLM_TIMEOUT=180
+
+LLM_BINDING=openai
+LLM_BINDING_HOST=https://ai.znipower.com:5017
+# LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=sk-ffbkKu61NfLLCsXLzx2MRg
+# LLM_BINDING_API_KEY=your_api_key
+LLM_MODEL=gemini-3-flash-preview
+# LLM_MODEL=gpt-5.4-mini
+
+### Max concurrency requests of LLM
+MAX_ASYNC=4
+
+###########################################################################
+### Role-specific LLM/VLM overrides
+### Available roles: EXTRACT, KEYWORD, QUERY, VLM
+### If unset, each role falls back to the global LLM configuration above.
+### For detail information, refer to:
+###   docs/RoleSpecificLLMConfiguration.md
+###   docs/RoleSpecificLLMConfiguration-zh.md
+###########################################################################
+KEYWORD_LLM_MODEL=gemini-3-flash-preview
+# KEYWORD_LLM_MODEL=gpt-5.4-nano
+# KEYWORD_MAX_ASYNC_LLM
+# KEYWORD_LLM_TIMEOUT=180
+# KEYWORD_LLM_BINDING=openai
+# KEYWORD_LLM_BINDING_HOST=https://api.openai.com/v1
+# KEYWORD_LLM_BINDING_API_KEY=your_api_key
+
+QUERY_LLM_MODEL=gemini-3-flash-preview
+# QUERY_LLM_MODEL=gpt-5.4
+# QUERY_MAX_ASYNC_LLM
+# QUERY_LLM_TIMEOUT=180
+# QUERY_LLM_BINDING=openai
+# QUERY_LLM_BINDING_HOST=https://api.openai.com/v1
+# QUERY_LLM_BINDING_API_KEY=your_api_key
+
+VLM_LLM_MODEL=gpt-5.4-mini
+# VLM_MAX_ASYNC_LLM=4
+# VLM_LLM_TIMEOUT=180
+# VLM_LLM_BINDING=openai
+# VLM_LLM_BINDING_HOST=https://api.example.com/v1
+# VLM_LLM_BINDING_API_KEY=your_vlm_api_key
+
+### Master switch for VLM multimodal analysis (i/t/e items).
+### When false, multimodal item is skipped regardless of document process_options
+### When true, VLM_LLM_BINDING (or the base LLM_BINDING) must be vision-capable
+### lollms is rejected at startup
+VLM_PROCESS_ENABLE=false
+### Maximum image bytes sent to VLM (5242880=5MB)
+VLM_MAX_IMAGE_BYTES=5242880
+
+###########################################################################
+### Provider specific LLM options
+### Increasing the temperature setting may help mitigate infinite inference
+###   loops during entity/relation extraction, particularly when using
+###   models with more limited capabilities, such as Qwen3-30B
+### Set a max output token limit to prevent endless output from certain LLMs,
+###   which may trigger timeout errors during entity and relation extraction.
+###        max_output_token < LLM_TIMEOUT * llm_tokens_per_second
+###   i.e. max_output_token = 9000 = 180s * 50 tokens/s
+### Sample commands to list all supported options specific LLM_BINDING:
+###   lightrag-server --llm-binding openai  --help
+###   lightrag-server --llm-binding bedrock --help
+###   lightrag-server --llm-binding gemini  --help
+###########################################################################
+### OpenAI Specific Parameters (OpenRouter or other OpenAI compatible API):
+###     LLM_BINDING=openai
+###     LLM_BINDING_HOST=https://openrouter.ai/api/v1
+###     LLM_MODEL=google/gemini-2.5-flash
+# OPENAI_LLM_TEMPERATURE=0.9
+### For vLLM/SGLang and most of OpenAI compatible API provider
+# OPENAI_LLM_MAX_TOKENS=9000
+### OpenAI o1-mini and newer models use max_completion_tokens instead of max_tokens
+OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+### For OpenAI reasoning control
+# OPENAI_LLM_REASONING_EFFORT=minimal
+### For OpenRouter reasoning control
+# OPENAI_LLM_EXTRA_BODY='{"reasoning": {"enabled": false}}'
+### For Qwen3 reasoning control deploy by vLLM
+# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
+
+### Azure OpenAI Specific Parameters:
+###     LLM_BINDING=azure_openai
+###     LLM_BINDING_HOST=https://xxxx.openai.azure.com/
+###     LLM_BINDING_API_KEY=your_api_key
+###     LLM_MODEL=my-gpt-mini-deployment
+### You may use deployment name for LLM_MODEL or set AZURE_OPENAI_DEPLOYMENT instead
+# AZURE_OPENAI_DEPLOYMENT=my-deployment-name
+# AZURE_OPENAI_API_VERSION=2024-08-01-preview
+
+### Google AI Studio Gemini Specific Parameters:
+### DEFAULT_GEMINI_ENDPOINT means the SDK selects the endpoint automatically
+###     LLM_BINDING=gemini
+###     LLM_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
+###     LLM_BINDING_API_KEY=your_gemini_api_key
+###     LLM_MODEL=gemini-flash-latest
+# GEMINI_LLM_TEMPERATURE=0.7
+# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
+### Enable or disable thinking
+###     GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
+###     GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+
+### Google Vertex AI Gemini Specific Parameters:
+### Vertex AI use GOOGLE_APPLICATION_CREDENTIALS instead of API-KEY for authentication
+# GOOGLE_GENAI_USE_VERTEXAI=true
+# GOOGLE_CLOUD_PROJECT='your-project-id'
+# GOOGLE_CLOUD_LOCATION='us-central1'
+# GOOGLE_APPLICATION_CREDENTIALS='/Users/xxxxx/your-service-account-credentials-file.json'
+
+### Bedrock Specific Parameters:
+###     LLM_BINDING=bedrock
+###     LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+###     LLM_MODEL=us.amazon.nova-lite-v1:0
+### Region is required for all three modes (Bedrock endpoints are regional).
+# AWS_REGION=us-west-1
+### Bedrock Authentication (choose ONE of the following three approaches):
+### Bedrock API key (bearer token). Bedrock ignores LLM_BINDING_API_KEY;
+### set AWS_BEARER_TOKEN_BEDROCK directly before startup. This is a
+### process-level AWS SDK setting and cannot be overridden per role.
+# AWS_BEARER_TOKEN_BEDROCK=your_bedrock_api_key
+### SigV4 credentials (classic IAM user / STS / instance profile).
+# AWS_ACCESS_KEY_ID=your_aws_access_key_id
+# AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
+# AWS_SESSION_TOKEN=your_optional_aws_session_token
+### Ambient credentials (AWS SDK default credential chain).
+### To use this mode, leave AWS_BEARER_TOKEN_BEDROCK, AWS_ACCESS_KEY_ID,
+### AWS_SECRET_ACCESS_KEY, and AWS_SESSION_TOKEN above commented out — the
+### AWS SDK will then resolve credentials from ~/.aws/credentials, IAM role,
+### instance profile, SSO, or environment variables outside .env.
+### Activating any of the lines above forces that explicit mode and bypasses
+### the credential chain.
+# BEDROCK_LLM_TEMPERATURE=1.0
+# BEDROCK_LLM_MAX_TOKENS=9000
+# BEDROCK_LLM_TOP_P=1.0
+# BEDROCK_LLM_STOP_SEQUENCES='["</s>"]'
+### Bedrock model reasoning control
+# BEDROCK_LLM_EXTRA_FIELDS='{"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "low"}}'
+
+### Ollama Specific Parameters:
+###     LLM_BINDING=ollama
+###     LLM_BINDING_HOST=http://localhost:11434
+###     LLM_MODEL=qwen3.5:9b
+### OLLAMA_LLM_NUM_CTX must be provided, and should be at least MAX_TOTAL_TOKENS + 2000
+OLLAMA_LLM_NUM_CTX=32768
+# OLLAMA_LLM_NUM_PREDICT=9000
+# OLLAMA_LLM_TEMPERATURE=0.85
+# OLLAMA_LLM_STOP='["</s>", "<|EOT|>"]'
+
+#######################################################################################
+### Embedding Configuration (Should not be changed after the first file processed)
+### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, bedrock
+### EMBEDDING_BINDING_HOST: Service endpoint (left empty if using default endpoint provided by openai or gemini SDK)
+### EMBEDDING_BINDING_API_KEY: api key
+### If LightRAG deployed in Docker:
+###    uses host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
+### Control whether to send embedding_dim parameter to embedding API
+###    For OpenAI: Set EMBEDDING_SEND_DIM=true to enable dynamic dimension adjustment
+###    For OpenAI: Set EMBEDDING_SEND_DIM=false (default) to disable sending dimension parameter
+###    For Gemini: Always set EMBEDDING_SEND_DIM=true
+### Control whether to use base64 encoding format for embeddings (improves performance for OpenAI)
+###    For OpenAI: Set EMBEDDING_USE_BASE64=true (default) to use base64 encoding
+###    For Yandex Cloud and other providers that don't support it: Set EMBEDDING_USE_BASE64=false
+#######################################################################################
+# EMBEDDING_TIMEOUT=30
+
+### OpenAI compatible embedding
+EMBEDDING_BINDING=openai
+EMBEDDING_BINDING_HOST=http://localhost:8001/v1
+# EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+EMBEDDING_BINDING_API_KEY=7f6904c8185e908a1e0bdf9f69cd3ccc
+# EMBEDDING_BINDING_API_KEY=your_api_key
+EMBEDDING_MODEL=BAAI/bge-m3
+# EMBEDDING_MODEL=text-embedding-3-large
+EMBEDDING_DIM=1024
+# EMBEDDING_DIM=3072
+EMBEDDING_TOKEN_LIMIT=8192
+EMBEDDING_SEND_DIM=false
+EMBEDDING_USE_BASE64=true
+
+### Optional: asymmetric embeddings (query/document behavior split)
+### Leave EMBEDDING_ASYMMETRIC unset or set false to keep symmetric behavior.
+### Set true only when the selected embedding backend supports asymmetric mode.
+# EMBEDDING_ASYMMETRIC=true
+### Provider-task bindings such as Jina/Gemini/VoyageAI use provider parameters
+### and should not configure the prefix variables below.
+### Prefix-based models such as BGE/E5/GTE require both prefix variables.
+### Wrap non-empty values with quotes if there are trailing spaces.
+# EMBEDDING_DOCUMENT_PREFIX="search_document: "
+### Use NO_PREFIX for a side that should intentionally have no prefix.
+###     EMBEDDING_DOCUMENT_PREFIX=NO_PREFIX
+# EMBEDDING_QUERY_PREFIX="search_query: "
+
+###########################################################################
+### Provider specific Embedding options
+### Increasing the temperature setting may help mitigate infinite inference
+###   loops during entity/relation extraction, particularly when using
+###   models with more limited capabilities, such as Qwen3-30B
+### Set a max output token limit to prevent endless output from certain LLMs,
+###   which may trigger timeout errors during entity and relation extraction.
+###        max_output_token < LLM_TIMEOUT * llm_tokens_per_second
+###   i.e. max_output_token = 9000 = 180s * 50 tokens/s
+### Sample commands to list all supported options for a specific EMBEDDING_BINDING:
+###   lightrag-server --embedding-binding openai --help
+###   lightrag-server --embedding-binding ollama --help
+###   lightrag-server --embedding-binding bedrock --help
+###########################################################################
+### Azure Embedding Specific Parameters:
+### Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead
+###     EMBEDDING_BINDING=azure_openai
+###     EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
+###     EMBEDDING_BINDING_API_KEY=your_api_key
+###     EMBEDDING_MODEL=my-text-embedding-3-large-deployment
+###     EMBEDDING_DIM=3072
+# AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
+
+### Ollama Embedding Specific Parameters:
+###     EMBEDDING_BINDING=ollama
+###     EMBEDDING_BINDING_HOST=http://localhost:11434
+###     EMBEDDING_BINDING_API_KEY=your_api_key
+###     EMBEDDING_MODEL=qwen3-embedding:4b
+###     EMBEDDING_DIM=2560
+### Ollama should set the num_ctx option in addition to EMBEDDING_TOKEN_LIMIT
+OLLAMA_EMBEDDING_NUM_CTX=8192
+
+### Gemini Embedding Specific Parameters:
+### DEFAULT_GEMINI_ENDPOINT means the SDK selects the endpoint automatically
+### Gemini embedding requires sending dimension to server
+###     EMBEDDING_BINDING=gemini
+###     EMBEDDING_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
+###     EMBEDDING_BINDING_API_KEY=your_api_key
+###     EMBEDDING_MODEL=gemini-embedding-001
+###     EMBEDDING_DIM=1536
+###     EMBEDDING_TOKEN_LIMIT=2048
+###     EMBEDDING_SEND_DIM=true
+
+### Bedrock Embedding Specific Parameters:
+###     EMBEDDING_BINDING=bedrock
+###     EMBEDDING_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+###     EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+###     EMBEDDING_DIM=1024
+### Share the same region and authentication settings as LLMs, no reconfiguration here
+###     AWS_REGION=us-west-1
+###     AWS_BEARER_TOKEN_BEDROCK=your_bedrock_api_key
+###     AWS_ACCESS_KEY_ID=your_aws_access_key_id
+###     AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
+###     AWS_SESSION_TOKEN=your_optional_aws_session_token
+
+### Jina AI Embedding Specific Parameters:
+###     EMBEDDING_BINDING=jina
+###     EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
+###     EMBEDDING_MODEL=jina-embeddings-v4
+###     EMBEDDING_DIM=2048
+###     EMBEDDING_BINDING_API_KEY=your_api_key
+
+####################################################################
+### WORKSPACE sets workspace name for all storage types
+### for the purpose of isolating data from LightRAG instances.
+### Valid workspace name constraints: a-z, A-Z, 0-9, and _
+####################################################################
+# WORKSPACE=
+
+############################
+### Data storage selection
+############################
+### Default storage: JSON/Nano/NetworkX (Recommended for test deployment)
+LIGHTRAG_KV_STORAGE=PGKVStorage
+# LIGHTRAG_KV_STORAGE=JsonKVStorage
+LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
+# LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
+LIGHTRAG_GRAPH_STORAGE=Neo4JStorage
+# LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
+LIGHTRAG_VECTOR_STORAGE=MilvusVectorDBStorage
+# LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
+
+### Wizard metadata used to preserve env-storage Docker deployment defaults across setup reruns
+LIGHTRAG_SETUP_POSTGRES_DEPLOYMENT=docker
+LIGHTRAG_SETUP_NEO4J_DEPLOYMENT=docker
+# LIGHTRAG_SETUP_MONGODB_DEPLOYMENT=docker
+# LIGHTRAG_SETUP_MONGODB_DEPLOYMENT=atlas-capable
+# LIGHTRAG_SETUP_REDIS_DEPLOYMENT=docker
+LIGHTRAG_SETUP_MILVUS_DEPLOYMENT=docker
+# LIGHTRAG_SETUP_QDRANT_DEPLOYMENT=docker
+# LIGHTRAG_SETUP_MEMGRAPH_DEPLOYMENT=docker
+# LIGHTRAG_SETUP_OPENSEARCH_DEPLOYMENT=docker
+
+### PostgreSQL Configuration
+POSTGRES_HOST=localhost
+POSTGRES_PORT=5432
+POSTGRES_USER=rag
+# POSTGRES_USER=your_username
+POSTGRES_PASSWORD=rag
+# POSTGRES_PASSWORD='your_password'
+POSTGRES_DATABASE=rag
+POSTGRES_MAX_CONNECTIONS=12
+# POSTGRES_MAX_CONNECTIONS=25
+### DB-specific workspace should not be set; kept for compatibility only
+# POSTGRES_WORKSPACE=forced_workspace_name
+
+### Use HNSW_HALFVEC for large embeddings (2000+ dim).
+### Requires pgvector extension >= 0.7.0.
+### Vector storage type: HNSW, HNSW_HALFVEC, IVFFlat, VCHORDRQ
+POSTGRES_VECTOR_INDEX_TYPE=HNSW
+POSTGRES_HNSW_M=16
+POSTGRES_HNSW_EF=200
+POSTGRES_IVFFLAT_LISTS=100
+POSTGRES_VCHORDRQ_BUILD_OPTIONS=
+POSTGRES_VCHORDRQ_PROBES=
+POSTGRES_VCHORDRQ_EPSILON=1.9
+
+### PostgreSQL Connection Retry Configuration (Network Robustness)
+### NEW DEFAULTS (v1.4.10+): Optimized for HA deployments with ~30s switchover time
+### These defaults provide out-of-the-box support for PostgreSQL High Availability setups
+###
+### Number of retry attempts (1-100, default: 10)
+###   - Default 10 attempts allows ~225s total retry time (sufficient for most HA scenarios)
+###   - For extreme cases: increase up to 20-50
+### Initial retry backoff in seconds (0.1-300.0, default: 3.0)
+###   - Default 3.0s provides reasonable initial delay for switchover detection
+###   - For faster recovery: decrease to 1.0-2.0
+### Maximum retry backoff in seconds (must be >= backoff, max: 600.0, default: 30.0)
+###   - Default 30.0s matches typical switchover completion time
+###   - For longer switchovers: increase to 60-90
+### Connection pool close timeout in seconds (1.0-30.0, default: 5.0)
+# POSTGRES_CONNECTION_RETRIES=10
+# POSTGRES_CONNECTION_RETRY_BACKOFF=3.0
+# POSTGRES_CONNECTION_RETRY_BACKOFF_MAX=30.0
+# POSTGRES_POOL_CLOSE_TIMEOUT=5.0
+
+### PostgreSQL SSL Configuration (Optional)
+# POSTGRES_SSL_MODE=require
+# POSTGRES_SSL_CERT=/path/to/client-cert.pem
+# POSTGRES_SSL_KEY=/path/to/client-key.pem
+# POSTGRES_SSL_ROOT_CERT=/path/to/ca-cert.pem
+# POSTGRES_SSL_CRL=/path/to/crl.pem
+
+### PostgreSQL Server Settings (for Supabase Supavisor)
+# Use this to pass extra options to the PostgreSQL connection string.
+# For Supabase, you might need to set it like this:
+# POSTGRES_SERVER_SETTINGS='options=reference%3D[project-ref]'
+
+# Default is 100; set to 0 to disable
+# POSTGRES_STATEMENT_CACHE_SIZE=100
+
+### Neo4j Configuration
+NEO4J_URI=neo4j://localhost:7687
+# NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
+NEO4J_USERNAME=neo4j
+NEO4J_PASSWORD=Daniel2026
+# NEO4J_PASSWORD='your_password'
+NEO4J_DATABASE=neo4j
+NEO4J_MAX_CONNECTION_POOL_SIZE=100
+NEO4J_CONNECTION_TIMEOUT=30
+NEO4J_CONNECTION_ACQUISITION_TIMEOUT=30
+NEO4J_MAX_TRANSACTION_RETRY_TIME=30
+NEO4J_MAX_CONNECTION_LIFETIME=300
+NEO4J_LIVENESS_CHECK_TIMEOUT=30
+NEO4J_KEEP_ALIVE=true
+### DB-specific workspace should not be set; kept for compatibility only
+# NEO4J_WORKSPACE=forced_workspace_name
+
+### MongoDB Configuration
+# For MongoVectorDBStorage, MONGO_URI must point to a MongoDB endpoint with
+# Atlas Search / Vector Search support, such as MongoDB Atlas or Atlas local.
+MONGO_URI=mongodb://root:root@localhost:27017/
+# MONGO_URI=mongodb://localhost:27017/
+MONGO_DATABASE=LightRAG
+### DB-specific workspace should not be set; kept for compatibility only
+# MONGODB_WORKSPACE=forced_workspace_name
+
+# Community/local Docker MongoDB example for KV, graph, or doc-status storage only:
+# MONGO_URI=mongodb://localhost:27017/
+
+### OpenSearch Configuration
+### OpenSearch can be used for all storage types: KV, Vector, Graph, DocStatus
+### Connection settings (comma-separated host:port entries; do not include http:// or https://)
+### This setup wizard supports authenticated OpenSearch clusters only.
+### OPENSEARCH_USE_SSL controls whether those hosts are reached over TLS.
+OPENSEARCH_HOSTS=localhost:9200
+OPENSEARCH_USER=admin
+OPENSEARCH_PASSWORD=LightRAG2026_!@
+OPENSEARCH_USE_SSL=true
+OPENSEARCH_VERIFY_CERTS=false
+# OPENSEARCH_TIMEOUT=30
+# OPENSEARCH_MAX_RETRIES=3
+### Index Settings (for 3-AZ Amazon OpenSearch Service, set replicas to 2)
+# OPENSEARCH_NUMBER_OF_SHARDS=1
+# OPENSEARCH_NUMBER_OF_REPLICAS=0
+### k-NN Settings for Vector Storage (HNSW algorithm)
+# OPENSEARCH_KNN_EF_CONSTRUCTION=200
+# OPENSEARCH_KNN_M=16
+# OPENSEARCH_KNN_EF_SEARCH=100
+### PPL graphlookup for server-side graph traversal (auto-detected if not set)
+# OPENSEARCH_USE_PPL_GRAPHLOOKUP=true
+### DB-specific workspace should not be set; kept for compatibility only
+# OPENSEARCH_WORKSPACE=forced_workspace_name
+
+### Milvus Configuration
+MILVUS_URI=http://localhost:19530
+MILVUS_DB_NAME=lightrag
+MILVUS_DEVICE=cuda
+# MILVUS_USER=root
+# MILVUS_PASSWORD=your_password
+# MILVUS_TOKEN=your_token
+# Required for the bundled Docker Milvus stack; may come from .env or exported shell variables.
+MINIO_ACCESS_KEY_ID=minioadmin
+MINIO_SECRET_ACCESS_KEY=minioadmin
+### DB-specific workspace should not be set; kept for compatibility only
+# MILVUS_WORKSPACE=forced_workspace_name
+
+### Milvus Vector Index Configuration
+### Index type: AUTOINDEX (default), HNSW, HNSW_SQ, HNSW_PQ, IVF_FLAT, IVF_SQ8, DISKANN
+# MILVUS_INDEX_TYPE=AUTOINDEX
+
+### Metric type: COSINE (default), L2, IP
+# MILVUS_METRIC_TYPE=COSINE
+
+### HNSW / HNSW_SQ / HNSW_PQ Parameters (aligned with Milvus 2.4+ defaults)
+### M: Maximum number of connections per node [2-2048], default 16
+# MILVUS_HNSW_M=16
+### efConstruction: Size of dynamic candidate list during build [8-512], default 360
+# MILVUS_HNSW_EF_CONSTRUCTION=360
+### ef: Size of dynamic candidate list during search, default 200
+# MILVUS_HNSW_EF=200
+
+### HNSW_SQ Specific Parameters (requires Milvus 2.6.8+)
+### sq_type: Scalar quantization type - SQ4U, SQ6, SQ8 (default), BF16, FP16
+# MILVUS_HNSW_SQ_TYPE=SQ8
+### refine: Enable refinement step for higher precision, default false
+# MILVUS_HNSW_SQ_REFINE=false
+### refine_type: Refinement precision (must be higher than sq_type) - SQ6, SQ8, BF16, FP16, FP32
+# MILVUS_HNSW_SQ_REFINE_TYPE=FP32
+### refine_k: Refinement expansion factor, default 10
+# MILVUS_HNSW_SQ_REFINE_K=10
+
+### IVF_FLAT / IVF_SQ8 Parameters
+### nlist: Number of cluster units [1-65536], recommended sqrt(n) for n>1M, default 1024
+# MILVUS_IVF_NLIST=1024
+### nprobe: Number of units to query [1-nlist], default 16
+# MILVUS_IVF_NPROBE=16
+
+### Qdrant
+QDRANT_URL=http://localhost:6333
+# QDRANT_DEVICE=cpu
+# QDRANT_API_KEY=your-api-key
+### Qdrant upsert batching (enabled by default)
+### Split large upserts by estimated JSON payload size and point count
+### Default 16MB keeps safe headroom below common 32MB gateway/request limits
+# QDRANT_UPSERT_MAX_PAYLOAD_BYTES=16777216
+# QDRANT_UPSERT_MAX_POINTS_PER_BATCH=128
+### DB-specific workspace should not be set; kept for compatibility only
+# QDRANT_WORKSPACE=forced_workspace_name
+
+### Redis
+REDIS_URI=redis://localhost:6379
+REDIS_SOCKET_TIMEOUT=30
+REDIS_CONNECT_TIMEOUT=10
+REDIS_MAX_CONNECTIONS=100
+REDIS_RETRY_ATTEMPTS=3
+### DB-specific workspace should not be set; kept for compatibility only
+# REDIS_WORKSPACE=forced_workspace_name
+
+### Memgraph Configuration
+MEMGRAPH_URI=bolt://localhost:7687
+MEMGRAPH_USERNAME=
+MEMGRAPH_PASSWORD=
+MEMGRAPH_DATABASE=memgraph
+### DB-specific workspace should not be set; kept for compatibility only
+# MEMGRAPH_WORKSPACE=forced_workspace_name
+
+###########################################################
+### Langfuse Observability Configuration
+### Only works with LLM provided by OpenAI compatible API
+### Install with: pip install lightrag-hku[observability]
+### Sign up at: https://cloud.langfuse.com or self-host
+###########################################################
+# LANGFUSE_SECRET_KEY=''
+# LANGFUSE_PUBLIC_KEY=''
+# LANGFUSE_HOST='https://cloud.langfuse.com'
+# LANGFUSE_ENABLE_TRACE=true
+
+############################
+### Evaluation Configuration
+############################
+### RAGAS evaluation models (used for RAG quality assessment)
+### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
+### Default uses OpenAI models for evaluation
+
+### LLM Configuration for Evaluation
+# EVAL_LLM_MODEL=gpt-4o-mini
+### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
+# EVAL_LLM_BINDING_API_KEY=your_api_key
+### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
+# EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
+
+### Embedding Configuration for Evaluation
+# EVAL_EMBEDDING_MODEL=text-embedding-3-large
+### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
+# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
+### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
+# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+
+### Performance Tuning
+### Number of concurrent test case evaluations
+### Lower values reduce API rate limit issues but increase evaluation time
+# EVAL_MAX_CONCURRENT=2
+### TOP_K query parameter of LightRAG (default: 10)
+### Number of entities or relations retrieved from KG
+# EVAL_QUERY_TOP_K=10
+### LLM request retry and timeout settings for evaluation
+# EVAL_LLM_MAX_RETRIES=5
+# EVAL_LLM_TIMEOUT=180
+
+##########################################################################
+### ----- Preserved custom environment variables from previous .env  -----
+### ----- Comments in this session will persist across regenerations -----
+### (This must be the final session; ensure the preceding lines unchanged)
+##########################################################################
+### The "make env*" wizard will leave the following lines unchanged
+### You may add additional env vars or commnets here for your own purpose
+##########################################################################
+
+### Default Storage (Recommended for test deployment)
+# LIGHTRAG_KV_STORAGE=JsonKVStorage
+# LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
+# LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
+# LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
+
+### Production Storage
+# LIGHTRAG_KV_STORAGE=RedisKVStorage
+# LIGHTRAG_DOC_STATUS_STORAGE=RedisDocStatusStorage
+# LIGHTRAG_VECTOR_STORAGE=QdrantVectorDBStorage
+# LIGHTRAG_GRAPH_STORAGE=MemgraphStorage
+
+### Select OpenSearch for all storages
+# LIGHTRAG_KV_STORAGE=OpenSearchKVStorage
+# LIGHTRAG_DOC_STATUS_STORAGE=OpenSearchDocStatusStorage
+# LIGHTRAG_GRAPH_STORAGE=OpenSearchGraphStorage
+# LIGHTRAG_VECTOR_STORAGE=OpenSearchVectorDBStorage
+
+### Select PostgreSQL for all storages
+# LIGHTRAG_KV_STORAGE=PGKVStorage
+# LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
+# LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
+# LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
+
+### Select MongoDB for all storage (Vector storage requires an Atlas-capable deployment)
+# LIGHTRAG_KV_STORAGE=MongoKVStorage
+# LIGHTRAG_DOC_STATUS_STORAGE=MongoDocStatusStorage
+# LIGHTRAG_GRAPH_STORAGE=MongoGraphStorage
+# LIGHTRAG_VECTOR_STORAGE=MongoVectorDBStorage
+
+### ----- Extra setting from previous .env -----
+# ENTITY_TYPES='["Person", "Creature", "Organization", "Location", "Event", "Concept", "Method", "Content", "Data", "Artifact", "NaturalObject"]'
+POSTGRES_ENABLE_VECTOR=true

+ 989 - 0
env.example

@@ -0,0 +1,989 @@
+### All configurable environment variables must appear in this sample file, either active or commented out
+### Setup tool `make env-*` uses this file to generate final .env file
+
+### Target environment of this env file: host/compose (compose is for Docker or Kubernetes)
+# LIGHTRAG_RUNTIME_TARGET=host
+
+###########################
+### Server Configuration
+###########################
+HOST=0.0.0.0
+PORT=9621
+WEBUI_TITLE='My Graph KB'
+WEBUI_DESCRIPTION='Simple and Fast Graph Based RAG System'
+# WORKERS=2
+### gunicorn worker timeout (also used as the default LLM request timeout if LLM_TIMEOUT is not set)
+# TIMEOUT=150
+# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
+
+### Path Prefix Configuration (Optional)
+### Used to host multiple LightRAG instances on one host behind a reverse
+### proxy that routes by site prefix. Leave unset (or empty) for a
+### single-instance deployment.
+###
+### - LIGHTRAG_API_PREFIX  : reverse-proxy prefix the upstream proxy strips
+###                          before forwarding (passed to FastAPI as root_path).
+###
+### See docs/MultiSiteDeployment.md for end-to-end examples.
+# LIGHTRAG_API_PREFIX=/site01
+
+### Optional SSL Configuration
+### Docker note: generated compose files mount staged certs at /app/data/certs/ inside the container
+# SSL=true
+# SSL_CERTFILE=/path/to/cert.pem
+# SSL_KEYFILE=/path/to/key.pem
+
+### Directory Configuration (defaults to current working directory)
+### Default value is: ./inputs ./rag_storage
+# INPUT_DIR=<absolute_path_for_doc_input_dir>
+# WORKING_DIR=<absolute_path_for_working_dir>
+
+### Tiktoken cache directory (Store cached files in this folder for offline deployment)
+# TIKTOKEN_CACHE_DIR=/app/data/tiktoken
+
+### Ollama Emulating Model and Tag
+# OLLAMA_EMULATING_MODEL_NAME=lightrag
+OLLAMA_EMULATING_MODEL_TAG=latest
+
+### Max nodes for graph retrieval (Ensure WebUI local settings are also updated, which is limited to this value)
+# MAX_GRAPH_NODES=1000
+
+### Logging level
+# LOG_LEVEL=INFO
+# VERBOSE=False
+# LOG_MAX_BYTES=10485760
+# LOG_BACKUP_COUNT=5
+### Logfile location (defaults to current working directory)
+# LOG_DIR=/path/to/log/directory
+# LIGHTRAG_PERFORMANCE_TIMING_LOGS=false
+
+#####################################
+### Login and API-Key Configuration
+#####################################
+# AUTH_ACCOUNTS='admin:admin123,user1:{bcrypt}$2b$12$S8Yu.gCbuAbNTJFB.231gegTwr5pgrFxc8H9kXQ4/sduFBHkhM8Ka'
+# TOKEN_SECRET=lightrag-jwt-default-secret-key!
+# JWT_ALGORITHM=HS256
+# TOKEN_EXPIRE_HOURS=48
+# GUEST_TOKEN_EXPIRE_HOURS=24
+
+### Token Auto-Renewal Configuration (Sliding Window Expiration)
+### Enable automatic token renewal to prevent active users from being logged out
+### When enabled, tokens will be automatically renewed when remaining time < threshold
+# TOKEN_AUTO_RENEW=true
+### Token renewal threshold (0.0 - 1.0)
+### Renew token when remaining time < (total time * threshold)
+### Default: 0.5 (renew when 50% time remaining)
+### Examples:
+###   0.5 = renew when 24h token has 12h left
+###   0.25 = renew when 24h token has 6h left
+# TOKEN_RENEW_THRESHOLD=0.5
+### Note: Token renewal is automatically skipped for certain endpoints:
+###   - /health: Health check endpoint (no authentication required)
+###   - /documents/paginated: Frequently polled by client (5-30s interval)
+###   - /documents/pipeline_status: Very frequently polled by client (2s interval)
+###   - Rate limit: Minimum 60 seconds between renewals for same user
+
+### API-Key to access LightRAG Server API
+### Use this key in HTTP requests with the 'X-API-Key' header
+### Example: curl -H "X-API-Key: your-secure-api-key-here" http://localhost:9621/query
+# LIGHTRAG_API_KEY=your-secure-api-key-here
+# WHITELIST_PATHS=/health,/api/*
+
+######################################################################################
+### Query Configuration
+###
+### How to control the context length sent to LLM:
+###    MAX_ENTITY_TOKENS + MAX_RELATION_TOKENS < MAX_TOTAL_TOKENS
+###    Chunk_Tokens = MAX_TOTAL_TOKENS - Actual_Entity_Tokens - Actual_Relation_Tokens
+######################################################################################
+# LLM response cache for query (Not valid for streaming response)
+# ENABLE_LLM_CACHE=true
+# COSINE_THRESHOLD=0.2
+### Number of entities or relations retrieved from KG
+# TOP_K=40
+### Maximum number of chunks for naive vector search
+# CHUNK_TOP_K=20
+### control the actual entities sent to LLM
+# MAX_ENTITY_TOKENS=6000
+### control the actual relations sent to LLM
+# MAX_RELATION_TOKENS=8000
+### control the maximum tokens sent to LLM (including entities, relations and chunks)
+# MAX_TOTAL_TOKENS=30000
+
+### chunk selection strategies
+###     VECTOR: Pick KG chunks by vector similarity; the chunks delivered to the LLM align more closely with naive retrieval
+###     WEIGHT: Pick KG chunks by entity and chunk weight; the chunks delivered to the LLM are more purely KG-related
+###     If reranking is enabled, the impact of chunk selection strategies will be diminished.
+# KG_CHUNK_PICK_METHOD=VECTOR
+
+### maximum number of related chunks per source entity or relation
+###     The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
+###     Higher values increase re-ranking time
+# RELATED_CHUNK_NUMBER=5
+
+#########################################################
+### Reranking configuration
+### RERANK_BINDING type: null, cohere, jina, aliyun
+### For rerank model deployed by vLLM use cohere binding
+### If LightRAG deployed in Docker:
+###    use host.docker.internal instead of localhost in RERANK_BINDING_HOST
+#########################################################
+RERANK_BINDING=null
+# RERANK_MODEL=BAAI/bge-reranker-v2-m3
+# RERANK_BINDING_HOST=http://localhost:8000/rerank
+# RERANK_BINDING_API_KEY=your_rerank_api_key_here
+
+### rerank score chunk filter(set to 0.0 to keep all chunks, 0.6 or above if LLM is not strong enough)
+# MIN_RERANK_SCORE=0.0
+### Enable rerank by default in query params when RERANK_BINDING is not null
+# RERANK_BY_DEFAULT=True
+
+### Rerank concurrency and timeout (independent from base LLM settings)
+### MAX_ASYNC_RERANK falls back to MAX_ASYNC when unset.
+### RERANK_TIMEOUT has its own default (30s) since reranker calls are
+### typically much shorter than full LLM generation.
+# MAX_ASYNC_RERANK=4
+# RERANK_TIMEOUT=30
+
+### Cohere AI
+# # RERANK_MODEL=rerank-v3.5
+# # RERANK_BINDING_HOST=https://api.cohere.com/v2/rerank
+# # RERANK_BINDING_API_KEY=your_rerank_api_key_here
+### Cohere rerank chunking configuration (useful for models with token limits like ColBERT)
+# RERANK_ENABLE_CHUNKING=true
+# RERANK_MAX_TOKENS_PER_DOC=480
+
+### Aliyun Dashscope
+# # RERANK_MODEL=gte-rerank-v2
+# # RERANK_BINDING_HOST=https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank
+# # RERANK_BINDING_API_KEY=your_rerank_api_key_here
+
+### Jina AI
+# # RERANK_MODEL=jina-reranker-v2-base-multilingual
+# # RERANK_BINDING_HOST=https://api.jina.ai/v1/rerank
+# # RERANK_BINDING_API_KEY=your_rerank_api_key_here
+
+### For local deployment Embedding and Reranker with vLLM (OpenAI-compatible API)
+### Wizard metadata used to preserve the chosen deployment provider across setup reruns
+# LIGHTRAG_SETUP_EMBEDDING_PROVIDER=vllm
+# LIGHTRAG_SETUP_RERANK_PROVIDER=vllm
+# VLLM_EMBED_MODEL=BAAI/bge-m3
+# VLLM_EMBED_PORT=8001
+# VLLM_EMBED_DEVICE=cpu
+### VLLM_EMBED_API_KEY is passed as --api-key to vLLM; synced to EMBEDDING_BINDING_API_KEY; auto-generated if blank
+# VLLM_EMBED_API_KEY=
+# VLLM_EMBED_EXTRA_ARGS=
+# VLLM_RERANK_MODEL=BAAI/bge-reranker-v2-m3
+# VLLM_RERANK_PORT=8000
+# VLLM_RERANK_DEVICE=cuda
+### VLLM_RERANK_API_KEY is passed as --api-key to vLLM; synced to RERANK_BINDING_API_KEY; auto-generated if blank
+# VLLM_RERANK_API_KEY=
+### Use float16 for GPU mode. CPU mode uses the official vLLM CPU image.
+# VLLM_USE_CPU=1
+### Set to 1 for CPU mode, unset for GPU mode
+# CUDA_VISIBLE_DEVICES=-1
+### Set to -1 to disable CUDA (CPU mode), or specific GPU IDs for GPU mode
+# NVIDIA_VISIBLE_DEVICES=0
+### Optional Docker runtime equivalent; generated GPU compose honors either variable.
+# VLLM_RERANK_EXTRA_ARGS=
+
+########################################
+### Document processing configuration
+########################################
+### Document processing output language: English, Chinese, French, German ...
+SUMMARY_LANGUAGE=English
+
+### Enable JSON-structured output for entity extraction
+### Default behavior: JSON output is disabled when ENTITY_EXTRACTION_USE_JSON is unset
+### JSON output incurs higher latency but delivers improved reliability
+ENTITY_EXTRACTION_USE_JSON=true
+
+### Optional external YAML profile for entity type guidance and extraction examples
+### Profiles are loaded from PROMPT_DIR/entity_type (PROMPT_DIR defaults to ./prompts).
+### A reference template is shipped at prompts/samples/entity_type_prompt.sample.yml;
+# ENTITY_TYPE_PROMPT_FILE=entity_type_prompt.yml
+# PROMPT_DIR=<absolute_path_for_prompt_dir>
+
+### Multimodal parsing/analyze integration
+### Optional parser routing rules. Example for VLM & MinerU enabled configuration:
+###     LIGHTRAG_PARSER=*:native-iteP,*:mineru-iteP,*:legacy-R
+### Rules may be separated with commas or semicolons. Rules match file suffixes
+### (pdf), not full names (*.pdf), and are checked left-to-right.
+### If mineru/docling appears in LIGHTRAG_PARSER, the corresponding endpoint
+### below must be configured before server startup.
+### See docs/FileProcessingPipeline.md for details
+LIGHTRAG_PARSER=*:native-teP,*:legacy-R
+
+### Async parser service protocol (optional)
+### Configure these when using remote MinerU/Docling async services
+
+### ---- MinerU shared parameters (both local and official modes) ----
+### MinerU API protocol. Choose one active mode.
+### - official: MinerU precision API v4. Requires MINERU_API_TOKEN.
+### - local: self-hosted mineru-api / mineru-router base URL.
+MINERU_API_MODE=local
+# MINERU_POLL_INTERVAL_SECONDS=2
+# MINERU_MAX_POLLS=180
+# MINERU_LANGUAGE=ch
+# MINERU_ENABLE_TABLE=true
+# MINERU_ENABLE_FORMULA=true
+# MINERU_PAGE_RANGES=
+### MINERU_PAGE_RANGES semantics differ by mode:
+### - official: forwarded verbatim, supports e.g. "1-3,5,7-9".
+### - local:    only a single page ("3") or simple range ("1-10"); comma
+###             lists are rejected at startup.
+### When switching modes, double-check this constraint.
+
+### ---- MinerU local-only (MINERU_API_MODE=local) ----
+MINERU_LOCAL_ENDPOINT=http://127.0.0.1:8000
+### MINERU_LOCAL_BACKEND: which mineru-api backend handles the parse.
+###   Accepted values (per mineru-api POST /tasks form parameter `backend`):
+###     hybrid-auto-engine - pipeline + VLM combo with auto-selected local
+###                          engine (mineru-api's default). GPU required.
+###     pipeline           - CPU-friendly traditional pipeline; no VLM step.
+###     vlm-auto-engine    - VLM with auto-selected local inference engine
+###                          (sglang-engine / vllm-engine if GPU is available);
+###                          requires the matching engine extra preinstalled
+###                          on the mineru-api side, plus model weights.
+###   We ship `hybrid-auto-engine` -- requires the target mineru-api
+###   deployment to have a GPU plus the matching inference engine
+###   (sglang / vllm) and model weights installed. Switch to `pipeline`
+###   for CPU-only deployments without those dependencies.
+MINERU_LOCAL_BACKEND=hybrid-auto-engine
+### MINERU_LOCAL_PARSE_METHOD: parsing strategy for the pipeline component.
+###   Accepted values:
+###     auto - auto-detect embedded text-layer vs OCR per page (default).
+###     txt  - extract text from the embedded text layer only; fastest,
+###            but yields empty output on scanned PDFs without a text layer.
+###     ocr  - force OCR on every page regardless of text-layer quality;
+###            slowest, reliable on scanned or low-quality PDFs.
+###   Only consumed when MINERU_LOCAL_BACKEND is `pipeline` or
+###   `hybrid-auto-engine` (the pipeline arm of the hybrid pipeline).
+###   Pure VLM backends (`vlm-auto-engine`, `vlm-http-client`) ignore this
+###   parameter -- the VLM model handles layout/OCR natively.
+MINERU_LOCAL_PARSE_METHOD=auto
+### MINERU_LOCAL_IMAGE_ANALYSIS: enable VLM image/chart analysis pass for
+###   better caption and footnote recognition.
+###   Only consumed by `vlm-auto-engine`, `vlm-http-client`,
+###   `hybrid-auto-engine`, `hybrid-http-client`. The `pipeline` backend
+###   silently drops this flag -- its `_process_pipeline` does not accept
+###   the kwarg, so setting `false` under pipeline does NOT speed parsing
+###   up; pipeline never invokes the VLM image pass to begin with.
+###   Disable (`false`) on VLM / hybrid backends to skip the extra VLM
+###   round, trading image / chart semantic descriptions for faster parsing
+###   and lower GPU cost.
+MINERU_LOCAL_IMAGE_ANALYSIS=true
+# MINERU_LOCAL_START_PAGE_ID=0
+# MINERU_LOCAL_END_PAGE_ID=99999
+
+### ---- MinerU official-only (MINERU_API_MODE=official) ----
+# MINERU_API_TOKEN=your-api-key
+# MINERU_OFFICIAL_ENDPOINT=https://mineru.net
+# MINERU_MODEL_VERSION=vlm
+# MINERU_IS_OCR=false
+
+### Force re-upload of file to MinerU on every retry after failure
+### Disables caching of result outcomes
+# LIGHTRAG_FORCE_REPARSE_MINERU=false
+
+### Docling parser (docling-serve v1 / async API).
+###
+### Endpoint: base URL only — the client appends /v1/convert/file/async,
+###     /v1/status/poll/{task_id}?wait=<DOCLING_POLL_INTERVAL_SECONDS>,
+###     /v1/result/{task_id} itself.
+### Pipeline shape (pipeline=standard, target_type=zip,
+###     to_formats=[json,md], image_export_mode=referenced) is fixed in
+###     code so the sidecar flow stays self-consistent — flipping any of
+###     these would break the adapter and is therefore not exposed as env.
+###
+### OCR tunables:
+### - DOCLING_DO_OCR: master switch; when false the engine relies only on
+###     text-layer extraction.
+### - DOCLING_FORCE_OCR: when true, OCR every page regardless of text-layer
+###     quality (slower, useful for scanned PDFs with bad text layers).
+### - DOCLING_OCR_ENGINE: explicit engine selection (DEPRECATED in the
+###     docling-serve OpenAPI but still honored for older deployments).
+### - DOCLING_OCR_PRESET: recommended replacement for DOCLING_OCR_ENGINE.
+### - DOCLING_OCR_LANG: JSON array (e.g. ["en","zh"]) or comma-separated
+###     list. Empty (default) lets the OCR engine pick its default.
+### - DOCLING_DO_FORMULA_ENRICHMENT: when true, the code-formula model runs
+###     and `texts[*].label="formula"` items carry LaTeX in `text`. Default
+###     false because the model may not be present on every deployment;
+###     adapter falls back to plain-text formulas when disabled.
+###
+### Polling budget (server-side long-poll; client does NOT add extra sleep):
+### - DOCLING_POLL_INTERVAL_SECONDS: ``?wait=N`` value sent to
+###     /v1/status/poll/{task_id}. Larger N = fewer round trips per parse;
+###     bound by your reverse-proxy idle timeout. Default 5.
+### - DOCLING_MAX_POLLS: max polling rounds before raising TimeoutError.
+###     Worst-case wall-clock budget ≈
+###     DOCLING_POLL_INTERVAL_SECONDS × DOCLING_MAX_POLLS. Default 240
+###     (≈ 20 minutes at wait=5s); raise for very large PDFs.
+###
+### Bundle cache controls:
+### - DOCLING_ENGINE_VERSION: recorded in <base>.docling_raw/_manifest.json.
+###     Mismatch with the recorded value forces a cache miss → re-download.
+###     Leave empty to skip this check.
+### - LIGHTRAG_FORCE_REPARSE_DOCLING: when truthy ("1"/"true"), bypass the
+###     docling raw cache and re-upload on every parse_docling call.
+### - DOCLING_BBOX_ATTRIBUTES: override the doc-level bbox_attributes
+###     written into <base>.blocks.jsonl meta. Default
+###     {"origin":"LEFTBOTTOM"} matches docling's default coordinate system.
+DOCLING_ENDPOINT=http://localhost:5001
+DOCLING_DO_OCR=true
+DOCLING_FORCE_OCR=true
+DOCLING_DO_FORMULA_ENRICHMENT=false
+# DOCLING_OCR_ENGINE=auto
+# DOCLING_OCR_PRESET=auto
+# DOCLING_OCR_LANG=
+# DOCLING_POLL_INTERVAL_SECONDS=5
+# DOCLING_MAX_POLLS=240
+# DOCLING_BBOX_ATTRIBUTES={"origin":"LEFTBOTTOM"}
+### Force re-upload of file to Docling on every retry after failure
+### Disables caching of result outcomes
+# LIGHTRAG_FORCE_REPARSE_DOCLING=false
+
+### File upload size limit (in bytes)
+### Default: 104857600 (100MB)
+### Set to 0 or None for unlimited upload size
+### Examples:
+###   52428800  = 50MB
+###   104857600 = 100MB (default)
+###   209715200 = 200MB
+### Note: If using Nginx as reverse proxy, also configure client_max_body_size
+# MAX_UPLOAD_SIZE=104857600
+
+### Global chunk size, 500~1500 is recommended.
+### Chunker inherits the global value here only when its own var is unset.
+### Exception: P never inherits CHUNK_SIZE — it uses CHUNK_P_SIZE (default 2000).
+# CHUNK_SIZE=1200
+# CHUNK_OVERLAP_SIZE=100
+
+### Fixed-token chunker (process_options=F, default) settings
+###     CHUNK_F_SIZE: per-strategy chunk_token_size override; falls back to CHUNK_SIZE when unset
+###     CHUNK_F_OVERLAP_SIZE: token overlap; falls back to CHUNK_OVERLAP_SIZE when unset
+###     CHUNK_F_SPLIT_BY_CHARACTER: optional separator string; pre-segment before token windowing
+###     CHUNK_F_SPLIT_BY_CHARACTER_ONLY: when true, raise on oversize segment instead of token re-split
+# CHUNK_F_SIZE=1200
+# CHUNK_F_OVERLAP_SIZE=100
+# CHUNK_F_SPLIT_BY_CHARACTER=
+# CHUNK_F_SPLIT_BY_CHARACTER_ONLY=false
+
+### Recursive character chunker (process_options=R) settings
+###     CHUNK_R_SIZE: per-strategy chunk_token_size override; falls back to CHUNK_SIZE when unset
+###     CHUNK_R_OVERLAP_SIZE: token overlap between adjacent chunks; falls back to CHUNK_OVERLAP_SIZE when unset
+###     CHUNK_R_SEPARATORS: JSON array of cascaded separators tried by RecursiveCharacterTextSplitter.
+###       Default includes CJK sentence-ending punctuation so Chinese / mixed-language
+###       documents split at semantic boundaries.  Order: paragraph (\n\n) > line (\n) >
+###       Chinese sentence-end (。！？) > Chinese semi-clause (；，) > space > char.
+###       English ".?!" are intentionally omitted (literal match would split "0.95" /
+###       "e.g."); the English path falls through space / char as before.
+# CHUNK_R_SIZE=1200
+# CHUNK_R_OVERLAP_SIZE=100
+# CHUNK_R_SEPARATORS=["\n\n","\n","。","！","？","；","，"," ",""]
+
+### Semantic vector chunker (process_options=V) settings
+###     CHUNK_V_SIZE: per-strategy chunk_token_size hard cap (oversized pieces are
+###       re-split via R before being emitted); falls back to CHUNK_SIZE when unset
+###     CHUNK_V_BREAKPOINT_THRESHOLD_TYPE: percentile | standard_deviation | interquartile | gradient
+###     CHUNK_V_BREAKPOINT_THRESHOLD_AMOUNT: leave empty to use the LangChain per-type default (e.g. 95 for percentile)
+###     CHUNK_V_BUFFER_SIZE: number of adjacent sentences combined when computing distances
+###     CHUNK_V_SENTENCE_SPLIT_REGEX: regex fed to LangChain SemanticChunker for the
+###       initial sentence split.  Default extends the upstream English-only pattern
+###       with CJK sentence-end punctuation (。？！).  Override if you need a
+###       different language mix.  Note: env value is the raw regex string, no JSON
+###       quoting.
+# CHUNK_V_SIZE=1200
+# CHUNK_V_BREAKPOINT_THRESHOLD_TYPE=percentile
+# CHUNK_V_BREAKPOINT_THRESHOLD_AMOUNT=
+# CHUNK_V_BUFFER_SIZE=1
+# CHUNK_V_SENTENCE_SPLIT_REGEX=(?<=[.?!])\s+|(?<=[。？！])
+
+### Paragraph semantic chunker (process_options=P) settings
+###     CHUNK_P_SIZE: per-strategy chunk_token_size override; defaults to 2000 when unset
+###       (does NOT fall back to CHUNK_SIZE — paragraph-semantic merging needs more
+###       headroom than the global default to keep related paragraphs together).
+###     CHUNK_P_OVERLAP_SIZE: overlap for prose fallback and table-bridge context;
+###                           falls back to CHUNK_OVERLAP_SIZE when unset
+# CHUNK_P_SIZE=2000
+# CHUNK_P_OVERLAP_SIZE=100
+
+### Number of summary segments or tokens to trigger LLM summary on entity/relation merge (at least 3 is recommended)
+# FORCE_LLM_SUMMARY_ON_MERGE=8
+### Max description token size to trigger LLM summary
+# SUMMARY_MAX_TOKENS=1200
+### Recommended LLM summary output length in tokens
+# SUMMARY_LENGTH_RECOMMENDED=600
+### Maximum context size sent to LLM for description summary
+# SUMMARY_CONTEXT_SIZE=12000
+### Maximum token size allowed for entity extraction input context
+# MAX_EXTRACT_INPUT_TOKENS=20480
+
+### Multimodal surrounding-context budget (per-half token cap for the
+### `leading` / `trailing` text injected into VLM and extract prompts).
+### Computed at analyze_multimodal entry; the two halves are independent
+### so deployments can bias context forward or backward as needed.
+# SURROUNDING_LEADING_MAX_TOKENS=2000
+# SURROUNDING_TRAILING_MAX_TOKENS=2000
+
+### Per-response cap on total entity+relationship rows/records emitted by the LLM
+# MAX_EXTRACTION_RECORDS=100
+### Per-response cap on entity rows/objects emitted by the LLM
+# MAX_EXTRACTION_ENTITIES=40
+
+### control the maximum chunk_ids stored in vector and graph db
+# MAX_SOURCE_IDS_PER_ENTITY=300
+# MAX_SOURCE_IDS_PER_RELATION=300
+### control chunk_ids limitation method: FIFO, KEEP
+###    FIFO: First in first out
+###    KEEP: Keep oldest (less merge action and faster)
+# SOURCE_IDS_LIMIT_METHOD=FIFO
+
+### Maximum number of file paths stored in entity/relation file_path field
+### For display only; does not affect query performance
+# MAX_FILE_PATHS=100
+
+### PDF decryption password for protected PDF files
+# PDF_DECRYPT_PASSWORD=your_pdf_password_here
+
+### LLM cache for entity/relation extraction is enabled by default
+### Disabling it will prevent graph reconstruction after document deletion
+# ENABLE_LLM_CACHE_FOR_EXTRACT=true
+
+########################################
+### Pipeline Concurrency Configuration
+########################################
+### Number of parallel processing documents(between 2~10, MAX_ASYNC/3 is recommended)
+MAX_PARALLEL_INSERT=2
+### Optional per-stage document pipeline concurrency
+# MAX_PARALLEL_PARSE_NATIVE=5
+# MAX_PARALLEL_PARSE_MINERU=1
+# MAX_PARALLEL_PARSE_DOCLING=1
+# MAX_PARALLEL_ANALYZE=5
+### Optional queue sizes for staged pipeline workers
+# QUEUE_SIZE_DEFAULT=100
+# QUEUE_SIZE_INSERT=4
+### Max concurrency requests for Embedding
+# EMBEDDING_FUNC_MAX_ASYNC=8
+### Number of chunks sent to Embedding in a single request (default is 10)
+EMBEDDING_BATCH_NUM=32
+
+###########################################################################
+### Global LLM Configuration
+###   LLM_BINDING type: openai, ollama, lollms, azure_openai, bedrock, gemini
+###   LLM_BINDING_HOST: Service endpoint (left empty if using the provider SDK default endpoint)
+###   LLM_BINDING_API_KEY: api key
+### If LightRAG deployed in Docker:
+###    uses host.docker.internal instead of localhost in LLM_BINDING_HOST
+###########################################################################
+### LLM request timeout setting for all LLMs (0 means no timeout for Ollama)
+# LLM_TIMEOUT=180
+
+LLM_BINDING=openai
+LLM_BINDING_HOST=https://api.openai.com/v1
+LLM_BINDING_API_KEY=your_api_key
+LLM_MODEL=gpt-5.4-mini
+
+### Max concurrency requests of LLM
+MAX_ASYNC=4
+
+###########################################################################
+### Role-specific LLM/VLM overrides
+### Available roles: EXTRACT, KEYWORD, QUERY, VLM
+### If unset, each role falls back to the global LLM configuration above.
+### For detailed information, refer to:
+###   docs/RoleSpecificLLMConfiguration.md
+###   docs/RoleSpecificLLMConfiguration-zh.md
+###########################################################################
+# KEYWORD_LLM_MODEL=gpt-5.4-nano
+# KEYWORD_MAX_ASYNC_LLM
+# KEYWORD_LLM_TIMEOUT=180
+# KEYWORD_LLM_BINDING=openai
+# KEYWORD_LLM_BINDING_HOST=https://api.openai.com/v1
+# KEYWORD_LLM_BINDING_API_KEY=your_api_key
+
+# QUERY_LLM_MODEL=gpt-5.4
+# QUERY_MAX_ASYNC_LLM
+# QUERY_LLM_TIMEOUT=180
+# QUERY_LLM_BINDING=openai
+# QUERY_LLM_BINDING_HOST=https://api.openai.com/v1
+# QUERY_LLM_BINDING_API_KEY=your_api_key
+
+# VLM_LLM_MODEL=gpt-5.4-mini
+# VLM_MAX_ASYNC_LLM=4
+# VLM_LLM_TIMEOUT=180
+# VLM_LLM_BINDING=openai
+# VLM_LLM_BINDING_HOST=https://api.example.com/v1
+# VLM_LLM_BINDING_API_KEY=your_vlm_api_key
+
+### Master switch for VLM multimodal analysis (i/t/e items).
+### When false, multimodal item is skipped regardless of document process_options
+### When true, VLM_LLM_BINDING (or the base LLM_BINDING) must be vision-capable
+### lollms is rejected at startup
+VLM_PROCESS_ENABLE=false
+### Maximum image bytes sent to VLM (5242880=5MB)
+VLM_MAX_IMAGE_BYTES=5242880
+
+###########################################################################
+### Provider specific LLM options
+### Increasing the temperature setting may help mitigate infinite inference
+###   loops during entity/relation extraction, particularly when using
+###   models with more limited capabilities, such as Qwen3-30B
+### Set a max output token limit to prevent endless output from certain LLMs,
+###   which may trigger timeout errors during entity and relation extraction.
+###        max_output_token < LLM_TIMEOUT * llm_tokens_per_second
+###   i.e. max_output_token = 9000 = 180s * 50 tokens/s
+### Sample commands to list all supported options specific LLM_BINDING:
+###   lightrag-server --llm-binding openai  --help
+###   lightrag-server --llm-binding bedrock --help
+###   lightrag-server --llm-binding gemini  --help
+###########################################################################
+### OpenAI Specific Parameters (OpenRouter or other OpenAI compatible API):
+###     LLM_BINDING=openai
+###     LLM_BINDING_HOST=https://openrouter.ai/api/v1
+###     LLM_MODEL=google/gemini-2.5-flash
+# OPENAI_LLM_TEMPERATURE=0.9
+### For vLLM/SGLang and most of OpenAI compatible API provider
+# OPENAI_LLM_MAX_TOKENS=9000
+### OpenAI o1-mini and newer models use max_completion_tokens instead of max_tokens
+# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+### For OpenAI reasoning control
+# OPENAI_LLM_REASONING_EFFORT=minimal
+### For OpenRouter reasoning control
+# OPENAI_LLM_EXTRA_BODY='{"reasoning": {"enabled": false}}'
+### For Qwen3 reasoning control deploy by vLLM
+# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
+
+### Azure OpenAI Specific Parameters:
+###     LLM_BINDING=azure_openai
+###     LLM_BINDING_HOST=https://xxxx.openai.azure.com/
+###     LLM_BINDING_API_KEY=your_api_key
+###     LLM_MODEL=my-gpt-mini-deployment
+### You may use deployment name for LLM_MODEL or set AZURE_OPENAI_DEPLOYMENT instead
+# AZURE_OPENAI_DEPLOYMENT=my-deployment-name
+# AZURE_OPENAI_API_VERSION=2024-08-01-preview
+
+### Google AI Studio Gemini Specific Parameters:
+### DEFAULT_GEMINI_ENDPOINT lets the SDK select the endpoint automatically
+###     LLM_BINDING=gemini
+###     LLM_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
+###     LLM_BINDING_API_KEY=your_gemini_api_key
+###     LLM_MODEL=gemini-flash-latest
+# GEMINI_LLM_TEMPERATURE=0.7
+# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
+### Enable or disable thinking
+###     GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
+###     GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+
+### Google Vertex AI Gemini Specific Parameters:
+### Vertex AI use GOOGLE_APPLICATION_CREDENTIALS instead of API-KEY for authentication
+# GOOGLE_GENAI_USE_VERTEXAI=true
+# GOOGLE_CLOUD_PROJECT='your-project-id'
+# GOOGLE_CLOUD_LOCATION='us-central1'
+# GOOGLE_APPLICATION_CREDENTIALS='/Users/xxxxx/your-service-account-credentials-file.json'
+
+### Bedrock Specific Parameters:
+###     LLM_BINDING=bedrock
+###     LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+###     LLM_MODEL=us.amazon.nova-lite-v1:0
+### Region is required for all three modes (Bedrock endpoints are regional).
+# AWS_REGION=us-west-1
+### Bedrock Authentication (choose ONE of the following three approaches):
+### Bedrock API key (bearer token). Bedrock ignores LLM_BINDING_API_KEY;
+### set AWS_BEARER_TOKEN_BEDROCK directly before startup. This is a
+### process-level AWS SDK setting and cannot be overridden per role.
+# AWS_BEARER_TOKEN_BEDROCK=your_bedrock_api_key
+### SigV4 credentials (classic IAM user / STS / instance profile).
+# AWS_ACCESS_KEY_ID=your_aws_access_key_id
+# AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
+# AWS_SESSION_TOKEN=your_optional_aws_session_token
+### Ambient credentials (AWS SDK default credential chain).
+### To use this mode, leave AWS_BEARER_TOKEN_BEDROCK, AWS_ACCESS_KEY_ID,
+### AWS_SECRET_ACCESS_KEY, and AWS_SESSION_TOKEN above commented out — the
+### AWS SDK will then resolve credentials from ~/.aws/credentials, IAM role,
+### instance profile, SSO, or environment variables outside .env.
+### Activating any of the lines above forces that explicit mode and bypasses
+### the credential chain.
+# BEDROCK_LLM_TEMPERATURE=1.0
+# BEDROCK_LLM_MAX_TOKENS=9000
+# BEDROCK_LLM_TOP_P=1.0
+# BEDROCK_LLM_STOP_SEQUENCES='["</s>"]'
+### Bedrock model reasoning control
+# BEDROCK_LLM_EXTRA_FIELDS='{"reasoningConfig": {"type": "enabled", "maxReasoningEffort": "low"}}'
+
+### Ollama Specific Parameters:
+###     LLM_BINDING=ollama
+###     LLM_BINDING_HOST=http://localhost:11434
+###     LLM_MODEL=qwen3.5:9b
+### OLLAMA_LLM_NUM_CTX must be provided, and should be at least MAX_TOTAL_TOKENS + 2000
+OLLAMA_LLM_NUM_CTX=32768
+# OLLAMA_LLM_NUM_PREDICT=9000
+# OLLAMA_LLM_TEMPERATURE=0.85
+# OLLAMA_LLM_STOP='["</s>", "<|EOT|>"]'
+
+#######################################################################################
+### Embedding Configuration (Should not be changed after the first file processed)
+### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, bedrock
+### EMBEDDING_BINDING_HOST: Service endpoint (left empty if using default endpoint provided by openai or gemini SDK)
+### EMBEDDING_BINDING_API_KEY: api key
+### If LightRAG deployed in Docker:
+###    uses host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
+### Control whether to send embedding_dim parameter to embedding API
+###    For OpenAI: Set EMBEDDING_SEND_DIM=true to enable dynamic dimension adjustment
+###    For OpenAI: Set EMBEDDING_SEND_DIM=false (default) to disable sending dimension parameter
+###    For Gemini: Always set EMBEDDING_SEND_DIM=true
+### Control whether to use base64 encoding format for embeddings (improves performance for OpenAI)
+###    For OpenAI: Set EMBEDDING_USE_BASE64=true (default) to use base64 encoding
+###    For Yandex Cloud and other providers that don't support it: Set EMBEDDING_USE_BASE64=false
+#######################################################################################
+# EMBEDDING_TIMEOUT=30
+
+### OpenAI compatible embedding
+EMBEDDING_BINDING=openai
+EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+EMBEDDING_BINDING_API_KEY=your_api_key
+EMBEDDING_MODEL=text-embedding-3-large
+EMBEDDING_DIM=3072
+EMBEDDING_TOKEN_LIMIT=8192
+EMBEDDING_SEND_DIM=false
+EMBEDDING_USE_BASE64=true
+
+### Optional: asymmetric embeddings (query/document behavior split)
+### Leave EMBEDDING_ASYMMETRIC unset or set false to keep symmetric behavior.
+### Set true only when the selected embedding backend supports asymmetric mode.
+# EMBEDDING_ASYMMETRIC=true
+### Provider-task bindings such as Jina/Gemini/VoyageAI use provider parameters
+### and should not configure the prefix variables below.
+### Prefix-based models such as BGE/E5/GTE require both prefix variables.
+### Wrap non-empty values with quotes if there are trailing spaces.
+# EMBEDDING_DOCUMENT_PREFIX="search_document: "
+### Use NO_PREFIX for a side that should intentionally have no prefix.
+###     EMBEDDING_DOCUMENT_PREFIX=NO_PREFIX
+# EMBEDDING_QUERY_PREFIX="search_query: "
+
+###########################################################################
+### Provider specific Embedding options
+### Increasing the temperature setting may help mitigate infinite inference
+###   loops during entity/relation extraction, particularly when using
+###   models with more limited capabilities, such as Qwen3-30B
+### Set a max output token limit to prevent endless output from certain LLMs,
+###   which may trigger timeout errors during entity and relation extraction.
+###        max_output_token < LLM_TIMEOUT * llm_tokens_per_second
+###   i.e. max_output_token = 9000 = 180s * 50 tokens/s
+### Sample commands to list all supported options specific EMBEDDING_BINDING:
+###   lightrag-server --embedding-binding openai --help
+###   lightrag-server --embedding-binding ollama --help
+###   lightrag-server --embedding-binding bedrock --help
+###########################################################################
+### Azure Embedding Specific Parameters:
+### Use deployment name as model name or set AZURE_EMBEDDING_DEPLOYMENT instead
+###     EMBEDDING_BINDING=azure_openai
+###     EMBEDDING_BINDING_HOST=https://xxxx.openai.azure.com/
+###     EMBEDDING_BINDING_API_KEY=your_api_key
+###     EMBEDDING_MODEL=my-text-embedding-3-large-deployment
+###     EMBEDDING_DIM=3072
+# AZURE_EMBEDDING_API_VERSION=2024-08-01-preview
+
+### Ollama Embedding Specific Parameters:
+###     EMBEDDING_BINDING=ollama
+###     EMBEDDING_BINDING_HOST=http://localhost:11434
+###     EMBEDDING_BINDING_API_KEY=your_api_key
+###     EMBEDDING_MODEL=qwen3-embedding:4b
+###     EMBEDDING_DIM=2560
+### Ollama should set the num_ctx option in addition to EMBEDDING_TOKEN_LIMIT
+OLLAMA_EMBEDDING_NUM_CTX=8192
+
+### Gemini Embedding Specific Parameters:
+### DEFAULT_GEMINI_ENDPOINT lets the SDK select the endpoint automatically
+### Gemini embedding requires sending dimension to server
+###     EMBEDDING_BINDING=gemini
+###     EMBEDDING_BINDING_HOST=DEFAULT_GEMINI_ENDPOINT
+###     EMBEDDING_BINDING_API_KEY=your_api_key
+###     EMBEDDING_MODEL=gemini-embedding-001
+###     EMBEDDING_DIM=1536
+###     EMBEDDING_TOKEN_LIMIT=2048
+###     EMBEDDING_SEND_DIM=true
+
+### Bedrock Embedding Specific Parameters:
+###     EMBEDDING_BINDING=bedrock
+###     EMBEDDING_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+###     EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+###     EMBEDDING_DIM=1024
+### Share the same region and authentication settings as LLMs, no reconfiguration here
+###     AWS_REGION=us-west-1
+###     AWS_BEARER_TOKEN_BEDROCK=your_bedrock_api_key
+###     AWS_ACCESS_KEY_ID=your_aws_access_key_id
+###     AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
+###     AWS_SESSION_TOKEN=your_optional_aws_session_token
+
+### Jina AI Embedding Specific Parameters:
+###     EMBEDDING_BINDING=jina
+###     EMBEDDING_BINDING_HOST=https://api.jina.ai/v1/embeddings
+###     EMBEDDING_MODEL=jina-embeddings-v4
+###     EMBEDDING_DIM=2048
+###     EMBEDDING_BINDING_API_KEY=your_api_key
+
+####################################################################
+### WORKSPACE sets workspace name for all storage types
+### for the purpose of isolating data from LightRAG instances.
+### Valid workspace name constraints: a-z, A-Z, 0-9, and _
+####################################################################
+# WORKSPACE=
+
+############################
+### Data storage selection
+############################
+### Default storage: JSON/Nano/NetworkX (Recommended for test deployment)
+LIGHTRAG_KV_STORAGE=JsonKVStorage
+LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
+LIGHTRAG_GRAPH_STORAGE=NetworkXStorage
+LIGHTRAG_VECTOR_STORAGE=NanoVectorDBStorage
+
+### Wizard metadata used to preserve env-storage Docker deployment defaults across setup reruns
+# LIGHTRAG_SETUP_POSTGRES_DEPLOYMENT=docker
+# LIGHTRAG_SETUP_NEO4J_DEPLOYMENT=docker
+# LIGHTRAG_SETUP_MONGODB_DEPLOYMENT=docker
+# LIGHTRAG_SETUP_MONGODB_DEPLOYMENT=atlas-capable
+# LIGHTRAG_SETUP_REDIS_DEPLOYMENT=docker
+# LIGHTRAG_SETUP_MILVUS_DEPLOYMENT=docker
+# LIGHTRAG_SETUP_QDRANT_DEPLOYMENT=docker
+# LIGHTRAG_SETUP_MEMGRAPH_DEPLOYMENT=docker
+# LIGHTRAG_SETUP_OPENSEARCH_DEPLOYMENT=docker
+
+### PostgreSQL Configuration
+POSTGRES_HOST=localhost
+POSTGRES_PORT=5432
+POSTGRES_USER=your_username
+POSTGRES_PASSWORD='your_password'
+POSTGRES_DATABASE=rag
+POSTGRES_MAX_CONNECTIONS=25
+### DB specific workspace should not be set; kept for compatibility only
+# POSTGRES_WORKSPACE=forced_workspace_name
+
+### Use HNSW_HALFVEC for large embeddings (2000+ dim).
+### Requires pgvector extension >= 0.7.0.
+### Vector storage type: HNSW, HNSW_HALFVEC, IVFFlat, VCHORDRQ
+POSTGRES_VECTOR_INDEX_TYPE=HNSW
+POSTGRES_HNSW_M=16
+POSTGRES_HNSW_EF=200
+POSTGRES_IVFFLAT_LISTS=100
+POSTGRES_VCHORDRQ_BUILD_OPTIONS=
+POSTGRES_VCHORDRQ_PROBES=
+POSTGRES_VCHORDRQ_EPSILON=1.9
+
+### PostgreSQL Connection Retry Configuration (Network Robustness)
+### NEW DEFAULTS (v1.4.10+): Optimized for HA deployments with ~30s switchover time
+### These defaults provide out-of-the-box support for PostgreSQL High Availability setups
+###
+### Number of retry attempts (1-100, default: 10)
+###   - Default 10 attempts allows ~225s total retry time (sufficient for most HA scenarios)
+###   - For extreme cases: increase up to 20-50
+### Initial retry backoff in seconds (0.1-300.0, default: 3.0)
+###   - Default 3.0s provides reasonable initial delay for switchover detection
+###   - For faster recovery: decrease to 1.0-2.0
+### Maximum retry backoff in seconds (must be >= backoff, max: 600.0, default: 30.0)
+###   - Default 30.0s matches typical switchover completion time
+###   - For longer switchovers: increase to 60-90
+### Connection pool close timeout in seconds (1.0-30.0, default: 5.0)
+# POSTGRES_CONNECTION_RETRIES=10
+# POSTGRES_CONNECTION_RETRY_BACKOFF=3.0
+# POSTGRES_CONNECTION_RETRY_BACKOFF_MAX=30.0
+# POSTGRES_POOL_CLOSE_TIMEOUT=5.0
+
+### PostgreSQL SSL Configuration (Optional)
+# POSTGRES_SSL_MODE=require
+# POSTGRES_SSL_CERT=/path/to/client-cert.pem
+# POSTGRES_SSL_KEY=/path/to/client-key.pem
+# POSTGRES_SSL_ROOT_CERT=/path/to/ca-cert.pem
+# POSTGRES_SSL_CRL=/path/to/crl.pem
+
+### PostgreSQL Server Settings (for Supabase Supavisor)
+# Use this to pass extra options to the PostgreSQL connection string.
+# For Supabase, you might need to set it like this:
+# POSTGRES_SERVER_SETTINGS='options=reference%3D[project-ref]'
+
+# Default is 100; set to 0 to disable
+# POSTGRES_STATEMENT_CACHE_SIZE=100
+
+### Neo4j Configuration
+NEO4J_URI=neo4j+s://xxxxxxxx.databases.neo4j.io
+NEO4J_USERNAME=neo4j
+NEO4J_PASSWORD='your_password'
+NEO4J_DATABASE=neo4j
+NEO4J_MAX_CONNECTION_POOL_SIZE=100
+NEO4J_CONNECTION_TIMEOUT=30
+NEO4J_CONNECTION_ACQUISITION_TIMEOUT=30
+NEO4J_MAX_TRANSACTION_RETRY_TIME=30
+NEO4J_MAX_CONNECTION_LIFETIME=300
+NEO4J_LIVENESS_CHECK_TIMEOUT=30
+NEO4J_KEEP_ALIVE=true
+### DB specific workspace should not be set; kept for compatibility only
+# NEO4J_WORKSPACE=forced_workspace_name
+
+### MongoDB Configuration
+# For MongoVectorDBStorage, MONGO_URI must point to a MongoDB endpoint with
+# Atlas Search / Vector Search support, such as MongoDB Atlas or Atlas local.
+MONGO_URI=mongodb://localhost:27017/
+MONGO_DATABASE=LightRAG
+### DB specific workspace should not be set; kept for compatibility only
+# MONGODB_WORKSPACE=forced_workspace_name
+
+# Community/local Docker MongoDB example for KV, graph, or doc-status storage only:
+# MONGO_URI=mongodb://localhost:27017/
+
+### OpenSearch Configuration
+### OpenSearch can be used for all storage types: KV, Vector, Graph, DocStatus
+### Connection settings (comma-separated host:port entries; do not include http:// or https://)
+### This setup wizard supports authenticated OpenSearch clusters only.
+### OPENSEARCH_USE_SSL controls whether those hosts are reached over TLS.
+OPENSEARCH_HOSTS=localhost:9200
+OPENSEARCH_USER=admin
+OPENSEARCH_PASSWORD=LightRAG2026_!@
+OPENSEARCH_USE_SSL=true
+OPENSEARCH_VERIFY_CERTS=false
+# OPENSEARCH_TIMEOUT=30
+# OPENSEARCH_MAX_RETRIES=3
+### Index Settings (for 3-AZ Amazon OpenSearch Service, set replicas to 2)
+# OPENSEARCH_NUMBER_OF_SHARDS=1
+# OPENSEARCH_NUMBER_OF_REPLICAS=0
+### k-NN Settings for Vector Storage (HNSW algorithm)
+# OPENSEARCH_KNN_EF_CONSTRUCTION=200
+# OPENSEARCH_KNN_M=16
+# OPENSEARCH_KNN_EF_SEARCH=100
+### PPL graphlookup for server-side graph traversal (auto-detected if not set)
+# OPENSEARCH_USE_PPL_GRAPHLOOKUP=true
+### DB specific workspace should not be set; kept for compatibility only
+# OPENSEARCH_WORKSPACE=forced_workspace_name
+
+### Milvus Configuration
+MILVUS_URI=http://localhost:19530
+MILVUS_DB_NAME=lightrag
+# MILVUS_DEVICE=cpu
+# MILVUS_USER=root
+# MILVUS_PASSWORD=your_password
+# MILVUS_TOKEN=your_token
+# Required for the bundled Docker Milvus stack; may come from .env or exported shell variables.
+# MINIO_ACCESS_KEY_ID=minioadmin
+# MINIO_SECRET_ACCESS_KEY=minioadmin
+### DB specific workspace should not be set; kept for compatibility only
+# MILVUS_WORKSPACE=forced_workspace_name
+
+### Milvus Vector Index Configuration
+### Index type: AUTOINDEX (default), HNSW, HNSW_SQ, HNSW_PQ, IVF_FLAT, IVF_SQ8, DISKANN
+# MILVUS_INDEX_TYPE=AUTOINDEX
+
+### Metric type: COSINE (default), L2, IP
+# MILVUS_METRIC_TYPE=COSINE
+
+### HNSW / HNSW_SQ / HNSW_PQ Parameters (aligned with Milvus 2.4+ defaults)
+### M: Maximum number of connections per node [2-2048], default 16
+# MILVUS_HNSW_M=16
+### efConstruction: Size of dynamic candidate list during build [8-512], default 360
+# MILVUS_HNSW_EF_CONSTRUCTION=360
+### ef: Size of dynamic candidate list during search, default 200
+# MILVUS_HNSW_EF=200
+
+### HNSW_SQ Specific Parameters (requires Milvus 2.6.8+)
+### sq_type: Scalar quantization type - SQ4U, SQ6, SQ8 (default), BF16, FP16
+# MILVUS_HNSW_SQ_TYPE=SQ8
+### refine: Enable refinement step for higher precision, default false
+# MILVUS_HNSW_SQ_REFINE=false
+### refine_type: Refinement precision (must be higher than sq_type) - SQ6, SQ8, BF16, FP16, FP32
+# MILVUS_HNSW_SQ_REFINE_TYPE=FP32
+### refine_k: Refinement expansion factor, default 10
+# MILVUS_HNSW_SQ_REFINE_K=10
+
+### IVF_FLAT / IVF_SQ8 Parameters
+### nlist: Number of cluster units [1-65536], recommended sqrt(n) for n>1M, default 1024
+# MILVUS_IVF_NLIST=1024
+### nprobe: Number of units to query [1-nlist], default 16
+# MILVUS_IVF_NPROBE=16
+
+### Qdrant
+QDRANT_URL=http://localhost:6333
+# QDRANT_DEVICE=cpu
+# QDRANT_API_KEY=your-api-key
+### Qdrant upsert batching (enabled by default)
+### Split large upserts by estimated JSON payload size and point count
+### Default 16MB keeps safe headroom below common 32MB gateway/request limits
+# QDRANT_UPSERT_MAX_PAYLOAD_BYTES=16777216
+# QDRANT_UPSERT_MAX_POINTS_PER_BATCH=128
+### DB specific workspace should not be set; kept for compatibility only
+# QDRANT_WORKSPACE=forced_workspace_name
+
+### Redis
+REDIS_URI=redis://localhost:6379
+REDIS_SOCKET_TIMEOUT=30
+REDIS_CONNECT_TIMEOUT=10
+REDIS_MAX_CONNECTIONS=100
+REDIS_RETRY_ATTEMPTS=3
+### DB specific workspace should not be set; kept for compatibility only
+# REDIS_WORKSPACE=forced_workspace_name
+
+### Memgraph Configuration
+MEMGRAPH_URI=bolt://localhost:7687
+MEMGRAPH_USERNAME=
+MEMGRAPH_PASSWORD=
+MEMGRAPH_DATABASE=memgraph
+### DB specific workspace should not be set; kept for compatibility only
+# MEMGRAPH_WORKSPACE=forced_workspace_name
+
+###########################################################
+### Langfuse Observability Configuration
+### Only works with LLM provided by OpenAI compatible API
+### Install with: pip install lightrag-hku[observability]
+### Sign up at: https://cloud.langfuse.com or self-host
+###########################################################
+# LANGFUSE_SECRET_KEY=''
+# LANGFUSE_PUBLIC_KEY=''
+# LANGFUSE_HOST='https://cloud.langfuse.com'
+# LANGFUSE_ENABLE_TRACE=true
+
+############################
+### Evaluation Configuration
+############################
+### RAGAS evaluation models (used for RAG quality assessment)
+### ⚠️ IMPORTANT: Both LLM and Embedding endpoints MUST be OpenAI-compatible
+### Default uses OpenAI models for evaluation
+
+### LLM Configuration for Evaluation
+# EVAL_LLM_MODEL=gpt-4o-mini
+### API key for LLM evaluation (fallback to OPENAI_API_KEY if not set)
+# EVAL_LLM_BINDING_API_KEY=your_api_key
+### Custom OpenAI-compatible endpoint for LLM evaluation (optional)
+# EVAL_LLM_BINDING_HOST=https://api.openai.com/v1
+
+### Embedding Configuration for Evaluation
+# EVAL_EMBEDDING_MODEL=text-embedding-3-large
+### API key for embeddings (fallback: EVAL_LLM_BINDING_API_KEY -> OPENAI_API_KEY)
+# EVAL_EMBEDDING_BINDING_API_KEY=your_embedding_api_key
+### Custom OpenAI-compatible endpoint for embeddings (fallback: EVAL_LLM_BINDING_HOST)
+# EVAL_EMBEDDING_BINDING_HOST=https://api.openai.com/v1
+
+### Performance Tuning
+### Number of concurrent test case evaluations
+### Lower values reduce API rate limit issues but increase evaluation time
+# EVAL_MAX_CONCURRENT=2
+### TOP_K query parameter of LightRAG (default: 10)
+### Number of entities or relations retrieved from KG
+# EVAL_QUERY_TOP_K=10
+### LLM request retry and timeout settings for evaluation
+# EVAL_LLM_MAX_RETRIES=5
+# EVAL_LLM_TIMEOUT=180
+
+##########################################################################
+### ----- Preserved custom environment variables from previous .env  -----
+### ----- Comments in this session will persist across regenerations -----
+### (This must be the final session; ensure the preceding lines unchanged)
+##########################################################################
+### The "make env*" wizard will leave the following lines unchanged
+### You may add additional env vars or commnets here for your own purpose
+##########################################################################
+
+### AWS Bedrock
+# LLM_BINDING=bedrock
+# LLM_BINDING_HOST=DEFAULT_BEDROCK_ENDPOINT
+# LLM_MODEL=us.anthropic.claude-haiku-4-5-20251001-v1:0
+
+### ----- Extra setting from previous .env -----

+ 57 - 0
examples/generate_query.py

@@ -0,0 +1,57 @@
+from openai import OpenAI
+
+# os.environ["OPENAI_API_KEY"] = ""
+
+
+def openai_complete_if_cache(
+    model="gpt-4o-mini", prompt=None, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+    openai_client = OpenAI()
+
+    messages = []
+    if system_prompt:
+        messages.append({"role": "system", "content": system_prompt})
+    messages.extend(history_messages)
+    messages.append({"role": "user", "content": prompt})
+
+    response = openai_client.chat.completions.create(
+        model=model, messages=messages, **kwargs
+    )
+    if not response.choices or response.choices[0].message is None:
+        return ""
+    return response.choices[0].message.content
+
+
+if __name__ == "__main__":
+    description = ""
+    prompt = f"""
+    Given the following description of a dataset:
+
+    {description}
+
+    Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset.
+
+    Output the results in the following structure:
+    - User 1: [user description]
+        - Task 1: [task description]
+            - Question 1:
+            - Question 2:
+            - Question 3:
+            - Question 4:
+            - Question 5:
+        - Task 2: [task description]
+            ...
+        - Task 5: [task description]
+    - User 2: [user description]
+        ...
+    - User 5: [user description]
+        ...
+    """
+
+    result = openai_complete_if_cache(model="gpt-4o-mini", prompt=prompt)
+
+    file_path = "./queries.txt"
+    with open(file_path, "w") as file:
+        file.write(result)
+
+    print(f"Queries written to {file_path}")

+ 34 - 0
examples/graph_visual_with_html.py

@@ -0,0 +1,34 @@
+import pipmaster as pm
+
+if not pm.is_installed("pyvis"):
+    pm.install("pyvis")
+if not pm.is_installed("networkx"):
+    pm.install("networkx")
+
+import networkx as nx
+from pyvis.network import Network
+import random
+
+# Load the GraphML file
+G = nx.read_graphml("./dickens/graph_chunk_entity_relation.graphml")
+
+# Create a Pyvis network
+net = Network(height="100vh", notebook=True)
+
+# Convert NetworkX graph to Pyvis network
+net.from_nx(G)
+
+
+# Add colors and title to nodes
+for node in net.nodes:
+    node["color"] = "#{:06x}".format(random.randint(0, 0xFFFFFF))
+    if "description" in node:
+        node["title"] = node["description"]
+
+# Add title to edges
+for edge in net.edges:
+    if "description" in edge:
+        edge["title"] = edge["description"]
+
+# Save and display the network
+net.show("knowledge_graph.html")

+ 186 - 0
examples/graph_visual_with_neo4j.py

@@ -0,0 +1,186 @@
+import os
+import json
+import xml.etree.ElementTree as ET
+from neo4j import GraphDatabase
+
+# Constants
+# Directory holding the GraphML input; the JSON export is written here too.
+WORKING_DIR = "./dickens"
+# Number of records sent to Neo4j per query execution.
+BATCH_SIZE_NODES = 500
+BATCH_SIZE_EDGES = 100
+
+# Neo4j connection credentials
+# NOTE(review): "your_password" looks like a placeholder -- replace before running.
+NEO4J_URI = "bolt://localhost:7687"
+NEO4J_USERNAME = "neo4j"
+NEO4J_PASSWORD = "your_password"
+
+
+def xml_to_json(xml_file):
+    try:
+        tree = ET.parse(xml_file)
+        root = tree.getroot()
+
+        # Print the root element's tag and attributes to confirm the file has been correctly loaded
+        print(f"Root element: {root.tag}")
+        print(f"Root attributes: {root.attrib}")
+
+        data = {"nodes": [], "edges": []}
+
+        # Use namespace
+        namespace = {"": "http://graphml.graphdrawing.org/xmlns"}
+
+        for node in root.findall(".//node", namespace):
+            node_data = {
+                "id": node.get("id").strip('"'),
+                "entity_type": node.find("./data[@key='d1']", namespace).text.strip('"')
+                if node.find("./data[@key='d1']", namespace) is not None
+                else "",
+                "description": node.find("./data[@key='d2']", namespace).text
+                if node.find("./data[@key='d2']", namespace) is not None
+                else "",
+                "source_id": node.find("./data[@key='d3']", namespace).text
+                if node.find("./data[@key='d3']", namespace) is not None
+                else "",
+            }
+            data["nodes"].append(node_data)
+
+        for edge in root.findall(".//edge", namespace):
+            edge_data = {
+                "source": edge.get("source").strip('"'),
+                "target": edge.get("target").strip('"'),
+                "weight": float(edge.find("./data[@key='d5']", namespace).text)
+                if edge.find("./data[@key='d5']", namespace) is not None
+                else 0.0,
+                "description": edge.find("./data[@key='d6']", namespace).text
+                if edge.find("./data[@key='d6']", namespace) is not None
+                else "",
+                "keywords": edge.find("./data[@key='d9']", namespace).text
+                if edge.find("./data[@key='d9']", namespace) is not None
+                else "",
+                "source_id": edge.find("./data[@key='d8']", namespace).text
+                if edge.find("./data[@key='d8']", namespace) is not None
+                else "",
+            }
+            data["edges"].append(edge_data)
+
+        # Print the number of nodes and edges found
+        print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")
+
+        return data
+    except ET.ParseError as e:
+        print(f"Error parsing XML file: {e}")
+        return None
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return None
+
+
+def convert_xml_to_json(xml_path, output_path):
+    """Converts XML file to JSON and saves the output."""
+    if not os.path.exists(xml_path):
+        print(f"Error: File not found - {xml_path}")
+        return None
+
+    json_data = xml_to_json(xml_path)
+    if json_data:
+        with open(output_path, "w", encoding="utf-8") as f:
+            json.dump(json_data, f, ensure_ascii=False, indent=2)
+        print(f"JSON file created: {output_path}")
+        return json_data
+    else:
+        print("Failed to create JSON data")
+        return None
+
+
+def process_in_batches(tx, query, data, batch_size):
+    """Process data in batches and execute the given query."""
+    for i in range(0, len(data), batch_size):
+        batch = data[i : i + batch_size]
+        tx.run(query, {"nodes": batch} if "nodes" in query else {"edges": batch})
+
+
+def main():
+    """Convert the GraphML export to JSON and load it into Neo4j.
+
+    Steps: convert ``graph_chunk_entity_relation.graphml`` in WORKING_DIR to
+    ``graph_data.json``, insert nodes then edges in batches, and finally set
+    ``displayName`` and labels on every node. Returns early if the conversion
+    fails. Errors raised while talking to Neo4j are printed, not re-raised.
+    The queries call ``apoc.*`` procedures, so the target database needs APOC.
+    """
+    # Paths
+    xml_file = os.path.join(WORKING_DIR, "graph_chunk_entity_relation.graphml")
+    json_file = os.path.join(WORKING_DIR, "graph_data.json")
+
+    # Convert XML to JSON
+    json_data = convert_xml_to_json(xml_file, json_file)
+    if json_data is None:
+        return
+
+    # Load nodes and edges
+    nodes = json_data.get("nodes", [])
+    edges = json_data.get("edges", [])
+
+    # Neo4j queries
+    # Upsert one node per record keyed on id, copy its properties, drop the
+    # temporary Entity label, then add a label equal to the node id.
+    create_nodes_query = """
+    UNWIND $nodes AS node
+    MERGE (e:Entity {id: node.id})
+    SET e.entity_type = node.entity_type,
+        e.description = node.description,
+        e.source_id = node.source_id,
+        e.displayName = node.id
+    REMOVE e:Entity
+    WITH e, node
+    CALL apoc.create.addLabels(e, [node.id]) YIELD node AS labeledNode
+    RETURN count(*)
+    """
+
+    # Link existing nodes by id. The relationship type is one of five fixed
+    # words when the keywords contain it, otherwise the first comma-separated
+    # keyword with double quotes removed.
+    create_edges_query = """
+    UNWIND $edges AS edge
+    MATCH (source {id: edge.source})
+    MATCH (target {id: edge.target})
+    WITH source, target, edge,
+         CASE
+            WHEN edge.keywords CONTAINS 'lead' THEN 'lead'
+            WHEN edge.keywords CONTAINS 'participate' THEN 'participate'
+            WHEN edge.keywords CONTAINS 'uses' THEN 'uses'
+            WHEN edge.keywords CONTAINS 'located' THEN 'located'
+            WHEN edge.keywords CONTAINS 'occurs' THEN 'occurs'
+           ELSE REPLACE(SPLIT(edge.keywords, ',')[0], '\"', '')
+         END AS relType
+    CALL apoc.create.relationship(source, relType, {
+      weight: edge.weight,
+      description: edge.description,
+      keywords: edge.keywords,
+      source_id: edge.source_id
+    }, target) YIELD rel
+    RETURN count(*)
+    """
+
+    # Runs over every node in the database (MATCH (n) has no filter).
+    # NOTE(review): presumably setLabels replaces the id label added by
+    # create_nodes_query with the entity type -- confirm against APOC docs.
+    set_displayname_and_labels_query = """
+    MATCH (n)
+    SET n.displayName = n.id
+    WITH n
+    CALL apoc.create.setLabels(n, [n.entity_type]) YIELD node
+    RETURN count(*)
+    """
+
+    # Create a Neo4j driver
+    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
+
+    try:
+        # Execute queries in batches
+        with driver.session() as session:
+            # Insert nodes in batches
+            session.execute_write(
+                process_in_batches, create_nodes_query, nodes, BATCH_SIZE_NODES
+            )
+
+            # Insert edges in batches (after the nodes, since edges MATCH them)
+            session.execute_write(
+                process_in_batches, create_edges_query, edges, BATCH_SIZE_EDGES
+            )
+
+            # Set displayName and labels
+            session.run(set_displayname_and_labels_query)
+
+    except Exception as e:
+        print(f"Error occurred: {e}")
+
+    finally:
+        # Always release the driver, whether or not the import succeeded.
+        driver.close()
+
+
+if __name__ == "__main__":
+    main()

+ 166 - 0
examples/graph_visual_with_opensearch.py

@@ -0,0 +1,166 @@
+"""
+Knowledge Graph Visualization with OpenSearch + LightRAG WebUI
+
+This script demonstrates two ways to visualize the knowledge graph
+stored in OpenSearch:
+
+1. **WebUI (recommended)**: Opens the LightRAG WebUI in your browser
+   for interactive graph exploration with search, filtering, and
+   force-directed layout.
+
+2. **Standalone HTML**: Fetches graph data from the LightRAG Server API
+   and generates an interactive HTML file using Pyvis, similar to
+   graph_visual_with_html.py but reading from OpenSearch instead of
+   a local .graphml file.
+
+Prerequisites:
+    1. LightRAG Server running with OpenSearch storage:
+       lightrag-server --host 0.0.0.0 --port 9621
+
+    2. Documents already indexed (e.g., via the WebUI or API)
+
+Usage:
+    # Open WebUI for interactive exploration
+    python examples/graph_visual_with_opensearch.py
+
+    # Generate standalone HTML file
+    python examples/graph_visual_with_opensearch.py --html
+
+    # Custom server URL and output file
+    python examples/graph_visual_with_opensearch.py --html --server http://localhost:9621 --output my_graph.html
+"""
+
+import argparse
+import os
+import sys
+import webbrowser
+
+import pipmaster as pm
+
+if not pm.is_installed("requests"):
+    pm.install("requests")
+if not pm.is_installed("pyvis"):
+    pm.install("pyvis")
+
+import requests
+from pyvis.network import Network
+
+
+def fetch_graph(server_url: str, label: str = "*", max_nodes: int = 300) -> dict:
+    """Fetch knowledge graph data from LightRAG Server API."""
+    url = f"{server_url}/graphs"
+    params = {"label": label, "max_nodes": max_nodes}
+    resp = requests.get(url, params=params, timeout=30)
+    resp.raise_for_status()
+    return resp.json()
+
+
+def generate_html(graph_data: dict, output_file: str) -> str:
+    """Generate an interactive HTML visualization from graph data."""
+    nodes = graph_data.get("nodes", [])
+    edges = graph_data.get("edges", [])
+
+    if not nodes:
+        print("No nodes found in the graph. Index some documents first.")
+        sys.exit(1)
+
+    print(f"Building visualization: {len(nodes)} nodes, {len(edges)} edges")
+
+    net = Network(height="100vh", notebook=False, cdn_resources="in_line")
+
+    # Add nodes with colors based on entity type
+    import hashlib
+
+    for node in nodes:
+        node_id = node.get("id", "")
+        props = node.get("properties", {})
+        entity_type = props.get("entity_type", "unknown")
+        description = props.get("description", "")
+
+        # Deterministic color from entity type
+        color_hash = int(hashlib.md5(entity_type.encode()).hexdigest()[:6], 16)
+        color = f"#{color_hash:06x}"
+
+        net.add_node(
+            node_id,
+            label=node_id,
+            title=f"[{entity_type}] {description[:200]}"
+            if description
+            else entity_type,
+            color=color,
+        )
+
+    # Add edges
+    for edge in edges:
+        source = edge.get("source", "")
+        target = edge.get("target", "")
+        props = edge.get("properties", {})
+        rel_type = edge.get("type", "")
+        description = props.get("description", "")
+
+        net.add_edge(
+            source,
+            target,
+            title=f"[{rel_type}] {description[:200]}" if description else rel_type,
+            label=rel_type,
+        )
+
+    net.save_graph(output_file)
+    print(f"Graph saved to {output_file}")
+    return output_file
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Visualize LightRAG knowledge graph from OpenSearch"
+    )
+    parser.add_argument(
+        "--html",
+        action="store_true",
+        help="Generate standalone HTML file instead of opening WebUI",
+    )
+    parser.add_argument(
+        "--server",
+        default="http://localhost:9621",
+        help="LightRAG Server URL (default: http://localhost:9621)",
+    )
+    parser.add_argument(
+        "--output",
+        default="knowledge_graph_opensearch.html",
+        help="Output HTML file (default: knowledge_graph_opensearch.html)",
+    )
+    parser.add_argument(
+        "--label",
+        default="*",
+        help="Starting node label, or '*' for all nodes (default: *)",
+    )
+    parser.add_argument(
+        "--max-nodes",
+        type=int,
+        default=300,
+        help="Maximum nodes to fetch (default: 300)",
+    )
+    args = parser.parse_args()
+
+    # Verify server is running
+    try:
+        requests.get(f"{args.server}/health", timeout=5)
+    except requests.ConnectionError:
+        print(f"Error: Cannot connect to LightRAG Server at {args.server}")
+        print("Start the server first: lightrag-server --host 0.0.0.0 --port 9621")
+        sys.exit(1)
+
+    if args.html:
+        # Generate standalone HTML
+        graph_data = fetch_graph(args.server, args.label, args.max_nodes)
+        output = generate_html(graph_data, args.output)
+        webbrowser.open(f"file://{os.path.abspath(output)}")
+    else:
+        # Open WebUI graph explorer
+        url = f"{args.server}/#/graph"
+        print(f"Opening LightRAG WebUI graph explorer: {url}")
+        webbrowser.open(url)
+
+
+if __name__ == "__main__":
+    main()

+ 115 - 0
examples/insert_custom_kg.py

@@ -0,0 +1,115 @@
+import os
+from lightrag import LightRAG
+from lightrag.llm.openai import gpt_4o_mini_complete
+#########
+# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
+# import nest_asyncio
+# nest_asyncio.apply()
+#########
+
+# Directory where LightRAG keeps its storage for this example.
+WORKING_DIR = "./custom_kg"
+
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=gpt_4o_mini_complete,  # Use gpt_4o_mini_complete LLM model
+    # llm_model_func=gpt_4o_complete  # Optionally, use a stronger model
+)
+
+# Hand-written knowledge graph with three sections:
+#   "entities"      -- named nodes with a type, a description and a source_id
+#   "relationships" -- edges between entity names (src_id -> tgt_id)
+#   "chunks"        -- source text snippets, tied to the above via source_id
+custom_kg = {
+    "entities": [
+        {
+            "entity_name": "CompanyA",
+            "entity_type": "Organization",
+            "description": "A major technology company",
+            "source_id": "Source1",
+        },
+        {
+            "entity_name": "ProductX",
+            "entity_type": "Product",
+            "description": "A popular product developed by CompanyA",
+            "source_id": "Source1",
+        },
+        {
+            "entity_name": "PersonA",
+            "entity_type": "Person",
+            "description": "A renowned researcher in AI",
+            "source_id": "Source2",
+        },
+        {
+            "entity_name": "UniversityB",
+            "entity_type": "Organization",
+            "description": "A leading university specializing in technology and sciences",
+            "source_id": "Source2",
+        },
+        {
+            "entity_name": "CityC",
+            "entity_type": "Location",
+            "description": "A large metropolitan city known for its culture and economy",
+            "source_id": "Source3",
+        },
+        {
+            "entity_name": "EventY",
+            "entity_type": "Event",
+            "description": "An annual technology conference held in CityC",
+            "source_id": "Source3",
+        },
+    ],
+    "relationships": [
+        {
+            "src_id": "CompanyA",
+            "tgt_id": "ProductX",
+            "description": "CompanyA develops ProductX",
+            "keywords": "develop, produce",
+            "weight": 1.0,
+            "source_id": "Source1",
+        },
+        {
+            "src_id": "PersonA",
+            "tgt_id": "UniversityB",
+            "description": "PersonA works at UniversityB",
+            "keywords": "employment, affiliation",
+            "weight": 0.9,
+            "source_id": "Source2",
+        },
+        {
+            "src_id": "CityC",
+            "tgt_id": "EventY",
+            "description": "EventY is hosted in CityC",
+            "keywords": "host, location",
+            "weight": 0.8,
+            "source_id": "Source3",
+        },
+    ],
+    # NOTE(review): the chunks below use two different index keys
+    # ("source_chunk_index" and "chunk_order_index") -- confirm which one
+    # insert_custom_kg actually reads and make them consistent.
+    "chunks": [
+        {
+            "content": "ProductX, developed by CompanyA, has revolutionized the market with its cutting-edge features.",
+            "source_id": "Source1",
+            "source_chunk_index": 0,
+        },
+        {
+            "content": "One outstanding feature of ProductX is its advanced AI capabilities.",
+            "source_id": "Source1",
+            "chunk_order_index": 1,
+        },
+        {
+            "content": "PersonA is a prominent researcher at UniversityB, focusing on artificial intelligence and machine learning.",
+            "source_id": "Source2",
+            "source_chunk_index": 0,
+        },
+        {
+            "content": "EventY, held in CityC, attracts technology enthusiasts and companies from around the globe.",
+            "source_id": "Source3",
+            "source_chunk_index": 0,
+        },
+        # NOTE(review): looks like a placeholder chunk for an unknown source;
+        # its content is the literal string "None", not a null value.
+        {
+            "content": "None",
+            "source_id": "UNKNOWN",
+            "source_chunk_index": 0,
+        },
+    ],
+}
+
+# Load the hand-written graph into the LightRAG instance.
+rag.insert_custom_kg(custom_kg)

+ 296 - 0
examples/lightrag_ag2_multiagent_demo.py

@@ -0,0 +1,296 @@
+"""LightRAG + AG2 Multi-Agent Demo.
+
+Demonstrates how AG2 agents can use LightRAG's knowledge graph retrieval
+as a tool. Multiple specialized agents collaborate to answer complex
+questions over indexed documents.
+
+Architecture:
+    User -> AG2 GroupChat (Researcher + Analyst + Writer) -> LightRAG queries
+    - Researcher: uses LightRAG hybrid search to gather facts
+    - Analyst: uses LightRAG naive (vector) search for complementary results
+    - Writer: synthesizes findings into a final answer
+
+Requires:
+    pip install lightrag-hku "ag2[openai]>=0.11.4,<1.0"
+    export OPENAI_API_KEY="..."
+
+Usage:
+    python examples/lightrag_ag2_multiagent_demo.py
+"""
+
+import asyncio
+import json
+import os
+import shutil
+import threading
+
+from autogen import (
+    AssistantAgent,
+    GroupChat,
+    GroupChatManager,
+    LLMConfig,
+    UserProxyAgent,
+)
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
+
+# --- Configuration ---
+
+# Scratch directory for LightRAG storage. setup_lightrag() wipes and recreates
+# it, and main() removes it again on exit.
+WORKING_DIR = "./ag2_demo_workdir"
+
+# The only document indexed by the demo (inserted by setup_lightrag()).
+SAMPLE_TEXT = """
+Artificial intelligence has transformed multiple industries. Machine learning,
+a subset of AI, enables systems to learn from data without explicit programming.
+Deep learning, using neural networks with many layers, has achieved breakthroughs
+in computer vision, natural language processing, and speech recognition.
+
+Transformer architectures, introduced in the 2017 paper "Attention Is All You Need"
+by Vaswani et al., revolutionized NLP. Models like GPT and BERT are built on
+transformers. GPT (Generative Pre-trained Transformer) uses decoder-only architecture
+for text generation, while BERT (Bidirectional Encoder Representations) uses
+encoder-only architecture for understanding tasks.
+
+Retrieval-Augmented Generation (RAG) combines the strengths of retrieval systems
+and generative models. Instead of relying solely on parametric knowledge, RAG
+systems retrieve relevant documents from a knowledge base and use them as context
+for generation. This approach reduces hallucination and enables models to access
+up-to-date information.
+
+Knowledge graphs represent information as entities and relationships. When combined
+with RAG, knowledge graphs enable structured reasoning over document collections.
+LightRAG implements this approach with dual-level retrieval: local search focuses
+on specific entities, while global search captures broader themes and relationships.
+"""
+
+
+# --- LightRAG Setup ---
+
+
+async def setup_lightrag() -> LightRAG:
+    """Initialize LightRAG and index sample documents."""
+    if os.path.exists(WORKING_DIR):
+        shutil.rmtree(WORKING_DIR)
+    os.makedirs(WORKING_DIR, exist_ok=True)
+
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        embedding_func=openai_embed,
+        llm_model_func=gpt_4o_mini_complete,
+    )
+    await rag.initialize_storages()
+    await rag.ainsert(SAMPLE_TEXT)
+    print("LightRAG initialized and documents indexed.\n")
+    return rag
+
+
+# --- Async Bridge ---
+# AG2 runs tools in a background thread without an event loop.
+# We maintain a dedicated event loop in a separate thread for LightRAG async calls.
+
+# Assigned in main() before the first _run_async() call; None until then.
+_bg_loop: "asyncio.AbstractEventLoop | None" = None
+
+
+def _start_background_loop(loop: asyncio.AbstractEventLoop) -> None:
+    """Thread target: adopt *loop* as this thread's event loop and run it.
+
+    Blocks in ``run_forever()`` until something calls ``loop.stop()``.
+    """
+    asyncio.set_event_loop(loop)
+    loop.run_forever()
+
+
+def _run_async(coro):
+    """Submit a coroutine to the background event loop and wait for the result."""
+    future = asyncio.run_coroutine_threadsafe(coro, _bg_loop)
+    return future.result(timeout=120)
+
+
+# --- AG2 Agent Tools ---
+
+# Global reference to LightRAG instance (set in main)
+_rag_instance: "LightRAG | None" = None  # read by the lightrag_query tool
+
+
+def create_agents():
+    """Create AG2 agents with LightRAG tools."""
+    llm_config = LLMConfig(
+        {
+            "model": os.environ.get("OPENAI_MODEL", "gpt-4o-mini"),
+            "api_key": os.environ["OPENAI_API_KEY"],
+            "api_type": "openai",
+        }
+    )
+
+    researcher = AssistantAgent(
+        name="Researcher",
+        system_message=(
+            "You are a research specialist. Use the lightrag_query tool to search "
+            "the knowledge base. Start with 'hybrid' mode for comprehensive results. "
+            "If you need specific entity details, use 'local' mode. "
+            "Present your findings as structured bullet points. "
+            "Always call the tool -- do NOT answer from your own knowledge."
+        ),
+        llm_config=llm_config,
+    )
+
+    analyst = AssistantAgent(
+        name="Analyst",
+        system_message=(
+            "You are a knowledge graph analyst. Your FIRST action MUST be calling "
+            "the lightrag_query tool with mode='naive' to run a direct vector search. "
+            "This gives different results from the Researcher's hybrid search. "
+            "After receiving the naive search results, compare them with the "
+            "Researcher's findings and highlight any additional insights. "
+            "You MUST call the tool before writing any analysis."
+        ),
+        llm_config=llm_config,
+    )
+
+    writer = AssistantAgent(
+        name="Writer",
+        system_message=(
+            "You are a technical writer. Synthesize the findings from the "
+            "Researcher and Analyst into a clear, well-structured answer. "
+            "Do NOT use the search tool -- work only with what the other agents "
+            "have found. End your response with TERMINATE."
+        ),
+        llm_config=llm_config,
+    )
+
+    def is_termination(msg):
+        return "TERMINATE" in (msg.get("content") or "")
+
+    user_proxy = UserProxyAgent(
+        name="User",
+        human_input_mode="NEVER",
+        max_consecutive_auto_reply=10,
+        code_execution_config=False,
+        is_termination_msg=is_termination,
+    )
+
+    # --- Register LightRAG as a tool ---
+
+    @user_proxy.register_for_execution()
+    @researcher.register_for_llm(
+        description=(
+            "Query the LightRAG knowledge base. "
+            "mode: 'naive' (simple vector), 'local' (entity-focused), "
+            "'global' (theme/relationship-focused), 'hybrid' (combined). "
+            "Returns retrieved context from indexed documents."
+        )
+    )
+    @analyst.register_for_llm(
+        description=(
+            "Query the LightRAG knowledge base. "
+            "mode: 'naive' (simple vector), 'local' (entity-focused), "
+            "'global' (theme/relationship-focused), 'hybrid' (combined). "
+            "Returns retrieved context from indexed documents."
+        )
+    )
+    def lightrag_query(query: str, mode: str = "hybrid") -> str:
+        """Query LightRAG synchronously (wraps async call)."""
+        valid_modes = {"naive", "local", "global", "hybrid"}
+        if mode not in valid_modes:
+            return json.dumps(
+                {"error": f"Invalid mode '{mode}'. Use one of: {valid_modes}"}
+            )
+        try:
+            result = _run_async(
+                _rag_instance.aquery(query, param=QueryParam(mode=mode))
+            )
+            return json.dumps({"mode": mode, "query": query, "result": result})
+        except Exception as e:
+            return json.dumps({"error": str(e)})
+
+    return user_proxy, researcher, analyst, writer
+
+
+def run_multiagent_query(user_proxy, researcher, analyst, writer, question: str):
+    """Run a multi-agent GroupChat to answer a question using LightRAG."""
+    # Enforce pipeline: Researcher -> Analyst -> Writer.
+    # func_call_filter (default True) automatically routes tool calls
+    # to/from user_proxy, so transitions only govern non-tool handoffs.
+    # User can only start with Researcher; Researcher advances to Analyst;
+    # Analyst advances to Writer. Writer terminates the conversation.
+    allowed_transitions = {
+        user_proxy: [researcher],
+        researcher: [user_proxy, analyst],
+        analyst: [user_proxy, writer],
+        writer: [],
+    }
+
+    group_chat = GroupChat(
+        agents=[user_proxy, researcher, analyst, writer],
+        messages=[],
+        max_round=12,
+        allowed_or_disallowed_speaker_transitions=allowed_transitions,
+        speaker_transitions_type="allowed",
+    )
+
+    manager = GroupChatManager(
+        groupchat=group_chat,
+        llm_config=LLMConfig(
+            {
+                "model": os.environ.get("OPENAI_MODEL", "gpt-4o-mini"),
+                "api_key": os.environ["OPENAI_API_KEY"],
+                "api_type": "openai",
+            }
+        ),
+        is_termination_msg=lambda msg: "TERMINATE" in (msg.get("content") or ""),
+    )
+
+    print(f"Question: {question}\n{'=' * 60}\n")
+    user_proxy.run(manager, message=question).process()
+    print(f"\n{'=' * 60}")
+
+
+# --- Main ---
+
+
+def main():
+    global _rag_instance, _bg_loop
+
+    if not os.getenv("OPENAI_API_KEY"):
+        print(
+            "Error: OPENAI_API_KEY environment variable is not set.\n"
+            "Set it by running: export OPENAI_API_KEY='your-openai-api-key'"
+        )
+        return
+
+    # Start a background event loop for LightRAG async calls.
+    # AG2 tools run in threads without an event loop, so we need a
+    # persistent loop that can accept coroutines from any thread.
+    _bg_loop = asyncio.new_event_loop()
+    bg_thread = threading.Thread(
+        target=_start_background_loop, args=(_bg_loop,), daemon=True
+    )
+    bg_thread.start()
+
+    try:
+        # Step 1: Set up LightRAG (async, runs on the background loop)
+        _rag_instance = _run_async(setup_lightrag())
+
+        # Step 2: Create AG2 agents with LightRAG tools
+        user_proxy, researcher, analyst, writer = create_agents()
+
+        # Step 3: Ask a complex question
+        run_multiagent_query(
+            user_proxy,
+            researcher,
+            analyst,
+            writer,
+            question=(
+                "How do transformer architectures relate to RAG systems? "
+                "What role do knowledge graphs play in improving retrieval quality?"
+            ),
+        )
+    except Exception as e:
+        print(f"An error occurred: {e}")
+    finally:
+        if _rag_instance:
+            _run_async(_rag_instance.finalize_storages())
+        _bg_loop.call_soon_threadsafe(_bg_loop.stop)
+        bg_thread.join(timeout=5)
+        shutil.rmtree(WORKING_DIR, ignore_errors=True)
+
+
+if __name__ == "__main__":
+    main()
+    print("\nDone!")

+ 125 - 0
examples/lightrag_azure_openai_demo.py

@@ -0,0 +1,125 @@
+import os
+import asyncio
+from lightrag import LightRAG, QueryParam
+from lightrag.utils import EmbeddingFunc
+import numpy as np
+from dotenv import load_dotenv
+import logging
+from openai import AzureOpenAI
+
+logging.basicConfig(level=logging.INFO)
+
+# Load environment variables from a .env file (python-dotenv).
+load_dotenv()
+
+# Chat-completion settings; each is None when its variable is unset.
+AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
+AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
+AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
+AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
+
+# Embedding settings; the API key and endpoint above are reused for embeddings.
+AZURE_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_EMBEDDING_DEPLOYMENT")
+AZURE_EMBEDDING_API_VERSION = os.getenv("AZURE_EMBEDDING_API_VERSION")
+
+WORKING_DIR = "./dickens"
+
+# Start from a clean slate: an existing working directory is deleted on every run.
+if os.path.exists(WORKING_DIR):
+    import shutil
+
+    shutil.rmtree(WORKING_DIR)
+
+os.mkdir(WORKING_DIR)
+
+
+async def llm_model_func(
+    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
+) -> str:
+    client = AzureOpenAI(
+        api_key=AZURE_OPENAI_API_KEY,
+        api_version=AZURE_OPENAI_API_VERSION,
+        azure_endpoint=AZURE_OPENAI_ENDPOINT,
+    )
+
+    messages = []
+    if system_prompt:
+        messages.append({"role": "system", "content": system_prompt})
+    if history_messages:
+        messages.extend(history_messages)
+    messages.append({"role": "user", "content": prompt})
+
+    chat_completion = client.chat.completions.create(
+        model=AZURE_OPENAI_DEPLOYMENT,  # model = "deployment_name".
+        messages=messages,
+        temperature=kwargs.get("temperature", 0),
+        top_p=kwargs.get("top_p", 1),
+        n=kwargs.get("n", 1),
+    )
+    if not chat_completion.choices or chat_completion.choices[0].message is None:
+        return ""
+    return chat_completion.choices[0].message.content
+
+
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    client = AzureOpenAI(
+        api_key=AZURE_OPENAI_API_KEY,
+        api_version=AZURE_EMBEDDING_API_VERSION,
+        azure_endpoint=AZURE_OPENAI_ENDPOINT,
+    )
+    embedding = client.embeddings.create(model=AZURE_EMBEDDING_DEPLOYMENT, input=texts)
+
+    embeddings = [item.embedding for item in embedding.data]
+    return np.array(embeddings)
+
+
+async def test_funcs():
+    result = await llm_model_func("How are you?")
+    print("Resposta do llm_model_func: ", result)
+
+    result = await embedding_func(["How are you?"])
+    print("Resultado do embedding_func: ", result.shape)
+    print("Dimensão da embedding: ", result.shape[1])
+
+
+# Runs at import time: makes one real LLM call and one real embedding call.
+asyncio.run(test_funcs())
+
+# NOTE(review): hard-coded; presumably must equal the dimension printed by
+# test_funcs() for the configured embedding deployment -- confirm.
+embedding_dimension = 3072
+
+
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=embedding_dimension,
+            max_token_size=8192,
+            func=embedding_func,
+        ),
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+def main():
+    rag = asyncio.run(initialize_rag())
+
+    book1 = open("./book_1.txt", encoding="utf-8")
+    book2 = open("./book_2.txt", encoding="utf-8")
+
+    rag.insert([book1.read(), book2.read()])
+
+    query_text = "What are the main themes?"
+
+    print("Result (Naive):")
+    print(rag.query(query_text, param=QueryParam(mode="naive")))
+
+    print("\nResult (Local):")
+    print(rag.query(query_text, param=QueryParam(mode="local")))
+
+    print("\nResult (Global):")
+    print(rag.query(query_text, param=QueryParam(mode="global")))
+
+    print("\nResult (Hybrid):")
+    print(rag.query(query_text, param=QueryParam(mode="hybrid")))
+
+
+if __name__ == "__main__":
+    main()

+ 122 - 0
examples/lightrag_gemini_demo.py

@@ -0,0 +1,122 @@
+"""
+LightRAG Demo with Google Gemini Models
+
+This example demonstrates how to use LightRAG with Google's Gemini 2.0 Flash model
+for text generation and the text-embedding-004 model for embeddings.
+
+Prerequisites:
+    1. Set GEMINI_API_KEY environment variable:
+       export GEMINI_API_KEY='your-actual-api-key'
+
+    2. Prepare a text file named 'book.txt' in the current directory
+       (or modify BOOK_FILE constant to point to your text file)
+
+Usage:
+    python examples/lightrag_gemini_demo.py
+"""
+
+import os
+import asyncio
+import nest_asyncio
+import numpy as np
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.gemini import gemini_model_complete, gemini_embed
+from lightrag.utils import wrap_embedding_func_with_attrs
+
+# Patch asyncio so event loops can be nested.
+# NOTE(review): presumably required because main() mixes asyncio.run() with the
+# synchronous rag.insert()/rag.query() helpers — confirm.
+nest_asyncio.apply()
+
+# Directory passed to LightRAG as working_dir, and the text file that main() indexes.
+WORKING_DIR = "./rag_storage"
+BOOK_FILE = "./book.txt"
+
+# Validate API key: fail at import time with an actionable message instead of
+# failing later inside the first Gemini call.
+GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+if not GEMINI_API_KEY:
+    raise ValueError(
+        "GEMINI_API_KEY environment variable is not set. "
+        "Please set it with: export GEMINI_API_KEY='your-api-key'"
+    )
+
+# Create the working directory on first run (single level only; os.mkdir does
+# not create missing parents).
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+
+# --------------------------------------------------
+# LLM function
+# --------------------------------------------------
+async def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
+    """Complete ``prompt`` with Gemini 2.0 Flash.
+
+    Args:
+        prompt: User prompt forwarded to ``gemini_model_complete``.
+        system_prompt: Optional system prompt, forwarded unchanged.
+        history_messages: Prior conversation messages. ``None`` (the default)
+            is replaced by a fresh empty list on every call, so no list object
+            is shared between calls.
+        **kwargs: Extra keyword arguments forwarded to ``gemini_model_complete``.
+
+    Returns:
+        Whatever ``gemini_model_complete`` returns for this request.
+    """
+    if history_messages is None:
+        history_messages = []
+    return await gemini_model_complete(
+        prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        api_key=GEMINI_API_KEY,
+        model_name="gemini-2.0-flash",
+        **kwargs,
+    )
+
+
+# --------------------------------------------------
+# Embedding function
+# --------------------------------------------------
+@wrap_embedding_func_with_attrs(
+    embedding_dim=768,
+    send_dimensions=True,
+    max_token_size=2048,
+    model_name="models/text-embedding-004",
+)
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    """Embed ``texts`` with Gemini's ``models/text-embedding-004`` model.
+
+    The call goes through ``gemini_embed.func`` rather than ``gemini_embed``.
+    NOTE(review): presumably ``.func`` is the undecorated function, used here
+    so the decorator above is the only wrapper applied — confirm against
+    ``lightrag.llm.gemini``.
+    """
+    return await gemini_embed.func(
+        texts, api_key=GEMINI_API_KEY, model="models/text-embedding-004"
+    )
+
+
+# --------------------------------------------------
+# Initialize RAG
+# --------------------------------------------------
+async def initialize_rag():
+    """Create the Gemini-backed LightRAG instance and initialize its storages.
+
+    Returns:
+        The LightRAG instance after ``initialize_storages()`` has completed.
+    """
+    settings = {
+        "working_dir": WORKING_DIR,
+        "llm_model_func": llm_model_func,
+        "embedding_func": embedding_func,
+        "llm_model_name": "gemini-2.0-flash",
+    }
+    rag = LightRAG(**settings)
+
+    # 🔑 REQUIRED
+    await rag.initialize_storages()
+    return rag
+
+
+# --------------------------------------------------
+# Main
+# --------------------------------------------------
+def main():
+    """Index ``BOOK_FILE`` and print the answer to one question in every mode.
+
+    Raises:
+        FileNotFoundError: If ``BOOK_FILE`` does not exist.
+    """
+    # Validate book file exists
+    if not os.path.exists(BOOK_FILE):
+        raise FileNotFoundError(
+            f"'{BOOK_FILE}' not found. "
+            "Please provide a text file to index in the current directory."
+        )
+
+    rag = asyncio.run(initialize_rag())
+
+    # Insert text
+    with open(BOOK_FILE, "r", encoding="utf-8") as book:
+        book_text = book.read()
+    rag.insert(book_text)
+
+    query = "What are the top themes?"
+
+    # (heading, mode) pairs, in the order they are printed.
+    sections = (
+        ("\nNaive Search:", "naive"),
+        ("\nLocal Search:", "local"),
+        ("\nGlobal Search:", "global"),
+        ("\nHybrid Search:", "hybrid"),
+    )
+    for heading, mode in sections:
+        print(heading)
+        print(rag.query(query, param=QueryParam(mode=mode)))
+
+
+if __name__ == "__main__":
+    main()

+ 178 - 0
examples/lightrag_gemini_postgres_demo.py

@@ -0,0 +1,178 @@
+"""
+LightRAG Demo with PostgreSQL + Google Gemini
+
+This example demonstrates how to use LightRAG with:
+- Google Gemini (LLM + Embeddings)
+- PostgreSQL-backed storages for:
+  - Vector storage
+  - Graph storage
+  - KV storage
+  - Document status storage
+
+Prerequisites:
+1. PostgreSQL database running and accessible
+2. Required tables will be auto-created by LightRAG
+3. Set environment variables (example .env):
+
+   POSTGRES_HOST=localhost
+   POSTGRES_PORT=5432
+   POSTGRES_USER=admin
+   POSTGRES_PASSWORD=admin
+   POSTGRES_DATABASE=ai
+
+   LIGHTRAG_KV_STORAGE=PGKVStorage
+   LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
+   LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
+   LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
+
+   GEMINI_API_KEY=your-api-key
+
+4. Prepare a text file to index (default: Data/book.txt)
+
+Usage:
+    python examples/lightrag_gemini_postgres_demo.py
+"""
+
+import os
+import asyncio
+import numpy as np
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.gemini import gemini_model_complete, gemini_embed
+from lightrag.utils import setup_logger, wrap_embedding_func_with_attrs
+
+
+# --------------------------------------------------
+# Logger
+# --------------------------------------------------
+# Set up the "lightrag" logger at INFO level.
+setup_logger("lightrag", level="INFO")
+
+
+# --------------------------------------------------
+# Config
+# --------------------------------------------------
+# Directory passed to LightRAG as working_dir, and the document that main() indexes.
+WORKING_DIR = "./rag_storage"
+BOOK_FILE = "Data/book.txt"
+
+# Create the working directory on first run (single level only; os.mkdir does
+# not create missing parents).
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+# Fail at import time if the Gemini key is missing, rather than on the first
+# LLM or embedding call.
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+if not GEMINI_API_KEY:
+    raise ValueError("GEMINI_API_KEY environment variable is not set")
+
+
+# --------------------------------------------------
+# LLM function (Gemini)
+# --------------------------------------------------
+async def llm_model_func(
+    prompt,
+    system_prompt=None,
+    history_messages=None,
+    keyword_extraction=False,
+    **kwargs,
+) -> str:
+    """Complete ``prompt`` with Gemini 2.0 Flash.
+
+    Args:
+        prompt: User prompt forwarded to ``gemini_model_complete``.
+        system_prompt: Optional system prompt, forwarded unchanged.
+        history_messages: Prior conversation messages. ``None`` (the default)
+            is replaced by a fresh empty list on every call, so no list object
+            is shared between calls.
+        keyword_extraction: Accepted as a named parameter but not forwarded to
+            ``gemini_model_complete``.
+        **kwargs: Extra keyword arguments forwarded to ``gemini_model_complete``.
+
+    Returns:
+        The result of ``gemini_model_complete`` for this request.
+    """
+    if history_messages is None:
+        history_messages = []
+    return await gemini_model_complete(
+        prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        api_key=GEMINI_API_KEY,
+        model_name="gemini-2.0-flash",
+        **kwargs,
+    )
+
+
+# --------------------------------------------------
+# Embedding function (Gemini)
+# --------------------------------------------------
+@wrap_embedding_func_with_attrs(
+    embedding_dim=768,
+    max_token_size=2048,
+    model_name="models/text-embedding-004",
+)
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    """Embed ``texts`` with Gemini's ``models/text-embedding-004`` model.
+
+    The call goes through ``gemini_embed.func`` rather than ``gemini_embed``.
+    NOTE(review): presumably ``.func`` is the undecorated function, used here
+    so the decorator above is the only wrapper applied — confirm against
+    ``lightrag.llm.gemini``.
+    """
+    return await gemini_embed.func(
+        texts,
+        api_key=GEMINI_API_KEY,
+        model="models/text-embedding-004",
+    )
+
+
+# --------------------------------------------------
+# Initialize RAG with PostgreSQL storages
+# --------------------------------------------------
+async def initialize_rag() -> LightRAG:
+    """Create a LightRAG instance that uses Gemini and PostgreSQL-backed storages.
+
+    Returns:
+        The LightRAG instance after ``initialize_storages()`` has completed.
+    """
+    # PostgreSQL-backed storages
+    storage_backends = {
+        "graph_storage": "PGGraphStorage",
+        "vector_storage": "PGVectorStorage",
+        "doc_status_storage": "PGDocStatusStorage",
+        "kv_storage": "PGKVStorage",
+    }
+    # Performance tuning
+    concurrency = {
+        "embedding_func_max_async": 4,
+        "embedding_batch_num": 8,
+        "llm_model_max_async": 2,
+    }
+    # Chunking
+    chunking = {
+        "chunk_token_size": 1200,
+        "chunk_overlap_token_size": 100,
+    }
+
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_name="gemini-2.0-flash",
+        llm_model_func=llm_model_func,
+        embedding_func=embedding_func,
+        **concurrency,
+        **chunking,
+        **storage_backends,
+    )
+
+    # REQUIRED: initialize all storage backends
+    await rag.initialize_storages()
+    return rag
+
+
+# --------------------------------------------------
+# Main
+# --------------------------------------------------
+async def main():
+    """Index ``BOOK_FILE`` into LightRAG and print a preview answer per query mode.
+
+    Any exception is reported on stdout together with its traceback; storages
+    are finalized in all cases once the instance was created.
+    """
+    rag = None
+    try:
+        print("Initializing LightRAG with PostgreSQL + Gemini...")
+        rag = await initialize_rag()
+
+        if not os.path.exists(BOOK_FILE):
+            raise FileNotFoundError(
+                f"'{BOOK_FILE}' not found. Please provide a text file to index."
+            )
+
+        print(f"\nReading document: {BOOK_FILE}")
+        with open(BOOK_FILE, "r", encoding="utf-8") as document:
+            content = document.read()
+
+        print(f"Loaded document ({len(content)} characters)")
+
+        print("\nInserting document into LightRAG (this may take some time)...")
+        await rag.ainsert(content)
+        print("Document indexed successfully!")
+
+        banner = "=" * 60
+        print("\n" + banner)
+        print("Running sample queries")
+        print(banner)
+
+        query = "What are the top themes in this document?"
+
+        for mode in ("naive", "local", "global", "hybrid"):
+            print(f"\n[{mode.upper()} MODE]")
+            answer = await rag.aquery(query, param=QueryParam(mode=mode))
+            # Show at most the first 400 characters of each answer.
+            if len(answer) > 400:
+                print(answer[:400] + "...")
+            else:
+                print(answer)
+
+        print("\nRAG system is ready for use!")
+
+    except Exception as exc:
+        print("An error occurred:", exc)
+        import traceback
+
+        traceback.print_exc()
+
+    finally:
+        # rag stays None when initialize_rag() itself failed.
+        if rag is not None:
+            await rag.finalize_storages()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 131 - 0
examples/lightrag_gemini_workspace_demo.py

@@ -0,0 +1,131 @@
+"""
+LightRAG Data Isolation Demo: Workspace Management
+
+This example demonstrates how to maintain multiple isolated knowledge bases
+within a single application using LightRAG's 'workspace' feature.
+
+Key Concepts:
+- Workspace Isolation: Each RAG instance is assigned a unique workspace name,
+  which ensures that Knowledge Graphs, Vector Databases, and Chunks are
+  stored in separate, non-conflicting directories.
+- Independent Configuration: Different workspaces can utilize different
+  entity type guidance and document sets simultaneously.
+
+Prerequisites:
+1. Set the following environment variables:
+   - GEMINI_API_KEY: Your Google Gemini API key.
+2. Ensure your data directory contains:
+   - Data/book-small.txt
+   - Data/HR_policies.txt
+
+Usage:
+    python examples/lightrag_gemini_workspace_demo.py
+"""
+
+import os
+import asyncio
+import numpy as np
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.gemini import gemini_model_complete, gemini_embed
+from lightrag.utils import wrap_embedding_func_with_attrs
+
+
+async def llm_model_func(
+    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs
+) -> str:
+    """Wrapper for Gemini LLM completion.
+
+    Args:
+        prompt: User prompt forwarded to ``gemini_model_complete``.
+        system_prompt: Optional system prompt, forwarded unchanged.
+        history_messages: Prior conversation messages. ``None`` (the default)
+            is replaced by a fresh empty list on every call, so no list object
+            is shared between calls.
+        keyword_extraction: Accepted as a named parameter but not forwarded to
+            ``gemini_model_complete``.
+        **kwargs: Extra keyword arguments forwarded to ``gemini_model_complete``.
+
+    Returns:
+        The result of ``gemini_model_complete`` for this request.
+    """
+    if history_messages is None:
+        history_messages = []
+    return await gemini_model_complete(
+        prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        api_key=os.getenv("GEMINI_API_KEY"),
+        # Same model that initialize_rag() declares as llm_model_name.
+        model_name="gemini-2.0-flash",
+        **kwargs,
+    )
+
+
+@wrap_embedding_func_with_attrs(
+    embedding_dim=768, max_token_size=2048, model_name="models/text-embedding-004"
+)
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    """Embed ``texts`` with Gemini's text-embedding-004 model.
+
+    The API key is read from ``GEMINI_API_KEY`` on every call.
+    """
+    gemini_key = os.getenv("GEMINI_API_KEY")
+    vectors = await gemini_embed.func(
+        texts, api_key=gemini_key, model="models/text-embedding-004"
+    )
+    return vectors
+
+
+async def initialize_rag(
+    workspace: str = "default_workspace",
+) -> LightRAG:
+    """
+    Create a LightRAG instance bound to ``workspace`` and initialize its storages.
+
+    Entity type guidance can be customized by passing
+    addon_params={'entity_types_guidance': '...'} to LightRAG.
+
+    Args:
+        workspace: Workspace name passed straight to ``LightRAG``.
+
+    Returns:
+        The LightRAG instance after ``initialize_storages()`` has completed.
+    """
+    concurrency = {
+        "embedding_func_max_async": 4,
+        "embedding_batch_num": 8,
+        "llm_model_max_async": 2,
+    }
+    rag = LightRAG(
+        workspace=workspace,
+        llm_model_name="gemini-2.0-flash",
+        llm_model_func=llm_model_func,
+        embedding_func=embedding_func,
+        **concurrency,
+    )
+
+    await rag.initialize_storages()
+    return rag
+
+
+async def main():
+    """Index one document per workspace, query each workspace, then finalize both.
+
+    A data file that does not exist is skipped without a message. Any exception
+    is printed, and every workspace that was created is finalized afterwards.
+    """
+    rag_1 = None
+    rag_2 = None
+    try:
+        # 1. Initialize Isolated Workspaces
+        # Instance 1: Dedicated to literary analysis
+        # Instance 2: Dedicated to corporate HR documentation
+        print("Initializing isolated LightRAG workspaces...")
+        rag_1 = await initialize_rag("rag_workspace_book")
+        rag_2 = await initialize_rag("rag_workspace_hr")
+
+        # 2. Populate Workspace 1 (Literature)
+        book_path = "Data/book-small.txt"
+        if os.path.exists(book_path):
+            with open(book_path, "r", encoding="utf-8") as book_file:
+                print(f"Indexing {book_path} into Literature Workspace...")
+                book_text = book_file.read()
+                await rag_1.ainsert(book_text)
+
+        # 3. Populate Workspace 2 (Corporate)
+        hr_path = "Data/HR_policies.txt"
+        if os.path.exists(hr_path):
+            with open(hr_path, "r", encoding="utf-8") as hr_file:
+                print(f"Indexing {hr_path} into HR Workspace...")
+                hr_text = hr_file.read()
+                await rag_2.ainsert(hr_text)
+
+        # 4. Context-Specific Querying
+        print("\n--- Querying Literature Workspace ---")
+        literature_param = QueryParam(mode="hybrid", stream=False)
+        book_answer = await rag_1.aquery("What is the main theme?", param=literature_param)
+        print(f"Book Analysis: {book_answer[:200]}...")
+
+        print("\n--- Querying HR Workspace ---")
+        hr_param = QueryParam(mode="hybrid")
+        hr_answer = await rag_2.aquery("What is the leave policy?", param=hr_param)
+        print(f"HR Response: {hr_answer[:200]}...")
+
+    except Exception as exc:
+        print(f"An error occurred: {exc}")
+    finally:
+        # Finalize storage to safely close DB connections and write buffers
+        for instance in (rag_1, rag_2):
+            if instance:
+                await instance.finalize_storages()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 219 - 0
examples/lightrag_ollama_demo.py

@@ -0,0 +1,219 @@
+import asyncio
+import os
+import inspect
+import logging
+import logging.config
+from functools import partial
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.ollama import ollama_model_complete, ollama_embed
+from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug
+
+from dotenv import load_dotenv
+
+# Load settings from a local .env file; override=False keeps variables that are
+# already set in the process environment.
+load_dotenv(dotenv_path=".env", override=False)
+
+# Directory passed to LightRAG as working_dir; main() deletes stale index files from it.
+WORKING_DIR = "./dickens"
+
+
+def configure_logging():
+    """Configure logging for the application.
+
+    Clears handlers and filters on the uvicorn and lightrag loggers, then
+    installs a stderr console handler and a rotating file handler on the
+    ``lightrag`` logger.
+
+    Environment variables:
+        LOG_DIR: Directory for ``lightrag_ollama_demo.log`` (default: current
+            working directory).
+        LOG_MAX_BYTES: Rotation threshold in bytes (default 10485760).
+        LOG_BACKUP_COUNT: Number of rotated files to keep (default 5).
+        VERBOSE_DEBUG: ``"true"`` (case-insensitive) enables verbose debug.
+    """
+
+    # Reset any existing handlers to ensure clean configuration
+    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
+        logger_instance = logging.getLogger(logger_name)
+        logger_instance.handlers = []
+        logger_instance.filters = []
+
+    # Get log directory path from environment variable or use current directory
+    log_dir = os.getenv("LOG_DIR", os.getcwd())
+    log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag_ollama_demo.log"))
+
+    # Name this demo in the message so it matches the log file being written.
+    print(f"\nLightRAG ollama demo log file: {log_file_path}\n")
+    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
+
+    # Get log file max size and backup count from environment variables
+    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
+    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups
+
+    logging.config.dictConfig(
+        {
+            "version": 1,
+            "disable_existing_loggers": False,
+            "formatters": {
+                "default": {
+                    "format": "%(levelname)s: %(message)s",
+                },
+                "detailed": {
+                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+                },
+            },
+            "handlers": {
+                "console": {
+                    "formatter": "default",
+                    "class": "logging.StreamHandler",
+                    "stream": "ext://sys.stderr",
+                },
+                "file": {
+                    "formatter": "detailed",
+                    "class": "logging.handlers.RotatingFileHandler",
+                    "filename": log_file_path,
+                    "maxBytes": log_max_bytes,
+                    "backupCount": log_backup_count,
+                    "encoding": "utf-8",
+                },
+            },
+            "loggers": {
+                "lightrag": {
+                    "handlers": ["console", "file"],
+                    "level": "INFO",
+                    "propagate": False,
+                },
+            },
+        }
+    )
+
+    # Set the logger level to INFO
+    logger.setLevel(logging.INFO)
+    # Enable verbose debug if needed
+    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")
+
+
+# Create the working directory if needed. exist_ok=True avoids the
+# check-then-create race of testing os.path.exists() first.
+os.makedirs(WORKING_DIR, exist_ok=True)
+
+
+async def initialize_rag():
+    """Create a LightRAG instance backed by Ollama and initialize its storages.
+
+    Model names, hosts, the timeout and the embedding sizes are read from
+    environment variables, falling back to the defaults written below.
+
+    Returns:
+        The LightRAG instance after ``initialize_storages()`` has completed.
+    """
+    llm_kwargs = {
+        "host": os.getenv("LLM_BINDING_HOST", "http://localhost:11434"),
+        "options": {"num_ctx": 8192},
+        "timeout": int(os.getenv("TIMEOUT", "300")),
+    }
+
+    # Note: ollama_embed is decorated with @wrap_embedding_func_with_attrs,
+    # which wraps it in an EmbeddingFunc. Using .func accesses the original
+    # unwrapped function to avoid double wrapping when we create our own
+    # EmbeddingFunc with custom configuration (embedding_dim, max_token_size).
+    raw_embed = partial(
+        ollama_embed.func,
+        embed_model=os.getenv("EMBEDDING_MODEL", "bge-m3:latest"),
+        host=os.getenv("EMBEDDING_BINDING_HOST", "http://localhost:11434"),
+    )
+    embedder = EmbeddingFunc(
+        embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")),
+        max_token_size=int(os.getenv("MAX_EMBED_TOKENS", "8192")),
+        func=raw_embed,
+    )
+
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=ollama_model_complete,
+        llm_model_name=os.getenv("LLM_MODEL", "qwen2.5-coder:7b"),
+        summary_max_tokens=8192,
+        llm_model_kwargs=llm_kwargs,
+        embedding_func=embedder,
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+async def print_stream(stream):
+    """Print every non-empty chunk of an async stream as it arrives.
+
+    Args:
+        stream: Async iterable of chunks.
+
+    Falsy chunks (``None`` or an empty string) are skipped, so a ``None``
+    chunk is never rendered as the text "None". Output is flushed after each
+    chunk and no newline is appended.
+    """
+    async for chunk in stream:
+        if chunk:
+            print(chunk, end="", flush=True)
+
+
+async def main():
+    """Rebuild the demo index from ``./book.txt`` and stream one query per mode.
+
+    Steps: delete stale index files from ``WORKING_DIR``, initialize LightRAG,
+    run one test embedding, insert the book, then ask the same question in
+    naive, local, global and hybrid mode. Any exception is printed; storages
+    are finalized whenever the instance was created.
+    """
+    # Bind up front so the ``finally`` clause can tell whether initialization
+    # succeeded instead of raising UnboundLocalError and hiding the real error.
+    rag = None
+    try:
+        # Clear old data files
+        files_to_delete = [
+            "graph_chunk_entity_relation.graphml",
+            "kv_store_doc_status.json",
+            "kv_store_full_docs.json",
+            "kv_store_text_chunks.json",
+            "vdb_chunks.json",
+            "vdb_entities.json",
+            "vdb_relationships.json",
+        ]
+
+        for file in files_to_delete:
+            file_path = os.path.join(WORKING_DIR, file)
+            if os.path.exists(file_path):
+                os.remove(file_path)
+                print(f"Deleting old file:: {file_path}")
+
+        # Initialize RAG instance
+        rag = await initialize_rag()
+
+        # Test embedding function
+        test_text = ["This is a test string for embedding."]
+        embedding = await rag.embedding_func(test_text)
+        embedding_dim = embedding.shape[1]
+        print("\n=======================")
+        print("Test embedding function")
+        print("========================")
+        print(f"Test dict: {test_text}")
+        print(f"Detected embedding dimension: {embedding_dim}\n\n")
+
+        with open("./book.txt", "r", encoding="utf-8") as f:
+            await rag.ainsert(f.read())
+
+        # Ask the same question in every retrieval mode.
+        for mode in ("naive", "local", "global", "hybrid"):
+            print("\n=====================")
+            print(f"Query mode: {mode}")
+            print("=====================")
+            resp = await rag.aquery(
+                "What are the top themes in this story?",
+                param=QueryParam(mode=mode, stream=True),
+            )
+            # The response is streamed only when it is an async generator;
+            # anything else is printed as a whole.
+            if inspect.isasyncgen(resp):
+                await print_stream(resp)
+            else:
+                print(resp)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+    finally:
+        if rag is not None:
+            await rag.llm_response_cache.index_done_callback()
+            await rag.finalize_storages()
+
+
+if __name__ == "__main__":
+    # Configure logging before running the main function
+    configure_logging()
+    asyncio.run(main())
+    print("\nDone!")

+ 229 - 0
examples/lightrag_openai_compatible_demo.py

@@ -0,0 +1,229 @@
+import os
+import asyncio
+import inspect
+import logging
+import logging.config
+from functools import partial
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import openai_complete_if_cache
+from lightrag.llm.ollama import ollama_embed
+from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug
+
+from dotenv import load_dotenv
+
+# Load settings from a local .env file; override=False keeps variables that are
+# already set in the process environment.
+load_dotenv(dotenv_path=".env", override=False)
+
+# Directory passed to LightRAG as working_dir; main() deletes stale index files from it.
+WORKING_DIR = "./dickens"
+
+
+def configure_logging():
+    """Configure logging for the application.
+
+    Clears handlers and filters on the uvicorn and lightrag loggers, then
+    installs a stderr console handler and a rotating file handler on the
+    ``lightrag`` logger.
+
+    Environment variables:
+        LOG_DIR: Directory for ``lightrag_compatible_demo.log`` (default:
+            current working directory).
+        LOG_MAX_BYTES: Rotation threshold in bytes (default 10485760).
+        LOG_BACKUP_COUNT: Number of rotated files to keep (default 5).
+        VERBOSE_DEBUG: ``"true"`` (case-insensitive) enables verbose debug.
+    """
+
+    # Reset any existing handlers to ensure clean configuration
+    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
+        logger_instance = logging.getLogger(logger_name)
+        logger_instance.handlers = []
+        logger_instance.filters = []
+
+    # Get log directory path from environment variable or use current directory
+    log_dir = os.getenv("LOG_DIR", os.getcwd())
+    log_file_path = os.path.abspath(
+        os.path.join(log_dir, "lightrag_compatible_demo.log")
+    )
+
+    print(f"\nLightRAG compatible demo log file: {log_file_path}\n")
+    # Create the directory that will actually hold the log file. Taking
+    # dirname(log_dir) would create only the parent of LOG_DIR, and would be
+    # "" (which os.makedirs rejects) for a bare relative name such as "logs".
+    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
+
+    # Get log file max size and backup count from environment variables
+    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
+    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups
+
+    logging.config.dictConfig(
+        {
+            "version": 1,
+            "disable_existing_loggers": False,
+            "formatters": {
+                "default": {
+                    "format": "%(levelname)s: %(message)s",
+                },
+                "detailed": {
+                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+                },
+            },
+            "handlers": {
+                "console": {
+                    "formatter": "default",
+                    "class": "logging.StreamHandler",
+                    "stream": "ext://sys.stderr",
+                },
+                "file": {
+                    "formatter": "detailed",
+                    "class": "logging.handlers.RotatingFileHandler",
+                    "filename": log_file_path,
+                    "maxBytes": log_max_bytes,
+                    "backupCount": log_backup_count,
+                    "encoding": "utf-8",
+                },
+            },
+            "loggers": {
+                "lightrag": {
+                    "handlers": ["console", "file"],
+                    "level": "INFO",
+                    "propagate": False,
+                },
+            },
+        }
+    )
+
+    # Set the logger level to INFO
+    logger.setLevel(logging.INFO)
+    # Enable verbose debug if needed
+    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")
+
+
+# Create the working directory if needed. exist_ok=True avoids the
+# check-then-create race of testing os.path.exists() first.
+os.makedirs(WORKING_DIR, exist_ok=True)
+
+
+async def llm_model_func(
+    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs
+) -> str:
+    """Complete ``prompt`` through an OpenAI-compatible endpoint.
+
+    The model comes from ``LLM_MODEL`` (default ``deepseek-chat``), the base
+    URL from ``LLM_BINDING_HOST`` (default ``https://api.deepseek.com``), and
+    the API key from ``LLM_BINDING_API_KEY`` or, failing that,
+    ``OPENAI_API_KEY``.
+
+    Args:
+        prompt: User prompt forwarded to ``openai_complete_if_cache``.
+        system_prompt: Optional system prompt, forwarded unchanged.
+        history_messages: Prior conversation messages. ``None`` (the default)
+            is replaced by a fresh empty list on every call, so no list object
+            is shared between calls.
+        keyword_extraction: Accepted as a named parameter but not forwarded to
+            ``openai_complete_if_cache``.
+        **kwargs: Extra keyword arguments forwarded to
+            ``openai_complete_if_cache``.
+
+    Returns:
+        The result of ``openai_complete_if_cache`` for this request.
+    """
+    if history_messages is None:
+        history_messages = []
+    return await openai_complete_if_cache(
+        os.getenv("LLM_MODEL", "deepseek-chat"),
+        prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        api_key=os.getenv("LLM_BINDING_API_KEY") or os.getenv("OPENAI_API_KEY"),
+        base_url=os.getenv("LLM_BINDING_HOST", "https://api.deepseek.com"),
+        **kwargs,
+    )
+
+
+async def print_stream(stream):
+    """Print each truthy chunk of an async stream, flushing after every chunk.
+
+    Falsy chunks are skipped and no newline is appended.
+    """
+    async for piece in stream:
+        if not piece:
+            continue
+        print(piece, end="", flush=True)
+
+
+async def initialize_rag():
+    """Create a LightRAG instance with an OpenAI-compatible LLM and Ollama embeddings.
+
+    The embedding model, host and sizes are read from environment variables,
+    falling back to the defaults written below.
+
+    Returns:
+        The LightRAG instance after ``initialize_storages()`` has completed.
+    """
+    # Note: ollama_embed is decorated with @wrap_embedding_func_with_attrs,
+    # which wraps it in an EmbeddingFunc. Using .func accesses the original
+    # unwrapped function to avoid double wrapping when we create our own
+    # EmbeddingFunc with custom configuration (embedding_dim, max_token_size).
+    raw_embed = partial(
+        ollama_embed.func,
+        embed_model=os.getenv("EMBEDDING_MODEL", "bge-m3:latest"),
+        host=os.getenv("EMBEDDING_BINDING_HOST", "http://localhost:11434"),
+    )
+    embedder = EmbeddingFunc(
+        embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")),
+        max_token_size=int(os.getenv("MAX_EMBED_TOKENS", "8192")),
+        func=raw_embed,
+    )
+
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        embedding_func=embedder,
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+async def main():
+    """Rebuild the demo index from ``./book.txt`` and stream one query per mode.
+
+    Steps: delete stale index files from ``WORKING_DIR``, initialize LightRAG,
+    run one test embedding, insert the book, then ask the same question in
+    naive, local, global and hybrid mode. Any exception is printed; storages
+    are finalized whenever the instance was created.
+    """
+    # Bind up front so the ``finally`` clause can tell whether initialization
+    # succeeded instead of raising UnboundLocalError and hiding the real error.
+    rag = None
+    try:
+        # Clear old data files
+        files_to_delete = [
+            "graph_chunk_entity_relation.graphml",
+            "kv_store_doc_status.json",
+            "kv_store_full_docs.json",
+            "kv_store_text_chunks.json",
+            "vdb_chunks.json",
+            "vdb_entities.json",
+            "vdb_relationships.json",
+        ]
+
+        for file in files_to_delete:
+            file_path = os.path.join(WORKING_DIR, file)
+            if os.path.exists(file_path):
+                os.remove(file_path)
+                print(f"Deleting old file:: {file_path}")
+
+        # Initialize RAG instance
+        rag = await initialize_rag()
+
+        # Test embedding function
+        test_text = ["This is a test string for embedding."]
+        embedding = await rag.embedding_func(test_text)
+        embedding_dim = embedding.shape[1]
+        print("\n=======================")
+        print("Test embedding function")
+        print("========================")
+        print(f"Test dict: {test_text}")
+        print(f"Detected embedding dimension: {embedding_dim}\n\n")
+
+        with open("./book.txt", "r", encoding="utf-8") as f:
+            await rag.ainsert(f.read())
+
+        # Ask the same question in every retrieval mode.
+        for mode in ("naive", "local", "global", "hybrid"):
+            print("\n=====================")
+            print(f"Query mode: {mode}")
+            print("=====================")
+            resp = await rag.aquery(
+                "What are the top themes in this story?",
+                param=QueryParam(mode=mode, stream=True),
+            )
+            # The response is streamed only when it is an async generator;
+            # anything else is printed as a whole.
+            if inspect.isasyncgen(resp):
+                await print_stream(resp)
+            else:
+                print(resp)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+    finally:
+        if rag is not None:
+            await rag.finalize_storages()
+
+
+if __name__ == "__main__":
+    # Configure logging before running the main function
+    configure_logging()
+    asyncio.run(main())
+    print("\nDone!")

+ 187 - 0
examples/lightrag_openai_demo.py

@@ -0,0 +1,187 @@
+import os
+import asyncio
+import logging
+import logging.config
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
+from lightrag.utils import logger, set_verbose_debug
+
+# Directory handed to LightRAG as working_dir; main() also clears old storage files here
+WORKING_DIR = "./dickens"
+
+
+def configure_logging():
+    """Configure console and rotating-file logging for the demo.
+
+    Environment variables read:
+        LOG_DIR: directory that holds ``lightrag_demo.log``
+            (default: current working directory).
+        LOG_MAX_BYTES: size in bytes at which the log file rotates
+            (default: 10485760).
+        LOG_BACKUP_COUNT: number of rotated files to keep (default: 5).
+        VERBOSE_DEBUG: "true" (case-insensitive) enables verbose debug.
+    """
+
+    # Reset any existing handlers to ensure clean configuration
+    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
+        logger_instance = logging.getLogger(logger_name)
+        logger_instance.handlers = []
+        logger_instance.filters = []
+
+    # Get log directory path from environment variable or use current directory
+    log_dir = os.getenv("LOG_DIR", os.getcwd())
+    log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag_demo.log"))
+
+    print(f"\nLightRAG demo log file: {log_file_path}\n")
+    # Create the directory that actually contains the log file. Using the
+    # dirname of the absolute file path (rather than the dirname of LOG_DIR)
+    # works for relative values such as LOG_DIR=logs and for nested paths.
+    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
+
+    # Get log file max size and backup count from environment variables
+    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
+    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups
+
+    logging.config.dictConfig(
+        {
+            "version": 1,
+            "disable_existing_loggers": False,
+            "formatters": {
+                "default": {
+                    "format": "%(levelname)s: %(message)s",
+                },
+                "detailed": {
+                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+                },
+            },
+            "handlers": {
+                "console": {
+                    "formatter": "default",
+                    "class": "logging.StreamHandler",
+                    "stream": "ext://sys.stderr",
+                },
+                "file": {
+                    "formatter": "detailed",
+                    "class": "logging.handlers.RotatingFileHandler",
+                    "filename": log_file_path,
+                    "maxBytes": log_max_bytes,
+                    "backupCount": log_backup_count,
+                    "encoding": "utf-8",
+                },
+            },
+            "loggers": {
+                "lightrag": {
+                    "handlers": ["console", "file"],
+                    "level": "INFO",
+                    "propagate": False,
+                },
+            },
+        }
+    )
+
+    # Set the logger level to INFO
+    logger.setLevel(logging.INFO)
+    # Enable verbose debug if needed
+    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")
+
+
+# Make sure the working directory exists. exist_ok=True makes this idempotent
+# and avoids the window between a separate existence check and the mkdir call.
+os.makedirs(WORKING_DIR, exist_ok=True)
+
+
+async def initialize_rag():
+    """Create the demo's LightRAG instance and initialize its storages.
+
+    Returns:
+        The LightRAG instance, after ``initialize_storages()`` has been awaited.
+    """
+    rag_options = {
+        "working_dir": WORKING_DIR,
+        "embedding_func": openai_embed,
+        "llm_model_func": gpt_4o_mini_complete,
+    }
+    instance = LightRAG(**rag_options)
+
+    # Auto-initializes pipeline_status
+    await instance.initialize_storages()
+    return instance
+
+
+async def main():
+    """Run the OpenAI demo: index ./book.txt and query it in four modes.
+
+    Returns early with a hint when OPENAI_API_KEY is not set. Otherwise old
+    storage files in WORKING_DIR are deleted, the embedding function is
+    probed once, the book is inserted, and the same question is asked in
+    naive, local, global and hybrid mode. Exceptions are printed rather than
+    re-raised, and storages are finalized if the RAG instance was created.
+    """
+    # Check if OPENAI_API_KEY environment variable exists
+    if not os.getenv("OPENAI_API_KEY"):
+        print(
+            "Error: OPENAI_API_KEY environment variable is not set. Please set this variable before running the program."
+        )
+        print("You can set the environment variable by running:")
+        print("  export OPENAI_API_KEY='your-openai-api-key'")
+        return  # Exit the async function
+
+    # Bind the name before entering the try block: if file cleanup or
+    # initialization fails, the finally clause must still be able to test it
+    # instead of raising UnboundLocalError and hiding the real error.
+    rag = None
+    try:
+        # Clear old data files
+        files_to_delete = [
+            "graph_chunk_entity_relation.graphml",
+            "kv_store_doc_status.json",
+            "kv_store_full_docs.json",
+            "kv_store_text_chunks.json",
+            "vdb_chunks.json",
+            "vdb_entities.json",
+            "vdb_relationships.json",
+        ]
+
+        for file in files_to_delete:
+            file_path = os.path.join(WORKING_DIR, file)
+            if os.path.exists(file_path):
+                os.remove(file_path)
+                print(f"Deleting old file:: {file_path}")
+
+        # Initialize RAG instance
+        rag = await initialize_rag()
+
+        # Test embedding function
+        test_text = ["This is a test string for embedding."]
+        embedding = await rag.embedding_func(test_text)
+        embedding_dim = embedding.shape[1]
+        print("\n=======================")
+        print("Test embedding function")
+        print("========================")
+        print(f"Test dict: {test_text}")
+        print(f"Detected embedding dimension: {embedding_dim}\n\n")
+
+        with open("./book.txt", "r", encoding="utf-8") as f:
+            await rag.ainsert(f.read())
+
+        # Ask the same question in every retrieval mode, in the original order
+        question = "What are the top themes in this story?"
+        for mode in ("naive", "local", "global", "hybrid"):
+            print("\n=====================")
+            print(f"Query mode: {mode}")
+            print("=====================")
+            print(await rag.aquery(question, param=QueryParam(mode=mode)))
+    except Exception as e:
+        print(f"An error occurred: {e}")
+    finally:
+        if rag is not None:
+            await rag.finalize_storages()
+
+
+if __name__ == "__main__":
+    # Script entry point: logging is configured first, then the async demo
+    # is run to completion before the final status line is printed.
+    configure_logging()
+    asyncio.run(main())
+    print("\nDone!")

+ 108 - 0
examples/lightrag_openai_mongodb_graph_demo.py

@@ -0,0 +1,108 @@
+import os
+import asyncio
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
+from lightrag.utils import EmbeddingFunc
+import numpy as np
+
+#########
+# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
+# import nest_asyncio
+# nest_asyncio.apply()
+#########
+WORKING_DIR = "./mongodb_test_dir"
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+
+# Placeholder defaults for the demo. setdefault keeps any value that is
+# already exported in the environment, so a real API key or MongoDB URI is
+# not silently replaced by these placeholders.
+os.environ.setdefault("OPENAI_API_KEY", "sk-")
+os.environ.setdefault("MONGO_URI", "mongodb://0.0.0.0:27017/?directConnection=true")
+os.environ.setdefault("MONGO_DATABASE", "LightRAG")
+os.environ.setdefault("MONGO_KG_COLLECTION", "MDB_KG")
+
+# Embedding Configuration and Functions
+EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large")
+EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192))
+
+
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    """Embed ``texts`` through OpenAI using the configured EMBEDDING_MODEL."""
+    # Note: openai_embed is decorated with @wrap_embedding_func_with_attrs,
+    # which wraps it in an EmbeddingFunc. The original function is reached
+    # through .func so that it is not wrapped twice once
+    # create_embedding_function_instance() builds its own EmbeddingFunc.
+    unwrapped_embed = openai_embed.func
+    vectors = await unwrapped_embed(texts, model=EMBEDDING_MODEL)
+    return vectors
+
+
+async def get_embedding_dimension():
+    """Embed one probe sentence and return the size of axis 1 of the result."""
+    probe = await embedding_func(["This is a test sentence."])
+    return probe.shape[1]
+
+
+async def create_embedding_function_instance():
+    """Wrap ``embedding_func`` in an EmbeddingFunc sized from a live probe."""
+    # The dimension is measured at runtime rather than hard-coded
+    measured_dim = await get_embedding_dimension()
+    wrapper = EmbeddingFunc(
+        func=embedding_func,
+        embedding_dim=measured_dim,
+        max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
+    )
+    return wrapper
+
+
+async def initialize_rag():
+    """Create a LightRAG instance that uses MongoGraphStorage and initialize it.
+
+    Returns:
+        The LightRAG instance, after ``initialize_storages()`` has been awaited.
+    """
+    embedder = await create_embedding_function_instance()
+
+    settings = {
+        "working_dir": WORKING_DIR,
+        "llm_model_func": gpt_4o_mini_complete,
+        "embedding_func": embedder,
+        "graph_storage": "MongoGraphStorage",
+        "log_level": "DEBUG",
+    }
+    instance = LightRAG(**settings)
+
+    # Auto-initializes pipeline_status
+    await instance.initialize_storages()
+    return instance
+
+
+def main():
+    """Index ./book.txt and print the answer to one question in four modes."""
+    # Initialize RAG instance
+    rag = asyncio.run(initialize_rag())
+
+    with open("./book.txt", "r", encoding="utf-8") as book:
+        rag.insert(book.read())
+
+    question = "What are the top themes in this story?"
+
+    # Naive, local, global and hybrid search, in that order
+    for mode in ("naive", "local", "global", "hybrid"):
+        answer = rag.query(question, param=QueryParam(mode=mode))
+        print(answer)
+
+
+if __name__ == "__main__":
+    # Script entry point; main() is synchronous and drives the demo itself
+    main()

+ 178 - 0
examples/lightrag_openai_opensearch_graph_demo.py

@@ -0,0 +1,178 @@
+"""
+LightRAG Demo with OpenSearch + OpenAI
+
+This example demonstrates how to use LightRAG with:
+- OpenAI (LLM + Embeddings)
+- OpenSearch-backed storages for:
+  - KV storage
+  - Vector storage (k-NN)
+  - Graph storage (dual-index nodes + edges)
+  - Document status storage
+
+Prerequisites:
+1. OpenSearch cluster running and accessible (3.x or higher with k-NN plugin)
+2. Required indices will be auto-created by LightRAG
+3. Set environment variables (example .env):
+
+   OPENSEARCH_HOSTS=localhost:9200
+   OPENSEARCH_USER=admin
+   OPENSEARCH_PASSWORD=your-password
+   OPENSEARCH_USE_SSL=false
+   OPENSEARCH_VERIFY_CERTS=false
+
+   OPENAI_API_KEY=your-api-key
+
+4. Prepare a text file to index (default: ./book.txt)
+
+Usage:
+    python examples/lightrag_openai_opensearch_graph_demo.py
+"""
+
+import os
+import asyncio
+import numpy as np
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
+from lightrag.utils import setup_logger, EmbeddingFunc
+
+
+# --------------------------------------------------
+# Logger
+# --------------------------------------------------
+setup_logger("lightrag", level="INFO")
+
+
+# --------------------------------------------------
+# Config
+# --------------------------------------------------
+WORKING_DIR = "./opensearch_rag_storage"
+BOOK_FILE = "./book.txt"
+
+# exist_ok=True makes directory creation idempotent and avoids the gap
+# between a separate existence check and the mkdir call.
+os.makedirs(WORKING_DIR, exist_ok=True)
+
+# Replace with your API key, or set via environment variable
+if not os.getenv("OPENAI_API_KEY"):
+    os.environ["OPENAI_API_KEY"] = "sk-"
+
+# Embedding settings, overridable through the environment
+EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large")
+EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192))
+
+
+# --------------------------------------------------
+# Embedding function (OpenAI)
+# --------------------------------------------------
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    """Embed ``texts`` through OpenAI using the configured EMBEDDING_MODEL."""
+    # openai_embed.func is called directly; this coroutine is wrapped in an
+    # EmbeddingFunc by create_embedding_function_instance().
+    unwrapped_embed = openai_embed.func
+    vectors = await unwrapped_embed(texts, model=EMBEDDING_MODEL)
+    return vectors
+
+
+async def get_embedding_dimension():
+    """Embed one probe sentence and return the size of axis 1 of the result."""
+    probe = await embedding_func(["This is a test sentence."])
+    return probe.shape[1]
+
+
+async def create_embedding_function_instance():
+    """Wrap ``embedding_func`` in an EmbeddingFunc sized from a live probe."""
+    # The dimension is measured at runtime rather than hard-coded
+    measured_dim = await get_embedding_dimension()
+    wrapper = EmbeddingFunc(
+        func=embedding_func,
+        embedding_dim=measured_dim,
+        max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
+    )
+    return wrapper
+
+
+# --------------------------------------------------
+# Initialize RAG with OpenSearch storages
+# --------------------------------------------------
+async def initialize_rag() -> LightRAG:
+    """Create a LightRAG instance on OpenSearch storages and clear old data.
+
+    Returns:
+        The initialized LightRAG instance, with the listed storages dropped
+        so that the example can be run repeatedly.
+    """
+    embedder = await create_embedding_function_instance()
+
+    # OpenSearch-backed storages
+    opensearch_backends = {
+        "kv_storage": "OpenSearchKVStorage",
+        "doc_status_storage": "OpenSearchDocStatusStorage",
+        "graph_storage": "OpenSearchGraphStorage",
+        "vector_storage": "OpenSearchVectorDBStorage",
+    }
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=gpt_4o_mini_complete,
+        embedding_func=embedder,
+        **opensearch_backends,
+    )
+
+    # REQUIRED: initialize all storage backends
+    await rag.initialize_storages()
+
+    # Clean previous data so the example is re-runnable
+    # (LLM response cache is preserved for faster reruns)
+    stale_storages = (
+        rag.full_docs,
+        rag.text_chunks,
+        rag.full_entities,
+        rag.full_relations,
+        rag.entity_chunks,
+        rag.relation_chunks,
+        rag.entities_vdb,
+        rag.relationships_vdb,
+        rag.chunks_vdb,
+        rag.chunk_entity_relation_graph,
+        rag.doc_status,
+    )
+    for stale in stale_storages:
+        await stale.drop()
+    print("Cleared previous data.")
+
+    return rag
+
+
+# --------------------------------------------------
+# Main
+# --------------------------------------------------
+async def main():
+    """Index BOOK_FILE into LightRAG and run one query in four modes.
+
+    Any exception is printed together with its traceback instead of being
+    re-raised; storages are finalized if the RAG instance was created.
+    """
+    rag = None
+    try:
+        print("Initializing LightRAG with OpenSearch + OpenAI...")
+        rag = await initialize_rag()
+
+        book_missing = not os.path.exists(BOOK_FILE)
+        if book_missing:
+            raise FileNotFoundError(
+                f"'{BOOK_FILE}' not found. Please provide a text file to index."
+            )
+
+        print(f"\nReading document: {BOOK_FILE}")
+        with open(BOOK_FILE, "r", encoding="utf-8") as book:
+            content = book.read()
+
+        print(f"Loaded document ({len(content)} characters)")
+
+        print("\nInserting document into LightRAG (this may take some time)...")
+        await rag.ainsert(content)
+        print("Document indexed successfully!")
+
+        separator = "=" * 60
+        print("\n" + separator)
+        print("Running sample queries")
+        print(separator)
+
+        query = "What are the top themes in this document?"
+
+        modes = ("naive", "local", "global", "hybrid")
+        for mode in modes:
+            print(f"\n[{mode.upper()} MODE]")
+            answer = await rag.aquery(query, param=QueryParam(mode=mode))
+            print(answer)
+
+        print("\nRAG system is ready for use!")
+
+    except Exception as e:
+        print("An error occurred:", e)
+        # Imported here so the module is only loaded on the failure path
+        import traceback
+
+        traceback.print_exc()
+
+    finally:
+        if rag is not None:
+            await rag.finalize_storages()
+
+
+if __name__ == "__main__":
+    # Script entry point: run the async demo to completion
+    asyncio.run(main())

+ 180 - 0
examples/lightrag_vllm_demo.py

@@ -0,0 +1,180 @@
+"""
+LightRAG Demo with vLLM (LLM, Embeddings, and Reranker)
+
+This example demonstrates how to use LightRAG with:
+- vLLM-served LLM (OpenAI-compatible API)
+- vLLM-served embedding model
+- Jina-compatible reranker (also vLLM-served)
+
+Prerequisites:
+    1. Create a .env file or export environment variables:
+       - LLM_MODEL
+       - LLM_BINDING_HOST
+       - LLM_BINDING_API_KEY
+       - EMBEDDING_MODEL
+       - EMBEDDING_BINDING_HOST
+       - EMBEDDING_BINDING_API_KEY
+       - EMBEDDING_DIM
+       - EMBEDDING_TOKEN_LIMIT
+       - RERANK_MODEL
+       - RERANK_BINDING_HOST
+       - RERANK_BINDING_API_KEY
+
+    2. Prepare a text file to index (default: Data/book-small.txt)
+
+    3. Configure storage backends via environment variables or modify
+       the storage parameters in initialize_rag() below.
+
+Usage:
+    python examples/lightrag_vllm_demo.py
+"""
+
+import os
+import asyncio
+from functools import partial
+from dotenv import load_dotenv
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
+from lightrag.rerank import jina_rerank
+
+# Called before the os.getenv() lookups below so that values loaded by
+# python-dotenv are visible to them
+load_dotenv()
+
+# --------------------------------------------------
+# Constants
+# --------------------------------------------------
+
+# Directory passed to LightRAG as working_dir
+WORKING_DIR = "./LightRAG_Data"
+# Text file that main() reads and indexes
+BOOK_FILE = "Data/book-small.txt"
+
+# --------------------------------------------------
+# LLM function (vLLM, OpenAI-compatible)
+# --------------------------------------------------
+
+
+async def llm_model_func(
+    prompt, system_prompt=None, history_messages=None, **kwargs
+) -> str:
+    """Send a completion request to the vLLM server via the OpenAI-compatible API.
+
+    Args:
+        prompt: User prompt text.
+        system_prompt: Optional system prompt.
+        history_messages: Optional list of earlier messages; defaults to an
+            empty list.
+        **kwargs: Forwarded unchanged to ``openai_complete_if_cache``.
+
+    Returns:
+        Whatever ``openai_complete_if_cache`` returns for the request.
+
+    The model, endpoint and API key are read from LLM_MODEL,
+    LLM_BINDING_HOST and LLM_BINDING_API_KEY, with demo defaults.
+    """
+    # A fresh list is created per call; a mutable ``[]`` default would be one
+    # shared object reused (and possibly mutated) across calls.
+    if history_messages is None:
+        history_messages = []
+    return await openai_complete_if_cache(
+        model=os.getenv("LLM_MODEL", "Qwen/Qwen3-14B-AWQ"),
+        prompt=prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        base_url=os.getenv("LLM_BINDING_HOST", "http://0.0.0.0:4646/v1"),
+        api_key=os.getenv("LLM_BINDING_API_KEY", "not_needed"),
+        timeout=600,
+        **kwargs,
+    )
+
+
+# --------------------------------------------------
+# Embedding function (vLLM)
+# --------------------------------------------------
+
+# Embedding wrapper handed to LightRAG. Model name, dimension, token limit,
+# endpoint and API key are each read from the environment, falling back to
+# the defaults written here.
+vLLM_emb_func = EmbeddingFunc(
+    model_name=os.getenv("EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-0.6B"),
+    send_dimensions=False,
+    embedding_dim=int(os.getenv("EMBEDDING_DIM", 1024)),
+    max_token_size=int(os.getenv("EMBEDDING_TOKEN_LIMIT", 4096)),
+    # partial pre-binds model, endpoint and key onto openai_embed.func, so
+    # the resulting callable only needs the texts to embed
+    func=partial(
+        openai_embed.func,
+        model=os.getenv("EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-0.6B"),
+        base_url=os.getenv(
+            "EMBEDDING_BINDING_HOST",
+            "http://0.0.0.0:1234/v1",
+        ),
+        api_key=os.getenv("EMBEDDING_BINDING_API_KEY", "not_needed"),
+    ),
+)
+
+# --------------------------------------------------
+# Reranker (Jina-compatible, vLLM-served)
+# --------------------------------------------------
+
+# Reranker callable handed to LightRAG: jina_rerank with model, API key and
+# endpoint pre-bound from the environment.
+# NOTE(review): unlike the LLM and embedding keys, RERANK_BINDING_API_KEY has
+# no fallback and is None when unset -- confirm jina_rerank accepts that.
+jina_rerank_model_func = partial(
+    jina_rerank,
+    model=os.getenv("RERANK_MODEL", "Qwen/Qwen3-Reranker-0.6B"),
+    api_key=os.getenv("RERANK_BINDING_API_KEY"),
+    base_url=os.getenv(
+        "RERANK_BINDING_HOST",
+        "http://0.0.0.0:3535/v1/rerank",
+    ),
+)
+
+# --------------------------------------------------
+# Initialize RAG
+# --------------------------------------------------
+
+
+async def initialize_rag():
+    """Create a LightRAG instance wired to the vLLM functions and initialize it.
+
+    Each storage backend is taken from its environment variable, with the
+    default named below used when the variable is unset.
+
+    Returns:
+        The LightRAG instance, after ``initialize_storages()`` has been awaited.
+    """
+    # Storage backends (configurable via environment or modify here)
+    storage_backends = {
+        "kv_storage": os.getenv("KV_STORAGE", "PGKVStorage"),
+        "doc_status_storage": os.getenv("DOC_STATUS_STORAGE", "PGDocStatusStorage"),
+        "vector_storage": os.getenv("VECTOR_STORAGE", "PGVectorStorage"),
+        "graph_storage": os.getenv("GRAPH_STORAGE", "Neo4JStorage"),
+    }
+    instance = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        embedding_func=vLLM_emb_func,
+        rerank_model_func=jina_rerank_model_func,
+        **storage_backends,
+    )
+
+    await instance.initialize_storages()
+    return instance
+
+
+# --------------------------------------------------
+# Main
+# --------------------------------------------------
+
+
+async def main():
+    """Index BOOK_FILE and run one hybrid query with reranking enabled.
+
+    Exceptions are printed rather than re-raised; storages are finalized if
+    the RAG instance was created.
+    """
+    rag = None
+    try:
+        # Validate book file exists
+        book_missing = not os.path.exists(BOOK_FILE)
+        if book_missing:
+            raise FileNotFoundError(
+                f"'{BOOK_FILE}' not found. Please provide a text file to index."
+            )
+
+        rag = await initialize_rag()
+
+        # --------------------------------------------------
+        # Data Ingestion
+        # --------------------------------------------------
+        print(f"Indexing {BOOK_FILE}...")
+        with open(BOOK_FILE, "r", encoding="utf-8") as book:
+            await rag.ainsert(book.read())
+        print("Indexing complete.")
+
+        # --------------------------------------------------
+        # Query
+        # --------------------------------------------------
+        query = (
+            "What are the main themes of the book, and how do the key characters "
+            "evolve throughout the story?"
+        )
+
+        print("\nHybrid Search with Reranking:")
+        rerank_params = QueryParam(
+            mode="hybrid",
+            stream=False,
+            enable_rerank=True,
+        )
+        result = await rag.aquery(query, param=rerank_params)
+
+        print("\nResult:\n", result)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+    finally:
+        if rag:
+            await rag.finalize_storages()
+
+
+if __name__ == "__main__":
+    # Script entry point: run the async demo to completion, then report
+    asyncio.run(main())
+    print("\nDone!")

+ 113 - 0
examples/milvus_kwargs_configuration_demo.py

@@ -0,0 +1,113 @@
+"""
+Example: Configuring Milvus Index Parameters via vector_db_storage_cls_kwargs
+
+This example demonstrates how to configure Milvus indexing parameters through
+vector_db_storage_cls_kwargs, which is the recommended approach when using
+frameworks that build on top of LightRAG (like RAGAnything).
+
+This approach allows configuration to be passed through framework layers without
+requiring environment variable changes or direct code modifications.
+"""
+
+import os
+import asyncio
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+
+
+async def main():
+    """Configure a Milvus HNSW index through kwargs, then insert and query.
+
+    The Milvus index settings are passed to LightRAG in
+    ``vector_db_storage_cls_kwargs``. After storages are initialized, the
+    effective index configuration is printed, a sample text is inserted and
+    one hybrid query is run. Storages are finalized even if a step fails.
+    """
+    # Configure Milvus connection. setdefault keeps a MILVUS_URI that is
+    # already exported instead of overwriting it with the local default.
+    os.environ.setdefault("MILVUS_URI", "http://localhost:19530")
+    # os.environ["MILVUS_USER"] = "root"
+    # os.environ["MILVUS_PASSWORD"] = "your_password"
+    # os.environ["MILVUS_DB_NAME"] = "lightrag"
+
+    # Initialize LightRAG with Milvus index configuration via vector_db_storage_cls_kwargs
+    # This is the recommended approach for framework integration (e.g., RAGAnything)
+    rag = LightRAG(
+        working_dir="./demo_index",
+        llm_model_func=openai_complete_if_cache,
+        embedding_func=openai_embed,
+        # Specify Milvus as the vector storage backend
+        vector_storage="MilvusVectorDBStorage",
+        # Configure Milvus indexing parameters via vector_db_storage_cls_kwargs
+        # These parameters are extracted and passed to MilvusIndexConfig
+        vector_db_storage_cls_kwargs={
+            # Required parameter for all vector storage backends
+            "cosine_better_than_threshold": 0.2,
+            # Milvus index configuration parameters
+            # All of these can be configured via vector_db_storage_cls_kwargs
+            # Index type (AUTOINDEX, HNSW, HNSW_SQ, IVF_FLAT, etc.)
+            "index_type": "HNSW",
+            # Distance metric (COSINE, L2, IP)
+            "metric_type": "COSINE",
+            # HNSW parameters
+            "hnsw_m": 32,  # Number of connections per layer (2-2048)
+            "hnsw_ef_construction": 256,  # Size of dynamic candidate list during construction
+            "hnsw_ef": 150,  # Size of dynamic candidate list during search
+            # IVF parameters (used when index_type is IVF_FLAT, IVF_SQ8, IVF_PQ)
+            # "ivf_nlist": 2048,              # Number of cluster units
+            # "ivf_nprobe": 32,               # Number of units to query
+            # HNSW_SQ parameters (requires Milvus 2.6.8+)
+            # "sq_type": "SQ8",               # Quantization type (SQ4U, SQ6, SQ8, BF16, FP16)
+            # "sq_refine": True,              # Enable refinement
+            # "sq_refine_type": "FP32",       # Refinement type
+            # "sq_refine_k": 20,              # Number of candidates to refine
+        },
+    )
+
+    # Initialize storage backends
+    await rag.initialize_storages()
+
+    # Everything after initialization runs under try/finally so the storages
+    # are finalized even when printing, inserting or querying raises.
+    try:
+        print(
+            "✅ LightRAG initialized with Milvus index configuration via vector_db_storage_cls_kwargs"
+        )
+        # Look the index configuration up once instead of once per line
+        index_config = rag.vector_db_storages["entities"].index_config
+        print(f"   Index Type: {index_config.index_type}")
+        print(f"   Metric Type: {index_config.metric_type}")
+        print(f"   HNSW M: {index_config.hnsw_m}")
+        print(f"   HNSW EF Construction: {index_config.hnsw_ef_construction}")
+        print(f"   HNSW EF: {index_config.hnsw_ef}")
+
+        # Example: Insert some text
+        sample_text = """
+    LightRAG is a Retrieval-Augmented Generation framework that uses graph-based
+    knowledge representation for enhanced information retrieval. It supports multiple
+    vector storage backends including Milvus, which offers advanced indexing options
+    for optimal performance.
+    """
+
+        await rag.ainsert(sample_text)
+        print("\n✅ Sample text inserted")
+
+        # Example: Query with different modes
+        result = await rag.aquery("What is LightRAG?", param=QueryParam(mode="hybrid"))
+        print(f"\n✅ Query result: {result[:200]}...")
+    finally:
+        # Cleanup
+        await rag.finalize_storages()
+
+
+if __name__ == "__main__":
+    # Introductory banner, one entry per printed line, followed by the demo
+    rule = "=" * 80
+    banner_lines = (
+        rule,
+        "Milvus Configuration via vector_db_storage_cls_kwargs Example",
+        rule,
+        "",
+        "This example shows how to configure Milvus indexing parameters through",
+        "vector_db_storage_cls_kwargs, which is ideal for framework integration.",
+        "",
+        "Key Benefits:",
+        "  • No environment variable changes required",
+        "  • Configuration can be passed through framework layers",
+        "  • Perfect for RAGAnything and similar frameworks",
+        "  • All 11 index parameters are supported",
+        "",
+        rule,
+        "",
+    )
+    for banner_line in banner_lines:
+        print(banner_line)
+
+    asyncio.run(main())

+ 355 - 0
examples/opensearch_storage_demo.py

@@ -0,0 +1,355 @@
+"""
+Integration test for OpenSearch Storage in LightRAG.
+
+Tests all 4 storage types against a live OpenSearch cluster:
+- KV Storage: CRUD, filter_keys
+- DocStatus Storage: CRUD, pagination (PIT + search_after), status counts
+- Graph Storage: nodes, edges, BFS traversal, search_labels
+- Vector Storage: k-NN upsert, query, get/delete
+
+Prerequisites:
+    OpenSearch cluster running with k-NN plugin enabled.
+    Set env vars: OPENSEARCH_HOSTS, OPENSEARCH_USER, OPENSEARCH_PASSWORD,
+                  OPENSEARCH_USE_SSL, OPENSEARCH_VERIFY_CERTS
+
+Usage:
+    OPENSEARCH_HOSTS=localhost:9200 OPENSEARCH_USER=admin \
+    OPENSEARCH_PASSWORD=<password> OPENSEARCH_USE_SSL=true \
+    OPENSEARCH_VERIFY_CERTS=false python examples/opensearch_storage_demo.py
+"""
+
+import asyncio
+import numpy as np
+from lightrag.kg.opensearch_impl import (
+    OpenSearchKVStorage,
+    OpenSearchDocStatusStorage,
+    OpenSearchGraphStorage,
+    OpenSearchVectorDBStorage,
+    ClientManager,
+)
+from lightrag.kg.shared_storage import initialize_share_data
+from lightrag.base import DocStatus
+
+
+class MockEmbeddingFunc:
+    """Stand-in embedding callable that returns random float32 vectors."""
+
+    def __init__(self, dim=128):
+        # Fixed metadata plus the requested vector width
+        self.model_name = "mock-embedding"
+        self.max_token_size = 512
+        self.embedding_dim = dim
+
+    async def __call__(self, texts, **kwargs):
+        # One random row per input text; extra keyword arguments are ignored
+        rows, cols = len(texts), self.embedding_dim
+        random_vectors = np.random.rand(rows, cols)
+        return random_vectors.astype(np.float32)
+
+
+# global_config passed to every storage constructed in this script
+CONFIG = {
+    "embedding_batch_num": 10,
+    "max_graph_nodes": 1000,
+    "vector_db_storage_cls_kwargs": {"cosine_better_than_threshold": 0.2},
+}
+# Shared mock embedding function (default dimension: 128)
+EMBED = MockEmbeddingFunc()
+# Running totals updated by check()
+PASSED = 0
+FAILED = 0
+
+
+def check(condition, msg):
+    """Print a pass or fail line for ``msg`` and bump the matching global counter."""
+    global PASSED, FAILED
+    if not condition:
+        print(f"  ✗ {msg}")
+        FAILED += 1
+        return
+    print(f"  ✓ {msg}")
+    PASSED += 1
+
+
+async def test_connection_manager():
+    """Check that two get_client() calls return the same object, then release both."""
+    print("\n=== Connection Manager ===")
+    first = await ClientManager.get_client()
+    second = await ClientManager.get_client()
+    check(first is second, "Singleton pattern (same instance)")
+    # Release once per acquisition, in acquisition order
+    for acquired in (first, second):
+        await ClientManager.release_client(acquired)
+    check(True, "Released clients")
+
+
+async def test_kv_storage():
+    """Exercise upsert, lookups, filter_keys, is_empty and delete on the KV storage."""
+    print("\n=== KV Storage ===")
+    store = OpenSearchKVStorage(
+        namespace="integ_kv",
+        global_config=CONFIG,
+        embedding_func=EMBED,
+        workspace="integ",
+    )
+    await store.initialize()
+    try:
+        seed = {"k1": {"content": "hello"}, "k2": {"content": "world"}}
+        await store.upsert(seed)
+        await store.index_done_callback()
+
+        first = await store.get_by_id("k1")
+        first_ok = first is not None and first.get("content") == "hello"
+        check(first_ok, "get_by_id")
+
+        fetched = await store.get_by_ids(["k1", "k2", "missing"])
+        order_ok = fetched[0] is not None and fetched[2] is None
+        check(order_ok, "get_by_ids preserves order")
+
+        missing = await store.filter_keys({"k1", "k99"})
+        check(missing == {"k99"}, f"filter_keys: {missing}")
+
+        empty = await store.is_empty()
+        check(not empty, "is_empty=False")
+
+        await store.delete(["k2"])
+        await store.index_done_callback()
+        after_delete = await store.get_by_id("k2")
+        check(after_delete is None, "delete + verify")
+    finally:
+        # Drop the test data and finalize the storage even if a step raised
+        await store.drop()
+        await store.finalize()
+
+
+async def test_doc_status_storage():
+    """Exercise status counts, status/track-id lookups, pagination and file-path lookup."""
+    print("\n=== DocStatus Storage ===")
+    store = OpenSearchDocStatusStorage(
+        namespace="integ_ds",
+        global_config=CONFIG,
+        embedding_func=EMBED,
+        workspace="integ",
+    )
+    await store.initialize()
+    try:
+        # Insert docs: even ids are "processed", odd ids are "pending"
+        seed_docs = {}
+        for i in range(20):
+            seed_docs[f"d{i}"] = {
+                "status": "processed" if i % 2 == 0 else "pending",
+                "file_path": f"/file{i}.txt",
+                "content_summary": f"summary {i}",
+                "content_length": i * 10,
+                "chunks_count": i,
+                "created_at": 1000 + i,
+                "updated_at": 2000 + i,
+            }
+        await store.upsert(seed_docs)
+        await store.index_done_callback()
+
+        # Status counts
+        counts = await store.get_all_status_counts()
+        check(counts.get("all") == 20, f"all_status_counts: {counts}")
+        processed_count = counts.get("processed")
+        check(processed_count == 10, f"processed count: {processed_count}")
+
+        # get_docs_by_status (uses PIT + search_after)
+        processed = await store.get_docs_by_status(DocStatus.PROCESSED)
+        check(len(processed) == 10, f"get_docs_by_status(processed): {len(processed)}")
+
+        # get_docs_by_track_id (uses PIT + search_after)
+        tracked_doc = {
+            "status": "processed",
+            "file_path": "/t.txt",
+            "content_summary": "s",
+            "content_length": 1,
+            "chunks_count": 1,
+            "created_at": 100,
+            "updated_at": 200,
+            "track_id": "batch-42",
+        }
+        await store.upsert({"tracked1": tracked_doc})
+        await store.index_done_callback()
+        tracked = await store.get_docs_by_track_id("batch-42")
+        check(len(tracked) == 1, f"get_docs_by_track_id: {len(tracked)}")
+
+        # Paginated (uses PIT + search_after): 21 docs -> pages of 10, 10 and 1
+        page1, total = await store.get_docs_paginated(page=1, page_size=10)
+        check(total == 21, f"paginated total: {total}")
+        check(len(page1) == 10, f"page1 size: {len(page1)}")
+
+        page2, _ = await store.get_docs_paginated(page=2, page_size=10)
+        check(len(page2) == 10, f"page2 size: {len(page2)}")
+
+        page3, _ = await store.get_docs_paginated(page=3, page_size=10)
+        check(len(page3) == 1, f"page3 size: {len(page3)}")
+
+        # With status filter
+        _filtered, ftotal = await store.get_docs_paginated(
+            status_filter=DocStatus.PENDING, page=1, page_size=50
+        )
+        check(ftotal == 10, f"filtered total: {ftotal}")
+
+        # get_doc_by_file_path
+        by_path = await store.get_doc_by_file_path("/file0.txt")
+        path_ok = by_path is not None and by_path["_id"] == "d0"
+        check(path_ok, "get_doc_by_file_path")
+    finally:
+        # Drop the test data and finalize the storage even if a step raised
+        await store.drop()
+        await store.finalize()
+
+
+async def test_graph_storage():
+    print("\n=== Graph Storage ===")
+    s = OpenSearchGraphStorage(
+        namespace="integ_graph",
+        global_config=CONFIG,
+        embedding_func=EMBED,
+        workspace="integ",
+    )
+    await s.initialize()
+    try:
+        # Upsert nodes and edges
+        await s.upsert_node(
+            "Alice", {"entity_type": "person", "description": "A researcher"}
+        )
+        await s.upsert_node(
+            "Bob", {"entity_type": "person", "description": "A developer"}
+        )
+        await s.upsert_node(
+            "Quantum", {"entity_type": "topic", "description": "Quantum computing"}
+        )
+        await s.upsert_edge(
+            "Alice",
+            "Bob",
+            {"relationship": "knows", "weight": "1.0", "keywords": "collab"},
+        )
+        await s.upsert_edge(
+            "Alice",
+            "Quantum",
+            {"relationship": "researches", "weight": "2.0", "keywords": "research"},
+        )
+        await s.upsert_edge(
+            "Bob",
+            "Quantum",
+            {"relationship": "studies", "weight": "0.5", "keywords": "learning"},
+        )
+        await s.index_done_callback()
+
+        check(await s.has_node("Alice"), "has_node(Alice)")
+        check(not await s.has_node("Nobody"), "has_node(Nobody)=False")
+        check(await s.has_edge("Alice", "Bob"), "has_edge(Alice,Bob)")
+
+        node = await s.get_node("Alice")
+        check(node is not None and node.get("entity_type") == "person", "get_node")
+        check(node.get("entity_id") == "Alice", "entity_id field present")
+
+        check(
+            await s.node_degree("Alice") == 2,
+            f"node_degree(Alice)={await s.node_degree('Alice')}",
+        )
+
+        edges = await s.get_node_edges("Alice")
+        check(len(edges) == 2, f"get_node_edges: {len(edges)}")
+
+        # Batch ops
+        batch = await s.get_nodes_batch(["Alice", "Bob", "Missing"])
+        check("Alice" in batch and "Missing" not in batch, "get_nodes_batch")
+
+        degrees = await s.node_degrees_batch(["Alice", "Bob", "Quantum"])
+        check(degrees.get("Alice") == 2, f"node_degrees_batch: {degrees}")
+
+        # Knowledge graph (BFS)
+        kg = await s.get_knowledge_graph("Alice", max_depth=2)
+        check(len(kg.nodes) == 3, f"BFS nodes: {len(kg.nodes)}")
+        check(len(kg.edges) == 3, f"BFS edges: {len(kg.edges)}")
+
+        # get_all_labels (uses PIT)
+        labels = await s.get_all_labels()
+        check("Alice" in labels and "Bob" in labels, f"get_all_labels: {labels}")
+
+        # get_all_nodes (uses PIT)
+        all_nodes = await s.get_all_nodes()
+        check(len(all_nodes) == 3, f"get_all_nodes: {len(all_nodes)}")
+
+        # get_all_edges (uses PIT)
+        all_edges = await s.get_all_edges()
+        check(len(all_edges) == 3, f"get_all_edges: {len(all_edges)}")
+
+        # search_labels
+        found = await s.search_labels("ali", limit=10)
+        check("Alice" in found, f"search_labels('ali'): {found}")
+
+        # popular_labels
+        popular = await s.get_popular_labels(limit=10)
+        check(len(popular) > 0, f"get_popular_labels: {popular}")
+
+        # Delete node (cascading)
+        await s.delete_node("Bob")
+        await s.index_done_callback()
+        check(not await s.has_node("Bob"), "delete_node cascade")
+        check(not await s.has_edge("Alice", "Bob"), "edges removed after delete_node")
+
+        print(f"  (PPL graphlookup: {s._ppl_graphlookup_available})")
+    finally:
+        await s.drop()
+        await s.finalize()
+
+
+async def test_vector_storage():
+    print("\n=== Vector Storage ===")
+    s = OpenSearchVectorDBStorage(
+        namespace="integ_vec",
+        global_config=CONFIG,
+        embedding_func=EMBED,
+        workspace="integ",
+        meta_fields={"content", "entity_name"},
+    )
+    await s.initialize()
+    try:
+        await s.upsert(
+            {
+                "v1": {"content": "apple fruit"},
+                "v2": {"content": "banana fruit"},
+                "v3": {"content": "quantum physics"},
+            }
+        )
+        await s.index_done_callback()
+
+        results = await s.query("apple", top_k=3)
+        check(len(results) > 0, f"query returned {len(results)} results")
+        check(all("distance" in r for r in results), "results have distance")
+
+        doc = await s.get_by_id("v1")
+        check(doc is not None and doc["id"] == "v1", "get_by_id")
+
+        docs = await s.get_by_ids(["v1", "v2", "missing"])
+        check(docs[0] is not None and docs[2] is None, "get_by_ids")
+
+        vecs = await s.get_vectors_by_ids(["v1"])
+        check("v1" in vecs and len(vecs["v1"]) == 128, "get_vectors_by_ids")
+
+        await s.delete(["v3"])
+        await s.index_done_callback()
+        check(await s.get_by_id("v3") is None, "delete + verify")
+    finally:
+        await s.drop()
+        await s.finalize()
+
+
+async def main():
+    print("=" * 60)
+    print("OpenSearch Storage Integration Tests")
+    print("=" * 60)
+
+    initialize_share_data(workers=1)
+
+    try:
+        await test_connection_manager()
+        await test_kv_storage()
+        await test_doc_status_storage()
+        await test_graph_storage()
+        await test_vector_storage()
+    except Exception as e:
+        print(f"\n✗ Fatal error: {e}")
+        import traceback
+
+        traceback.print_exc()
+
+    print(f"\n{'=' * 60}")
+    print(f"Results: {PASSED} passed, {FAILED} failed")
+    print(f"{'=' * 60}")
+    if FAILED > 0:
+        exit(1)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 234 - 0
examples/rerank_example.py

@@ -0,0 +1,234 @@
+"""
+LightRAG Rerank Integration Example
+
+This example demonstrates how to use rerank functionality with LightRAG
+to improve retrieval quality across different query modes.
+
+Configuration Required:
+1. Set your OpenAI LLM API key and base URL with env vars
+    LLM_MODEL
+    LLM_BINDING_HOST
+    LLM_BINDING_API_KEY
+2. Set your OpenAI embedding API key and base URL with env vars:
+    EMBEDDING_MODEL
+    EMBEDDING_DIM
+    EMBEDDING_BINDING_HOST
+    EMBEDDING_BINDING_API_KEY
+3. Set your vLLM deployed AI rerank model setting with env vars:
+    RERANK_BINDING=cohere
+    RERANK_MODEL (e.g., answerai-colbert-small-v1 or rerank-v3.5)
+    RERANK_BINDING_HOST (e.g., https://api.cohere.com/v2/rerank or LiteLLM proxy)
+    RERANK_BINDING_API_KEY
+    RERANK_ENABLE_CHUNKING=true (optional, for models with token limits)
+    RERANK_MAX_TOKENS_PER_DOC=480 (optional, default 4096)
+
+Note: Rerank is controlled per query via the 'enable_rerank' parameter (default: True)
+"""
+
+import asyncio
+import os
+import numpy as np
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc, setup_logger
+
+from functools import partial
+from lightrag.rerank import cohere_rerank
+
+# Set up your working directory
+WORKING_DIR = "./test_rerank"
+setup_logger("test_rerank")
+
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+
+async def llm_model_func(
+    prompt, system_prompt=None, history_messages=[], **kwargs
+) -> str:
+    return await openai_complete_if_cache(
+        os.getenv("LLM_MODEL"),
+        prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        api_key=os.getenv("LLM_BINDING_API_KEY"),
+        base_url=os.getenv("LLM_BINDING_HOST"),
+        **kwargs,
+    )
+
+
+async def embedding_func(texts: list[str]) -> np.ndarray:
+    return await openai_embed(
+        texts,
+        model=os.getenv("EMBEDDING_MODEL"),
+        api_key=os.getenv("EMBEDDING_BINDING_API_KEY"),
+        base_url=os.getenv("EMBEDDING_BINDING_HOST"),
+    )
+
+
+# Rerank callable with its settings bound once, at import time, from the
+# RERANK_* environment variables. Later changes to those variables have no
+# effect on this object. Callers supply the per-request arguments (this file
+# passes query, documents and top_n).
+rerank_model_func = partial(
+    cohere_rerank,
+    model=os.getenv("RERANK_MODEL", "rerank-v3.5"),
+    api_key=os.getenv("RERANK_BINDING_API_KEY"),
+    base_url=os.getenv("RERANK_BINDING_HOST", "https://api.cohere.com/v2/rerank"),
+    # Only the exact string "true" (case-insensitive) enables chunking.
+    enable_chunking=os.getenv("RERANK_ENABLE_CHUNKING", "false").lower() == "true",
+    # int() raises ValueError at import if the variable is not a valid integer.
+    max_tokens_per_doc=int(os.getenv("RERANK_MAX_TOKENS_PER_DOC", "4096")),
+)
+
+
+async def create_rag_with_rerank():
+    """Create LightRAG instance with rerank configuration"""
+
+    # Get embedding dimension
+    test_embedding = await embedding_func(["test"])
+    embedding_dim = test_embedding.shape[1]
+    print(f"Detected embedding dimension: {embedding_dim}")
+
+    # Method 1: Using custom rerank function
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=embedding_dim,
+            max_token_size=8192,
+            func=embedding_func,
+        ),
+        # Rerank Configuration - provide the rerank function
+        rerank_model_func=rerank_model_func,
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+async def test_rerank_with_different_settings():
+    """
+    Test rerank functionality with different enable_rerank settings
+    """
+    print("\n\n🚀 Setting up LightRAG with Rerank functionality...")
+
+    rag = await create_rag_with_rerank()
+
+    # Insert sample documents
+    sample_docs = [
+        "Reranking improves retrieval quality by re-ordering documents based on relevance.",
+        "LightRAG is a powerful retrieval-augmented generation system with multiple query modes.",
+        "Vector databases enable efficient similarity search in high-dimensional embedding spaces.",
+        "Natural language processing has evolved with large language models and transformers.",
+        "Machine learning algorithms can learn patterns from data without explicit programming.",
+    ]
+
+    print("📄 Inserting sample documents...")
+    await rag.ainsert(sample_docs)
+
+    query = "How does reranking improve retrieval quality?"
+    print(f"\n🔍 Testing query: '{query}'")
+    print("=" * 80)
+
+    # Test with rerank enabled (default)
+    print("\n📊 Testing with enable_rerank=True (default):")
+    result_with_rerank = await rag.aquery(
+        query,
+        param=QueryParam(
+            mode="naive",
+            top_k=10,
+            chunk_top_k=5,
+            enable_rerank=True,  # Explicitly enable rerank
+        ),
+    )
+    print(f"   Result length: {len(result_with_rerank)} characters")
+    print(f"   Preview: {result_with_rerank[:100]}...")
+
+    # Test with rerank disabled
+    print("\n📊 Testing with enable_rerank=False:")
+    result_without_rerank = await rag.aquery(
+        query,
+        param=QueryParam(
+            mode="naive",
+            top_k=10,
+            chunk_top_k=5,
+            enable_rerank=False,  # Disable rerank
+        ),
+    )
+    print(f"   Result length: {len(result_without_rerank)} characters")
+    print(f"   Preview: {result_without_rerank[:100]}...")
+
+    # Test with default settings (enable_rerank defaults to True)
+    print("\n📊 Testing with default settings (enable_rerank defaults to True):")
+    result_default = await rag.aquery(
+        query, param=QueryParam(mode="naive", top_k=10, chunk_top_k=5)
+    )
+    print(f"   Result length: {len(result_default)} characters")
+    print(f"   Preview: {result_default[:100]}...")
+
+
+async def test_direct_rerank():
+    """Test rerank function directly"""
+    print("\n🔧 Direct Rerank API Test")
+    print("=" * 40)
+
+    documents = [
+        "Vector search finds semantically similar documents",
+        "LightRAG supports advanced reranking capabilities",
+        "Reranking significantly improves retrieval quality",
+        "Natural language processing with modern transformers",
+        "The quick brown fox jumps over the lazy dog",
+    ]
+
+    query = "rerank improve quality"
+    print(f"Query: '{query}'")
+    print(f"Documents: {len(documents)}")
+
+    try:
+        reranked_results = await rerank_model_func(
+            query=query,
+            documents=documents,
+            top_n=4,
+        )
+
+        print("\n✅ Rerank Results:")
+        i = 0
+        for result in reranked_results:
+            index = result["index"]
+            score = result["relevance_score"]
+            content = documents[index]
+            print(f"  {index}. Score: {score:.4f} | {content}...")
+            i += 1
+
+    except Exception as e:
+        print(f"❌ Rerank failed: {e}")
+
+
+async def main():
+    """Main example function"""
+    print("🎯 LightRAG Rerank Integration Example")
+    print("=" * 60)
+
+    try:
+        # Test direct rerank
+        await test_direct_rerank()
+
+        # Test rerank with different enable_rerank settings
+        await test_rerank_with_different_settings()
+
+        print("\n✅ Example completed successfully!")
+        print("\n💡 Key Points:")
+        print("   ✓ Rerank is now controlled per query via 'enable_rerank' parameter")
+        print("   ✓ Default value for enable_rerank is True")
+        print("   ✓ Rerank function is configured at LightRAG initialization")
+        print("   ✓ Per-query enable_rerank setting overrides default behavior")
+        print(
+            "   ✓ If enable_rerank=True but no rerank model is configured, a warning is issued"
+        )
+        print("   ✓ Monitor API usage and costs when using rerank services")
+
+    except Exception as e:
+        print(f"\n❌ Example failed: {e}")
+        import traceback
+
+        traceback.print_exc()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 114 - 0
examples/unofficial-sample/copy_llm_cache_to_another_storage.py

@@ -0,0 +1,114 @@
+"""
+Sometimes you need to switch a storage solution, but you want to save LLM token and time.
+This handy script helps you to copy the LLM caches from one storage solution to another.
+(Not all the storage impl are supported)
+"""
+
+import asyncio
+import logging
+import os
+from dotenv import load_dotenv
+
+from lightrag.kg.postgres_impl import PostgreSQLDB, PGKVStorage
+from lightrag.kg.json_kv_impl import JsonKVStorage
+from lightrag.namespace import NameSpace
+
+load_dotenv()
+ROOT_DIR = os.environ.get("ROOT_DIR")
+WORKING_DIR = f"{ROOT_DIR}/dickens"
+
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
+
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+# AGE
+os.environ["AGE_GRAPH_NAME"] = "chinese"
+
+postgres_db = PostgreSQLDB(
+    config={
+        "host": "localhost",
+        "port": 15432,
+        "user": "rag",
+        "password": "rag",
+        "database": "r2",
+    }
+)
+
+
+async def copy_from_postgres_to_json():
+    await postgres_db.initdb()
+
+    from_llm_response_cache = PGKVStorage(
+        namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
+        global_config={"embedding_batch_num": 6},
+        embedding_func=None,
+        db=postgres_db,
+    )
+
+    to_llm_response_cache = JsonKVStorage(
+        namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
+        global_config={"working_dir": WORKING_DIR},
+        embedding_func=None,
+    )
+
+    # Get all cache data using the new flattened structure
+    all_data = await from_llm_response_cache.get_all()
+
+    # Convert flattened data to hierarchical structure for JsonKVStorage
+    kv = {}
+    for flattened_key, cache_entry in all_data.items():
+        # Parse flattened key: {mode}:{cache_type}:{hash}
+        parts = flattened_key.split(":", 2)
+        if len(parts) == 3:
+            mode, cache_type, hash_value = parts
+            if mode not in kv:
+                kv[mode] = {}
+            kv[mode][hash_value] = cache_entry
+            print(f"Copying {flattened_key} -> {mode}[{hash_value}]")
+        else:
+            print(f"Skipping invalid key format: {flattened_key}")
+
+    await to_llm_response_cache.upsert(kv)
+    await to_llm_response_cache.index_done_callback()
+    print("Mission accomplished!")
+
+
+async def copy_from_json_to_postgres():
+    await postgres_db.initdb()
+
+    from_llm_response_cache = JsonKVStorage(
+        namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
+        global_config={"working_dir": WORKING_DIR},
+        embedding_func=None,
+    )
+
+    to_llm_response_cache = PGKVStorage(
+        namespace=NameSpace.KV_STORE_LLM_RESPONSE_CACHE,
+        global_config={"embedding_batch_num": 6},
+        embedding_func=None,
+        db=postgres_db,
+    )
+
+    # Get all cache data from JsonKVStorage (hierarchical structure)
+    all_data = await from_llm_response_cache.get_all()
+
+    # Convert hierarchical data to flattened structure for PGKVStorage
+    flattened_data = {}
+    for mode, mode_data in all_data.items():
+        print(f"Processing mode: {mode}")
+        for hash_value, cache_entry in mode_data.items():
+            # Determine cache_type from cache entry or use default
+            cache_type = cache_entry.get("cache_type", "extract")
+            # Create flattened key: {mode}:{cache_type}:{hash}
+            flattened_key = f"{mode}:{cache_type}:{hash_value}"
+            flattened_data[flattened_key] = cache_entry
+            print(f"\tConverting {mode}[{hash_value}] -> {flattened_key}")
+
+    # Upsert the flattened data
+    await to_llm_response_cache.upsert(flattened_data)
+    print("Mission accomplished!")
+
+
+if __name__ == "__main__":
+    asyncio.run(copy_from_json_to_postgres())

+ 56 - 0
examples/unofficial-sample/lightrag_bedrock_demo.py

@@ -0,0 +1,56 @@
+"""
+LightRAG meets Amazon Bedrock ⛰️
+"""
+
+import os
+import logging
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.bedrock import bedrock_complete, bedrock_embed
+from lightrag.utils import EmbeddingFunc
+
+import asyncio
+import nest_asyncio
+
+nest_asyncio.apply()
+
+logging.getLogger("aiobotocore").setLevel(logging.WARNING)
+
+WORKING_DIR = "./dickens"
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=bedrock_complete,
+        llm_model_name="Anthropic Claude 3 Haiku // Amazon Bedrock",
+        embedding_func=EmbeddingFunc(
+            embedding_dim=1024, max_token_size=8192, func=bedrock_embed
+        ),
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+def main():
+    rag = asyncio.run(initialize_rag())
+
+    with open("./book.txt", "r", encoding="utf-8") as f:
+        rag.insert(f.read())
+
+    for mode in ["naive", "local", "global", "hybrid"]:
+        print("\n+-" + "-" * len(mode) + "-+")
+        print(f"| {mode.capitalize()} |")
+        print("+-" + "-" * len(mode) + "-+\n")
+        print(
+            rag.query(
+                "What are the top themes in this story?", param=QueryParam(mode=mode)
+            )
+        )
+
+
+if __name__ == "__main__":
+    main()

+ 354 - 0
examples/unofficial-sample/lightrag_cloudflare_demo.py

@@ -0,0 +1,354 @@
+import asyncio
+import os
+import inspect
+import logging
+import logging.config
+from lightrag import LightRAG, QueryParam
+from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug
+
+import requests
+import numpy as np
+from dotenv import load_dotenv
+
+"""This code is a modified version of lightrag_openai_demo.py"""
+
+# Settings should ideally come from the environment; values already set in
+# the process environment are not overridden by .env (override=False).
+load_dotenv(dotenv_path=".env", override=False)
+
+
+"""    ----========= IMPORTANT CHANGE THIS! =========----    """
+# Placeholders: replace with real credentials before running. Do not commit
+# real keys to source control.
+cloudflare_api_key = "YOUR_API_KEY"
+account_id = "YOUR_ACCOUNT ID"  # This is unique to your Cloudflare account
+
+# Derived automatically from account_id; the model name is appended per request
+api_base_url = f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/"
+
+
+# choose an embedding model
+EMBEDDING_MODEL = "@cf/baai/bge-m3"
+# choose a generative model
+LLM_MODEL = "@cf/meta/llama-3.2-3b-instruct"
+
+WORKING_DIR = "../dickens"  # you can change output as desired
+
+
+# Cloudflare init
+class CloudflareWorker:
+    def __init__(
+        self,
+        cloudflare_api_key: str,
+        api_base_url: str,
+        llm_model_name: str,
+        embedding_model_name: str,
+        max_tokens: int = 4080,
+        max_response_tokens: int = 4080,
+    ):
+        self.cloudflare_api_key = cloudflare_api_key
+        self.api_base_url = api_base_url
+        self.llm_model_name = llm_model_name
+        self.embedding_model_name = embedding_model_name
+        self.max_tokens = max_tokens
+        self.max_response_tokens = max_response_tokens
+
+    async def _send_request(self, model_name: str, input_: dict, debug_log: str):
+        headers = {"Authorization": f"Bearer {self.cloudflare_api_key}"}
+
+        print(f"""
+        data sent to Cloudflare
+        ~~~~~~~~~~~
+        {debug_log}
+        """)
+
+        try:
+            response_raw = requests.post(
+                f"{self.api_base_url}{model_name}", headers=headers, json=input_
+            ).json()
+            print(f"""
+        Cloudflare worker responded with:
+        ~~~~~~~~~~~
+        {str(response_raw)}
+            """)
+            result = response_raw.get("result", {})
+
+            if "data" in result:  # Embedding case
+                return np.array(result["data"])
+
+            if "response" in result:  # LLM response
+                return result["response"]
+
+            raise ValueError("Unexpected Cloudflare response format")
+
+        except Exception as e:
+            print(f"""
+            Cloudflare API returned:
+            ~~~~~~~~~
+            Error: {e}
+            """)
+            input("Press Enter to continue...")
+            return None
+
+    async def query(self, prompt, system_prompt: str = "", **kwargs) -> str:
+        # since no caching is used and we don't want to mess with everything lightrag, pop the kwarg it is
+        kwargs.pop("hashing_kv", None)
+
+        message = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": prompt},
+        ]
+
+        input_ = {
+            "messages": message,
+            "max_tokens": self.max_tokens,
+            "response_token_limit": self.max_response_tokens,
+        }
+
+        return await self._send_request(
+            self.llm_model_name,
+            input_,
+            debug_log=f"\n- model used {self.llm_model_name}\n- system prompt: {system_prompt}\n- query: {prompt}",
+        )
+
+    async def embedding_chunk(self, texts: list[str]) -> np.ndarray:
+        print(f"""
+        TEXT inputted
+        ~~~~~
+        {texts}
+        """)
+
+        input_ = {
+            "text": texts,
+            "max_tokens": self.max_tokens,
+            "response_token_limit": self.max_response_tokens,
+        }
+
+        return await self._send_request(
+            self.embedding_model_name,
+            input_,
+            debug_log=f"\n-llm model name {self.embedding_model_name}\n- texts: {texts}",
+        )
+
+
+def configure_logging():
+    """Configure logging for the application"""
+
+    # Reset any existing handlers to ensure clean configuration
+    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
+        logger_instance = logging.getLogger(logger_name)
+        logger_instance.handlers = []
+        logger_instance.filters = []
+
+    # Get log directory path from environment variable or use current directory
+    log_dir = os.getenv("LOG_DIR", os.getcwd())
+    log_file_path = os.path.abspath(
+        os.path.join(log_dir, "lightrag_cloudflare_worker_demo.log")
+    )
+
+    print(f"\nLightRAG compatible demo log file: {log_file_path}\n")
+    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
+
+    # Get log file max size and backup count from environment variables
+    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
+    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups
+
+    logging.config.dictConfig(
+        {
+            "version": 1,
+            "disable_existing_loggers": False,
+            "formatters": {
+                "default": {
+                    "format": "%(levelname)s: %(message)s",
+                },
+                "detailed": {
+                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+                },
+            },
+            "handlers": {
+                "console": {
+                    "formatter": "default",
+                    "class": "logging.StreamHandler",
+                    "stream": "ext://sys.stderr",
+                },
+                "file": {
+                    "formatter": "detailed",
+                    "class": "logging.handlers.RotatingFileHandler",
+                    "filename": log_file_path,
+                    "maxBytes": log_max_bytes,
+                    "backupCount": log_backup_count,
+                    "encoding": "utf-8",
+                },
+            },
+            "loggers": {
+                "lightrag": {
+                    "handlers": ["console", "file"],
+                    "level": "INFO",
+                    "propagate": False,
+                },
+            },
+        }
+    )
+
+    # Set the logger level to INFO
+    logger.setLevel(logging.INFO)
+    # Enable verbose debug if needed
+    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")
+
+
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+
+async def initialize_rag():
+    cloudflare_worker = CloudflareWorker(
+        cloudflare_api_key=cloudflare_api_key,
+        api_base_url=api_base_url,
+        embedding_model_name=EMBEDDING_MODEL,
+        llm_model_name=LLM_MODEL,
+    )
+
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        max_parallel_insert=2,
+        llm_model_func=cloudflare_worker.query,
+        llm_model_name=os.getenv("LLM_MODEL", LLM_MODEL),
+        summary_max_tokens=4080,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")),
+            max_token_size=int(os.getenv("MAX_EMBED_TOKENS", "2048")),
+            func=lambda texts: cloudflare_worker.embedding_chunk(
+                texts,
+            ),
+        ),
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+async def print_stream(stream):
+    async for chunk in stream:
+        print(chunk, end="", flush=True)
+
+
+async def main():
+    try:
+        # Clear old data files
+        files_to_delete = [
+            "graph_chunk_entity_relation.graphml",
+            "kv_store_doc_status.json",
+            "kv_store_full_docs.json",
+            "kv_store_text_chunks.json",
+            "vdb_chunks.json",
+            "vdb_entities.json",
+            "vdb_relationships.json",
+        ]
+
+        for file in files_to_delete:
+            file_path = os.path.join(WORKING_DIR, file)
+            if os.path.exists(file_path):
+                os.remove(file_path)
+                print(f"Deleting old file:: {file_path}")
+
+        # Initialize RAG instance
+        rag = await initialize_rag()
+
+        # Test embedding function
+        test_text = ["This is a test string for embedding."]
+        embedding = await rag.embedding_func(test_text)
+        embedding_dim = embedding.shape[1]
+        print("\n=======================")
+        print("Test embedding function")
+        print("========================")
+        print(f"Test dict: {test_text}")
+        print(f"Detected embedding dimension: {embedding_dim}\n\n")
+
+        # Locate the location of what is needed to be added to the knowledge
+        # Can add several simultaneously by modifying code
+        with open("./book.txt", "r", encoding="utf-8") as f:
+            await rag.ainsert(f.read())
+
+        # Perform naive search
+        print("\n=====================")
+        print("Query mode: naive")
+        print("=====================")
+        resp = await rag.aquery(
+            "What are the top themes in this story?",
+            param=QueryParam(mode="naive", stream=True),
+        )
+        if inspect.isasyncgen(resp):
+            await print_stream(resp)
+        else:
+            print(resp)
+
+        # Perform local search
+        print("\n=====================")
+        print("Query mode: local")
+        print("=====================")
+        resp = await rag.aquery(
+            "What are the top themes in this story?",
+            param=QueryParam(mode="local", stream=True),
+        )
+        if inspect.isasyncgen(resp):
+            await print_stream(resp)
+        else:
+            print(resp)
+
+        # Perform global search
+        print("\n=====================")
+        print("Query mode: global")
+        print("=====================")
+        resp = await rag.aquery(
+            "What are the top themes in this story?",
+            param=QueryParam(mode="global", stream=True),
+        )
+        if inspect.isasyncgen(resp):
+            await print_stream(resp)
+        else:
+            print(resp)
+
+        # Perform hybrid search
+        print("\n=====================")
+        print("Query mode: hybrid")
+        print("=====================")
+        resp = await rag.aquery(
+            "What are the top themes in this story?",
+            param=QueryParam(mode="hybrid", stream=True),
+        )
+        if inspect.isasyncgen(resp):
+            await print_stream(resp)
+        else:
+            print(resp)
+
+        """ FOR TESTING (if you want to test straight away, after building. Uncomment this part"""
+
+        """
+        print("\n" + "=" * 60)
+        print("AI ASSISTANT READY!")
+        print("Ask questions about (your uploaded) regulations")
+        print("Type 'quit' to exit")
+        print("=" * 60)
+
+        while True:
+            question = input("\n🔥 Your question: ")
+
+            if question.lower() in ['quit', 'exit', 'bye']:
+                break
+
+            print("\nThinking...")
+            response = await rag.aquery(question, param=QueryParam(mode="hybrid"))
+            print(f"\nAnswer: {response}")
+
+        """
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+    finally:
+        if rag:
+            await rag.llm_response_cache.index_done_callback()
+            await rag.finalize_storages()
+
+
+if __name__ == "__main__":
+    # Configure logging before running the main function
+    configure_logging()
+    asyncio.run(main())
+    print("\nDone!")

+ 235 - 0
examples/unofficial-sample/lightrag_embedding_prefixes.py

@@ -0,0 +1,235 @@
+import os
+import asyncio
+import inspect
+import logging
+import logging.config
+from functools import partial
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.openai import openai_complete_if_cache
+from lightrag.llm.ollama import ollama_embed
+from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug
+
+from dotenv import load_dotenv
+
+# Read settings from a local .env file. override=False is passed, so values
+# already present in the process environment are meant to take precedence.
+load_dotenv(dotenv_path=".env", override=False)
+
+# Directory handed to LightRAG as working_dir; main() also deletes old data
+# files from it before indexing.
+WORKING_DIR = "./dickens"
+
+
+def configure_logging():
+    """Configure logging for the application"""
+
+    # Reset any existing handlers to ensure clean configuration
+    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
+        logger_instance = logging.getLogger(logger_name)
+        logger_instance.handlers = []
+        logger_instance.filters = []
+
+    # Get log directory path from environment variable or use current directory
+    log_dir = os.getenv("LOG_DIR", os.getcwd())
+    log_file_path = os.path.abspath(
+        os.path.join(log_dir, "lightrag_compatible_demo.log")
+    )
+
+    print(f"\nLightRAG compatible demo log file: {log_file_path}\n")
+    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
+
+    # Get log file max size and backup count from environment variables
+    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
+    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups
+
+    logging.config.dictConfig(
+        {
+            "version": 1,
+            "disable_existing_loggers": False,
+            "formatters": {
+                "default": {
+                    "format": "%(levelname)s: %(message)s",
+                },
+                "detailed": {
+                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+                },
+            },
+            "handlers": {
+                "console": {
+                    "formatter": "default",
+                    "class": "logging.StreamHandler",
+                    "stream": "ext://sys.stderr",
+                },
+                "file": {
+                    "formatter": "detailed",
+                    "class": "logging.handlers.RotatingFileHandler",
+                    "filename": log_file_path,
+                    "maxBytes": log_max_bytes,
+                    "backupCount": log_backup_count,
+                    "encoding": "utf-8",
+                },
+            },
+            "loggers": {
+                "lightrag": {
+                    "handlers": ["console", "file"],
+                    "level": "INFO",
+                    "propagate": False,
+                },
+            },
+        }
+    )
+
+    # Set the logger level to INFO
+    logger.setLevel(logging.INFO)
+    # Enable verbose debug if needed
+    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")
+
+
+# Make sure the working directory exists before LightRAG is pointed at it.
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+
+async def llm_model_func(
+    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
+) -> str:
+    return await openai_complete_if_cache(
+        os.getenv("LLM_MODEL", "deepseek-chat"),
+        prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        api_key=os.getenv("LLM_BINDING_API_KEY") or os.getenv("OPENAI_API_KEY"),
+        base_url=os.getenv("LLM_BINDING_HOST", "https://api.deepseek.com"),
+        **kwargs,
+    )
+
+
+async def print_stream(stream):
+    async for chunk in stream:
+        if chunk:
+            print(chunk, end="", flush=True)
+
+
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        # Note: ollama_embed is decorated with @wrap_embedding_func_with_attrs,
+        # which wraps it in an EmbeddingFunc. Using .func accesses the original
+        # unwrapped function to avoid double wrapping when we create our own
+        # EmbeddingFunc with custom configuration (embedding_dim, max_token_size and prefixes).
+        embedding_func=EmbeddingFunc(
+            embedding_dim=int(os.getenv("EMBEDDING_DIM", "1024")),
+            max_token_size=int(os.getenv("MAX_EMBED_TOKENS", "8192")),
+            supports_asymmetric=True,
+            func=partial(
+                ollama_embed.func,  # Access the unwrapped function to avoid double EmbeddingFunc wrapping
+                embed_model=os.getenv("EMBEDDING_MODEL", "FRIDA:latest"),
+                host=os.getenv("EMBEDDING_BINDING_HOST", "http://localhost:11434"),
+                query_prefix=os.getenv("EMBEDDING_QUERY_PREFIX", "search_query: "),
+                document_prefix=os.getenv(
+                    "EMBEDDING_DOCUMENT_PREFIX", "search_document: "
+                ),
+            ),
+        ),
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+async def main():
+    rag = None
+    try:
+        # Clear old data files
+        files_to_delete = [
+            "graph_chunk_entity_relation.graphml",
+            "kv_store_doc_status.json",
+            "kv_store_full_docs.json",
+            "kv_store_text_chunks.json",
+            "vdb_chunks.json",
+            "vdb_entities.json",
+            "vdb_relationships.json",
+        ]
+
+        for file in files_to_delete:
+            file_path = os.path.join(WORKING_DIR, file)
+            if os.path.exists(file_path):
+                os.remove(file_path)
+                print(f"Deleting old file:: {file_path}")
+
+        # Initialize RAG instance
+        rag = await initialize_rag()
+
+        # Test embedding function
+        test_text = ["This is a test string for embedding."]
+        embedding = await rag.embedding_func(test_text)
+        embedding_dim = embedding.shape[1]
+        print("\n=======================")
+        print("Test embedding function")
+        print("========================")
+        print(f"Test dict: {test_text}")
+        print(f"Detected embedding dimension: {embedding_dim}\n\n")
+
+        with open("./book.txt", "r", encoding="utf-8") as f:
+            await rag.ainsert(f.read())
+
+        # Perform naive search
+        print("\n=====================")
+        print("Query mode: naive")
+        print("=====================")
+        resp = await rag.aquery(
+            "What are the top themes in this story?",
+            param=QueryParam(mode="naive", stream=True),
+        )
+        if inspect.isasyncgen(resp):
+            await print_stream(resp)
+        else:
+            print(resp)
+
+        # Perform local search
+        print("\n=====================")
+        print("Query mode: local")
+        print("=====================")
+        resp = await rag.aquery(
+            "What are the top themes in this story?",
+            param=QueryParam(mode="local", stream=True),
+        )
+        if inspect.isasyncgen(resp):
+            await print_stream(resp)
+        else:
+            print(resp)
+
+        # Perform global search
+        print("\n=====================")
+        print("Query mode: global")
+        print("=====================")
+        resp = await rag.aquery(
+            "What are the top themes in this story?",
+            param=QueryParam(mode="global", stream=True),
+        )
+        if inspect.isasyncgen(resp):
+            await print_stream(resp)
+        else:
+            print(resp)
+
+        # Perform hybrid search
+        print("\n=====================")
+        print("Query mode: hybrid")
+        print("=====================")
+        resp = await rag.aquery(
+            "What are the top themes in this story?",
+            param=QueryParam(mode="hybrid", stream=True),
+        )
+        if inspect.isasyncgen(resp):
+            await print_stream(resp)
+        else:
+            print(resp)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+    finally:
+        if rag:
+            await rag.finalize_storages()
+
+
+# Script entry point: this block runs only when the file is executed directly.
+if __name__ == "__main__":
+    # Configure logging before running the main function
+    configure_logging()
+    # asyncio.run() drives the async main() coroutine to completion.
+    asyncio.run(main())
+    print("\nDone!")

+ 79 - 0
examples/unofficial-sample/lightrag_hf_demo.py

@@ -0,0 +1,79 @@
+import os
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.hf import hf_model_complete, hf_embed
+from lightrag.utils import EmbeddingFunc
+from transformers import AutoModel, AutoTokenizer
+
+import asyncio
+import nest_asyncio
+
+# nest_asyncio patches asyncio to allow re-entrant event loops; presumably
+# needed because main() mixes asyncio.run() with LightRAG's synchronous
+# insert()/query() calls -- confirm before removing.
+nest_asyncio.apply()
+
+# Directory handed to LightRAG as working_dir.
+WORKING_DIR = "./dickens"
+
+# Create the working directory on first run.
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=hf_model_complete,
+        llm_model_name="meta-llama/Llama-3.1-8B-Instruct",
+        embedding_func=EmbeddingFunc(
+            embedding_dim=384,
+            max_token_size=5000,
+            func=lambda texts: hf_embed(
+                texts,
+                tokenizer=AutoTokenizer.from_pretrained(
+                    "sentence-transformers/all-MiniLM-L6-v2"
+                ),
+                embed_model=AutoModel.from_pretrained(
+                    "sentence-transformers/all-MiniLM-L6-v2"
+                ),
+            ),
+        ),
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+def main():
+    rag = asyncio.run(initialize_rag())
+
+    with open("./book.txt", "r", encoding="utf-8") as f:
+        rag.insert(f.read())
+
+    # Perform naive search
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
+    )
+
+    # Perform local search
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
+    )
+
+    # Perform global search
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
+    )
+
+    # Perform hybrid search
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
+    )
+
+
+# Run the demo only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

+ 139 - 0
examples/unofficial-sample/lightrag_llamaindex_direct_demo.py

@@ -0,0 +1,139 @@
+import os
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.llama_index_impl import (
+    llama_index_complete_if_cache,
+    llama_index_embed,
+)
+from lightrag.utils import EmbeddingFunc
+from llama_index.llms.openai import OpenAI
+from llama_index.embeddings.openai import OpenAIEmbedding
+import asyncio
+import nest_asyncio
+
+# nest_asyncio patches asyncio to allow re-entrant event loops; presumably
+# needed because main() mixes asyncio.run() with LightRAG's synchronous
+# insert()/query() calls -- confirm before removing.
+nest_asyncio.apply()
+
+
+# Configure working directory
+WORKING_DIR = "./index_default"
+print(f"WORKING_DIR: {WORKING_DIR}")
+
+# Model configuration (each value can be overridden through the environment)
+LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4")
+print(f"LLM_MODEL: {LLM_MODEL}")
+EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large")
+print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}")
+EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192))
+print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}")
+
+# OpenAI configuration
+# The default is only a placeholder string; set OPENAI_API_KEY for real use.
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "your-api-key-here")
+
+# Create the working directory on first run.
+if not os.path.exists(WORKING_DIR):
+    print(f"Creating working directory: {WORKING_DIR}")
+    os.mkdir(WORKING_DIR)
+
+
+# Initialize LLM function
+async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
+    try:
+        # Initialize OpenAI if not in kwargs
+        if "llm_instance" not in kwargs:
+            llm_instance = OpenAI(
+                model=LLM_MODEL,
+                api_key=OPENAI_API_KEY,
+                temperature=0.7,
+            )
+            kwargs["llm_instance"] = llm_instance
+
+        response = await llama_index_complete_if_cache(
+            kwargs["llm_instance"],
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            **kwargs,
+        )
+        return response
+    except Exception as e:
+        print(f"LLM request failed: {str(e)}")
+        raise
+
+
+# Initialize embedding function
+async def embedding_func(texts):
+    try:
+        embed_model = OpenAIEmbedding(
+            model=EMBEDDING_MODEL,
+            api_key=OPENAI_API_KEY,
+        )
+        return await llama_index_embed(texts, embed_model=embed_model)
+    except Exception as e:
+        print(f"Embedding failed: {str(e)}")
+        raise
+
+
+# Get embedding dimension
+async def get_embedding_dim():
+    test_text = ["This is a test sentence."]
+    embedding = await embedding_func(test_text)
+    embedding_dim = embedding.shape[1]
+    print(f"embedding_dim={embedding_dim}")
+    return embedding_dim
+
+
+async def initialize_rag():
+    embedding_dimension = await get_embedding_dim()
+
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=embedding_dimension,
+            max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
+            func=embedding_func,
+        ),
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+def main():
+    # Initialize RAG instance
+    rag = asyncio.run(initialize_rag())
+
+    # Insert example text
+    with open("./book.txt", "r", encoding="utf-8") as f:
+        rag.insert(f.read())
+
+    # Test different query modes
+    print("\nNaive Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
+    )
+
+    print("\nLocal Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
+    )
+
+    print("\nGlobal Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
+    )
+
+    print("\nHybrid Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
+    )
+
+
+# Run the demo only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

+ 141 - 0
examples/unofficial-sample/lightrag_llamaindex_litellm_demo.py

@@ -0,0 +1,141 @@
+import os
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.llama_index_impl import (
+    llama_index_complete_if_cache,
+    llama_index_embed,
+)
+from lightrag.utils import EmbeddingFunc
+from llama_index.llms.litellm import LiteLLM
+from llama_index.embeddings.litellm import LiteLLMEmbedding
+import asyncio
+import nest_asyncio
+
+# nest_asyncio patches asyncio to allow re-entrant event loops; presumably
+# needed because main() mixes asyncio.run() with LightRAG's synchronous
+# insert()/query() calls -- confirm before removing.
+nest_asyncio.apply()
+
+
+# Configure working directory
+WORKING_DIR = "./index_default"
+print(f"WORKING_DIR: {WORKING_DIR}")
+
+# Model configuration (each value can be overridden through the environment)
+LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4")
+print(f"LLM_MODEL: {LLM_MODEL}")
+EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large")
+print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}")
+EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192))
+print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}")
+
+# LiteLLM configuration
+# The URL is echoed for convenience; the key is deliberately not printed.
+LITELLM_URL = os.environ.get("LITELLM_URL", "http://localhost:4000")
+print(f"LITELLM_URL: {LITELLM_URL}")
+LITELLM_KEY = os.environ.get("LITELLM_KEY", "sk-1234")
+
+# Create the working directory on first run.
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+
+# Initialize LLM function
+async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
+    try:
+        # Initialize LiteLLM if not in kwargs
+        if "llm_instance" not in kwargs:
+            llm_instance = LiteLLM(
+                model=f"openai/{LLM_MODEL}",  # Format: "provider/model_name"
+                api_base=LITELLM_URL,
+                api_key=LITELLM_KEY,
+                temperature=0.7,
+            )
+            kwargs["llm_instance"] = llm_instance
+
+        response = await llama_index_complete_if_cache(
+            kwargs["llm_instance"],
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+        )
+        return response
+    except Exception as e:
+        print(f"LLM request failed: {str(e)}")
+        raise
+
+
+# Initialize embedding function
+async def embedding_func(texts):
+    try:
+        embed_model = LiteLLMEmbedding(
+            model_name=f"openai/{EMBEDDING_MODEL}",
+            api_base=LITELLM_URL,
+            api_key=LITELLM_KEY,
+        )
+        return await llama_index_embed(texts, embed_model=embed_model)
+    except Exception as e:
+        print(f"Embedding failed: {str(e)}")
+        raise
+
+
+# Get embedding dimension
+async def get_embedding_dim():
+    test_text = ["This is a test sentence."]
+    embedding = await embedding_func(test_text)
+    embedding_dim = embedding.shape[1]
+    print(f"embedding_dim={embedding_dim}")
+    return embedding_dim
+
+
+async def initialize_rag():
+    embedding_dimension = await get_embedding_dim()
+
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=embedding_dimension,
+            max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
+            func=embedding_func,
+        ),
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+def main():
+    # Initialize RAG instance
+    rag = asyncio.run(initialize_rag())
+
+    # Insert example text
+    with open("./book.txt", "r", encoding="utf-8") as f:
+        rag.insert(f.read())
+
+    # Test different query modes
+    print("\nNaive Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
+    )
+
+    print("\nLocal Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
+    )
+
+    print("\nGlobal Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
+    )
+
+    print("\nHybrid Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
+    )
+
+
+# Run the demo only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

+ 152 - 0
examples/unofficial-sample/lightrag_llamaindex_litellm_opik_demo.py

@@ -0,0 +1,152 @@
+import os
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.llama_index_impl import (
+    llama_index_complete_if_cache,
+    llama_index_embed,
+)
+from lightrag.utils import EmbeddingFunc
+from llama_index.llms.litellm import LiteLLM
+from llama_index.embeddings.litellm import LiteLLMEmbedding
+import asyncio
+import nest_asyncio
+
+nest_asyncio.apply()
+
+
+# Configure working directory
+WORKING_DIR = "./index_default"
+print(f"WORKING_DIR: {WORKING_DIR}")
+
+# Model configuration
+LLM_MODEL = os.environ.get("LLM_MODEL", "gemma-3-4b")
+print(f"LLM_MODEL: {LLM_MODEL}")
+EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "arctic-embed")
+print(f"EMBEDDING_MODEL: {EMBEDDING_MODEL}")
+EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192))
+print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}")
+
+# LiteLLM configuration
+LITELLM_URL = os.environ.get("LITELLM_URL", "http://localhost:4000")
+print(f"LITELLM_URL: {LITELLM_URL}")
+LITELLM_KEY = os.environ.get("LITELLM_KEY", "sk-4JdvGFKqSA3S0k_5p0xufw")
+
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+
+# Initialize LLM function
+async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
+    try:
+        # Initialize LiteLLM if not in kwargs
+        if "llm_instance" not in kwargs:
+            llm_instance = LiteLLM(
+                model=f"openai/{LLM_MODEL}",  # Format: "provider/model_name"
+                api_base=LITELLM_URL,
+                api_key=LITELLM_KEY,
+                temperature=0.7,
+            )
+            kwargs["llm_instance"] = llm_instance
+
+        chat_kwargs = {}
+        chat_kwargs["litellm_params"] = {
+            "metadata": {
+                "opik": {
+                    "project_name": "lightrag_llamaindex_litellm_opik_demo",
+                    "tags": ["lightrag", "litellm"],
+                }
+            }
+        }
+
+        response = await llama_index_complete_if_cache(
+            kwargs["llm_instance"],
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            chat_kwargs=chat_kwargs,
+        )
+        return response
+    except Exception as e:
+        print(f"LLM request failed: {str(e)}")
+        raise
+
+
+# Initialize embedding function
+async def embedding_func(texts):
+    try:
+        embed_model = LiteLLMEmbedding(
+            model_name=f"openai/{EMBEDDING_MODEL}",
+            api_base=LITELLM_URL,
+            api_key=LITELLM_KEY,
+        )
+        return await llama_index_embed(texts, embed_model=embed_model)
+    except Exception as e:
+        print(f"Embedding failed: {str(e)}")
+        raise
+
+
+# Get embedding dimension
+async def get_embedding_dim():
+    test_text = ["This is a test sentence."]
+    embedding = await embedding_func(test_text)
+    embedding_dim = embedding.shape[1]
+    print(f"embedding_dim={embedding_dim}")
+    return embedding_dim
+
+
+async def initialize_rag():
+    embedding_dimension = await get_embedding_dim()
+
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=embedding_dimension,
+            max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
+            func=embedding_func,
+        ),
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+def main():
+    # Initialize RAG instance
+    rag = asyncio.run(initialize_rag())
+
+    # Insert example text
+    with open("./book.txt", "r", encoding="utf-8") as f:
+        rag.insert(f.read())
+
+    # Test different query modes
+    print("\nNaive Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
+    )
+
+    print("\nLocal Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
+    )
+
+    print("\nGlobal Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
+    )
+
+    print("\nHybrid Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
+    )
+
+
+# Run the demo only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

+ 107 - 0
examples/unofficial-sample/lightrag_lmdeploy_demo.py

@@ -0,0 +1,107 @@
+import os
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.lmdeploy import lmdeploy_model_if_cache
+from lightrag.llm.hf import hf_embed
+from lightrag.utils import EmbeddingFunc
+from transformers import AutoModel, AutoTokenizer
+
+import asyncio
+import nest_asyncio
+
+# nest_asyncio patches asyncio to allow re-entrant event loops; presumably
+# needed because main() mixes asyncio.run() with LightRAG's synchronous
+# insert()/query() calls -- confirm before removing.
+nest_asyncio.apply()
+
+# Directory handed to LightRAG as working_dir.
+WORKING_DIR = "./dickens"
+
+# Create the working directory on first run.
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+
+async def lmdeploy_model_complete(
+    prompt=None,
+    system_prompt=None,
+    history_messages=[],
+    keyword_extraction=False,
+    **kwargs,
+) -> str:
+    model_name = kwargs["hashing_kv"].global_config["llm_model_name"]
+    return await lmdeploy_model_if_cache(
+        model_name,
+        prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        ## please specify chat_template if your local path does not follow original HF file name,
+        ## or model_name is a pytorch model on huggingface.co,
+        ## you can refer to https://github.com/InternLM/lmdeploy/blob/main/lmdeploy/model.py
+        ## for a list of chat_template available in lmdeploy.
+        chat_template="llama3",
+        # model_format ='awq', # if you are using awq quantization model.
+        # quant_policy=8, # if you want to use online kv cache, 4=kv int4, 8=kv int8.
+        **kwargs,
+    )
+
+
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=lmdeploy_model_complete,
+        llm_model_name="meta-llama/Llama-3.1-8B-Instruct",  # please use definite path for local model
+        embedding_func=EmbeddingFunc(
+            embedding_dim=384,
+            max_token_size=5000,
+            func=lambda texts: hf_embed(
+                texts,
+                tokenizer=AutoTokenizer.from_pretrained(
+                    "sentence-transformers/all-MiniLM-L6-v2"
+                ),
+                embed_model=AutoModel.from_pretrained(
+                    "sentence-transformers/all-MiniLM-L6-v2"
+                ),
+            ),
+        ),
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+def main():
+    # Initialize RAG instance
+    rag = asyncio.run(initialize_rag())
+
+    # Insert example text
+    with open("./book.txt", "r", encoding="utf-8") as f:
+        rag.insert(f.read())
+
+    # Test different query modes
+    print("\nNaive Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
+    )
+
+    print("\nLocal Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
+    )
+
+    print("\nGlobal Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
+    )
+
+    print("\nHybrid Search:")
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
+    )
+
+
+# Run the demo only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

+ 168 - 0
examples/unofficial-sample/lightrag_nvidia_demo.py

@@ -0,0 +1,168 @@
+import os
+import asyncio
+import nest_asyncio
+
+from lightrag import LightRAG, QueryParam
+from lightrag.llm import (
+    openai_complete_if_cache,
+    nvidia_openai_embed,
+)
+from lightrag.utils import EmbeddingFunc
+import numpy as np
+
+# for custom llm_model_func
+from lightrag.utils import locate_json_string_body_from_string
+
+# nest_asyncio patches asyncio to allow re-entrant event loops.
+# NOTE(review): main() in this file is fully async, so this may be unnecessary.
+nest_asyncio.apply()
+
+# Directory handed to LightRAG as working_dir.
+WORKING_DIR = "./dickens"
+
+# Create the working directory on first run.
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+
+# Two ways to supply your API key (choose one): read it from the environment
+# (commented out below) or paste it over the placeholder string.
+# NVIDIA_OPENAI_API_KEY = os.getenv("NVIDIA_OPENAI_API_KEY")
+NVIDIA_OPENAI_API_KEY = "nvapi-xxxx"  # your api key
+
+# using pre-defined function for nvidia LLM API. OpenAI compatible
+# llm_model_func = nvidia_openai_complete
+
+
+# If you trying to make custom llm_model_func to use llm model on NVIDIA API like other example:
+async def llm_model_func(
+    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
+) -> str:
+    result = await openai_complete_if_cache(
+        "nvidia/llama-3.1-nemotron-70b-instruct",
+        prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        api_key=NVIDIA_OPENAI_API_KEY,
+        base_url="https://integrate.api.nvidia.com/v1",
+        **kwargs,
+    )
+    if keyword_extraction:
+        return locate_json_string_body_from_string(result)
+    return result
+
+
+# custom embedding
+# Embedding model name used by both indexing_embedding_func and query_embedding_func.
+nvidia_embed_model = "nvidia/nv-embedqa-e5-v5"
+
+
+async def indexing_embedding_func(texts: list[str]) -> np.ndarray:
+    return await nvidia_openai_embed(
+        texts,
+        model=nvidia_embed_model,  # maximum 512 token
+        # model="nvidia/llama-3.2-nv-embedqa-1b-v1",
+        api_key=NVIDIA_OPENAI_API_KEY,
+        base_url="https://integrate.api.nvidia.com/v1",
+        input_type="passage",
+        trunc="END",  # handling on server side if input token is longer than maximum token
+        encode="float",
+    )
+
+
+async def query_embedding_func(texts: list[str]) -> np.ndarray:
+    return await nvidia_openai_embed(
+        texts,
+        model=nvidia_embed_model,  # maximum 512 token
+        # model="nvidia/llama-3.2-nv-embedqa-1b-v1",
+        api_key=NVIDIA_OPENAI_API_KEY,
+        base_url="https://integrate.api.nvidia.com/v1",
+        input_type="query",
+        trunc="END",  # handling on server side if input token is longer than maximum token
+        encode="float",
+    )
+
+
+# dimension are same
+async def get_embedding_dim():
+    test_text = ["This is a test sentence."]
+    embedding = await indexing_embedding_func(test_text)
+    embedding_dim = embedding.shape[1]
+    return embedding_dim
+
+
+# function test
+async def test_funcs():
+    result = await llm_model_func("How are you?")
+    print("llm_model_func: ", result)
+
+    result = await indexing_embedding_func(["How are you?"])
+    print("embedding_func: ", result)
+
+
+# asyncio.run(test_funcs())
+
+
+async def initialize_rag():
+    embedding_dimension = await get_embedding_dim()
+    print(f"Detected embedding dimension: {embedding_dimension}")
+
+    # lightRAG class during indexing
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        # llm_model_name="meta/llama3-70b-instruct", #un comment if
+        embedding_func=EmbeddingFunc(
+            embedding_dim=embedding_dimension,
+            max_token_size=512,  # maximum token size, somehow it's still exceed maximum number of token
+            # so truncate (trunc) parameter on embedding_func will handle it and try to examine the tokenizer used in LightRAG
+            # so you can adjust to be able to fit the NVIDIA model (future work)
+            func=indexing_embedding_func,
+        ),
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+async def main():
+    try:
+        # Initialize RAG instance
+        rag = await initialize_rag()
+
+        # reading file
+        with open("./book.txt", "r", encoding="utf-8") as f:
+            await rag.ainsert(f.read())
+
+        # Perform naive search
+        print("==============Naive===============")
+        print(
+            await rag.aquery(
+                "What are the top themes in this story?", param=QueryParam(mode="naive")
+            )
+        )
+
+        # Perform local search
+        print("==============local===============")
+        print(
+            await rag.aquery(
+                "What are the top themes in this story?", param=QueryParam(mode="local")
+            )
+        )
+
+        # Perform global search
+        print("==============global===============")
+        print(
+            await rag.aquery(
+                "What are the top themes in this story?",
+                param=QueryParam(mode="global"),
+            )
+        )
+
+        # Perform hybrid search
+        print("==============hybrid===============")
+        print(
+            await rag.aquery(
+                "What are the top themes in this story?",
+                param=QueryParam(mode="hybrid"),
+            )
+        )
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+
+# Run the async demo only when this file is executed as a script.
+if __name__ == "__main__":
+    asyncio.run(main())

+ 109 - 0
examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py

@@ -0,0 +1,109 @@
+import os
+import asyncio
+from lightrag import LightRAG, QueryParam
+from lightrag.llm.ollama import ollama_embed, openai_complete_if_cache
+from lightrag.utils import EmbeddingFunc
+
+# WorkingDir
+ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
+WORKING_DIR = os.path.join(ROOT_DIR, "myKG")
+if not os.path.exists(WORKING_DIR):
+    os.mkdir(WORKING_DIR)
+print(f"WorkingDir: {WORKING_DIR}")
+
+# redis
+os.environ["REDIS_URI"] = "redis://localhost:6379"
+
+# neo4j
+BATCH_SIZE_NODES = 500
+BATCH_SIZE_EDGES = 100
+os.environ["NEO4J_URI"] = "neo4j://localhost:7687"
+os.environ["NEO4J_USERNAME"] = "neo4j"
+os.environ["NEO4J_PASSWORD"] = "12345678"
+
+# milvus
+os.environ["MILVUS_URI"] = "http://localhost:19530"
+os.environ["MILVUS_USER"] = "root"
+os.environ["MILVUS_PASSWORD"] = "Milvus"
+os.environ["MILVUS_DB_NAME"] = "lightrag"
+
+
+async def llm_model_func(
+    prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
+) -> str:
+    return await openai_complete_if_cache(
+        "deepseek-chat",
+        prompt,
+        system_prompt=system_prompt,
+        history_messages=history_messages,
+        api_key="",
+        base_url="",
+        **kwargs,
+    )
+
+
+embedding_func = EmbeddingFunc(
+    embedding_dim=768,
+    max_token_size=512,
+    func=lambda texts: ollama_embed(
+        texts, embed_model="shaw/dmeta-embedding-zh", host="http://117.50.173.35:11434"
+    ),
+)
+
+
+async def initialize_rag():
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        summary_max_tokens=10000,
+        embedding_func=embedding_func,
+        chunk_token_size=512,
+        chunk_overlap_token_size=256,
+        kv_storage="RedisKVStorage",
+        graph_storage="Neo4JStorage",
+        vector_storage="MilvusVectorDBStorage",
+        doc_status_storage="RedisKVStorage",
+    )
+
+    await rag.initialize_storages()  # Auto-initializes pipeline_status
+    return rag
+
+
+def main():
+    # Initialize RAG instance
+    rag = asyncio.run(initialize_rag())
+
+    with open("./book.txt", "r", encoding="utf-8") as f:
+        rag.insert(f.read())
+
+    # Perform naive search
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="naive")
+        )
+    )
+
+    # Perform local search
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="local")
+        )
+    )
+
+    # Perform global search
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="global")
+        )
+    )
+
+    # Perform hybrid search
+    print(
+        rag.query(
+            "What are the top themes in this story?", param=QueryParam(mode="hybrid")
+        )
+    )
+
+
+# Run the demo only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

Algunos archivos no se mostraron porque demasiados archivos cambiaron en este cambio