| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131 |
"""
LightRAG Data Isolation Demo: Workspace Management

This example demonstrates how to maintain multiple isolated knowledge bases
within a single application using LightRAG's 'workspace' feature.

Key Concepts:
- Workspace Isolation: Each RAG instance is assigned a unique workspace name,
  which ensures that Knowledge Graphs, Vector Databases, and Chunks are
  stored in separate, non-conflicting directories.
- Independent Configuration: Different workspaces can utilize different
  entity type guidance and document sets simultaneously.

Prerequisites:
1. Set the following environment variable:
   - GEMINI_API_KEY: Your Google Gemini API key.
2. Ensure your data directory contains:
   - Data/book-small.txt
   - Data/HR_policies.txt

Usage:
    python lightrag_workspace_demo.py
"""
- import os
- import asyncio
- import numpy as np
- from lightrag import LightRAG, QueryParam
- from lightrag.llm.gemini import gemini_model_complete, gemini_embed
- from lightrag.utils import wrap_embedding_func_with_attrs
async def llm_model_func(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    **kwargs,
) -> str:
    """Forward a completion request to Gemini via ``gemini_model_complete``.

    Args:
        prompt: Prompt text, passed through unchanged.
        system_prompt: Optional system prompt, passed through unchanged.
        history_messages: Prior conversation messages. ``None`` (the default)
            is replaced by a fresh empty list on every call, so no list object
            is ever shared between calls.
        keyword_extraction: Accepted so callers may supply it, but it is not
            forwarded to ``gemini_model_complete``.
        **kwargs: Additional keyword arguments, forwarded as-is.

    Returns:
        The awaited result of ``gemini_model_complete``.
    """
    # NOTE(review): the model name is hard-coded to "gemini-2.0-flash-exp" here
    # while initialize_rag passes llm_model_name="gemini-2.0-flash" -- confirm
    # which one is intended.
    return await gemini_model_complete(
        prompt,
        system_prompt=system_prompt,
        history_messages=[] if history_messages is None else history_messages,
        api_key=os.getenv("GEMINI_API_KEY"),
        model_name="gemini-2.0-flash-exp",
        **kwargs,
    )
@wrap_embedding_func_with_attrs(
    embedding_dim=768,
    max_token_size=2048,
    model_name="models/text-embedding-004",
)
async def embedding_func(texts: list[str]) -> np.ndarray:
    """Embed ``texts`` with the Gemini "models/text-embedding-004" model.

    The API key is read from the ``GEMINI_API_KEY`` environment variable on
    every call. The decorator attaches the embedding dimension (768), the
    maximum token size (2048) and the model name to this function.
    """
    gemini_key = os.getenv("GEMINI_API_KEY")
    # NOTE(review): presumably `gemini_embed` is itself a wrapped embedding
    # function and `.func` is the underlying coroutine -- confirm.
    vectors = await gemini_embed.func(
        texts,
        api_key=gemini_key,
        model="models/text-embedding-004",
    )
    return vectors
async def initialize_rag(
    workspace: str = "default_workspace",
) -> LightRAG:
    """
    Create a LightRAG instance bound to ``workspace`` and initialize its storages.

    The instance is wired to this module's ``llm_model_func`` and
    ``embedding_func`` with fixed concurrency and batch settings.

    This function does not pass ``addon_params``. Per the original note,
    entity type guidance can be customized by passing
    addon_params={'entity_types_guidance': '...'} to LightRAG (not verified
    from this file).

    Args:
        workspace: Workspace name handed to LightRAG for data isolation.

    Returns:
        The LightRAG instance after ``initialize_storages()`` has been awaited.
    """
    settings = dict(
        workspace=workspace,
        llm_model_name="gemini-2.0-flash",
        llm_model_func=llm_model_func,
        embedding_func=embedding_func,
        embedding_func_max_async=4,
        embedding_batch_num=8,
        llm_model_max_async=2,
    )
    instance = LightRAG(**settings)
    await instance.initialize_storages()
    return instance
async def _index_file(rag, path: str, label: str) -> None:
    """Insert the UTF-8 text of ``path`` into ``rag``; warn and skip if the file is missing."""
    try:
        # Read fully and close the handle before the (potentially long)
        # indexing await, so the file is not held open while indexing runs.
        with open(path, "r", encoding="utf-8") as f:
            text = f.read()
    except FileNotFoundError:
        print(f"Warning: {path} not found; skipping indexing for {label} Workspace.")
        return
    print(f"Indexing {path} into {label} Workspace...")
    await rag.ainsert(text)


async def _finalize_all(*rags) -> None:
    """Finalize every non-None instance, continuing past individual failures."""
    for rag in rags:
        if rag is None:
            continue
        try:
            await rag.finalize_storages()
        except Exception as e:
            # Report and keep going so one failure cannot leave the
            # remaining instances un-finalized.
            print(f"An error occurred while finalizing storages: {e}")


async def main():
    """Run the two-workspace demo: initialize, index, query, then finalize.

    Two LightRAG instances are created in separate workspaces
    ("rag_workspace_book" and "rag_workspace_hr"), each is populated from its
    own text file when that file exists, and each is queried in "hybrid" mode.
    Any exception raised along the way is printed rather than propagated, and
    every instance that was created is finalized afterwards.
    """
    rag_1 = None
    rag_2 = None
    try:
        # 1. Initialize Isolated Workspaces
        #    Instance 1: Dedicated to literary analysis
        #    Instance 2: Dedicated to corporate HR documentation
        print("Initializing isolated LightRAG workspaces...")
        rag_1 = await initialize_rag("rag_workspace_book")
        rag_2 = await initialize_rag("rag_workspace_hr")

        # 2. Populate Workspace 1 (Literature)
        await _index_file(rag_1, "Data/book-small.txt", "Literature")

        # 3. Populate Workspace 2 (Corporate)
        await _index_file(rag_2, "Data/HR_policies.txt", "HR")

        # 4. Context-Specific Querying
        print("\n--- Querying Literature Workspace ---")
        res1 = await rag_1.aquery(
            "What is the main theme?",
            param=QueryParam(mode="hybrid", stream=False),
        )
        print(f"Book Analysis: {res1[:200]}...")

        print("\n--- Querying HR Workspace ---")
        res2 = await rag_2.aquery(
            "What is the leave policy?", param=QueryParam(mode="hybrid")
        )
        print(f"HR Response: {res2[:200]}...")
    except Exception as e:
        # Top-level boundary of the demo: report the failure instead of crashing.
        print(f"An error occurred: {e}")
    finally:
        # Finalize storages for whichever instances were created; presumably
        # this closes connections and flushes pending writes -- see LightRAG docs.
        await _finalize_all(rag_1, rag_2)
if __name__ == "__main__":
    # Script entry point: drive the async demo to completion on a new event loop.
    asyncio.run(main())
|