# Source: wxcz_admin / lightrag-cn-git-d5efd3
"""
LightRAG Data Isolation Demo: Workspace Management

This example demonstrates how to maintain multiple isolated knowledge bases
within a single application using LightRAG's 'workspace' feature.

Key Concepts:
- Workspace Isolation: Each RAG instance is assigned a unique workspace name,
  which ensures that Knowledge Graphs, Vector Databases, and Chunks are
  stored in separate, non-conflicting directories.
- Independent Configuration: Different workspaces can utilize different
  entity type guidance and document sets simultaneously.

Prerequisites:
1. Set the following environment variable:
   - GEMINI_API_KEY: Your Google Gemini API key.
2. Ensure your data directory contains:
   - Data/book-small.txt
   - Data/HR_policies.txt

Usage:
    python lightrag_workspace_demo.py
"""

import os
import asyncio
import numpy as np
from lightrag import LightRAG, QueryParam
from lightrag.llm.gemini import gemini_model_complete, gemini_embed
from lightrag.utils import wrap_embedding_func_with_attrs


async def llm_model_func(
    prompt,
    system_prompt=None,
    history_messages=None,
    keyword_extraction=False,
    **kwargs,
) -> str:
    """Complete ``prompt`` through ``gemini_model_complete``.

    Args:
        prompt: The user prompt, forwarded as the first positional argument.
        system_prompt: Optional system prompt, forwarded unchanged.
        history_messages: Earlier conversation turns. ``None`` (the default)
            is forwarded as a fresh empty list.
        keyword_extraction: Accepted so callers may pass it by keyword; this
            wrapper does not forward it to ``gemini_model_complete``.
        **kwargs: Extra keyword arguments, forwarded verbatim.

    Returns:
        The awaited result of ``gemini_model_complete``.
    """
    # A literal ``[]`` default is a single list object shared by every call;
    # build a new list per call so no state can leak between invocations.
    if history_messages is None:
        history_messages = []

    return await gemini_model_complete(
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        # The key is read from the environment on every call, not cached.
        api_key=os.getenv("GEMINI_API_KEY"),
        model_name="gemini-2.0-flash-exp",
        **kwargs,
    )


@wrap_embedding_func_with_attrs(
    embedding_dim=768, max_token_size=2048, model_name="models/text-embedding-004"
)
async def embedding_func(texts: list[str]) -> np.ndarray:
    """Embed ``texts`` with the Gemini ``models/text-embedding-004`` model.

    The decorator declares the embedding dimension (768), the maximum token
    size (2048) and the model name for this function.

    Args:
        texts: The strings to embed.

    Returns:
        The awaited result of ``gemini_embed.func``.
    """
    gemini_key = os.getenv("GEMINI_API_KEY")
    # NOTE(review): ``.func`` is presumably the undecorated callable behind
    # ``gemini_embed`` -- confirm against lightrag.llm.gemini.
    vectors = await gemini_embed.func(
        texts,
        api_key=gemini_key,
        model="models/text-embedding-004",
    )
    return vectors


async def initialize_rag(
    workspace: str = "default_workspace",
) -> LightRAG:
    """
    Build a LightRAG instance bound to ``workspace`` and initialize its storages.

    Entity type guidance can be customized by passing
    addon_params={'entity_types_guidance': '...'} to LightRAG.

    Args:
        workspace: Workspace name handed to the ``LightRAG`` constructor.

    Returns:
        The instance, after ``initialize_storages()`` has been awaited.
    """
    # Collect the constructor arguments in one place so the concurrency and
    # batching knobs are easy to scan.
    settings = {
        "workspace": workspace,
        "llm_model_name": "gemini-2.0-flash",
        "llm_model_func": llm_model_func,
        "embedding_func": embedding_func,
        "embedding_func_max_async": 4,
        "embedding_batch_num": 8,
        "llm_model_max_async": 2,
    }
    instance = LightRAG(**settings)

    await instance.initialize_storages()
    return instance


async def _index_file(rag: "LightRAG", path: str, label: str) -> None:
    """Insert the UTF-8 text file at ``path`` into ``rag``; print a notice and skip if it is missing."""
    try:
        with open(path, "r", encoding="utf-8") as f:
            print(f"Indexing {path} into {label}...")
            text = f.read()
    except FileNotFoundError:
        # Say so explicitly: the workspace will be queried without this data.
        print(f"Skipping {label}: {path} not found.")
        return
    # The file is already closed here, so the handle is not held open for the
    # duration of the insert.
    await rag.ainsert(text)


async def main():
    """Run the two-workspace demo: initialize, index, query, then finalize.

    Two LightRAG instances are created in separate workspaces
    ("rag_workspace_book" and "rag_workspace_hr"), each is fed its own
    document, and each is queried in "hybrid" mode. Any exception raised
    along the way is printed rather than propagated; both instances are
    finalized regardless of how the run ends.
    """
    rag_1 = None
    rag_2 = None
    try:
        # 1. Initialize Isolated Workspaces
        # Instance 1: Dedicated to literary analysis
        # Instance 2: Dedicated to corporate HR documentation
        print("Initializing isolated LightRAG workspaces...")
        rag_1 = await initialize_rag("rag_workspace_book")
        rag_2 = await initialize_rag("rag_workspace_hr")

        # 2. Populate Workspace 1 (Literature)
        await _index_file(rag_1, "Data/book-small.txt", "Literature Workspace")

        # 3. Populate Workspace 2 (Corporate)
        await _index_file(rag_2, "Data/HR_policies.txt", "HR Workspace")

        # 4. Context-Specific Querying
        print("\n--- Querying Literature Workspace ---")
        res1 = await rag_1.aquery(
            "What is the main theme?",
            param=QueryParam(mode="hybrid", stream=False),
        )
        print(f"Book Analysis: {res1[:200]}...")

        print("\n--- Querying HR Workspace ---")
        res2 = await rag_2.aquery(
            "What is the leave policy?", param=QueryParam(mode="hybrid")
        )
        print(f"HR Response: {res2[:200]}...")

    except Exception as e:
        # Top-level boundary of the demo script: report the failure and fall
        # through to cleanup.
        print(f"An error occurred: {e}")
    finally:
        # Finalize storage to safely close DB connections and write buffers.
        # Nested try/finally: a failure while finalizing the first workspace
        # must not prevent the second one from being finalized.
        try:
            if rag_1 is not None:
                await rag_1.finalize_storages()
        finally:
            if rag_2 is not None:
                await rag_2.finalize_storages()


if __name__ == "__main__":
    asyncio.run(main())