lightrag_openai_opensearch_graph_demo.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. """
  2. LightRAG Demo with OpenSearch + OpenAI
  3. This example demonstrates how to use LightRAG with:
  4. - OpenAI (LLM + Embeddings)
  5. - OpenSearch-backed storages for:
  6. - KV storage
  7. - Vector storage (k-NN)
  8. - Graph storage (dual-index nodes + edges)
  9. - Document status storage
  10. Prerequisites:
  11. 1. OpenSearch cluster running and accessible (3.x or higher with k-NN plugin)
  12. 2. Required indices will be auto-created by LightRAG
  13. 3. Set environment variables (example .env):
  14. OPENSEARCH_HOSTS=localhost:9200
  15. OPENSEARCH_USER=admin
  16. OPENSEARCH_PASSWORD=your-password
  17. OPENSEARCH_USE_SSL=false
  18. OPENSEARCH_VERIFY_CERTS=false
  19. OPENAI_API_KEY=your-api-key
  20. 4. Prepare a text file to index (default: ./book.txt)
  21. Usage:
  22. python examples/lightrag_openai_opensearch_graph_demo.py
  23. """
  24. import os
  25. import asyncio
  26. import numpy as np
  27. from lightrag import LightRAG, QueryParam
  28. from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
  29. from lightrag.utils import setup_logger, EmbeddingFunc
  # --------------------------------------------------
  # Logging: configure the "lightrag" logger at INFO level
  # --------------------------------------------------
  setup_logger("lightrag", level="INFO")
  # --------------------------------------------------
  # Config
  # --------------------------------------------------
  # Working directory handed to LightRAG, and the text file that gets indexed.
  WORKING_DIR = "./opensearch_rag_storage"
  BOOK_FILE = "./book.txt"

  # makedirs(exist_ok=True) replaces the exists()-then-mkdir() pair: it has no
  # check-then-create race and also creates any missing parent directories.
  os.makedirs(WORKING_DIR, exist_ok=True)

  # Replace with your API key, or set via environment variable.
  # An unset or empty OPENAI_API_KEY falls back to the "sk-" placeholder.
  if not os.getenv("OPENAI_API_KEY"):
      os.environ["OPENAI_API_KEY"] = "sk-"

  # Embedding settings; both can be overridden through environment variables.
  EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large")
  EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192))
  46. # --------------------------------------------------
  47. # Embedding function (OpenAI)
  48. # --------------------------------------------------
  async def embedding_func(texts: list[str]) -> np.ndarray:
      """Embed ``texts`` through OpenAI with the configured embedding model.

      Awaits ``openai_embed.func`` with ``model=EMBEDDING_MODEL`` and returns
      whatever that call produces.
      """
      vectors = await openai_embed.func(texts, model=EMBEDDING_MODEL)
      return vectors
  async def get_embedding_dimension():
      """Probe the embedding dimension by embedding one sample sentence.

      Returns ``shape[1]`` of the array produced by ``embedding_func``.
      """
      sample = ["This is a test sentence."]
      return (await embedding_func(sample)).shape[1]
  async def create_embedding_function_instance():
      """Build the ``EmbeddingFunc`` that wraps ``embedding_func``.

      The dimension is measured with ``get_embedding_dimension()`` and the
      token limit is taken from ``EMBEDDING_MAX_TOKEN_SIZE``.
      """
      settings = {
          "embedding_dim": await get_embedding_dimension(),
          "max_token_size": EMBEDDING_MAX_TOKEN_SIZE,
          "func": embedding_func,
      }
      return EmbeddingFunc(**settings)
  65. # --------------------------------------------------
  66. # Initialize RAG with OpenSearch storages
  67. # --------------------------------------------------
  async def initialize_rag() -> LightRAG:
      """Create a LightRAG instance on OpenSearch storages and clear old data.

      The KV, document-status, graph and vector storage roles are all set to
      their OpenSearch implementations. Once the storages are initialized,
      each data store listed below is dropped so the example can be re-run;
      the LLM response cache is not in that list and is therefore kept.
      """
      # OpenSearch-backed storages
      opensearch_backends = {
          "kv_storage": "OpenSearchKVStorage",
          "doc_status_storage": "OpenSearchDocStatusStorage",
          "graph_storage": "OpenSearchGraphStorage",
          "vector_storage": "OpenSearchVectorDBStorage",
      }

      embedder = await create_embedding_function_instance()
      rag = LightRAG(
          working_dir=WORKING_DIR,
          llm_model_func=gpt_4o_mini_complete,
          embedding_func=embedder,
          **opensearch_backends,
      )

      # REQUIRED: initialize all storage backends
      await rag.initialize_storages()

      # Clean previous data so the example is re-runnable
      # (LLM response cache is preserved for faster reruns)
      stale_stores = (
          rag.full_docs,
          rag.text_chunks,
          rag.full_entities,
          rag.full_relations,
          rag.entity_chunks,
          rag.relation_chunks,
          rag.entities_vdb,
          rag.relationships_vdb,
          rag.chunks_vdb,
          rag.chunk_entity_relation_graph,
          rag.doc_status,
      )
      for store in stale_stores:
          await store.drop()
      print("Cleared previous data.")

      return rag
  100. # --------------------------------------------------
  101. # Main
  102. # --------------------------------------------------
  async def main():
      """Index ``BOOK_FILE`` with LightRAG and run one sample query per mode.

      Steps: check that the input file exists, initialize LightRAG (which
      clears previously stored data), insert the file's text, then ask the
      same question in the naive, local, global and hybrid query modes.

      Any exception is printed together with its traceback instead of being
      propagated. Storages are finalized whenever initialization succeeded.
      """
      rag = None
      try:
          # Fail fast: initialize_rag() drops existing data and calls the
          # embedding API, so verify the input file before doing either.
          if not os.path.exists(BOOK_FILE):
              raise FileNotFoundError(
                  f"'{BOOK_FILE}' not found. Please provide a text file to index."
              )

          print("Initializing LightRAG with OpenSearch + OpenAI...")
          rag = await initialize_rag()

          print(f"\nReading document: {BOOK_FILE}")
          with open(BOOK_FILE, "r", encoding="utf-8") as f:
              content = f.read()
          print(f"Loaded document ({len(content)} characters)")

          print("\nInserting document into LightRAG (this may take some time)...")
          await rag.ainsert(content)
          print("Document indexed successfully!")

          print("\n" + "=" * 60)
          print("Running sample queries")
          print("=" * 60)

          query = "What are the top themes in this document?"
          for mode in ["naive", "local", "global", "hybrid"]:
              print(f"\n[{mode.upper()} MODE]")
              result = await rag.aquery(query, param=QueryParam(mode=mode))
              print(result)

          print("\nRAG system is ready for use!")
      except Exception as e:
          print("An error occurred:", e)
          import traceback

          traceback.print_exc()
      finally:
          # rag stays None if initialization never completed; nothing to close.
          if rag is not None:
              await rag.finalize_storages()
  # Script entry point: run the async demo when executed directly.
  if __name__ == "__main__":
      asyncio.run(main())