lightrag_gemini_postgres_demo.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. """
  2. LightRAG Demo with PostgreSQL + Google Gemini
  3. This example demonstrates how to use LightRAG with:
  4. - Google Gemini (LLM + Embeddings)
  5. - PostgreSQL-backed storages for:
  6. - Vector storage
  7. - Graph storage
  8. - KV storage
  9. - Document status storage
  10. Prerequisites:
  11. 1. PostgreSQL database running and accessible
  12. 2. Required tables will be auto-created by LightRAG
  13. 3. Set environment variables (example .env):
  14. POSTGRES_HOST=localhost
  15. POSTGRES_PORT=5432
  16. POSTGRES_USER=admin
  17. POSTGRES_PASSWORD=admin
  18. POSTGRES_DATABASE=ai
  19. LIGHTRAG_KV_STORAGE=PGKVStorage
  20. LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
  21. LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
  22. LIGHTRAG_VECTOR_STORAGE=PGVectorStorage
  23. GEMINI_API_KEY=your-api-key
  24. 4. Prepare a text file to index (default: Data/book.txt)
  25. Usage:
  26. python examples/lightrag_gemini_postgres_demo.py
  27. """
  28. import os
  29. import asyncio
  30. import numpy as np
  31. from lightrag import LightRAG, QueryParam
  32. from lightrag.llm.gemini import gemini_model_complete, gemini_embed
  33. from lightrag.utils import setup_logger, wrap_embedding_func_with_attrs
  34. # --------------------------------------------------
  35. # Logger
  36. # --------------------------------------------------
  37. setup_logger("lightrag", level="INFO")
  38. # --------------------------------------------------
  39. # Config
  40. # --------------------------------------------------
  41. WORKING_DIR = "./rag_storage"
  42. BOOK_FILE = "Data/book.txt"
  43. if not os.path.exists(WORKING_DIR):
  44. os.mkdir(WORKING_DIR)
  45. GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
  46. if not GEMINI_API_KEY:
  47. raise ValueError("GEMINI_API_KEY environment variable is not set")
  48. # --------------------------------------------------
  49. # LLM function (Gemini)
  50. # --------------------------------------------------
  51. async def llm_model_func(
  52. prompt,
  53. system_prompt=None,
  54. history_messages=[],
  55. keyword_extraction=False,
  56. **kwargs,
  57. ) -> str:
  58. return await gemini_model_complete(
  59. prompt,
  60. system_prompt=system_prompt,
  61. history_messages=history_messages,
  62. api_key=GEMINI_API_KEY,
  63. model_name="gemini-2.0-flash",
  64. **kwargs,
  65. )
  66. # --------------------------------------------------
  67. # Embedding function (Gemini)
  68. # --------------------------------------------------
  69. @wrap_embedding_func_with_attrs(
  70. embedding_dim=768,
  71. max_token_size=2048,
  72. model_name="models/text-embedding-004",
  73. )
  74. async def embedding_func(texts: list[str]) -> np.ndarray:
  75. return await gemini_embed.func(
  76. texts,
  77. api_key=GEMINI_API_KEY,
  78. model="models/text-embedding-004",
  79. )
  80. # --------------------------------------------------
  81. # Initialize RAG with PostgreSQL storages
  82. # --------------------------------------------------
  83. async def initialize_rag() -> LightRAG:
  84. rag = LightRAG(
  85. working_dir=WORKING_DIR,
  86. llm_model_name="gemini-2.0-flash",
  87. llm_model_func=llm_model_func,
  88. embedding_func=embedding_func,
  89. # Performance tuning
  90. embedding_func_max_async=4,
  91. embedding_batch_num=8,
  92. llm_model_max_async=2,
  93. # Chunking
  94. chunk_token_size=1200,
  95. chunk_overlap_token_size=100,
  96. # PostgreSQL-backed storages
  97. graph_storage="PGGraphStorage",
  98. vector_storage="PGVectorStorage",
  99. doc_status_storage="PGDocStatusStorage",
  100. kv_storage="PGKVStorage",
  101. )
  102. # REQUIRED: initialize all storage backends
  103. await rag.initialize_storages()
  104. return rag
  105. # --------------------------------------------------
  106. # Main
  107. # --------------------------------------------------
  108. async def main():
  109. rag = None
  110. try:
  111. print("Initializing LightRAG with PostgreSQL + Gemini...")
  112. rag = await initialize_rag()
  113. if not os.path.exists(BOOK_FILE):
  114. raise FileNotFoundError(
  115. f"'{BOOK_FILE}' not found. Please provide a text file to index."
  116. )
  117. print(f"\nReading document: {BOOK_FILE}")
  118. with open(BOOK_FILE, "r", encoding="utf-8") as f:
  119. content = f.read()
  120. print(f"Loaded document ({len(content)} characters)")
  121. print("\nInserting document into LightRAG (this may take some time)...")
  122. await rag.ainsert(content)
  123. print("Document indexed successfully!")
  124. print("\n" + "=" * 60)
  125. print("Running sample queries")
  126. print("=" * 60)
  127. query = "What are the top themes in this document?"
  128. for mode in ["naive", "local", "global", "hybrid"]:
  129. print(f"\n[{mode.upper()} MODE]")
  130. result = await rag.aquery(query, param=QueryParam(mode=mode))
  131. print(result[:400] + "..." if len(result) > 400 else result)
  132. print("\nRAG system is ready for use!")
  133. except Exception as e:
  134. print("An error occurred:", e)
  135. import traceback
  136. traceback.print_exc()
  137. finally:
  138. if rag is not None:
  139. await rag.finalize_storages()
  140. if __name__ == "__main__":
  141. asyncio.run(main())