# Step_1.py (1.1 KB)
  1. import os
  2. import json
  3. import time
  4. import asyncio
  5. from lightrag import LightRAG
  6. def insert_text(rag, file_path):
  7. with open(file_path, mode="r") as f:
  8. unique_contexts = json.load(f)
  9. retries = 0
  10. max_retries = 3
  11. while retries < max_retries:
  12. try:
  13. rag.insert(unique_contexts)
  14. break
  15. except Exception as e:
  16. retries += 1
  17. print(f"Insertion failed, retrying ({retries}/{max_retries}), error: {e}")
  18. time.sleep(10)
  19. if retries == max_retries:
  20. print("Insertion failed after exceeding the maximum number of retries")
  21. cls = "agriculture"
  22. WORKING_DIR = f"../{cls}"
  23. if not os.path.exists(WORKING_DIR):
  24. os.mkdir(WORKING_DIR)
  25. async def initialize_rag():
  26. rag = LightRAG(working_dir=WORKING_DIR)
  27. await rag.initialize_storages() # Auto-initializes pipeline_status
  28. return rag
  29. def main():
  30. # Initialize RAG instance
  31. rag = asyncio.run(initialize_rag())
  32. insert_text(rag, f"../datasets/unique_contexts/{cls}_unique_contexts.json")
  33. if __name__ == "__main__":
  34. main()