lightrag_hf_demo.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. import os
  2. from lightrag import LightRAG, QueryParam
  3. from lightrag.llm.hf import hf_model_complete, hf_embed
  4. from lightrag.utils import EmbeddingFunc
  5. from transformers import AutoModel, AutoTokenizer
  6. import asyncio
  7. import nest_asyncio
  8. nest_asyncio.apply()
  9. WORKING_DIR = "./dickens"
  10. if not os.path.exists(WORKING_DIR):
  11. os.mkdir(WORKING_DIR)
  12. async def initialize_rag():
  13. rag = LightRAG(
  14. working_dir=WORKING_DIR,
  15. llm_model_func=hf_model_complete,
  16. llm_model_name="meta-llama/Llama-3.1-8B-Instruct",
  17. embedding_func=EmbeddingFunc(
  18. embedding_dim=384,
  19. max_token_size=5000,
  20. func=lambda texts: hf_embed(
  21. texts,
  22. tokenizer=AutoTokenizer.from_pretrained(
  23. "sentence-transformers/all-MiniLM-L6-v2"
  24. ),
  25. embed_model=AutoModel.from_pretrained(
  26. "sentence-transformers/all-MiniLM-L6-v2"
  27. ),
  28. ),
  29. ),
  30. )
  31. await rag.initialize_storages() # Auto-initializes pipeline_status
  32. return rag
  33. def main():
  34. rag = asyncio.run(initialize_rag())
  35. with open("./book.txt", "r", encoding="utf-8") as f:
  36. rag.insert(f.read())
  37. # Perform naive search
  38. print(
  39. rag.query(
  40. "What are the top themes in this story?", param=QueryParam(mode="naive")
  41. )
  42. )
  43. # Perform local search
  44. print(
  45. rag.query(
  46. "What are the top themes in this story?", param=QueryParam(mode="local")
  47. )
  48. )
  49. # Perform global search
  50. print(
  51. rag.query(
  52. "What are the top themes in this story?", param=QueryParam(mode="global")
  53. )
  54. )
  55. # Perform hybrid search
  56. print(
  57. rag.query(
  58. "What are the top themes in this story?", param=QueryParam(mode="hybrid")
  59. )
  60. )
  61. if __name__ == "__main__":
  62. main()