# lightrag_nvidia_demo.py (5.0 KB)
  1. import os
  2. import asyncio
  3. import nest_asyncio
  4. from lightrag import LightRAG, QueryParam
  5. from lightrag.llm import (
  6. openai_complete_if_cache,
  7. nvidia_openai_embed,
  8. )
  9. from lightrag.utils import EmbeddingFunc
  10. import numpy as np
  11. # for custom llm_model_func
  12. from lightrag.utils import locate_json_string_body_from_string
  13. nest_asyncio.apply()
  14. WORKING_DIR = "./dickens"
  15. if not os.path.exists(WORKING_DIR):
  16. os.mkdir(WORKING_DIR)
  17. # some method to use your API key (choose one)
  18. # NVIDIA_OPENAI_API_KEY = os.getenv("NVIDIA_OPENAI_API_KEY")
  19. NVIDIA_OPENAI_API_KEY = "nvapi-xxxx" # your api key
# Option 1: use the pre-defined function for the NVIDIA LLM API (OpenAI-compatible):
# llm_model_func = nvidia_openai_complete
# Option 2: define a custom llm_model_func that calls an LLM on the NVIDIA API, as in the other examples:
  23. async def llm_model_func(
  24. prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
  25. ) -> str:
  26. result = await openai_complete_if_cache(
  27. "nvidia/llama-3.1-nemotron-70b-instruct",
  28. prompt,
  29. system_prompt=system_prompt,
  30. history_messages=history_messages,
  31. api_key=NVIDIA_OPENAI_API_KEY,
  32. base_url="https://integrate.api.nvidia.com/v1",
  33. **kwargs,
  34. )
  35. if keyword_extraction:
  36. return locate_json_string_body_from_string(result)
  37. return result
  38. # custom embedding
  39. nvidia_embed_model = "nvidia/nv-embedqa-e5-v5"
  40. async def indexing_embedding_func(texts: list[str]) -> np.ndarray:
  41. return await nvidia_openai_embed(
  42. texts,
  43. model=nvidia_embed_model, # maximum 512 token
  44. # model="nvidia/llama-3.2-nv-embedqa-1b-v1",
  45. api_key=NVIDIA_OPENAI_API_KEY,
  46. base_url="https://integrate.api.nvidia.com/v1",
  47. input_type="passage",
  48. trunc="END", # handling on server side if input token is longer than maximum token
  49. encode="float",
  50. )
  51. async def query_embedding_func(texts: list[str]) -> np.ndarray:
  52. return await nvidia_openai_embed(
  53. texts,
  54. model=nvidia_embed_model, # maximum 512 token
  55. # model="nvidia/llama-3.2-nv-embedqa-1b-v1",
  56. api_key=NVIDIA_OPENAI_API_KEY,
  57. base_url="https://integrate.api.nvidia.com/v1",
  58. input_type="query",
  59. trunc="END", # handling on server side if input token is longer than maximum token
  60. encode="float",
  61. )
# The indexing and query embedding functions use the same model, so their dimensions are the same
  63. async def get_embedding_dim():
  64. test_text = ["This is a test sentence."]
  65. embedding = await indexing_embedding_func(test_text)
  66. embedding_dim = embedding.shape[1]
  67. return embedding_dim
  68. # function test
  69. async def test_funcs():
  70. result = await llm_model_func("How are you?")
  71. print("llm_model_func: ", result)
  72. result = await indexing_embedding_func(["How are you?"])
  73. print("embedding_func: ", result)
  74. # asyncio.run(test_funcs())
  75. async def initialize_rag():
  76. embedding_dimension = await get_embedding_dim()
  77. print(f"Detected embedding dimension: {embedding_dimension}")
  78. # lightRAG class during indexing
  79. rag = LightRAG(
  80. working_dir=WORKING_DIR,
  81. llm_model_func=llm_model_func,
  82. # llm_model_name="meta/llama3-70b-instruct", #un comment if
  83. embedding_func=EmbeddingFunc(
  84. embedding_dim=embedding_dimension,
  85. max_token_size=512, # maximum token size, somehow it's still exceed maximum number of token
  86. # so truncate (trunc) parameter on embedding_func will handle it and try to examine the tokenizer used in LightRAG
  87. # so you can adjust to be able to fit the NVIDIA model (future work)
  88. func=indexing_embedding_func,
  89. ),
  90. )
  91. await rag.initialize_storages() # Auto-initializes pipeline_status
  92. return rag
  93. async def main():
  94. try:
  95. # Initialize RAG instance
  96. rag = await initialize_rag()
  97. # reading file
  98. with open("./book.txt", "r", encoding="utf-8") as f:
  99. await rag.ainsert(f.read())
  100. # Perform naive search
  101. print("==============Naive===============")
  102. print(
  103. await rag.aquery(
  104. "What are the top themes in this story?", param=QueryParam(mode="naive")
  105. )
  106. )
  107. # Perform local search
  108. print("==============local===============")
  109. print(
  110. await rag.aquery(
  111. "What are the top themes in this story?", param=QueryParam(mode="local")
  112. )
  113. )
  114. # Perform global search
  115. print("==============global===============")
  116. print(
  117. await rag.aquery(
  118. "What are the top themes in this story?",
  119. param=QueryParam(mode="global"),
  120. )
  121. )
  122. # Perform hybrid search
  123. print("==============hybrid===============")
  124. print(
  125. await rag.aquery(
  126. "What are the top themes in this story?",
  127. param=QueryParam(mode="hybrid"),
  128. )
  129. )
  130. except Exception as e:
  131. print(f"An error occurred: {e}")
  132. if __name__ == "__main__":
  133. asyncio.run(main())