# insert_custom_kg.py
  1. import os
  2. from lightrag import LightRAG
  3. from lightrag.llm.openai import gpt_4o_mini_complete
#########
# Uncomment the two lines below if running in a Jupyter notebook to handle the async nature of rag.insert_custom_kg()
# import nest_asyncio
# nest_asyncio.apply()
#########
  9. WORKING_DIR = "./custom_kg"
  10. if not os.path.exists(WORKING_DIR):
  11. os.mkdir(WORKING_DIR)
  12. rag = LightRAG(
  13. working_dir=WORKING_DIR,
  14. llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model
  15. # llm_model_func=gpt_4o_complete # Optionally, use a stronger model
  16. )
  17. custom_kg = {
  18. "entities": [
  19. {
  20. "entity_name": "CompanyA",
  21. "entity_type": "Organization",
  22. "description": "A major technology company",
  23. "source_id": "Source1",
  24. },
  25. {
  26. "entity_name": "ProductX",
  27. "entity_type": "Product",
  28. "description": "A popular product developed by CompanyA",
  29. "source_id": "Source1",
  30. },
  31. {
  32. "entity_name": "PersonA",
  33. "entity_type": "Person",
  34. "description": "A renowned researcher in AI",
  35. "source_id": "Source2",
  36. },
  37. {
  38. "entity_name": "UniversityB",
  39. "entity_type": "Organization",
  40. "description": "A leading university specializing in technology and sciences",
  41. "source_id": "Source2",
  42. },
  43. {
  44. "entity_name": "CityC",
  45. "entity_type": "Location",
  46. "description": "A large metropolitan city known for its culture and economy",
  47. "source_id": "Source3",
  48. },
  49. {
  50. "entity_name": "EventY",
  51. "entity_type": "Event",
  52. "description": "An annual technology conference held in CityC",
  53. "source_id": "Source3",
  54. },
  55. ],
  56. "relationships": [
  57. {
  58. "src_id": "CompanyA",
  59. "tgt_id": "ProductX",
  60. "description": "CompanyA develops ProductX",
  61. "keywords": "develop, produce",
  62. "weight": 1.0,
  63. "source_id": "Source1",
  64. },
  65. {
  66. "src_id": "PersonA",
  67. "tgt_id": "UniversityB",
  68. "description": "PersonA works at UniversityB",
  69. "keywords": "employment, affiliation",
  70. "weight": 0.9,
  71. "source_id": "Source2",
  72. },
  73. {
  74. "src_id": "CityC",
  75. "tgt_id": "EventY",
  76. "description": "EventY is hosted in CityC",
  77. "keywords": "host, location",
  78. "weight": 0.8,
  79. "source_id": "Source3",
  80. },
  81. ],
  82. "chunks": [
  83. {
  84. "content": "ProductX, developed by CompanyA, has revolutionized the market with its cutting-edge features.",
  85. "source_id": "Source1",
  86. "source_chunk_index": 0,
  87. },
  88. {
  89. "content": "One outstanding feature of ProductX is its advanced AI capabilities.",
  90. "source_id": "Source1",
  91. "chunk_order_index": 1,
  92. },
  93. {
  94. "content": "PersonA is a prominent researcher at UniversityB, focusing on artificial intelligence and machine learning.",
  95. "source_id": "Source2",
  96. "source_chunk_index": 0,
  97. },
  98. {
  99. "content": "EventY, held in CityC, attracts technology enthusiasts and companies from around the globe.",
  100. "source_id": "Source3",
  101. "source_chunk_index": 0,
  102. },
  103. {
  104. "content": "None",
  105. "source_id": "UNKNOWN",
  106. "source_chunk_index": 0,
  107. },
  108. ],
  109. }
# Hand the custom_kg dictionary (entities, relationships, chunks) to the LightRAG instance.
rag.insert_custom_kg(custom_kg)