# test_description_api_validation.py (9.2 KB)
  1. import pytest
  2. from lightrag.constants import SOURCE_IDS_LIMIT_METHOD_KEEP
  3. from lightrag.constants import GRAPH_FIELD_SEP
  4. from lightrag.operate import (
  5. _handle_single_entity_extraction,
  6. _merge_nodes_then_upsert,
  7. _normalize_text_extraction_record_attributes,
  8. _handle_single_relationship_extraction,
  9. )
  10. from lightrag import utils_graph
  11. class DummyGraphStorage:
  12. def __init__(self, node=None):
  13. self.node = node
  14. self.upserted_nodes = []
  15. async def get_node(self, node_id):
  16. return self.node
  17. async def upsert_node(self, node_id, node_data):
  18. self.upserted_nodes.append((node_id, node_data))
  19. self.node = dict(node_data)
  20. class DummyVectorStorage:
  21. def __init__(self):
  22. self.global_config = {"workspace": "test"}
  23. self.upserts = []
  24. self.deletes = []
  25. async def upsert(self, data):
  26. self.upserts.append(data)
  27. return None
  28. async def delete(self, ids):
  29. self.deletes.append(ids)
  30. return None
  31. async def get_by_id(self, id_):
  32. return None
  33. async def index_done_callback(self):
  34. return True
  35. class DummyAsyncContext:
  36. async def __aenter__(self):
  37. return None
  38. async def __aexit__(self, exc_type, exc, tb):
  39. return False
  40. class DummyMergeGraphStorage:
  41. def __init__(self):
  42. self.nodes = {
  43. "Canonical": {
  44. "entity_id": "Canonical",
  45. "description": "canonical desc",
  46. "entity_type": "ORG",
  47. "source_id": "chunk-1",
  48. "file_path": "canonical.md",
  49. },
  50. "Alias": {
  51. "entity_id": "Alias",
  52. "description": "alias desc",
  53. "entity_type": "ORG",
  54. "source_id": "chunk-2",
  55. "file_path": "alias.md",
  56. },
  57. "Neighbor": {
  58. "entity_id": "Neighbor",
  59. "description": "neighbor desc",
  60. "entity_type": "ORG",
  61. "source_id": "chunk-3",
  62. "file_path": "neighbor.md",
  63. },
  64. }
  65. self.edges = {
  66. ("Alias", "Neighbor"): {
  67. "description": "rel desc",
  68. "keywords": "alias",
  69. "source_id": "chunk-rel",
  70. "weight": 1.0,
  71. "file_path": "rel.md",
  72. }
  73. }
  74. async def has_node(self, node_id):
  75. return node_id in self.nodes
  76. async def get_node(self, node_id):
  77. return self.nodes[node_id]
  78. async def upsert_node(self, node_id, node_data):
  79. self.nodes[node_id] = dict(node_data)
  80. async def get_node_edges(self, node_id):
  81. results = []
  82. for src, tgt in self.edges:
  83. if src == node_id or tgt == node_id:
  84. results.append((src, tgt))
  85. return results
  86. async def get_edge(self, src, tgt):
  87. return self.edges.get((src, tgt)) or self.edges.get((tgt, src))
  88. async def upsert_edge(self, src, tgt, edge_data):
  89. self.edges[(src, tgt)] = dict(edge_data)
  90. async def delete_node(self, node_id):
  91. self.nodes.pop(node_id, None)
  92. self.edges = {
  93. (src, tgt): data
  94. for (src, tgt), data in self.edges.items()
  95. if src != node_id and tgt != node_id
  96. }
  97. async def index_done_callback(self):
  98. return True
  99. @pytest.mark.asyncio
  100. async def test_merge_nodes_then_upsert_handles_missing_legacy_description():
  101. graph = DummyGraphStorage(node={"source_id": "chunk-1"})
  102. global_config = {
  103. "source_ids_limit_method": SOURCE_IDS_LIMIT_METHOD_KEEP,
  104. "max_source_ids_per_entity": 20,
  105. }
  106. result = await _merge_nodes_then_upsert(
  107. entity_name="LegacyEntity",
  108. nodes_data=[],
  109. knowledge_graph_inst=graph,
  110. entity_vdb=None,
  111. global_config=global_config,
  112. )
  113. assert result["description"] == "Entity LegacyEntity"
  114. assert graph.upserted_nodes[-1][1]["description"] == "Entity LegacyEntity"
  115. @pytest.mark.asyncio
  116. async def test_acreate_entity_rejects_empty_description():
  117. with pytest.raises(ValueError, match="description cannot be empty"):
  118. await utils_graph.acreate_entity(
  119. chunk_entity_relation_graph=None,
  120. entities_vdb=None,
  121. relationships_vdb=None,
  122. entity_name="EntityA",
  123. entity_data={"description": " "},
  124. )
  125. @pytest.mark.asyncio
  126. async def test_acreate_relation_rejects_empty_description():
  127. with pytest.raises(ValueError, match="description cannot be empty"):
  128. await utils_graph.acreate_relation(
  129. chunk_entity_relation_graph=None,
  130. entities_vdb=None,
  131. relationships_vdb=None,
  132. source_entity="A",
  133. target_entity="B",
  134. relation_data={"description": ""},
  135. )
  136. @pytest.mark.asyncio
  137. async def test_aedit_entity_rejects_empty_description():
  138. with pytest.raises(ValueError, match="description cannot be empty"):
  139. await utils_graph.aedit_entity(
  140. chunk_entity_relation_graph=None,
  141. entities_vdb=None,
  142. relationships_vdb=None,
  143. entity_name="EntityA",
  144. updated_data={"description": None},
  145. )
  146. @pytest.mark.asyncio
  147. async def test_aedit_relation_rejects_empty_description():
  148. with pytest.raises(ValueError, match="description cannot be empty"):
  149. await utils_graph.aedit_relation(
  150. chunk_entity_relation_graph=None,
  151. entities_vdb=None,
  152. relationships_vdb=None,
  153. source_entity="A",
  154. target_entity="B",
  155. updated_data={"description": " "},
  156. )
  157. @pytest.mark.asyncio
  158. async def test_aedit_entity_allows_updates_without_description(monkeypatch):
  159. async def fake_edit_impl(*args, **kwargs):
  160. return {"entity_name": "EntityA", "description": "kept", "source_id": "chunk-1"}
  161. monkeypatch.setattr(
  162. utils_graph, "get_storage_keyed_lock", lambda *a, **k: DummyAsyncContext()
  163. )
  164. monkeypatch.setattr(utils_graph, "_edit_entity_impl", fake_edit_impl)
  165. result = await utils_graph.aedit_entity(
  166. chunk_entity_relation_graph=None,
  167. entities_vdb=DummyVectorStorage(),
  168. relationships_vdb=DummyVectorStorage(),
  169. entity_name="EntityA",
  170. updated_data={"entity_type": "ORG"},
  171. )
  172. assert result["operation_summary"]["operation_status"] == "success"
  173. def test_handle_single_relationship_extraction_ignores_empty_description():
  174. relation = _handle_single_relationship_extraction(
  175. ["relation", "Alice", "Bob", "works_with", " "],
  176. chunk_key="chunk-1",
  177. timestamp=1,
  178. )
  179. assert relation is None
  180. def test_mis_prefixed_relation_row_is_recovered():
  181. record = _normalize_text_extraction_record_attributes(
  182. ["entity", "Alice", "Acme Corp", "founded", "Alice founded Acme Corp."],
  183. chunk_key="chunk-1",
  184. )
  185. relation = _handle_single_relationship_extraction(
  186. record,
  187. chunk_key="chunk-1",
  188. timestamp=1,
  189. )
  190. assert relation is not None
  191. assert relation["src_id"] == "Alice"
  192. assert relation["tgt_id"] == "Acme Corp"
  193. def test_four_part_entity_row_remains_entity():
  194. record = _normalize_text_extraction_record_attributes(
  195. ["entity", "Alice", "Person", "Alice is the founder of Acme Corp."],
  196. chunk_key="chunk-1",
  197. )
  198. entity = _handle_single_entity_extraction(
  199. record,
  200. chunk_key="chunk-1",
  201. timestamp=1,
  202. )
  203. assert entity is not None
  204. assert entity["entity_name"] == "Alice"
  205. def test_malformed_recovered_relation_still_fails():
  206. record = _normalize_text_extraction_record_attributes(
  207. ["entity", "Alice", "Acme Corp", "founded", " "],
  208. chunk_key="chunk-1",
  209. )
  210. relation = _handle_single_relationship_extraction(
  211. record,
  212. chunk_key="chunk-1",
  213. timestamp=1,
  214. )
  215. assert relation is None
  216. def test_unrelated_five_part_prefix_remains_invalid():
  217. record = _normalize_text_extraction_record_attributes(
  218. ["edge", "Alice", "Acme Corp", "founded", "Alice founded Acme Corp."],
  219. chunk_key="chunk-1",
  220. )
  221. relation = _handle_single_relationship_extraction(
  222. record,
  223. chunk_key="chunk-1",
  224. timestamp=1,
  225. )
  226. assert relation is None
  227. @pytest.mark.asyncio
  228. async def test_merge_entities_preserves_file_path_in_vector_updates(monkeypatch):
  229. graph = DummyMergeGraphStorage()
  230. entities_vdb = DummyVectorStorage()
  231. relationships_vdb = DummyVectorStorage()
  232. async def fake_get_entity_info(*args, **kwargs):
  233. return {"entity_name": "Canonical"}
  234. monkeypatch.setattr(utils_graph, "get_entity_info", fake_get_entity_info)
  235. await utils_graph._merge_entities_impl(
  236. chunk_entity_relation_graph=graph,
  237. entities_vdb=entities_vdb,
  238. relationships_vdb=relationships_vdb,
  239. source_entities=["Alias", "Canonical"],
  240. target_entity="Canonical",
  241. )
  242. relationship_payload = relationships_vdb.upserts[-1]
  243. entity_payload = entities_vdb.upserts[-1]
  244. assert next(iter(relationship_payload.values()))["file_path"] == "rel.md"
  245. assert set(
  246. next(iter(entity_payload.values()))["file_path"].split(GRAPH_FIELD_SEP)
  247. ) == {
  248. "alias.md",
  249. "canonical.md",
  250. }