test_graph_storage.py 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574
  1. #!/usr/bin/env python
  2. """
  3. General-purpose graph storage test program.
  4. This program selects the graph storage type to use based on the LIGHTRAG_GRAPH_STORAGE configuration in .env,
  5. and tests its basic and advanced operations.
  6. Supported graph storage types include:
  7. - NetworkXStorage
  8. - Neo4JStorage
  9. - MongoDBStorage
  10. - PGGraphStorage
  11. - MemgraphStorage
  12. """
  13. import asyncio
  14. import os
  15. import sys
  16. import importlib
  17. import numpy as np
  18. import pytest
  19. from dotenv import load_dotenv
  20. from ascii_colors import ASCIIColors
  21. # Add the project root directory to the front of the Python path so this
  22. # script always exercises the checked-out source tree, not a stale installed
  23. # lightrag package from the active virtualenv.
  24. sys.path.insert(
  25. 0,
  26. os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
  27. )
  28. from lightrag.types import KnowledgeGraph
  29. from lightrag.kg import (
  30. STORAGE_IMPLEMENTATIONS,
  31. STORAGE_ENV_REQUIREMENTS,
  32. STORAGES,
  33. verify_storage_implementation,
  34. )
  35. from lightrag.kg.shared_storage import initialize_share_data
  36. from lightrag.constants import GRAPH_FIELD_SEP
  # Stand-in embedding function: yields random vectors instead of real embeddings.
  async def mock_embedding_func(texts):
      """
      Return one random 10-dimensional vector per input text.

      Args:
          texts: Sized collection of inputs; only its length is used.

      Returns:
          The array produced by np.random.rand(len(texts), 10).
      """
      vector_dim = 10
      batch_size = len(texts)
      return np.random.rand(batch_size, vector_dim)
  def check_env_file():
      """
      Warn when no .env file is present in the current directory.

      If the file is missing and stdin is an interactive terminal, the user is
      asked whether to go on; any answer other than "yes" (case-insensitive)
      cancels. When stdin is not a terminal the warning is printed and
      execution continues without prompting.

      Returns:
          True to continue execution, False if the user declined.
      """
      if os.path.exists(".env"):
          return True

      ASCIIColors.yellow(
          "Warning: .env file not found in the current directory. This may affect storage configuration loading."
      )

      # Only prompt when someone can actually answer.
      if not sys.stdin.isatty():
          return True

      answer = input("Do you want to continue? (yes/NO): ")
      if answer.lower() == "yes":
          return True

      ASCIIColors.red("Test program cancelled.")
      return False
  async def initialize_graph_storage():
      """
      Create and initialize the graph storage selected by LIGHTRAG_GRAPH_STORAGE.

      The storage type defaults to "NetworkXStorage" when the variable is unset.
      Every failure (unsupported type, missing required environment variables,
      unknown module path, import error, or an exception while constructing or
      initializing the instance) is printed in red and reported as a None
      return value rather than raised.

      Returns:
          The initialized storage instance, or None if any step failed.
      """
      backend_name = os.getenv("LIGHTRAG_GRAPH_STORAGE", "NetworkXStorage")

      # Step 1: make sure the requested type is a known graph storage.
      try:
          verify_storage_implementation("GRAPH_STORAGE", backend_name)
      except ValueError as exc:
          ASCIIColors.red(f"Error: {str(exc)}")
          supported = ", ".join(
              STORAGE_IMPLEMENTATIONS["GRAPH_STORAGE"]["implementations"]
          )
          ASCIIColors.yellow(f"Supported graph storage types: {supported}")
          return None

      # Step 2: every environment variable the backend requires must be set.
      unset_vars = [
          name
          for name in STORAGE_ENV_REQUIREMENTS.get(backend_name, [])
          if not os.getenv(name)
      ]
      if unset_vars:
          ASCIIColors.red(
              f"Error: {backend_name} requires the following environment variables, but they are not set: {', '.join(unset_vars)}"
          )
          return None

      # Step 3: resolve the implementing module and fetch the class from it.
      module_path = STORAGES.get(backend_name)
      if not module_path:
          ASCIIColors.red(f"Error: Module path for {backend_name} not found.")
          return None

      try:
          storage_class = getattr(
              importlib.import_module(module_path, package="lightrag"),
              backend_name,
          )
      except (ImportError, AttributeError) as exc:
          ASCIIColors.red(f"Error: Failed to import {backend_name}: {str(exc)}")
          return None

      # Step 4: build the configuration handed to the storage constructor.
      global_config = {
          # Batch size
          "embedding_batch_num": 10,
          # Cosine similarity threshold
          "vector_db_storage_cls_kwargs": {"cosine_better_than_threshold": 0.5},
          # Working directory
          "working_dir": os.getenv("WORKING_DIR", "./rag_storage"),
      }

      # Shared storage is initialized for every storage type (required for locks);
      # called without arguments, i.e. single-process mode.
      initialize_share_data()

      # Step 5: construct the instance and open its connection.
      try:
          instance = storage_class(
              namespace="test_graph",
              workspace="test_workspace",
              global_config=global_config,
              embedding_func=mock_embedding_func,
          )
          await instance.initialize()
      except Exception as exc:
          ASCIIColors.red(f"Error: Failed to initialize {backend_name}: {str(exc)}")
          return None
      return instance
  @pytest.fixture
  async def storage():
      """
      Pytest fixture that yields an initialized graph storage instance.

      Loads ".env" with override=False, then skips the requesting test when
      check_env_file() returns False or when initialize_graph_storage() returns
      None. drop() is called on the instance before it is yielded and again
      during teardown, followed by finalize().
      """
      load_dotenv(dotenv_path=".env", override=False)

      if not check_env_file():
          pytest.skip(".env file not available for graph storage integration tests")

      backend = await initialize_graph_storage()
      if backend is None:
          pytest.skip("Graph storage backend is not configured for integration tests")

      try:
          # Clear leftover data first, then hand the instance to the test.
          await backend.drop()
          yield backend
      finally:
          # Teardown: a failing drop() is only reported as a warning, and
          # finalize() runs regardless of whether drop() succeeded.
          try:
              await backend.drop()
          except Exception as drop_error:
              ASCIIColors.yellow(
                  f"Warning: failed to drop test graph data: {drop_error}"
              )
          finally:
              await backend.finalize()
  @pytest.mark.integration
  @pytest.mark.requires_db
  async def test_graph_basic(storage):
      """
      Test basic graph database operations:
      1. Use upsert_node to insert two nodes.
      2. Use upsert_edge to insert an edge connecting the two nodes.
      3. Use get_node to read a node and compare it with what was written.
      4. Use get_edge to read the edge and compare it with what was written.
      5. Read the same edge in the reverse direction and check that both reads
         return equal properties (undirected graph behaviour).

      Args:
          storage: Initialized graph storage instance (the `storage` fixture).

      Returns:
          True when every check passes.

      Raises:
          Exception: Any assertion failure or storage error is logged in red and
              then re-raised, so the test runner reports the test as failed
              instead of silently passing.
      """
      try:
          # 1. Insert the first node
          node1_id = "Artificial Intelligence"
          node1_data = {
              "entity_id": node1_id,
              "description": "Artificial intelligence is a branch of computer science that aims to understand the essence of intelligence and produce a new kind of intelligent machine that can react in a manner similar to human intelligence.",
              "keywords": "AI,Machine Learning,Deep Learning",
              "entity_type": "Technology Field",
          }
          print(f"Inserting node 1: {node1_id}")
          await storage.upsert_node(node1_id, node1_data)

          # 2. Insert the second node
          node2_id = "Machine Learning"
          node2_data = {
              "entity_id": node2_id,
              "description": "Machine learning is a branch of artificial intelligence that uses statistical methods to enable computer systems to learn without being explicitly programmed.",
              "keywords": "Supervised Learning,Unsupervised Learning,Reinforcement Learning",
              "entity_type": "Technology Field",
          }
          print(f"Inserting node 2: {node2_id}")
          await storage.upsert_node(node2_id, node2_data)

          # 3. Insert the connecting edge
          edge_data = {
              "relationship": "includes",
              "weight": 1.0,
              "description": "The field of artificial intelligence includes the subfield of machine learning.",
          }
          print(f"Inserting edge: {node1_id} -> {node2_id}")
          await storage.upsert_edge(node1_id, node2_id, edge_data)

          # 4. Read node properties
          print(f"Reading node properties: {node1_id}")
          node1_props = await storage.get_node(node1_id)
          if not node1_props:
              print(f"Failed to read node properties: {node1_id}")
              raise AssertionError(f"Failed to read node properties: {node1_id}")

          print(f"Successfully read node properties: {node1_id}")
          print(
              f"Node description: {node1_props.get('description', 'No description')}"
          )
          print(f"Node type: {node1_props.get('entity_type', 'No type')}")
          print(f"Node keywords: {node1_props.get('keywords', 'No keywords')}")
          # Verify that the returned properties are correct
          assert (
              node1_props.get("entity_id") == node1_id
          ), f"Node ID mismatch: expected {node1_id}, got {node1_props.get('entity_id')}"
          assert (
              node1_props.get("description") == node1_data["description"]
          ), "Node description mismatch"
          assert (
              node1_props.get("entity_type") == node1_data["entity_type"]
          ), "Node type mismatch"

          # 5. Read edge properties
          print(f"Reading edge properties: {node1_id} -> {node2_id}")
          edge_props = await storage.get_edge(node1_id, node2_id)
          if not edge_props:
              print(f"Failed to read edge properties: {node1_id} -> {node2_id}")
              raise AssertionError(
                  f"Failed to read edge properties: {node1_id} -> {node2_id}"
              )

          print(f"Successfully read edge properties: {node1_id} -> {node2_id}")
          print(
              f"Edge relationship: {edge_props.get('relationship', 'No relationship')}"
          )
          print(
              f"Edge description: {edge_props.get('description', 'No description')}"
          )
          print(f"Edge weight: {edge_props.get('weight', 'No weight')}")
          # Verify that the returned properties are correct
          assert (
              edge_props.get("relationship") == edge_data["relationship"]
          ), "Edge relationship mismatch"
          assert (
              edge_props.get("description") == edge_data["description"]
          ), "Edge description mismatch"
          assert (
              edge_props.get("weight") == edge_data["weight"]
          ), "Edge weight mismatch"

          # 5.1 Verify undirected graph property - read reverse edge properties
          print(f"Reading reverse edge properties: {node2_id} -> {node1_id}")
          reverse_edge_props = await storage.get_edge(node2_id, node1_id)
          if not reverse_edge_props:
              print(
                  f"Failed to read reverse edge properties: {node2_id} -> {node1_id}"
              )
              raise AssertionError(
                  f"Failed to read reverse edge properties: {node2_id} -> {node1_id}, undirected graph property verification failed"
              )

          print(
              f"Successfully read reverse edge properties: {node2_id} -> {node1_id}"
          )
          print(
              f"Reverse edge relationship: {reverse_edge_props.get('relationship', 'No relationship')}"
          )
          print(
              f"Reverse edge description: {reverse_edge_props.get('description', 'No description')}"
          )
          print(
              f"Reverse edge weight: {reverse_edge_props.get('weight', 'No weight')}"
          )
          # Verify that forward and reverse edge properties are the same
          assert (
              edge_props == reverse_edge_props
          ), "Forward and reverse edge properties are not consistent, undirected graph property verification failed"
          print(
              "Undirected graph property verification successful: forward and reverse edge properties are consistent"
          )

          print("Basic tests completed, data is preserved in the database.")
          return True
      except Exception as e:
          ASCIIColors.red(f"An error occurred during the test: {str(e)}")
          # AssertionError is an Exception subclass: returning False here would
          # hide every failed check from pytest, so log and propagate instead.
          raise
  @pytest.mark.integration
  @pytest.mark.requires_db
  async def test_graph_advanced(storage):
      """
      Test advanced graph database operations:
      1. Insert three nodes and two edges as test data.
      2. Use node_degree to get the degree of a node.
      3. Use edge_degree to get the degree of an edge (both directions).
      4. Use get_node_edges to get all edges of a node.
      5. Use get_all_labels to get all labels.
      6. Use get_knowledge_graph to get a knowledge graph.
      7. Use delete_node to delete a node.
      8. Use remove_edges to delete edges (checked in both directions).
      9. Use remove_nodes to delete multiple nodes.

      This function does not call drop itself; the `storage` fixture calls
      drop() during its teardown.

      Args:
          storage: Initialized graph storage instance (the `storage` fixture).

      Returns:
          True when every check passes.

      Raises:
          Exception: Any assertion failure or storage error is logged in red and
              then re-raised, so the test runner reports the test as failed
              instead of silently passing.
      """
      try:
          # 1. Insert test data
          # Insert node 1: Artificial Intelligence
          node1_id = "Artificial Intelligence"
          node1_data = {
              "entity_id": node1_id,
              "description": "Artificial intelligence is a branch of computer science that aims to understand the essence of intelligence and produce a new kind of intelligent machine that can react in a manner similar to human intelligence.",
              "keywords": "AI,Machine Learning,Deep Learning",
              "entity_type": "Technology Field",
          }
          print(f"Inserting node 1: {node1_id}")
          await storage.upsert_node(node1_id, node1_data)

          # Insert node 2: Machine Learning
          node2_id = "Machine Learning"
          node2_data = {
              "entity_id": node2_id,
              "description": "Machine learning is a branch of artificial intelligence that uses statistical methods to enable computer systems to learn without being explicitly programmed.",
              "keywords": "Supervised Learning,Unsupervised Learning,Reinforcement Learning",
              "entity_type": "Technology Field",
          }
          print(f"Inserting node 2: {node2_id}")
          await storage.upsert_node(node2_id, node2_data)

          # Insert node 3: Deep Learning
          node3_id = "Deep Learning"
          node3_data = {
              "entity_id": node3_id,
              "description": "Deep learning is a branch of machine learning that uses multi-layered neural networks to simulate the learning process of the human brain.",
              "keywords": "Neural Networks,CNN,RNN",
              "entity_type": "Technology Field",
          }
          print(f"Inserting node 3: {node3_id}")
          await storage.upsert_node(node3_id, node3_data)

          # Insert edge 1: Artificial Intelligence -> Machine Learning
          edge1_data = {
              "relationship": "includes",
              "weight": 1.0,
              "description": "The field of artificial intelligence includes the subfield of machine learning.",
          }
          print(f"Inserting edge 1: {node1_id} -> {node2_id}")
          await storage.upsert_edge(node1_id, node2_id, edge1_data)

          # Insert edge 2: Machine Learning -> Deep Learning
          edge2_data = {
              "relationship": "includes",
              "weight": 1.0,
              "description": "The field of machine learning includes the subfield of deep learning.",
          }
          print(f"Inserting edge 2: {node2_id} -> {node3_id}")
          await storage.upsert_edge(node2_id, node3_id, edge2_data)

          # 2. Test node_degree - get the degree of a node
          print(f"== Testing node_degree: {node1_id}")
          node1_degree = await storage.node_degree(node1_id)
          print(f"Degree of node {node1_id}: {node1_degree}")
          assert (
              node1_degree == 1
          ), f"Degree of node {node1_id} should be 1, but got {node1_degree}"

          # 2.1 Test degrees of all nodes
          print("== Testing degrees of all nodes")
          node2_degree = await storage.node_degree(node2_id)
          node3_degree = await storage.node_degree(node3_id)
          print(f"Degree of node {node2_id}: {node2_degree}")
          print(f"Degree of node {node3_id}: {node3_degree}")
          assert (
              node2_degree == 2
          ), f"Degree of node {node2_id} should be 2, but got {node2_degree}"
          assert (
              node3_degree == 1
          ), f"Degree of node {node3_id} should be 1, but got {node3_degree}"

          # 3. Test edge_degree - get the degree of an edge
          # The expected value 3 equals the two endpoint degrees asserted above (1 + 2).
          print(f"== Testing edge_degree: {node1_id} -> {node2_id}")
          edge_degree = await storage.edge_degree(node1_id, node2_id)
          print(f"Degree of edge {node1_id} -> {node2_id}: {edge_degree}")
          assert (
              edge_degree == 3
          ), f"Degree of edge {node1_id} -> {node2_id} should be 3, but got {edge_degree}"

          # 3.1 Test reverse edge degree - verify undirected graph property
          print(f"== Testing reverse edge degree: {node2_id} -> {node1_id}")
          reverse_edge_degree = await storage.edge_degree(node2_id, node1_id)
          print(
              f"Degree of reverse edge {node2_id} -> {node1_id}: {reverse_edge_degree}"
          )
          assert (
              edge_degree == reverse_edge_degree
          ), "Degrees of forward and reverse edges are not consistent, undirected graph property verification failed"
          print(
              "Undirected graph property verification successful: degrees of forward and reverse edges are consistent"
          )

          # 4. Test get_node_edges - get all edges of a node
          print(f"== Testing get_node_edges: {node2_id}")
          node2_edges = await storage.get_node_edges(node2_id)
          print(f"All edges of node {node2_id}: {node2_edges}")
          assert (
              len(node2_edges) == 2
          ), f"Node {node2_id} should have 2 edges, but got {len(node2_edges)}"

          # 4.1 Verify undirected graph property of node edges
          print("== Verifying undirected graph property of node edges")
          # Reduce each edge to its unordered endpoint pair so that the check
          # accepts a connection regardless of the direction it is reported in.
          linked_pairs = {frozenset((edge[0], edge[1])) for edge in node2_edges}
          assert (
              frozenset((node1_id, node2_id)) in linked_pairs
          ), f"Edge list of node {node2_id} should include a connection with {node1_id}"
          assert (
              frozenset((node2_id, node3_id)) in linked_pairs
          ), f"Edge list of node {node2_id} should include a connection with {node3_id}"
          print(
              f"Undirected graph property verification successful: edge list of node {node2_id} contains all relevant edges"
          )

          # 5. Test get_all_labels - get all labels
          print("== Testing get_all_labels")
          all_labels = await storage.get_all_labels()
          print(f"All labels: {all_labels}")
          assert (
              len(all_labels) == 3
          ), f"Should have 3 labels, but got {len(all_labels)}"
          for expected_label in (node1_id, node2_id, node3_id):
              assert (
                  expected_label in all_labels
              ), f"{expected_label} should be in the label list"

          # 6. Test get_knowledge_graph - get a knowledge graph
          print("== Testing get_knowledge_graph")
          kg = await storage.get_knowledge_graph("*", max_depth=2, max_nodes=10)
          print(f"Number of nodes in knowledge graph: {len(kg.nodes)}")
          print(f"Number of edges in knowledge graph: {len(kg.edges)}")
          assert isinstance(
              kg, KnowledgeGraph
          ), "The returned result should be of type KnowledgeGraph"
          assert (
              len(kg.nodes) == 3
          ), f"The knowledge graph should have 3 nodes, but got {len(kg.nodes)}"
          assert (
              len(kg.edges) == 2
          ), f"The knowledge graph should have 2 edges, but got {len(kg.edges)}"

          # 7. Test delete_node - delete a node
          print(f"== Testing delete_node: {node3_id}")
          await storage.delete_node(node3_id)
          node3_props = await storage.get_node(node3_id)
          print(f"Querying node properties after deletion {node3_id}: {node3_props}")
          assert node3_props is None, f"Node {node3_id} should have been deleted"

          # Re-insert node 3 (and its edge) for subsequent tests
          await storage.upsert_node(node3_id, node3_data)
          await storage.upsert_edge(node2_id, node3_id, edge2_data)

          # 8. Test remove_edges - delete edges
          print(f"== Testing remove_edges: {node2_id} -> {node3_id}")
          await storage.remove_edges([(node2_id, node3_id)])
          edge_props = await storage.get_edge(node2_id, node3_id)
          print(
              f"Querying edge properties after deletion {node2_id} -> {node3_id}: {edge_props}"
          )
          assert (
              edge_props is None
          ), f"Edge {node2_id} -> {node3_id} should have been deleted"

          # 8.1 Verify undirected graph property of edge deletion
          print(
              f"== Verifying undirected graph property of edge deletion: {node3_id} -> {node2_id}"
          )
          reverse_edge_props = await storage.get_edge(node3_id, node2_id)
          print(
              f"Querying reverse edge properties after deletion {node3_id} -> {node2_id}: {reverse_edge_props}"
          )
          assert (
              reverse_edge_props is None
          ), f"Reverse edge {node3_id} -> {node2_id} should also be deleted, undirected graph property verification failed"
          print(
              "Undirected graph property verification successful: deleting an edge in one direction also deletes the reverse edge"
          )

          # 9. Test remove_nodes - delete multiple nodes
          print(f"== Testing remove_nodes: [{node2_id}, {node3_id}]")
          await storage.remove_nodes([node2_id, node3_id])
          node2_props = await storage.get_node(node2_id)
          node3_props = await storage.get_node(node3_id)
          print(f"Querying node properties after deletion {node2_id}: {node2_props}")
          print(f"Querying node properties after deletion {node3_id}: {node3_props}")
          assert node2_props is None, f"Node {node2_id} should have been deleted"
          assert node3_props is None, f"Node {node3_id} should have been deleted"

          print("\nAdvanced tests completed.")
          return True
      except Exception as e:
          ASCIIColors.red(f"An error occurred during the test: {str(e)}")
          # AssertionError is an Exception subclass: returning False here would
          # hide every failed check from pytest, so log and propagate instead.
          raise
  455. @pytest.mark.integration
  456. @pytest.mark.requires_db
  457. async def test_graph_batch_operations(storage):
  458. """
  459. Test batch operations of the graph database:
  460. 1. Use get_nodes_batch to get properties of multiple nodes in batch.
  461. 2. Use node_degrees_batch to get degrees of multiple nodes in batch.
  462. 3. Use edge_degrees_batch to get degrees of multiple edges in batch.
  463. 4. Use get_edges_batch to get properties of multiple edges in batch.
  464. 5. Use get_nodes_edges_batch to get all edges of multiple nodes in batch.
  465. """
  466. try:
  467. chunk1_id = "1"
  468. chunk2_id = "2"
  469. chunk3_id = "3"
  470. # 1. Insert test data
  471. # Insert node 1: Artificial Intelligence
  472. node1_id = "Artificial Intelligence"
  473. node1_data = {
  474. "entity_id": node1_id,
  475. "description": "Artificial intelligence is a branch of computer science that aims to understand the essence of intelligence and produce a new kind of intelligent machine that can react in a manner similar to human intelligence.",
  476. "keywords": "AI,Machine Learning,Deep Learning",
  477. "entity_type": "Technology Field",
  478. "source_id": GRAPH_FIELD_SEP.join([chunk1_id, chunk2_id]),
  479. }
  480. print(f"Inserting node 1: {node1_id}")
  481. await storage.upsert_node(node1_id, node1_data)
  482. # Insert node 2: Machine Learning
  483. node2_id = "Machine Learning"
  484. node2_data = {
  485. "entity_id": node2_id,
  486. "description": "Machine learning is a branch of artificial intelligence that uses statistical methods to enable computer systems to learn without being explicitly programmed.",
  487. "keywords": "Supervised Learning,Unsupervised Learning,Reinforcement Learning",
  488. "entity_type": "Technology Field",
  489. "source_id": GRAPH_FIELD_SEP.join([chunk2_id, chunk3_id]),
  490. }
  491. print(f"Inserting node 2: {node2_id}")
  492. await storage.upsert_node(node2_id, node2_data)
  493. # Insert node 3: Deep Learning
  494. node3_id = "Deep Learning"
  495. node3_data = {
  496. "entity_id": node3_id,
  497. "description": "Deep learning is a branch of machine learning that uses multi-layered neural networks to simulate the learning process of the human brain.",
  498. "keywords": "Neural Networks,CNN,RNN",
  499. "entity_type": "Technology Field",
  500. "source_id": GRAPH_FIELD_SEP.join([chunk3_id]),
  501. }
  502. print(f"Inserting node 3: {node3_id}")
  503. await storage.upsert_node(node3_id, node3_data)
  504. # Insert node 4: Natural Language Processing
  505. node4_id = "Natural Language Processing"
  506. node4_data = {
  507. "entity_id": node4_id,
  508. "description": "Natural language processing is a branch of artificial intelligence that focuses on enabling computers to understand and process human language.",
  509. "keywords": "NLP,Text Analysis,Language Models",
  510. "entity_type": "Technology Field",
  511. }
  512. print(f"Inserting node 4: {node4_id}")
  513. await storage.upsert_node(node4_id, node4_data)
  514. # Insert node 5: Computer Vision
  515. node5_id = "Computer Vision"
  516. node5_data = {
  517. "entity_id": node5_id,
  518. "description": "Computer vision is a branch of artificial intelligence that focuses on enabling computers to gain information from images or videos.",
  519. "keywords": "CV,Image Recognition,Object Detection",
  520. "entity_type": "Technology Field",
  521. }
  522. print(f"Inserting node 5: {node5_id}")
  523. await storage.upsert_node(node5_id, node5_data)
  524. # Insert edge 1: Artificial Intelligence -> Machine Learning
  525. edge1_data = {
  526. "relationship": "includes",
  527. "weight": 1.0,
  528. "description": "The field of artificial intelligence includes the subfield of machine learning.",
  529. "source_id": GRAPH_FIELD_SEP.join([chunk1_id, chunk2_id]),
  530. }
  531. print(f"Inserting edge 1: {node1_id} -> {node2_id}")
  532. await storage.upsert_edge(node1_id, node2_id, edge1_data)
  533. # Insert edge 2: Machine Learning -> Deep Learning
  534. edge2_data = {
  535. "relationship": "includes",
  536. "weight": 1.0,
  537. "description": "The field of machine learning includes the subfield of deep learning.",
  538. "source_id": GRAPH_FIELD_SEP.join([chunk2_id, chunk3_id]),
  539. }
  540. print(f"Inserting edge 2: {node2_id} -> {node3_id}")
  541. await storage.upsert_edge(node2_id, node3_id, edge2_data)
  542. # Insert edge 3: Artificial Intelligence -> Natural Language Processing
  543. edge3_data = {
  544. "relationship": "includes",
  545. "weight": 1.0,
  546. "description": "The field of artificial intelligence includes the subfield of natural language processing.",
  547. "source_id": GRAPH_FIELD_SEP.join([chunk3_id]),
  548. }
  549. print(f"Inserting edge 3: {node1_id} -> {node4_id}")
  550. await storage.upsert_edge(node1_id, node4_id, edge3_data)
  551. # Insert edge 4: Artificial Intelligence -> Computer Vision
  552. edge4_data = {
  553. "relationship": "includes",
  554. "weight": 1.0,
  555. "description": "The field of artificial intelligence includes the subfield of computer vision.",
  556. }
  557. print(f"Inserting edge 4: {node1_id} -> {node5_id}")
  558. await storage.upsert_edge(node1_id, node5_id, edge4_data)
  559. # Insert edge 5: Deep Learning -> Natural Language Processing
  560. edge5_data = {
  561. "relationship": "applied to",
  562. "weight": 0.8,
  563. "description": "Deep learning techniques are applied in the field of natural language processing.",
  564. }
  565. print(f"Inserting edge 5: {node3_id} -> {node4_id}")
  566. await storage.upsert_edge(node3_id, node4_id, edge5_data)
  567. # Insert edge 6: Deep Learning -> Computer Vision
  568. edge6_data = {
  569. "relationship": "applied to",
  570. "weight": 0.8,
  571. "description": "Deep learning techniques are applied in the field of computer vision.",
  572. }
  573. print(f"Inserting edge 6: {node3_id} -> {node5_id}")
  574. await storage.upsert_edge(node3_id, node5_id, edge6_data)
  575. # 2. Test get_nodes_batch - batch get properties of multiple nodes
  576. print("== Testing get_nodes_batch")
  577. node_ids = [node1_id, node2_id, node3_id]
  578. nodes_dict = await storage.get_nodes_batch(node_ids)
  579. print(f"Batch get node properties result: {nodes_dict.keys()}")
  580. assert len(nodes_dict) == 3, f"Should return 3 nodes, but got {len(nodes_dict)}"
  581. assert node1_id in nodes_dict, f"{node1_id} should be in the result"
  582. assert node2_id in nodes_dict, f"{node2_id} should be in the result"
  583. assert node3_id in nodes_dict, f"{node3_id} should be in the result"
  584. assert (
  585. nodes_dict[node1_id]["description"] == node1_data["description"]
  586. ), f"{node1_id} description mismatch"
  587. assert (
  588. nodes_dict[node2_id]["description"] == node2_data["description"]
  589. ), f"{node2_id} description mismatch"
  590. assert (
  591. nodes_dict[node3_id]["description"] == node3_data["description"]
  592. ), f"{node3_id} description mismatch"
  593. # 3. Test node_degrees_batch - batch get degrees of multiple nodes
  594. print("== Testing node_degrees_batch")
  595. node_degrees = await storage.node_degrees_batch(node_ids)
  596. print(f"Batch get node degrees result: {node_degrees}")
  597. assert (
  598. len(node_degrees) == 3
  599. ), f"Should return degrees of 3 nodes, but got {len(node_degrees)}"
  600. assert node1_id in node_degrees, f"{node1_id} should be in the result"
  601. assert node2_id in node_degrees, f"{node2_id} should be in the result"
  602. assert node3_id in node_degrees, f"{node3_id} should be in the result"
  603. assert (
  604. node_degrees[node1_id] == 3
  605. ), f"Degree of {node1_id} should be 3, but got {node_degrees[node1_id]}"
  606. assert (
  607. node_degrees[node2_id] == 2
  608. ), f"Degree of {node2_id} should be 2, but got {node_degrees[node2_id]}"
  609. assert (
  610. node_degrees[node3_id] == 3
  611. ), f"Degree of {node3_id} should be 3, but got {node_degrees[node3_id]}"
  612. # 4. Test edge_degrees_batch - batch get degrees of multiple edges
  613. print("== Testing edge_degrees_batch")
  614. edges = [(node1_id, node2_id), (node2_id, node3_id), (node3_id, node4_id)]
  615. edge_degrees = await storage.edge_degrees_batch(edges)
  616. print(f"Batch get edge degrees result: {edge_degrees}")
  617. assert (
  618. len(edge_degrees) == 3
  619. ), f"Should return degrees of 3 edges, but got {len(edge_degrees)}"
  620. assert (
  621. node1_id,
  622. node2_id,
  623. ) in edge_degrees, f"Edge {node1_id} -> {node2_id} should be in the result"
  624. assert (
  625. node2_id,
  626. node3_id,
  627. ) in edge_degrees, f"Edge {node2_id} -> {node3_id} should be in the result"
  628. assert (
  629. node3_id,
  630. node4_id,
  631. ) in edge_degrees, f"Edge {node3_id} -> {node4_id} should be in the result"
  632. # Verify edge degrees (sum of source and target node degrees)
  633. assert (
  634. edge_degrees[(node1_id, node2_id)] == 5
  635. ), f"Degree of edge {node1_id} -> {node2_id} should be 5, but got {edge_degrees[(node1_id, node2_id)]}"
  636. assert (
  637. edge_degrees[(node2_id, node3_id)] == 5
  638. ), f"Degree of edge {node2_id} -> {node3_id} should be 5, but got {edge_degrees[(node2_id, node3_id)]}"
  639. assert (
  640. edge_degrees[(node3_id, node4_id)] == 5
  641. ), f"Degree of edge {node3_id} -> {node4_id} should be 5, but got {edge_degrees[(node3_id, node4_id)]}"
  642. # 5. Test get_edges_batch - batch get properties of multiple edges
  643. print("== Testing get_edges_batch")
  644. # Convert list of tuples to list of dicts for Neo4j style
  645. edge_dicts = [{"src": src, "tgt": tgt} for src, tgt in edges]
  646. edges_dict = await storage.get_edges_batch(edge_dicts)
  647. print(f"Batch get edge properties result: {edges_dict.keys()}")
  648. assert (
  649. len(edges_dict) == 3
  650. ), f"Should return properties of 3 edges, but got {len(edges_dict)}"
  651. assert (
  652. node1_id,
  653. node2_id,
  654. ) in edges_dict, f"Edge {node1_id} -> {node2_id} should be in the result"
  655. assert (
  656. node2_id,
  657. node3_id,
  658. ) in edges_dict, f"Edge {node2_id} -> {node3_id} should be in the result"
  659. assert (
  660. node3_id,
  661. node4_id,
  662. ) in edges_dict, f"Edge {node3_id} -> {node4_id} should be in the result"
  663. assert (
  664. edges_dict[(node1_id, node2_id)]["relationship"]
  665. == edge1_data["relationship"]
  666. ), f"Edge {node1_id} -> {node2_id} relationship mismatch"
  667. assert (
  668. edges_dict[(node2_id, node3_id)]["relationship"]
  669. == edge2_data["relationship"]
  670. ), f"Edge {node2_id} -> {node3_id} relationship mismatch"
  671. assert (
  672. edges_dict[(node3_id, node4_id)]["relationship"]
  673. == edge5_data["relationship"]
  674. ), f"Edge {node3_id} -> {node4_id} relationship mismatch"
  675. # 5.1 Test batch get of reverse edges - verify undirected property
  676. print("== Testing batch get of reverse edges")
  677. # Create list of dicts for reverse edges
  678. reverse_edge_dicts = [{"src": tgt, "tgt": src} for src, tgt in edges]
  679. reverse_edges_dict = await storage.get_edges_batch(reverse_edge_dicts)
  680. print(f"Batch get reverse edge properties result: {reverse_edges_dict.keys()}")
  681. assert (
  682. len(reverse_edges_dict) == 3
  683. ), f"Should return properties of 3 reverse edges, but got {len(reverse_edges_dict)}"
  684. # Verify that properties of forward and reverse edges are consistent
  685. for (src, tgt), props in edges_dict.items():
  686. assert (
  687. (
  688. tgt,
  689. src,
  690. )
  691. in reverse_edges_dict
  692. ), f"Reverse edge {tgt} -> {src} should be in the result"
  693. assert (
  694. props == reverse_edges_dict[(tgt, src)]
  695. ), f"Properties of edge {src} -> {tgt} and reverse edge {tgt} -> {src} are inconsistent"
  696. print(
  697. "Undirected graph property verification successful: properties of batch-retrieved forward and reverse edges are consistent"
  698. )
  699. # 6. Test get_nodes_edges_batch - batch get all edges of multiple nodes
  700. print("== Testing get_nodes_edges_batch")
  701. nodes_edges = await storage.get_nodes_edges_batch([node1_id, node3_id])
  702. print(f"Batch get node edges result: {nodes_edges.keys()}")
  703. assert (
  704. len(nodes_edges) == 2
  705. ), f"Should return edges for 2 nodes, but got {len(nodes_edges)}"
  706. assert node1_id in nodes_edges, f"{node1_id} should be in the result"
  707. assert node3_id in nodes_edges, f"{node3_id} should be in the result"
  708. assert (
  709. len(nodes_edges[node1_id]) == 3
  710. ), f"{node1_id} should have 3 edges, but has {len(nodes_edges[node1_id])}"
  711. assert (
  712. len(nodes_edges[node3_id]) == 3
  713. ), f"{node3_id} should have 3 edges, but has {len(nodes_edges[node3_id])}"
  714. # 6.1 Verify undirected property of batch-retrieved node edges
  715. print("== Verifying undirected property of batch-retrieved node edges")
  716. # Check if node 1's edges include all relevant edges (regardless of direction)
  717. node1_outgoing_edges = [
  718. (src, tgt) for src, tgt in nodes_edges[node1_id] if src == node1_id
  719. ]
  720. node1_incoming_edges = [
  721. (src, tgt) for src, tgt in nodes_edges[node1_id] if tgt == node1_id
  722. ]
  723. print(f"Outgoing edges of node {node1_id}: {node1_outgoing_edges}")
  724. print(f"Incoming edges of node {node1_id}: {node1_incoming_edges}")
  725. # Check for edges to Machine Learning, Natural Language Processing, and Computer Vision
  726. has_edge_to_node2 = any(tgt == node2_id for _, tgt in node1_outgoing_edges)
  727. has_edge_to_node4 = any(tgt == node4_id for _, tgt in node1_outgoing_edges)
  728. has_edge_to_node5 = any(tgt == node5_id for _, tgt in node1_outgoing_edges)
  729. assert (
  730. has_edge_to_node2
  731. ), f"Edge list of node {node1_id} should include an edge to {node2_id}"
  732. assert (
  733. has_edge_to_node4
  734. ), f"Edge list of node {node1_id} should include an edge to {node4_id}"
  735. assert (
  736. has_edge_to_node5
  737. ), f"Edge list of node {node1_id} should include an edge to {node5_id}"
  738. # Check if node 3's edges include all relevant edges (regardless of direction)
  739. node3_outgoing_edges = [
  740. (src, tgt) for src, tgt in nodes_edges[node3_id] if src == node3_id
  741. ]
  742. node3_incoming_edges = [
  743. (src, tgt) for src, tgt in nodes_edges[node3_id] if tgt == node3_id
  744. ]
  745. print(f"Outgoing edges of node {node3_id}: {node3_outgoing_edges}")
  746. print(f"Incoming edges of node {node3_id}: {node3_incoming_edges}")
  747. # Check for connections with Machine Learning, Natural Language Processing, and Computer Vision (ignoring direction)
  748. has_connection_with_node2 = any(
  749. (src == node2_id and tgt == node3_id)
  750. or (src == node3_id and tgt == node2_id)
  751. for src, tgt in nodes_edges[node3_id]
  752. )
  753. has_connection_with_node4 = any(
  754. (src == node3_id and tgt == node4_id)
  755. or (src == node4_id and tgt == node3_id)
  756. for src, tgt in nodes_edges[node3_id]
  757. )
  758. has_connection_with_node5 = any(
  759. (src == node3_id and tgt == node5_id)
  760. or (src == node5_id and tgt == node3_id)
  761. for src, tgt in nodes_edges[node3_id]
  762. )
  763. assert (
  764. has_connection_with_node2
  765. ), f"Edge list of node {node3_id} should include a connection with {node2_id}"
  766. assert (
  767. has_connection_with_node4
  768. ), f"Edge list of node {node3_id} should include a connection with {node4_id}"
  769. assert (
  770. has_connection_with_node5
  771. ), f"Edge list of node {node3_id} should include a connection with {node5_id}"
  772. print(
  773. "Undirected graph property verification successful: batch-retrieved node edges include all relevant edges (regardless of direction)"
  774. )
  775. print("\nBatch operations tests completed.")
  776. return True
  777. except Exception as e:
  778. ASCIIColors.red(f"An error occurred during the test: {str(e)}")
  779. return False
  780. @pytest.mark.integration
  781. @pytest.mark.requires_db
  782. async def test_graph_special_characters(storage):
  783. """
  784. Test the graph database's handling of special characters:
  785. 1. Test node names and descriptions containing single quotes, double quotes, and backslashes.
  786. 2. Test edge descriptions containing single quotes, double quotes, and backslashes.
  787. 3. Verify that special characters are saved and retrieved correctly.
  788. """
  789. try:
  790. # 1. Test special characters in node name
  791. node1_id = "Node with 'single quotes'"
  792. node1_data = {
  793. "entity_id": node1_id,
  794. "description": "This description contains 'single quotes', \"double quotes\", and \\backslashes",
  795. "keywords": "special characters,quotes,escaping",
  796. "entity_type": "Test Node",
  797. }
  798. print(f"Inserting node with special characters 1: {node1_id}")
  799. await storage.upsert_node(node1_id, node1_data)
  800. # 2. Test double quotes in node name
  801. node2_id = 'Node with "double quotes"'
  802. node2_data = {
  803. "entity_id": node2_id,
  804. "description": "This description contains both 'single quotes' and \"double quotes\" and \\a\\path",
  805. "keywords": "special characters,quotes,JSON",
  806. "entity_type": "Test Node",
  807. }
  808. print(f"Inserting node with special characters 2: {node2_id}")
  809. await storage.upsert_node(node2_id, node2_data)
  810. # 3. Test backslashes in node name
  811. node3_id = "Node with \\backslashes\\"
  812. node3_data = {
  813. "entity_id": node3_id,
  814. "description": "This description contains a Windows path C:\\Program Files\\ and escape characters \\n\\t",
  815. "keywords": "backslashes,paths,escaping",
  816. "entity_type": "Test Node",
  817. }
  818. print(f"Inserting node with special characters 3: {node3_id}")
  819. await storage.upsert_node(node3_id, node3_data)
  820. # 4. Test special characters in edge description
  821. edge1_data = {
  822. "relationship": "special 'relationship'",
  823. "weight": 1.0,
  824. "description": "This edge description contains 'single quotes', \"double quotes\", and \\backslashes",
  825. }
  826. print(f"Inserting edge with special characters: {node1_id} -> {node2_id}")
  827. await storage.upsert_edge(node1_id, node2_id, edge1_data)
  828. # 5. Test more complex combination of special characters in edge description
  829. edge2_data = {
  830. "relationship": 'complex "relationship"\\type',
  831. "weight": 0.8,
  832. "description": "Contains SQL injection attempt: SELECT * FROM users WHERE name='admin'--",
  833. }
  834. print(
  835. f"Inserting edge with complex special characters: {node2_id} -> {node3_id}"
  836. )
  837. await storage.upsert_edge(node2_id, node3_id, edge2_data)
  838. # 6. Verify that node special characters are saved correctly
  839. print("\n== Verifying node special characters")
  840. for node_id, original_data in [
  841. (node1_id, node1_data),
  842. (node2_id, node2_data),
  843. (node3_id, node3_data),
  844. ]:
  845. node_props = await storage.get_node(node_id)
  846. if node_props:
  847. print(f"Successfully read node: {node_id}")
  848. print(
  849. f"Node description: {node_props.get('description', 'No description')}"
  850. )
  851. # Verify node ID is saved correctly
  852. assert (
  853. node_props.get("entity_id") == node_id
  854. ), f"Node ID mismatch: expected {node_id}, got {node_props.get('entity_id')}"
  855. # Verify description is saved correctly
  856. assert (
  857. node_props.get("description") == original_data["description"]
  858. ), f"Node description mismatch: expected {original_data['description']}, got {node_props.get('description')}"
  859. print(f"Node {node_id} special character verification successful")
  860. else:
  861. print(f"Failed to read node properties: {node_id}")
  862. assert False, f"Failed to read node properties: {node_id}"
  863. # 7. Verify that edge special characters are saved correctly
  864. print("\n== Verifying edge special characters")
  865. edge1_props = await storage.get_edge(node1_id, node2_id)
  866. if edge1_props:
  867. print(f"Successfully read edge: {node1_id} -> {node2_id}")
  868. print(
  869. f"Edge relationship: {edge1_props.get('relationship', 'No relationship')}"
  870. )
  871. print(
  872. f"Edge description: {edge1_props.get('description', 'No description')}"
  873. )
  874. # Verify edge relationship is saved correctly
  875. assert (
  876. edge1_props.get("relationship") == edge1_data["relationship"]
  877. ), f"Edge relationship mismatch: expected {edge1_data['relationship']}, got {edge1_props.get('relationship')}"
  878. # Verify edge description is saved correctly
  879. assert (
  880. edge1_props.get("description") == edge1_data["description"]
  881. ), f"Edge description mismatch: expected {edge1_data['description']}, got {edge1_props.get('description')}"
  882. print(
  883. f"Edge {node1_id} -> {node2_id} special character verification successful"
  884. )
  885. else:
  886. print(f"Failed to read edge properties: {node1_id} -> {node2_id}")
  887. assert False, f"Failed to read edge properties: {node1_id} -> {node2_id}"
  888. edge2_props = await storage.get_edge(node2_id, node3_id)
  889. if edge2_props:
  890. print(f"Successfully read edge: {node2_id} -> {node3_id}")
  891. print(
  892. f"Edge relationship: {edge2_props.get('relationship', 'No relationship')}"
  893. )
  894. print(
  895. f"Edge description: {edge2_props.get('description', 'No description')}"
  896. )
  897. # Verify edge relationship is saved correctly
  898. assert (
  899. edge2_props.get("relationship") == edge2_data["relationship"]
  900. ), f"Edge relationship mismatch: expected {edge2_data['relationship']}, got {edge2_props.get('relationship')}"
  901. # Verify edge description is saved correctly
  902. assert (
  903. edge2_props.get("description") == edge2_data["description"]
  904. ), f"Edge description mismatch: expected {edge2_data['description']}, got {edge2_props.get('description')}"
  905. print(
  906. f"Edge {node2_id} -> {node3_id} special character verification successful"
  907. )
  908. else:
  909. print(f"Failed to read edge properties: {node2_id} -> {node3_id}")
  910. assert False, f"Failed to read edge properties: {node2_id} -> {node3_id}"
  911. print("\nSpecial character tests completed, data is preserved in the database.")
  912. return True
  913. except Exception as e:
  914. ASCIIColors.red(f"An error occurred during the test: {str(e)}")
  915. return False
  916. @pytest.mark.integration
  917. @pytest.mark.requires_db
  918. async def test_graph_string_escaping_regressions(storage):
  919. """
  920. Regression coverage for entity IDs and properties that require Cypher escaping.
  921. Covers quoted and backslash-heavy node IDs across single-node reads, batch reads,
  922. edge retrieval, and delete/remove write paths.
  923. """
  924. center_id = 'Danh mục "bài toán lớn"'
  925. backslash_id = r"C:\Program Files\LightRAG"
  926. mixed_id = 'Path "C:\\RAG\\docs"'
  927. single_quote_id = "Node with 'single quotes'"
  928. node_payloads = {
  929. center_id: {
  930. "entity_id": center_id,
  931. "description": 'Quoted entity with JSON-ish payload {"path": "C:\\\\temp"}',
  932. "keywords": 'quotes,"double quotes",unicode',
  933. "entity_type": "Regression Node",
  934. },
  935. backslash_id: {
  936. "entity_id": backslash_id,
  937. "description": r"Windows path C:\Program Files\LightRAG\bin",
  938. "keywords": r"paths,C:\temp,backslashes",
  939. "entity_type": "Regression Node",
  940. },
  941. mixed_id: {
  942. "entity_id": mixed_id,
  943. "description": 'Mixed quotes "and" slashes \\ in one entity id',
  944. "keywords": r'mixed,"quoted",C:\RAG\docs',
  945. "entity_type": "Regression Node",
  946. },
  947. single_quote_id: {
  948. "entity_id": single_quote_id,
  949. "description": "Single quotes stay literal in entity identifiers",
  950. "keywords": "single quotes,escaping",
  951. "entity_type": "Regression Node",
  952. },
  953. }
  954. for node_id, payload in node_payloads.items():
  955. await storage.upsert_node(node_id, payload)
  956. edge_payloads = {
  957. (center_id, backslash_id): {
  958. "relationship": r'contains "path"\edge',
  959. "weight": 1.0,
  960. "description": r'Links "quoted" title to C:\Program Files\LightRAG',
  961. },
  962. (center_id, mixed_id): {
  963. "relationship": 'references "docs"',
  964. "weight": 0.8,
  965. "description": r'Contains both "quotes" and \\backslashes\\',
  966. },
  967. (center_id, single_quote_id): {
  968. "relationship": "mentions 'alias'",
  969. "weight": 0.6,
  970. "description": 'Single quote entity linked to "quoted" center node',
  971. },
  972. }
  973. for (src_id, tgt_id), payload in edge_payloads.items():
  974. await storage.upsert_edge(src_id, tgt_id, payload)
  975. for node_id, payload in node_payloads.items():
  976. node = await storage.get_node(node_id)
  977. assert node is not None, f"Expected node {node_id!r} to round-trip"
  978. assert node["entity_id"] == node_id
  979. assert node["description"] == payload["description"]
  980. nodes_batch = await storage.get_nodes_batch(list(node_payloads))
  981. assert set(nodes_batch) == set(node_payloads)
  982. for node_id, payload in node_payloads.items():
  983. assert nodes_batch[node_id]["entity_id"] == node_id
  984. assert nodes_batch[node_id]["description"] == payload["description"]
  985. degrees = await storage.node_degrees_batch(list(node_payloads))
  986. assert degrees[center_id] == 3
  987. assert degrees[backslash_id] == 1
  988. assert degrees[mixed_id] == 1
  989. assert degrees[single_quote_id] == 1
  990. # Helper: undirected graph has no canonical direction, so accept either (a,b) or (b,a).
  991. def connects(edges, a, b):
  992. return any(
  993. (src == a and tgt == b) or (src == b and tgt == a) for src, tgt in edges
  994. )
  995. center_edges = await storage.get_node_edges(center_id)
  996. assert center_edges is not None
  997. assert connects(
  998. center_edges, center_id, backslash_id
  999. ), f"center_edges should contain connection to {backslash_id}"
  1000. assert connects(
  1001. center_edges, center_id, mixed_id
  1002. ), f"center_edges should contain connection to {mixed_id}"
  1003. assert connects(
  1004. center_edges, center_id, single_quote_id
  1005. ), f"center_edges should contain connection to {single_quote_id}"
  1006. batch_edges = await storage.get_nodes_edges_batch(
  1007. [center_id, mixed_id, backslash_id, single_quote_id]
  1008. )
  1009. assert set(batch_edges) == {center_id, mixed_id, backslash_id, single_quote_id}
  1010. assert connects(batch_edges[center_id], center_id, backslash_id)
  1011. assert connects(batch_edges[center_id], center_id, mixed_id)
  1012. assert connects(batch_edges[center_id], center_id, single_quote_id)
  1013. assert connects(batch_edges[mixed_id], center_id, mixed_id)
  1014. assert connects(batch_edges[backslash_id], center_id, backslash_id)
  1015. assert connects(batch_edges[single_quote_id], center_id, single_quote_id)
  1016. # --- Undirected property: get_edge in both directions ---
  1017. print("\n== Verifying undirected property: get_edge forward and reverse")
  1018. for (src_id, tgt_id), payload in edge_payloads.items():
  1019. fwd = await storage.get_edge(src_id, tgt_id)
  1020. rev = await storage.get_edge(tgt_id, src_id)
  1021. assert (
  1022. fwd is not None
  1023. ), f"get_edge({src_id!r}, {tgt_id!r}) returned None after insertion"
  1024. assert rev is not None, (
  1025. f"get_edge({tgt_id!r}, {src_id!r}) returned None — "
  1026. f"storage is not treating the edge as undirected"
  1027. )
  1028. assert fwd["relationship"] == payload["relationship"]
  1029. assert fwd["description"] == payload["description"]
  1030. assert rev["relationship"] == fwd["relationship"], (
  1031. f"Reverse get_edge returned different relationship for "
  1032. f"({src_id!r}, {tgt_id!r})"
  1033. )
  1034. assert rev["description"] == fwd["description"], (
  1035. f"Reverse get_edge returned different description for "
  1036. f"({src_id!r}, {tgt_id!r})"
  1037. )
  1038. print(
  1039. "Undirected property verification successful: "
  1040. "get_edge returns consistent data in both directions"
  1041. )
  1042. # --- Undirected property: has_edge in both directions ---
  1043. print("\n== Verifying undirected property: has_edge forward and reverse")
  1044. for src_id, tgt_id in edge_payloads:
  1045. assert await storage.has_edge(
  1046. src_id, tgt_id
  1047. ), f"has_edge({src_id!r}, {tgt_id!r}) returned False after insertion"
  1048. assert await storage.has_edge(tgt_id, src_id), (
  1049. f"has_edge({tgt_id!r}, {src_id!r}) returned False — "
  1050. f"storage is not treating the edge as undirected"
  1051. )
  1052. print(
  1053. "Undirected property verification successful: "
  1054. "has_edge returns True in both directions"
  1055. )
  1056. # --- Undirected property: get_edges_batch forward and reverse ---
  1057. print("\n== Verifying undirected property: get_edges_batch forward and reverse")
  1058. forward_edges = await storage.get_edges_batch(
  1059. [{"src": src_id, "tgt": tgt_id} for src_id, tgt_id in edge_payloads]
  1060. )
  1061. reverse_edges = await storage.get_edges_batch(
  1062. [{"src": tgt_id, "tgt": src_id} for src_id, tgt_id in edge_payloads]
  1063. )
  1064. assert set(forward_edges) == set(edge_payloads)
  1065. for pair, payload in edge_payloads.items():
  1066. assert forward_edges[pair]["relationship"] == payload["relationship"]
  1067. assert forward_edges[pair]["description"] == payload["description"]
  1068. reverse_pair = (pair[1], pair[0])
  1069. assert (
  1070. reverse_pair in reverse_edges
  1071. ), f"get_edges_batch did not return reverse pair {reverse_pair!r}"
  1072. assert reverse_edges[reverse_pair]["relationship"] == payload["relationship"]
  1073. assert reverse_edges[reverse_pair]["description"] == payload["description"]
  1074. print(
  1075. "Undirected property verification successful: "
  1076. "get_edges_batch returns consistent data in both directions"
  1077. )
  1078. # --- Undirected property: edge deletion removes both directions ---
  1079. print("\n== Verifying undirected property: edge deletion removes both directions")
  1080. await storage.remove_edges([(center_id, mixed_id)])
  1081. assert (
  1082. await storage.get_edge(center_id, mixed_id) is None
  1083. ), f"Forward edge ({center_id!r} -> {mixed_id!r}) should be deleted"
  1084. assert await storage.get_edge(mixed_id, center_id) is None, (
  1085. f"Reverse edge ({mixed_id!r} -> {center_id!r}) should also be deleted "
  1086. f"— storage is not treating deletion as undirected"
  1087. )
  1088. remaining_center_edges = await storage.get_node_edges(center_id)
  1089. assert remaining_center_edges is not None
  1090. assert not connects(
  1091. remaining_center_edges, center_id, mixed_id
  1092. ), "Edge between center and mixed_id should have been removed"
  1093. print(
  1094. "Undirected property verification successful: "
  1095. "deleting an edge removes it in both directions"
  1096. )
  1097. await storage.delete_node(single_quote_id)
  1098. assert await storage.get_node(single_quote_id) is None
  1099. await storage.remove_nodes([center_id, backslash_id])
  1100. assert await storage.get_node(center_id) is None
  1101. assert await storage.get_node(backslash_id) is None
  1102. assert await storage.get_node(mixed_id) is not None
  1103. @pytest.mark.integration
  1104. @pytest.mark.requires_db
  1105. async def test_graph_undirected_property(storage):
  1106. """
  1107. Specifically test the undirected graph property of the storage:
  1108. 1. Verify that after inserting an edge in one direction, a reverse query can retrieve the same result.
  1109. 2. Verify that edge properties are consistent in forward and reverse queries.
  1110. 3. Verify that after deleting an edge in one direction, the edge in the other direction is also deleted.
  1111. 4. Verify the undirected property in batch operations.
  1112. """
  1113. try:
  1114. # 1. Insert test data
  1115. # Insert node 1: Computer Science
  1116. node1_id = "Computer Science"
  1117. node1_data = {
  1118. "entity_id": node1_id,
  1119. "description": "Computer science is the study of computers and their applications.",
  1120. "keywords": "computer,science,technology",
  1121. "entity_type": "Discipline",
  1122. }
  1123. print(f"Inserting node 1: {node1_id}")
  1124. await storage.upsert_node(node1_id, node1_data)
  1125. # Insert node 2: Data Structures
  1126. node2_id = "Data Structures"
  1127. node2_data = {
  1128. "entity_id": node2_id,
  1129. "description": "A data structure is a fundamental concept in computer science used to organize and store data.",
  1130. "keywords": "data,structure,organization",
  1131. "entity_type": "Concept",
  1132. }
  1133. print(f"Inserting node 2: {node2_id}")
  1134. await storage.upsert_node(node2_id, node2_data)
  1135. # Insert node 3: Algorithms
  1136. node3_id = "Algorithms"
  1137. node3_data = {
  1138. "entity_id": node3_id,
  1139. "description": "An algorithm is a set of steps and methods for solving problems.",
  1140. "keywords": "algorithm,steps,methods",
  1141. "entity_type": "Concept",
  1142. }
  1143. print(f"Inserting node 3: {node3_id}")
  1144. await storage.upsert_node(node3_id, node3_data)
  1145. # 2. Test undirected property after edge insertion
  1146. print("\n== Testing undirected property after edge insertion")
  1147. # Insert edge 1: Computer Science -> Data Structures
  1148. edge1_data = {
  1149. "relationship": "includes",
  1150. "weight": 1.0,
  1151. "description": "Computer science includes the concept of data structures.",
  1152. }
  1153. print(f"Inserting edge 1: {node1_id} -> {node2_id}")
  1154. await storage.upsert_edge(node1_id, node2_id, edge1_data)
  1155. # Verify forward query
  1156. forward_edge = await storage.get_edge(node1_id, node2_id)
  1157. print(f"Forward edge properties: {forward_edge}")
  1158. assert (
  1159. forward_edge is not None
  1160. ), f"Failed to read forward edge properties: {node1_id} -> {node2_id}"
  1161. # Verify reverse query
  1162. reverse_edge = await storage.get_edge(node2_id, node1_id)
  1163. print(f"Reverse edge properties: {reverse_edge}")
  1164. assert (
  1165. reverse_edge is not None
  1166. ), f"Failed to read reverse edge properties: {node2_id} -> {node1_id}"
  1167. # Verify that forward and reverse edge properties are consistent
  1168. assert (
  1169. forward_edge == reverse_edge
  1170. ), "Forward and reverse edge properties are inconsistent, undirected property verification failed"
  1171. print(
  1172. "Undirected property verification successful: forward and reverse edge properties are consistent"
  1173. )
  1174. # 3. Test undirected property of edge degree
  1175. print("\n== Testing undirected property of edge degree")
  1176. # Insert edge 2: Computer Science -> Algorithms
  1177. edge2_data = {
  1178. "relationship": "includes",
  1179. "weight": 1.0,
  1180. "description": "Computer science includes the concept of algorithms.",
  1181. }
  1182. print(f"Inserting edge 2: {node1_id} -> {node3_id}")
  1183. await storage.upsert_edge(node1_id, node3_id, edge2_data)
  1184. # Verify degrees of forward and reverse edges
  1185. forward_degree = await storage.edge_degree(node1_id, node2_id)
  1186. reverse_degree = await storage.edge_degree(node2_id, node1_id)
  1187. print(f"Degree of forward edge {node1_id} -> {node2_id}: {forward_degree}")
  1188. print(f"Degree of reverse edge {node2_id} -> {node1_id}: {reverse_degree}")
  1189. assert (
  1190. forward_degree == reverse_degree
  1191. ), "Degrees of forward and reverse edges are inconsistent, undirected property verification failed"
  1192. print(
  1193. "Undirected property verification successful: degrees of forward and reverse edges are consistent"
  1194. )
  1195. # 4. Test undirected property of edge deletion
  1196. print("\n== Testing undirected property of edge deletion")
  1197. # Delete forward edge
  1198. print(f"Deleting edge: {node1_id} -> {node2_id}")
  1199. await storage.remove_edges([(node1_id, node2_id)])
  1200. # Verify forward edge is deleted
  1201. forward_edge = await storage.get_edge(node1_id, node2_id)
  1202. print(
  1203. f"Querying forward edge properties after deletion {node1_id} -> {node2_id}: {forward_edge}"
  1204. )
  1205. assert (
  1206. forward_edge is None
  1207. ), f"Edge {node1_id} -> {node2_id} should have been deleted"
  1208. # Verify reverse edge is also deleted
  1209. reverse_edge = await storage.get_edge(node2_id, node1_id)
  1210. print(
  1211. f"Querying reverse edge properties after deletion {node2_id} -> {node1_id}: {reverse_edge}"
  1212. )
  1213. assert (
  1214. reverse_edge is None
  1215. ), f"Reverse edge {node2_id} -> {node1_id} should also be deleted, undirected property verification failed"
  1216. print(
  1217. "Undirected property verification successful: deleting an edge in one direction also deletes the reverse edge"
  1218. )
  1219. # 5. Test undirected property in batch operations
  1220. print("\n== Testing undirected property in batch operations")
  1221. # Re-insert edge
  1222. await storage.upsert_edge(node1_id, node2_id, edge1_data)
  1223. # Batch get edge properties
  1224. edge_dicts = [
  1225. {"src": node1_id, "tgt": node2_id},
  1226. {"src": node1_id, "tgt": node3_id},
  1227. ]
  1228. reverse_edge_dicts = [
  1229. {"src": node2_id, "tgt": node1_id},
  1230. {"src": node3_id, "tgt": node1_id},
  1231. ]
  1232. edges_dict = await storage.get_edges_batch(edge_dicts)
  1233. reverse_edges_dict = await storage.get_edges_batch(reverse_edge_dicts)
  1234. print(f"Batch get forward edge properties result: {edges_dict.keys()}")
  1235. print(f"Batch get reverse edge properties result: {reverse_edges_dict.keys()}")
  1236. # Verify that properties of forward and reverse edges are consistent
  1237. for (src, tgt), props in edges_dict.items():
  1238. assert (
  1239. (
  1240. tgt,
  1241. src,
  1242. )
  1243. in reverse_edges_dict
  1244. ), f"Reverse edge {tgt} -> {src} should be in the result"
  1245. assert (
  1246. props == reverse_edges_dict[(tgt, src)]
  1247. ), f"Properties of edge {src} -> {tgt} and reverse edge {tgt} -> {src} are inconsistent"
  1248. print(
  1249. "Undirected property verification successful: properties of batch-retrieved forward and reverse edges are consistent"
  1250. )
  1251. # 6. Test undirected property of batch-retrieved node edges
  1252. print("\n== Testing undirected property of batch-retrieved node edges")
  1253. nodes_edges = await storage.get_nodes_edges_batch([node1_id, node2_id])
  1254. print(f"Batch get node edges result: {nodes_edges.keys()}")
  1255. # Check if node 1's edges include all relevant edges (regardless of direction)
  1256. node1_edges = nodes_edges[node1_id]
  1257. node2_edges = nodes_edges[node2_id]
  1258. # Check if node 1 has edges to node 2 and node 3
  1259. has_edge_to_node2 = any(
  1260. (src == node1_id and tgt == node2_id) for src, tgt in node1_edges
  1261. )
  1262. has_edge_to_node3 = any(
  1263. (src == node1_id and tgt == node3_id) for src, tgt in node1_edges
  1264. )
  1265. assert (
  1266. has_edge_to_node2
  1267. ), f"Edge list of node {node1_id} should include an edge to {node2_id}"
  1268. assert (
  1269. has_edge_to_node3
  1270. ), f"Edge list of node {node1_id} should include an edge to {node3_id}"
  1271. # Check if node 2 has a connection with node 1
  1272. has_edge_to_node1 = any(
  1273. (src == node2_id and tgt == node1_id)
  1274. or (src == node1_id and tgt == node2_id)
  1275. for src, tgt in node2_edges
  1276. )
  1277. assert (
  1278. has_edge_to_node1
  1279. ), f"Edge list of node {node2_id} should include a connection with {node1_id}"
  1280. print(
  1281. "Undirected property verification successful: batch-retrieved node edges include all relevant edges (regardless of direction)"
  1282. )
  1283. print("\nUndirected property tests completed.")
  1284. return True
  1285. except Exception as e:
  1286. ASCIIColors.red(f"An error occurred during the test: {str(e)}")
  1287. return False
  1288. async def main():
  1289. """Main function"""
  1290. # Display program title
  1291. ASCIIColors.cyan("""
  1292. ╔══════════════════════════════════════════════════════════════╗
  1293. ║ General Graph Storage Test Program ║
  1294. ╚══════════════════════════════════════════════════════════════╝
  1295. """)
  1296. # Check for .env file
  1297. if not check_env_file():
  1298. return
  1299. # Load environment variables
  1300. load_dotenv(dotenv_path=".env", override=False)
  1301. # Get graph storage type
  1302. graph_storage_type = os.getenv("LIGHTRAG_GRAPH_STORAGE", "NetworkXStorage")
  1303. ASCIIColors.magenta(
  1304. f"\nCurrently configured graph storage type: {graph_storage_type}"
  1305. )
  1306. ASCIIColors.white(
  1307. f"Supported graph storage types: {', '.join(STORAGE_IMPLEMENTATIONS['GRAPH_STORAGE']['implementations'])}"
  1308. )
  1309. # Initialize storage instance
  1310. storage = await initialize_graph_storage()
  1311. if not storage:
  1312. ASCIIColors.red("Failed to initialize storage instance, exiting test program.")
  1313. return
  1314. try:
  1315. async def reset_storage(test_name: str) -> None:
  1316. ASCIIColors.yellow(f"\nCleaning data before {test_name}...")
  1317. await storage.drop()
  1318. ASCIIColors.green("Data cleanup complete\n")
  1319. # Display test options
  1320. ASCIIColors.yellow("\nPlease select a test type:")
  1321. ASCIIColors.white("1. Basic Test (Node and edge insertion, reading)")
  1322. ASCIIColors.white(
  1323. "2. Advanced Test (Degree, labels, knowledge graph, deletion, etc.)"
  1324. )
  1325. ASCIIColors.white(
  1326. "3. Batch Operations Test (Batch get node/edge properties, degrees, etc.)"
  1327. )
  1328. ASCIIColors.white(
  1329. "4. Undirected Property Test (Verify undirected properties of the storage)"
  1330. )
  1331. ASCIIColors.white(
  1332. "5. Special Characters Test (Verify handling of single/double quotes, backslashes, etc.)"
  1333. )
  1334. ASCIIColors.white(
  1335. "6. String Escaping Regression Test (Quoted and escaped entity IDs across graph operations)"
  1336. )
  1337. ASCIIColors.white("7. All Tests")
  1338. choice = input("\nEnter your choice (1/2/3/4/5/6/7): ")
  1339. # Clean data before running tests
  1340. if choice in ["1", "2", "3", "4", "5", "6", "7"]:
  1341. await reset_storage("running tests")
  1342. if choice == "1":
  1343. await test_graph_basic(storage)
  1344. elif choice == "2":
  1345. await test_graph_advanced(storage)
  1346. elif choice == "3":
  1347. await test_graph_batch_operations(storage)
  1348. elif choice == "4":
  1349. await test_graph_undirected_property(storage)
  1350. elif choice == "5":
  1351. await test_graph_special_characters(storage)
  1352. elif choice == "6":
  1353. await test_graph_string_escaping_regressions(storage)
  1354. elif choice == "7":
  1355. ASCIIColors.cyan("\n=== Starting Basic Test ===")
  1356. await reset_storage("Basic Test")
  1357. basic_result = await test_graph_basic(storage)
  1358. if basic_result:
  1359. ASCIIColors.cyan("\n=== Starting Advanced Test ===")
  1360. await reset_storage("Advanced Test")
  1361. advanced_result = await test_graph_advanced(storage)
  1362. if advanced_result:
  1363. ASCIIColors.cyan("\n=== Starting Batch Operations Test ===")
  1364. await reset_storage("Batch Operations Test")
  1365. batch_result = await test_graph_batch_operations(storage)
  1366. if batch_result:
  1367. ASCIIColors.cyan("\n=== Starting Undirected Property Test ===")
  1368. await reset_storage("Undirected Property Test")
  1369. undirected_result = await test_graph_undirected_property(
  1370. storage
  1371. )
  1372. if undirected_result:
  1373. ASCIIColors.cyan(
  1374. "\n=== Starting Special Characters Test ==="
  1375. )
  1376. await reset_storage("Special Characters Test")
  1377. special_result = await test_graph_special_characters(
  1378. storage
  1379. )
  1380. if special_result:
  1381. ASCIIColors.cyan(
  1382. "\n=== Starting String Escaping Regression Test ==="
  1383. )
  1384. await reset_storage("String Escaping Regression Test")
  1385. await test_graph_string_escaping_regressions(storage)
  1386. else:
  1387. ASCIIColors.red("Invalid choice")
  1388. finally:
  1389. # Close connection
  1390. if storage:
  1391. await storage.finalize()
  1392. ASCIIColors.green("\nStorage connection closed.")
  1393. if __name__ == "__main__":
  1394. asyncio.run(main())