test_no_model_suffix_safety.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
"""
Tests for safety when the model suffix is absent (no model_name provided).

This test module verifies that the system correctly handles the case where
no model_name is provided, preventing accidental deletion of the only
table/collection on restart.

Critical bug: when model_suffix is empty, table_name == legacy_table_name.
On the second startup, the Case 1 logic would delete the only table/collection,
thinking it is "legacy", causing all subsequent operations to fail.
"""
  10. from unittest.mock import MagicMock, AsyncMock, patch
  11. from lightrag.kg.qdrant_impl import QdrantVectorDBStorage
  12. from lightrag.kg.postgres_impl import PGVectorStorage
  13. class TestNoModelSuffixSafety:
  14. """Test suite for preventing data loss when model_suffix is absent."""
  15. def test_qdrant_no_suffix_second_startup(self):
  16. """
  17. Test Qdrant doesn't delete collection on second startup when no model_name.
  18. Scenario:
  19. 1. First startup: Creates collection without suffix
  20. 2. Collection is empty
  21. 3. Second startup: Should NOT delete the collection
  22. Bug: Without fix, Case 1 would delete the only collection.
  23. """
  24. from qdrant_client import models
  25. client = MagicMock()
  26. # Simulate second startup: collection already exists and is empty
  27. # IMPORTANT: Without suffix, collection_name == legacy collection name
  28. collection_name = "lightrag_vdb_chunks" # No suffix, same as legacy
  29. # Both exist (they're the same collection)
  30. client.collection_exists.return_value = True
  31. # Collection is empty
  32. client.count.return_value.count = 0
  33. # Patch _find_legacy_collection to return the SAME collection name
  34. # This simulates the scenario where new collection == legacy collection
  35. with patch(
  36. "lightrag.kg.qdrant_impl._find_legacy_collection",
  37. return_value="lightrag_vdb_chunks", # Same as collection_name
  38. ):
  39. # Call setup_collection
  40. # This should detect that new == legacy and skip deletion
  41. QdrantVectorDBStorage.setup_collection(
  42. client,
  43. collection_name,
  44. namespace="chunks",
  45. workspace="_",
  46. vectors_config=models.VectorParams(
  47. size=1536, distance=models.Distance.COSINE
  48. ),
  49. hnsw_config=models.HnswConfigDiff(
  50. payload_m=16,
  51. m=0,
  52. ),
  53. model_suffix="", # Empty suffix to simulate no model_name provided
  54. )
  55. # CRITICAL: Collection should NOT be deleted
  56. client.delete_collection.assert_not_called()
  57. # Verify we returned early (skipped Case 1 cleanup)
  58. # The collection_exists was checked, but we didn't proceed to count
  59. # because we detected same name
  60. assert client.collection_exists.call_count >= 1
  61. async def test_postgres_no_suffix_second_startup(self):
  62. """
  63. Test PostgreSQL doesn't delete table on second startup when no model_name.
  64. Scenario:
  65. 1. First startup: Creates table without suffix
  66. 2. Table is empty
  67. 3. Second startup: Should NOT delete the table
  68. Bug: Without fix, Case 1 would delete the only table.
  69. """
  70. db = AsyncMock()
  71. # Configure mock return values to avoid unawaited coroutine warnings
  72. db.query.return_value = {"count": 0}
  73. db._create_vector_index.return_value = None
  74. # Simulate second startup: table already exists and is empty
  75. # IMPORTANT: table_name and legacy_table_name are THE SAME
  76. table_name = "LIGHTRAG_VDB_CHUNKS" # No suffix
  77. legacy_table_name = "LIGHTRAG_VDB_CHUNKS" # Same as new
  78. # Setup mock responses using check_table_exists on db
  79. async def check_table_exists_side_effect(name):
  80. # Both tables exist (they're the same)
  81. return True
  82. db.check_table_exists = AsyncMock(side_effect=check_table_exists_side_effect)
  83. # Call setup_table
  84. # This should detect that new == legacy and skip deletion
  85. await PGVectorStorage.setup_table(
  86. db,
  87. table_name,
  88. workspace="test_workspace",
  89. embedding_dim=1536,
  90. legacy_table_name=legacy_table_name,
  91. base_table="LIGHTRAG_VDB_CHUNKS",
  92. )
  93. # CRITICAL: Table should NOT be deleted (no DROP TABLE)
  94. drop_calls = [
  95. call
  96. for call in db.execute.call_args_list
  97. if call[0][0] and "DROP TABLE" in call[0][0]
  98. ]
  99. assert (
  100. len(drop_calls) == 0
  101. ), "Should not drop table when new and legacy are the same"
  102. # Note: COUNT queries for workspace data are expected behavior in Case 1
  103. # (for logging/warning purposes when workspace data is empty).
  104. # The critical safety check is that DROP TABLE is not called.
  105. def test_qdrant_with_suffix_case1_still_works(self):
  106. """
  107. Test that Case 1 cleanup still works when there IS a suffix.
  108. This ensures our fix doesn't break the normal Case 1 scenario.
  109. """
  110. from qdrant_client import models
  111. client = MagicMock()
  112. # Different names (normal case)
  113. collection_name = "lightrag_vdb_chunks_ada_002_1536d" # With suffix
  114. legacy_collection = "lightrag_vdb_chunks" # Without suffix
  115. # Setup: both exist
  116. def collection_exists_side_effect(name):
  117. return name in [collection_name, legacy_collection]
  118. client.collection_exists.side_effect = collection_exists_side_effect
  119. # Legacy is empty
  120. client.count.return_value.count = 0
  121. # Call setup_collection
  122. QdrantVectorDBStorage.setup_collection(
  123. client,
  124. collection_name,
  125. namespace="chunks",
  126. workspace="_",
  127. vectors_config=models.VectorParams(
  128. size=1536, distance=models.Distance.COSINE
  129. ),
  130. hnsw_config=models.HnswConfigDiff(
  131. payload_m=16,
  132. m=0,
  133. ),
  134. model_suffix="ada_002_1536d",
  135. )
  136. # SHOULD delete legacy (normal Case 1 behavior)
  137. client.delete_collection.assert_called_once_with(
  138. collection_name=legacy_collection
  139. )
  140. async def test_postgres_with_suffix_case1_still_works(self):
  141. """
  142. Test that Case 1 cleanup still works when there IS a suffix.
  143. This ensures our fix doesn't break the normal Case 1 scenario.
  144. """
  145. db = AsyncMock()
  146. # Different names (normal case)
  147. table_name = "LIGHTRAG_VDB_CHUNKS_ADA_002_1536D" # With suffix
  148. legacy_table_name = "LIGHTRAG_VDB_CHUNKS" # Without suffix
  149. # Setup mock responses using check_table_exists on db
  150. async def check_table_exists_side_effect(name):
  151. # Both tables exist
  152. return True
  153. db.check_table_exists = AsyncMock(side_effect=check_table_exists_side_effect)
  154. # Mock empty table
  155. async def query_side_effect(sql, params, **kwargs):
  156. if "COUNT(*)" in sql:
  157. return {"count": 0}
  158. return {}
  159. db.query.side_effect = query_side_effect
  160. # Call setup_table
  161. await PGVectorStorage.setup_table(
  162. db,
  163. table_name,
  164. workspace="test_workspace",
  165. embedding_dim=1536,
  166. legacy_table_name=legacy_table_name,
  167. base_table="LIGHTRAG_VDB_CHUNKS",
  168. )
  169. # SHOULD delete legacy (normal Case 1 behavior)
  170. drop_calls = [
  171. call
  172. for call in db.execute.call_args_list
  173. if call[0][0] and "DROP TABLE" in call[0][0]
  174. ]
  175. assert len(drop_calls) == 1, "Should drop legacy table in normal Case 1"
  176. assert legacy_table_name in drop_calls[0][0][0]