# test_milvus_kwargs_bridge.py
"""
Tests for bridging vector_db_storage_cls_kwargs to MilvusIndexConfig.

This test suite validates that MilvusIndexConfig parameters can be passed
through vector_db_storage_cls_kwargs and that backward compatibility is maintained.
"""
import os
from unittest.mock import MagicMock, patch

import pytest

from lightrag.kg.milvus_impl import MilvusVectorDBStorage
  9. @pytest.mark.offline
  10. class TestMilvusKwargsParameterBridge:
  11. """Test parameter bridging from vector_db_storage_cls_kwargs to MilvusIndexConfig"""
  12. def test_kwargs_to_index_config_basic(self):
  13. """Test that basic HNSW parameters are passed from kwargs to MilvusIndexConfig"""
  14. # Mock the embedding function
  15. mock_embedding_func = MagicMock()
  16. mock_embedding_func.embedding_dim = 128
  17. # Create storage instance with custom index config parameters in kwargs
  18. storage = MilvusVectorDBStorage(
  19. namespace="test_entities",
  20. workspace="test_workspace",
  21. global_config={
  22. "embedding_batch_num": 100,
  23. "vector_db_storage_cls_kwargs": {
  24. "cosine_better_than_threshold": 0.3,
  25. "hnsw_m": 32,
  26. "hnsw_ef": 256,
  27. "hnsw_ef_construction": 300,
  28. },
  29. },
  30. embedding_func=mock_embedding_func,
  31. meta_fields=set(),
  32. )
  33. # Verify that parameters were passed to index_config
  34. assert storage.index_config.hnsw_m == 32
  35. assert storage.index_config.hnsw_ef == 256
  36. assert storage.index_config.hnsw_ef_construction == 300
  37. def test_kwargs_to_index_config_index_and_metric_types(self):
  38. """Test that index_type and metric_type are passed from kwargs"""
  39. mock_embedding_func = MagicMock()
  40. mock_embedding_func.embedding_dim = 128
  41. storage = MilvusVectorDBStorage(
  42. namespace="test_entities",
  43. workspace="test_workspace",
  44. global_config={
  45. "embedding_batch_num": 100,
  46. "vector_db_storage_cls_kwargs": {
  47. "cosine_better_than_threshold": 0.3,
  48. "index_type": "IVF_FLAT",
  49. "metric_type": "L2",
  50. "ivf_nlist": 2048,
  51. },
  52. },
  53. embedding_func=mock_embedding_func,
  54. meta_fields=set(),
  55. )
  56. # Verify that parameters were passed to index_config
  57. assert storage.index_config.index_type == "IVF_FLAT"
  58. assert storage.index_config.metric_type == "L2"
  59. assert storage.index_config.ivf_nlist == 2048
  60. def test_kwargs_to_index_config_sq_parameters(self):
  61. """Test that HNSW_SQ parameters are passed from kwargs"""
  62. mock_embedding_func = MagicMock()
  63. mock_embedding_func.embedding_dim = 128
  64. storage = MilvusVectorDBStorage(
  65. namespace="test_entities",
  66. workspace="test_workspace",
  67. global_config={
  68. "embedding_batch_num": 100,
  69. "vector_db_storage_cls_kwargs": {
  70. "cosine_better_than_threshold": 0.3,
  71. "index_type": "HNSW_SQ",
  72. "sq_type": "SQ8",
  73. "sq_refine": True,
  74. "sq_refine_type": "FP16",
  75. "sq_refine_k": 20,
  76. },
  77. },
  78. embedding_func=mock_embedding_func,
  79. meta_fields=set(),
  80. )
  81. # Verify that parameters were passed to index_config
  82. assert storage.index_config.index_type == "HNSW_SQ"
  83. assert storage.index_config.sq_type == "SQ8"
  84. assert storage.index_config.sq_refine is True
  85. assert storage.index_config.sq_refine_type == "FP16"
  86. assert storage.index_config.sq_refine_k == 20
  87. def test_backward_compatibility_no_index_params(self):
  88. """Test backward compatibility when no index parameters are provided in kwargs"""
  89. mock_embedding_func = MagicMock()
  90. mock_embedding_func.embedding_dim = 128
  91. # Create storage without any index config parameters in kwargs
  92. storage = MilvusVectorDBStorage(
  93. namespace="test_entities",
  94. workspace="test_workspace",
  95. global_config={
  96. "embedding_batch_num": 100,
  97. "vector_db_storage_cls_kwargs": {
  98. "cosine_better_than_threshold": 0.3,
  99. },
  100. },
  101. embedding_func=mock_embedding_func,
  102. meta_fields=set(),
  103. )
  104. # Verify that default values are used (from environment variables or defaults)
  105. # Defaults aligned with Milvus 2.4+ official documentation
  106. assert storage.index_config.index_type == "AUTOINDEX" # Default
  107. assert storage.index_config.metric_type == "COSINE" # Default
  108. assert storage.index_config.hnsw_m == 16 # Default (Milvus 2.4+)
  109. assert storage.index_config.hnsw_ef_construction == 360 # Default (Milvus 2.4+)
  110. def test_kwargs_params_override_environment_variables(self):
  111. """Test that kwargs parameters take precedence over environment variables"""
  112. mock_embedding_func = MagicMock()
  113. mock_embedding_func.embedding_dim = 128
  114. # Set environment variables
  115. with patch.dict(
  116. "os.environ",
  117. {
  118. "MILVUS_INDEX_TYPE": "IVF_FLAT",
  119. "MILVUS_HNSW_M": "16",
  120. },
  121. ):
  122. # Create storage with kwargs parameters that should override env vars
  123. storage = MilvusVectorDBStorage(
  124. namespace="test_entities",
  125. workspace="test_workspace",
  126. global_config={
  127. "embedding_batch_num": 100,
  128. "vector_db_storage_cls_kwargs": {
  129. "cosine_better_than_threshold": 0.3,
  130. "index_type": "HNSW",
  131. "hnsw_m": 64,
  132. },
  133. },
  134. embedding_func=mock_embedding_func,
  135. meta_fields=set(),
  136. )
  137. # Verify that kwargs parameters override environment variables
  138. assert (
  139. storage.index_config.index_type == "HNSW"
  140. ) # From kwargs, not IVF_FLAT
  141. assert storage.index_config.hnsw_m == 64 # From kwargs, not 16
  142. def test_non_index_params_ignored(self):
  143. """Test that non-index-config parameters in kwargs are ignored"""
  144. mock_embedding_func = MagicMock()
  145. mock_embedding_func.embedding_dim = 128
  146. storage = MilvusVectorDBStorage(
  147. namespace="test_entities",
  148. workspace="test_workspace",
  149. global_config={
  150. "embedding_batch_num": 100,
  151. "vector_db_storage_cls_kwargs": {
  152. "cosine_better_than_threshold": 0.3,
  153. "hnsw_m": 32,
  154. "some_other_param": "ignored", # Should be ignored
  155. "another_param": 123, # Should be ignored
  156. },
  157. },
  158. embedding_func=mock_embedding_func,
  159. meta_fields=set(),
  160. )
  161. # Verify that valid parameter was passed
  162. assert storage.index_config.hnsw_m == 32
  163. # Verify that invalid parameters were ignored (no AttributeError)
  164. assert not hasattr(storage.index_config, "some_other_param")
  165. assert not hasattr(storage.index_config, "another_param")
  166. def test_raganything_framework_integration_scenario(self):
  167. """Test configuration passing through frameworks like RAGAnything
  168. This test validates the use case where a framework (like RAGAnything)
  169. sits on top of LightRAG and needs to pass Milvus index configuration
  170. through to LightRAG without modifying environment variables.
  171. The framework can pass all index config parameters via
  172. vector_db_storage_cls_kwargs, and they will be properly extracted
  173. and applied to MilvusIndexConfig.
  174. """
  175. mock_embedding_func = MagicMock()
  176. mock_embedding_func.embedding_dim = 128
  177. # Simulate RAGAnything framework passing configuration to LightRAG
  178. # All index configuration parameters are passed through kwargs
  179. framework_config = {
  180. "embedding_batch_num": 100,
  181. "vector_db_storage_cls_kwargs": {
  182. # Required for vector storage
  183. "cosine_better_than_threshold": 0.2,
  184. # Milvus index configuration - all parameters supported
  185. "index_type": "HNSW",
  186. "metric_type": "L2",
  187. "hnsw_m": 48,
  188. "hnsw_ef_construction": 400,
  189. "hnsw_ef": 200,
  190. # Framework-specific parameters (should be ignored by Milvus)
  191. "framework_version": "1.0.0",
  192. "custom_setting": "value",
  193. },
  194. }
  195. # Create storage instance with framework configuration
  196. storage = MilvusVectorDBStorage(
  197. namespace="test_entities",
  198. workspace="raganything_workspace",
  199. global_config=framework_config,
  200. embedding_func=mock_embedding_func,
  201. meta_fields=set(),
  202. )
  203. # Verify all Milvus parameters were correctly extracted and applied
  204. assert storage.index_config.index_type == "HNSW"
  205. assert storage.index_config.metric_type == "L2"
  206. assert storage.index_config.hnsw_m == 48
  207. assert storage.index_config.hnsw_ef_construction == 400
  208. assert storage.index_config.hnsw_ef == 200
  209. # Verify framework-specific parameters were ignored
  210. assert not hasattr(storage.index_config, "framework_version")
  211. assert not hasattr(storage.index_config, "custom_setting")
  212. # Verify workspace isolation is maintained
  213. assert storage.workspace == "raganything_workspace"
  214. def test_all_milvus_parameters_supported_via_kwargs(self):
  215. """Test that all 11 MilvusIndexConfig parameters can be configured via kwargs
  216. This comprehensive test ensures that every single index configuration
  217. parameter defined in MilvusIndexConfig can be passed through
  218. vector_db_storage_cls_kwargs, which is critical for framework integration.
  219. """
  220. mock_embedding_func = MagicMock()
  221. mock_embedding_func.embedding_dim = 128
  222. # Pass ALL 11 MilvusIndexConfig parameters via kwargs
  223. storage = MilvusVectorDBStorage(
  224. namespace="test_entities",
  225. workspace="test_workspace",
  226. global_config={
  227. "embedding_batch_num": 100,
  228. "vector_db_storage_cls_kwargs": {
  229. "cosine_better_than_threshold": 0.3,
  230. # All 11 MilvusIndexConfig parameters
  231. "index_type": "HNSW_SQ",
  232. "metric_type": "IP",
  233. "hnsw_m": 64,
  234. "hnsw_ef_construction": 512,
  235. "hnsw_ef": 256,
  236. "sq_type": "SQ8",
  237. "sq_refine": True,
  238. "sq_refine_type": "FP16",
  239. "sq_refine_k": 30,
  240. "ivf_nlist": 4096,
  241. "ivf_nprobe": 64,
  242. },
  243. },
  244. embedding_func=mock_embedding_func,
  245. meta_fields=set(),
  246. )
  247. # Verify EVERY parameter was correctly applied
  248. assert storage.index_config.index_type == "HNSW_SQ"
  249. assert storage.index_config.metric_type == "IP"
  250. assert storage.index_config.hnsw_m == 64
  251. assert storage.index_config.hnsw_ef_construction == 512
  252. assert storage.index_config.hnsw_ef == 256
  253. assert storage.index_config.sq_type == "SQ8"
  254. assert storage.index_config.sq_refine is True
  255. assert storage.index_config.sq_refine_type == "FP16"
  256. assert storage.index_config.sq_refine_k == 30
  257. assert storage.index_config.ivf_nlist == 4096
  258. assert storage.index_config.ivf_nprobe == 64
  259. if __name__ == "__main__":
  260. pytest.main([__file__, "-v"])