| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296 |
- """
- Tests for bridging vector_db_storage_cls_kwargs to MilvusIndexConfig
- This test suite validates that MilvusIndexConfig parameters can be passed
- through vector_db_storage_cls_kwargs and that backward compatibility is maintained.
- """
- import pytest
- from unittest.mock import patch, MagicMock
- from lightrag.kg.milvus_impl import MilvusVectorDBStorage
@pytest.mark.offline
class TestMilvusKwargsParameterBridge:
    """Check that ``vector_db_storage_cls_kwargs`` entries reach ``storage.index_config``.

    Every test builds a ``MilvusVectorDBStorage`` with a mocked embedding
    function and then inspects the attributes of the resulting
    ``index_config``.
    """

    @staticmethod
    def _build_storage(workspace="test_workspace", **storage_kwargs):
        """Construct a storage instance carrying ``storage_kwargs`` as cls kwargs.

        Args:
            workspace: Workspace name handed to the storage constructor.
            **storage_kwargs: Entries placed in
                ``global_config["vector_db_storage_cls_kwargs"]``.
                ``cosine_better_than_threshold`` is set to 0.3 unless the
                caller supplies its own value.

        Returns:
            The constructed ``MilvusVectorDBStorage``.
        """
        embedding_func = MagicMock()
        embedding_func.embedding_dim = 128
        # Caller-supplied entries come last so they win over the default threshold.
        cls_kwargs = {"cosine_better_than_threshold": 0.3, **storage_kwargs}
        return MilvusVectorDBStorage(
            namespace="test_entities",
            workspace=workspace,
            global_config={
                "embedding_batch_num": 100,
                "vector_db_storage_cls_kwargs": cls_kwargs,
            },
            embedding_func=embedding_func,
            meta_fields=set(),
        )

    def test_kwargs_to_index_config_basic(self):
        """Basic HNSW parameters given in kwargs show up on index_config."""
        storage = self._build_storage(
            hnsw_m=32,
            hnsw_ef=256,
            hnsw_ef_construction=300,
        )

        index_config = storage.index_config
        assert index_config.hnsw_m == 32
        assert index_config.hnsw_ef == 256
        assert index_config.hnsw_ef_construction == 300

    def test_kwargs_to_index_config_index_and_metric_types(self):
        """index_type, metric_type and ivf_nlist given in kwargs show up on index_config."""
        storage = self._build_storage(
            index_type="IVF_FLAT",
            metric_type="L2",
            ivf_nlist=2048,
        )

        index_config = storage.index_config
        assert index_config.index_type == "IVF_FLAT"
        assert index_config.metric_type == "L2"
        assert index_config.ivf_nlist == 2048

    def test_kwargs_to_index_config_sq_parameters(self):
        """HNSW_SQ parameters given in kwargs show up on index_config."""
        storage = self._build_storage(
            index_type="HNSW_SQ",
            sq_type="SQ8",
            sq_refine=True,
            sq_refine_type="FP16",
            sq_refine_k=20,
        )

        index_config = storage.index_config
        assert index_config.index_type == "HNSW_SQ"
        assert index_config.sq_type == "SQ8"
        assert index_config.sq_refine is True
        assert index_config.sq_refine_type == "FP16"
        assert index_config.sq_refine_k == 20

    def test_backward_compatibility_no_index_params(self, monkeypatch):
        """Without index parameters in kwargs, index_config holds the defaults.

        Args:
            monkeypatch: pytest fixture used to remove index-related
                environment variables for the duration of this test.
        """
        # The assertions below pin built-in defaults, so a setting leaking in
        # from the developer's or CI machine's environment must not be able to
        # change the outcome. MILVUS_INDEX_TYPE and MILVUS_HNSW_M are the names
        # exercised by the environment-override test in this class; the other
        # two are assumed to follow the same MILVUS_<PARAM> pattern.
        # NOTE(review): confirm the names against MilvusIndexConfig.
        for env_name in (
            "MILVUS_INDEX_TYPE",
            "MILVUS_METRIC_TYPE",
            "MILVUS_HNSW_M",
            "MILVUS_HNSW_EF_CONSTRUCTION",
        ):
            monkeypatch.delenv(env_name, raising=False)

        # Only the threshold is supplied; no index config parameters at all.
        storage = self._build_storage()

        # Defaults aligned with Milvus 2.4+ official documentation
        index_config = storage.index_config
        assert index_config.index_type == "AUTOINDEX"  # Default
        assert index_config.metric_type == "COSINE"  # Default
        assert index_config.hnsw_m == 16  # Default (Milvus 2.4+)
        assert index_config.hnsw_ef_construction == 360  # Default (Milvus 2.4+)

    def test_kwargs_params_override_environment_variables(self):
        """Values given in kwargs take precedence over environment variables."""
        env_settings = {
            "MILVUS_INDEX_TYPE": "IVF_FLAT",
            "MILVUS_HNSW_M": "16",
        }
        with patch.dict("os.environ", env_settings):
            # Both kwargs conflict with the environment values set above.
            storage = self._build_storage(index_type="HNSW", hnsw_m=64)

            assert (
                storage.index_config.index_type == "HNSW"
            )  # From kwargs, not IVF_FLAT
            assert storage.index_config.hnsw_m == 64  # From kwargs, not 16

    def test_non_index_params_ignored(self):
        """Unrelated kwargs entries do not become index_config attributes."""
        storage = self._build_storage(
            hnsw_m=32,
            some_other_param="ignored",  # Should be ignored
            another_param=123,  # Should be ignored
        )

        # The recognised parameter is applied...
        assert storage.index_config.hnsw_m == 32
        # ...while the unrecognised ones leave no attribute behind.
        assert not hasattr(storage.index_config, "some_other_param")
        assert not hasattr(storage.index_config, "another_param")

    def test_raganything_framework_integration_scenario(self):
        """Configuration passed through a framework such as RAGAnything is applied.

        Covers the use case where a framework sitting on top of LightRAG
        forwards Milvus index configuration via
        ``vector_db_storage_cls_kwargs`` instead of environment variables,
        mixed with framework-specific entries that index_config must not pick up.
        """
        framework_kwargs = {
            # Required for vector storage
            "cosine_better_than_threshold": 0.2,
            # Milvus index configuration
            "index_type": "HNSW",
            "metric_type": "L2",
            "hnsw_m": 48,
            "hnsw_ef_construction": 400,
            "hnsw_ef": 200,
            # Framework-specific parameters (should be ignored by Milvus)
            "framework_version": "1.0.0",
            "custom_setting": "value",
        }

        storage = self._build_storage(
            workspace="raganything_workspace",
            **framework_kwargs,
        )

        # Milvus parameters were extracted and applied.
        index_config = storage.index_config
        assert index_config.index_type == "HNSW"
        assert index_config.metric_type == "L2"
        assert index_config.hnsw_m == 48
        assert index_config.hnsw_ef_construction == 400
        assert index_config.hnsw_ef == 200
        # Framework-specific parameters were ignored.
        assert not hasattr(index_config, "framework_version")
        assert not hasattr(index_config, "custom_setting")
        # The workspace passed to the constructor is kept.
        assert storage.workspace == "raganything_workspace"

    def test_all_milvus_parameters_supported_via_kwargs(self):
        """All 11 index configuration parameters can be set through kwargs.

        Passes every parameter at once and checks that each one is reflected
        on index_config with exactly the value supplied.
        """
        expected = {
            "index_type": "HNSW_SQ",
            "metric_type": "IP",
            "hnsw_m": 64,
            "hnsw_ef_construction": 512,
            "hnsw_ef": 256,
            "sq_type": "SQ8",
            "sq_refine": True,
            "sq_refine_type": "FP16",
            "sq_refine_k": 30,
            "ivf_nlist": 4096,
            "ivf_nprobe": 64,
        }

        storage = self._build_storage(**expected)

        index_config = storage.index_config
        for param_name, param_value in expected.items():
            assert getattr(index_config, param_name) == param_value, param_name
        # Equality alone would accept 1 for True; keep the identity check.
        assert index_config.sq_refine is True
if __name__ == "__main__":
    # Direct execution: hand this file to pytest with verbose reporting.
    pytest_args = [__file__, "-v"]
    pytest.main(pytest_args)
|