test_llm_binding_image_payload.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. """Offline tests for image_inputs payload shape per LLM binding.
  2. These tests stub the underlying network clients with ``unittest.mock`` so they
  3. exercise only the message-construction layer that this repository owns.
  4. """
  5. from __future__ import annotations
  6. import base64
  7. from typing import Any
  8. from unittest.mock import AsyncMock, MagicMock, patch
  9. import pytest
  10. pytestmark = pytest.mark.offline
  11. PNG_BYTES = (
  12. b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01"
  13. b"\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\rIDATx\x9cc\xf8"
  14. b"\xcf\xc0\x00\x00\x00\x03\x00\x01\x5c\xcc\xd9\x9e\x00\x00\x00\x00"
  15. b"IEND\xaeB`\x82"
  16. )
  17. PNG_B64 = base64.b64encode(PNG_BYTES).decode("ascii")
  18. @pytest.mark.asyncio
  19. async def test_openai_binding_inserts_image_url_content_block():
  20. from lightrag.llm import openai as openai_mod
  21. fake_choice = MagicMock()
  22. fake_choice.message.content = "ok"
  23. fake_choice.message.reasoning_content = None
  24. fake_choice.finish_reason = "stop"
  25. fake_response = MagicMock()
  26. fake_response.choices = [fake_choice]
  27. fake_response.usage = None
  28. fake_client = MagicMock()
  29. fake_client.chat.completions.create = AsyncMock(return_value=fake_response)
  30. fake_client.close = AsyncMock()
  31. with patch.object(
  32. openai_mod, "create_openai_async_client", return_value=fake_client
  33. ):
  34. await openai_mod.openai_complete_if_cache(
  35. model="gpt-4o-mini",
  36. prompt="describe",
  37. api_key="dummy",
  38. image_inputs=[PNG_B64],
  39. )
  40. _, kwargs = fake_client.chat.completions.create.call_args
  41. messages = kwargs["messages"]
  42. assert messages[-1]["role"] == "user"
  43. user_content = messages[-1]["content"]
  44. assert isinstance(user_content, list)
  45. assert user_content[0] == {"type": "text", "text": "describe"}
  46. assert user_content[1]["type"] == "image_url"
  47. assert user_content[1]["image_url"]["url"].startswith("data:image/png;base64,")
  48. @pytest.mark.asyncio
  49. async def test_openai_binding_text_only_remains_plain_string():
  50. from lightrag.llm import openai as openai_mod
  51. fake_choice = MagicMock()
  52. fake_choice.message.content = "ok"
  53. fake_choice.message.reasoning_content = None
  54. fake_choice.finish_reason = "stop"
  55. fake_response = MagicMock()
  56. fake_response.choices = [fake_choice]
  57. fake_response.usage = None
  58. fake_client = MagicMock()
  59. fake_client.chat.completions.create = AsyncMock(return_value=fake_response)
  60. fake_client.close = AsyncMock()
  61. with patch.object(
  62. openai_mod, "create_openai_async_client", return_value=fake_client
  63. ):
  64. await openai_mod.openai_complete_if_cache(
  65. model="gpt-4o-mini",
  66. prompt="describe",
  67. api_key="dummy",
  68. )
  69. _, kwargs = fake_client.chat.completions.create.call_args
  70. assert kwargs["messages"][-1]["content"] == "describe"
  71. @pytest.mark.asyncio
  72. async def test_ollama_binding_attaches_images_to_user_message():
  73. from lightrag.llm import ollama as ollama_mod
  74. fake_client = MagicMock()
  75. fake_client.chat = AsyncMock(return_value={"message": {"content": "ok"}})
  76. fake_client._client = MagicMock()
  77. fake_client._client.aclose = AsyncMock()
  78. with patch.object(ollama_mod.ollama, "AsyncClient", return_value=fake_client):
  79. await ollama_mod._ollama_model_if_cache(
  80. model="llava",
  81. prompt="describe",
  82. image_inputs=[PNG_B64],
  83. )
  84. _, kwargs = fake_client.chat.call_args
  85. user_msg = kwargs["messages"][-1]
  86. assert user_msg["role"] == "user"
  87. assert user_msg["content"] == "describe"
  88. assert user_msg["images"] == [PNG_B64]
  89. @pytest.mark.asyncio
  90. async def test_anthropic_binding_inserts_image_content_block():
  91. from lightrag.llm import anthropic as anthropic_mod
  92. captured: dict[str, Any] = {}
  93. class FakeMessages:
  94. async def create(self, **kwargs):
  95. captured.update(kwargs)
  96. return MagicMock(content=[MagicMock(text="")])
  97. fake_client = MagicMock()
  98. fake_client.messages = FakeMessages()
  99. with patch.object(anthropic_mod, "AsyncAnthropic", return_value=fake_client):
  100. await anthropic_mod.anthropic_complete_if_cache(
  101. model="claude-3-opus",
  102. prompt="describe",
  103. api_key="dummy",
  104. image_inputs=[PNG_B64],
  105. )
  106. user_content = captured["messages"][-1]["content"]
  107. assert isinstance(user_content, list)
  108. image_blocks = [b for b in user_content if b.get("type") == "image"]
  109. assert len(image_blocks) == 1
  110. assert image_blocks[0]["source"] == {
  111. "type": "base64",
  112. "media_type": "image/png",
  113. "data": PNG_B64,
  114. }
  115. assert user_content[-1] == {"type": "text", "text": "describe"}
  116. @pytest.mark.asyncio
  117. async def test_lollms_binding_rejects_image_inputs():
  118. from lightrag.llm import lollms as lollms_mod
  119. with pytest.raises(NotImplementedError):
  120. await lollms_mod.lollms_model_if_cache(
  121. model="unused",
  122. prompt="hi",
  123. image_inputs=[PNG_B64],
  124. )
  125. @pytest.mark.asyncio
  126. async def test_bedrock_binding_forces_non_stream_when_image_present():
  127. from lightrag.llm import bedrock as bedrock_mod
  128. captured: dict[str, Any] = {}
  129. class FakeBedrockClient:
  130. async def __aenter__(self):
  131. return self
  132. async def __aexit__(self, exc_type, exc, tb):
  133. return False
  134. async def converse(self, **kwargs):
  135. captured["mode"] = "converse"
  136. captured["args"] = kwargs
  137. return {
  138. "output": {"message": {"content": [{"text": "ok"}]}},
  139. "stopReason": "end_turn",
  140. }
  141. async def converse_stream(self, **kwargs):
  142. captured["mode"] = "converse_stream"
  143. captured["args"] = kwargs
  144. return {"stream": []}
  145. class FakeSession:
  146. def client(self, *_, **__):
  147. return FakeBedrockClient()
  148. with patch.object(bedrock_mod.aioboto3, "Session", return_value=FakeSession()):
  149. await bedrock_mod.bedrock_complete_if_cache(
  150. "anthropic.claude-3-haiku-20240307-v1:0",
  151. "describe",
  152. stream=True,
  153. image_inputs=[PNG_B64],
  154. aws_region="us-east-1",
  155. )
  156. assert captured["mode"] == "converse"
  157. user_msg = captured["args"]["messages"][-1]
  158. image_blocks = [block for block in user_msg["content"] if "image" in block]
  159. assert len(image_blocks) == 1
  160. assert image_blocks[0]["image"]["format"] == "png"
  161. assert image_blocks[0]["image"]["source"]["bytes"] == PNG_BYTES