# test_multimodal_outputs.py (3.3 KB)

  1. """Integration tests for multimodal attachments."""
  2. from pathlib import Path
  3. import pytest
  4. from pydantic import Field
  5. from agency_swarm import Agency, Agent, BaseTool, ToolOutputFileContent, ToolOutputImage
  6. from agency_swarm.tools.utils import (
  7. tool_output_file_from_path,
  8. tool_output_file_from_url,
  9. tool_output_image_from_path,
  10. )
  11. FILES_DIR = Path(__file__).resolve().parents[2] / "data" / "files"
  12. class LoadShowcaseImage(BaseTool):
  13. """Provide the sample PNG so you can identify the function it illustrates."""
  14. path: Path = Field(default=FILES_DIR / "test-image.png", description="Image path")
  15. detail: str = Field(default="auto", description="Vision detail level")
  16. def run(self) -> ToolOutputImage:
  17. return tool_output_image_from_path(self.path, detail=self.detail)
  18. class LoadReferenceReportFromUrl(BaseTool):
  19. """Fetch the reference PDF hosted on main for summarisation."""
  20. source_url: str = Field(
  21. default="https://raw.githubusercontent.com/VRSEN/agency-swarm/main/tests/data/files/test-pdf.pdf",
  22. description="Remote PDF URL",
  23. )
  24. def run(self) -> ToolOutputFileContent:
  25. return tool_output_file_from_url(self.source_url)
  26. class LoadReferenceReportFromPath(BaseTool):
  27. """Load the local reference PDF so you can summarise it."""
  28. path: Path = Field(default=FILES_DIR / "test-pdf.pdf", description="Local PDF path")
  29. def run(self) -> ToolOutputFileContent:
  30. return tool_output_file_from_path(self.path)
  31. def _build_agency(*tool_types: type[BaseTool]) -> Agency:
  32. agent = Agent(
  33. name="GalleryAgent",
  34. description="Provides gallery outputs with narrative context.",
  35. instructions=(
  36. "Use each tool's description to decide which attachment to load for analysis. "
  37. "When a matching attachment tool exists, call it immediately instead of asking for confirmation."
  38. ),
  39. tools=list(tool_types),
  40. model="gpt-5.4-mini",
  41. )
  42. return Agency(agent)
  43. @pytest.mark.asyncio
  44. async def test_multimodal_outputs_image_description() -> None:
  45. agency = _build_agency(LoadShowcaseImage)
  46. result = await agency.get_response("Describe the provided diagram image and name the function shown.")
  47. assert isinstance(result.final_output, str)
  48. output = result.final_output.lower()
  49. assert "sum_of_squares" in output or "sum of squares" in output
  50. @pytest.mark.asyncio
  51. async def test_multimodal_outputs_remote_pdf() -> None:
  52. agency = _build_agency(LoadReferenceReportFromUrl)
  53. result = await agency.get_response(
  54. "Use the PDF URL tool now, then summarise the attached PDF and quote its secret phrase."
  55. )
  56. assert isinstance(result.final_output, str)
  57. output = result.final_output.lower()
  58. assert "first pdf secret phrase" in output
  59. assert "pdf" in output or "report" in output
  60. @pytest.mark.asyncio
  61. async def test_multimodal_outputs_local_pdf() -> None:
  62. agency = _build_agency(LoadReferenceReportFromPath)
  63. result = await agency.get_response(
  64. "Use the local PDF tool now, then summarise the attached PDF and quote its secret phrase."
  65. )
  66. assert isinstance(result.final_output, str)
  67. output = result.final_output.lower()
  68. assert "first pdf secret phrase" in output
  69. assert "pdf" in output or "report" in output