| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- """Integration tests for multimodal attachments."""
- from pathlib import Path
- import pytest
- from pydantic import Field
- from agency_swarm import Agency, Agent, BaseTool, ToolOutputFileContent, ToolOutputImage
- from agency_swarm.tools.utils import (
- tool_output_file_from_path,
- tool_output_file_from_url,
- tool_output_image_from_path,
- )
- FILES_DIR = Path(__file__).resolve().parents[2] / "data" / "files"
- class LoadShowcaseImage(BaseTool):
- """Provide the sample PNG so you can identify the function it illustrates."""
- path: Path = Field(default=FILES_DIR / "test-image.png", description="Image path")
- detail: str = Field(default="auto", description="Vision detail level")
- def run(self) -> ToolOutputImage:
- return tool_output_image_from_path(self.path, detail=self.detail)
- class LoadReferenceReportFromUrl(BaseTool):
- """Fetch the reference PDF hosted on main for summarisation."""
- source_url: str = Field(
- default="https://raw.githubusercontent.com/VRSEN/agency-swarm/main/tests/data/files/test-pdf.pdf",
- description="Remote PDF URL",
- )
- def run(self) -> ToolOutputFileContent:
- return tool_output_file_from_url(self.source_url)
- class LoadReferenceReportFromPath(BaseTool):
- """Load the local reference PDF so you can summarise it."""
- path: Path = Field(default=FILES_DIR / "test-pdf.pdf", description="Local PDF path")
- def run(self) -> ToolOutputFileContent:
- return tool_output_file_from_path(self.path)
- def _build_agency(*tool_types: type[BaseTool]) -> Agency:
- agent = Agent(
- name="GalleryAgent",
- description="Provides gallery outputs with narrative context.",
- instructions=(
- "Use each tool's description to decide which attachment to load for analysis. "
- "When a matching attachment tool exists, call it immediately instead of asking for confirmation."
- ),
- tools=list(tool_types),
- model="gpt-5.4-mini",
- )
- return Agency(agent)
- @pytest.mark.asyncio
- async def test_multimodal_outputs_image_description() -> None:
- agency = _build_agency(LoadShowcaseImage)
- result = await agency.get_response("Describe the provided diagram image and name the function shown.")
- assert isinstance(result.final_output, str)
- output = result.final_output.lower()
- assert "sum_of_squares" in output or "sum of squares" in output
- @pytest.mark.asyncio
- async def test_multimodal_outputs_remote_pdf() -> None:
- agency = _build_agency(LoadReferenceReportFromUrl)
- result = await agency.get_response(
- "Use the PDF URL tool now, then summarise the attached PDF and quote its secret phrase."
- )
- assert isinstance(result.final_output, str)
- output = result.final_output.lower()
- assert "first pdf secret phrase" in output
- assert "pdf" in output or "report" in output
- @pytest.mark.asyncio
- async def test_multimodal_outputs_local_pdf() -> None:
- agency = _build_agency(LoadReferenceReportFromPath)
- result = await agency.get_response(
- "Use the local PDF tool now, then summarise the attached PDF and quote its secret phrase."
- )
- assert isinstance(result.final_output, str)
- output = result.final_output.lower()
- assert "first pdf secret phrase" in output
- assert "pdf" in output or "report" in output
|