wxcz_admin
/
agency-swarm-cn-git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
							"""
Example demonstrating multimodal tool outputs (image + file) using Agency Swarm.
Flow: tools return images or files -> agent reads them -> responds with a description.

Two BaseTool classes are defined:
1. ``LoadShowcaseImage`` serves a local image via ``tool_output_image_from_path``.
2. ``LoadReferenceReport`` returns a remotely hosted PDF via ``tool_output_file_from_url``.

Run with a valid OpenAI API key configured in your environment.
"""

import asyncio
import os
import sys
from pathlib import Path

# Allow running the example directly from the repository.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))

from pydantic import Field

from agency_swarm import Agency, Agent, BaseTool, ToolOutputFileContent, ToolOutputImage
from agency_swarm.tools.utils import tool_output_file_from_url, tool_output_image_from_path

DATA_DIR = Path(__file__).resolve().parent / "data"
REFERENCE_PDF_URL = "https://raw.githubusercontent.com/VRSEN/agency-swarm/main/examples/data/daily_revenue_report.pdf"


class LoadShowcaseImage(BaseTool):
    """Return the latest gallery image as a multimodal output."""

    path: Path = Field(default=DATA_DIR / "daily_revenue.png", description="Image to publish")
    detail: str = Field(default="auto", description="Vision model detail level")

    def run(self) -> ToolOutputImage:
        return tool_output_image_from_path(self.path, detail=self.detail)


class LoadReferenceReport(BaseTool):
    """Return the reference PDF hosted remotely."""

    source_url: str = Field(default=REFERENCE_PDF_URL, description="Remote PDF to attach")

    def run(self) -> ToolOutputFileContent:
        return tool_output_file_from_url(self.source_url)


def create_multimodal_agency() -> Agency:
    gallery_agent = Agent(
        name="GalleryAgent",
        description="Provides gallery outputs with narrative context.",
        instructions="Call LoadShowcaseImage when asked for the latest gallery image. "
        "Use LoadReferenceReport when a supporting document is requested.",
        tools=[LoadShowcaseImage, LoadReferenceReport],
        model="gpt-5.4-mini",
    )
    return Agency(gallery_agent)


async def main() -> None:
    agency = create_multimodal_agency()
    response = await agency.get_response("Analyze the daily revenue graph, and summarize the supporting report.")

    print("Final response:")
    print(response.final_output)


if __name__ == "__main__":
    asyncio.run(main())