# wxcz_admin / agency-swarm-cn-git
# examples/message_attachments.py
"""
Message Attachments Example

This example demonstrates how to provide file attachments to agents after
the agency has been initialized.

Warning: this feature does not use the file search tool.
To see an example of how to use the file search tool, please refer to the file_search.py example.
"""

import asyncio
import base64
import os
import sys
from pathlib import Path

# Path setup for standalone examples: put the sibling "../src" directory
# (resolved relative to this file) at the front of sys.path before the
# package imports below run.
# NOTE(review): presumably this lets `agency_swarm` be imported from a source
# checkout without installing it - confirm against the repository layout.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))

from openai import AsyncOpenAI

from agency_swarm import Agency, Agent, ModelSettings

# Shared async OpenAI client, created once at import time with default settings.
# The demos below use it to upload files, and main() uses it to delete them.
client = AsyncOpenAI()


def image_to_base64(image_path: Path) -> str:
    """Return the contents of the file at *image_path* as base64 text.

    The file is read in binary mode in one go, base64-encoded, and the
    resulting ASCII bytes are decoded to ``str`` so the value can be embedded
    in a data URL.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


async def demo_file_processing(agency: Agency) -> list[str]:
    """Upload a sample PDF and image, then ask the agency to analyze both by file ID.

    The two files under ``data/`` next to this script are uploaded through the
    module-level ``client`` with ``purpose="assistants"``, and their IDs are
    passed to ``agency.get_response`` via ``file_ids``. Any exception is caught
    and reported on stdout rather than propagated.

    Args:
        agency: Agency that receives the analysis request.

    Returns:
        IDs of every file that was uploaded successfully, in upload order, so
        the caller can delete them later. IDs are returned even when a later
        step fails; the list is empty only if nothing was uploaded.
    """
    print("\nFile ids example")
    print("-" * 25)

    data_dir = Path(__file__).parent / "data"
    pdf_path = data_dir / "sample_report.pdf"
    shapes_path = data_dir / "shapes_and_text.png"

    # Record each ID as soon as its upload succeeds so that a failure further
    # down (second upload, or the agency call) does not hide files that already
    # exist remotely and still need to be cleaned up by the caller.
    uploaded_ids: list[str] = []
    try:
        for path in (pdf_path, shapes_path):
            with open(path, "rb") as f:
                uploaded = await client.files.create(file=f, purpose="assistants")
            uploaded_ids.append(uploaded.id)

        print(f"📤 Uploaded: {pdf_path.name} and {shapes_path.name}")

        # Ask the agency to analyze both attachments, referenced by file ID.
        # A copy of the list is passed so the callee cannot alter what we return.
        response = await agency.get_response(
            message=(
                "Please analyze the attached PDF and extract data from it. "
                "Then analyze the attached image and describe the shapes and text in it."
            ),
            file_ids=list(uploaded_ids),
        )

        print(f"Analysis:\n{response.final_output}")

    except Exception as e:
        # Demo boundary: report the problem and fall through so the IDs
        # collected so far are still handed back for cleanup.
        print(
            f"❌ Error in file handling demo: {e}. "
            "If you haven't modified the demo code, please open an issue on GitHub: https://github.com/VRSEN/agency-swarm/issues"
        )

    # Return uploaded file IDs for cleanup after all demos finish
    return uploaded_ids


async def demo_vision_processing(agency: Agency) -> None:
    """Send a base64-encoded PNG inline in the message and print the agency's answer.

    The image ``data/landscape_scene.png`` next to this script is encoded with
    ``image_to_base64`` and embedded as a ``data:`` URL in an ``input_image``
    content part, followed by a separate user turn carrying the question.
    Any exception is caught and reported on stdout.
    """
    print("\nMessage input example (only for pdf and image attachments)")
    print("-" * 20)

    try:
        # Locate and encode the landscape picture.
        image_file = Path(__file__).parent / "data" / "landscape_scene.png"
        encoded_image = image_to_base64(image_file)

        print(f"🖼️  Analyzing: {image_file.name}")

        # First user turn: the image itself, as an inline data URL.
        image_part = {
            "type": "input_image",
            "detail": "high",
            "image_url": f"data:image/png;base64,{encoded_image}",
        }
        image_turn = {"role": "user", "content": [image_part]}

        # Second user turn: the question about the image.
        question_turn = {"role": "user", "content": "Describe this scene. How many trees do you see?"}

        # The message list is passed positionally to the agency.
        result = await agency.get_response([image_turn, question_turn])

        print(f"Scene: {result.final_output}")
    except Exception as err:
        print(
            f"❌ Error in vision processing demo: {err}. "
            "If you haven't modified the demo code, please open an issue on GitHub: https://github.com/VRSEN/agency-swarm/issues"
        )


async def main() -> None:
    """Run the file-ID and inline-image demos, then delete the uploaded files.

    Builds one agent and one agency, runs ``demo_file_processing`` followed by
    ``demo_vision_processing``, prints a short summary, and finally deletes
    every file ID returned by the first demo. Cleanup is best-effort: a failed
    delete is reported on stdout and does not stop the remaining deletions.
    """

    # Create a single agent that can handle both files and vision
    agent = Agent(
        name="FileAndVisionAgent",
        instructions="""You are an expert at analyzing files and images.
        When files or images are provided, examine them carefully and provide detailed analysis.
        Be precise and specific in your responses.
        You are allowed to share all data found within documents with the user.""",
        model_settings=ModelSettings(temperature=0.0),  # Deterministic responses
    )

    # Create agency with the single agent
    agency = Agency(agent, shared_instructions="Demonstrate file and vision processing.")

    print("Agency Swarm File Handling & Vision Demo")
    print("=" * 50)

    # Run demos
    uploaded_ids = await demo_file_processing(agency)
    await demo_vision_processing(agency)

    print("\n✅ Demo complete!")
    print("\nKey Points:")
    print("   • Message attachments can be provided in two ways")
    print("   • No custom tools needed - OpenAI handles everything")

    # Cleanup uploaded files after both demos complete
    for fid in uploaded_ids:
        try:
            await client.files.delete(fid)
        except Exception as e:
            # Keep going so one failed delete does not strand the other files,
            # but tell the user which ID is left behind.
            print(f"⚠️  Could not delete uploaded file {fid}: {e}")


# Run the async demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())