message_attachments.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. # examples/message_attachments.py
  2. """
  3. Message Attachments Example
  4. This example demonstrates how to provide file attachments to agents after
  5. the agency has been initialized.
  6. Warning: this feature does not use the file search tool.
  7. For an example that uses the file search tool, see the file_search.py example.
  8. """
  9. import asyncio
  10. import base64
  11. import os
  12. import sys
  13. from pathlib import Path
  14. # Path setup for standalone examples
  15. sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "src")))
  16. from openai import AsyncOpenAI
  17. from agency_swarm import Agency, Agent, ModelSettings
  18. client = AsyncOpenAI()
  19. def image_to_base64(image_path: Path) -> str:
  20. """Convert image file to base64 string for vision processing."""
  21. with open(image_path, "rb") as image_file:
  22. return base64.b64encode(image_file.read()).decode("utf-8")
  23. async def demo_file_processing(agency: Agency) -> list[str]:
  24. """Demonstrate file processing with uploaded files. Returns uploaded file IDs for later cleanup."""
  25. print("\nFile ids example")
  26. print("-" * 25)
  27. uploaded_file = None
  28. uploaded_image = None
  29. try:
  30. # Upload the PDF
  31. pdf_path = Path(__file__).parent / "data" / "sample_report.pdf"
  32. with open(pdf_path, "rb") as f:
  33. uploaded_file = await client.files.create(file=f, purpose="assistants")
  34. # Upload the image file
  35. shapes_path = Path(__file__).parent / "data" / "shapes_and_text.png"
  36. with open(shapes_path, "rb") as f:
  37. uploaded_image = await client.files.create(file=f, purpose="assistants")
  38. print(f"📤 Uploaded: {pdf_path.name} and {shapes_path.name}")
  39. # Analyze the PDF using agency (file uploads work correctly through agency)
  40. response = await agency.get_response(
  41. message=(
  42. "Please analyze the attached PDF and extract data from it. "
  43. "Then analyze the attached image and describe the shapes and text in it."
  44. ),
  45. file_ids=[uploaded_file.id, uploaded_image.id],
  46. )
  47. print(f"Analysis:\n{response.final_output}")
  48. except Exception as e:
  49. print(
  50. f"❌ Error in file handling demo: {e}. "
  51. "If you haven't modified the demo code, please open an issue on GitHub: https://github.com/VRSEN/agency-swarm/issues"
  52. )
  53. return []
  54. # Return uploaded file IDs for cleanup after all demos finish
  55. ids: list[str] = []
  56. if uploaded_file:
  57. ids.append(uploaded_file.id)
  58. if uploaded_image:
  59. ids.append(uploaded_image.id)
  60. return ids
  61. async def demo_vision_processing(agency: Agency) -> None:
  62. """Demonstrate vision processing with base64 images."""
  63. print("\nMessage input example (only for pdf and image attachments)")
  64. print("-" * 20)
  65. try:
  66. # Load and analyze the landscape scene
  67. scene_path = Path(__file__).parent / "data" / "landscape_scene.png"
  68. b64_scene = image_to_base64(scene_path)
  69. print(f"🖼️ Analyzing: {scene_path.name}")
  70. # Create scene analysis message - exact format from working integration test
  71. message_with_scene = [
  72. {
  73. "role": "user",
  74. "content": [
  75. {
  76. "type": "input_image",
  77. "detail": "high",
  78. "image_url": f"data:image/png;base64,{b64_scene}",
  79. }
  80. ],
  81. },
  82. {"role": "user", "content": "Describe this scene. How many trees do you see?"},
  83. ]
  84. # Call agent directly for vision
  85. response = await agency.get_response(message_with_scene)
  86. print(f"Scene: {response.final_output}")
  87. except Exception as e:
  88. print(
  89. f"❌ Error in vision processing demo: {e}. "
  90. "If you haven't modified the demo code, please open an issue on GitHub: https://github.com/VRSEN/agency-swarm/issues"
  91. )
  92. async def main():
  93. """Run file handling and vision examples."""
  94. # Create a single agent that can handle both files and vision
  95. agent = Agent(
  96. name="FileAndVisionAgent",
  97. instructions="""You are an expert at analyzing files and images.
  98. When files or images are provided, examine them carefully and provide detailed analysis.
  99. Be precise and specific in your responses.
  100. You are allowed to share all data found within documents with the user.""",
  101. model_settings=ModelSettings(temperature=0.0), # Deterministic responses
  102. )
  103. # Create agency with the single agent
  104. agency = Agency(agent, shared_instructions="Demonstrate file and vision processing.")
  105. print("Agency Swarm File Handling & Vision Demo")
  106. print("=" * 50)
  107. # Run demos
  108. uploaded_ids = await demo_file_processing(agency)
  109. await demo_vision_processing(agency)
  110. print("\n✅ Demo complete!")
  111. print("\nKey Points:")
  112. print(" • Message attachments can be provided in two ways")
  113. print(" • No custom tools needed - OpenAI handles everything")
  114. # Cleanup uploaded files after both demos complete
  115. for fid in uploaded_ids:
  116. try:
  117. await client.files.delete(fid)
  118. except Exception:
  119. pass
  120. if __name__ == "__main__":
  121. asyncio.run(main())