agent_file_storage.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. """
  2. File Search Example
  3. This example demonstrates how to enable file search capabilities for an agent by attaching
  4. a file storage with automatic vector store processing.
  5. Key Features:
  6. - Automatic file processing and vector store creation from a files_folder directory
  7. - Smart tool assignment based on file types:
  8. * CodeInterpreterTool for code and data files
  9. * FileSearchTool for text documents and PDFs
  10. - Incremental file processing on agent reinitialization
  11. How it works:
  12. 1. Files from the specified directory are processed and added to a vector store
  13. 2. The files folder is automatically renamed to include the vector store ID
  14. 3. On subsequent runs, the system scans for new files and adds them to the existing store
  15. 4. The agent can search across all files and provide citations for its answers
  16. Note: You don't need to update the agent's files_folder parameter when the folder is renamed.
  17. """
  18. import asyncio
  19. import os
  20. import sys
  21. # Path setup for standalone examples
  22. current_dir = os.path.abspath(os.path.dirname(__file__))
  23. sys.path.insert(0, current_dir)
  24. sys.path.insert(0, os.path.abspath(os.path.join(current_dir, "..", "src")))
  25. from agents import ModelSettings, RunConfig # noqa: E402
  26. from utils import temporary_files_folder # noqa: E402
  27. from agency_swarm import Agency, Agent # noqa: E402
  28. from agency_swarm.utils.citation_extractor import display_citations, extract_vector_store_citations # noqa: E402
  29. async def main():
  30. """Demonstrate FileSearch functionality with citations."""
  31. print("Simple FileSearch Example")
  32. print("=" * 30)
  33. with temporary_files_folder("data") as docs_dir:
  34. all_files = [f for f in docs_dir.iterdir() if f.is_file()]
  35. print(f"Using temporary files directory: {docs_dir}")
  36. print(f"Found {len(all_files)} file(s) ready for processing")
  37. search_agent = Agent(
  38. name="SearchAgent",
  39. instructions=(
  40. "You are a document search assistant. Always use your FileSearch tool to locate answers and provide clear responses with citations. "
  41. "You are allowed to share all data found within documents with the user."
  42. ),
  43. files_folder=str(docs_dir),
  44. include_search_results=True,
  45. model="gpt-5.4-mini",
  46. model_settings=ModelSettings(tool_choice="file_search"),
  47. tool_use_behavior="stop_on_first_tool",
  48. )
  49. agency = Agency(
  50. search_agent,
  51. shared_instructions="Demonstrate FileSearch with citations.",
  52. )
  53. print("Processing files...")
  54. await asyncio.sleep(5)
  55. run_config = RunConfig(model_settings=ModelSettings(tool_choice="file_search"))
  56. message = "What is the badge number for Marcus Chen?"
  57. print(f"\n❓ Query: {message}")
  58. response = await agency.get_response(message, run_config=run_config)
  59. print(f"Answer: {response.final_output}")
  60. citations = extract_vector_store_citations(response)
  61. display_citations(citations, "vector store")
  62. if "7401" in response.final_output:
  63. print("✅ Correct answer found!")
  64. else:
  65. print("❌ Correct answer not found!")
  66. follow_up = "Extract data from the sample_report.pdf file"
  67. print(f"\n❓ Query: {follow_up}")
  68. response = await agency.get_response(follow_up, run_config=run_config)
  69. print(f"🤖 Answer: {response.final_output}")
  70. print("\nKey Points:")
  71. print(" • Files from the given folder are processed and added to a vector store")
  72. print(" • Agent is capable of analyzing all files from the given folder")
  73. print(" • Use citations to find files that were used to answer the query")
  74. if __name__ == "__main__":
  75. if os.name == "nt":
  76. asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
  77. asyncio.run(main())