# test_hosted_tool_results.py

  1. import pytest
  2. from agents.items import MessageOutputItem, ToolCallItem
  3. from openai.types.responses.response_file_search_tool_call import (
  4. ResponseFileSearchToolCall,
  5. Result as FileSearchResult,
  6. )
  7. from openai.types.responses.response_function_web_search import ActionSearch, ResponseFunctionWebSearch
  8. from openai.types.responses.response_output_message import ResponseOutputMessage, ResponseOutputText
  9. from agency_swarm.agent.core import Agent
  10. from agency_swarm.messages import MessageFormatter
  11. @pytest.mark.asyncio
  12. async def test_web_search_results_have_metadata():
  13. """Verify web search results are returned as user messages with metadata."""
  14. agent = Agent(name="MetaAgent", instructions="Test")
  15. web_call = ResponseFunctionWebSearch(
  16. id="1",
  17. action=ActionSearch(query="hello", type="search"),
  18. status="completed",
  19. type="web_search_call",
  20. )
  21. assistant_msg = ResponseOutputMessage(
  22. id="m1",
  23. content=[ResponseOutputText(annotations=[], text="result", type="output_text")],
  24. role="assistant",
  25. status="completed",
  26. type="message",
  27. )
  28. run_items = [
  29. ToolCallItem(agent, web_call),
  30. MessageOutputItem(agent, assistant_msg),
  31. ]
  32. results = MessageFormatter.extract_hosted_tool_results(
  33. agent,
  34. run_items,
  35. caller_agent="Researcher",
  36. )
  37. assert results, "Expected hosted tool result"
  38. result = results[0]
  39. assert result.get("agent") == agent.name
  40. assert result.get("callerAgent") == "Researcher"
  41. assert "WEB_SEARCH_RESULTS" in result.get("content", "")
  42. def test_extract_no_results_returns_empty():
  43. """Ensure empty list is returned when no hosted tool calls present."""
  44. agent = Agent(name="EmptyAgent", instructions="Test")
  45. results = MessageFormatter.extract_hosted_tool_results(
  46. agent,
  47. [],
  48. caller_agent="AnyAgent",
  49. )
  50. assert results == []
  51. def test_web_search_results_deduplicated():
  52. """Only one synthetic result should be created for multiple assistant messages."""
  53. agent = Agent(name="MetaAgent", instructions="Test")
  54. web_call = ResponseFunctionWebSearch(
  55. id="1",
  56. action=ActionSearch(query="hello", type="search"),
  57. status="completed",
  58. type="web_search_call",
  59. )
  60. assistant_msgs = [
  61. ResponseOutputMessage(
  62. id="m1",
  63. content=[ResponseOutputText(annotations=[], text="result1", type="output_text")],
  64. role="assistant",
  65. status="completed",
  66. type="message",
  67. ),
  68. ResponseOutputMessage(
  69. id="m2",
  70. content=[ResponseOutputText(annotations=[], text="result2", type="output_text")],
  71. role="assistant",
  72. status="completed",
  73. type="message",
  74. ),
  75. ]
  76. run_items = [ToolCallItem(agent, web_call)] + [MessageOutputItem(agent, m) for m in assistant_msgs]
  77. results = MessageFormatter.extract_hosted_tool_results(agent, run_items) # type: ignore[arg-type]
  78. assert len(results) == 1
  79. assert "result1" in results[0]["content"]
  80. assert "result2" not in results[0]["content"]
  81. def test_multiple_web_searches_get_distinct_results():
  82. """Each web search should get its own corresponding assistant message content."""
  83. agent = Agent(name="SearchAgent", instructions="Test")
  84. # First web search and its result
  85. web_call1 = ResponseFunctionWebSearch(
  86. id="search_1",
  87. action=ActionSearch(query="python", type="search"),
  88. status="completed",
  89. type="web_search_call",
  90. )
  91. assistant_msg1 = ResponseOutputMessage(
  92. id="msg_1",
  93. content=[ResponseOutputText(annotations=[], text="Python results", type="output_text")],
  94. role="assistant",
  95. status="completed",
  96. type="message",
  97. )
  98. # Second web search and its result
  99. web_call2 = ResponseFunctionWebSearch(
  100. id="search_2",
  101. action=ActionSearch(query="javascript", type="search"),
  102. status="completed",
  103. type="web_search_call",
  104. )
  105. assistant_msg2 = ResponseOutputMessage(
  106. id="msg_2",
  107. content=[ResponseOutputText(annotations=[], text="JavaScript results", type="output_text")],
  108. role="assistant",
  109. status="completed",
  110. type="message",
  111. )
  112. # Build run items in order: search1, msg1, search2, msg2
  113. run_items = [
  114. ToolCallItem(agent, web_call1),
  115. MessageOutputItem(agent, assistant_msg1),
  116. ToolCallItem(agent, web_call2),
  117. MessageOutputItem(agent, assistant_msg2),
  118. ]
  119. results = MessageFormatter.extract_hosted_tool_results(agent, run_items)
  120. # Should create two synthetic results
  121. assert len(results) == 2, "Expected two results for two web searches"
  122. # First result should have Python content
  123. assert "search_1" in results[0]["content"]
  124. assert "Python results" in results[0]["content"]
  125. assert "JavaScript results" not in results[0]["content"]
  126. # Second result should have JavaScript content
  127. assert "search_2" in results[1]["content"]
  128. assert "JavaScript results" in results[1]["content"]
  129. assert "Python results" not in results[1]["content"]
  130. def test_file_search_results_only_persist_for_executing_agent():
  131. """Ensure hosted tool preservation is only emitted by the agent that ran the tool."""
  132. ceo = Agent(name="CEO", instructions="Test")
  133. worker = Agent(name="Worker", instructions="Test")
  134. tool_call = ResponseFileSearchToolCall(
  135. id="fs_unique",
  136. queries=["favorite books"],
  137. status="completed",
  138. type="file_search_call",
  139. results=[
  140. FileSearchResult(
  141. file_id="file-1",
  142. filename="favorite_books.txt",
  143. score=0.9,
  144. text="Books list",
  145. )
  146. ],
  147. )
  148. hosted_run_items = [ToolCallItem(ceo, tool_call)]
  149. ceo_results = MessageFormatter.extract_hosted_tool_results(
  150. ceo,
  151. hosted_run_items,
  152. caller_agent="Worker",
  153. )
  154. assert ceo_results, "Executing agent should persist hosted tool results"
  155. worker_results = MessageFormatter.extract_hosted_tool_results(
  156. worker,
  157. hosted_run_items,
  158. caller_agent="Worker",
  159. )
  160. assert worker_results == [], "Non-executing agent must not duplicate hosted tool preservation"
  161. def test_web_search_results_only_persist_for_executing_agent():
  162. """Ensure web search preservation is written only by the executing agent."""
  163. ceo = Agent(name="CEO", instructions="Test")
  164. worker = Agent(name="Worker", instructions="Test")
  165. web_call = ResponseFunctionWebSearch(
  166. id="web_unique",
  167. action=ActionSearch(query="web search", type="search"),
  168. status="completed",
  169. type="web_search_call",
  170. )
  171. assistant_msg = ResponseOutputMessage(
  172. id="web_msg",
  173. content=[ResponseOutputText(annotations=[], text="Search content", type="output_text")],
  174. role="assistant",
  175. status="completed",
  176. type="message",
  177. )
  178. run_items = [
  179. ToolCallItem(ceo, web_call),
  180. MessageOutputItem(ceo, assistant_msg),
  181. ]
  182. ceo_results = MessageFormatter.extract_hosted_tool_results(
  183. ceo,
  184. run_items,
  185. caller_agent="Worker",
  186. )
  187. assert ceo_results, "Executing agent should persist web search results"
  188. assert "WEB_SEARCH_RESULTS" in ceo_results[0]["content"]
  189. worker_results = MessageFormatter.extract_hosted_tool_results(
  190. worker,
  191. run_items,
  192. caller_agent="Worker",
  193. )
  194. assert worker_results == [], "Non-executing agent must not duplicate web search preservation"