# test_compact_client_passthrough.py (8.4 KB)
  1. import pytest
  2. from agency_swarm import Agent
  3. from agency_swarm.ui.demos.launcher import TerminalDemoLauncher
  4. from agency_swarm.utils.thread import ThreadManager
  5. def _seed_messages(agent_name: str) -> list[dict[str, str]]:
  6. return [
  7. {"role": "user", "content": "hello"},
  8. {"role": "assistant", "agent": agent_name, "content": "hi"},
  9. ]
  10. @pytest.fixture(autouse=True)
  11. def _reset_launcher_state():
  12. TerminalDemoLauncher.set_current_chat_id(None)
  13. yield
  14. TerminalDemoLauncher.set_current_chat_id(None)
  15. class _FakeResponses:
  16. def __init__(self, calls_ref: list[dict]):
  17. self._calls = calls_ref
  18. def create(self, *, model: str, input: str, reasoning=None):
  19. self._calls.append({"model": model, "input": input, "reasoning": reasoning})
  20. class _R:
  21. output_text = "summary from fake client"
  22. return _R()
  23. class _FakeClient:
  24. def __init__(self):
  25. self.calls: list[dict] = []
  26. self.responses = _FakeResponses(self.calls)
  27. class _FailingResponses:
  28. def create(self, *_, **__):
  29. raise RuntimeError("network down")
  30. class _FailingClient:
  31. def __init__(self):
  32. self.responses = _FailingResponses()
  33. def _real_agent_with_client(name: str, model: str, client):
  34. a = Agent(name=name, instructions="test")
  35. a.model = model # type: ignore[attr-defined]
  36. a._openai_client_sync = client
  37. return a
  38. class _Agency:
  39. def __init__(self, agent):
  40. self.entry_points = [agent]
  41. self.thread_manager = ThreadManager()
  42. self.thread_manager.replace_messages(_seed_messages(agent.name))
  43. class _SessionAgency:
  44. def __init__(self) -> None:
  45. self.thread_manager = ThreadManager()
  46. @pytest.mark.asyncio
  47. async def test_compact_uses_entry_agent_client_sync_and_model_passthrough():
  48. # Use a non-GPT model to exercise the non-OpenAI reasoning branch
  49. fake_client = _FakeClient()
  50. agent = _real_agent_with_client(name="Coordinator", model="anthropic/claude-3-5-sonnet", client=fake_client)
  51. agency = _Agency(agent)
  52. chat_id = await TerminalDemoLauncher.compact_thread(agency, [])
  53. assert chat_id.startswith("run_demo_chat_")
  54. # Verify the thread was compacted into a single system message
  55. msgs = agency.thread_manager.get_all_messages()
  56. assert len(msgs) == 1
  57. assert msgs[0]["role"] == "system"
  58. assert msgs[0]["content"].startswith("System summary (generated via /compact")
  59. # Verify that the fake client's responses.create was called with the agent's model
  60. assert len(fake_client.calls) >= 1
  61. last = fake_client.calls[-1]
  62. assert last["model"] == "anthropic/claude-3-5-sonnet"
  63. # Non-OpenAI provider branch should not include reasoning param
  64. assert last["reasoning"] is None
  65. # Ensure the conversation payload wrapper is present
  66. assert "<conversation_json>" in last["input"] and "</conversation_json>" in last["input"]
  67. @pytest.mark.asyncio
  68. async def test_compact_omits_reasoning_param_for_openai_model():
  69. """Compact omits reasoning even for OpenAI models (simpler, safe default)."""
  70. fake_client = _FakeClient()
  71. agent = _real_agent_with_client(name="Coordinator", model="gpt-5.4-mini", client=fake_client)
  72. agency = _Agency(agent)
  73. await TerminalDemoLauncher.compact_thread(agency, [])
  74. last = fake_client.calls[-1]
  75. assert last["model"] == "gpt-5.4-mini"
  76. assert last["reasoning"] is None
  77. @pytest.mark.asyncio
  78. async def test_compact_failure_surfaces_error_and_preserves_state(monkeypatch):
  79. failing_agent = _real_agent_with_client(name="Coordinator", model="anthropic/model", client=_FailingClient())
  80. agency = _Agency(failing_agent)
  81. original_messages = agency.thread_manager.get_all_messages()
  82. TerminalDemoLauncher.set_current_chat_id("chat_existing")
  83. with pytest.raises(RuntimeError) as ei:
  84. await TerminalDemoLauncher.compact_thread(agency, [])
  85. # Error is surfaced with context and original cause
  86. assert "/compact failed:" in str(ei.value)
  87. assert "network down" in str(ei.value)
  88. # State is preserved (no chat switch, no message mutation)
  89. assert TerminalDemoLauncher.get_current_chat_id() == "chat_existing"
  90. assert agency.thread_manager.get_all_messages() == original_messages
  91. def test_resume_interactive_list_and_select(tmp_path, monkeypatch):
  92. # Prepare fake chats dir
  93. TerminalDemoLauncher.set_chats_dir(str(tmp_path))
  94. # Build a minimal agency shim compatible with resume/save
  95. class _T:
  96. def __init__(self):
  97. self._msgs = []
  98. def get_all_messages(self):
  99. return list(self._msgs)
  100. def replace_messages(self, msgs):
  101. self._msgs = list(msgs)
  102. def clear(self):
  103. self._msgs.clear()
  104. def add_message(self, m):
  105. self._msgs.append(m)
  106. def add_messages(self, ms):
  107. self._msgs.extend(ms)
  108. class _A:
  109. def __init__(self):
  110. self.thread_manager = ThreadManager()
  111. agency = _A()
  112. # Chat A
  113. agency.thread_manager.clear()
  114. agency.thread_manager.add_message({"role": "user", "content": "hey bro"})
  115. cid_a = "chat_a"
  116. TerminalDemoLauncher.save_current_chat(agency, cid_a)
  117. # Chat B
  118. agency.thread_manager.clear()
  119. agency.thread_manager.add_message({"role": "user", "content": "poem request"})
  120. cid_b = "chat_b"
  121. TerminalDemoLauncher.save_current_chat(agency, cid_b)
  122. # Intercept input to choose the second entry (B)
  123. inputs = iter(["2"]) # select index 2
  124. def fake_input(prompt: str = "") -> str:
  125. try:
  126. return next(inputs)
  127. except StopIteration:
  128. return ""
  129. printed: list[str] = []
  130. def fake_print(*args, **kwargs):
  131. line = " ".join(str(a) for a in args)
  132. printed.append(line)
  133. # Avoid radiolist UI by simulating a running loop so fallback path is taken
  134. import asyncio
  135. monkeypatch.setattr(asyncio, "get_running_loop", lambda: object())
  136. chosen = TerminalDemoLauncher.resume_interactive(agency, input_func=fake_input, print_func=fake_print)
  137. assert chosen in {cid_a, cid_b}
  138. # After resume, agency should have loaded selected chat (either A or B)
  139. msgs = agency.thread_manager.get_all_messages()
  140. assert isinstance(msgs, list) and len(msgs) >= 1
  141. # Printed list should include header and at least two rows
  142. assert any("Modified" in ln and "Created" in ln for ln in printed)
  143. assert sum(1 for ln in printed if ln.strip().startswith("1.")) >= 1
  144. assert sum(1 for ln in printed if ln.strip().startswith("2.")) >= 1
  145. # Index file should exist and include both chats with summaries
  146. import json
  147. import os
  148. index_path = TerminalDemoLauncher._index_file_path()
  149. assert os.path.exists(index_path)
  150. with open(index_path) as f:
  151. idx = json.load(f)
  152. assert "chat_a" in idx and "chat_b" in idx
  153. assert idx["chat_a"].get("summary") == "hey bro"
  154. def test_start_new_chat_switches_context_without_touching_saved_history(tmp_path):
  155. TerminalDemoLauncher.set_chats_dir(str(tmp_path))
  156. agency = _SessionAgency()
  157. agency.thread_manager.add_message({"role": "user", "content": "hello"})
  158. agency.thread_manager.add_message({"role": "assistant", "content": "hi"})
  159. original_chat_id = "chat_original"
  160. TerminalDemoLauncher.save_current_chat(agency, original_chat_id)
  161. existing_files = {path.name for path in tmp_path.iterdir()}
  162. next_chat_id = TerminalDemoLauncher.start_new_chat(agency)
  163. assert next_chat_id != original_chat_id
  164. assert TerminalDemoLauncher.get_current_chat_id() == next_chat_id
  165. assert agency.thread_manager.get_all_messages() == []
  166. assert {path.name for path in tmp_path.iterdir()} == existing_files
  167. def test_load_chat_sets_current_id_without_creating_new_files(tmp_path):
  168. TerminalDemoLauncher.set_chats_dir(str(tmp_path))
  169. seed_agency = _SessionAgency()
  170. seed_agency.thread_manager.add_message({"role": "user", "content": "hello"})
  171. seed_agency.thread_manager.add_message({"role": "assistant", "content": "hi"})
  172. chat_id = "chat_existing"
  173. TerminalDemoLauncher.save_current_chat(seed_agency, chat_id)
  174. existing_files = {path.name for path in tmp_path.iterdir()}
  175. resumed = _SessionAgency()
  176. assert TerminalDemoLauncher.load_chat(resumed, chat_id)
  177. assert [m["content"] for m in resumed.thread_manager.get_all_messages()] == ["hello", "hi"]
  178. assert TerminalDemoLauncher.get_current_chat_id() == chat_id
  179. assert {path.name for path in tmp_path.iterdir()} == existing_files