# test_agent_file_manager.py (35 KB)
  1. """Tests for agency_swarm.agent.file_manager module."""
  2. import logging
  3. import os
  4. import tempfile
  5. from pathlib import Path
  6. from typing import Literal
  7. from unittest.mock import Mock, call, patch
  8. import pytest
  9. from agents import CodeInterpreterTool, FileSearchTool
  10. from agents.exceptions import AgentsException
  11. from openai import NotFoundError
  12. from openai._types import omit
  13. from openai.pagination import SyncCursorPage
  14. from openai.types.vector_stores.vector_store_file import LastError, VectorStoreFile
  15. from agency_swarm.agent.file_manager import AgentFileManager
  16. from agency_swarm.agent.file_sync import FileSync
  17. def make_vector_store_file(
  18. *,
  19. file_id: str,
  20. vector_store_id: str,
  21. status: Literal["in_progress", "completed", "cancelled", "failed"] = "completed",
  22. last_error: LastError | None = None,
  23. ) -> VectorStoreFile:
  24. return VectorStoreFile.model_construct(
  25. id=file_id,
  26. created_at=0,
  27. object="vector_store.file",
  28. status=status,
  29. usage_bytes=0,
  30. vector_store_id=vector_store_id,
  31. last_error=last_error,
  32. attributes=None,
  33. chunking_strategy=None,
  34. )
  35. class TestAgentFileManager:
  36. """Test AgentFileManager class functionality."""
  37. def test_should_ignore_file(self):
  38. """Test _should_ignore_file method ignores files starting with '.' or '__'."""
  39. mock_agent = Mock()
  40. mock_agent.name = "TestAgent"
  41. file_manager = AgentFileManager(mock_agent)
  42. # Files that should be ignored
  43. assert file_manager._should_skip_file(".gitignore") is True
  44. assert file_manager._should_skip_file("__pycache__") is True
  45. assert file_manager._should_skip_file("__init__.py") is True
  46. # Files that should not be ignored
  47. assert file_manager._should_skip_file("regular_file.txt") is False
  48. assert file_manager._should_skip_file("file_with_underscore.txt") is False
  49. @patch("agency_swarm.agent.file_manager.AgentFileManager._upload_file_by_type")
  50. def test_parse_files_folder_ignores_dot_and_dunder_files(self, mock_upload):
  51. """Test that parse_files_folder_for_vs_id ignores files starting with '.' or '__'."""
  52. mock_agent = Mock()
  53. mock_agent.name = "TestAgent"
  54. mock_agent.files_folder = "test_files"
  55. mock_agent.get_class_folder_path.return_value = "/fake/path"
  56. mock_agent.client_sync.vector_stores.create.return_value = Mock(id="vs_123")
  57. mock_agent.tools = [] # Empty tools list
  58. file_manager = AgentFileManager(mock_agent)
  59. # Define which entries are files vs directories
  60. def mock_isfile(path):
  61. filename = os.path.basename(path)
  62. # __pycache__ is a directory, everything else is a file
  63. return filename != "__pycache__"
  64. # Mock the path operations
  65. with (
  66. patch("pathlib.Path.exists", return_value=True),
  67. patch("pathlib.Path.is_dir", return_value=True),
  68. patch("pathlib.Path.resolve", return_value=Path("/fake/path/test_files_vs_123")),
  69. patch("pathlib.Path.mkdir"),
  70. patch("pathlib.Path.rename"),
  71. patch("os.listdir") as mock_listdir,
  72. patch("os.path.isfile", side_effect=mock_isfile),
  73. patch.object(file_manager, "add_file_search_tool"),
  74. patch.object(file_manager, "add_code_interpreter_tool"),
  75. ):
  76. # Simulate files in the directory, including ones that should be ignored
  77. mock_listdir.return_value = [
  78. "regular_file.txt",
  79. ".gitignore",
  80. "__pycache__",
  81. ".env",
  82. "document.pdf",
  83. "__init__.py",
  84. ]
  85. # Mock upload method to return None (no file ID)
  86. mock_upload.return_value = None
  87. file_manager.parse_files_folder_for_vs_id()
  88. # Verify that only non-ignored files were processed
  89. actual_calls = mock_upload.call_args_list
  90. assert len(actual_calls) == 2
  91. # Check that ignored files were not processed
  92. processed_files = [str(call[0][0].name) for call in actual_calls]
  93. assert "regular_file.txt" in processed_files
  94. assert "document.pdf" in processed_files
  95. assert ".gitignore" not in processed_files
  96. assert "__pycache__" not in processed_files
  97. assert ".env" not in processed_files
  98. assert "__init__.py" not in processed_files
  99. def test_upload_file_input_validation_errors(self):
  100. """upload_file should reject missing file paths and missing agent files_folder_path state."""
  101. missing_file_manager = AgentFileManager(Mock(name="TestAgent"))
  102. with pytest.raises(FileNotFoundError, match="File not found at /nonexistent/file.txt"):
  103. missing_file_manager.upload_file("/nonexistent/file.txt")
  104. mock_agent = Mock()
  105. mock_agent.name = "TestAgent"
  106. mock_agent.files_folder_path = None
  107. no_folder_manager = AgentFileManager(mock_agent)
  108. with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp_file:
  109. tmp_file.write(b"test content")
  110. tmp_file_path = tmp_file.name
  111. try:
  112. with pytest.raises(AgentsException, match="Cannot upload file. Agent_files_folder_path is not set"):
  113. no_folder_manager.upload_file(tmp_file_path)
  114. finally:
  115. os.unlink(tmp_file_path)
  116. def test_upload_file_association_failures_are_non_fatal(self):
  117. """Missing vector stores or association wait failures should still return uploaded file ids."""
  118. with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp_file:
  119. tmp_file.write(b"test content")
  120. tmp_file_path = tmp_file.name
  121. try:
  122. not_found_agent = Mock()
  123. not_found_agent.name = "TestAgent"
  124. not_found_agent.files_folder_path = Path("/tmp/test")
  125. not_found_agent._associated_vector_store_id = "vs_missing123"
  126. not_found_agent.client_sync.files.create.return_value = Mock(id="file-123")
  127. not_found_response = Mock()
  128. not_found_response.status_code = 404
  129. not_found_agent.client_sync.vector_stores.retrieve.side_effect = NotFoundError(
  130. "Vector store not found", response=not_found_response, body={"error": "not_found"}
  131. )
  132. not_found_agent.client_sync.vector_stores.files.create = Mock()
  133. not_found_manager = AgentFileManager(not_found_agent)
  134. not_found_manager.get_id_from_file = Mock(return_value=None)
  135. assert not_found_manager.upload_file(tmp_file_path) == "file-123"
  136. not_found_agent.client_sync.vector_stores.files.create.assert_not_called()
  137. association_agent = Mock()
  138. association_agent.name = "TestAgent"
  139. association_agent.files_folder_path = Path("/tmp/test")
  140. association_agent._associated_vector_store_id = "vs_valid123"
  141. association_agent.client_sync.files.create.return_value = Mock(id="file-123")
  142. association_agent.client_sync.vector_stores.retrieve.return_value = Mock()
  143. association_manager = AgentFileManager(association_agent)
  144. association_manager.get_id_from_file = Mock(return_value=None)
  145. with patch.object(
  146. association_manager._sync,
  147. "wait_for_vector_store_files_ready",
  148. side_effect=Exception("Association failed"),
  149. ):
  150. assert association_manager.upload_file(tmp_file_path) == "file-123"
  151. finally:
  152. os.unlink(tmp_file_path)
  153. def test_upload_file_waits_for_vector_store_ingestion(self, tmp_path):
  154. """Association tolerates initial NotFound and polls until completion."""
  155. mock_agent = Mock()
  156. mock_agent.name = "PollingAgent"
  157. mock_agent.files_folder_path = tmp_path
  158. mock_agent._associated_vector_store_id = "vs_valid123"
  159. mock_agent.client_sync = Mock()
  160. uploaded = Mock(id="file-abc123")
  161. uploaded.created_at = 1_700_000_000
  162. mock_agent.client_sync.files.create.return_value = uploaded
  163. mock_agent.client_sync.vector_stores.retrieve.return_value = Mock()
  164. mock_agent.client_sync.vector_stores.files.create.return_value = Mock(status="in_progress")
  165. file_manager = AgentFileManager(mock_agent)
  166. file_manager.get_id_from_file = Mock(return_value=None)
  167. local_file = tmp_path / "report.txt"
  168. local_file.write_text("contents", encoding="utf-8")
  169. with patch.object(file_manager._sync, "wait_for_vector_store_files_ready") as mock_wait:
  170. result = file_manager.upload_file(str(local_file))
  171. assert result == uploaded.id
  172. assert mock_agent.client_sync.vector_stores.files.create.call_count == 1
  173. mock_wait.assert_called_once_with([("vs_valid123", uploaded.id)])
  174. def test_upload_file_defers_wait_when_pending_list_provided(self, tmp_path):
  175. """upload_file can defer vector store polling when provided with a pending list."""
  176. mock_agent = Mock()
  177. mock_agent.name = "BatchingAgent"
  178. mock_agent.files_folder_path = tmp_path
  179. mock_agent._associated_vector_store_id = "vs_batch123"
  180. mock_agent.client_sync = Mock()
  181. uploaded = Mock(id="file-batch123")
  182. uploaded.created_at = 1_700_000_000
  183. mock_agent.client_sync.files.create.return_value = uploaded
  184. mock_agent.client_sync.vector_stores.retrieve.return_value = Mock()
  185. mock_agent.client_sync.vector_stores.files.create.return_value = Mock(status="in_progress")
  186. file_manager = AgentFileManager(mock_agent)
  187. file_manager.get_id_from_file = Mock(return_value=None)
  188. local_file = tmp_path / "report.txt"
  189. local_file.write_text("contents", encoding="utf-8")
  190. pending: list[tuple[str, str]] = []
  191. with patch.object(file_manager._sync, "wait_for_vector_store_files_ready") as mock_wait:
  192. result = file_manager.upload_file(
  193. str(local_file),
  194. wait_for_ingestion=False,
  195. pending_ingestions=pending,
  196. )
  197. assert result == uploaded.id
  198. assert pending == [("vs_batch123", uploaded.id)]
  199. mock_wait.assert_not_called()
  200. def test_upload_file_requires_pending_list_when_wait_deferred(self, tmp_path):
  201. """wait_for_ingestion=False without pending list raises to prevent silent starvation."""
  202. mock_agent = Mock()
  203. mock_agent.name = "GuardAgent"
  204. mock_agent.files_folder_path = tmp_path
  205. mock_agent._associated_vector_store_id = "vs_guard123"
  206. mock_agent.client_sync = Mock()
  207. uploaded = Mock(id="file-guard123")
  208. uploaded.created_at = 1_700_000_000
  209. mock_agent.client_sync.files.create.return_value = uploaded
  210. mock_agent.client_sync.vector_stores.retrieve.return_value = Mock()
  211. mock_agent.client_sync.vector_stores.files.create.return_value = Mock(status="in_progress")
  212. file_manager = AgentFileManager(mock_agent)
  213. file_manager.get_id_from_file = Mock(return_value=None)
  214. local_file = tmp_path / "report.txt"
  215. local_file.write_text("contents", encoding="utf-8")
  216. with pytest.raises(ValueError, match="pending_ingestions"):
  217. file_manager.upload_file(
  218. str(local_file),
  219. wait_for_ingestion=False,
  220. )
  221. def test_upload_file_raises_on_vector_store_ingestion_failure(self, tmp_path):
  222. """Vector store ingestion failures surface as AgentsException."""
  223. mock_agent = Mock()
  224. mock_agent.name = "FailingAgent"
  225. mock_agent.files_folder_path = tmp_path
  226. mock_agent._associated_vector_store_id = "vs_fail123"
  227. mock_agent.client_sync = Mock()
  228. uploaded = Mock()
  229. uploaded.id = "file-fail123"
  230. uploaded.created_at = 1_700_000_000
  231. mock_agent.client_sync.files.create.return_value = uploaded
  232. mock_agent.client_sync.vector_stores.retrieve.return_value = Mock()
  233. mock_agent.client_sync.vector_stores.files.create.return_value = Mock(status="in_progress")
  234. failure_vs_file = make_vector_store_file(
  235. file_id=uploaded.id,
  236. vector_store_id="vs_fail123",
  237. status="failed",
  238. last_error=LastError(code="server_error", message="ingestion failed"),
  239. )
  240. mock_agent.client_sync.vector_stores.files.retrieve.return_value = failure_vs_file
  241. file_manager = AgentFileManager(mock_agent)
  242. file_manager.get_id_from_file = Mock(return_value=None)
  243. local_file = tmp_path / "report.txt"
  244. local_file.write_text("contents", encoding="utf-8")
  245. with pytest.raises(AgentsException, match="ingestion failed"):
  246. file_manager.upload_file(str(local_file))
  247. def test_file_sync_wait_for_vector_store_file_ready_handles_not_found(self):
  248. """FileSync polling handles initial NotFound and completes when status ready."""
  249. mock_agent = Mock()
  250. mock_agent.name = "SyncAgent"
  251. mock_agent.client_sync = Mock()
  252. not_found_error = NotFoundError(
  253. "missing",
  254. response=Mock(status_code=404),
  255. body={"error": "not_found"},
  256. )
  257. mock_agent.client_sync.vector_stores.files.retrieve.side_effect = [
  258. not_found_error,
  259. Mock(status="in_progress"),
  260. Mock(status="completed"),
  261. ]
  262. sync = FileSync(mock_agent)
  263. with patch.object(sync, "_sleep") as mock_sleep:
  264. sync.wait_for_vector_store_file_ready(vector_store_id="vs123", file_id="file456", timeout_seconds=5.0)
  265. assert mock_agent.client_sync.vector_stores.files.retrieve.call_count == 3
  266. assert mock_sleep.call_count == 2
  267. def test_file_sync_list_all_vector_store_files_passes_after_parameter(self):
  268. """FileSync.list_all_vector_store_files includes None and cursor values for after parameter."""
  269. mock_agent = Mock()
  270. mock_agent.name = "SyncAgent"
  271. mock_agent.client_sync = Mock()
  272. first_file = make_vector_store_file(file_id="file-1", vector_store_id="vs123")
  273. second_file = make_vector_store_file(file_id="file-2", vector_store_id="vs123")
  274. first_resp = SyncCursorPage(
  275. data=[first_file],
  276. has_more=True,
  277. client=None,
  278. params={},
  279. options={},
  280. )
  281. second_resp = SyncCursorPage(
  282. data=[second_file],
  283. has_more=False,
  284. client=None,
  285. params={},
  286. options={},
  287. )
  288. mock_agent.client_sync.vector_stores.files.list.side_effect = [first_resp, second_resp]
  289. sync = FileSync(mock_agent)
  290. result = sync.list_all_vector_store_files("vs123")
  291. assert result == [first_file, second_file]
  292. calls = mock_agent.client_sync.vector_stores.files.list.call_args_list
  293. assert calls[0].kwargs == {"vector_store_id": "vs123", "limit": 100, "after": omit}
  294. assert calls[1].kwargs == {"vector_store_id": "vs123", "limit": 100, "after": "file-1"}
  295. def test_upload_file_preserves_stem_and_sets_remote_mtime(self, tmp_path):
  296. """Uploading files with '_file-' in the stem preserves the stem and aligns mtime with remote timestamp."""
  297. mock_agent = Mock()
  298. mock_agent.name = "TestAgent"
  299. mock_agent.files_folder_path = tmp_path
  300. mock_agent._associated_vector_store_id = None
  301. mock_agent.client_sync = Mock()
  302. uploaded = Mock()
  303. uploaded.id = "file-S1ABocPKz5LspVToYHJXWP"
  304. uploaded.created_at = 1_700_000_000
  305. mock_agent.client_sync.files.create.return_value = uploaded
  306. file_manager = AgentFileManager(mock_agent)
  307. original_path = tmp_path / "report_file-final.txt"
  308. original_path.write_text("content", encoding="utf-8")
  309. # Sanity check: filenames containing '_file-' but lacking an id should produce no match
  310. assert file_manager.get_id_from_file(original_path) is None
  311. with patch("agency_swarm.agent.file_manager.os.utime") as mock_utime:
  312. result = file_manager.upload_file(str(original_path))
  313. assert result == uploaded.id
  314. renamed_path = tmp_path / "report_file-final_file-S1ABocPKz5LspVToYHJXWP.txt"
  315. assert renamed_path.exists()
  316. assert file_manager.get_id_from_file(renamed_path) == uploaded.id
  317. mock_agent.client_sync.files.retrieve.assert_not_called()
  318. mock_utime.assert_called_once_with(renamed_path, (float(uploaded.created_at), float(uploaded.created_at)))
  319. def test_get_id_from_file_handles_missing_and_valid_id_patterns(self, tmp_path: Path):
  320. """get_id_from_file should fail for missing files and parse supported OpenAI id formats."""
  321. mock_agent = Mock()
  322. file_manager = AgentFileManager(mock_agent)
  323. with pytest.raises(FileNotFoundError, match="File not found: /nonexistent/file.txt"):
  324. file_manager.get_id_from_file("/nonexistent/file.txt")
  325. valid_cases = [
  326. ("notes_file-XugufptanjcVTjYYDTTadG.txt", "file-XugufptanjcVTjYYDTTadG"),
  327. ("draft_file-abcdefghijklmnopqrstuv.txt", "file-abcdefghijklmnopqrstuv"),
  328. ]
  329. for filename, expected in valid_cases:
  330. path = tmp_path / filename
  331. path.write_text("content", encoding="utf-8")
  332. assert file_manager.get_id_from_file(path) == expected
  333. def test_parse_files_folder_reuses_detected_vector_store(self, tmp_path, caplog):
  334. """Reuse an existing vector store directory without logging errors."""
  335. mock_agent = Mock()
  336. mock_agent.name = "TestAgent"
  337. mock_agent.files_folder = "files_vs_outdated812h32989d18h2g8h213h912"
  338. mock_agent.get_class_folder_path.return_value = str(tmp_path)
  339. mock_agent.tools = []
  340. mock_agent.add_tool = Mock()
  341. mock_agent.client_sync.vector_stores.create = Mock()
  342. existing_vs_dir = tmp_path / "files_vs_existing98123hv8912h982y912df"
  343. existing_vs_dir.mkdir()
  344. file_manager = AgentFileManager(mock_agent)
  345. with (
  346. patch.object(AgentFileManager, "upload_file", return_value="file-1"),
  347. patch.object(AgentFileManager, "add_file_search_tool"),
  348. patch.object(AgentFileManager, "add_code_interpreter_tool"),
  349. patch("os.listdir", return_value=[]),
  350. caplog.at_level(logging.ERROR),
  351. ):
  352. file_manager.parse_files_folder_for_vs_id()
  353. assert mock_agent.client_sync.vector_stores.create.call_count == 0
  354. assert mock_agent._associated_vector_store_id == "vs_existing98123hv8912h982y912df"
  355. assert mock_agent.files_folder_path == existing_vs_dir.resolve()
  356. assert mock_agent.files_folder == str(existing_vs_dir)
  357. assert "Files folder" not in caplog.text
  358. def test_parse_files_folder_creates_vector_store_without_warning(self, tmp_path, caplog):
  359. """Create and rename folder on first discovery when directory has files."""
  360. mock_agent = Mock()
  361. mock_agent.name = "TestAgent"
  362. mock_agent.files_folder = "files"
  363. mock_agent.get_class_folder_path.return_value = str(tmp_path)
  364. mock_agent.tools = []
  365. mock_agent.add_tool = Mock()
  366. mock_agent.client_sync.vector_stores.create.return_value = Mock(id="vs_created456")
  367. original_dir = tmp_path / "files"
  368. original_dir.mkdir()
  369. # Create a file so the folder is not empty
  370. (original_dir / "document.txt").write_text("content")
  371. file_manager = AgentFileManager(mock_agent)
  372. with (
  373. patch.object(AgentFileManager, "upload_file", return_value="file-1"),
  374. patch.object(AgentFileManager, "add_file_search_tool"),
  375. patch.object(AgentFileManager, "add_code_interpreter_tool"),
  376. patch("os.listdir", return_value=["document.txt"]),
  377. caplog.at_level(logging.ERROR),
  378. ):
  379. file_manager.parse_files_folder_for_vs_id()
  380. expected_dir = tmp_path / "files_vs_created456"
  381. assert expected_dir.exists()
  382. assert mock_agent._associated_vector_store_id == "vs_created456"
  383. assert mock_agent.files_folder_path == expected_dir.resolve()
  384. assert "Files folder" not in caplog.text
  385. def test_parse_files_folder_invalid_path_shapes_log_and_skip(self, tmp_path, caplog):
  386. """Missing folders, file paths, and missing explicit vector-store paths should log and skip setup."""
  387. cases = [
  388. ("missing_folder", "missing_folder", None, "missing_folder' does not exist. Skipping..."),
  389. ("path_is_file", "files", "This is a file, not a directory", "is not a directory"),
  390. ("missing_vector_store", "files_vs_missing123", None, "does not exist"),
  391. ]
  392. for case_name, files_folder, file_contents, expected_message in cases:
  393. case_root = tmp_path / case_name
  394. case_root.mkdir()
  395. if file_contents is not None:
  396. (case_root / files_folder).write_text(file_contents, encoding="utf-8")
  397. mock_agent = Mock()
  398. mock_agent.name = "TestAgent"
  399. mock_agent.files_folder = files_folder
  400. mock_agent.get_class_folder_path.return_value = str(case_root)
  401. mock_agent.tools = []
  402. mock_agent.client_sync = Mock()
  403. mock_agent.add_tool = Mock()
  404. file_manager = AgentFileManager(mock_agent)
  405. with caplog.at_level(logging.ERROR):
  406. file_manager.parse_files_folder_for_vs_id()
  407. assert expected_message in caplog.text
  408. assert mock_agent.files_folder_path is None
  409. assert mock_agent._associated_vector_store_id is None
  410. caplog.clear()
  411. def test_vector_store_discovery_patterns(self, tmp_path):
  412. """Vector store discovery should handle multi-_vs_ names, explicit targets, and underscored base folders."""
  413. cases = [
  414. {
  415. "name": "multiple_vs",
  416. "files_folder": "my_vs_test",
  417. "dirs": [
  418. "my_vs_test_vs_abc0890f12h897fvh189072gvh",
  419. "my_vs_other_project_vs_xyz78977j12gh89102h3g09123hf",
  420. ],
  421. "expected_dir": "my_vs_test_vs_abc0890f12h897fvh189072gvh",
  422. "expected_vs": "vs_abc0890f12h897fvh189072gvh",
  423. },
  424. {
  425. "name": "explicit",
  426. "files_folder": "files_vs_explicit891y2390g8h1298vh",
  427. "dirs": [
  428. "files_vs_abc89123ty892g1h98h1289008i12h",
  429. "files_vs_explicit891y2390g8h1298vh",
  430. "files_vs_xyz987123yt891h2890fh12890vh",
  431. ],
  432. "expected_dir": "files_vs_explicit891y2390g8h1298vh",
  433. "expected_vs": "vs_explicit891y2390g8h1298vh",
  434. },
  435. {
  436. "name": "underscored_base",
  437. "files_folder": "my_project_files",
  438. "dirs": [
  439. "my_project_files_vs_correct9281gh891h9vb191290vb",
  440. "my_project_vs_other2891ghf981gv981bvaqw",
  441. ],
  442. "expected_dir": "my_project_files_vs_correct9281gh891h9vb191290vb",
  443. "expected_vs": "vs_correct9281gh891h9vb191290vb",
  444. },
  445. ]
  446. for case in cases:
  447. case_root = tmp_path / case["name"]
  448. case_root.mkdir()
  449. for directory in case["dirs"]:
  450. (case_root / directory).mkdir()
  451. mock_agent = Mock()
  452. mock_agent.name = "TestAgent"
  453. mock_agent.files_folder = case["files_folder"]
  454. mock_agent.get_class_folder_path.return_value = str(case_root)
  455. mock_agent.tools = []
  456. mock_agent.add_tool = Mock()
  457. mock_agent.client_sync.vector_stores.create = Mock()
  458. file_manager = AgentFileManager(mock_agent)
  459. with (
  460. patch.object(AgentFileManager, "upload_file", return_value="file-1"),
  461. patch.object(AgentFileManager, "add_file_search_tool"),
  462. patch.object(AgentFileManager, "add_code_interpreter_tool"),
  463. patch("os.listdir", return_value=[]),
  464. ):
  465. file_manager.parse_files_folder_for_vs_id()
  466. expected_dir = (case_root / case["expected_dir"]).resolve()
  467. assert mock_agent.files_folder_path == expected_dir
  468. assert mock_agent._associated_vector_store_id == case["expected_vs"]
  469. def test_add_file_search_tool_no_vector_store_ids(self):
  470. """Test add_file_search_tool when existing tool has no vector store IDs."""
  471. mock_agent = Mock()
  472. mock_agent.name = "TestAgent"
  473. # Add existing FileSearchTool with no vector store IDs
  474. mock_file_search_tool = Mock(spec=FileSearchTool)
  475. mock_file_search_tool.vector_store_ids = []
  476. mock_agent.tools = [mock_file_search_tool]
  477. file_manager = AgentFileManager(mock_agent)
  478. # Should raise AgentsException
  479. with pytest.raises(AgentsException, match="FileSearchTool has no vector store IDs"):
  480. file_manager.add_file_search_tool("vs_test123")
  481. def test_add_file_search_tool_existing_tool_association_and_merge_behavior(self):
  482. """Existing FileSearchTool should set association when missing and merge include/vector store values."""
  483. mock_agent = Mock()
  484. mock_agent.name = "TestAgent"
  485. mock_agent._associated_vector_store_id = None
  486. mock_agent.include_search_results = True
  487. mock_file_search_tool = Mock(spec=FileSearchTool)
  488. mock_file_search_tool.vector_store_ids = ["vs_existing456"]
  489. mock_file_search_tool.include_search_results = False
  490. mock_agent.tools = [mock_file_search_tool]
  491. file_manager = AgentFileManager(mock_agent)
  492. file_manager.add_file_search_tool("vs_test123")
  493. assert mock_agent._associated_vector_store_id == "vs_existing456"
  494. assert "vs_existing456" in mock_file_search_tool.vector_store_ids
  495. assert "vs_test123" in mock_file_search_tool.vector_store_ids
  496. assert mock_file_search_tool.include_search_results is True
  497. file_manager.add_file_search_tool("vs_existing456")
  498. assert mock_file_search_tool.vector_store_ids.count("vs_existing456") == 1
  499. assert mock_file_search_tool.include_search_results is True
  500. def test_add_code_interpreter_tool_handles_container_shapes_and_dedupes_files(self):
  501. """CodeInterpreter tool wiring should preserve string containers and dedupe explicit file ids."""
  502. string_container_agent = Mock()
  503. string_container_agent.name = "TestAgent"
  504. string_container_tool = Mock(spec=CodeInterpreterTool)
  505. string_container_tool.tool_config = {"container": "some_container_id"}
  506. string_container_agent.tools = [string_container_tool]
  507. AgentFileManager(string_container_agent).add_code_interpreter_tool(["file-123"])
  508. assert string_container_tool.tool_config["container"] == "some_container_id"
  509. list_container_agent = Mock()
  510. list_container_agent.name = "TestAgent"
  511. list_container_tool = Mock(spec=CodeInterpreterTool)
  512. list_container_tool.tool_config = {"container": {"file_ids": ["file-existing123"]}}
  513. list_container_agent.tools = [list_container_tool]
  514. AgentFileManager(list_container_agent).add_code_interpreter_tool(["file-existing123", "file-new456"])
  515. assert list_container_tool.tool_config["container"]["file_ids"] == ["file-existing123", "file-new456"]
  516. def test_add_files_to_vector_store_existing_file_skip(self):
  517. """Test add_files_to_vector_store skipping files that already exist in VS."""
  518. mock_agent = Mock()
  519. mock_agent.name = "TestAgent"
  520. mock_existing_file = make_vector_store_file(file_id="file-existing123", vector_store_id="vs_test123")
  521. mock_files_list = Mock()
  522. mock_files_list.data = [mock_existing_file]
  523. mock_agent.client_sync.vector_stores.files.list.return_value = mock_files_list
  524. mock_agent.client_sync.vector_stores.files.create_and_poll.return_value = make_vector_store_file(
  525. file_id="file-new456", vector_store_id="vs_test123"
  526. )
  527. file_manager = AgentFileManager(mock_agent)
  528. # Should skip existing file and add new one
  529. file_manager.add_files_to_vector_store("vs_test123", ["file-existing123", "file-new456"])
  530. # Should only call create for the new file
  531. mock_agent.client_sync.vector_stores.files.create_and_poll.assert_called_once_with(
  532. vector_store_id="vs_test123", file_id="file-new456"
  533. )
  534. def test_add_files_to_vector_store_failure_paths(self, caplog):
  535. """Vector store add_files_to_vector_store should raise on creation errors and bad poll statuses."""
  536. mock_agent = Mock()
  537. mock_agent.name = "TestAgent"
  538. mock_files_list = Mock()
  539. mock_files_list.data = []
  540. mock_agent.client_sync.vector_stores.files.list.return_value = mock_files_list
  541. file_manager = AgentFileManager(mock_agent)
  542. mock_agent.client_sync.vector_stores.files.create_and_poll.side_effect = Exception("Create failed")
  543. with pytest.raises(AgentsException, match="Failed to add file file-123 to Vector Store vs_test123"):
  544. file_manager.add_files_to_vector_store("vs_test123", ["file-123"])
  545. mock_agent.client_sync.vector_stores.files.create_and_poll.side_effect = None
  546. mock_agent.client_sync.vector_stores.files.create_and_poll.return_value = make_vector_store_file(
  547. file_id="file-123",
  548. vector_store_id="vs_test123",
  549. status="failed",
  550. last_error=LastError(code="server_error", message="ingestion failed"),
  551. )
  552. with pytest.raises(AgentsException, match="status failed"):
  553. file_manager.add_files_to_vector_store("vs_test123", ["file-123"])
  554. mock_agent.client_sync.vector_stores.files.create_and_poll.return_value = make_vector_store_file(
  555. file_id="file-456",
  556. vector_store_id="vs_test123",
  557. status="cancelled",
  558. )
  559. with caplog.at_level(logging.INFO):
  560. with pytest.raises(AgentsException, match="status cancelled"):
  561. file_manager.add_files_to_vector_store("vs_test123", ["file-456"])
  562. assert all("Added file" not in record.getMessage() for record in caplog.records)
  563. def test_read_instructions_prefers_class_relative_then_falls_back_to_absolute_path(self):
  564. """read_instructions should resolve class-relative files first and otherwise fall back to absolute paths."""
  565. mock_agent = Mock()
  566. mock_agent.instructions = "instructions.md"
  567. file_manager = AgentFileManager(mock_agent)
  568. file_manager.get_class_folder_path = Mock(return_value="/base/path")
  569. with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as relative_tmp:
  570. relative_tmp.write("Test instructions content")
  571. relative_tmp_path = relative_tmp.name
  572. with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as absolute_tmp:
  573. absolute_tmp.write("Absolute path instructions")
  574. absolute_tmp_path = absolute_tmp.name
  575. try:
  576. with (
  577. patch("os.path.normpath") as mock_normpath,
  578. patch("os.path.isfile", wraps=os.path.isfile) as mock_isfile,
  579. ):
  580. mock_normpath.return_value = relative_tmp_path
  581. file_manager.read_instructions()
  582. assert mock_agent.instructions == "Test instructions content"
  583. mock_normpath.assert_called_once()
  584. mock_isfile.assert_called_once_with(relative_tmp_path)
  585. mock_agent.instructions = absolute_tmp_path
  586. with (
  587. patch("os.path.normpath", return_value="/nonexistent/path"),
  588. patch("os.path.isfile", wraps=os.path.isfile) as mock_isfile,
  589. ):
  590. file_manager.read_instructions()
  591. assert mock_agent.instructions == "Absolute path instructions"
  592. assert mock_isfile.call_args_list == [call("/nonexistent/path"), call(absolute_tmp_path)]
  593. finally:
  594. os.unlink(relative_tmp_path)
  595. os.unlink(absolute_tmp_path)
  596. def test_non_processable_folder_shapes_do_not_create_vector_store(self, tmp_path: Path):
  597. """Folders without processable files should not create vector stores or be renamed."""
  598. mock_agent = Mock()
  599. mock_agent.name = "TestAgent"
  600. mock_agent.files_folder = "files"
  601. mock_agent.client_sync = Mock()
  602. mock_agent.tools = []
  603. file_manager = AgentFileManager(mock_agent)
  604. file_manager.get_class_folder_path = Mock(return_value=str(tmp_path))
  605. folders = [
  606. ("files_empty", []),
  607. ("files_hidden", [".hidden_file"]),
  608. ("files_subdirs", ["subdir1/", "subdir2/"]),
  609. ]
  610. for folder_name, entries in folders:
  611. files_folder = tmp_path / folder_name
  612. files_folder.mkdir()
  613. for entry in entries:
  614. if entry.endswith("/"):
  615. (files_folder / entry.rstrip("/")).mkdir()
  616. else:
  617. (files_folder / entry).write_text("content", encoding="utf-8")
  618. result = file_manager._create_or_identify_vector_store(files_folder)
  619. assert result is None
  620. assert files_folder.exists()
  621. assert not any(p.name.startswith(f"{folder_name}_vs_") for p in tmp_path.iterdir())
  622. mock_agent.client_sync.vector_stores.create.assert_not_called()
  623. def test_folder_with_files_creates_vector_store(self, tmp_path: Path):
  624. """Test that folders with processable files do create vector stores and get renamed."""
  625. mock_agent = Mock()
  626. mock_agent.name = "TestAgent"
  627. mock_agent.files_folder = "files"
  628. mock_agent.client_sync = Mock()
  629. mock_agent.tools = []
  630. # Mock vector store creation
  631. mock_vs = Mock()
  632. mock_vs.id = "vs_abc123456789012"
  633. mock_agent.client_sync.vector_stores.create.return_value = mock_vs
  634. file_manager = AgentFileManager(mock_agent)
  635. file_manager.get_class_folder_path = Mock(return_value=str(tmp_path))
  636. # Create files folder with a processable file
  637. files_folder = tmp_path / "files"
  638. files_folder.mkdir()
  639. (files_folder / "document.txt").write_text("content")
  640. (files_folder / ".hidden_file").write_text("hidden") # Should be ignored
  641. # Should create vector store for folder with processable files
  642. result = file_manager._create_or_identify_vector_store(files_folder)
  643. assert result == "vs_abc123456789012"
  644. mock_agent.client_sync.vector_stores.create.assert_called_once()
  645. # Folder should be renamed with VS ID
  646. expected_folder = tmp_path / "files_vs_abc123456789012"
  647. assert expected_folder.exists()
  648. assert not files_folder.exists()