test_file_sync.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. from __future__ import annotations
  2. from dataclasses import dataclass
  3. import httpx
  4. from openai import NotFoundError
  5. from agency_swarm.agent.file_sync import FileSync
  6. @dataclass(frozen=True)
  7. class _VectorStoreFile:
  8. id: str
  9. class _FakeFilesClient:
  10. def __init__(self, *, attached_file_ids: set[str]) -> None:
  11. self._attached_file_ids = attached_file_ids
  12. self.deleted_file_ids: list[str] = []
  13. def delete(self, *, file_id: str) -> None:
  14. self._attached_file_ids.discard(file_id)
  15. self.deleted_file_ids.append(file_id)
  16. def retrieve(self, file_id: str) -> None: # pragma: no cover - not used in these tests
  17. raise NotFoundError(
  18. "not found",
  19. response=httpx.Response(404, request=httpx.Request("GET", "https://example.test")),
  20. body=None,
  21. )
  22. class _FakeVectorStoreFilesClient:
  23. def __init__(self, *, attached_file_ids: set[str]) -> None:
  24. self._attached_file_ids = attached_file_ids
  25. self.detached_file_ids: list[str] = []
  26. self.retrieve_calls: list[tuple[str, str]] = []
  27. def delete(self, *, vector_store_id: str, file_id: str) -> None:
  28. self._attached_file_ids.discard(file_id)
  29. self.detached_file_ids.append(file_id)
  30. def retrieve(self, *, vector_store_id: str, file_id: str) -> None:
  31. self.retrieve_calls.append((vector_store_id, file_id))
  32. if file_id not in self._attached_file_ids:
  33. raise NotFoundError(
  34. "not found",
  35. response=httpx.Response(404, request=httpx.Request("GET", "https://example.test")),
  36. body=None,
  37. )
  38. return None
  39. class _FakeVectorStoresClient:
  40. def __init__(self, *, attached_file_ids: set[str]) -> None:
  41. self.files = _FakeVectorStoreFilesClient(attached_file_ids=attached_file_ids)
  42. class _FakeClientSync:
  43. def __init__(self, *, attached_file_ids: set[str]) -> None:
  44. self.files = _FakeFilesClient(attached_file_ids=attached_file_ids)
  45. self.vector_stores = _FakeVectorStoresClient(attached_file_ids=attached_file_ids)
  46. class _FakeAgent:
  47. def __init__(self, *, vs_id: str, client_sync: _FakeClientSync) -> None:
  48. self.name = "TestAgent"
  49. self._associated_vector_store_id = vs_id
  50. self.client_sync = client_sync
  51. self.files_folder_path = None
  52. self.file_manager = None
  53. def test_sync_with_folder_file_delete_detaches_from_vector_store() -> None:
  54. """OpenAI file deletion should remove the file from all vector stores."""
  55. attached_file_ids = {"file-1"}
  56. client_sync = _FakeClientSync(attached_file_ids=attached_file_ids)
  57. agent = _FakeAgent(vs_id="vs_123", client_sync=client_sync)
  58. class _FileSyncFixedList(FileSync):
  59. def list_all_vector_store_files(self, vector_store_id: str) -> list[object]:
  60. assert vector_store_id == "vs_123"
  61. return [_VectorStoreFile(id="file-1")]
  62. sync = _FileSyncFixedList(agent)
  63. sync.sync_with_folder()
  64. assert client_sync.vector_stores.files.detached_file_ids == []
  65. assert client_sync.files.deleted_file_ids == ["file-1"]
  66. assert attached_file_ids == set()
  67. def test_remove_file_from_vs_and_oai_relies_on_openai_delete_detachment() -> None:
  68. """`remove_file_from_vs_and_oai` should not need explicit Vector Store detachment."""
  69. attached_file_ids = {"file-1"}
  70. client_sync = _FakeClientSync(attached_file_ids=attached_file_ids)
  71. agent = _FakeAgent(vs_id="vs_123", client_sync=client_sync)
  72. sync = FileSync(agent)
  73. sync.remove_file_from_vs_and_oai("file-1")
  74. assert client_sync.vector_stores.files.detached_file_ids == []
  75. assert client_sync.files.deleted_file_ids == ["file-1"]
  76. assert attached_file_ids == set()
  77. def test_remove_file_from_vs_and_oai_polls_vector_store_via_retrieve() -> None:
  78. """Removal waits should poll Vector Store via retrieve, not list."""
  79. attached_file_ids = {"file-1"}
  80. client_sync = _FakeClientSync(attached_file_ids=attached_file_ids)
  81. agent = _FakeAgent(vs_id="vs_123", client_sync=client_sync)
  82. class _FileSyncNoList(FileSync):
  83. def list_all_vector_store_files(self, vector_store_id: str) -> list[object]:
  84. raise AssertionError("list should not be used for absence polling")
  85. sync = _FileSyncNoList(agent)
  86. sync.remove_file_from_vs_and_oai("file-1")
  87. assert client_sync.vector_stores.files.retrieve_calls == [("vs_123", "file-1")]