test_file_handler.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. """Unit tests for fastapi_utils file_handler module."""
  2. import sys
  3. from pathlib import Path
  4. import pytest
  5. from agency_swarm.integrations.fastapi_utils.file_handler import upload_from_urls
  6. @pytest.mark.asyncio
  7. async def test_upload_from_urls_rejects_unsupported_sources() -> None:
  8. """Unsupported or relative sources should raise a clear validation error."""
  9. invalid_sources = [
  10. "s3://bucket/key",
  11. "ftp://example.com/file.pdf",
  12. "uploads/file.pdf",
  13. "./uploads/file.pdf",
  14. ]
  15. for source in invalid_sources:
  16. with pytest.raises(ValueError, match="Unsupported URL scheme"):
  17. await upload_from_urls({"file.pdf": source})
  18. @pytest.mark.asyncio
  19. @pytest.mark.skipif(sys.platform == "win32", reason="On Windows, // paths are treated as UNC")
  20. async def test_upload_from_urls_rejects_protocol_relative_on_non_windows() -> None:
  21. """Protocol-relative URLs should be rejected before download on non-Windows hosts."""
  22. protocol_relative_urls = ["//example.com/file.pdf", "//cdn.example.com/file.js"]
  23. for source in protocol_relative_urls:
  24. with pytest.raises(ValueError, match="URL scheme is required"):
  25. await upload_from_urls({"file.pdf": source})
  26. @pytest.mark.asyncio
  27. async def test_upload_from_urls_uploads_supported_local_sources(
  28. monkeypatch: pytest.MonkeyPatch,
  29. tmp_path: Path,
  30. ) -> None:
  31. """Absolute and file:// sources should all resolve to local uploads."""
  32. plain_file = tmp_path / "doc.txt"
  33. plain_file.write_text("hello", encoding="utf-8")
  34. spaced_file = tmp_path / "uri file.txt"
  35. spaced_file.write_text("hello", encoding="utf-8")
  36. async def fake_upload(path: str) -> str:
  37. return f"uploaded:{Path(path).name}"
  38. async def fake_wait(_file_id: str) -> None:
  39. return None
  40. monkeypatch.setattr(
  41. "agency_swarm.integrations.fastapi_utils.file_handler.upload_to_openai",
  42. fake_upload,
  43. )
  44. monkeypatch.setattr(
  45. "agency_swarm.integrations.fastapi_utils.file_handler._wait_for_file_processed",
  46. fake_wait,
  47. )
  48. sources = {
  49. "absolute": ("doc.txt", str(plain_file), {"doc.txt": "uploaded:doc.txt"}),
  50. "file_uri": ("doc.txt", plain_file.as_uri(), {"doc.txt": "uploaded:doc.txt"}),
  51. "encoded_space_uri": (
  52. "uri file.txt",
  53. spaced_file.as_uri(),
  54. {"uri file.txt": "uploaded:uri file.txt"},
  55. ),
  56. }
  57. if sys.platform != "win32":
  58. localhost_uri = f"file://localhost{plain_file}"
  59. sources["localhost_uri"] = ("doc.txt", localhost_uri, {"doc.txt": "uploaded:doc.txt"})
  60. for _name, (filename, source, expected) in sources.items():
  61. result = await upload_from_urls({filename: source}, allowed_local_dirs=[str(tmp_path)])
  62. assert result == expected
  63. @pytest.mark.asyncio
  64. async def test_upload_from_urls_forwards_openai_client(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
  65. """Request-scoped OpenAI client should be forwarded to upload and poll helpers."""
  66. file_path = tmp_path / "doc.txt"
  67. file_path.write_text("hello", encoding="utf-8")
  68. client_sentinel = object()
  69. seen: list[object] = []
  70. async def fake_upload(path: str, openai_client: object | None = None) -> str:
  71. del path
  72. seen.append(openai_client)
  73. return "uploaded:doc.txt"
  74. async def fake_wait(_file_id: str, timeout: int = 60, openai_client: object | None = None) -> None:
  75. del timeout
  76. seen.append(openai_client)
  77. return None
  78. monkeypatch.setattr(
  79. "agency_swarm.integrations.fastapi_utils.file_handler.upload_to_openai",
  80. fake_upload,
  81. )
  82. monkeypatch.setattr(
  83. "agency_swarm.integrations.fastapi_utils.file_handler._wait_for_file_processed",
  84. fake_wait,
  85. )
  86. result = await upload_from_urls(
  87. {"doc.txt": str(file_path)},
  88. allowed_local_dirs=[str(tmp_path)],
  89. openai_client=client_sentinel,
  90. )
  91. assert result == {"doc.txt": "uploaded:doc.txt"}
  92. assert seen == [client_sentinel, client_sentinel]
  93. @pytest.mark.asyncio
  94. async def test_upload_from_urls_rejects_directory(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
  95. """Directories should not be accepted as local file attachments."""
  96. directory = tmp_path / "folder"
  97. directory.mkdir()
  98. async def fake_wait(_file_id: str) -> None:
  99. return None
  100. monkeypatch.setattr(
  101. "agency_swarm.integrations.fastapi_utils.file_handler._wait_for_file_processed",
  102. fake_wait,
  103. )
  104. with pytest.raises(IsADirectoryError, match="must be a file"):
  105. await upload_from_urls({"folder": str(directory)}, allowed_local_dirs=[str(tmp_path)])
  106. @pytest.mark.asyncio
  107. async def test_upload_from_urls_allowlist_enforcement(tmp_path: Path) -> None:
  108. """Disallowed or missing allowlist paths should block local uploads."""
  109. allowed_dir = tmp_path / "allowed"
  110. allowed_dir.mkdir()
  111. disallowed_dir = tmp_path / "other"
  112. disallowed_dir.mkdir()
  113. allowed_file = allowed_dir / "allowed.txt"
  114. allowed_file.write_text("ok", encoding="utf-8")
  115. disallowed_file = disallowed_dir / "doc.txt"
  116. disallowed_file.write_text("hello", encoding="utf-8")
  117. cases: list[tuple[str, list[str | Path] | None, str]] = [
  118. (str(disallowed_file), [str(allowed_dir)], "allowed directories"),
  119. (str(allowed_file), None, "Local file access is disabled"),
  120. (str(allowed_file), [tmp_path / "missing"], "Local file access is disabled"),
  121. ]
  122. for source, allowlist, error_match in cases:
  123. with pytest.raises(PermissionError, match=error_match):
  124. await upload_from_urls({"doc.txt": source}, allowed_local_dirs=allowlist)
  125. @pytest.mark.asyncio
  126. async def test_upload_from_urls_skips_missing_allowlist_when_valid_dir_exists(
  127. monkeypatch: pytest.MonkeyPatch,
  128. tmp_path: Path,
  129. ) -> None:
  130. """Missing allowlist entries should not block uploads from existing allowed dirs."""
  131. allowed_dir = tmp_path / "allowed"
  132. allowed_dir.mkdir()
  133. file_path = allowed_dir / "doc.txt"
  134. file_path.write_text("hello", encoding="utf-8")
  135. missing_dir = tmp_path / "missing"
  136. async def fake_upload(path: str) -> str:
  137. return f"uploaded:{Path(path).name}"
  138. async def fake_wait(_file_id: str) -> None:
  139. return None
  140. monkeypatch.setattr(
  141. "agency_swarm.integrations.fastapi_utils.file_handler.upload_to_openai",
  142. fake_upload,
  143. )
  144. monkeypatch.setattr(
  145. "agency_swarm.integrations.fastapi_utils.file_handler._wait_for_file_processed",
  146. fake_wait,
  147. )
  148. result = await upload_from_urls(
  149. {"doc.txt": str(file_path)},
  150. allowed_local_dirs=[str(allowed_dir), str(missing_dir)],
  151. )
  152. assert result == {"doc.txt": "uploaded:doc.txt"}
  153. @pytest.mark.asyncio
  154. async def test_upload_from_urls_rejects_non_directory_allowlist_even_with_valid_dir(tmp_path: Path) -> None:
  155. """Non-directory allowlist entries should fail fast instead of being silently ignored."""
  156. allowed_dir = tmp_path / "allowed"
  157. allowed_dir.mkdir()
  158. file_path = allowed_dir / "doc.txt"
  159. file_path.write_text("hello", encoding="utf-8")
  160. non_directory_entry = tmp_path / "not-a-dir.txt"
  161. non_directory_entry.write_text("x", encoding="utf-8")
  162. with pytest.raises(NotADirectoryError, match="Allowed path must be a directory"):
  163. await upload_from_urls(
  164. {"doc.txt": str(file_path)},
  165. allowed_local_dirs=[str(allowed_dir), str(non_directory_entry)],
  166. )
  167. @pytest.mark.asyncio
  168. async def test_upload_from_urls_expands_user_path_allowlist(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
  169. """Allowlist provided as Path('~') should expand and permit uploads in home."""
  170. home_dir = tmp_path / "home"
  171. home_dir.mkdir()
  172. file_path = home_dir / "doc.txt"
  173. file_path.write_text("hello", encoding="utf-8")
  174. monkeypatch.setenv("HOME", str(home_dir))
  175. monkeypatch.setenv("USERPROFILE", str(home_dir))
  176. async def fake_upload(path: str) -> str:
  177. return f"uploaded:{Path(path).name}"
  178. async def fake_wait(_file_id: str) -> None:
  179. return None
  180. monkeypatch.setattr(
  181. "agency_swarm.integrations.fastapi_utils.file_handler.upload_to_openai",
  182. fake_upload,
  183. )
  184. monkeypatch.setattr(
  185. "agency_swarm.integrations.fastapi_utils.file_handler._wait_for_file_processed",
  186. fake_wait,
  187. )
  188. result = await upload_from_urls({"doc.txt": str(file_path)}, allowed_local_dirs=[Path("~")])
  189. assert result == {"doc.txt": "uploaded:doc.txt"}
  190. @pytest.mark.asyncio
  191. async def test_upload_from_urls_remote_only_skips_allowlist_validation(
  192. monkeypatch: pytest.MonkeyPatch,
  193. tmp_path: Path,
  194. ) -> None:
  195. """Remote-only uploads should not fail when allowlist entries are missing."""
  196. async def fake_download(url: str, name: str, save_dir: str) -> str:
  197. dest = Path(save_dir) / name
  198. dest.write_text("remote data", encoding="utf-8")
  199. return str(dest)
  200. async def fake_upload(path: str) -> str:
  201. return f"uploaded:{Path(path).name}"
  202. async def fake_wait(_file_id: str) -> None:
  203. return None
  204. monkeypatch.setattr(
  205. "agency_swarm.integrations.fastapi_utils.file_handler.download_file",
  206. fake_download,
  207. )
  208. monkeypatch.setattr(
  209. "agency_swarm.integrations.fastapi_utils.file_handler.upload_to_openai",
  210. fake_upload,
  211. )
  212. monkeypatch.setattr(
  213. "agency_swarm.integrations.fastapi_utils.file_handler._wait_for_file_processed",
  214. fake_wait,
  215. )
  216. result = await upload_from_urls(
  217. {"doc.txt": "https://example.com/file.txt"},
  218. allowed_local_dirs=[str(tmp_path / "missing")],
  219. )
  220. assert result == {"doc.txt": "uploaded:doc.txt"}
  221. @pytest.mark.asyncio
  222. @pytest.mark.skipif(sys.platform != "win32", reason="UNC paths are Windows-specific")
  223. async def test_upload_from_urls_uploads_unc_path(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
  224. """UNC paths (//server/share) should be treated as local on Windows."""
  225. file_path = tmp_path / "doc.txt"
  226. file_path.write_text("hello", encoding="utf-8")
  227. async def fake_upload(path: str) -> str:
  228. return f"uploaded:{Path(path).name}"
  229. async def fake_wait(_file_id: str) -> None:
  230. return None
  231. monkeypatch.setattr(
  232. "agency_swarm.integrations.fastapi_utils.file_handler.upload_to_openai",
  233. fake_upload,
  234. )
  235. monkeypatch.setattr(
  236. "agency_swarm.integrations.fastapi_utils.file_handler._wait_for_file_processed",
  237. fake_wait,
  238. )
  239. unc_style = f"//{tmp_path.parts[0].rstrip(':')}/{'/'.join(tmp_path.parts[1:])}/doc.txt"
  240. with pytest.raises((PermissionError, FileNotFoundError)):
  241. await upload_from_urls({"doc.txt": unc_style}, allowed_local_dirs=[str(tmp_path)])
  242. @pytest.mark.asyncio
  243. async def test_upload_from_urls_rejects_nonexistent_local_file(tmp_path: Path) -> None:
  244. """Local paths to non-existent files should raise FileNotFoundError."""
  245. file_path = tmp_path / "nonexistent.txt"
  246. with pytest.raises(FileNotFoundError, match="Local file not found"):
  247. await upload_from_urls({"doc.txt": str(file_path)}, allowed_local_dirs=[str(tmp_path)])