main.py 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. from __future__ import annotations
  2. import os
  3. import re
  4. import shutil
  5. import tempfile
  6. from pathlib import Path
  7. from fastapi import FastAPI, File, Form, UploadFile
  8. from fastapi.responses import JSONResponse, PlainTextResponse
  9. from .config import settings
  10. from .errors import ConversionError
  11. from .executor import run_conversion
  # FastAPI application instance; the route handlers in this module are
  # registered on it through its decorators.
  app = FastAPI(
      title="Doc2MarkdownService",
      version="0.1.0",
  )
  @app.post("/convert")
  async def convert(
      file: UploadFile = File(...),
      include_images: bool = Form(False),
  ):
      """Convert an uploaded document to Markdown.

      The upload is streamed in chunks of ``settings.chunk_size_bytes`` into a
      private temporary directory, handed to ``run_conversion``, and the
      resulting Markdown is returned as ``text/markdown``.

      Args:
          file: The uploaded document (multipart form field ``file``).
          include_images: Form flag forwarded unchanged to ``run_conversion``.

      Returns:
          A ``PlainTextResponse`` carrying the Markdown on success, otherwise a
          ``JSONResponse`` of the form ``{"error": {...}}`` whose status code is
          taken from the ``ConversionError`` (413 when the upload or the
          converted output exceeds the configured limits, 500 for unexpected
          failures).
      """
      # Function-scope import so this handler does not depend on a module-level
      # logger being defined elsewhere in the file.
      import logging

      original_name = sanitize_filename(file.filename or "upload.bin")
      temp_dir = Path(tempfile.mkdtemp(prefix="doc2md-upload-"))
      input_path = temp_dir / original_name
      total_bytes = 0
      try:
          with input_path.open("wb") as output_file:
              while True:
                  chunk = await file.read(settings.chunk_size_bytes)
                  if not chunk:
                      break
                  total_bytes += len(chunk)
                  # Enforce the limit while streaming so an oversized upload
                  # is rejected before it is fully written to disk.
                  if total_bytes > settings.max_upload_bytes:
                      raise ConversionError(
                          code="file_too_large",
                          message="Uploaded file exceeds size limit",
                          status_code=413,
                          details={
                              "filename": original_name,
                              "max_upload_mb": settings.max_upload_mb,
                          },
                      )
                  output_file.write(chunk)

          # The file handle is closed here, so the converter sees the complete
          # upload on disk.
          markdown = await run_conversion(input_path, include_images, original_name)

          # The limit applies to the encoded size, not the character count.
          response_bytes = len(markdown.encode("utf-8"))
          if response_bytes > settings.max_response_bytes:
              raise ConversionError(
                  code="response_too_large",
                  message="Converted Markdown exceeds response size limit",
                  status_code=413,
                  details={
                      "filename": original_name,
                      "max_response_mb": settings.max_response_mb,
                  },
              )
          return PlainTextResponse(markdown, media_type="text/markdown")
      except ConversionError as exc:
          return JSONResponse(
              status_code=exc.status_code, content={"error": exc.to_dict()}
          )
      except Exception as exc:  # noqa: BLE001
          # Record the traceback; without this the failure is only visible to
          # the client as a generic 500.
          logging.getLogger(__name__).exception(
              "Unexpected error while converting %s", original_name
          )
          # NOTE(review): "reason" exposes the raw exception text to the client.
          # Kept for response compatibility; consider dropping it if callers do
          # not rely on it.
          error = ConversionError(
              code="internal_error",
              message="Unexpected server error",
              status_code=500,
              details={"filename": original_name, "reason": str(exc)},
          )
          return JSONResponse(
              status_code=error.status_code, content={"error": error.to_dict()}
          )
      finally:
          # Nested try/finally: the temporary directory must be removed even if
          # closing the upload raises.
          try:
              await file.close()
          finally:
              shutil.rmtree(temp_dir, ignore_errors=True)
  def sanitize_filename(value: str) -> str:
      """Return a safe, single-component file name derived from ``value``.

      Directory parts are dropped (both forward slashes and backslashes count
      as separators), every run of characters outside ``A-Z a-z 0-9 . _ -`` is
      replaced by one underscore, and the result is capped at 255 characters
      while keeping the extension where possible.

      Args:
          value: Client-supplied file name, possibly containing a path.

      Returns:
          The sanitized name, or ``"upload.bin"`` when nothing usable remains
          (empty input, a bare separator, or the special names ``.`` / ``..``).
      """
      max_length = 255  # per-component name limit on most mainstream filesystems

      # Normalise backslashes so a Windows-style client path is reduced to its
      # base name on POSIX hosts as well.
      name = Path(value.replace("\\", "/")).name or "upload.bin"
      sanitized = re.sub(r"[^A-Za-z0-9._-]+", "_", name)

      # "." and ".." refer to directories; joining them onto a directory would
      # not yield a writable file path.
      if sanitized in {"", ".", ".."}:
          return "upload.bin"

      if len(sanitized) > max_length:
          suffix = Path(sanitized).suffix
          if len(suffix) >= max_length:
              suffix = ""
          sanitized = sanitized[: max_length - len(suffix)] + suffix
      return sanitized
  if __name__ == "__main__":
      # Script entry point: serve the application with uvicorn. The bind
      # address and port come from DOC2MD_HOST / DOC2MD_PORT, falling back to
      # 0.0.0.0 and 8000 when unset.
      import uvicorn

      uvicorn.run(
          "app.main:app",
          host=os.environ.get("DOC2MD_HOST", "0.0.0.0"),
          port=int(os.environ.get("DOC2MD_PORT", "8000")),
          reload=False,
      )