merge.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
#!/usr/bin/env python3
"""
merge.py — Merge cover.pdf + body.pdf → final.pdf and print a QA report.

Usage:
    python3 merge.py --cover cover.pdf --body body.pdf --out final.pdf
    python3 merge.py --cover cover.pdf --body body.pdf --out final.pdf --title "My Report"

Exit codes: 0 success, 1 bad args/missing file, 2 missing dep, 3 merge error
"""
  9. import argparse
  10. import importlib.util
  11. import json
  12. import os
  13. import sys
  14. def ensure_deps():
  15. if importlib.util.find_spec("pypdf") is None:
  16. import subprocess
  17. subprocess.check_call(
  18. [sys.executable, "-m", "pip", "install", "--break-system-packages", "-q", "pypdf"]
  19. )
  20. ensure_deps()
  21. from pypdf import PdfWriter, PdfReader
  22. def merge(cover_path: str, body_path: str, out_path: str, title: str = "") -> dict:
  23. writer = PdfWriter()
  24. for fpath, label in [(cover_path, "cover"), (body_path, "body")]:
  25. if not os.path.exists(fpath):
  26. return {"status": "error", "error": f"{label} file not found: {fpath}"}
  27. reader = PdfReader(fpath)
  28. for page in reader.pages:
  29. writer.add_page(page)
  30. # Set PDF metadata
  31. if title:
  32. writer.add_metadata({"/Title": title})
  33. os.makedirs(os.path.dirname(os.path.abspath(out_path)), exist_ok=True)
  34. with open(out_path, "wb") as f:
  35. writer.write(f)
  36. size_kb = os.path.getsize(out_path) // 1024
  37. total_pages = len(writer.pages)
  38. # ── QA checks ─────────────────────────────────────────────────────────────
  39. warnings = []
  40. # Page count sanity
  41. cover_pages = len(PdfReader(cover_path).pages)
  42. body_pages = len(PdfReader(body_path).pages)
  43. if cover_pages != 1:
  44. warnings.append(f"Cover PDF has {cover_pages} pages (expected 1)")
  45. # File size sanity
  46. if size_kb < 20:
  47. warnings.append(f"Output is very small ({size_kb} KB) — may have blank pages")
  48. if size_kb > 50_000:
  49. warnings.append(f"Output is very large ({size_kb} KB) — consider compressing images")
  50. report = {
  51. "status": "ok",
  52. "out": out_path,
  53. "total_pages": total_pages,
  54. "cover_pages": cover_pages,
  55. "body_pages": body_pages,
  56. "size_kb": size_kb,
  57. }
  58. if warnings:
  59. report["warnings"] = warnings
  60. return report
  61. def main():
  62. parser = argparse.ArgumentParser(description="Merge cover + body PDFs")
  63. parser.add_argument("--cover", required=True)
  64. parser.add_argument("--body", required=True)
  65. parser.add_argument("--out", required=True)
  66. parser.add_argument("--title", default="")
  67. args = parser.parse_args()
  68. result = merge(args.cover, args.body, args.out, args.title)
  69. if result["status"] == "error":
  70. print(json.dumps(result), file=sys.stderr)
  71. sys.exit(3)
  72. print(json.dumps(result))
  73. # Human-readable QA summary
  74. print(f"\n── Build complete ──────────────────────────────────────")
  75. print(f" Output : {result['out']}")
  76. print(f" Pages : {result['total_pages']} total (1 cover + {result['body_pages']} body)")
  77. print(f" Size : {result['size_kb']} KB")
  78. if result.get("warnings"):
  79. print(f" ⚠ Warnings:")
  80. for w in result["warnings"]:
  81. print(f" • {w}")
  82. else:
  83. print(f" ✓ No issues detected")
  84. print(f"────────────────────────────────────────────────────────\n")
  85. if __name__ == "__main__":
  86. main()