fill_write.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. #!/usr/bin/env python3
  2. """
  3. fill_write.py — Write values into PDF form fields.
  4. Usage:
  5. # From a JSON data file
  6. python3 fill_write.py --input form.pdf --data values.json --out filled.pdf
  7. # Inline JSON
  8. python3 fill_write.py --input form.pdf --out filled.pdf \
  9. --values '{"FirstName": "Jane", "Agree": "true"}'
  10. values format:
  11. {
  12. "FieldName": "text value", # text field
  13. "CheckBox1": "true", # checkbox (true / false)
  14. "Dropdown1": "OptionValue", # dropdown (must match an existing choice value)
  15. "Radio1": "/Choice2" # radio (must match a radio value)
  16. }
  17. Exit codes: 0 success, 1 bad args, 2 dep missing, 3 read/write error, 4 validation error
  18. """
  19. import argparse
  20. import json
  21. import os
  22. import sys
  23. import importlib.util
  24. def ensure_deps():
  25. if importlib.util.find_spec("pypdf") is None:
  26. import subprocess
  27. subprocess.check_call(
  28. [sys.executable, "-m", "pip", "install", "--break-system-packages", "-q", "pypdf"]
  29. )
  30. ensure_deps()
  31. from pypdf import PdfReader, PdfWriter
  32. from pypdf.generic import NameObject, TextStringObject, BooleanObject
  33. # ── Field helpers ─────────────────────────────────────────────────────────────
  34. def _field_type(field) -> str:
  35. ft = str(field.get("/FT", ""))
  36. if ft == "/Tx": return "text"
  37. if ft == "/Btn":
  38. ff = int(field.get("/Ff", 0))
  39. return "radio" if ff & (1 << 15) else "checkbox"
  40. if ft == "/Ch":
  41. ff = int(field.get("/Ff", 0))
  42. return "dropdown" if ff & (1 << 17) else "listbox"
  43. return "unknown"
  44. def _get_checkbox_on_value(field) -> str:
  45. """Return the /AP /N key that means 'checked' (anything except /Off)."""
  46. ap = field.get("/AP")
  47. if ap and "/N" in ap:
  48. for k in ap["/N"]:
  49. if str(k) != "/Off":
  50. return str(k)
  51. return "/Yes"
  52. def _get_dropdown_values(field) -> list[str]:
  53. opt = field.get("/Opt")
  54. if not opt:
  55. return []
  56. values = []
  57. for item in opt:
  58. try:
  59. from pypdf.generic import ArrayObject
  60. if isinstance(item, (list, ArrayObject)) and len(item) >= 1:
  61. values.append(str(item[0]))
  62. else:
  63. values.append(str(item))
  64. except Exception:
  65. values.append(str(item))
  66. return values
  67. # ── Walk + fill ───────────────────────────────────────────────────────────────
  68. def _walk_and_fill(fields, data: dict, filled: list, errors: list, parent: str = ""):
  69. for field in fields:
  70. name = str(field.get("/T", ""))
  71. full = f"{parent}.{name}" if parent else name
  72. # Recurse into named groups
  73. kids = field.get("/Kids")
  74. if kids:
  75. named = [k for k in kids if "/T" in k]
  76. if named:
  77. _walk_and_fill(named, data, filled, errors, full)
  78. continue
  79. if full not in data:
  80. continue
  81. value = data[full]
  82. ftype = _field_type(field)
  83. if ftype == "text":
  84. field.update({
  85. NameObject("/V"): TextStringObject(str(value)),
  86. NameObject("/DV"): TextStringObject(str(value)),
  87. })
  88. filled.append(full)
  89. elif ftype == "checkbox":
  90. truthy = str(value).lower() in ("true", "1", "yes", "on")
  91. on_val = _get_checkbox_on_value(field)
  92. pdf_val = on_val if truthy else "/Off"
  93. field.update({
  94. NameObject("/V"): NameObject(pdf_val),
  95. NameObject("/AS"): NameObject(pdf_val),
  96. })
  97. filled.append(full)
  98. elif ftype in ("dropdown", "listbox"):
  99. allowed = _get_dropdown_values(field)
  100. if allowed and str(value) not in allowed:
  101. errors.append({
  102. "field": full,
  103. "error": f"Value '{value}' not in allowed choices: {allowed}"
  104. })
  105. continue
  106. field.update({NameObject("/V"): TextStringObject(str(value))})
  107. filled.append(full)
  108. elif ftype == "radio":
  109. # Radio value must start with /
  110. pdf_val = str(value) if str(value).startswith("/") else f"/{value}"
  111. field.update({
  112. NameObject("/V"): NameObject(pdf_val),
  113. NameObject("/AS"): NameObject(pdf_val),
  114. })
  115. filled.append(full)
  116. else:
  117. errors.append({"field": full, "error": f"Unsupported field type: {ftype}"})
  118. def fill(pdf_path: str, out_path: str, data: dict) -> dict:
  119. try:
  120. reader = PdfReader(pdf_path)
  121. except Exception as e:
  122. return {"status": "error", "error": str(e)}
  123. writer = PdfWriter()
  124. writer.clone_document_from_reader(reader)
  125. acroform = writer._root_object.get("/AcroForm") # type: ignore[attr-defined]
  126. if acroform is None or "/Fields" not in acroform:
  127. return {
  128. "status": "error",
  129. "error": "This PDF has no fillable form fields.",
  130. "hint": "Run fill_inspect.py first to confirm the PDF has fields.",
  131. }
  132. # Enable appearance regeneration so viewers show the new values
  133. acroform.update({NameObject("/NeedAppearances"): BooleanObject(True)})
  134. filled: list[str] = []
  135. errors: list[dict] = []
  136. _walk_and_fill(list(acroform["/Fields"]), data, filled, errors)
  137. # Warn about requested fields that were never found
  138. not_found = [k for k in data if k not in filled and not any(e["field"] == k for e in errors)]
  139. try:
  140. os.makedirs(os.path.dirname(os.path.abspath(out_path)), exist_ok=True)
  141. with open(out_path, "wb") as f:
  142. writer.write(f)
  143. except Exception as e:
  144. return {"status": "error", "error": f"Write failed: {e}"}
  145. result = {
  146. "status": "ok",
  147. "out": out_path,
  148. "filled_count": len(filled),
  149. "filled_fields": filled,
  150. "size_kb": os.path.getsize(out_path) // 1024,
  151. }
  152. if errors:
  153. result["validation_errors"] = errors
  154. if not_found:
  155. result["not_found"] = not_found
  156. result["hint"] = "Run fill_inspect.py to see all available field names."
  157. return result
  158. def main():
  159. parser = argparse.ArgumentParser(description="Fill PDF form fields")
  160. parser.add_argument("--input", required=True, help="Input PDF with form fields")
  161. parser.add_argument("--out", required=True, help="Output PDF path")
  162. group = parser.add_mutually_exclusive_group(required=True)
  163. group.add_argument("--data", help="Path to JSON file with field values")
  164. group.add_argument("--values", help="Inline JSON string with field values")
  165. args = parser.parse_args()
  166. if not os.path.exists(args.input):
  167. print(json.dumps({"status": "error", "error": f"File not found: {args.input}"}),
  168. file=sys.stderr)
  169. sys.exit(1)
  170. # Load data
  171. try:
  172. if args.data:
  173. with open(args.data) as f:
  174. data = json.load(f)
  175. else:
  176. data = json.loads(args.values)
  177. except Exception as e:
  178. print(json.dumps({"status": "error", "error": f"JSON parse error: {e}"}),
  179. file=sys.stderr)
  180. sys.exit(1)
  181. result = fill(args.input, args.out, data)
  182. print(json.dumps(result, indent=2, ensure_ascii=False))
  183. if result["status"] == "ok":
  184. print(f"\n── Fill complete ───────────────────────────────────────",
  185. file=sys.stderr)
  186. print(f" Output : {result['out']}", file=sys.stderr)
  187. print(f" Filled : {result['filled_count']} field(s)", file=sys.stderr)
  188. if result.get("validation_errors"):
  189. print(f" Errors :", file=sys.stderr)
  190. for e in result["validation_errors"]:
  191. print(f" • {e['field']}: {e['error']}", file=sys.stderr)
  192. if result.get("not_found"):
  193. print(f" Not found: {result['not_found']}", file=sys.stderr)
  194. print("", file=sys.stderr)
  195. else:
  196. sys.exit(3)
  197. if __name__ == "__main__":
  198. main()