| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242 |
- #!/usr/bin/env python3
- """
- fill_write.py — Write values into PDF form fields.
- Usage:
- # From a JSON data file
- python3 fill_write.py --input form.pdf --data values.json --out filled.pdf
- # Inline JSON
- python3 fill_write.py --input form.pdf --out filled.pdf \
- --values '{"FirstName": "Jane", "Agree": "true"}'
- values format:
- {
- "FieldName": "text value", # text field
- "CheckBox1": "true", # checkbox (true / false)
- "Dropdown1": "OptionValue", # dropdown (must match an existing choice value)
- "Radio1": "/Choice2" # radio (must match a radio value)
- }
- Exit codes: 0 success, 1 bad args, 2 dep missing, 3 read/write error, 4 validation error
- """
- import argparse
- import json
- import os
- import sys
- import importlib.util
def ensure_deps():
    """Make sure ``pypdf`` is importable, installing it with pip when missing.

    Does nothing when ``importlib`` can already locate ``pypdf``. Otherwise
    ``pip install`` is run for the current interpreter. When the install
    cannot be completed, a JSON error is written to stderr and the process
    exits with status 2 (the "dep missing" code listed in the module
    docstring) rather than ending in a traceback.
    """
    if importlib.util.find_spec("pypdf") is not None:
        return

    import subprocess  # only needed on the install path

    try:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "--break-system-packages", "-q", "pypdf"]
        )
    except (subprocess.CalledProcessError, OSError) as exc:
        print(
            json.dumps({"status": "error", "error": f"Could not install pypdf: {exc}"}),
            file=sys.stderr,
        )
        sys.exit(2)

    # The package was created after interpreter start-up; drop the finder
    # caches so the import that follows this call can see it.
    importlib.invalidate_caches()
- ensure_deps()
- from pypdf import PdfReader, PdfWriter
- from pypdf.generic import NameObject, TextStringObject, BooleanObject
- # ── Field helpers ─────────────────────────────────────────────────────────────
- def _field_type(field) -> str:
- ft = str(field.get("/FT", ""))
- if ft == "/Tx": return "text"
- if ft == "/Btn":
- ff = int(field.get("/Ff", 0))
- return "radio" if ff & (1 << 15) else "checkbox"
- if ft == "/Ch":
- ff = int(field.get("/Ff", 0))
- return "dropdown" if ff & (1 << 17) else "listbox"
- return "unknown"
- def _get_checkbox_on_value(field) -> str:
- """Return the /AP /N key that means 'checked' (anything except /Off)."""
- ap = field.get("/AP")
- if ap and "/N" in ap:
- for k in ap["/N"]:
- if str(k) != "/Off":
- return str(k)
- return "/Yes"
- def _get_dropdown_values(field) -> list[str]:
- opt = field.get("/Opt")
- if not opt:
- return []
- values = []
- for item in opt:
- try:
- from pypdf.generic import ArrayObject
- if isinstance(item, (list, ArrayObject)) and len(item) >= 1:
- values.append(str(item[0]))
- else:
- values.append(str(item))
- except Exception:
- values.append(str(item))
- return values
- # ── Walk + fill ───────────────────────────────────────────────────────────────
def _widget_on_states(widget) -> list[str]:
    """Return the /AP /N state names of *widget*, excluding /Off."""
    ap = widget.get("/AP")
    if not ap or "/N" not in ap:
        return []
    return [str(state) for state in ap["/N"] if str(state) != "/Off"]


def _set_button_state(field, widgets, pdf_val: str) -> None:
    """Write *pdf_val* to the field's /V and update /AS on each of its widgets."""
    field.update({NameObject("/V"): NameObject(pdf_val)})
    for widget in widgets:
        states = _widget_on_states(widget)
        # A widget that lists its states but not the chosen one is switched
        # off; a widget with no listed states simply receives the value.
        if states and pdf_val not in states:
            appearance = "/Off"
        else:
            appearance = pdf_val
        widget.update({NameObject("/AS"): NameObject(appearance)})


def _walk_and_fill(fields, data: dict, filled: list, errors: list, parent: str = ""):
    """Recursively write the values in *data* into the matching form fields.

    Args:
        fields: Iterable of field dictionaries to visit.
        data: Maps fully qualified field names (parent and child names joined
            with ".") to the value to write.
        filled: Output list; the full name of each field written is appended.
        errors: Output list; a ``{"field", "error"}`` dict is appended for each
            value that is rejected or each field type that is not supported.
        parent: Fully qualified name of the enclosing field group, if any.
    """
    for field in fields:
        name = str(field.get("/T", ""))
        full = f"{parent}.{name}" if parent else name

        # Kids that carry their own /T form a named group: recurse into them.
        kids = field.get("/Kids")
        if kids:
            named = [k for k in kids if "/T" in k]
            if named:
                _walk_and_fill(named, data, filled, errors, full)
                continue

        if full not in data:
            continue

        value = data[full]
        ftype = _field_type(field)
        # Any kids remaining at this point have no /T and are treated as the
        # field's widgets; without kids the field serves as its own widget.
        widgets = list(kids) if kids else [field]

        if ftype == "text":
            # NOTE(review): this also overwrites the field's default (/DV) —
            # confirm that is intended.
            field.update({
                NameObject("/V"): TextStringObject(str(value)),
                NameObject("/DV"): TextStringObject(str(value)),
            })
            filled.append(full)

        elif ftype == "checkbox":
            truthy = str(value).lower() in ("true", "1", "yes", "on")
            # Take the "on" state from the first widget that declares one.
            source = next((w for w in widgets if _widget_on_states(w)), field)
            on_val = _get_checkbox_on_value(source)
            pdf_val = on_val if truthy else "/Off"
            _set_button_state(field, widgets, pdf_val)
            filled.append(full)

        elif ftype in ("dropdown", "listbox"):
            allowed = _get_dropdown_values(field)
            if allowed and str(value) not in allowed:
                errors.append({
                    "field": full,
                    "error": f"Value '{value}' not in allowed choices: {allowed}"
                })
                continue
            field.update({NameObject("/V"): TextStringObject(str(value))})
            filled.append(full)

        elif ftype == "radio":
            # Radio value must start with /
            pdf_val = str(value) if str(value).startswith("/") else f"/{value}"
            allowed = sorted({s for w in widgets for s in _widget_on_states(w)})
            # Reject values no widget offers; "/Off" (nothing selected) is
            # always accepted, as is any value when no states are declared.
            if allowed and pdf_val != "/Off" and pdf_val not in allowed:
                errors.append({
                    "field": full,
                    "error": f"Value '{value}' not in allowed choices: {allowed}"
                })
                continue
            _set_button_state(field, widgets, pdf_val)
            filled.append(full)

        else:
            errors.append({"field": full, "error": f"Unsupported field type: {ftype}"})
def fill(pdf_path: str, out_path: str, data: dict) -> dict:
    """Fill the form fields of *pdf_path* from *data* and write *out_path*.

    Args:
        pdf_path: Path of the source PDF.
        out_path: Path of the PDF to write; its directory is created if needed.
        data: Maps fully qualified field names to the values to write.

    Returns:
        A result dict. On failure: ``{"status": "error", "error": ...}``
        (plus ``"hint"`` when the PDF has no form fields). On success:
        ``"status": "ok"`` with ``out``, ``filled_count``, ``filled_fields``
        and ``size_kb``, plus ``validation_errors`` and/or ``not_found`` (with
        a ``hint``) when applicable. The output file is written even when some
        values were rejected.
    """
    if not isinstance(data, dict):
        return {
            "status": "error",
            "error": "Field values must be an object mapping field names to values.",
        }

    # Reading and cloning can both fail on a bad input file; report either
    # failure as a structured error.
    try:
        reader = PdfReader(pdf_path)
        writer = PdfWriter()
        writer.clone_document_from_reader(reader)
    except Exception as e:
        return {"status": "error", "error": str(e)}

    acroform = writer._root_object.get("/AcroForm")  # type: ignore[attr-defined]
    if acroform is None or "/Fields" not in acroform:
        return {
            "status": "error",
            "error": "This PDF has no fillable form fields.",
            "hint": "Run fill_inspect.py first to confirm the PDF has fields.",
        }

    # Enable appearance regeneration so viewers show the new values
    acroform.update({NameObject("/NeedAppearances"): BooleanObject(True)})

    filled: list[str] = []
    errors: list[dict] = []
    _walk_and_fill(list(acroform["/Fields"]), data, filled, errors)

    # Warn about requested fields that were never found: neither written nor
    # rejected with an error.
    accounted = set(filled) | {e["field"] for e in errors}
    not_found = [k for k in data if k not in accounted]

    try:
        os.makedirs(os.path.dirname(os.path.abspath(out_path)), exist_ok=True)
        with open(out_path, "wb") as f:
            writer.write(f)
        size_kb = os.path.getsize(out_path) // 1024
    except Exception as e:
        return {"status": "error", "error": f"Write failed: {e}"}

    result = {
        "status": "ok",
        "out": out_path,
        "filled_count": len(filled),
        "filled_fields": filled,
        "size_kb": size_kb,
    }
    if errors:
        result["validation_errors"] = errors
    if not_found:
        result["not_found"] = not_found
        result["hint"] = "Run fill_inspect.py to see all available field names."
    return result
def main():
    """Command-line entry point: parse arguments, fill the form, report.

    The result dict from ``fill`` is printed to stdout as JSON, and a short
    human-readable summary goes to stderr on success.

    Exit status: 1 for a missing input file or unusable JSON values, 3 when
    ``fill`` reports an error, 4 when the output was written but at least one
    value was rejected by validation, 0 otherwise.
    """
    parser = argparse.ArgumentParser(description="Fill PDF form fields")
    parser.add_argument("--input", required=True, help="Input PDF with form fields")
    parser.add_argument("--out", required=True, help="Output PDF path")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--data", help="Path to JSON file with field values")
    group.add_argument("--values", help="Inline JSON string with field values")
    args = parser.parse_args()

    if not os.path.exists(args.input):
        print(json.dumps({"status": "error", "error": f"File not found: {args.input}"}),
              file=sys.stderr)
        sys.exit(1)

    # Load data
    try:
        if args.data:
            with open(args.data, encoding="utf-8") as f:
                data = json.load(f)
        else:
            data = json.loads(args.values)
    except (OSError, ValueError) as e:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        print(json.dumps({"status": "error", "error": f"JSON parse error: {e}"}),
              file=sys.stderr)
        sys.exit(1)

    # Field values are looked up by name, so anything but an object is unusable.
    if not isinstance(data, dict):
        print(json.dumps({"status": "error",
                          "error": "Field values must be a JSON object mapping field names to values"}),
              file=sys.stderr)
        sys.exit(1)

    result = fill(args.input, args.out, data)
    print(json.dumps(result, indent=2, ensure_ascii=False))

    if result["status"] != "ok":
        sys.exit(3)

    print("\n── Fill complete ───────────────────────────────────────",
          file=sys.stderr)
    print(f" Output : {result['out']}", file=sys.stderr)
    print(f" Filled : {result['filled_count']} field(s)", file=sys.stderr)
    if result.get("validation_errors"):
        print(" Errors :", file=sys.stderr)
        for e in result["validation_errors"]:
            print(f" • {e['field']}: {e['error']}", file=sys.stderr)
    if result.get("not_found"):
        print(f" Not found: {result['not_found']}", file=sys.stderr)
    print("", file=sys.stderr)

    # Signal rejected values with the documented "validation error" status.
    if result.get("validation_errors"):
        sys.exit(4)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|