"""Word export service."""
import io
import logging
import re
from urllib.parse import quote

import docx
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.shared import Pt

from ..models.schemas import WordExportOutlineItem, WordExportRequest
  10. def _set_run_font_simsun(run: docx.text.run.Run) -> None:
  11. run.font.name = "宋体"
  12. rpr = run._element.rPr
  13. if rpr is not None and rpr.rFonts is not None:
  14. rpr.rFonts.set(qn("w:eastAsia"), "宋体")
  15. def _set_paragraph_font_simsun(paragraph: docx.text.paragraph.Paragraph) -> None:
  16. for run in paragraph.runs:
  17. _set_run_font_simsun(run)
  18. class WordExportService:
  19. """负责将目录数据导出为 Word 文档。"""
  20. @staticmethod
  21. def export_outline(request: WordExportRequest) -> tuple[io.BytesIO, dict[str, str]]:
  22. doc = docx.Document()
  23. WordExportService._init_document_styles(doc)
  24. WordExportService._add_document_intro(doc, request.project_name)
  25. WordExportService._add_outline_items(doc, request.outline)
  26. buffer = io.BytesIO()
  27. doc.save(buffer)
  28. buffer.seek(0)
  29. filename = f"{request.project_name or '标书文档'}.docx"
  30. headers = {
  31. "Content-Disposition": f"attachment; filename*=UTF-8''{quote(filename)}",
  32. }
  33. return buffer, headers
  34. @staticmethod
  35. def _init_document_styles(doc: docx.Document) -> None:
  36. try:
  37. styles = doc.styles
  38. base_styles = ["Normal", "Heading 1", "Heading 2", "Heading 3", "Title"]
  39. for style_name in base_styles:
  40. if style_name not in styles:
  41. continue
  42. style = styles[style_name]
  43. font = style.font
  44. font.name = "宋体"
  45. if style._element.rPr is None:
  46. style._element._add_rPr()
  47. style._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体")
  48. if style_name == "Normal":
  49. font.bold = False
  50. except Exception:
  51. pass
  52. @staticmethod
  53. def _add_document_intro(
  54. doc: docx.Document, project_name: str | None
  55. ) -> None:
  56. declaration = doc.add_paragraph()
  57. declaration_run = declaration.add_run("内容由AI生成")
  58. declaration_run.italic = True
  59. declaration_run.font.size = Pt(9)
  60. _set_run_font_simsun(declaration_run)
  61. declaration.alignment = WD_ALIGN_PARAGRAPH.CENTER
  62. title_paragraph = doc.add_paragraph()
  63. title_run = title_paragraph.add_run(project_name or "投标技术文件")
  64. title_run.bold = True
  65. title_run.font.size = Pt(16)
  66. _set_run_font_simsun(title_run)
  67. title_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
  68. @staticmethod
  69. def _add_markdown_runs(para: docx.text.paragraph.Paragraph, text: str) -> None:
  70. pattern = r"(\*\*.*?\*\*|\*.*?\*|`.*?`)"
  71. parts = re.split(pattern, text)
  72. for part in parts:
  73. if not part:
  74. continue
  75. run = para.add_run()
  76. if part.startswith("**") and part.endswith("**") and len(part) > 4:
  77. run.text = part[2:-2]
  78. run.bold = True
  79. elif part.startswith("*") and part.endswith("*") and len(part) > 2:
  80. run.text = part[1:-1]
  81. run.italic = True
  82. elif part.startswith("`") and part.endswith("`") and len(part) > 2:
  83. run.text = part[1:-1]
  84. else:
  85. run.text = part
  86. _set_run_font_simsun(run)
  87. @staticmethod
  88. def _add_markdown_paragraph(doc: docx.Document, text: str) -> None:
  89. para = doc.add_paragraph()
  90. WordExportService._add_markdown_runs(para, text)
  91. para.paragraph_format.space_after = Pt(6)
  92. @staticmethod
  93. def _parse_markdown_blocks(content: str) -> list[tuple]:
  94. blocks: list[tuple] = []
  95. lines = content.split("\n")
  96. i = 0
  97. while i < len(lines):
  98. line = lines[i].rstrip("\r").strip()
  99. if not line:
  100. i += 1
  101. continue
  102. if (
  103. line.startswith("- ")
  104. or line.startswith("* ")
  105. or re.match(r"^\d+\.\s", line)
  106. ):
  107. items: list[tuple] = []
  108. while i < len(lines):
  109. raw = lines[i].rstrip("\r")
  110. stripped = raw.strip()
  111. if stripped.startswith("- ") or stripped.startswith("* "):
  112. text = re.sub(r"^[-*]\s+", "", stripped).strip()
  113. if text:
  114. items.append(("unordered", None, text))
  115. i += 1
  116. continue
  117. match_number = re.match(r"^(\d+)\.\s+(.*)$", stripped)
  118. if match_number:
  119. num_str, text = match_number.groups()
  120. if text.strip():
  121. items.append(("ordered", num_str, text.strip()))
  122. i += 1
  123. continue
  124. break
  125. if items:
  126. blocks.append(("list", items))
  127. continue
  128. if "|" in line:
  129. rows: list[str] = []
  130. while i < len(lines):
  131. stripped = lines[i].rstrip("\r").strip()
  132. if "|" not in stripped:
  133. break
  134. if not re.match(r"^\|?[-\s\|]+\|?$", stripped):
  135. cells = [cell.strip() for cell in stripped.split("|")]
  136. row_text = " | ".join([cell for cell in cells if cell])
  137. if row_text:
  138. rows.append(row_text)
  139. i += 1
  140. if rows:
  141. blocks.append(("table", rows))
  142. continue
  143. if line.startswith("#"):
  144. match_heading = re.match(r"^(#+)\s*(.*)$", line)
  145. if match_heading:
  146. level_marks, title_text = match_heading.groups()
  147. blocks.append(
  148. ("heading", min(len(level_marks), 3), title_text.strip())
  149. )
  150. i += 1
  151. continue
  152. para_lines: list[str] = []
  153. while i < len(lines):
  154. stripped = lines[i].rstrip("\r").strip()
  155. if (
  156. stripped
  157. and not stripped.startswith("-")
  158. and not stripped.startswith("*")
  159. and "|" not in stripped
  160. and not stripped.startswith("#")
  161. ):
  162. para_lines.append(stripped)
  163. i += 1
  164. else:
  165. break
  166. if para_lines:
  167. blocks.append(("paragraph", " ".join(para_lines)))
  168. else:
  169. i += 1
  170. return blocks
  171. @staticmethod
  172. def _render_markdown_blocks(doc: docx.Document, blocks: list[tuple]) -> None:
  173. for block in blocks:
  174. kind = block[0]
  175. if kind == "list":
  176. for item_kind, num_str, text in block[1]:
  177. paragraph = doc.add_paragraph()
  178. prefix = "• " if item_kind == "unordered" else f"{num_str}. "
  179. run = paragraph.add_run(prefix)
  180. _set_run_font_simsun(run)
  181. WordExportService._add_markdown_runs(paragraph, text)
  182. elif kind == "table":
  183. for row in block[1]:
  184. WordExportService._add_markdown_paragraph(doc, row)
  185. elif kind == "heading":
  186. _, level, text = block
  187. heading = doc.add_heading(text, level=level)
  188. heading.alignment = WD_ALIGN_PARAGRAPH.LEFT
  189. _set_paragraph_font_simsun(heading)
  190. elif kind == "paragraph":
  191. WordExportService._add_markdown_paragraph(doc, block[1])
  192. @staticmethod
  193. def _add_markdown_content(doc: docx.Document, content: str) -> None:
  194. blocks = WordExportService._parse_markdown_blocks(content)
  195. WordExportService._render_markdown_blocks(doc, blocks)
  196. @staticmethod
  197. def _add_outline_items(
  198. doc: docx.Document, items: list[WordExportOutlineItem], level: int = 1
  199. ) -> None:
  200. for item in items:
  201. if level <= 3:
  202. heading = doc.add_heading(f"{item.id} {item.title}", level=level)
  203. heading.alignment = WD_ALIGN_PARAGRAPH.LEFT
  204. _set_paragraph_font_simsun(heading)
  205. else:
  206. para = doc.add_paragraph()
  207. run = para.add_run(f"{item.id} {item.title}")
  208. run.bold = True
  209. _set_run_font_simsun(run)
  210. para.paragraph_format.space_before = Pt(6)
  211. para.paragraph_format.space_after = Pt(3)
  212. if not item.children:
  213. content = item.content or ""
  214. if content.strip():
  215. WordExportService._add_markdown_content(doc, content)
  216. continue
  217. WordExportService._add_outline_items(doc, item.children, level + 1)