// exportService.cjs (36 KB)

// (Line-number gutter captured from the web file viewer; it is not part of the source.)
  1. const fs = require('node:fs');
  2. const path = require('node:path');
  3. const zlib = require('node:zlib');
  4. const { fileURLToPath } = require('node:url');
  5. const { app, dialog, nativeImage } = require('electron');
  6. const cheerio = require('cheerio');
  7. const { imageSize } = require('image-size');
  8. const { getGeneratedImagesDir, getImportedImagesDir } = require('../utils/paths.cjs');
  9. const {
  10. AlignmentType,
  11. BorderStyle,
  12. Document,
  13. ExternalHyperlink,
  14. HeadingLevel,
  15. ImageRun,
  16. LevelFormat,
  17. Packer,
  18. Paragraph,
  19. ShadingType,
  20. Table,
  21. TableCell,
  22. TableLayoutType,
  23. TableRow,
  24. TextRun,
  25. UnderlineType,
  26. WidthType,
  27. } = require('docx');
  // Upper bound for exported image width; imageRunFromNode scales wider images down proportionally.
  28. const MAX_IMAGE_WIDTH = 520;
  // Prefix of the numbering reference names generated by createOrderedListReference.
  29. const NUMBERING_REFERENCE_PREFIX = 'technical-plan-numbering';
  // Total table width (twips, per the name) that tableColumnWidths / tableCellWidth split across columns.
  30. const DOCX_TABLE_WIDTH_TWIPS = 9000;
  // Retry count and delay (ms) for Mermaid export.
  // NOTE(review): not referenced in the visible part of the file — presumably fed to loadImageWithRetry; confirm.
  31. const MERMAID_EXPORT_RETRY_ATTEMPTS = 2;
  32. const MERMAID_EXPORT_RETRY_DELAY_MS = 3000;
  /**
   * Packs Mermaid source into a `pako:`-prefixed, deflated, base64url payload.
   *
   * The payload is the JSON of `{ code, mermaid: { theme: 'default' } }`.
   *
   * @param {*} code - Mermaid source; falsy values become an empty string.
   * @returns {string} The encoded payload, always starting with `pako:`.
   */
  function encodeMermaidForInk(code) {
    const diagramState = { code: String(code || ''), mermaid: { theme: 'default' } };
    const rawBytes = Buffer.from(JSON.stringify(diagramState), 'utf-8');
    const packed = zlib.deflateSync(rawBytes).toString('base64url');
    return `pako:${packed}`;
  }
  /**
   * Builds the mermaid.ink URL that renders the given diagram as a PNG on a white background.
   *
   * @param {*} code - Mermaid source, forwarded to encodeMermaidForInk.
   * @returns {string} Image URL.
   */
  function mermaidInkUrl(code) {
    const payload = encodeMermaidForInk(code);
    return `https://mermaid.ink/img/${payload}?type=png&bgColor=!white`;
  }
  /**
   * Returns a promise that resolves (with no value) after `ms` milliseconds.
   *
   * @param {number} ms - Timer duration handed to setTimeout.
   * @returns {Promise<void>}
   */
  function delay(ms) {
    return new Promise((done) => {
      setTimeout(done, ms);
    });
  }
  /**
   * Rounds a value to an integer and clamps it into the 0..100 range.
   *
   * @param {*} value - Anything coercible by Number(); non-numeric input counts as 0.
   * @returns {number} Integer between 0 and 100 inclusive.
   */
  function clampPercent(value) {
    const rounded = Math.round(Number(value) || 0);
    const capped = Math.min(rounded, 100);
    return Math.max(0, capped);
  }
  /**
   * Sends a progress snapshot to `context.onProgress`, if one is registered.
   *
   * The snapshot carries the phase (default `running`), the clamped percentage,
   * the message and a copy of the current warnings; `extra` keys are spread last
   * and therefore override the defaults. A throwing callback is logged, never rethrown.
   *
   * @param {object} [context] - Export context; may hold `onProgress` and `warnings`.
   * @param {*} progress - Raw percentage, normalised by clampPercent.
   * @param {string} message - Human-readable status text.
   * @param {object} [extra] - Additional fields merged into the snapshot.
   * @returns {void}
   */
  function reportProgress(context, progress, message, extra = {}) {
    if (!context?.onProgress) {
      return;
    }
    try {
      const snapshot = {
        phase: extra.phase || 'running',
        progress: clampPercent(progress),
        message,
        warnings: [...(context.warnings || [])],
        ...extra,
      };
      context.onProgress(snapshot);
    } catch (callbackError) {
      console.warn('[export-word] progress callback failed', callbackError);
    }
  }
  /**
   * Reports conversion progress, mapping finished work onto the 10%..88% band.
   *
   * Work is counted as outline leaves plus Mermaid blocks. The counters are read
   * null-safely so a missing context behaves like an empty one instead of throwing
   * (reportProgress itself already tolerates a missing context).
   *
   * @param {object} [context] - Export context with `stats`, `convertedLeafCount`, `convertedMermaidCount`.
   * @param {string} message - Status text forwarded to reportProgress.
   * @returns {void}
   */
  function reportConversionProgress(context, message) {
    const stats = context?.stats || {};
    // Never divide by zero: an empty outline counts as one unit of work.
    const total = Math.max(1, (stats.leafCount || 0) + (stats.mermaidCount || 0));
    const converted = (context?.convertedLeafCount || 0) + (context?.convertedMermaidCount || 0);
    const done = Math.min(total, converted);
    reportProgress(context, 10 + (done / total) * 78, message);
  }
  /**
   * Appends a warning to `context.warnings` (when that list exists) and logs it.
   *
   * @param {object} [context] - Export context; its `warnings` array is mutated.
   * @param {string} message - Warning text.
   * @returns {void}
   */
  function addWarning(context, message) {
    const sink = context?.warnings;
    if (sink) {
      sink.push(message);
    }
    console.warn(`[export-word] ${message}`);
  }
  /**
   * Emits a single downgrade warning per unsupported HTML tag name.
   *
   * Tag names are lower-cased and remembered in `context.unsupportedHtmlTags`
   * (created on first use) so repeated occurrences stay silent.
   *
   * @param {object} context - Export context; gains/updates `unsupportedHtmlTags`.
   * @param {*} tagName - Tag name; empty values are ignored.
   * @returns {void}
   */
  function addUnsupportedHtmlWarning(context, tagName) {
    const tag = String(tagName || '').toLowerCase();
    if (!tag) {
      return;
    }
    let seenTags = context.unsupportedHtmlTags;
    if (!seenTags) {
      seenTags = new Set();
      context.unsupportedHtmlTags = seenTags;
    }
    if (!seenTags.has(tag)) {
      seenTags.add(tag);
      addWarning(context, `HTML 标签 <${tag}> 导出时已降级,请核对 Word 内容。`);
    }
  }
  /**
   * Collapses whitespace runs to single spaces, trims, and truncates with `...`.
   *
   * @param {*} value - Source text; falsy values become an empty string.
   * @param {number} [maxLength=140] - Maximum length before the ellipsis is appended.
   * @returns {string} Single-line text.
   */
  function compactText(value, maxLength = 140) {
    const collapsed = String(value || '').replace(/\s+/g, ' ').trim();
    if (collapsed.length > maxLength) {
      return `${collapsed.slice(0, maxLength)}...`;
    }
    return collapsed;
  }
  /**
   * Counts fenced ```mermaid blocks in Markdown text (fence keyword matched case-insensitively).
   *
   * @param {*} content - Markdown source; falsy values count as empty.
   * @returns {number} Number of closed mermaid fences.
   */
  function countMermaidBlocks(content) {
    const fences = String(content || '').matchAll(/```mermaid[\s\S]*?```/gi);
    return [...fences].length;
  }
  /**
   * Walks an outline tree and totals its leaves and the Mermaid blocks in leaf content.
   *
   * An item with a non-empty `children` array is a branch (its own content is not
   * inspected); every other item is a leaf.
   *
   * @param {Array<object>} [items] - Outline items with optional `children` and `content`.
   * @returns {{leafCount: number, mermaidCount: number}}
   */
  function countOutlineStats(items = []) {
    const totals = { leafCount: 0, mermaidCount: 0 };
    for (const entry of items || []) {
      const partial = entry.children?.length
        ? countOutlineStats(entry.children)
        : { leafCount: 1, mermaidCount: countMermaidBlocks(entry.content) };
      totals.leafCount += partial.leafCount;
      totals.mermaidCount += partial.mermaidCount;
    }
    return totals;
  }
  /**
   * Turns arbitrary text into a safe file name of at most 120 characters.
   *
   * Reserved path characters and control characters become `_`, whitespace runs
   * collapse to one space, and the result is trimmed. The text is trimmed again
   * after truncation so a cut that lands right after a space cannot leave
   * trailing whitespace in the file name.
   *
   * @param {*} value - Desired name; falsy or effectively empty input yields the default name.
   * @returns {string} Non-empty sanitized name.
   */
  function sanitizeFilename(value) {
    const fallbackName = '标书文档';
    const cleaned = String(value || fallbackName)
      .replace(/[<>:"/\\|?*\x00-\x1F]/g, '_')
      .replace(/\s+/g, ' ')
      .trim()
      .slice(0, 120)
      .trimEnd();
    return cleaned || fallbackName;
  }
  /**
   * Removes control characters (everything below U+0020 except tab, LF and CR) from text.
   *
   * @param {*} value - Source text; falsy values become an empty string.
   * @returns {string} Text without the stripped control characters.
   */
  function cleanText(value) {
    const raw = String(value || '');
    return raw.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, '');
  }
  /**
   * Creates a docx TextRun in the 宋体 font with the requested inline marks.
   *
   * @param {*} text - Run text; passed through cleanText.
   * @param {object} [options] - `size` (default 24), `bold`, `italics`, `strike`, `color`, `underline`.
   * @returns {TextRun}
   */
  function textRun(text, options = {}) {
    const { size, bold, italics, strike, color, underline } = options;
    return new TextRun({
      text: cleanText(text),
      font: '宋体',
      size: size || 24,
      bold,
      italics,
      strike,
      color,
      // Any truthy underline request maps to a single underline.
      underline: underline ? { type: UnderlineType.SINGLE } : undefined,
    });
  }
  /**
   * Creates a TextRun that consists of exactly one line break.
   *
   * @returns {TextRun}
   */
  function lineBreakRun() {
    const singleBreak = { break: 1 };
    return new TextRun(singleBreak);
  }
  /**
   * Converts text that may contain literal `<br>` tags into runs separated by line breaks.
   *
   * Every `<br>` (any casing, optional slash) yields a break run; empty segments
   * between breaks yield no text run.
   *
   * @param {*} value - Source text.
   * @param {object} [options] - Mark options forwarded to textRun.
   * @returns {Array<TextRun>}
   */
  function textRunsWithBreaks(value, options = {}) {
    const segments = String(value || '').split(/<br\s*\/?\s*>/gi);
    return segments.flatMap((segment, position) => {
      const produced = [];
      if (position > 0) {
        produced.push(lineBreakRun());
      }
      if (segment) {
        produced.push(textRun(segment, options));
      }
      return produced;
    });
  }
  /**
   * Creates a docx Paragraph with the document's default spacing (line 360).
   *
   * @param {Array} children - Inline runs; an empty/missing list is replaced by one empty text run.
   * @param {object} [options] - `heading`, `alignment`, `bullet`, `numbering`, `before` (default 0),
   *   `after` (default 160 when null/undefined), `indent`, `border`, `shading`.
   * @returns {Paragraph}
   */
  function paragraph(children, options = {}) {
    const content = children?.length ? children : [textRun('')];
    const spacing = {
      before: options.before || 0,
      // `??` keeps an explicit `after: 0` intact.
      after: options.after ?? 160,
      line: 360,
    };
    return new Paragraph({
      children: content,
      heading: options.heading,
      alignment: options.alignment,
      bullet: options.bullet,
      numbering: options.numbering,
      spacing,
      indent: options.indent,
      border: options.border,
      shading: options.shading,
    });
  }
  /**
   * Returns the border definition shared by exported tables:
   * size-1 single lines, `DCDFF6` for the outer frame and `E8EDF6` for inner rules.
   *
   * @returns {object} Border options with top/bottom/left/right/insideHorizontal/insideVertical.
   */
  function tableBorders() {
    const thinLine = (color) => ({ style: BorderStyle.SINGLE, size: 1, color });
    return {
      top: thinLine('DCDFF6'),
      bottom: thinLine('DCDFF6'),
      left: thinLine('DCDFF6'),
      right: thinLine('DCDFF6'),
      insideHorizontal: thinLine('E8EDF6'),
      insideVertical: thinLine('E8EDF6'),
    };
  }
  /**
   * Splits DOCX_TABLE_WIDTH_TWIPS evenly across the columns.
   *
   * Each column gets the floored share; the rounding remainder goes to the last column
   * so the widths always add up to the full table width.
   *
   * @param {number} columnCount - Number of columns; values below 1 are treated as 1.
   * @returns {Array<number>} One width per column.
   */
  function tableColumnWidths(columnCount) {
    const columns = Math.max(1, columnCount || 1);
    const evenShare = Math.floor(DOCX_TABLE_WIDTH_TWIPS / columns);
    const leftover = DOCX_TABLE_WIDTH_TWIPS - evenShare * columns;
    const widths = Array.from({ length: columns }).fill(evenShare);
    widths[widths.length - 1] += leftover;
    return widths;
  }
  /**
   * Computes the width of a cell spanning `columnSpan` of `totalColumns` equal columns.
   *
   * @param {number} columnSpan - Columns covered by the cell (minimum 1).
   * @param {number} totalColumns - Columns in the table (minimum 1).
   * @returns {number} Rounded share of DOCX_TABLE_WIDTH_TWIPS.
   */
  function tableCellWidth(columnSpan, totalColumns) {
    const columns = Math.max(1, totalColumns || 1);
    const span = Math.max(1, columnSpan || 1);
    const spannedWidth = DOCX_TABLE_WIDTH_TWIPS * span;
    return Math.round(spannedWidth / columns);
  }
  /**
   * Creates a docx TableCell with standard margins and a width proportional to its span.
   *
   * @param {object} params
   * @param {Array} params.children - Block content of the cell.
   * @param {boolean} [params.isHeader=false] - Adds the `F1F6FF` header shading.
   * @param {number} [params.columnSpan=1] - Columns covered; only emitted when greater than 1.
   * @param {number} [params.totalColumns=1] - Columns in the table, used for the width.
   * @returns {TableCell}
   */
  function createTableCell({ children, isHeader = false, columnSpan = 1, totalColumns = 1 }) {
    const span = Math.max(1, columnSpan || 1);
    const cellWidth = tableCellWidth(span, totalColumns);
    return new TableCell({
      children,
      shading: isHeader ? { type: ShadingType.CLEAR, fill: 'F1F6FF' } : undefined,
      margins: { top: 120, bottom: 120, left: 140, right: 140 },
      columnSpan: span > 1 ? span : undefined,
      width: { size: cellWidth, type: WidthType.DXA },
    });
  }
  /**
   * Wraps rows in a fixed-layout, full-width docx Table with the shared borders.
   *
   * @param {Array<TableRow>} rows - Table rows.
   * @param {number} columnCount - Column count used to derive the column widths.
   * @returns {Table}
   */
  function createDocxTable(rows, columnCount) {
    const tableOptions = {
      rows,
      width: { size: 100, type: WidthType.PERCENTAGE },
      columnWidths: tableColumnWidths(columnCount),
      layout: TableLayoutType.FIXED,
      borders: tableBorders(),
    };
    return new Table(tableOptions);
  }
  /**
   * Parses a `colspan` attribute value into an integer span of at least 1.
   *
   * @param {*} value - Raw attribute value.
   * @returns {number} Parsed span when it is a finite integer above 1, otherwise 1.
   */
  function normalizeColumnSpan(value) {
    const parsed = Number.parseInt(String(value || ''), 10);
    if (!Number.isFinite(parsed)) {
      return 1;
    }
    return parsed > 1 ? parsed : 1;
  }
  /**
   * Tells whether a line both starts and ends with a pipe (ignoring surrounding whitespace).
   *
   * @param {*} line - Candidate line.
   * @returns {boolean}
   */
  function isMarkdownTableRowLine(line) {
    const pipeWrapped = /^\s*\|.*\|\s*$/;
    return pipeWrapped.test(String(line || ''));
  }
  /**
   * Tells whether a line is a table delimiter row with at least two columns,
   * each made of three or more dashes with optional alignment colons.
   *
   * @param {*} line - Candidate line.
   * @returns {boolean}
   */
  function isMarkdownTableDelimiterLine(line) {
    const delimiterRow = /^\s*\|?\s*:?-{3,}:?\s*(\|\s*:?-{3,}:?\s*)+\|?\s*$/;
    return delimiterRow.test(String(line || ''));
  }
  /**
   * Splits one Markdown table row into trimmed cell strings.
   *
   * The optional leading and trailing border pipes are dropped, and backslash-escaped
   * pipes (`\|`) are kept inside their cell, escape included. The trailing pipe is only
   * treated as a border when it is not escaped, matching how the splitting loop
   * handles escapes.
   *
   * @param {*} line - Table row text.
   * @returns {Array<string>} Cells in order; empty array when the line has no pipe.
   */
  function splitMarkdownTableCells(line) {
    let source = String(line || '').trim();
    if (!source.includes('|')) {
      return [];
    }
    if (source.startsWith('|')) {
      source = source.slice(1);
    }
    if (source.endsWith('|')) {
      // An odd number of backslashes directly before the pipe means the pipe is escaped.
      const precedingBackslashes = /\\*$/.exec(source.slice(0, -1))[0].length;
      if (precedingBackslashes % 2 === 0) {
        source = source.slice(0, -1);
      }
    }
    const cells = [];
    let current = '';
    let escaped = false;
    for (const char of source) {
      if (char === '|' && !escaped) {
        cells.push(current.trim());
        current = '';
        continue;
      }
      current += char;
      // A backslash escapes the next character unless it is itself escaped.
      escaped = char === '\\' && !escaped;
    }
    cells.push(current.trim());
    return cells;
  }
  /**
   * Tells whether a single cell is a delimiter cell (`---`, `:---`, `---:`, `:---:`; three or more dashes).
   *
   * @param {*} cell - Cell text; surrounding whitespace is ignored.
   * @returns {boolean}
   */
  function isMarkdownTableDelimiterCell(cell) {
    const trimmed = String(cell || '').trim();
    return /^:?-{3,}:?$/.test(trimmed);
  }
  /**
   * Returns the whitespace that precedes the first pipe of a table row.
   *
   * @param {*} line - Table row text.
   * @returns {string} Leading whitespace, or '' when the line does not start with optional whitespace plus a pipe.
   */
  function markdownTableRowIndent(line) {
    const leading = /^(\s*)\|/.exec(String(line || ''));
    if (!leading) {
      return '';
    }
    return leading[1];
  }
  /**
   * Renders cells as one pipe-delimited table row: `<indent>| a | b |`.
   *
   * @param {Array} cells - Cell values; each is stringified (falsy becomes '') and trimmed.
   * @param {string} [indent=''] - Prefix placed before the first pipe.
   * @returns {string}
   */
  function formatMarkdownTableRow(cells, indent = '') {
    const rendered = cells.map((cell) => String(cell || '').trim());
    const inner = rendered.join(' | ');
    return `${indent}| ${inner} |`;
  }
  /**
   * Rebuilds a table whose delimiter row and data rows were squashed onto one line.
   *
   * Applies only when both lines are pipe-wrapped rows, the header has at least two
   * cells, and the second line starts with one delimiter cell per header column
   * followed by further cells.
   *
   * @param {string} headerLine - Line holding the header row.
   * @param {string} nextLine - Line holding the delimiter cells plus squashed data cells.
   * @returns {Array<string>|null} Header, delimiter and data rows, or null when the pattern does not match.
   */
  function expandCompressedMarkdownTableRows(headerLine, nextLine) {
    const bothAreRows = isMarkdownTableRowLine(headerLine) && isMarkdownTableRowLine(nextLine);
    if (!bothAreRows) {
      return null;
    }
    const headerCells = splitMarkdownTableCells(headerLine);
    const nextCells = splitMarkdownTableCells(nextLine);
    const columnCount = headerCells.length;
    if (columnCount < 2 || nextCells.length <= columnCount) {
      return null;
    }
    const delimiterCells = nextCells.slice(0, columnCount);
    if (!delimiterCells.every((cell) => isMarkdownTableDelimiterCell(cell))) {
      return null;
    }
    // The model sometimes squashes the delimiter row and the following data rows onto
    // one line; split them back into a GFM table using the header's column count.
    const indent = markdownTableRowIndent(headerLine);
    const lines = [
      formatMarkdownTableRow(headerCells, indent),
      formatMarkdownTableRow(delimiterCells, indent),
    ];
    let cursor = columnCount;
    while (cursor < nextCells.length) {
      const pending = nextCells.length - cursor;
      // A leading empty cell that keeps the remainder from dividing evenly is the
      // leftover of a `| |` row boundary: skip it rather than treat it as data.
      const isBoundaryArtifact = pending > columnCount && !nextCells[cursor] && pending % columnCount !== 0;
      if (isBoundaryArtifact) {
        cursor += 1;
        continue;
      }
      const rowCells = nextCells.slice(cursor, cursor + columnCount);
      cursor += columnCount;
      // Rows made only of blank cells are dropped.
      if (rowCells.some((cell) => String(cell || '').trim())) {
        lines.push(formatMarkdownTableRow(rowCells, indent));
      }
    }
    return lines;
  }
  /**
   * Splits a line that contains several table rows glued together into one line per row.
   *
   * Only lines containing a delimiter cell between pipes are touched. Row boundaries
   * are assumed wherever two pipes are separated by whitespace only. Text before the
   * first pipe is emitted as its own line, unless it is pure indentation, in which
   * case it is kept as the indent of every produced row.
   *
   * @param {*} line - Source line.
   * @returns {Array<string>} One or more lines replacing the input line.
   */
  function expandInlineMarkdownTableRows(line) {
    const source = String(line || '');
    const hasDelimiterCell = /\|\s*:?-{3,}:?\s*\|/.test(source);
    if (!hasDelimiterCell) {
      return [source];
    }
    const firstPipeIndex = source.indexOf('|');
    if (firstPipeIndex < 0) {
      return [source];
    }
    const prefix = source.slice(0, firstPipeIndex);
    const tableRows = source
      .slice(firstPipeIndex)
      .trim()
      .replace(/\|\s+\|/g, '|\n|')
      .split('\n')
      .map((row) => row.trim())
      .filter(Boolean);
    const prefixIsIndentOnly = /^\s*$/.test(prefix);
    if (!prefixIsIndentOnly) {
      return [prefix.trimEnd(), ...tableRows];
    }
    return tableRows.map((row) => `${prefix}${row}`);
  }
  /**
   * Repairs loosely formatted Markdown tables before the content is parsed.
   *
   * Steps: normalise line endings to `\n`; split lines holding several glued rows;
   * rebuild tables whose delimiter and data rows share a line; and insert a blank
   * line before a table that directly follows non-table text.
   *
   * @param {*} content - Markdown source.
   * @returns {string} Markdown with normalised tables.
   */
  function normalizeMarkdownTablesForDocx(content) {
    const candidates = String(content || '')
      .replace(/\r\n?/g, '\n')
      .split('\n')
      .flatMap((rawLine) => expandInlineMarkdownTableRows(rawLine));
    const output = [];
    let cursor = 0;
    while (cursor < candidates.length) {
      const current = candidates[cursor];
      const following = candidates[cursor + 1] || '';
      const rebuiltRows = expandCompressedMarkdownTableRows(current, following);
      const opensTable = Boolean(rebuiltRows)
        || (isMarkdownTableRowLine(current) && isMarkdownTableDelimiterLine(following));
      const lastEmitted = output[output.length - 1] || '';
      // A table needs a blank line between it and preceding non-table text.
      if (opensTable && lastEmitted.trim() && !isMarkdownTableRowLine(lastEmitted)) {
        output.push('');
      }
      if (rebuiltRows) {
        output.push(...rebuiltRows);
        // The following line was consumed as part of the rebuilt table.
        cursor += 2;
      } else {
        output.push(current);
        cursor += 1;
      }
    }
    return output.join('\n');
  }
  /**
   * Allocates a fresh numbering reference name for an ordered list and records it on the context.
   *
   * @param {object} context - Export context; `numberingIndex` is incremented and the
   *   new name is appended to `numberingReferences` (created when missing).
   * @returns {string} `<NUMBERING_REFERENCE_PREFIX>-<index>`, with a 1-based index.
   */
  function createOrderedListReference(context) {
    if (!context.numberingReferences) {
      context.numberingReferences = [];
    }
    const nextIndex = (context.numberingIndex || 0) + 1;
    context.numberingIndex = nextIndex;
    const reference = `${NUMBERING_REFERENCE_PREFIX}-${nextIndex}`;
    context.numberingReferences.push(reference);
    return reference;
  }
  /**
   * Maps a heading depth to a docx HeadingLevel.
   *
   * @param {number} level - Heading depth.
   * @returns {*} HEADING_1 for depth <= 1, HEADING_2 / HEADING_3 for exactly 2 / 3, otherwise HEADING_4.
   */
  function headingLevel(level) {
    if (level <= 1) {
      return HeadingLevel.HEADING_1;
    }
    switch (level) {
      case 2:
        return HeadingLevel.HEADING_2;
      case 3:
        return HeadingLevel.HEADING_3;
      default:
        return HeadingLevel.HEADING_4;
    }
  }
  /**
   * Derives the image type keyword from a MIME type / Content-Type value.
   *
   * Matching is case-insensitive because MIME type names are, so `IMAGE/PNG` or
   * `Image/WebP` resolve like their lower-case forms. The value is stringified first,
   * so non-string input no longer throws.
   *
   * @param {*} mime - MIME type, possibly with parameters (e.g. `image/jpeg; charset=binary`).
   * @returns {('png'|'jpg'|'gif'|'bmp'|'webp'|null)} Type keyword, or null when unsupported/missing.
   */
  function imageTypeFromMime(mime) {
    if (!mime) {
      return null;
    }
    const normalized = String(mime).toLowerCase();
    // Checked in order; the first token contained in the MIME string wins.
    const tokenToType = [
      ['png', 'png'],
      ['jpeg', 'jpg'],
      ['jpg', 'jpg'],
      ['gif', 'gif'],
      ['bmp', 'bmp'],
      ['webp', 'webp'],
    ];
    for (const [token, type] of tokenToType) {
      if (normalized.includes(token)) {
        return type;
      }
    }
    return null;
  }
  /**
   * Derives the image type keyword from a file path's extension (case-insensitive).
   *
   * @param {string} [filePath] - File path or URL pathname.
   * @returns {('png'|'jpg'|'gif'|'bmp'|'webp'|null)} Type keyword (`jpeg` maps to `jpg`), or null when unsupported.
   */
  function imageTypeFromPath(filePath) {
    // path.extname yields '' or '.ext', so dropping the first character removes the dot.
    const extension = path.extname(filePath || '').toLowerCase().slice(1);
    const normalized = extension === 'jpeg' ? 'jpg' : extension;
    const supported = ['png', 'jpg', 'gif', 'bmp', 'webp'];
    return supported.includes(normalized) ? normalized : null;
  }
  /**
   * Converts WebP image data to PNG; every other input is returned untouched.
   *
   * @param {{buffer: Buffer, type: string}|null} loaded - Loaded image descriptor.
   * @returns {{buffer: Buffer, type: string}|null} The same object, or a new PNG descriptor for WebP input.
   * @throws {Error} When the WebP data cannot be decoded through Electron's nativeImage.
   */
  function normalizeImageForDocx(loaded) {
    // Missing data, a missing type, or any non-WebP type all pass through unchanged.
    if (!loaded?.buffer || loaded.type !== 'webp') {
      return loaded;
    }
    const decoded = nativeImage?.createFromBuffer ? nativeImage.createFromBuffer(loaded.buffer) : null;
    if (!decoded || decoded.isEmpty()) {
      throw new Error('WebP 图片转换失败');
    }
    return { buffer: decoded.toPNG(), type: 'png' };
  }
  /**
   * Resolves a `yibiao-asset://<root>/<relative path>` URL to an absolute file path.
   *
   * `<root>` must be `generated-images` or `imported-images`. The resolved path has
   * to stay inside that root directory, otherwise null is returned.
   *
   * Unlike a plain property lookup, the root is matched against own keys only, so
   * hostnames such as `constructor` no longer resolve to inherited members. A URL
   * that cannot be parsed, or a path with a malformed percent-escape, yields null
   * instead of throwing.
   *
   * @param {string} url - Asset URL.
   * @returns {string|null} Absolute path inside the asset root, or null when unresolvable.
   */
  function resolveAssetImagePath(url) {
    if (!app?.getPath) return null;
    let assetUrl;
    try {
      assetUrl = new URL(url);
    } catch {
      return null;
    }
    const assetRoots = {
      'generated-images': getGeneratedImagesDir(app),
      'imported-images': getImportedImagesDir(app),
    };
    if (!Object.hasOwn(assetRoots, assetUrl.hostname)) return null;
    const rootDir = assetRoots[assetUrl.hostname];
    if (!rootDir) return null;
    let relativePath;
    try {
      relativePath = decodeURIComponent(assetUrl.pathname.replace(/^\/+/, ''));
    } catch {
      // Malformed percent-encoding: treat as unresolvable.
      return null;
    }
    if (!relativePath) return null;
    const baseDir = path.resolve(rootDir);
    const resolvedPath = path.resolve(baseDir, relativePath);
    // Reject anything that escapes the root (e.g. through encoded `..` segments).
    if (resolvedPath !== baseDir && !resolvedPath.startsWith(`${baseDir}${path.sep}`)) {
      return null;
    }
    return resolvedPath;
  }
  384. async function loadImage(source, context = {}) {
  385. const url = String(source || '').trim();
  386. if (!url) return null;
  387. const dataUrlMatch = /^data:([^;,]+);base64,(.+)$/i.exec(url);
  388. if (dataUrlMatch) {
  389. return {
  390. buffer: Buffer.from(dataUrlMatch[2], 'base64'),
  391. type: imageTypeFromMime(dataUrlMatch[1]),
  392. };
  393. }
  394. if (/^yibiao-asset:\/\//i.test(url)) {
  395. const assetPath = resolveAssetImagePath(url);
  396. if (!assetPath || !fs.existsSync(assetPath)) {
  397. return null;
  398. }
  399. return {
  400. buffer: fs.readFileSync(assetPath),
  401. type: imageTypeFromPath(assetPath),
  402. };
  403. }
  404. if (/^https?:\/\//i.test(url)) {
  405. const response = await fetch(url);
  406. if (!response.ok) {
  407. throw new Error(`图片下载失败:${url}`);
  408. }
  409. const type = imageTypeFromMime(response.headers.get('content-type')) || imageTypeFromPath(new URL(url).pathname);
  410. return { buffer: Buffer.from(await response.arrayBuffer()), type };
  411. }
  412. const fileUrlPrefix = 'file://';
  413. const rawPath = url.startsWith(fileUrlPrefix) ? fileURLToPath(url) : url;
  414. const resolvedPath = path.isAbsolute(rawPath)
  415. ? rawPath
  416. : path.resolve(context.baseDir || process.cwd(), rawPath);
  417. if (!fs.existsSync(resolvedPath)) {
  418. return null;
  419. }
  420. return {
  421. buffer: fs.readFileSync(resolvedPath),
  422. type: imageTypeFromPath(resolvedPath),
  423. };
  424. }
  425. async function loadImageWithRetry(source, context = {}, options = {}) {
  426. const retryAttempts = Math.max(0, Number(options.retryAttempts) || 0);
  427. const retryDelayMs = Math.max(0, Number(options.retryDelayMs) || 0);
  428. let attempt = 0;
  429. while (attempt <= retryAttempts) {
  430. try {
  431. return await loadImage(source, context);
  432. } catch (error) {
  433. if (attempt >= retryAttempts) {
  434. throw error;
  435. }
  436. attempt += 1;
  437. if (typeof options.onRetry === 'function') {
  438. options.onRetry(attempt, error);
  439. }
  440. if (retryDelayMs > 0) {
  441. await delay(retryDelayMs);
  442. }
  443. }
  444. }
  445. return null;
  446. }
  447. async function imageRunFromNode(node, context, options = {}) {
  448. let loaded = null;
  449. const imageLabel = compactText(node.alt || node.url || '未知图片');
  450. try {
  451. loaded = await loadImageWithRetry(node.url, context, options.loadRetry);
  452. } catch (error) {
  453. const message = `图片无法导出:${imageLabel},${compactText(error.message || '下载失败', 120)}`;
  454. addWarning(context, message);
  455. return textRun(`[${message}]`, { color: 'C83220' });
  456. }
  457. if (!loaded?.buffer || !loaded.type) {
  458. const message = `图片无法导出:${imageLabel},未找到可用图片数据`;
  459. addWarning(context, message);
  460. return textRun(`[${message}]`, { color: 'C83220' });
  461. }
  462. try {
  463. loaded = normalizeImageForDocx(loaded);
  464. } catch (error) {
  465. const message = `图片无法导出:${imageLabel},${error.message || '图片格式转换失败'}`;
  466. addWarning(context, message);
  467. return textRun(`[${message}]`, { color: 'C83220' });
  468. }
  469. let size;
  470. try {
  471. size = imageSize(loaded.buffer);
  472. } catch (error) {
  473. const message = `图片无法导出:${imageLabel},图片尺寸识别失败`;
  474. addWarning(context, message);
  475. return textRun(`[${message}]`, { color: 'C83220' });
  476. }
  477. const sourceWidth = size.width || MAX_IMAGE_WIDTH;
  478. const sourceHeight = size.height || Math.round(MAX_IMAGE_WIDTH * 0.62);
  479. const ratio = Math.min(1, MAX_IMAGE_WIDTH / sourceWidth);
  480. const width = Math.round(sourceWidth * ratio);
  481. const height = Math.round(sourceHeight * ratio);
  482. return new ImageRun({
  483. type: loaded.type,
  484. data: loaded.buffer,
  485. transformation: { width, height },
  486. altText: {
  487. title: cleanText(node.alt || '图片'),
  488. description: cleanText(node.alt || node.url || 'Markdown 图片'),
  489. name: cleanText(node.alt || 'image'),
  490. },
  491. });
  492. }
  493. async function imageParagraphFromSource(source, alt, context, options = {}) {
  494. return paragraph([await imageRunFromNode({ url: source, alt }, context, options)], { alignment: AlignmentType.CENTER });
  495. }
  496. async function inlineRuns(nodes = [], context = {}, marks = {}) {
  497. const runs = [];
  498. for (const node of nodes) {
  499. if (node.type === 'text') {
  500. runs.push(...textRunsWithBreaks(node.value, marks));
  501. } else if (node.type === 'strong') {
  502. runs.push(...await inlineRuns(node.children, context, { ...marks, bold: true }));
  503. } else if (node.type === 'emphasis') {
  504. runs.push(...await inlineRuns(node.children, context, { ...marks, italics: true }));
  505. } else if (node.type === 'delete') {
  506. runs.push(...await inlineRuns(node.children, context, { ...marks, strike: true }));
  507. } else if (node.type === 'inlineCode') {
  508. runs.push(new TextRun({ text: cleanText(node.value), font: 'Consolas', size: 22, color: '155BD7' }));
  509. } else if (node.type === 'break') {
  510. runs.push(lineBreakRun());
  511. } else if (node.type === 'html' && /^<br\s*\/?\s*>$/i.test(String(node.value || '').trim())) {
  512. runs.push(lineBreakRun());
  513. } else if (node.type === 'html') {
  514. const $ = cheerio.load(String(node.value || ''), null, false);
  515. runs.push(...await htmlInlineRuns($, $.root().contents().toArray(), context, marks));
  516. } else if (node.type === 'link') {
  517. const children = await inlineRuns(node.children, context, { ...marks, color: '2174FD', underline: true });
  518. runs.push(new ExternalHyperlink({ link: node.url, children }));
  519. } else if (node.type === 'image') {
  520. runs.push(await imageRunFromNode(node, context));
  521. } else if (node.children) {
  522. runs.push(...await inlineRuns(node.children, context, marks));
  523. }
  524. }
  525. return runs;
  526. }
  /**
   * Collects the plain text of an AST node by concatenating its `text` and `inlineCode` descendants.
   *
   * @param {object} [node] - AST node.
   * @returns {string} Concatenated text; '' for a missing node.
   */
  function nodeText(node) {
    if (!node) {
      return '';
    }
    const isTextLeaf = node.type === 'text' || node.type === 'inlineCode';
    if (isTextLeaf) {
      return String(node.value || '');
    }
    let combined = '';
    for (const child of node.children || []) {
      combined += nodeText(child);
    }
    return combined;
  }
  /**
   * Tells whether a paragraph consists of exactly one image, ignoring whitespace-only text nodes.
   *
   * @param {object} node - Paragraph node.
   * @returns {boolean}
   */
  function isImageOnlyParagraph(node) {
    const children = node.children || [];
    const meaningful = children.filter((child) => child.type !== 'text' || String(child.value || '').trim());
    if (meaningful.length !== 1) {
      return false;
    }
    return children.some((child) => child.type === 'image');
  }
  /**
   * Tells whether a paragraph's text starts with a figure-caption marker: `图` followed by a colon.
   *
   * Both the ASCII colon and the full-width colon (U+FF1A) are accepted. The
   * character class previously listed the ASCII colon twice, so captions typed
   * with the full-width colon were not recognised.
   *
   * @param {object} node - Paragraph node.
   * @returns {boolean}
   */
  function isFigureCaptionParagraph(node) {
    return /^图[:\uFF1A]/.test(nodeText(node).trim());
  }
  /**
   * Returns the lower-cased tag name of an HTML node.
   *
   * @param {object} [node] - Parsed HTML node.
   * @returns {string} Tag name, or '' when the node has no name.
   */
  function htmlTagName(node) {
    const rawName = node?.name || '';
    return String(rawName).toLowerCase();
  }
  /**
   * Tells whether an element has at least one direct child that must be exported as a block
   * (table, list, blockquote, pre, container element or image).
   *
   * @param {function} $ - Cheerio instance bound to the parsed fragment.
   * @param {object} node - Element to inspect.
   * @returns {boolean}
   */
  function hasBlockHtmlChildren($, node) {
    const blockTags = ['table', 'ul', 'ol', 'blockquote', 'pre', 'div', 'section', 'article', 'img'];
    const directChildren = $(node).contents().toArray();
    return directChildren.some((child) => blockTags.includes(htmlTagName(child)));
  }
  545. async function htmlInlineRuns($, nodes = [], context = {}, marks = {}) {
  546. const runs = [];
  547. for (const node of nodes) {
  548. if (node.type === 'text') {
  549. runs.push(...textRunsWithBreaks(node.data || '', marks));
  550. continue;
  551. }
  552. if (node.type !== 'tag') {
  553. continue;
  554. }
  555. const tag = htmlTagName(node);
  556. if (tag === 'br') {
  557. runs.push(lineBreakRun());
  558. } else if (tag === 'strong' || tag === 'b') {
  559. runs.push(...await htmlInlineRuns($, $(node).contents().toArray(), context, { ...marks, bold: true }));
  560. } else if (tag === 'em' || tag === 'i') {
  561. runs.push(...await htmlInlineRuns($, $(node).contents().toArray(), context, { ...marks, italics: true }));
  562. } else if (tag === 'code') {
  563. runs.push(new TextRun({ text: cleanText($(node).text()), font: 'Consolas', size: 22, color: '155BD7' }));
  564. } else if (tag === 'a') {
  565. const href = $(node).attr('href') || '';
  566. const children = await htmlInlineRuns($, $(node).contents().toArray(), context, { ...marks, color: '2174FD', underline: true });
  567. if (href) {
  568. runs.push(new ExternalHyperlink({ link: href, children }));
  569. } else {
  570. runs.push(...children);
  571. }
  572. } else if (tag === 'img') {
  573. runs.push(await imageRunFromNode({ url: $(node).attr('src'), alt: $(node).attr('alt') || 'HTML 图片' }, context));
  574. } else {
  575. if (!['span', 'small', 'sub', 'sup'].includes(tag)) {
  576. addUnsupportedHtmlWarning(context, tag);
  577. }
  578. runs.push(...await htmlInlineRuns($, $(node).contents().toArray(), context, marks));
  579. }
  580. }
  581. return runs;
  582. }
  583. async function htmlTableToDocx($, tableNode, context) {
  584. const rows = [];
  585. const rowDescriptors = $(tableNode).find('tr').toArray().map((rowNode) => {
  586. const cells = $(rowNode).children('th,td').toArray().map((cellNode) => ({
  587. node: cellNode,
  588. columnSpan: normalizeColumnSpan($(cellNode).attr('colspan')),
  589. }));
  590. return {
  591. cells,
  592. columnCount: cells.reduce((sum, cell) => sum + cell.columnSpan, 0),
  593. };
  594. }).filter((row) => row.cells.length);
  595. const maxColumns = Math.max(1, ...rowDescriptors.map((row) => row.columnCount));
  596. for (const row of rowDescriptors) {
  597. const cells = [];
  598. for (const [cellIndex, cell] of row.cells.entries()) {
  599. const cellNode = cell.node;
  600. const isHeader = htmlTagName(cellNode) === 'th';
  601. const remainingSpan = cellIndex === row.cells.length - 1 ? maxColumns - row.columnCount : 0;
  602. cells.push(createTableCell({
  603. children: [paragraph(await htmlInlineRuns($, $(cellNode).contents().toArray(), context, { bold: isHeader }), { after: 80 })],
  604. isHeader,
  605. columnSpan: cell.columnSpan + Math.max(0, remainingSpan),
  606. totalColumns: maxColumns,
  607. }));
  608. }
  609. rows.push(new TableRow({ children: cells }));
  610. }
  611. if (!rows.length) {
  612. return [];
  613. }
  614. return [createDocxTable(rows, maxColumns)];
  615. }
  616. async function htmlListToDocx($, listNode, context, options = {}) {
  617. const blocks = [];
  618. const ordered = htmlTagName(listNode) === 'ol';
  619. const numberingReference = ordered ? createOrderedListReference(context) : null;
  620. for (const itemNode of $(listNode).children('li').toArray()) {
  621. const inlineNodes = $(itemNode).contents().toArray().filter((child) => !['ul', 'ol'].includes(htmlTagName(child)));
  622. const listOptions = ordered
  623. ? { numbering: { reference: numberingReference, level: Math.min(options.listLevel || 0, 2) } }
  624. : { bullet: { level: Math.min(options.listLevel || 0, 2) } };
  625. blocks.push(paragraph(await htmlInlineRuns($, inlineNodes, context), listOptions));
  626. for (const childList of $(itemNode).children('ul,ol').toArray()) {
  627. blocks.push(...await htmlListToDocx($, childList, context, { ...options, listLevel: (options.listLevel || 0) + 1 }));
  628. }
  629. }
  630. return blocks;
  631. }
  632. async function htmlNodeToDocxBlocks($, node, context, options = {}) {
  633. if (node.type === 'text') {
  634. const text = String(node.data || '').trim();
  635. return text ? [paragraph([textRun(text)])] : [];
  636. }
  637. if (node.type !== 'tag') {
  638. return [];
  639. }
  640. const tag = htmlTagName(node);
  641. if (tag === 'table') {
  642. return htmlTableToDocx($, node, context);
  643. }
  644. if (tag === 'img') {
  645. return [await imageParagraphFromSource($(node).attr('src'), $(node).attr('alt') || 'HTML 图片', context)];
  646. }
  647. if (tag === 'ul' || tag === 'ol') {
  648. return htmlListToDocx($, node, context, options);
  649. }
  650. if (tag === 'blockquote') {
  651. return [paragraph(await htmlInlineRuns($, $(node).contents().toArray(), context, { color: '536176' }), {
  652. indent: { left: 360 },
  653. border: { left: { style: BorderStyle.SINGLE, size: 12, color: '2174FD' } },
  654. shading: { type: ShadingType.CLEAR, fill: 'F6F9FF' },
  655. })];
  656. }
  657. if (tag === 'pre') {
  658. return [paragraph([new TextRun({ text: cleanText($(node).text()), font: 'Consolas', size: 21, color: '243048' })], {
  659. shading: { type: ShadingType.CLEAR, fill: 'F6F9FF' },
  660. indent: { left: 260, right: 260 },
  661. })];
  662. }
  663. if (tag === 'br') {
  664. return [paragraph([lineBreakRun()])];
  665. }
  666. if (['div', 'section', 'article'].includes(tag) && hasBlockHtmlChildren($, node)) {
  667. return htmlNodesToDocxBlocks($, $(node).contents().toArray(), context, options);
  668. }
  669. if (tag === 'p' && hasBlockHtmlChildren($, node)) {
  670. return htmlNodesToDocxBlocks($, $(node).contents().toArray(), context, options);
  671. }
  672. if (['p', 'div', 'section', 'article', 'span', 'strong', 'b', 'em', 'i', 'a', 'code'].includes(tag)) {
  673. return [paragraph(await htmlInlineRuns($, $(node).contents().toArray(), context), {
  674. alignment: /^图[::]/.test($(node).text().trim()) ? AlignmentType.CENTER : undefined,
  675. })];
  676. }
  677. addUnsupportedHtmlWarning(context, tag);
  678. return htmlNodesToDocxBlocks($, $(node).contents().toArray(), context, options);
  679. }
  680. async function htmlNodesToDocxBlocks($, nodes = [], context = {}, options = {}) {
  681. const blocks = [];
  682. for (const node of nodes) {
  683. blocks.push(...await htmlNodeToDocxBlocks($, node, context, options));
  684. }
  685. return blocks;
  686. }
  687. async function htmlToDocxBlocks(html, context = {}, options = {}) {
  688. const source = String(html || '').trim();
  689. if (!source) {
  690. return [];
  691. }
  692. const $ = cheerio.load(source, null, false);
  693. const blocks = await htmlNodesToDocxBlocks($, $.root().contents().toArray(), context, options);
  694. if (!blocks.length) {
  695. addWarning(context, '部分 HTML 内容未能导出,请核对 Word 内容。');
  696. }
  697. return blocks;
  698. }
  699. async function tableCellParagraphs(cell, context, isHeader = false) {
  700. const phrasingNodes = (cell.children || []).filter((child) => child.type !== 'paragraph');
  701. if (phrasingNodes.length) {
  702. return [paragraph(await inlineRuns(phrasingNodes, context, { bold: isHeader }), { after: 80 })];
  703. }
  704. const blocks = await markdownNodesToDocx(cell.children || [], context, { inTable: true });
  705. if (!blocks.length) return [paragraph([textRun('')], { after: 80 })];
  706. return blocks.filter((block) => block instanceof Paragraph);
  707. }
  /**
   * Converts a list of Markdown AST nodes into docx blocks.
   *
   * Handled node types: heading, paragraph, list, table, blockquote, code
   * (with special handling for `mermaid` fences), html and thematicBreak.
   * Any other node that has `children` is flattened by converting its
   * children; nodes without children are dropped.
   *
   * Nodes are processed sequentially so that progress reporting and the
   * Mermaid counter on `context` advance in document order.
   *
   * @param {Array} [nodes=[]] - AST nodes to convert.
   * @param {object} [context={}] - Shared export context (mutated: Mermaid counter).
   * @param {object} [options={}] - `inTable` tightens paragraph spacing and
   *   disables centering; `listLevel` is the current list nesting depth.
   * @returns {Promise<Array>} Blocks in document order.
   */
  async function markdownNodesToDocx(nodes = [], context = {}, options = {}) {
    const output = [];
    for (const node of nodes) {
      switch (node.type) {
        case 'heading': {
          const headingRuns = await inlineRuns(node.children, context);
          output.push(paragraph(headingRuns, {
            heading: headingLevel(node.depth),
            before: node.depth === 1 ? 280 : 180,
            after: 120,
          }));
          break;
        }

        case 'paragraph': {
          const paragraphRuns = await inlineRuns(node.children, context);
          const spacingAfter = options.inTable ? 80 : 160;
          // Image-only paragraphs and figure captions are centered, but never inside tables.
          const centered = !options.inTable && (isImageOnlyParagraph(node) || isFigureCaptionParagraph(node));
          output.push(paragraph(paragraphRuns, {
            after: spacingAfter,
            alignment: centered ? AlignmentType.CENTER : undefined,
          }));
          break;
        }

        case 'list': {
          const numberingReference = node.ordered ? createOrderedListReference(context) : null;
          const currentLevel = options.listLevel || 0;
          // Levels deeper than 2 are rendered at level 2.
          const cappedLevel = Math.min(currentLevel, 2);
          for (const item of node.children || []) {
            const itemChildren = item.children || [];
            const firstParagraph = itemChildren.find((child) => child.type === 'paragraph');
            const restChildren = itemChildren.filter((child) => child !== firstParagraph);
            let listOptions;
            if (node.ordered) {
              listOptions = { numbering: { reference: numberingReference, level: cappedLevel } };
            } else {
              listOptions = { bullet: { level: cappedLevel } };
            }
            // The first paragraph becomes the list entry itself; everything
            // else in the item is rendered after it, one list level deeper.
            output.push(paragraph(await inlineRuns(firstParagraph?.children || [], context), listOptions));
            output.push(...await markdownNodesToDocx(restChildren, context, { ...options, listLevel: currentLevel + 1 }));
          }
          break;
        }

        case 'table': {
          const tableRows = node.children || [];
          const maxColumns = Math.max(1, ...tableRows.map((row) => row.children?.length || 0));
          const rows = [];
          for (let rowIndex = 0; rowIndex < tableRows.length; rowIndex += 1) {
            const rowCells = tableRows[rowIndex].children || [];
            const isHeaderRow = rowIndex === 0;
            const lastCellIndex = rowCells.length - 1;
            const cells = [];
            for (let cellIndex = 0; cellIndex < rowCells.length; cellIndex += 1) {
              // A short row is padded by letting its last cell span the missing columns.
              const columnSpan = cellIndex === lastCellIndex
                ? Math.max(1, maxColumns - rowCells.length + 1)
                : 1;
              cells.push(createTableCell({
                children: await tableCellParagraphs(rowCells[cellIndex], context, isHeaderRow),
                isHeader: isHeaderRow,
                columnSpan,
                totalColumns: maxColumns,
              }));
            }
            rows.push(new TableRow({ children: cells }));
          }
          if (rows.length) {
            output.push(createDocxTable(rows, maxColumns));
          }
          break;
        }

        case 'blockquote': {
          for (const child of node.children || []) {
            if (child.type !== 'paragraph') {
              output.push(...await markdownNodesToDocx([child], context, options));
              continue;
            }
            const quoteRuns = await inlineRuns(child.children, context, { color: '536176' });
            output.push(paragraph(quoteRuns, {
              indent: { left: 360 },
              border: { left: { style: BorderStyle.SINGLE, size: 12, color: '2174FD' } },
              shading: { type: ShadingType.CLEAR, fill: 'F6F9FF' },
            }));
          }
          break;
        }

        case 'code': {
          const isMermaid = String(node.lang || '').toLowerCase() === 'mermaid';
          if (!isMermaid) {
            output.push(paragraph([new TextRun({ text: cleanText(node.value), font: 'Consolas', size: 21, color: '243048' })], {
              shading: { type: ShadingType.CLEAR, fill: 'F6F9FF' },
              indent: { left: 260, right: 260 },
            }));
            break;
          }

          const nextIndex = (context.convertedMermaidCount || 0) + 1;
          const total = context.stats?.mermaidCount || nextIndex;
          reportConversionProgress(context, `正在转换 Mermaid 图 ${nextIndex}/${total},可能需要联网等待。`);
          const diagramUrl = mermaidInkUrl(node.value);
          // NOTE(review): the retry message says "3 秒" while the real delay comes
          // from MERMAID_EXPORT_RETRY_DELAY_MS; confirm the two stay in sync.
          const loadRetry = {
            retryAttempts: MERMAID_EXPORT_RETRY_ATTEMPTS,
            retryDelayMs: MERMAID_EXPORT_RETRY_DELAY_MS,
            onRetry: (attempt) => {
              reportConversionProgress(context, `Mermaid 图 ${nextIndex}/${total} 转换失败,3 秒后第 ${attempt} 次重试。`);
            },
          };
          output.push(await imageParagraphFromSource(diagramUrl, 'Mermaid 图', context, { loadRetry }));
          // The counter only advances once the diagram has been handled.
          context.convertedMermaidCount = nextIndex;
          reportConversionProgress(context, `Mermaid 图 ${nextIndex}/${total} 已处理。`);
          break;
        }

        case 'html': {
          output.push(...await htmlToDocxBlocks(node.value, context, options));
          break;
        }

        case 'thematicBreak': {
          output.push(paragraph([textRun('────────────────────────', { color: 'DCDFF6' })], { alignment: AlignmentType.CENTER }));
          break;
        }

        default: {
          // Unknown container: flatten by converting its children in place.
          if (node.children) {
            output.push(...await markdownNodesToDocx(node.children, context, options));
          }
        }
      }
    }
    return output;
  }
  /**
   * Parses Markdown source into a syntax tree using unified with the
   * remark-parse and remark-gfm plugins.
   *
   * The three packages are loaded with dynamic `import()` — presumably
   * because they are ESM-only and this file is CommonJS (confirm).
   * The content is passed through `normalizeMarkdownTablesForDocx` before
   * parsing.
   *
   * @param {string} content - Markdown source.
   * @returns {Promise<object>} Root node of the parsed tree.
   */
  async function parseMarkdown(content) {
    const [unifiedModule, remarkParseModule, remarkGfmModule] = await Promise.all([
      import('unified'),
      import('remark-parse'),
      import('remark-gfm'),
    ]);
    const { unified } = unifiedModule;
    const processor = unified()
      .use(remarkParseModule.default)
      .use(remarkGfmModule.default);
    return processor.parse(normalizeMarkdownTablesForDocx(content));
  }
  /**
   * Parses Markdown source and converts its top-level nodes into docx blocks.
   *
   * @param {string} content - Markdown source.
   * @param {object} [context={}] - Shared export context.
   * @returns {Promise<Array>} Blocks for the whole document fragment.
   */
  async function markdownToDocxBlocks(content, context = {}) {
    const { children } = await parseMarkdown(content);
    return markdownNodesToDocx(children || [], context);
  }
  /**
   * Converts Markdown content to docx blocks and appends them to `children`.
   *
   * @param {Array} children - Target block list; mutated in place.
   * @param {string} content - Markdown source.
   * @param {object} context - Shared export context.
   * @returns {Promise<void>}
   */
  async function addMarkdownContent(children, content, context) {
    const blocks = await markdownToDocxBlocks(content, context);
    children.push(...blocks);
  }
  /**
   * Appends an outline (and, for leaf items, their Markdown content) to
   * `children`, recursing into nested items with a deeper heading level.
   *
   * Every item produces a bold heading paragraph. Items with children are
   * recursed into and their own `content` is not rendered. Leaf items render
   * their content when it is not blank, then bump `context.convertedLeafCount`
   * and report progress.
   *
   * @param {Array} children - Target block list; mutated in place.
   * @param {Array} items - Outline items; a falsy value is treated as empty.
   * @param {object} context - Shared export context (mutated: leaf counter).
   * @param {number} [level=1] - Heading level for this nesting depth.
   * @returns {Promise<void>}
   */
  async function addOutlineItems(children, items, context, level = 1) {
    const spacingBefore = level === 1 ? 320 : 200;
    for (const item of items || []) {
      const title = `${item.id || ''} ${item.title || '未命名章节'}`.trim();
      const headingRuns = [textRun(title, { bold: true })];
      children.push(paragraph(headingRuns, {
        heading: headingLevel(level),
        before: spacingBefore,
        after: 120,
      }));

      if (item.children?.length) {
        await addOutlineItems(children, item.children, context, level + 1);
        continue;
      }

      // Leaf section: render its body (if any), then count it as processed.
      const hasContent = Boolean(String(item.content || '').trim());
      if (hasContent) {
        await addMarkdownContent(children, item.content, context);
      }
      context.convertedLeafCount = (context.convertedLeafCount || 0) + 1;
      reportConversionProgress(context, `已处理 ${context.convertedLeafCount}/${context.stats?.leafCount || context.convertedLeafCount} 个正文小节。`);
    }
  }
  /**
   * Builds the document numbering configuration for the ordered lists that
   * were registered on the context during conversion.
   *
   * Each reference gets three decimal levels (0-2) formatted as "%1.", "%2.",
   * "%3.", with the left indent growing by 420 per level.
   *
   * @param {object} context - Export context; reads `numberingReferences`.
   * @returns {{config: Array}|undefined} Numbering config, or `undefined`
   *   when no references were registered.
   */
  function createNumberingConfig(context) {
    const references = context.numberingReferences || [];
    if (references.length === 0) {
      return undefined;
    }

    const buildLevel = (level) => ({
      level,
      format: LevelFormat.DECIMAL,
      text: `%${level + 1}.`,
      alignment: AlignmentType.START,
      style: {
        paragraph: {
          indent: { left: 720 + level * 420, hanging: 260 },
        },
      },
    });

    const config = references.map((reference) => ({
      reference,
      levels: [0, 1, 2].map((level) => buildLevel(level)),
    }));
    return { config };
  }
  /**
   * Builds the Word document for an export payload.
   *
   * Creates a fresh conversion context, emits the two title paragraphs,
   * converts the outline, and packs the document into a buffer. Progress is
   * reported at 10 (before conversion) and 90 (before packing).
   *
   * @param {object} payload - Export payload; reads `outline`, `project_name`
   *   and `base_dir` / `baseDir`.
   * @param {object} [options={}] - `onProgress` callback and an optional
   *   `warnings` array that collected warnings are appended to.
   * @returns {Promise<{buffer: *, warnings: Array, stats: object}>} Packed
   *   document, collected warnings and outline statistics.
   */
  async function buildDocxResult(payload, options = {}) {
    const stats = countOutlineStats(payload.outline || []);
    const context = {
      baseDir: payload.base_dir || payload.baseDir,
      onProgress: options.onProgress,
      warnings: options.warnings || [],
      stats,
      convertedLeafCount: 0,
      convertedMermaidCount: 0,
      numberingReferences: [],
      numberingIndex: 0,
      unsupportedHtmlTags: new Set(),
    };

    const disclaimer = paragraph(
      [textRun('内容由 AI 生成', { italics: true, size: 18 })],
      { alignment: AlignmentType.CENTER, after: 120 },
    );
    const documentTitle = paragraph(
      [textRun(payload.project_name || '投标技术文件', { bold: true, size: 34 })],
      { alignment: AlignmentType.CENTER, after: 300 },
    );
    const children = [disclaimer, documentTitle];

    let startMessage;
    if (stats.mermaidCount) {
      startMessage = `准备导出正文,并转换 ${stats.mermaidCount} 张 Mermaid 图。`;
    } else {
      startMessage = '准备导出正文。';
    }
    reportProgress(context, 10, startMessage);

    await addOutlineItems(children, payload.outline || [], context);
    reportProgress(context, 90, '正在生成 Word 文件。');

    // Numbering is only included when at least one ordered list was registered.
    const numbering = createNumberingConfig(context);
    const numberingOptions = numbering ? { numbering } : {};
    const doc = new Document({
      ...numberingOptions,
      styles: {
        default: {
          document: {
            run: { font: '宋体', size: 24 },
            paragraph: { spacing: { line: 360, after: 160 } },
          },
        },
      },
      sections: [{
        properties: {
          page: {
            margin: { top: 1440, right: 1440, bottom: 1440, left: 1440 },
          },
        },
        children,
      }],
    });

    const buffer = await Packer.toBuffer(doc);
    return { buffer, warnings: context.warnings, stats };
  }
  /**
   * Builds the Word document for a payload and returns only its buffer,
   * discarding the warnings and statistics from `buildDocxResult`.
   *
   * @param {object} payload - Export payload.
   * @param {object} [options={}] - Options forwarded to `buildDocxResult`.
   * @returns {Promise<*>} The packed document buffer.
   */
  async function buildDocxBuffer(payload, options = {}) {
    const { buffer } = await buildDocxResult(payload, options);
    return buffer;
  }
  /**
   * Creates the export service object exposing `exportWord`.
   *
   * @returns {{exportWord: Function}} Service with a single async method.
   */
  function createExportService() {
    return {
      /**
       * Exports the payload's outline as a Word file chosen via a save dialog.
       *
       * Flow: validate the outline, report initial progress, ask for the
       * target path, build the document, write it to disk, report completion.
       *
       * @param {object} [payload={}] - Export payload; `outline` must be a
       *   non-empty array, `project_name` seeds the default file name.
       * @param {Function} [onProgress] - Progress callback passed to the
       *   progress reporters.
       * @returns {Promise<object>} `{ success: false, canceled: true, message }`
       *   when the dialog is dismissed, otherwise
       *   `{ success: true, path, message, warnings }`.
       * @throws {Error} When the payload has no outline items.
       */
      async exportWord(payload = {}, onProgress) {
        if (!Array.isArray(payload.outline) || !payload.outline.length) {
          throw new Error('没有可导出的目录内容');
        }
        const stats = countOutlineStats(payload.outline || []);
        const progressContext = { onProgress, warnings: [], stats };
        reportProgress(progressContext, 2, stats.mermaidCount
          ? `检测到 ${stats.mermaidCount} 张 Mermaid 图,导出时会转换为 Word 图片。`
          : '正在准备 Word 导出。');

        const defaultFilename = `${sanitizeFilename(payload.project_name || '标书文档')}.docx`;
        // Falls back to USERPROFILE / cwd when `app.getPath` is unavailable.
        const defaultDir = app?.getPath ? app.getPath('documents') : process.env.USERPROFILE || process.cwd();
        const result = await dialog.showSaveDialog({
          title: '导出 Word 文档',
          defaultPath: path.join(defaultDir, defaultFilename),
          filters: [{ name: 'Word 文档', extensions: ['docx'] }],
        });
        if (result.canceled || !result.filePath) {
          reportProgress(progressContext, 0, '已取消导出。', { phase: 'canceled' });
          return { success: false, canceled: true, message: '已取消导出' };
        }

        const warnings = [];
        const buildResult = await buildDocxResult(payload, { onProgress, warnings });
        reportProgress({ onProgress, warnings: buildResult.warnings, stats: buildResult.stats }, 96, '正在写入 Word 文件。');
        fs.writeFileSync(result.filePath, buildResult.buffer);

        // Warnings are not only about images: the HTML converter also records
        // unsupported-tag and unexported-content warnings. The summary therefore
        // uses neutral wording instead of claiming every warning is a failed image.
        const message = buildResult.warnings.length
          ? `Word 已导出,但有 ${buildResult.warnings.length} 处内容未能完整导出,请打开文档核对。`
          : 'Word 已导出,请打开文档核对图片、表格和版式。';
        reportProgress({ onProgress, warnings: buildResult.warnings, stats: buildResult.stats }, 100, message, { phase: 'success' });
        return { success: true, path: result.filePath, message, warnings: buildResult.warnings };
      },
    };
  }
  // Public API of this module: the two document builders and the service factory.
  module.exports = { buildDocxBuffer, buildDocxResult, createExportService };