fileService.cjs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. const fs = require('node:fs/promises');
  2. const path = require('node:path');
  3. const crypto = require('node:crypto');
  4. const { dialog } = require('electron');
  5. const AdmZip = require('adm-zip');
  6. const { getImportedImagesDir } = require('../utils/paths.cjs');
  /** Display label for each parser provider id. */
  const parserLabels = {
    local: '本地解析',
    'mineru-accurate-api': 'MinerU 精准解析 API',
    'mineru-agent-api': 'MinerU-Agent 轻量解析 API',
  };

  /** File extensions (lower-case, with leading dot) accepted by the local parser. */
  const localSupportedExtensions = new Set([
    '.txt',
    '.md',
    '.markdown',
    '.docx',
    '.pdf',
    '.doc',
    '.wps',
  ]);

  /** File extensions accepted when the provider is `mineru-agent-api`. */
  const mineruAgentSupportedExtensions = new Set([
    '.pdf',
    '.doc',
    '.docx',
    '.ppt',
    '.pptx',
    '.png',
    '.jpg',
    '.jpeg',
    '.jp2',
    '.webp',
    '.gif',
    '.bmp',
    '.xls',
    '.xlsx',
  ]);

  /** File extensions accepted when the provider is `mineru-accurate-api`. */
  const mineruAccurateSupportedExtensions = new Set([
    '.pdf',
    '.doc',
    '.docx',
    '.ppt',
    '.pptx',
    '.png',
    '.jpg',
    '.jpeg',
    '.jp2',
    '.webp',
    '.gif',
    '.bmp',
    '.html',
  ]);

  /** File extensions that may be picked in the duplicate-check file dialog. */
  const duplicateCheckSupportedExtensions = new Set([
    '.doc',
    '.docx',
    '.wps',
    '.pdf',
    '.md',
    '.markdown',
  ]);

  /** Milliseconds after which a remote image download is aborted. */
  const remoteImageTimeoutMs = 10000;

  /** Matches Markdown images `![alt](target "title")`; named groups: alt, target, title. */
  const markdownImagePattern = /!\[(?<alt>[^\]]*)\]\((?<target><[^>]+>|[^)\s]+)(?<title>\s+"[^"]*")?\)/gi;

  /** Matches an `<img>` tag split into: prefix up to the src quote, the src value, the rest of the tag. */
  const htmlImageSrcPattern = /(<img\b[^>]*?\bsrc=["'])(?<src>[^"']+)(["'][^>]*>)/gi;
  /**
   * Wait for a fixed delay.
   * @param {number} ms - Delay in milliseconds.
   * @returns {Promise<void>} Resolves once the timer fires.
   */
  function sleep(ms) {
    return new Promise((done) => {
      setTimeout(done, ms);
    });
  }
  /**
   * Look up the extension set a parser provider can handle.
   * @param {string} provider - Parser provider id.
   * @returns {Set<string>} The MinerU set for the two MinerU providers, otherwise the local set.
   */
  function getSupportedExtensions(provider) {
    switch (provider) {
      case 'mineru-agent-api':
        return mineruAgentSupportedExtensions;
      case 'mineru-accurate-api':
        return mineruAccurateSupportedExtensions;
      default:
        return localSupportedExtensions;
    }
  }
  /**
   * Extensions offered in the file picker for a provider.
   * Non-local providers also offer every locally parseable extension.
   * @param {string} provider - Parser provider id.
   * @returns {Set<string>} The local set itself for `local`, otherwise a new merged set.
   */
  function getSelectableExtensions(provider) {
    if (provider !== 'local') {
      const selectable = new Set(getSupportedExtensions(provider));
      for (const extension of localSupportedExtensions) {
        selectable.add(extension);
      }
      return selectable;
    }
    return localSupportedExtensions;
  }
  /**
   * Decide which parser provider will handle a file.
   * The configured provider wins when it supports the extension; otherwise a non-local
   * provider falls back to the local parser when that one supports it.
   * @param {object} config - App config; `config.file_parser.provider` selects the provider (default `local`).
   * @param {string} filePath - Path of the file to parse.
   * @returns {{provider: string, requestedProvider: string, ext: string, supported: boolean, fallbackToLocal: boolean}}
   */
  function resolveFileParser(config, filePath) {
    const requestedProvider = config.file_parser?.provider || 'local';
    const ext = path.extname(filePath).toLowerCase();
    const buildResult = (provider, supported, fallbackToLocal) => ({
      provider,
      requestedProvider,
      ext,
      supported,
      fallbackToLocal,
    });

    if (getSupportedExtensions(requestedProvider).has(ext)) {
      return buildResult(requestedProvider, true, false);
    }
    const canFallBack = requestedProvider !== 'local' && localSupportedExtensions.has(ext);
    return canFallBack
      ? buildResult('local', true, true)
      : buildResult(requestedProvider, false, false);
  }
  /**
   * Parse a document on this machine.
   * `.txt` files are read as UTF-8 text; every other extension is handed to the
   * lazily imported `convertPathToMarkdown` converter.
   * @param {string} filePath - Path of the document.
   * @param {{preserveImages?: boolean, imageResolver?: Function}} [options] - Forwarded to the converter
   *   as `includeImages` / `imageResolver`.
   * @returns {Promise<string>} File text or the converter's result.
   */
  async function parseLocalDocument(filePath, options = {}) {
    const isPlainText = path.extname(filePath).toLowerCase() === '.txt';
    if (isPlainText) {
      return fs.readFile(filePath, 'utf-8');
    }
    // Loaded on demand so plain-text imports never touch the converter module.
    const { convertPathToMarkdown } = await import('./doc2markdown/convert.mjs');
    const converterOptions = {
      includeImages: options.preserveImages,
      imageResolver: options.imageResolver,
    };
    return convertPathToMarkdown(filePath, converterOptions);
  }
  /**
   * Turn a parse failure into the message shown to the user.
   * Messages matching the "not a zip file" patterns are replaced by a hint to re-save as DOCX.
   * @param {unknown} error - Thrown value; Error instances contribute their `message`.
   * @returns {string} Message prefixed with the import-failure label.
   */
  function formatImportError(error) {
    let rawMessage;
    if (error instanceof Error) {
      rawMessage = error.message;
    } else {
      rawMessage = String(error || '未知错误');
    }

    const looksLikeBrokenZip = /Can't find end of central directory|is this a zip file/i.test(rawMessage);
    if (looksLikeBrokenZip) {
      return '文件解析失败:该文件不是有效的 DOCX 文档,请用 Word/WPS 另存为标准 DOCX 后重试';
    }

    const detail = rawMessage || '未知错误';
    return `文件解析失败:${detail}`;
  }
  /**
   * Parse a file through the MinerU-Agent API: request an upload URL, upload the
   * file, poll the task until it finishes, then download the resulting Markdown.
   * @param {string} filePath - Path of the file to parse.
   * @param {{preserveImages?: boolean, assets?: object}} [options] - When `preserveImages` is set,
   *   images are rewritten via `rewriteMarkdownImages` (relative to the Markdown URL); otherwise stripped.
   * @returns {Promise<string>} Markdown text.
   * @throws {Error} When the create request fails, required fields are missing, or no Markdown URL is returned.
   */
  async function parseWithMineruAgent(filePath, options = {}) {
    const fileName = path.basename(filePath);
    const createResponse = await fetch('https://mineru.net/api/v1/agent/parse/file', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        file_name: fileName,
        language: 'ch',
        enable_table: true,
        is_ocr: true,
        enable_formula: true,
      }),
    });
    // An error page may not be JSON; fall back to null so the check below
    // reports the HTTP status instead of a bare JSON SyntaxError.
    const createResult = await createResponse.json().catch(() => null);
    if (!createResponse.ok || createResult?.code !== 0) {
      throw new Error(`申请 MinerU-Agent 上传链接失败:HTTP ${createResponse.status},${JSON.stringify(createResult)}`);
    }
    const taskId = createResult.data?.task_id;
    const fileUrl = createResult.data?.file_url;
    if (!taskId || !fileUrl) {
      throw new Error(`MinerU-Agent 响应缺少 task_id/file_url:${JSON.stringify(createResult)}`);
    }

    await uploadFile(fileUrl, filePath);
    const finalResult = await pollMineruAgent(taskId, fileName);
    const markdownUrl = finalResult.data.markdown_url;
    if (!markdownUrl) {
      throw new Error('MinerU-Agent 解析完成但未返回 markdown_url');
    }

    const markdown = await downloadText(markdownUrl, '下载 MinerU-Agent Markdown 失败');
    if (options.preserveImages) {
      return rewriteMarkdownImages(markdown, options.assets, { baseUrl: markdownUrl });
    }
    return stripMarkdownImages(markdown);
  }
  /**
   * Poll a MinerU-Agent task every 3 seconds until it is `done` or `failed`,
   * giving up after 5 minutes.
   * @param {string} taskId - Task id returned by the create request.
   * @param {string} fileName - Only used in the progress log line.
   * @returns {Promise<{raw: object, data: object}>} Full response and its `data` once the state is `done`.
   * @throws {Error} On a failed query, a `failed` task state, or timeout.
   */
  async function pollMineruAgent(taskId, fileName) {
    const startedAt = Date.now();
    const timeoutMs = 300000;
    const intervalMs = 3000;
    while (Date.now() - startedAt < timeoutMs) {
      const response = await fetch(`https://mineru.net/api/v1/agent/parse/${taskId}`);
      // An error page may not be JSON; fall back to null so the check below
      // reports the HTTP status instead of a bare JSON SyntaxError.
      const result = await response.json().catch(() => null);
      if (!response.ok || result?.code !== 0) {
        throw new Error(`查询 MinerU-Agent 任务失败:HTTP ${response.status},${JSON.stringify(result)}`);
      }
      const data = result.data || {};
      if (data.state === 'done') {
        return { raw: result, data };
      }
      if (data.state === 'failed') {
        throw new Error(`MinerU-Agent 解析失败:${data.err_msg || '未知错误'}${data.err_code ? ` (${data.err_code})` : ''}`);
      }
      console.log(`WAIT ${fileName}: ${data.state || 'unknown'}`);
      await sleep(intervalMs);
    }
    throw new Error(`MinerU-Agent 轮询超时,请稍后重试,task_id: ${taskId}`);
  }
  /**
   * Parse a file through the MinerU batch API: request an upload URL, upload the
   * file, poll the batch until it finishes, then download the result zip and
   * extract its Markdown.
   * @param {string} filePath - Path of the file to parse.
   * @param {string} token - MinerU API token, sent as a Bearer token.
   * @param {object} [options] - Forwarded to `extractMarkdownFromZip`.
   * @returns {Promise<string>} Markdown text.
   * @throws {Error} When the token is missing, the create request fails, required
   *   fields are missing, or no zip URL is returned.
   */
  async function parseWithMineruAccurate(filePath, token, options = {}) {
    if (!token) {
      throw new Error('请先在设置中填写 MinerU Token');
    }
    const fileName = path.basename(filePath);
    const createResponse = await fetch('https://mineru.net/api/v4/file-urls/batch', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${token}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        files: [{ name: fileName, data_id: makeDataId(fileName), is_ocr: true }],
        model_version: 'vlm',
        language: 'ch',
        enable_table: true,
        enable_formula: true,
      }),
    });
    // An error page may not be JSON; fall back to null so the check below
    // reports the HTTP status instead of a bare JSON SyntaxError.
    const createResult = await createResponse.json().catch(() => null);
    if (!createResponse.ok || createResult?.code !== 0) {
      throw new Error(`申请 MinerU 精准解析上传链接失败:HTTP ${createResponse.status},${JSON.stringify(createResult)}`);
    }
    const batchId = createResult.data?.batch_id;
    const fileUrl = createResult.data?.file_urls?.[0];
    if (!batchId || !fileUrl) {
      throw new Error(`MinerU 精准解析响应缺少 batch_id/file_url:${JSON.stringify(createResult)}`);
    }

    await uploadFile(fileUrl, filePath);
    const finalResult = await pollMineruAccurate(token, batchId, fileName);
    const fullZipUrl = finalResult.item.full_zip_url;
    if (!fullZipUrl) {
      throw new Error('MinerU 精准解析完成但未返回 full_zip_url');
    }
    const zipBuffer = await downloadBuffer(fullZipUrl);
    return extractMarkdownFromZip(zipBuffer, options);
  }
  /**
   * Poll a MinerU batch every 5 seconds until the file's item is `done` or
   * `failed`, giving up after 10 minutes.
   * @param {string} token - MinerU API token, sent as a Bearer token.
   * @param {string} batchId - Batch id returned by the create request.
   * @param {string} fileName - Used to pick the matching result item (first item if none matches) and for logging.
   * @returns {Promise<{raw: object, item: object}>} Full response and the finished item.
   * @throws {Error} On a failed query, a `failed` item state, or timeout.
   */
  async function pollMineruAccurate(token, batchId, fileName) {
    const startedAt = Date.now();
    const timeoutMs = 600000;
    const intervalMs = 5000;
    while (Date.now() - startedAt < timeoutMs) {
      const response = await fetch(`https://mineru.net/api/v4/extract-results/batch/${batchId}`, {
        headers: { Authorization: `Bearer ${token}`, Accept: '*/*' },
      });
      // An error page may not be JSON; fall back to null so the check below
      // reports the HTTP status instead of a bare JSON SyntaxError.
      const result = await response.json().catch(() => null);
      if (!response.ok || result?.code !== 0) {
        throw new Error(`查询 MinerU 精准解析任务失败:HTTP ${response.status},${JSON.stringify(result)}`);
      }
      const items = result.data?.extract_result || [];
      const item = items.find((candidate) => candidate.file_name === fileName) || items[0];
      if (item?.state === 'done') {
        return { raw: result, item };
      }
      if (item?.state === 'failed') {
        throw new Error(`MinerU 精准解析失败:${item.err_msg || '未知错误'}`);
      }
      console.log(`WAIT ${fileName}: ${item?.state || 'unknown'}`);
      await sleep(intervalMs);
    }
    throw new Error(`MinerU 精准解析轮询超时,请稍后重试,batch_id: ${batchId}`);
  }
  /**
   * Upload a local file to a URL with an HTTP PUT.
   * @param {string} fileUrl - Upload URL.
   * @param {string} filePath - Local file whose bytes form the request body.
   * @returns {Promise<void>}
   * @throws {Error} When the response status is not OK; the message includes status and response text.
   */
  async function uploadFile(fileUrl, filePath) {
    const payload = await fs.readFile(filePath);
    const putResponse = await fetch(fileUrl, { method: 'PUT', body: payload });
    if (putResponse.ok) {
      return;
    }
    const { status } = putResponse;
    const detail = await putResponse.text();
    throw new Error(`文件上传失败:HTTP ${status},${detail}`);
  }
  /**
   * Fetch a URL and return the response body as text.
   * @param {string} url - URL to download.
   * @param {string} fallbackMessage - Prefix of the error message on a non-OK status.
   * @returns {Promise<string>} Response text.
   * @throws {Error} When the response status is not OK.
   */
  async function downloadText(url, fallbackMessage) {
    const reply = await fetch(url);
    if (reply.ok) {
      return reply.text();
    }
    throw new Error(`${fallbackMessage}:HTTP ${reply.status}`);
  }
  /**
   * Fetch a URL and return the response body as a Buffer.
   * @param {string} url - URL to download.
   * @returns {Promise<Buffer>} Response bytes.
   * @throws {Error} When the response status is not OK.
   */
  async function downloadBuffer(url) {
    const reply = await fetch(url);
    if (!reply.ok) {
      throw new Error(`下载 MinerU 精准解析结果失败:HTTP ${reply.status}`);
    }
    const bytes = await reply.arrayBuffer();
    return Buffer.from(bytes);
  }
  /**
   * Pull the Markdown document out of a result zip.
   * Prefers an entry named `full.md` (any folder), otherwise the first `.md` entry.
   * @param {Buffer} zipBuffer - Zip archive bytes.
   * @param {{preserveImages?: boolean, assets?: object}} [options] - With `preserveImages`, images are
   *   rewritten against the zip entries; otherwise they are stripped.
   * @returns {Promise<string>} Markdown text.
   * @throws {Error} When the archive holds no Markdown entry.
   */
  async function extractMarkdownFromZip(zipBuffer, options = {}) {
    const entries = new AdmZip(zipBuffer).getEntries();
    const isFullMarkdown = (entry) => /(^|[/\\])full\.md$/i.test(entry.entryName);
    const isAnyMarkdown = (entry) => entry.entryName.toLowerCase().endsWith('.md');

    const target = entries.find(isFullMarkdown) || entries.find(isAnyMarkdown);
    if (!target) {
      throw new Error('MinerU 精准解析结果 zip 中未找到 Markdown 文件');
    }

    const markdown = target.getData().toString('utf8');
    if (options.preserveImages) {
      return rewriteMarkdownImages(markdown, options.assets, {
        zipEntries: entries,
        markdownEntryName: target.entryName,
      });
    }
    return stripMarkdownImages(markdown);
  }
  /**
   * Build a `data_id` from a file name: every run of characters outside
   * `A-Z a-z 0-9 _ . -` becomes one underscore and the result is cut to 96 characters.
   * @param {string} fileName - Source file name.
   * @returns {string} Sanitized id, or `document` when the result is empty.
   */
  function makeDataId(fileName) {
    const sanitized = fileName.replace(/[^A-Za-z0-9_.-]+/g, '_');
    const truncated = sanitized.slice(0, 96);
    return truncated === '' ? 'document' : truncated;
  }
  /**
   * Describe a picked file for the renderer.
   * @param {string} filePath - Path of the selected file.
   * @returns {Promise<{id: string, file_name: string, file_path: string, extension: string, size: number, modified_at: string}>}
   *   `id` is the SHA-1 hex digest of the path; `modified_at` is the mtime as an ISO string.
   */
  async function createLocalFileSelection(filePath) {
    const { size, mtime } = await fs.stat(filePath);
    const pathDigest = crypto.createHash('sha1').update(filePath).digest('hex');
    return {
      id: pathDigest,
      file_name: path.basename(filePath),
      file_path: filePath,
      extension: path.extname(filePath).toLowerCase(),
      size,
      modified_at: mtime.toISOString(),
    };
  }
  /**
   * Remove Markdown images and `<img>` tags, then collapse runs of three or
   * more newlines into a single blank line.
   * @param {unknown} text - Markdown text; falsy values are treated as empty.
   * @returns {string} Text without images.
   */
  function stripMarkdownImages(text) {
    let cleaned = String(text || '');
    cleaned = cleaned.replace(markdownImagePattern, '');
    cleaned = cleaned.replace(/<img\b[^>]*>/gi, '');
    return cleaned.replace(/\n{3,}/g, '\n\n');
  }
  /**
   * Create the bookkeeping object for one batch of imported images.
   * @param {object} app - App object; without a `getPath` member no context is created.
   * @param {string} [scope='documents'] - Batch name prefix; characters outside `A-Z a-z 0-9 . _ -` become `_`.
   * @returns {{baseDir: string, urlPrefix: string, index: number} | null} Batch folder under the
   *   imported-images directory, matching `yibiao-asset://` URL prefix, and a counter starting at 0.
   */
  function createAssetContext(app, scope = 'documents') {
    if (!app?.getPath) {
      return null;
    }
    const scopeName = String(scope || 'documents').replace(/[^A-Za-z0-9._-]+/g, '_') || 'documents';
    const timestamp = Date.now();
    const uniqueSuffix = crypto.randomUUID().slice(0, 8);
    const batchId = `${scopeName}-${timestamp}-${uniqueSuffix}`;
    const baseDir = path.join(getImportedImagesDir(app), batchId);
    const urlPrefix = `yibiao-asset://imported-images/${encodeURIComponent(batchId)}`;
    return { baseDir, urlPrefix, index: 0 };
  }
  /**
   * Remove the folder of an image batch, if there is one.
   * @param {{baseDir?: string} | null | undefined} assets - Asset context; nothing happens without `baseDir`.
   * @returns {Promise<void>}
   */
  async function deleteImportedImageAssets(assets) {
    const directory = assets?.baseDir;
    if (!directory) {
      return;
    }
    // force: a folder that was never created is not an error.
    await fs.rm(directory, { recursive: true, force: true });
  }
  /**
   * Map a MIME type to an image file extension by substring match.
   * @param {unknown} mime - MIME type, e.g. `image/png`.
   * @returns {string} `.jpg`, `.png`, `.gif`, `.bmp`, `.webp`, or `''` when nothing matches.
   */
  function imageExtensionFromMime(mime) {
    const normalized = String(mime || '').toLowerCase();
    // Checked in order; the first rule with a matching keyword wins.
    const rules = [
      [['jpeg', 'jpg'], '.jpg'],
      [['png'], '.png'],
      [['gif'], '.gif'],
      [['bmp'], '.bmp'],
      [['webp'], '.webp'],
    ];
    const matched = rules.find(([keywords]) => keywords.some((keyword) => normalized.includes(keyword)));
    return matched ? matched[1] : '';
  }
  /**
   * Read a known image extension from a path or URL, ignoring any query string or fragment.
   * @param {unknown} value - Path or URL.
   * @returns {string} Lower-case extension (`.jpeg` is reported as `.jpg`), or `''` when not a known image type.
   */
  function imageExtensionFromPath(value) {
    const [withoutQuery] = String(value || '').split(/[?#]/);
    const ext = path.extname(withoutQuery).toLowerCase();
    if (ext === '.jpeg') {
      return '.jpg';
    }
    const known = ['.png', '.jpg', '.gif', '.bmp', '.webp'];
    return known.includes(ext) ? ext : '';
  }
  /**
   * Write image bytes into the batch folder and return their asset URL.
   * Files are named `image-NNNN<ext>` from the context's running counter, which is incremented here.
   * @param {{baseDir: string, urlPrefix: string, index: number} | null} assets - Asset context.
   * @param {Buffer} buffer - Image bytes.
   * @param {string} sourceName - Original name or URL; used to guess the extension when the MIME type gives none.
   * @param {string} mime - MIME type, may be empty.
   * @returns {Promise<string | null>} Asset URL, or null without a context or without bytes.
   */
  async function saveImportedImage(assets, buffer, sourceName, mime) {
    const hasBytes = Boolean(buffer?.length);
    if (!assets || !hasBytes) {
      return null;
    }
    // Extension preference: MIME type, then source name, then `.png`.
    const ext = imageExtensionFromMime(mime) || imageExtensionFromPath(sourceName) || '.png';
    assets.index += 1;
    const sequence = String(assets.index).padStart(4, '0');
    const fileName = `image-${sequence}${ext}`;
    await fs.mkdir(assets.baseDir, { recursive: true });
    const destination = path.join(assets.baseDir, fileName);
    await fs.writeFile(destination, buffer);
    return `${assets.urlPrefix}/${encodeURIComponent(fileName)}`;
  }
  /**
   * Build the image callback handed to the local document converter.
   * @param {object | null} assets - Asset context; without one no resolver is created.
   * @returns {((image: {buffer: Buffer | Uint8Array, mime: string, sourceName: string}) => Promise<string | null>) | null}
   *   Callback that stores the image via `saveImportedImage`, or null.
   */
  function createImageResolver(assets) {
    if (!assets) {
      return null;
    }
    return ({ buffer, mime, sourceName }) => {
      // Converters may hand over a plain byte array; normalize it to a Buffer.
      const bytes = Buffer.isBuffer(buffer) ? buffer : Buffer.from(buffer);
      return saveImportedImage(assets, bytes, sourceName, mime);
    };
  }
  /**
   * Trim an image target and drop the surrounding `<...>` if present.
   * @param {unknown} target - Raw target from a Markdown image or `src` attribute.
   * @returns {string} Cleaned target.
   */
  function cleanMarkdownImageTarget(target) {
    const trimmed = String(target || '').trim();
    const isAngleWrapped = trimmed.startsWith('<') && trimmed.endsWith('>');
    if (isAngleWrapped) {
      return trimmed.slice(1, -1);
    }
    return trimmed;
  }
  /**
   * Decode a base64 `data:` URL of the form `data:<mime>;base64,<payload>`.
   * @param {unknown} value - Candidate URL.
   * @returns {{mime: string, buffer: Buffer} | null} Decoded parts, or null when the value has a different shape.
   */
  function parseDataUrl(value) {
    const parts = String(value || '').match(/^data:([^;,]+);base64,(.+)$/i);
    if (parts === null) {
      return null;
    }
    const [, mime, payload] = parts;
    return { mime, buffer: Buffer.from(payload, 'base64') };
  }
  /**
   * Download a remote image; the request is aborted after `remoteImageTimeoutMs`.
   * @param {string} url - Image URL.
   * @returns {Promise<{buffer: Buffer, mime: string} | null>} Bytes and content type, or null when
   *   the status is not OK or a content type is present that is not `image/*`.
   */
  async function loadRemoteImage(url) {
    const abortController = new AbortController();
    const timer = setTimeout(() => {
      abortController.abort();
    }, remoteImageTimeoutMs);
    try {
      const response = await fetch(url, { signal: abortController.signal });
      if (!response.ok) {
        return null;
      }
      const mime = response.headers.get('content-type') || '';
      // A missing content type is accepted; an explicit non-image type is not.
      const acceptable = mime === '' || /^image\//i.test(mime);
      if (!acceptable) {
        return null;
      }
      const bytes = await response.arrayBuffer();
      return { buffer: Buffer.from(bytes), mime };
    } finally {
      clearTimeout(timer);
    }
  }
  /**
   * Find the zip entry an image reference points to.
   * Tries the reference as written and relative to the Markdown entry's folder
   * (case-insensitive), then falls back to the first entry with the same base name.
   * @param {Array<{entryName: string}>} zipEntries - Entries of the archive.
   * @param {string} imagePath - Image reference from the Markdown, possibly URL-encoded.
   * @param {string} markdownEntryName - Entry name of the Markdown file.
   * @returns {object | undefined} Matching entry, if any.
   */
  function findZipEntryImage(zipEntries, imagePath, markdownEntryName) {
    const toPosix = (value) => value.replace(/\\/g, '/');
    const toKey = (value) => value.replace(/^\/+/, '').toLowerCase();

    let decodedPath;
    try {
      decodedPath = decodeURIComponent(imagePath);
    } catch {
      // Malformed escape sequence: use the reference as written.
      decodedPath = imagePath;
    }
    const normalized = toPosix(decodedPath).replace(/^\.\//, '');
    const markdownDir = path.posix.dirname(toPosix(String(markdownEntryName || '')));
    const relativeToMarkdown = path.posix.normalize(
      path.posix.join(markdownDir === '.' ? '' : markdownDir, normalized),
    );
    const candidateKeys = new Set([toKey(normalized), toKey(relativeToMarkdown)]);

    const exact = zipEntries.find((entry) => candidateKeys.has(toKey(toPosix(entry.entryName))));
    if (exact) {
      return exact;
    }
    const wantedName = path.posix.basename(normalized).toLowerCase();
    return zipEntries.find(
      (entry) => path.posix.basename(toPosix(entry.entryName)).toLowerCase() === wantedName,
    );
  }
  /**
   * Check whether a path is the directory itself or lies below it.
   * Only a real `..` path segment counts as leaving the directory, so entries
   * whose names merely begin with two dots (e.g. `..thumbs/a.png`) are accepted.
   * @param {string} baseDir - Directory to stay inside.
   * @param {string} targetPath - Path to check.
   * @returns {boolean} True when `targetPath` does not escape `baseDir`.
   */
  function isPathInsideDirectory(baseDir, targetPath) {
    const relative = path.relative(baseDir, targetPath);
    if (relative === '') {
      return true;
    }
    // path.relative yields an absolute path when no relative route exists (e.g. another Windows drive).
    if (path.isAbsolute(relative)) {
      return false;
    }
    return relative !== '..' && !relative.startsWith(`..${path.sep}`);
  }
  /**
   * Turn one image reference into a stored asset URL.
   * Sources are tried in this order: existing `yibiao-asset://` URL (returned as is),
   * base64 data URL, remote URL (absolute, or relative to `context.baseUrl`),
   * zip entry (`context.zipEntries`), local file below `context.localBaseDir`.
   * @param {string} source - Raw image target.
   * @param {object | null} assets - Asset context passed to `saveImportedImage`.
   * @param {{baseUrl?: string, zipEntries?: Array, markdownEntryName?: string, localBaseDir?: string}} [context]
   *   Where relative references may be looked up.
   * @returns {Promise<string | null>} Asset URL, or null when the image cannot be resolved.
   */
  async function resolveImageToAssetUrl(source, assets, context = {}) {
    const target = cleanMarkdownImageTarget(source);
    if (!target) {
      return null;
    }
    if (/^yibiao-asset:\/\//i.test(target)) {
      return target;
    }

    const inlineData = parseDataUrl(target);
    if (inlineData) {
      return saveImportedImage(assets, inlineData.buffer, 'data-image', inlineData.mime);
    }

    const isHttpUrl = /^https?:\/\//i.test(target);
    if (isHttpUrl || context.baseUrl) {
      try {
        const url = isHttpUrl ? target : new URL(target, context.baseUrl).toString();
        const loaded = await loadRemoteImage(url);
        if (loaded) {
          return saveImportedImage(assets, loaded.buffer, url, loaded.mime);
        }
      } catch {
        return null;
      }
      // A response that was not usable falls through to the remaining sources.
    }

    if (context.zipEntries) {
      const entry = findZipEntryImage(context.zipEntries, target, context.markdownEntryName);
      if (entry && !entry.isDirectory) {
        return saveImportedImage(assets, entry.getData(), entry.entryName, '');
      }
    }

    // Local lookup only applies to references without a URL scheme.
    const hasScheme = /^[a-z][a-z0-9+.-]*:/i.test(target);
    if (context.localBaseDir && !hasScheme) {
      try {
        let decodedTarget;
        try {
          decodedTarget = decodeURIComponent(target);
        } catch {
          decodedTarget = target;
        }
        if (path.isAbsolute(decodedTarget)) {
          return null;
        }
        const baseDir = path.resolve(context.localBaseDir);
        const localPath = path.resolve(baseDir, decodedTarget);
        // Refuse references that climb out of the document's folder.
        if (!isPathInsideDirectory(baseDir, localPath)) {
          return null;
        }
        const bytes = await fs.readFile(localPath);
        return saveImportedImage(assets, bytes, localPath, '');
      } catch {
        return null;
      }
    }

    return null;
  }
  /**
   * Replace every image reference in a Markdown document with its stored asset URL.
   * Markdown images are processed first, then `<img src>` tags; references that
   * cannot be resolved are removed from the text.
   * @param {unknown} markdown - Markdown text; falsy values are treated as empty.
   * @param {object | null} assets - Asset context passed on to `resolveImageToAssetUrl`.
   * @param {object} [context] - Lookup context passed on to `resolveImageToAssetUrl`.
   * @returns {Promise<string>} Rewritten Markdown.
   */
  async function rewriteMarkdownImages(markdown, assets, context = {}) {
    const rewriteMarkdownImage = async (match) => {
      const assetUrl = await resolveImageToAssetUrl(match.groups?.target || '', assets, context);
      const alt = match.groups?.alt || '';
      const title = match.groups?.title || '';
      if (!assetUrl) {
        return '';
      }
      return `![${alt}](${assetUrl}${title})`;
    };
    const rewriteHtmlImage = async (match) => {
      const assetUrl = await resolveImageToAssetUrl(match.groups?.src || '', assets, context);
      if (!assetUrl) {
        return '';
      }
      // match[1] / match[3] are the tag text before and after the src value.
      return `${match[1]}${assetUrl}${match[3]}`;
    };

    const withMarkdownImages = await replaceMatchesAsync(String(markdown || ''), markdownImagePattern, rewriteMarkdownImage);
    return replaceMatchesAsync(withMarkdownImages, htmlImageSrcPattern, rewriteHtmlImage);
  }
  /**
   * Async counterpart of `String.prototype.replace` for global patterns.
   * Replacements are awaited one after another, in match order.
   * The input is coerced to a string once, so matching and slicing always work
   * on the same text (falsy input is treated as empty).
   * @param {unknown} text - Text to search.
   * @param {RegExp} pattern - Pattern with the `g` flag (required by `matchAll`).
   * @param {(match: RegExpMatchArray) => Promise<string> | string} createReplacement - Produces the
   *   replacement for one match.
   * @returns {Promise<string>} Text with every match replaced.
   */
  async function replaceMatchesAsync(text, pattern, createReplacement) {
    const source = String(text || '');
    const matches = [...source.matchAll(pattern)];
    if (!matches.length) {
      return source;
    }
    const parts = [];
    let lastIndex = 0;
    for (const match of matches) {
      const index = match.index ?? 0;
      parts.push(source.slice(lastIndex, index));
      // Sequential on purpose: callbacks may depend on running in document order.
      parts.push(await createReplacement(match));
      lastIndex = index + match[0].length;
    }
    parts.push(source.slice(lastIndex));
    return parts.join('');
  }
  /**
   * Parse a document to Markdown with the parser selected by the config.
   * When images are preserved they are stored in a new asset batch; if parsing
   * fails, that batch is deleted before the error is rethrown.
   * @param {object} app - App object used to create the asset context.
   * @param {string} filePath - Path of the document.
   * @param {object} config - App config (`file_parser.provider`, `file_parser.mineru_token`).
   * @param {{preserveImages?: boolean, assetScope?: string}} [options] - Images are kept only when
   *   `preserveImages` is exactly `true`; `assetScope` names the asset batch (default `documents`).
   * @returns {Promise<string>} Markdown text, with images stripped unless preserved.
   * @throws {Error} When the format is unsupported or the selected parser fails.
   */
  async function parseDocumentWithConfig(app, filePath, config, options = {}) {
    const { supported, provider, requestedProvider } = resolveFileParser(config, filePath);
    if (!supported) {
      throw new Error(`当前${parserLabels[requestedProvider] || '解析方式'}不支持该文件格式`);
    }

    const preserveImages = options.preserveImages === true;
    const assets = preserveImages ? createAssetContext(app, options.assetScope || 'documents') : null;
    const parseOptions = { preserveImages, assets, imageResolver: createImageResolver(assets) };

    let markdown = '';
    try {
      switch (provider) {
        case 'mineru-agent-api':
          markdown = await parseWithMineruAgent(filePath, parseOptions);
          break;
        case 'mineru-accurate-api':
          markdown = await parseWithMineruAccurate(filePath, config.file_parser?.mineru_token || '', parseOptions);
          break;
        default: {
          const localMarkdown = await parseLocalDocument(filePath, parseOptions);
          if (preserveImages) {
            markdown = await rewriteMarkdownImages(localMarkdown, assets, { localBaseDir: path.dirname(filePath) });
          } else {
            markdown = stripMarkdownImages(localMarkdown);
          }
        }
      }
    } catch (error) {
      // Best-effort cleanup; a cleanup failure must not hide the parse error.
      await deleteImportedImageAssets(assets).catch(() => undefined);
      throw error;
    }

    if (preserveImages) {
      return markdown;
    }
    return stripMarkdownImages(markdown);
  }
  /**
   * Create the file service used for picking and importing documents.
   * @param {{app?: object, configStore?: {load: Function}}} [deps] - `app` is passed to the parser;
   *   `configStore.load()` supplies the config (a local-parser config is used without a store).
   * @returns {{importDocument: Function, selectDuplicateCheckFiles: Function}} Service object.
   */
  function createFileService({ app, configStore } = {}) {
    // Dialog filters take extensions without the leading dot.
    const toFilterExtensions = (extensions) => [...extensions].map((item) => item.slice(1));
    const wasNothingPicked = (result) => result.canceled || result.filePaths.length === 0;

    return {
      /**
       * Let the user pick one tender document and parse it to Markdown (images stripped).
       * @returns {Promise<object>} `{ success, message, ... }`; on success includes `file_content`,
       *   `file_name`, `parser_provider` and `parser_label`.
       */
      async importDocument() {
        const config = configStore ? configStore.load() : { file_parser: { provider: 'local' } };
        const provider = config.file_parser?.provider || 'local';
        const supportedExtensions = getSelectableExtensions(provider);

        const result = await dialog.showOpenDialog({
          title: '选择招标文件',
          properties: ['openFile'],
          filters: [
            { name: parserLabels[provider] || '招标文件', extensions: toFilterExtensions(supportedExtensions) },
            { name: '所有文件', extensions: ['*'] },
          ],
        });
        if (wasNothingPicked(result)) {
          return { success: false, message: '已取消选择' };
        }

        const [filePath] = result.filePaths;
        const ext = path.extname(filePath).toLowerCase();
        const parser = resolveFileParser(config, filePath);
        // The "all files" filter lets the user pick anything, so re-check the extension.
        if (!supportedExtensions.has(ext)) {
          return { success: false, message: `当前${parserLabels[provider] || '解析方式'}不支持该文件格式` };
        }

        const parserInfo = {
          file_name: path.basename(filePath),
          parser_provider: parser.provider,
          parser_label: parserLabels[parser.provider] || '本地解析',
        };

        let fileContent = '';
        try {
          const parsed = await parseDocumentWithConfig(app, filePath, config, {
            assetScope: 'technical-plan',
            preserveImages: false,
          });
          fileContent = parsed.trim();
        } catch (error) {
          return { success: false, message: formatImportError(error), ...parserInfo };
        }

        if (!fileContent) {
          return { success: false, message: '未提取到有效 Markdown 内容,请检查文件内容' };
        }
        const message = parser.fallbackToLocal ? '文件解析完成,当前格式已自动使用本地解析' : '文件解析完成';
        return { success: true, message, file_content: fileContent, ...parserInfo };
      },

      /**
       * Let the user pick files for the duplicate check.
       * @param {{multiple?: boolean}} [options] - Multi-select unless `multiple` is exactly `false`.
       * @returns {Promise<{success: boolean, message: string, files: object[]}>} Descriptions of the
       *   picked files whose extension is supported.
       */
      async selectDuplicateCheckFiles(options = {}) {
        const multiple = options?.multiple !== false;
        const result = await dialog.showOpenDialog({
          title: multiple ? '选择投标文件' : '选择招标文件',
          properties: multiple ? ['openFile', 'multiSelections'] : ['openFile'],
          filters: [
            { name: '标书文档', extensions: toFilterExtensions(duplicateCheckSupportedExtensions) },
            { name: '所有文件', extensions: ['*'] },
          ],
        });
        if (wasNothingPicked(result)) {
          return { success: false, message: '已取消选择', files: [] };
        }

        const supportedPaths = result.filePaths.filter((filePath) => {
          const ext = path.extname(filePath).toLowerCase();
          return duplicateCheckSupportedExtensions.has(ext);
        });
        if (supportedPaths.length === 0) {
          return { success: false, message: '未选择支持的文件类型', files: [] };
        }

        const files = await Promise.all(supportedPaths.map(createLocalFileSelection));
        return { success: true, message: `已选择 ${files.length} 个文件`, files };
      },
    };
  }
  // Public API of this module: the service factory plus the two parser helpers.
  module.exports = { createFileService, parseDocumentWithConfig, resolveFileParser };