import { renderAsync } from 'docx-preview';
import html2canvas from 'html2canvas';
import jsPDF from 'jspdf';
import { injectDocxJustifyMitigationStyles } from '../lib/docxJustifyMitigationCss';
import { injectDocxTableReflowStyles } from '../lib/docxTableReflow';
import { SHARED_DOCX_OFFICIAL_FORM_RENDER_OPTIONS } from '../lib/sharedDocxOfficialFormRenderOptions';

/** Millimetres per CSS pixel (96 CSS px per inch, 25.4 mm per inch). */
const PX_TO_MM = 25.4 / 96;

/** html2canvas scale used when the caller passes none, or an unusable value. */
const DEFAULT_RENDER_SCALE = 2;

/**
 * Resolves once a single `<img>` has finished loading, successfully or not.
 *
 * Never rejects: a broken image must not abort the whole conversion.
 */
async function waitForImageSettled(img: HTMLImageElement): Promise<void> {
  if (img.complete) return;

  if (typeof img.decode === 'function') {
    try {
      await img.decode();
      return;
    } catch {
      // decode() rejected; fall through to the checks below.
    }
  }

  // When decode() rejects the image has usually already settled (broken or
  // undecodable), so its `load` / `error` event has already been dispatched.
  // Attaching listeners now would wait forever, so re-check `complete` first.
  if (img.complete) return;

  await new Promise<void>((resolve) => {
    const done = () => resolve();
    img.addEventListener('load', done, { once: true });
    img.addEventListener('error', done, { once: true });
  });
}

/**
 * Waits until every `<img>` inside `container` has either loaded or failed,
 * so the rasteriser does not capture half-loaded pictures.
 */
async function waitForRenderableAssets(container: HTMLElement): Promise<void> {
  const images = Array.from(container.querySelectorAll('img'));
  if (images.length === 0) return;
  await Promise.all(images.map((img) => waitForImageSettled(img)));
}

/**
 * Matches `docx-to-pdf-demo.html`: `.shell { max-width }` + `.preview { padding }`.
 * Visible preview and off-screen capture use the same numbers so line breaks and tables match.
 */
export const DOCX_PDF_PREVIEW_SHELL_MAX_WIDTH_PX = 980;
export const DOCX_PDF_PREVIEW_INNER_PADDING_PX = 28;

export type ConvertDocxToPdfOptions = {
  /** html2canvas scale. Default 2. */
  renderScale?: number;
  /** JPEG quality 0–1 when not lossless. Default 0.95. */
  imageQuality?: number;
  /** PNG page images in the PDF instead of JPEG. */
  losslessImages?: boolean;
  /** Fired before `renderAsync` (docx-preview). */
  onPhaseRendering?: () => void;
  /** Fired after layout settle, before per-page capture. */
  onPhaseCapturing?: (pageCount: number) => void;
  /** After each page is rasterised (`current` is 1-based). */
  onCaptureProgress?: (current: number, total: number) => void;
  /**
   * Extra ms after layout settles (tables/fonts). Mirrors docx-to-pdf-demo.html (120).
   */
  layoutSettleExtraMs?: number;
  /** Optional callback with render-layout signals for QA/advisory UI. */
  onLayoutAnalysed?: (insights: ConvertDocxToPdfLayoutInsights) => void;
};

export type ConvertDocxToPdfLayoutInsights = {
  /** True when the rendered page appears to include absolute-positioned drawing/shape elements. */
  hasFloatingShapeCandidates: boolean;
  /** True when Times New Roman is unavailable; capture CSS still uses a serif stack. */
  appliedTimesFallbackOverride: boolean;
};

/**
 * Asks the CSS Font Loading API whether "Times New Roman" can be used.
 *
 * Answers `true` (optimistic) when `document.fonts.check` is missing or throws,
 * so the fallback flag is only raised on an explicit negative answer.
 */
function isTimesNewRomanAvailable(): boolean {
  const fontSet = document.fonts;
  if (!fontSet || typeof fontSet.check !== 'function') return true;
  try {
    return fontSet.check('16px "Times New Roman"');
  } catch {
    return true;
  }
}

/** Word-accurate typography for raster PDF: force serif stack (html2canvas often used system sans otherwise). */
function injectCaptureTypographyStyles(scope: HTMLElement): void {
  // Idempotent: a second call on the same scope adds nothing.
  if (scope.querySelector('style[data-docx-capture-typography="1"]')) return;
  // The rules below are scoped through this data attribute.
  if (!scope.dataset.docxCaptureRoot) scope.dataset.docxCaptureRoot = '1';

  const style = document.createElement('style');
  style.setAttribute('data-docx-capture-typography', '1');
  style.textContent = `
    [data-docx-capture-root] .docx-wrapper,
    [data-docx-capture-root] .docx-wrapper * {
      font-family: "Times New Roman", Times, "Liberation Serif", "Noto Serif", serif !important;
    }
  `;
  scope.appendChild(style);
}

/** Letterhead lines recognised on the cover page (whitespace-collapsed, trimmed). */
const LETTERHEAD_MINISTRY = 'BỘ Y TẾ';
const LETTERHEAD_UNIVERSITY = 'ĐẠI HỌC Y DƯỢC';
/** Same line as spelled (with a typo) in the source DOCX. */
const LETTERHEAD_UNIVERSITY_TYPO = 'ĐẠI HỘC Y DƯỢC';
const LETTERHEAD_CITY = 'THÀNH PHỐ HỒ CHÍ MINH';

/**
 * docx-preview can keep « BỘ Y TẾ » bold (style / strong) and may leak italic from inherited
 * styles even when the OOXML run is regular. Normalize the first page letterhead in the DOM
 * so the rasterised PDF matches the official template:
 *
 * - « BỘ Y TẾ » → regular (400), upright
 * - « ĐẠI HỌC Y DƯỢC » / « THÀNH PHỐ HỒ CHÍ MINH » → bold (700), upright
 *
 * The source DOCX contains the typo « ĐẠI HỘC »; match both spellings so we keep working if
 * the template is ever corrected.
 *
 * Only paragraphs inside the first `section.docx` under `root` are inspected; the function is
 * a no-op when no such section exists.
 */
function normalizeOfficialFormCoverForPdfCapture(root: HTMLElement): void {
  const section = root.querySelector('section.docx');
  if (!section) return;

  // Applies weight + upright style to the element and every descendant, with `!important`
  // so it wins over inline run styles.
  const setLetterheadTypography = (el: HTMLElement, weight: '400' | '700') => {
    el.style.setProperty('font-weight', weight, 'important');
    el.style.setProperty('font-style', 'normal', 'important');
    el.querySelectorAll<HTMLElement>('*').forEach((c) => {
      c.style.setProperty('font-weight', weight, 'important');
      c.style.setProperty('font-style', 'normal', 'important');
    });
  };

  const hasUniversity = (line: string) =>
    line.includes(LETTERHEAD_UNIVERSITY) || line.includes(LETTERHEAD_UNIVERSITY_TYPO);
  const isUniversityOnly = (line: string) =>
    line === LETTERHEAD_UNIVERSITY || line === LETTERHEAD_UNIVERSITY_TYPO;

  const paras = section.querySelectorAll('p');
  for (const el of paras) {
    // Collapse whitespace runs so split runs / line breaks do not defeat the match.
    const line = (el.textContent ?? '').replace(/\s+/g, ' ').trim();

    if (line === LETTERHEAD_MINISTRY) {
      el.style.setProperty('text-align', 'center', 'important');
      setLetterheadTypography(el, '400');
      continue;
    }

    // University name and city may share one paragraph or sit in separate ones.
    const isUniversity =
      (hasUniversity(line) && line.includes(LETTERHEAD_CITY)) ||
      isUniversityOnly(line) ||
      line === LETTERHEAD_CITY;
    if (isUniversity) {
      el.style.setProperty('text-align', 'center', 'important');
      setLetterheadTypography(el, '700');
    }
  }
}

/**
 * Heuristic: does the rendered output look like it contains floating drawings/shapes?
 *
 * First tries a cheap selector; otherwise scans every descendant of `.docx` for an
 * absolutely positioned element that is, or contains, an `svg` / `canvas` / `img`.
 */
function hasFloatingShapeCandidates(container: HTMLElement): boolean {
  // NOTE(review): `[style*="position:absolute"]` only matches the un-spaced form of the
  // inline style text; a `position: absolute` attribute is left to the computed-style scan.
  const obvious = container.querySelector(
    '.docx-drawing, [data-anchor], [data-wrap], [style*="position:absolute"]',
  );
  if (obvious) return true;

  return Array.from(container.querySelectorAll('.docx *')).some((el) => {
    if (window.getComputedStyle(el).position !== 'absolute') return false;
    return Boolean(el.querySelector('svg, canvas, img')) || el.tagName.toLowerCase() === 'svg';
  });
}

/**
 * Creates a body-mounted host positioned off-screen so html2canvas can capture
 * without `display` / `visibility` / `opacity` hiding the tree (PDF_converter.md §7 Rule 2).
 *
 * The host is appended to `document.body` before it is returned; this function does not
 * remove it, so the caller is responsible for detaching it when done.
 */
export function createOffScreenDocxCaptureHost(): HTMLDivElement {
  const host = document.createElement('div');
  host.setAttribute('aria-hidden', 'true');
  host.setAttribute('inert', '');
  Object.assign(host.style, {
    position: 'fixed',
    left: '-100000px',
    top: '0',
    boxSizing: 'border-box',
    width: `${DOCX_PDF_PREVIEW_SHELL_MAX_WIDTH_PX}px`,
    maxWidth: `${DOCX_PDF_PREVIEW_SHELL_MAX_WIDTH_PX}px`,
    padding: `${DOCX_PDF_PREVIEW_INNER_PADDING_PX}px`,
    pointerEvents: 'none',
    overflow: 'visible',
    backgroundColor: '#ffffff',
  });
  document.body.appendChild(host);
  return host;
}

/**
 * Renders a .docx into `container`, then rasterises each `section` page to a multi-page PDF.
 * Clears `container.innerHTML` before rendering. The element must be attached to the document.
 *
 * @param source - The .docx file contents.
 * @param container - Attached element that receives the docx-preview output; its previous
 *   children are discarded and it is tagged with `data-docx-capture-root`.
 * @param options - Rendering/capture tuning and progress callbacks. A `renderScale` that is
 *   not a positive finite number falls back to the default (2).
 * @returns A Blob holding the PDF, one PDF page per rendered `section`, each sized to the
 *   captured page.
 * @throws Error when docx-preview produced no `section` elements. Rejections from
 *   `renderAsync` and `html2canvas` are not caught here and propagate to the caller.
 */
export async function convertDocxToPdfBlob(
  source: Blob | File,
  container: HTMLElement,
  options: ConvertDocxToPdfOptions = {},
): Promise<Blob> {
  // The scale is used as a divisor below; reject 0 / negative / NaN / Infinity up front.
  const requestedScale = options.renderScale ?? DEFAULT_RENDER_SCALE;
  const renderScale =
    Number.isFinite(requestedScale) && requestedScale > 0 ? requestedScale : DEFAULT_RENDER_SCALE;
  const imageQuality = options.imageQuality ?? 0.95;
  const losslessImages = options.losslessImages ?? false;
  const layoutSettleExtraMs = options.layoutSettleExtraMs ?? 120;

  options.onPhaseRendering?.();

  container.innerHTML = '';
  container.dataset.docxCaptureRoot = '1';

  await renderAsync(source, container, undefined, {
    ...SHARED_DOCX_OFFICIAL_FORM_RENDER_OPTIONS,
    ignoreLastRenderedPageBreak: false,
    useBase64URL: true,
    renderHeaders: true,
    renderFooters: true,
    renderFootnotes: true,
  });

  injectDocxTableReflowStyles(container, { pdfPreviewChrome: true });
  injectCaptureTypographyStyles(container);
  injectDocxJustifyMitigationStyles(container);
  normalizeOfficialFormCoverForPdfCapture(container);

  const appliedTimesFallbackOverride = !isTimesNewRomanAvailable();

  // Best effort: a failing font-loading promise must not block the export.
  try {
    await (document.fonts?.ready ?? Promise.resolve());
  } catch {
    /* ignore */
  }
  await waitForRenderableAssets(container);

  // Two animation frames plus a short delay let layout (tables/fonts) settle before capture.
  await new Promise<void>((r) => requestAnimationFrame(() => r()));
  await new Promise<void>((r) => requestAnimationFrame(() => r()));
  await new Promise<void>((r) => setTimeout(r, layoutSettleExtraMs));

  // Re-apply after settling in case late layout work changed the letterhead.
  normalizeOfficialFormCoverForPdfCapture(container);

  options.onLayoutAnalysed?.({
    hasFloatingShapeCandidates: hasFloatingShapeCandidates(container),
    appliedTimesFallbackOverride,
  });

  let pages = Array.from(container.querySelectorAll<HTMLElement>('section.docx'));
  if (pages.length === 0) {
    pages = Array.from(container.querySelectorAll<HTMLElement>('section'));
  }
  if (pages.length === 0) {
    throw new Error(
      'docx-preview rendered the document but produced no <section> page elements.',
    );
  }

  options.onPhaseCapturing?.(pages.length);

  const imgType = losslessImages ? 'PNG' : 'JPEG';
  const mimeType = losslessImages ? 'image/png' : 'image/jpeg';
  let pdf: jsPDF | null = null;

  for (let i = 0; i < pages.length; i++) {
    const page = pages[i];
    const canvas = await html2canvas(page, {
      scale: renderScale,
      useCORS: true,
      backgroundColor: '#ffffff',
      logging: false,
      windowWidth: page.offsetWidth,
      windowHeight: page.offsetHeight,
    });

    // Canvas pixels → CSS pixels → millimetres, so each PDF page matches the rendered page size.
    const pageWidthMm = (canvas.width / renderScale) * PX_TO_MM;
    const pageHeightMm = (canvas.height / renderScale) * PX_TO_MM;
    const orientation = pageWidthMm > pageHeightMm ? 'landscape' : 'portrait';

    const imgData = canvas.toDataURL(mimeType, losslessImages ? undefined : imageQuality);

    // The pixels now live in `imgData`; shrink the canvas so its bitmap can be reclaimed
    // before the next page is rasterised.
    canvas.width = 0;
    canvas.height = 0;

    if (!pdf) {
      // The first page defines the document; later pages are appended with their own size.
      pdf = new jsPDF({
        orientation,
        unit: 'mm',
        format: [pageWidthMm, pageHeightMm],
        compress: true,
      });
    } else {
      pdf.addPage([pageWidthMm, pageHeightMm], orientation);
    }

    pdf.addImage(imgData, imgType, 0, 0, pageWidthMm, pageHeightMm, undefined, 'FAST');
    options.onCaptureProgress?.(i + 1, pages.length);
  }

  // Unreachable while `pages` is non-empty (checked above); kept as an explicit guard
  // instead of a non-null assertion.
  if (pdf === null) {
    throw new Error('No pages were captured; cannot build a PDF.');
  }
  return pdf.output('blob');
}