sciagent code + Gitea Actions CI/CD
CI/CD / backend (push) Failing after 2m8s
CI/CD / frontend (push) Failing after 1m40s
CI/CD / deploy (push) Has been skipped

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Thinh Lam
2026-06-30 09:38:30 +07:00
commit 688fac73e9
1167 changed files with 158244 additions and 0 deletions
@@ -0,0 +1,288 @@
import { renderAsync } from 'docx-preview';
import html2canvas from 'html2canvas';
import jsPDF from 'jspdf';
import { injectDocxJustifyMitigationStyles } from '../lib/docxJustifyMitigationCss';
import { injectDocxTableReflowStyles } from '../lib/docxTableReflow';
import { SHARED_DOCX_OFFICIAL_FORM_RENDER_OPTIONS } from '../lib/sharedDocxOfficialFormRenderOptions';
/** Millimetres per CSS pixel (25.4 mm per inch / 96 px per inch); used to size PDF pages from canvas dimensions. */
const PX_TO_MM = 25.4 / 96;
/**
 * Resolves once every `<img>` under `container` has either finished loading or failed.
 *
 * Never rejects: a broken image counts as "settled" so a single bad asset cannot
 * block the capture that follows.
 *
 * @param container - Root element whose descendant images are awaited.
 */
async function waitForRenderableAssets(container: HTMLElement): Promise<void> {
  const images = Array.from(container.querySelectorAll<HTMLImageElement>('img'));
  if (images.length === 0) return;
  await Promise.all(
    images.map(async (img) => {
      if (img.complete) return;
      if (typeof img.decode === 'function') {
        try {
          await img.decode();
          return;
        } catch {
          // decode() can reject because the image already failed to load. In that case
          // its `error` event has fired and will not fire again, so waiting on the
          // listeners below would hang forever. Only fall through while still pending.
          if (img.complete) return;
        }
      }
      await new Promise<void>((resolve) => {
        const done = () => resolve();
        img.addEventListener('load', done, { once: true });
        img.addEventListener('error', done, { once: true });
      });
    }),
  );
}
/**
 * Matches `docx-to-pdf-demo.html`: `.shell { max-width }` + `.preview { padding }`.
 * Visible preview and off-screen capture use the same numbers so line breaks and tables match.
 *
 * This value is applied as both `width` and `max-width` (in CSS px) of the off-screen capture host.
 */
export const DOCX_PDF_PREVIEW_SHELL_MAX_WIDTH_PX = 980;
/** Inner padding (in CSS px) applied to the off-screen capture host; pairs with the shell width above. */
export const DOCX_PDF_PREVIEW_INNER_PADDING_PX = 28;
/** Optional raster settings and progress callbacks accepted by `convertDocxToPdfBlob`. */
export type ConvertDocxToPdfOptions = {
/** html2canvas scale. Default 2. */
renderScale?: number;
/** JPEG quality between 0 and 1, used when not lossless. Default 0.95. */
imageQuality?: number;
/** PNG page images in the PDF instead of JPEG. Default false. */
losslessImages?: boolean;
/** Fired before `renderAsync` (docx-preview). */
onPhaseRendering?: () => void;
/** Fired after layout settle, before per-page capture. */
onPhaseCapturing?: (pageCount: number) => void;
/** After each page is rasterised (`current` is 1-based). */
onCaptureProgress?: (current: number, total: number) => void;
/**
 * Extra ms to wait after layout settles (tables/fonts). Default 120, mirroring docx-to-pdf-demo.html.
 */
layoutSettleExtraMs?: number;
/** Optional callback with render-layout signals for QA/advisory UI. */
onLayoutAnalysed?: (insights: ConvertDocxToPdfLayoutInsights) => void;
};
/** Advisory signals about the rendered layout, passed to `onLayoutAnalysed` before page capture starts. */
export type ConvertDocxToPdfLayoutInsights = {
/** True when the rendered page appears to include absolute-positioned drawing/shape elements. */
hasFloatingShapeCandidates: boolean;
/** True when Times New Roman is unavailable; capture CSS still uses a serif stack. */
appliedTimesFallbackOverride: boolean;
};
/**
 * Asks the document's font set whether `16px "Times New Roman"` can be used.
 *
 * Answers `true` (i.e. "assume available") when the font-set API is missing,
 * has no callable `check`, or when `check` throws.
 */
function isTimesNewRomanAvailable(): boolean {
  const fontSet = document.fonts;
  const canQuery = Boolean(fontSet) && typeof fontSet.check === 'function';
  if (canQuery) {
    try {
      return fontSet.check('16px "Times New Roman"');
    } catch {
      // Fall through to the optimistic default below.
    }
  }
  return true;
}
/**
 * Forces a Times-like serif font stack on everything under `.docx-wrapper` inside `scope`,
 * so the rasterised PDF keeps Word-style typography (html2canvas often used system sans otherwise).
 *
 * Idempotent: does nothing when the marker `<style>` element is already present. Also tags
 * `scope` with `data-docx-capture-root` (unless already set) because the CSS rules key off it.
 */
function injectCaptureTypographyStyles(scope: HTMLElement): void {
  const existingSheet = scope.querySelector('style[data-docx-capture-typography="1"]');
  if (existingSheet) return;

  if (!scope.dataset.docxCaptureRoot) {
    scope.dataset.docxCaptureRoot = '1';
  }

  const cssLines = [
    '',
    '[data-docx-capture-root] .docx-wrapper,',
    '[data-docx-capture-root] .docx-wrapper * {',
    'font-family: "Times New Roman", Times, "Liberation Serif", "Noto Serif", serif !important;',
    '}',
    '',
  ];

  const sheet = document.createElement('style');
  sheet.setAttribute('data-docx-capture-typography', '1');
  sheet.textContent = cssLines.join('\n');
  scope.appendChild(sheet);
}
/**
 * docx-preview can keep « BỘ Y TẾ » bold (style / strong) and may leak italic from inherited
 * styles even when the OOXML run is regular. Normalize the first page letterhead in the DOM
 * so the rasterised PDF matches the official template:
 *
 * - « BỘ Y TẾ » → regular (400), upright
 * - « ĐẠI HỌC Y DƯỢC » / « THÀNH PHỐ HỒ CHÍ MINH » → bold (700), upright
 *
 * The source DOCX contains the typo « ĐẠI HỘC »; match both spellings so we keep working if
 * the template is ever corrected.
 *
 * Paragraph text and the reference strings are both brought to Unicode NFC before comparing,
 * so a document that stores Vietnamese diacritics as combining marks (decomposed form) is
 * still recognised.
 *
 * @param root - Element containing the docx-preview output; only its first `section.docx`
 *   is inspected. Does nothing when no such section exists.
 */
function normalizeOfficialFormCoverForPdfCapture(root: HTMLElement): void {
  const section = root.querySelector<HTMLElement>('section.docx');
  if (!section) return;

  const toNfc = (text: string): string => text.normalize('NFC');
  const ministry = toNfc('BỘ Y TẾ');
  const universitySpellings = [toNfc('ĐẠI HỌC Y DƯỢC'), toNfc('ĐẠI HỘC Y DƯỢC')];
  const city = toNfc('THÀNH PHỐ HỒ CHÍ MINH');

  // Applies weight + upright style to the paragraph and every descendant, with `!important`
  // so inherited or inline run styles cannot win.
  const setLetterheadTypography = (el: HTMLElement, weight: '400' | '700') => {
    el.style.setProperty('font-weight', weight, 'important');
    el.style.setProperty('font-style', 'normal', 'important');
    el.querySelectorAll<HTMLElement>('*').forEach((c) => {
      c.style.setProperty('font-weight', weight, 'important');
      c.style.setProperty('font-style', 'normal', 'important');
    });
  };
  const applyLetterhead = (el: HTMLElement, weight: '400' | '700') => {
    el.style.setProperty('text-align', 'center', 'important');
    setLetterheadTypography(el, weight);
  };

  const hasUniversity = (line: string) => universitySpellings.some((name) => line.includes(name));
  const isUniversityOnly = (line: string) => universitySpellings.includes(line);

  const paras = section.querySelectorAll<HTMLElement>('p');
  for (const el of paras) {
    // Collapse whitespace runs so line breaks / padding inside the paragraph do not matter.
    const line = toNfc(el.textContent ?? '')
      .replace(/\s+/g, ' ')
      .trim();
    if (line === ministry) {
      applyLetterhead(el, '400');
      continue;
    }
    const isUniversity =
      (hasUniversity(line) && line.includes(city)) || isUniversityOnly(line) || line === city;
    if (isUniversity) {
      applyLetterhead(el, '700');
    }
  }
}
/**
 * Heuristic: does the rendered output contain elements that look like floating drawings/shapes?
 *
 * Two passes:
 * 1. A cheap selector for drawing markers and inline `position: absolute` styles. Both the
 *    compact (`position:absolute`) and the spaced (`position: absolute`) spellings are matched,
 *    because a `style` attribute written through `element.style` is serialised with a space
 *    after the colon while attribute text copied verbatim may have none.
 * 2. A computed-style scan of every element under `.docx`, reporting an absolutely positioned
 *    element only when it is an `<svg>` or contains an `svg`, `canvas` or `img`.
 *
 * @param container - Root element holding the docx-preview output.
 * @returns `true` when at least one candidate is found.
 */
function hasFloatingShapeCandidates(container: HTMLElement): boolean {
  const obvious = container.querySelector(
    '.docx-drawing, [data-anchor], [data-wrap], [style*="position:absolute"], [style*="position: absolute"]',
  );
  if (obvious) return true;

  const all = Array.from(container.querySelectorAll<HTMLElement>('.docx *'));
  return all.some((el) => {
    if (window.getComputedStyle(el).position !== 'absolute') return false;
    return el.tagName.toLowerCase() === 'svg' || el.querySelector('svg, canvas, img') !== null;
  });
}
/**
 * Builds a `<div>` appended to `document.body` and parked far off the left edge of the viewport,
 * so html2canvas can capture its contents without the tree being hidden through
 * `display` / `visibility` / `opacity` (PDF_converter.md §7 Rule 2).
 *
 * The host is marked `aria-hidden` and `inert`, ignores pointer events, has a white background,
 * and uses the shared preview width and padding constants.
 *
 * @returns The newly created, already attached host element.
 */
export function createOffScreenDocxCaptureHost(): HTMLDivElement {
  const shellWidth = `${DOCX_PDF_PREVIEW_SHELL_MAX_WIDTH_PX}px`;
  const innerPadding = `${DOCX_PDF_PREVIEW_INNER_PADDING_PX}px`;

  const captureHost = document.createElement('div');
  captureHost.setAttribute('aria-hidden', 'true');
  captureHost.setAttribute('inert', '');

  const { style } = captureHost;
  style.position = 'fixed';
  style.left = '-100000px';
  style.top = '0';
  style.boxSizing = 'border-box';
  style.width = shellWidth;
  style.maxWidth = shellWidth;
  style.padding = innerPadding;
  style.pointerEvents = 'none';
  style.overflow = 'visible';
  style.backgroundColor = '#ffffff';

  document.body.appendChild(captureHost);
  return captureHost;
}
/**
 * Renders a .docx into `container`, then rasterises each `section` page to a multi-page PDF.
 * Clears `container.innerHTML` before rendering. The element must be attached to the document.
 *
 * Sequence: render with docx-preview → inject capture styles → normalise the cover letterhead →
 * wait for fonts, images and two animation frames plus `layoutSettleExtraMs` → report layout
 * insights → capture every page with html2canvas → add each capture as one PDF page sized to
 * the captured area.
 *
 * @param source - The .docx contents.
 * @param container - Attached element that receives the docx-preview output.
 * @param options - Raster settings and progress callbacks. A `renderScale` that is not a
 *   finite positive number falls back to the default of 2.
 * @returns A PDF blob with one image page per rendered `<section>`.
 * @throws Error when docx-preview produces no `<section>` elements, or when a page rasterises
 *   to an empty (zero-area) canvas.
 */
export async function convertDocxToPdfBlob(
  source: Blob | File,
  container: HTMLElement,
  options: ConvertDocxToPdfOptions = {},
): Promise<Blob> {
  const requestedScale = options.renderScale ?? 2;
  // Zero, negative or non-finite scales give an empty canvas and NaN page sizes further down.
  const renderScale = Number.isFinite(requestedScale) && requestedScale > 0 ? requestedScale : 2;
  const imageQuality = options.imageQuality ?? 0.95;
  const losslessImages = options.losslessImages ?? false;
  const layoutSettleExtraMs = options.layoutSettleExtraMs ?? 120;

  options.onPhaseRendering?.();
  container.innerHTML = '';
  container.dataset.docxCaptureRoot = '1';
  await renderAsync(source, container, undefined, {
    ...SHARED_DOCX_OFFICIAL_FORM_RENDER_OPTIONS,
    ignoreLastRenderedPageBreak: false,
    useBase64URL: true,
    renderHeaders: true,
    renderFooters: true,
    renderFootnotes: true,
  });

  injectDocxTableReflowStyles(container, { pdfPreviewChrome: true });
  injectCaptureTypographyStyles(container);
  injectDocxJustifyMitigationStyles(container);
  normalizeOfficialFormCoverForPdfCapture(container);
  const appliedTimesFallbackOverride = !isTimesNewRomanAvailable();

  try {
    await (document.fonts?.ready ?? Promise.resolve());
  } catch {
    /* Font loading is best-effort; capture proceeds with whatever is available. */
  }
  await waitForRenderableAssets(container);
  // Two frames so style and layout changes from the injections above are flushed.
  await new Promise<void>((r) => requestAnimationFrame(() => r()));
  await new Promise<void>((r) => requestAnimationFrame(() => r()));
  await new Promise<void>((r) => setTimeout(r, layoutSettleExtraMs));
  // Re-apply after settling, before the pages are captured.
  normalizeOfficialFormCoverForPdfCapture(container);

  options.onLayoutAnalysed?.({
    hasFloatingShapeCandidates: hasFloatingShapeCandidates(container),
    appliedTimesFallbackOverride,
  });

  let pages = Array.from(container.querySelectorAll<HTMLElement>('section.docx'));
  if (pages.length === 0) {
    pages = Array.from(container.querySelectorAll<HTMLElement>('section'));
  }
  if (pages.length === 0) {
    throw new Error(
      'docx-preview rendered the document but produced no <section> page elements.',
    );
  }
  options.onPhaseCapturing?.(pages.length);

  const imgType = losslessImages ? 'PNG' : 'JPEG';
  const mimeType = losslessImages ? 'image/png' : 'image/jpeg';
  let pdf: jsPDF | null = null;

  for (const [index, page] of pages.entries()) {
    const canvas = await html2canvas(page, {
      scale: renderScale,
      useCORS: true,
      backgroundColor: '#ffffff',
      logging: false,
      windowWidth: page.offsetWidth,
      windowHeight: page.offsetHeight,
    });
    if (canvas.width === 0 || canvas.height === 0) {
      // An empty canvas has no image data and no usable page size; fail with a clear
      // message instead of an obscure error from the PDF library.
      throw new Error(
        `Page ${index + 1} of ${pages.length} rasterised to an empty canvas; is the container attached and visible?`,
      );
    }

    const pageWidthMm = (canvas.width / renderScale) * PX_TO_MM;
    const pageHeightMm = (canvas.height / renderScale) * PX_TO_MM;
    const orientation = pageWidthMm > pageHeightMm ? 'landscape' : 'portrait';
    const imgData = canvas.toDataURL(mimeType, losslessImages ? undefined : imageQuality);
    // The data URL now holds the pixels; drop the canvas backing store right away rather
    // than keeping one full-resolution bitmap per page alive until garbage collection.
    canvas.width = 0;
    canvas.height = 0;

    if (!pdf) {
      pdf = new jsPDF({
        orientation,
        unit: 'mm',
        format: [pageWidthMm, pageHeightMm],
        compress: true,
      });
    } else {
      pdf.addPage([pageWidthMm, pageHeightMm], orientation);
    }
    pdf.addImage(imgData, imgType, 0, 0, pageWidthMm, pageHeightMm, undefined, 'FAST');
    options.onCaptureProgress?.(index + 1, pages.length);
  }

  if (!pdf) {
    // Unreachable while the empty-pages guard above holds; keeps the return type honest.
    throw new Error('No pages were captured from the rendered document.');
  }
  return pdf.output('blob');
}