sciagent code + Gitea Actions CI/CD

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-30 09:38:30 +07:00
commit 688fac73e9
1167 changed files with 158244 additions and 0 deletions
@@ -0,0 +1,221 @@
+#!/usr/bin/env python3
+"""
+Build the Word template from the original form file by replacing each dots-line
+("..........") with a {{xxx}} placeholder chosen by the position of its label in the document.
+
+Usage:
+    python scripts/build-word-template.py
+
+Input:  src/Backend/DYD.Api/Templates/bao-cao-template-original.docx
+Output: src/Backend/DYD.Api/Templates/bao-cao-template.docx
+"""
+
+import re
+import shutil
+import sys
+import tempfile
+import xml.etree.ElementTree as ET
+import zipfile
+from pathlib import Path
+
+# WordprocessingML main namespace. W is the same URI wrapped in braces
+# (ElementTree's "{uri}local" tag notation), so tags are spelled f'{W}p',
+# f'{W}r', f'{W}t' throughout this script.
+NS_URI = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
+W = f'{{{NS_URI}}}'
+# Serialize this namespace with the "w" prefix instead of an auto-generated
+# one when the tree is written back. The registry is global to ElementTree.
+ET.register_namespace('w', NS_URI)
+
+# Placeholders are chosen by matching label text against the regex tables below.
+# NOTE(review): this comment used to describe a "paragraph index → placeholder"
+# map built by printing all paragraphs once and mapping them manually; no such
+# map is visible in this file, so that description looks stale.
+# A placeholder {{name}} ends up in the document in one of two ways:
+#   - it replaces a dots-line that follows a label paragraph, or
+#   - it is appended to the label paragraph itself (cover page, see below).
+
+# Cover page: labels are not followed by dots, so the placeholder is appended
+# at the END of the label paragraph.
+# Format: (label_text_regex, placeholder_name). Every regex is anchored with
+# ^...$, so the label has to be the paragraph's entire text.
+COVER_LABEL_APPEND = [
+    (re.compile(r'^Tên sáng kiến \(Tiếng Việt\):\s*$'), 'coverInitiativeName'),
+    (re.compile(r'^Tác giả/nhóm tác giả sáng kiến:\s*$'), 'coverAuthors'),
+    (re.compile(r'^Đơn vị công tác:\s*$'), 'coverUnit'),
+    (re.compile(r'^Thông tin liên hệ \(Điện thoại, Email\):\s*$'), 'coverContact'),
+    (re.compile(r'^NĂM 20\.\.\.$'), 'coverYear'),
+]
+
+# Map: label regex (previous paragraph) → placeholder for the dots-paragraph following.
+# Entries are (label_regex, placeholder_name). The lookup uses re.match, so each
+# pattern only has to match at the START of the paragraph text, and the first
+# matching entry in list order wins.
+LABEL_TO_PLACEHOLDER = [
+    # Form 01 ("Mẫu 01")
+    (re.compile(r'^1\.\s*Mở đầu'), 'introduction'),
+    (re.compile(r'^2\.\s*Tên sáng kiến\b'), 'initiativeName'),
+    (re.compile(r'^3\.\s*Lĩnh vực áp dụng'), 'applicationField'),
+    (re.compile(r'^4\.1\s*Tình trạng giải pháp'), 'currentStatus'),
+    (re.compile(r'^-\s*Mục đích của sáng kiến'), 'purpose'),
+    (re.compile(r'^\+\s*Các bước thực hiện'), 'implementationSteps'),
+    (re.compile(r'^\+\s*Các điều kiện cần thiết'), 'conditions'),
+    (re.compile(r'^-\s*Về tính mới'), 'novelty'),
+    # Effectiveness: 10 items
+    (re.compile(r'^\+\s*Tạo ra lợi ích kinh tế'), 'effEconomic'),
+    (re.compile(r'^\+\s*Đem lại hiệu quả trong giảng dạy'), 'effTeaching'),
+    (re.compile(r'^\+\s*Tăng năng suất lao động'), 'effProductivity'),
+    (re.compile(r'^\+\s*Nâng cao hiệu quả công việc'), 'effSocial'),
+    (re.compile(r'^\+\s*Nâng cao chất lượng công việc'), 'effQuality'),
+    (re.compile(r'^\+\s*Giảm chi phí'), 'effCost'),
+    (re.compile(r'^\+\s*Cải thiện môi trường'), 'effEnvironment'),
+    (re.compile(r'^\+\s*Bảo vệ sức khỏe'), 'effHealth'),
+    (re.compile(r'^\+\s*Đảm bảo an toàn lao động'), 'effLaborSafety'),
+    (re.compile(r'^\+\s*Nâng cao khả năng, trình độ'), 'effAwareness'),
+    (re.compile(r'^6\.\s*Những thông tin cần được bảo mật'), 'confidentialInfo'),
+    # Form 02 ("Mẫu 02")
+    (re.compile(r'^-\s*Chủ đầu tư tạo ra sáng kiến'), 'investorName'),
+    (re.compile(r'^-\s*Lĩnh vực áp dụng sáng kiến'), 'applicationField02'),
+    (re.compile(r'^-\s*Ngày sáng kiến được áp dụng'), 'firstApplyDate'),
+    (re.compile(r'^-\s*Nội dung của sáng kiến'), 'contentSummary'),
+    (re.compile(r'^Những thông tin cần được bảo mật'), 'confidentialInfo02'),
+    (re.compile(r'^Các điều kiện cần thiết để áp dụng'), 'conditions02'),
+    # The next two labels share a long prefix and differ only in their last words.
+    (re.compile(r'^Đánh giá lợi ích thu được hoặc dự kiến có thể thu được do áp dụng sáng kiến theo ý kiến của tác giả'), 'authorEvaluation'),
+    (re.compile(r'^Đánh giá lợi ích thu được hoặc dự kiến có thể thu được do áp dụng sáng kiến theo ý kiến của tổ chức'), 'trialEvaluation'),
+    # Form 03 ("Mẫu 03")
+    (re.compile(r'^1\.\s*Tên sáng kiến'), 'initiativeName03'),
+    (re.compile(r'^2\.\s*Tác giả chính'), 'mainAuthor03'),
+    (re.compile(r'^Chức vụ, đơn vị công tác'), 'position03'),
+    # Form 04 ("Mẫu 04")
+    (re.compile(r'^Kết luận'), 'conclusion'),
+]
+
+# Dots-line pattern: the whole paragraph text consists only of dots and
+# whitespace, at least 50 characters in total. Whitespace is NOT collapsed
+# beforehand (para_text only strips the ends), so inner spaces count too.
+DOTS_PATTERN = re.compile(r'^[\s\.]{50,}$')
+
+
+def para_text(p):
+    """Concat all w:t text of paragraph p."""
+    return ''.join((t.text or '') for t in p.iter(f'{W}t')).strip()
+
+
+def set_para_text(p, new_text):
+    """Replace paragraph's run text with single new_text. Keeps first run's properties."""
+    # Find all runs
+    runs = list(p.findall(f'{W}r'))
+    if not runs:
+        # No run — add one with text
+        r = ET.SubElement(p, f'{W}r')
+        t = ET.SubElement(r, f'{W}t')
+        t.text = new_text
+        t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
+        return
+
+    # Remove all runs except first
+    for r in runs[1:]:
+        p.remove(r)
+
+    # Clear all <w:t> in first run, leave <w:rPr> intact
+    first = runs[0]
+    for t in list(first.findall(f'{W}t')):
+        first.remove(t)
+    # Remove all non-rPr, non-text children? Leave them alone (breaks etc).
+    # Add new text element
+    t = ET.SubElement(first, f'{W}t')
+    t.text = new_text
+    t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
+
+
+def append_placeholder_to_para(p, placeholder):
+    """Append text ' {{placeholder}}' to end of paragraph (new run)."""
+    r = ET.SubElement(p, f'{W}r')
+    t = ET.SubElement(r, f'{W}t')
+    t.text = f' {{{{{placeholder}}}}}'
+    t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
+
+
+def find_placeholder_for_label(text):
+    for regex, placeholder in LABEL_TO_PLACEHOLDER:
+        if regex.match(text):
+            return placeholder
+    return None
+
+
+def find_cover_label(text):
+    for regex, placeholder in COVER_LABEL_APPEND:
+        if regex.match(text):
+            return placeholder
+    return None
+
+
+def process(xml_path):
+    tree = ET.parse(xml_path)
+    root = tree.getroot()
+
+    paragraphs = list(root.iter(f'{W}p'))
+
+    prev_label_placeholder = None
+    used_placeholders = set()
+    cover_done = set()  # only replace cover labels once (file has 2 cover pages)
+    dots_counter = 0
+
+    for i, p in enumerate(paragraphs):
+        text = para_text(p)
+        if not text:
+            continue
+
+        # 1. Cover label — append placeholder
+        cover_pl = find_cover_label(text)
+        if cover_pl and cover_pl not in cover_done:
+            append_placeholder_to_para(p, cover_pl)
+            cover_done.add(cover_pl)
+            prev_label_placeholder = None
+            continue
+
+        # 2. Dots line → replace with placeholder from prev label
+        if DOTS_PATTERN.match(text):
+            dots_counter += 1
+            if prev_label_placeholder and prev_label_placeholder not in used_placeholders:
+                set_para_text(p, f'{{{{{prev_label_placeholder}}}}}')
+                used_placeholders.add(prev_label_placeholder)
+                prev_label_placeholder = None
+            else:
+                # extra dots line without matching label — tag with generic counter
+                set_para_text(p, f'{{{{extra_{dots_counter}}}}}')
+            continue
+
+        # 3. Label paragraph → remember placeholder for NEXT dots line
+        label_pl = find_placeholder_for_label(text)
+        if label_pl:
+            prev_label_placeholder = label_pl
+            continue
+
+        # 4. Other paragraph — reset label if it wasn't matched
+        # Don't reset prev_label if current para is just description (italic note etc.)
+        # Keep prev_label until we see dots or a new label
+
+    tree.write(xml_path, encoding='UTF-8', xml_declaration=True)
+    return used_placeholders, dots_counter
+
+
+def main():
+    repo_root = Path(__file__).parent.parent
+    src = repo_root / 'src/Backend/DYD.Api/Templates/bao-cao-template-original.docx'
+    dst = repo_root / 'src/Backend/DYD.Api/Templates/bao-cao-template.docx'
+
+    if not src.exists():
+        print(f'ERROR: source template not found at {src}')
+        sys.exit(1)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmp = Path(tmpdir)
+        # Unzip
+        with zipfile.ZipFile(src, 'r') as z:
+            z.extractall(tmp)
+        doc_xml = tmp / 'word' / 'document.xml'
+
+        used, dots = process(doc_xml)
+        print(f'Replaced {len(used)} placeholders from {dots} dots-lines.')
+        print(f'Placeholders: {sorted(used)}')
+
+        # Rezip
+        if dst.exists():
+            dst.unlink()
+        with zipfile.ZipFile(dst, 'w', zipfile.ZIP_DEFLATED) as zout:
+            for path in tmp.rglob('*'):
+                if path.is_file():
+                    zout.write(path, path.relative_to(tmp))
+
+        print(f'Wrote {dst}')
+
+
+if __name__ == '__main__':
+    main()