thu-thesis — Skillopedia

清华 MBA 论文 Word → PDF 一键转换。 ⚠️ 核心操作原则（不得违反）：只从 Word 中提取信息，不修改 thuthesis 模板格式。 - thuthesis 的封面、页眉、目录、参考文献、图表样式等，全部由 thuthesis 模板自动生成 - 脚本只负责把 Word 里的内容（标题、摘要、章节、图表、参考文献等）提取出来填入对应的 LaTeX 文件 - 若 Word 中某字段缺失，对应 LaTeX 字段留空，不删除、不跳过、不用占位符替代 - 任何格式上的"改进"都必须以 assets/databk/ 中的官方示例为准，不得自行发挥。 架构：新三层 AI-native 流程。 关键设计原则：Python 脚本不调用任何 LLM，不持有 API key。AI 在两个关键环节介入：(1) 阅读骨架生成 struct.json；(2) Rubric 评测 + 自动修复。 依赖格式参考：assets/databk/ 是从官方 thuthesis 项目备份的原始示例 data 文件，是本工具一切格式决策的黄金标准： | 文件 | 参考内容 | |------|----------| | | 正文章节、三线表、图片、公式格式 | | | 中英文摘要格式 | | | 缩略语/符号说明格式 | | | 致谢格式 | | | 个人简历格式 | 遇到任何格式问题，先查 assets/databk/ 里的对应文件，再动代码。 初次使用 / 更新格式参考做三件事： 1. 从…

, cell)\n if m:\n meta['author_en'] = m.group(1).strip()\n if re.sub(r'\\s', '', cell) in ('指导教师', '导师') and i + 2 \u003c len(cells):\n meta['supervisor'] = cells[i + 2].strip()\n if re.sub(r'\\s', '', cell) in ('培养单位', '院系', '学院') and i + 2 \u003c len(cells):\n meta['department'] = cells[i + 2].strip()\n if cell in ('学位类别', '申请学位') and i + 2 \u003c len(cells):\n meta['degree_category'] = cells[i + 2].strip()\n CN_DIGITS = {'○': '0', '〇': '0', '一': '1', '二': '2', '三': '3', '四': '4',\n '五': '5', '六': '6', '七': '7', '八': '8', '九': '9'}\n # 先处理复合月份（十二月/十一月/十月），再逐字转数字\n cell_pre = re.sub(r'十二月', '#12月', cell)\n cell_pre = re.sub(r'十一月', '#11月', cell_pre)\n cell_pre = re.sub(r'十月', '#10月', cell_pre)\n cell_arabic = ''.join(CN_DIGITS.get(c, c) for c in cell_pre).replace('#', '')\n m = re.search(r'(\\d{4})\\s*年\\s*(\\d{1,2})\\s*月', cell_arabic)\n if m and not meta['date']:\n meta['date'] = f\"{m.group(1)}-{int(m.group(2)):02d}\"\n\n return meta\n\n\n# ── 章节内容组装 ──────────────────────────────────────────────────────────────\n\ndef build_chapter_content(chap_struct, paragraphs, next_chap_start,\n figures_by_para=None, tables_by_para=None,\n extra_tables=None):\n content = []\n cr = chap_struct.get('content_range', [0, 9999])\n start = int(cr[0]) if cr[0] is not None else 0\n end = int(cr[1]) if (len(cr) > 1 and cr[1] is not None) else next_chap_start - 1\n\n sec_map = {}\n for sec in chap_struct.get('sections', []):\n sec_map[int(sec['title_para'])] = sec\n\n fig_map = figures_by_para or {}\n tbl_map = tables_by_para or {}\n para_by_idx = {p['idx']: p for p in paragraphs}\n\n for p in paragraphs:\n idx = p.get('idx', 0)\n if idx \u003c start or idx > end:\n continue\n text = p.get('text', '').strip()\n\n # 插入图片块\n for fig in fig_map.get(idx, []):\n fname = fig.get('filename', '')\n ext = Path(fname).suffix.lower()\n if ext == '.svg':\n continue\n caption = ''\n for nidx in [idx, idx + 1, idx + 2]:\n nt = para_by_idx.get(nidx, {}).get('text', '').strip()\n if 
re.match(r'^图\\s*\\d', nt):\n caption = nt\n break\n content.append({\n \"type\": \"figure\",\n \"embed\": fig.get('rId', ''),\n \"path\": f\"figures/{fname}\",\n \"caption\": caption,\n })\n\n # 插入表格块（before_para == idx）\n for tbl in tbl_map.get(idx, []):\n rows = tbl.get('rows', [])\n if not rows:\n continue\n caption = ''\n for nidx in [idx, idx + 1, idx + 2]:\n nt = para_by_idx.get(nidx, {}).get('text', '').strip()\n if re.match(r'^表\\s*\\d', nt):\n caption = nt\n break\n content.append({\"type\": \"table\", \"caption\": caption, \"rows\": rows})\n\n if not text:\n continue\n if re.match(r'^图\\s*\\d', text):\n continue\n if re.match(r'^表\\s*\\d', text):\n continue\n\n if idx in sec_map:\n sec = sec_map[idx]\n content.append({\"type\": \"section\", \"level\": sec['level'],\n \"number\": sec['number'], \"title\": sec['title']})\n else:\n content.append({\"type\": \"text\", \"content\": text})\n\n # 追加范围外但属于本章的表格\n for bp, tbl in sorted(extra_tables or [], key=lambda x: x[0]):\n rows = tbl.get('rows', [])\n if rows:\n caption = ''\n for nidx in [bp, bp + 1, bp + 2]:\n nt = para_by_idx.get(nidx, {}).get('text', '').strip()\n if re.match(r'^表\\s*\\d', nt):\n caption = nt\n break\n content.append({\"type\": \"table\", \"caption\": caption, \"rows\": rows})\n\n return content\n\n\n# ── 辅助：提取段落文字 ────────────────────────────────────────────────────────\n\ndef paras_in_range(paragraphs, start, end):\n if start is None:\n return []\n return [p for p in paragraphs if start \u003c= p['idx'] \u003c= end]\n\n\ndef paras_text(paragraphs, start, end):\n return '\\n'.join(\n p['text'].strip() for p in paras_in_range(paragraphs, start, end)\n if p['text'].strip()\n )\n\n\n# ── 主流程 ────────────────────────────────────────────────────────────────────\n\ndef build_parsed(raw_json_path, struct_json_path, output_dir=None):\n raw_path = Path(raw_json_path).resolve()\n struct_path = Path(struct_json_path).resolve()\n\n raw = json.loads(raw_path.read_text(encoding='utf-8'))\n struct = 
json.loads(struct_path.read_text(encoding='utf-8'))\n\n if output_dir is None:\n output_dir = raw_path.parent\n output_dir = Path(output_dir)\n output_dir.mkdir(parents=True, exist_ok=True)\n\n paragraphs = raw.get('paragraphs', [])\n tables = raw.get('tables', [])\n figures = raw.get('figures', [])\n\n print(f\"✅ 读取: {len(paragraphs)}段落 {len(tables)}表格 {len(figures)}图片\")\n\n para_by_idx = {p['idx']: p for p in paragraphs}\n\n # ── meta ──\n meta = extract_meta_from_tables(tables)\n\n # 摘要\n cover = struct.get('cover', {})\n abs_cn_range = cover.get('abstract_cn_range', [None, None])\n abs_en_range = cover.get('abstract_en_range', [None, None])\n kw_cn_idx = cover.get('keywords_cn_para')\n kw_en_idx = cover.get('keywords_en_para')\n\n abstract_cn = paras_text(paragraphs, abs_cn_range[0], abs_cn_range[1] if len(abs_cn_range) > 1 else None)\n abstract_en = paras_text(paragraphs, abs_en_range[0], abs_en_range[1] if len(abs_en_range) > 1 else None)\n\n def parse_keywords(idx, is_english=False):\n if idx is None:\n return []\n p = para_by_idx.get(idx, {})\n t = p.get('text', '')\n t = re.sub(r'^关键词[：:\\s]*', '', t)\n t = re.sub(r'^[Kk]ey\\s*[Ww]ords[：:\\s]*', '', t)\n if is_english:\n # 英文关键词用分号/逗号分隔，不用空格拆分（关键词可能是多词短语）\n return [k.strip() for k in re.split(r'[；;，,]+', t) if k.strip()]\n else:\n return [k.strip() for k in re.split(r'[；;，,、\\s]+', t) if k.strip()]\n\n keywords_cn = parse_keywords(kw_cn_idx, is_english=False)\n keywords_en = parse_keywords(kw_en_idx, is_english=True)\n\n # ── 图表分配 ──\n figures_by_para = {}\n for fig in figures:\n pid = fig.get('para_idx')\n if pid is not None:\n figures_by_para.setdefault(pid, []).append(fig)\n\n chap_structs = struct.get('chapters', [])\n chap_ranges = []\n for cs in chap_structs:\n cr = cs.get('content_range', [0, 9999])\n s = int(cr[0]) if cr[0] is not None else 0\n e = int(cr[1]) if (len(cr) > 1 and cr[1] is not None) else 9999\n chap_ranges.append((s, e))\n\n first_chap_start = chap_ranges[0][0] if chap_ranges else 
0\n\n tables_by_para = {}\n extra_by_chap = {} # chap_idx → [(before_para, tbl)]\n\n for tbl in tables:\n bp = tbl.get('before_para')\n if bp is None or bp \u003c first_chap_start:\n continue\n # 找所属章节\n assigned = False\n for ci, (s, e) in enumerate(chap_ranges):\n if s \u003c= bp \u003c= e:\n tables_by_para.setdefault(bp, []).append(tbl)\n assigned = True\n break\n if not assigned:\n # 分配给最近章节\n best_ci = min(range(len(chap_ranges)),\n key=lambda i: min(abs(bp - chap_ranges[i][0]), abs(bp - chap_ranges[i][1])))\n extra_by_chap.setdefault(best_ci, []).append((bp, tbl))\n\n # ── 章节 ──\n chapters = []\n for i, cs in enumerate(chap_structs):\n next_start = chap_ranges[i + 1][0] if i + 1 \u003c len(chap_ranges) else 99999\n content = build_chapter_content(\n cs, paragraphs, next_start,\n figures_by_para, tables_by_para,\n extra_tables=extra_by_chap.get(i, [])\n )\n chapters.append({\n \"level\": 1,\n \"number\": cs.get('number', ''),\n \"title\": cs.get('title', ''),\n \"content\": content\n })\n\n # ── 参考文献 ──\n ref_range = struct.get('references_range', [None, None])\n references = []\n if ref_range[0]:\n for p in paras_in_range(paragraphs, ref_range[0], ref_range[1] or 99999):\n t = p.get('text', '').strip()\n if t and len(t) > 5:\n references.append(t)\n\n # ── 致谢 / 简历 ──\n ack_range = struct.get('acknowledgements_range', [None, None])\n res_range = struct.get('resume_range', [None, None])\n acknowledgements = paras_text(paragraphs, ack_range[0], ack_range[1] or 99999) if ack_range[0] else ''\n resume = paras_text(paragraphs, res_range[0], res_range[1] or 99999) if res_range[0] else ''\n\n # ── 拷贝 figures ──\n src_figures = raw_path.parent / 'figures'\n dst_figures = output_dir / 'figures'\n if src_figures.exists() and src_figures != dst_figures:\n dst_figures.mkdir(parents=True, exist_ok=True)\n for f in src_figures.iterdir():\n shutil.copy2(f, dst_figures / f.name)\n\n # ── 输出 ──\n stem = raw_path.stem.removeprefix('raw_')\n out_path = output_dir / 
f\"parsed_{stem}.json\"\n result = {\n \"meta\": meta,\n \"abstract_cn\": abstract_cn,\n \"abstract_en\": abstract_en,\n \"keywords_cn\": keywords_cn,\n \"keywords_en\": keywords_en,\n \"chapters\": chapters,\n \"references\": references,\n \"acknowledgements\": acknowledgements,\n \"resume\": resume,\n }\n out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding='utf-8')\n\n tc = sum(1 for c in chapters for item in c['content'] if item.get('type') == 'table')\n fc = sum(1 for c in chapters for item in c['content'] if item.get('type') == 'figure')\n print(f\"✅ 完成 → {out_path}\")\n print(f\" 章节: {len(chapters)} 参考文献: {len(references)} 表格: {tc} 图片: {fc}\")\n for c in chapters:\n n_blocks = len(c['content'])\n print(f\" [{c['number']}] {c['title'][:30]} ({n_blocks}块)\")\n\n return out_path\n\n\nif __name__ == '__main__':\n if len(sys.argv) \u003c 3:\n print(\"用法: python3 build_parsed.py raw_xxx.json struct.json [out_dir]\")\n sys.exit(1)\n raw_p = sys.argv[1]\n struct_p = sys.argv[2]\n out_d = sys.argv[3] if len(sys.argv) > 3 else None\n build_parsed(raw_p, struct_p, out_d)\n","content_type":"text/x-python; charset=utf-8","language":"python","size":14024,"content_sha256":"608e1b92358f4399c51761282b68bc1753ae7297455697abee1f0d46955616e9"},{"filename":"scripts/convert.py","content":"#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\"\"\"\nconvert.py - 清华 MBA 论文 Word → PDF 转换器\n\n新三层架构（AI-native 流程）：\n Step 1: extract_raw.py → raw_xxx.json（纯机械提取，无 LLM）\n Step 2: AI（Claude） → struct_xxx.json（阅读骨架，理解章节结构）\n Step 3: build_parsed.py → parsed_xxx.json（纯 Python 组装）\n Step 4: render.py → LaTeX 项目\n Step 5: xelatex + bibtex → thesis.pdf\n Step 6: AI Rubric 评测 → evaluation_report.md\n\n用法：\n # 第一步：机械提取，输出骨架供 AI 阅读\n python3 convert.py extract \u003cinput.docx> [output_dir]\n\n # 第三步（AI 生成 struct.json 后）：完成转换\n python3 convert.py build \u003craw_json> \u003cstruct_json> [latex_dir]\n\n # 或一步调用（需要已有 struct.json）：\n python3 convert.py build raw_xxx.json 
struct_xxx.json ./my-thesis\n\nAI（Claude）负责：\n - 读取 extract 输出的骨架\n - 生成 struct_xxx.json（章节划分、段落 idx 映射）\n - 调用 build 命令完成剩余步骤\n\"\"\"\n\nimport sys\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\n\n# ── 自动补全未引用文献 ──────────────────────────────────────────────────────────\n\ndef _auto_cite_missing(latex_dir: Path):\n \"\"\"检测 refs.bib 中未被正文引用的文献，关键词匹配补 \\\\cite；无匹配用 \\\\nocite 兜底。\"\"\"\n bib_file = latex_dir / 'ref' / 'refs.bib'\n if not bib_file.exists():\n return\n\n bib_text = bib_file.read_text(encoding='utf-8')\n entries = {}\n for m in re.finditer(r'@\\w+\\{(\\w+),(.*?)^\\}', bib_text, re.MULTILINE | re.DOTALL):\n key = m.group(1)\n block = m.group(2)\n title_m = re.search(r'title\\s*=\\s*\\{(.+?)\\}', block, re.DOTALL)\n author_m = re.search(r'author\\s*=\\s*\\{(.+?)\\}', block, re.DOTALL)\n year_m = re.search(r'year\\s*=\\s*\\{(\\d+)\\}', block)\n entries[key] = {\n 'title': title_m.group(1).replace('\\n', ' ').strip() if title_m else '',\n 'author': author_m.group(1).replace('\\n', ' ').strip() if author_m else '',\n 'year': year_m.group(1) if year_m else '',\n }\n\n if not entries:\n return\n\n chap_files = sorted(latex_dir.glob('data/chap*.tex'))\n cited_keys = set()\n for cf in chap_files:\n for m in re.finditer(r'\\\\cite\\{([^}]+)\\}', cf.read_text(encoding='utf-8')):\n for k in m.group(1).split(','):\n cited_keys.add(k.strip())\n\n missing = [k for k in entries if k not in cited_keys]\n if not missing:\n print(f' ✅ 所有 {len(entries)} 条文献均已被引用')\n return\n\n print(f' 发现 {len(missing)} 条未引用文献，进行关键词匹配...')\n\n def _extract_keywords(key):\n e = entries[key]\n text = e['title'] + ' ' + e['author']\n zh_words = re.findall(r'[\\u4e00-\\u9fff]{3,}', text)\n stopwords = {'with', 'from', 'that', 'this', 'their', 'have', 'been', 'into',\n 'drug', 'price', 'pricing', 'china', 'market', 'company', 'strategy'}\n en_words = [w.lower() for w in re.findall(r'[a-zA-Z]{4,}', text)\n if w.lower() not in stopwords]\n return zh_words[:3] + 
en_words[:3]\n\n inserted = {}\n used_sentences = set()\n for key in missing:\n keywords = _extract_keywords(key)\n if not keywords:\n continue\n best_match = None\n best_score = 0\n for cf in chap_files:\n content = cf.read_text(encoding='utf-8')\n for line_m in re.finditer(r'^([^%\\\\][^\\n]{10,}[。！？])', content, re.MULTILINE):\n line = line_m.group(1)\n if line in used_sentences:\n continue\n score = sum(1 for kw in keywords if kw.lower() in line.lower())\n if score > best_score:\n best_score = score\n best_match = (cf, line)\n if best_match and best_score > 0:\n cf, matched_line = best_match\n last_punct = max(matched_line.rfind('。'), matched_line.rfind('！'), matched_line.rfind('？'))\n if last_punct \u003c 0:\n continue\n prefix = matched_line[:last_punct]\n suffix = matched_line[last_punct:]\n existing_cite_m = re.search(r'\\\\cite\\{([^}]+)\\}

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.

, prefix)\n if existing_cite_m:\n old_cite = existing_cite_m.group(0)\n new_cite = old_cite.replace('}', f',{key}}}')\n new_line = prefix[:existing_cite_m.start()] + new_cite + suffix\n else:\n new_line = prefix + f'\\\\cite{{{key}}}' + suffix\n inserted[key] = (cf, matched_line, new_line)\n used_sentences.add(matched_line)\n\n success = 0\n for key, (cf, old_text, new_text) in inserted.items():\n content = cf.read_text(encoding='utf-8')\n if old_text in content:\n cf.write_text(content.replace(old_text, new_text, 1), encoding='utf-8')\n print(f' ✓ {key}: 插入 → {cf.name}')\n success += 1\n\n still_missing = [k for k in missing if k not in inserted]\n if still_missing:\n print(f' {len(still_missing)} 条无关键词匹配，用 \\\\nocite 强制输出...')\n thesis_tex = latex_dir / 'thesis.tex'\n if thesis_tex.exists():\n content = thesis_tex.read_text(encoding='utf-8')\n nocite_lines = '\\n'.join(f'\\\\nocite{{{k}}}' for k in still_missing)\n marker = '\\\\bibliographystyle{'\n if marker in content:\n content = content.replace(marker, nocite_lines + '\\n' + marker, 1)\n thesis_tex.write_text(content, encoding='utf-8')\n\n print(f' 完成：{success + len(still_missing)}/{len(missing)} 条文献已补全引用')\n\n\n# ── 编译 PDF ──────────────────────────────────────────────────────────────────\n\ndef compile_pdf(latex_dir: Path) -> Path:\n \"\"\"运行 xelatex + bibtex 完整编译流程，返回 PDF 路径。\"\"\"\n _tex_bin = os.environ.get('XELATEX_PATH', '')\n if _tex_bin:\n extra_path = str(Path(_tex_bin).parent)\n elif Path('/Library/TeX/texbin/xelatex').exists():\n extra_path = '/Library/TeX/texbin'\n elif Path('/usr/local/texlive/2025basic/bin/universal-darwin/xelatex').exists():\n extra_path = '/usr/local/texlive/2025basic/bin/universal-darwin'\n else:\n extra_path = ''\n\n env = os.environ.copy()\n if extra_path:\n env['PATH'] = extra_path + ':' + env.get('PATH', '')\n\n def xelatex():\n result = subprocess.run(\n ['xelatex', '-interaction=nonstopmode', 'thesis.tex'],\n cwd=latex_dir, env=env,\n capture_output=True, text=True\n 
)\n for line in result.stdout.split('\\n'):\n if any(k in line for k in ['Error', 'error', 'Fatal', '!']):\n if 'Font Warning' not in line and 'microtype' not in line:\n print(f' {line}')\n return result.returncode\n\n def toc_hash():\n import hashlib\n toc = latex_dir / 'thesis.toc'\n return hashlib.md5(toc.read_bytes()).hexdigest() if toc.exists() else ''\n\n print(' 第 1 次编译（生成 .aux）...')\n xelatex()\n\n print(' 运行 bibtex...')\n subprocess.run(['bibtex', 'thesis'], cwd=latex_dir, env=env,\n capture_output=True, text=True)\n\n print(' 第 2 次编译（写入参考文献）...')\n xelatex()\n h2 = toc_hash()\n\n print(' 第 3 次编译（稳定目录）...')\n xelatex()\n h3 = toc_hash()\n\n if h3 != h2:\n print(' 第 4 次编译（目录稳定中）...')\n xelatex()\n\n return latex_dir / 'thesis.pdf'\n\n\n# ── 子命令：extract ────────────────────────────────────────────────────────────\n\ndef cmd_extract(args):\n \"\"\"\n Step 1: Word → raw_xxx.json（机械提取）\n 同时确定并创建 LaTeX 工程目录（\u003cdocx同目录>/\u003cstem>-latex/），\n 输出骨架文本供 AI 阅读，生成 struct.json 后调用 build。\n \"\"\"\n if not args:\n print('用法: python3 convert.py extract \u003cinput.docx> [output_dir]')\n sys.exit(1)\n\n docx_path = Path(args[0]).resolve()\n if not docx_path.exists():\n print(f'❌ 找不到文件: {docx_path}')\n sys.exit(1)\n\n scripts_dir = Path(__file__).parent\n project_root = scripts_dir.parent\n output_dir = Path(args[1]).resolve() if len(args) >= 2 else project_root / 'output'\n output_dir.mkdir(parents=True, exist_ok=True)\n\n # ── 在项目开始时就确定并创建 LaTeX 工程目录 ──\n stem = docx_path.stem\n latex_dir = docx_path.parent / f'{stem}-latex'\n latex_dir.mkdir(parents=True, exist_ok=True)\n\n print(f'\\n{\"=\"*60}')\n print(f'📄 输入: {docx_path}')\n print(f'📁 中间文件: {output_dir}')\n print(f'📁 LaTeX 工程: {latex_dir}')\n print(f'{\"=\"*60}\\n')\n\n print('【Step 1】机械提取 Word 文档...')\n result = subprocess.run(\n [sys.executable, str(scripts_dir / 'extract_raw.py'), str(docx_path), str(output_dir)],\n capture_output=False\n )\n if result.returncode != 0:\n print('❌ extract_raw.py 失败')\n 
sys.exit(1)\n\n # 找输出的 raw json\n raw_files = sorted(output_dir.glob('raw_*.json'), key=lambda f: f.stat().st_mtime, reverse=True)\n if not raw_files:\n print('❌ 未找到 raw_*.json 输出')\n sys.exit(1)\n raw_path = raw_files[0]\n\n # 打印骨架供 AI 阅读\n print(f'\\n{\"=\"*60}')\n print(f'✅ 提取完成: {raw_path.name}')\n print(f'{\"=\"*60}')\n print('\\n📋 文档骨架（供 AI 阅读，生成 struct.json）：\\n')\n\n import json\n raw = json.loads(raw_path.read_text(encoding='utf-8'))\n paras = raw.get('paragraphs', [])\n figures = raw.get('figures', [])\n tables = raw.get('tables', [])\n skip_styles = {'toc 1', 'toc 2', 'toc 3', 'toc1', 'toc2', 'toc3', 'toc'}\n for p in paras:\n style = p.get('style', '').strip().lower()\n if style in skip_styles:\n continue\n text = p.get('text', '').strip()\n if not text:\n continue\n print(f\"{p['idx']:04d} [{p.get('style', ''):18s}] {text[:70]}\")\n\n print(f'\\n{\"=\"*60}')\n print(f'📊 图片: {len(figures)} 张 | 图片 para_idx: {[f[\"para_idx\"] for f in figures]}')\n print(f'📊 表格: {len(tables)} 张 | 表格 before_para: {[t[\"before_para\"] for t in tables]}')\n\n # ── 检测 .doc 转换工具是否破坏了表格结构 ──\n if len(tables) == 0:\n # 检测骨架里是否有疑似表格的段落（如\"序号\"+\"股东名称\"相邻或含多个数字列）\n table_hint_patterns = ['序号', '表头', '合计', '股东名称', '持股', '占比', '金额（万元）']\n tbl_hint_count = sum(1 for p in paras if any(kw in p.get('text','') for kw in table_hint_patterns))\n if tbl_hint_count >= 3:\n print(f'\\n{\"!\"*60}')\n print('⚠️ 警告：提取到 0 张表格，但骨架中检测到疑似表格内容（如\"序号\"、\"合计\"等）！')\n print(' 这通常是因为用了 textutil 等工具转换 .doc，表格被压平成了普通段落。')\n print(' 这些表格内容将以纯文本形式出现在正文中，严重影响论文质量。')\n print(' 解决方案：用 Microsoft Word 打开 .doc，另存为 .docx，然后重新运行 extract。')\n print(f'{\"!\"*60}')\n elif len(figures) == 0:\n print(' ℹ️ 论文无图片和表格（纯文字论文），属于正常情况。')\n print(f'{\"=\"*60}')\n print(f'\\n📝 下一步：')\n print(f' 1. AI 读取上方骨架，生成 struct.json（写到 {output_dir}/struct_{stem}.json）')\n print(f' ⚠️ 确保所有图片 para_idx 和表格 before_para 都在 content_range 内！')\n print(f' 2. 
运行 build：')\n print(f' python3 convert.py build {raw_path} {output_dir}/struct_{stem}.json {latex_dir}')\n print(f'{\"=\"*60}\\n')\n\n\n# ── 子命令：build ─────────────────────────────────────────────────────────────\n\ndef cmd_build(args):\n \"\"\"\n Step 3-6: raw_json + struct_json → parsed_json → LaTeX → PDF → 评测\n \"\"\"\n if len(args) \u003c 2:\n print('用法: python3 convert.py build \u003craw_json> \u003cstruct_json> [latex_dir]')\n sys.exit(1)\n\n raw_path = Path(args[0]).resolve()\n struct_path = Path(args[1]).resolve()\n if not raw_path.exists():\n print(f'❌ 找不到: {raw_path}')\n sys.exit(1)\n if not struct_path.exists():\n print(f'❌ 找不到: {struct_path}')\n sys.exit(1)\n\n scripts_dir = Path(__file__).parent\n project_root = scripts_dir.parent\n output_dir = project_root / 'output'\n\n # latex_dir 默认用 struct 文件名推断\n stem = raw_path.stem.removeprefix('raw_')\n latex_dir = Path(args[2]).resolve() if len(args) >= 3 else Path(f'./{stem}-latex')\n\n print(f'\\n{\"=\"*60}')\n print(f'📄 raw: {raw_path.name}')\n print(f'📄 struct: {struct_path.name}')\n print(f'📁 输出: {latex_dir}')\n print(f'{\"=\"*60}\\n')\n\n # Step 3: build_parsed\n print('【Step 3】组装 parsed JSON...')\n result = subprocess.run(\n [sys.executable, str(scripts_dir / 'build_parsed.py'),\n str(raw_path), str(struct_path), str(output_dir)],\n capture_output=False\n )\n if result.returncode != 0:\n print('❌ build_parsed.py 失败')\n sys.exit(1)\n\n parsed_files = sorted(output_dir.glob('parsed_*.json'),\n key=lambda f: f.stat().st_mtime, reverse=True)\n if not parsed_files:\n print('❌ 未找到 parsed_*.json')\n sys.exit(1)\n parsed_path = parsed_files[0]\n print(f' → {parsed_path.name}')\n\n # Step 4: render\n print('\\n【Step 4】渲染 LaTeX 项目...')\n result = subprocess.run(\n [sys.executable, str(scripts_dir / 'render.py'), str(parsed_path), str(latex_dir)],\n capture_output=False\n )\n if result.returncode != 0:\n print('❌ render.py 失败')\n sys.exit(1)\n\n # Step 4.5: 自动补全未引用文献\n print('\\n【Step 4.5】检测未引用文献并自动补全 
\\\\cite{}...')\n _auto_cite_missing(latex_dir)\n\n # Step 5: 编译 PDF\n print('\\n【Step 5】编译 PDF...')\n pdf_path = compile_pdf(latex_dir)\n\n if not pdf_path.exists():\n print(f'\\n❌ PDF 未生成，请检查 {latex_dir}/thesis.log')\n sys.exit(1)\n\n size_kb = pdf_path.stat().st_size // 1024\n print(f'\\n{\"=\"*60}')\n print(f'✅ 完成！PDF 已生成: {pdf_path} ({size_kb} KB)')\n print(f'{\"=\"*60}\\n')\n\n # Step 6: Rubric 评测由 AI 执行，此处仅提示路径\n print('\\n【Step 6】Rubric 评测（由 AI 执行）')\n print(f' parsed JSON : {parsed_path}')\n print(f' LaTeX 工程 : {latex_dir}')\n print(' 请 AI 按 SKILL.md 中的 Rubric 细则逐项评测，并输出 evaluation_report.md')\n\n import platform\n if platform.system() == 'Darwin':\n subprocess.run(['open', str(pdf_path)], check=False)\n\n\n# ── 入口 ──────────────────────────────────────────────────────────────────────\n\ndef main():\n if len(sys.argv) \u003c 2:\n print(__doc__)\n sys.exit(1)\n\n subcmd = sys.argv[1]\n rest = sys.argv[2:]\n\n if subcmd == 'extract':\n cmd_extract(rest)\n elif subcmd == 'build':\n cmd_build(rest)\n else:\n print(f'❌ 未知子命令: {subcmd}')\n print('可用命令: extract | build')\n sys.exit(1)\n\n\nif __name__ == '__main__':\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":16211,"content_sha256":"4514268c42ffd29d6c248aad8e269300acf65f849694ef589912aff2eb3acf8f"},{"filename":"scripts/extract_raw.py","content":"\"\"\"\nextract_raw.py - Word 文档机械提取（不做任何理解/判断）\n\n职责：把 .docx 里的内容原样导出为 raw.json，供 llm_parse.py 阅读理解。\n\n输出格式：\n{\n \"source\": \"filename.docx\",\n \"paragraphs\": [\n {\n \"idx\": 0,\n \"style\": \"Heading 1\",\n \"text\": \"第4章 W信托...\",\n \"bold\": true,\n \"font_size\": 16\n },\n ...\n ],\n \"tables\": [\n {\n \"idx\": 0, # 在文档流中的位置（相对于段落）\n \"before_para\": 5, # 紧接在第几个段落之后\n \"rows\": [[\"表头1\",\"表头2\"], [\"数据1\",\"数据2\"], ...]\n }\n ],\n \"figures\": [\n {\n \"rId\": \"rId5\",\n \"filename\": \"image1.png\", # 从 word/media 提取的文件名\n \"para_idx\": 12 # 出现在第几个段落\n }\n ]\n}\n\"\"\"\n\nimport json\nimport sys\nimport 
zipfile\nfrom pathlib import Path\n\nimport docx\nfrom docx.oxml.ns import qn\n\n\ndef _render_chart(chart_xml_bytes: bytes, out_path: Path):\n \"\"\"解析 chart XML 数据，用 matplotlib 渲染为 PNG\"\"\"\n import xml.etree.ElementTree as ET\n import matplotlib\n matplotlib.use('Agg')\n import matplotlib.pyplot as plt\n import numpy as np\n\n NS = {\n 'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart',\n 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',\n }\n root = ET.fromstring(chart_xml_bytes)\n\n def get_vals(ser, tag):\n node = ser.find(f'c:{tag}/c:numRef/c:numCache', NS) or \\\n ser.find(f'c:{tag}/c:numLit', NS)\n if node is None:\n return []\n return [float(pt.find('c:v', NS).text)\n for pt in node.findall('c:pt', NS)\n if pt.find('c:v', NS) is not None]\n\n def get_cats(ser):\n node = ser.find('c:cat/c:strRef/c:strCache', NS) or \\\n ser.find('c:cat/c:strLit', NS) or \\\n ser.find('c:cat/c:numRef/c:numCache', NS)\n if node is None:\n return []\n return [pt.find('c:v', NS).text for pt in node.findall('c:pt', NS)\n if pt.find('c:v', NS) is not None]\n\n def ser_name(ser):\n t = ser.find('c:tx/c:strRef/c:strCache/c:pt/c:v', NS)\n return t.text if t is not None else ''\n\n fig, ax = plt.subplots(figsize=(6, 4))\n\n # 尝试柱状图、折线图、饼图\n for chart_tag in ('barChart', 'lineChart', 'pieChart', 'areaChart', 'scatterChart'):\n chart_node = root.find(f'.//c:{chart_tag}', NS)\n if chart_node is None:\n continue\n sers = chart_node.findall('c:ser', NS)\n cats = get_cats(sers[0]) if sers else []\n x = np.arange(len(cats)) if cats else None\n\n if chart_tag == 'pieChart':\n vals = get_vals(sers[0], 'val') if sers else []\n if vals:\n ax.pie(vals, labels=cats or None, autopct='%1.1f%%')\n elif chart_tag == 'scatterChart':\n for ser in sers:\n xv = get_vals(ser, 'xVal')\n yv = get_vals(ser, 'yVal')\n if xv and yv:\n ax.scatter(xv[:len(yv)], yv)\n else:\n width = 0.8 / max(len(sers), 1)\n for i, ser in enumerate(sers):\n vals = get_vals(ser, 'val')\n if not vals:\n 
continue\n offset = (i - len(sers) / 2 + 0.5) * width\n label = ser_name(ser)\n if chart_tag == 'barChart':\n direction = chart_node.find('c:barDir', NS)\n horiz = direction is not None and direction.get('val') == 'bar'\n if horiz:\n ax.barh(np.arange(len(vals)) + offset, vals, width, label=label)\n else:\n ax.bar((x[:len(vals)] if x is not None else np.arange(len(vals))) + offset,\n vals, width, label=label)\n else:\n ax.plot(x[:len(vals)] if x is not None else range(len(vals)),\n vals, label=label)\n\n if cats and x is not None:\n ax.set_xticks(x)\n ax.set_xticklabels(cats, rotation=30, ha='right', fontsize=8)\n if any(ser_name(s) for s in sers):\n ax.legend(fontsize=8)\n break\n\n plt.tight_layout()\n plt.savefig(str(out_path), dpi=150, bbox_inches='tight')\n plt.close(fig)\n\n\ndef extract_raw(docx_path: str, output_dir: str = \"output\") -> dict:\n docx_path = Path(docx_path).resolve()\n output_dir = Path(output_dir)\n output_dir.mkdir(parents=True, exist_ok=True)\n\n doc = docx.Document(str(docx_path))\n\n # ── 1. 提取段落（扁平列表，不做结构判断）──\n paragraphs_out = []\n for idx, p in enumerate(doc.paragraphs):\n text = p.text.strip()\n # 字体大小：取第一个有字号的 run\n font_size = None\n bold = False\n for run in p.runs:\n if run.font.size is not None and font_size is None:\n font_size = int(run.font.size.pt) if run.font.size else None\n if run.bold:\n bold = True\n paragraphs_out.append({\n \"idx\": idx,\n \"style\": p.style.name,\n \"text\": text,\n \"bold\": bold,\n \"font_size\": font_size,\n })\n\n # ── 2. 
提取表格（记录在文档流中的位置）──\n tables_out = []\n # 遍历 body 元素，找每个表格紧跟在哪个段落后面\n body_elems = list(doc.element.body)\n para_elements = [p._element for p in doc.paragraphs]\n tbl_elements = [t._element for t in doc.tables]\n\n last_para_idx = -1\n tbl_counter = 0\n for elem in body_elems:\n tag = elem.tag.split('}')[-1]\n if tag == 'p':\n # 找段落索引\n try:\n last_para_idx = para_elements.index(elem)\n except ValueError:\n pass\n elif tag == 'tbl':\n tbl = doc.tables[tbl_counter] if tbl_counter \u003c len(doc.tables) else None\n rows_data = []\n if tbl:\n for row in tbl.rows:\n rows_data.append([cell.text.strip() for cell in row.cells])\n tables_out.append({\n \"idx\": tbl_counter,\n \"before_para\": last_para_idx,\n \"rows\": rows_data,\n })\n tbl_counter += 1\n\n # ── 3. 提取图片（普通图片 + chart）──\n import re, shutil\n figures_out = []\n figures_dir = output_dir / \"figures\"\n if figures_dir.exists():\n shutil.rmtree(figures_dir)\n figures_dir.mkdir(parents=True)\n\n R_NS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'\n C_CHART_NS = 'http://schemas.openxmlformats.org/drawingml/2006/chart'\n\n with zipfile.ZipFile(str(docx_path), 'r') as z:\n # 复制所有 media 文件\n for mf in [f for f in z.namelist() if f.startswith('word/media/')]:\n dest = figures_dir / Path(mf).name\n dest.write_bytes(z.read(mf))\n\n # 读关系表：rId → Target\n try:\n rels_xml = z.read('word/_rels/document.xml.rels').decode('utf-8')\n except Exception:\n rels_xml = ''\n\n # rId → media filename（普通图片）\n rid_to_media = {}\n for m in re.finditer(r'Id=\"(rId\\d+)\"[^>]+Target=\"media/([^\"]+)\"', rels_xml):\n rid_to_media[m.group(1)] = m.group(2)\n\n # rId → chart xml path\n rid_to_chart = {}\n for m in re.finditer(r'Id=\"(rId\\d+)\"[^>]+Target=\"(charts/[^\"]+)\"', rels_xml):\n rid_to_chart[m.group(1)] = 'word/' + m.group(2)\n\n # 渲染 chart → PNG（用 matplotlib 解析 chart XML 数据）\n chart_png_map = {} # chart_path → png filename\n chart_counter = 1\n for rid, chart_path in rid_to_chart.items():\n png_name = 
f'chart{chart_counter}.png'\n try:\n chart_xml = z.read(chart_path)\n _render_chart(chart_xml, figures_dir / png_name)\n chart_png_map[chart_path] = png_name\n print(f' ✅ chart {chart_path} → {png_name}')\n except Exception as e:\n print(f' ⚠️ chart {chart_path} 渲染失败: {e}')\n chart_counter += 1\n\n # rId → png filename（chart）\n rid_to_chart_png = {}\n for rid, chart_path in rid_to_chart.items():\n if chart_path in chart_png_map:\n rid_to_chart_png[rid] = chart_png_map[chart_path]\n\n # 遍历段落，找图片位置（a:blip = 普通图, c:chart ref = chart）\n seen_rids = set()\n for para_idx, p in enumerate(doc.paragraphs):\n for run in p.runs:\n elem = run._element\n # 普通图片\n for pic in elem.iter(qn('a:blip')):\n rid = pic.get(f'{{{R_NS}}}embed')\n if rid and rid in rid_to_media and rid not in seen_rids:\n seen_rids.add(rid)\n figures_out.append({\n \"rId\": rid,\n \"filename\": rid_to_media[rid],\n \"para_idx\": para_idx,\n })\n # chart\n for chart_ref in elem.iter(f'{{{C_CHART_NS}}}chart'):\n rid = chart_ref.get(f'{{{R_NS}}}id')\n if rid and rid in rid_to_chart_png and rid not in seen_rids:\n seen_rids.add(rid)\n figures_out.append({\n \"rId\": rid,\n \"filename\": rid_to_chart_png[rid],\n \"para_idx\": para_idx,\n })\n\n result = {\n \"source\": docx_path.name,\n \"paragraphs\": paragraphs_out,\n \"tables\": tables_out,\n \"figures\": figures_out,\n }\n\n out_path = output_dir / f\"raw_{docx_path.stem}.json\"\n out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding='utf-8')\n print(f\"✅ raw 提取完成 → {out_path}\")\n print(f\" {len(paragraphs_out)} 段落 {len(tables_out)} 表格 {len(figures_out)} 图片\")\n return result\n\n\nif __name__ == '__main__':\n if len(sys.argv) \u003c 2:\n print(\"用法: python extract_raw.py \u003cdocx文件> [输出目录]\")\n sys.exit(1)\n docx_file = sys.argv[1]\n out_dir = sys.argv[2] if len(sys.argv) > 2 else \"output\"\n extract_raw(docx_file, out_dir)\n","content_type":"text/x-python; 
# ── Fallback glossary of abbreviations common in MBA theses ──────────────────
MBA_ABBREV_DICT = {
    'AI': '人工智能（Artificial Intelligence）',
    'AWS': '亚马逊云服务（Amazon Web Services）',
    'BCG': '波士顿咨询集团（Boston Consulting Group）',
    'CAGE': '文化-行政-地理-经济距离框架（Cultural-Administrative-Geographic-Economic）',
    'CEO': '首席执行官（Chief Executive Officer）',
    'CFO': '首席财务官（Chief Financial Officer）',
    'CTO': '首席技术官（Chief Technology Officer）',
    'COO': '首席运营官（Chief Operating Officer）',
    'CSR': '企业社会责任（Corporate Social Responsibility）',
    'CVC': '企业风险投资（Corporate Venture Capital）',
    'DC': '动态能力（Dynamic Capabilities）',
    'DCS': '分散控制系统（Distributed Control System）',
    'ERP': '企业资源计划（Enterprise Resource Planning）',
    'ESG': '环境、社会和治理（Environmental, Social and Governance）',
    'FDI': '对外直接投资（Foreign Direct Investment）',
    'GDP': '国内生产总值（Gross Domestic Product）',
    'IoT': '物联网（Internet of Things）',
    'IP': '知识产权（Intellectual Property）',
    'IPO': '首次公开募股（Initial Public Offering）',
    'KPI': '关键绩效指标（Key Performance Indicator）',
    'MAD': '试验数据互认（Mutual Acceptance of Data）',
    'MBA': '工商管理硕士（Master of Business Administration）',
    'MES': '制造执行系统（Manufacturing Execution System）',
    'ML': '机器学习（Machine Learning）',
    'MNC': '跨国公司（Multinational Corporation）',
    'MNE': '跨国企业（Multinational Enterprise）',
    'NPD': '新产品开发（New Product Development）',
    'OLI': '所有权-区位-内部化范式（Ownership-Location-Internalization Paradigm）',
    'PEST': '政治、经济、社会、技术分析法（Political-Economic-Social-Technological）',
    'PLC': '可编程逻辑控制器（Programmable Logic Controller）',
    'R&D': '研究与开发（Research and Development）',
    'ROE': '净资产收益率（Return on Equity）',
    'ROI': '投资回报率（Return on Investment）',
    'SaaS': '软件即服务（Software as a Service）',
    'SBU': '战略业务单元（Strategic Business Unit）',
    'SCM': '供应链管理（Supply Chain Management）',
    'SIS': '安全仪表系统（Safety Instrumented System）',
    'SME': '中小企业（Small and Medium-sized Enterprise）',
    'SWOT': '优势、劣势、机会、威胁分析法（Strengths-Weaknesses-Opportunities-Threats）',
    'VUCA': '易变性、不确定性、复杂性、模糊性（Volatility, Uncertainty, Complexity, Ambiguity）',
    'VRIO': '价值、稀缺、难以模仿、组织支撑框架（Value, Rarity, Inimitability, Organization）',
}

# ASCII-only word boundaries.  Python's ``\b`` is Unicode-aware and counts CJK
# ideographs as word characters, so r'\bAI\b' can never match "采用AI技术".
# These lookarounds only treat ASCII letters, digits and "_" as word characters.
_ABBR_LEFT = r'(?<![A-Za-z0-9_])'
_ABBR_RIGHT = r'(?![A-Za-z0-9_])'


def extract_abbreviations(data: dict) -> dict:
    """Collect the abbreviations defined or used in the thesis.

    The abstracts, every ``text`` item and every ``section`` title are joined
    into one string, which is then scanned with three definition patterns:

    1. ``ABBR（中文解释）`` - upper-case abbreviation followed by a Chinese gloss
    2. ``中文名称（ABBR）`` - Chinese name followed by the abbreviation
    3. ``English Full Name (ABBR)`` - capitalised English phrase + abbreviation

    Finally every ``MBA_ABBREV_DICT`` key that occurs in the text is added; the
    dictionary wording overrides whatever the patterns extracted.

    Args:
        data: Parsed thesis JSON (``abstract_cn``, ``abstract_en``,
            ``chapters[*].content[*]``).  Missing or ``None`` fields are
            treated as empty text.

    Returns:
        ``{abbreviation: explanation}`` sorted by abbreviation.
    """
    texts = [data.get('abstract_cn') or '', data.get('abstract_en') or '']
    for ch in data.get('chapters') or []:
        for item in ch.get('content') or []:
            kind = item.get('type')
            if kind == 'text':
                texts.append(item.get('content') or '')
            elif kind == 'section':
                texts.append(item.get('title') or '')
    full_text = ' '.join(texts)

    found = {}

    # Pattern 1: abbreviation (2-8 chars) + parenthesised gloss that starts
    # with a Chinese character; the gloss length is capped so the match cannot
    # run into the surrounding sentence.
    for m in re.finditer(
        _ABBR_LEFT
        + r'([A-Z][A-Z0-9&]{1,7})\s*[（(]([\u4e00-\u9fff][^）)]{2,30})[）)]',
        full_text,
    ):
        found.setdefault(m.group(1), m.group(2).strip())

    # Pattern 2: Chinese name (2-15 characters) + parenthesised abbreviation.
    for m in re.finditer(
        r'([\u4e00-\u9fff]{2,15})\s*[（(]([A-Z][A-Z0-9&]{1,7})[）)]',
        full_text,
    ):
        found.setdefault(m.group(2), m.group(1).strip())

    # Pattern 3: English phrase (3+ words, first one capitalised) followed by
    # the abbreviation in half- or full-width parentheses.
    for m in re.finditer(
        r'([A-Z][a-z]+(?:\s+[A-Za-z]+){2,8})\s*[（(]([A-Z][A-Z0-9&]{1,7})[）)]',
        full_text,
    ):
        expansion, abbr = m.group(1).strip(), m.group(2)
        # Light sanity check: the abbreviation should appear among the
        # capitalised initials; very short abbreviations are accepted as-is.
        initials = ''.join(w[0].upper() for w in expansion.split() if w[0].isupper())
        if abbr not in found and (abbr in initials or len(abbr) <= 3):
            found[abbr] = expansion

    # Dictionary fallback: any known abbreviation that occurs in the text wins
    # over the regex-extracted wording.
    for abbr, explanation in MBA_ABBREV_DICT.items():
        if re.search(_ABBR_LEFT + re.escape(abbr) + _ABBR_RIGHT, full_text):
            found[abbr] = explanation

    # Drop degenerate keys and explanations too short to be meaningful.
    filtered = {
        k: v
        for k, v in found.items()
        if len(k) >= 2 and not k.isdigit() and len(v) >= 3
    }
    return dict(sorted(filtered.items()))


# Upper-case words that are too short or too generic to be abbreviations.
_ABBREV_STOPWORDS = {
    'A', 'I', 'IT', 'OK', 'NO', 'IS', 'OR', 'AND', 'THE', 'FOR',
    'BUT', 'IN', 'ON', 'AT', 'BY', 'TO', 'AS', 'OF', 'DO', 'IF',
    'BE', 'US', 'AN', 'SO', 'UP',
}


def fix_orphan_abbrevs(data: dict) -> tuple:
    """Annotate abbreviations that are used repeatedly but never explained.

    An "orphan" is an upper-case token that occurs at least twice in the body
    text or abstracts, is not returned by ``extract_abbreviations``, is not a
    ``MBA_ABBREV_DICT`` key and is not a stop word.  Its first occurrence in
    the chapter text is rewritten to ``XXX（待补充全称）`` and it is added to the
    abbreviation table with the placeholder explanation.

    Args:
        data: Parsed thesis JSON.  It is deep-copied; the caller's object is
            left untouched.

    Returns:
        ``(updated_data, all_abbrevs)`` - the annotated copy and the merged,
        key-sorted abbreviation dictionary (known + orphans).
    """
    import copy
    from collections import Counter

    data = copy.deepcopy(data)

    # 1. Abbreviations that already have an explanation.
    known = extract_abbreviations(data)

    # 2. Body paragraphs in document order (chapters or items without the
    #    expected keys are skipped instead of raising KeyError).
    text_items = [
        item
        for ch in data.get('chapters') or []
        for item in ch.get('content') or []
        if item.get('type') == 'text'
    ]
    full_text = ' '.join(item.get('content') or '' for item in text_items)
    full_text += ' ' + (data.get('abstract_cn') or '') + ' ' + (data.get('abstract_en') or '')

    # Count upper-case tokens of 2-8 characters that start with a letter and
    # end with a letter or digit.
    candidate_re = _ABBR_LEFT + r'([A-Z][A-Z0-9&]{0,6}[A-Z0-9])' + _ABBR_RIGHT
    freq = Counter(re.findall(candidate_re, full_text))

    # 3. Orphans: frequent, unexplained, not a stop word.
    all_known_keys = set(known) | set(MBA_ABBREV_DICT)
    orphans = {
        w for w, cnt in freq.items()
        if cnt >= 2
        and w not in all_known_keys
        and w not in _ABBREV_STOPWORDS
        and len(w) >= 2
    }

    if orphans:
        print(f' 发现 {len(orphans)} 个孤儿缩略语: {sorted(orphans)}')

    # 4. Annotate the first body occurrence of each orphan.  Orphans are
    #    visited in sorted order so the result is deterministic.
    pending = sorted(orphans)
    for item in text_items:
        if not pending:
            break
        original = item.get('content') or ''
        updated = original
        still_pending = []
        for abbr in pending:
            pat = _ABBR_LEFT + re.escape(abbr) + _ABBR_RIGHT
            updated, hits = re.subn(pat, f'{abbr}（待补充全称）', updated, count=1)
            if not hits:
                still_pending.append(abbr)
        pending = still_pending
        if updated != original:
            item['content'] = updated

    # 5. Merge the orphans (with the placeholder explanation) into the table.
    orphan_abbrevs = {abbr: '待补充全称' for abbr in sorted(orphans)}
    all_abbrevs = dict(sorted({**known, **orphan_abbrevs}.items()))

    return data, all_abbrevs

# ── LaTeX special-character escaping ─────────────────────────────────────────
LATEX_ESCAPE = [
    ('\\', r'\textbackslash{}'),
    ('&', r'\&'),
    ('%', r'\%'),
    ('$', r'\$'),
    ('#', r'\#'),
    ('_', r'\_'),
    ('{', r'\{'),
    ('}', r'\}'),
    ('~', r'\textasciitilde{}'),
    ('^', r'\textasciicircum{}'),
]

# Single-pass translation table built from LATEX_ESCAPE.  Chained
# str.replace() calls re-escape the braces that an earlier replacement just
# inserted ("\" -> "\textbackslash{}" -> "\textbackslash\{\}"); translating
# every character exactly once cannot do that.
_LATEX_ESCAPE_TABLE = str.maketrans(dict(LATEX_ESCAPE))


def escape_latex(text: str) -> str:
    """Escape every LaTeX special character in plain text.

    The input is treated as plain text: backslashes are escaped too, so LaTeX
    commands already present in ``text`` are not preserved.

    Args:
        text: Plain text; ``None`` or ``''`` yields ``''``.

    Returns:
        The text with each character listed in ``LATEX_ESCAPE`` replaced by
        its escape sequence.
    """
    if not text:
        return ''
    return text.translate(_LATEX_ESCAPE_TABLE)


def clean_caption(text: str) -> str:
    """Strip a leading figure/table number so LaTeX can number the float itself.

    Handles ``图1-1 ``, ``表4-1 `` and the dotted form ``图4.2 ``.  Captions
    without such a prefix are returned stripped but otherwise unchanged.
    """
    if not text:
        return ''
    return re.sub(r'^[图表]\s*\d+[-–—.．]\d+\s*', '', text.strip())


def convert_citations(text: str, mapping: dict = None, ay_lookup: dict = None) -> str:
    """Convert in-text citation markers to LaTeX ``\\cite{key}``.

    Two styles are handled:

    1. Numeric brackets: ``[N]``, ``[N,M]``, ``[N-M]`` and mixtures such as
       ``[1,3-5]``.  Half- and full-width commas and ``-`` / ``–`` are
       accepted.  Brackets containing anything else (``[图1]``, ``[附录A]``),
       malformed parts or reversed ranges are left untouched.
    2. Author-year prose citations, delegated to
       ``_convert_author_year_citations`` when ``ay_lookup`` is given.

    Args:
        text: Body text (already LaTeX-escaped by the caller).
        mapping: ``{number: bibtex_key}``.  Numbers missing from it, or all
            numbers when it is ``None``/empty, fall back to ``refNNN``.
        ay_lookup: ``{(surname_lower, year): bibtex_key}``; optional.

    Returns:
        The text with recognised citations replaced.
    """
    def _expand(m):
        inner = m.group(1).strip()
        # Only digits, list separators, range dashes and blanks may appear.
        if re.search(r'[^\d,，\-–\s]', inner):
            return m.group(0)

        nums = []
        for part in re.split(r'[,，]', inner):
            part = part.strip()
            if not part:
                continue
            range_m = re.fullmatch(r'(\d+)\s*[-–]\s*(\d+)', part)
            if range_m:
                start, end = int(range_m.group(1)), int(range_m.group(2))
                if start > end:
                    return m.group(0)  # reversed range: not a citation
                nums.extend(range(start, end + 1))
            elif part.isdigit():
                nums.append(int(part))
            else:
                # Citing a partial list would silently lose references.
                return m.group(0)
        if not nums:
            return m.group(0)

        if mapping:
            keys = ','.join(mapping.get(n, f'ref{n:03d}') for n in nums)
        else:
            keys = ','.join(f'ref{n:03d}' for n in nums)
        return f'\\cite{{{keys}}}'

    # Step 1: numeric citations.
    text = re.sub(r'\[([^\[\]]+)\]', _expand, text)

    # Step 2: author-year prose citations.
    if ay_lookup:
        text = _convert_author_year_citations(text, ay_lookup)

    return text
'qu', '项': 'xiang', '祝': 'zhu', '董': 'dong', '梁': 'liang',\n '杜': 'du', '阮': 'ruan', '蓝': 'lan', '闵': 'min', '席': 'xi',\n '季': 'ji', '麻': 'ma', '强': 'qiang', '贾': 'jia', '路': 'lu',\n '江': 'jiang', '童': 'tong', '颜': 'yan', '郭': 'guo', '梅': 'mei',\n '盛': 'sheng', '林': 'lin', '钟': 'zhong', '徐': 'xu', '邱': 'qiu',\n '骆': 'luo', '高': 'gao', '夏': 'xia', '蔡': 'cai', '田': 'tian',\n '樊': 'fan', '胡': 'hu', '凌': 'ling', '霍': 'huo', '万': 'wan',\n '柯': 'ke', '管': 'guan', '卢': 'lu', '莫': 'mo', '房': 'fang',\n '干': 'gan', '解': 'xie', '宗': 'zong', '丁': 'ding', '宣': 'xuan',\n '邓': 'deng', '郁': 'yu', '单': 'shan', '杭': 'hang', '洪': 'hong',\n '包': 'bao', '诸': 'zhu', '左': 'zuo', '石': 'shi', '崔': 'cui',\n '吉': 'ji', '龚': 'gong', '翟': 'zhai', '付': 'fu', '葛': 'ge',\n '耿': 'geng', '高': 'gao', '刘': 'liu', '欧': 'ou', '肖': 'xiao',\n '陆': 'lu', '魏': 'wei', '吴': 'wu', '程': 'cheng', '谢': 'xie',\n '赖': 'lai', '钱': 'qian', '武': 'wu', '韩': 'han', '谭': 'tan',\n}\n\n\ndef _surname_to_pinyin(surname: str) -> str:\n \"\"\"把中文姓（单字或双字复姓）转换为拼音（小写），用于 ay_lookup key\"\"\"\n if not surname:\n return ''\n # 双字复姓\n two = _PINYIN_MAP.get(surname[:2], '')\n if two:\n return two\n return _PINYIN_MAP.get(surname[0], surname[0].lower())\n\n\ndef _convert_author_year_citations(text: str, ay_lookup: dict) -> str:\n \"\"\"把正文中的 author-year 行文引用替换为 author\\\\cite{key}year 格式。\n\n 支持的格式：\n - 中文作者全角括号：曹玉（2025）/ 葛文婕和任海全（2025）/ 杜盼盼和张倩妹（2025）\n - 英文作者半角括号：Smith (2023) / BiyuTang (2023)\n - CamelCase 拼音名：BiyuTang（2023）→ 提取最后一个词作为姓\n - 多作者：第一作者等（2025）/ Smith et al. 
(2023)\n\n 替换策略：保留原始文字，在右括号后紧接插入 \\\\cite{key}\n 例：曹玉（2025）→ 曹玉（2025）\\\\cite{cao2025aigcnews}\n \"\"\"\n result = []\n pos = 0\n\n # 匹配 author-year 模式：\n # Group 1: 作者文字部分（中文/英文/CamelCase拼音）\n # Group 2: 左括号（全角/半角）\n # Group 3: 年份 4位数字\n # Group 5: 右括号（全角/半角）\n pattern = re.compile(\n r'((?:[\\u4e00-\\u9fff]{1,6}(?:[和与及、][\\u4e00-\\u9fff]{1,4})*(?:等)?)' # 中文作者(含多作者和\"等\")\n r'|(?:[A-Z][a-z]+(?:[A-Z][a-z]+)+)' # CamelCase 拼音名（如 BiyuTang）\n r'|(?:[A-Z][a-zA-Z]+(?:\\s+(?:et\\s+al\\.?|and\\s+[A-Z][a-zA-Z]+))*)' # 普通英文作者\n r')'\n r'([（(])' # 左括号\n r'((19|20)\\d{2})' # 年份\n r'([）)])' # 右括号\n )\n\n for m in pattern.finditer(text):\n author_text = m.group(1)\n year = m.group(3)\n full_match = m.group(0)\n match_end = m.end()\n\n # 提取第一作者姓氏用于查找\n zh_m = re.match(r'^([\\u4e00-\\u9fff]{1,3})', author_text)\n camel_m = re.match(r'^([A-Z][a-z]+(?:[A-Z][a-z]+)+)

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.

, author_text)\n en_m = re.match(r'^([A-Z][a-zA-Z]+)', author_text)\n\n key = None\n if zh_m:\n # 中文：取姓（第一个字）→ 拼音查\n surname = zh_m.group(1)[0]\n pinyin = _surname_to_pinyin(surname)\n key = ay_lookup.get((pinyin, year)) or ay_lookup.get((surname, year))\n elif camel_m:\n # CamelCase 拼音名（如 BiyuTang）：\n # 先尝试最后一个词（姓），再尝试第一个词（名，用于\"名在前\"格式如 Biyu Tang）\n words = re.findall(r'[A-Z][a-z]+', author_text)\n if words:\n # 尝试最后一个词作姓\n key = ay_lookup.get((words[-1].lower(), year))\n if not key and len(words) > 1:\n # 尝试第一个词（名前置情况：Biyu Tang → biyu）\n key = ay_lookup.get((words[0].lower(), year))\n if not key:\n # 尝试所有词\n for w in words:\n key = ay_lookup.get((w.lower(), year))\n if key:\n break\n elif en_m:\n # 普通英文：第一个词（姓）\n en_name = en_m.group(1).lower()\n en_name = re.sub(r'[^a-z]', '', en_name)\n key = ay_lookup.get((en_name, year))\n\n # 追加原文，若找到 key 则在右括号后插入 \\cite\n result.append(text[pos:m.start()])\n if key:\n result.append(full_match + f'\\\\cite{{{key}}}')\n else:\n result.append(full_match)\n pos = match_end\n\n result.append(text[pos:])\n return ''.join(result)\n\ndef escape_meta(text: str) -> str:\n \"\"\"元数据字段的轻量转义（标题、姓名等，保留中文标点）\"\"\"\n if not text:\n return ''\n # 只转义 & % # _ 等，不转义括号（中文括号无需转义）\n for char, replacement in [('&', r'\\&'), ('%', r'\\%'), ('#', r'\\#')]:\n text = text.replace(char, replacement)\n return text\n\n\ndef strip_section_number(title: str) -> str:\n \"\"\"剔除节标题里的原始章节编号前缀，避免 LaTeX 自动编号与原文编号叠加。\n\n 例：\n '3.1信托行业...' → '信托行业...'\n '4.2.1运营体系搭建' → '运营体系搭建'\n '3.2.1运营类模式' → '运营类模式'\n '4.5.2.1公司层面优化' → '公司层面优化'\n '图4.2 W信托...' → '图4.2 W信托...' 
（图题不处理）\n 'W信托基本情况' → 'W信托基本情况' （无前缀，不变）\n \"\"\"\n if not title:\n return title\n # 匹配形如 \"3.\" / \"3.1\" / \"3.1.1\" / \"3.1.1.1\" 开头的编号（仅数字+点）\n # 后面必须跟非数字字符（汉字、字母等），避免把\"3.14...\"这类误删\n m = re.match(r'^(\\d+(?:\\.\\d+)*)\\s*(?=[^\\d\\s.])', title)\n if m:\n return title[m.end():]\n return title\n\n# ── BibTeX 生成 ─────────────────────────────────────────────────────────────\n\ndef _make_bib_key(ref_text: str, idx: int, used_keys: set) -> str:\n \"\"\"从参考文献原文生成 BibTeX key，格式：作者姓年份关键词\"\"\"\n text = re.sub(r'^\\[\\d+\\]\\s*', '', ref_text.strip())\n\n # 提取年份\n year_m = re.search(r'\\b(19|20)\\d{2}\\b', text)\n year = year_m.group(0) if year_m else 'nodate'\n\n # 提取第一作者姓（英文：取逗号前 / 中文：取前2字）\n # 英文模式：Lastname, F. 或 Lastname F.\n en_m = re.match(r'([A-Z][a-zA-ZÀ-ÿ\\-]+),?\\s+[A-Z]', text)\n # 中文模式：开头是中文字符\n zh_m = re.match(r'^([\\u4e00-\\u9fff]{2,3})[、，,\\s（(]', text)\n # 机构名（全大写英文单词）\n org_m = re.match(r'^([A-Z]{2,})[.\\s]', text)\n\n if zh_m:\n # 中文作者：转拼音首字母（简单方案：直接用字符unicode位置做hash，或保留汉字）\n author_part = zh_m.group(1).lower()\n # 简单拼音映射（常见姓）\n pinyin_map = {\n '赵': 'zhao', '钱': 'qian', '孙': 'sun', '李': 'li', '周': 'zhou',\n '吴': 'wu', '郑': 'zheng', '王': 'wang', '冯': 'feng', '陈': 'chen',\n '褚': 'chu', '卫': 'wei', '蒋': 'jiang', '沈': 'shen', '韩': 'han',\n '杨': 'yang', '朱': 'zhu', '秦': 'qin', '尤': 'you', '许': 'xu',\n '何': 'he', '吕': 'lv', '施': 'shi', '张': 'zhang', '孔': 'kong',\n '曹': 'cao', '严': 'yan', '华': 'hua', '金': 'jin', '魏': 'wei',\n '陶': 'tao', '姜': 'jiang', '戚': 'qi', '谢': 'xie', '邹': 'zou',\n '喻': 'yu', '柏': 'bai', '水': 'shui', '窦': 'dou', '章': 'zhang',\n '云': 'yun', '苏': 'su', '潘': 'pan', '葛': 'ge', '奚': 'xi',\n '范': 'fan', '彭': 'peng', '郎': 'lang', '鲁': 'lu', '韦': 'wei',\n '昌': 'chang', '马': 'ma', '苗': 'miao', '凤': 'feng', '花': 'hua',\n '方': 'fang', '俞': 'yu', '任': 'ren', '袁': 'yuan', '柳': 'liu',\n '酆': 'feng', '鲍': 'bao', '史': 'shi', '唐': 'tang', '费': 'fei',\n '廉': 'lian', '岑': 'cen', '薛': 'xue', '雷': 'lei', '贺': 'he',\n '倪': 'ni', '汤': 'tang', '滕': 'teng', 
'殷': 'yin', '罗': 'luo',\n '毕': 'bi', '郝': 'hao', '邬': 'wu', '安': 'an', '常': 'chang',\n '乐': 'le', '于': 'yu', '时': 'shi', '傅': 'fu', '皮': 'pi',\n '卞': 'bian', '齐': 'qi', '康': 'kang', '伍': 'wu', '余': 'yu',\n '元': 'yuan', '卜': 'bu', '顾': 'gu', '孟': 'meng', '平': 'ping',\n '黄': 'huang', '和': 'he', '穆': 'mu', '萧': 'xiao', '尹': 'yin',\n '姚': 'yao', '邵': 'shao', '湛': 'zhan', '汪': 'wang', '祁': 'qi',\n '毛': 'mao', '禹': 'yu', '狄': 'di', '米': 'mi', '贝': 'bei',\n '明': 'ming', '臧': 'zang', '计': 'ji', '伏': 'fu', '成': 'cheng',\n '戴': 'dai', '谈': 'tan', '宋': 'song', '茅': 'mao', '庞': 'pang',\n '熊': 'xiong', '纪': 'ji', '舒': 'shu', '屈': 'qu', '项': 'xiang',\n '祝': 'zhu', '董': 'dong', '梁': 'liang', '杜': 'du', '阮': 'ruan',\n '蓝': 'lan', '闵': 'min', '席': 'xi', '季': 'ji', '麻': 'ma',\n '强': 'qiang', '贾': 'jia', '路': 'lu', '娄': 'lou', '危': 'wei',\n '江': 'jiang', '童': 'tong', '颜': 'yan', '郭': 'guo', '梅': 'mei',\n '盛': 'sheng', '林': 'lin', '刁': 'diao', '钟': 'zhong', '徐': 'xu',\n '邱': 'qiu', '骆': 'luo', '高': 'gao', '夏': 'xia', '蔡': 'cai',\n '田': 'tian', '樊': 'fan', '胡': 'hu', '凌': 'ling', '霍': 'huo',\n '虞': 'yu', '万': 'wan', '支': 'zhi', '柯': 'ke', '昝': 'zan',\n '管': 'guan', '卢': 'lu', '莫': 'mo', '经': 'jing', '房': 'fang',\n '裘': 'qiu', '缪': 'miao', '干': 'gan', '解': 'xie', '应': 'ying',\n '宗': 'zong', '丁': 'ding', '宣': 'xuan', '贲': 'ben', '邓': 'deng',\n '郁': 'yu', '单': 'shan', '杭': 'hang', '洪': 'hong', '包': 'bao',\n '诸': 'zhu', '左': 'zuo', '石': 'shi', '崔': 'cui', '吉': 'ji',\n '钮': 'niu', '龚': 'gong',\n }\n surname = zh_m.group(1)[0]\n author_part = pinyin_map.get(surname, 'zh')\n # 只取姓的拼音，不保留剩余中文字符\n elif en_m:\n author_part = en_m.group(1).lower()\n # 去除特殊字符\n author_part = re.sub(r'[^a-z]', '', author_part)\n elif org_m:\n author_part = org_m.group(1).lower()\n else:\n author_part = f'ref{idx:03d}'\n\n # 提取标题关键词（前2个英文词或前3个中文字）\n # 去掉作者和年份后取标题\n title_part = re.sub(r'^[^a-zA-Z\\u4e00-\\u9fff]*', '', text)\n title_part = re.sub(r'\\b(19|20)\\d{2}\\b', '', title_part)\n en_words = 
re.findall(r'[a-zA-Z]+', title_part)\n # 取第一个有意义的词（跳过介词/连词）\n stopwords = {'a', 'an', 'the', 'of', 'in', 'on', 'at', 'to', 'for',\n 'and', 'or', 'but', 'with', 'from', 'by', 'how', 'what',\n 'its', 'as', 'is', 'are', 'be', 'an'}\n kw = next((w.lower() for w in en_words if w.lower() not in stopwords and len(w) > 2), '')\n if not kw:\n zh_words = re.findall(r'[\\u4e00-\\u9fff]{2,4}', title_part)\n kw = zh_words[0] if zh_words else ''\n\n key = f'{author_part}{year}{kw}'\n # BibTeX key 只允许 ASCII 字母数字，中文字符全部剥除\n key = re.sub(r'[^a-zA-Z0-9]', '', key)[:40]\n\n # 保证唯一性\n base_key = key\n suffix = 2\n while key in used_keys:\n key = f'{base_key}{suffix}'\n suffix += 1\n used_keys.add(key)\n return key\n\n\ndef _parse_ref_to_bibtex(ref_text: str, key: str) -> str:\n \"\"\"把一条参考文献原文解析为 BibTeX 条目（规则based）\"\"\"\n text = re.sub(r'^\\[\\d+\\]\\s*', '', ref_text.strip())\n text = text.replace(r'\\&', '&').replace(r'\\%', '%')\n\n # ── 优先检测 APA 机构引用格式：机构名. (Year). Title. ──\n # 例：高力国际. (2025). 2025 年酒店业展望报告.\n # 美国旅游协会（U.S. Travel Association）. (2024). U.S. Travel & Tourism Statistics\n # Washington State University Carson College of Business. (2024). Hospitality...\n # 德勤. (2024). 
2024 年假期旅游调查.\n m_org_apa = re.match(r'^(.+?)\\.\\s*\$(19|20\\d{2})\$\\.\\s*(.+)', text.strip())\n if m_org_apa:\n org_candidate = m_org_apa.group(1).strip()\n year_org = m_org_apa.group(2)\n title_raw_org = m_org_apa.group(3).strip().rstrip('.')\n # 判断是否机构名（不是个人名）：\n # 个人名特征 = 含逗号 + 较短（如 \"Verma, Y\" \"Quach, T\"）\n # 机构名特征 = 无逗号，或纯汉字机构，或多词英文机构\n has_comma = ',' in org_candidate\n is_short_personal = has_comma and len(org_candidate) \u003c 25\n is_org = not is_short_personal\n if is_org:\n def _esc(s):\n return s.replace('&', r'\\&').replace('%', r'\\%')\n # 截取标题到第一个句点\n title_end_m = re.search(r'\\.\\s*(Retrieved|Available|http|DOI|doi|\\[)', title_raw_org)\n title_org = title_raw_org[:title_end_m.start()].strip() if title_end_m else title_raw_org\n title_org = title_org.rstrip('.,。')\n return (\n f'@misc{{{key},\\n'\n f' author = {{{{{_esc(org_candidate)}}}}},\\n'\n f' title = {{{_esc(title_org)}}},\\n'\n f' year = {{{year_org}}}\\n'\n f'}}'\n )\n\n # 判断是否中文条目（首字符是汉字）\n is_zh = bool(re.match(r'^[\\u4e00-\\u9fff]', text))\n\n # 全角标点规范化（Word 导出常见问题）\n text_norm = text\n if is_zh:\n # 中文条目：全角逗号/冒号 → 半角\n text_norm = text.replace('，', ',').replace('：', ':')\n text_norm = re.sub(r'[\"\\u201c\\u201d]', '\"', text_norm)\n else:\n # 英文条目：全角逗号 → \", \"（保持 \". 
\" 不变）\n text_norm = text.replace('，', ', ').replace('：', ': ')\n text_norm = re.sub(r'[\"\\u201c\\u201d]', '\"', text_norm)\n\n has_journal_marker = bool(re.search(r'\\[J\\]|\\[j\\]', text_norm))\n has_book_marker = bool(re.search(r'\\[M\\]|\\[m\\]|《|》|出版社|Press\\b|McGraw|Harvard Business Press', text_norm))\n has_online_marker = bool(re.search(r'\\[EB/OL\\]|\\[eb/ol\\]|EB/OL', text_norm))\n has_volume = bool(re.search(r',\\s*\\d+\\s*\$\\d+\$', text_norm))\n\n if has_online_marker:\n entry_type = 'misc'\n elif has_book_marker and not has_journal_marker and not has_volume:\n entry_type = 'book'\n else:\n entry_type = 'article'\n\n year_m = re.search(r'\\b(19|20)(\\d{2})\\b', text_norm)\n year = year_m.group(0) if year_m else ''\n\n # ── 提取作者和标题 ─────────────────────────────────────────────\n def _split_author_title(txt: str, zh: bool):\n \"\"\"返回 (author_raw, rest_after_separator)\"\"\"\n if zh:\n # 模式0（最高优先级）: 标准中文学术引用格式 \"作者. 标题[J/M/C/R/N/Z].\"\n # 格式：中文姓名（逗号分隔）+ \". \" + 标题（通常接[文献类型标识]或汉字）\n # 例: \"李卫华, 王雪. 药品价格...[J].\"\n # 例: \"国家医疗保障局. 
2023年医疗保障事业...\"（标题含数字开头）\n # 特征：\".\" 后接汉字、数字+汉字、方括号、书名号\n m_std = re.match(\n r'^([\\u4e00-\\u9fff\\w·，,、\\s（）()]+?)' # 作者部分\n r'\\.\\s*' # 英文句点（可带空格）\n r'(?=[\\u4e00-\\u9fff\"「《〔\\[0-9])', # 后面是汉字/引号/方括号/数字\n txt\n )\n if m_std:\n author_candidate = m_std.group(1).strip()\n rest = txt[m_std.end():].strip()\n # 验证作者部分：必须含汉字，不能太长（>50字符通常是标题混入了）\n if re.search(r'[\\u4e00-\\u9fff]', author_candidate) and len(author_candidate) \u003c= 50:\n return author_candidate, rest\n\n # 模式1a: 作者,\"标题\" 格式（半角引号，如吴芳,\"标题\",）\n m0 = re.match(r'^([^,\"\"《]{1,20}),\\s*\"(.+?)\"', txt)\n if m0:\n return m0.group(1).strip(), m0.group(2).strip()\n\n # 模式1b: 作者，\"标题\" 格式（全角逗号+全角引号，如谢伟，\"技术学习...\"）\n m0b = re.match(r'^([\\u4e00-\\u9fff]{1,10}(?:[，,、]\\s*[\\u4e00-\\u9fff]{1,10})*)\\s*[，,]\\s*[\"\"「]', txt)\n if m0b:\n sep_pos = m0b.end() - 1\n return m0b.group(1).strip(), txt[sep_pos:].strip()\n\n # 模式2: 作者(年) 格式（如赵敏,宁振波 (2020),《书名》）\n m1 = re.match(r'^([\\u4e00-\\u9fff\\s,、·]+?)\\s*[(（]\\d{4}[)）]', txt)\n if m1:\n return m1.group(1).strip(), txt[m1.end():].lstrip(',、。. ')\n\n # 模式3: 中文句号分隔（全角）\n zh_dot = txt.find('。')\n if zh_dot > 0:\n return txt[:zh_dot].strip(), txt[zh_dot+1:].strip()\n\n # 模式4: 英文句点后面直接跟汉字或引号（无空格，如 \"唐未兵.战略转型\" 或 '.\"标题\"'）\n for m2 in re.finditer(r'\\.(?=[\\u4e00-\\u9fff\"\"「])', txt):\n pos = m2.start()\n return txt[:pos].strip(), txt[pos+1:].strip()\n\n # 模式5: \". \" 后跟大写字母（英文句号+空格，后跟大写）\n for m3 in re.finditer(r'\\. ', txt):\n pos = m3.start()\n after = txt[pos+2:pos+3]\n if after and after[0].isupper():\n return txt[:pos].strip(), txt[pos+2:].strip()\n\n # 模式6: 全角逗号后接《书名》（如谢伟，《战略管理》）\n m6 = re.match(r'^([\\u4e00-\\u9fff]{1,10}(?:[，,]\\s*[\\u4e00-\\u9fff]{1,10})*)\\s*[，,]\\s*[《]', txt)\n if m6:\n return m6.group(1).strip(), txt[m6.end()-1:].strip()\n\n return '', txt\n else:\n # 英文格式：先尝试匹配 \"作者列表 (年). 标题\" 模式\n # 这样能一步拿到完整 author（含年份括号前）和 title\n year_pattern = re.search(r'\$(19|20)\\d{2}\$\\. ', txt)\n if year_pattern:\n # author = 年份括号前的内容，rest = 年份括号后的 \". 
标题...\"\n # 找年份括号开始位置\n yp_start = year_pattern.start()\n author_part = txt[:yp_start].strip().rstrip(',.')\n rest_part = txt[year_pattern.end():].strip()\n return author_part, rest_part\n\n # 模式2：\"作者.标题\" 格式（无空格，如 \"Biyu Tang.Analysis of...\"）\n # 匹配：点前是英文姓名（姓名格式），点后是大写字母\n for m in re.finditer(r'\\.(?=[A-Z])', txt):\n pos = m.start()\n before = txt[:pos]\n after = txt[pos+1:]\n # 确认点前是英文姓名（最后一个词是大写开头的姓，如 \"Tang\"）\n before_words = before.strip().split()\n if before_words and re.match(r'^[A-Z][a-z]+

$'

, before_words[-1]):\n # 不是缩写（缩写通常只有1-2个字符，如 \"J.\" \"M.\"）\n if len(before_words[-1]) > 2:\n return before.strip(), after.strip()\n\n # 兜底：找 \". \" 后面是大写字母（跳过缩写 K. M. J. A. 等）\n for m in re.finditer(r'\\. ', txt):\n pos = m.start()\n after = txt[pos+2:pos+25]\n if not after:\n continue\n first = after[0]\n if first.islower():\n continue\n if re.match(r'^[A-Z]\\. ', after):\n continue\n if first.isdigit():\n continue\n if re.match(r'^[A-Z][a-z]?\\.,', after):\n continue\n return txt[:pos].strip(), txt[pos+2:].strip()\n return '', txt\n\n author_raw, rest = _split_author_title(text_norm, is_zh)\n\n # 格式化 author\n if author_raw:\n if is_zh:\n parts = re.split(r'[、,]+', author_raw)\n author = ' and '.join(f'{{{p.strip()}}}' for p in parts if p.strip())\n else:\n author_raw = re.sub(r'\\s*[&]\\s*', ' and ', author_raw)\n author_raw = re.sub(r'[.,;(（\\s]+

$'

, '', author_raw)\n parts = re.split(r'\\s+and\\s+', author_raw, flags=re.IGNORECASE)\n author = ' and '.join(f'{{{p.strip()}}}' for p in parts if p.strip())\n else:\n author = ''\n\n # 提取标题\n type_marker = re.search(r'\\[[A-Z/]+\\]', rest)\n if type_marker:\n title = rest[:type_marker.start()].strip().rstrip('.,。，')\n else:\n first_dot = re.search(r'[.。]', rest)\n title = rest[:first_dot.start()].strip() if first_dot else rest[:100].strip()\n title = re.sub(r'[《》]', '', title).strip()\n\n # 期刊名、卷期页\n journal, volume, number, pages = '', '', '', ''\n if entry_type == 'article':\n vol_m = re.search(r',?\\s*(\\d+)\\s*[（(](\\d+[-–]\\d+|\\d+)[）)],?\\s*([\\d–-]+)', text_norm)\n if vol_m:\n volume = vol_m.group(1)\n number = vol_m.group(2)\n pages = re.sub(r'[-–]', '--', vol_m.group(3))\n j_start = text_norm.rfind('.', 0, vol_m.start())\n if j_start >= 0:\n journal = text_norm[j_start+1:vol_m.start()].strip().rstrip(',. ')\n\n publisher, address = '', ''\n if entry_type == 'book':\n pub_m = re.search(r'[:：]\\s*([^,.]+(?:Press|出版社|Publisher)[^,.]*)', text_norm)\n if pub_m:\n publisher = pub_m.group(1).strip()\n addr_m = re.search(r'([A-Z][a-z]+(?:\\s[A-Z][a-z]+)?)\\s*[:：]', text_norm)\n if addr_m:\n address = addr_m.group(1).strip()\n\n def _bib_escape(s: str) -> str:\n return s.replace('&', r'\\&') if s else s\n\n fields = []\n if author:\n fields.append(f' author = {{{_bib_escape(author)}}}')\n if title:\n fields.append(f' title = {{{_bib_escape(title)}}}')\n if journal:\n fields.append(f' journal = {{{_bib_escape(journal)}}}')\n if volume:\n fields.append(f' volume = {{{volume}}}')\n if number:\n fields.append(f' number = {{{number}}}')\n if pages:\n fields.append(f' pages = {{{pages}}}')\n if year:\n fields.append(f' year = {{{year}}}')\n if publisher:\n fields.append(f' publisher = {{{_bib_escape(publisher)}}}')\n if address:\n fields.append(f' address = {{{address}}}')\n if entry_type == 'misc':\n fields.append(f' note = {{[EB/OL]}}')\n\n body = 
',\\n'.join(fields)\n return f'@{entry_type}{{{key},\\n{body}\\n}}'\n\ndef generate_bibtex(refs: list, output_dir: Path) -> tuple:\n \"\"\"把参考文献列表生成 ref/refs.bib，返回 (编号→key 映射字典, author-year lookup 字典)\n\n 返回：\n mapping: {编号(int): bibtex_key}\n ay_lookup: {(surname_pinyin_or_lower, year_str): bibtex_key}\n 用于正文中 author-year 行文引用的匹配\n \"\"\"\n ref_dir = output_dir / 'ref'\n ref_dir.mkdir(exist_ok=True)\n\n used_keys: set = set()\n mapping = {} # {编号(int): key}\n ay_lookup = {} # {(surname, year): key}\n entries = []\n\n for i, ref in enumerate(refs, start=1):\n key = _make_bib_key(ref, i, used_keys)\n mapping[i] = key\n entry = _parse_ref_to_bibtex(ref, key)\n entries.append(entry)\n\n # 构建 author-year lookup\n # 从 ref 原文提取：所有作者姓 + 年份（多作者情况都注册，以防正文引用非第一作者）\n text_clean = re.sub(r'^\\[\\d+\\]\\s*', '', ref.strip())\n year_m = re.search(r'\\b(19|20)\\d{2}\\b', text_clean)\n if year_m:\n year = year_m.group(0)\n # 提取作者段：截止到第一个 \". \" 或 \"。\"（句点+空格，排除姓名缩写中的点）\n # 中文格式：曹玉,肖华.标题英文格式：Smith J, Tang B. 
Title\n author_segment = re.split(r'\\.\\s+[^\\d]|[.。]\\s', text_clean)[0]\n # 中文作者：仅匹配2-4字的姓名（避免把标题词当作者）\n zh_authors = re.findall(r'[\\u4e00-\\u9fff]{2,4}(?=\\s*[,，、和与&以]|\\s*$|\\s*等)', author_segment)\n if not zh_authors:\n # 兜底：取首字段（逗号或顿号前）的中文词\n first_author_seg = re.split(r'[,，、和与&]', author_segment)[0].strip()\n zh_authors = re.findall(r'[\\u4e00-\\u9fff]{2,4}', first_author_seg)\n if zh_authors:\n for zh_name in zh_authors[:4]: # 最多取前4个作者\n surname_char = zh_name[0]\n pinyin = _surname_to_pinyin(surname_char)\n if (pinyin, year) not in ay_lookup:\n ay_lookup[(pinyin, year)] = key\n if (surname_char, year) not in ay_lookup:\n ay_lookup[(surname_char, year)] = key\n else:\n # 英文/拼音作者（单词序列，逗号分隔）\n # 支持：Biyu Tang, Smith J, BiyuTang 等格式\n en_authors = re.findall(r'[A-Z][a-zA-ZÀ-ÿ\\-]+', author_segment)\n for en_name in en_authors[:4]: # 最多取前4个作者\n name_lower = re.sub(r'[^a-z]', '', en_name.lower())\n if name_lower and (name_lower, year) not in ay_lookup:\n ay_lookup[(name_lower, year)] = key\n\n # 映射注释表\n map_lines = ['% ' + '=' * 58,\n '% 参考文献映射表（原编号 → BibTeX key）',\n '% ' + '=' * 58]\n for i, key in mapping.items():\n map_lines.append(f'% [{i:2d}] → {key}')\n map_lines.append('% ' + '=' * 58)\n map_lines.append('')\n\n bib_content = '\\n'.join(map_lines) + '\\n\\n'.join(entries) + '\\n'\n (ref_dir / 'refs.bib').write_text(bib_content, encoding='utf-8')\n return mapping, ay_lookup\n\n\n# ── 章节渲染 ────────────────────────────────────────────────────────────────\nCMD_MAP = {1: 'chapter', 2: 'section', 3: 'subsection', 4: 'subsubsection'}\n\ndef render_content_items(items: list, cite_mapping: dict = None, ay_lookup: dict = None) -> str:\n \"\"\"把章节的 content 列表渲染成 LaTeX 代码\"\"\"\n lines = []\n for item in items:\n t = item.get('type', '')\n if t == 'section':\n lvl = item.get('level', 2)\n cmd = CMD_MAP.get(lvl, 'paragraph')\n title = escape_meta(strip_section_number(item.get('title', '')))\n lines.append(f'\\n\\\\{cmd}{{{title}}}\\n')\n elif t == 'text':\n content 
def render_table(item: dict) -> str:
    """Render a table item as a three-line table (booktabs rules in tabularx).

    Column strategy:
      - the display width of every column is measured (a CJK ideograph counts
        as two characters);
      - columns no wider than 12 characters become ``l``, wider ones ``X``;
      - if every column is short, the widest one is promoted to ``X`` because
        tabularx needs at least one ``X`` column.

    The first row is treated as the header and is followed by ``\\midrule``.

    Args:
        item: Table item with ``rows`` (list of cell lists) and an optional
            ``caption``.

    Returns:
        LaTeX source for the table, or ``''`` when there are no rows or every
        row is empty.
    """
    rows = item.get('rows') or []
    if not rows:
        return ''
    ncols = max(len(r) for r in rows)
    if ncols == 0:
        # Rows exist but none has a cell: nothing to typeset.
        return ''
    caption = escape_latex((item.get('caption') or '').strip())

    def _display_width(cell) -> int:
        return sum(2 if '\u4e00' <= c <= '\u9fff' else 1 for c in str(cell))

    # Widest cell per column, never less than 1.
    col_widths = [
        max([1] + [_display_width(row[ci]) for row in rows if ci < len(row)])
        for ci in range(ncols)
    ]

    SHORT_THRESHOLD = 12
    col_specs = ['X' if width > SHORT_THRESHOLD else 'l' for width in col_widths]
    if 'X' not in col_specs:
        # All columns are short: stretch the (first) widest one.
        col_specs[col_widths.index(max(col_widths))] = 'X'
    col_spec = ' '.join(col_specs)

    lines = ['', '\\begin{table}[htbp]']
    if caption:
        # NOTE(review): every captioned table gets the same label, so LaTeX
        # reports it as multiply defined - confirm whether anything refers to
        # it before making it unique.
        lines += [
            f' \\caption{{{caption}}}',
            ' \\label{tab:auto}',
        ]
    lines += [
        f' \\begin{{tabularx}}{{\\linewidth}}{{{col_spec}}}',
        ' \\toprule',
    ]
    for i, row in enumerate(rows):
        cells = [escape_latex(str(c)) for c in row]
        cells += [''] * (ncols - len(cells))  # pad short rows
        lines.append(' ' + ' & '.join(cells) + r' \\')
        if i == 0:  # rule below the header row
            lines.append(' \\midrule')
    lines += [
        ' \\bottomrule',
        ' \\end{tabularx}',
        '\\end{table}',
        '',
    ]
    return '\n'.join(lines)
创建子目录\n for d in ['data', 'figures', 'ref']:\n (output_dir / d).mkdir(exist_ok=True)\n\n # 复制提取出的图片到 LaTeX 项目的 figures/ 目录\n src_figures = Path(json_path).parent / 'figures' # output/figures/\n dst_figures = output_dir / 'figures'\n dst_figures.mkdir(exist_ok=True)\n if src_figures.exists():\n copied = 0\n for img in src_figures.iterdir():\n if img.is_file():\n shutil.copy2(img, dst_figures / img.name)\n copied += 1\n print(f'✅ figures/ ({copied} 张图片)')\n\n # 4. 初始化 Jinja2 环境\n # 模板目录查找顺序：\n # 1. scripts/../templates/ （docx2thu 开发目录结构）\n # 2. scripts/../assets/templates/ （skill 安装目录结构）\n _script_dir = Path(__file__).parent\n _templates_candidates = [\n _script_dir.parent / 'templates',\n _script_dir.parent / 'assets' / 'templates',\n ]\n templates_dir = next((p for p in _templates_candidates if p.is_dir()), None)\n if templates_dir is None:\n print(\"❌ 找不到模板目录！期望路径：\")\n for p in _templates_candidates:\n print(f\" {p}\")\n sys.exit(1)\n env = Environment(\n loader=FileSystemLoader(str(templates_dir)),\n keep_trailing_newline=True,\n trim_blocks=True,\n lstrip_blocks=True,\n )\n # 注册转义过滤器\n env.filters['escape_latex'] = escape_latex\n env.filters['escape_meta'] = escape_meta\n\n meta = data.get('meta', {})\n\n # title_en 超过200字符说明把摘要误当标题了，截断保护\n if meta.get('title_en') and len(meta['title_en']) > 200:\n # 取第一句（到第一个句号）\n first_sentence = meta['title_en'].split('.')[0].strip()\n meta['title_en'] = first_sentence if len(first_sentence) > 10 else meta['title_en'][:200]\n\n # 补全日期为 ISO 格式：\n # \"2025-11\" → \"2025-11-01\"\n # \"25-11\" → 补全年份前缀（当前世纪）→ \"2025-11-01\"\n # \"二○二六年四月\" → \"2026-04-01\"（build_parsed 已处理，此处双重兜底）\n date_str = meta.get('date', '')\n CN_DIGITS_R = {'○': '0', '〇': '0', '一': '1', '二': '2', '三': '3', '四': '4',\n '五': '5', '六': '6', '七': '7', '八': '8', '九': '9'}\n if date_str and re.search(r'[\\u4e00-\\u9fff○〇]', date_str):\n # 含中文字符，先处理复合月份再转阿拉伯数字\n d_pre = re.sub(r'十二月', '#12月', date_str)\n d_pre = re.sub(r'十一月', '#11月', d_pre)\n d_pre = 
re.sub(r'十月', '#10月', d_pre)\n d_arabic = ''.join(CN_DIGITS_R.get(c, c) for c in d_pre).replace('#', '')\n m = re.search(r'(\\d{4})\\s*年\\s*(\\d{1,2})\\s*月', d_arabic)\n if m:\n date_str = f\"{m.group(1)}-{int(m.group(2)):02d}\"\n meta['date'] = date_str\n if date_str:\n parts = date_str.split('-')\n if len(parts) == 2:\n yr, mo = parts\n if len(yr) == 2: # \"25\" → \"2025\"\n yr = '20' + yr\n meta['date'] = f\"{yr}-{mo}-01\"\n elif len(parts) == 1 and len(date_str) \u003c= 4:\n meta['date'] = '' # 无效日期清空\n\n # 日期兜底：如果为空，用当前年月\n if not meta.get('date'):\n import datetime\n today = datetime.date.today()\n meta['date'] = f\"{today.year}-{today.month:02d}-01\"\n print(f' ℹ️ 封面日期为空，使用当前年月：{meta[\"date\"]}')\n\n # 5. 渲染 thusetup.tex\n tmpl = env.get_template('thusetup.tex.j2')\n (output_dir / 'thusetup.tex').write_text(\n tmpl.render(meta=meta), encoding='utf-8'\n )\n print('✅ thusetup.tex')\n\n # 6. 渲染 abstract.tex\n # 修复：如果 keywords_en 是逐词拆散的（含停用词），从 abstract_en 末尾重新提取\n _EN_STOPWORDS = {'and', 'of', 'for', 'the', 'a', 'an', 'in', 'on', 'at',\n 'to', 'with', 'by', 'from', 'or', 'but', 'nor', 'as'}\n keywords_en = data.get('keywords_en', [])\n if keywords_en and any(w.lower() in _EN_STOPWORDS for w in keywords_en):\n # 检测到逐词拆散，尝试从 abstract_en 重新提取\n abstract_en_text = data.get('abstract_en', '')\n m_kw = re.search(r'[Kk]ey\\s*[Ww]ords[：:\\s]*(.+)

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.

, abstract_en_text.strip())\n if m_kw:\n kw_text = m_kw.group(1).strip()\n keywords_en = [k.strip() for k in re.split(r'[；;，,]+', kw_text) if k.strip()]\n print(f' ℹ️ 英文关键词已从 abstract 重新提取: {keywords_en}')\n\n tmpl = env.get_template('abstract.tex.j2')\n (output_dir / 'data' / 'abstract.tex').write_text(\n tmpl.render(\n abstract_cn=escape_latex(data.get('abstract_cn', '')),\n keywords_cn=[escape_latex(k) for k in data.get('keywords_cn', [])],\n abstract_en=escape_latex(data.get('abstract_en', '')),\n keywords_en=[escape_latex(k) for k in keywords_en],\n ), encoding='utf-8'\n )\n print('✅ data/abstract.tex')\n\n # 7. 孤儿缩略语检测 + 正文补写（必须在章节渲染之前，保证 data 只修改一次）\n data, abbrevs = fix_orphan_abbrevs(data)\n\n # 7b. 生成 BibTeX 映射（提前，章节渲染时用于 \\cite 替换）\n refs = data.get('references', [])\n cite_mapping, ay_lookup = generate_bibtex(refs, output_dir)\n print(f'✅ ref/refs.bib ({len(refs)} 条参考文献，author-year lookup: {len(ay_lookup)} 条)')\n\n # 8. 渲染每个章节（使用补写后的 data 和 cite_mapping）\n chapters_info = []\n for i, chap in enumerate(data.get('chapters', []), start=1):\n filename = f'chap{i:02d}'\n title = escape_meta(chap.get('title', ''))\n blocks = []\n for item in chap.get('content', []):\n t = item.get('type', '')\n if t == 'section':\n blocks.append({\n 'type': 'heading',\n 'level': item.get('level', 2),\n 'title': escape_meta(strip_section_number(item.get('title', ''))),\n })\n elif t == 'text':\n content = item.get('content', '').strip()\n # 跳过独立的图题/表题行（已被收入 figure/table 的 caption，避免重复）\n if re.match(r'^[图表]\\s*\\d+[-–—]\\d+', content):\n continue\n content = convert_citations(escape_latex(content), cite_mapping, ay_lookup)\n if content:\n blocks.append({'type': 'paragraph', 'text': content})\n elif t == 'figure':\n embed = item.get('embed', '')\n caption = item.get('caption', '')\n # ★ 优先从 item 自身的 path 字段获取文件名（parse_docx 新格式）\n # 兼容旧格式：从 data['figures'] 字典中查找\n fig_filename = ''\n if item.get('path'):\n fig_filename = Path(item['path']).name # 去掉 'figures/' 前缀\n else:\n 
fig_info = data.get('figures', {}).get(embed, {})\n fig_filename = fig_info.get('filename', '')\n if not caption:\n caption = fig_info.get('caption', '')\n if fig_filename:\n # 跳过 SVG（依赖 inkscape，可能不可用），只处理 PNG/JPG/PDF\n ext = Path(fig_filename).suffix.lower()\n if ext == '.svg':\n print(f' ⚠️ 跳过 SVG 图片: {fig_filename}（LaTeX 不直接支持，请手动转换）')\n else:\n cap_clean = clean_caption(caption)\n label_raw = re.sub(r'[^a-zA-Z0-9\\u4e00-\\u9fff]', '-', cap_clean)[:30]\n label = re.sub(r'-+', '-', label_raw).strip('-')\n blocks.append({\n 'type': 'figure',\n 'path': fig_filename,\n 'caption': escape_meta(cap_clean),\n 'label': label,\n })\n elif t == 'table':\n rows = item.get('rows', [])\n caption = clean_caption(item.get('caption', ''))\n if not rows or item.get('caption', '').startswith('图'):\n continue\n # 直接用 render_table 生成完整 LaTeX，避免模板列数推断问题\n table_item = {\n 'rows': rows,\n 'caption': caption,\n }\n raw_tex = render_table(table_item)\n if raw_tex.strip():\n blocks.append({\n 'type': 'raw_latex',\n 'content': raw_tex,\n })\n tmpl = env.get_template('chapter.tex.j2')\n # 从章编号提取章序号，用于设置 LaTeX counter（保留原文章号）\n # number 字段可能是 \"第3章\"、\"3\"、\"\" 等形式\n raw_num = chap.get('number', '')\n chap_counter = None\n m_num = re.search(r'(\\d+)', raw_num)\n if m_num:\n n = int(m_num.group(1))\n chap_counter = n - 1 # \\setcounter{chapter}{N-1} 使下一个 \\chapter 编为第N章\n chapter_obj = {\n 'level': 1,\n 'title': title,\n 'number': raw_num,\n 'chap_counter': chap_counter,\n 'content': blocks,\n }\n tex = tmpl.render(chapter=chapter_obj)\n (output_dir / 'data' / f'{filename}.tex').write_text(tex, encoding='utf-8')\n chapters_info.append({'filename': filename, 'title': title})\n print(f'✅ data/{filename}.tex ({title})')\n\n # 统计实际渲染出的图片和表格数量（用于控制是否生成插图/附表清单）\n actual_has_figures = any(\n b.get('type') == 'figure'\n for ci in chapters_info\n for tex_path in [output_dir / 'data' / f'{ci[\"filename\"]}.tex']\n if False # 占位：由下面的 blocks 实时统计\n )\n # 更可靠：直接扫描已生成的 .tex 文件\n has_figures_real = 
False\n has_tables_real = False\n for ci in chapters_info:\n tex_path = output_dir / 'data' / f'{ci[\"filename\"]}.tex'\n if tex_path.exists():\n tex_content = tex_path.read_text(encoding='utf-8')\n if r'\\begin{figure}' in tex_content:\n has_figures_real = True\n if r'\\begin{table}' in tex_content or r'\\begin{tabularx}' in tex_content:\n has_tables_real = True\n\n # 8. 渲染 acknowledgements.tex（每行独立段落：单\\n → \\n\\n）\n ack = data.get('acknowledgements', '')\n ack_paragraphs = '\\n\\n'.join(\n escape_latex(line.strip()) for line in ack.split('\\n') if line.strip()\n )\n tmpl = env.get_template('acknowledgements.tex.j2')\n (output_dir / 'data' / 'acknowledgements.tex').write_text(\n tmpl.render(acknowledgements=ack_paragraphs), encoding='utf-8'\n )\n print('✅ data/acknowledgements.tex')\n\n # 9. 渲染 resume.tex（每行独立段落：单\\n → \\n\\n 让 LaTeX 产生段间距）\n resume = data.get('resume', '')\n resume_paragraphs = '\\n\\n'.join(\n escape_latex(line.strip()) for line in resume.split('\\n') if line.strip()\n )\n tmpl = env.get_template('resume.tex.j2')\n (output_dir / 'data' / 'resume.tex').write_text(\n tmpl.render(resume_text=resume_paragraphs), encoding='utf-8'\n )\n print('✅ data/resume.tex')\n\n # 11. 生成占位 committee.tex（答辩委员会，通常需要手工填写）\n committee_tex = r\"\"\"% !TeX root = ../thesis.tex\n% 答辩委员会名单 - 请手工填写\n\n\\begin{committee}[name={学位论文指导小组、公开评阅人和答辩委员会名单}]\n\n \\newcolumntype{C}[1]{@{}>{\\centering\\arraybackslash}p{#1}}\n\n \\section*{指导小组名单}\n\n \\begin{center}\n \\begin{tabular}{C{3cm}C{3cm}C{9cm}@{}}\n % 请填写 \\\\\n \\end{tabular}\n \\end{center}\n\n\\end{committee}\n\"\"\"\n (output_dir / 'data' / 'committee.tex').write_text(committee_tex, encoding='utf-8')\n print('✅ data/committee.tex (占位，请手工填写)')\n\n # 12. 
生成 denotation.tex（abbrevs 已由步骤 7 的 fix_orphan_abbrevs 生成）\n deno_lines = [\n '% !TeX root = ../thesis.tex',\n '% 符号和缩略语说明（由转换脚本自动提取，请人工核查）',\n '% 标注 \"← 请人工填写\" 的条目为正文中出现但未给出解释的缩略语',\n '',\n r'\\begin{denotation}[3cm]',\n ]\n for abbr, explanation in abbrevs.items():\n abbr_esc = escape_meta(abbr)\n exp_esc = escape_latex(explanation)\n if explanation == '待补充全称':\n deno_lines.append(f' \\\\item[{abbr_esc}] {exp_esc} % ← 请人工填写完整解释')\n else:\n deno_lines.append(f' \\\\item[{abbr_esc}] {exp_esc}')\n deno_lines.append(r'\\end{denotation}')\n (output_dir / 'data' / 'denotation.tex').write_text(\n '\\n'.join(deno_lines), encoding='utf-8'\n )\n orphan_count = sum(1 for v in abbrevs.values() if v == '待补充全称')\n print(f'✅ data/denotation.tex ({len(abbrevs)} 个缩略语，其中 {orphan_count} 个待人工补充)')\n\n # 13. 生成占位 comments.tex 和 resolution.tex\n (output_dir / 'data' / 'comments.tex').write_text(\n '% 指导教师评语（答辩后手工填写或插入扫描件）\\n', encoding='utf-8')\n (output_dir / 'data' / 'resolution.tex').write_text(\n '% 答辩委员会决议书（答辩后手工填写或插入扫描件）\\n', encoding='utf-8')\n print('✅ data/comments.tex + resolution.tex (占位，答辩后填写)')\n\n # 12. 渲染主文件 thesis.tex\n tmpl = env.get_template('main.tex.j2')\n (output_dir / 'thesis.tex').write_text(\n tmpl.render(\n meta=meta,\n chapters=chapters_info,\n has_resume=bool(resume),\n has_acknowledgements=bool(ack),\n has_figures_list=has_figures_real,\n has_tables_list=has_tables_real,\n ), encoding='utf-8'\n )\n print('✅ thesis.tex')\n\n print(f'\\n📁 LaTeX 项目已生成到: {output_dir}')\n print('下一步: 运行 compile.sh 编译 PDF')\n\nif __name__ == '__main__':\n if len(sys.argv) \u003c 3:\n print('用法: python3 render.py \u003cparsed.json> \u003coutput_dir>')\n sys.exit(1)\n render_project(sys.argv[1], sys.argv[2])\n","content_type":"text/x-python; charset=utf-8","language":"python","size":61118,"content_sha256":"e92ee181aa511363ad49ff02c502142b53e87149090eec752525df6570754475"},{"filename":"scripts/setup.sh","content":"#!/bin/bash\n# setup.sh - 初始化 thu-thesis 工作环境\n#\n# 功能：\n# 1. 
从 GitHub 拉取最新 thuthesis 源码\n# 2. 编译生成 thuthesis.cls（如尚未生成）\n# 3. 将 data/ 目录复制为 assets/databk/（格式参考基准）\n#\n# 用法：\n# bash setup.sh [skill目录]\n#\n# 环境变量（可选）：\n# THUTHESIS_DIR - 指定已有的 thuthesis 目录（跳过拉取步骤）\n# XELATEX_PATH - xelatex 完整路径（默认自动探测）\n\nset -e\n\nSKILL_DIR=\"${1:-$(dirname \"$0\")/..}\"\nSKILL_DIR=\"$(cd \"$SKILL_DIR\" && pwd)\"\nASSETS_DIR=\"$SKILL_DIR/assets\"\nDATABK_DIR=\"$ASSETS_DIR/databk\"\nTMP_DIR=\"${THUTHESIS_DIR:-/tmp/thuthesis-latest}\"\n\n# ── 安全检查：禁止对敏感路径执行 rm -rf ──\n_is_safe_tmpdir() {\n local p\n p=\"$(cd \"$1\" 2>/dev/null && pwd || echo \"$1\")\"\n # 必须是 /tmp/ 下的目录，或明确是用户指定的 THUTHESIS_DIR（此时跳过删除）\n [[ \"$p\" == /tmp/* ]]\n}\n\n\necho \"=== thu-thesis 环境初始化 ===\"\necho \"Skill 目录: $SKILL_DIR\"\necho \"thuthesis 目录: $TMP_DIR\"\n\n# ── 探测 xelatex ──\nif [ -n \"$XELATEX_PATH\" ]; then\n XELATEX=\"$XELATEX_PATH\"\nelif [ -f \"/Library/TeX/texbin/xelatex\" ]; then\n XELATEX=\"/Library/TeX/texbin/xelatex\"\nelif command -v xelatex &>/dev/null; then\n XELATEX=\"$(command -v xelatex)\"\nelse\n echo \"⚠️ 未找到 xelatex，跳过 .cls 编译（可手动设置 XELATEX_PATH）\"\n XELATEX=\"\"\nfi\n[ -n \"$XELATEX\" ] && echo \"xelatex: $XELATEX\"\n\n# ── 1. 拉取最新 thuthesis ──\necho \"\"\necho \"[1/3] 拉取最新 thuthesis...\"\nif [ -d \"$TMP_DIR/.git\" ]; then\n echo \" 已存在，执行 git pull...\"\n git -C \"$TMP_DIR\" pull --quiet\nelse\n echo \" 克隆 https://github.com/tuna/thuthesis ...\"\n if _is_safe_tmpdir \"$TMP_DIR\"; then\n rm -rf \"$TMP_DIR\"\n else\n echo \" ⚠️ $TMP_DIR 不在 /tmp/ 下，跳过删除（保留现有内容直接 clone 到新目录不可行）\"\n echo \" ❌ 请手动清理后重试，或不设置 THUTHESIS_DIR 使用默认路径 /tmp/thuthesis-latest\"\n exit 1\n fi\n git clone --depth=1 --quiet https://github.com/tuna/thuthesis.git \"$TMP_DIR\"\nfi\necho \" ✅ thuthesis 拉取完成\"\n\n# ── 2. 
编译生成 thuthesis.cls ──\necho \"\"\necho \"[2/3] 编译 thuthesis.cls ...\"\nif [ -f \"$TMP_DIR/thuthesis.cls\" ]; then\n echo \" ✅ thuthesis.cls 已存在，跳过编译\"\nelif [ -n \"$XELATEX\" ] && [ -f \"$TMP_DIR/thuthesis.ins\" ]; then\n (cd \"$TMP_DIR\" && \"$XELATEX\" -interaction=nonstopmode thuthesis.ins > /dev/null 2>&1)\n if [ -f \"$TMP_DIR/thuthesis.cls\" ]; then\n echo \" ✅ thuthesis.cls 编译成功\"\n else\n echo \" ❌ 编译失败，请手动运行：\"\n echo \" cd $TMP_DIR && xelatex thuthesis.ins\"\n fi\nelse\n echo \" ⚠️ 跳过（无 xelatex 或无 .ins 文件）\"\nfi\n\n# ── 3. 复制 data/ → assets/databk/ ──\necho \"\"\necho \"[3/3] 更新 assets/databk/ ...\"\nif [ ! -d \"$TMP_DIR/data\" ]; then\n echo \" ⚠️ 未找到 data/ 目录，跳过\"\nelse\n rm -rf \"$DATABK_DIR\"\n cp -r \"$TMP_DIR/data\" \"$DATABK_DIR\"\n echo \" ✅ databk 已更新（$(ls \"$DATABK_DIR\" | wc -l | tr -d ' ') 个文件）\"\nfi\n\necho \"\"\necho \"=== 初始化完成 ===\"\necho \"thuthesis 源码：$TMP_DIR\"\necho \"格式参考：$DATABK_DIR\"\necho \"\"\necho \"可选环境变量：\"\necho \" THUTHESIS_DIR=/path/to/thuthesis # 使用本地已有版本\"\necho \" XELATEX_PATH=/path/to/xelatex # 指定 xelatex 路径\"\n","content_type":"application/x-sh; charset=utf-8","language":"bash","size":3389,"content_sha256":"cf9b6211d0628bdbdb6762d04b144a7159f355420f3fbdbba76d943e32ec8cb1"}],"content_json":{"type":"doc","content":[{"type":"heading","attrs":{"level":1},"content":[{"text":"清华 MBA 论文 Word → PDF 一键转换","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"⚠️ 核心操作原则（不得违反）","type":"text"}]},{"type":"blockquote","content":[{"type":"paragraph","content":[{"text":"只从 Word 中提取信息，不修改 thuthesis 模板格式。","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"thuthesis 的封面、页眉、目录、参考文献、图表样式等，全部由 ","type":"text"},{"text":"thuthesis.cls","type":"text","marks":[{"type":"code_inline"}]},{"text":" 自动生成","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"脚本只负责把 Word 里的内容（标题、摘要、章节、图表、参考文献等）提取出来填入 
","type":"text"},{"text":".tex","type":"text","marks":[{"type":"code_inline"}]},{"text":" 文件","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"若 Word 中某字段缺失，对应 LaTeX 字段留空，","type":"text"},{"text":"不删除","type":"text","marks":[{"type":"strong"}]},{"text":"、不跳过、不用占位符替代","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"任何格式上的\"改进\"都必须以 ","type":"text"},{"text":"assets/databk/","type":"text","marks":[{"type":"code_inline"}]},{"text":" 中的官方示例为准，不得自行发挥","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"架构：新三层 AI-native 流程","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":""},"content":[{"text":"Word 文件\n ↓ [extract_raw.py] 纯机械提取，无 LLM\nraw_xxx.json + 文档骨架（段落 idx + 样式 + 文字）\n ↓ [我（AI）阅读骨架] 理解章节结构\nstruct_xxx.json（章节划分、段落 idx 映射）\n ↓ [build_parsed.py] 纯 Python 组装，无 LLM\nparsed_xxx.json\n ↓ [render.py] 填充 thuthesis LaTeX 模板\nLaTeX 项目目录\n ↓ [xelatex + bibtex] 编译\nthesis.pdf ✅\n ↓ [我（AI）Rubric 评测] 阅读产物，逐项打分 + 自动修复\nevaluation_report.md","type":"text"}]},{"type":"paragraph","content":[{"text":"关键设计原则：Python 脚本不调用任何 LLM，不持有 API key。AI 在两个关键环节介入：(1) 阅读骨架生成 struct.json；(2) Rubric 评测 + 自动修复。","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"依赖","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"pip3 install python-docx jinja2 matplotlib\n# 需要已安装 TeX Live","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"格式参考：assets/databk/","type":"text"}]},{"type":"paragraph","content":[{"text":"assets/databk/","type":"text","marks":[{"type":"code_inline"}]},{"text":" 是从官方 thuthesis 项目备份的原始示例 data 
文件，是本工具一切格式决策的","type":"text"},{"text":"黄金标准","type":"text","marks":[{"type":"strong"}]},{"text":"：","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"文件","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"参考内容","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"chap01.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":" ~ ","type":"text"},{"text":"chap04.tex","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"正文章节、三线表、图片、公式格式","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"abstract.tex","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"中英文摘要格式","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"denotation.tex","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"缩略语/符号说明格式","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"acknowledgements.tex","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colsp
an":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"致谢格式","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"resume.tex","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"个人简历格式","type":"text"}]}]}]}]},{"type":"paragraph","content":[{"text":"遇到任何格式问题，先查 ","type":"text","marks":[{"type":"strong"}]},{"text":"databk/","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" 里的对应文件，再动代码。","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"初次使用 / 更新格式参考","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# SKILL_DIR = 本 skill 的根目录（thu-thesis/）\nSKILL_DIR=\"$(cd \"$(dirname \"$0\")/..\" && pwd)\" # 在 scripts/ 内执行时\n# 或直接写绝对路径，例如：\n# SKILL_DIR=\"/path/to/skills/thu-thesis\"\nbash \"$SKILL_DIR/scripts/setup.sh\" \"$SKILL_DIR\"","type":"text"}]},{"type":"paragraph","content":[{"text":"setup.sh","type":"text","marks":[{"type":"code_inline"}]},{"text":" 做三件事：","type":"text"}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"从 GitHub clone（首次）或 ","type":"text"},{"text":"git pull","type":"text","marks":[{"type":"code_inline"}]},{"text":"（已有）最新 ","type":"text"},{"text":"thuthesis","type":"text","marks":[{"type":"link","attrs":{"href":"https://github.com/tuna/thuthesis","title":null}}]},{"text":" 到 ","type":"text"},{"text":"/tmp/thuthesis-latest","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"编译生成 
","type":"text"},{"text":"thuthesis.cls","type":"text","marks":[{"type":"code_inline"}]},{"text":"（如尚未生成）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"rm -rf assets/databk/ && cp -r data/ assets/databk/","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" → 保持格式参考始终为最新版本","type":"text"}]}]}]},{"type":"paragraph","content":[{"text":"每次 thuthesis 版本有重大更新时，重跑 ","type":"text","marks":[{"type":"strong"}]},{"text":"setup.sh","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" 即可刷新 databk。","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"输出路径规范","type":"text"}]},{"type":"paragraph","content":[{"text":"LaTeX 工程输出位置：与输入 ","type":"text","marks":[{"type":"strong"}]},{"text":".docx","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" 同目录，子文件夹命名为 ","type":"text","marks":[{"type":"strong"}]},{"text":"\u003c原文件名去扩展>-latex","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":"。","type":"text","marks":[{"type":"strong"}]}]},{"type":"paragraph","content":[{"text":"例如：输入 ","type":"text"},{"text":"/path/to/foo.docx","type":"text","marks":[{"type":"code_inline"}]},{"text":"，则 LaTeX 工程输出到 ","type":"text"},{"text":"/path/to/foo-latex/","type":"text","marks":[{"type":"code_inline"}]},{"text":"。","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"中间临时文件（raw/struct/parsed JSON）放在 workspace 临时目录，转换完成后可清理","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"最终交付给用户的是 ","type":"text"},{"text":"-latex/","type":"text","marks":[{"type":"code_inline"}]},{"text":" 目录（含 
","type":"text"},{"text":"thesis.pdf","type":"text","marks":[{"type":"code_inline"}]},{"text":"）","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"完整转换流程","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Step 1：机械提取（同时创建 LaTeX 工程目录）","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# SKILL_DIR = 本 skill 根目录，按实际安装路径设置\nSKILL_DIR=\"/path/to/skills/thu-thesis\"\n\npython3 \"$SKILL_DIR/scripts/convert.py\" extract /path/to/论文.docx output/","type":"text"}]},{"type":"paragraph","content":[{"text":"extract 会立即做两件事：","type":"text","marks":[{"type":"strong"}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"在 ","type":"text"},{"text":".docx","type":"text","marks":[{"type":"code_inline"}]},{"text":" ","type":"text"},{"text":"同目录","type":"text","marks":[{"type":"strong"}]},{"text":"创建 ","type":"text"},{"text":"\u003cstem>-latex/","type":"text","marks":[{"type":"code_inline"}]},{"text":" 工程目录（项目开始即确定输出位置）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"机械提取，输出 ","type":"text"},{"text":"output/raw_xxx.json","type":"text","marks":[{"type":"code_inline"}]},{"text":" + 终端骨架","type":"text"}]}]}]},{"type":"paragraph","content":[{"text":"终端输出示例：","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":""},"content":[{"text":"📄 输入: /path/to/foo.docx\n📁 中间文件: output/\n📁 LaTeX 工程: /path/to/foo-latex/ ← 已创建\n📊 图片: 5 张 | para_idx: [102, 115, ...]\n📊 表格: 3 张 | before_para: [88, 134, ...]","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Step 2：AI 阅读骨架，生成 
struct.json","type":"text"}]},{"type":"paragraph","content":[{"text":"AI（我）读取骨架，识别：","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"摘要范围（","type":"text"},{"text":"abstract_cn_range","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":"abstract_en_range","type":"text","marks":[{"type":"code_inline"}]},{"text":"）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"各章节标题段落 idx（","type":"text"},{"text":"title_para","type":"text","marks":[{"type":"code_inline"}]},{"text":"）、正文范围（","type":"text"},{"text":"content_range","type":"text","marks":[{"type":"code_inline"}]},{"text":"）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"各级小节（sections）的 idx 和编号","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"参考文献、致谢、简历的范围","type":"text"}]}]}]},{"type":"paragraph","content":[{"text":"如何输出","type":"text","marks":[{"type":"strong"}]},{"text":"：使用 Write 工具，把 struct.json 写到与 ","type":"text"},{"text":"raw_xxx.json","type":"text","marks":[{"type":"code_inline"}]},{"text":" ","type":"text"},{"text":"同一目录","type":"text","marks":[{"type":"strong"}]},{"text":"，命名为 ","type":"text"},{"text":"struct_\u003c论文标题>.json","type":"text","marks":[{"type":"code_inline"}]},{"text":"（与 raw 文件保持同级）：","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":""},"content":[{"text":"output/\n raw_论文标题.json ← Step 1 生成\n struct_论文标题.json ← AI 用 Write 工具写到这里 ✅\n figures/ ← Step 1 提取的图片","type":"text"}]},{"type":"paragraph","content":[{"text":"写入前必须检查","type":"text","marks":[{"type":"strong"}]},{"text":"（防图表丢失）：","type":"text"}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"列出 ","type":"text"},{"text":"raw_xxx.json","type":"text","marks":[{"type":"code_inline"}]},{"text":" 
中所有图片的 ","type":"text"},{"text":"para_idx","type":"text","marks":[{"type":"code_inline"}]},{"text":" → 确保每个都落在某章 ","type":"text"},{"text":"content_range","type":"text","marks":[{"type":"code_inline"}]},{"text":" 内","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"列出所有表格的 ","type":"text"},{"text":"before_para","type":"text","marks":[{"type":"code_inline"}]},{"text":" → 确保都 ≥ 第一章 ","type":"text"},{"text":"content_range[0]","type":"text","marks":[{"type":"code_inline"}]},{"text":" 且在某章范围内","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"相邻章节 ","type":"text"},{"text":"content_range","type":"text","marks":[{"type":"code_inline"}]},{"text":" ","type":"text"},{"text":"不能有间隙","type":"text","marks":[{"type":"strong"}]}]}]}]},{"type":"paragraph","content":[{"text":"输出 ","type":"text"},{"text":"struct_xxx.json","type":"text","marks":[{"type":"code_inline"}]},{"text":"，格式：","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"json"},"content":[{"text":"{\n \"cover\": {\n \"abstract_cn_range\": [27, 31],\n \"abstract_en_range\": [35, 44],\n \"keywords_cn_para\": 31,\n \"keywords_en_para\": 44\n },\n \"chapters\": [\n {\n \"number\": \"第1章\",\n \"title\": \"引言\",\n \"title_para\": 109,\n \"content_range\": [110, 142],\n \"sections\": [\n {\"level\": 2, \"number\": \"1.1\", \"title\": \"选题背景\", \"title_para\": 110},\n {\"level\": 3, \"number\": \"1.1.1\", \"title\": \"子节标题\", \"title_para\": 115}\n ]\n }\n ],\n \"references_range\": [388, 409],\n \"acknowledgements_range\": [412, 412],\n \"resume_range\": [423, 428]\n}","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Step 3-5：组装、渲染、编译","type":"text"}]},{"type":"paragraph","content":[{"text":"Step 1 已经创建好 LaTeX 工程目录，直接指向它：","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"SKILL_DIR=\"/path/to/skills/thu-thesis\"\nDOCX=\"/path/to/foo.docx\"\n\npython3 
\"$SKILL_DIR/scripts/convert.py\" build \\\n output/raw_foo.json \\\n output/struct_foo.json \\\n \"$(dirname \"$DOCX\")/foo-latex\"","type":"text"}]},{"type":"paragraph","content":[{"text":"自动完成：","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"build_parsed.py","type":"text","marks":[{"type":"code_inline"}]},{"text":"：raw + struct → parsed JSON（含表格、图片正确插入）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"render.py","type":"text","marks":[{"type":"code_inline"}]},{"text":"：parsed JSON → LaTeX 项目","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"xelatex + bibtex","type":"text","marks":[{"type":"code_inline"}]},{"text":"：编译 PDF（3~4 次，保证目录稳定）","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Step 6：AI Rubric 评测 + 自动修复","type":"text"}]},{"type":"paragraph","content":[{"text":"编译完成后，我（AI）逐项检查转换质量，详见下方「Rubric 评测细则」。可自动修复的问题直接修复并重新编译，最多 ","type":"text"},{"text":"3 轮","type":"text","marks":[{"type":"strong"}]},{"text":"；不可修复的问题在报告中标注。最终输出 ","type":"text"},{"text":"evaluation_report.md","type":"text","marks":[{"type":"code_inline"}]},{"text":" 到 LaTeX 
工程目录。","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"文件说明","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"路径","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"说明","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"scripts/convert.py","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"入口，","type":"text"},{"text":"extract","type":"text","marks":[{"type":"code_inline"}]},{"text":" / ","type":"text"},{"text":"build","type":"text","marks":[{"type":"code_inline"}]},{"text":" 两个子命令","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"scripts/extract_raw.py","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Word → raw JSON（纯机械提取，段落/表格/图表）","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"scripts/build_parsed.py","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"raw + struct → parsed JSON（纯 Python，无 
LLM）","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"scripts/render.py","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"parsed JSON → LaTeX 项目（填充模板，生成 BibTeX）","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"assets/templates/*.j2","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Jinja2 模板","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"assets/databk/","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"thuthesis 官方格式示例，格式决策唯一参考","type":"text","marks":[{"type":"strong"}]}]}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Rubric 评测细则（AI 执行）","type":"text"}]},{"type":"blockquote","content":[{"type":"paragraph","content":[{"text":"评测由 AI 直接完成，不使用 Python 脚本。","type":"text","marks":[{"type":"strong"}]},{"text":" AI 阅读生成产物（parsed JSON、.tex 文件、refs.bib、thesis.log、thesis.pdf），按下方 Rubric 逐项打分，可修复问题直接修改后重新编译（最多 3 轮），最终输出 
","type":"text"},{"text":"evaluation_report.md","type":"text","marks":[{"type":"code_inline"}]},{"text":"。","type":"text"}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"评测流程","type":"text"}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"读取产物","type":"text","marks":[{"type":"strong"}]},{"text":"：","type":"text"},{"text":"parsed_xxx.json","type":"text","marks":[{"type":"code_inline"}]},{"text":"、","type":"text"},{"text":"data/*.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":"、","type":"text"},{"text":"ref/refs.bib","type":"text","marks":[{"type":"code_inline"}]},{"text":"、","type":"text"},{"text":"thesis.log","type":"text","marks":[{"type":"code_inline"}]},{"text":"、","type":"text"},{"text":"thesis.pdf","type":"text","marks":[{"type":"code_inline"}]},{"text":"（检查是否存在及大小）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"逐项评分","type":"text","marks":[{"type":"strong"}]},{"text":"：按下方 38 项 Rubric 逐一检查，给出 PASS / WARN / FAIL + 原因","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"自动修复","type":"text","marks":[{"type":"strong"}]},{"text":"：对可修复的 FAIL/WARN 项直接修改 ","type":"text"},{"text":".tex","type":"text","marks":[{"type":"code_inline"}]},{"text":" 文件，然后重新 ","type":"text"},{"text":"xelatex + bibtex","type":"text","marks":[{"type":"code_inline"}]},{"text":" 编译（最多 3 轮）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"输出报告","type":"text","marks":[{"type":"strong"}]},{"text":"：生成 
","type":"text"},{"text":"evaluation_report.md","type":"text","marks":[{"type":"code_inline"}]},{"text":"，含总分、维度得分、所有扣分项明细","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"评分制度","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"类型","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"满分","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"PASS","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"WARN","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"FAIL","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要项","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"3","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"3","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"1.5","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"0","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"
text":"重要项","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"2","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"2","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"1","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"0","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"亮点项","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"1","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"1","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"0.5","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"0","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"失误扣分项","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"0","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"—","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":
"paragraph","content":[{"text":"—","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"每处扣1分，最多扣10","type":"text"}]}]}]}]},{"type":"paragraph","content":[{"text":"评级标准","type":"text","marks":[{"type":"strong"}]},{"text":"：总分/满分 → 优秀≥90% / 良好≥75% / 合格≥60% / 不合格\u003c60%","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"可自动修复 vs 不可修复","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"可自动修复（直接改文件重编译）","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"不可自动修复（报告中标注）","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":".bbl","type":"text","marks":[{"type":"code_inline"}]},{"text":" 出现 ","type":"text"},{"text":"佚名","type":"text","marks":[{"type":"code_inline"}]},{"text":" → 读 refs.bib 找对应条目，手工补全 author 字段，然后重跑 bibtex+xelatex","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"表格/图片无 caption（Word 原文没有，不可凭空生成）","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"author 字段被截断（如 ","type":"text"},{"text":"美国旅游协会（U.S","type":"text","marks":[{"type":"code_inline"}]},{"text":"）→ 直接修 refs.bib 中该条目的 author 字段","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"文献只有 
","type":"text"},{"text":"\\nocite","type":"text","marks":[{"type":"code_inline"}]},{"text":"（正文本来就没有引用，是原文问题）","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"author-year 引用未转 ","type":"text"},{"text":"\\cite","type":"text","marks":[{"type":"code_inline"}]},{"text":" → 在 .tex 中手工补 ","type":"text"},{"text":"\\cite{key}","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"author-year 匹配失败（姓名简称无法映射，原文限制）","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"LaTeX 编译报错（","type":"text"},{"text":"\\&","type":"text","marks":[{"type":"code_inline"}]},{"text":" 转义等）→ 修 .tex 中的特殊字符","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"committee/comments/resolution 占位（答辩后才有内容）","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"\\listoffigures","type":"text","marks":[{"type":"code_inline"}]},{"text":" / ","type":"text"},{"text":"\\listoftables","type":"text","marks":[{"type":"code_inline"}]},{"text":" 缺失 → 补入 ","type":"text"},{"text":"thesis.tex","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"—","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Rubric 明细（38项，满分 90 分）","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"A. 
元信息（11项）","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"ID","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查项","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"类型","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"评判标准","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"A1","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"中文标题","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"thusetup.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":" 中 ","type":"text"},{"text":"title","type":"text","marks":[{"type":"code_inline"}]},{"text":" 
字段。PASS：非空且≥5字。FAIL：缺失","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"A2","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"英文标题","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"title*","type":"text","marks":[{"type":"code_inline"}]},{"text":" 字段。PASS：非空且≥10字符。FAIL：缺失","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"A3","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"作者姓名","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"author","type":"text","marks":[{"type":"code_inline"}]},{"text":" 
字段。PASS：非空。FAIL：缺失","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"A4","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"英文作者名","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"author*","type":"text","marks":[{"type":"code_inline"}]},{"text":" 字段。PASS：非空。WARN：缺失（可人工补充）","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"A5","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"导师信息","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"supervisor","type":"text","marks":[{"type":"code_inline"}]},{"text":" 
字段。PASS：非空且含职称（教授/研究员/副教授/讲师）。WARN：有名无职称。FAIL：缺失","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"A6","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"培养单位","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"department","type":"text","marks":[{"type":"code_inline"}]},{"text":" 字段。PASS：非空。WARN：缺失","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"A7","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"日期格式","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"date","type":"text","marks":[{"type":"code_inline"}]},{"text":" 字段。PASS：格式为 
","type":"text"},{"text":"YYYY-MM","type":"text","marks":[{"type":"code_inline"}]},{"text":"。WARN：缺失或格式异常","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"A8","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"中文摘要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"abstract.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":" 中文摘要内容。PASS：≥50字。WARN：\u003c50字。FAIL：缺失","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"A9","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"英文摘要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查英文摘要内容。PASS：≥100字符。WARN：\u003c100字符。FAIL：缺失","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"A10","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"中文关键词","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content"
:[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"thusetup.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":" 中 ","type":"text"},{"text":"keywords","type":"text","marks":[{"type":"code_inline"}]},{"text":" 字段。PASS：≥2个。WARN：仅1个。FAIL：缺失","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"A11","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"英文关键词","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"keywords*","type":"text","marks":[{"type":"code_inline"}]},{"text":" 字段。PASS：≥2个。WARN：仅1个。FAIL：缺失","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"B. 
正文（5项）","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"ID","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查项","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"类型","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"评判标准","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"B1","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"章节结构","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 parsed JSON 中 chapters。PASS：≥3章且每章≥2个内容块。WARN：有章内容极少（\u003c2块）。FAIL：\u003c3章","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"B2","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"章节 .tex 
文件","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"data/chap*.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":" 文件。PASS：所有文件存在且≥200字符。WARN：文件过短。FAIL：文件缺失","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"B3","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"正文文字总量","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"统计所有章节文本字数。PASS：≥8000字。WARN：3000-8000字。FAIL：\u003c3000字（可能解析失败）","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"B4","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"节级标题","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 .tex 中是否有 
","type":"text"},{"text":"\\section","type":"text","marks":[{"type":"code_inline"}]},{"text":"/","type":"text"},{"text":"\\subsection","type":"text","marks":[{"type":"code_inline"}]},{"text":"。PASS：存在多个节级标题。WARN：未检测到","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"B5","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"目录一致性","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"编译后检查 ","type":"text"},{"text":"thesis.toc","type":"text","marks":[{"type":"code_inline"}]},{"text":"：章标题与 .tex 文件一致，无残留编号（如标题中出现 ","type":"text"},{"text":"1.1","type":"text","marks":[{"type":"code_inline"}]},{"text":" 前缀）。PASS：一致。WARN：有不一致","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"C. 
参考文献（7项）","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"ID","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查项","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"类型","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"评判标准","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"C1","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"参考文献列表","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 parsed JSON 中 references。PASS：≥10条。WARN：\u003c10条。FAIL：为空","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"C2","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"refs.bib 
生成","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"ref/refs.bib","type":"text","marks":[{"type":"code_inline"}]},{"text":" 文件。PASS：存在且有 ","type":"text"},{"text":"@article","type":"text","marks":[{"type":"code_inline"}]},{"text":"/","type":"text"},{"text":"@book","type":"text","marks":[{"type":"code_inline"}]},{"text":"/","type":"text"},{"text":"@misc","type":"text","marks":[{"type":"code_inline"}]},{"text":" 等 BibTeX 条目。FAIL：不存在或为空","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"C3","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"BibTeX 字段质量","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必须同时检查 ","type":"text","marks":[{"type":"strong"}]},{"text":"refs.bib","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" 和 ","type":"text","marks":[{"type":"strong"}]},{"text":"thesis.bbl","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" 两个文件，逐条核查：","type":"text","marks":[{"type":"strong"}]},{"type":"br"},{"text":"①读取","type":"text"},{"text":"refs.bib","type":"text","marks":[{"type":"code_inline"}]},{"text":"，统计 ","type":"text"},{"text":"author","type":"text","marks":[{"type":"code_inline"}]},{"text":" 字段为空 
","type":"text"},{"text":"{}","type":"text","marks":[{"type":"code_inline"}]},{"text":"的条目数；","type":"text"},{"type":"br"},{"text":"②读取","type":"text"},{"text":"thesis.bbl","type":"text","marks":[{"type":"code_inline"}]},{"text":"，搜索 ","type":"text"},{"text":"佚名","type":"text","marks":[{"type":"code_inline"}]},{"text":"字样（bibtex 给无 author 条目的默认值），每出现一次表示有一条参考文献无法正确显示；","type":"text"},{"type":"br"},{"text":"③检查","type":"text"},{"text":"refs.bib","type":"text","marks":[{"type":"code_inline"}]},{"text":" 中 author 值是否存在截断（如以 ","type":"text"},{"text":"（","type":"text","marks":[{"type":"code_inline"}]},{"text":" 结尾、或机构名中间被切断如 ","type":"text"},{"text":"美国旅游协会（U.S","type":"text","marks":[{"type":"code_inline"}]},{"text":"）；","type":"text"},{"type":"br"},{"text":"④检查 title 字段是否异常短（\u003c5字符）或与实际文献标题明显不符。","type":"text"},{"type":"br"},{"text":"PASS：所有条目 author/title 非空，","type":"text"},{"text":".bbl","type":"text","marks":[{"type":"code_inline"}]},{"text":" 无 ","type":"text"},{"text":"佚名","type":"text","marks":[{"type":"code_inline"}]},{"text":"，无截断。WARN：有1-2个问题条目（列出）。FAIL：≥3个问题条目，","type":"text"},{"text":"或 ","type":"text","marks":[{"type":"strong"}]},{"text":".bbl","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" 中出现 ","type":"text","marks":[{"type":"strong"}]},{"text":"佚名","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"C4","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"PDF 
文献完整","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"比较 BibTeX 条目数与 Word 原文参考文献条数。","type":"text"},{"text":"同时读取 ","type":"text","marks":[{"type":"strong"}]},{"text":"thesis.bbl","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":"，检查每条 ","type":"text","marks":[{"type":"strong"}]},{"text":"\\bibitem","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" 的内容是否合理","type":"text","marks":[{"type":"strong"}]},{"text":"（作者/年份/标题是否看起来正确，而不仅是数量匹配）。PASS：条目数一致，","type":"text"},{"text":".bbl","type":"text","marks":[{"type":"code_inline"}]},{"text":" 内容合理。WARN：条目数一致但有个别条目内容异常。FAIL：BibTeX 条目少于原文","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"C5","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"引用覆盖率","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"严格区分 ","type":"text","marks":[{"type":"strong"}]},{"text":"\\cite{}","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" 和 ","type":"text","marks":[{"type":"strong"}]},{"text":"\\nocite{}","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":"：","type":"text"},{"type":"br"},{"text":"①统计章节 .tex 文件（","type":"text"},{"text":"data/chap*.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":"）中 
","type":"text"},{"text":"\\cite{key}","type":"text","marks":[{"type":"code_inline"}]},{"text":"的唯一 key 集合（正文有引用）；","type":"text"},{"type":"br"},{"text":"②统计","type":"text"},{"text":"thesis.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":" 中 ","type":"text"},{"text":"\\nocite{key}","type":"text","marks":[{"type":"code_inline"}]},{"text":"（只进入参考文献列表，正文无引用）；","type":"text"},{"type":"br"},{"text":"③对每个 bib key 归类：有","type":"text"},{"text":"\\cite","type":"text","marks":[{"type":"code_inline"}]},{"text":" / 只有 ","type":"text"},{"text":"\\nocite","type":"text","marks":[{"type":"code_inline"}]},{"text":"/ 都没有。","type":"text"},{"type":"br"},{"text":"PASS：所有 key 都有正文","type":"text"},{"text":"\\cite","type":"text","marks":[{"type":"code_inline"}]},{"text":"。WARN：有 key 只在 ","type":"text"},{"text":"\\nocite","type":"text","marks":[{"type":"code_inline"}]},{"text":"（列出这些 key，说明正文缺少引用）。FAIL：有 key 既无 ","type":"text"},{"text":"\\cite","type":"text","marks":[{"type":"code_inline"}]},{"text":" 也无 ","type":"text"},{"text":"\\nocite","type":"text","marks":[{"type":"code_inline"}]}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"C6","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"cite 关联性","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"扣分","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"随机抽取 5-10 处 
","type":"text"},{"text":"\\cite{key}","type":"text","marks":[{"type":"code_inline"}]},{"text":"，阅读上下文与对应文献，判断是否内容相关。不相关每处扣1分，最多扣10分。抽检样本须列入报告供人工复核","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"C7","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"author-year 引用","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"亮点","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查正文中 ","type":"text"},{"text":"曹玉（2025）","type":"text","marks":[{"type":"code_inline"}]},{"text":"、","type":"text"},{"text":"Smith (2020)","type":"text","marks":[{"type":"code_inline"}]},{"text":" 等行文引用之后是否已补上对应的 ","type":"text"},{"text":"\\cite{key}","type":"text","marks":[{"type":"code_inline"}]},{"text":"（原文保留，仅在其后追加引用）。PASS：无遗漏。WARN：有少量遗漏（列出原文片段）。FAIL：大量行文引用未补引用","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"D. 
图片（4项）","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"ID","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查项","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"类型","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"评判标准","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"D1","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"图片提取数量","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"比较 ","type":"text"},{"text":"figures/","type":"text","marks":[{"type":"code_inline"}]},{"text":" 目录文件数与 parsed JSON 图片数。PASS：一致。WARN：不一致","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"D2","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"图片 
caption","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 .tex 中图片是否有 ","type":"text"},{"text":"\\caption{}","type":"text","marks":[{"type":"code_inline"}]},{"text":"。PASS：全部有。WARN：部分无 caption","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"D3","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"LaTeX 渲染","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查每张图片在 .tex 中是否有对应的 ","type":"text"},{"text":"\\includegraphics","type":"text","marks":[{"type":"code_inline"}]},{"text":"。PASS：图片均被引用。FAIL：有图片但未渲染","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"D4","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"插图清单","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"thesis.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":" 是否含 
","type":"text"},{"text":"\\listoffigures","type":"text","marks":[{"type":"code_inline"}]},{"text":"。PASS：存在且所有图有 caption → 清单完整。WARN：存在但部分图无 caption。FAIL：缺少 ","type":"text"},{"text":"\\listoffigures","type":"text","marks":[{"type":"code_inline"}]},{"text":"（应自动修复）","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"E. 表格（4项）","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"ID","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查项","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"类型","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"评判标准","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"E1","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"表格提取数量","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 parsed JSON 
中表格数量是否合理。PASS：数量合理","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"E2","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"三线表格式","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 .tex 中表格是否使用 ","type":"text"},{"text":"tabularx","type":"text","marks":[{"type":"code_inline"}]},{"text":" + ","type":"text"},{"text":"booktabs","type":"text","marks":[{"type":"code_inline"}]},{"text":"（","type":"text"},{"text":"\\toprule","type":"text","marks":[{"type":"code_inline"}]},{"text":"/","type":"text"},{"text":"\\midrule","type":"text","marks":[{"type":"code_inline"}]},{"text":"/","type":"text"},{"text":"\\bottomrule","type":"text","marks":[{"type":"code_inline"}]},{"text":"），无竖线。PASS：格式合规。FAIL：不合规","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"E3","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"表格 caption","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查表格是否有 ","type":"text"},{"text":"\\caption{}","type":"text","marks":[{"type":"code_inline"}]},{"text":"。PASS：全部有。WARN：部分无 
caption","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"E4","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"附表清单","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"thesis.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":" 是否含 ","type":"text"},{"text":"\\listoftables","type":"text","marks":[{"type":"code_inline"}]},{"text":"。PASS：存在且所有表有 caption → 清单完整。WARN：存在但部分表无 caption。FAIL：缺少 ","type":"text"},{"text":"\\listoftables","type":"text","marks":[{"type":"code_inline"}]},{"text":"（应自动修复）","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"F. 
缩略语（2项）","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"ID","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查项","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"类型","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"评判标准","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"F1","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"缩略语表","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"data/denotation.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":"。PASS：存在且有 ","type":"text"},{"text":"\\item[...]","type":"text","marks":[{"type":"code_inline"}]},{"text":" 
条目。WARN：无条目。FAIL：文件不存在","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"F2","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"孤儿缩略语","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"亮点","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查缩略语表中是否有正文未出现的\"孤儿\"。PASS：无孤儿。WARN：有孤儿但已标注","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"G. 附件（2项）","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"ID","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查项","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"类型","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"评判标准","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"G1","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"致谢","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs"
:{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"data/acknowledgements.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":"。PASS：有实质内容（>50字）。WARN：缺失或过短","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"G2","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"个人简历","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"data/resume.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":"。PASS：有实质内容（>50字）。WARN：缺失或过短","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"H. 
编译（3项）","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"ID","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查项","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"类型","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"评判标准","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"H1","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"PDF 已生成","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"thesis.pdf","type":"text","marks":[{"type":"code_inline"}]},{"text":" 是否存在。PASS：存在且≥50KB。WARN：文件过小（\u003c50KB）。FAIL：不存在","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"H2","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"无 LaTeX 
Error","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"thesis.log","type":"text","marks":[{"type":"code_inline"}]},{"text":"。PASS：无 ","type":"text"},{"text":"LaTeX Error","type":"text","marks":[{"type":"code_inline"}]},{"text":"。WARN：有 ","type":"text"},{"text":"Overfull \\hbox","type":"text","marks":[{"type":"code_inline"}]},{"text":" 警告。FAIL：有 Error","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"H3","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"thusetup 格式","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"重要","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"检查 ","type":"text"},{"text":"thusetup.tex","type":"text","marks":[{"type":"code_inline"}]},{"text":" 是否含 MBA 专业硕士配置（degree=master, degree-type=professional）。PASS：配置正确。WARN：配置异常","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"报告格式","type":"text"}]},{"type":"paragraph","content":[{"text":"评测报告 ","type":"text"},{"text":"evaluation_report.md","type":"text","marks":[{"type":"code_inline"}]},{"text":" 须包含：","type":"text"}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"总分","type":"text","marks":[{"type":"strong"}]},{"text":"：","type":"text"},{"text":"XX / 90 分（XX%）— 
评级","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"维度得分表","type":"text","marks":[{"type":"strong"}]},{"text":"：每个维度（A~H）的得分/满分/得分率","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"扣分明细","type":"text","marks":[{"type":"strong"}]},{"text":"：所有 FAIL / WARN 项列出 ID、检查项、扣分原因（","type":"text"},{"text":"不截断","type":"text","marks":[{"type":"strong"}]},{"text":"）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"C6 抽检样本","type":"text","marks":[{"type":"strong"}]},{"text":"：随机抽取的 cite 关联性样本（文献信息 + 上下文），供人工核查","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"完整明细表","type":"text","marks":[{"type":"strong"}]},{"text":"：38 项逐一列出 ID、类型、满分、得分、说明","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"参考文献处理","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Word 原文参考文献列表 → ","type":"text"},{"text":"ref/refs.bib","type":"text","marks":[{"type":"code_inline"}]},{"text":"（自动解析为 BibTeX）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"正文 ","type":"text"},{"text":"[10]","type":"text","marks":[{"type":"code_inline"}]},{"text":" → ","type":"text"},{"text":"\\cite{key}","type":"text","marks":[{"type":"code_inline"}]},{"text":"，支持 ","type":"text"},{"text":"[1,2,3]","type":"text","marks":[{"type":"code_inline"}]},{"text":" 和 ","type":"text"},{"text":"[1-3]","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Author-year 行文引用自动补 
","type":"text","marks":[{"type":"strong"}]},{"text":"\\cite","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":"：","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"曹玉（2025）分析了...","type":"text","marks":[{"type":"code_inline"}]},{"text":" → ","type":"text"},{"text":"曹玉（2025）\\cite{cao2025aigc}分析了...","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"支持中文全角 ","type":"text"},{"text":"（年）","type":"text","marks":[{"type":"code_inline"}]},{"text":"、英文半角 ","type":"text"},{"text":"(年)","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"匹配失败时保留原文，rubric C7 会警告","type":"text"}]}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"未被引用文献：关键词匹配补 ","type":"text"},{"text":"\\cite","type":"text","marks":[{"type":"code_inline"}]},{"text":"；无匹配用 ","type":"text"},{"text":"\\nocite","type":"text","marks":[{"type":"code_inline"}]},{"text":" 兜底","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"图片和表格处理全链路","type":"text"}]},{"type":"blockquote","content":[{"type":"paragraph","content":[{"text":"图表丢失是最常见的转换问题。","type":"text","marks":[{"type":"strong"}]},{"text":" 根本原因几乎总是 Step 2（AI 生成 struct.json）时 ","type":"text"},{"text":"content_range","type":"text","marks":[{"type":"code_inline"}]},{"text":" 设定不准，导致图表所在段落被排除在章节范围之外。","type":"text"}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"完整流程","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":""},"content":[{"text":"Step 1 extract_raw.py\n ├── 图片：扫描段落 XML 的 a:blip（普通图）/ c:chart（图表对象）\n │ 记录 {filename, para_idx}，文件存入 output/figures/\n └── 表格：扫描 body 元素顺序，记录 {rows, before_para}\n before_para = 表格紧跟在哪个段落之后的 idx\n\nStep 2 AI 生成 struct.json\n └── ⚠️ 关键：content_range 必须覆盖图表所在的 para_idx！\n\nStep 3 build_parsed.py\n ├── 
图片分配：figures_by_para[para_idx] → 章节的 content_range 内 → 插入\n │ caption 检测：图片 para_idx 后 0-2 段内匹配 ^图\\s*\\d\n ├── 表格分配：before_para 落在 content_range 内 → 插入\n │ before_para \u003c first_chap_start → 跳过（封面/目录页表格）\n │ before_para 不在任何章节范围 → 分配给最近章节末尾（extra_tables）\n │ caption 检测：before_para 后 0-2 段内匹配 ^表\\s*\\d\n └── figures/ 目录拷贝至 output_dir/figures/\n\nStep 4 render.py\n ├── 图片：type=figure → \\begin{figure}...\\includegraphics{figures/xxx}\\caption{}\\end{figure}\n │ SVG 跳过并警告；\\caption 自动去掉\"图X-X \"前缀（thuthesis 自动编号）\n ├── 表格：type=table → render_table() 生成三线表 raw_latex 块\n │ 列宽：内容≤12字符宽 → l；>12字符 → X；至少一列为 X\n │ \\caption 自动去掉\"表X-X \"前缀；\\caption 在表格上方\n └── 扫描生成的 .tex：有 \\begin{figure} → 加 \\listoffigures；有 \\begin{table} → 加 \\listoftables","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"⚠️ 图表丢失的常见原因及修复","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"原因 1（最常见）：struct.json 的 content_range 设定偏窄，图表 para_idx 在范围外","type":"text"}]},{"type":"paragraph","content":[{"text":"排查方法","type":"text","marks":[{"type":"strong"}]},{"text":"：检查 ","type":"text"},{"text":"raw_xxx.json","type":"text","marks":[{"type":"code_inline"}]},{"text":" 中图片/表格的 ","type":"text"},{"text":"para_idx","type":"text","marks":[{"type":"code_inline"}]},{"text":" / ","type":"text"},{"text":"before_para","type":"text","marks":[{"type":"code_inline"}]},{"text":"，与 struct.json 的 ","type":"text"},{"text":"content_range","type":"text","marks":[{"type":"code_inline"}]},{"text":" 对比：","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"import json\nraw = json.load(open('output/raw_xxx.json'))\nprint(\"图片 para_idx:\", [(f['filename'], f['para_idx']) for f in raw['figures']])\nprint(\"表格 before_para:\", [(t['idx'], t['before_para']) for t in raw['tables']])","type":"text"}]},{"type":"paragraph","content":[{"text":"修复","type":"text","marks":[{"type":"strong"}]},{"text":"：重新生成 struct.json，确保每章 
","type":"text"},{"text":"content_range[1]","type":"text","marks":[{"type":"code_inline"}]},{"text":"（结束 idx）足够大，覆盖该章所有图表段落，然后重跑 ","type":"text"},{"text":"build","type":"text","marks":[{"type":"code_inline"}]},{"text":" 命令。","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"原因 2：图片/表格出现在章节之间的\"间隙\"段落","type":"text"}]},{"type":"paragraph","content":[{"text":"两章 ","type":"text"},{"text":"content_range","type":"text","marks":[{"type":"code_inline"}]},{"text":" 之间可能有空隙（如过渡段落 idx 130-135 不在任何章节范围内）。","type":"text"},{"text":"build_parsed.py","type":"text","marks":[{"type":"code_inline"}]},{"text":" 会把这些表格用 ","type":"text"},{"text":"extra_tables","type":"text","marks":[{"type":"code_inline"}]},{"text":" 附到最近章节末尾，但图片无此兜底，","type":"text"},{"text":"直接丢失","type":"text","marks":[{"type":"strong"}]},{"text":"。","type":"text"}]},{"type":"paragraph","content":[{"text":"修复","type":"text","marks":[{"type":"strong"}]},{"text":"：struct.json 相邻章节的 ","type":"text"},{"text":"content_range","type":"text","marks":[{"type":"code_inline"}]},{"text":" 不能有间隙，确保：","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":""},"content":[{"text":"第1章 content_range[1] + 1 ≥ 第2章 content_range[0]","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"原因 3：图片的 ","type":"text"},{"text":"para_idx","type":"text","marks":[{"type":"code_inline"}]},{"text":" 为 0 或 None","type":"text"}]},{"type":"paragraph","content":[{"text":"Word 中图片有时嵌在空段落、文本框或表格单元格中，导致 ","type":"text"},{"text":"extract_raw.py","type":"text","marks":[{"type":"code_inline"}]},{"text":" 无法找到对应段落。此时 ","type":"text"},{"text":"para_idx=0","type":"text","marks":[{"type":"code_inline"}]},{"text":"，图片会被分配到第 0 段落，不在任何章节 ","type":"text"},{"text":"content_range","type":"text","marks":[{"type":"code_inline"}]},{"text":" 
内，丢失。","type":"text"}]},{"type":"paragraph","content":[{"text":"排查","type":"text","marks":[{"type":"strong"}]},{"text":"：","type":"text"},{"text":"raw_xxx.json","type":"text","marks":[{"type":"code_inline"}]},{"text":" 中 ","type":"text"},{"text":"figures","type":"text","marks":[{"type":"code_inline"}]},{"text":" 里 ","type":"text"},{"text":"para_idx=0","type":"text","marks":[{"type":"code_inline"}]},{"text":" 且文档实际有图的，属于此类。","type":"text"}]},{"type":"paragraph","content":[{"text":"修复","type":"text","marks":[{"type":"strong"}]},{"text":"：在 struct.json 中找到该图片实际所在的段落（根据骨架文本目视定位），手动在对应章节的 ","type":"text"},{"text":"content_range","type":"text","marks":[{"type":"code_inline"}]},{"text":" 里调整，或在 ","type":"text"},{"text":"parsed_xxx.json","type":"text","marks":[{"type":"code_inline"}]},{"text":" 中手动插入 figure 块后重跑 render。","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"原因 4：caption 未被识别（图/表无编号）","type":"text"}]},{"type":"paragraph","content":[{"text":"caption 检测依赖 ","type":"text"},{"text":"^图\\s*\\d","type":"text","marks":[{"type":"code_inline"}]},{"text":" / ","type":"text"},{"text":"^表\\s*\\d","type":"text","marks":[{"type":"code_inline"}]},{"text":" 正则：","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Word 中 caption 写法是 ","type":"text"},{"text":"图1-1 标题","type":"text","marks":[{"type":"code_inline"}]},{"text":" → 可识别","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"写法是 ","type":"text"},{"text":"图一标题","type":"text","marks":[{"type":"code_inline"}]},{"text":"（中文数字）、","type":"text"},{"text":"Figure 1","type":"text","marks":[{"type":"code_inline"}]},{"text":" 或 caption 与图片不在相邻段落 → ","type":"text"},{"text":"识别失败，caption 为空","type":"text","marks":[{"type":"strong"}]}]}]}]},{"type":"paragraph","content":[{"text":"caption 为空不影响图表出现在 PDF，但插图清单/附表清单会不完整（D4/E4 
WARN）。","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"AI 在 Step 2 生成 struct.json 的图表注意事项","type":"text"}]},{"type":"blockquote","content":[{"type":"paragraph","content":[{"text":"生成 struct.json 时必须执行以下检查，否则极可能导致图表丢失：","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"对照 raw_xxx.json 的图片 ","type":"text","marks":[{"type":"strong"}]},{"text":"para_idx","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" 列表","type":"text","marks":[{"type":"strong"}]},{"text":"，确认每张图的 ","type":"text"},{"text":"para_idx","type":"text","marks":[{"type":"code_inline"}]},{"text":" 都落在某章的 ","type":"text"},{"text":"content_range","type":"text","marks":[{"type":"code_inline"}]},{"text":" 内","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"对照表格的 ","type":"text","marks":[{"type":"strong"}]},{"text":"before_para","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" 列表","type":"text","marks":[{"type":"strong"}]},{"text":"，确认每个 ","type":"text"},{"text":"before_para","type":"text","marks":[{"type":"code_inline"}]},{"text":" 都 ≥ ","type":"text"},{"text":"first_chap_start","type":"text","marks":[{"type":"code_inline"}]},{"text":"（第一章 ","type":"text"},{"text":"content_range[0]","type":"text","marks":[{"type":"code_inline"}]},{"text":"）且落在某章范围内","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"相邻章节的 ","type":"text"},{"text":"content_range","type":"text","marks":[{"type":"code_inline"}]},{"text":" ","type":"text"},{"text":"不能有间隙","type":"text","marks":[{"type":"strong"}]},{"text":"，末尾章节的 ","type":"text"},{"text":"content_range[1]","type":"text","marks":[{"type":"code_inline"}]},{"text":" 
要覆盖到参考文献段之前","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"表格格式（三线表）","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"latex"},"content":[{"text":"\\begin{table}[htbp]\n \\caption{表题}\n \\begin{tabularx}{\\linewidth}{l X}\n \\toprule\n 短列头 & 长文本列头 \\\\\n \\midrule\n 内容1 & 内容2 \\\\\n \\bottomrule\n \\end{tabularx}\n\\end{table}","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"无竖线，三线（","type":"text"},{"text":"\\toprule","type":"text","marks":[{"type":"code_inline"}]},{"text":" / ","type":"text"},{"text":"\\midrule","type":"text","marks":[{"type":"code_inline"}]},{"text":" / ","type":"text"},{"text":"\\bottomrule","type":"text","marks":[{"type":"code_inline"}]},{"text":"）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"短列（≤12字符宽）用 ","type":"text"},{"text":"l","type":"text","marks":[{"type":"code_inline"}]},{"text":"，长文本列用 ","type":"text"},{"text":"X","type":"text","marks":[{"type":"code_inline"}]},{"text":"；至少一列为 ","type":"text"},{"text":"X","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"\\caption","type":"text","marks":[{"type":"code_inline"}]},{"text":" 在 ","type":"text"},{"text":"\\begin{tabularx}","type":"text","marks":[{"type":"code_inline"}]},{"text":" ","type":"text"},{"text":"上方","type":"text","marks":[{"type":"strong"}]},{"text":"（thuthesis 规范：表题在上）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"render.py","type":"text","marks":[{"type":"code_inline"}]},{"text":" 自动去掉 caption 里的\"表X-X \"前缀，由 thuthesis 自动编号","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"图片格式","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"latex"},"content":[{"text":"\\begin{figure}[htbp]\n \\centering\n 
\\includegraphics[width=0.9\\linewidth]{figures/image1.png}\n \\caption{图题}\n \\label{fig:xxx}\n\\end{figure}","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"图片文件放在 LaTeX 工程的 ","type":"text"},{"text":"figures/","type":"text","marks":[{"type":"code_inline"}]},{"text":" 目录","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"render.py","type":"text","marks":[{"type":"code_inline"}]},{"text":" 自动去掉 caption 里的\"图X-X \"前缀，由 thuthesis 自动编号","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"SVG 跳过，需手动转 PNG/PDF 后补入","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"chart 对象（Excel 图表）由 matplotlib 重绘为 PNG，自动纳入流程","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"已知限制","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"SVG 图片跳过（需手动转为 PNG/PDF 后补入）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"committee.tex / comments.tex / resolution.tex 为占位，需手工填写（答辩后）","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"⚠️ .doc 格式必须用 Word 转换，不能用 textutil","type":"text"}]},{"type":"paragraph","content":[{"text":"唯一可靠方法：用 Microsoft Word 打开 .doc 文件，另存为 .docx。","type":"text","marks":[{"type":"strong"}]}]},{"type":"paragraph","content":[{"text":"macOS ","type":"text"},{"text":"textutil","type":"text","marks":[{"type":"code_inline"}]},{"text":"、Python ","type":"text"},{"text":"docx2txt","type":"text","marks":[{"type":"code_inline"}]},{"text":" 等工具","type":"text"},{"text":"会把表格压平为普通段落","type":"text","marks":[{"type":"strong"}]},{"text":"，导致：","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"extract","type":"text","marks":[{"type":"code_inline"}]},{"text":" 步骤报告\"0 
表格\"","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"表格数据变成一行行普通文字进入正文","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"图片和封面元信息也可能丢失","type":"text"}]}]}]},{"type":"paragraph","content":[{"text":"判断转换是否正确的方法","type":"text","marks":[{"type":"strong"}]},{"text":"：运行 ","type":"text"},{"text":"extract","type":"text","marks":[{"type":"code_inline"}]},{"text":" 后，看终端输出的\"📊 表格 X 张\"行：","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"输出","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"原因","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"处理","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"表格: N 张","type":"text","marks":[{"type":"code_inline"}]},{"text":" N > 0","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"转换正确，表格保留","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"正常继续","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"表格: 0 张","type":"text","marks":[{"type":"code_inline"}]},{"text":" 
但论文明显有表","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"转换工具破坏了表格结构","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"必须重新用 Word 另存为 .docx","type":"text","marks":[{"type":"strong"}]}]}]}]}]},{"type":"paragraph","content":[{"text":"如果没有 Word，可在 macOS 用 LibreOffice（需安装）：","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"soffice --headless --convert-to docx /path/to/论文.doc --outdir /path/to/","type":"text"}]},{"type":"paragraph","content":[{"text":"LibreOffice 保留表格结构，textutil 不保留。","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"thuthesis 配置（MBA 专业硕士）","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"latex"},"content":[{"text":"\\thusetup{\n degree = {master},\n degree-type = {professional},\n degree-category = {工商管理硕士},\n degree-category* = {Master of Business Administration},\n department = 
{经济管理学院},\n}","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}}]},"metadata":{"date":"2026-06-05","author":"@skillopedia","source":{"stars":2012,"repo_name":"openclaw-master-skills","origin_url":"https://github.com/leoyeai/openclaw-master-skills/blob/HEAD/skills/thu-thesis/SKILL.md","repo_owner":"leoyeai","body_sha256":"c420aa744c25087e1526628142c3a3eca04a157c23451691306cfe8b471f0be3","cluster_key":"54214a0266219460c3d032ff40086115a8376b80ce7902315e9eb327dd55029a","clean_bundle":{"format":"clean-skill-bundle-v1","source":"leoyeai/openclaw-master-skills/skills/thu-thesis/SKILL.md","attachments":[{"id":"35fe1f70-ea45-500f-93b9-14bf911cdd7a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/35fe1f70-ea45-500f-93b9-14bf911cdd7a/attachment.json","path":"_meta.json","size":1156,"sha256":"2943115f66940d1febb5ba224dfb09f2f31cada48ced7f6100cfcdc0704f81df","contentType":"application/json; charset=utf-8"},{"id":"ad54277f-4094-5789-95d7-47853835f36e","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/ad54277f-4094-5789-95d7-47853835f36e/attachment.md","path":"output/2023211612-王亚玲-论文-latex/evaluation_report.md","size":9357,"sha256":"1c84e75f23167f0dc60115e9c49bbf5044127f121aea3e941be8e7477cdae56f","contentType":"text/markdown; charset=utf-8"},{"id":"e2b8d10d-208d-5dc4-8df0-bc30b6950a85","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e2b8d10d-208d-5dc4-8df0-bc30b6950a85/attachment.json","path":"output/parsed_2023211588-黄杰-肖勇波.json","size":185957,"sha256":"d7a892de01bf0790907f42948cfdf3112a386faadcd8bfa3fdf6bdef8fbcaa89","contentType":"application/json; charset=utf-8"},{"id":"6755334e-452e-53fa-a838-dc52cfc83398","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/6755334e-452e-53fa-a838-dc52cfc83398/attachment.json","path":"output/parsed_2023211600-倪嘉宝-中期报告.json","size":82659,"sha256":"cbdc1fecc84adc2f8816a99f222d1a7fb9730f20392272e7c22e9cad935888d4","contentType":"application/json; 
charset=utf-8"},{"id":"50f618b1-0eb4-5969-aee7-9f0c4ad44079","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/50f618b1-0eb4-5969-aee7-9f0c4ad44079/attachment.json","path":"output/parsed_2023211612-王亚玲-论文.json","size":167631,"sha256":"df8791f43955a6c702b2ca46a193c8bd25d1a6026710f35da29444a0b8fb2dc8","contentType":"application/json; charset=utf-8"},{"id":"833ea582-539d-5b48-af47-bc8df48b1b3c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/833ea582-539d-5b48-af47-bc8df48b1b3c/attachment.json","path":"output/parsed_fgb-毕业论文0319.json","size":157360,"sha256":"ee3084ed5c968d61c099a213aa02b3898b4f4b10664287e47dc4400826d373cc","contentType":"application/json; charset=utf-8"},{"id":"5ab28d3b-3483-5c99-9d41-efe0f1062f4c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/5ab28d3b-3483-5c99-9d41-efe0f1062f4c/attachment.json","path":"output/parsed_test_thesis.json","size":6988,"sha256":"c0929ae2712d3655d1cca85cb085306c7924cd86fc47d3dc876adfaa080e1591","contentType":"application/json; charset=utf-8"},{"id":"4e10087b-2304-5aa3-8b82-cf50872c603a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/4e10087b-2304-5aa3-8b82-cf50872c603a/attachment.json","path":"output/parsed_王智旸-毕业论文.json","size":154575,"sha256":"5c6f43e634925ab67d4de71716b694c480f1aab4bf44c613ece625467fa6342c","contentType":"application/json; charset=utf-8"},{"id":"b3fa13cb-9dfb-5f5d-a58e-c9a718c359ca","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b3fa13cb-9dfb-5f5d-a58e-c9a718c359ca/attachment.json","path":"output/raw_2023211600-倪嘉宝-中期报告.json","size":105464,"sha256":"c072e7a9cc12d162d513a5460107771487bba7d2cfa8324ffc6285e81cb20431","contentType":"application/json; charset=utf-8"},{"id":"880af34b-8f91-5723-8686-d267ba716bca","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/880af34b-8f91-5723-8686-d267ba716bca/attachment.json","path":"output/raw_2023211612-王亚玲-论文.json","size":221666,"sha256":"9a1aba0baca42aae7f601a5988a09fbfe71bcddad4a6b01144597915d5f0b163","contentType":"application/json; 
charset=utf-8"},{"id":"83536cdd-34a2-51fe-aa55-976e5702d22b","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/83536cdd-34a2-51fe-aa55-976e5702d22b/attachment.json","path":"output/raw_fgb-毕业论文0319.json","size":273675,"sha256":"b60163ecc0164f2d73f3457880a09faa974be8347d867b8817d2370aae8ffe76","contentType":"application/json; charset=utf-8"},{"id":"645d431e-d9d6-5654-866b-d74502a58bf7","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/645d431e-d9d6-5654-866b-d74502a58bf7/attachment.json","path":"output/raw_王智旸-毕业论文.json","size":211656,"sha256":"ca27e7e06cec74acbecaab0847d38d5cae17224109fb509afa3ec596629d0258","contentType":"application/json; charset=utf-8"},{"id":"e53b81b5-49b7-5ab5-8f38-978370b6e7e8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e53b81b5-49b7-5ab5-8f38-978370b6e7e8/attachment.json","path":"output/struct_2023211600-倪嘉宝-中期报告.json","size":7249,"sha256":"9cdbf9e390259d8aa1df4bd5ab48d376afd88c52f92d236005fd5082ac196e94","contentType":"application/json; charset=utf-8"},{"id":"c6225a06-4368-5667-8616-a70b623027f7","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c6225a06-4368-5667-8616-a70b623027f7/attachment.json","path":"output/struct_2023211612-王亚玲-论文.json","size":11573,"sha256":"1208c845d4bf92a2be263069ed0418d331dba94522b3458b70b548f3ee297f9e","contentType":"application/json; charset=utf-8"},{"id":"117b71ec-1980-5932-bc2c-10cd3b42dcb8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/117b71ec-1980-5932-bc2c-10cd3b42dcb8/attachment.json","path":"output/struct_fgb-毕业论文0319.json","size":11991,"sha256":"de097cdb134827a577adc93351dc312af0af5c948b837da0e05bf7305efcb0c2","contentType":"application/json; charset=utf-8"},{"id":"17fa4ab8-83fa-5881-9b20-fd34e755494f","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/17fa4ab8-83fa-5881-9b20-fd34e755494f/attachment.json","path":"output/struct_王智旸-毕业论文.json","size":12267,"sha256":"cc2566c4da62b8aa08647ec5e13bdd00c7ed1c0029930702e12dd697c1e33782","contentType":"application/json; 
charset=utf-8"},{"id":"9314a7fc-20b5-5889-bdab-ed2a5c8e2cef","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/9314a7fc-20b5-5889-bdab-ed2a5c8e2cef/attachment.py","path":"scripts/build_parsed.py","size":14024,"sha256":"608e1b92358f4399c51761282b68bc1753ae7297455697abee1f0d46955616e9","contentType":"text/x-python; charset=utf-8"},{"id":"3e4b3b9c-4f8a-5702-9ca4-9be256b2a682","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/3e4b3b9c-4f8a-5702-9ca4-9be256b2a682/attachment.py","path":"scripts/convert.py","size":16211,"sha256":"4514268c42ffd29d6c248aad8e269300acf65f849694ef589912aff2eb3acf8f","contentType":"text/x-python; charset=utf-8"},{"id":"5c9835cf-6ff5-5790-b92e-3a97c0abc37f","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/5c9835cf-6ff5-5790-b92e-3a97c0abc37f/attachment.py","path":"scripts/extract_raw.py","size":10376,"sha256":"7bf06c947252ea8bf04a8f131c63ba2a99157cd7a4e55852a6a1434b3cd4471b","contentType":"text/x-python; charset=utf-8"},{"id":"f3c7178d-62e7-5f26-a028-06a54ddbdcd8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f3c7178d-62e7-5f26-a028-06a54ddbdcd8/attachment.py","path":"scripts/render.py","size":61118,"sha256":"e92ee181aa511363ad49ff02c502142b53e87149090eec752525df6570754475","contentType":"text/x-python; charset=utf-8"},{"id":"a6d39027-c889-56f3-afb8-d32c7641bf5d","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a6d39027-c889-56f3-afb8-d32c7641bf5d/attachment.sh","path":"scripts/setup.sh","size":3389,"sha256":"cf9b6211d0628bdbdb6762d04b144a7159f355420f3fbdbba76d943e32ec8cb1","contentType":"application/x-sh; 
charset=utf-8"}],"bundle_sha256":"d274b2e90ef1e957b2b8ff08502847ab1646a2fc52e11b1eb3b7edeeca383b09","attachment_count":21,"text_attachments":21,"attachment_storage":"skillopedia-attachments-v1","binary_attachments":0,"excluded_attachments":[]},"cluster_size":1,"skill_md_path":"skills/thu-thesis/SKILL.md","import_metadata":{"date":"2026-06-05","author":"@skillopedia","version":"v1","category":"general","category_label":"General"},"exact_dupes_collapsed_into_this":0},"version":"v1","category":"general","import_tag":"clean-skills-v1","_yaml_error":"YAMLException: bad indentation of a mapping entry (2:408)\n\n 1 | ... \n 2 | ... /tmp/thuthesis-latest。Use when: 用户需要把 Word 格式的清华毕业论文转为规范 PDF，或 ...\n-----------------------------------------^"}},"renderedAt":1782980625023}

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.