kimi-pdf — Skillopedia

Route Selection

| Route | Trigger | Route File |
|-------|---------|------------|
| HTML (default) | All PDF creation requests | |
| LaTeX | User explicitly requests LaTeX, .tex, or Tectonic | |
| Process | Work with existing PDFs (extract, merge, fill forms, etc.) | |

Default to HTML. Only use the LaTeX route when the user explicitly requests it.

MANDATORY: Read Route File Before Implementation

<system-reminder> You MUST read the corresponding route file before writing ANY code. Route files contain critical implementation details NOT duplicated here. Skipping this step leads to incorrect output (wron…

, pages_str):\n Output.error(\"InvalidPages\", f\"Invalid page range format: {pages_str}\",\n hint=\"Use digits, commas, and hyphens only. Example: 1-3,5,7-9\")\n\n result = []\n for part in pages_str.split(\",\"):\n part = part.strip()\n if \"-\" in part:\n start, end = part.split(\"-\", 1)\n start = int(start) - 1\n end = int(end)\n result.extend(range(start, min(end, total)))\n else:\n idx = int(part) - 1\n if 0 \u003c= idx \u003c total:\n result.append(idx)\n\n return sorted(set(result))\n\n\ndef _sanitize_text(text: str) -> str:\n \"\"\"Sanitize extracted text to mitigate indirect prompt injection.\n\n Wraps content in boundary markers so consuming LLMs can distinguish\n extracted document content from instructions.\n \"\"\"\n return f\"[BEGIN_PDF_CONTENT]\\n{text}\\n[END_PDF_CONTENT]\"\n\n\ndef extract_text(pdf_path: str, pages: str = None):\n \"\"\"Extract text\"\"\"\n path = Output.check_file(pdf_path)\n\n try:\n pdf = pdfplumber.open(path)\n except Exception as e:\n Output.error(\"PDFError\", f\"Cannot open PDF: {e}\", code=3)\n\n page_indices = _parse_pages(pages, len(pdf.pages))\n\n result = {\n \"total_pages\": len(pdf.pages),\n \"extracted_pages\": len(page_indices),\n \"pages\": []\n }\n\n total_chars = 0\n for idx in page_indices:\n page = pdf.pages[idx]\n text = page.extract_text() or \"\"\n total_chars += len(text)\n\n result[\"pages\"].append({\n \"page\": idx + 1,\n \"chars\": len(text),\n \"text\": _sanitize_text(text)\n })\n\n result[\"total_chars\"] = total_chars\n pdf.close()\n\n Output.success(result)\n\n\ndef extract_table(pdf_path: str, pages: str = None):\n \"\"\"Extract tables\"\"\"\n path = Output.check_file(pdf_path)\n\n try:\n pdf = pdfplumber.open(path)\n except Exception as e:\n Output.error(\"PDFError\", f\"Cannot open PDF: {e}\", code=3)\n\n page_indices = _parse_pages(pages, len(pdf.pages))\n\n result = {\n \"total_pages\": len(pdf.pages),\n \"extracted_pages\": len(page_indices),\n \"tables\": []\n }\n\n for idx in 
page_indices:\n page = pdf.pages[idx]\n tables = page.extract_tables()\n\n for i, table in enumerate(tables):\n if not table:\n continue\n\n # Clean and sanitize table data\n cleaned = []\n for row in table:\n cleaned_row = [cell.strip() if cell else \"\" for cell in row]\n cleaned.append(cleaned_row)\n # Note: table data is structured (list of lists), boundary markers\n # are applied at the page level via the \"data\" wrapper\n\n result[\"tables\"].append({\n \"page\": idx + 1,\n \"table_index\": i,\n \"rows\": len(cleaned),\n \"cols\": len(cleaned[0]) if cleaned else 0,\n \"data\": cleaned\n })\n\n result[\"total_tables\"] = len(result[\"tables\"])\n pdf.close()\n\n Output.success(result)\n\n\ndef extract_image(pdf_path: str, output_dir: str):\n \"\"\"Extract embedded images\"\"\"\n path = Output.check_file(pdf_path)\n out_dir = Output.safe_output_path(output_dir)\n out_dir.mkdir(parents=True, exist_ok=True)\n\n try:\n pdf = pikepdf.open(path)\n except Exception as e:\n Output.error(\"PDFError\", f\"Cannot open PDF: {e}\", code=3)\n\n extracted = []\n image_count = 0\n\n for page_num, page in enumerate(pdf.pages, 1):\n # Get images from page resources\n if \"/Resources\" not in page:\n continue\n\n resources = page.Resources\n if \"/XObject\" not in resources:\n continue\n\n xobjects = resources.XObject\n for name, xobj in xobjects.items():\n if not hasattr(xobj, \"objgen\"):\n continue\n\n try:\n obj = pdf.get_object(xobj.objgen)\n if obj.get(\"/Subtype\") != \"/Image\":\n continue\n\n # Get image info\n width = int(obj.get(\"/Width\", 0))\n height = int(obj.get(\"/Height\", 0))\n color_space = str(obj.get(\"/ColorSpace\", \"\"))\n bits = int(obj.get(\"/BitsPerComponent\", 8))\n\n # Try to extract image data\n image_count += 1\n filter_type = obj.get(\"/Filter\")\n\n # Determine file extension\n if filter_type == \"/DCTDecode\":\n ext = \"jpg\"\n elif filter_type == \"/FlateDecode\":\n ext = \"png\"\n elif filter_type == \"/JPXDecode\":\n ext = \"jp2\"\n else:\n 
ext = \"bin\"\n\n # Save image\n filename = f\"page{page_num}_img{image_count}.{ext}\"\n filepath = out_dir / filename\n\n try:\n raw_data = obj.read_raw_bytes()\n with open(filepath, \"wb\") as f:\n f.write(raw_data)\n\n extracted.append({\n \"page\": page_num,\n \"name\": str(name),\n \"file\": str(filepath),\n \"width\": width,\n \"height\": height,\n \"format\": ext\n })\n except Exception:\n # Some image formats cannot be directly extracted\n pass\n\n except Exception:\n continue\n\n pdf.close()\n\n Output.success({\n \"output_dir\": str(out_dir),\n \"total_images\": len(extracted),\n \"images\": extracted\n })\n","content_type":"text/x-python; charset=utf-8","language":"python","size":6178,"content_sha256":"a9fad96380702cab75b7be2a810dd19f9a662528d99df47a056dc73ba1b43bb7"},{"filename":"scripts/cmd_form.py","content":"\"\"\"Form operation commands\"\"\"\n\nimport pikepdf\nfrom pathlib import Path\nfrom pdf import Output\n\n\ndef _get_field_type(field) -> str:\n \"\"\"Get field type\"\"\"\n ft = str(field.get(\"/FT\", \"\"))\n if ft == \"/Tx\":\n return \"text\"\n elif ft == \"/Btn\":\n # Check if it's a radio button\n ff = int(field.get(\"/Ff\", 0))\n if ff & (1 \u003c\u003c 15): # Radio flag\n return \"radio\"\n return \"checkbox\"\n elif ft == \"/Ch\":\n ff = int(field.get(\"/Ff\", 0))\n if ff & (1 \u003c\u003c 17): # Combo flag\n return \"dropdown\"\n return \"listbox\"\n elif ft == \"/Sig\":\n return \"signature\"\n return \"unknown\"\n\n\ndef _get_field_options(field, field_type: str) -> dict:\n \"\"\"Get field's extra options\"\"\"\n options = {}\n\n if field_type == \"checkbox\":\n # Get checkbox checked/unchecked values\n ap = field.get(\"/AP\")\n if ap and \"/N\" in ap:\n states = [str(k) for k in ap[\"/N\"].keys()]\n options[\"states\"] = states\n options[\"checked_value\"] = next((s for s in states if s != \"/Off\"), states[0] if states else None)\n\n elif field_type in (\"dropdown\", \"listbox\"):\n # Get option list\n opt = field.get(\"/Opt\")\n 
if opt:\n opts = []\n for item in opt:\n if isinstance(item, list) and len(item) >= 2:\n opts.append({\"value\": str(item[0]), \"label\": str(item[1])})\n else:\n opts.append({\"value\": str(item), \"label\": str(item)})\n options[\"options\"] = opts\n\n elif field_type == \"radio\":\n # Radio options are usually in child fields\n kids = field.get(\"/Kids\")\n if kids:\n radio_opts = []\n for kid in kids:\n ap = kid.get(\"/AP\")\n if ap and \"/N\" in ap:\n states = [str(k) for k in ap[\"/N\"].keys() if str(k) != \"/Off\"]\n if states:\n radio_opts.append(states[0])\n options[\"options\"] = radio_opts\n\n return options\n\n\ndef _get_field_value(field) -> str:\n \"\"\"Get field's current value\"\"\"\n v = field.get(\"/V\")\n if v is not None:\n return str(v)\n return None\n\n\ndef _collect_fields(pdf: pikepdf.Pdf) -> list:\n \"\"\"Collect all form fields\"\"\"\n fields = []\n\n if \"/AcroForm\" not in pdf.Root:\n return fields\n\n acroform = pdf.Root.AcroForm\n if \"/Fields\" not in acroform:\n return fields\n\n def process_field(field, parent_name=\"\"):\n \"\"\"Recursively process fields\"\"\"\n # Get field name\n name = str(field.get(\"/T\", \"\"))\n full_name = f\"{parent_name}.{name}\" if parent_name else name\n\n # Check for child fields\n kids = field.get(\"/Kids\")\n if kids:\n # If children have /T, recursively process\n has_named_kids = any(\"/T\" in kid for kid in kids)\n if has_named_kids:\n for kid in kids:\n process_field(kid, full_name)\n return\n\n # Get field type\n field_type = _get_field_type(field)\n if field_type == \"unknown\":\n return\n\n # Build field info\n field_info = {\n \"id\": full_name,\n \"type\": field_type,\n }\n\n # Current value\n current_value = _get_field_value(field)\n if current_value:\n field_info[\"current_value\"] = current_value\n\n # Extra options\n options = _get_field_options(field, field_type)\n field_info.update(options)\n\n # Get page number (via /P reference)\n page_ref = field.get(\"/P\")\n if page_ref:\n for i, 
page in enumerate(pdf.pages):\n if page.objgen == page_ref.objgen:\n field_info[\"page\"] = i + 1\n break\n\n fields.append(field_info)\n\n for field in acroform.Fields:\n process_field(field)\n\n return fields\n\n\ndef form_info(pdf_path: str):\n \"\"\"View form field information\"\"\"\n path = Output.check_file(pdf_path)\n\n try:\n pdf = pikepdf.open(path)\n except Exception as e:\n Output.error(\"PDFError\", f\"Cannot open PDF: {e}\", code=3)\n\n fields = _collect_fields(pdf)\n\n if not fields:\n Output.success({\n \"has_fields\": False,\n \"count\": 0,\n \"fields\": [],\n \"hint\": \"This PDF has no fillable form fields\"\n })\n\n Output.success({\n \"has_fields\": True,\n \"count\": len(fields),\n \"fields\": fields\n })\n\n\ndef form_fill(pdf_path: str, output_path: str, data: dict):\n \"\"\"Fill form fields\n\n data format: {\"field_id\": \"value\", ...}\n \"\"\"\n path = Output.check_file(pdf_path)\n\n try:\n pdf = pikepdf.open(path)\n except Exception as e:\n Output.error(\"PDFError\", f\"Cannot open PDF: {e}\", code=3)\n\n if \"/AcroForm\" not in pdf.Root:\n Output.error(\"NoForm\", \"This PDF has no form fields\")\n\n acroform = pdf.Root.AcroForm\n if \"/Fields\" not in acroform:\n Output.error(\"NoForm\", \"This PDF has no form fields\")\n\n # Collect existing fields for validation\n existing_fields = {f[\"id\"]: f for f in _collect_fields(pdf)}\n\n # Validate input data\n errors = []\n for field_id, value in data.items():\n if field_id not in existing_fields:\n errors.append(f\"Field not found: {field_id}\")\n continue\n\n field_info = existing_fields[field_id]\n field_type = field_info[\"type\"]\n\n # Validate checkbox values\n if field_type == \"checkbox\" and \"states\" in field_info:\n valid_states = field_info[\"states\"]\n # Allow \"true\"/\"false\" as boolean shorthand\n bool_values = (\"true\", \"True\", \"false\", \"False\", \"1\", \"0\")\n if value not in valid_states and f\"/{value}\" not in valid_states and value not in bool_values:\n 
errors.append(f\"Invalid value for field {field_id}, options: {valid_states} or true/false\")\n\n # Validate dropdown/list values\n if field_type in (\"dropdown\", \"listbox\") and \"options\" in field_info:\n valid_values = [opt[\"value\"] for opt in field_info[\"options\"]]\n if value not in valid_values:\n errors.append(f\"Invalid value for field {field_id}, options: {valid_values}\")\n\n if errors:\n Output.error(\"ValidationError\", \"Field validation failed\", hint=\"; \".join(errors))\n\n # Fill fields\n filled_count = 0\n\n def fill_field(field, parent_name=\"\"):\n nonlocal filled_count\n name = str(field.get(\"/T\", \"\"))\n full_name = f\"{parent_name}.{name}\" if parent_name else name\n\n # Check child fields\n kids = field.get(\"/Kids\")\n if kids:\n has_named_kids = any(\"/T\" in kid for kid in kids)\n if has_named_kids:\n for kid in kids:\n fill_field(kid, full_name)\n return\n\n # Fill value\n if full_name in data:\n value = data[full_name]\n field_type = _get_field_type(field)\n\n if field_type == \"checkbox\":\n # Checkbox needs /V and /AS\n if value in (\"true\", \"True\", \"1\", True):\n # Find checked value\n ap = field.get(\"/AP\")\n if ap and \"/N\" in ap:\n checked = next((str(k) for k in ap[\"/N\"].keys() if str(k) != \"/Off\"), \"/Yes\")\n # pikepdf.Name needs / prefix\n name_val = checked if checked.startswith(\"/\") else f\"/{checked}\"\n field[\"/V\"] = pikepdf.Name(name_val)\n field[\"/AS\"] = pikepdf.Name(name_val)\n else:\n field[\"/V\"] = pikepdf.Name(\"/Off\")\n field[\"/AS\"] = pikepdf.Name(\"/Off\")\n else:\n # Text and other types\n field[\"/V\"] = pikepdf.String(str(value))\n\n filled_count += 1\n\n for field in acroform.Fields:\n fill_field(field)\n\n # Mark for appearance regeneration\n if \"/NeedAppearances\" not in acroform:\n acroform[\"/NeedAppearances\"] = True\n\n # Save\n try:\n pdf.save(output_path)\n except Exception as e:\n Output.error(\"SaveError\", f\"Save failed: {e}\", code=4)\n\n Output.success({\n \"output\": 
output_path,\n \"fields_filled\": filled_count,\n \"fields_requested\": len(data)\n })\n","content_type":"text/x-python; charset=utf-8","language":"python","size":8420,"content_sha256":"7ca2ecc14a687ffe20c2d75123f5377f29f7b6a3cd28af1dfbda8715a9c0db2a"},{"filename":"scripts/cmd_meta.py","content":"\"\"\"Metadata operation commands\"\"\"\n\nimport pikepdf\nfrom datetime import datetime\nfrom pdf import Output\n\n\ndef meta_get(pdf_path: str):\n \"\"\"Read metadata\"\"\"\n path = Output.check_file(pdf_path)\n\n try:\n pdf = pikepdf.open(path)\n except Exception as e:\n Output.error(\"PDFError\", f\"Cannot open PDF: {e}\", code=3)\n\n # Basic info\n info = {\n \"pages\": len(pdf.pages),\n \"pdf_version\": str(pdf.pdf_version),\n }\n\n # Page size (first page)\n if pdf.pages:\n first_page = pdf.pages[0]\n mbox = first_page.mediabox\n info[\"page_size\"] = {\n \"width\": float(mbox[2] - mbox[0]),\n \"height\": float(mbox[3] - mbox[1]),\n \"unit\": \"pt\"\n }\n\n # Document info dictionary\n metadata = {}\n if pdf.docinfo:\n for key in pdf.docinfo.keys():\n try:\n value = pdf.docinfo[key]\n # Convert to string\n if hasattr(value, \"__str__\"):\n metadata[str(key).lstrip(\"/\")] = str(value)\n except:\n pass\n\n info[\"metadata\"] = metadata\n\n # Is encrypted\n info[\"encrypted\"] = pdf.is_encrypted\n\n # Has form\n info[\"has_form\"] = \"/AcroForm\" in pdf.Root\n\n # Has bookmarks\n info[\"has_outlines\"] = \"/Outlines\" in pdf.Root\n\n pdf.close()\n\n Output.success(info)\n\n\ndef meta_set(pdf_path: str, output_path: str, data: dict):\n \"\"\"Set metadata\n\n Supported fields:\n - Title: Document title\n - Author: Author name\n - Subject: Subject\n - Keywords: Keywords\n - Creator: Creator application\n - Producer: Producer application\n \"\"\"\n path = Output.check_file(pdf_path)\n\n try:\n pdf = pikepdf.open(path)\n except Exception as e:\n Output.error(\"PDFError\", f\"Cannot open PDF: {e}\", code=3)\n\n # Allowed metadata fields\n allowed_keys = {\"Title\", 
\"Author\", \"Subject\", \"Keywords\", \"Creator\", \"Producer\"}\n\n # Create or update docinfo\n with pdf.open_metadata() as meta:\n # Use XMP metadata (more modern approach)\n updated = []\n for key, value in data.items():\n # Normalize key\n normalized_key = key.title()\n if normalized_key not in allowed_keys:\n continue\n\n # Map to XMP namespace\n xmp_map = {\n \"Title\": \"dc:title\",\n \"Author\": \"dc:creator\",\n \"Subject\": \"dc:description\",\n \"Keywords\": \"pdf:Keywords\",\n \"Creator\": \"xmp:CreatorTool\",\n \"Producer\": \"pdf:Producer\"\n }\n\n if normalized_key in xmp_map:\n try:\n meta[xmp_map[normalized_key]] = str(value)\n updated.append(normalized_key)\n except:\n pass\n\n # Also update docinfo (for compatibility)\n if not pdf.docinfo:\n pdf.docinfo = pikepdf.Dictionary()\n\n for key, value in data.items():\n normalized_key = key.title()\n if normalized_key in allowed_keys:\n pdf.docinfo[pikepdf.Name(f\"/{normalized_key}\")] = pikepdf.String(str(value))\n\n # Update modification date\n now = datetime.now().strftime(\"D:%Y%m%d%H%M%S\")\n pdf.docinfo[pikepdf.Name(\"/ModDate\")] = pikepdf.String(now)\n\n try:\n pdf.save(output_path)\n pdf.close()\n except Exception as e:\n Output.error(\"SaveError\", f\"Save failed: {e}\", code=4)\n\n Output.success({\n \"output\": output_path,\n \"updated_fields\": list(data.keys())\n })\n","content_type":"text/x-python; charset=utf-8","language":"python","size":3532,"content_sha256":"6561db265f3d0516d07d5860ac07e62e50cddcf8909294f1059f1abbefcb6bcb"},{"filename":"scripts/cmd_pages.py","content":"\"\"\"Page operation commands\"\"\"\n\nimport pikepdf\nfrom pathlib import Path\nfrom pdf import Output\n\n\ndef _parse_pages(pages_str: str, total: int) -> list:\n \"\"\"Parse page range string, returns 0-indexed list\"\"\"\n if not pages_str:\n return list(range(total))\n\n result = []\n for part in pages_str.split(\",\"):\n part = part.strip()\n if \"-\" in part:\n start, end = part.split(\"-\", 1)\n start = 
int(start) - 1\n end = int(end)\n result.extend(range(start, min(end, total)))\n else:\n idx = int(part) - 1\n if 0 \u003c= idx \u003c total:\n result.append(idx)\n\n return sorted(set(result))\n\n\ndef pages_merge(pdf_paths: list, output_path: str):\n \"\"\"Merge multiple PDFs\"\"\"\n # Validate all files exist\n paths = []\n for p in pdf_paths:\n path = Output.check_file(p)\n paths.append(path)\n\n opened_pdfs = [] # Track opened PDFs for cleanup\n try:\n output_pdf = pikepdf.new()\n sources = []\n\n for path in paths:\n src = pikepdf.open(path)\n opened_pdfs.append(src) # Track for cleanup\n page_count = len(src.pages)\n sources.append(f\"{path.name} ({page_count} pages)\")\n\n for page in src.pages:\n output_pdf.pages.append(page)\n\n total_pages = len(output_pdf.pages)\n output_pdf.save(output_path)\n output_pdf.close()\n\n # Close all source PDFs\n for src in opened_pdfs:\n src.close()\n\n except Exception as e:\n # Cleanup on error\n for src in opened_pdfs:\n try:\n src.close()\n except:\n pass\n Output.error(\"MergeError\", f\"Merge failed: {e}\", code=4)\n\n Output.success({\n \"output\": output_path,\n \"total_pages\": total_pages,\n \"sources\": sources\n })\n\n\ndef pages_split(pdf_path: str, output_dir: str):\n \"\"\"Split PDF into single-page files\"\"\"\n path = Output.check_file(pdf_path)\n out_dir = Output.safe_output_path(output_dir)\n out_dir.mkdir(parents=True, exist_ok=True)\n\n try:\n pdf = pikepdf.open(path)\n except Exception as e:\n Output.error(\"PDFError\", f\"Cannot open PDF: {e}\", code=3)\n\n stem = path.stem\n outputs = []\n\n try:\n for i, page in enumerate(pdf.pages, 1):\n out_path = out_dir / f\"{stem}_page{i:03d}.pdf\"\n single = pikepdf.new()\n single.pages.append(page)\n single.save(out_path)\n single.close()\n outputs.append(str(out_path))\n\n pdf.close()\n\n except Exception as e:\n Output.error(\"SplitError\", f\"Split failed: {e}\", code=4)\n\n Output.success({\n \"output_dir\": str(out_dir),\n \"total_pages\": 
len(outputs),\n \"files\": outputs\n })\n\n\ndef pages_rotate(pdf_path: str, degrees: int, output_path: str, pages: str = None):\n \"\"\"Rotate pages\"\"\"\n path = Output.check_file(pdf_path)\n\n if degrees not in (90, 180, 270):\n Output.error(\"InvalidDegrees\", \"Rotation angle must be 90, 180, or 270\")\n\n try:\n pdf = pikepdf.open(path)\n except Exception as e:\n Output.error(\"PDFError\", f\"Cannot open PDF: {e}\", code=3)\n\n page_indices = _parse_pages(pages, len(pdf.pages))\n\n try:\n for idx in page_indices:\n page = pdf.pages[idx]\n current = int(page.get(\"/Rotate\", 0))\n page[\"/Rotate\"] = (current + degrees) % 360\n\n pdf.save(output_path)\n pdf.close()\n\n except Exception as e:\n Output.error(\"RotateError\", f\"Rotation failed: {e}\", code=4)\n\n Output.success({\n \"output\": output_path,\n \"degrees\": degrees,\n \"pages_rotated\": len(page_indices)\n })\n\n\ndef pages_crop(pdf_path: str, box: str, output_path: str, pages: str = None):\n \"\"\"Crop pages\n\n box format: \"left,bottom,right,top\" (in pt)\n \"\"\"\n path = Output.check_file(pdf_path)\n\n # Parse crop box\n try:\n parts = [float(x.strip()) for x in box.split(\",\")]\n if len(parts) != 4:\n raise ValueError()\n left, bottom, right, top = parts\n except:\n Output.error(\"InvalidBox\", \"Invalid crop box format, should be: left,bottom,right,top\", hint=\"Example: 50,50,550,750\")\n\n try:\n pdf = pikepdf.open(path)\n except Exception as e:\n Output.error(\"PDFError\", f\"Cannot open PDF: {e}\", code=3)\n\n page_indices = _parse_pages(pages, len(pdf.pages))\n\n try:\n for idx in page_indices:\n page = pdf.pages[idx]\n page.mediabox = pikepdf.Array([left, bottom, right, top])\n # Also set cropbox\n page.cropbox = pikepdf.Array([left, bottom, right, top])\n\n pdf.save(output_path)\n pdf.close()\n\n except Exception as e:\n Output.error(\"CropError\", f\"Crop failed: {e}\", code=4)\n\n Output.success({\n \"output\": output_path,\n \"box\": {\"left\": left, \"bottom\": bottom, 
\"right\": right, \"top\": top},\n \"pages_cropped\": len(page_indices)\n })\n","content_type":"text/x-python; charset=utf-8","language":"python","size":5029,"content_sha256":"45fd6099ee88208439f9a08cec07439ee6dba37f84058a98699055c86168c928"},{"filename":"scripts/compile_latex.py","content":"#!/usr/bin/env python3\n\"\"\"\nLaTeX Compilation Script - Filter logs and report PDF stats\n\nUsage:\n python3 compile_latex.py main.tex\n python3 compile_latex.py main.tex --runs 2\n python3 compile_latex.py main.tex --runs 3 --keep-logs\n\"\"\"\n\nimport argparse\nimport re\nimport subprocess\nimport sys\nimport os\nimport shutil\nfrom pathlib import Path\n\n# Log patterns to filter out\nFILTER_PATTERNS = [\n r'^note: \"version 2\" Tectonic command-line interface activated',\n r'^note: Running TeX',\n r'^note: Rerunning TeX because',\n r'^note: Running xdvipdfmx',\n r'^note: downloading ',\n r'^note: Skipped writing .* intermediate files',\n]\n\n# Compiled filter regex\nfilter_regex = re.compile('|'.join(FILTER_PATTERNS))\n\n\ndef find_tectonic():\n \"\"\"\n Find tectonic executable.\n Priority:\n 1. ~/tectonic (user home directory)\n 2. 
tectonic in system PATH\n \"\"\"\n # Check home directory\n home_tectonic = Path.home() / 'tectonic'\n if home_tectonic.exists() and os.access(home_tectonic, os.X_OK):\n return str(home_tectonic)\n\n # Check PATH\n tectonic_path = shutil.which('tectonic')\n if tectonic_path:\n return tectonic_path\n\n return None\n\n\ndef format_size(size_bytes):\n \"\"\"Format file size to human readable string\"\"\"\n for unit in ['B', 'KB', 'MB', 'GB']:\n if size_bytes \u003c 1024.0:\n return f\"{size_bytes:.2f} {unit}\"\n size_bytes /= 1024.0\n return f\"{size_bytes:.2f} TB\"\n\n\ndef extract_pdf_info(pdf_path):\n \"\"\"\n Extract PDF info: pages, word count, image count.\n Uses pypdf library for accurate statistics.\n \"\"\"\n try:\n from pypdf import PdfReader\n except ImportError:\n # If pypdf not installed, try to install\n print(\"Installing pypdf library...\", file=sys.stderr)\n # Try different install methods\n for install_cmd in [\n [sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"pypdf\"],\n [sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"--break-system-packages\", \"pypdf\"],\n [sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"--user\", \"pypdf\"],\n ]:\n result = subprocess.run(install_cmd, check=False, capture_output=True)\n if result.returncode == 0:\n break\n\n try:\n from pypdf import PdfReader\n except ImportError:\n return None, None, None\n\n try:\n reader = PdfReader(pdf_path)\n\n # Page count\n num_pages = len(reader.pages)\n\n # Extract text and count words\n text = \"\"\n for page in reader.pages:\n text += page.extract_text()\n\n # Word count (excluding whitespace)\n word_count = len([w for w in text.split() if w.strip()])\n\n # Image count (count /Image objects)\n image_count = 0\n for page in reader.pages:\n if '/XObject' in page['/Resources']:\n xobjects = page['/Resources']['/XObject'].get_object()\n for obj in xobjects:\n if xobjects[obj]['/Subtype'] == '/Image':\n image_count += 1\n\n return num_pages, word_count, 
image_count\n\n except Exception as e:\n print(f\"Error extracting PDF info: {e}\", file=sys.stderr)\n return None, None, None\n\n\ndef filter_logs(output_lines):\n \"\"\"\n Filter logs and return:\n - errors list\n - warnings list\n - layout issues list\n - PDF file info line\n \"\"\"\n errors = []\n warnings = []\n layout_issues = []\n pdf_info_line = None\n\n for line in output_lines:\n line = line.rstrip()\n\n # Skip empty lines\n if not line:\n continue\n\n # Check if line should be filtered\n if filter_regex.match(line):\n # Check for PDF output info\n if line.startswith('note: Writing'):\n pdf_info_line = line\n continue\n\n # Collect errors\n if line.startswith('error:'):\n errors.append(line)\n\n # Collect warnings\n elif line.startswith('warning:'):\n warnings.append(line)\n\n # Collect layout issues (Overfull/Underfull hbox/vbox)\n elif re.search(r'(Overfull|Underfull) \\\\[hv]box', line):\n layout_issues.append(line)\n\n # Collect font-related issues\n elif re.search(r'(Font shape|Missing character)', line):\n layout_issues.append(line)\n\n return errors, warnings, layout_issues, pdf_info_line\n\n\ndef parse_pdf_info_line(line):\n \"\"\"Parse PDF filename and size from log line\"\"\"\n # note: Writing `test1_simple.pdf` (22.77 KiB)\n match = re.search(r\"Writing `(.+?)` \$(.+?)\$\", line)\n if match:\n return match.group(1), match.group(2)\n return None, None\n\n\ndef compile_latex(tex_file, runs=1, keep_logs=False):\n \"\"\"\n Compile LaTeX file.\n\n Args:\n tex_file: TeX file path\n runs: Number of compilation runs (for cross-references)\n keep_logs: Whether to keep full logs\n \"\"\"\n tex_path = Path(tex_file)\n\n if not tex_path.exists():\n print(f\"✗ Error: File not found {tex_file}\")\n return 1\n\n print(f\"Compiling {tex_path.name}...\", flush=True)\n if runs > 1:\n print(f\"Running {runs} passes (for cross-references)\", flush=True)\n\n # Collect all compilation output\n all_output = []\n success = False\n\n # Find tectonic command\n 
tectonic_cmd = find_tectonic()\n if not tectonic_cmd:\n print(\"\\n✗ Error: tectonic command not found\")\n print(\"Please install tectonic: https://tectonic-typesetting.github.io/\")\n print(\"\\nHint: If installed at ~/tectonic, ensure it has execute permission:\")\n print(\" chmod +x ~/tectonic\")\n return 1\n\n # Multiple compilation passes\n for run in range(runs):\n try:\n result = subprocess.run(\n [tectonic_cmd, '-X', 'compile', str(tex_path)],\n capture_output=True,\n text=True,\n timeout=120 # 2 minute timeout\n )\n\n # Merge stdout and stderr\n output = result.stdout + result.stderr\n all_output.extend(output.splitlines())\n\n if result.returncode == 0:\n success = True\n else:\n success = False\n break\n\n except subprocess.TimeoutExpired:\n print(\"\\n✗ Error: Compilation timeout (>2 minutes)\")\n return 1\n except Exception as e:\n print(f\"\\n✗ Error: {e}\")\n return 1\n\n # If user requested full logs\n if keep_logs:\n print(\"\\n\" + \"=\"*50)\n print(\"Full logs:\")\n print(\"=\"*50)\n for line in all_output:\n print(line)\n print(\"=\"*50 + \"\\n\")\n\n # Filter logs\n errors, warnings, layout_issues, pdf_info_line = filter_logs(all_output)\n\n # Parse PDF info\n pdf_filename = None\n pdf_size_str = None\n if pdf_info_line:\n pdf_filename, pdf_size_str = parse_pdf_info_line(pdf_info_line)\n\n # If filename not found in logs, infer from input\n if not pdf_filename:\n pdf_filename = tex_path.stem + '.pdf'\n\n pdf_path = tex_path.parent / pdf_filename\n\n # Output results\n print()\n if success:\n if warnings or layout_issues:\n print(\"✓ Compilation successful (with warnings)\")\n else:\n print(\"✓ Compilation successful\")\n else:\n print(\"✗ Compilation failed\")\n\n # Output PDF info\n if success and pdf_path.exists():\n print()\n print(\"========================\")\n print(\"PDF Information\")\n print(\"========================\")\n print(f\"File: {pdf_filename}\")\n\n # File size\n if pdf_size_str:\n print(f\"Size: {pdf_size_str}\")\n else:\n 
size = pdf_path.stat().st_size\n print(f\"Size: {format_size(size)}\")\n\n # Extract page count, word count, image count\n num_pages, word_count, image_count = extract_pdf_info(str(pdf_path))\n\n if num_pages is not None:\n print(f\"Pages: {num_pages}\")\n if word_count is not None:\n print(f\"Words: ~{word_count:,}\")\n if image_count is not None:\n print(f\"Images: {image_count}\")\n\n # Output layout issues\n if layout_issues:\n print()\n print(\"========================\")\n print(f\"Layout Issues ({len(layout_issues)})\")\n print(\"========================\")\n for issue in layout_issues:\n print(issue)\n\n # Output warnings\n if warnings:\n print()\n print(\"========================\")\n print(f\"Warnings ({len(warnings)})\")\n print(\"========================\")\n for warning in warnings:\n # Remove \"warning: \" prefix\n clean_warning = warning.replace('warning: ', '', 1)\n print(clean_warning)\n\n # Output errors\n if errors:\n print()\n print(\"========================\")\n print(\"Errors\")\n print(\"========================\")\n for error in errors:\n # Remove \"error: \" prefix\n clean_error = error.replace('error: ', '', 1)\n print(clean_error)\n\n # Output system reminder\n if success and (layout_issues or warnings):\n print()\n print(\"\u003csystem-reminder>\")\n print(f\"Detected {len(layout_issues)} layout issues and {len(warnings)} warnings.\")\n print(\"These issues affect PDF typesetting quality and must be fixed.\")\n print(\"Do not dismiss with 'warnings don't affect output'. 
Fix all issues.\")\n print(\"\u003c/system-reminder>\")\n\n return 0 if success else 1\n\n\ndef main():\n parser = argparse.ArgumentParser(\n description='Compile LaTeX file, filter logs, and report PDF stats'\n )\n parser.add_argument('tex_file', help='TeX file path')\n parser.add_argument('--runs', type=int, default=1,\n help='Number of compilation runs (for cross-references, default: 1)')\n parser.add_argument('--keep-logs', action='store_true',\n help='Keep full compilation logs')\n\n args = parser.parse_args()\n\n return compile_latex(args.tex_file, args.runs, args.keep_logs)\n\n\nif __name__ == '__main__':\n sys.exit(main())\n","content_type":"text/x-python; charset=utf-8","language":"python","size":10204,"content_sha256":"cc54af5e83afbac197fbfa099d828e61b8d6c44f2059bf2efb7dbaa53ca26f79"},{"filename":"scripts/package-lock.json","content":"{\n \"name\": \"scripts\",\n \"lockfileVersion\": 3,\n \"requires\": true,\n \"packages\": {\n \"\": {\n \"dependencies\": {\n \"playwright\": \"^1.58.1\"\n }\n },\n \"node_modules/fsevents\": {\n \"version\": \"2.3.2\",\n \"resolved\": \"https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz\",\n \"integrity\": \"sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==\",\n \"hasInstallScript\": true,\n \"license\": \"MIT\",\n \"optional\": true,\n \"os\": [\n \"darwin\"\n ],\n \"engines\": {\n \"node\": \"^8.16.0 || ^10.6.0 || >=11.0.0\"\n }\n },\n \"node_modules/playwright\": {\n \"version\": \"1.58.1\",\n \"resolved\": \"https://registry.npmjs.org/playwright/-/playwright-1.58.1.tgz\",\n \"integrity\": \"sha512-+2uTZHxSCcxjvGc5C891LrS1/NlxglGxzrC4seZiVjcYVQfUa87wBL6rTDqzGjuoWNjnBzRqKmF6zRYGMvQUaQ==\",\n \"license\": \"Apache-2.0\",\n \"dependencies\": {\n \"playwright-core\": \"1.58.1\"\n },\n \"bin\": {\n \"playwright\": \"cli.js\"\n },\n \"engines\": {\n \"node\": \">=18\"\n },\n \"optionalDependencies\": {\n \"fsevents\": \"2.3.2\"\n }\n },\n \"node_modules/playwright-core\": 
{\n \"version\": \"1.58.1\",\n \"resolved\": \"https://registry.npmjs.org/playwright-core/-/playwright-core-1.58.1.tgz\",\n \"integrity\": \"sha512-bcWzOaTxcW+VOOGBCQgnaKToLJ65d6AqfLVKEWvexyS3AS6rbXl+xdpYRMGSRBClPvyj44njOWoxjNdL/H9UNg==\",\n \"license\": \"Apache-2.0\",\n \"bin\": {\n \"playwright-core\": \"cli.js\"\n },\n \"engines\": {\n \"node\": \">=18\"\n }\n }\n }\n}\n","content_type":"application/json; charset=utf-8","language":"json","size":1591,"content_sha256":"514f2fca37d07282e4bb2addade996058e4acdedf427ae7406440858dbf81899"},{"filename":"scripts/package.json","content":"{\n \"type\": \"commonjs\",\n \"dependencies\": {\n \"playwright\": \"^1.58.1\"\n }\n}\n","content_type":"application/json; charset=utf-8","language":"json","size":78,"content_sha256":"3dc6c686470c40aaf810642a67410ecb26bbb635ac5d192ad1e8bbc122c19362"},{"filename":"scripts/pdf.py","content":"#!/usr/bin/env python3\n\"\"\"\nPDF Processing Tool - Unified Entry Point\n\nUsage:\n pdf.py form info \u003cpdf> View form fields\n pdf.py form fill \u003cpdf> -o \u003cout> -d \u003cjson> Fill form\n pdf.py extract text \u003cpdf> Extract text\n pdf.py extract table \u003cpdf> Extract tables\n pdf.py extract image \u003cpdf> -o \u003cdir> Extract images\n pdf.py pages merge \u003cpdf>... 
-o \u003cout> Merge PDFs\n pdf.py pages split \u003cpdf> -o \u003cdir> Split PDF\n pdf.py pages rotate \u003cpdf> \u003cdeg> -o \u003cout> Rotate pages\n pdf.py pages crop \u003cpdf> \u003cbox> -o \u003cout> Crop pages\n pdf.py meta get \u003cpdf> Read metadata\n pdf.py meta set \u003cpdf> -o \u003cout> -d \u003cjson> Set metadata\n pdf.py convert \u003cfile> -o \u003cout> Convert to PDF\n\"\"\"\n\nimport argparse\nimport json\nimport sys\nfrom pathlib import Path\n\n# Add script directory to path for relative imports\nsys.path.insert(0, str(Path(__file__).parent))\n\n# Output utilities\nclass Output:\n \"\"\"Unified output handling\"\"\"\n\n @staticmethod\n def success(data: dict):\n \"\"\"Success output to stdout\"\"\"\n print(json.dumps({\"status\": \"success\", \"data\": data}, ensure_ascii=False, indent=2))\n sys.exit(0)\n\n @staticmethod\n def error(error: str, message: str, hint: str = None, code: int = 1):\n \"\"\"Error output to stderr\"\"\"\n result = {\"status\": \"error\", \"error\": error, \"message\": message}\n if hint:\n result[\"hint\"] = hint\n print(json.dumps(result, ensure_ascii=False, indent=2), file=sys.stderr)\n sys.exit(code)\n\n @staticmethod\n def check_file(path: str) -> Path:\n \"\"\"Check if file exists\"\"\"\n p = Path(path)\n if not p.exists():\n Output.error(\"FileNotFound\", f\"File not found: {path}\", code=2)\n return p\n\n @staticmethod\n def safe_output_path(output_path: str) -> Path:\n \"\"\"Validate output path to prevent path traversal.\n\n Ensures the resolved output path stays within the current\n working directory. Rejects absolute paths and any path that\n resolves outside cwd (e.g. 
../../etc/passwd).\n \"\"\"\n p = Path(output_path)\n cwd = Path.cwd().resolve()\n resolved = (cwd / p).resolve() if not p.is_absolute() else p.resolve()\n\n if not str(resolved).startswith(str(cwd)):\n Output.error(\n \"PathTraversal\",\n f\"Output path escapes working directory: {output_path}\",\n hint=\"Use a relative path within the current directory\"\n )\n return resolved\n\n\n# ============ form commands ============\ndef cmd_form_info(args):\n \"\"\"View form fields\"\"\"\n from cmd_form import form_info\n form_info(args.pdf)\n\n\ndef cmd_form_fill(args):\n \"\"\"Fill form\"\"\"\n from cmd_form import form_fill\n\n # Parse data\n if args.data:\n try:\n data = json.loads(args.data)\n except json.JSONDecodeError as e:\n Output.error(\"InvalidJSON\", f\"JSON parse error: {e}\")\n elif args.file:\n try:\n with open(args.file) as f:\n data = json.load(f)\n except Exception as e:\n Output.error(\"FileError\", f\"Failed to read file: {e}\")\n else:\n Output.error(\"MissingData\", \"Requires --data or --file argument\")\n\n form_fill(args.pdf, args.output, data)\n\n\n# ============ extract commands ============\ndef cmd_extract_text(args):\n \"\"\"Extract text\"\"\"\n from cmd_extract import extract_text\n extract_text(args.pdf, pages=args.pages)\n\n\ndef cmd_extract_table(args):\n \"\"\"Extract tables\"\"\"\n from cmd_extract import extract_table\n extract_table(args.pdf, pages=args.pages)\n\n\ndef cmd_extract_image(args):\n \"\"\"Extract images\"\"\"\n from cmd_extract import extract_image\n extract_image(args.pdf, args.output)\n\n\n# ============ pages commands ============\ndef cmd_pages_merge(args):\n \"\"\"Merge PDFs\"\"\"\n from cmd_pages import pages_merge\n pages_merge(args.pdfs, args.output)\n\n\ndef cmd_pages_split(args):\n \"\"\"Split PDF\"\"\"\n from cmd_pages import pages_split\n pages_split(args.pdf, args.output)\n\n\ndef cmd_pages_rotate(args):\n \"\"\"Rotate pages\"\"\"\n from cmd_pages import pages_rotate\n pages_rotate(args.pdf, args.degrees, 
args.output, pages=args.pages)\n\n\ndef cmd_pages_crop(args):\n \"\"\"Crop pages\"\"\"\n from cmd_pages import pages_crop\n pages_crop(args.pdf, args.box, args.output, pages=args.pages)\n\n\n# ============ meta commands ============\ndef cmd_meta_get(args):\n \"\"\"Read metadata\"\"\"\n from cmd_meta import meta_get\n meta_get(args.pdf)\n\n\ndef cmd_meta_set(args):\n \"\"\"Set metadata\"\"\"\n from cmd_meta import meta_set\n\n if args.data:\n try:\n data = json.loads(args.data)\n except json.JSONDecodeError as e:\n Output.error(\"InvalidJSON\", f\"JSON parse error: {e}\")\n else:\n Output.error(\"MissingData\", \"Requires --data argument\")\n\n meta_set(args.pdf, args.output, data)\n\n\n# ============ convert command ============\ndef cmd_convert(args):\n \"\"\"Convert to PDF\"\"\"\n from cmd_convert import convert_to_pdf\n convert_to_pdf(args.file, args.output)\n\n\n# ============ main entry ============\ndef main():\n parser = argparse.ArgumentParser(\n description=\"PDF Processing Tool\",\n formatter_class=argparse.RawDescriptionHelpFormatter\n )\n subparsers = parser.add_subparsers(dest=\"command\", help=\"Available commands\")\n\n # --- form ---\n form_parser = subparsers.add_parser(\"form\", help=\"Form operations\")\n form_sub = form_parser.add_subparsers(dest=\"subcommand\")\n\n form_info_p = form_sub.add_parser(\"info\", help=\"View form fields\")\n form_info_p.add_argument(\"pdf\", help=\"PDF file\")\n form_info_p.set_defaults(func=cmd_form_info)\n\n form_fill_p = form_sub.add_parser(\"fill\", help=\"Fill form\")\n form_fill_p.add_argument(\"pdf\", help=\"Input PDF\")\n form_fill_p.add_argument(\"-o\", \"--output\", required=True, help=\"Output PDF\")\n form_fill_p.add_argument(\"-d\", \"--data\", help=\"Field values in JSON format\")\n form_fill_p.add_argument(\"-f\", \"--file\", help=\"JSON file path\")\n form_fill_p.set_defaults(func=cmd_form_fill)\n\n # --- extract ---\n extract_parser = subparsers.add_parser(\"extract\", help=\"Content 
extraction\")\n extract_sub = extract_parser.add_subparsers(dest=\"subcommand\")\n\n extract_text_p = extract_sub.add_parser(\"text\", help=\"Extract text\")\n extract_text_p.add_argument(\"pdf\", help=\"PDF file\")\n extract_text_p.add_argument(\"-p\", \"--pages\", help=\"Page range, e.g., 1-3,5\")\n extract_text_p.set_defaults(func=cmd_extract_text)\n\n extract_table_p = extract_sub.add_parser(\"table\", help=\"Extract tables\")\n extract_table_p.add_argument(\"pdf\", help=\"PDF file\")\n extract_table_p.add_argument(\"-p\", \"--pages\", help=\"Page range, e.g., 1-3,5\")\n extract_table_p.set_defaults(func=cmd_extract_table)\n\n extract_image_p = extract_sub.add_parser(\"image\", help=\"Extract images\")\n extract_image_p.add_argument(\"pdf\", help=\"PDF file\")\n extract_image_p.add_argument(\"-o\", \"--output\", default=\".\", help=\"Output directory\")\n extract_image_p.set_defaults(func=cmd_extract_image)\n\n # --- pages ---\n pages_parser = subparsers.add_parser(\"pages\", help=\"Page operations\")\n pages_sub = pages_parser.add_subparsers(dest=\"subcommand\")\n\n pages_merge_p = pages_sub.add_parser(\"merge\", help=\"Merge PDFs\")\n pages_merge_p.add_argument(\"pdfs\", nargs=\"+\", help=\"PDF files to merge\")\n pages_merge_p.add_argument(\"-o\", \"--output\", required=True, help=\"Output PDF\")\n pages_merge_p.set_defaults(func=cmd_pages_merge)\n\n pages_split_p = pages_sub.add_parser(\"split\", help=\"Split PDF\")\n pages_split_p.add_argument(\"pdf\", help=\"PDF file\")\n pages_split_p.add_argument(\"-o\", \"--output\", default=\".\", help=\"Output directory\")\n pages_split_p.set_defaults(func=cmd_pages_split)\n\n pages_rotate_p = pages_sub.add_parser(\"rotate\", help=\"Rotate pages\")\n pages_rotate_p.add_argument(\"pdf\", help=\"PDF file\")\n pages_rotate_p.add_argument(\"degrees\", type=int, choices=[90, 180, 270], help=\"Rotation angle\")\n pages_rotate_p.add_argument(\"-o\", \"--output\", required=True, help=\"Output PDF\")\n 
pages_rotate_p.add_argument(\"-p\", \"--pages\", help=\"Page range, e.g., 1-3,5 (default: all)\")\n pages_rotate_p.set_defaults(func=cmd_pages_rotate)\n\n pages_crop_p = pages_sub.add_parser(\"crop\", help=\"Crop pages\")\n pages_crop_p.add_argument(\"pdf\", help=\"PDF file\")\n pages_crop_p.add_argument(\"box\", help=\"Crop box left,bottom,right,top (in pt)\")\n pages_crop_p.add_argument(\"-o\", \"--output\", required=True, help=\"Output PDF\")\n pages_crop_p.add_argument(\"-p\", \"--pages\", help=\"Page range (default: all)\")\n pages_crop_p.set_defaults(func=cmd_pages_crop)\n\n # --- meta ---\n meta_parser = subparsers.add_parser(\"meta\", help=\"Metadata operations\")\n meta_sub = meta_parser.add_subparsers(dest=\"subcommand\")\n\n meta_get_p = meta_sub.add_parser(\"get\", help=\"Read metadata\")\n meta_get_p.add_argument(\"pdf\", help=\"PDF file\")\n meta_get_p.set_defaults(func=cmd_meta_get)\n\n meta_set_p = meta_sub.add_parser(\"set\", help=\"Set metadata\")\n meta_set_p.add_argument(\"pdf\", help=\"Input PDF\")\n meta_set_p.add_argument(\"-o\", \"--output\", required=True, help=\"Output PDF\")\n meta_set_p.add_argument(\"-d\", \"--data\", required=True, help=\"Metadata in JSON format\")\n meta_set_p.set_defaults(func=cmd_meta_set)\n\n # --- convert ---\n convert_parser = subparsers.add_parser(\"convert\", help=\"Convert to PDF\")\n convert_parser.add_argument(\"file\", help=\"Input file (docx/pptx/xlsx, etc.)\")\n convert_parser.add_argument(\"-o\", \"--output\", help=\"Output PDF (default: same name)\")\n convert_parser.set_defaults(func=cmd_convert)\n\n # Parse and execute\n args = parser.parse_args()\n\n if not args.command:\n parser.print_help()\n sys.exit(0)\n\n if hasattr(args, 'func'):\n args.func(args)\n else:\n parser.parse_args([args.command, \"-h\"])\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; 
charset=utf-8","language":"python","size":10000,"content_sha256":"cc83373eab2019eba3763b8a9cfc2d1c3ba0b688af4c2b1fa2a95d681dec5722"},{"filename":"scripts/pdf.sh","content":"#!/usr/bin/env bash\n# Unified PDF Skill CLI (aligns with SKILL.md Quick Start)\nset -euo pipefail\n\nSCRIPT_DIR=\"$(cd \"$(dirname \"${BASH_SOURCE[0]}\")\" && pwd)\"\nSETUP_SCRIPT=\"$SCRIPT_DIR/setup.sh\"\nHTML_SCRIPT=\"$SCRIPT_DIR/html_to_pdf.js\"\nLATEX_SCRIPT=\"$SCRIPT_DIR/compile_latex.py\"\nPROCESS_SCRIPT=\"$SCRIPT_DIR/pdf.py\"\n\nusage() {\n cat \u003c\u003c'EOF'\nUsage: pdf.sh \u003ccommand> [options]\n\nCommands:\n check [--json] Run environment diagnostics (setup.sh)\n fix Install/repair dependencies (npm playwright, chromium, pip deps)\n html \u003cargs...> Convert HTML to PDF (delegates to html_to_pdf.js)\n latex \u003cargs...> Compile LaTeX with compile_latex.py\n process \u003cargs...> Run python pdf.py process commands (form/extract/pages/...)\nEOF\n exit 1\n}\n\nappend_node_path() {\n local global_root\n if command -v npm &>/dev/null; then\n global_root=$(npm root -g 2>/dev/null || true)\n if [[ -n \"$global_root\" ]]; then\n if [[ -z \"${NODE_PATH:-}\" ]]; then\n export NODE_PATH=\"$global_root\"\n elif [[ \":$NODE_PATH:\" != *\":$global_root:\"* ]]; then\n export NODE_PATH=\"$NODE_PATH:$global_root\"\n fi\n fi\n fi\n}\n\ncmd_check() {\n set +e\n \"$SETUP_SCRIPT\" \"$@\"\n local rc=$?\n set -e\n exit \"$rc\"\n}\n\ncmd_fix() {\n local rc=0\n\n # Pinned dependency versions for reproducible, auditable installs\n local PLAYWRIGHT_VERSION=\"1.58.1\"\n local PIKEPDF_VERSION=\"9.7.0\"\n local PDFPLUMBER_VERSION=\"0.11.6\"\n\n if command -v npm &>/dev/null; then\n echo \"Installing Playwright@${PLAYWRIGHT_VERSION} (global)...\"\n if ! npm install -g \"playwright@${PLAYWRIGHT_VERSION}\" >/dev/null; then\n echo \"Failed to install Playwright via npm.\"\n rc=3\n fi\n\n echo \"Installing Chromium browser...\"\n if ! 
npx playwright install chromium >/dev/null; then\n echo \"Failed to install Chromium via Playwright.\"\n rc=3\n fi\n else\n echo \"npm not found; cannot install Playwright automatically.\"\n rc=2\n fi\n\n if command -v python3 &>/dev/null; then\n echo \"Installing Python dependencies (pikepdf==${PIKEPDF_VERSION}, pdfplumber==${PDFPLUMBER_VERSION})...\"\n if ! python3 -m pip install --user \"pikepdf==${PIKEPDF_VERSION}\" \"pdfplumber==${PDFPLUMBER_VERSION}\" >/dev/null; then\n echo \"Failed to install Python dependencies.\"\n rc=3\n fi\n else\n echo \"python3 not found; cannot install PDF processing dependencies.\"\n rc=2\n fi\n\n echo \"Checking environment after fix...\"\n if ! \"$SETUP_SCRIPT\" >/dev/null; then\n rc=3\n fi\n\n exit \"$rc\"\n}\n\ncmd_html() {\n if [[ $# -eq 0 ]]; then\n echo \"Missing html command arguments.\"\n usage\n fi\n if ! command -v node &>/dev/null; then\n echo \"node not found; run pdf.sh fix first.\"\n exit 2\n fi\n append_node_path\n if ! node \"$HTML_SCRIPT\" \"$@\"; then\n exit 3\n fi\n}\n\ncmd_latex() {\n if [[ $# -eq 0 ]]; then\n echo \"Missing latex command arguments.\"\n usage\n fi\n if ! command -v python3 &>/dev/null; then\n echo \"python3 not found; run pdf.sh fix first.\"\n exit 2\n fi\n if ! python3 \"$LATEX_SCRIPT\" \"$@\"; then\n exit 3\n fi\n}\n\ncmd_process() {\n if [[ $# -eq 0 ]]; then\n echo \"Missing process command arguments.\"\n usage\n fi\n if ! command -v python3 &>/dev/null; then\n echo \"python3 not found; run pdf.sh fix first.\"\n exit 2\n fi\n if ! 
python3 \"$PROCESS_SCRIPT\" \"$@\"; then\n exit 3\n fi\n}\n\nmain() {\n if [[ $# -lt 1 ]]; then\n usage\n fi\n\n local command=\"$1\"\n shift || true\n\n case \"$command\" in\n check)\n cmd_check \"$@\"\n ;;\n fix)\n if [[ $# -ne 0 ]]; then\n usage\n fi\n cmd_fix\n ;;\n html)\n cmd_html \"$@\"\n ;;\n latex)\n cmd_latex \"$@\"\n ;;\n process)\n cmd_process \"$@\"\n ;;\n *)\n usage\n ;;\n esac\n}\n\nmain \"$@\"\n","content_type":"application/x-sh; charset=utf-8","language":"bash","size":3743,"content_sha256":"7072349824a0b69a34b41441deb2aa4c2c6c19a0ef6c576c198475b7c8c49454"},{"filename":"scripts/setup.sh","content":"#!/usr/bin/env bash\n# PDF Skill Environment Check\n# Usage: setup.sh [--json]\n#\n# This script checks environment status AND version compatibility.\n# It does NOT auto-install anything.\n# Agent decides whether to install based on output.\n#\n# Key checks:\n# 1. Required dependencies (node, playwright, chromium, python3, etc.)\n# 2. Playwright-Chromium version compatibility (CRITICAL)\n# 3. 
Node.js and Python Playwright version sync (if both installed)\n\nset -e\n\nSCRIPT_DIR=\"$(cd \"$(dirname \"${BASH_SOURCE[0]}\")\" && pwd)\"\n\n# ============================================================================\n# Dependency Detection\n# ============================================================================\n\ncheck_node() {\n if command -v node &>/dev/null; then\n echo \"ok:$(node --version 2>/dev/null | sed 's/v//')\"\n else\n echo \"missing\"\n fi\n}\n\n# Get installed Playwright npm version\nget_playwright_npm_version() {\n # Try global first\n local ver=$(npm list -g playwright 2>/dev/null | grep playwright@ | sed 's/.*@//' | head -1)\n if [[ -n \"$ver\" ]]; then\n echo \"$ver\"\n return\n fi\n # Try npx\n ver=$(npx playwright --version 2>/dev/null | grep -oE '[0-9]+\\.[0-9]+\\.[0-9]+' | head -1)\n if [[ -n \"$ver\" ]]; then\n echo \"$ver\"\n return\n fi\n echo \"\"\n}\n\n# Get installed Playwright Python version\nget_playwright_python_version() {\n python3 -c \"import playwright; print(playwright.__version__)\" 2>/dev/null || echo \"\"\n}\n\n# Get Chromium revision from installed browser\nget_installed_chromium_revision() {\n local cache=\"\"\n case \"$(uname -s)\" in\n Darwin) cache=\"$HOME/Library/Caches/ms-playwright\" ;;\n *) cache=\"$HOME/.cache/ms-playwright\" ;;\n esac\n\n if [[ ! -d \"$cache\" ]]; then\n echo \"\"\n return\n fi\n\n # Find chromium directories - supports multiple naming conventions:\n # - chromium-XXXX (older format)\n # - chrome-XXXX\n # - chromium_headless_shell-XXXX (newer format)\n # Get the highest revision number\n local revision=$(ls -1 \"$cache\" 2>/dev/null | grep -oE '(chromium|chrome|chromium_headless_shell)-[0-9]+' | grep -oE '[0-9]+

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.

| sort -rn | head -1)\n echo \"$revision\"\n}\n\n# Get expected Chromium revision from Playwright\nget_expected_chromium_revision() {\n # Run playwright install with dry-run to see what it expects\n local output=$(npx playwright install chromium --dry-run 2>&1 || true)\n\n # If already installed, it says \"chromium-XXXX is already installed\" or similar\n # Supports: chromium-XXXX, chrome-XXXX, chromium_headless_shell-XXXX\n local already=$(echo \"$output\" | grep -oE '(chromium|chrome|chromium_headless_shell)-[0-9]+' | grep -oE '[0-9]+

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.

| head -1)\n if [[ -n \"$already\" ]]; then\n echo \"$already\"\n return\n fi\n\n # If needs download, extract from \"playwright build vXXXX\"\n local build=$(echo \"$output\" | grep -oE 'playwright build v[0-9]+' | sed 's/playwright build v//' | head -1)\n if [[ -n \"$build\" ]]; then\n echo \"$build\"\n return\n fi\n\n echo \"\"\n}\n\n# Verify browser is actually usable by checking executablePath exists\n# This is more reliable than directory checks because it uses the same\n# require('playwright') resolution that html_to_pdf.js uses\nverify_browser_usable() {\n # Require node to be installed\n if ! command -v node &>/dev/null; then\n echo \"error:node not installed\"\n return\n fi\n\n local helper=\"$SCRIPT_DIR/browser_helper.js\"\n if [[ ! -f \"$helper\" ]]; then\n echo \"error:helper missing\"\n return\n fi\n\n local output\n output=$(node \"$helper\" 2>/dev/null)\n local code=$?\n\n if [[ -z \"$output\" ]]; then\n if [[ $code -eq 0 ]]; then\n echo \"error:unknown\"\n else\n echo \"error:browser probe failed\"\n fi\n else\n echo \"$output\"\n fi\n}\n\ncheck_playwright() {\n local ver=$(get_playwright_npm_version)\n if [[ -n \"$ver\" ]]; then\n echo \"ok:$ver\"\n else\n echo \"missing\"\n fi\n}\n\ncheck_chromium() {\n local revision=$(get_installed_chromium_revision)\n if [[ -n \"$revision\" ]]; then\n echo \"ok:build-$revision\"\n else\n echo \"missing\"\n fi\n}\n\n# Check if Playwright and Chromium versions match\ncheck_playwright_chromium_match() {\n local playwright_ver=$(get_playwright_npm_version)\n if [[ -z \"$playwright_ver\" ]]; then\n echo \"playwright_missing\"\n return\n fi\n\n local installed=$(get_installed_chromium_revision)\n if [[ -z \"$installed\" ]]; then\n echo \"chromium_missing\"\n return\n fi\n\n local expected=$(get_expected_chromium_revision)\n if [[ -z \"$expected\" ]]; then\n # Can't determine expected version, assume ok if both exist\n echo \"ok:unknown\"\n return\n fi\n\n if [[ \"$installed\" == \"$expected\" ]]; then\n echo 
\"ok:$installed\"\n else\n echo \"mismatch:have=$installed,need=$expected\"\n fi\n}\n\n# Check if Node.js and Python Playwright versions match (if both installed)\ncheck_playwright_versions_sync() {\n local npm_ver=$(get_playwright_npm_version)\n local py_ver=$(get_playwright_python_version)\n\n if [[ -z \"$npm_ver\" ]] && [[ -z \"$py_ver\" ]]; then\n echo \"none_installed\"\n return\n fi\n\n if [[ -z \"$npm_ver\" ]] || [[ -z \"$py_ver\" ]]; then\n # Only one installed, no sync issue\n echo \"ok:single\"\n return\n fi\n\n # Compare major.minor (ignore patch for flexibility)\n local npm_major_minor=$(echo \"$npm_ver\" | cut -d. -f1,2)\n local py_major_minor=$(echo \"$py_ver\" | cut -d. -f1,2)\n\n if [[ \"$npm_major_minor\" == \"$py_major_minor\" ]]; then\n echo \"ok:$npm_ver\"\n else\n echo \"mismatch:npm=$npm_ver,python=$py_ver\"\n fi\n}\n\ncheck_python() {\n if command -v python3 &>/dev/null; then\n echo \"ok:$(python3 --version 2>/dev/null | cut -d' ' -f2)\"\n else\n echo \"missing\"\n fi\n}\n\ncheck_pymod() {\n local mod=\"$1\"\n local ver=$(python3 -c \"import $mod; print(getattr($mod, '__version__', 'installed'))\" 2>/dev/null)\n if [[ -n \"$ver\" ]]; then\n echo \"ok:$ver\"\n else\n echo \"missing\"\n fi\n}\n\ncheck_libreoffice() {\n local paths=(\"soffice\" \"libreoffice\" \"/Applications/LibreOffice.app/Contents/MacOS/soffice\")\n for p in \"${paths[@]}\"; do\n if command -v \"$p\" &>/dev/null || [[ -x \"$p\" ]]; then\n echo \"ok\"\n return\n fi\n done\n echo \"missing\"\n}\n\ncheck_tectonic() {\n if command -v tectonic &>/dev/null || [[ -x \"$HOME/tectonic\" ]]; then\n echo \"ok\"\n else\n echo \"missing\"\n fi\n}\n\n# ============================================================================\n# Status helpers\n# ============================================================================\n\nstatus_is_ok() {\n local status=\"$1\"\n [[ \"$status\" == ok* ]]\n}\n\nstatus_state() {\n local status=\"$1\"\n if [[ -z \"$status\" ]]; then\n echo \"\"\n elif [[ 
\"$status\" == *:* ]]; then\n echo \"${status%%:*}\"\n else\n echo \"$status\"\n fi\n}\n\nstatus_detail() {\n local status=\"$1\"\n if [[ \"$status\" == *:* ]]; then\n echo \"${status#*:}\"\n else\n echo \"\"\n fi\n}\n\ncollect_statuses() {\n NODE_STATUS=$(check_node)\n PLAYWRIGHT_STATUS=$(check_playwright)\n CHROMIUM_STATUS=$(check_chromium)\n BROWSER_STATUS=$(verify_browser_usable)\n MATCH_STATUS=$(check_playwright_chromium_match)\n SYNC_STATUS=$(check_playwright_versions_sync)\n PYTHON_STATUS=$(check_python)\n PIKEPDF_STATUS=$(check_pymod pikepdf)\n PDFPLUMBER_STATUS=$(check_pymod pdfplumber)\n LIBREOFFICE_STATUS=$(check_libreoffice)\n TECTONIC_STATUS=$(check_tectonic)\n PLAYWRIGHT_PY_VERSION=$(get_playwright_python_version)\n CHROMIUM_REVISION=$(get_installed_chromium_revision)\n}\n\n# ============================================================================\n# Output\n# ============================================================================\n\nshow_status() {\n local name=\"$1\" status=\"$2\" optional=\"$3\"\n local state=$(echo \"$status\" | cut -d: -f1)\n local detail=$(echo \"$status\" | cut -d: -f2-)\n\n if [[ \"$state\" == \"ok\" ]]; then\n if [[ \"$detail\" != \"ok\" && -n \"$detail\" ]]; then\n echo \"✓ $name ($detail)\"\n else\n echo \"✓ $name\"\n fi\n elif [[ \"$state\" == \"mismatch\" ]]; then\n echo \"⚠ $name (MISMATCH: $detail)\"\n elif [[ \"$optional\" == \"optional\" ]]; then\n echo \"○ $name (optional, not installed)\"\n else\n echo \"✗ $name (missing)\"\n fi\n}\n\ncmd_check() {\n collect_statuses\n local exit_code=0\n\n echo \"=== PDF Skill Environment ===\"\n echo \"\"\n echo \"--- HTML Route ---\"\n show_status \"node\" \"$NODE_STATUS\"\n status_is_ok \"$NODE_STATUS\" || exit_code=2\n show_status \"playwright\" \"$PLAYWRIGHT_STATUS\"\n status_is_ok \"$PLAYWRIGHT_STATUS\" || exit_code=2\n show_status \"chromium\" \"$CHROMIUM_STATUS\"\n\n # Direct browser usability check (most reliable)\n local browser_state\n 
browser_state=$(status_state \"$BROWSER_STATUS\")\n local browser_detail\n browser_detail=$(status_detail \"$BROWSER_STATUS\")\n local browser_fallback=0\n\n if [[ \"$browser_state\" == \"ok\" ]]; then\n echo \"✓ browser executable verified\"\n elif [[ \"$browser_state\" == \"fallback\" ]]; then\n browser_fallback=1\n echo \"✓ browser executable found (existing install)\"\n if [[ -n \"$browser_detail\" ]]; then\n echo \" Path: $browser_detail\"\n fi\n elif [[ \"$browser_state\" == \"missing\" ]]; then\n echo \"\"\n echo \"⚠️ BROWSER EXECUTABLE NOT FOUND\"\n if [[ -n \"$browser_detail\" ]]; then\n echo \" Expected at: $browser_detail\"\n fi\n echo \" Fix: npx playwright install chromium\"\n echo \"\"\n exit_code=2\n elif [[ \"$browser_state\" == \"error\" ]]; then\n echo \"\"\n echo \"⚠️ BROWSER CHECK FAILED\"\n echo \" Error: $browser_detail\"\n echo \" Fix: npm install -g playwright && npx playwright install chromium\"\n echo \"\"\n exit_code=2\n else\n echo \"\"\n echo \"⚠️ Unable to verify browser executable automatically.\"\n echo \" Falling back to version check...\"\n echo \"\"\n fi\n\n local match_state\n match_state=$(status_state \"$MATCH_STATUS\")\n if [[ \"$browser_state\" == \"fallback\" ]]; then\n match_state=\"ok\"\n fi\n if [[ \"$match_state\" == \"mismatch\" ]]; then\n local details\n details=$(status_detail \"$MATCH_STATUS\")\n echo \"\"\n echo \"⚠️ PLAYWRIGHT-CHROMIUM VERSION MISMATCH\"\n echo \" $details\"\n echo \" Fix: npx playwright install chromium\"\n echo \"\"\n if [[ \"$browser_state\" != \"ok\" && \"$browser_state\" != \"fallback\" ]]; then\n exit_code=2\n fi\n elif [[ \"$match_state\" == \"chromium_missing\" && \"$browser_state\" != \"missing\" ]]; then\n echo \"\"\n echo \"⚠️ CHROMIUM NOT INSTALLED\"\n echo \" Fix: npx playwright install chromium\"\n echo \"\"\n exit_code=2\n elif [[ \"$match_state\" == \"playwright_missing\" && \"$browser_state\" != \"error\" ]]; then\n echo \"\"\n echo \"⚠️ PLAYWRIGHT NOT INSTALLED\"\n echo \" Fix: 
npm install -g playwright && npx playwright install chromium\"\n echo \"\"\n exit_code=2\n fi\n\n local sync_state\n sync_state=$(status_state \"$SYNC_STATUS\")\n\n if [[ \"$sync_state\" == \"mismatch\" ]]; then\n local details\n details=$(status_detail \"$SYNC_STATUS\")\n echo \"\"\n echo \"⚠️ NODE.JS AND PYTHON PLAYWRIGHT VERSION MISMATCH\"\n echo \" $details\"\n echo \" This may cause issues if both are used.\"\n echo \" Fix: Upgrade both to same version, then reinstall chromium:\"\n echo \" npm install -g playwright@latest\"\n echo \" pip install playwright --upgrade\"\n echo \" npx playwright install chromium\"\n echo \"\"\n exit_code=2\n fi\n\n echo \"\"\n echo \"--- Process Route ---\"\n show_status \"python3\" \"$PYTHON_STATUS\"\n status_is_ok \"$PYTHON_STATUS\" || exit_code=2\n show_status \"pikepdf\" \"$PIKEPDF_STATUS\"\n status_is_ok \"$PIKEPDF_STATUS\" || exit_code=2\n show_status \"pdfplumber\" \"$PDFPLUMBER_STATUS\"\n status_is_ok \"$PDFPLUMBER_STATUS\" || exit_code=2\n\n # Show Python Playwright version if installed\n if [[ -n \"$PLAYWRIGHT_PY_VERSION\" ]]; then\n echo \" (playwright-python: $PLAYWRIGHT_PY_VERSION)\"\n fi\n\n echo \"\"\n echo \"--- Optional ---\"\n show_status \"libreoffice\" \"$LIBREOFFICE_STATUS\" optional\n show_status \"tectonic\" \"$TECTONIC_STATUS\" optional\n\n echo \"\"\n echo \"=== Install Commands ===\"\n echo \" Node.js: brew install node (macOS) / apt install nodejs (Ubuntu)\"\n echo \" Playwright: npm install -g playwright && npx playwright install chromium\"\n echo \" Python: brew install python3 (macOS) / apt install python3 (Ubuntu)\"\n echo \" pikepdf: pip install pikepdf pdfplumber --user\"\n echo \" LibreOffice: brew install --cask libreoffice (macOS)\"\n echo \" Tectonic: brew install tectonic (macOS) / cargo install tectonic (Linux)\"\n\n echo \"\"\n echo \"=== Fix Version Mismatch ===\"\n echo \" npx playwright install chromium # Reinstall browser to match current Playwright\"\n\n exit 
\"$exit_code\"\n}\n\ncmd_json() {\n collect_statuses\n local exit_code=0\n\n cat \u003c\u003cEOF\n{\n \"html_route\": {\n \"node\": \"$(status_state \"$NODE_STATUS\")\",\n \"node_version\": \"$(status_detail \"$NODE_STATUS\")\",\n \"playwright\": \"$(status_state \"$PLAYWRIGHT_STATUS\")\",\n \"playwright_version\": \"$(status_detail \"$PLAYWRIGHT_STATUS\")\",\n \"chromium\": \"$(status_state \"$CHROMIUM_STATUS\")\",\n \"chromium_revision\": \"$CHROMIUM_REVISION\",\n \"browser_usable\": \"$(status_state \"$BROWSER_STATUS\")\",\n \"browser_path\": \"$(status_detail \"$BROWSER_STATUS\")\",\n \"playwright_chromium_match\": \"$(status_state \"$MATCH_STATUS\")\",\n \"playwright_chromium_detail\": \"$(status_detail \"$MATCH_STATUS\")\"\n },\n \"process_route\": {\n \"python3\": \"$(status_state \"$PYTHON_STATUS\")\",\n \"python3_version\": \"$(status_detail \"$PYTHON_STATUS\")\",\n \"pikepdf\": \"$(status_state \"$PIKEPDF_STATUS\")\",\n \"pdfplumber\": \"$(status_state \"$PDFPLUMBER_STATUS\")\",\n \"playwright_python_version\": \"$PLAYWRIGHT_PY_VERSION\"\n },\n \"version_sync\": {\n \"node_python_playwright_sync\": \"$(status_state \"$SYNC_STATUS\")\",\n \"sync_detail\": \"$(status_detail \"$SYNC_STATUS\")\"\n },\n \"optional\": {\n \"libreoffice\": \"$(status_state \"$LIBREOFFICE_STATUS\")\",\n \"tectonic\": \"$(status_state \"$TECTONIC_STATUS\")\"\n }\n}\nEOF\n\n for required in \"$NODE_STATUS\" \"$PLAYWRIGHT_STATUS\" \"$CHROMIUM_STATUS\" \"$PYTHON_STATUS\" \"$PIKEPDF_STATUS\" \"$PDFPLUMBER_STATUS\"; do\n if ! 
status_is_ok \"$required\"; then\n exit_code=2\n break\n fi\n done\n\n local browser_state\n browser_state=$(status_state \"$BROWSER_STATUS\")\n if [[ \"$browser_state\" != \"ok\" ]]; then\n exit_code=2\n fi\n\n local match_state\n match_state=$(status_state \"$MATCH_STATUS\")\n if [[ \"$match_state\" == \"mismatch\" || \"$match_state\" == \"chromium_missing\" || \"$match_state\" == \"playwright_missing\" ]]; then\n exit_code=2\n fi\n\n if [[ \"$(status_state \"$SYNC_STATUS\")\" == \"mismatch\" ]]; then\n exit_code=2\n fi\n\n exit \"$exit_code\"\n}\n\n# ============================================================================\n# Main\n# ============================================================================\n\ncase \"${1:-}\" in\n --json|-j)\n cmd_json\n ;;\n -h|--help|help)\n cat \u003c\u003c'EOF'\nUsage: setup.sh [options]\n\nOptions:\n (none) Check environment status (default)\n --json JSON output for programmatic use\n --help Show this help\n\nThis script checks:\n 1. Required dependencies (node, playwright, chromium, python3, etc.)\n 2. Playwright-Chromium version compatibility (CRITICAL)\n 3. Node.js and Python Playwright version sync (if both installed)\n\nIf version mismatch is detected, run:\n npx playwright install chromium\n\nThe script does NOT enforce specific versions. 
It only checks that\nPlaywright and Chromium are compatible with each other.\nEOF\n ;;\n *)\n cmd_check\n ;;\nesac\n","content_type":"application/x-sh; charset=utf-8","language":"bash","size":16223,"content_sha256":"33874d923df5022996618a4afac8d7ac74834c69664512057ad252ff6324edb1"}],"content_json":{"type":"doc","content":[{"type":"heading","attrs":{"level":2},"content":[{"text":"Route Selection","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Route","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Trigger","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Route File","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"HTML","type":"text","marks":[{"type":"strong"}]},{"text":" (default)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"All PDF creation requests","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"routes/html.md","type":"text","marks":[{"type":"code_inline"}]}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"LaTeX","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"User explicitly requests LaTeX, .tex, or 
Tectonic","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"routes/latex.md","type":"text","marks":[{"type":"code_inline"}]}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Process","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Work with existing PDFs (extract, merge, fill forms, etc.)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"routes/process.md","type":"text","marks":[{"type":"code_inline"}]}]}]}]}]},{"type":"paragraph","content":[{"text":"Default to HTML.","type":"text","marks":[{"type":"strong"}]},{"text":" Only use LaTeX route when user explicitly requests it.","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"MANDATORY: Read Route File Before Implementation","type":"text"}]},{"type":"paragraph","content":[{"text":"\u003csystem-reminder> You MUST read the corresponding route file before writing ANY code. Route files contain critical implementation details NOT duplicated here. Skipping this step leads to incorrect output (wrong scripts, missing CSS, broken layouts). 
\u003c/system-reminder>","type":"text"}]},{"type":"paragraph","content":[{"text":"Before implementation, you MUST:","type":"text","marks":[{"type":"strong"}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Determine the route (HTML / LaTeX / Process)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Read the route file","type":"text","marks":[{"type":"strong"}]},{"text":" (","type":"text"},{"text":"routes/html.md","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":"routes/latex.md","type":"text","marks":[{"type":"code_inline"}]},{"text":", or ","type":"text"},{"text":"routes/process.md","type":"text","marks":[{"type":"code_inline"}]},{"text":")","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Only then proceed with implementation","type":"text"}]}]}]},{"type":"paragraph","content":[{"text":"This file (SKILL.md) contains constraints and principles. 
Route files contain ","type":"text"},{"text":"how-to details","type":"text","marks":[{"type":"strong"}]},{"text":".","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Decision Rules","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"Route Selection","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"User Says","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Route","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"\"Create a PDF\", \"Make a report\", \"Write a paper\"","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"HTML","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"\"Use LaTeX\", \"Compile .tex\", \"Use Tectonic\"","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"LaTeX","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"\"Extract text from PDF\", \"Merge these PDFs\", \"Fill this form\"","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Process","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"Cover Style Selection (HTML 
Route)","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Context","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Style","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Academic paper, thesis, formal coursework","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Minimal","type":"text","marks":[{"type":"strong"}]},{"text":" (white, centered, no decoration)","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Reports, proposals, professional documents","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Designed","type":"text","marks":[{"type":"strong"}]},{"text":" (choose from style reference in html.md)","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Uncertain","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Default to ","type":"text"},{"text":"Designed","type":"text","marks":[{"type":"strong"}]},{"text":" — plain text cover = mediocre","type":"text"}]}]}]}]},{"type":"paragraph","content":[{"text":"Key principle","type":"text","marks":[{"type":"strong"}]},{"text":": Cover background separates \"acceptable\" from \"impressive\". 
See html.md for 11 style options.","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"Citation Format Selection","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Document Language","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Format","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Chinese","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"GB/T 7714 (use [J][M][D] identifiers)","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"English","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"APA","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Mixed","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Chinese refs → GB/T 7714, English refs → APA","type":"text"}]}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"Quick Start","type":"text"}]},{"type":"paragraph","content":[{"text":"Use the unified CLI for all operations:","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# Check environment (JSON 
output, exit code 0=ok, 2=missing deps)\n/app/.kimi/skills/kimi-pdf/scripts/pdf.sh check\n\n# Auto-fix missing dependencies (idempotent, safe to run multiple times)\n/app/.kimi/skills/kimi-pdf/scripts/pdf.sh fix\n\n# Convert HTML to PDF\n/app/.kimi/skills/kimi-pdf/scripts/pdf.sh html input.html\n\n# Compile LaTeX to PDF\n/app/.kimi/skills/kimi-pdf/scripts/pdf.sh latex input.tex","type":"text"}]},{"type":"paragraph","content":[{"text":"Exit codes:","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"0","type":"text","marks":[{"type":"code_inline"}]},{"text":" = success","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"1","type":"text","marks":[{"type":"code_inline"}]},{"text":" = usage error","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"2","type":"text","marks":[{"type":"code_inline"}]},{"text":" = dependency missing (run ","type":"text"},{"text":"pdf.sh fix","type":"text","marks":[{"type":"code_inline"}]},{"text":")","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"3","type":"text","marks":[{"type":"code_inline"}]},{"text":" = runtime error","type":"text"}]}]}]},{"type":"paragraph","content":[{"text":"Dependencies by route:","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"HTML route","type":"text","marks":[{"type":"strong"}]},{"text":": Node.js, Playwright, Chromium","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Process route","type":"text","marks":[{"type":"strong"}]},{"text":": Python 3, pikepdf, pdfplumber","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"LaTeX route","type":"text","marks":[{"type":"strong"}]},{"text":": 
Tectonic","type":"text"}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"Core Constraints (Must Follow)","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"1. Output Language","type":"text"}]},{"type":"paragraph","content":[{"text":"Output language must match user's query language.","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"User writes in Chinese → PDF content in Chinese","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"User writes in English → PDF content in English","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"User explicitly specifies language → Follow user's specification","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"2. Word Count and Page Constraints","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Strictly follow user-specified word/page count requirements","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Do not arbitrarily inflate content length","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"3. Citation and Search Standards","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"CRITICAL: Search Before Writing","type":"text"}]},{"type":"paragraph","content":[{"text":"DO NOT fabricate information. 
When in doubt, SEARCH.","type":"text","marks":[{"type":"strong"}]}]},{"type":"paragraph","content":[{"text":"If content involves ANY of these, you ","type":"text"},{"text":"MUST search FIRST","type":"text","marks":[{"type":"strong"}]},{"text":" before writing:","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Statistics, numbers, percentages, rankings","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Policies, regulations, laws, standards","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Academic research, theories, methodologies","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Current events, recent developments","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Anything you're not 100% certain about","type":"text","marks":[{"type":"strong"}]}]}]}]},{"type":"paragraph","content":[{"text":"\u003csystem-reminder> Never proceed with writing if you need statistics, research data, or policy information without searching first. Making up facts is strictly prohibited. When uncertain, search. 
\u003c/system-reminder>","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"When Search is Required","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Scenario","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Search?","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Notes","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Statistics, data","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Required","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"e.g., \"2024 employment rate\"","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Policies, regulations","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Required","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"e.g., \"startup subsidies\"","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Research, 
papers","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Required","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"e.g., \"effectiveness of method X\"","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Time-sensitive content","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Required","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Information after knowledge cutoff","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Uncertain facts","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Required","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"If unsure, always search","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Common knowledge","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Not 
needed","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"e.g., \"water boils at 100°C\"","type":"text"}]}]}]}]},{"type":"paragraph","content":[{"text":"Search workflow","type":"text","marks":[{"type":"strong"}]},{"text":":","type":"text"}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Identify facts/data requiring verification","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Search for authentic sources","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"If results insufficient, ","type":"text"},{"text":"iterate search","type":"text","marks":[{"type":"strong"}]},{"text":" until reliable info obtained","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Include real sources in references","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"If search fails repeatedly, tell the user","type":"text","marks":[{"type":"strong"}]},{"text":" instead of making up data","type":"text"}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"Citations Must Be Real","type":"text"}]},{"type":"paragraph","content":[{"text":"Fabricating references is prohibited","type":"text","marks":[{"type":"strong"}]},{"text":". 
All citations must have:","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Correct author/institution names","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Accurate titles","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Verifiable year, journal/source","type":"text"}]}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"Cross-references (Must Be Clickable)","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"html"},"content":[{"text":"As shown in \u003ca href=\"#fig-1-1\">Figure 1-1\u003c/a>...\nFrom \u003ca href=\"#eq-2-1\">Equation (2-1)\u003c/a>...\nSee \u003ca href=\"#sec3\">Section 3\u003c/a>...","type":"text"}]},{"type":"paragraph","content":[{"text":"Note","type":"text","marks":[{"type":"strong"}]},{"text":": ","type":"text"},{"text":"id","type":"text","marks":[{"type":"code_inline"}]},{"text":" must be placed at container top (see CSS Counters section in html.md).","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"Content Quality Constraints","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"1. 
Word/Page Count Constraints","type":"text"}]},{"type":"paragraph","content":[{"text":"Must strictly follow user-specified word or page count requirements","type":"text","marks":[{"type":"strong"}]},{"text":":","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"User Request","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Execution Standard","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Specific word count (e.g., \"3000 words\")","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Within ±20%, i.e., 2400-3600 words","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Specific page count (e.g., \"5 pages\")","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Exactly equal, last page may be partial","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Word count range (e.g., \"2000-3000 words\")","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Must fall within range","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"No explicit 
requirement","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Infer reasonably by document type; prefer thorough over superficial","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Minimum specified (e.g., \"more than 5000 words\")","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"No more than 2x, i.e., 5000-10000 words","type":"text"}]}]}]}]},{"type":"paragraph","content":[{"text":"Prohibited behaviors","type":"text","marks":[{"type":"strong"}]},{"text":":","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Arbitrarily shortening content (\"concise\" is not an excuse)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Padding pages with excessive bullet lists (maintain high information density)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Exceeding twice the user's requested length","type":"text"}]}]}]},{"type":"paragraph","content":[{"text":"Special case - Resume/CV","type":"text","marks":[{"type":"strong"}]},{"text":":","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Resume should be ","type":"text"},{"text":"1 page","type":"text","marks":[{"type":"strong"}]},{"text":" unless user specifies otherwise","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Use compact margins: ","type":"text"},{"text":"margin: 1.5cm","type":"text","marks":[{"type":"code_inline"}]}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"2. 
Outline Adherence (Mandatory)","type":"text"}]},{"type":"paragraph","content":[{"text":"When user provides outline","type":"text","marks":[{"type":"strong"}]},{"text":":","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Strictly follow","type":"text","marks":[{"type":"strong"}]},{"text":" the user-provided outline structure","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Section titles must match outline (minor wording adjustments OK, no level/order changes)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Do not add or remove sections arbitrarily","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"If outline seems problematic, ","type":"text"},{"text":"ask user first","type":"text","marks":[{"type":"strong"}]},{"text":" before modifying","type":"text"}]}]}]},{"type":"paragraph","content":[{"text":"When no user outline","type":"text","marks":[{"type":"strong"}]},{"text":":","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Use standard structures based on document type:","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Academic papers","type":"text","marks":[{"type":"strong"}]},{"text":": IMRaD (Introduction-Methods-Results-Discussion) or Introduction-Literature Review-Methods-Results-Discussion-Conclusion","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Business reports","type":"text","marks":[{"type":"strong"}]},{"text":": Conclusion-first (Executive Summary → Detailed Analysis → Recommendations)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Technical docs","type":"text","marks":[{"type":"strong"}]},{"text":": Overview → Principles → Usage → 
Examples → FAQ","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Course assignments","type":"text","marks":[{"type":"strong"}]},{"text":": Follow assignment structure requirements","type":"text"}]}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Sections must have logical progression, no disconnects","type":"text"}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"Tech Stack Overview","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Route","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Tools","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Purpose","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"HTML","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Playwright + Paged.js","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"HTML → PDF conversion","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"HTML","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"KaTeX, 
Mermaid","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Math formulas, diagrams","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Process","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"pikepdf","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Form filling, page operations, metadata","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Process","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"pdfplumber","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Text and table extraction","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Process","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"LibreOffice","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Office → PDF 
conversion","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"LaTeX","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Tectonic","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"LaTeX → PDF compilation","type":"text"}]}]}]}]},{"type":"hr","attrs":{"markup":"---"}}]},"metadata":{"date":"2026-06-05","name":"kimi-pdf","author":"@skillopedia","source":{"stars":174,"repo_name":"kimi-skills","origin_url":"https://github.com/thvroyal/kimi-skills/blob/HEAD/skills/kimi-pdf/SKILL.md","repo_owner":"thvroyal","body_sha256":"896af6033ff8505e8ad59e0e2112c39af235582272eb153b1bd36cc4bee3d567","cluster_key":"a53f2965d071de13d5631ee0d766f36835fd71b9906f81588e59ca81d4684f2c","clean_bundle":{"format":"clean-skill-bundle-v1","source":"thvroyal/kimi-skills/skills/kimi-pdf/SKILL.md","attachments":[{"id":"8f706340-3fe6-5645-b58a-b055a108756d","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/8f706340-3fe6-5645-b58a-b055a108756d/attachment.md","path":"routes/html.md","size":23697,"sha256":"0de81e184409f2e42d5b0606fee865061cd90341114829ee910b467f018b4250","contentType":"text/markdown; charset=utf-8"},{"id":"299b757f-ab5b-5faf-8a68-f7d37786c7e6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/299b757f-ab5b-5faf-8a68-f7d37786c7e6/attachment.md","path":"routes/latex.md","size":11204,"sha256":"6f38bc84bbb7b6fbaba5ae2f48f108ece77041aa8bbf9c5ad932a416d5d40f66","contentType":"text/markdown; 
charset=utf-8"},{"id":"01485ca2-dae1-5851-bbcc-cc12e5480ce6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/01485ca2-dae1-5851-bbcc-cc12e5480ce6/attachment.md","path":"routes/process.md","size":6865,"sha256":"c14cef70530db7e62b4f3cd9deaebec1403e986bb718048a40875c7b3b6f1068","contentType":"text/markdown; charset=utf-8"},{"id":"a5f33d43-61bf-5f42-8748-d21d45b7d029","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a5f33d43-61bf-5f42-8748-d21d45b7d029/attachment.js","path":"scripts/browser_helper.js","size":10907,"sha256":"b43585360c61ef972491d11f8344b23c491dea458e024c2e9ac4d81ed2757852","contentType":"application/javascript; charset=utf-8"},{"id":"e5977a59-97f2-5a33-bc18-142b1a58296e","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e5977a59-97f2-5a33-bc18-142b1a58296e/attachment.py","path":"scripts/cmd_convert.py","size":3127,"sha256":"8567f39d856124362628044045fcfccc60120b5171cb42e35d169f1c0ab1723b","contentType":"text/x-python; charset=utf-8"},{"id":"11f4af37-9eb0-59a5-ad32-90b2e4e760a2","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/11f4af37-9eb0-59a5-ad32-90b2e4e760a2/attachment.py","path":"scripts/cmd_extract.py","size":6178,"sha256":"a9fad96380702cab75b7be2a810dd19f9a662528d99df47a056dc73ba1b43bb7","contentType":"text/x-python; charset=utf-8"},{"id":"e09db7e8-24ac-5054-9456-a8662ca3dbca","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e09db7e8-24ac-5054-9456-a8662ca3dbca/attachment.py","path":"scripts/cmd_form.py","size":8420,"sha256":"7ca2ecc14a687ffe20c2d75123f5377f29f7b6a3cd28af1dfbda8715a9c0db2a","contentType":"text/x-python; charset=utf-8"},{"id":"d3539d95-874e-5766-8f2e-74e434da364d","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/d3539d95-874e-5766-8f2e-74e434da364d/attachment.py","path":"scripts/cmd_meta.py","size":3532,"sha256":"6561db265f3d0516d07d5860ac07e62e50cddcf8909294f1059f1abbefcb6bcb","contentType":"text/x-python; 
charset=utf-8"},{"id":"84d822dc-ad4b-5c78-8e4a-428b74781291","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/84d822dc-ad4b-5c78-8e4a-428b74781291/attachment.py","path":"scripts/cmd_pages.py","size":5029,"sha256":"45fd6099ee88208439f9a08cec07439ee6dba37f84058a98699055c86168c928","contentType":"text/x-python; charset=utf-8"},{"id":"17b2561d-4efd-5185-8e60-b0f693bdc527","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/17b2561d-4efd-5185-8e60-b0f693bdc527/attachment.py","path":"scripts/compile_latex.py","size":10204,"sha256":"cc54af5e83afbac197fbfa099d828e61b8d6c44f2059bf2efb7dbaa53ca26f79","contentType":"text/x-python; charset=utf-8"},{"id":"624f9a11-b740-52b7-8793-9742bf68ae24","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/624f9a11-b740-52b7-8793-9742bf68ae24/attachment.js","path":"scripts/html_to_pdf.js","size":21492,"sha256":"48387acfc7091f040f44a3ebd41718672eab0161ceae28fff324f4b855f1233f","contentType":"application/javascript; charset=utf-8"},{"id":"cc0d852d-15bf-5cae-b23e-97ef56a37f5c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/cc0d852d-15bf-5cae-b23e-97ef56a37f5c/attachment.json","path":"scripts/package-lock.json","size":1591,"sha256":"514f2fca37d07282e4bb2addade996058e4acdedf427ae7406440858dbf81899","contentType":"application/json; charset=utf-8"},{"id":"549d7e33-5db3-5fe6-8d46-3969fa4c4225","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/549d7e33-5db3-5fe6-8d46-3969fa4c4225/attachment.json","path":"scripts/package.json","size":78,"sha256":"3dc6c686470c40aaf810642a67410ecb26bbb635ac5d192ad1e8bbc122c19362","contentType":"application/json; charset=utf-8"},{"id":"759a9e59-6936-506b-85b6-a0ffedb4b5e3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/759a9e59-6936-506b-85b6-a0ffedb4b5e3/attachment.js","path":"scripts/paged.polyfill.js","size":921702,"sha256":"f59f361802416c770d549a647958649af2cf6601999924bc00e4f507dad5269f","contentType":"application/javascript; 
charset=utf-8"},{"id":"a40533d3-c777-5810-a3fe-b5797575b94f","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a40533d3-c777-5810-a3fe-b5797575b94f/attachment.py","path":"scripts/pdf.py","size":10000,"sha256":"cc83373eab2019eba3763b8a9cfc2d1c3ba0b688af4c2b1fa2a95d681dec5722","contentType":"text/x-python; charset=utf-8"},{"id":"97bf83d2-ded8-53c3-a9bf-46511546e28c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/97bf83d2-ded8-53c3-a9bf-46511546e28c/attachment.sh","path":"scripts/pdf.sh","size":3743,"sha256":"7072349824a0b69a34b41441deb2aa4c2c6c19a0ef6c576c198475b7c8c49454","contentType":"application/x-sh; charset=utf-8"},{"id":"15ccc422-54d5-5ffa-a686-6432c5502d76","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/15ccc422-54d5-5ffa-a686-6432c5502d76/attachment.sh","path":"scripts/setup.sh","size":16223,"sha256":"33874d923df5022996618a4afac8d7ac74834c69664512057ad252ff6324edb1","contentType":"application/x-sh; charset=utf-8"}],"bundle_sha256":"3a50a431771d48d73b0534aba175cc7d1b756eaa6f12cd4f3bf0f0b78689a9ca","attachment_count":17,"text_attachments":17,"attachment_storage":"skillopedia-attachments-v1","binary_attachments":0,"excluded_attachments":[]},"cluster_size":1,"skill_md_path":"skills/kimi-pdf/SKILL.md","import_metadata":{"date":"2026-06-05","author":"@skillopedia","version":"v1","category":"web-development","category_label":"Web"},"exact_dupes_collapsed_into_this":0},"version":"v1","category":"web-development","import_tag":"clean-skills-v1","description":"Professional PDF solution. Create PDFs using HTML+Paged.js (academic papers, reports, documents). Process existing PDFs using Python (read, extract, merge, split, fill forms). Supports KaTeX math formulas, Mermaid diagrams, three-line tables, citations, and other academic elements. Also use this skill when user explicitly requests LaTeX (.tex) or native LaTeX compilation."}},"renderedAt":1782980706382}

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.