Building GitHub Index Create markdown indexes of GitHub repositories optimized for Claude project knowledge. Indexes enable retrieval via GitHub API with semantic descriptions for effective matching. Quick Start Script Options | Flag | Description | |------|-------------| | `-o, --output` | Output file (default: `github_index.md`) | | `--token` | GitHub PAT; also reads `GITHUB_TOKEN` env | | `--include-patterns` | Only index matching globs: `"docs/**" "src/**"` | | `--exclude-patterns` | Skip matching globs: `"test/**"` | | `--max-files` | Cap files per repo (default: 200) | | `--skip-fetch` | Tree only, no content fetch (fast, filename-only descriptions) | | `--code-symbols` | Include code files, extract function/class names via tree-sitter | Description Extraction…

, content, re.MULTILINE)\n if not headings:\n return {}\n \n # Use first heading as title, combine first few as description\n result = {'title': headings[0].strip()}\n if len(headings) > 1:\n topics = [h.strip().lower() for h in headings[:4]]\n result['description'] = ', '.join(topics)\n return result\n\ndef extract_notebook_title(content: str) -> dict:\n try:\n nb = json.loads(content)\n cells = nb.get(\"cells\", [])\n if cells and cells[0].get(\"cell_type\") == \"markdown\":\n source = \"\".join(cells[0].get(\"source\", []))\n match = re.search(r'^#\\s+(.+)

Building GitHub Index Create markdown indexes of GitHub repositories optimized for Claude project knowledge. Indexes enable retrieval via GitHub API with semantic descriptions for effective matching. Quick Start Script Options | Flag | Description | |------|-------------| | `-o, --output` | Output file (default: `github_index.md`) | | `--token` | GitHub PAT; also reads `GITHUB_TOKEN` env | | `--include-patterns` | Only index matching globs: `"docs/**" "src/**"` | | `--exclude-patterns` | Skip matching globs: `"test/**"` | | `--max-files` | Cap files per repo (default: 200) | | `--skip-fetch` | Tree only, no content fetch (fast, filename-only descriptions) | | `--code-symbols` | Include code files, extract function/class names via tree-sitter | Description Extraction…

, source, re.MULTILINE)\n if match:\n return {\"title\": match.group(1).strip()}\n except:\n pass\n return {}\n\ndef extract_code_symbols(content: str, lang: str) -> dict:\n \"\"\"Extract public symbols from code using tree-sitter.\"\"\"\n if not TS_AVAILABLE:\n return {}\n \n lang_map = {'py': 'python', 'js': 'javascript', 'ts': 'typescript', \n 'tsx': 'tsx', 'go': 'go', 'rs': 'rust', 'c': 'c', 'h': 'c'}\n if lang not in lang_map:\n return {}\n \n try:\n parser = get_parser(lang_map[lang])\n tree = parser.parse(content.encode())\n symbols = []\n \n def get_text(node):\n return content[node.start_byte:node.end_byte]\n \n for node in tree.root_node.children:\n if node.type in ('function_definition', 'class_definition'):\n name_node = node.child_by_field_name('name')\n if name_node:\n name = get_text(name_node)\n if not name.startswith('_'):\n symbols.append(name)\n elif node.type == 'export_statement':\n for child in node.children:\n if child.type in ('function_declaration', 'class_declaration'):\n name_node = child.child_by_field_name('name')\n if name_node:\n symbols.append(get_text(name_node))\n \n if symbols:\n return {'description': ', '.join(symbols[:6]) + (f' +{len(symbols)-6}' if len(symbols) > 6 else '')}\n except:\n pass\n return {}\n\ndef infer_category(path: str) -> str:\n parts = Path(path).parts\n patterns = [\n (['blog', 'posts'], \"Blog Posts\"),\n (['docs', 'documentation'], \"Documentation\"),\n (['guides', 'tutorials'], \"Guides\"),\n (['api'], \"API Reference\"),\n (['examples'], \"Examples\"),\n (['src', 'lib'], \"Source\"),\n (['apps'], \"Applications\"),\n ]\n for keywords, category in patterns:\n if any(k in parts for k in keywords):\n return category\n if len(parts) > 1:\n return parts[0].replace('_', ' ').replace('-', ' ').title()\n return \"Other\"\n\ndef description_from_path(path: str) -> str:\n stem = Path(path).stem\n if stem.lower() in ('index', 'readme'):\n parent = Path(path).parent.name\n return parent.replace('_', ' 
').replace('-', ' ').title() if parent != '.' else stem\n return stem.replace('_', ' ').replace('-', ' ')\n\ndef process_repo(owner: str, repo: str, token: Optional[str] = None,\n include: list[str] = None, exclude: list[str] = None,\n max_files: int = 200, skip_fetch: bool = False,\n code_symbols: bool = False) -> RepoInfo:\n include = include or []\n exclude = exclude or []\n \n print(f\"Processing {owner}/{repo}...\", file=sys.stderr)\n branch, desc = get_repo_info(owner, repo, token)\n all_paths = get_repo_tree(owner, repo, branch, token)\n \n # Filter paths based on mode\n if code_symbols:\n valid_ext = CONTENT_EXTENSIONS | CODE_EXTENSIONS\n else:\n valid_ext = CONTENT_EXTENSIONS\n \n content_paths = [\n p for p in all_paths\n if Path(p).suffix.lower() in valid_ext and should_include(p, include, exclude)\n ]\n \n print(f\" Found {len(content_paths)} files\", file=sys.stderr)\n if len(content_paths) > max_files:\n print(f\" Limiting to {max_files}\", file=sys.stderr)\n content_paths = content_paths[:max_files]\n \n files: list[FileInfo] = []\n \n if skip_fetch:\n for path in content_paths:\n files.append(FileInfo(\n path=path,\n description=description_from_path(path),\n category=infer_category(path)\n ))\n else:\n def process_file(path: str) -> FileInfo:\n content = fetch_file(owner, repo, path, branch, token)\n meta = {}\n suffix = Path(path).suffix.lower()\n \n if content:\n if suffix == '.ipynb':\n meta = extract_notebook_title(content)\n elif suffix in CONTENT_EXTENSIONS:\n # Try frontmatter first, then headings\n meta = extract_frontmatter(content)\n if not meta.get('description') and not meta.get('title'):\n meta = extract_headings(content)\n elif suffix.lstrip('.') in ('py', 'js', 'ts', 'tsx', 'go', 'rs', 'c', 'h') and code_symbols:\n meta = extract_code_symbols(content, suffix.lstrip('.'))\n \n desc = meta.get('description') or meta.get('title') or description_from_path(path)\n return FileInfo(\n path=path,\n title=meta.get('title'),\n 
description=desc,\n category=infer_category(path)\n )\n \n with ThreadPoolExecutor(max_workers=10) as executor:\n futures = {executor.submit(process_file, p): p for p in content_paths}\n done = 0\n for future in as_completed(futures):\n files.append(future.result())\n done += 1\n if done % 50 == 0:\n print(f\" Processed {done}/{len(content_paths)}\", file=sys.stderr)\n \n return RepoInfo(\n owner=owner, repo=repo, branch=branch,\n url=f\"https://github.com/{owner}/{repo}\",\n description=desc, files=files\n )\n\ndef generate_index(repos: list[RepoInfo]) -> str:\n lines = []\n \n if len(repos) == 1:\n r = repos[0]\n lines.append(f\"# {r.repo} - Content Index\\n\")\n lines.append(f\"**Repository:** {r.url} \")\n lines.append(f\"**Branch:** `{r.branch}`\")\n if r.description:\n lines.append(f\"\\n*{r.description}*\")\n else:\n lines.append(\"# Combined Repository Index\\n\")\n for r in repos:\n lines.append(f\"- [{r.owner}/{r.repo}]({r.url})\")\n \n lines.append(\"\\n## Retrieval Method\\n\")\n lines.append(\"```bash\")\n lines.append('curl -s \"https://api.github.com/repos/OWNER/REPO/contents/PATH?ref=BRANCH\" \\\\')\n lines.append(' -H \"Accept: application/vnd.github+json\" | \\\\')\n lines.append(' python3 -c \"import sys,json,base64; print(base64.b64decode(json.load(sys.stdin)[\\'content\\']).decode())\"')\n lines.append(\"```\\n---\\n\")\n \n for r in repos:\n if len(repos) > 1:\n lines.append(f\"## {r.owner}/{r.repo}\\n\")\n \n by_category: dict[str, list[FileInfo]] = {}\n for f in r.files:\n by_category.setdefault(f.category, []).append(f)\n \n for category in sorted(by_category.keys()):\n cat_files = sorted(by_category[category], key=lambda x: x.path)\n lines.append(f\"### {category}\\n\")\n lines.append(\"| Description | Path |\")\n lines.append(\"|-------------|------|\")\n for f in cat_files:\n desc = f.description or \"—\"\n if len(desc) > 100:\n desc = desc[:97] + \"...\"\n desc = desc.replace(\"|\", \"\\\\|\")\n lines.append(f\"| {desc} | `{f.path}` 
|\")\n lines.append(\"\")\n \n lines.append(\"---\\n*Generated by building-github-index*\")\n return \"\\n\".join(lines)\n\ndef main():\n parser = argparse.ArgumentParser(description=\"GitHub repo index generator v2\")\n parser.add_argument(\"repos\", nargs=\"+\", help=\"owner/repo\")\n parser.add_argument(\"-o\", \"--output\", default=\"github_index.md\")\n parser.add_argument(\"--token\", help=\"GitHub PAT\")\n parser.add_argument(\"--include-patterns\", nargs=\"*\", default=[])\n parser.add_argument(\"--exclude-patterns\", nargs=\"*\", default=[])\n parser.add_argument(\"--max-files\", type=int, default=200)\n parser.add_argument(\"--skip-fetch\", action=\"store_true\")\n parser.add_argument(\"--code-symbols\", action=\"store_true\", \n help=\"Include code files and extract symbols (requires tree-sitter)\")\n \n args = parser.parse_args()\n token = args.token or os.environ.get(\"GITHUB_TOKEN\") or os.environ.get(\"GITHUB_PAT\")\n \n repos_data = []\n for spec in args.repos:\n if \"/\" not in spec:\n print(f\"Error: Invalid '{spec}'\", file=sys.stderr)\n continue\n owner, repo = spec.split(\"/\", 1)\n try:\n data = process_repo(owner, repo, token, args.include_patterns, \n args.exclude_patterns, args.max_files, \n args.skip_fetch, args.code_symbols)\n repos_data.append(data)\n except Exception as e:\n print(f\"Error: {spec}: {e}\", file=sys.stderr)\n \n if not repos_data:\n sys.exit(1)\n \n Path(args.output).write_text(generate_index(repos_data))\n print(f\"Index written to {args.output}\", file=sys.stderr)\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":14026,"content_sha256":"39fbb34689a84fb5c1c6be7529676ae3dc32b0222892b00aedcefa309d362a1f"},{"filename":"scripts/pk_index.py","content":"#!/usr/bin/env python3\n\"\"\"\nProject Knowledge Index Generator\nProduces flat, token-efficient indexes for Claude project knowledge.\nExtracts semantic info from code (AST) and docs 
(headings).\n\"\"\"\n\nimport json\nimport os\nimport sys\nimport tarfile\nimport tempfile\nimport urllib.request\nfrom collections import defaultdict\nfrom pathlib import Path\nimport re\n\ntry:\n from tree_sitter_language_pack import get_parser\n TS_AVAILABLE = True\nexcept ImportError:\n TS_AVAILABLE = False\n\nLANG_MAP = {'.py': 'python', '.js': 'javascript', '.ts': 'typescript',\n '.c': 'c', '.h': 'c', '.go': 'go', '.rs': 'rust'}\n\nSKIP_DIRS = {'.git', 'node_modules', '__pycache__', '.venv', 'venv', \n 'dist', 'build', 'test', 'tests', 'docs', '.github',\n 'vendor', 'third_party', 'fixtures', 'examples'}\n\ndef fetch_tarball(owner: str, repo: str, ref: str = 'main') -> bytes:\n url = f'https://api.github.com/repos/{owner}/{repo}/tarball/{ref}'\n req = urllib.request.Request(url, headers={'Accept': 'application/vnd.github+json'})\n with urllib.request.urlopen(req) as resp:\n return resp.read()\n\ndef extract_md_topics(content: str) -> list[str]:\n \"\"\"Extract h1/h2 headings as topics.\"\"\"\n headings = re.findall(r'^#{1,2}\\s+(.+)

Building GitHub Index Create markdown indexes of GitHub repositories optimized for Claude project knowledge. Indexes enable retrieval via GitHub API with semantic descriptions for effective matching. Quick Start Script Options | Flag | Description | |------|-------------| | `-o, --output` | Output file (default: `github_index.md`) | | `--token` | GitHub PAT; also reads `GITHUB_TOKEN` env | | `--include-patterns` | Only index matching globs: `"docs/**" "src/**"` | | `--exclude-patterns` | Skip matching globs: `"test/**"` | | `--max-files` | Cap files per repo (default: 200) | | `--skip-fetch` | Tree only, no content fetch (fast, filename-only descriptions) | | `--code-symbols` | Include code files, extract function/class names via tree-sitter | Description Extraction…

, content, re.MULTILINE)\n # Clean and dedupe\n seen = set()\n topics = []\n for h in headings[:8]:\n h = h.strip().lower()\n if h not in seen and len(h) \u003c 60:\n seen.add(h)\n topics.append(h)\n return topics\n\ndef extract_py_symbols(content: bytes, parser) -> list[str]:\n \"\"\"Extract public class/function names from Python.\"\"\"\n tree = parser.parse(content)\n symbols = []\n \n def get_text(node):\n return content[node.start_byte:node.end_byte].decode('utf-8', errors='replace')\n \n for node in tree.root_node.children:\n if node.type in ('function_definition', 'class_definition'):\n name_node = node.child_by_field_name('name')\n if name_node:\n name = get_text(name_node)\n if not name.startswith('_'):\n symbols.append(name)\n return symbols[:10] # Limit\n\ndef extract_js_symbols(content: bytes, parser) -> list[str]:\n \"\"\"Extract exported names from JavaScript (ES6 + CommonJS).\"\"\"\n tree = parser.parse(content)\n symbols = []\n \n def get_text(node):\n return content[node.start_byte:node.end_byte].decode('utf-8', errors='replace')\n \n def walk(node):\n # ES6: export function/class/const\n if node.type == 'export_statement':\n for child in node.children:\n if child.type in ('function_declaration', 'class_declaration'):\n name_node = child.child_by_field_name('name')\n if name_node:\n symbols.append(get_text(name_node))\n # CommonJS: module.exports = { ... } or exports.foo = ...\n if node.type == 'assignment_expression':\n left = node.child_by_field_name('left')\n if left:\n left_text = get_text(left)\n if 'module.exports' in left_text or left_text.startswith('exports.'):\n # Try to get the assigned name\n if '.' 
in left_text:\n name = left_text.split('.')[-1]\n if name and name not in ('exports', 'module'):\n symbols.append(name)\n for child in node.children:\n walk(child)\n \n walk(tree.root_node)\n return symbols[:10]\n\ndef process_repo(owner: str, repo: str, ref: str = 'main', output: str = None):\n \"\"\"Generate condensed index for project knowledge.\"\"\"\n \n print(f\"Fetching {owner}/{repo}@{ref}...\", file=sys.stderr)\n tarball_data = fetch_tarball(owner, repo, ref)\n \n with tempfile.TemporaryDirectory() as tmpdir:\n tarpath = Path(tmpdir) / 'repo.tar.gz'\n tarpath.write_bytes(tarball_data)\n \n with tarfile.open(tarpath, 'r:gz') as tar:\n tar.extractall(tmpdir)\n \n extracted = [d for d in Path(tmpdir).iterdir() if d.is_dir()]\n repo_root = extracted[0]\n \n # Collect by category\n entries = [] # (category, path, description)\n \n for filepath in repo_root.rglob('*'):\n if not filepath.is_file():\n continue\n \n rel_path = filepath.relative_to(repo_root)\n if any(part in SKIP_DIRS for part in rel_path.parts):\n continue\n \n suffix = filepath.suffix.lower()\n \n try:\n content = filepath.read_bytes()\n except:\n continue\n \n # Markdown files\n if suffix in ('.md', '.mdx', '.qmd'):\n topics = extract_md_topics(content.decode('utf-8', errors='replace'))\n if topics:\n desc = ', '.join(topics[:4])\n entries.append((str(rel_path.parent or 'root'), str(rel_path), desc))\n continue\n \n # Code files\n if suffix in LANG_MAP and TS_AVAILABLE:\n lang = LANG_MAP[suffix]\n try:\n parser = get_parser(lang)\n if lang == 'python':\n symbols = extract_py_symbols(content, parser)\n elif lang in ('javascript', 'typescript'):\n symbols = extract_js_symbols(content, parser)\n else:\n symbols = []\n \n if symbols:\n desc = ', '.join(symbols[:5])\n if len(symbols) > 5:\n desc += f' +{len(symbols)-5}'\n entries.append((str(rel_path.parent or 'root'), str(rel_path), desc))\n except:\n pass\n \n print(f\"Indexed {len(entries)} files\", file=sys.stderr)\n \n # Generate output\n 
output_path = output or f'/home/claude/{repo}_pk.md'\n generate_pk_output(owner, repo, ref, entries, output_path)\n print(f\"Written to {output_path}\", file=sys.stderr)\n\ndef generate_pk_output(owner: str, repo: str, ref: str, entries: list, output_path: str):\n \"\"\"Generate project-knowledge-optimized index.\"\"\"\n lines = [\n f\"# {repo} Index\",\n f\"Repo: https://github.com/{owner}/{repo} | Branch: `{ref}`\",\n \"\",\n \"Fetch: `curl -sL \\\"https://raw.githubusercontent.com/\" + owner + \"/\" + repo + \"/\" + ref + \"/PATH\\\"`\",\n \"\",\n ]\n \n # Group by category\n by_cat = defaultdict(list)\n for cat, path, desc in entries:\n by_cat[cat].append((path, desc))\n \n for cat in sorted(by_cat.keys()):\n items = by_cat[cat]\n lines.append(f\"## {cat}/\")\n for path, desc in items[:15]: # Limit per category\n lines.append(f\"- `{path}` — {desc}\")\n if len(items) > 15:\n lines.append(f\"- *+{len(items)-15} more*\")\n lines.append(\"\")\n \n Path(output_path).write_text('\\n'.join(lines))\n\nif __name__ == '__main__':\n import argparse\n p = argparse.ArgumentParser()\n p.add_argument('repo', help='owner/repo')\n p.add_argument('-r', '--ref', default='main')\n p.add_argument('-o', '--output')\n args = p.parse_args()\n owner, repo = args.repo.split('/')\n process_repo(owner, repo, args.ref, args.output)\n","content_type":"text/x-python; charset=utf-8","language":"python","size":7474,"content_sha256":"1403a70542cf550ae09e8f6ef69132921da7de1fcf4d41002ac3c11780dff17f"}],"content_json":{"type":"doc","content":[{"type":"heading","attrs":{"level":1},"content":[{"text":"Building GitHub Index","type":"text"}]},{"type":"paragraph","content":[{"text":"Create markdown indexes of GitHub repositories optimized for Claude project knowledge. 
Indexes enable retrieval via GitHub API with semantic descriptions for effective matching.","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Quick Start","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# Documentation repos (markdown/notebooks)\npython scripts/github_index.py owner/repo -o index.md\n\n# Code repos (extract symbols via tree-sitter)\npython scripts/github_index.py owner/repo --code-symbols -o index.md\n\n# Multiple repos combined\npython scripts/github_index.py owner/repo1 owner/repo2 -o combined.md","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Script Options","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Flag","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Description","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"-o, --output","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Output file (default: ","type":"text"},{"text":"github_index.md","type":"text","marks":[{"type":"code_inline"}]},{"text":")","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"--token","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"GitHub PAT; also reads 
","type":"text"},{"text":"GITHUB_TOKEN","type":"text","marks":[{"type":"code_inline"}]},{"text":" env","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"--include-patterns","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Only index matching globs: ","type":"text"},{"text":"\"docs/**\" \"src/**\"","type":"text","marks":[{"type":"code_inline"}]}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"--exclude-patterns","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Skip matching globs: ","type":"text"},{"text":"\"test/**\"","type":"text","marks":[{"type":"code_inline"}]}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"--max-files","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Cap files per repo (default: 200)","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"--skip-fetch","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Tree only, no content fetch (fast, filename-only 
descriptions)","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"--code-symbols","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Include code files, extract function/class names via tree-sitter","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Description Extraction Priority","type":"text"}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"YAML frontmatter","type":"text","marks":[{"type":"strong"}]},{"text":" - ","type":"text"},{"text":"title:","type":"text","marks":[{"type":"code_inline"}]},{"text":" and ","type":"text"},{"text":"description:","type":"text","marks":[{"type":"code_inline"}]},{"text":" fields","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Markdown headings","type":"text","marks":[{"type":"strong"}]},{"text":" - First h1/h2 as title, subsequent as topics","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Notebook cells","type":"text","marks":[{"type":"strong"}]},{"text":" - First markdown cell heading","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Code symbols","type":"text","marks":[{"type":"strong"}]},{"text":" - Public function/class names (with ","type":"text"},{"text":"--code-symbols","type":"text","marks":[{"type":"code_inline"}]},{"text":")","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Path-derived","type":"text","marks":[{"type":"strong"}]},{"text":" - Convert filename to words (fallback)","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"When Descriptions 
Fail","type":"text"}]},{"type":"paragraph","content":[{"text":"Some repos have stub files (links to external docs, empty readmes). In these cases:","type":"text"}]},{"type":"paragraph","content":[{"text":"Manual curation recommended.","type":"text","marks":[{"type":"strong"}]},{"text":" Use the tree output and domain knowledge:","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# Get tree structure only (fast)\npython scripts/github_index.py owner/repo --skip-fetch -o skeleton.md\n# Then manually enhance descriptions based on domain knowledge","type":"text"}]},{"type":"paragraph","content":[{"text":"For code-heavy repos with embedded apps:","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Directory names encode purpose: ","type":"text"},{"text":"acc_wav_gen","type":"text","marks":[{"type":"code_inline"}]},{"text":" → \"ACC waveform generation\"","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Peripheral acronyms map to functions: AFEC=ADC, MCAN=CAN, TWIHS=I2C","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Operation modes: blocking, interrupt, dma, polled","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Output Format","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"markdown"},"content":[{"text":"# {Repo} - Content Index\n\n**Repository:** {url}\n**Branch:** `{branch}`\n\n## Retrieval Method\n{API curl commands}\n\n---\n\n## {Category}\n\n| Description | Path |\n|-------------|------|\n| {What this covers} | `{path/file.md}` |","type":"text"}]},{"type":"paragraph","content":[{"text":"Description column leads (relevance matching), path follows (retrieval key).","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"API 
Access","type":"text"}]},{"type":"paragraph","content":[{"text":"Enumerate files:","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"curl -sL \"https://api.github.com/repos/OWNER/REPO/git/trees/BRANCH?recursive=1\"","type":"text"}]},{"type":"paragraph","content":[{"text":"Fetch content:","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"curl -s \"https://api.github.com/repos/OWNER/REPO/contents/PATH?ref=BRANCH\" \\\n -H \"Accept: application/vnd.github+json\" | \\\n python3 -c \"import sys,json,base64; print(base64.b64decode(json.load(sys.stdin)['content']).decode())\"","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Network","type":"text"}]},{"type":"paragraph","content":[{"text":"Allowlist: ","type":"text"},{"text":"api.github.com","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":"raw.githubusercontent.com","type":"text","marks":[{"type":"code_inline"}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Related Skills","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"accessing-github-repos","type":"text","marks":[{"type":"code_inline"}]},{"text":" - Private repos, PAT setup, tarball download","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"mapping-codebases","type":"text","marks":[{"type":"code_inline"}]},{"text":" - Detailed code structure (methods, imports, line numbers)","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Condensed Format (pk_index.py)","type":"text"}]},{"type":"paragraph","content":[{"text":"For token-constrained project knowledge, use the condensed script:","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"python scripts/pk_index.py owner/repo -o 
repo_pk.md","type":"text"}]},{"type":"paragraph","content":[{"text":"Produces ~80% smaller output:","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Single line per file: ","type":"text"},{"text":"path","type":"text","marks":[{"type":"code_inline"}]},{"text":" — description","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Symbols only (no signatures)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"15 files max per category","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"No retrieval instructions section","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"Ideal when adding multiple repo indexes to project knowledge.","type":"text"}]}]},"metadata":{"date":"2026-06-05","name":"building-github-index","author":"@skillopedia","source":{"stars":124,"repo_name":"claude-skills","origin_url":"https://github.com/oaustegard/claude-skills/blob/HEAD/building-github-index-v2/SKILL.md","repo_owner":"oaustegard","body_sha256":"eb75da425c41c9aa66bef3e2edfa0dd3200240ab0368c30e83832003e22d209d","cluster_key":"e2337307b96811bec2235e4ba15da2edf462d4db0685c7e736540d3ad07485bf","clean_bundle":{"format":"clean-skill-bundle-v1","source":"oaustegard/claude-skills/building-github-index-v2/SKILL.md","attachments":[{"id":"6c6f9664-c26c-5077-aeb7-8cc627d34604","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/6c6f9664-c26c-5077-aeb7-8cc627d34604/attachment.md","path":"CHANGELOG.md","size":289,"sha256":"4dbb2850c61e3990777447fc3f62b83e50bc6851b2a06471ba712e9bd83ebf51","contentType":"text/markdown; 
charset=utf-8"},{"id":"11b4227e-99e8-5ec9-ab40-9ef53faf467b","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/11b4227e-99e8-5ec9-ab40-9ef53faf467b/attachment.md","path":"README.md","size":395,"sha256":"d577c91cf2a73b9da239f7af143b6638a597f1187f74e205c021fb365b77e85b","contentType":"text/markdown; charset=utf-8"},{"id":"6a2f11d8-3273-5eac-9a1b-4e2fbde74b7a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/6a2f11d8-3273-5eac-9a1b-4e2fbde74b7a/attachment.py","path":"scripts/github_index.py","size":14026,"sha256":"39fbb34689a84fb5c1c6be7529676ae3dc32b0222892b00aedcefa309d362a1f","contentType":"text/x-python; charset=utf-8"},{"id":"56340066-b12c-5859-ba65-a4122fbe05ed","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/56340066-b12c-5859-ba65-a4122fbe05ed/attachment.py","path":"scripts/pk_index.py","size":7474,"sha256":"1403a70542cf550ae09e8f6ef69132921da7de1fcf4d41002ac3c11780dff17f","contentType":"text/x-python; charset=utf-8"}],"bundle_sha256":"d53b8e5bb1d65d4540f4f1c3e918b1c098d6ab7fd47aafba942235505e815789","attachment_count":4,"text_attachments":4,"attachment_storage":"skillopedia-attachments-v1","binary_attachments":0,"excluded_attachments":[]},"cluster_size":1,"skill_md_path":"building-github-index-v2/SKILL.md","import_metadata":{"date":"2026-06-05","author":"@skillopedia","version":"v1","category":"web-development","category_label":"Web"},"exact_dupes_collapsed_into_this":0},"version":"v1","category":"web-development","metadata":{"version":"2.0.0"},"import_tag":"clean-skills-v1","description":"Generate progressive disclosure indexes for GitHub repositories to use as Claude project knowledge. Use when setting up projects referencing external documentation, creating searchable indexes of technical blogs or knowledge bases, combining multiple repos into one index, or when user mentions \"index\", \"github repo\", \"project knowledge\", or \"documentation reference\"."}},"renderedAt":1782986435414}

Building GitHub Index Create markdown indexes of GitHub repositories optimized for Claude project knowledge. Indexes enable retrieval via GitHub API with semantic descriptions for effective matching. Quick Start Script Options | Flag | Description | |------|-------------| | `-o, --output` | Output file (default: `github_index.md`) | | `--token` | GitHub PAT; also reads `GITHUB_TOKEN` env | | `--include-patterns` | Only index matching globs: `"docs/**" "src/**"` | | `--exclude-patterns` | Skip matching globs: `"test/**"` | | `--max-files` | Cap files per repo (default: 200) | | `--skip-fetch` | Tree only, no content fetch (fast, filename-only descriptions) | | `--code-symbols` | Include code files, extract function/class names via tree-sitter | Description Extraction…