AVM Memory Skill AI Virtual Memory — 多 Agent 共享记忆系统 核心能力 - 语义搜索 :embedding + FTS5 混合检索 - Token 感知 :自动截断到 token 预算 - 多 Agent :私有/共享空间隔离 + 订阅通知 - 生命周期 :自动衰减、归档、垃圾清理 - TopicIndex :O(1) recall,已知 topic 1 hop 完成 - Librarian :多 Agent 知识路由,95% hop 减少 - Gossip Protocol :去中心化发现,bloom filter digest - Memory Consolidation :睡眠式记忆整合 --- 快速开始 CLI 方式 FUSE 方式 Python API --- 🆕 多 Agent 发现 方式 1: Librarian(中心化) 当你想知道"谁知道某个话题": 延迟 : 1.7ms,95% hop 减少 方式 2: Gossip Protocol(去中心化) 每个 agent 维护一个 digest(bloom filter),周期性交换: 特点 : - 无单点故障 - 本地查询 O(1) - 假阳性 <15%,假阴性 0% - 每 agent 只需 128 bytes digest 何时用哪个? | 场景 | 推荐 | |--…

, '', content, flags=re.MULTILINE)\n # removeupdatetime\n content = re.sub(r'\\*Updated:.*\\*', '', content)\n # removeemptyline\n content = re.sub(r'\\n{2,}', '\\n', content)\n \n # Extract key lines\n lines = [l.strip() for l in content.split('\\n') if l.strip()]\n \n # Prioritize lines with numbers (likely key data)\n key_lines = [l for l in lines if re.search(r'\\d', l)]\n other_lines = [l for l in lines if l not in key_lines]\n \n # combine\n result_lines = key_lines[:3] + other_lines\n result = ' '.join(result_lines)\n \n if len(result) > max_chars:\n result = result[:max_chars-3] + \"...\"\n \n return result\n \n def _estimate_tokens(self, text: str) -> int:\n \"\"\"Estimate token count\"\"\"\n return int(len(text) / self.config.chars_per_token) + 10 # +10 for formatting\n \n def _compact_synthesis(self, selected: List[ScoredNode],\n query: str,\n max_tokens: int,\n strategy: ScoringStrategy) -> str:\n \"\"\"Generate compact Markdown output\"\"\"\n if not selected:\n return f\"## Memory Recall\\n\\nNo relevant memories found for: \\\"{query}\\\"\"\n \n total_tokens = sum(sn.estimated_tokens for sn in selected)\n \n lines = [\n f\"## Relevant Memory ({len(selected)} items, ~{total_tokens} tokens)\",\n \"\",\n ]\n \n for sn in selected:\n # format: [path] summary\n score_str = f\"{sn.final_score:.2f}\"\n lines.append(f\"[{sn.node.path}] ({score_str}) {sn.summary}\")\n lines.append(\"\")\n \n lines.append(\"---\")\n lines.append(f\"*Tokens: ~{total_tokens}/{max_tokens} | Strategy: {strategy.value} | Query: \\\"{query}\\\"*\")\n \n return \"\\n\".join(lines)\n \n # ─── write ─────────────────────────────────────────────\n \n def remember(self, content: str,\n title: str = None,\n importance: float = 0.5,\n tags: List[str] = None,\n source: str = \"agent\",\n namespace: str = None,\n path: str = None) -> AVMNode:\n \"\"\"\n writememory(supports append-only version)\n \n Args:\n content: memorycontent\n title: title(forgeneratepath)\n importance: importance (0-1)\n tags: tag\n source: source\n namespace: shared namespace (e.g., \"market\", \"projects\")\n path: specifiedpath(for append-only update)\n \"\"\"\n telemetry = get_telemetry()\n \n # Determine target path\n if path:\n target_path = path\n elif namespace:\n timestamp = utcnow().strftime(\"%Y%m%d_%H%M%S_%f\") # add microseconds\n slug = self._make_slug(title) if title else timestamp\n target_path = f\"{self.shared_prefix}/{namespace}/{slug}.md\"\n else:\n timestamp = utcnow().strftime(\"%Y%m%d_%H%M%S_%f\") # add microseconds\n slug = self._make_slug(title) if title else \"\"\n filename = f\"{timestamp}_{slug}.md\" if slug else f\"{timestamp}.md\"\n target_path = f\"{self.private_prefix}/{filename}\"\n \n with telemetry.track(\"remember\", self.agent_id, path=target_path) as t:\n # Check write permission\n if not self._can_write(target_path):\n raise PermissionError(f\"Agent {self.agent_id} cannot write to {target_path}\")\n \n # Check quota\n self._check_quota()\n \n # Format content\n full_content = self._format_content(content, title, tags)\n \n # Track tokens\n t[\"tokens_in\"] = self._estimate_tokens(content)\n \n meta = {\n \"importance\": importance,\n \"tags\": tags or [],\n \"source\": source,\n \"author\": self.agent_id,\n }\n \n # Use versioned write (if updating existing path)\n if path and hasattr(self.avm, '_versioned_memory'):\n node = self.avm._versioned_memory.write_version(\n path, full_content, self.agent_id, meta\n )\n else:\n node = self.avm.write(target_path, full_content, meta)\n \n # recordauditlog\n self._log_operation(\"write\", node.path)\n \n # Check for similar content (after write)\n similar = []\n if self.config.duplicate_check:\n similar = self._find_similar(content, exclude_path=node.path)\n \n t[\"results\"] = 1\n return RememberResult(node=node, similar=similar)\n \n def batch_remember(self, items: List[Dict[str, Any]]) -> List[AVMNode]:\n \"\"\"\n Batch write multiple memories efficiently.\n \n Args:\n items: List of dicts with keys: content, title, importance, tags\n \n Returns:\n List of created nodes\n \"\"\"\n telemetry = get_telemetry()\n results = []\n \n with telemetry.track(\"batch_remember\", self.agent_id, count=len(items)) as t:\n for item in items[:self.config.batch_size]:\n try:\n result = self.remember(\n content=item.get(\"content\", \"\"),\n title=item.get(\"title\"),\n importance=item.get(\"importance\", 0.5),\n tags=item.get(\"tags\"),\n )\n results.append(result.node)\n except Exception as e:\n # Continue on individual failures\n pass\n \n t[\"results\"] = len(results)\n \n return results\n \n def _find_similar(self, content: str, exclude_path: str = None, \n limit: int = 3) -> List[SimilarMatch]:\n \"\"\"Find similar existing memories using text overlap.\"\"\"\n matches = []\n try:\n # Extract keywords for FTS (skip numbers, short words)\n words = content.lower().split()\n keywords = [w for w in words if len(w) > 2 and not w.isdigit()]\n if not keywords:\n keywords = words[:3]\n \n # Search with single most important keyword to get candidates\n # Then filter locally with Jaccard\n candidates = set()\n for kw in keywords[:3]: # Top 3 keywords\n results = self.avm.store.search(kw, limit=limit * 2)\n for node, _ in results:\n candidates.add(node.path)\n \n # Calculate text overlap similarity for each candidate\n content_words = set(words)\n \n for path in candidates:\n if exclude_path and path == exclude_path:\n continue\n \n node = self.avm.store.get_node(path)\n if not node or not node.content:\n continue\n \n # Extract core content (after --- separator)\n node_text = node.content\n if '---' in node_text:\n parts = node_text.split('---', 1)\n if len(parts) > 1:\n node_text = parts[1]\n \n # Jaccard similarity on words\n node_words = set(node_text.lower().split())\n intersection = content_words & node_words\n union = content_words | node_words\n similarity = len(intersection) / len(union) if union else 0\n \n if similarity >= self.config.duplicate_threshold:\n # Get preview from core content\n preview = node_text.strip()[:100]\n matches.append(SimilarMatch(\n path=path,\n similarity=round(similarity, 3),\n preview=preview\n ))\n \n # Sort by similarity\n matches.sort(key=lambda m: m.similarity, reverse=True)\n matches = matches[:limit]\n except Exception:\n pass\n return matches\n \n def _make_slug(self, title: str) -> str:\n \"\"\"generate URL-safe slug\"\"\"\n if not title:\n return \"\"\n slug = re.sub(r'[^\\w\\s-]', '', title.lower())\n slug = re.sub(r'[\\s_]+', '_', slug)\n return slug[:30]\n \n def _format_content(self, content: str, title: str = None, \n tags: List[str] = None) -> str:\n \"\"\"Format memory content\"\"\"\n lines = []\n if title:\n lines.append(f\"# {title}\")\n lines.append(\"\")\n lines.append(f\"*Created: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\")\n if tags:\n lines.append(f\"*Tags: {', '.join(tags)}*\")\n lines.append(\"\")\n lines.append(\"---\")\n lines.append(\"\")\n lines.append(content)\n return \"\\n\".join(lines)\n \n def _can_write(self, path: str) -> bool:\n \"\"\"Check write permission\"\"\"\n if self._agent_config:\n return self._agent_config.namespaces.can_write(path)\n # Default: can only write to private namespace\n return path.startswith(self.private_prefix)\n \n def _can_read(self, path: str) -> bool:\n \"\"\"Check read permission\"\"\"\n if self._agent_config:\n return self._agent_config.namespaces.can_read(path)\n # default:Can read private and shared\n return path.startswith(self.private_prefix) or path.startswith(self.shared_prefix)\n \n def _check_quota(self):\n \"\"\"Check quota\"\"\"\n if hasattr(self.avm, '_agent_registry') and self._agent_config:\n from .multi_agent import QuotaEnforcer\n enforcer = QuotaEnforcer(self.avm.store)\n result = enforcer.check_quota(self.agent_id, self._agent_config.quota)\n if not result[\"ok\"]:\n raise RuntimeError(f\"Quota exceeded: {result['message']}\")\n \n def _log_operation(self, operation: str, path: str, details: Dict = None):\n \"\"\"recordauditlog\"\"\"\n if hasattr(self.avm, '_audit_log'):\n self.avm._audit_log.log(self.agent_id, operation, path, details)\n \n def share(self, path: str, namespace: str,\n new_name: str = None) -> AVMNode:\n \"\"\"\n Share memory to shared namespace\n \n Args:\n path: Original path (private memory)\n namespace: targetnamednamespace\n new_name: New filename (optional)\n \"\"\"\n # Read original node\n node = self.avm.read(path)\n if not node:\n raise ValueError(f\"Node not found: {path}\")\n \n # Generate new path\n if new_name:\n new_path = f\"{self.shared_prefix}/{namespace}/{new_name}\"\n else:\n filename = path.split(\"/\")[-1]\n new_path = f\"{self.shared_prefix}/{namespace}/{filename}\"\n \n # Update metadata\n meta = node.meta.copy()\n meta[\"shared_from\"] = path\n meta[\"shared_by\"] = self.agent_id\n meta[\"shared_at\"] = utcnow().isoformat()\n \n return self.avm.write(new_path, node.content, meta)\n \n # ─── update ─────────────────────────────────────────────\n \n def update_importance(self, path: str, importance: float):\n \"\"\"Update memory importance\"\"\"\n node = self.avm.read(path)\n if not node:\n raise ValueError(f\"Node not found: {path}\")\n \n # checkpermission\n if not path.startswith(self.private_prefix):\n if node.meta.get(\"agent\") != self.agent_id:\n raise PermissionError(f\"Cannot modify: {path}\")\n \n meta = node.meta.copy()\n meta[\"importance\"] = max(0.0, min(1.0, importance))\n \n return self.avm.write(path, node.content, meta)\n \n def mark_accessed(self, path: str):\n \"\"\"Mark memory as accessed (for recency calculation)\"\"\"\n node = self.avm.read(path)\n if node:\n meta = node.meta.copy()\n meta[\"last_accessed\"] = utcnow().isoformat()\n # Update meta only, not content\n self.avm.store._put_node_internal(\n AVMNode(path=path, content=node.content, meta=meta),\n save_diff=False\n )\n \n # ─── columntable ─────────────────────────────────────────────\n \n def list_private(self, limit: int = 100) -> List[AVMNode]:\n \"\"\"List private memories\"\"\"\n return self.avm.list(self.private_prefix, limit)\n \n def list_shared(self, namespace: str = None, \n limit: int = 100) -> List[AVMNode]:\n \"\"\"listsharedmemory\"\"\"\n prefix = f\"{self.shared_prefix}/{namespace}\" if namespace else self.shared_prefix\n return self.avm.list(prefix, limit)\n \n def stats(self) -> Dict[str, Any]:\n \"\"\"statisticsinfo\"\"\"\n private = self.list_private()\n shared = self.list_shared()\n \n return {\n \"agent_id\": self.agent_id,\n \"private_count\": len(private),\n \"shared_accessible\": len(shared),\n \"private_prefix\": self.private_prefix,\n \"config\": {\n \"max_tokens\": self.config.default_max_tokens,\n \"strategy\": self.config.default_strategy.value,\n }\n }\n \n # ─── Advanced Features ─────────────────────────────────────────\n \n def subscribe(self, pattern: str, callback) -> str:\n \"\"\"\n Subscribe to path changes\n \n Args:\n pattern: Glob mode (e.g., \"/memory/shared/market/*\")\n callback: callbackfunction (event) -> None\n \n Returns:\n subscribe ID(forcancelledsubscribe)\n \"\"\"\n from .advanced import SubscriptionManager\n \n if not hasattr(self.avm, '_subscription_manager'):\n self.avm._subscription_manager = SubscriptionManager()\n \n return self.avm._subscription_manager.subscribe(\n pattern, callback, subscriber_id=self.agent_id\n )\n \n def unsubscribe(self, pattern: str = None):\n \"\"\"cancelledsubscribe\"\"\"\n if hasattr(self.avm, '_subscription_manager'):\n self.avm._subscription_manager.unsubscribe(self.agent_id, pattern)\n \n def recall_recent(self, query: str, \n time_range: str = \"last_7d\",\n max_tokens: int = None) -> str:\n \"\"\"\n Retrieve memories within time range\n \n Args:\n query: Query text\n time_range: timerange (\"last_24h\", \"last_7d\", \"last_30d\", \"today\")\n max_tokens: Max token count\n \"\"\"\n from .advanced import TimeQuery\n \n time_query = TimeQuery(self.avm.store)\n recent_nodes = time_query.query(\n prefix=\"/memory\",\n time_range=time_range,\n limit=50\n )\n \n # filterpermission\n recent_nodes = [n for n in recent_nodes if self._can_read(n.path)]\n \n # Convert to scored nodes and synthesize\n max_tokens = max_tokens or self.config.default_max_tokens\n scored = []\n \n for node in recent_nodes:\n sn = ScoredNode(node=node)\n sn.importance_score = node.meta.get(\"importance\", 0.5)\n sn.recency_score = 1.0 # Already recent\n sn.relevance_score = 0.5 # Time query ignores relevance\n sn.final_score = sn.importance_score\n sn.summary = self._extract_summary(node)\n sn.estimated_tokens = self._estimate_tokens(sn.summary)\n scored.append(sn)\n \n selected = self._select_within_budget(scored, max_tokens)\n return self._compact_synthesis(selected, f\"{query} (time: {time_range})\", \n max_tokens, ScoringStrategy.IMPORTANCE)\n \n def remember_derived(self, content: str,\n derived_from: List[str],\n title: str = None,\n reasoning: str = None,\n **kwargs) -> AVMNode:\n \"\"\"\n Write derived memory, auto-establish source links\n \n Args:\n content: derived/conclusioncontent\n derived_from: sourcepathcolumntable\n title: title\n reasoning: Reasoning description\n \"\"\"\n from .advanced import DerivedLinkManager\n \n # writememory\n node = self.remember(content, title=title, **kwargs)\n \n # Establish derived links\n link_mgr = DerivedLinkManager(self.avm.store)\n link_mgr.link_derived(node.path, derived_from, reasoning)\n \n return node\n \n def check_duplicate(self, content: str, \n threshold: float = 0.85) -> \"DedupeResult\":\n \"\"\"\n Check if duplicate with existing memory\n \n Args:\n content: content\n threshold: Similarity threshold (0.85 conservative, 0.95 strict)\n \n Returns:\n DedupeResult\n \"\"\"\n from .advanced import SemanticDeduplicator, DedupeResult\n \n embedding_store = getattr(self.avm, '_embedding_store', None)\n deduper = SemanticDeduplicator(self.avm.store, embedding_store)\n \n return deduper.check_duplicate(\n content, \n prefix=self.private_prefix,\n threshold=threshold\n )\n \n def remember_if_new(self, content: str, \n threshold: float = 0.85,\n **kwargs) -> Optional[AVMNode]:\n \"\"\"\n Write only if content not duplicate\n \n Returns:\n AVMNode if written, None if duplicate\n \"\"\"\n result = self.check_duplicate(content, threshold)\n \n if result.is_duplicate:\n return None\n \n return self.remember(content, **kwargs)\n \n def get_cold_memories(self, threshold: float = 0.1,\n limit: int = 20) -> List[AVMNode]:\n \"\"\"\n Get decayed cold memories\n \n Args:\n threshold: Weight threshold after decay\n limit: maxcount\n \"\"\"\n from .advanced import MemoryDecay\n \n decay = MemoryDecay(self.avm.store)\n return decay.get_cold_memories(\n prefix=self.private_prefix,\n threshold=threshold,\n limit=limit\n )\n \n def compact_versions(self, path: str, \n keep_recent: int = 3) -> \"CompactionResult\":\n \"\"\"\n Compress path history versions\n \n Args:\n path: Path to compress\n keep_recent: Keep recent N versions\n \"\"\"\n from .advanced import MemoryCompactor\n \n compactor = MemoryCompactor(self.avm.store)\n return compactor.compact(path, keep_recent)\n \n # ─── Tag System ─────────────────────────────────────────\n \n def by_tag(self, tag: str, limit: int = 100) -> List[AVMNode]:\n \"\"\"Get memories by tag\"\"\"\n from .advanced import TagManager\n \n tag_mgr = TagManager(self.avm.store)\n \n # Search private and shared namespaces\n private_nodes = tag_mgr.by_tag(tag, prefix=self.private_prefix, limit=limit)\n shared_nodes = tag_mgr.by_tag(tag, prefix=self.shared_prefix, limit=limit)\n \n all_nodes = private_nodes + shared_nodes\n \n # Filter by permission and dedupe\n seen = set()\n result = []\n for n in all_nodes:\n if n.path not in seen and self._can_read(n.path):\n seen.add(n.path)\n result.append(n)\n \n return result[:limit]\n \n def tag_cloud(self) -> Dict[str, int]:\n \"\"\"Get tag cloud (frequency distribution)\"\"\"\n from .advanced import TagManager\n \n tag_mgr = TagManager(self.avm.store)\n \n # Merge tags from private and shared namespaces\n private_cloud = tag_mgr.tag_cloud(prefix=self.private_prefix)\n shared_cloud = tag_mgr.tag_cloud(prefix=self.shared_prefix)\n \n # Merge counts\n combined = {}\n for tag, count in private_cloud.items():\n combined[tag] = combined.get(tag, 0) + count\n for tag, count in shared_cloud.items():\n combined[tag] = combined.get(tag, 0) + count\n \n return dict(sorted(combined.items(), key=lambda x: x[1], reverse=True))\n \n def suggest_tags(self, content: str, top_k: int = 5) -> List[str]:\n \"\"\"contentrecommendationtag\"\"\"\n from .advanced import TagManager\n \n tag_mgr = TagManager(self.avm.store)\n return tag_mgr.suggest_tags(content, top_k)\n \n # ─── Access Statistics ─────────────────────────────────────────\n \n def hot_memories(self, days: int = 7, limit: int = 10) -> List[Tuple[str, int]]:\n \"\"\"Get hot memories (high access)\"\"\"\n from .advanced import AccessStats\n \n stats = AccessStats(self.avm.store)\n return stats.hot_paths(days, limit)\n \n def cold_memories(self, days: int = 30, limit: int = 20) -> List[AVMNode]:\n \"\"\"Get cold memories (rarely accessed)\"\"\"\n from .advanced import AccessStats\n \n stats = AccessStats(self.avm.store)\n nodes = stats.cold_paths(days, prefix=\"/memory\", limit=limit)\n return [n for n in nodes if self._can_read(n.path)]\n \n def my_activity(self, days: int = 7) -> Dict[str, int]:\n \"\"\"Get my activity stats\"\"\"\n from .advanced import AccessStats\n \n stats = AccessStats(self.avm.store)\n return stats.agent_activity(self.agent_id, days)\n \n # ─── export/snapshot ─────────────────────────────────────────\n \n def export(self, format: str = \"jsonl\") -> str:\n \"\"\"\n Export my memories\n \n Args:\n format: \"jsonl\" or \"markdown\"\n \"\"\"\n from .advanced import ExportManager\n \n export_mgr = ExportManager(self.avm.store)\n \n if format == \"markdown\":\n return export_mgr.export_markdown(\n prefix=self.private_prefix,\n agent_id=self.agent_id\n )\n else:\n return export_mgr.export_jsonl(\n prefix=self.private_prefix,\n agent_id=self.agent_id\n )\n \n def import_memories(self, jsonl: str) -> int:\n \"\"\"\n importmemory\n \n Args:\n jsonl: Memory data in JSONL format\n \n Returns:\n importcount\n \"\"\"\n from .advanced import ExportManager\n \n export_mgr = ExportManager(self.avm.store)\n return export_mgr.import_jsonl(jsonl)\n \n # ─── Navigation & Discovery ─────────────────────────────────────────\n \n def browse(self, path: str = \"/memory\", depth: int = 2) -> Dict[str, Any]:\n \"\"\"\n Browse memory structure like a directory tree.\n Helps agent discover memories without knowing exact keywords.\n \n Args:\n path: Starting path (default: /memory)\n depth: How deep to traverse (default: 2)\n \n Returns:\n Tree structure with paths and summaries\n \"\"\"\n nodes = self.avm.list(path, limit=100)\n \n # Build tree structure\n tree = {\"path\": path, \"children\": {}, \"count\": 0}\n \n for node in nodes:\n if not self._can_read(node.path):\n continue\n \n # Get relative path parts\n rel_path = node.path[len(path):].lstrip(\"/\")\n parts = rel_path.split(\"/\")\n \n # Only include up to depth levels\n if len(parts) > depth:\n parts = parts[:depth]\n \n # Build nested structure\n current = tree\n for i, part in enumerate(parts):\n if part not in current[\"children\"]:\n current[\"children\"][part] = {\n \"path\": path + \"/\" + \"/\".join(parts[:i+1]),\n \"children\": {},\n \"count\": 0\n }\n current = current[\"children\"][part]\n current[\"count\"] += 1\n \n return self._format_tree(tree, depth=0)\n \n def _format_tree(self, tree: Dict, depth: int = 0) -> str:\n \"\"\"Format tree as readable string\"\"\"\n lines = []\n indent = \" \" * depth\n \n for name, subtree in sorted(tree.get(\"children\", {}).items()):\n count = subtree.get(\"count\", 0)\n icon = \"📁\" if subtree.get(\"children\") else \"📄\"\n lines.append(f\"{indent}{icon} {name} ({count})\")\n \n if subtree.get(\"children\"):\n lines.append(self._format_tree(subtree, depth + 1))\n \n return \"\\n\".join(lines)\n \n def explore(self, path: str, depth: int = 2) -> str:\n \"\"\"\n Explore from a memory node via knowledge graph.\n Follows links to discover related memories.\n \n Args:\n path: Starting node path\n depth: How many hops to follow (default: 2)\n \n Returns:\n Related memories with relationship types\n \"\"\"\n if not self._can_read(path):\n return f\"Cannot access: {path}\"\n \n # Get the starting node\n node = self.avm.read(path)\n if not node:\n return f\"Not found: {path}\"\n \n # BFS to explore graph\n visited = {path}\n current_level = [path]\n results = [f\"## Starting from: {path}\\n{node.content[:200]}...\\n\"]\n \n for d in range(depth):\n next_level = []\n level_results = []\n \n for p in current_level:\n edges = self.avm.links(p)\n for edge in edges:\n target = edge.source if edge.source != p else edge.target\n if target not in visited and self._can_read(target):\n visited.add(target)\n next_level.append(target)\n \n target_node = self.avm.read(target)\n if target_node:\n rel_type = edge.edge_type.value if hasattr(edge.edge_type, 'value') else str(edge.edge_type)\n preview = target_node.content[:100].replace(\"\\n\", \" \")\n level_results.append(f\" [{rel_type}] {target}\\n {preview}...\")\n \n if level_results:\n results.append(f\"\\n### Hop {d + 1}:\")\n results.extend(level_results)\n \n current_level = next_level\n if not current_level:\n break\n \n if len(results) == 1:\n results.append(\"\\nNo linked memories found. Try creating links with avm.link()\")\n \n return \"\\n\".join(results)\n \n def topics(self, limit: int = 10) -> str:\n \"\"\"\n Get high-level topic overview based on tags and paths.\n Helps agent understand what's in memory without keywords.\n \n Args:\n limit: Max topics to return\n \n Returns:\n Topic summary with counts\n \"\"\"\n # Get tag cloud\n cloud = self.tag_cloud()\n \n # Get path prefixes\n nodes = self.avm.list(\"/memory\", limit=200)\n prefix_counts = {}\n \n for node in nodes:\n if not self._can_read(node.path):\n continue\n # Extract second-level path as category\n parts = node.path.split(\"/\")\n if len(parts) >= 3:\n category = parts[2] # e.g., \"private\", \"shared\", \"market\", etc.\n prefix_counts[category] = prefix_counts.get(category, 0) + 1\n \n # Format output\n lines = [\"## Memory Topics\\n\"]\n \n lines.append(\"### By Category:\")\n for cat, count in sorted(prefix_counts.items(), key=lambda x: x[1], reverse=True)[:limit]:\n lines.append(f\" 📁 {cat}: {count} memories\")\n \n lines.append(\"\\n### By Tag:\")\n for tag, count in list(cloud.items())[:limit]:\n lines.append(f\" 🏷️ {tag}: {count} occurrences\")\n \n return \"\\n\".join(lines)\n \n def timeline(self, days: int = 7, limit: int = 20) -> str:\n \"\"\"\n View memories by time.\n Helps recall \"what did I observe/learn recently?\"\n \n Args:\n days: How many days back to look\n limit: Max memories to return\n \n Returns:\n Timeline of recent memories\n \"\"\"\n cutoff = utcnow() - timedelta(days=days)\n \n nodes = self.avm.query_time(\n prefix=\"/memory\",\n time_range=f\"last_{days}d\"\n )\n \n # Filter by permission\n accessible = [n for n in nodes if self._can_read(n.path)][:limit]\n \n if not accessible:\n return f\"No memories in the last {days} days.\"\n \n # Group by date\n by_date = {}\n for node in accessible:\n date_str = node.created_at.strftime(\"%Y-%m-%d\")\n if date_str not in by_date:\n by_date[date_str] = []\n by_date[date_str].append(node)\n \n # Format output\n lines = [f\"## Timeline (last {days} days)\\n\"]\n \n for date_str in sorted(by_date.keys(), reverse=True):\n lines.append(f\"### {date_str}\")\n for node in by_date[date_str]:\n time_str = node.created_at.strftime(\"%H:%M\")\n title = node.meta.get(\"title\", node.path.split(\"/\")[-1])\n preview = node.content[:60].replace(\"\\n\", \" \")\n lines.append(f\" [{time_str}] {title}: {preview}...\")\n lines.append(\"\")\n \n return \"\\n\".join(lines)\n","content_type":"text/x-python; charset=utf-8","language":"python","size":44562,"content_sha256":"f892ed7c9f5371d5de3138ac337a8088afa0dafd67af5763bcc0760d7f8e107a"},{"filename":"avm/api_server.py","content":"\"\"\"\nAVM HTTP API Server\n轻量 FastAPI server,提供 REST 接口访问 AVM 记忆。\n供 Docker 容器和 Windows 客户端使用。\n\"\"\"\nfrom __future__ import annotations\nimport os\nfrom pathlib import Path\n\ntry:\n from fastapi import FastAPI, HTTPException\n from fastapi.middleware.cors import CORSMiddleware\n import uvicorn\n HAS_FASTAPI = True\nexcept ImportError:\n HAS_FASTAPI = False\n\nfrom .core import AVM\n\napp = None # lazy init\n\n\ndef create_app(agent_id: str = \"default\") -> \"FastAPI\":\n if not HAS_FASTAPI:\n raise ImportError(\"Install fastapi and uvicorn: pip install fastapi uvicorn\")\n\n from fastapi import FastAPI\n from fastapi.middleware.cors import CORSMiddleware\n from pydantic import BaseModel\n\n avm = AVM(agent_id=agent_id)\n app = FastAPI(title=\"AVM API\", version=\"1.0\")\n app.add_middleware(CORSMiddleware, allow_origins=[\"*\"], allow_methods=[\"*\"], allow_headers=[\"*\"])\n\n class RememberRequest(BaseModel):\n content: str\n importance: float = 0.5\n tags: list[str] = []\n\n class RecallRequest(BaseModel):\n query: str\n max_tokens: int = 500\n\n @app.get(\"/health\")\n def health():\n return {\"status\": \"ok\", \"agent\": agent_id}\n\n @app.post(\"/remember\")\n def remember(req: RememberRequest):\n \"\"\"Store a memory.\"\"\"\n memory = avm.agent_memory(agent_id)\n node = memory.remember(req.content, importance=req.importance, tags=req.tags)\n return {\"path\": str(node.path)}\n\n @app.post(\"/recall\")\n def recall(req: RecallRequest):\n \"\"\"Retrieve relevant memories.\"\"\"\n memory = avm.agent_memory(agent_id)\n results = memory.recall(req.query, max_tokens=req.max_tokens)\n return {\"results\": results, \"query\": req.query}\n\n @app.get(\"/stats\")\n def stats():\n \"\"\"Memory statistics.\"\"\"\n return avm.stats()\n\n return app\n\n\ndef main():\n \"\"\"Entry point for `avm serve` CLI command.\"\"\"\n import argparse\n parser = argparse.ArgumentParser(description=\"AVM HTTP API Server\")\n parser.add_argument(\"--agent\", default=os.environ.get(\"AVM_AGENT\", \"default\"))\n parser.add_argument(\"--host\", default=\"0.0.0.0\")\n parser.add_argument(\"--port\", type=int, default=8765)\n args = parser.parse_args()\n\n app = create_app(args.agent)\n import uvicorn\n uvicorn.run(app, host=args.host, port=args.port)\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":2416,"content_sha256":"50d0c69d1684c2507b0d9d313ab98a7219b6a3c206b69dd776fca23c2789ad01"},{"filename":"avm/cli.py","content":"#!/usr/bin/env python3\n\"\"\"\navm/cli.py - AVM command line interface\n\nConfig-driven virtual filesystem CLI\n\nusage:\n vfs read /market/indicators/AAPL.md\n vfs write /memory/lesson.md --content \"Today learned...\"\n vfs search \"RSI oversold\"\n vfs links /research/MSFT.md\n vfs stats\n\"\"\"\n\nimport argparse\nimport json\nimport sys\nimport platform\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom .core import AVM\nfrom .config import load_config\nfrom .node import AVMNode, NodeType\nfrom .graph import EdgeType\n\nIS_WINDOWS = sys.platform == \"win32\"\n\n# Alias for backwards compatibility\nVFS = AVM\n\n\ndef get_vfs(config_path: Optional[str] = None, db_path: Optional[str] = None) -> AVM:\n \"\"\"Get VFS instance\"\"\"\n config = load_config(config_path)\n if db_path:\n config.db_path = db_path\n return VFS(config)\n\n\ndef cmd_read(args):\n \"\"\"readnode\"\"\"\n vfs = get_vfs(args.config, args.db)\n path = args.path\n \n try:\n if args.as_of:\n node = vfs.read_at_time(path, args.as_of)\n elif args.version:\n node = vfs.read_at_version(path, args.version)\n else:\n node = vfs.read(path, force_refresh=args.refresh)\n except PermissionError as e:\n print(f\"Permission denied: {e}\", file=sys.stderr)\n return 1\n \n if node is None:\n print(f\"Not found: {path}\", file=sys.stderr)\n return 1\n \n if args.json:\n print(json.dumps(node.to_dict(), indent=2, default=str))\n else:\n if args.meta:\n print(f\"# {path}\")\n print(f\"# Version: {node.version}\")\n print(f\"# Updated: {node.updated_at}\")\n print(f\"# Meta: {json.dumps(node.meta)}\")\n print()\n print(node.content)\n \n return 0\n\n\ndef cmd_write(args):\n \"\"\"writenode\"\"\"\n vfs = get_vfs(args.config, args.db)\n path = args.path\n \n # Get content\n if args.content:\n content = args.content\n elif args.file:\n content = Path(args.file).read_text()\n else:\n content = sys.stdin.read()\n \n # Parse metadata\n meta = {}\n if args.meta:\n meta = json.loads(args.meta)\n \n try:\n saved = vfs.write(path, content, meta)\n except PermissionError as e:\n print(f\"Permission denied: {e}\", file=sys.stderr)\n return 1\n \n if args.json:\n print(json.dumps(saved.to_dict(), indent=2, default=str))\n else:\n print(f\"Saved: {saved.path} (v{saved.version})\")\n \n return 0\n\n\ndef cmd_mv(args):\n \"\"\"Move/rename a node or an entire prefix tree.\"\"\"\n vfs = get_vfs(args.config, args.db)\n try:\n n = vfs.rename(args.src, args.dst)\n print(f\"Moved {n} node(s): {args.src} → {args.dst}\")\n return 0\n except FileNotFoundError as e:\n print(f\"mv: {e}\", file=sys.stderr)\n return 1\n except PermissionError as e:\n print(f\"mv: permission denied: {e}\", file=sys.stderr)\n return 1\n\n\ndef cmd_delete(args):\n \"\"\"deletenode\"\"\"\n vfs = get_vfs(args.config, args.db)\n path = args.path\n \n try:\n if vfs.delete(path):\n print(f\"Deleted: {path}\")\n return 0\n else:\n print(f\"Not found: {path}\", file=sys.stderr)\n return 1\n except PermissionError as e:\n print(f\"Permission denied: {e}\", file=sys.stderr)\n return 1\n\n\ndef cmd_list(args):\n \"\"\"listnode\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n nodes = vfs.list(args.prefix, limit=args.limit)\n \n if args.json:\n print(json.dumps([n.to_dict() for n in nodes], indent=2, default=str))\n else:\n for node in nodes:\n size = len(node.content)\n print(f\"{node.path}\\tv{node.version}\\t{size}B\\t{node.updated_at.strftime('%Y-%m-%d %H:%M')}\")\n \n return 0\n\n\ndef cmd_links(args):\n \"\"\"View node relationships\"\"\"\n vfs = get_vfs(args.config, args.db)\n path = args.path\n \n edges = vfs.links(path, direction=args.direction)\n \n if args.json:\n print(json.dumps([\n {\n \"source\": e.source,\n \"target\": e.target,\n \"type\": e.edge_type.value,\n \"weight\": e.weight,\n }\n for e in edges\n ], indent=2))\n else:\n if not edges:\n print(f\"No links for {path}\")\n else:\n print(f\"Links for {path}:\")\n for e in edges:\n arrow = \"-->\" if e.source == path else \"\u003c--\"\n other = e.target if e.source == path else e.source\n print(f\" {arrow} [{e.edge_type.value}] {other}\")\n \n return 0\n\n\ndef cmd_link(args):\n \"\"\"addrelated\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n edge_type = EdgeType(args.type)\n edge = vfs.link(args.source, args.target, edge_type, args.weight)\n \n print(f\"Added: {edge}\")\n return 0\n\n\ndef cmd_search(args):\n \"\"\"full-textsearch\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n results = vfs.search(args.query, limit=args.limit)\n \n if args.json:\n print(json.dumps([\n {\"path\": n.path, \"score\": s, \"snippet\": n.content[:200]}\n for n, s in results\n ], indent=2))\n else:\n if not results:\n print(\"No results found.\")\n else:\n for node, score in results:\n snippet = node.content[:100].replace(\"\\n\", \" \")\n print(f\"[{score:.2f}] {node.path}\")\n print(f\" {snippet}...\")\n print()\n \n return 0\n\n\ndef cmd_history(args):\n \"\"\"View change history\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n diffs = vfs.history(args.path, limit=args.limit)\n \n if args.json:\n print(json.dumps([d.to_dict() for d in diffs], indent=2, default=str))\n else:\n for d in diffs:\n print(f\"v{d.version} [{d.change_type}] {d.changed_at.strftime('%Y-%m-%d %H:%M:%S')}\")\n if args.verbose and d.diff_content:\n print(d.diff_content[:500])\n print()\n \n return 0\n\n\ndef cmd_warmup(args):\n \"\"\"Pre-load embedding model for faster recall\"\"\"\n import time\n import os\n import warnings\n import io\n import contextlib\n \n # Suppress all warnings\n warnings.filterwarnings('ignore')\n os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'\n os.environ['TRANSFORMERS_VERBOSITY'] = 'error'\n \n start = time.time()\n \n # Suppress stderr during model load\n with contextlib.redirect_stderr(io.StringIO()):\n vfs = get_vfs(args.config, args.db)\n \n if vfs._embedding_store is None:\n print(\"Embedding not enabled. Set embedding.enabled=true in config.yaml\")\n return\n \n # Warmup the backend (suppress stderr)\n backend = vfs._embedding_store.backend\n with contextlib.redirect_stderr(io.StringIO()):\n if hasattr(backend, 'warmup'):\n backend.warmup()\n \n elapsed = (time.time() - start) * 1000\n print(f\"✓ Embedding model loaded in {elapsed:.0f}ms\")\n print(f\" Model: {getattr(backend, 'model_name', 'unknown')}\")\n print(f\" Dimension: {backend.dimension}\")\n\n\ndef cmd_stats(args):\n \"\"\"storagestatistics\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n stats = vfs.stats()\n \n if args.json:\n print(json.dumps(stats, indent=2))\n else:\n try:\n from rich.console import Console\n from rich.table import Table\n from rich.panel import Panel\n \n console = Console()\n \n # Main stats panel\n main_table = Table(show_header=False, box=None)\n main_table.add_column(\"Key\", style=\"cyan\")\n main_table.add_column(\"Value\", style=\"green\")\n main_table.add_row(\"📁 Database\", stats['db_path'])\n main_table.add_row(\"📄 Nodes\", str(stats['nodes']))\n main_table.add_row(\"🔗 Edges\", str(stats['edges']))\n main_table.add_row(\"📝 Diffs\", str(stats['diffs']))\n \n console.print(Panel(main_table, title=\"[bold]AVM Statistics[/bold]\", border_style=\"blue\"))\n \n # By prefix table\n if stats.get(\"by_prefix\"):\n prefix_table = Table(title=\"By Prefix\", show_header=True)\n prefix_table.add_column(\"Prefix\", style=\"cyan\")\n prefix_table.add_column(\"Count\", style=\"green\", justify=\"right\")\n \n for prefix, count in sorted(stats[\"by_prefix\"].items()):\n prefix_table.add_row(prefix, str(count))\n \n console.print(prefix_table)\n except ImportError:\n # Fallback to plain text\n print(f\"VFS Statistics\")\n print(f\"==============\")\n print(f\"Database: {stats['db_path']}\")\n print(f\"Nodes: {stats['nodes']}\")\n print(f\"Edges: {stats['edges']}\")\n print(f\"Diffs: {stats['diffs']}\")\n print()\n print(\"By prefix:\")\n for prefix, count in stats.get(\"by_prefix\", {}).items():\n print(f\" {prefix}: {count}\")\n \n return 0\n\n\n\n\n\ndef cmd_refresh(args):\n \"\"\"refresh live node\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n if args.all:\n print(\"Refreshing all live nodes...\")\n nodes = vfs.list(\"/live\", limit=1000)\n count = 0\n for node in nodes:\n try:\n refreshed = vfs.read(node.path, force_refresh=True)\n if refreshed:\n count += 1\n print(f\" {node.path}\")\n except Exception as e:\n print(f\" {node.path} - Error: {e}\")\n print(f\"Refreshed {count} nodes\")\n elif args.path:\n try:\n node = vfs.read(args.path, force_refresh=True)\n if node:\n print(f\"Refreshed: {node.path} (v{node.version})\")\n else:\n print(f\"Not found: {args.path}\", file=sys.stderr)\n return 1\n except PermissionError as e:\n print(f\"Permission denied: {e}\", file=sys.stderr)\n return 1\n else:\n # listexpirednode\n nodes = vfs.list(\"/live\", limit=1000)\n expired = [n for n in nodes if n.is_expired]\n \n if expired:\n print(f\"Expired nodes ({len(expired)}):\")\n for node in expired:\n print(f\" {node.path} (updated: {node.updated_at})\")\n else:\n print(\"No expired nodes.\")\n \n return 0\n\n\ndef cmd_config(args):\n \"\"\"Show configuration\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n if args.json:\n print(json.dumps(vfs.config.to_dict(), indent=2))\n else:\n print(\"VFS Configuration\")\n print(\"=================\")\n print()\n print(\"Providers:\")\n for p in vfs.config.providers:\n print(f\" {p.pattern} -> {p.type} (ttl={p.ttl}s)\")\n print()\n print(\"Permissions:\")\n for r in vfs.config.permissions:\n print(f\" {r.pattern} -> {r.access}\")\n print()\n print(f\"Default access: {vfs.config.default_access}\")\n print(f\"Default TTL: {vfs.config.default_ttl}s\")\n \n return 0\n\n\ndef cmd_retrieve(args):\n \"\"\"Linked retrieval\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n result = vfs.retrieve(\n args.query,\n k=args.limit,\n expand_graph=not args.no_graph,\n graph_depth=args.depth,\n )\n \n if args.json:\n print(json.dumps({\n \"query\": result.query,\n \"nodes\": [{\"path\": n.path, \"score\": result.scores.get(n.path, 0)} \n for n in result.nodes],\n \"sources\": result.sources,\n \"edges\": result.graph_edges,\n }, indent=2))\n else:\n print(f\"Query: {result.query}\")\n print(f\"Found: {len(result.nodes)} nodes\")\n print()\n \n for node in result.nodes:\n score = result.get_score(node.path)\n source = result.get_source(node.path)\n badge = {\"semantic\": \"🎯\", \"fts\": \"📝\", \"graph\": \"🔗\"}.get(source, \"\")\n print(f\"{badge} [{score:.2f}] {node.path}\")\n \n if result.graph_edges:\n print()\n print(\"Graph edges:\")\n for src, tgt, etype in result.graph_edges:\n print(f\" {src} --[{etype}]--> {tgt}\")\n \n return 0\n\n\ndef cmd_synthesize(args):\n \"\"\"Generate synthesized document\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n doc = vfs.synthesize(\n args.query,\n k=args.limit,\n title=args.title,\n )\n \n print(doc)\n\n\ndef cmd_memory_recall(args):\n \"\"\"Agent Memory retrieve\"\"\"\n import io\n import contextlib\n from .agent_memory import ScoringStrategy\n \n # Suppress progress bars by default (unless --verbose)\n if not getattr(args, 'verbose', False):\n # Redirect stderr to suppress progress bars and warnings\n import warnings\n warnings.filterwarnings('ignore')\n # Suppress HF/embedding progress bars via environment\n import os\n os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1'\n os.environ['TRANSFORMERS_VERBOSITY'] = 'error'\n \n vfs = get_vfs(args.config, args.db)\n memory = vfs.agent_memory(args.agent)\n \n strategy = ScoringStrategy(args.strategy) if args.strategy else None\n \n min_rel = getattr(args, 'min_relevance', 0.3)\n \n # Capture and filter output if not verbose\n if not getattr(args, 'verbose', False):\n # Run with stderr suppressed\n with contextlib.redirect_stderr(io.StringIO()):\n result = memory.recall(\n args.query,\n max_tokens=args.max_tokens,\n strategy=strategy,\n include_shared=not args.private_only,\n min_relevance=min_rel,\n )\n else:\n result = memory.recall(\n args.query,\n max_tokens=args.max_tokens,\n strategy=strategy,\n include_shared=not args.private_only,\n min_relevance=min_rel,\n )\n \n print(result)\n\n\ndef cmd_memory_remember(args):\n \"\"\"write Agent Memory\"\"\"\n vfs = get_vfs(args.config, args.db)\n memory = vfs.agent_memory(args.agent)\n \n # Get content\n if args.content:\n content = args.content\n elif args.file:\n content = Path(args.file).read_text()\n else:\n content = sys.stdin.read()\n \n tags = args.tags.split(\",\") if args.tags else None\n \n node = memory.remember(\n content,\n title=args.title,\n importance=args.importance,\n tags=tags,\n )\n \n print(f\"Remembered: {node.path} (importance={args.importance})\")\n\n\ndef cmd_context(args):\n \"\"\"Generate context injection for agent prompts.\n \n Outputs formatted memory context suitable for system prompts.\n \"\"\"\n vfs = get_vfs(args.config, args.db)\n memory = vfs.agent_memory(args.agent)\n \n # Build context from multiple sources\n sections = []\n \n # 1. Recent memories (last 24h)\n recent = memory.recall(\n \"recent activity\",\n max_tokens=args.recent_tokens,\n prefixes=[f\"/memory/private/{args.agent}/\"],\n )\n if recent.strip():\n sections.append(f\"## Recent Activity\\n{recent}\")\n \n # 2. User preferences (if asked)\n if args.preferences:\n prefs = memory.recall(\n \"user preferences settings\",\n max_tokens=args.pref_tokens,\n )\n if prefs.strip():\n sections.append(f\"## User Preferences\\n{prefs}\")\n \n # 3. Lessons learned (high importance)\n if args.lessons:\n lessons = memory.recall(\n \"lesson learned mistake important\",\n max_tokens=args.lesson_tokens,\n )\n if lessons.strip():\n sections.append(f\"## Lessons Learned\\n{lessons}\")\n \n # 4. Custom query\n if args.query:\n custom = memory.recall(args.query, max_tokens=args.query_tokens)\n if custom.strip():\n sections.append(f\"## Relevant Context\\n{custom}\")\n \n # Output\n if not sections:\n if not args.quiet:\n print(\"# No memory context found\", file=sys.stderr)\n return 0\n \n output = \"\\n\\n\".join(sections)\n \n if args.format == \"markdown\":\n print(output)\n elif args.format == \"xml\":\n print(f\"\u003cmemory_context agent=\\\"{args.agent}\\\">\\n{output}\\n\u003c/memory_context>\")\n elif args.format == \"json\":\n print(json.dumps({\"agent\": args.agent, \"context\": output}))\n\n\ndef cmd_ask(args):\n \"\"\"Ask the Librarian for information routing.\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n from .librarian import Librarian, PrivacyPolicy\n \n privacy = PrivacyPolicy(args.privacy)\n librarian = Librarian(vfs.store, vfs.config, privacy)\n \n response = librarian.query(args.agent, args.query, limit=args.limit)\n \n if args.json:\n print(json.dumps(response.to_dict(), indent=2, default=str))\n return\n \n print(f\"Query: {args.query}\")\n print(f\"Requester: {args.agent}\")\n print(f\"Matches: {response.accessible_count}/{response.total_matches} accessible\")\n print()\n \n if response.accessible:\n print(\"## Accessible Content\")\n for node in response.accessible[:5]:\n print(f\" • {node.path}\")\n if node.content:\n snippet = node.content[:100].replace(\"\\n\", \" \")\n print(f\" {snippet}...\")\n print()\n \n if response.suggestions:\n print(\"## Collaboration Suggestions\")\n for s in response.suggestions[:5]:\n print(f\" • Ask **{s.agent}** about: {s.topic}\")\n if s.reason:\n print(f\" ({s.reason})\")\n print()\n\n\ndef cmd_who_knows(args):\n \"\"\"Find agents who know about a topic.\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n from .librarian import Librarian\n \n librarian = Librarian(vfs.store, vfs.config)\n agents = librarian.who_knows(args.topic, limit=args.limit)\n \n if args.json:\n print(json.dumps([a.to_dict() for a in agents], indent=2))\n return\n \n print(f\"Agents who might know about '{args.topic}':\")\n for agent in agents:\n caps = \", \".join(agent.capabilities) if agent.capabilities else \"general\"\n print(f\" • {agent.id} ({caps}) - {agent.memory_count} memories\")\n\n\ndef cmd_gossip(args):\n \"\"\"Gossip protocol commands\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n from .topic_index import TopicIndex\n from .gossip import GossipStore\n \n topic_index = TopicIndex(vfs.store)\n gossip = GossipStore(vfs.store, topic_index, args.agent)\n \n if args.gossip_action == \"publish\":\n digest = gossip.publish_digest()\n print(f\"Published digest v{digest.version} with {len(digest.topics)} topics\")\n \n elif args.gossip_action == \"refresh\":\n gossip.refresh()\n print(f\"Refreshed. Known agents: {len(gossip.agents())}\")\n for agent in gossip.agents():\n d = gossip.get_digest(agent)\n print(f\" • {agent} (v{d.version}, {len(d.topics)} topics)\")\n \n elif args.gossip_action == \"who-knows\":\n results = gossip.who_knows(args.topic)\n if not results:\n print(f\"No agents found for topic: {args.topic}\")\n else:\n print(f\"Agents who might know about '{args.topic}':\")\n for agent, confidence in results:\n print(f\" • {agent} ({confidence:.0%} confidence)\")\n \n elif args.gossip_action == \"stats\":\n stats = gossip.stats()\n print(f\"Gossip Protocol Stats\")\n print(f\"=====================\")\n print(f\"Known agents: {stats['known_agents']}\")\n print(f\"Own version: {stats['own_version']}\")\n print()\n for a in stats['agents']:\n print(f\" • {a['id']}: v{a['version']}, {a['topics']} topics, {a['memories']} memories, {a['age_hours']:.1f}h old\")\n \n else:\n print(\"Usage: avm gossip {publish|refresh|who-knows|stats}\")\n\n\ndef cmd_agents(args):\n \"\"\"List all agents in the system.\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n from .librarian import Librarian\n \n librarian = Librarian(vfs.store, vfs.config)\n \n if args.json:\n print(json.dumps(librarian.directory(), indent=2))\n return\n \n directory = librarian.directory()\n \n print(f\"Agents ({directory['total_agents']} total):\")\n for agent in directory['agents']:\n caps = \", \".join(agent['capabilities']) if agent['capabilities'] else \"general\"\n print(f\" • {agent['id']} ({caps})\")\n \n print()\n print(\"By Capability:\")\n for cap, agent_ids in directory['by_capability'].items():\n print(f\" • {cap}: {', '.join(agent_ids)}\")\n\n\ndef cmd_memory_stats(args):\n \"\"\"Agent Memory statistics\"\"\"\n vfs = get_vfs(args.config, args.db)\n memory = vfs.agent_memory(args.agent)\n \n stats = memory.stats()\n \n if args.json:\n print(json.dumps(stats, indent=2))\n else:\n print(f\"Agent Memory: {stats['agent_id']}\")\n print(f\"================\")\n print(f\"Private memories: {stats['private_count']}\")\n print(f\"Shared accessible: {stats['shared_accessible']}\")\n print(f\"Private prefix: {stats['private_prefix']}\")\n print(f\"Max tokens: {stats['config']['max_tokens']}\")\n print(f\"Strategy: {stats['config']['strategy']}\")\n\n\ndef cmd_semantic(args):\n \"\"\"Semantic search using embedding\"\"\"\n vfs = get_vfs(args.config, args.db)\n\n if vfs._embedding_store is None:\n print(\"Embedding not enabled. Set embedding.enabled=true in config.yaml\", file=sys.stderr)\n return 1\n\n prefix = None\n if args.agent:\n prefix = f\"/memory/private/{args.agent}\"\n\n results = vfs._embedding_store.search(args.query, k=args.limit, prefix=prefix)\n\n if args.json:\n print(json.dumps([\n {\"path\": n.path, \"score\": s, \"snippet\": n.content[:200]}\n for n, s in results\n ], indent=2))\n else:\n if not results:\n print(\"No results found.\")\n else:\n for node, score in results:\n snippet = node.content[:100].replace(\"\\n\", \" \")\n print(f\"[{score:.4f}] {node.path}\")\n print(f\" {snippet}...\")\n print()\n\n return 0\n\n\ndef cmd_telemetry(args):\n \"\"\"Show operation telemetry\"\"\"\n from .telemetry import get_telemetry\n \n telem = get_telemetry()\n \n if args.op == \"stats\":\n stats = telem.stats(agent=args.agent, since=args.since)\n if args.json:\n print(json.dumps(stats, indent=2))\n else:\n print(f\"Total operations: {stats['total_ops']}\")\n print(f\"Error rate: {stats['error_rate']*100:.1f}%\")\n print(\"\\nBy operation:\")\n for op, data in stats['by_op'].items():\n print(f\" {op}: {data['count']} calls, avg {data['avg_latency_ms']}ms\")\n else:\n entries = telem.query(\n agent=args.agent,\n op=args.op,\n since=args.since,\n limit=args.limit\n )\n \n if args.json:\n print(json.dumps(entries, indent=2))\n else:\n for e in entries:\n status = \"✓\" if e['success'] else \"✗\"\n tokens_in = str(e['tokens_in']) if e['tokens_in'] else \"-\"\n tokens_out = str(e['tokens_out']) if e['tokens_out'] else \"-\"\n tokens = f\"{tokens_in:>4}/{tokens_out:\u003c4}\"\n latency = f\"{e['latency_ms']:.0f}ms\" if e['latency_ms'] else \"-\"\n print(f\"{status} [{e['ts'][:19]}] {e['op']:\u003c8} {e['agent']:\u003c15} {tokens} {latency:>5}\")\n\n\ndef cmd_savings(args):\n \"\"\"Show token savings from recall operations\"\"\"\n from .telemetry import get_telemetry\n \n telem = get_telemetry()\n savings = telem.token_savings(agent=args.agent, since=args.since)\n \n if args.json:\n print(json.dumps(savings, indent=2))\n else:\n print(\"Token Savings Report\")\n print(\"====================\")\n print(f\"Total recalls: {savings['recalls']}\")\n print(f\"Tokens returned: {savings['tokens_returned']:,}\")\n print(f\"Tokens available: {savings['tokens_available']:,}\")\n print(f\"Tokens saved: {savings['tokens_saved']:,}\")\n print(f\"Savings: {savings['savings_pct']}%\")\n\n\ndef cmd_subscribe(args):\n \"\"\"Subscribe to path pattern changes\"\"\"\n from .subscriptions import get_subscription_store, SubscriptionMode\n \n store = get_subscription_store()\n mode = SubscriptionMode(args.mode)\n webhook_url = getattr(args, 'webhook', None)\n \n sub = store.subscribe(\n args.agent, args.pattern, \n mode=mode, \n throttle_seconds=args.throttle,\n webhook_url=webhook_url,\n )\n \n if args.json:\n print(json.dumps({\n \"id\": sub.id, \"agent\": sub.agent_id, \"pattern\": sub.pattern,\n \"mode\": sub.mode.value, \"throttle\": sub.throttle_seconds,\n \"webhook\": sub.webhook_url,\n }, indent=2))\n else:\n print(f\"Subscribed: {sub.agent_id} → {sub.pattern}\")\n print(f\" Mode: {sub.mode.value}\")\n if sub.mode == SubscriptionMode.THROTTLED:\n print(f\" Throttle: {sub.throttle_seconds}s\")\n if sub.webhook_url:\n print(f\" Webhook: {sub.webhook_url}\")\n return 0\n\n\ndef cmd_subscriptions(args):\n \"\"\"List subscriptions\"\"\"\n from .subscriptions import get_subscription_store\n \n store = get_subscription_store()\n subs = store.list_subscriptions(agent_id=args.agent)\n \n if args.json:\n print(json.dumps([{\n \"id\": s.id, \"agent\": s.agent_id, \"pattern\": s.pattern,\n \"mode\": s.mode.value, \"throttle\": s.throttle_seconds\n } for s in subs], indent=2))\n else:\n if not subs:\n print(\"No subscriptions.\")\n return 0\n for s in subs:\n mode_info = s.mode.value\n if s.mode.value == \"throttled\":\n mode_info += f\" ({s.throttle_seconds}s)\"\n print(f\"{s.agent_id}: {s.pattern} [{mode_info}]\")\n return 0\n\n\ndef cmd_unsubscribe(args):\n \"\"\"Remove subscription\"\"\"\n from .subscriptions import get_subscription_store\n \n store = get_subscription_store()\n store.unsubscribe(args.agent, args.pattern)\n print(f\"Unsubscribed: {args.agent} ← {args.pattern}\")\n return 0\n\n\ndef cmd_pending(args):\n \"\"\"Show pending notifications\"\"\"\n from .subscriptions import get_subscription_store\n \n store = get_subscription_store()\n pending = store.get_pending(args.agent, mark_delivered=args.clear)\n \n if args.json:\n print(json.dumps(pending, indent=2))\n else:\n if not pending:\n print(\"No pending notifications.\")\n return 0\n print(f\"Pending notifications for {args.agent}:\")\n for p in pending:\n print(f\" [{p['timestamp'][:16]}] {p['path']}\")\n if args.clear:\n print(f\"\\n(Marked {len(pending)} as delivered)\")\n return 0\n\n\ndef cmd_export(args):\n \"\"\"Export memories to archive\"\"\"\n import tarfile\n import io\n from datetime import datetime\n \n vfs = get_vfs(args.config, args.db)\n \n nodes = vfs.list(args.prefix, limit=10000)\n \n if args.format == \"tar.gz\":\n output = args.output or f\"avm-export-{datetime.now().strftime('%Y%m%d-%H%M%S')}.tar.gz\"\n \n with tarfile.open(output, \"w:gz\") as tar:\n for node in nodes:\n content = (node.content or \"\").encode('utf-8')\n info = tarfile.TarInfo(name=node.path.lstrip('/'))\n info.size = len(content)\n tar.addfile(info, io.BytesIO(content))\n \n # Also save metadata\n meta_content = json.dumps(node.meta, indent=2, default=str).encode('utf-8')\n meta_info = tarfile.TarInfo(name=node.path.lstrip('/') + '.meta.json')\n meta_info.size = len(meta_content)\n tar.addfile(meta_info, io.BytesIO(meta_content))\n \n print(f\"Exported {len(nodes)} nodes to {output}\")\n \n elif args.format == \"jsonl\":\n output = args.output or f\"avm-export-{datetime.now().strftime('%Y%m%d-%H%M%S')}.jsonl\"\n \n with open(output, 'w') as f:\n for node in nodes:\n f.write(json.dumps({\n \"path\": node.path,\n \"content\": node.content,\n \"meta\": node.meta,\n \"version\": node.version,\n }, default=str) + '\\n')\n \n print(f\"Exported {len(nodes)} nodes to {output}\")\n \n return 0\n\n\ndef cmd_graph(args):\n \"\"\"Generate knowledge graph visualization\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n # Get starting node\n start_node = vfs.read(args.path)\n if not start_node:\n print(f\"Not found: {args.path}\", file=sys.stderr)\n return 1\n \n # BFS to collect connected nodes\n visited = set()\n edges = []\n queue = [(args.path, 0)]\n \n while queue:\n path, depth = queue.pop(0)\n if path in visited or depth > args.depth:\n continue\n visited.add(path)\n \n # Get links from this node\n links = vfs.store.get_links(path)\n for link in links:\n edges.append((path, link.target, link.relation))\n if link.target not in visited:\n queue.append((link.target, depth + 1))\n \n if not edges and len(visited) == 1:\n # No links, just list children\n children = vfs.list(args.path, limit=50)\n for child in children:\n if child.path != args.path:\n edges.append((args.path, child.path, \"contains\"))\n \n if args.format == \"mermaid\":\n lines = [\"graph TD\"]\n seen_nodes = set()\n for src, dst, rel in edges:\n src_id = src.replace(\"/\", \"_\").replace(\".\", \"_\").replace(\"-\", \"_\")\n dst_id = dst.replace(\"/\", \"_\").replace(\".\", \"_\").replace(\"-\", \"_\")\n src_label = src.split(\"/\")[-1]\n dst_label = dst.split(\"/\")[-1]\n if src not in seen_nodes:\n lines.append(f\" {src_id}[{src_label}]\")\n seen_nodes.add(src)\n if dst not in seen_nodes:\n lines.append(f\" {dst_id}[{dst_label}]\")\n seen_nodes.add(dst)\n lines.append(f\" {src_id} -->|{rel}| {dst_id}\")\n print('\\n'.join(lines))\n \n elif args.format == \"dot\":\n lines = [\"digraph G {\"]\n for src, dst, rel in edges:\n lines.append(f' \"{src}\" -> \"{dst}\" [label=\"{rel}\"];')\n lines.append(\"}\")\n print('\\n'.join(lines))\n \n else: # text\n print(f\"Graph from {args.path} (depth {args.depth}):\")\n print(f\"Nodes: {len(visited)}\")\n print(f\"Edges: {len(edges)}\")\n for src, dst, rel in edges:\n print(f\" {src} --[{rel}]--> {dst}\")\n \n return 0\n\n\ndef cmd_bundle(args):\n \"\"\"Bundle related memories for handoff\"\"\"\n from datetime import datetime, timedelta\n \n vfs = get_vfs(args.config, args.db)\n \n # Calculate since date\n if args.since:\n if args.since.endswith('d'):\n days = int(args.since[:-1])\n since_dt = datetime.now() - timedelta(days=days)\n else:\n since_dt = datetime.fromisoformat(args.since)\n else:\n since_dt = datetime.now() - timedelta(days=7)\n \n # Find all nodes under prefix\n nodes = vfs.list(args.prefix, limit=500)\n \n # Filter by date\n filtered = []\n for node in nodes:\n if node.updated_at and node.updated_at >= since_dt:\n filtered.append(node)\n \n # Sort by date\n filtered.sort(key=lambda n: n.updated_at or datetime.min)\n \n if args.json:\n print(json.dumps([{\n \"path\": n.path,\n \"updated_at\": n.updated_at.isoformat() if n.updated_at else None,\n \"content\": n.content[:500] if n.content else \"\",\n \"meta\": n.meta,\n } for n in filtered], indent=2, default=str))\n else:\n # Generate markdown handoff document\n lines = [\n f\"# Task Bundle: {args.prefix}\",\n f\"Generated: {datetime.now().isoformat()[:16]}\",\n f\"Since: {since_dt.isoformat()[:10]}\",\n f\"Items: {len(filtered)}\",\n \"\",\n \"---\",\n \"\",\n ]\n \n for node in filtered:\n date_str = node.updated_at.strftime(\"%Y-%m-%d %H:%M\") if node.updated_at else \"?\"\n lines.append(f\"## {node.path}\")\n lines.append(f\"*Updated: {date_str}*\")\n lines.append(\"\")\n # Include content (truncated)\n content = node.content or \"\"\n if len(content) > 1000:\n content = content[:1000] + \"\\n\\n*[truncated...]*\"\n lines.append(content)\n lines.append(\"\")\n lines.append(\"---\")\n lines.append(\"\")\n \n print('\\n'.join(lines))\n return 0\n\n\ndef cmd_restore(args):\n \"\"\"Restore a file from trash\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n restored = vfs.restore(args.path)\n if restored:\n print(f\"Restored: {restored.path}\")\n return 0\n else:\n print(f\"Not found or restore failed: {args.path}\", file=sys.stderr)\n return 1\n\n\ndef cmd_trash(args):\n \"\"\"List or empty trash\"\"\"\n vfs = get_vfs(args.config, args.db)\n \n trash_items = vfs.list(\"/trash\", limit=100)\n \n if args.empty:\n for item in trash_items:\n vfs.delete(item.path, hard=True)\n print(f\"Emptied {len(trash_items)} items from trash\")\n return 0\n \n if not trash_items:\n print(\"Trash is empty\")\n return 0\n \n print(f\"Trash ({len(trash_items)} items):\")\n for item in trash_items:\n deleted_at = item.meta.get('deleted_at', '?')[:16]\n original = item.meta.get('original_path', item.path.replace('/trash', '', 1))\n print(f\" [{deleted_at}] {original}\")\n return 0\n\n\ndef cmd_cold(args):\n \"\"\"Show cold (decayed) memories\"\"\"\n from .advanced import MemoryDecay\n \n vfs = get_vfs(args.config, args.db)\n decay = MemoryDecay(vfs.store, half_life_days=args.half_life)\n \n cold = decay.get_cold_memories(\n prefix=args.prefix,\n threshold=args.threshold,\n limit=args.limit\n )\n \n if args.json:\n print(json.dumps([n.to_dict() for n in cold], indent=2, default=str))\n else:\n if not cold:\n print(\"No cold memories found.\")\n return 0\n print(f\"Cold memories (importance × decay \u003c {args.threshold}):\")\n print()\n for node in cold:\n importance = node.meta.get(\"importance\", 0.5)\n decay_factor = decay.calculate_decay(node)\n score = importance * decay_factor\n print(f\" {node.path}\")\n print(f\" importance={importance:.2f} × decay={decay_factor:.2f} = {score:.3f}\")\n print()\n return 0\n\n\ndef cmd_compact(args):\n \"\"\"Compact old versions of a memory\"\"\"\n from .advanced import MemoryCompactor\n \n vfs = get_vfs(args.config, args.db)\n compactor = MemoryCompactor(vfs.store)\n \n result = compactor.compact(args.path, keep_recent=args.keep)\n \n if args.json:\n print(json.dumps({\n \"base_path\": result.base_path,\n \"versions_before\": result.versions_before,\n \"versions_after\": result.versions_after,\n \"summary_path\": result.summary_path,\n \"removed_paths\": result.removed_paths,\n }, indent=2))\n else:\n print(f\"Compacted: {result.base_path}\")\n print(f\" Versions: {result.versions_before} → {result.versions_after}\")\n if result.summary_path:\n print(f\" Summary: {result.summary_path}\")\n if result.removed_paths:\n print(f\" Removed: {len(result.removed_paths)} versions\")\n return 0\n\n\ndef cmd_dedupe(args):\n \"\"\"Check for duplicate content\"\"\"\n from .advanced import SemanticDeduplicator\n \n vfs = get_vfs(args.config, args.db)\n deduper = SemanticDeduplicator(vfs.store)\n \n # Read content\n if args.file:\n content = Path(args.file).read_text()\n elif args.content:\n content = args.content\n else:\n content = sys.stdin.read()\n \n result = deduper.check_duplicate(content, prefix=args.prefix, threshold=args.threshold)\n \n if args.json:\n print(json.dumps({\n \"is_duplicate\": result.is_duplicate,\n \"similar_path\": result.similar_path,\n \"similarity\": result.similarity,\n \"method\": result.method,\n }, indent=2))\n else:\n if result.is_duplicate:\n print(f\"DUPLICATE detected!\")\n print(f\" Similar to: {result.similar_path}\")\n print(f\" Similarity: {result.similarity:.1%}\")\n print(f\" Method: {result.method}\")\n else:\n print(\"No duplicates found.\")\n return 0\n\n\ndef cmd_archive(args):\n \"\"\"Archive cold memories to /archive/\"\"\"\n from .advanced import MemoryDecay\n \n vfs = get_vfs(args.config, args.db)\n decay = MemoryDecay(vfs.store, half_life_days=args.half_life)\n \n cold = decay.get_cold_memories(\n prefix=args.prefix,\n threshold=args.threshold,\n limit=args.limit\n )\n \n if not cold:\n print(\"No cold memories to archive.\")\n return 0\n \n if args.dry_run:\n print(f\"Would archive {len(cold)} memories:\")\n for node in cold:\n archive_path = node.path.replace(\"/memory/\", \"/archive/\", 1)\n print(f\" {node.path} → {archive_path}\")\n return 0\n \n archived = []\n for node in cold:\n archive_path = node.path.replace(\"/memory/\", \"/archive/\", 1)\n # Write to archive\n vfs.write(archive_path, node.content, meta=node.meta)\n # Delete original\n vfs.store.delete_node(node.path)\n archived.append((node.path, archive_path))\n \n if args.json:\n print(json.dumps({\"archived\": archived}, indent=2))\n else:\n print(f\"Archived {len(archived)} memories:\")\n for src, dst in archived:\n print(f\" {src} → {dst}\")\n return 0\n\n\ndef cmd_cluster(args):\n \"\"\"Cluster memories by topic similarity\"\"\"\n from .consolidation import MemoryConsolidator, ConsolidationConfig\n from .topic_index import TopicIndex\n \n vfs = get_vfs(args.config, args.db)\n agent_id = args.agent or vfs.agent_id\n \n config = ConsolidationConfig(\n min_cluster_size=args.min_size,\n max_clusters=args.max_clusters,\n )\n \n topic_index = TopicIndex(vfs.store)\n consolidator = MemoryConsolidator(vfs.store, topic_index, config)\n \n # Get memories\n prefix = f\"/memory/private/{agent_id}\" if agent_id else \"/memory\"\n memories = consolidator._get_memories(prefix)\n \n # Cluster\n clusters = consolidator.cluster_memories(memories)\n \n if not clusters:\n print(\"No clusters found (need more memories or higher topic diversity).\")\n return 0\n \n if args.json:\n print(json.dumps([{\n \"id\": c.id,\n \"topic\": c.topic,\n \"size\": len(c.memories),\n \"avg_importance\": c.avg_importance,\n \"topics\": list(c.centroid_topics)[:10],\n \"memories\": c.memories[:5],\n } for c in clusters], indent=2))\n else:\n print(f\"Found {len(clusters)} clusters:\\n\")\n for c in clusters:\n print(f\" 📁 {c.topic.upper()} ({len(c.memories)} memories)\")\n print(f\" Importance: {c.avg_importance:.2f}\")\n print(f\" Topics: {', '.join(sorted(c.centroid_topics)[:5])}\")\n for p in c.memories[:3]:\n print(f\" - {p}\")\n print()\n \n # Generate summaries if requested\n if args.summarize:\n created = consolidator.generate_cluster_summaries(clusters)\n print(f\"\\n✅ Created {created} cluster summaries in /memory/clusters/\")\n \n return 0\n\n\ndef cmd_consolidate(args):\n \"\"\"Run full memory consolidation\"\"\"\n from .consolidation import MemoryConsolidator, ConsolidationConfig\n from .topic_index import TopicIndex\n \n vfs = get_vfs(args.config, args.db)\n agent_id = args.agent or vfs.agent_id\n \n topic_index = TopicIndex(vfs.store)\n consolidator = MemoryConsolidator(vfs.store, topic_index)\n \n result = consolidator.run(agent_id=agent_id, dry_run=args.dry_run)\n \n if args.json:\n print(json.dumps({\n \"processed\": result.memories_processed,\n \"decayed\": result.importance_decayed,\n \"merged\": result.memories_merged,\n \"summaries\": result.summaries_created,\n \"duration_ms\": result.duration_ms,\n \"dry_run\": args.dry_run,\n }, indent=2))\n else:\n print(f\"Memory Consolidation {'(dry-run)' if args.dry_run else ''}:\")\n print(f\" Memories processed: {result.memories_processed}\")\n print(f\" Importance decayed: {result.importance_decayed}\")\n print(f\" Memories merged: {result.memories_merged}\")\n print(f\" Summaries created: {result.summaries_created}\")\n print(f\" Duration: {result.duration_ms:.1f}ms\")\n \n return 0\n\n\ndef _cmd_serve(args):\n \"\"\"Start HTTP API server.\"\"\"\n import os\n os.environ.setdefault(\"AVM_AGENT\", args.agent)\n # Patch sys.argv so api_server.main() sees the right args\n sys.argv = [\"avm-serve\", \"--agent\", args.agent, \"--host\", args.host, \"--port\", str(args.port)]\n from .api_server import main as serve_main\n serve_main()\n\n\ndef cmd_digest(args):\n \"\"\"Generate memory digest\"\"\"\n from .consolidation import generate_digest\n from .topic_index import TopicIndex\n \n vfs = get_vfs(args.config, args.db)\n agent_id = args.agent or vfs.agent_id\n \n topic_index = TopicIndex(vfs.store)\n \n digest = generate_digest(\n store=vfs.store,\n topic_index=topic_index,\n agent_id=agent_id,\n days=args.days,\n max_items=args.max_items,\n )\n \n if args.output:\n with open(args.output, \"w\") as f:\n f.write(digest)\n print(f\"Digest saved to {args.output}\")\n else:\n print(digest)\n \n return 0\n\n\ndef main():\n parser = argparse.ArgumentParser(\n description=\"AI Virtual Filesystem (config-driven)\",\n prog=\"vfs\"\n )\n parser.add_argument(\"--config\", \"-c\", help=\"Config file path\")\n parser.add_argument(\"--db\", help=\"Database path override\")\n parser.add_argument(\"--json\", action=\"store_true\", help=\"Output as JSON\")\n \n subparsers = parser.add_subparsers(dest=\"command\", required=True)\n \n # read\n p_read = subparsers.add_parser(\"read\", help=\"Read a node\")\n p_read.add_argument(\"path\", help=\"Node path\")\n p_read.add_argument(\"--refresh\", \"-r\", action=\"store_true\", help=\"Force refresh\")\n p_read.add_argument(\"--meta\", \"-m\", action=\"store_true\", help=\"Show metadata\")\n p_read.add_argument(\"--as-of\", help=\"Read as of timestamp (ISO format, time travel)\")\n p_read.add_argument(\"--version\", \"-v\", type=int, help=\"Read specific version\")\n p_read.set_defaults(func=cmd_read)\n \n # write\n p_write = subparsers.add_parser(\"write\", help=\"Write a node\")\n p_write.add_argument(\"path\", help=\"Node path\")\n p_write.add_argument(\"--content\", \"-c\", help=\"Content to write\")\n p_write.add_argument(\"--file\", \"-f\", help=\"Read content from file\")\n p_write.add_argument(\"--meta\", \"-m\", help=\"Metadata as JSON\")\n p_write.set_defaults(func=cmd_write)\n \n # delete\n p_delete = subparsers.add_parser(\"delete\", help=\"Delete a node\")\n p_delete.add_argument(\"path\", help=\"Node path\")\n p_delete.set_defaults(func=cmd_delete)\n\n # mv / move / rename\n p_mv = subparsers.add_parser(\"mv\", aliases=[\"move\", \"rename\"], help=\"Move/rename a node or prefix tree\")\n p_mv.add_argument(\"src\", help=\"Source path (use trailing / for prefix move)\")\n p_mv.add_argument(\"dst\", help=\"Destination path\")\n p_mv.set_defaults(func=cmd_mv)\n\n # list\n p_list = subparsers.add_parser(\"list\", help=\"List nodes\")\n p_list.add_argument(\"prefix\", nargs=\"?\", default=\"/\", help=\"Path prefix\")\n p_list.add_argument(\"--limit\", \"-n\", type=int, default=100, help=\"Max results\")\n p_list.set_defaults(func=cmd_list)\n \n # links\n p_links = subparsers.add_parser(\"links\", help=\"Show node links\")\n p_links.add_argument(\"path\", help=\"Node path\")\n p_links.add_argument(\"--direction\", \"-d\", choices=[\"in\", \"out\", \"both\"], default=\"both\")\n p_links.set_defaults(func=cmd_links)\n \n # link (add)\n p_link = subparsers.add_parser(\"link\", help=\"Add a link\")\n p_link.add_argument(\"source\", help=\"Source path\")\n p_link.add_argument(\"target\", help=\"Target path\")\n p_link.add_argument(\"--type\", \"-t\", default=\"related\", \n choices=[\"peer\", \"parent\", \"citation\", \"derived\", \"related\"])\n p_link.add_argument(\"--weight\", \"-w\", type=float, default=1.0)\n p_link.set_defaults(func=cmd_link)\n \n # search\n p_search = subparsers.add_parser(\"search\", help=\"Full-text search\")\n p_search.add_argument(\"query\", help=\"Search query\")\n p_search.add_argument(\"--limit\", \"-n\", type=int, default=10)\n p_search.set_defaults(func=cmd_search)\n \n # history\n p_history = subparsers.add_parser(\"history\", help=\"Show change history\")\n p_history.add_argument(\"path\", help=\"Node path\")\n p_history.add_argument(\"--limit\", \"-n\", type=int, default=10)\n p_history.add_argument(\"--verbose\", \"-v\", action=\"store_true\")\n p_history.set_defaults(func=cmd_history)\n \n # stats\n p_stats = subparsers.add_parser(\"stats\", help=\"Show storage stats\")\n p_stats.set_defaults(func=cmd_stats)\n \n # warmup - pre-load embedding model\n p_warmup = subparsers.add_parser(\"warmup\", help=\"Pre-load embedding model for faster recall\")\n p_warmup.set_defaults(func=cmd_warmup)\n \n\n \n # refresh\n p_refresh = subparsers.add_parser(\"refresh\", help=\"Refresh live nodes\")\n p_refresh.add_argument(\"path\", nargs=\"?\", help=\"Path to refresh\")\n p_refresh.add_argument(\"--all\", \"-a\", action=\"store_true\", help=\"Refresh all\")\n p_refresh.set_defaults(func=cmd_refresh)\n \n # config\n p_config = subparsers.add_parser(\"config\", help=\"Show configuration\")\n p_config.set_defaults(func=cmd_config)\n \n # retrieve (Linked retrieval)\n p_retrieve = subparsers.add_parser(\"retrieve\", help=\"Linked retrieval\")\n p_retrieve.add_argument(\"query\", help=\"Search query\")\n p_retrieve.add_argument(\"--limit\", \"-n\", type=int, default=5)\n p_retrieve.add_argument(\"--depth\", \"-d\", type=int, default=1, help=\"Graph expansion depth\")\n p_retrieve.add_argument(\"--no-graph\", action=\"store_true\", help=\"Disable graph expansion\")\n p_retrieve.set_defaults(func=cmd_retrieve)\n \n # synthesize (dynamic document)\n p_synth = subparsers.add_parser(\"synthesize\", aliases=[\"synth\"], help=\"Generate dynamic document\")\n p_synth.add_argument(\"query\", help=\"Query topic\")\n p_synth.add_argument(\"--limit\", \"-n\", type=int, default=5)\n p_synth.add_argument(\"--title\", \"-t\", help=\"Document title\")\n p_synth.set_defaults(func=cmd_synthesize)\n \n # memory recall\n p_mem_recall = subparsers.add_parser(\"memory-recall\", aliases=[\"recall\"], \n help=\"Agent memory recall\")\n p_mem_recall.add_argument(\"query\", help=\"Query\")\n p_mem_recall.add_argument(\"--agent\", \"-a\", default=\"default\", help=\"Agent ID\")\n p_mem_recall.add_argument(\"--max-tokens\", \"-t\", type=int, default=4000)\n p_mem_recall.add_argument(\"--strategy\", \"-s\", \n choices=[\"importance\", \"recency\", \"relevance\", \"balanced\"])\n p_mem_recall.add_argument(\"--private-only\", action=\"store_true\")\n p_mem_recall.add_argument(\"--verbose\", \"-v\", action=\"store_true\", \n help=\"Show progress bars and warnings (default: quiet)\")\n p_mem_recall.add_argument(\"--min-relevance\", \"-r\", type=float, default=0.3,\n help=\"Min relevance score (0-1), filters noise (default: 0.3)\")\n p_mem_recall.set_defaults(func=cmd_memory_recall)\n \n # memory remember\n p_mem_write = subparsers.add_parser(\"memory-remember\", aliases=[\"remember\"],\n help=\"Write to agent memory\")\n p_mem_write.add_argument(\"--agent\", \"-a\", default=\"default\", help=\"Agent ID\")\n p_mem_write.add_argument(\"--content\", \"-c\", help=\"Content\")\n p_mem_write.add_argument(\"--file\", \"-f\", help=\"Read from file\")\n p_mem_write.add_argument(\"--title\", \"-t\", help=\"Memory title\")\n p_mem_write.add_argument(\"--importance\", \"-i\", type=float, default=0.5)\n p_mem_write.add_argument(\"--tags\", help=\"Comma-separated tags\")\n p_mem_write.set_defaults(func=cmd_memory_remember)\n \n # context (for prompt injection)\n p_context = subparsers.add_parser(\"context\", help=\"Generate memory context for prompts\")\n p_context.add_argument(\"--agent\", \"-a\", default=\"default\", help=\"Agent ID\")\n p_context.add_argument(\"--query\", \"-q\", help=\"Custom query to include\")\n p_context.add_argument(\"--preferences\", \"-p\", action=\"store_true\", help=\"Include user preferences\")\n p_context.add_argument(\"--lessons\", \"-l\", action=\"store_true\", help=\"Include lessons learned\")\n p_context.add_argument(\"--format\", \"-f\", choices=[\"markdown\", \"xml\", \"json\"], default=\"markdown\")\n p_context.add_argument(\"--recent-tokens\", type=int, default=300, help=\"Tokens for recent activity\")\n p_context.add_argument(\"--pref-tokens\", type=int, default=200, help=\"Tokens for preferences\")\n p_context.add_argument(\"--lesson-tokens\", type=int, default=200, help=\"Tokens for lessons\")\n p_context.add_argument(\"--query-tokens\", type=int, default=300, help=\"Tokens for custom query\")\n p_context.add_argument(\"--quiet\", action=\"store_true\", help=\"No stderr output\")\n p_context.set_defaults(func=cmd_context)\n \n # Librarian: ask\n p_ask = subparsers.add_parser(\"ask\", help=\"Ask the Librarian for information routing\")\n p_ask.add_argument(\"query\", help=\"Query to ask\")\n p_ask.add_argument(\"--agent\", \"-a\", default=\"default\", help=\"Requester agent ID\")\n p_ask.add_argument(\"--limit\", \"-n\", type=int, default=10, help=\"Max results\")\n p_ask.add_argument(\"--privacy\", \"-p\", choices=[\"full\", \"owner\", \"existence\", \"none\"], \n default=\"owner\", help=\"Privacy level for suggestions\")\n p_ask.set_defaults(func=cmd_ask)\n \n # Librarian: who-knows\n p_who = subparsers.add_parser(\"who-knows\", help=\"Find agents who know about a topic\")\n p_who.add_argument(\"topic\", help=\"Topic to search\")\n p_who.add_argument(\"--limit\", \"-n\", type=int, default=5, help=\"Max agents to return\")\n p_who.set_defaults(func=cmd_who_knows)\n \n # Librarian: agents\n p_agents = subparsers.add_parser(\"agents\", help=\"List all agents and their capabilities\")\n p_agents.set_defaults(func=cmd_agents)\n \n # Gossip protocol\n p_gossip = subparsers.add_parser(\"gossip\", help=\"Gossip protocol for decentralized discovery\")\n p_gossip_sub = p_gossip.add_subparsers(dest=\"gossip_action\")\n \n p_gossip_publish = p_gossip_sub.add_parser(\"publish\", help=\"Publish own digest\")\n p_gossip_publish.add_argument(\"--agent\", \"-a\", default=\"default\", help=\"Agent ID\")\n \n p_gossip_refresh = p_gossip_sub.add_parser(\"refresh\", help=\"Refresh known digests\")\n p_gossip_refresh.add_argument(\"--agent\", \"-a\", default=\"default\", help=\"Agent ID\")\n \n p_gossip_who = p_gossip_sub.add_parser(\"who-knows\", help=\"Find agents who know a topic\")\n p_gossip_who.add_argument(\"topic\", help=\"Topic to search\")\n p_gossip_who.add_argument(\"--agent\", \"-a\", default=\"default\", help=\"Agent ID\")\n \n p_gossip_stats = p_gossip_sub.add_parser(\"stats\", help=\"Show gossip stats\")\n p_gossip_stats.add_argument(\"--agent\", \"-a\", default=\"default\", help=\"Agent ID\")\n \n p_gossip.set_defaults(func=cmd_gossip)\n \n # memory stats\n p_mem_stats = subparsers.add_parser(\"memory-stats\", help=\"Agent memory stats\")\n p_mem_stats.add_argument(\"--agent\", \"-a\", default=\"default\", help=\"Agent ID\")\n p_mem_stats.set_defaults(func=cmd_memory_stats)\n \n # semantic search\n p_semantic = subparsers.add_parser(\"semantic\", help=\"Semantic search (embedding)\")\n p_semantic.add_argument(\"query\", help=\"Search query\")\n p_semantic.add_argument(\"--limit\", \"-n\", type=int, default=10)\n p_semantic.add_argument(\"--agent\", \"-a\", help=\"Agent context (search within agent prefix)\")\n p_semantic.set_defaults(func=cmd_semantic)\n\n # Telemetry commands\n p_telemetry = subparsers.add_parser(\"telemetry\", aliases=[\"telem\"], help=\"Show operation telemetry\")\n p_telemetry.add_argument(\"--agent\", \"-a\", help=\"Filter by agent\")\n p_telemetry.add_argument(\"--op\", help=\"Filter by operation (recall, remember)\")\n p_telemetry.add_argument(\"--since\", help=\"Filter since timestamp (ISO format)\")\n p_telemetry.add_argument(\"--limit\", \"-n\", type=int, default=20, help=\"Max entries\")\n p_telemetry.set_defaults(func=cmd_telemetry)\n \n p_savings = subparsers.add_parser(\"savings\", help=\"Show token savings from recall\")\n p_savings.add_argument(\"--agent\", \"-a\", help=\"Filter by agent\")\n p_savings.add_argument(\"--since\", help=\"Filter since timestamp (ISO format)\")\n p_savings.set_defaults(func=cmd_savings)\n \n # subscribe command\n p_subscribe = subparsers.add_parser(\"subscribe\", help=\"Subscribe to path pattern changes\")\n p_subscribe.add_argument(\"pattern\", help=\"Path pattern (e.g., /memory/shared/*)\")\n p_subscribe.add_argument(\"--agent\", \"-a\", required=True, help=\"Agent ID\")\n p_subscribe.add_argument(\"--mode\", \"-m\", default=\"batched\", \n choices=[\"realtime\", \"throttled\", \"batched\", \"digest\"],\n help=\"Notification mode\")\n p_subscribe.add_argument(\"--throttle\", \"-t\", type=int, default=60, help=\"Throttle window (seconds)\")\n p_subscribe.add_argument(\"--webhook\", \"-w\", help=\"Webhook URL for push notifications\")\n p_subscribe.set_defaults(func=cmd_subscribe)\n \n # subscriptions list\n p_subs_list = subparsers.add_parser(\"subscriptions\", help=\"List subscriptions\")\n p_subs_list.add_argument(\"--agent\", \"-a\", help=\"Filter by agent\")\n p_subs_list.set_defaults(func=cmd_subscriptions)\n \n # unsubscribe\n p_unsub = subparsers.add_parser(\"unsubscribe\", help=\"Remove subscription\")\n p_unsub.add_argument(\"pattern\", help=\"Path pattern\")\n p_unsub.add_argument(\"--agent\", \"-a\", required=True, help=\"Agent ID\")\n p_unsub.set_defaults(func=cmd_unsubscribe)\n \n # pending notifications\n p_pending = subparsers.add_parser(\"pending\", help=\"Show pending notifications\")\n p_pending.add_argument(\"--agent\", \"-a\", required=True, help=\"Agent ID\")\n p_pending.add_argument(\"--clear\", action=\"store_true\", help=\"Mark as delivered\")\n p_pending.set_defaults(func=cmd_pending)\n \n # export\n p_export = subparsers.add_parser(\"export\", help=\"Export memories to archive\")\n p_export.add_argument(\"prefix\", help=\"Path prefix to export (e.g., /memory)\")\n p_export.add_argument(\"--output\", \"-o\", help=\"Output file path\")\n p_export.add_argument(\"--format\", \"-f\", default=\"tar.gz\", choices=[\"tar.gz\", \"jsonl\"])\n p_export.set_defaults(func=cmd_export)\n \n # graph visualization\n p_graph = subparsers.add_parser(\"graph\", help=\"Generate knowledge graph\")\n p_graph.add_argument(\"path\", help=\"Starting path\")\n p_graph.add_argument(\"--depth\", \"-d\", type=int, default=2, help=\"Max depth\")\n p_graph.add_argument(\"--format\", \"-f\", default=\"mermaid\", choices=[\"mermaid\", \"dot\", \"text\"])\n p_graph.set_defaults(func=cmd_graph)\n \n # bundle for handoff\n p_bundle = subparsers.add_parser(\"bundle\", help=\"Bundle related memories for handoff\")\n p_bundle.add_argument(\"prefix\", help=\"Path prefix (e.g., /task/project-x)\")\n p_bundle.add_argument(\"--since\", \"-s\", default=\"7d\", help=\"Since when (e.g., 7d, 2026-03-15)\")\n p_bundle.set_defaults(func=cmd_bundle)\n \n # restore from trash\n p_restore = subparsers.add_parser(\"restore\", help=\"Restore file from trash\")\n p_restore.add_argument(\"path\", help=\"Path in /trash/\")\n p_restore.set_defaults(func=cmd_restore)\n \n # trash management\n p_trash = subparsers.add_parser(\"trash\", help=\"List or empty trash\")\n p_trash.add_argument(\"--empty\", action=\"store_true\", help=\"Empty trash permanently\")\n p_trash.set_defaults(func=cmd_trash)\n \n # cold memories (decayed below threshold)\n p_cold = subparsers.add_parser(\"cold\", help=\"Show cold (decayed) memories\")\n p_cold.add_argument(\"--prefix\", default=\"/memory\", help=\"Path prefix to scan\")\n p_cold.add_argument(\"--threshold\", \"-t\", type=float, default=0.1, help=\"Decay threshold\")\n p_cold.add_argument(\"--half-life\", type=float, default=7.0, help=\"Half-life in days\")\n p_cold.add_argument(\"--limit\", \"-n\", type=int, default=20, help=\"Max results\")\n p_cold.set_defaults(func=cmd_cold)\n \n # compact old versions\n p_compact = subparsers.add_parser(\"compact\", help=\"Compact old versions into summary\")\n p_compact.add_argument(\"path\", help=\"Base path to compact\")\n p_compact.add_argument(\"--keep\", \"-k\", type=int, default=3, help=\"Keep N recent versions\")\n p_compact.set_defaults(func=cmd_compact)\n \n # dedupe check\n p_dedupe = subparsers.add_parser(\"dedupe\", help=\"Check for duplicate content\")\n p_dedupe.add_argument(\"--content\", \"-c\", help=\"Content to check\")\n p_dedupe.add_argument(\"--file\", \"-f\", help=\"Read content from file\")\n p_dedupe.add_argument(\"--prefix\", default=\"/memory\", help=\"Path prefix to search\")\n p_dedupe.add_argument(\"--threshold\", \"-t\", type=float, default=0.8, help=\"Similarity threshold\")\n p_dedupe.set_defaults(func=cmd_dedupe)\n \n # archive cold memories\n p_archive = subparsers.add_parser(\"archive\", help=\"Archive cold memories\")\n p_archive.add_argument(\"--prefix\", default=\"/memory\", help=\"Path prefix to scan\")\n p_archive.add_argument(\"--threshold\", \"-t\", type=float, default=0.1, help=\"Decay threshold\")\n p_archive.add_argument(\"--half-life\", type=float, default=7.0, help=\"Half-life in days\")\n p_archive.add_argument(\"--limit\", \"-n\", type=int, default=100, help=\"Max to archive\")\n p_archive.add_argument(\"--dry-run\", action=\"store_true\", help=\"Show what would be archived\")\n p_archive.set_defaults(func=cmd_archive)\n \n # cluster memories\n p_cluster = subparsers.add_parser(\"cluster\", help=\"Cluster memories by topic similarity\")\n p_cluster.add_argument(\"--agent\", \"-a\", help=\"Agent ID (default: from config)\")\n p_cluster.add_argument(\"--min-size\", type=int, default=3, help=\"Minimum cluster size\")\n p_cluster.add_argument(\"--max-clusters\", type=int, default=20, help=\"Maximum clusters\")\n p_cluster.add_argument(\"--summarize\", \"-s\", action=\"store_true\", help=\"Generate summaries\")\n p_cluster.set_defaults(func=cmd_cluster)\n \n # consolidate memories\n p_consolidate = subparsers.add_parser(\"consolidate\", help=\"Run memory consolidation\")\n p_consolidate.add_argument(\"--agent\", \"-a\", help=\"Agent ID (default: from config)\")\n p_consolidate.add_argument(\"--dry-run\", action=\"store_true\", help=\"Preview without modifying\")\n p_consolidate.set_defaults(func=cmd_consolidate)\n \n # digest - generate memory summary\n p_digest = subparsers.add_parser(\"digest\", help=\"Generate memory digest/summary\")\n p_digest.add_argument(\"--agent\", \"-a\", help=\"Agent ID (default: from config)\")\n p_digest.add_argument(\"--days\", \"-d\", type=int, default=1, help=\"Days to look back (default: 1)\")\n p_digest.add_argument(\"--max-items\", \"-n\", type=int, default=10, help=\"Max items per category\")\n p_digest.add_argument(\"--output\", \"-o\", help=\"Save to file instead of stdout\")\n p_digest.set_defaults(func=cmd_digest)\n\n # serve - HTTP API server (for Docker / Windows / no-FUSE environments)\n p_serve = subparsers.add_parser(\"serve\", help=\"Start HTTP API server (no FUSE required)\")\n p_serve.add_argument(\"--agent\", default=\"default\", help=\"Agent ID\")\n p_serve.add_argument(\"--host\", default=\"0.0.0.0\", help=\"Bind host\")\n p_serve.add_argument(\"--port\", type=int, default=8765, help=\"Port\")\n p_serve.set_defaults(func=lambda a: _cmd_serve(a))\n\n args = parser.parse_args()\n \n # On Windows, FUSE-related commands are not supported\n FUSE_COMMANDS = {\"mount\", \"unmount\", \"daemon\"}\n if IS_WINDOWS and getattr(args, \"command\", None) in FUSE_COMMANDS:\n print(\n f\"Error: '{args.command}' is not supported on Windows (FUSE unavailable).\\n\"\n \"Use 'avm serve' to start the HTTP API server and access memories via HTTP.\",\n file=sys.stderr,\n )\n return 1\n\n try:\n return args.func(args)\n except Exception as e:\n print(f\"Error: {e}\", file=sys.stderr)\n if args.json:\n print(json.dumps({\"error\": str(e)}), file=sys.stderr)\n return 1\n\n\nif __name__ == \"__main__\":\n sys.exit(main())\n","content_type":"text/x-python; charset=utf-8","language":"python","size":61202,"content_sha256":"ceb17620c6b887c6e11bcb2e328242f8f78e66a8b4686b46ff4f2adee0e07bfa"},{"filename":"avm/config_handler.py","content":"\"\"\"\navm/config_handler.py - Agent-writable Configuration\n\nLayered config:\n Defaults → User config.yaml → Agent runtime changes\n\nPaths:\n /.config/settings.yaml - Main settings (merged view)\n /.config/raw - Runtime changes only\n /.meta/version - AVM version (read-only)\n /.meta/stats - Store stats (read-only)\n\"\"\"\n\nimport json\nimport yaml\nfrom pathlib import Path\nfrom typing import Dict, Any, Optional\nfrom dataclasses import dataclass, field, asdict\n\nfrom .handlers import BaseHandler, handler\n\n\n# ─── Default Settings ─────────────────────────────────────\n\nDEFAULT_SETTINGS = {\n \"memory\": {\n \"duplicate_check\": False,\n \"duplicate_threshold\": 0.85,\n \"default_max_tokens\": 4000,\n \"chars_per_token\": 4.0,\n },\n \"scoring\": {\n \"importance_weight\": 0.3,\n \"recency_weight\": 0.2,\n \"relevance_weight\": 0.5,\n },\n \"compaction\": {\n \"enabled\": False,\n \"target_tokens\": 2000,\n \"threshold_tokens\": 10000,\n },\n \"decay\": {\n \"enabled\": False,\n \"half_life_days\": 7.0,\n },\n \"policies\": {\n \"on_conflict\": \"append\", # append | overwrite\n \"on_similar\": \"warn\", # warn | skip | force\n },\n}\n\n\ndef deep_merge(base: Dict, overlay: Dict) -> Dict:\n \"\"\"Deep merge overlay into base.\"\"\"\n result = base.copy()\n for key, value in overlay.items():\n if key in result and isinstance(result[key], dict) and isinstance(value, dict):\n result[key] = deep_merge(result[key], value)\n else:\n result[key] = value\n return result\n\n\n# ─── Config Store ─────────────────────────────────────────\n\nclass ConfigStore:\n \"\"\"Manages layered configuration.\"\"\"\n \n def __init__(self, user_config: Dict = None, storage_path: str = None):\n self.user_config = user_config or {}\n self.runtime_changes: Dict[str, Any] = {}\n self._storage_path = storage_path\n self._load_runtime()\n \n def _runtime_file(self) -> Path:\n if self._storage_path:\n return Path(self._storage_path).parent / \"runtime_config.json\"\n return Path.home() / \".local\" / \"share\" / \"avm\" / \"runtime_config.json\"\n \n def _load_runtime(self):\n path = self._runtime_file()\n if path.exists():\n try:\n self.runtime_changes = json.loads(path.read_text())\n except (json.JSONDecodeError, IOError):\n pass\n \n def _save_runtime(self):\n path = self._runtime_file()\n path.parent.mkdir(parents=True, exist_ok=True)\n path.write_text(json.dumps(self.runtime_changes, indent=2))\n \n def get_merged(self) -> Dict:\n \"\"\"Get fully merged config (defaults + user + runtime).\"\"\"\n merged = deep_merge(DEFAULT_SETTINGS, self.user_config)\n merged = deep_merge(merged, self.runtime_changes)\n return merged\n \n def get_value(self, key_path: str) -> Any:\n \"\"\"Get a specific config value by dot-path.\"\"\"\n config = self.get_merged()\n parts = key_path.split(\".\")\n for part in parts:\n if isinstance(config, dict) and part in config:\n config = config[part]\n else:\n return None\n return config\n \n def set_value(self, key_path: str, value: Any):\n \"\"\"Set a runtime config value.\"\"\"\n parts = key_path.split(\".\")\n target = self.runtime_changes\n for part in parts[:-1]:\n if part not in target:\n target[part] = {}\n target = target[part]\n target[parts[-1]] = value\n self._save_runtime()\n \n def update(self, changes: Dict):\n \"\"\"Update runtime config with changes.\"\"\"\n self.runtime_changes = deep_merge(self.runtime_changes, changes)\n self._save_runtime()\n \n def reset(self, key_path: str = None):\n \"\"\"Reset runtime changes.\"\"\"\n if key_path:\n parts = key_path.split(\".\")\n target = self.runtime_changes\n for part in parts[:-1]:\n if part not in target:\n return\n target = target[part]\n target.pop(parts[-1], None)\n else:\n self.runtime_changes = {}\n self._save_runtime()\n\n\n# ─── Config Handler ─────────────────────────────────────────\n\n@handler(\"config\",\n description=\"Agent-writable configuration with layered merge\",\n usage=\"\"\"pattern: \"/.config/**\"\nhandler: config\"\"\",\n examples=[\n \"cat /.config/settings.yaml\",\n \"echo 'duplicate_check: true' > /.config/memory\",\n ])\nclass ConfigHandler(BaseHandler):\n \"\"\"\n Handler for agent configuration.\n \n Paths:\n - /.config/settings.yaml - Full merged config\n - /.config/settings.json - Same, JSON format\n - /.config/{section} - Specific section\n - /.config/raw - Runtime changes only\n \"\"\"\n \n _store: Optional[ConfigStore] = None\n \n def __init__(self, config: Dict[str, Any]):\n super().__init__(config)\n if ConfigHandler._store is None:\n ConfigHandler._store = ConfigStore(config.get(\"user_config\", {}))\n \n @property\n def store(self) -> ConfigStore:\n if ConfigHandler._store is None:\n ConfigHandler._store = ConfigStore()\n return ConfigHandler._store\n \n def read(self, path: str, context: Dict[str, Any]) -> Optional[str]:\n path = path.strip(\"/\")\n \n if path in (\".config/settings.yaml\", \".config/settings\"):\n return yaml.dump(self.store.get_merged(), default_flow_style=False)\n \n if path == \".config/settings.json\":\n return json.dumps(self.store.get_merged(), indent=2)\n \n if path == \".config/raw\":\n return yaml.dump(self.store.runtime_changes, default_flow_style=False)\n \n if path == \".config/defaults\":\n return yaml.dump(DEFAULT_SETTINGS, default_flow_style=False)\n \n # Specific section: /.config/memory, /.config/policies, etc.\n if path.startswith(\".config/\"):\n section = path[8:] # Remove \".config/\"\n value = self.store.get_value(section.replace(\"/\", \".\"))\n if value is not None:\n if isinstance(value, dict):\n return yaml.dump(value, default_flow_style=False)\n return str(value)\n return None\n \n return None\n \n def write(self, path: str, content: str, context: Dict[str, Any]) -> bool:\n path = path.strip(\"/\")\n content = content.strip()\n \n # Reset\n if not content or content.lower() in (\"reset\", \"default\"):\n if path == \".config/raw\" or path == \".config/settings\":\n self.store.reset()\n elif path.startswith(\".config/\"):\n section = path[8:].replace(\"/\", \".\")\n self.store.reset(section)\n return True\n \n # Parse content\n try:\n if content.startswith(\"{\"):\n changes = json.loads(content)\n else:\n changes = yaml.safe_load(content)\n except Exception:\n return False\n \n if not isinstance(changes, dict):\n # Single value: /.config/memory/duplicate_check = true\n if path.startswith(\".config/\"):\n key_path = path[8:].replace(\"/\", \".\")\n self.store.set_value(key_path, changes)\n return True\n return False\n \n # Full update\n if path in (\".config/settings\", \".config/settings.yaml\", \".config/raw\"):\n self.store.update(changes)\n return True\n \n # Section update: /.config/memory\n if path.startswith(\".config/\"):\n section = path[8:].replace(\"/\", \".\")\n self.store.update({section: changes})\n return True\n \n return False\n \n def delete(self, path: str, context: Dict[str, Any]) -> bool:\n path = path.strip(\"/\")\n if path == \".config/raw\":\n self.store.reset()\n return True\n if path.startswith(\".config/\"):\n section = path[8:].replace(\"/\", \".\")\n self.store.reset(section)\n return True\n return False\n \n def list(self, prefix: str, context: Dict[str, Any]) -> list:\n return [\"settings.yaml\", \"settings.json\", \"raw\", \"defaults\",\n \"memory\", \"scoring\", \"compaction\", \"decay\", \"policies\"]\n\n\n# ─── Meta Handler ─────────────────────────────────────────\n\n@handler(\"meta\",\n description=\"Read-only system metadata\",\n usage=\"\"\"pattern: \"/.meta/**\"\nhandler: meta\"\"\",\n examples=[\n \"cat /.meta/version\",\n \"cat /.meta/stats\",\n ])\nclass MetaHandler(BaseHandler):\n \"\"\"\n Handler for read-only system metadata.\n \n Paths:\n - /.meta/version - AVM version\n - /.meta/stats - Store statistics\n - /.meta/info - System info\n \"\"\"\n \n def __init__(self, config: Dict[str, Any]):\n super().__init__(config)\n self._store = config.get(\"store\")\n \n def read(self, path: str, context: Dict[str, Any]) -> Optional[str]:\n path = path.strip(\"/\")\n \n if path == \".meta/version\":\n from . import __version__\n return __version__\n \n if path == \".meta/stats\":\n if self._store and hasattr(self._store, 'stats'):\n return json.dumps(self._store.stats(), indent=2)\n return \"{}\"\n \n if path == \".meta/info\":\n from . import __version__\n import sys\n info = {\n \"version\": __version__,\n \"python\": sys.version.split()[0],\n }\n return json.dumps(info, indent=2)\n \n return None\n \n def write(self, path: str, content: str, context: Dict[str, Any]) -> bool:\n return False # Read-only\n \n def delete(self, path: str, context: Dict[str, Any]) -> bool:\n return False # Read-only\n \n def list(self, prefix: str, context: Dict[str, Any]) -> list:\n return [\"version\", \"stats\", \"info\"]\n","content_type":"text/x-python; charset=utf-8","language":"python","size":10604,"content_sha256":"112c6a03264b692939d2f63d285bb2028c52b76aa9c5784585383c1cee693cab"},{"filename":"avm/config.py","content":"\"\"\"\nvfs/config.py - Config-driven VFS\n\nSupports YAML configuration files with zero hardcoding.\n\"\"\"\n\nimport os\nimport re\nimport fnmatch\nfrom dataclasses import dataclass, field\nfrom pathlib import Path\nfrom typing import Dict, List, Optional, Any, Type, Callable\nimport yaml\n\n\n@dataclass\nclass ProviderSpec:\n \"\"\"Provider specification\"\"\"\n pattern: str # glob pattern, e.g. \"/trading/positions*\"\n type: str # provider type name\n ttl: int = 0 # TTL in seconds (0 = no expiry)\n config: Dict[str, Any] = field(default_factory=dict) # provider-specific config\n \n def matches(self, path: str) -> bool:\n \"\"\"Check if path matches this provider pattern\"\"\"\n return fnmatch.fnmatch(path, self.pattern)\n\n\n@dataclass \nclass PermissionRule:\n \"\"\"Permission rule\"\"\"\n pattern: str # glob pattern\n access: str = \"ro\" # \"ro\" | \"rw\" | \"none\"\n \n def matches(self, path: str) -> bool:\n return fnmatch.fnmatch(path, self.pattern)\n \n @property\n def can_read(self) -> bool:\n return self.access in (\"ro\", \"rw\")\n \n @property\n def can_write(self) -> bool:\n return self.access == \"rw\"\n\n\n@dataclass\nclass AVMConfig:\n \"\"\"\n VFS Configuration\n \n Loads from YAML file with environment variable expansion.\n \"\"\"\n providers: List[ProviderSpec] = field(default_factory=list)\n permissions: List[PermissionRule] = field(default_factory=list)\n db_path: str = \"\"\n default_ttl: int = 300\n \n # Default access if no matching rule\n default_access: str = \"ro\"\n\n # Embedding config (optional)\n embedding: Dict[str, Any] = field(default_factory=dict)\n \n # Decay/archive config (optional)\n decay: Dict[str, Any] = field(default_factory=dict)\n \n # Performance tuning (for ablation experiments)\n performance: Dict[str, Any] = field(default_factory=lambda: {\n \"wal_mode\": True, # SQLite WAL mode\n \"async_embedding\": True, # Async embedding indexing\n \"hot_cache\": True, # LRU hot cache\n \"cache_size\": 100, # Cache max size\n \"sync_mode\": \"NORMAL\", # SQLite sync mode (NORMAL/FULL/OFF)\n })\n \n @classmethod\n def from_yaml(cls, path: str) -> \"AVMConfig\":\n \"\"\"Load configuration from YAML file\"\"\"\n with open(path) as f:\n raw = f.read()\n \n # Expand env vars ${VAR} or $VAR\n def expand_env(match):\n var = match.group(1) or match.group(2)\n return os.environ.get(var, match.group(0))\n \n raw = re.sub(r'\\$\\{(\\w+)\\}|\\$(\\w+)', expand_env, raw)\n \n data = yaml.safe_load(raw)\n return cls.from_dict(data)\n \n @classmethod\n def from_dict(cls, data: Dict) -> \"AVMConfig\":\n \"\"\"Create configuration from dictionary\"\"\"\n providers = [\n ProviderSpec(\n pattern=p.get(\"pattern\", \"/*\"),\n type=p.get(\"type\", \"static\"),\n ttl=p.get(\"ttl\", 0),\n config=p.get(\"config\", {}),\n )\n for p in data.get(\"providers\", [])\n ]\n \n # Use default permissions if not specified in config\n raw_permissions = data.get(\"permissions\")\n if raw_permissions is None:\n # Default permissions for memory paths\n permissions = [\n PermissionRule(pattern=\"/memory/private/*\", access=\"rw\"),\n PermissionRule(pattern=\"/memory/shared/*\", access=\"rw\"),\n PermissionRule(pattern=\"/memory/*\", access=\"rw\"),\n PermissionRule(pattern=\"/shared/*\", access=\"rw\"),\n PermissionRule(pattern=\"/task/*\", access=\"rw\"),\n PermissionRule(pattern=\"/trash/*\", access=\"rw\"),\n PermissionRule(pattern=\"/archive/*\", access=\"rw\"),\n PermissionRule(pattern=\"/snapshots/*\", access=\"rw\"),\n PermissionRule(pattern=\"/live/*\", access=\"ro\"),\n PermissionRule(pattern=\"/research/*\", access=\"ro\"),\n ]\n else:\n permissions = [\n PermissionRule(\n pattern=p.get(\"pattern\", \"/*\"),\n access=p.get(\"access\", \"ro\"),\n )\n for p in raw_permissions\n ]\n \n return cls(\n providers=providers,\n permissions=permissions,\n db_path=data.get(\"db_path\", \"\"),\n default_ttl=data.get(\"default_ttl\", 300),\n default_access=data.get(\"default_access\", \"ro\"),\n embedding=data.get(\"embedding\", {}),\n decay=data.get(\"decay\", {}),\n performance=data.get(\"performance\", {\n \"wal_mode\": True,\n \"async_embedding\": True,\n \"hot_cache\": True,\n \"cache_size\": 100,\n \"sync_mode\": \"NORMAL\",\n }),\n )\n \n def to_dict(self) -> Dict:\n \"\"\"Export as dictionary\"\"\"\n return {\n \"providers\": [\n {\"pattern\": p.pattern, \"type\": p.type, \"ttl\": p.ttl, \"config\": p.config}\n for p in self.providers\n ],\n \"permissions\": [\n {\"pattern\": p.pattern, \"access\": p.access}\n for p in self.permissions\n ],\n \"db_path\": self.db_path,\n \"default_ttl\": self.default_ttl,\n \"default_access\": self.default_access,\n }\n \n def get_provider_spec(self, path: str) -> Optional[ProviderSpec]:\n \"\"\"Get provider spec matching path\"\"\"\n for spec in self.providers:\n if spec.matches(path):\n return spec\n return None\n \n def check_permission(self, path: str, action: str = \"read\") -> bool:\n \"\"\"\n Check path permission\n \n Args:\n path: path\n action: \"read\" | \"write\"\n \"\"\"\n for rule in self.permissions:\n if rule.matches(path):\n if action == \"read\":\n return rule.can_read\n elif action == \"write\":\n return rule.can_write\n return False\n \n # Default permission\n if action == \"read\":\n return self.default_access in (\"ro\", \"rw\")\n elif action == \"write\":\n return self.default_access == \"rw\"\n return False\n\n\n# Default configuration (backward compatible)\nDEFAULT_CONFIG = AVMConfig(\n providers=[\n ProviderSpec(pattern=\"/live/positions*\", type=\"alpaca_positions\", ttl=60),\n ProviderSpec(pattern=\"/live/orders*\", type=\"alpaca_orders\", ttl=30),\n ProviderSpec(pattern=\"/live/indicators/*\", type=\"technical_indicators\", ttl=300),\n ProviderSpec(pattern=\"/live/news/*\", type=\"news\", ttl=600),\n ProviderSpec(pattern=\"/live/watchlist*\", type=\"watchlist\", ttl=300),\n ],\n permissions=[\n PermissionRule(pattern=\"/memory/private/*\", access=\"rw\"),\n PermissionRule(pattern=\"/memory/shared/*\", access=\"rw\"),\n PermissionRule(pattern=\"/memory/*\", access=\"rw\"),\n PermissionRule(pattern=\"/snapshots/*\", access=\"rw\"),\n PermissionRule(pattern=\"/live/*\", access=\"ro\"),\n PermissionRule(pattern=\"/research/*\", access=\"ro\"),\n ],\n default_access=\"ro\",\n)\n\n\ndef load_config(config_path: str = None) -> AVMConfig:\n \"\"\"\n Load configuration\n \n Priority:\n 1. Specified path\n 2. Environment variable VFS_CONFIG\n 3. ~/.avm/config.yaml\n 4. Default configuration\n \"\"\"\n paths_to_try = []\n \n if config_path:\n paths_to_try.append(config_path)\n \n if os.environ.get(\"VFS_CONFIG\"):\n paths_to_try.append(os.environ[\"VFS_CONFIG\"])\n \n paths_to_try.append(str(Path.home() / \".avm\" / \"config.yaml\"))\n paths_to_try.append(str(Path.home() / \".openclaw\" / \"vfs\" / \"config.yaml\"))\n \n for path in paths_to_try:\n if os.path.exists(path):\n return AVMConfig.from_yaml(path)\n \n return DEFAULT_CONFIG\n","content_type":"text/x-python; charset=utf-8","language":"python","size":8074,"content_sha256":"a1bf8defefd0c5e6b2db691391ac273cdd00cd7c87bcab5bb1e8cd799e888b1c"},{"filename":"avm/consolidation.py","content":"\"\"\"\navm/consolidation.py - Memory Consolidation\n\nLike human sleep consolidation:\n- Merge similar memories\n- Extract common themes into summaries\n- Decay old memory importance\n- Can run as scheduled cron job\n\nUsage:\n consolidator = MemoryConsolidator(avm)\n consolidator.run() # Full consolidation\n consolidator.decay_importance() # Just decay\n consolidator.merge_similar() # Just merge\n\"\"\"\n\nimport math\nfrom dataclasses import dataclass\nfrom datetime import datetime, timedelta\nfrom typing import List, Dict, Set, Optional, Tuple\nfrom collections import defaultdict\n\nfrom .store import AVMStore\nfrom .node import AVMNode\nfrom .topic_index import TopicIndex\nfrom .utils import utcnow\n\n\n@dataclass\nclass ConsolidationConfig:\n \"\"\"Configuration for memory consolidation\"\"\"\n # Importance decay\n decay_half_life_days: float = 30.0 # Importance halves every 30 days\n min_importance: float = 0.1 # Floor for importance decay\n \n # Similarity merging\n similarity_threshold: float = 0.8 # Jaccard threshold for merging\n min_age_for_merge_days: float = 7.0 # Don't merge recent memories\n \n # Summary extraction\n min_cluster_size: int = 3 # Minimum memories to form a cluster\n max_summary_length: int = 500 # Characters per summary\n \n # Clustering\n cluster_min_similarity: float = 0.3 # Minimum similarity for clustering\n max_clusters: int = 20 # Maximum number of clusters to create\n\n\n@dataclass\nclass MemoryCluster:\n \"\"\"A cluster of related memories\"\"\"\n id: str\n topic: str\n memories: List[str] # paths\n centroid_topics: Set[str]\n avg_importance: float\n created_at: datetime\n summary: str = \"\"\n\n\n@dataclass\nclass ConsolidationResult:\n \"\"\"Result of a consolidation run\"\"\"\n memories_processed: int = 0\n importance_decayed: int = 0\n memories_merged: int = 0\n summaries_created: int = 0\n duration_ms: float = 0.0\n\n\nclass MemoryConsolidator:\n \"\"\"\n Consolidates agent memories to reduce noise and extract patterns.\n \n Inspired by sleep consolidation in biological memory systems.\n \"\"\"\n \n def __init__(self, store: AVMStore, topic_index: TopicIndex = None,\n config: ConsolidationConfig = None):\n self.store = store\n self.topic_index = topic_index or TopicIndex(store)\n self.config = config or ConsolidationConfig()\n \n def run(self, agent_id: str = None, dry_run: bool = False) -> ConsolidationResult:\n \"\"\"\n Run full consolidation.\n \n Args:\n agent_id: Consolidate specific agent, or all if None\n dry_run: If True, don't actually modify anything\n \n Returns:\n ConsolidationResult with stats\n \"\"\"\n import time\n start = time.perf_counter()\n \n result = ConsolidationResult()\n \n # Get memories to process\n prefix = f\"/memory/private/{agent_id}\" if agent_id else \"/memory\"\n memories = self._get_memories(prefix)\n result.memories_processed = len(memories)\n \n if not dry_run:\n # Step 1: Decay importance\n decayed = self.decay_importance(memories)\n result.importance_decayed = decayed\n \n # Step 2: Merge similar memories\n merged = self.merge_similar(memories)\n result.memories_merged = merged\n \n # Step 3: Extract summaries from clusters\n summaries = self.extract_summaries(memories)\n result.summaries_created = summaries\n \n result.duration_ms = (time.perf_counter() - start) * 1000\n return result\n \n def _get_memories(self, prefix: str) -> List[AVMNode]:\n \"\"\"Get all memories under a prefix\"\"\"\n return self.store.list_nodes(prefix, limit=10000)\n \n def decay_importance(self, memories: List[AVMNode] = None) -> int:\n \"\"\"\n Apply time-based importance decay.\n \n Importance decays exponentially: I(t) = I(0) * 0.5^(t/half_life)\n \"\"\"\n if memories is None:\n memories = self._get_memories(\"/memory\")\n \n now = utcnow()\n decayed_count = 0\n \n for mem in memories:\n # Handle timezone-aware and naive datetimes\n mem_time = mem.updated_at\n if mem_time.tzinfo is None:\n from datetime import timezone\n mem_time = mem_time.replace(tzinfo=timezone.utc)\n now_aware = now if now.tzinfo else now.replace(tzinfo=timezone.utc)\n age_days = (now_aware - mem_time).total_seconds() / 86400\n current_importance = mem.meta.get(\"importance\", 0.5)\n \n # Calculate decayed importance\n decay_factor = 0.5 ** (age_days / self.config.decay_half_life_days)\n new_importance = max(\n current_importance * decay_factor,\n self.config.min_importance\n )\n \n # Only update if significant change\n if abs(new_importance - current_importance) > 0.01:\n mem.meta[\"importance\"] = round(new_importance, 3)\n mem.meta[\"last_decay\"] = now.isoformat()\n self.store.put_node(mem)\n decayed_count += 1\n \n return decayed_count\n \n def merge_similar(self, memories: List[AVMNode] = None) -> int:\n \"\"\"\n Merge similar memories into consolidated versions.\n \n Uses topic overlap (Jaccard similarity) to find candidates.\n \"\"\"\n if memories is None:\n memories = self._get_memories(\"/memory\")\n \n now = utcnow()\n min_age = timedelta(days=self.config.min_age_for_merge_days)\n merged_count = 0\n \n # Filter to old enough memories\n eligible = [m for m in memories \n if (now - m.updated_at) > min_age]\n \n # Build topic sets for each memory\n mem_topics: Dict[str, Set[str]] = {}\n for mem in eligible:\n topics = set(self.topic_index.topics_for_path(mem.path))\n if topics:\n mem_topics[mem.path] = topics\n \n # Find similar pairs\n merged_paths: Set[str] = set()\n \n for path1, topics1 in mem_topics.items():\n if path1 in merged_paths:\n continue\n \n similar = []\n for path2, topics2 in mem_topics.items():\n if path2 == path1 or path2 in merged_paths:\n continue\n \n # Jaccard similarity\n intersection = len(topics1 & topics2)\n union = len(topics1 | topics2)\n if union > 0:\n similarity = intersection / union\n if similarity >= self.config.similarity_threshold:\n similar.append(path2)\n \n if similar:\n # Merge similar memories into path1\n mem1 = self.store.get_node(path1)\n if mem1:\n merged_content = [mem1.content or \"\"]\n merged_importance = mem1.meta.get(\"importance\", 0.5)\n \n for path2 in similar:\n mem2 = self.store.get_node(path2)\n if mem2:\n merged_content.append(mem2.content or \"\")\n merged_importance = max(\n merged_importance,\n mem2.meta.get(\"importance\", 0.5)\n )\n # Mark as merged (soft delete)\n mem2.meta[\"merged_into\"] = path1\n mem2.meta[\"merged_at\"] = now.isoformat()\n self.store.put_node(mem2)\n merged_paths.add(path2)\n \n # Update consolidated memory\n mem1.content = \"\\n\\n---\\n\\n\".join(merged_content)\n mem1.meta[\"importance\"] = merged_importance\n mem1.meta[\"consolidated_from\"] = list(similar)\n mem1.meta[\"consolidated_at\"] = now.isoformat()\n self.store.put_node(mem1)\n \n # Re-index topics\n self.topic_index.index_path(path1, mem1.content)\n \n merged_count += len(similar)\n \n return merged_count\n \n def extract_summaries(self, memories: List[AVMNode] = None) -> int:\n \"\"\"\n Extract topic summaries from memory clusters.\n \n Groups memories by topic and creates summary nodes.\n \"\"\"\n if memories is None:\n memories = self._get_memories(\"/memory\")\n \n # Group by topic\n topic_memories: Dict[str, List[AVMNode]] = defaultdict(list)\n \n for mem in memories:\n topics = self.topic_index.topics_for_path(mem.path)\n for topic in topics:\n topic_memories[topic].append(mem)\n \n summaries_created = 0\n now = utcnow()\n \n for topic, mems in topic_memories.items():\n if len(mems) \u003c self.config.min_cluster_size:\n continue\n \n # Check if summary already exists and is recent\n summary_path = f\"/memory/summaries/{topic}.md\"\n existing = self.store.get_node(summary_path)\n if existing:\n age = (now - existing.updated_at).total_seconds() / 86400\n if age \u003c 7: # Don't re-summarize within a week\n continue\n \n # Create summary\n summary_content = self._create_summary(topic, mems)\n \n summary_node = AVMNode(\n path=summary_path,\n content=summary_content,\n meta={\n \"type\": \"summary\",\n \"topic\": topic,\n \"source_count\": len(mems),\n \"generated_at\": now.isoformat(),\n \"importance\": 0.8, # Summaries are important\n }\n )\n self.store.put_node(summary_node)\n self.topic_index.index_path(summary_path, summary_content, topic)\n summaries_created += 1\n \n return summaries_created\n \n def _create_summary(self, topic: str, memories: List[AVMNode]) -> str:\n \"\"\"Create a summary from a list of memories\"\"\"\n # Simple extractive summary: first sentence of each memory\n lines = [f\"# Summary: {topic.title()}\\n\"]\n lines.append(f\"*Generated from {len(memories)} memories*\\n\")\n \n for mem in sorted(memories, \n key=lambda m: m.meta.get(\"importance\", 0.5),\n reverse=True)[:10]:\n content = mem.content or \"\"\n # First sentence or first 100 chars\n first_sentence = content.split(\".\")[0][:100]\n if first_sentence:\n lines.append(f\"- {first_sentence.strip()}\")\n \n return \"\\n\".join(lines)[:self.config.max_summary_length]\n\n\n def cluster_memories(self, memories: List[AVMNode] = None) -> List[MemoryCluster]:\n \"\"\"\n Cluster memories by topic similarity using agglomerative clustering.\n \n Returns a list of MemoryCluster objects.\n \"\"\"\n if memories is None:\n memories = self._get_memories(\"/memory\")\n \n # Build topic vectors for each memory\n mem_topics: Dict[str, Set[str]] = {}\n for mem in memories:\n topics = set(self.topic_index.topics_for_path(mem.path))\n if topics:\n mem_topics[mem.path] = topics\n \n if len(mem_topics) \u003c 2:\n return []\n \n # Build similarity matrix\n paths = list(mem_topics.keys())\n n = len(paths)\n \n def jaccard(s1: Set[str], s2: Set[str]) -> float:\n if not s1 or not s2:\n return 0.0\n return len(s1 & s2) / len(s1 | s2)\n \n # Simple agglomerative clustering\n # Start with each memory as its own cluster\n clusters: List[Set[int]] = [{i} for i in range(n)]\n cluster_topics: List[Set[str]] = [mem_topics[paths[i]] for i in range(n)]\n \n # Merge until we hit max_clusters or no similar pairs\n while len(clusters) > 1:\n # Find most similar pair\n best_sim = 0.0\n best_pair = (-1, -1)\n \n for i in range(len(clusters)):\n for j in range(i + 1, len(clusters)):\n sim = jaccard(cluster_topics[i], cluster_topics[j])\n if sim > best_sim:\n best_sim = sim\n best_pair = (i, j)\n \n if best_sim \u003c self.config.cluster_min_similarity:\n break\n \n # Merge clusters\n i, j = best_pair\n clusters[i] = clusters[i] | clusters[j]\n cluster_topics[i] = cluster_topics[i] | cluster_topics[j]\n del clusters[j]\n del cluster_topics[j]\n \n # Convert to MemoryCluster objects\n result = []\n now = utcnow()\n \n for idx, (cluster_indices, topics) in enumerate(zip(clusters, cluster_topics)):\n if len(cluster_indices) \u003c self.config.min_cluster_size:\n continue\n \n cluster_paths = [paths[i] for i in cluster_indices]\n \n # Calculate average importance\n total_importance = 0.0\n for path in cluster_paths:\n node = self.store.get_node(path)\n if node:\n total_importance += node.meta.get(\"importance\", 0.5)\n avg_importance = total_importance / len(cluster_paths) if cluster_paths else 0.5\n \n # Find dominant topic (most frequent across cluster)\n topic_counts: Dict[str, int] = defaultdict(int)\n for path in cluster_paths:\n for t in mem_topics.get(path, set()):\n topic_counts[t] += 1\n \n dominant_topic = max(topic_counts.keys(), key=lambda t: topic_counts[t]) if topic_counts else \"misc\"\n \n cluster = MemoryCluster(\n id=f\"cluster-{idx}\",\n topic=dominant_topic,\n memories=cluster_paths,\n centroid_topics=topics,\n avg_importance=avg_importance,\n created_at=now,\n )\n result.append(cluster)\n \n # Sort by importance\n result.sort(key=lambda c: c.avg_importance, reverse=True)\n \n return result\n \n def generate_cluster_summaries(self, clusters: List[MemoryCluster] = None) -> int:\n \"\"\"\n Generate summaries for memory clusters.\n \n Returns number of summaries created.\n \"\"\"\n if clusters is None:\n memories = self._get_memories(\"/memory\")\n clusters = self.cluster_memories(memories)\n \n now = utcnow()\n created = 0\n \n for cluster in clusters:\n # Load memory contents\n contents = []\n for path in cluster.memories[:10]: # Limit to top 10\n node = self.store.get_node(path)\n if node and node.content:\n contents.append(node.content)\n \n if not contents:\n continue\n \n # Generate summary\n summary_lines = [\n f\"# Cluster Summary: {cluster.topic.title()}\",\n f\"*{len(cluster.memories)} memories, avg importance: {cluster.avg_importance:.2f}*\",\n f\"*Topics: {', '.join(sorted(cluster.centroid_topics)[:10])}*\",\n \"\",\n \"## Key Points:\",\n ]\n \n # Extract key sentences from each memory\n for content in contents:\n # Get first meaningful sentence\n sentences = content.replace(\"\\n\", \" \").split(\".\")\n for sent in sentences:\n sent = sent.strip()\n if len(sent) > 20: # Skip too short\n summary_lines.append(f\"- {sent[:150]}\")\n break\n \n cluster.summary = \"\\n\".join(summary_lines)[:self.config.max_summary_length]\n \n # Save summary\n summary_path = f\"/memory/clusters/{cluster.topic}.md\"\n summary_node = AVMNode(\n path=summary_path,\n content=cluster.summary,\n meta={\n \"type\": \"cluster_summary\",\n \"cluster_id\": cluster.id,\n \"topic\": cluster.topic,\n \"source_count\": len(cluster.memories),\n \"source_paths\": cluster.memories[:20],\n \"centroid_topics\": list(cluster.centroid_topics)[:20],\n \"avg_importance\": cluster.avg_importance,\n \"generated_at\": now.isoformat(),\n \"importance\": min(cluster.avg_importance + 0.2, 1.0),\n }\n )\n self.store.put_node(summary_node)\n self.topic_index.index_path(summary_path, cluster.summary, cluster.topic)\n created += 1\n \n return created\n\n\ndef generate_digest(\n store: AVMStore,\n topic_index: TopicIndex = None,\n agent_id: str = None,\n days: int = 1,\n max_items: int = 10,\n) -> str:\n \"\"\"\n Generate a memory digest for recent activity.\n \n Returns a markdown summary of what happened recently.\n \n Args:\n store: AVM store instance\n topic_index: Optional topic index for better categorization\n agent_id: Filter by agent (optional)\n days: How many days back to look\n max_items: Maximum items per category\n \n Returns:\n Markdown formatted digest\n \"\"\"\n from collections import defaultdict\n \n now = utcnow()\n cutoff = now - timedelta(days=days)\n \n # Get recent memories\n prefix = f\"/memory/private/{agent_id}\" if agent_id else \"/memory\"\n memories = []\n \n for node in store.list_nodes(prefix=prefix, limit=1000):\n if node and node.updated_at:\n # Handle timezone\n node_time = node.updated_at\n if node_time.tzinfo is None:\n from datetime import timezone\n node_time = node_time.replace(tzinfo=timezone.utc)\n cutoff_aware = cutoff if cutoff.tzinfo else cutoff.replace(tzinfo=timezone.utc)\n \n if node_time >= cutoff_aware:\n memories.append(node)\n \n if not memories:\n return f\"# Memory Digest\\n\\n*No activity in the last {days} day(s).*\"\n \n # Sort by time (most recent first)\n memories.sort(key=lambda n: n.updated_at, reverse=True)\n \n # Categorize by topic if possible\n categorized: Dict[str, List[AVMNode]] = defaultdict(list)\n \n if topic_index:\n for mem in memories:\n topics = topic_index.topics_for_path(mem.path)\n if topics:\n # Use first topic as category\n categorized[topics[0]].append(mem)\n else:\n categorized[\"uncategorized\"].append(mem)\n else:\n categorized[\"all\"] = memories\n \n # Build digest\n lines = [\n f\"# Memory Digest\",\n f\"*{len(memories)} memories from the last {days} day(s)*\",\n f\"*Generated: {now.strftime('%Y-%m-%d %H:%M UTC')}*\",\n \"\",\n ]\n \n # Statistics\n total_importance = sum(m.meta.get(\"importance\", 0.5) for m in memories)\n avg_importance = total_importance / len(memories) if memories else 0\n \n lines.extend([\n \"## Summary\",\n f\"- Total memories: {len(memories)}\",\n f\"- Categories: {len(categorized)}\",\n f\"- Avg importance: {avg_importance:.2f}\",\n \"\",\n ])\n \n # Category breakdown\n lines.append(\"## By Topic\")\n \n for topic in sorted(categorized.keys(), key=lambda t: len(categorized[t]), reverse=True):\n mems = categorized[topic][:max_items]\n lines.append(f\"\\n### {topic.title()} ({len(categorized[topic])})\")\n \n for mem in mems:\n importance = mem.meta.get(\"importance\", 0.5)\n # Get first line of content as preview\n preview = (mem.content or \"\").split(\"\\n\")[0][:80]\n if len(mem.content or \"\") > 80:\n preview += \"...\"\n lines.append(f\"- [{importance:.1f}] {preview}\")\n \n # High importance highlights\n high_importance = [m for m in memories if m.meta.get(\"importance\", 0.5) >= 0.8]\n if high_importance:\n lines.extend([\n \"\",\n \"## 🔥 High Importance\",\n ])\n for mem in high_importance[:5]:\n importance = mem.meta.get(\"importance\", 0.5)\n preview = (mem.content or \"\").split(\"\\n\")[0][:100]\n lines.append(f\"- **[{importance:.1f}]** {preview}\")\n \n return \"\\n\".join(lines)\n\n\ndef schedule_consolidation(store: AVMStore, interval_hours: int = 24):\n \"\"\"\n Schedule periodic consolidation.\n \n Can be called from a cron job or background thread.\n \"\"\"\n import threading\n import time\n \n def _consolidation_loop():\n consolidator = MemoryConsolidator(store)\n while True:\n try:\n result = consolidator.run()\n print(f\"[Consolidation] Processed {result.memories_processed}, \"\n f\"decayed {result.importance_decayed}, \"\n f\"merged {result.memories_merged}, \"\n f\"summaries {result.summaries_created}\")\n except Exception as e:\n print(f\"[Consolidation] Error: {e}\")\n \n time.sleep(interval_hours * 3600)\n \n thread = threading.Thread(target=_consolidation_loop, daemon=True)\n thread.start()\n return thread\n","content_type":"text/x-python; charset=utf-8","language":"python","size":22187,"content_sha256":"d100974166df4e104cdffc2e054571362d1018892ffc4a282e9fc5e90567cfc6"},{"filename":"avm/core.py","content":"\"\"\"\navm/core.py - AVM core class\n\nConfig-driven virtual filesystem\n\"\"\"\n\nfrom typing import Dict, List, Optional, Type, Callable, Any, Tuple\nfrom pathlib import Path\n\nfrom .config import AVMConfig, ProviderSpec, load_config\nfrom .store import AVMStore\nfrom .node import AVMNode, NodeType\nfrom .graph import EdgeType\n\n\nclass ProviderRegistry:\n \"\"\"\n Provider registertable\n \n Manage provider type name -> provider class mapping\n \"\"\"\n \n def __init__(self):\n self._types: Dict[str, Type] = {}\n self._factories: Dict[str, Callable] = {}\n \n def register(self, name: str, provider_class: Type = None, \n factory: Callable = None):\n \"\"\"\n register provider type\n \n Args:\n name: type name\n provider_class: Provider class\n factory: factory function (store, spec) -> Provider\n \"\"\"\n if provider_class:\n self._types[name] = provider_class\n if factory:\n self._factories[name] = factory\n \n def create(self, name: str, store: AVMStore, \n spec: ProviderSpec) -> Optional[Any]:\n \"\"\"create provider instance\"\"\"\n if name in self._factories:\n return self._factories[name](store, spec)\n \n if name in self._types:\n cls = self._types[name]\n return cls(store, spec.pattern, spec.ttl, **spec.config)\n \n return None\n \n def list_types(self) -> List[str]:\n \"\"\"List all registered types\"\"\"\n return list(set(self._types.keys()) | set(self._factories.keys()))\n\n\n# Global registration table\n_registry = ProviderRegistry()\n\n\ndef register_provider_type(name: str, provider_class: Type = None,\n factory: Callable = None):\n \"\"\"Register provider type (global)\"\"\"\n _registry.register(name, provider_class, factory)\n\n\nclass AVM:\n \"\"\"\n Virtual filesystem\n \n Config-driven, supports:\n - Dynamic provider registration\n - Configurable permission rules\n - TTL cache\n - relationgraph\n \"\"\"\n \n def __init__(self, config: AVMConfig = None, config_path: str = None,\n agent_id: str = None):\n \"\"\"\n Args:\n config: AVMConfig instance\n config_path: Configuration file path\n agent_id: Current agent ID for private space isolation\n \"\"\"\n if config:\n self.config = config\n else:\n self.config = load_config(config_path)\n \n # Agent isolation\n self.agent_id = agent_id\n \n # initializestorage\n db_path = self.config.db_path or None\n self.store = AVMStore(db_path)\n \n # Provider instancecache\n self._providers: Dict[str, Any] = {}\n \n # useGlobal registration table\n self._registry = _registry\n \n # Performance settings (for ablation experiments)\n perf_cfg = getattr(self.config, 'performance', None) or {}\n self._hot_cache_enabled = perf_cfg.get('hot_cache', True) if isinstance(perf_cfg, dict) else True\n self._async_embedding = perf_cfg.get('async_embedding', True) if isinstance(perf_cfg, dict) else True\n \n # Hot memory cache (LRU)\n cache_cfg = getattr(self.config, 'cache', None) or {}\n cache_size = perf_cfg.get('cache_size', 100) if isinstance(perf_cfg, dict) else 100\n self._cache_max_size = cache_cfg.get('max_size', cache_size) if isinstance(cache_cfg, dict) else cache_size\n self._cache: Dict[str, AVMNode] = {}\n self._cache_order: list = [] # For LRU eviction\n \n # Configure store performance\n wal_mode = perf_cfg.get('wal_mode', True) if isinstance(perf_cfg, dict) else True\n sync_mode = perf_cfg.get('sync_mode', 'NORMAL') if isinstance(perf_cfg, dict) else 'NORMAL'\n self.store.configure_performance(wal_mode=wal_mode, sync_mode=sync_mode)\n \n # registerbuilt-in provider type\n self._register_builtin_providers()\n\n # Auto-enable embedding if configured\n self._embedding_store = None\n emb_cfg = getattr(self.config, 'embedding', None) or {}\n if isinstance(emb_cfg, dict) and emb_cfg.get('enabled'):\n try:\n from .embedding import LocalEmbedding, OpenAIEmbedding, EmbeddingStore\n backend_type = emb_cfg.get('backend', 'local')\n model = emb_cfg.get('model', 'all-MiniLM-L6-v2')\n if backend_type == 'local':\n backend = LocalEmbedding(model)\n else:\n backend = OpenAIEmbedding(model)\n self._embedding_store = EmbeddingStore(self.store, backend)\n self._auto_index_embedding = emb_cfg.get('auto_index', True)\n # Background index all existing nodes\n import threading\n def _bg_embed_all(es=self._embedding_store):\n try:\n es.embeend_all(\"/\")\n except Exception:\n pass\n threading.Thread(target=_bg_embed_all, daemon=True).start()\n except Exception:\n pass # silently skip if sentence-transformers not installed\n \n def _resolve_path(self, path: str) -> str:\n \"\"\"Resolve /private/... shorthand to /memory/private/{agent_id}/...\n\n When an agent accesses /private/foo, it automatically maps to\n /memory/private/{agent_id}/foo so agent_id never needs to be typed\n explicitly. Paths not starting with /private/ pass through unchanged.\n \"\"\"\n if self.agent_id and path.startswith(\"/private/\"):\n return f\"/memory/private/{self.agent_id}/{path[len('/private/'):]}\"\n if self.agent_id and path == \"/private\":\n return f\"/memory/private/{self.agent_id}\"\n return path\n\n def _check_private_access(self, path: str) -> bool:\n \"\"\"Check if current agent can access path (private space isolation)\"\"\"\n if not self.agent_id:\n return True # No agent context = admin mode\n \n # Check private space pattern: /memory/private/{agent_id}/...\n if path.startswith(\"/memory/private/\"):\n parts = path.split(\"/\")\n if len(parts) >= 4:\n path_agent = parts[3]\n if path_agent != self.agent_id:\n return False\n return True\n \n def _register_builtin_providers(self):\n \"\"\"registerbuilt-in provider\"\"\"\n from .providers import (\n AlpacaPositionsProvider, AlpacaOrdersProvider,\n TechnicalIndicatorsProvider, NewsProvider,\n WatchlistProvider, MemoryProvider,\n )\n \n # Alpaca (requires config)\n def create_alpaca_positions(store, spec):\n config = spec.config\n if not config.get(\"api_key\"):\n # tryfrom env_file load\n env_file = config.get(\"env_file\", \"\")\n if env_file:\n env_path = Path(env_file).expanduser()\n if env_path.exists():\n env = dict(\n line.split(\"=\", 1)\n for line in env_path.read_text().splitlines()\n if \"=\" in line and not line.startswith(\"#\")\n )\n config = {**config, **{\n \"api_key\": env.get(\"ALPACA_API_KEY\", \"\"),\n \"secret_key\": env.get(\"ALPACA_SECRET_KEY\", \"\"),\n \"base_url\": env.get(\"ALPACA_BASE_URL\", \"https://paper-api.alpaca.markets\"),\n }}\n \n return AlpacaPositionsProvider(\n store,\n api_key=config.get(\"api_key\", \"\"),\n secret_key=config.get(\"secret_key\", \"\"),\n base_url=config.get(\"base_url\", \"https://paper-api.alpaca.markets\"),\n ttl_seconds=spec.ttl or 60,\n )\n \n def create_alpaca_orders(store, spec):\n config = spec.config\n env_file = config.get(\"env_file\", \"\")\n if env_file and not config.get(\"api_key\"):\n env_path = Path(env_file).expanduser()\n if env_path.exists():\n env = dict(\n line.split(\"=\", 1)\n for line in env_path.read_text().splitlines()\n if \"=\" in line and not line.startswith(\"#\")\n )\n config = {**config, **{\n \"api_key\": env.get(\"ALPACA_API_KEY\", \"\"),\n \"secret_key\": env.get(\"ALPACA_SECRET_KEY\", \"\"),\n \"base_url\": env.get(\"ALPACA_BASE_URL\", \"https://paper-api.alpaca.markets\"),\n }}\n \n return AlpacaOrdersProvider(\n store,\n api_key=config.get(\"api_key\", \"\"),\n secret_key=config.get(\"secret_key\", \"\"),\n base_url=config.get(\"base_url\", \"https://paper-api.alpaca.markets\"),\n ttl_seconds=spec.ttl or 30,\n )\n \n self._registry.register(\"alpaca_positions\", factory=create_alpaca_positions)\n self._registry.register(\"alpaca_orders\", factory=create_alpaca_orders)\n \n # Providers that need no config\n self._registry.register(\"technical_indicators\", TechnicalIndicatorsProvider)\n self._registry.register(\"news\", NewsProvider)\n self._registry.register(\"watchlist\", WatchlistProvider)\n self._registry.register(\"memory\", MemoryProvider)\n \n def register_provider_type(self, name: str, provider_class: Type = None,\n factory: Callable = None):\n \"\"\"registercustom provider type\"\"\"\n self._registry.register(name, provider_class, factory)\n \n def _cache_get(self, path: str) -> Optional[AVMNode]:\n \"\"\"Get from hot cache\"\"\"\n if path in self._cache:\n # Move to end (most recent)\n if path in self._cache_order:\n self._cache_order.remove(path)\n self._cache_order.append(path)\n return self._cache[path]\n return None\n \n def _cache_put(self, node: AVMNode):\n \"\"\"Put into hot cache\"\"\"\n path = node.path\n if path in self._cache:\n self._cache_order.remove(path)\n self._cache[path] = node\n self._cache_order.append(path)\n # Evict if over limit\n while len(self._cache) > self._cache_max_size:\n oldest = self._cache_order.pop(0)\n self._cache.pop(oldest, None)\n \n def _cache_invalidate(self, path: str):\n \"\"\"Invalidate cache entry\"\"\"\n self._cache.pop(path, None)\n if path in self._cache_order:\n self._cache_order.remove(path)\n \n def _get_provider(self, path: str) -> Optional[Any]:\n \"\"\"Get or create provider for path\"\"\"\n spec = self.config.get_provider_spec(path)\n if not spec:\n return None\n \n # cache key\n cache_key = f\"{spec.type}:{spec.pattern}\"\n \n if cache_key not in self._providers:\n provider = self._registry.create(spec.type, self.store, spec)\n if provider:\n self._providers[cache_key] = provider\n \n return self._providers.get(cache_key)\n \n # ─── Read/Write Interface ─────────────────────────────────────────\n \n def read(self, path: str, force_refresh: bool = False) -> Optional[AVMNode]:\n \"\"\"\n readnode\n \n 1. Check read permission\n 2. Find provider\n 3. Fetch via provider (with TTL cache)\n 4. Or read directly from store\n \"\"\"\n path = self._resolve_path(path)\n # Virtual paths for skill discovery\n if path == \"/:handlers\":\n from .handlers import get_handlers_skill_info\n return AVMNode(\n path=path,\n content=get_handlers_skill_info(),\n node_type=NodeType.FILE,\n )\n \n if path.startswith(\"/:handlers/\"):\n from .handlers import HANDLERS\n handler_name = path.split(\"/\")[-1]\n if handler_name in HANDLERS:\n handler_class = HANDLERS[handler_name]\n return AVMNode(\n path=path,\n content=handler_class.skill_info(),\n node_type=NodeType.FILE,\n )\n else:\n available = \", \".join(HANDLERS.keys())\n return AVMNode(\n path=path,\n content=f\"Handler '{handler_name}' not found.\\n\\nAvailable: {available}\",\n node_type=NodeType.FILE,\n )\n \n if not self.config.check_permission(path, \"read\"):\n raise PermissionError(f\"No read permission for {path}\")\n \n if not self._check_private_access(path):\n raise PermissionError(f\"Agent {self.agent_id} cannot access {path}\")\n \n # Check hot cache first (if enabled and not forcing refresh)\n if self._hot_cache_enabled and not force_refresh:\n cached = self._cache_get(path)\n if cached:\n return cached\n \n provider = self._get_provider(path)\n if provider:\n node = provider.get(path, force_refresh=force_refresh)\n if node and self._hot_cache_enabled:\n self._cache_put(node)\n return node\n \n node = self.store.get_node(path)\n if node and self._hot_cache_enabled:\n self._cache_put(node)\n return node\n \n def write(self, path: str, content: str, \n meta: Dict = None) -> AVMNode:\n \"\"\"\n writenode\n \n 1. Check write permission\n 2. Create or update node\n \"\"\"\n path = self._resolve_path(path)\n if not self.config.check_permission(path, \"write\"):\n raise PermissionError(f\"No write permission for {path}\")\n \n if not self._check_private_access(path):\n raise PermissionError(f\"Agent {self.agent_id} cannot write to {path}\")\n \n node = AVMNode(\n path=path,\n content=content,\n meta=meta or {},\n node_type=NodeType.FILE,\n )\n \n result = self.store.put_node(node)\n \n # Invalidate and update cache\n if self._hot_cache_enabled:\n self._cache_invalidate(path)\n self._cache_put(result)\n\n # Auto-index for semantic search\n if self._embedding_store and getattr(self, '_auto_index_embedding', False):\n if self._async_embedding:\n # Async: don't block writes\n import threading\n def _async_embed(es=self._embedding_store, node=result):\n try:\n es.embeend_node(node)\n except Exception:\n pass\n threading.Thread(target=_async_embed, daemon=True).start()\n else:\n # Sync: block until embedding complete\n try:\n self._embedding_store.embeend_node(result)\n except Exception:\n pass\n \n # Trigger subscription notifications\n try:\n from .subscriptions import get_subscription_store\n sub_store = get_subscription_store()\n sub_store.on_write(path, author=self.agent_id)\n except Exception:\n pass # non-fatal\n\n # Auto-index for TopicIndex (async)\n try:\n if not hasattr(self, '_topic_index'):\n from .topic_index import TopicIndex\n self._topic_index = TopicIndex(self.store)\n \n title = meta.get(\"title\", \"\") if meta else \"\"\n import threading\n def _async_topic_index():\n try:\n self._topic_index.index_path(path, content, title)\n except Exception:\n pass\n threading.Thread(target=_async_topic_index, daemon=True).start()\n except Exception:\n pass # non-fatal\n\n return result\n \n def delete(self, path: str, hard: bool = False) -> bool:\n \"\"\"Delete node (soft delete to /trash/ by default)\"\"\"\n path = self._resolve_path(path)\n if not self.config.check_permission(path, \"write\"):\n raise PermissionError(f\"No write permission for {path}\")\n \n if not self._check_private_access(path):\n raise PermissionError(f\"Agent {self.agent_id} cannot delete {path}\")\n \n # Invalidate cache\n self._cache_invalidate(path)\n \n if hard or path.startswith(\"/trash/\"):\n # Hard delete (permanent) or already in trash\n return self.store.delete_node(path)\n \n # Soft delete: move to /trash/\n node = self.store.get_node(path)\n if not node:\n return False\n \n from .utils import utcnow\n trash_path = f\"/trash{path}\"\n node.meta['deleted_at'] = utcnow().isoformat()\n node.meta['deleted_by'] = self.agent_id\n node.meta['original_path'] = path\n \n self.store.put_node(AVMNode(\n path=trash_path,\n content=node.content,\n meta=node.meta,\n node_type=node.node_type,\n ))\n return self.store.delete_node(path)\n \n def restore(self, trash_path: str) -> Optional[AVMNode]:\n \"\"\"Restore a file from trash\"\"\"\n if not trash_path.startswith(\"/trash/\"):\n raise ValueError(\"Path must be in /trash/\")\n \n node = self.store.get_node(trash_path)\n if not node:\n return None\n \n original_path = node.meta.get('original_path')\n if not original_path:\n # Infer from trash path\n original_path = trash_path.replace(\"/trash\", \"\", 1)\n \n # Clean up trash metadata\n node.meta.pop('deleted_at', None)\n node.meta.pop('deleted_by', None)\n node.meta.pop('original_path', None)\n \n restored = self.write(original_path, node.content, meta=node.meta)\n self.store.delete_node(trash_path)\n return restored\n \n def list(self, prefix: str = \"/\", limit: int = 100) -> List[AVMNode]:\n \"\"\"listnode\"\"\"\n prefix = self._resolve_path(prefix)\n nodes = self.store.list_nodes(prefix, limit)\n # Filter by private access\n if self.agent_id:\n nodes = [n for n in nodes if self._check_private_access(n.path)]\n return nodes\n \n # ─── rename / move ──────────────────────────────────────────────────────────\n\n def rename(self, src: str, dst: str, *, hard: bool = False) -> int:\n \"\"\"Rename / move a single node or an entire prefix tree.\n\n If *src* ends with ``/`` or matches multiple nodes as a prefix, all\n matching nodes are moved (prefix replacement). Otherwise only the\n exact node is moved.\n\n Returns the number of nodes renamed.\n \"\"\"\n src = self._resolve_path(src)\n dst = self._resolve_path(dst)\n if not self.config.check_permission(src, \"write\"):\n raise PermissionError(f\"No write permission for {src}\")\n if not self.config.check_permission(dst, \"write\"):\n raise PermissionError(f\"No write permission for {dst}\")\n\n # Normalise trailing slash on source prefix\n src_prefix = src.rstrip(\"/\")\n\n # Gather candidates: exact match first, then prefix tree\n candidates: list = []\n exact = self.store.get_node(src_prefix)\n if exact:\n candidates.append(exact)\n\n # Also collect everything under src_prefix/\n subtree = self.store.list_nodes(src_prefix + \"/\", limit=10_000)\n candidates.extend(subtree)\n\n if not candidates:\n raise FileNotFoundError(f\"No node found at {src!r}\")\n\n moved = 0\n for node in candidates:\n old_path = node.path\n if old_path == src_prefix:\n new_path = dst.rstrip(\"/\")\n else:\n # Replace prefix\n rel = old_path[len(src_prefix):] # starts with \"/\"\n new_path = dst.rstrip(\"/\") + rel\n\n new_node = AVMNode(\n path=new_path,\n content=node.content,\n meta={**node.meta, \"renamed_from\": old_path},\n node_type=node.node_type,\n )\n self.store.put_node(new_node)\n self.store.delete_node(old_path)\n self._cache_invalidate(old_path)\n self._cache_invalidate(new_path)\n moved += 1\n\n return moved\n\n # ─── search ─────────────────────────────────────────────\n \n def search(self, query: str, limit: int = 10) -> List[Tuple[AVMNode, float]]:\n \"\"\"full-textsearch\"\"\"\n return self.store.search(query, limit)\n \n # ─── relationgraph ─────────────────────────────────────────────\n \n def link(self, source: str, target: str,\n edge_type: EdgeType = EdgeType.RELATED,\n weight: float = 1.0):\n \"\"\"addrelation\"\"\"\n return self.store.add_edge(source, target, edge_type, weight)\n \n def links(self, path: str, direction: str = \"both\") -> List:\n \"\"\"Get relations\"\"\"\n return self.store.get_links(path, direction)\n \n # ─── history ─────────────────────────────────────────────\n \n def history(self, path: str, limit: int = 10):\n \"\"\"Get change history\"\"\"\n return self.store.get_history(path, limit)\n \n def read_at_time(self, path: str, as_of) -> Optional[AVMNode]:\n \"\"\"Read node content as it was at a specific time (time travel)\"\"\"\n from datetime import datetime\n if isinstance(as_of, str):\n as_of = datetime.fromisoformat(as_of.replace('Z', '+00:00'))\n return self.store.get_node_at_time(path, as_of)\n \n def read_at_version(self, path: str, version: int) -> Optional[AVMNode]:\n \"\"\"Read node content at a specific version\"\"\"\n return self.store.get_node_at_version(path, version)\n \n # ─── statistics ─────────────────────────────────────────────\n \n def stats(self) -> Dict:\n \"\"\"storagestatistics\"\"\"\n return self.store.stats()\n \n # ─── Linked retrieval ─────────────────────────────────────────\n \n def retrieve(self, query: str, k: int = 5,\n expand_graph: bool = True,\n graph_depth: int = 1) -> \"RetrievalResult\":\n \"\"\"\n Linked retrieval\n \n 1. semanticsearch (if embedding)\n 2. FTS5 full-textsearch\n 3. graphextend\n \"\"\"\n from .retrieval import Retriever, RetrievalResult\n \n # Get or create embedding store\n embedding_store = getattr(self, '_embedding_store', None)\n \n retriever = Retriever(self.store, embedding_store)\n return retriever.retrieve(\n query, k=k,\n expand_graph=expand_graph,\n graph_depth=graph_depth\n )\n \n def synthesize(self, query: str, k: int = 5,\n title: str = None) -> str:\n \"\"\"\n Dynamically generate synthesized document\n \n One-line call:\n vfs.synthesize(\"NVDA risk analysis\")\n \n Returns: Synthesized document in Markdown format\n \"\"\"\n from .retrieval import Retriever, DocumentSynthesizer\n \n embedding_store = getattr(self, '_embedding_store', None)\n retriever = Retriever(self.store, embedding_store)\n synthesizer = DocumentSynthesizer(self.store)\n \n result = retriever.retrieve(query, k=k, expand_graph=True)\n doc = synthesizer.synthesize(result, title=title)\n \n return doc.to_markdown()\n \n def enable_embedding(self, backend: \"EmbeddingBackend\" = None,\n model: str = \"text-embedding-3-small\"):\n \"\"\"\n enablesemanticsearch\n \n Args:\n backend: custom embedding backend\n model: OpenAI model name (if backend not provided)\n \"\"\"\n from .embedding import EmbeddingStore, OpenAIEmbedding\n \n if backend is None:\n backend = OpenAIEmbedding(model=model)\n \n self._embedding_store = EmbeddingStore(self.store, backend)\n return self._embedding_store\n \n def embeend_all(self, prefix: str = \"/\") -> int:\n \"\"\"allnodegenerate embedding\"\"\"\n if not hasattr(self, '_embedding_store'):\n raise RuntimeError(\"Call enable_embedding() first\")\n \n return self._embedding_store.embeend_all(prefix)\n \n # ─── Agent Memory ─────────────────────────────────────\n \n def agent_memory(self, agent_id: str, \n config: Dict = None) -> \"AgentMemory\":\n \"\"\"\n Get Agent Memory instance\n \n Args:\n agent_id: Agent identifier\n config: Optional configuration\n \n Returns:\n AgentMemory instance\n \"\"\"\n from .agent_memory import AgentMemory, MemoryConfig\n \n mem_config = None\n if config:\n mem_config = MemoryConfig.from_dict(config)\n \n return AgentMemory(self, agent_id, mem_config)\n \n # ─── Multi-Agent ─────────────────────────────────────\n \n def load_agents(self, config_path: str = None, config_dict: Dict = None):\n \"\"\"\n Load multi-agent configuration\n \n Args:\n config_path: YAML Configuration file path\n config_dict: Configuration dictionary\n \"\"\"\n from .multi_agent import AgentRegistry, AuditLog, VersionedMemory\n \n self._agent_registry = AgentRegistry()\n self._audit_log = AuditLog(self.store)\n self._versioned_memory = VersionedMemory(self.store)\n \n if config_path:\n import yaml\n with open(config_path) as f:\n config_dict = yaml.safe_load(f)\n \n if config_dict:\n self._agent_registry.load_from_dict(config_dict)\n \n def get_agent_config(self, agent_id: str):\n \"\"\"Get agent configuration\"\"\"\n if not hasattr(self, '_agent_registry'):\n from .multi_agent import AgentRegistry\n self._agent_registry = AgentRegistry()\n \n return self._agent_registry.get(agent_id)\n \n def audit_log(self, agent_id: str = None, path_prefix: str = None,\n limit: int = 100) -> List[Dict]:\n \"\"\"queryauditlog\"\"\"\n if not hasattr(self, '_audit_log'):\n from .multi_agent import AuditLog\n self._audit_log = AuditLog(self.store)\n \n return self._audit_log.query(agent_id, path_prefix, limit=limit)\n \n # ─── Advanced Features ─────────────────────────────────────────\n \n def subscribe(self, pattern: str, callback) -> str:\n \"\"\"Subscribe to path changes\"\"\"\n from .advanced import SubscriptionManager\n \n if not hasattr(self, '_subscription_manager'):\n self._subscription_manager = SubscriptionManager()\n \n return self._subscription_manager.subscribe(pattern, callback)\n \n def _notify_subscribers(self, path: str, event_type: str, agent_id: str = None):\n \"\"\"Notify subscribers (internal method)\"\"\"\n if hasattr(self, '_subscription_manager'):\n from .advanced import MemoryEvent, EventType\n \n event = MemoryEvent(\n event_type=EventType(event_type),\n path=path,\n agent_id=agent_id or \"system\",\n )\n self._subscription_manager.notify(event)\n \n def query_time(self, prefix: str = \"/memory\",\n time_range: str = None,\n after: str = None,\n before: str = None,\n limit: int = 100) -> List[AVMNode]:\n \"\"\"timerangequery\"\"\"\n from .advanced import TimeQuery\n from datetime import datetime\n \n query = TimeQuery(self.store)\n \n after_dt = datetime.fromisoformat(after) if after else None\n before_dt = datetime.fromisoformat(before) if before else None\n \n return query.query(\n prefix=prefix,\n after=after_dt,\n before=before_dt,\n time_range=time_range,\n limit=limit\n )\n \n def sync(self, target: str, prefix: str = \"/memory\") -> Dict[str, int]:\n \"\"\"\n Sync to remote\n \n Args:\n target: directory path or s3://bucket/prefix\n prefix: Path prefix to sync\n \"\"\"\n from .advanced import SyncManager\n \n sync_mgr = SyncManager(self.store)\n \n if target.startswith(\"s3://\"):\n # S3 sync\n parts = target[5:].split(\"/\", 1)\n bucket = parts[0]\n s3_prefix = parts[1] if len(parts) > 1 else \"vfs/\"\n return sync_mgr.sync_to_s3(bucket, s3_prefix, prefix)\n else:\n # Directory sync\n return sync_mgr.sync_to_directory(target, prefix)\n \n def snapshot(self, name: str = None) -> str:\n \"\"\"createsnapshot\"\"\"\n from .advanced import ExportManager\n \n export_mgr = ExportManager(self.store)\n return export_mgr.snapshot(name)\n \n def list_snapshots(self) -> List[Dict]:\n \"\"\"listsnapshot\"\"\"\n from .advanced import ExportManager\n \n export_mgr = ExportManager(self.store)\n return export_mgr.list_snapshots()\n \n def restore_snapshot(self, name: str) -> int:\n \"\"\"restoresnapshot\"\"\"\n from .advanced import ExportManager\n \n export_mgr = ExportManager(self.store)\n return export_mgr.restore_snapshot(name)\n \n # ─── Linux-Style Permissions ──────────────────────────\n \n def init_permissions(self, config_dict: Dict = None):\n \"\"\"\n Initialize Linux-style permission system\n \n Args:\n config_dict: User/group configuration\n \"\"\"\n from .permissions import UserRegistry, PermissionManager, APIKeyManager\n \n self._user_registry = UserRegistry()\n self._perm_manager = PermissionManager(self._user_registry)\n self._api_key_manager = APIKeyManager(self._user_registry)\n \n if config_dict:\n self._user_registry.load_from_dict(config_dict)\n \n def authenticate(self, api_key: str) -> Optional[\"User\"]:\n \"\"\"\n via API Key auth\n \n Returns:\n User object, or None\n \"\"\"\n if not hasattr(self, '_user_registry'):\n self.init_permissions()\n \n return self._user_registry.authenticate(api_key)\n \n def create_user(self, name: str, groups: List[str] = None,\n capabilities: List[str] = None) -> \"User\":\n \"\"\"createuser\"\"\"\n if not hasattr(self, '_user_registry'):\n self.init_permissions()\n \n from .permissions import Capability\n caps = [Capability(c) for c in (capabilities or [])]\n \n return self._user_registry.create_user(name, groups, caps)\n \n def get_user(self, name: str) -> Optional[\"User\"]:\n \"\"\"Get user\"\"\"\n if not hasattr(self, '_user_registry'):\n return None\n return self._user_registry.get_user(name)\n \n def check_permission(self, user: \"User\", path: str, \n action: str = \"read\") -> bool:\n \"\"\"\n checkuserpermission\n \n Args:\n user: userobject\n path: path\n action: read/write/delete/search\n \"\"\"\n if not hasattr(self, '_perm_manager'):\n return True # Allow if permission system not initialized\n \n from .permissions import NodeOwnership\n \n # Get node ownership info\n node = self.store.get_node(path)\n if node:\n ownership = NodeOwnership.from_meta(node.meta)\n else:\n # defaultpermission\n ownership = NodeOwnership(owner=\"root\", group=\"root\", mode=0o644)\n \n if action == \"read\":\n return self._perm_manager.check_read(user, ownership)\n elif action == \"write\":\n return self._perm_manager.check_write(user, ownership)\n elif action == \"delete\":\n return self._perm_manager.check_delete(user, ownership)\n elif action == \"search\":\n return self._perm_manager.check_search(user, path)\n \n return False\n \n def sudo(self, user: \"User\", duration_minutes: int = 5) -> bool:\n \"\"\"temporaryelevate privileges\"\"\"\n if not hasattr(self, '_perm_manager'):\n return False\n return self._perm_manager.sudo(user, duration_minutes)\n \n def create_api_key(self, user: \"User\", \n paths: List[str] = None,\n actions: List[str] = None,\n expires_days: int = None) -> str:\n \"\"\"\n create API Key(for skill authentication)\n \n Args:\n user: user\n paths: Allowed paths (supports wildcards)\n actions: Allowed actions\n expires_days: Expiry days\n \"\"\"\n if not hasattr(self, '_api_key_manager'):\n self.init_permissions()\n \n from .permissions import APIKeyScope\n \n scope = APIKeyScope(\n paths=paths or [\"*\"],\n actions=actions or [\"read\"],\n )\n \n return self._api_key_manager.create_key(user, scope, expires_days)\n","content_type":"text/x-python; charset=utf-8","language":"python","size":34918,"content_sha256":"6e559c3cad6bdc3502af8acd42f5c2a29147f36875d5131ee173210488893e4e"},{"filename":"avm/daemon.py","content":"\"\"\"\nAVM Unified Daemon - Single process, multiple mount points\n\nUsage:\n avm-daemon start [--config CONFIG]\n avm-daemon stop\n avm-daemon status\n avm-daemon add MOUNTPOINT --agent AGENT_ID\n avm-daemon remove MOUNTPOINT\n\"\"\"\n\nimport os\nimport sys\nimport json\nimport signal\nimport threading\nimport argparse\nfrom pathlib import Path\nfrom typing import Dict, Optional\nfrom dataclasses import dataclass, field, asdict\n\n# Lazy imports to avoid circular dependencies\nFUSE = None\nAVMFuse = None\nAVM = None\n\n\ndef _lazy_imports():\n global FUSE, AVMFuse, AVM\n if FUSE is None:\n from fuse import FUSE as _FUSE\n from .fuse_mount import AVMFuse as _AVMFuse\n from .core import AVM as _AVM\n FUSE = _FUSE\n AVMFuse = _AVMFuse\n AVM = _AVM\n\n\n# ═══════════════════════════════════════════════════════════════\n# Configuration\n# ═══════════════════════════════════════════════════════════════\n\nDATA_DIR = Path.home() / \".local\" / \"share\" / \"avm\"\nCONFIG_DIR = Path.home() / \".config\" / \"avm\"\nMOUNTS_CONFIG = CONFIG_DIR / \"mounts.yaml\"\nDAEMON_PID = DATA_DIR / \"daemon.pid\"\n\n\n@dataclass\nclass MountConfig:\n \"\"\"Configuration for a single mount point\"\"\"\n path: str\n agent: str\n enabled: bool = True\n\n\n@dataclass\nclass DaemonConfig:\n \"\"\"Daemon configuration\"\"\"\n mounts: list = field(default_factory=list)\n \n def save(self):\n CONFIG_DIR.mkdir(parents=True, exist_ok=True)\n import yaml\n data = {\n \"mounts\": [\n {\"path\": m.path, \"agent\": m.agent, \"enabled\": m.enabled}\n for m in self.mounts\n ]\n }\n MOUNTS_CONFIG.write_text(yaml.dump(data, default_flow_style=False, allow_unicode=True))\n \n @classmethod\n def load(cls) -> \"DaemonConfig\":\n if not MOUNTS_CONFIG.exists():\n return cls()\n try:\n import yaml\n data = yaml.safe_load(MOUNTS_CONFIG.read_text())\n mounts = [\n MountConfig(\n path=str(Path(m[\"path\"]).expanduser()),\n agent=m[\"agent\"],\n enabled=m.get(\"enabled\", True)\n )\n for m in data.get(\"mounts\", [])\n ]\n return cls(mounts=mounts)\n except Exception:\n return cls()\n\n\n# ═══════════════════════════════════════════════════════════════\n# Mount Thread\n# ═══════════════════════════════════════════════════════════════\n\nclass MountProcess:\n \"\"\"Child process managing a single FUSE mount (fork-based).\n\n Uses os.fork() for isolation — each mount is a separate process so\n macFUSE device slots are claimed sequentially and never race.\n GPU/MPS is disabled in the child via AVM_FUSE_WORKER=1 so XPC issues\n after fork() do not occur.\n \"\"\"\n\n def __init__(self, mountpoint: str, agent_id: str,\n embed_server=None, child_conn=None):\n self.mountpoint = mountpoint\n self.agent_id = agent_id\n self.embed_server = embed_server # legacy / unused in fork mode\n self.child_conn = child_conn # multiprocessing.Pipe child end\n self.pid: Optional[int] = None\n\n def start(self) -> bool:\n \"\"\"Fork and block until the mount is live (st_dev changes) or 12 s.\"\"\"\n import time\n\n normal_dev = Path(self.mountpoint).parent.stat().st_dev\n pid = os.fork()\n if pid == 0:\n self._run_fuse()\n os._exit(0)\n\n self.pid = pid\n mp = Path(self.mountpoint)\n deadline = time.monotonic() + 12.0\n while time.monotonic() \u003c deadline:\n try:\n rc = os.waitpid(pid, os.WNOHANG)\n if rc[0] != 0: # child exited early → mount failed\n self.pid = None\n return False\n except ChildProcessError:\n self.pid = None\n return False\n try:\n if mp.stat().st_dev != normal_dev:\n return True # mount is live\n except OSError:\n pass\n time.sleep(0.3)\n\n # 12 s elapsed — check one last time\n try:\n rc = os.waitpid(pid, os.WNOHANG)\n if rc[0] != 0:\n self.pid = None\n return False\n except ChildProcessError:\n self.pid = None\n return False\n # Child alive but mount didn't appear — proceed anyway\n return mp.stat().st_dev != normal_dev\n\n def _run_fuse(self):\n \"\"\"Run FUSE in forked child.\n\n If a child_conn Pipe was provided, GPU embedding is routed through\n the parent process (MPS-safe). Otherwise falls back to CPU.\n \"\"\"\n _lazy_imports()\n import os as _os\n _os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n\n if self.child_conn is None:\n # No GPU proxy — force CPU embedding in this fork\n _os.environ[\"AVM_FUSE_WORKER\"] = \"1\"\n\n try:\n agent_avm = AVM(agent_id=self.agent_id)\n\n if self.child_conn is not None:\n # Inject GPU proxy backed by the parent's embedding server\n from .embedding import EmbeddingStore, PipeEmbeddingProxy\n proxy = PipeEmbeddingProxy(self.child_conn)\n agent_avm._embedding_store = EmbeddingStore(agent_avm.store, proxy)\n agent_avm._auto_index_embedding = True\n\n Path(self.mountpoint).mkdir(parents=True, exist_ok=True)\n FUSE(\n AVMFuse(agent_avm, self.agent_id),\n self.mountpoint,\n nothreads=True,\n foreground=True,\n allow_other=False,\n attr_timeout=0,\n entry_timeout=0,\n direct_io=True,\n )\n except Exception as e:\n print(f\"FUSE error for {self.mountpoint}: {e}\", file=sys.stderr)\n finally:\n if self.child_conn is not None:\n self.child_conn.close()\n\n def stop(self):\n \"\"\"Kill child and force-unmount.\"\"\"\n import subprocess, platform, time as _t\n\n if self.pid:\n for sig in (signal.SIGTERM, signal.SIGKILL):\n try:\n os.kill(self.pid, sig)\n _t.sleep(0.5)\n os.waitpid(self.pid, os.WNOHANG)\n except (ProcessLookupError, ChildProcessError):\n break\n\n mp = self.mountpoint\n try:\n if platform.system() == \"Darwin\":\n r = subprocess.run(\n [\"/usr/sbin/diskutil\", \"unmount\", \"force\", mp],\n capture_output=True, timeout=10,\n )\n if r.returncode != 0:\n subprocess.run([\"/sbin/umount\", \"-f\", mp],\n capture_output=True, timeout=5)\n else:\n for cmd in ([\"fusermount3\", \"-u\", mp],\n [\"fusermount\", \"-u\", mp],\n [\"/sbin/umount\", \"-f\", mp]):\n r = subprocess.run(cmd, capture_output=True, timeout=5)\n if r.returncode == 0:\n break\n except Exception:\n pass\n\n\n# ═══════════════════════════════════════════════════════════════\n# Daemon\n# ═══════════════════════════════════════════════════════════════\n\nclass AVMDaemon:\n \"\"\"Unified AVM daemon managing multiple mounts\"\"\"\n \n def __init__(self):\n _lazy_imports()\n self.config = DaemonConfig.load()\n self.mounts: Dict[str, MountProcess] = {}\n self._running = False\n self._embed_backend = None # shared LocalEmbedding (GPU/MPS, parent only)\n self._pipe_servers: list = [] # EmbeddingPipeServer instances\n\n def start(self):\n \"\"\"Start the daemon and all configured mounts\"\"\"\n if DAEMON_PID.exists():\n try:\n pid = int(DAEMON_PID.read_text().strip())\n os.kill(pid, 0) # Check if process exists\n # Also verify it's actually an AVM daemon (not a recycled PID)\n try:\n import subprocess\n cmdline = subprocess.check_output(\n [\"ps\", \"-p\", str(pid), \"-o\", \"command=\"],\n timeout=3, text=True\n ).strip()\n if \"avm\" in cmdline.lower() or \"AVMDaemon\" in cmdline:\n print(f\"Daemon already running (pid={pid})\")\n return False\n else:\n print(f\" Stale pid file (pid={pid} is '{cmdline[:40]}'), ignoring\")\n except Exception:\n # Can't verify cmdline — trust the pid\n print(f\"Daemon already running (pid={pid})\")\n return False\n except (ProcessLookupError, ValueError):\n pass # Stale pid file — process gone\n \n # Write PID\n DATA_DIR.mkdir(parents=True, exist_ok=True)\n DAEMON_PID.write_text(str(os.getpid()))\n \n self._running = True\n\n # Start mounts one at a time. macFUSE allocates /dev/macfuseN slots\n # sequentially; launching multiple FUSE threads simultaneously causes\n # \"Resource temporarily unavailable\" on all but the first device.\n # _start_mount now blocks until the mount is accessible before returning.\n for mount_config in self.config.mounts:\n if mount_config.enabled:\n self._start_mount(mount_config)\n \n print(f\"Daemon started (pid={os.getpid()})\")\n print(f\"Mounts: {len(self.mounts)}\")\n \n # Setup signal handlers\n signal.signal(signal.SIGTERM, self._handle_signal)\n signal.signal(signal.SIGINT, self._handle_signal)\n signal.signal(signal.SIGHUP, self._handle_reload)\n \n # Start auto-archive thread\n self._archive_thread = threading.Thread(target=self._auto_archive_loop, daemon=True)\n self._archive_thread.start()\n print(\" Auto-archive enabled (every 6h, threshold=0.15)\")\n \n # Start trash cleanup thread\n self._trash_thread = threading.Thread(target=self._auto_trash_cleanup, daemon=True)\n self._trash_thread.start()\n print(\" Trash cleanup enabled (daily, 30d retention)\")\n\n # Start mount watchdog thread — checks every 30s, remounts dead mounts\n self._watchdog_thread = threading.Thread(target=self._watchdog_loop, daemon=True)\n self._watchdog_thread.start()\n print(\" Mount watchdog enabled (every 30s)\")\n\n # Wait for stop\n try:\n while self._running:\n signal.pause()\n except Exception:\n pass\n \n return True\n\n def _watchdog_loop(self):\n \"\"\"Periodically verify mounts are alive, remount if dead.\"\"\"\n import time, subprocess, platform\n while self._running:\n time.sleep(30)\n for mp, proc in list(self.mounts.items()):\n try:\n # Check if FUSE process is alive first\n if proc.pid:\n try:\n os.kill(proc.pid, 0) # signal 0 = existence check\n except ProcessLookupError:\n raise OSError(\"FUSE process dead\")\n # Also verify mount responds (use stat instead of listdir to avoid empty-dir false positives)\n os.stat(mp)\n entries = os.listdir(mp)\n # Empty directory is OK for FUSE mounts (virtual files may not appear in listdir)\n # Only treat as dead if the FUSE process is gone\n except OSError:\n # Mount is dead — try to recover\n print(f\" [watchdog] Dead mount detected: {mp}, remounting...\", flush=True)\n # Kill stale FUSE process\n if proc.pid:\n try:\n os.kill(proc.pid, 9)\n except ProcessLookupError:\n pass\n # Force unmount\n if platform.system() == \"Darwin\":\n subprocess.run([\"/usr/sbin/diskutil\", \"unmount\", \"force\", mp],\n capture_output=True, timeout=5)\n else:\n for cmd in ([\"fusermount3\", \"-u\", mp],\n [\"fusermount\", \"-u\", mp],\n [\"/sbin/umount\", \"-f\", mp]):\n r = subprocess.run(cmd, capture_output=True, timeout=5)\n if r.returncode == 0:\n break\n # Find config for this mount and remount\n for mc in self.config.mounts:\n if mc.path == mp and mc.enabled:\n self._start_mount(mc)\n break\n \n def _auto_archive_loop(self):\n \"\"\"Background thread that archives cold memories periodically\"\"\"\n import time\n from .advanced import MemoryDecay\n from .core import AVM as _AVM\n from .config import load_config\n \n # Load settings from config\n config = load_config()\n decay_cfg = getattr(config, 'decay', None) or {}\n if isinstance(decay_cfg, dict):\n ARCHIVE_INTERVAL = int(decay_cfg.get('archive_interval_hours', 6)) * 60 * 60\n ARCHIVE_THRESHOLD = float(decay_cfg.get('archive_threshold', 0.15))\n ARCHIVE_HALF_LIFE = float(decay_cfg.get('half_life_days', 14.0))\n ARCHIVE_LIMIT = int(decay_cfg.get('archive_limit', 50))\n else:\n ARCHIVE_INTERVAL = 6 * 60 * 60\n ARCHIVE_THRESHOLD = 0.15\n ARCHIVE_HALF_LIFE = 14.0\n ARCHIVE_LIMIT = 50\n \n # Initial delay to let system settle\n time.sleep(60)\n \n while self._running:\n try:\n vfs = _AVM()\n decay = MemoryDecay(vfs.store, half_life_days=ARCHIVE_HALF_LIFE)\n cold = decay.get_cold_memories(\n prefix=\"/memory\",\n threshold=ARCHIVE_THRESHOLD,\n limit=ARCHIVE_LIMIT\n )\n \n if cold:\n from .utils import utcnow\n archived = []\n for node in cold:\n archive_path = node.path.replace(\"/memory/\", \"/archive/\", 1)\n node.meta['archived_by'] = 'daemon'\n node.meta['archived_at'] = utcnow().isoformat()\n vfs.write(archive_path, node.content, meta=node.meta)\n vfs.store.delete_node(node.path)\n archived.append(node.path)\n \n print(f\"[auto-archive] Archived {len(archived)} cold memories\")\n except Exception as e:\n print(f\"[auto-archive] Error: {e}\")\n \n # Sleep in small chunks to check _running flag\n for _ in range(ARCHIVE_INTERVAL // 60):\n if not self._running:\n break\n time.sleep(60)\n \n def _wait_for_mount(self, mountpoint: str, timeout: float = 10.0):\n \"\"\"Block until the mountpoint is actually serving FUSE requests.\n\n Exits early if:\n - iterdir() succeeds (mount is live)\n - the MountProcess thread has died (mount failed)\n - timeout is reached\n \"\"\"\n import time as _time\n deadline = _time.monotonic() + timeout\n path = Path(mountpoint)\n proc = self.mounts.get(mountpoint)\n\n while _time.monotonic() \u003c deadline:\n # If the thread died, don't wait any longer\n if proc is not None and proc._thread is not None and not proc._thread.is_alive():\n print(f\" ⚠ Mount thread for {mountpoint} exited early\", file=sys.stderr)\n return\n try:\n entries = list(path.iterdir())\n if entries: # wait until FUSE is fully initialized and serving entries\n return # mount is live\n except OSError:\n pass\n _time.sleep(0.5)\n\n print(f\" ⚠ Mount {mountpoint} did not become ready within {timeout}s\",\n file=sys.stderr)\n\n def _start_mount(self, mount_config: MountConfig):\n \"\"\"Start a single mount, with zombie cleanup on failure.\"\"\"\n import subprocess, platform\n\n mp = mount_config.path\n Path(mp).mkdir(parents=True, exist_ok=True)\n\n # Set up GPU embedding pipe (fork-safe: Pipe created before fork,\n # each child gets its own fd pair → no cross-agent access).\n child_conn = None\n from .config import load_config as _lc\n _cfg = _lc()\n _emb_cfg = getattr(_cfg, 'embedding', None) or {}\n if isinstance(_emb_cfg, dict) and _emb_cfg.get('enabled'):\n import multiprocessing as _mp\n from .embedding import EmbeddingPipeServer, LocalEmbedding\n\n # Load the shared GPU backend once (lazy)\n if self._embed_backend is None:\n _model = _emb_cfg.get('model', 'all-MiniLM-L6-v2')\n print(f\" Loading embedding model ({_model}, MPS)...\")\n self._embed_backend = LocalEmbedding(_model)\n self._embed_backend._load_model()\n print(f\" Embedding model ready (dim={self._embed_backend.dimension})\")\n\n parent_conn, child_conn = _mp.Pipe()\n server = EmbeddingPipeServer(parent_conn, backend=self._embed_backend)\n server.start()\n self._pipe_servers.append(server)\n\n proc = MountProcess(mp, mount_config.agent, child_conn=child_conn)\n ok = proc.start()\n\n # Close parent's copy of child_conn (child has its own fd)\n if child_conn is not None:\n child_conn.close()\n\n if not ok:\n # Mount failed — clean up any partial macFUSE state immediately\n print(f\" ⚠ Mount failed for {mp}, cleaning up...\", file=sys.stderr)\n if platform.system() == \"Darwin\":\n subprocess.run([\"/usr/sbin/diskutil\", \"unmount\", \"force\", mp],\n capture_output=True, timeout=5)\n else:\n for cmd in ([\"fusermount3\", \"-u\", mp],\n [\"fusermount\", \"-u\", mp],\n [\"/sbin/umount\", \"-f\", mp]):\n r = subprocess.run(cmd, capture_output=True, timeout=5)\n if r.returncode == 0:\n break\n return\n\n self.mounts[mp] = proc\n print(f\" Mounted: {mp} (agent={mount_config.agent}, pid={proc.pid})\")\n\n # Wait for this mount to become accessible before starting the next one.\n # Note: start() already blocked until the mount was live (st_dev check),\n # so by the time we reach here macFUSE has fully registered the device.\n \n def _auto_trash_cleanup(self):\n \"\"\"Background thread that cleans up old trash items\"\"\"\n import time\n from datetime import datetime, timedelta, timezone\n from .core import AVM as _AVM\n \n CLEANUP_INTERVAL = 24 * 60 * 60 # Daily\n RETENTION_DAYS = 30\n \n # Initial delay\n time.sleep(120)\n \n while self._running:\n try:\n vfs = _AVM()\n trash_items = vfs.list(\"/trash\", limit=500)\n \n cutoff = datetime.now(timezone.utc) - timedelta(days=RETENTION_DAYS)\n deleted = 0\n \n for item in trash_items:\n deleted_at = item.meta.get('deleted_at')\n if deleted_at:\n try:\n dt = datetime.fromisoformat(deleted_at.replace('Z', '+00:00'))\n if dt \u003c cutoff:\n vfs.delete(item.path, hard=True)\n deleted += 1\n except (ValueError, TypeError):\n pass\n \n if deleted:\n print(f\"[trash-cleanup] Removed {deleted} items older than {RETENTION_DAYS}d\")\n except Exception as e:\n print(f\"[trash-cleanup] Error: {e}\")\n \n # Sleep in chunks\n for _ in range(CLEANUP_INTERVAL // 60):\n if not self._running:\n break\n time.sleep(60)\n \n def _handle_signal(self, signum, frame):\n \"\"\"Handle shutdown signals\"\"\"\n print(\"\\nShutting down...\")\n self._running = False\n \n # Stop all mounts\n for mount in self.mounts.values():\n mount.stop()\n\n # Stop the embedding server\n # Remove PID file\n if DAEMON_PID.exists():\n DAEMON_PID.unlink()\n \n def _handle_reload(self, signum, frame):\n \"\"\"Handle reload signal (SIGHUP)\"\"\"\n print(\"\\nReloading configuration...\")\n \n # Reload config\n new_config = DaemonConfig.load()\n \n # Find what changed\n current_paths = set(self.mounts.keys())\n new_paths = set(m.path for m in new_config.mounts if m.enabled)\n \n # Stop removed mounts\n for path in current_paths - new_paths:\n print(f\" Stopping: {path}\")\n self.mounts[path].stop()\n del self.mounts[path]\n \n # Start new mounts\n for m in new_config.mounts:\n if m.enabled and m.path not in current_paths:\n print(f\" Starting: {m.path}\")\n self._start_mount(m)\n \n self.config = new_config\n print(f\"Reload complete. Mounts: {len(self.mounts)}\")\n \n def add_mount(self, path: str, agent: str):\n \"\"\"Add a mount configuration\"\"\"\n path = str(Path(path).expanduser().resolve())\n # Check if already exists\n for m in self.config.mounts:\n if m.path == path:\n m.agent = agent\n self.config.save()\n print(f\"Updated: {path} (agent={agent})\")\n return\n self.config.mounts.append(MountConfig(path=path, agent=agent))\n self.config.save()\n print(f\"Added: {path} (agent={agent})\")\n \n def remove_mount(self, path: str):\n \"\"\"Remove a mount configuration\"\"\"\n path = str(Path(path).expanduser().resolve())\n for i, m in enumerate(self.config.mounts):\n if m.path == path:\n del self.config.mounts[i]\n self.config.save()\n print(f\"Removed: {path}\")\n return\n print(f\"Not found: {path}\")\n \n def list_mounts(self):\n \"\"\"List configured mounts\"\"\"\n if not self.config.mounts:\n print(\"No mounts configured\")\n return\n \n print(\"Configured mounts:\")\n for m in self.config.mounts:\n status = \"✓\" if m.enabled else \"○\"\n short_path = m.path.replace(str(Path.home()), \"~\")\n print(f\" {status} {short_path} → {m.agent}\")\n\n\n# ═══════════════════════════════════════════════════════════════\n# CLI\n# ═══════════════════════════════════════════════════════════════\n\ndef cmd_start(args):\n \"\"\"Start the daemon\"\"\"\n daemon = AVMDaemon()\n \n if args.daemon:\n # Fork to background\n pid = os.fork()\n if pid > 0:\n print(f\"Daemon started in background (pid={pid})\")\n return 0\n \n # Child process\n os.setsid()\n \n # Redirect stdout/stderr\n log_file = CONFIG_DIR / \"daemon.log\"\n sys.stdout = open(log_file, \"a\")\n sys.stderr = sys.stdout\n \n daemon.start()\n return 0\n\n\ndef cmd_stop(args):\n \"\"\"Stop the daemon and wait for clean shutdown\"\"\"\n import time\n\n if not DAEMON_PID.exists():\n print(\"Daemon not running\")\n return 1\n\n pid = int(DAEMON_PID.read_text().strip())\n try:\n os.kill(pid, signal.SIGTERM)\n except ProcessLookupError:\n DAEMON_PID.unlink(missing_ok=True)\n print(\"Daemon not running (stale pid file removed)\")\n return 1\n\n # Wait for the process to exit (up to 8s)\n for _ in range(16):\n time.sleep(0.5)\n try:\n os.kill(pid, 0)\n except ProcessLookupError:\n break\n else:\n # Still alive — SIGKILL\n try:\n os.kill(pid, signal.SIGKILL)\n time.sleep(0.5)\n except ProcessLookupError:\n pass\n\n DAEMON_PID.unlink(missing_ok=True)\n print(f\"Stopped daemon (pid={pid})\")\n return 0\n\n\ndef cmd_status(args):\n \"\"\"Show daemon status\"\"\"\n print(\"╭─────────────────────────────────────────╮\")\n print(\"│ 🧠 AVM Daemon Status │\")\n print(\"╰─────────────────────────────────────────╯\")\n \n if not DAEMON_PID.exists():\n print(\" Status: ⭘ not running\")\n else:\n pid = int(DAEMON_PID.read_text().strip())\n try:\n os.kill(pid, 0)\n print(f\" Status: ● running (pid={pid})\")\n except ProcessLookupError:\n print(\" Status: ⭘ not running (stale pid)\")\n \n daemon = AVMDaemon()\n config = daemon.config\n \n if not config.mounts:\n print(\"\\n No mounts configured\")\n else:\n print(f\"\\n Mounts: {len(config.mounts)}\")\n print(\" ─────────────────────────────────────\")\n for m in config.mounts:\n status = \"●\" if m.enabled else \"○\"\n short_path = m.path.replace(str(Path.home()), \"~\")\n print(f\" {status} {m.agent:\u003c12} → {short_path}\")\n \n print()\n return 0\n\n\ndef cmd_inspect(args):\n \"\"\"Inspect daemon and mounts in detail\"\"\"\n _lazy_imports()\n \n print(\"╭─────────────────────────────────────────╮\")\n print(\"│ 🔍 AVM Daemon Inspect │\")\n print(\"╰─────────────────────────────────────────╯\")\n \n # Daemon info\n print(\"\\n📋 Daemon\")\n print(\" ─────────────────────────────────────\")\n if DAEMON_PID.exists():\n pid = int(DAEMON_PID.read_text().strip())\n try:\n os.kill(pid, 0)\n print(f\" PID: {pid}\")\n print(f\" Status: ● running\")\n except ProcessLookupError:\n print(f\" Status: ⭘ not running (stale pid={pid})\")\n else:\n print(\" Status: ⭘ not running\")\n \n print(f\" Config: {MOUNTS_CONFIG}\")\n print(f\" PID file: {DAEMON_PID}\")\n \n # Database info\n print(\"\\n💾 Database\")\n print(\" ─────────────────────────────────────\")\n avm = AVM()\n db_path = avm.store.db_path\n print(f\" Path: {db_path}\")\n if Path(db_path).exists():\n size = Path(db_path).stat().st_size\n if size > 1024 * 1024:\n print(f\" Size: {size / 1024 / 1024:.1f} MB\")\n else:\n print(f\" Size: {size / 1024:.1f} KB\")\n \n # Node count\n try:\n import sqlite3\n conn = sqlite3.connect(db_path)\n count = conn.execute(\"SELECT COUNT(*) FROM nodes\").fetchone()[0]\n conn.close()\n print(f\" Nodes: {count}\")\n except Exception:\n pass\n \n # Mount details\n daemon = AVMDaemon()\n config = daemon.config\n \n print(\"\\n📂 Mounts\")\n print(\" ─────────────────────────────────────\")\n \n if not config.mounts:\n print(\" (none configured)\")\n else:\n # Check actual mount status\n import subprocess\n result = subprocess.run([\"/sbin/mount\"], capture_output=True, text=True)\n mounted = result.stdout\n \n for m in config.mounts:\n short_path = m.path.replace(str(Path.home()), \"~\")\n is_mounted = m.path in mounted or m.path.replace(\"/Users/\", \"/private/var/\") in mounted\n \n status_icon = \"●\" if is_mounted else \"○\"\n status_text = \"mounted\" if is_mounted else \"not mounted\"\n \n print(f\"\\n {status_icon} {m.agent}\")\n print(f\" Path: {short_path}\")\n print(f\" Status: {status_text}\")\n \n # Check if accessible\n if is_mounted:\n try:\n list_path = Path(m.path) / \":stats\"\n if list_path.exists():\n stats = json.loads(list_path.read_text())\n print(f\" Nodes: {stats.get('nodes', '?')}\")\n except Exception:\n pass\n \n # Process tree\n print(\"\\n🌳 Processes\")\n print(\" ─────────────────────────────────────\")\n try:\n result = subprocess.run(\n [\"ps\", \"aux\"], capture_output=True, text=True\n )\n procs = [l for l in result.stdout.split(\"\\n\") if \"avm-daemon\" in l and \"grep\" not in l]\n if procs:\n for p in procs:\n parts = p.split()\n pid = parts[1]\n mem = parts[3]\n print(f\" pid={pid} mem={mem}%\")\n else:\n print(\" (no daemon processes)\")\n except Exception:\n print(\" (unable to check)\")\n \n print()\n return 0\n\n\ndef cmd_add(args):\n \"\"\"Add a mount\"\"\"\n daemon = AVMDaemon()\n daemon.add_mount(args.mountpoint, args.agent)\n return 0\n\n\ndef cmd_remove(args):\n \"\"\"Remove a mount\"\"\"\n daemon = AVMDaemon()\n daemon.remove_mount(args.mountpoint)\n return 0\n\n\ndef cmd_check(args):\n \"\"\"Check configuration validity\"\"\"\n print(\"Checking configuration...\")\n \n if not MOUNTS_CONFIG.exists():\n print(f\" ✗ Config not found: {MOUNTS_CONFIG}\")\n return 1\n \n try:\n import yaml\n data = yaml.safe_load(MOUNTS_CONFIG.read_text())\n except Exception as e:\n print(f\" ✗ YAML parse error: {e}\")\n return 1\n \n if not isinstance(data, dict) or \"mounts\" not in data:\n print(\" ✗ Missing 'mounts' key\")\n return 1\n \n mounts = data.get(\"mounts\", [])\n if not isinstance(mounts, list):\n print(\" ✗ 'mounts' must be a list\")\n return 1\n \n errors = []\n for i, m in enumerate(mounts):\n if not isinstance(m, dict):\n errors.append(f\" ✗ Mount {i}: must be a dict\")\n continue\n if \"path\" not in m:\n errors.append(f\" ✗ Mount {i}: missing 'path'\")\n if \"agent\" not in m:\n errors.append(f\" ✗ Mount {i}: missing 'agent'\")\n \n # Check path exists or can be created\n if \"path\" in m:\n path = Path(m[\"path\"]).expanduser()\n parent = path.parent\n if not parent.exists():\n errors.append(f\" ✗ Mount {i}: parent dir not found: {parent}\")\n \n if errors:\n for e in errors:\n print(e)\n return 1\n \n print(f\" ✓ Config valid ({len(mounts)} mounts)\")\n return 0\n\n\ndef cmd_reload(args):\n \"\"\"Reload configuration (send SIGHUP to daemon)\"\"\"\n # Check config first\n print(\"Pre-reload check...\")\n if cmd_check(argparse.Namespace()) != 0:\n print(\"\\n✗ Reload aborted: invalid config\")\n return 1\n \n if not DAEMON_PID.exists():\n print(\"Daemon not running\")\n return 1\n \n pid = int(DAEMON_PID.read_text().strip())\n try:\n os.kill(pid, signal.SIGHUP)\n print(f\"\\n✓ Sent reload signal to daemon (pid={pid})\")\n return 0\n except ProcessLookupError:\n print(\"Daemon not running (stale pid)\")\n DAEMON_PID.unlink()\n return 1\n\n\ndef main():\n parser = argparse.ArgumentParser(\n description=\"AVM Unified Daemon\",\n formatter_class=argparse.RawDescriptionHelpFormatter,\n )\n subparsers = parser.add_subparsers(dest=\"command\", required=True)\n \n # start\n start_parser = subparsers.add_parser(\"start\", help=\"Start daemon\")\n start_parser.add_argument(\"--daemon\", \"-d\", action=\"store_true\",\n help=\"Run in background\")\n start_parser.set_defaults(func=cmd_start)\n \n # stop\n stop_parser = subparsers.add_parser(\"stop\", help=\"Stop daemon\")\n stop_parser.set_defaults(func=cmd_stop)\n \n # reload\n reload_parser = subparsers.add_parser(\"reload\", help=\"Reload configuration\")\n reload_parser.set_defaults(func=cmd_reload)\n \n # check\n check_parser = subparsers.add_parser(\"check\", help=\"Check config validity\")\n check_parser.set_defaults(func=cmd_check)\n \n # status\n status_parser = subparsers.add_parser(\"status\", help=\"Show status\")\n status_parser.set_defaults(func=cmd_status)\n \n # inspect\n inspect_parser = subparsers.add_parser(\"inspect\", help=\"Detailed inspection\")\n inspect_parser.set_defaults(func=cmd_inspect)\n \n # add\n add_parser = subparsers.add_parser(\"add\", help=\"Add mount\")\n add_parser.add_argument(\"mountpoint\", help=\"Mount point path\")\n add_parser.add_argument(\"--agent\", \"-a\", required=True,\n help=\"Agent ID\")\n add_parser.set_defaults(func=cmd_add)\n \n # remove\n remove_parser = subparsers.add_parser(\"remove\", help=\"Remove mount\")\n remove_parser.add_argument(\"mountpoint\", help=\"Mount point path\")\n remove_parser.set_defaults(func=cmd_remove)\n \n args = parser.parse_args()\n return args.func(args)\n\n\nif __name__ == \"__main__\":\n sys.exit(main())\n","content_type":"text/x-python; charset=utf-8","language":"python","size":35456,"content_sha256":"8afb571ae8d6f23cc6d3bca8ca63d3e4953657791730d357b858b7b4e9a11b70"},{"filename":"avm/embedding.py","content":"\"\"\"\nvfs/embedding.py - Embedding storage and semantic search\n\nSupports multiple embedding backends:\n- OpenAI (text-embedding-3-small)\n- Local (sentence-transformers)\n- Custom\n\"\"\"\n\nimport json\nimport struct\nimport hashlib\nfrom abc import ABC, abstractmethod\nfrom datetime import datetime\nfrom typing import List, Optional, Dict, Any, Tuple\nfrom pathlib import Path\n\nfrom .store import AVMStore\nfrom .node import AVMNode\nfrom .utils import utcnow\n\n\nclass EmbeddingBackend(ABC):\n \"\"\"Embedding backend base class\"\"\"\n \n @property\n @abstractmethod\n def dimension(self) -> int:\n \"\"\"Vector dimensions\"\"\"\n pass\n \n @abstractmethod\n def embeend(self, text: str) -> List[float]:\n \"\"\"Generate embedding for single text\"\"\"\n pass\n \n def embeend_batch(self, texts: List[str]) -> List[List[float]]:\n \"\"\"Batch generate embeddings (default: one by one)\"\"\"\n return [self.embeend(t) for t in texts]\n\n\nclass OpenAIEmbedding(EmbeddingBackend):\n \"\"\"OpenAI Embedding\"\"\"\n \n DIMENSIONS = {\n \"text-embedding-3-small\": 1536,\n \"text-embedding-3-large\": 3072,\n \"text-embedding-ada-002\": 1536,\n }\n \n def __init__(self, model: str = \"text-embedding-3-small\", \n api_key: str = None):\n self.model = model\n self.api_key = api_key or self._load_api_key()\n self._dimension = self.DIMENSIONS.get(model, 1536)\n \n def _load_api_key(self) -> str:\n import os\n return os.environ.get(\"OPENAI_API_KEY\", \"\")\n \n @property\n def dimension(self) -> int:\n return self._dimension\n \n def embeend(self, text: str) -> List[float]:\n import urllib.request\n import time\n \n data = json.dumps({\n \"input\": text[:8000], # truncate\n \"model\": self.model,\n }).encode()\n \n for attempt in range(3):\n req = urllib.request.Request(\n \"https://api.openai.com/v1/embeddings\",\n data=data,\n headers={\n \"Authorization\": f\"Bearer {self.api_key}\",\n \"Content-Type\": \"application/json\",\n }\n )\n try:\n with urllib.request.urlopen(req, timeout=30) as r:\n result = json.loads(r.read())\n return result[\"data\"][0][\"embedding\"]\n except urllib.error.HTTPError as e:\n if e.code == 429 and attempt \u003c 2:\n wait = 5 * (attempt + 1) # 5s, 10s backoff\n print(f\"Rate limited, waiting {wait}s...\")\n time.sleep(wait)\n continue\n raise\n \n def embeend_batch(self, texts: List[str]) -> List[List[float]]:\n import urllib.request\n import time\n \n data = json.dumps({\n \"input\": [t[:8000] for t in texts],\n \"model\": self.model,\n }).encode()\n \n for attempt in range(3):\n req = urllib.request.Request(\n \"https://api.openai.com/v1/embeddings\",\n data=data,\n headers={\n \"Authorization\": f\"Bearer {self.api_key}\",\n \"Content-Type\": \"application/json\",\n }\n )\n try:\n with urllib.request.urlopen(req, timeout=60) as r:\n result = json.loads(r.read())\n # Sort by index\n embeddings = sorted(result[\"data\"], key=lambda x: x[\"index\"])\n return [e[\"embedding\"] for e in embeddings]\n except urllib.error.HTTPError as e:\n if e.code == 429 and attempt \u003c 2:\n wait = 5 * (attempt + 1)\n print(f\"Rate limited, waiting {wait}s...\")\n time.sleep(wait)\n continue\n raise\n\n\nclass LocalEmbedding(EmbeddingBackend):\n \"\"\"\n Local embedding (sentence-transformers)\n \n Requires: pip install sentence-transformers\n \"\"\"\n \n # Class-level model cache (persists across instances within same process)\n _model_cache: Dict[str, Any] = {}\n \n def __init__(self, model: str = \"all-MiniLM-L6-v2\"):\n self.model_name = model\n self._model = None\n self._dimension = None\n # Query embedding cache (LRU)\n self._query_cache: Dict[str, List[float]] = {}\n self._cache_max = 100\n \n def _load_model(self):\n if self._model is None:\n # Check class-level cache first\n if self.model_name in LocalEmbedding._model_cache:\n self._model, self._dimension = LocalEmbedding._model_cache[self.model_name]\n else:\n from sentence_transformers import SentenceTransformer\n import os\n # On macOS, MPS (Apple GPU) uses XPC which becomes invalid\n # after os.fork(). Detect this by checking whether the current\n # process is a fork child (different PID from the stored\n # \"original\" PID), or by an explicit env flag set by the daemon.\n # Fall back to CPU in either case.\n force_cpu = (\n os.environ.get(\"AVM_EMBED_CPU\") == \"1\"\n or os.environ.get(\"AVM_FUSE_WORKER\") == \"1\"\n )\n if not force_cpu:\n import platform\n try:\n import torch\n if platform.system() == \"Darwin\" and torch.backends.mps.is_available():\n # Use MPS only in the original process; workers set AVM_FUSE_WORKER=1\n device = \"mps\"\n else:\n device = \"cpu\"\n except Exception:\n device = \"cpu\"\n else:\n device = \"cpu\"\n self._model = SentenceTransformer(self.model_name, device=device)\n self._dimension = self._model.get_sentence_embedding_dimension()\n # Cache at class level\n LocalEmbedding._model_cache[self.model_name] = (self._model, self._dimension)\n \n @property\n def dimension(self) -> int:\n if self._dimension is None:\n self._load_model()\n return self._dimension\n \n def embeend(self, text: str) -> List[float]:\n # Check query cache first\n cache_key = text[:200] # Truncate for cache key\n if cache_key in self._query_cache:\n return self._query_cache[cache_key]\n \n self._load_model()\n # show_progress_bar=False + convert_to_numpy avoids joblib/loky workers\n result = self._model.encode(\n text, show_progress_bar=False, convert_to_numpy=True\n ).tolist()\n \n # Update cache (LRU eviction)\n if len(self._query_cache) >= self._cache_max:\n # Remove oldest entry\n oldest = next(iter(self._query_cache))\n del self._query_cache[oldest]\n self._query_cache[cache_key] = result\n \n return result\n \n def embeend_batch(self, texts: List[str]) -> List[List[float]]:\n self._load_model()\n # pool_size=0 / single-process encode — no loky semaphores leaked\n return self._model.encode(\n texts, show_progress_bar=False, convert_to_numpy=True\n ).tolist()\n \n def warmup(self):\n \"\"\"Pre-load model (call once to avoid cold start)\"\"\"\n self._load_model()\n # Warm up with dummy query\n _ = self.embeend(\"warmup query\")\n\n\nclass EmbeddingStore:\n \"\"\"\n Embedding storage\n \n Uses SQLite for vectors, supports cosine similarity search\n \"\"\"\n \n def __init__(self, store: AVMStore, backend: EmbeddingBackend):\n self.store = store\n self.backend = backend\n self._init_table()\n \n def _init_table(self):\n \"\"\"initializevectortable\"\"\"\n with self.store._conn() as conn:\n # Check if table exists\n exists = conn.execute(\n \"SELECT name FROM sqlite_master WHERE type='table' AND name='embeddings'\"\n ).fetchone()\n \n if not exists:\n conn.execute(\"\"\"\n CREATE TABLE embeddings (\n path TEXT PRIMARY KEY,\n vector BLOB NOT NULL,\n content_h TEXT,\n model TEXT,\n updated_at TEXT\n )\n \"\"\")\n else:\n # Migration: add content_h if missing\n cols = [r[1] for r in conn.execute(\"PRAGMA table_info(embeddings)\")]\n if 'content_h' not in cols:\n conn.execute(\"ALTER TABLE embeddings ADD COLUMN content_h TEXT\")\n \n def _serialize_vector(self, vec: List[float]) -> bytes:\n \"\"\"serializevector bytes\"\"\"\n return struct.pack(f'{len(vec)}f', *vec)\n \n def _deserialize_vector(self, data: bytes) -> List[float]:\n \"\"\"deserialize bytes vector\"\"\"\n count = len(data) // 4 # float = 4 bytes\n return list(struct.unpack(f'{count}f', data))\n \n def _content_h(self, content: str) -> str:\n \"\"\"calculatecontenth\"\"\"\n return hashlib.sha256(content.encode()).hexdigest()[:16]\n \n def embeend_node(self, node: AVMNode, force: bool = False) -> bool:\n \"\"\"\n nodegenerate embedding\n \n Returns: whether actually generated new embedding\n \"\"\"\n content_h = self._content_h(node.content)\n \n # checkwhetherrequiresupdate\n if not force:\n with self.store._conn() as conn:\n row = conn.execute(\n \"SELECT content_h FROM embeddings WHERE path = ?\",\n (node.path,)\n ).fetchone()\n if row and row[0] == content_h:\n return False # Already exists and content unchanged\n \n # generate embedding\n # usetitle + contentfirst2000chars\n text = f\"{node.path}\\n\\n{node.content[:2000]}\"\n vector = self.backend.embeend(text)\n \n # storage\n with self.store._conn() as conn:\n conn.execute(\"\"\"\n INSERT OR REPLACE INTO embeddings \n (path, vector, content_h, model, updated_at)\n VALUES (?, ?, ?, ?, ?)\n \"\"\", (\n node.path,\n self._serialize_vector(vector),\n content_h,\n getattr(self.backend, 'model', 'unknown'),\n utcnow().isoformat(),\n ))\n \n return True\n \n def embeend_all(self, prefix: str = \"/\", limit: int = 1000) -> int:\n \"\"\"allnodegenerate embedding\"\"\"\n nodes = self.store.list_nodes(prefix, limit)\n count = 0\n \n for node in nodes:\n if self.embeend_node(node):\n count += 1\n \n return count\n \n def search(self, query: str, k: int = 5, \n prefix: str = None) -> List[Tuple[AVMNode, float]]:\n \"\"\"\n semanticsearch\n \n Returns: [(node, similarity), ...]\n \"\"\"\n # generatequeryvector\n query_vec = self.backend.embeend(query)\n \n # Get all vectors and calculate similarity\n results = []\n \n with self.store._conn() as conn:\n sql = \"SELECT path, vector FROM embeddings\"\n params = []\n \n if prefix:\n sql += \" WHERE path LIKE ?\"\n params.append(prefix + \"%\")\n \n for row in conn.execute(sql, params):\n path = row[0]\n vec = self._deserialize_vector(row[1])\n \n # cosinesimilarity\n similarity = self._cosine_similarity(query_vec, vec)\n results.append((path, similarity))\n \n # Sort and take top-k\n results.sort(key=lambda x: x[1], reverse=True)\n top_k = results[:k]\n \n # getcompletenode\n final = []\n for path, sim in top_k:\n node = self.store.get_node(path)\n if node:\n final.append((node, sim))\n \n return final\n \n def _cosine_similarity(self, a: List[float], b: List[float]) -> float:\n \"\"\"calculatecosinesimilarity\"\"\"\n dot = sum(x * y for x, y in zip(a, b))\n norm_a = sum(x * x for x in a) ** 0.5\n norm_b = sum(x * x for x in b) ** 0.5\n \n if norm_a == 0 or norm_b == 0:\n return 0.0\n \n return dot / (norm_a * norm_b)\n \n def stats(self) -> Dict[str, Any]:\n \"\"\"statisticsinfo\"\"\"\n with self.store._conn() as conn:\n count = conn.execute(\"SELECT COUNT(*) FROM embeddings\").fetchone()[0]\n \n models = {}\n for row in conn.execute(\n \"SELECT model, COUNT(*) FROM embeddings GROUP BY model\"\n ):\n models[row[0] or \"unknown\"] = row[1]\n \n return {\n \"embeendded_nodes\": count,\n \"by_model\": models,\n \"backend\": type(self.backend).__name__,\n \"dimension\": self.backend.dimension,\n }\n\n\n# ─── Shared Embedding Server (main-thread GPU, per-thread proxy) ─────────────\n\nclass EmbeddingRequest:\n \"\"\"A single encode request passed through the shared queue.\"\"\"\n __slots__ = (\"texts\", \"result_event\", \"result\", \"error\")\n\n def __init__(self, texts: List[str]):\n self.texts = texts\n self.result_event = __import__(\"threading\").Event()\n self.result: Optional[List[List[float]]] = None\n self.error: Optional[Exception] = None\n\n\nclass EmbeddingServer:\n \"\"\"\n Runs the SentenceTransformer model on the daemon's main thread (or any\n single designated thread) so that the GPU / MPS context is never touched\n from a forked or worker thread.\n\n Usage::\n\n server = EmbeddingServer(\"all-MiniLM-L6-v2\")\n server.start() # spawns worker thread\n proxy = server.make_proxy() # lightweight, thread-safe proxy\n vec = proxy.embeend(\"hello world\") # routed through the server\n server.stop()\n \"\"\"\n\n def __init__(self, model_name: str = \"all-MiniLM-L6-v2\"):\n import queue, threading\n self.model_name = model_name\n self._queue: \"queue.Queue[Optional[EmbeddingRequest]]\" = queue.Queue()\n self._thread: Optional[\"threading.Thread\"] = None\n self._backend: Optional[LocalEmbedding] = None\n self._started = threading.Event()\n\n # ── lifecycle ────────────────────────────────────────────────────────────\n\n def start(self):\n \"\"\"Start the background encoding thread.\"\"\"\n import threading\n self._thread = threading.Thread(\n target=self._run,\n name=\"avm-embed-server\",\n daemon=True,\n )\n self._thread.start()\n self._started.wait(timeout=30) # wait for model to load\n\n def stop(self):\n \"\"\"Shut down gracefully.\"\"\"\n self._queue.put(None)\n if self._thread:\n self._thread.join(timeout=5)\n\n def make_proxy(self) -> \"SharedEmbeddingProxy\":\n \"\"\"Return a thread-safe proxy that routes requests through this server.\"\"\"\n return SharedEmbeddingProxy(self)\n\n # ── internal ─────────────────────────────────────────────────────────────\n\n def _run(self):\n # Disable joblib/loky parallel workers globally for this process.\n # sentence_transformers uses joblib internally for batch encoding;\n # loky workers leave leaked semaphores when the process forks or exits\n # abnormally. Single-threaded encode is fine for our throughput needs.\n try:\n import joblib\n joblib.parallel_config(backend=\"sequential\")\n except Exception:\n pass\n try:\n import os as _os\n _os.environ.setdefault(\"TOKENIZERS_PARALLELISM\", \"false\")\n except Exception:\n pass\n\n # Load the model here — on the designated thread, GPU context intact.\n self._backend = LocalEmbedding(self.model_name)\n self._backend._load_model()\n self._started.set()\n\n while True:\n req = self._queue.get()\n if req is None:\n break\n try:\n req.result = self._backend.embeend_batch(req.texts)\n except Exception as exc:\n req.error = exc\n finally:\n req.result_event.set()\n\n def encode_batch(self, texts: List[str]) -> List[List[float]]:\n \"\"\"Synchronous encode from any thread (blocks until done).\"\"\"\n req = EmbeddingRequest(texts)\n self._queue.put(req)\n req.result_event.wait()\n if req.error:\n raise req.error\n return req.result # type: ignore[return-value]\n\n @property\n def dimension(self) -> int:\n if self._backend is None:\n raise RuntimeError(\"EmbeddingServer not started yet\")\n return self._backend.dimension\n\n\nclass SharedEmbeddingProxy(EmbeddingBackend):\n \"\"\"\n Drop-in EmbeddingBackend that forwards all work to an EmbeddingServer\n running on another thread. Safe to call from any thread/process that\n shares the same interpreter (i.e. threads — not forks).\n \"\"\"\n\n def __init__(self, server: EmbeddingServer):\n self._server = server\n self._query_cache: Dict[str, List[float]] = {}\n self._cache_max = 200\n\n @property\n def dimension(self) -> int:\n return self._server.dimension\n\n def embeend(self, text: str) -> List[float]:\n key = text[:200]\n if key in self._query_cache:\n return self._query_cache[key]\n result = self._server.encode_batch([text])[0]\n # Simple LRU eviction\n if len(self._query_cache) >= self._cache_max:\n del self._query_cache[next(iter(self._query_cache))]\n self._query_cache[key] = result\n return result\n\n def embeend_batch(self, texts: List[str]) -> List[List[float]]:\n return self._server.encode_batch(texts)\n\n def warmup(self):\n self._server.encode_batch([\"warmup\"])\n\n\n# ─── Fork-safe GPU proxy via multiprocessing.Pipe ────────────────────────────\n\nclass PipeEmbeddingProxy(EmbeddingBackend):\n \"\"\"\n Drop-in EmbeddingBackend that forwards encode() calls through a\n ``multiprocessing.Pipe`` to an EmbeddingServer running in the parent\n process (which holds the GPU/MPS context).\n\n Each forked child gets its own dedicated Pipe fd pair — no sharing\n between agents, no socket authentication needed.\n\n Usage (parent side)::\n\n parent_conn, child_conn = multiprocessing.Pipe()\n # pass child_conn to the child *before* fork\n # parent runs: EmbeddingPipeServer(parent_conn, model).start()\n\n Usage (child side)::\n\n proxy = PipeEmbeddingProxy(child_conn, dimension=384)\n # use as a normal EmbeddingBackend\n \"\"\"\n\n def __init__(self, conn, dimension: int = 384):\n self._conn = conn\n self._dim = dimension\n self._query_cache: Dict[str, List[float]] = {}\n self._cache_max = 200\n\n @property\n def dimension(self) -> int:\n return self._dim\n\n def embeend(self, text: str) -> List[float]:\n key = text[:200]\n if key in self._query_cache:\n return self._query_cache[key]\n result = self._request([text])[0]\n if len(self._query_cache) >= self._cache_max:\n del self._query_cache[next(iter(self._query_cache))]\n self._query_cache[key] = result\n return result\n\n def embeend_batch(self, texts: List[str]) -> List[List[float]]:\n return self._request(texts)\n\n def warmup(self):\n self._request([\"warmup\"])\n\n def _request(self, texts: List[str]) -> List[List[float]]:\n self._conn.send(texts)\n result = self._conn.recv()\n if isinstance(result, Exception):\n raise result\n return result\n\n\nclass EmbeddingPipeServer:\n \"\"\"\n Runs in the parent process; serves encode requests that arrive on\n *conn* (the parent end of a Pipe). One instance per forked child.\n \"\"\"\n\n def __init__(self, conn, model_name: str = \"all-MiniLM-L6-v2\",\n backend: \"LocalEmbedding | None\" = None):\n self._conn = conn\n self._model_name = model_name\n self._backend = backend # shared pre-loaded backend\n self._thread: Optional[\"__import__('threading').Thread\"] = None\n\n def start(self):\n import threading\n self._thread = threading.Thread(\n target=self._run, daemon=True,\n name=f\"embed-pipe-server\"\n )\n self._thread.start()\n\n def _run(self):\n if self._backend is None:\n self._backend = LocalEmbedding(self._model_name)\n while True:\n try:\n texts = self._conn.recv()\n except EOFError:\n break # child closed its end\n try:\n result = self._backend.embeend_batch(texts)\n self._conn.send(result)\n except Exception as exc:\n try:\n self._conn.send(exc)\n except Exception:\n break\n","content_type":"text/x-python; charset=utf-8","language":"python","size":21793,"content_sha256":"7a424182ebbb3fd1458a015e5cc2d1410545f851570c93268a7348bd040bcc99"},{"filename":"avm/exec_config.yaml","content":"# AVM Exec Handler Configuration\n# \n# ⚠️ 安全配置 - 此文件只能通过直接编辑修改\n# 禁止通过FUSE/API/Agent修改\n#\n# 配置加载后会锁定,运行时无法更改\n\nversion: 1\n\n# 全局设置\nglobal:\n default_timeout: 30.0\n default_rate_limit: 60 # per minute\n max_output_bytes: 1048576 # 1MB\n audit_log_enabled: true\n audit_log_max_entries: 1000\n\n# Handler定义\nhandlers:\n # === Git只读 ===\n git:\n command: git\n allowed_subcommands:\n - status\n - log\n - diff\n - branch\n - show\n - ls-files\n - rev-parse\n - describe\n - tag\n - remote\n - fetch\n - blame\n - shortlog\n - stash\n - config\n blocked_patterns:\n - \"--exec\"\n - \"-c\\\\s*core\\\\.editor\"\n timeout: 30.0\n\n # === Git写操作 (更严格) ===\n git-write:\n command: git\n allowed_subcommands:\n - add\n - commit\n - checkout\n - merge\n - rebase\n - pull\n blocked_patterns:\n - \"--exec\"\n - \"-m\\\\s*['\\\"].*[;|&`]\" # 命令注入\n rate_limit: 30\n timeout: 60.0\n\n # === Docker只读 ===\n docker:\n command: docker\n allowed_subcommands:\n - ps\n - images\n - logs\n - inspect\n - stats\n - version\n - info\n - top\n blocked_patterns:\n - \"--privileged\"\n - \"--pid=host\"\n - \"--network=host\"\n - \"-v\\\\s*/:\" # 挂载根目录\n\n # === Curl (限制协议) ===\n curl:\n command: curl\n allowed_patterns:\n - \"^https?://\" # 只允许http/https\n blocked_patterns:\n - \"file://\"\n - \"dict://\"\n - \"gopher://\"\n - \"--output\\\\s*/\"\n - \"-o\\\\s*/\"\n timeout: 60.0\n\n # === 网络工具 ===\n ping:\n command: ping\n allowed_patterns:\n - \"^-c\\\\s*\\\\d+\\\\s+\" # 必须有-c限制次数\n timeout: 30.0\n\n wget:\n command: wget\n allowed_patterns:\n - \"^https?://\"\n blocked_patterns:\n - \"file://\"\n - \"-O\\\\s*/\"\n timeout: 60.0\n\n # === 文件操作 (受限) ===\n ls:\n command: ls\n blocked_patterns:\n - \"\\\\.\\\\./\\\\.\\\\./\\\\.\\\\./\\\\.\\\\./\" # 防止深度遍历\n\n cat:\n command: cat\n blocked_patterns:\n - \"/etc/shadow\"\n - \"/etc/passwd\"\n - \"~/.ssh/\"\n - \"\\\\.env$\"\n - \"credentials\"\n - \"\\\\.pem$\"\n - \"\\\\.key$\"\n\n head:\n command: head\n blocked_patterns:\n - \"/etc/shadow\"\n - \"\\\\.env$\"\n\n tail:\n command: tail\n blocked_patterns:\n - \"/etc/shadow\"\n - \"\\\\.env$\"\n\n wc:\n command: wc\n # 无限制\n\n find:\n command: find\n blocked_patterns:\n - \"-exec\"\n - \"-delete\"\n timeout: 30.0\n\n grep:\n command: grep\n blocked_patterns:\n - \"-exec\"\n timeout: 30.0\n\n # === Python (严格限制) ===\n python:\n command: python3\n allowed_patterns:\n - \"^-c\\\\s+\"\n - \"^-m\\\\s+(json\\\\.tool|http\\\\.server|venv|pip)\"\n blocked_patterns:\n - \"import\\\\s+os\"\n - \"import\\\\s+subprocess\"\n - \"__import__\"\n - \"eval\\\\s*\\\\(\"\n - \"exec\\\\s*\\\\(\"\n - \"open\\\\s*\\\\(\"\n timeout: 10.0\n rate_limit: 20\n\n # === 系统信息 (只读) ===\n uname:\n command: uname\n # 无限制\n\n whoami:\n command: whoami\n # 无限制\n\n date:\n command: date\n # 无限制\n\n uptime:\n command: uptime\n # 无限制\n\n df:\n command: df\n # 无限制\n\n free:\n command: free\n # 无限制\n\n # === 完全禁止的命令 ===\n rm:\n command: rm\n allowed_subcommands: []\n allowed_patterns:\n - \"^$\" # 不匹配任何东西\n blocked: true\n\n sudo:\n command: sudo\n blocked: true\n\n su:\n command: su\n blocked: true\n\n chmod:\n command: chmod\n blocked: true\n\n chown:\n command: chown\n blocked: true\n\n kill:\n command: kill\n blocked: true\n\n pkill:\n command: pkill\n blocked: true\n\n shutdown:\n command: shutdown\n blocked: true\n\n reboot:\n command: reboot\n blocked: true\n\n# 路径限制 (可选)\nallowed_cwd:\n - \"~/.openclaw/workspace\"\n - \"/tmp\"\n\n# 环境变量白名单\nenv_whitelist:\n - PATH\n - HOME\n - USER\n - LANG\n - TERM\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":4042,"content_sha256":"fde09a6f407f771e838fcf969658451e37cc24988b5f4883c1ad3e8c01602bcd"},{"filename":"avm/exec_handler.py","content":"\"\"\"\nexec_handler.py - AVM受控执行层\n\n将危险命令包装成handler,提供:\n- 命令白名单校验\n- 参数审查\n- 审计日志\n- 速率限制\n- 沙箱执行\n\n⚠️ 安全设计:\n- 配置从YAML文件加载,启动后锁定\n- 禁止通过FUSE/API运行时修改\n- Agent无法绕过安全策略\n\n用法:\n from avm.exec_handler import ExecRegistry, load_config\n \n registry = ExecRegistry()\n registry.load_config(\"exec_config.yaml\")\n \n # 安全执行\n result = registry.execute(\"git\", [\"status\"])\n \n # 被拦截\n result = registry.execute(\"git\", [\"push\"]) # PermissionError\n\"\"\"\n\nimport subprocess\nimport shlex\nimport time\nimport logging\nfrom dataclasses import dataclass, field\nfrom typing import Callable, Optional, Any\nfrom pathlib import Path\nimport re\nimport os\nimport yaml\nimport hashlib\n\nlogger = logging.getLogger(__name__)\n\n# 配置文件路径 (相对于模块)\nDEFAULT_CONFIG_PATH = Path(__file__).parent / \"exec_config.yaml\"\n\n\n@dataclass\nclass ExecResult:\n \"\"\"执行结果\"\"\"\n success: bool\n exit_code: int\n stdout: str\n stderr: str\n elapsed_ms: float\n command: str\n handler: str\n\n\n@dataclass\nclass ExecPolicy:\n \"\"\"执行策略\"\"\"\n # 允许的子命令/参数模式\n allowed_subcommands: list[str] = field(default_factory=list)\n allowed_patterns: list[str] = field(default_factory=list) # regex\n \n # 禁止的模式\n blocked_patterns: list[str] = field(default_factory=list)\n \n # 资源限制\n timeout_seconds: float = 30.0\n max_output_bytes: int = 1024 * 1024 # 1MB\n \n # 工作目录限制\n allowed_cwd: list[str] = field(default_factory=list) # 为空表示不限制\n \n # 环境变量\n inherit_env: bool = False\n env_whitelist: list[str] = field(default_factory=list)\n \n # 速率限制\n rate_limit_per_minute: int = 60\n \n # 是否需要确认\n require_confirmation: bool = False\n\n\nclass RateLimiter:\n \"\"\"简单速率限制器\"\"\"\n \n def __init__(self, limit_per_minute: int):\n self.limit = limit_per_minute\n self.calls: list[float] = []\n \n def check(self) -> bool:\n now = time.time()\n # 清理1分钟前的记录\n self.calls = [t for t in self.calls if now - t \u003c 60]\n return len(self.calls) \u003c self.limit\n \n def record(self):\n self.calls.append(time.time())\n\n\nclass ExecHandler:\n \"\"\"单个命令的handler\"\"\"\n \n def __init__(self, \n name: str,\n command: str,\n policy: ExecPolicy,\n transform: Callable[[list[str]], list[str]] = None):\n self.name = name\n self.command = command\n self.policy = policy\n self.transform = transform # 可选的参数转换\n self.rate_limiter = RateLimiter(policy.rate_limit_per_minute)\n self.call_count = 0\n self.last_call = None\n \n def validate(self, args: list[str], cwd: str = None) -> tuple[bool, str]:\n \"\"\"验证参数是否允许\"\"\"\n policy = self.policy\n \n # 速率限制\n if not self.rate_limiter.check():\n return False, \"Rate limit exceeded\"\n \n # 子命令白名单\n if policy.allowed_subcommands and args:\n if args[0] not in policy.allowed_subcommands:\n return False, f\"Subcommand '{args[0]}' not allowed. Allowed: {policy.allowed_subcommands}\"\n \n # 参数模式匹配\n args_str = \" \".join(args)\n \n # 检查禁止模式\n for pattern in policy.blocked_patterns:\n if re.search(pattern, args_str):\n return False, f\"Argument matches blocked pattern: {pattern}\"\n \n # 检查允许模式 (如果定义了)\n if policy.allowed_patterns:\n matched = any(re.search(p, args_str) for p in policy.allowed_patterns)\n if not matched:\n return False, f\"Arguments don't match any allowed pattern\"\n \n # 工作目录检查\n if policy.allowed_cwd and cwd:\n cwd_path = Path(cwd).resolve()\n allowed = any(\n cwd_path == Path(p).resolve() or \n cwd_path.is_relative_to(Path(p).resolve())\n for p in policy.allowed_cwd\n )\n if not allowed:\n return False, f\"Working directory not allowed: {cwd}\"\n \n return True, \"\"\n \n def execute(self, args: list[str], cwd: str = None) -> ExecResult:\n \"\"\"执行命令\"\"\"\n # 验证\n valid, reason = self.validate(args, cwd)\n if not valid:\n logger.warning(f\"[exec:{self.name}] BLOCKED: {reason}\")\n return ExecResult(\n success=False,\n exit_code=-1,\n stdout=\"\",\n stderr=f\"Blocked: {reason}\",\n elapsed_ms=0,\n command=f\"{self.command} {' '.join(args)}\",\n handler=self.name\n )\n \n # 转换参数\n if self.transform:\n args = self.transform(args)\n \n # 构建命令\n cmd = [self.command] + args\n \n # 构建环境\n env = None\n if not self.policy.inherit_env:\n env = {}\n for var in self.policy.env_whitelist:\n if var in os.environ:\n env[var] = os.environ[var]\n # 确保PATH\n if \"PATH\" not in env:\n env[\"PATH\"] = \"/usr/local/bin:/usr/bin:/bin\"\n \n # 执行\n start = time.perf_counter()\n try:\n result = subprocess.run(\n cmd,\n capture_output=True,\n timeout=self.policy.timeout_seconds,\n cwd=cwd,\n env=env\n )\n elapsed = (time.perf_counter() - start) * 1000\n \n stdout = result.stdout.decode(errors='replace')[:self.policy.max_output_bytes]\n stderr = result.stderr.decode(errors='replace')[:self.policy.max_output_bytes]\n \n # 记录\n self.rate_limiter.record()\n self.call_count += 1\n self.last_call = time.time()\n \n logger.info(f\"[exec:{self.name}] OK: {' '.join(cmd[:3])}... exit={result.returncode}\")\n \n return ExecResult(\n success=result.returncode == 0,\n exit_code=result.returncode,\n stdout=stdout,\n stderr=stderr,\n elapsed_ms=elapsed,\n command=f\"{self.command} {' '.join(args)}\",\n handler=self.name\n )\n \n except subprocess.TimeoutExpired:\n elapsed = (time.perf_counter() - start) * 1000\n logger.warning(f\"[exec:{self.name}] TIMEOUT after {elapsed:.0f}ms\")\n return ExecResult(\n success=False,\n exit_code=-1,\n stdout=\"\",\n stderr=\"Timeout\",\n elapsed_ms=elapsed,\n command=f\"{self.command} {' '.join(args)}\",\n handler=self.name\n )\n\n\nclass ExecRegistry:\n \"\"\"执行handler注册表\"\"\"\n \n def __init__(self):\n self.handlers: dict[str, ExecHandler] = {}\n self.audit_log: list[dict] = []\n self._locked = False\n self._config_hash: str = None\n self._config_path: Path = None\n \n def register(self, handler: ExecHandler):\n \"\"\"注册handler\"\"\"\n if self._locked:\n logger.error(\"Registry is locked, cannot register new handlers\")\n raise RuntimeError(\"ExecRegistry is locked after config load\")\n self.handlers[handler.name] = handler\n logger.info(f\"Registered exec handler: {handler.name}\")\n \n def load_config(self, config_path: str = None):\n \"\"\"从YAML配置文件加载handler\"\"\"\n if self._locked:\n raise RuntimeError(\"Registry is already locked\")\n \n path = Path(config_path) if config_path else DEFAULT_CONFIG_PATH\n if not path.exists():\n raise FileNotFoundError(f\"Config file not found: {path}\")\n \n # 计算配置文件hash (用于审计)\n with open(path, 'rb') as f:\n content = f.read()\n self._config_hash = hashlib.sha256(content).hexdigest()[:16]\n \n config = yaml.safe_load(content)\n \n # 全局设置\n global_cfg = config.get('global', {})\n default_timeout = global_cfg.get('default_timeout', 30.0)\n default_rate_limit = global_cfg.get('default_rate_limit', 60)\n \n # 加载handlers\n handlers_cfg = config.get('handlers', {})\n for name, cfg in handlers_cfg.items():\n if cfg.get('blocked', False):\n # 完全禁止的命令\n policy = ExecPolicy(\n allowed_subcommands=[],\n allowed_patterns=[\"^$\"], # 不匹配任何东西\n )\n else:\n policy = ExecPolicy(\n allowed_subcommands=cfg.get('allowed_subcommands', []),\n allowed_patterns=cfg.get('allowed_patterns', []),\n blocked_patterns=cfg.get('blocked_patterns', []),\n timeout_seconds=cfg.get('timeout', default_timeout),\n rate_limit_per_minute=cfg.get('rate_limit', default_rate_limit),\n )\n \n handler = ExecHandler(\n name=name,\n command=cfg.get('command', name),\n policy=policy\n )\n self.handlers[name] = handler\n \n # 锁定\n self._locked = True\n self._config_path = path\n \n logger.info(f\"Loaded {len(self.handlers)} handlers from {path} (hash: {self._config_hash})\")\n logger.warning(\"ExecRegistry is now LOCKED - no runtime modifications allowed\")\n \n def is_locked(self) -> bool:\n \"\"\"检查是否已锁定\"\"\"\n return self._locked\n \n def get_config_info(self) -> dict:\n \"\"\"获取配置信息 (用于审计)\"\"\"\n return {\n \"locked\": self._locked,\n \"config_path\": str(self._config_path) if self._config_path else None,\n \"config_hash\": self._config_hash,\n \"handler_count\": len(self.handlers),\n }\n \n def execute(self, name: str, args: list[str], cwd: str = None) -> ExecResult:\n \"\"\"通过handler执行命令\"\"\"\n if name not in self.handlers:\n return ExecResult(\n success=False,\n exit_code=-1,\n stdout=\"\",\n stderr=f\"Unknown handler: {name}\",\n elapsed_ms=0,\n command=name,\n handler=\"unknown\"\n )\n \n handler = self.handlers[name]\n result = handler.execute(args, cwd)\n \n # 审计日志\n self.audit_log.append({\n \"timestamp\": time.time(),\n \"handler\": name,\n \"args\": args,\n \"cwd\": cwd,\n \"success\": result.success,\n \"exit_code\": result.exit_code,\n \"elapsed_ms\": result.elapsed_ms,\n })\n \n # 保持审计日志在合理大小\n if len(self.audit_log) > 1000:\n self.audit_log = self.audit_log[-500:]\n \n return result\n \n def list_handlers(self) -> list[dict]:\n \"\"\"列出所有handler\"\"\"\n return [\n {\n \"name\": h.name,\n \"command\": h.command,\n \"allowed_subcommands\": h.policy.allowed_subcommands,\n \"call_count\": h.call_count,\n }\n for h in self.handlers.values()\n ]\n\n\ndef register_default_handlers(registry: ExecRegistry):\n \"\"\"注册默认的安全handler\"\"\"\n \n # === Git (只读操作) ===\n registry.register(ExecHandler(\n name=\"git\",\n command=\"git\",\n policy=ExecPolicy(\n allowed_subcommands=[\n \"status\", \"log\", \"diff\", \"branch\", \"show\", \"ls-files\",\n \"rev-parse\", \"describe\", \"tag\", \"remote\", \"fetch\",\n \"blame\", \"shortlog\", \"stash\", \"config\"\n ],\n blocked_patterns=[\n r\"--exec\", # 防止执行任意命令\n r\"-c\\s*core\\.editor\", # 防止修改editor\n ],\n timeout_seconds=30.0,\n )\n ))\n \n # === Git (写操作 - 需要更严格) ===\n registry.register(ExecHandler(\n name=\"git-write\",\n command=\"git\",\n policy=ExecPolicy(\n allowed_subcommands=[\"add\", \"commit\", \"checkout\", \"merge\", \"rebase\"],\n blocked_patterns=[\n r\"--exec\",\n r\"-m\\s*['\\\"].*[;|&`]\", # 防止命令注入\n ],\n rate_limit_per_minute=30, # 更严格的速率限制\n )\n ))\n \n # === Docker (只读/安全操作) ===\n registry.register(ExecHandler(\n name=\"docker\",\n command=\"docker\",\n policy=ExecPolicy(\n allowed_subcommands=[\n \"ps\", \"images\", \"logs\", \"inspect\", \"stats\",\n \"version\", \"info\", \"top\"\n ],\n blocked_patterns=[\n r\"--privileged\",\n r\"--pid=host\",\n r\"--network=host\",\n r\"-v\\s*/:\", # 防止挂载根目录\n ],\n )\n ))\n \n # === Curl (限制URL) ===\n registry.register(ExecHandler(\n name=\"curl\",\n command=\"curl\",\n policy=ExecPolicy(\n allowed_patterns=[\n r\"^https?://\", # 只允许http/https\n ],\n blocked_patterns=[\n r\"file://\",\n r\"dict://\",\n r\"gopher://\",\n r\"--output\\s*/\", # 防止写入任意路径\n r\"-o\\s*/\",\n ],\n timeout_seconds=60.0,\n )\n ))\n \n # === Python (受限执行) ===\n registry.register(ExecHandler(\n name=\"python\",\n command=\"python3\",\n policy=ExecPolicy(\n allowed_patterns=[\n r\"^-c\\s+\", # 只允许-c模式\n r\"^-m\\s+(json\\.tool|http\\.server|venv)\", # 安全模块\n ],\n blocked_patterns=[\n r\"import\\s+os\",\n r\"import\\s+subprocess\",\n r\"__import__\",\n r\"eval\\s*\\(\",\n r\"exec\\s*\\(\",\n ],\n timeout_seconds=10.0,\n )\n ))\n \n # === 文件操作 (安全版) ===\n registry.register(ExecHandler(\n name=\"ls\",\n command=\"ls\",\n policy=ExecPolicy(\n blocked_patterns=[\n r\"\\.\\./\\.\\./\", # 防止过深的路径遍历\n ],\n )\n ))\n \n registry.register(ExecHandler(\n name=\"cat\",\n command=\"cat\",\n policy=ExecPolicy(\n blocked_patterns=[\n r\"/etc/shadow\",\n r\"/etc/passwd\",\n r\"~/.ssh/\",\n r\"\\.env$\",\n r\"credentials\",\n ],\n )\n ))\n \n # === 禁止的命令 (包装为空handler) ===\n for cmd in [\"rm\", \"sudo\", \"su\", \"chmod\", \"chown\", \"kill\"]:\n registry.register(ExecHandler(\n name=cmd,\n command=cmd,\n policy=ExecPolicy(\n allowed_subcommands=[], # 空 = 全部禁止\n allowed_patterns=[\"^$\"], # 不匹配任何东西\n )\n ))\n \n logger.info(f\"Registered {len(registry.handlers)} default exec handlers\")\n\n\n# ============================================================\n# 测试\n# ============================================================\n\nif __name__ == \"__main__\":\n logging.basicConfig(level=logging.INFO)\n \n registry = ExecRegistry()\n register_default_handlers(registry)\n \n print(\"=== 测试受控执行 ===\\n\")\n \n # 允许的操作\n tests = [\n (\"git\", [\"status\"]),\n (\"git\", [\"log\", \"-5\"]),\n (\"git\", [\"push\"]), # 应该被阻止\n (\"ls\", [\"-la\"]),\n (\"cat\", [\"/etc/passwd\"]), # 应该被阻止\n (\"rm\", [\"-rf\", \"/\"]), # 应该被阻止\n (\"curl\", [\"https://example.com\"]),\n (\"curl\", [\"file:///etc/passwd\"]), # 应该被阻止\n ]\n \n for cmd, args in tests:\n result = registry.execute(cmd, args)\n status = \"✅\" if result.success else \"❌\"\n print(f\"{status} {cmd} {' '.join(args[:2])}\")\n if not result.success:\n print(f\" └─ {result.stderr}\")\n \n print(\"\\n=== Handler列表 ===\")\n for h in registry.list_handlers():\n print(f\" {h['name']}: {h['allowed_subcommands'][:3]}...\")\n","content_type":"text/x-python; charset=utf-8","language":"python","size":16757,"content_sha256":"8a79b7c09054a65af296d34b2448d020f4b0f8afb07f05fc32be471796fe170c"},{"filename":"avm/faiss_store.py","content":"\"\"\"\navm/faiss_store.py - FAISS-based vector storage\n\nHigh-performance vector search using Facebook's FAISS library.\nSupports:\n- IVF (Inverted File) index for large-scale search\n- HNSW (Hierarchical Navigable Small World) for fast approximate search\n- Flat index for exact search (small datasets)\n\nUsage:\n from avm.faiss_store import FAISSEmbeddingStore\n from avm.embedding import LocalEmbedding\n \n backend = LocalEmbedding()\n store = FAISSEmbeddingStore(avm_store, backend, index_type=\"flat\")\n store.add_node(node)\n results = store.search(\"query\", k=5)\n\"\"\"\n\nimport os\nimport json\nimport struct\nimport hashlib\nimport pickle\nfrom typing import List, Dict, Any, Tuple, Optional\nfrom pathlib import Path\nimport numpy as np\n\ntry:\n import faiss\n FAISS_AVAILABLE = True\nexcept ImportError:\n FAISS_AVAILABLE = False\n\nfrom .store import AVMStore\nfrom .node import AVMNode\nfrom .embedding import EmbeddingBackend\nfrom .utils import utcnow\n\n\nclass FAISSEmbeddingStore:\n \"\"\"\n FAISS-based embedding storage with multiple index types.\n \n Index types:\n - \"flat\": Exact search (brute force), best for \u003c10k vectors\n - \"ivf\": IVF index, good for 10k-1M vectors\n - \"hnsw\": HNSW index, fast approximate search\n \"\"\"\n \n INDEX_TYPES = [\"flat\", \"ivf\", \"hnsw\"]\n \n def __init__(\n self, \n store: AVMStore, \n backend: EmbeddingBackend,\n index_type: str = \"flat\",\n index_path: str = None,\n nlist: int = 100, # For IVF: number of clusters\n m: int = 32, # For HNSW: number of connections\n ):\n if not FAISS_AVAILABLE:\n raise ImportError(\"FAISS not installed. Run: pip install faiss-cpu\")\n \n self.store = store\n self.backend = backend\n self.index_type = index_type\n self.dimension = backend.dimension\n self.nlist = nlist\n self.m = m\n \n # Index file path\n if index_path:\n self.index_path = Path(index_path)\n else:\n db_dir = Path(store.db_path).parent\n self.index_path = db_dir / \"faiss_index.bin\"\n \n # Path to ID mapping\n self.mapping_path = self.index_path.with_suffix(\".map\")\n \n # Initialize or load index\n self.index: Optional[faiss.Index] = None\n self.id_to_path: Dict[int, str] = {}\n self.path_to_id: Dict[str, int] = {}\n self.next_id: int = 0\n \n self._load_or_create_index()\n \n def _load_or_create_index(self):\n \"\"\"Load existing index or create new one\"\"\"\n if self.index_path.exists() and self.mapping_path.exists():\n try:\n self._load_index()\n return\n except Exception as e:\n print(f\"Warning: Failed to load FAISS index: {e}\")\n \n self._create_index()\n \n def _create_index(self):\n \"\"\"Create a new FAISS index\"\"\"\n if self.index_type == \"flat\":\n # Exact L2 search\n self.index = faiss.IndexFlatIP(self.dimension) # Inner product (cosine after normalization)\n \n elif self.index_type == \"ivf\":\n # IVF with flat quantizer\n quantizer = faiss.IndexFlatIP(self.dimension)\n self.index = faiss.IndexIVFFlat(quantizer, self.dimension, self.nlist)\n # Need to train with some vectors first\n self.index.nprobe = 10 # Search 10 clusters\n \n elif self.index_type == \"hnsw\":\n # HNSW index\n self.index = faiss.IndexHNSWFlat(self.dimension, self.m)\n self.index.hnsw.efConstruction = 40\n self.index.hnsw.efSearch = 16\n \n else:\n raise ValueError(f\"Unknown index type: {self.index_type}\")\n \n # Use IDMap to allow custom IDs\n self.index = faiss.IndexIDMap(self.index)\n \n self.id_to_path = {}\n self.path_to_id = {}\n self.next_id = 0\n \n def _load_index(self):\n \"\"\"Load index from disk\"\"\"\n self.index = faiss.read_index(str(self.index_path))\n \n with open(self.mapping_path, \"rb\") as f:\n mapping = pickle.load(f)\n \n self.id_to_path = mapping[\"id_to_path\"]\n self.path_to_id = mapping[\"path_to_id\"]\n self.next_id = mapping[\"next_id\"]\n \n def save(self):\n \"\"\"Save index to disk\"\"\"\n faiss.write_index(self.index, str(self.index_path))\n \n mapping = {\n \"id_to_path\": self.id_to_path,\n \"path_to_id\": self.path_to_id,\n \"next_id\": self.next_id,\n }\n with open(self.mapping_path, \"wb\") as f:\n pickle.dump(mapping, f)\n \n def _normalize(self, vec: np.ndarray) -> np.ndarray:\n \"\"\"Normalize vector for cosine similarity\"\"\"\n norm = np.linalg.norm(vec)\n if norm > 0:\n return vec / norm\n return vec\n \n def add_node(self, node: AVMNode, force: bool = False) -> bool:\n \"\"\"\n Add a node to the index.\n \n Returns True if node was added/updated.\n \"\"\"\n # Check if already indexed (skip if content unchanged)\n if not force and node.path in self.path_to_id:\n # Could check content hash here for updates\n return False\n \n # Generate embedding\n text = f\"{node.path}\\n\\n{node.content[:2000]}\"\n embedding = self.backend.embeend(text)\n vec = self._normalize(np.array([embedding], dtype=np.float32))\n \n # Remove old entry if exists\n if node.path in self.path_to_id:\n old_id = self.path_to_id[node.path]\n # FAISS doesn't support removal, so we just orphan the old ID\n del self.id_to_path[old_id]\n \n # Add to index\n new_id = self.next_id\n self.next_id += 1\n \n ids = np.array([new_id], dtype=np.int64)\n self.index.add_with_ids(vec, ids)\n \n self.id_to_path[new_id] = node.path\n self.path_to_id[node.path] = new_id\n \n return True\n \n def add_nodes(self, nodes: List[AVMNode], batch_size: int = 100) -> int:\n \"\"\"\n Batch add nodes to index.\n \n Returns number of nodes added.\n \"\"\"\n count = 0\n \n # Filter nodes that need embedding\n to_embed = []\n for node in nodes:\n if node.path not in self.path_to_id:\n to_embed.append(node)\n \n # Batch embed\n for i in range(0, len(to_embed), batch_size):\n batch = to_embed[i:i + batch_size]\n texts = [f\"{n.path}\\n\\n{n.content[:2000]}\" for n in batch]\n \n embeddings = self.backend.embeend_batch(texts)\n vecs = np.array(embeddings, dtype=np.float32)\n \n # Normalize\n norms = np.linalg.norm(vecs, axis=1, keepdims=True)\n norms[norms == 0] = 1\n vecs = vecs / norms\n \n # Assign IDs\n ids = np.array(range(self.next_id, self.next_id + len(batch)), dtype=np.int64)\n \n # Add to index\n self.index.add_with_ids(vecs, ids)\n \n # Update mappings\n for j, node in enumerate(batch):\n new_id = self.next_id + j\n self.id_to_path[new_id] = node.path\n self.path_to_id[node.path] = new_id\n \n self.next_id += len(batch)\n count += len(batch)\n \n return count\n \n def search(\n self, \n query: str, \n k: int = 5, \n prefix: str = None\n ) -> List[Tuple[AVMNode, float]]:\n \"\"\"\n Search for similar nodes.\n \n Args:\n query: Search query text\n k: Number of results\n prefix: Filter by path prefix\n \n Returns:\n List of (node, similarity) tuples\n \"\"\"\n if self.index.ntotal == 0:\n return []\n \n # Generate query embedding\n query_vec = self.backend.embeend(query)\n query_vec = self._normalize(np.array([query_vec], dtype=np.float32))\n \n # Search (get more results if filtering by prefix)\n search_k = k * 5 if prefix else k\n distances, ids = self.index.search(query_vec, min(search_k, self.index.ntotal))\n \n results = []\n for i, (dist, idx) in enumerate(zip(distances[0], ids[0])):\n if idx == -1: # FAISS returns -1 for empty slots\n continue\n \n path = self.id_to_path.get(int(idx))\n if path is None:\n continue\n \n # Filter by prefix\n if prefix and not path.startswith(prefix):\n continue\n \n # Get node\n node = self.store.get_node(path)\n if node:\n # Convert distance to similarity (for IP index, higher is better)\n similarity = float(dist)\n results.append((node, similarity))\n \n if len(results) >= k:\n break\n \n return results\n \n def index_all(self, prefix: str = \"/\", limit: int = 10000) -> int:\n \"\"\"Index all nodes under a prefix\"\"\"\n nodes = self.store.list_nodes(prefix, limit)\n return self.add_nodes(nodes)\n \n def stats(self) -> Dict[str, Any]:\n \"\"\"Get index statistics\"\"\"\n return {\n \"index_type\": self.index_type,\n \"dimension\": self.dimension,\n \"total_vectors\": self.index.ntotal,\n \"indexed_paths\": len(self.path_to_id),\n \"index_path\": str(self.index_path),\n \"backend\": type(self.backend).__name__,\n }\n \n def rebuild(self):\n \"\"\"Rebuild index from scratch\"\"\"\n self._create_index()\n self.index_all()\n self.save()\n\n\ndef get_faiss_store(\n store: AVMStore,\n backend: EmbeddingBackend = None,\n index_type: str = \"flat\",\n) -> FAISSEmbeddingStore:\n \"\"\"\n Get or create a FAISS embedding store.\n \n If backend is None, tries to create a LocalEmbedding.\n \"\"\"\n if backend is None:\n from .embedding import LocalEmbedding\n backend = LocalEmbedding()\n \n return FAISSEmbeddingStore(store, backend, index_type)\n","content_type":"text/x-python; charset=utf-8","language":"python","size":10332,"content_sha256":"eba70bfdfc94886be8173fda5d4a15b8fe04861c2e660c4173a165f68ca6e897"},{"filename":"avm/fuse_mount.py","content":"#!/usr/bin/env python3\n\"\"\"\nvfs/fuse_mount.py - FUSE Mount for AVM\n\nMount AVM as a filesystem with virtual nodes for metadata access.\n\nUsage:\n avm-mount /mnt/avm --user akashi\n avm-mount /mnt/avm --db /path/to/vfs.db\n\nVirtual Nodes:\n /path/to/node.md - File content\n /path/to/node.md:meta - Metadata (JSON)\n /path/to/node.md:links - Related nodes\n /path/to/node.md:tags - Tags\n /path/to/node.md:history - Change history\n /path/to/:list - Directory listing\n /path/to/:search?q=X - Search results\n /path/to/:recall?q=X - Token-aware recall\n /path/to/:stats - Statistics\n\"\"\"\n\nimport os\nimport stat\nimport errno\nimport json\nimport argparse\nimport re\nimport sqlite3\nfrom datetime import datetime\n\nfrom .utils import utcnow\nfrom typing import Optional, Dict, Any\nfrom pathlib import Path\n\ntry:\n import fuse as _fuse_module\n from fuse import FUSE, FuseOSError, Operations\n HAS_FUSE = True\n \n # Monkey-patch fusepy for Python 3.13+ compatibility\n # The original code has `self.__critical_exception = e` in a context\n # where `self` is not defined (inside functools.partial callback)\n import sys\n if sys.version_info >= (3, 13):\n import functools\n _original_wrapper = _fuse_module.FUSE._wrapper\n @functools.wraps(_original_wrapper)\n def _patched_wrapper(self, func, *args, **kwargs):\n try:\n return func(*args, **kwargs) or 0\n except OSError as e:\n if e.errno and e.errno > 0:\n return -e.errno\n # Silently ignore exceptions without errno (e.g., PermissionError)\n return -errno.EINVAL\n except Exception:\n return -errno.EINVAL\n _fuse_module.FUSE._wrapper = _patched_wrapper\n \nexcept (ImportError, OSError):\n # ImportError: fusepy not installed\n # OSError: libfuse not found (common in CI environments)\n HAS_FUSE = False\n FUSE = None\n # Stub for when fuse is not installed\n class Operations:\n pass\n class FuseOSError(Exception):\n pass\n\n\nclass AVMFuse(Operations):\n \"\"\"\n FUSE operations for AVM filesystem.\n \n Supports virtual nodes via special suffixes:\n - :meta, :links, :tags, :history (per-file)\n - :list, :search, :recall, :stats (per-directory)\n \"\"\"\n \n # Virtual node suffixes\n VIRTUAL_SUFFIXES = {':meta', ':links', ':tags', ':history', ':shared', ':data', ':info', ':path', ':ttl', ':delta', ':mark'}\n VIRTUAL_DIR_FILES = {':list', ':stats', ':inbox', ':topics', ':help'}\n VIRTUAL_QUERY_PATTERNS = {':search', ':recall', ':changes'}\n \n def __init__(self, vfs, user=None):\n self.vfs = vfs\n self.user = user\n self.fd = 0\n self._open_files: Dict[int, str] = {}\n self._write_buffers: Dict[int, bytes] = {}\n self._tell_store = None # Lazy init\n self._mount_ready_event = None # Set by daemon to signal mount is live\n\n def init(self, path):\n \"\"\"Called by FUSE when the filesystem is mounted and ready.\"\"\"\n # Note: on macFUSE this may not be called reliably; daemon.py uses\n # iterdir() polling instead.\n return None\n \n def _get_tell_store(self):\n \"\"\"Lazy initialization of TellStore\"\"\"\n if self._tell_store is None:\n from .tell import TellStore\n self._tell_store = TellStore(self.vfs.store.db_path)\n return self._tell_store\n \n def _get_hook_manager(self):\n \"\"\"Get or create HookManager with DB persistence\"\"\"\n from .tell import HookManager, get_hook_manager, set_hook_manager\n manager = get_hook_manager()\n # Ensure it has DB path for persistence\n if manager._db_path is None:\n manager = HookManager(db_path=self.vfs.store.db_path)\n set_hook_manager(manager)\n return manager\n \n def _parse_path(self, path: str) -> tuple:\n \"\"\"\n Parse path into (real_path, virtual_suffix, query_params).\n \n Examples:\n /memory/note.md -> ('/memory/note.md', None, None)\n /memory/note.md:meta -> ('/memory/note.md', ':meta', None)\n /memory/:search?q=RSI -> ('/memory', ':search', {'q': 'RSI'})\n /@abc -> resolved shortcut path\n \"\"\"\n # Handle shortcut (@xxx) - check if any path component starts with @\n # e.g., /@abc or /memory/private/@abc\n # If path ends with @xxx/, resolve to parent directory\n parts = path.split('/')\n for i, part in enumerate(parts):\n if part.startswith('@') and len(part) > 1:\n shortcut = part[1:] # Remove @\n # Check for suffix on shortcut (e.g., @abc:meta)\n suffix_part = None\n for suffix in self.VIRTUAL_SUFFIXES:\n if shortcut.endswith(suffix):\n suffix_part = suffix\n shortcut = shortcut[:-len(suffix)]\n break\n # Resolve shortcut to real path\n real_path = self._resolve_shortcut(shortcut)\n if real_path:\n return (real_path, suffix_part, None)\n # Shortcut not found - return as-is for error handling\n return (path, None, None)\n \n # Check for query params\n if '?' in path:\n base, query_str = path.split('?', 1)\n params = {}\n for part in query_str.split('&'):\n if '=' in part:\n k, v = part.split('=', 1)\n params[k] = v\n else:\n base = path\n params = None\n \n # Check for virtual suffix (colon-prefixed, e.g., :meta)\n for suffix in self.VIRTUAL_SUFFIXES | self.VIRTUAL_DIR_FILES | self.VIRTUAL_QUERY_PATTERNS:\n if base.endswith(suffix):\n real_path = base[:-len(suffix)]\n if real_path.endswith('/'):\n real_path = real_path[:-1]\n return (real_path or '/', suffix, params)\n \n # Resolve /private/... shorthand → /memory/private/{agent_id}/...\n base = self._resolve_private(base)\n return (base, None, params)\n\n def _resolve_private(self, path: str) -> str:\n \"\"\"Map /private/... to /memory/private/{user}/... for the current agent.\"\"\"\n if self.user:\n if path.startswith(\"/private/\"):\n return f\"/memory/private/{self.user}/{path[len('/private/'):]}\"\n if path == \"/private\":\n return f\"/memory/private/{self.user}\"\n return path\n\n def _is_virtual(self, path: str) -> bool:\n \"\"\"Check if path is a virtual node.\"\"\"\n _, suffix, _ = self._parse_path(path)\n return suffix is not None\n \n def _resolve_shortcut(self, shortcut: str) -> str:\n \"\"\"Resolve shortcut to real path.\"\"\"\n # Search for node with this shortcut in meta\n nodes = self.vfs.store.list_nodes(\"/memory\", limit=1000)\n for node in nodes:\n if node.meta.get('shortcut') == shortcut:\n return node.path\n return None\n \n def _generate_shortcut(self, path: str) -> str:\n \"\"\"Generate a unique shortcut for a path.\"\"\"\n import hashlib\n # Use hash of path for consistent shortcuts\n h = hashlib.md5(path.encode()).hexdigest()[:3]\n # Check for collision\n existing = self._resolve_shortcut(h)\n if existing and existing != path:\n # Collision - extend hash\n h = hashlib.md5(path.encode()).hexdigest()[:4]\n return h\n \n def _can_see_shared(self, node) -> bool:\n \"\"\"Check if current agent can see this shared node.\"\"\"\n if not self.user:\n return True # Admin mode\n \n # Only filter /memory/shared/ paths\n if not node.path.startswith(\"/memory/shared/\"):\n return True\n \n # Check shared_with in metadata\n shared_with = node.meta.get(\"shared_with\", [])\n \n # Empty or contains \"all\" = everyone can see\n if not shared_with or \"all\" in shared_with:\n return True\n \n return self.user in shared_with\n \n def _get_virtual_content(self, real_path: str, suffix: str, params: dict, update_markers: bool = True) -> str:\n \"\"\"Generate content for virtual nodes.\"\"\"\n \n if suffix == ':data':\n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n return node.content or ''\n \n if suffix == ':path':\n # Return path relative to mount point (without leading /)\n rel_path = real_path.lstrip('/')\n return f\"{rel_path}\\n\"\n \n if suffix == ':delta':\n # Return diff since last read by this agent (read-only, doesn't update marker)\n if not self.user:\n return '(no agent context)\\n'\n \n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n \n current_version = node.version\n last_read = node.meta.get('last_read', {})\n last_version = last_read.get(self.user, 0)\n \n if last_version == 0:\n # First read - return full content\n if update_markers:\n last_read[self.user] = current_version\n node.meta['last_read'] = last_read\n self.vfs.store.put_node(node, save_diff=False)\n return f'# (first read, full content)\\n{node.content or \"\"}\\n'\n \n if last_version >= current_version:\n return '(no changes)\\n' # Already up to date, no marker update needed\n \n # Get diffs from last_version to current\n history = self.vfs.history(real_path, limit=100)\n \n # Collect diffs for versions > last_version\n diffs = []\n for h in reversed(history): # oldest first\n if h.version > last_version:\n if h.diff_content and h.change_type == 'update':\n diffs.append(f\"# v{h.version} ({h.changed_at.strftime('%Y-%m-%d %H:%M')})\\n{h.diff_content}\")\n \n if not diffs:\n result = f'(changed but no diff, v{last_version}→v{current_version})\\n'\n else:\n result = '\\n'.join(diffs) + '\\n'\n \n # Auto-mark as read after showing delta\n if update_markers:\n last_read[self.user] = current_version\n node.meta['last_read'] = last_read\n self.vfs.store.put_node(node, save_diff=False)\n \n return result\n \n if suffix == ':mark':\n # Show current read marker for this agent\n if not self.user:\n return '(no agent context)\\n'\n \n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n \n last_read = node.meta.get('last_read', {})\n last_version = last_read.get(self.user, 0)\n current_version = node.version\n \n return f'marked: v{last_version}, current: v{current_version}\\n'\n \n if suffix == ':ttl':\n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n expires_at = node.meta.get('expires_at')\n if not expires_at:\n return 'never\\n'\n from datetime import datetime\n try:\n exp_dt = datetime.fromisoformat(expires_at.replace('Z', '+00:00'))\n remaining = exp_dt - utcnow()\n if remaining.total_seconds() \u003c= 0:\n return 'expired\\n'\n # Format as human readable\n mins = int(remaining.total_seconds() / 60)\n if mins \u003c 60:\n return f'{mins}m\\n'\n hours = mins // 60\n if hours \u003c 24:\n return f'{hours}h {mins % 60}m\\n'\n days = hours // 24\n return f'{days}d {hours % 24}h\\n'\n except (ValueError, TypeError):\n return 'invalid\\n'\n \n if suffix == ':info':\n # List available virtual suffixes for this file\n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n \n suffixes = [':data']\n if node.meta:\n suffixes.append(':meta')\n try:\n links = self.vfs.links(real_path, direction=\"both\")\n if links:\n suffixes.append(':links')\n except Exception:\n pass\n if node.meta.get('tags'):\n suffixes.append(':tags')\n if 'shared_with' in node.meta:\n suffixes.append(':shared')\n \n return '\\n'.join(suffixes) + '\\n'\n \n if suffix == ':meta':\n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n return json.dumps(node.meta, indent=2, default=str) + '\\n'\n \n elif suffix == ':links':\n try:\n edges = self.vfs.links(real_path, direction=\"both\")\n lines = []\n for edge in edges:\n target = edge.get('target') or edge.get('source', '?')\n rel_type = edge.get('type', 'related')\n lines.append(f\"{target} ({rel_type})\")\n return '\\n'.join(lines) + '\\n' if lines else '(no links)\\n'\n except Exception:\n return '(no links)\\n'\n \n elif suffix == ':tags':\n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n tags = node.meta.get('tags', [])\n return ','.join(tags) + '\\n' if tags else '\\n'\n \n elif suffix == ':shared':\n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n shared_with = node.meta.get('shared_with', [])\n if not shared_with:\n return 'all\\n'\n return ','.join(shared_with) + '\\n'\n \n elif suffix == ':history':\n history = self.vfs.history(real_path, limit=10)\n lines = []\n for h in history:\n ts = h.changed_at.strftime('%Y-%m-%d %H:%M') if h.changed_at else '?'\n change = h.change_type or 'update'\n ver = f\"v{h.version}\" if h.version else ''\n lines.append(f\"[{ts}] {change} {ver}\")\n return '\\n'.join(lines) + '\\n' if lines else '(no history)\\n'\n \n elif suffix == ':list':\n limit = int(params.get('limit', 50)) if params else 50\n offset = int(params.get('offset', 0)) if params else 0\n query = params.get('q', '') if params else ''\n \n tag_filter = params.get('tag', '') if params else ''\n \n if query:\n # Search mode: use full-text search\n results = self.vfs.search(query, limit=(limit + offset) * 5)\n nodes = [node for node, score in results]\n else:\n # List mode: get nodes from path\n nodes = self.vfs.list(real_path, limit=(limit + offset) * 5)\n \n # Filter by tag if specified\n if tag_filter:\n nodes = [n for n in nodes \n if tag_filter in n.meta.get('tags', [])]\n lines = []\n skipped = 0\n for node in nodes:\n # Filter by access permission first\n if not self._can_see_shared(node):\n continue\n # Then apply offset\n if skipped \u003c offset:\n skipped += 1\n continue\n # Stop at limit\n if len(lines) >= limit:\n break\n # Get or generate shortcut\n shortcut = node.meta.get('shortcut')\n if not shortcut:\n shortcut = self._generate_shortcut(node.path)\n # Store shortcut in meta\n node.meta['shortcut'] = shortcut\n self.vfs.write(node.path, node.content, meta=node.meta)\n # Get filename (truncate if too long)\n filename = node.path.split('/')[-1]\n if len(filename) > 30:\n filename = filename[:27] + '...'\n # Generate summary (first line, skip headers)\n content = node.content or ''\n summary = content.lstrip('#').strip()\n first_line = summary.split('\\n')[0][:40]\n if len(summary.split('\\n')[0]) > 40:\n first_line += '...'\n lines.append(f\"@{shortcut} {filename} {first_line}\")\n return '\\n'.join(lines) + '\\n' if lines else '\\n'\n \n elif suffix == ':stats':\n stats = self.vfs.stats()\n return json.dumps(stats, indent=2, default=str) + '\\n'\n\n elif suffix == ':help':\n agent = self.user or 'agent'\n return f\"\"\"\\\nAVM — AI Virtual Memory (agent: {agent})\n==========================================\n\nQUICK START\n cat /avm/:help This help\n cat /avm/:stats Storage statistics\n cat /avm/:inbox Incoming messages (tell)\n\nPRIVATE SPACE (only visible to you)\n ls /avm/private/ Your private files\n cat /avm/private/note.md Read a private file\n echo \"text\" > /avm/private/note.md Write a private file\n → Automatically stored as /memory/private/{agent}/note.md\n\nSHARED SPACE (all agents can read)\n ls /avm/memory/shared/ Shared files\n echo \"text\" > /avm/memory/shared/report.md\n\nVIRTUAL SUFFIXES (append to any file path)\n file.md:meta Metadata (JSON)\n file.md:tags Tags\n file.md:links Related nodes\n file.md:history Version history\n file.md:delta Changes since last read\n file.md:ttl Time-to-live\n file.md:shared Who can see this file (set with echo \"agent1,agent2\" > ...)\n\nDIRECTORY VIRTUAL FILES\n :list List all nodes under this path\n :stats Statistics\n :inbox Messages sent to you (tell system)\n :search?q=\u003cquery> Full-text search\n :recall?q=\u003cquery> Semantic recall (token-aware)\n :changes?minutes=5 Recent changes\n\nSHORTCUTS\n ls /avm/memory/ Shows @abc shortcuts next to filenames\n cat /avm/@abc Access file by shortcut\n\nCLI (outside FUSE)\n avm read /memory/note.md\n avm write /memory/note.md --content \"text\"\n avm recall \"NVDA risk\" --max-tokens 500\n avm search \"BTC\"\n avm mv /old/path /new/path\n avm-daemon status\n\"\"\"\n \n elif suffix == ':inbox':\n # Show all tells for this agent\n if not self.user:\n return '(no agent context)\\n'\n try:\n tell_store = self._get_tell_store()\n tells = tell_store.get_all(self.user, limit=50)\n \n # Check for mark=read param\n if params and params.get('mark') == 'read':\n tell_store.mark_all_read(self.user)\n return f'Marked {len([t for t in tells if not t.read_at])} messages as read.\\n'\n \n from .tell import format_inbox\n return format_inbox(tells, show_read=True)\n except Exception as e:\n return f'(tell system error: {e})\\n'\n \n elif suffix == ':search':\n query = params.get('q', '') if params else ''\n limit = int(params.get('limit', 10)) if params else 10\n # Use embedding + FTS hybrid when embedding is available\n es = getattr(self.vfs, '_embedding_store', None)\n if es is not None:\n sem_results = es.search(query, k=limit)\n fts_results = self.vfs.search(query, limit=limit)\n # Merge: embedding results first, then FTS results not already seen\n seen = set()\n merged = []\n for node, score in sem_results:\n seen.add(node.path)\n merged.append((node, score))\n for node, score in fts_results:\n if node.path not in seen:\n merged.append((node, score))\n results = merged[:limit]\n else:\n results = self.vfs.search(query, limit=limit)\n lines = []\n for node, score in results:\n lines.append(f\"[{score:.2f}] {node.path}\")\n return '\\n'.join(lines) + '\\n' if lines else '(no results)\\n'\n \n elif suffix == ':recall':\n query = params.get('q', '') if params else ''\n max_tokens = int(params.get('max_tokens', 4000)) if params else 4000\n if self.user:\n memory = self.vfs.agent_memory(self.user)\n return memory.recall(query, max_tokens=max_tokens)\n else:\n return '(no user context for recall)\\n'\n \n elif suffix == ':cold':\n # Show cold (decayed) memories\n from .advanced import MemoryDecay\n threshold = float(params.get('threshold', 0.3)) if params else 0.3\n half_life = float(params.get('half_life', 7.0)) if params else 7.0\n limit = int(params.get('limit', 20)) if params else 20\n \n decay = MemoryDecay(self.vfs.store, half_life_days=half_life)\n cold = decay.get_cold_memories(prefix=real_path or '/memory', threshold=threshold, limit=limit)\n \n if not cold:\n return '(no cold memories)\\n'\n \n lines = [f\"# Cold memories (importance × decay \u003c {threshold})\", \"\"]\n for node in cold:\n importance = node.meta.get(\"importance\", 0.5)\n decay_factor = decay.calculate_decay(node)\n score = importance * decay_factor\n lines.append(f\"{node.path}\")\n lines.append(f\" score={score:.3f} (imp={importance:.2f} × dec={decay_factor:.2f})\")\n return '\\n'.join(lines) + '\\n'\n \n elif suffix == ':archive':\n # Archive cold memories (requires user context)\n if not self.user:\n return '(no agent context - archive requires user)\\n'\n \n from .advanced import MemoryDecay\n threshold = float(params.get('threshold', 0.2)) if params else 0.2\n half_life = float(params.get('half_life', 7.0)) if params else 7.0\n limit = int(params.get('limit', 10)) if params else 10\n dry_run = params.get('dry_run', 'true').lower() == 'true' if params else True\n \n decay = MemoryDecay(self.vfs.store, half_life_days=half_life)\n cold = decay.get_cold_memories(prefix=real_path or '/memory', threshold=threshold, limit=limit)\n \n if not cold:\n return '(no cold memories to archive)\\n'\n \n if dry_run:\n lines = [f\"# Would archive {len(cold)} memories (dry_run=true)\", \"\"]\n for node in cold:\n archive_path = node.path.replace(\"/memory/\", \"/archive/\", 1)\n lines.append(f\"{node.path} → {archive_path}\")\n return '\\n'.join(lines) + '\\n'\n \n # Actually archive\n archived = []\n for node in cold:\n archive_path = node.path.replace(\"/memory/\", \"/archive/\", 1)\n node.meta['archived_by'] = self.user\n node.meta['archived_at'] = utcnow().isoformat()\n self.vfs.write(archive_path, node.content, meta=node.meta)\n self.vfs.store.delete_node(node.path)\n archived.append((node.path, archive_path))\n \n lines = [f\"# Archived {len(archived)} memories\", \"\"]\n for src, dst in archived:\n lines.append(f\"{src} → {dst}\")\n return '\\n'.join(lines) + '\\n'\n \n elif suffix == ':decay':\n # Show decay status for a specific file\n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n \n from .advanced import MemoryDecay\n half_life = float(params.get('half_life', 7.0)) if params else 7.0\n decay = MemoryDecay(self.vfs.store, half_life_days=half_life)\n \n importance = node.meta.get(\"importance\", 0.5)\n decay_factor = decay.calculate_decay(node)\n score = importance * decay_factor\n \n lines = [\n f\"path: {node.path}\",\n f\"importance: {importance:.2f}\",\n f\"decay_factor: {decay_factor:.3f}\",\n f\"effective_score: {score:.3f}\",\n f\"half_life: {half_life} days\",\n f\"updated_at: {node.updated_at}\",\n ]\n return '\\n'.join(lines) + '\\n'\n \n elif suffix == ':subscriptions':\n # List subscriptions for current agent\n if not self.user:\n return '(no agent context)\\n'\n \n from .subscriptions import get_subscription_store\n store = get_subscription_store()\n subs = store.list_subscriptions(agent_id=self.user)\n \n if not subs:\n return '(no subscriptions)\\n'\n \n lines = [\"# Subscriptions\", \"\"]\n for s in subs:\n mode_info = s.mode.value\n if s.mode.value == \"throttled\":\n mode_info += f\" ({s.throttle_seconds}s)\"\n lines.append(f\"{s.pattern} [{mode_info}]\")\n return '\\n'.join(lines) + '\\n'\n \n elif suffix == ':pending':\n # Show pending notifications\n if not self.user:\n return '(no agent context)\\n'\n \n from .subscriptions import get_subscription_store\n store = get_subscription_store()\n \n mark = params.get('mark', '') == 'read' if params else False\n pending = store.get_pending(self.user, mark_delivered=mark)\n \n if not pending:\n return '(no pending notifications)\\n'\n \n lines = [f\"# Pending ({len(pending)})\", \"\"]\n for p in pending:\n lines.append(f\"[{p['timestamp'][:16]}] {p['path']}\")\n if mark:\n lines.append(f\"\\n(marked {len(pending)} as delivered)\")\n return '\\n'.join(lines) + '\\n'\n \n elif suffix == ':feed':\n # Show recent activity feed\n from .advanced import AccessStats\n \n limit = int(params.get('limit', 20)) if params else 20\n stats = AccessStats(self.vfs.store)\n \n # Get recent activity across all agents\n with sqlite3.connect(stats.db_path) as conn:\n rows = conn.execute(\"\"\"\n SELECT path, agent_id, access_type, timestamp\n FROM access_log\n ORDER BY timestamp DESC\n LIMIT ?\n \"\"\", (limit,)).fetchall()\n \n if not rows:\n return '(no recent activity)\\n'\n \n lines = [\"# Activity Feed\", \"\"]\n for path, agent, access_type, ts in rows:\n ts_short = ts[11:16] if len(ts) > 16 else ts\n agent_display = agent or \"unknown\"\n lines.append(f\"[{ts_short}] {agent_display} {access_type} {path}\")\n return '\\n'.join(lines) + '\\n'\n \n elif suffix == ':changes':\n # Return recently modified files\n # :changes?since=ISO_TIMESTAMP or :changes?minutes=N\n since = params.get('since', '') if params else ''\n minutes = int(params.get('minutes', 60)) if params else 60\n limit = int(params.get('limit', 20)) if params else 20\n \n from datetime import datetime, timedelta\n \n if since:\n try:\n since_dt = datetime.fromisoformat(since.replace('Z', '+00:00'))\n except ValueError:\n since_dt = utcnow() - timedelta(minutes=minutes)\n else:\n since_dt = utcnow() - timedelta(minutes=minutes)\n \n # Get all nodes and filter by updated_at\n nodes = self.vfs.list(real_path, limit=500)\n changed = []\n for node in nodes:\n if not self._can_see_shared(node):\n continue\n try:\n updated = node.updated_at\n if updated and updated >= since_dt:\n changed.append((node, updated))\n except (AttributeError, TypeError):\n pass\n \n # Sort by update time (newest first)\n changed.sort(key=lambda x: x[1], reverse=True)\n \n lines = []\n for node, updated in changed[:limit]:\n shortcut = node.meta.get('shortcut', '???')\n filename = node.path.split('/')[-1]\n if len(filename) > 25:\n filename = filename[:22] + '...'\n time_str = updated.strftime('%H:%M')\n lines.append(f\"@{shortcut} {time_str} {filename}\")\n \n if not lines:\n return '(no changes)\\n'\n return '\\n'.join(lines) + '\\n'\n \n return ''\n \n def _set_virtual_content(self, real_path: str, suffix: str, content: str) -> bool:\n \"\"\"Set content for writable virtual nodes.\"\"\"\n \n if suffix == ':tags':\n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n tags = [t.strip() for t in content.strip().split(',') if t.strip()]\n node.meta['tags'] = tags\n self.vfs.write(real_path, node.content, meta=node.meta)\n return True\n \n elif suffix == ':meta':\n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n try:\n new_meta = json.loads(content)\n node.meta.update(new_meta)\n self.vfs.write(real_path, node.content, meta=node.meta)\n return True\n except json.JSONDecodeError:\n raise FuseOSError(errno.EINVAL)\n \n elif suffix == ':links':\n # Format: target_path relation_type\n lines = content.strip().split('\\n')\n for line in lines:\n if not line.strip():\n continue\n parts = line.split()\n if len(parts) >= 1:\n target = parts[0]\n rel_type = parts[1] if len(parts) > 1 else 'related'\n self.vfs.link(real_path, target, rel_type)\n return True\n \n elif suffix == ':mark':\n # Update read marker to current version\n if not self.user:\n raise FuseOSError(errno.EACCES)\n \n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n \n last_read = node.meta.get('last_read', {})\n last_read[self.user] = node.version\n node.meta['last_read'] = last_read\n \n # Write without triggering diff (content unchanged)\n self.vfs.store.put_node(node, save_diff=False)\n return True\n \n elif suffix == ':ttl':\n # Format: Nm (minutes), Nh (hours), Nd (days), or \"never\"\n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n \n ttl_str = content.strip().lower()\n from datetime import datetime, timedelta\n \n if ttl_str == 'never' or not ttl_str:\n if 'expires_at' in node.meta:\n del node.meta['expires_at']\n else:\n # Parse duration\n try:\n if ttl_str.endswith('m'):\n minutes = int(ttl_str[:-1])\n delta = timedelta(minutes=minutes)\n elif ttl_str.endswith('h'):\n hours = int(ttl_str[:-1])\n delta = timedelta(hours=hours)\n elif ttl_str.endswith('d'):\n days = int(ttl_str[:-1])\n delta = timedelta(days=days)\n else:\n # Assume minutes\n delta = timedelta(minutes=int(ttl_str))\n \n expires_at = utcnow() + delta\n node.meta['expires_at'] = expires_at.isoformat()\n except ValueError:\n raise FuseOSError(errno.EINVAL)\n \n self.vfs.write(real_path, node.content, meta=node.meta)\n return True\n \n elif suffix == ':shared':\n # Format: agent1,agent2,... or \"all\"\n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n \n # Only creator can modify shared_with\n creator = node.meta.get('created_by')\n if creator and self.user and creator != self.user:\n raise FuseOSError(errno.EACCES)\n \n agents = content.strip()\n if agents == 'all' or not agents:\n node.meta['shared_with'] = []\n else:\n node.meta['shared_with'] = [a.strip() for a in agents.split(',')]\n \n # Record creator if not set\n if not creator and self.user:\n node.meta['created_by'] = self.user\n \n self.vfs.write(real_path, node.content, meta=node.meta)\n return True\n \n return False\n \n # ─── FUSE Operations ─────────────────────────────────\n \n def getattr(self, path, fh=None):\n \"\"\"Get file attributes.\"\"\"\n now = datetime.now().timestamp()\n \n # Skip macOS special files\n basename = os.path.basename(path)\n if basename.startswith('._') or basename in ('.DS_Store', '.localized'):\n raise FuseOSError(errno.ENOENT)\n \n real_path, suffix, params = self._parse_path(path)\n \n # Root directory\n if path == '/':\n return {\n 'st_mode': stat.S_IFDIR | 0o755,\n 'st_nlink': 2,\n 'st_uid': os.getuid(),\n 'st_gid': os.getgid(),\n 'st_atime': now,\n 'st_mtime': now,\n 'st_ctime': now,\n }\n \n # Virtual node\n if suffix:\n try:\n # Don't update markers in getattr (only in read)\n content = self._get_virtual_content(real_path, suffix, params, update_markers=False)\n return {\n 'st_mode': stat.S_IFREG | 0o644,\n 'st_nlink': 1,\n 'st_size': len(content.encode('utf-8')),\n 'st_uid': os.getuid(),\n 'st_gid': os.getgid(),\n 'st_atime': now,\n 'st_mtime': now,\n 'st_ctime': now,\n }\n except Exception:\n raise FuseOSError(errno.ENOENT)\n \n # Real node\n node = self.vfs.read(real_path)\n if node:\n size = len(node.content.encode('utf-8')) if node.content else 0\n mtime = now\n if 'updated_at' in node.meta:\n try:\n mtime = datetime.fromisoformat(node.meta['updated_at'].replace('Z', '+00:00')).timestamp()\n except (ValueError, AttributeError):\n pass\n \n return {\n 'st_mode': stat.S_IFREG | 0o644,\n 'st_nlink': 1,\n 'st_size': size,\n 'st_uid': os.getuid(),\n 'st_gid': os.getgid(),\n 'st_atime': now,\n 'st_mtime': mtime,\n 'st_ctime': mtime,\n }\n \n # Handle /tell/ paths for cross-agent messaging\n if real_path.startswith('/tell/'):\n # /tell/\u003cagent> - writable file for sending messages\n return {\n 'st_mode': stat.S_IFREG | 0o644,\n 'st_nlink': 1,\n 'st_size': 0,\n 'st_uid': os.getuid(),\n 'st_gid': os.getgid(),\n 'st_atime': now,\n 'st_mtime': now,\n 'st_ctime': now,\n }\n \n if real_path == '/tell':\n # /tell directory\n return {\n 'st_mode': stat.S_IFDIR | 0o755,\n 'st_nlink': 2,\n 'st_uid': os.getuid(),\n 'st_gid': os.getgid(),\n 'st_atime': now,\n 'st_mtime': now,\n 'st_ctime': now,\n }\n \n # Handle /hooks/ paths for hook configuration\n if real_path.startswith('/hooks/'):\n agent_id = real_path.split('/')[-1]\n if agent_id and agent_id != ':list':\n # /hooks/\u003cagent> - readable/writable hook config\n manager = self._get_hook_manager()\n content = manager.format_hook(agent_id)\n return {\n 'st_mode': stat.S_IFREG | 0o644,\n 'st_nlink': 1,\n 'st_size': len(content.encode('utf-8')) if content else 0,\n 'st_uid': os.getuid(),\n 'st_gid': os.getgid(),\n 'st_atime': now,\n 'st_mtime': now,\n 'st_ctime': now,\n }\n \n if real_path == '/hooks':\n # /hooks directory\n return {\n 'st_mode': stat.S_IFDIR | 0o755,\n 'st_nlink': 2,\n 'st_uid': os.getuid(),\n 'st_gid': os.getgid(),\n 'st_atime': now,\n 'st_mtime': now,\n 'st_ctime': now,\n }\n \n # Check if it's a directory (prefix with children)\n children = self.vfs.list(real_path, limit=1)\n if children or real_path in ('/', '/memory', '/memory/private', '/memory/shared'):\n return {\n 'st_mode': stat.S_IFDIR | 0o755,\n 'st_nlink': 2,\n 'st_uid': os.getuid(),\n 'st_gid': os.getgid(),\n 'st_atime': now,\n 'st_mtime': now,\n 'st_ctime': now,\n }\n \n raise FuseOSError(errno.ENOENT)\n \n def opendir(self, path):\n \"\"\"Open directory.\"\"\"\n return 0\n \n def releasedir(self, path, fh):\n \"\"\"Release directory.\"\"\"\n return 0\n \n def readdir(self, path, fh):\n \"\"\"List directory contents.\"\"\"\n real_path, _, _ = self._parse_path(path)\n \n entries = ['.', '..']\n \n # Add virtual directory files\n entries.extend([':list', ':stats', ':inbox', ':help'])\n \n # Add /tell and /hooks directories at root\n if real_path == '/':\n entries.append('tell')\n entries.append('hooks')\n \n # List hooks in /hooks directory\n if real_path == '/hooks':\n try:\n manager = self._get_hook_manager()\n for agent_id in manager.list_hooks().keys():\n entries.append(agent_id)\n except Exception:\n pass\n \n # Add real children\n nodes = self.vfs.list(real_path)\n seen = set()\n \n for node in nodes:\n # Filter by shared_with permission\n if not self._can_see_shared(node):\n continue\n \n # Get relative name\n if node.path.startswith(real_path):\n rel = node.path[len(real_path):].lstrip('/')\n # Only first component (immediate children)\n name = rel.split('/')[0]\n if name and name not in seen:\n seen.add(name)\n entries.append(name)\n # Add virtual suffixes for files (on-demand)\n if '.' in name: # Likely a file\n # :meta only if has metadata beyond system fields\n if node.meta:\n entries.append(f\"{name}:meta\")\n # :links only if has links\n try:\n links = self.vfs.links(node.path, direction=\"both\")\n if links:\n entries.append(f\"{name}:links\")\n except Exception:\n pass\n # :tags only if has tags\n if node.meta.get('tags'):\n entries.append(f\"{name}:tags\")\n # :shared only if shared_with set\n if 'shared_with' in node.meta:\n entries.append(f\"{name}:shared\")\n \n return entries\n \n def read(self, path, size, offset, fh):\n \"\"\"Read file content.\"\"\"\n real_path, suffix, params = self._parse_path(path)\n \n # Handle /hooks/\u003cagent> reads\n if real_path.startswith('/hooks/'):\n agent_id = real_path.split('/')[-1]\n if agent_id == ':list':\n # List all hooks\n manager = self._get_hook_manager()\n hooks = manager.list_hooks()\n lines = [\"# Registered Hooks\", \"\"]\n for aid, hook in hooks.items():\n lines.append(f\"- {aid}: {manager.format_hook(aid)}\")\n content = \"\\n\".join(lines) + \"\\n\" if hooks else \"# No hooks registered\\n\"\n else:\n manager = self._get_hook_manager()\n content = manager.format_hook(agent_id)\n if not content:\n content = \"# No hook configured\\n\"\n encoded = content.encode('utf-8')\n return encoded[offset:offset + size]\n \n if suffix:\n content = self._get_virtual_content(real_path, suffix, params)\n else:\n node = self.vfs.read(real_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n # Check shared permission\n if not self._can_see_shared(node):\n raise FuseOSError(errno.EACCES)\n # Check TTL expiration\n expires_at = node.meta.get('expires_at')\n if expires_at:\n from datetime import datetime\n try:\n exp_dt = datetime.fromisoformat(expires_at.replace('Z', '+00:00'))\n if utcnow() >= exp_dt:\n raise FuseOSError(errno.ENOENT) # Expired = not found\n except (ValueError, TypeError):\n pass\n \n # Auto-mark as read for shared files\n if self.user and '/shared/' in real_path:\n last_read = node.meta.get('last_read', {})\n if last_read.get(self.user) != node.version:\n last_read[self.user] = node.version\n node.meta['last_read'] = last_read\n self.vfs.store.put_node(node, save_diff=False)\n \n content = node.content or ''\n \n # Inject urgent tells at the beginning (only on first read, offset=0)\n if offset == 0 and self.user:\n content = self._inject_urgent_tells(content)\n \n encoded = content.encode('utf-8')\n return encoded[offset:offset + size]\n \n def _inject_urgent_tells(self, content: str) -> str:\n \"\"\"Inject urgent unread tells at the beginning of content\"\"\"\n try:\n tell_store = self._get_tell_store()\n urgent_tells = tell_store.get_urgent_unread(self.user)\n \n if urgent_tells:\n from .tell import format_tells_for_injection\n header = format_tells_for_injection(urgent_tells)\n \n # Mark as read after injection\n tell_store.mark_read([t.id for t in urgent_tells])\n \n return header + content\n except Exception:\n # Don't break reads if tell system fails\n pass\n \n return content\n \n def write(self, path, data, offset, fh):\n \"\"\"Write to file.\"\"\"\n real_path, suffix, _ = self._parse_path(path)\n \n # Buffer writes - load existing content if not already buffered\n if fh not in self._write_buffers:\n if not suffix:\n node = self.vfs.read(real_path)\n if node and node.content:\n self._write_buffers[fh] = node.content.encode('utf-8')\n else:\n self._write_buffers[fh] = b''\n else:\n self._write_buffers[fh] = b''\n \n # Handle offset\n buf = self._write_buffers[fh]\n if offset \u003c len(buf):\n # Insert/overwrite at position\n buf = buf[:offset] + data + buf[offset + len(data):]\n elif offset == len(buf):\n # Append at end\n buf = buf + data\n else:\n # Gap - fill with spaces (not nulls)\n buf = buf + b' ' * (offset - len(buf)) + data\n \n self._write_buffers[fh] = buf\n return len(data)\n \n def create(self, path, mode, fi=None):\n \"\"\"Create a new file.\"\"\"\n # Check for reserved @ prefix\n filename = path.split('/')[-1]\n if filename.startswith('@'):\n raise FuseOSError(errno.EINVAL) # Invalid argument - @ is reserved\n \n real_path, suffix, _ = self._parse_path(path)\n \n self.fd += 1\n self._open_files[self.fd] = path\n self._write_buffers[self.fd] = b''\n \n if not suffix:\n # Create empty node with creator metadata\n meta = {}\n if self.user:\n meta['created_by'] = self.user\n self.vfs.write(real_path, '', meta=meta)\n \n return self.fd\n \n def open(self, path, flags):\n \"\"\"Open a file.\"\"\"\n import os as _os\n self.fd += 1\n self._open_files[self.fd] = path\n \n # Handle O_APPEND: pre-load existing content to buffer\n if flags & _os.O_APPEND:\n real_path, suffix, _ = self._parse_path(path)\n if not suffix:\n node = self.vfs.read(real_path)\n if node and node.content:\n self._write_buffers[self.fd] = node.content.encode('utf-8')\n \n return self.fd\n \n def release(self, path, fh):\n \"\"\"Close a file and flush writes.\"\"\"\n if fh in self._write_buffers and self._write_buffers[fh]:\n real_path, suffix, params = self._parse_path(path)\n content = self._write_buffers[fh].decode('utf-8', errors='replace')\n \n # Handle /tell/\u003cagent> paths for cross-agent messaging\n if real_path.startswith('/tell/'):\n self._handle_tell_write(real_path, content, params)\n # Handle /hooks/\u003cagent> paths for hook configuration\n elif real_path.startswith('/hooks/'):\n self._handle_hook_write(real_path, content)\n elif suffix:\n self._set_virtual_content(real_path, suffix, content)\n else:\n # Preserve existing meta or create new with creator\n existing = self.vfs.read(real_path)\n if existing:\n meta = existing.meta\n else:\n meta = {}\n if self.user:\n meta['created_by'] = self.user\n self.vfs.write(real_path, content, meta=meta)\n \n self._write_buffers.pop(fh, None)\n self._open_files.pop(fh, None)\n return 0\n \n def _handle_tell_write(self, path: str, content: str, params: dict):\n \"\"\"Handle writes to /tell/\u003cagent> paths\"\"\"\n if not self.user:\n return # No sender context\n \n # Parse path: /tell/agentname or /tell/@all\n parts = path.strip('/').split('/')\n if len(parts) \u003c 2:\n return\n \n to_agent = parts[1]\n \n # Parse priority from params or path\n from .tell import TellPriority\n priority_str = 'normal'\n if params:\n priority_str = params.get('priority', 'normal')\n \n try:\n priority = TellPriority(priority_str)\n except ValueError:\n priority = TellPriority.NORMAL\n \n # Parse optional expiration\n expires_at = params.get('expires') if params else None\n \n # Send the tell\n try:\n tell_store = self._get_tell_store()\n tell_store.send(\n from_agent=self.user,\n to_agent=to_agent,\n content=content.strip(),\n priority=priority,\n expires_at=expires_at\n )\n except Exception:\n pass # Don't break writes if tell fails\n \n def _handle_hook_write(self, path: str, content: str):\n \"\"\"Handle writes to /hooks/\u003cagent> paths\"\"\"\n # Parse path: /hooks/agentname\n parts = path.strip('/').split('/')\n if len(parts) \u003c 2:\n return\n \n agent_id = parts[1]\n content = content.strip()\n \n manager = self._get_hook_manager()\n \n if not content:\n # Empty content = delete hook\n manager.unregister(agent_id)\n return\n \n # Parse hook string\n hook = manager.parse_hook_string(content)\n if hook:\n manager.register(agent_id, hook)\n \n def truncate(self, path, length, fh=None):\n \"\"\"Truncate file.\"\"\"\n real_path, suffix, _ = self._parse_path(path)\n \n if suffix:\n return 0 # Virtual files don't really truncate\n \n node = self.vfs.read(real_path)\n if node:\n content = node.content[:length] if node.content else ''\n self.vfs.write(real_path, content)\n \n return 0\n \n def unlink(self, path):\n \"\"\"Delete a file.\"\"\"\n real_path, suffix, _ = self._parse_path(path)\n \n if suffix:\n raise FuseOSError(errno.EPERM) # Can't delete virtual files\n \n # Handle /hooks/\u003cagent> deletion\n if real_path.startswith('/hooks/'):\n agent_id = real_path.split('/')[-1]\n manager = self._get_hook_manager()\n manager.unregister(agent_id)\n return\n \n if not self.vfs.delete(real_path):\n raise FuseOSError(errno.ENOENT)\n \n def mkdir(self, path, mode):\n \"\"\"Create directory (no-op for VFS).\"\"\"\n # VFS doesn't have real directories\n return 0\n \n def rmdir(self, path):\n \"\"\"Remove directory.\"\"\"\n # Check if empty\n nodes = self.vfs.list(path, limit=1)\n if nodes:\n raise FuseOSError(errno.ENOTEMPTY)\n return 0\n \n def rename(self, old, new):\n \"\"\"Rename/move a file.\"\"\"\n old_path, old_suffix, _ = self._parse_path(old)\n new_path, new_suffix, _ = self._parse_path(new)\n \n if old_suffix or new_suffix:\n raise FuseOSError(errno.EPERM)\n \n node = self.vfs.read(old_path)\n if not node:\n raise FuseOSError(errno.ENOENT)\n \n self.vfs.write(new_path, node.content, meta=node.meta)\n self.vfs.delete(old_path)\n return 0\n \n def chmod(self, path, mode):\n \"\"\"Change permissions (no-op).\"\"\"\n return 0\n \n def chown(self, path, uid, gid):\n \"\"\"Change ownership (no-op).\"\"\"\n return 0\n \n def utimens(self, path, times=None):\n \"\"\"Update timestamps (no-op).\"\"\"\n return 0\n\n\nimport signal\nimport subprocess\nimport sys\n\n# PID file location\ndef _pid_file(mountpoint: str) -> Path:\n \"\"\"Get PID file path for a mountpoint.\"\"\"\n safe_name = mountpoint.replace('/', '_').strip('_')\n return Path.home() / '.local' / 'share' / 'avm' / 'mounts' / f'{safe_name}.pid'\n\n\ndef _is_mounted(mountpoint: str) -> bool:\n \"\"\"Check if mountpoint is currently mounted.\"\"\"\n try:\n # Use /sbin/mount for macOS compatibility\n mount_cmd = '/sbin/mount' if os.path.exists('/sbin/mount') else 'mount'\n result = subprocess.run([mount_cmd], capture_output=True, text=True)\n # Handle /tmp -> /private/tmp symlink on macOS\n return mountpoint in result.stdout or mountpoint.replace('/tmp/', '/private/tmp/') in result.stdout\n except Exception:\n return False\n\n\ndef _get_pid(mountpoint: str) -> Optional[int]:\n \"\"\"Get PID of mount process.\"\"\"\n pid_file = _pid_file(mountpoint)\n if pid_file.exists():\n try:\n return int(pid_file.read_text().strip())\n except (ValueError, IOError):\n pass\n return None\n\n\ndef _write_pid(mountpoint: str, pid: int):\n \"\"\"Write PID file.\"\"\"\n pid_file = _pid_file(mountpoint)\n pid_file.parent.mkdir(parents=True, exist_ok=True)\n pid_file.write_text(str(pid))\n\n\ndef _remove_pid(mountpoint: str):\n \"\"\"Remove PID file.\"\"\"\n pid_file = _pid_file(mountpoint)\n if pid_file.exists():\n pid_file.unlink()\n\n\ndef cmd_mount(args):\n \"\"\"Mount AVM filesystem.\"\"\"\n if not HAS_FUSE:\n print(\"Error: fusepy not installed. Run: pip install fusepy\")\n print(\"Also ensure FUSE is installed:\")\n print(\" macOS: brew install macfuse\")\n print(\" Linux: apt install fuse3\")\n return 1\n \n mountpoint = Path(args.mountpoint).resolve()\n mountpoint.mkdir(parents=True, exist_ok=True)\n \n if _is_mounted(str(mountpoint)):\n print(f\"Already mounted: {mountpoint}\")\n return 1\n \n from . import AVM\n from .config import AVMConfig\n \n config = AVMConfig(db_path=args.db) if args.db else None\n \n if args.daemon:\n # Fork to background\n pid = os.fork()\n if pid > 0:\n # Parent\n _write_pid(str(mountpoint), pid)\n print(f\"Mounted: {mountpoint} (pid={pid})\")\n return 0\n \n # Child - detach\n os.setsid()\n \n # Redirect stdio\n sys.stdin = open(os.devnull, 'r')\n sys.stdout = open(os.devnull, 'w')\n sys.stderr = open(os.devnull, 'w')\n \n # Create AVM AFTER fork (SQLite connections can't cross fork)\n avm = AVM(config=config, agent_id=args.agent)\n \n if not args.daemon:\n print(f\"Mounting AVM at {mountpoint}\")\n print(f\"Agent: {args.agent or '(none)'}\")\n print(f\"Database: {avm.store.db_path}\")\n print(\"Press Ctrl+C to unmount\")\n \n try:\n FUSE(\n AVMFuse(avm, args.agent),\n str(mountpoint),\n foreground=not args.daemon,\n allow_other=False,\n nothreads=True,\n )\n finally:\n if args.daemon:\n _remove_pid(str(mountpoint))\n \n return 0\n\n\ndef cmd_stop(args):\n \"\"\"Stop mounted AVM filesystem.\"\"\"\n mountpoint = Path(args.mountpoint).resolve()\n \n if not _is_mounted(str(mountpoint)):\n print(f\"Not mounted: {mountpoint}\")\n _remove_pid(str(mountpoint))\n return 1\n \n pid = _get_pid(str(mountpoint))\n \n # Try umount first\n try:\n if sys.platform == 'darwin':\n subprocess.run(['umount', str(mountpoint)], check=True)\n else:\n subprocess.run(['fusermount', '-u', str(mountpoint)], check=True)\n _remove_pid(str(mountpoint))\n print(f\"Stopped: {mountpoint}\")\n return 0\n except subprocess.CalledProcessError:\n pass\n \n # Kill process if umount failed\n if pid:\n try:\n os.kill(pid, signal.SIGTERM)\n _remove_pid(str(mountpoint))\n print(f\"Stopped: {mountpoint} (killed pid={pid})\")\n return 0\n except ProcessLookupError:\n _remove_pid(str(mountpoint))\n \n print(f\"Failed to stop: {mountpoint}\")\n return 1\n\n\ndef cmd_status(args):\n \"\"\"Show mount status.\"\"\"\n pid_dir = Path.home() / '.local' / 'share' / 'avm' / 'mounts'\n \n if not pid_dir.exists():\n print(\"No mounts.\")\n return 0\n \n found = False\n for pid_file in pid_dir.glob('*.pid'):\n mountpoint = '/' + pid_file.stem.replace('_', '/')\n pid = None\n try:\n pid = int(pid_file.read_text().strip())\n except (ValueError, IOError):\n pass\n \n mounted = _is_mounted(mountpoint)\n running = False\n if pid:\n try:\n os.kill(pid, 0)\n running = True\n except ProcessLookupError:\n pass\n \n status = \"mounted\" if mounted else (\"running\" if running else \"stale\")\n print(f\"{mountpoint}: {status} (pid={pid})\")\n found = True\n \n if not found:\n print(\"No mounts.\")\n \n return 0\n\n\ndef cmd_restart(args):\n \"\"\"Restart mounted AVM filesystem.\"\"\"\n # Get current settings from pid file or args\n mountpoint = Path(args.mountpoint).resolve()\n \n # Stop if running\n if _is_mounted(str(mountpoint)) or _get_pid(str(mountpoint)):\n cmd_stop(args)\n import time\n time.sleep(0.5) # Wait for cleanup\n \n # Start again\n args.daemon = True\n return cmd_mount(args)\n","content_type":"text/x-python; charset=utf-8","language":"python","size":58798,"content_sha256":"58a7f1f65d62fc4ab22cce74c590f258a1f0c0f6d66848bafc1e2ad18081c9cf"},{"filename":"avm/gossip.py","content":"\"\"\"\navm/gossip.py - Agent Gossip Protocol\n\nDecentralized knowledge discovery without a central Librarian.\n\nEach agent maintains a digest of \"what I know\" (topics, capabilities).\nAgents periodically exchange digests to discover each other.\n\nArchitecture:\n ┌─────────┐ ┌─────────┐ ┌─────────┐\n │ Agent A │◀─────▶ │ Agent B │◀─────▶ │ Agent C │\n │ digest │ gossip │ digest │ gossip │ digest │\n └─────────┘ └─────────┘ └─────────┘\n \n Each agent knows:\n - Its own topics (from TopicIndex)\n - Other agents' topics (from gossip)\n - How to reach other agents (tell paths)\n\nBenefits over Librarian:\n- No single point of failure\n- O(1) local queries (\"who knows X?\")\n- Eventual consistency via gossip\n- Privacy: only share topics, not content\n\nProtocol:\n1. Agent A generates digest (topic bloom filter + version)\n2. Agent A broadcasts digest to /gossip/{agent_id}.digest\n3. Other agents read digests periodically\n4. To query: check local digest cache, filter by topic overlap\n\"\"\"\n\nimport hashlib\nimport time\nfrom dataclasses import dataclass, field\nfrom datetime import datetime\nfrom .utils import utcnow\nfrom typing import List, Dict, Set, Optional, Tuple\nfrom collections import defaultdict\nimport json\n\nfrom .store import AVMStore\nfrom .topic_index import TopicIndex\n\n\n# Bloom filter parameters\nBLOOM_SIZE = 1024 # bits\nBLOOM_HASHES = 3\n\n\n@dataclass\nclass AgentDigest:\n \"\"\"\n Compact representation of what an agent knows.\n \n Uses a bloom filter for space-efficient topic membership testing.\n \"\"\"\n agent_id: str\n version: int # Incremented on each update\n timestamp: datetime\n \n # Topic bloom filter (compact)\n bloom: bytes = field(default_factory=lambda: bytes(BLOOM_SIZE // 8))\n \n # Topic list (for debugging/display, optional)\n topics: List[str] = field(default_factory=list)\n \n # Metadata\n memory_count: int = 0\n capabilities: List[str] = field(default_factory=list)\n \n def add_topic(self, topic: str):\n \"\"\"Add a topic to the bloom filter\"\"\"\n self.topics.append(topic)\n bloom_array = bytearray(self.bloom)\n \n for i in range(BLOOM_HASHES):\n h = hashlib.md5(f\"{topic}:{i}\".encode()).digest()\n bit_index = int.from_bytes(h[:4], 'big') % BLOOM_SIZE\n byte_index = bit_index // 8\n bit_offset = bit_index % 8\n bloom_array[byte_index] |= (1 \u003c\u003c bit_offset)\n \n self.bloom = bytes(bloom_array)\n \n def might_have_topic(self, topic: str) -> bool:\n \"\"\"Check if this agent might know about a topic (bloom filter query)\"\"\"\n for i in range(BLOOM_HASHES):\n h = hashlib.md5(f\"{topic}:{i}\".encode()).digest()\n bit_index = int.from_bytes(h[:4], 'big') % BLOOM_SIZE\n byte_index = bit_index // 8\n bit_offset = bit_index % 8\n \n if not (self.bloom[byte_index] & (1 \u003c\u003c bit_offset)):\n return False\n \n return True # Might have (false positives possible)\n \n def to_dict(self) -> Dict:\n return {\n \"agent_id\": self.agent_id,\n \"version\": self.version,\n \"timestamp\": self.timestamp.isoformat(),\n \"bloom\": self.bloom.hex(),\n \"topics\": self.topics[:20], # Limit for size\n \"memory_count\": self.memory_count,\n \"capabilities\": self.capabilities,\n }\n \n @classmethod\n def from_dict(cls, d: Dict) -> \"AgentDigest\":\n return cls(\n agent_id=d[\"agent_id\"],\n version=d[\"version\"],\n timestamp=datetime.fromisoformat(d[\"timestamp\"]),\n bloom=bytes.fromhex(d.get(\"bloom\", \"00\" * (BLOOM_SIZE // 8))),\n topics=d.get(\"topics\", []),\n memory_count=d.get(\"memory_count\", 0),\n capabilities=d.get(\"capabilities\", []),\n )\n\n\n@dataclass\nclass GossipMessage:\n \"\"\"A gossip message exchanged between agents\"\"\"\n from_agent: str\n digest: AgentDigest\n ttl: int = 3 # Hops before expiry\n \n def to_dict(self) -> Dict:\n return {\n \"from_agent\": self.from_agent,\n \"digest\": self.digest.to_dict(),\n \"ttl\": self.ttl,\n }\n \n @classmethod\n def from_dict(cls, d: Dict) -> \"GossipMessage\":\n return cls(\n from_agent=d[\"from_agent\"],\n digest=AgentDigest.from_dict(d[\"digest\"]),\n ttl=d.get(\"ttl\", 3),\n )\n\n\nclass GossipStore:\n \"\"\"\n Storage for agent digests.\n \n Stores digests in /gossip/{agent_id}.digest for sharing.\n Maintains local cache of all known agent digests.\n \"\"\"\n \n GOSSIP_PREFIX = \"/gossip\"\n \n def __init__(self, store: AVMStore, topic_index: TopicIndex, agent_id: str):\n self.store = store\n self.topic_index = topic_index\n self.agent_id = agent_id\n \n # Local cache of agent digests\n self._digest_cache: Dict[str, AgentDigest] = {}\n self._version = 0\n \n # Load existing digests\n self._load_digests()\n \n def _load_digests(self):\n \"\"\"Load all digests from storage\"\"\"\n try:\n nodes = self.store.list_nodes(self.GOSSIP_PREFIX, limit=1000)\n for node in nodes:\n if node.path.endswith(\".digest\"):\n try:\n data = json.loads(node.content or \"{}\")\n digest = AgentDigest.from_dict(data)\n self._digest_cache[digest.agent_id] = digest\n except Exception:\n pass\n except Exception:\n pass\n \n def generate_digest(self) -> AgentDigest:\n \"\"\"Generate current agent's digest from TopicIndex\"\"\"\n self._version += 1\n \n digest = AgentDigest(\n agent_id=self.agent_id,\n version=self._version,\n timestamp=utcnow(),\n )\n \n # Add topics from TopicIndex\n all_topics = self.topic_index.all_topics()\n for topic, count in sorted(all_topics.items(), \n key=lambda x: -x[1])[:100]:\n digest.add_topic(topic)\n \n # Count memories\n try:\n private_path = f\"/memory/private/{self.agent_id}\"\n nodes = self.store.list_nodes(private_path, limit=10000)\n digest.memory_count = len(nodes)\n except Exception:\n pass\n \n return digest\n \n def publish_digest(self):\n \"\"\"Publish own digest to gossip namespace\"\"\"\n digest = self.generate_digest()\n \n from .node import AVMNode\n node = AVMNode(\n path=f\"{self.GOSSIP_PREFIX}/{self.agent_id}.digest\",\n content=json.dumps(digest.to_dict(), indent=2),\n meta={\"type\": \"gossip_digest\", \"version\": digest.version},\n )\n self.store.put_node(node)\n \n # Update local cache\n self._digest_cache[self.agent_id] = digest\n \n return digest\n \n def receive_digest(self, message: GossipMessage):\n \"\"\"Receive and process a gossip message\"\"\"\n digest = message.digest\n \n # Check if newer than what we have\n existing = self._digest_cache.get(digest.agent_id)\n if existing and existing.version >= digest.version:\n return False # Already have newer\n \n # Store in cache\n self._digest_cache[digest.agent_id] = digest\n \n # Persist to storage\n from .node import AVMNode\n node = AVMNode(\n path=f\"{self.GOSSIP_PREFIX}/{digest.agent_id}.digest\",\n content=json.dumps(digest.to_dict(), indent=2),\n meta={\"type\": \"gossip_digest\", \"version\": digest.version},\n )\n self.store.put_node(node)\n \n # Forward if TTL > 0 (epidemic spread)\n if message.ttl > 0:\n self._forward_message(message)\n \n return True\n \n def _forward_message(self, message: GossipMessage):\n \"\"\"Forward gossip message to other agents (future: via tell system)\"\"\"\n # For now, just decrement TTL and store\n # In practice, this would use the tell system\n pass\n \n def who_knows(self, topic: str) -> List[Tuple[str, float]]:\n \"\"\"\n Find agents who might know about a topic.\n \n Returns: List of (agent_id, confidence) tuples\n \"\"\"\n results = []\n \n for agent_id, digest in self._digest_cache.items():\n if digest.might_have_topic(topic.lower()):\n # Confidence based on recency\n age_hours = (utcnow() - digest.timestamp).total_seconds() / 3600\n confidence = max(0.1, 1.0 - (age_hours / 168)) # Decay over a week\n results.append((agent_id, confidence))\n \n return sorted(results, key=lambda x: -x[1])\n \n def agents(self) -> List[str]:\n \"\"\"List all known agents\"\"\"\n return list(self._digest_cache.keys())\n \n def get_digest(self, agent_id: str) -> Optional[AgentDigest]:\n \"\"\"Get a specific agent's digest\"\"\"\n return self._digest_cache.get(agent_id)\n \n def refresh(self):\n \"\"\"Refresh all digests from storage\"\"\"\n self._load_digests()\n \n def stats(self) -> Dict:\n \"\"\"Get gossip stats\"\"\"\n return {\n \"known_agents\": len(self._digest_cache),\n \"own_version\": self._version,\n \"agents\": [\n {\n \"id\": d.agent_id,\n \"version\": d.version,\n \"topics\": len(d.topics),\n \"memories\": d.memory_count,\n \"age_hours\": (utcnow() - d.timestamp).total_seconds() / 3600,\n }\n for d in self._digest_cache.values()\n ],\n }\n\n\nclass GossipProtocol:\n \"\"\"\n High-level gossip protocol coordinator.\n \n Usage:\n protocol = GossipProtocol(store, topic_index, \"my_agent\")\n protocol.start() # Begin periodic gossip\n \n # Query\n agents = protocol.who_knows(\"bitcoin\")\n \"\"\"\n \n def __init__(self, store: AVMStore, topic_index: TopicIndex, agent_id: str):\n self.gossip_store = GossipStore(store, topic_index, agent_id)\n self._running = False\n self._thread = None\n \n def start(self, interval_seconds: int = 60):\n \"\"\"Start periodic gossip in background thread\"\"\"\n import threading\n \n self._running = True\n \n def _gossip_loop():\n while self._running:\n try:\n # Publish own digest\n self.gossip_store.publish_digest()\n \n # Refresh known digests\n self.gossip_store.refresh()\n \n except Exception as e:\n print(f\"[Gossip] Error: {e}\")\n \n time.sleep(interval_seconds)\n \n self._thread = threading.Thread(target=_gossip_loop, daemon=True)\n self._thread.start()\n \n def stop(self):\n \"\"\"Stop gossip protocol\"\"\"\n self._running = False\n if self._thread:\n self._thread.join(timeout=5)\n \n def who_knows(self, topic: str) -> List[Tuple[str, float]]:\n \"\"\"Find agents who might know about a topic\"\"\"\n return self.gossip_store.who_knows(topic)\n \n def agents(self) -> List[str]:\n \"\"\"List all known agents\"\"\"\n return self.gossip_store.agents()\n \n def publish(self):\n \"\"\"Manually trigger digest publication\"\"\"\n return self.gossip_store.publish_digest()\n \n def stats(self) -> Dict:\n \"\"\"Get protocol stats\"\"\"\n return self.gossip_store.stats()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":12028,"content_sha256":"1740d372b5f4d1bc824373422c6937ccaf0da9f964ca8eaafdfa6f3bc6bc828c"},{"filename":"avm/graph.py","content":"\"\"\"\nvfs/graph.py - knowledge graph(adjacency list implementation)\n\"\"\"\n\nfrom dataclasses import dataclass, field\nfrom datetime import datetime\nfrom typing import Dict, List, Optional, Set, Tuple\nfrom enum import Enum\n\nfrom .utils import utcnow\n\n\nclass EdgeType(Enum):\n \"\"\"edge type\"\"\"\n PEER = \"peer\" # peer relation (e.g., stocks in same sector)\n PARENT = \"parent\" # parent-child relation (e.g., sector→individual stock)\n CITATION = \"citation\" # citation relation (e.g., research report reference)\n DERIVED = \"derived\" # derived relation (e.g., signal derived from indicator)\n RELATED = \"related\" # general relation\n VERSION_OF = \"version_of\" # versionrelation(append-only memory)\n\n\n@dataclass\nclass Edge:\n \"\"\"\n graph edge\n \"\"\"\n source: str # source node path\n target: str # targetnodepath\n edge_type: EdgeType = EdgeType.RELATED\n weight: float = 1.0\n meta: Dict = field(default_factory=dict)\n created_at: datetime = field(default_factory=utcnow)\n \n def to_tuple(self) -> Tuple[str, str, str, float]:\n return (self.source, self.target, self.edge_type.value, self.weight)\n \n def __repr__(self) -> str:\n return f\"Edge({self.source} --[{self.edge_type.value}]--> {self.target})\"\n\n\nclass KVGraph:\n \"\"\"\n knowledge graph\n \n simple adjacency list implementation, supports:\n - add/delete edge\n - Query all related to a node\n - Filter by edge type\n - Path finding (BFS)\n \"\"\"\n \n def __init__(self):\n # adjacency list: {source: [Edge, ...]}\n self._outgoing: Dict[str, List[Edge]] = {}\n # Reverse index: {target: [Edge, ...]}\n self._incoming: Dict[str, List[Edge]] = {}\n \n def add_edge(self, source: str, target: str, \n edge_type: EdgeType = EdgeType.RELATED,\n weight: float = 1.0,\n meta: Dict = None) -> Edge:\n \"\"\"addedge\"\"\"\n edge = Edge(\n source=source,\n target=target,\n edge_type=edge_type,\n weight=weight,\n meta=meta or {},\n )\n \n if source not in self._outgoing:\n self._outgoing[source] = []\n self._outgoing[source].append(edge)\n \n if target not in self._incoming:\n self._incoming[target] = []\n self._incoming[target].append(edge)\n \n return edge\n \n def remove_edge(self, source: str, target: str, \n edge_type: EdgeType = None) -> int:\n \"\"\"delete edge,returndeletecount\"\"\"\n removed = 0\n \n if source in self._outgoing:\n before = len(self._outgoing[source])\n self._outgoing[source] = [\n e for e in self._outgoing[source]\n if not (e.target == target and \n (edge_type is None or e.edge_type == edge_type))\n ]\n removed = before - len(self._outgoing[source])\n \n if target in self._incoming:\n self._incoming[target] = [\n e for e in self._incoming[target]\n if not (e.source == source and\n (edge_type is None or e.edge_type == edge_type))\n ]\n \n return removed\n \n def get_outgoing(self, node: str, \n edge_type: EdgeType = None) -> List[Edge]:\n \"\"\"get outgoing edges\"\"\"\n edges = self._outgoing.get(node, [])\n if edge_type:\n edges = [e for e in edges if e.edge_type == edge_type]\n return edges\n \n def get_incoming(self, node: str,\n edge_type: EdgeType = None) -> List[Edge]:\n \"\"\"get incoming edges\"\"\"\n edges = self._incoming.get(node, [])\n if edge_type:\n edges = [e for e in edges if e.edge_type == edge_type]\n return edges\n \n def get_neighbors(self, node: str,\n edge_type: EdgeType = None) -> Set[str]:\n \"\"\"Get all neighbor nodes\"\"\"\n neighbors = set()\n for e in self.get_outgoing(node, edge_type):\n neighbors.add(e.target)\n for e in self.get_incoming(node, edge_type):\n neighbors.add(e.source)\n return neighbors\n \n def find_path(self, source: str, target: str, \n max_depth: int = 5) -> Optional[List[str]]:\n \"\"\"BFS path finding\"\"\"\n if source == target:\n return [source]\n \n visited = {source}\n queue = [(source, [source])]\n \n while queue and len(queue[0][1]) \u003c= max_depth:\n current, path = queue.pop(0)\n \n for neighbor in self.get_neighbors(current):\n if neighbor == target:\n return path + [neighbor]\n \n if neighbor not in visited:\n visited.add(neighbor)\n queue.append((neighbor, path + [neighbor]))\n \n return None\n \n def get_subgraph(self, center: str, depth: int = 1) -> \"KVGraph\":\n \"\"\"Get subgraph centered on a node\"\"\"\n subgraph = KVGraph()\n visited = set()\n queue = [(center, 0)]\n \n while queue:\n node, d = queue.pop(0)\n if node in visited or d > depth:\n continue\n visited.add(node)\n \n for edge in self.get_outgoing(node):\n subgraph.add_edge(\n edge.source, edge.target,\n edge.edge_type, edge.weight, edge.meta\n )\n if d \u003c depth:\n queue.append((edge.target, d + 1))\n \n for edge in self.get_incoming(node):\n subgraph.add_edge(\n edge.source, edge.target,\n edge.edge_type, edge.weight, edge.meta\n )\n if d \u003c depth:\n queue.append((edge.source, d + 1))\n \n return subgraph\n \n def to_adjacency_list(self) -> Dict[str, List[Dict]]:\n \"\"\"exportadjacency list\"\"\"\n result = {}\n for source, edges in self._outgoing.items():\n result[source] = [\n {\"target\": e.target, \"type\": e.edge_type.value, \"weight\": e.weight}\n for e in edges\n ]\n return result\n \n @property\n def node_count(self) -> int:\n \"\"\"node count\"\"\"\n nodes = set(self._outgoing.keys()) | set(self._incoming.keys())\n return len(nodes)\n \n @property\n def edge_count(self) -> int:\n \"\"\"edge count\"\"\"\n return sum(len(edges) for edges in self._outgoing.values())\n \n def __repr__(self) -> str:\n return f\"KVGraph({self.node_count} nodes, {self.edge_count} edges)\"\n","content_type":"text/x-python; charset=utf-8","language":"python","size":6787,"content_sha256":"b8869d3d8776ee5396ac3745d09bd4724f17d7f149b217c5ca7780e158438e06"},{"filename":"avm/handlers.py","content":"\"\"\"\nvfs/providers.py - Pluggable Provider System\n\nGeneric handler-based providers for VFS.\n\nHandler Types:\n- file: Local filesystem\n- http: REST API calls\n- script: Execute commands\n- plugin: Python plugins\n- sqlite: SQLite queries\n- redis: Redis key-value\n- s3: S3-compatible storage\n\nUsage:\n providers:\n - pattern: \"/live/prices/*\"\n handler: http\n config:\n url: \"https://api.example.com/prices/${symbol}\"\n method: GET\n headers:\n Authorization: \"Bearer ${API_KEY}\"\n transform: \".price\"\n ttl: 60\n\"\"\"\n\nfrom __future__ import annotations\n\nimport os\nimport re\nimport json\nimport subprocess\nimport fnmatch\nfrom abc import ABC, abstractmethod\nfrom dataclasses import dataclass, field\nfrom typing import Any, Dict, List, Optional, Protocol, Type, Callable\nfrom datetime import datetime, timedelta\nimport importlib\n\nfrom .utils import utcnow\n\n\n# ─── Provider Protocol ────────────────────────────────────\n\nclass Provider(Protocol):\n \"\"\"Protocol for VFS providers\"\"\"\n \n def read(self, path: str, context: Dict[str, Any]) -> Optional[str]:\n \"\"\"Read content from path\"\"\"\n ...\n \n def write(self, path: str, content: str, context: Dict[str, Any]) -> bool:\n \"\"\"Write content to path\"\"\"\n ...\n \n def list(self, prefix: str, context: Dict[str, Any]) -> List[str]:\n \"\"\"List paths under prefix\"\"\"\n ...\n \n def delete(self, path: str, context: Dict[str, Any]) -> bool:\n \"\"\"Delete path\"\"\"\n ...\n\n\n# ─── Base Handler ─────────────────────────────────────────\n\n@dataclass\nclass ProviderConfig:\n \"\"\"Configuration for a provider\"\"\"\n pattern: str\n handler: str\n config: Dict[str, Any] = field(default_factory=dict)\n ttl: int = 0 # Cache TTL in seconds (0 = no cache)\n access: str = \"ro\" # ro, wo, rw\n \n def matches(self, path: str) -> bool:\n \"\"\"Check if path matches pattern\"\"\"\n return fnmatch.fnmatch(path, self.pattern)\n \n def extract_vars(self, path: str) -> Dict[str, str]:\n \"\"\"Extract variables from path based on pattern\"\"\"\n # Convert pattern to regex\n # /live/prices/* -> /live/prices/(?P\u003c_0>.*)\n # /users/{id}/posts -> /users/(?P\u003cid>[^/]+)/posts\n \n regex_pattern = self.pattern\n vars_found = {}\n \n # Handle {name} style variables\n for match in re.finditer(r'\\{(\\w+)\\}', self.pattern):\n var_name = match.group(1)\n regex_pattern = regex_pattern.replace(\n match.group(0), f'(?P\u003c{var_name}>[^/]+)'\n )\n \n # Handle * wildcards\n wildcard_count = 0\n while '*' in regex_pattern:\n regex_pattern = regex_pattern.replace(\n '*', f'(?P\u003c_w{wildcard_count}>.*)', 1\n )\n wildcard_count += 1\n \n # Match and extract\n match = re.match(f'^{regex_pattern}

AVM Memory Skill AI Virtual Memory — 多 Agent 共享记忆系统 核心能力 - 语义搜索 :embedding + FTS5 混合检索 - Token 感知 :自动截断到 token 预算 - 多 Agent :私有/共享空间隔离 + 订阅通知 - 生命周期 :自动衰减、归档、垃圾清理 - TopicIndex :O(1) recall,已知 topic 1 hop 完成 - Librarian :多 Agent 知识路由,95% hop 减少 - Gossip Protocol :去中心化发现,bloom filter digest - Memory Consolidation :睡眠式记忆整合 --- 快速开始 CLI 方式 FUSE 方式 Python API --- 🆕 多 Agent 发现 方式 1: Librarian(中心化) 当你想知道"谁知道某个话题": 延迟 : 1.7ms,95% hop 减少 方式 2: Gossip Protocol(去中心化) 每个 agent 维护一个 digest(bloom filter),周期性交换: 特点 : - 无单点故障 - 本地查询 O(1) - 假阳性 <15%,假阴性 0% - 每 agent 只需 128 bytes digest 何时用哪个? | 场景 | 推荐 | |--…

, path)\n if match:\n vars_found = {k: v for k, v in match.groupdict().items()}\n \n return vars_found\n\n\nclass BaseHandler(ABC):\n \"\"\"Base class for handlers\"\"\"\n \n # Override these in subclasses to provide skill info for agents\n name: str = \"base\"\n description: str = \"Base handler\"\n usage: str = \"\"\n examples: List[str] = []\n \n def __init__(self, config: Dict[str, Any]):\n self.config = config\n self._cache: Dict[str, tuple] = {} # path -> (content, expires_at)\n \n @classmethod\n def skill_info(cls) -> str:\n \"\"\"\n Generate skill documentation for agents.\n Override to customize, or set class attributes.\n \"\"\"\n lines = [f\"# {cls.name} Handler\", \"\"]\n \n if cls.description:\n lines.append(cls.description)\n lines.append(\"\")\n \n if cls.usage:\n lines.append(\"## Usage\")\n lines.append(\"```\")\n lines.append(cls.usage)\n lines.append(\"```\")\n lines.append(\"\")\n \n if cls.examples:\n lines.append(\"## Examples\")\n for ex in cls.examples:\n lines.append(f\"- `{ex}`\")\n lines.append(\"\")\n \n return \"\\n\".join(lines)\n \n def _expand_vars(self, template: str, context: Dict[str, Any]) -> str:\n \"\"\"Expand ${VAR} in template\"\"\"\n result = template\n \n # Expand environment variables\n for match in re.finditer(r'\\$\\{(\\w+)\\}', template):\n var_name = match.group(1)\n value = context.get(var_name) or os.environ.get(var_name, '')\n result = result.replace(match.group(0), str(value))\n \n return result\n \n def _get_cached(self, path: str) -> Optional[str]:\n \"\"\"Get cached content if not expired\"\"\"\n if path in self._cache:\n content, expires_at = self._cache[path]\n if utcnow() \u003c expires_at:\n return content\n del self._cache[path]\n return None\n \n def _set_cached(self, path: str, content: str, ttl: int):\n \"\"\"Cache content with TTL\"\"\"\n if ttl > 0:\n expires_at = utcnow() + timedelta(seconds=ttl)\n self._cache[path] = (content, expires_at)\n \n @abstractmethod\n def read(self, path: str, context: Dict[str, Any]) -> Optional[str]:\n pass\n \n def write(self, path: str, content: str, context: Dict[str, Any]) -> bool:\n return False # Default: read-only\n \n def list(self, prefix: str, context: Dict[str, Any]) -> List[str]:\n return [] # Default: no listing\n \n def delete(self, path: str, context: Dict[str, Any]) -> bool:\n return False # Default: no delete\n\n\n# ─── File Handler ─────────────────────────────────────────\n\nclass FileHandler(BaseHandler):\n \"\"\"\n Local filesystem handler\n \n Config:\n root: Base directory path\n create_dirs: Auto-create directories (default: true)\n \"\"\"\n name = \"file\"\n description = \"Read/write local filesystem files.\"\n usage = \"\"\"pattern: \"/data/{filename}\"\nhandler: file\nconfig:\n root: ~/data\n create_dirs: true\"\"\"\n examples = [\n \"cat /data/notes.md\",\n \"echo 'content' > /data/new.md\"\n ]\n \n def read(self, path: str, context: Dict[str, Any]) -> Optional[str]:\n root = self._expand_vars(self.config.get('root', '.'), context)\n file_path = os.path.join(root, path.lstrip('/'))\n \n try:\n with open(file_path, 'r') as f:\n return f.read()\n except FileNotFoundError:\n return None\n \n def write(self, path: str, content: str, context: Dict[str, Any]) -> bool:\n root = self._expand_vars(self.config.get('root', '.'), context)\n file_path = os.path.join(root, path.lstrip('/'))\n \n if self.config.get('create_dirs', True):\n os.makedirs(os.path.dirname(file_path), exist_ok=True)\n \n with open(file_path, 'w') as f:\n f.write(content)\n return True\n \n def list(self, prefix: str, context: Dict[str, Any]) -> List[str]:\n root = self._expand_vars(self.config.get('root', '.'), context)\n dir_path = os.path.join(root, prefix.lstrip('/'))\n \n if not os.path.isdir(dir_path):\n return []\n \n results = []\n for entry in os.listdir(dir_path):\n full_path = os.path.join(prefix, entry)\n results.append(full_path)\n return results\n \n def delete(self, path: str, context: Dict[str, Any]) -> bool:\n root = self._expand_vars(self.config.get('root', '.'), context)\n file_path = os.path.join(root, path.lstrip('/'))\n \n try:\n os.remove(file_path)\n return True\n except FileNotFoundError:\n return False\n\n\n# ─── HTTP Handler ─────────────────────────────────────────\n\nclass HTTPHandler(BaseHandler):\n \"\"\"\n HTTP/REST API handler\n \n Config:\n url: URL template with ${var} placeholders\n method: HTTP method (default: GET)\n headers: Request headers\n body: Request body template\n transform: jq-style transform (optional)\n timeout: Request timeout in seconds\n \"\"\"\n name = \"http\"\n description = \"Fetch data from HTTP/REST APIs.\"\n usage = \"\"\"pattern: \"/api/prices/{symbol}\"\nhandler: http\nconfig:\n url: \"https://api.example.com/prices/${symbol}\"\n headers:\n Authorization: \"Bearer ${API_KEY}\"\n ttl: 60\"\"\"\n examples = [\n \"cat /api/prices/AAPL\",\n \"cat /api/weather/london\"\n ]\n \n def read(self, path: str, context: Dict[str, Any]) -> Optional[str]:\n import urllib.request\n import urllib.error\n \n # Check cache\n cached = self._get_cached(path)\n if cached is not None:\n return cached\n \n url = self._expand_vars(self.config.get('url', ''), context)\n method = self.config.get('method', 'GET')\n headers = {\n k: self._expand_vars(v, context)\n for k, v in self.config.get('headers', {}).items()\n }\n timeout = self.config.get('timeout', 30)\n \n try:\n req = urllib.request.Request(url, method=method, headers=headers)\n \n if method in ('POST', 'PUT', 'PATCH') and 'body' in self.config:\n body = self._expand_vars(self.config['body'], context)\n req.data = body.encode('utf-8')\n \n with urllib.request.urlopen(req, timeout=timeout) as resp:\n content = resp.read().decode('utf-8')\n \n # Apply transform if specified\n if 'transform' in self.config:\n content = self._transform(content, self.config['transform'])\n \n # Cache if TTL specified\n ttl = self.config.get('ttl', 0)\n self._set_cached(path, content, ttl)\n \n return content\n \n except urllib.error.URLError as e:\n return None\n \n def write(self, path: str, content: str, context: Dict[str, Any]) -> bool:\n import urllib.request\n import urllib.error\n \n url = self._expand_vars(self.config.get('url', ''), context)\n method = self.config.get('write_method', 'POST')\n headers = {\n k: self._expand_vars(v, context)\n for k, v in self.config.get('headers', {}).items()\n }\n \n try:\n req = urllib.request.Request(url, method=method, headers=headers)\n req.data = content.encode('utf-8')\n \n with urllib.request.urlopen(req) as resp:\n return resp.status \u003c 400\n except urllib.error.URLError:\n return False\n \n def _transform(self, content: str, transform: str) -> str:\n \"\"\"Apply jq-style transform to JSON content\"\"\"\n try:\n data = json.loads(content)\n \n # Simple jq-like transforms\n if transform.startswith('.'):\n keys = transform[1:].split('.')\n for key in keys:\n if key and isinstance(data, dict):\n data = data.get(key, {})\n elif key.isdigit() and isinstance(data, list):\n data = data[int(key)] if int(key) \u003c len(data) else None\n \n return json.dumps(data, indent=2) if isinstance(data, (dict, list)) else str(data)\n \n return content\n except json.JSONDecodeError:\n return content\n\n\n# ─── Script Handler ───────────────────────────────────────\n\nclass ScriptHandler(BaseHandler):\n \"\"\"\n Execute scripts/commands handler\n \n Config:\n command: Command template with ${var} placeholders\n shell: Use shell (default: true)\n timeout: Execution timeout in seconds\n cwd: Working directory\n env: Additional environment variables\n \"\"\"\n name = \"script\"\n description = \"Execute shell commands and return output.\"\n usage = \"\"\"pattern: \"/system/status\"\nhandler: script\nconfig:\n command: \"uptime\"\n timeout: 10\"\"\"\n examples = [\n \"cat /system/status\",\n \"cat /system/disk\"\n ]\n \n def read(self, path: str, context: Dict[str, Any]) -> Optional[str]:\n command = self._expand_vars(self.config.get('command', ''), context)\n shell = self.config.get('shell', True)\n timeout = self.config.get('timeout', 30)\n cwd = self.config.get('cwd')\n \n env = os.environ.copy()\n for k, v in self.config.get('env', {}).items():\n env[k] = self._expand_vars(v, context)\n \n try:\n result = subprocess.run(\n command if shell else command.split(),\n shell=shell,\n capture_output=True,\n text=True,\n timeout=timeout,\n cwd=cwd,\n env=env\n )\n return result.stdout\n except subprocess.TimeoutExpired:\n return None\n except Exception:\n return None\n \n def write(self, path: str, content: str, context: Dict[str, Any]) -> bool:\n command = self._expand_vars(self.config.get('write_command', ''), context)\n if not command:\n return False\n \n shell = self.config.get('shell', True)\n timeout = self.config.get('timeout', 30)\n \n try:\n result = subprocess.run(\n command if shell else command.split(),\n shell=shell,\n input=content,\n capture_output=True,\n text=True,\n timeout=timeout\n )\n return result.returncode == 0\n except Exception:\n return False\n\n\n# ─── Plugin Handler ───────────────────────────────────────\n\nclass PluginHandler(BaseHandler):\n \"\"\"\n Python plugin handler\n \n Config:\n plugin: Module path (e.g., \"vfs_plugins.talib\")\n class: Class name (default: \"Provider\")\n init: Initialization arguments\n \"\"\"\n name = \"plugin\"\n description = \"Load and call Python plugins.\"\n usage = \"\"\"pattern: \"/indicators/{symbol}\"\nhandler: plugin\nconfig:\n plugin: \"my_plugins.technical\"\n class: \"IndicatorProvider\"\n init:\n api_key: \"${API_KEY}\" \"\"\"\n examples = [\n \"cat /indicators/AAPL\",\n \"cat /indicators/NVDA\"\n ]\n \n def __init__(self, config: Dict[str, Any]):\n super().__init__(config)\n self._plugin = None\n \n def _get_plugin(self) -> Optional[Provider]:\n if self._plugin is None:\n try:\n module_path = self.config.get('plugin', '')\n class_name = self.config.get('class', 'Provider')\n init_args = self.config.get('init', {})\n \n module = importlib.import_module(module_path)\n cls = getattr(module, class_name)\n self._plugin = cls(**init_args)\n except (ImportError, AttributeError):\n return None\n return self._plugin\n \n def read(self, path: str, context: Dict[str, Any]) -> Optional[str]:\n plugin = self._get_plugin()\n if plugin:\n return plugin.read(path, context)\n return None\n \n def write(self, path: str, content: str, context: Dict[str, Any]) -> bool:\n plugin = self._get_plugin()\n if plugin:\n return plugin.write(path, content, context)\n return False\n \n def list(self, prefix: str, context: Dict[str, Any]) -> List[str]:\n plugin = self._get_plugin()\n if plugin:\n return plugin.list(prefix, context)\n return []\n\n\n# ─── SQLite Handler ───────────────────────────────────────\n\nclass SQLiteHandler(BaseHandler):\n \"\"\"\n SQLite query handler\n \n Config:\n db: Database path\n read_query: SELECT query template\n write_query: INSERT/UPDATE query template\n list_query: List query template\n \"\"\"\n name = \"sqlite\"\n description = \"Query SQLite databases.\"\n usage = \"\"\"pattern: \"/db/users/{id}\"\nhandler: sqlite\nconfig:\n db: \"~/data/app.db\"\n read_query: \"SELECT * FROM users WHERE id = ${id}\" \"\"\"\n examples = [\n \"cat /db/users/123\",\n \"cat /db/orders/recent\"\n ]\n \n def read(self, path: str, context: Dict[str, Any]) -> Optional[str]:\n import sqlite3\n \n db_path = self._expand_vars(self.config.get('db', ':memory:'), context)\n query = self._expand_vars(self.config.get('read_query', ''), context)\n \n if not query:\n return None\n \n try:\n conn = sqlite3.connect(db_path)\n conn.row_factory = sqlite3.Row\n cursor = conn.execute(query)\n rows = [dict(row) for row in cursor.fetchall()]\n conn.close()\n \n return json.dumps(rows, indent=2, default=str)\n except sqlite3.Error:\n return None\n \n def write(self, path: str, content: str, context: Dict[str, Any]) -> bool:\n import sqlite3\n \n db_path = self._expand_vars(self.config.get('db', ':memory:'), context)\n query = self._expand_vars(self.config.get('write_query', ''), context)\n \n if not query:\n return False\n \n try:\n conn = sqlite3.connect(db_path)\n context['_content'] = content\n conn.execute(query, context)\n conn.commit()\n conn.close()\n return True\n except sqlite3.Error:\n return False\n\n\n# ─── Handler Registry ─────────────────────────────────────\n\nHANDLERS: Dict[str, Type[BaseHandler]] = {\n 'file': FileHandler,\n 'http': HTTPHandler,\n 'script': ScriptHandler,\n 'plugin': PluginHandler,\n 'sqlite': SQLiteHandler,\n}\n\n\ndef register_handler(name: str, handler_class: Type[BaseHandler]):\n \"\"\"Register a custom handler\"\"\"\n HANDLERS[name] = handler_class\n\n\ndef handler(name: str, description: str = \"\", usage: str = \"\", examples: List[str] = None):\n \"\"\"\n Decorator to register a handler with auto-generated skill info.\n \n Usage:\n @handler(\"redis\", \n description=\"Redis key-value store\",\n usage=\"pattern: /cache/{key}\\\\nhandler: redis\",\n examples=[\"cat /cache/session\"])\n class RedisHandler(BaseHandler):\n def read(self, path, context):\n ...\n \n Or minimal:\n @handler(\"redis\")\n class RedisHandler(BaseHandler):\n '''Redis key-value store for caching.'''\n ...\n \"\"\"\n def decorator(cls: Type[BaseHandler]) -> Type[BaseHandler]:\n # Auto-extract from docstring if not provided\n cls.name = name\n cls.description = description or (cls.__doc__ or \"\").strip().split('\\n')[0]\n cls.usage = usage\n cls.examples = examples or []\n \n # Auto-generate usage from __init__ signature if not provided\n if not usage and hasattr(cls, '__init__'):\n import inspect\n sig = inspect.signature(cls.__init__)\n params = [p for p in sig.parameters.keys() if p not in ('self', 'config')]\n if params:\n cls.usage = f\"# Config params: {', '.join(params)}\"\n \n # Register\n HANDLERS[name] = cls\n return cls\n \n return decorator\n\n\ndef get_handlers_skill_info() -> str:\n \"\"\"\n Get skill info for all registered handlers.\n Agents can read this to learn how to use handlers.\n \"\"\"\n lines = [\"# Available Handlers\", \"\"]\n lines.append(\"These handlers are available for custom providers.\")\n lines.append(\"\")\n \n for name, handler_class in HANDLERS.items():\n lines.append(f\"## {name}\")\n lines.append(\"\")\n if hasattr(handler_class, 'description'):\n lines.append(handler_class.description)\n lines.append(\"\")\n if hasattr(handler_class, 'usage') and handler_class.usage:\n lines.append(\"```yaml\")\n lines.append(handler_class.usage)\n lines.append(\"```\")\n lines.append(\"\")\n if hasattr(handler_class, 'examples') and handler_class.examples:\n lines.append(\"Examples:\")\n for ex in handler_class.examples:\n lines.append(f\"- `{ex}`\")\n lines.append(\"\")\n \n return \"\\n\".join(lines)\n\n\n# ─── Provider Manager ─────────────────────────────────────\n\nclass ProviderManager:\n \"\"\"Manages multiple providers and routes requests\"\"\"\n \n def __init__(self, configs: List[Dict[str, Any]] = None):\n self.providers: List[tuple] = [] # (ProviderConfig, BaseHandler)\n \n if configs:\n for cfg in configs:\n self.add_provider(cfg)\n \n def add_provider(self, config: Dict[str, Any]):\n \"\"\"Add a provider from config dict\"\"\"\n provider_config = ProviderConfig(\n pattern=config.get('pattern', '/*'),\n handler=config.get('handler', 'file'),\n config=config.get('config', {}),\n ttl=config.get('ttl', 0),\n access=config.get('access', 'ro'),\n )\n \n handler_class = HANDLERS.get(provider_config.handler)\n if handler_class:\n handler = handler_class(provider_config.config)\n self.providers.append((provider_config, handler))\n \n def _find_handler(self, path: str) -> Optional[tuple]:\n \"\"\"Find matching handler for path\"\"\"\n for config, handler in self.providers:\n if config.matches(path):\n return config, handler\n return None\n \n def read(self, path: str, context: Dict[str, Any] = None) -> Optional[str]:\n \"\"\"Read from matching provider\"\"\"\n result = self._find_handler(path)\n if not result:\n return None\n \n config, handler = result\n ctx = context or {}\n ctx.update(config.extract_vars(path))\n \n return handler.read(path, ctx)\n \n def write(self, path: str, content: str, context: Dict[str, Any] = None) -> bool:\n \"\"\"Write to matching provider\"\"\"\n result = self._find_handler(path)\n if not result:\n return False\n \n config, handler = result\n \n # Check access\n if 'w' not in config.access:\n return False\n \n ctx = context or {}\n ctx.update(config.extract_vars(path))\n \n return handler.write(path, content, ctx)\n \n def list(self, prefix: str, context: Dict[str, Any] = None) -> List[str]:\n \"\"\"List from matching provider\"\"\"\n result = self._find_handler(prefix)\n if not result:\n return []\n \n config, handler = result\n ctx = context or {}\n ctx.update(config.extract_vars(prefix))\n \n return handler.list(prefix, ctx)\n \n def delete(self, path: str, context: Dict[str, Any] = None) -> bool:\n \"\"\"Delete from matching provider\"\"\"\n result = self._find_handler(path)\n if not result:\n return False\n \n config, handler = result\n \n if 'w' not in config.access:\n return False\n \n ctx = context or {}\n ctx.update(config.extract_vars(path))\n \n return handler.delete(path, ctx)\n","content_type":"text/x-python; charset=utf-8","language":"python","size":24202,"content_sha256":"0bed198451159b721a3cbf7f6bf512d5a794d3d1f824da234a19a7e42a86d535"},{"filename":"avm/http_client.py","content":"\"\"\"\nAVM HTTP Client — for Windows or no-FUSE environments\n\"\"\"\nimport os\nimport json\n\ntry:\n import urllib.request as req\nexcept ImportError:\n pass\n\nAVM_SERVER_URL = os.environ.get(\"AVM_SERVER_URL\", \"http://localhost:8765\")\n\n\ndef recall(query: str, max_tokens: int = 500) -> str:\n data = json.dumps({\"query\": query, \"max_tokens\": max_tokens}).encode()\n r = req.urlopen(req.Request(\n f\"{AVM_SERVER_URL}/recall\",\n data=data, headers={\"Content-Type\": \"application/json\"}\n ))\n return json.loads(r.read())[\"results\"]\n\n\ndef remember(content: str, importance: float = 0.5) -> str:\n data = json.dumps({\"content\": content, \"importance\": importance}).encode()\n r = req.urlopen(req.Request(\n f\"{AVM_SERVER_URL}/remember\",\n data=data, headers={\"Content-Type\": \"application/json\"}\n ))\n return json.loads(r.read())[\"path\"]\n","content_type":"text/x-python; charset=utf-8","language":"python","size":867,"content_sha256":"2a49d47f733cb73f1d4b5b43f88b59dc2c43d5cb95007bb20242326c5f0f28e6"},{"filename":"avm/index_handler.py","content":"\"\"\"\navm/index_handler.py - Index Handler for Structured Data\n\nIndex is a special handler for semi-structured/unstructured content\nthat tracks file states without storing full content.\n\nFeatures:\n- Agent-provided or hook-generated descriptions\n- Status tracking (clean/dirty/missing)\n- Scan hooks for auto-generation\n\nUsage:\n providers:\n - pattern: \"/index/project/{name}/**\"\n handler: index\n config:\n type: project\n root: ~/projects\n scan_hook: builtin:project_scan\n\"\"\"\n\nimport json\nimport time\nimport re\nimport threading\nfrom pathlib import Path\nfrom dataclasses import dataclass, field, asdict\nfrom typing import Dict, List, Optional, Any, Callable\nfrom abc import ABC, abstractmethod\n\nfrom .handlers import BaseHandler, handler\n\n\n# ─── Extractors ─────────────────────────────────────────────\n\nEXTRACTORS: Dict[str, Callable[[Path], str]] = {}\n\n\ndef extractor(extension: str):\n \"\"\"Decorator to register a file extractor.\"\"\"\n def decorator(func: Callable[[Path], str]):\n EXTRACTORS[extension] = func\n return func\n return decorator\n\n\n@extractor(\".py\")\ndef extract_python(path: Path) -> str:\n \"\"\"Extract Python function/class signatures.\"\"\"\n signatures = []\n try:\n for line in path.read_text(errors='ignore').split('\\n'):\n stripped = line.lstrip()\n if stripped.startswith(('def ', 'async def ', 'class ')):\n signatures.append(line[:100].rstrip())\n except Exception:\n pass\n return '\\n'.join(signatures)\n\n\n@extractor(\".js\")\ndef extract_javascript(path: Path) -> str:\n \"\"\"Extract JavaScript function signatures.\"\"\"\n signatures = []\n patterns = [\n r'^\\s*(async\\s+)?function\\s+\\w+',\n r'^\\s*(const|let|var)\\s+\\w+\\s*=\\s*(async\\s+)?\\(',\n r'^\\s*(const|let|var)\\s+\\w+\\s*=\\s*(async\\s+)?\\w+\\s*=>',\n r'^\\s*(export\\s+)?(async\\s+)?function',\n ]\n try:\n for line in path.read_text(errors='ignore').split('\\n'):\n for pattern in patterns:\n if re.match(pattern, line):\n signatures.append(line[:100].rstrip())\n break\n except Exception:\n pass\n return '\\n'.join(signatures)\n\n\n@extractor(\".ts\")\ndef extract_typescript(path: Path) -> str:\n \"\"\"Extract TypeScript function signatures.\"\"\"\n return extract_javascript(path) # Similar patterns\n\n\n@extractor(\".go\")\ndef extract_go(path: Path) -> str:\n \"\"\"Extract Go function signatures.\"\"\"\n signatures = []\n try:\n for line in path.read_text(errors='ignore').split('\\n'):\n if re.match(r'^func\\s+', line):\n signatures.append(line[:100].rstrip())\n except Exception:\n pass\n return '\\n'.join(signatures)\n\n\n@extractor(\".rs\")\ndef extract_rust(path: Path) -> str:\n \"\"\"Extract Rust function signatures.\"\"\"\n signatures = []\n try:\n for line in path.read_text(errors='ignore').split('\\n'):\n stripped = line.lstrip()\n if re.match(r'(pub\\s+)?(async\\s+)?fn\\s+', stripped):\n signatures.append(line[:100].rstrip())\n except Exception:\n pass\n return '\\n'.join(signatures)\n\n\n# ─── Index Data Models ─────────────────────────────────────\n\n@dataclass\nclass FileEntry:\n \"\"\"A file in the index.\"\"\"\n path: str\n description: str = \"\"\n mtime: float = 0.0\n tags: List[str] = field(default_factory=list)\n meta: Dict[str, Any] = field(default_factory=dict)\n\n\n@dataclass \nclass IndexEntry:\n \"\"\"An index entry (e.g., a project).\"\"\"\n name: str\n root: str\n description: str = \"\"\n files: List[FileEntry] = field(default_factory=list)\n indexed_at: float = 0.0\n tags: List[str] = field(default_factory=list)\n meta: Dict[str, Any] = field(default_factory=dict)\n \n def to_dict(self) -> Dict:\n return asdict(self)\n \n @classmethod\n def from_dict(cls, data: Dict) -> \"IndexEntry\":\n files = [FileEntry(**f) for f in data.pop(\"files\", [])]\n return cls(files=files, **data)\n \n def to_readable(self) -> str:\n \"\"\"Generate human-readable content for display.\"\"\"\n lines = [f\"# {self.name}\", \"\"]\n if self.description:\n lines.append(self.description)\n lines.append(\"\")\n \n if self.tags:\n lines.append(f\"Tags: {', '.join(self.tags)}\")\n lines.append(\"\")\n \n if self.files:\n lines.append(\"## Files\")\n for f in self.files:\n desc = f\": {f.description}\" if f.description else \"\"\n lines.append(f\"- {f.path}{desc}\")\n \n return \"\\n\".join(lines)\n \n def check_status(self) -> Dict[str, str]:\n \"\"\"Check status of all files.\"\"\"\n root = Path(self.root).expanduser()\n status = {}\n \n for f in self.files:\n full_path = root / f.path\n if not full_path.exists():\n status[f.path] = \"missing\"\n elif full_path.stat().st_mtime > f.mtime:\n status[f.path] = \"dirty\"\n else:\n status[f.path] = \"clean\"\n \n return status\n \n def status_report(self) -> str:\n \"\"\"Generate status report.\"\"\"\n status = self.check_status()\n lines = []\n \n clean = dirty = missing = 0\n for path, state in status.items():\n if state == \"clean\":\n clean += 1\n elif state == \"dirty\":\n dirty += 1\n lines.append(f\"{path}: DIRTY\")\n else:\n missing += 1\n lines.append(f\"{path}: MISSING\")\n \n summary = f\"[{clean} clean, {dirty} dirty, {missing} missing]\"\n if lines:\n return summary + \"\\n\" + \"\\n\".join(lines)\n return summary + \"\\nAll files clean.\"\n\n\n# ─── Scan Hooks ─────────────────────────────────────────────\n\nclass ScanHook(ABC):\n \"\"\"Base class for scan hooks.\"\"\"\n \n @abstractmethod\n def scan(self, root: str, **kwargs) -> IndexEntry:\n \"\"\"Scan and generate index.\"\"\"\n pass\n\n\nclass ProjectScanHook(ScanHook):\n \"\"\"Scan a project directory with pluggable extractors.\"\"\"\n \n # File patterns to ignore\n IGNORE_PATTERNS = {\n \"__pycache__\", \".git\", \".venv\", \"node_modules\",\n \".pyc\", \".pyo\", \".so\", \".dylib\", \".egg-info\",\n \".DS_Store\", \"Thumbs.db\"\n }\n \n # File extensions to index\n INDEX_EXTENSIONS = {\n \".py\", \".js\", \".ts\", \".go\", \".rs\", \".java\", \".c\", \".cpp\", \".h\",\n \".md\", \".txt\", \".yaml\", \".yml\", \".json\", \".toml\",\n \".sh\", \".bash\", \".zsh\"\n }\n \n def __init__(self, extractors: List[str] = None):\n \"\"\"\n Args:\n extractors: List of extensions to extract signatures from.\n None means use all available extractors.\n \"\"\"\n self.enabled_extractors = extractors # e.g., [\".py\", \".go\"]\n \n def scan(self, root: str, name: str = None, **kwargs) -> IndexEntry:\n root_path = Path(root).expanduser().resolve()\n name = name or root_path.name\n \n files = []\n for f in root_path.rglob(\"*\"):\n # Skip ignored\n if any(p in str(f) for p in self.IGNORE_PATTERNS):\n continue\n \n if not f.is_file():\n continue\n \n # Only index known extensions\n if f.suffix.lower() not in self.INDEX_EXTENSIONS:\n continue\n \n rel_path = str(f.relative_to(root_path))\n \n # Extract signatures if extractor available\n description = \"\"\n ext = f.suffix.lower()\n if ext in EXTRACTORS:\n if self.enabled_extractors is None or ext in self.enabled_extractors:\n description = EXTRACTORS[ext](f)\n \n files.append(FileEntry(\n path=rel_path,\n mtime=f.stat().st_mtime,\n description=description,\n ))\n \n return IndexEntry(\n name=name,\n root=str(root_path),\n files=files,\n indexed_at=time.time(),\n )\n\n\n# Hook registry\nSCAN_HOOKS: Dict[str, ScanHook] = {\n \"project\": ProjectScanHook(),\n \"code\": ProjectScanHook(extractors=[\".py\", \".js\", \".ts\", \".go\", \".rs\"]),\n}\n\n\ndef register_scan_hook(name: str, hook: ScanHook):\n \"\"\"Register a custom scan hook.\"\"\"\n SCAN_HOOKS[name] = hook\n\n\n# ─── Index Watcher ─────────────────────────────────────────────\n\nclass IndexWatcher:\n \"\"\"Watch for file changes and auto-rescan.\"\"\"\n \n _watchers: Dict[str, \"IndexWatcher\"] = {}\n \n def __init__(self, store: \"IndexStore\", index_type: str, name: str, \n interval: float = 5.0):\n self.store = store\n self.index_type = index_type\n self.name = name\n self.interval = interval\n self._running = False\n self._thread: Optional[threading.Thread] = None\n self._end_time: float = 0\n self._updates: int = 0\n \n @classmethod\n def get(cls, index_type: str, name: str) -> Optional[\"IndexWatcher\"]:\n key = f\"{index_type}/{name}\"\n return cls._watchers.get(key)\n \n def start(self, duration: float = 300):\n \"\"\"Start watching for duration seconds.\"\"\"\n if self._running:\n # Extend duration\n self._end_time = time.time() + duration\n return\n \n self._running = True\n self._end_time = time.time() + duration\n self._updates = 0\n \n key = f\"{self.index_type}/{self.name}\"\n IndexWatcher._watchers[key] = self\n \n self._thread = threading.Thread(target=self._watch_loop, daemon=True)\n self._thread.start()\n \n def stop(self):\n \"\"\"Stop watching.\"\"\"\n self._running = False\n key = f\"{self.index_type}/{self.name}\"\n IndexWatcher._watchers.pop(key, None)\n \n def status(self) -> str:\n \"\"\"Get watch status.\"\"\"\n if not self._running:\n return \"Not watching\"\n \n remaining = max(0, self._end_time - time.time())\n mins, secs = divmod(int(remaining), 60)\n return f\"Watching: {mins}m{secs}s remaining, {self._updates} updates\"\n \n def _watch_loop(self):\n \"\"\"Background watch loop.\"\"\"\n while self._running and time.time() \u003c self._end_time:\n try:\n entry = self.store.get(self.index_type, self.name)\n if entry:\n status = entry.check_status()\n dirty = [p for p, s in status.items() if s == \"dirty\"]\n \n if dirty:\n # Rescan dirty files\n self._rescan_dirty(entry, dirty)\n self._updates += 1\n except Exception:\n pass\n \n time.sleep(self.interval)\n \n self.stop()\n \n def _rescan_dirty(self, entry: IndexEntry, dirty_paths: List[str]):\n \"\"\"Rescan dirty files and update entry.\"\"\"\n root = Path(entry.root).expanduser()\n \n for file_entry in entry.files:\n if file_entry.path in dirty_paths:\n full_path = root / file_entry.path\n if full_path.exists():\n file_entry.mtime = full_path.stat().st_mtime\n \n # Re-extract if applicable\n ext = full_path.suffix.lower()\n if ext in EXTRACTORS:\n file_entry.description = EXTRACTORS[ext](full_path)\n \n self.store.save(self.index_type, entry)\n\n\n# ─── Index Store ─────────────────────────────────────────────\n\nclass IndexStore:\n \"\"\"Storage for index entries.\"\"\"\n \n def __init__(self, db_path: str = None):\n self._indices: Dict[str, Dict[str, IndexEntry]] = {} # type -> name -> entry\n self._db_path = db_path\n self._load()\n \n def _storage_path(self) -> Path:\n if self._db_path:\n return Path(self._db_path).parent / \"indices.json\"\n return Path.home() / \".local\" / \"share\" / \"avm\" / \"indices.json\"\n \n def _load(self):\n path = self._storage_path()\n if path.exists():\n try:\n data = json.loads(path.read_text())\n for index_type, entries in data.items():\n self._indices[index_type] = {}\n for name, entry_data in entries.items():\n self._indices[index_type][name] = IndexEntry.from_dict(entry_data)\n except (json.JSONDecodeError, KeyError):\n pass\n \n def _save(self):\n path = self._storage_path()\n path.parent.mkdir(parents=True, exist_ok=True)\n \n data = {}\n for index_type, entries in self._indices.items():\n data[index_type] = {\n name: entry.to_dict() \n for name, entry in entries.items()\n }\n \n path.write_text(json.dumps(data, indent=2, default=str))\n \n def get(self, index_type: str, name: str) -> Optional[IndexEntry]:\n return self._indices.get(index_type, {}).get(name)\n \n def save(self, index_type: str, entry: IndexEntry):\n if index_type not in self._indices:\n self._indices[index_type] = {}\n self._indices[index_type][entry.name] = entry\n self._save()\n \n def delete(self, index_type: str, name: str) -> bool:\n if index_type in self._indices and name in self._indices[index_type]:\n del self._indices[index_type][name]\n self._save()\n return True\n return False\n \n def list(self, index_type: str) -> List[str]:\n return list(self._indices.get(index_type, {}).keys())\n \n def list_all(self) -> Dict[str, List[str]]:\n return {t: list(e.keys()) for t, e in self._indices.items()}\n\n\n# ─── Index Handler ─────────────────────────────────────────────\n\n@handler(\"index\",\n description=\"Structured index with status tracking and scan hooks\",\n usage=\"\"\"pattern: \"/index/{type}/{name}\"\nhandler: index\nconfig:\n root: ~/projects\n scan_hook: project\"\"\",\n examples=[\n \"cat /index/project/myapp\",\n \"cat /index/project/myapp:status\",\n \"echo 'scan' > /index/project/myapp:scan\",\n ])\nclass IndexHandler(BaseHandler):\n \"\"\"\n Handler for structured indices.\n \n Virtual suffixes:\n - :status - Check file states\n - :scan - Trigger scan hook\n - :files - List files only\n - :json - Raw JSON output\n \"\"\"\n \n def __init__(self, config: Dict[str, Any]):\n super().__init__(config)\n self.store = IndexStore()\n self.root = config.get(\"root\", \"\")\n self.scan_hook_name = config.get(\"scan_hook\", \"project\")\n \n def _parse_path(self, path: str) -> tuple:\n \"\"\"Parse /index/{type}/{name}[:suffix] -> (type, name, suffix)\"\"\"\n parts = path.strip(\"/\").split(\"/\")\n \n if len(parts) \u003c 2:\n return (None, None, None)\n \n # Skip 'index' prefix if present\n if parts[0] == \"index\":\n parts = parts[1:]\n \n if len(parts) \u003c 2:\n return (parts[0] if parts else None, None, None)\n \n index_type = parts[0]\n name_part = parts[1]\n \n # Check for suffix\n suffix = None\n for s in (\":status\", \":scan\", \":files\", \":json\", \":watch\", \":sigs\"):\n if name_part.endswith(s):\n name_part = name_part[:-len(s)]\n suffix = s\n break\n \n return (index_type, name_part, suffix)\n \n def read(self, path: str, context: Dict[str, Any]) -> Optional[str]:\n index_type, name, suffix = self._parse_path(path)\n \n # List all types\n if not index_type:\n all_indices = self.store.list_all()\n return json.dumps(all_indices, indent=2)\n \n # List entries of type\n if not name:\n entries = self.store.list(index_type)\n return \"\\n\".join(entries) if entries else \"(empty)\"\n \n # Get entry\n entry = self.store.get(index_type, name)\n \n if suffix == \":scan\":\n # Trigger scan\n hook = SCAN_HOOKS.get(self.scan_hook_name)\n if not hook:\n return f\"Error: Unknown scan hook '{self.scan_hook_name}'\"\n \n root = self.root or context.get(\"root\", \"\")\n if not root:\n return \"Error: No root path configured\"\n \n project_root = str(Path(root).expanduser() / name)\n entry = hook.scan(project_root, name=name)\n self.store.save(index_type, entry)\n \n # Count files with signatures\n with_sigs = sum(1 for f in entry.files if f.description)\n return f\"Scanned: {len(entry.files)} files, {with_sigs} with signatures\"\n \n if suffix == \":watch\":\n # Get watch status\n watcher = IndexWatcher.get(index_type, name)\n if watcher:\n return watcher.status()\n return \"Not watching\"\n \n if not entry:\n return f\"Index '{index_type}/{name}' not found. Use :scan to create.\"\n \n if suffix == \":status\":\n return entry.status_report()\n elif suffix == \":files\":\n return \"\\n\".join(f.path for f in entry.files)\n elif suffix == \":json\":\n return json.dumps(entry.to_dict(), indent=2, default=str)\n elif suffix == \":sigs\":\n # Show only files with signatures\n lines = []\n for f in entry.files:\n if f.description:\n lines.append(f\"## {f.path}\")\n lines.append(f.description)\n lines.append(\"\")\n return \"\\n\".join(lines) if lines else \"(no signatures extracted)\"\n else:\n return entry.to_readable()\n \n def write(self, path: str, content: str, context: Dict[str, Any]) -> bool:\n index_type, name, suffix = self._parse_path(path)\n \n if not index_type or not name:\n return False\n \n if suffix == \":scan\":\n # Trigger scan on write\n self.read(path, context)\n return True\n \n if suffix == \":watch\":\n # Start/stop watch\n content = content.strip().lower()\n \n if content in (\"stop\", \"off\", \"0\"):\n watcher = IndexWatcher.get(index_type, name)\n if watcher:\n watcher.stop()\n return True\n \n # Start watching\n try:\n duration = float(content) if content else 300\n except ValueError:\n duration = 300\n \n entry = self.store.get(index_type, name)\n if not entry:\n return False\n \n watcher = IndexWatcher(self.store, index_type, name)\n watcher.start(duration)\n return True\n \n # Update entry\n entry = self.store.get(index_type, name)\n if not entry:\n # Create new entry\n entry = IndexEntry(\n name=name,\n root=str(Path(self.root).expanduser() / name) if self.root else \"\",\n indexed_at=time.time(),\n )\n \n # Update description\n entry.description = content.strip()\n self.store.save(index_type, entry)\n return True\n \n def delete(self, path: str, context: Dict[str, Any]) -> bool:\n index_type, name, _ = self._parse_path(path)\n if index_type and name:\n return self.store.delete(index_type, name)\n return False\n \n def list(self, prefix: str, context: Dict[str, Any]) -> List[str]:\n index_type, _, _ = self._parse_path(prefix)\n if index_type:\n return self.store.list(index_type)\n return list(self.store.list_all().keys())\n","content_type":"text/x-python; charset=utf-8","language":"python","size":20731,"content_sha256":"cf3f4f0868156a97f4864299831d3270713491b35d6a8a2e7acb3b7802ade530"},{"filename":"avm/librarian.py","content":"\"\"\"\nLibrarian: Global Knowledge Router for Multi-Agent Systems\n\nThe Librarian is a privileged service that:\n1. Has global visibility (ignores permissions for metadata)\n2. Routes queries to appropriate content or agents\n3. Respects permissions when returning actual content\n4. Suggests collaboration when content is inaccessible\n\nUsage:\n librarian = Librarian(store)\n response = librarian.query(\"agent_a\", \"NVDA analysis\")\n \n if response.accessible:\n # Direct access\n for node in response.accessible:\n print(node.content)\n else:\n # Need collaboration\n for suggestion in response.suggestions:\n print(f\"Ask {suggestion.agent} about {suggestion.topic}\")\n\"\"\"\n\nfrom dataclasses import dataclass, field\nfrom typing import List, Dict, Any, Optional, Set\nfrom datetime import datetime\nimport re\n\nfrom .node import AVMNode\nfrom .store import AVMStore\nfrom .config import AVMConfig\nfrom typing import TYPE_CHECKING\n\nif TYPE_CHECKING:\n from .embedding import EmbeddingStore\n\n\n@dataclass\nclass AgentInfo:\n \"\"\"Agent registry entry\"\"\"\n id: str\n display_name: str = \"\"\n capabilities: List[str] = field(default_factory=list)\n description: str = \"\"\n last_active: Optional[datetime] = None\n memory_count: int = 0\n \n def to_dict(self) -> Dict[str, Any]:\n return {\n \"id\": self.id,\n \"display_name\": self.display_name or self.id,\n \"capabilities\": self.capabilities,\n \"description\": self.description,\n \"last_active\": self.last_active.isoformat() if self.last_active else None,\n \"memory_count\": self.memory_count,\n }\n\n\n@dataclass\nclass SearchMatch:\n \"\"\"A search match with metadata\"\"\"\n path: str\n score: float\n owner: Optional[str] = None\n topic: Optional[str] = None\n snippet: str = \"\"\n accessible: bool = False\n\n\n@dataclass\nclass CollaborationSuggestion:\n \"\"\"Suggestion to ask another agent\"\"\"\n agent: str\n topic: str\n relevance: float\n reason: str = \"\"\n \n def to_dict(self) -> Dict[str, Any]:\n return {\n \"agent\": self.agent,\n \"topic\": self.topic,\n \"relevance\": self.relevance,\n \"reason\": self.reason,\n }\n\n\n@dataclass\nclass LibrarianResponse:\n \"\"\"Response from a Librarian query\"\"\"\n query: str\n requester: str\n accessible: List[AVMNode] = field(default_factory=list)\n suggestions: List[CollaborationSuggestion] = field(default_factory=list)\n directory: List[AgentInfo] = field(default_factory=list)\n total_matches: int = 0\n accessible_count: int = 0\n \n def to_dict(self) -> Dict[str, Any]:\n return {\n \"query\": self.query,\n \"requester\": self.requester,\n \"accessible_count\": self.accessible_count,\n \"total_matches\": self.total_matches,\n \"accessible\": [n.to_dict() for n in self.accessible],\n \"suggestions\": [s.to_dict() for s in self.suggestions],\n \"directory\": [a.to_dict() for a in self.directory],\n }\n\n\nclass PrivacyPolicy:\n \"\"\"Controls what metadata can be revealed\"\"\"\n \n FULL = \"full\" # Reveal existence + owner + topic\n OWNER_ONLY = \"owner\" # Only reveal who to ask\n EXISTENCE = \"existence\" # Only reveal \"someone knows\"\n NONE = \"none\" # Don't reveal anything\n \n def __init__(self, level: str = \"owner\"):\n self.level = level\n \n def can_reveal_existence(self) -> bool:\n return self.level in (self.FULL, self.OWNER_ONLY, self.EXISTENCE)\n \n def can_reveal_owner(self) -> bool:\n return self.level in (self.FULL, self.OWNER_ONLY)\n \n def can_reveal_topic(self) -> bool:\n return self.level == self.FULL\n\n\nclass Librarian:\n \"\"\"\n Global Knowledge Router\n \n The Librarian has privileged access to all memory metadata,\n but respects permissions when returning content.\n \"\"\"\n \n def __init__(self, store: AVMStore, config: AVMConfig = None,\n privacy_policy: PrivacyPolicy = None,\n embedding_store: \"EmbeddingStore\" = None):\n self.store = store\n self.config = config\n self.privacy = privacy_policy or PrivacyPolicy(\"owner\")\n self.embedding_store = embedding_store\n self._agent_registry: Dict[str, AgentInfo] = {}\n self._rebuild_registry()\n \n def _rebuild_registry(self):\n \"\"\"Build agent registry from existing memory paths\"\"\"\n # Scan /memory/private/* for agent IDs\n try:\n nodes = self.store.list_nodes(\"/memory/private/\", limit=1000)\n agent_paths: Dict[str, List[str]] = {}\n \n for node in nodes:\n # Extract agent ID from path\n parts = node.path.split(\"/\")\n if len(parts) >= 4:\n agent_id = parts[3] # /memory/private/{agent_id}/...\n if agent_id not in agent_paths:\n agent_paths[agent_id] = []\n agent_paths[agent_id].append(node.path)\n \n # Build registry\n for agent_id, paths in agent_paths.items():\n # Infer capabilities from path/content keywords\n capabilities = self._infer_capabilities(paths)\n \n self._agent_registry[agent_id] = AgentInfo(\n id=agent_id,\n capabilities=capabilities,\n memory_count=len(paths),\n )\n except Exception:\n pass # Registry building is best-effort\n \n def _infer_capabilities(self, paths: List[str]) -> List[str]:\n \"\"\"Infer agent capabilities from their memory paths\"\"\"\n capabilities = set()\n \n keywords = {\n \"market\": [\"market\", \"trading\", \"stock\", \"nvda\", \"btc\"],\n \"code\": [\"code\", \"programming\", \"bug\", \"feature\", \"test\"],\n \"research\": [\"paper\", \"research\", \"analysis\", \"study\"],\n \"personal\": [\"preference\", \"setting\", \"config\"],\n }\n \n for path in paths:\n path_lower = path.lower()\n for capability, kws in keywords.items():\n if any(kw in path_lower for kw in kws):\n capabilities.add(capability)\n \n return list(capabilities)\n \n def _get_owner(self, path: str) -> Optional[str]:\n \"\"\"Extract owner agent ID from path\"\"\"\n if path.startswith(\"/memory/private/\"):\n parts = path.split(\"/\")\n if len(parts) >= 4:\n return parts[3]\n return None\n \n def _extract_topic(self, path: str) -> str:\n \"\"\"Extract topic from path\"\"\"\n # /memory/private/agent/market/nvda.md → \"market/nvda\"\n parts = path.split(\"/\")\n if len(parts) >= 5:\n return \"/\".join(parts[4:]).replace(\".md\", \"\")\n return path.split(\"/\")[-1].replace(\".md\", \"\")\n \n def _can_access(self, requester: str, path: str) -> bool:\n \"\"\"Check if requester can access this path\"\"\"\n # Shared memory is accessible to all\n if path.startswith(\"/memory/shared/\"):\n return True\n \n # Private memory only to owner\n if path.startswith(\"/memory/private/\"):\n owner = self._get_owner(path)\n return owner == requester\n \n # Check config permissions\n if self.config:\n return self.config.check_permission(path, \"read\")\n \n return False\n \n def register_agent(self, agent_id: str, info: AgentInfo = None):\n \"\"\"Register an agent with capabilities\"\"\"\n if info is None:\n info = AgentInfo(id=agent_id)\n self._agent_registry[agent_id] = info\n \n def query(self, requester: str, question: str, \n limit: int = 20) -> LibrarianResponse:\n \"\"\"\n Query the Librarian for information.\n \n Args:\n requester: The agent making the request\n question: Natural language query\n limit: Max results\n \n Returns:\n LibrarianResponse with accessible content and/or suggestions\n \"\"\"\n response = LibrarianResponse(query=question, requester=requester)\n \n # 1. Global search (privileged - ignores permissions for discovery)\n all_matches = self._privileged_search(question, limit * 2)\n response.total_matches = len(all_matches)\n \n # 2. Separate by accessibility\n accessible = []\n inaccessible_by_owner: Dict[str, List[SearchMatch]] = {}\n \n for match in all_matches:\n if self._can_access(requester, match.path):\n match.accessible = True\n accessible.append(match)\n else:\n owner = match.owner or \"unknown\"\n if owner not in inaccessible_by_owner:\n inaccessible_by_owner[owner] = []\n inaccessible_by_owner[owner].append(match)\n \n # 3. Return accessible content\n for match in accessible[:limit]:\n node = self.store.get_node(match.path)\n if node:\n response.accessible.append(node)\n response.accessible_count = len(response.accessible)\n \n # 4. Generate collaboration suggestions\n if self.privacy.can_reveal_existence():\n for owner, matches in inaccessible_by_owner.items():\n if owner == requester:\n continue\n \n best_match = max(matches, key=lambda m: m.score)\n \n suggestion = CollaborationSuggestion(\n agent=owner if self.privacy.can_reveal_owner() else \"another agent\",\n topic=best_match.topic if self.privacy.can_reveal_topic() else \"related information\",\n relevance=best_match.score,\n reason=f\"Has {len(matches)} relevant memories\" if self.privacy.can_reveal_topic() else \"\",\n )\n response.suggestions.append(suggestion)\n \n # Sort suggestions by relevance\n response.suggestions.sort(key=lambda s: s.relevance, reverse=True)\n \n return response\n \n def _privileged_search(self, query: str, limit: int) -> List[SearchMatch]:\n \"\"\"\n Hybrid search: FTS + semantic (if embedding_store available)\n \n Combines results from both methods, deduplicates, and ranks by combined score.\n \"\"\"\n seen_paths: Set[str] = set()\n matches: List[SearchMatch] = []\n \n # 1. FTS search\n try:\n fts_results = self.store.search(query, limit=limit)\n for node, score in fts_results:\n if node.path in seen_paths:\n continue\n seen_paths.add(node.path)\n \n owner = self._get_owner(node.path)\n topic = self._extract_topic(node.path)\n content = node.content or \"\"\n snippet = content[:200] + \"...\" if len(content) > 200 else content\n \n matches.append(SearchMatch(\n path=node.path,\n score=score,\n owner=owner,\n topic=topic,\n snippet=snippet,\n ))\n except Exception:\n pass\n \n # 2. Semantic search (if available)\n if self.embedding_store:\n try:\n semantic_results = self.embedding_store.search(query, k=limit)\n # Only use high-confidence semantic matches\n min_similarity = 0.3 # Threshold for relevance\n \n for node, similarity in semantic_results:\n if similarity \u003c min_similarity:\n continue # Skip low-confidence matches\n \n if node.path in seen_paths:\n # Boost existing match score\n for m in matches:\n if m.path == node.path:\n # Combine FTS and semantic scores\n m.score = m.score + similarity * 5\n break\n continue\n seen_paths.add(node.path)\n \n owner = self._get_owner(node.path)\n topic = self._extract_topic(node.path)\n content = node.content or \"\"\n snippet = content[:200] + \"...\" if len(content) > 200 else content\n \n matches.append(SearchMatch(\n path=node.path,\n score=similarity * 5, # Scale to complement FTS scores\n owner=owner,\n topic=topic,\n snippet=snippet,\n ))\n except Exception:\n pass\n \n # Sort by score and return top results\n matches.sort(key=lambda m: m.score, reverse=True)\n return matches[:limit]\n \n def who_knows(self, topic: str, limit: int = 10) -> List[AgentInfo]:\n \"\"\"Find agents who might know about a topic\"\"\"\n matches = self._privileged_search(topic, limit * 3)\n \n # Collect unique owners\n owner_scores: Dict[str, float] = {}\n for match in matches:\n if match.owner:\n if match.owner not in owner_scores:\n owner_scores[match.owner] = 0\n owner_scores[match.owner] = max(owner_scores[match.owner], match.score)\n \n # Return agent infos sorted by relevance\n result = []\n for owner, score in sorted(owner_scores.items(), key=lambda x: x[1], reverse=True)[:limit]:\n info = self._agent_registry.get(owner, AgentInfo(id=owner))\n result.append(info)\n \n return result\n \n def agents(self) -> List[AgentInfo]:\n \"\"\"List all registered agents\"\"\"\n return list(self._agent_registry.values())\n \n def agent(self, agent_id: str) -> Optional[AgentInfo]:\n \"\"\"Get info about a specific agent\"\"\"\n return self._agent_registry.get(agent_id)\n \n def directory(self) -> Dict[str, Any]:\n \"\"\"Get full directory of agents and their capabilities\"\"\"\n agents_by_capability: Dict[str, List[str]] = {}\n \n for agent_id, info in self._agent_registry.items():\n for cap in info.capabilities:\n if cap not in agents_by_capability:\n agents_by_capability[cap] = []\n agents_by_capability[cap].append(agent_id)\n \n return {\n \"agents\": [a.to_dict() for a in self._agent_registry.values()],\n \"by_capability\": agents_by_capability,\n \"total_agents\": len(self._agent_registry),\n }\n\n\ndef get_librarian(store: AVMStore = None, config: AVMConfig = None) -> Librarian:\n \"\"\"Get or create a Librarian instance\"\"\"\n if store is None:\n from .core import AVM\n avm = AVM(config=config)\n store = avm.store\n \n return Librarian(store, config)\n","content_type":"text/x-python; charset=utf-8","language":"python","size":15273,"content_sha256":"6ef5e80eca64a08aeaec83ba3fe3a4bb71dc4eb69ee8c726e882a48094af0665"},{"filename":"avm/mcp_server.py","content":"#!/usr/bin/env python3\n\"\"\"\nvfs/mcp_server.py - MCP Server for VFS\n\nExposes VFS functionality as MCP tools for AI agents.\n\nUsage:\n vfs-mcp --api-key $VFS_API_KEY\n vfs-mcp --config /path/to/config.yaml\n\nTools:\n - avm_recall: Retrieve relevant memories\n - avm_remember: Store new memory\n - avm_search: Full-text search\n - avm_list: List memories\n - avm_read: Read specific memory\n - avm_tags: Get tag cloud\n - avm_recent: Get recent memories\n\"\"\"\n\nimport os\nimport sys\nimport json\nimport argparse\nfrom typing import Any, Dict, List, Optional\nfrom datetime import datetime\n\n\n# MCP Protocol Implementation\nclass MCPServer:\n \"\"\"\n MCP (Model Context Protocol) Server\n \n Implements the MCP protocol for stdio communication.\n \"\"\"\n \n def __init__(self, vfs, user):\n self.vfs = vfs\n self.user = user\n self.memory = vfs.agent_memory(user.name)\n \n # Register tools\n self.tools = {\n \"avm_recall\": self._tool_recall,\n \"avm_remember\": self._tool_remember,\n \"avm_search\": self._tool_search,\n \"avm_list\": self._tool_list,\n \"avm_read\": self._tool_read,\n \"avm_tags\": self._tool_tags,\n \"avm_recent\": self._tool_recent,\n \"avm_stats\": self._tool_stats,\n # Two-pe retrieval\n \"avm_browse\": self._tool_browse,\n \"avm_fetch\": self._tool_fetch,\n }\n \n def get_tool_definitions(self) -> List[Dict]:\n \"\"\"Return tool definitions for MCP\"\"\"\n return [\n {\n \"name\": \"avm_recall\",\n \"description\": \"Search and retrieve relevant memories within a token budget. Returns a compact markdown summary of matching memories.\",\n \"inputSchema\": {\n \"type\": \"object\",\n \"properties\": {\n \"query\": {\n \"type\": \"string\",\n \"description\": \"Search query to find relevant memories\"\n },\n \"max_tokens\": {\n \"type\": \"number\",\n \"description\": \"Maximum tokens in response (default: 4000)\",\n \"default\": 4000\n },\n \"time_range\": {\n \"type\": \"string\",\n \"description\": \"Time filter: last_1h, last_24h, last_7d, last_30d\",\n \"enum\": [\"last_1h\", \"last_24h\", \"last_7d\", \"last_30d\"]\n },\n \"strategy\": {\n \"type\": \"string\",\n \"description\": \"Scoring strategy: balanced, importance, recency, relevance\",\n \"enum\": [\"balanced\", \"importance\", \"recency\", \"relevance\"],\n \"default\": \"balanced\"\n }\n },\n \"required\": [\"query\"]\n }\n },\n {\n \"name\": \"avm_remember\",\n \"description\": \"Store a new memory. Automatically handles deduplication and linking.\",\n \"inputSchema\": {\n \"type\": \"object\",\n \"properties\": {\n \"content\": {\n \"type\": \"string\",\n \"description\": \"Memory content to store\"\n },\n \"title\": {\n \"type\": \"string\",\n \"description\": \"Optional title for the memory\"\n },\n \"importance\": {\n \"type\": \"number\",\n \"description\": \"Importance score 0-1 (default: 0.5)\",\n \"minimum\": 0,\n \"maximum\": 1,\n \"default\": 0.5\n },\n \"tags\": {\n \"type\": \"array\",\n \"items\": {\"type\": \"string\"},\n \"description\": \"Tags for categorization\"\n },\n \"namespace\": {\n \"type\": \"string\",\n \"description\": \"Shared namespace (e.g., 'market', 'projects')\"\n },\n \"derived_from\": {\n \"type\": \"array\",\n \"items\": {\"type\": \"string\"},\n \"description\": \"Source paths this memory is derived from\"\n }\n },\n \"required\": [\"content\"]\n }\n },\n {\n \"name\": \"avm_search\",\n \"description\": \"Full-text search across memories. Returns matching paths and snippets.\",\n \"inputSchema\": {\n \"type\": \"object\",\n \"properties\": {\n \"query\": {\n \"type\": \"string\",\n \"description\": \"Search query\"\n },\n \"limit\": {\n \"type\": \"number\",\n \"description\": \"Maximum results (default: 10)\",\n \"default\": 10\n }\n },\n \"required\": [\"query\"]\n }\n },\n {\n \"name\": \"avm_list\",\n \"description\": \"List memories in a path prefix.\",\n \"inputSchema\": {\n \"type\": \"object\",\n \"properties\": {\n \"prefix\": {\n \"type\": \"string\",\n \"description\": \"Path prefix (default: user's private memory)\",\n \"default\": \"\"\n },\n \"limit\": {\n \"type\": \"number\",\n \"description\": \"Maximum results (default: 20)\",\n \"default\": 20\n }\n }\n }\n },\n {\n \"name\": \"avm_read\",\n \"description\": \"Read a specific memory by path.\",\n \"inputSchema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\n \"type\": \"string\",\n \"description\": \"Full path to the memory\"\n }\n },\n \"required\": [\"path\"]\n }\n },\n {\n \"name\": \"avm_tags\",\n \"description\": \"Get tag cloud showing tag frequencies.\",\n \"inputSchema\": {\n \"type\": \"object\",\n \"properties\": {\n \"limit\": {\n \"type\": \"number\",\n \"description\": \"Maximum tags to return (default: 20)\",\n \"default\": 20\n }\n }\n }\n },\n {\n \"name\": \"avm_recent\",\n \"description\": \"Get recent memories within a time range.\",\n \"inputSchema\": {\n \"type\": \"object\",\n \"properties\": {\n \"time_range\": {\n \"type\": \"string\",\n \"description\": \"Time range: last_1h, last_24h, last_7d, last_30d\",\n \"enum\": [\"last_1h\", \"last_24h\", \"last_7d\", \"last_30d\"],\n \"default\": \"last_24h\"\n },\n \"limit\": {\n \"type\": \"number\",\n \"description\": \"Maximum results (default: 10)\",\n \"default\": 10\n }\n }\n }\n },\n {\n \"name\": \"avm_stats\",\n \"description\": \"Get memory statistics for the current user.\",\n \"inputSchema\": {\n \"type\": \"object\",\n \"properties\": {}\n }\n },\n {\n \"name\": \"avm_browse\",\n \"description\": \"Browse memories - returns paths and short summaries only (not full content). Use this first to find relevant memories, then use avm_fetch to get full content of selected paths. Saves tokens for large result sets.\",\n \"inputSchema\": {\n \"type\": \"object\",\n \"properties\": {\n \"query\": {\n \"type\": \"string\",\n \"description\": \"Search query\"\n },\n \"limit\": {\n \"type\": \"number\",\n \"description\": \"Maximum results (default: 10)\",\n \"default\": 10\n },\n \"summary_length\": {\n \"type\": \"number\",\n \"description\": \"Characters per summary (default: 80)\",\n \"default\": 80\n }\n },\n \"required\": [\"query\"]\n }\n },\n {\n \"name\": \"avm_fetch\",\n \"description\": \"Fetch full content of specific memory paths. Use after avm_browse to get complete content of selected memories.\",\n \"inputSchema\": {\n \"type\": \"object\",\n \"properties\": {\n \"paths\": {\n \"type\": \"array\",\n \"items\": {\"type\": \"string\"},\n \"description\": \"List of paths to fetch\"\n }\n },\n \"required\": [\"paths\"]\n }\n }\n ]\n \n # ─── Tool Implementations ─────────────────────────────\n \n def _tool_recall(self, params: Dict) -> str:\n \"\"\"Retrieve relevant memories\"\"\"\n query = params.get(\"query\", \"\")\n max_tokens = params.get(\"max_tokens\", 4000)\n time_range = params.get(\"time_range\")\n strategy = params.get(\"strategy\", \"balanced\")\n \n from .agent_memory import ScoringStrategy\n \n if time_range:\n return self.memory.recall_recent(\n query, \n time_range=time_range,\n max_tokens=max_tokens\n )\n else:\n return self.memory.recall(\n query,\n max_tokens=max_tokens,\n strategy=ScoringStrategy(strategy)\n )\n \n def _tool_remember(self, params: Dict) -> str:\n \"\"\"Store new memory\"\"\"\n content = params.get(\"content\", \"\")\n title = params.get(\"title\")\n importance = params.get(\"importance\", 0.5)\n tags = params.get(\"tags\", [])\n namespace = params.get(\"namespace\")\n derived_from = params.get(\"derived_from\")\n \n if derived_from:\n node = self.memory.remember_derived(\n content,\n derived_from=derived_from,\n title=title,\n importance=importance,\n tags=tags,\n namespace=namespace,\n )\n else:\n node = self.memory.remember(\n content,\n title=title,\n importance=importance,\n tags=tags,\n namespace=namespace,\n )\n \n return f\"Stored: {node.path}\"\n \n def _tool_search(self, params: Dict) -> str:\n \"\"\"Full-text search\"\"\"\n query = params.get(\"query\", \"\")\n limit = params.get(\"limit\", 10)\n \n results = self.vfs.search(query, limit=limit)\n \n lines = [f\"Found {len(results)} results for '{query}':\\n\"]\n for node, score in results:\n snippet = node.content[:100].replace(\"\\n\", \" \")\n lines.append(f\"- [{score:.2f}] {node.path}\")\n lines.append(f\" {snippet}...\")\n lines.append(\"\")\n \n return \"\\n\".join(lines)\n \n def _tool_list(self, params: Dict) -> str:\n \"\"\"List memories\"\"\"\n prefix = params.get(\"prefix\") or self.memory.private_prefix\n limit = params.get(\"limit\", 20)\n \n nodes = self.vfs.list(prefix, limit=limit)\n \n lines = [f\"Memories in {prefix} ({len(nodes)} items):\\n\"]\n for node in nodes:\n tags = node.meta.get(\"tags\", [])\n tag_str = f\" [{', '.join(tags)}]\" if tags else \"\"\n lines.append(f\"- {node.path}{tag_str}\")\n \n return \"\\n\".join(lines)\n \n def _tool_read(self, params: Dict) -> str:\n \"\"\"Read specific memory\"\"\"\n path = params.get(\"path\", \"\")\n \n node = self.vfs.read(path)\n if not node:\n return f\"Not found: {path}\"\n \n return f\"# {path}\\n\\n{node.content}\"\n \n def _tool_tags(self, params: Dict) -> str:\n \"\"\"Get tag cloud\"\"\"\n limit = params.get(\"limit\", 20)\n \n cloud = self.memory.tag_cloud()\n \n lines = [\"Tag Cloud:\\n\"]\n for tag, count in list(cloud.items())[:limit]:\n lines.append(f\"- {tag}: {count}\")\n \n return \"\\n\".join(lines)\n \n def _tool_recent(self, params: Dict) -> str:\n \"\"\"Get recent memories\"\"\"\n time_range = params.get(\"time_range\", \"last_24h\")\n limit = params.get(\"limit\", 10)\n \n nodes = self.vfs.query_time(\n prefix=\"/memory\",\n time_range=time_range,\n limit=limit\n )\n \n lines = [f\"Recent memories ({time_range}):\\n\"]\n for node in nodes:\n created = node.meta.get(\"created_at\", \"\")[:19]\n lines.append(f\"- [{created}] {node.path}\")\n \n return \"\\n\".join(lines)\n \n def _tool_stats(self, params: Dict) -> str:\n \"\"\"Get statistics\"\"\"\n stats = self.memory.stats()\n \n lines = [\n \"Memory Statistics:\",\n f\"- Agent: {stats['agent_id']}\",\n f\"- Private memories: {stats['private_count']}\",\n f\"- Shared accessible: {stats['shared_accessible']}\",\n f\"- Max tokens: {stats['config']['max_tokens']}\",\n f\"- Strategy: {stats['config']['strategy']}\",\n ]\n \n return \"\\n\".join(lines)\n \n def _tool_browse(self, params: Dict) -> str:\n \"\"\"Browse memories - paths and summaries only\"\"\"\n query = params.get(\"query\", \"\")\n limit = params.get(\"limit\", 10)\n summary_length = params.get(\"summary_length\", 80)\n \n # Use retrieve for semantic + FTS search\n result = self.vfs.retrieve(query, k=limit, expand_graph=True)\n \n if not result.nodes:\n return f\"No memories found for: {query}\"\n \n lines = [\n f\"Found {len(result.nodes)} memories for \\\"{query}\\\":\",\n f\"(Use avm_fetch to get full content)\",\n \"\"\n ]\n \n for node in result.nodes:\n score = result.get_score(node.path)\n source = result.get_source(node.path)\n \n # Create short summary\n content = node.content.replace(\"\\n\", \" \").strip()\n # Skip markdown headers and metadata\n content = ' '.join([\n line for line in content.split()\n if not line.startswith('#') and not line.startswith('*')\n ])\n summary = content[:summary_length]\n if len(content) > summary_length:\n summary += \"...\"\n \n # Source badge\n badge = {\"semantic\": \"🎯\", \"fts\": \"📝\", \"graph\": \"🔗\"}.get(source, \"\")\n \n lines.append(f\"{badge} [{score:.2f}] {node.path}\")\n lines.append(f\" {summary}\")\n \n # Add tags if present\n tags = node.meta.get(\"tags\", [])\n if tags:\n lines.append(f\" Tags: {', '.join(tags)}\")\n \n lines.append(\"\")\n \n return \"\\n\".join(lines)\n \n def _tool_fetch(self, params: Dict) -> str:\n \"\"\"Fetch full content of specific paths\"\"\"\n paths = params.get(\"paths\", [])\n \n if not paths:\n return \"No paths specified\"\n \n contents = []\n not_found = []\n \n for path in paths:\n node = self.vfs.read(path)\n if node:\n contents.append(f\"## {path}\\n\\n{node.content}\")\n else:\n not_found.append(path)\n \n result = \"\\n\\n---\\n\\n\".join(contents)\n \n if not_found:\n result += f\"\\n\\n*Not found: {', '.join(not_found)}*\"\n \n return result\n \n # ─── MCP Protocol ─────────────────────────────────────\n \n def handle_request(self, request: Dict) -> Dict:\n \"\"\"Handle MCP request\"\"\"\n method = request.get(\"method\", \"\")\n params = request.get(\"params\", {})\n request_id = request.get(\"id\")\n \n try:\n if method == \"initialize\":\n return self._handle_initialize(request_id, params)\n elif method == \"tools/list\":\n return self._handle_tools_list(request_id)\n elif method == \"tools/call\":\n return self._handle_tools_call(request_id, params)\n else:\n return self._error_response(request_id, -32601, f\"Unknown method: {method}\")\n except Exception as e:\n return self._error_response(request_id, -32000, str(e))\n \n def _handle_initialize(self, request_id: Any, params: Dict) -> Dict:\n \"\"\"Handle initialize request\"\"\"\n return {\n \"jsonrpc\": \"2.0\",\n \"id\": request_id,\n \"result\": {\n \"protocolVersion\": \"2024-11-05\",\n \"serverInfo\": {\n \"name\": \"avm-memory\",\n \"version\": \"0.7.0\"\n },\n \"capabilities\": {\n \"tools\": {}\n }\n }\n }\n \n def _handle_tools_list(self, request_id: Any) -> Dict:\n \"\"\"Handle tools/list request\"\"\"\n return {\n \"jsonrpc\": \"2.0\",\n \"id\": request_id,\n \"result\": {\n \"tools\": self.get_tool_definitions()\n }\n }\n \n def _handle_tools_call(self, request_id: Any, params: Dict) -> Dict:\n \"\"\"Handle tools/call request\"\"\"\n tool_name = params.get(\"name\", \"\")\n tool_params = params.get(\"arguments\", {})\n \n if tool_name not in self.tools:\n return self._error_response(request_id, -32602, f\"Unknown tool: {tool_name}\")\n \n result = self.tools[tool_name](tool_params)\n \n return {\n \"jsonrpc\": \"2.0\",\n \"id\": request_id,\n \"result\": {\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": result\n }\n ]\n }\n }\n \n def _error_response(self, request_id: Any, code: int, message: str) -> Dict:\n \"\"\"Create error response\"\"\"\n return {\n \"jsonrpc\": \"2.0\",\n \"id\": request_id,\n \"error\": {\n \"code\": code,\n \"message\": message\n }\n }\n \n def run_stdio(self):\n \"\"\"Run MCP server on stdio\"\"\"\n import sys\n \n while True:\n try:\n line = sys.stdin.readline()\n if not line:\n break\n \n request = json.loads(line)\n response = self.handle_request(request)\n \n sys.stdout.write(json.dumps(response) + \"\\n\")\n sys.stdout.flush()\n \n except json.JSONDecodeError:\n continue\n except KeyboardInterrupt:\n break\n\n\ndef main():\n \"\"\"Main entry point\"\"\"\n parser = argparse.ArgumentParser(description=\"VFS MCP Server\")\n parser.add_argument(\"--api-key\", \"-k\", help=\"API key for authentication\")\n parser.add_argument(\"--config\", \"-c\", help=\"Config file path\")\n parser.add_argument(\"--db\", \"-d\", help=\"Database path\")\n parser.add_argument(\"--user\", \"-u\", default=\"default\", help=\"User name\")\n \n args = parser.parse_args()\n \n # Initialize VFS\n from . import VFS\n from .config import AVMConfig\n \n config = AVMConfig(db_path=args.db) if args.db else None\n vfs = VFS(config=config)\n \n # Initialize permissions and authenticate\n vfs.init_permissions()\n \n if args.api_key:\n user = vfs.authenticate(args.api_key)\n if not user:\n # Create user if not exists\n user = vfs.create_user(args.user)\n else:\n # Use default user\n user = vfs.create_user(args.user)\n \n # Run server\n server = MCPServer(vfs, user)\n server.run_stdio()\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":21622,"content_sha256":"0b359068b1032ebd538013e2c90ce562d2ae7468e0682a74f226d287a9712b2d"},{"filename":"avm/multi_agent.py","content":"\"\"\"\nvfs/multi_agent.py - Multi-Agent Support\n\nFeatures:\n- Agent config with roles, quotas, namespace permissions\n- Append-only versioning (no overwrites)\n- Version merging on recall\n- Write locks for concurrency\n- Audit logging\n\"\"\"\n\nimport threading\nimport hashlib\nfrom dataclasses import dataclass, field\nfrom datetime import datetime\nfrom typing import List, Dict, Any, Optional, Set\nfrom pathlib import Path\nfrom enum import Enum\nimport fnmatch\nimport json\n\nfrom .store import AVMStore\nfrom .node import AVMNode\nfrom .utils import utcnow\n\n\nclass AgentRole(Enum):\n ADMIN = \"admin\"\n MEMBER = \"member\"\n READONLY = \"readonly\"\n\n\n@dataclass\nclass AgentQuota:\n \"\"\"Agent quota limits\"\"\"\n max_nodes: int = 10000\n max_total_mb: float = 100.0\n \n @classmethod\n def from_dict(cls, data: Dict) -> \"AgentQuota\":\n return cls(\n max_nodes=data.get(\"max_nodes\", 10000),\n max_total_mb=data.get(\"max_total_mb\", 100.0),\n )\n\n\n@dataclass\nclass NamespacePermissions:\n \"\"\"Namespace read/write permissions\"\"\"\n read: List[str] = field(default_factory=lambda: [\"*\"])\n write: List[str] = field(default_factory=list)\n \n @classmethod\n def from_dict(cls, data: Dict) -> \"NamespacePermissions\":\n return cls(\n read=data.get(\"read\", [\"*\"]),\n write=data.get(\"write\", []),\n )\n \n def can_read(self, path: str) -> bool:\n return self._matches(path, self.read)\n \n def can_write(self, path: str) -> bool:\n return self._matches(path, self.write)\n \n def _matches(self, path: str, patterns: List[str]) -> bool:\n for pattern in patterns:\n if pattern == \"*\":\n return True\n if fnmatch.fnmatch(path, pattern):\n return True\n return False\n\n\n@dataclass\nclass AgentConfig:\n \"\"\"Agent configuration\"\"\"\n agent_id: str\n role: AgentRole = AgentRole.MEMBER\n quota: AgentQuota = field(default_factory=AgentQuota)\n namespaces: NamespacePermissions = field(default_factory=NamespacePermissions)\n inherit: Optional[str] = None\n \n @classmethod\n def from_dict(cls, agent_id: str, data: Dict, \n all_configs: Dict = None) -> \"AgentConfig\":\n # Handle inheritance\n if data.get(\"inherit\") and all_configs:\n parent_id = data[\"inherit\"]\n if parent_id in all_configs:\n parent = all_configs[parent_id]\n # Merge with parent\n data = {**parent, **data}\n \n return cls(\n agent_id=agent_id,\n role=AgentRole(data.get(\"role\", \"member\")),\n quota=AgentQuota.from_dict(data.get(\"quota\", {})),\n namespaces=NamespacePermissions.from_dict(data.get(\"namespaces\", {})),\n inherit=data.get(\"inherit\"),\n )\n\n\nclass AgentRegistry:\n \"\"\"\n Registry for agent configurations\n \"\"\"\n \n def __init__(self):\n self._configs: Dict[str, AgentConfig] = {}\n self._locks: Dict[str, threading.RLock] = {}\n self._default_lock = threading.RLock()\n \n def register(self, config: AgentConfig):\n \"\"\"Register an agent config\"\"\"\n self._configs[config.agent_id] = config\n self._locks[config.agent_id] = threading.RLock()\n \n def get(self, agent_id: str) -> AgentConfig:\n \"\"\"Get agent config, create default if not exists\"\"\"\n if agent_id not in self._configs:\n # Create default config\n self._configs[agent_id] = AgentConfig(\n agent_id=agent_id,\n namespaces=NamespacePermissions(\n read=[f\"/memory/private/{agent_id}/*\", \"/memory/shared/*\"],\n write=[f\"/memory/private/{agent_id}/*\"],\n )\n )\n self._locks[agent_id] = threading.RLock()\n \n return self._configs[agent_id]\n \n def get_lock(self, agent_id: str) -> threading.RLock:\n \"\"\"Get write lock for agent\"\"\"\n if agent_id not in self._locks:\n self._locks[agent_id] = threading.RLock()\n return self._locks[agent_id]\n \n def load_from_dict(self, data: Dict):\n \"\"\"Load configs from dict (parsed YAML)\"\"\"\n agents_data = data.get(\"agents\", {})\n \n # First pass: collect raw data\n raw_configs = {}\n for agent_id, config_data in agents_data.items():\n raw_configs[agent_id] = config_data\n \n # Second pass: resolve inheritance\n for agent_id, config_data in agents_data.items():\n config = AgentConfig.from_dict(agent_id, config_data, raw_configs)\n self.register(config)\n\n\nclass AuditLog:\n \"\"\"\n Audit log for tracking operations\n \"\"\"\n \n def __init__(self, store: AVMStore):\n self.store = store\n self._init_table()\n \n def _init_table(self):\n \"\"\"Initialize audit log table\"\"\"\n with self.store._conn() as conn:\n conn.execute(\"\"\"\n CREATE TABLE IF NOT EXISTS audit_log (\n id INTEGER PRIMARY KEY AUTOINCREMENT,\n agent_id TEXT NOT NULL,\n operation TEXT NOT NULL,\n path TEXT NOT NULL,\n timestamp TEXT NOT NULL,\n details TEXT\n )\n \"\"\")\n conn.execute(\"\"\"\n CREATE INDEX IF NOT EXISTS idx_audit_agent \n ON audit_log(agent_id)\n \"\"\")\n conn.execute(\"\"\"\n CREATE INDEX IF NOT EXISTS idx_audit_path \n ON audit_log(path)\n \"\"\")\n \n def log(self, agent_id: str, operation: str, path: str, \n details: Dict = None):\n \"\"\"Log an operation\"\"\"\n with self.store._conn() as conn:\n conn.execute(\"\"\"\n INSERT INTO audit_log (agent_id, operation, path, timestamp, details)\n VALUES (?, ?, ?, ?, ?)\n \"\"\", (\n agent_id,\n operation,\n path,\n utcnow().isoformat(),\n json.dumps(details) if details else None,\n ))\n \n def query(self, agent_id: str = None, path_prefix: str = None,\n operation: str = None, limit: int = 100) -> List[Dict]:\n \"\"\"Query audit log\"\"\"\n sql = \"SELECT * FROM audit_log WHERE 1=1\"\n params = []\n \n if agent_id:\n sql += \" AND agent_id = ?\"\n params.append(agent_id)\n \n if path_prefix:\n sql += \" AND path LIKE ?\"\n params.append(path_prefix + \"%\")\n \n if operation:\n sql += \" AND operation = ?\"\n params.append(operation)\n \n sql += \" ORDER BY timestamp DESC LIMIT ?\"\n params.append(limit)\n \n with self.store._conn() as conn:\n rows = conn.execute(sql, params).fetchall()\n \n return [\n {\n \"id\": row[0],\n \"agent_id\": row[1],\n \"operation\": row[2],\n \"path\": row[3],\n \"timestamp\": row[4],\n \"details\": json.loads(row[5]) if row[5] else None,\n }\n for row in rows\n ]\n\n\nclass VersionedMemory:\n \"\"\"\n Append-only versioned memory system\n \n Instead of overwriting, creates new versions linked to base path.\n Recall merges all versions.\n \"\"\"\n \n def __init__(self, store: AVMStore):\n self.store = store\n \n def write_version(self, base_path: str, content: str, \n agent_id: str, meta: Dict = None) -> AVMNode:\n \"\"\"\n Write a new version of content\n \n If base_path exists, creates a versioned entry.\n Links new version to base path.\n \"\"\"\n timestamp = utcnow().strftime(\"%Y%m%d_%H%M%S\")\n \n # Check if base exists\n existing = self.store.get_node(base_path)\n \n if existing:\n # Create versioned path\n # /memory/shared/market/NVDA.md -> /memory/shared/market/NVDA.v20260305_164400.md\n base_name = base_path.rsplit(\".\", 1)[0] if \".\" in base_path else base_path\n ext = base_path.rsplit(\".\", 1)[1] if \".\" in base_path else \"md\"\n versioned_path = f\"{base_name}.v{timestamp}.{ext}\"\n else:\n versioned_path = base_path\n \n # Prepare metadata\n full_meta = meta or {}\n full_meta[\"author\"] = agent_id\n full_meta[\"created_at\"] = utcnow().isoformat()\n full_meta[\"base_path\"] = base_path\n \n # Write\n node = AVMNode(\n path=versioned_path,\n content=content,\n meta=full_meta,\n )\n self.store.put_node(node)\n \n # Link to base if versioned\n if versioned_path != base_path:\n from .graph import EdgeType\n self.store.add_edge(\n versioned_path,\n base_path,\n EdgeType.VERSION_OF,\n weight=1.0,\n )\n \n return node\n \n def get_versions(self, base_path: str) -> List[AVMNode]:\n \"\"\"Get all versions of a path\"\"\"\n versions = []\n \n # Get base node if exists\n base = self.store.get_node(base_path)\n if base:\n versions.append(base)\n \n # Get linked versions\n edges = self.store.get_links(base_path, direction=\"in\")\n for edge in edges:\n if edge.edge_type.value == \"version_of\":\n node = self.store.get_node(edge.source)\n if node:\n versions.append(node)\n \n # Sort by creation time\n versions.sort(key=lambda n: n.meta.get(\"created_at\", \"\"), reverse=True)\n \n return versions\n \n def merge_versions(self, versions: List[AVMNode], \n max_per_author: int = 3) -> str:\n \"\"\"\n Merge multiple versions into a single markdown document\n \n Groups by author, shows most recent entries.\n \"\"\"\n if not versions:\n return \"\"\n \n # Group by author\n by_author: Dict[str, List[AVMNode]] = {}\n for v in versions:\n author = v.meta.get(\"author\", \"unknown\")\n if author not in by_author:\n by_author[author] = []\n by_author[author].append(v)\n \n # Build merged content\n lines = []\n base_path = versions[0].meta.get(\"base_path\", versions[0].path)\n lines.append(f\"## {base_path}\")\n lines.append(\"\")\n \n for author, author_versions in by_author.items():\n # Take most recent N versions per author\n recent = author_versions[:max_per_author]\n \n for v in recent:\n created = v.meta.get(\"created_at\", \"\")\n if created:\n # Parse and format\n try:\n dt = datetime.fromisoformat(created)\n created = dt.strftime(\"%Y-%m-%d %H:%M\")\n except:\n pass\n \n lines.append(f\"### {author} ({created})\")\n lines.append(\"\")\n \n # Extract content (skip headers)\n content_lines = v.content.split(\"\\n\")\n for line in content_lines:\n if not line.startswith(\"#\") and line.strip():\n lines.append(line)\n \n lines.append(\"\")\n \n return \"\\n\".join(lines)\n\n\nclass QuotaEnforcer:\n \"\"\"\n Enforce agent quotas\n \"\"\"\n \n def __init__(self, store: AVMStore):\n self.store = store\n \n def check_quota(self, agent_id: str, quota: AgentQuota) -> Dict[str, Any]:\n \"\"\"\n Check if agent is within quota\n \n Returns: {\"ok\": bool, \"usage\": {...}, \"message\": str}\n \"\"\"\n # Count nodes owned by agent\n prefix = f\"/memory/private/{agent_id}\"\n nodes = self.store.list_nodes(prefix, limit=quota.max_nodes + 1)\n node_count = len(nodes)\n \n # Calculate total size\n total_bytes = sum(len(n.content.encode()) for n in nodes)\n total_mb = total_bytes / (1024 * 1024)\n \n ok = True\n message = \"OK\"\n \n if node_count >= quota.max_nodes:\n ok = False\n message = f\"Node limit exceeded: {node_count}/{quota.max_nodes}\"\n elif total_mb >= quota.max_total_mb:\n ok = False\n message = f\"Size limit exceeded: {total_mb:.2f}/{quota.max_total_mb} MB\"\n \n return {\n \"ok\": ok,\n \"usage\": {\n \"nodes\": node_count,\n \"max_nodes\": quota.max_nodes,\n \"size_mb\": round(total_mb, 2),\n \"max_size_mb\": quota.max_total_mb,\n },\n \"message\": message,\n }\n\n\n# Add VERSION_OF edge type\ndef _extend_edge_types():\n \"\"\"Extend EdgeType enum with VERSION_OF\"\"\"\n from .graph import EdgeType\n if not hasattr(EdgeType, 'VERSION_OF'):\n # Dynamically add new member (hacky but works)\n EdgeType._member_map_['VERSION_OF'] = 'version_of'\n EdgeType._value2member_map_['version_of'] = EdgeType.VERSION_OF\n\n# Call on import\ntry:\n _extend_edge_types()\nexcept:\n pass\n","content_type":"text/x-python; charset=utf-8","language":"python","size":13429,"content_sha256":"f1f68569bfb10e0b06488664892763650554e6ff2fb9a32d276b94034b5df423"},{"filename":"avm/node.py","content":"\"\"\"\nvfs/node.py - VFS node data structure\n\"\"\"\n\nfrom dataclasses import dataclass, field\nfrom datetime import datetime\nfrom typing import Optional, Dict, Any, List\nfrom enum import Enum\nimport hashlib\nimport json\n\nfrom .utils import utcnow\n\n\nclass NodeType(Enum):\n \"\"\"node type\"\"\"\n FILE = \"file\"\n DIRECTORY = \"dir\"\n LINK = \"link\" # Soft link\n\n\nclass Permission(Enum):\n \"\"\"permission\"\"\"\n READ_ONLY = \"ro\"\n READ_WRITE = \"rw\"\n\n\n@dataclass\nclass AVMNode:\n \"\"\"\n VFSnode\n \n eachnode:\n - path: Virtual path (e.g., /research/MSFT.md)\n - content: filecontent\n - meta: Metadata (TTL, source, update time, etc.)\n - node_type: File/directory/link\n \"\"\"\n path: str\n content: str = \"\"\n meta: Dict[str, Any] = field(default_factory=dict)\n node_type: NodeType = NodeType.FILE\n created_at: datetime = field(default_factory=utcnow)\n updated_at: datetime = field(default_factory=utcnow)\n version: int = 1\n \n # Permission determined by path prefix\n WRITABLE_PREFIXES = (\"/memory\", \"/trash\", \"/archive\", \"/shared\", \"/task\", \"/gossip\")\n READONLY_PREFIXES = (\"/research\", \"/live\", \"/links\")\n \n @property\n def is_writable(self) -> bool:\n \"\"\"checknodewhetherwritable\"\"\"\n for prefix in self.WRITABLE_PREFIXES:\n if self.path.startswith(prefix):\n return True\n return False\n \n @property\n def is_live(self) -> bool:\n \"\"\"checkwhetherlive datanode\"\"\"\n return self.path.startswith(\"/live\")\n \n @property\n def ttl_seconds(self) -> Optional[int]:\n \"\"\"getTTL(onlylivenode)\"\"\"\n return self.meta.get(\"ttl_seconds\") if self.is_live else None\n \n @property\n def is_expired(self) -> bool:\n \"\"\"checklivenodewhetherexpired\"\"\"\n if not self.is_live:\n return False\n ttl = self.ttl_seconds\n if ttl is None:\n return False\n age = (utcnow() - self.updated_at).total_seconds()\n return age > ttl\n \n @property\n def content_h(self) -> str:\n \"\"\"Content hash (for diff detection)\"\"\"\n return hashlib.sha256(self.content.encode()).hexdigest()[:16]\n \n def to_dict(self) -> Dict[str, Any]:\n \"\"\"To dict\"\"\"\n return {\n \"path\": self.path,\n \"content\": self.content,\n \"meta\": self.meta,\n \"node_type\": self.node_type.value,\n \"created_at\": self.created_at.isoformat(),\n \"updated_at\": self.updated_at.isoformat(),\n \"version\": self.version,\n }\n \n @classmethod\n def from_dict(cls, data: Dict[str, Any]) -> \"AVMNode\":\n \"\"\"fromdictcreate\"\"\"\n return cls(\n path=data[\"path\"],\n content=data.get(\"content\", \"\"),\n meta=data.get(\"meta\", {}),\n node_type=NodeType(data.get(\"node_type\", \"file\")),\n created_at=datetime.fromisoformat(data[\"created_at\"]) if \"created_at\" in data else utcnow(),\n updated_at=datetime.fromisoformat(data[\"updated_at\"]) if \"updated_at\" in data else utcnow(),\n version=data.get(\"version\", 1),\n )\n \n def __repr__(self) -> str:\n return f\"AVMNode({self.path}, v{self.version}, {len(self.content)} bytes)\"\n\n\n@dataclass\nclass NodeDiff:\n \"\"\"\n nodechangerecord\n \"\"\"\n node_path: str\n version: int\n old_h: Optional[str]\n new_h: str\n diff_content: str # Unified diff or complete new content\n changed_at: datetime = field(default_factory=utcnow)\n change_type: str = \"update\" # create/update/delete\n \n def to_dict(self) -> Dict[str, Any]:\n return {\n \"node_path\": self.node_path,\n \"version\": self.version,\n \"old_h\": self.old_h,\n \"new_h\": self.new_h,\n \"diff_content\": self.diff_content,\n \"changed_at\": self.changed_at.isoformat(),\n \"change_type\": self.change_type,\n }\n","content_type":"text/x-python; charset=utf-8","language":"python","size":3932,"content_sha256":"586a9786de181c09712a4a9e86d5c843bf7c685644de013db5331d89f72142f1"},{"filename":"avm/permissions.py","content":"\"\"\"\nvfs/permissions.py - Linux-Style Permission System\n\nFeatures:\n- Unix-like rwx permission bits\n- Owner/group/other model\n- Capabilities for fine-grained control\n- API key authentication for skills\n- Sudo support for temporary elevation\n\"\"\"\n\nimport os\nimport stat\nimport hashlib\nimport secrets\nfrom dataclasses import dataclass, field\nfrom datetime import datetime, timedelta\nfrom typing import List, Dict, Any, Optional, Set\nfrom enum import Enum, Flag, auto\n\nfrom .utils import utcnow\n\n\n# ═══════════════════════════════════════════════════════════════\n# Permission Bits\n# ═══════════════════════════════════════════════════════════════\n\nclass PermBits(Flag):\n \"\"\"Unix-style permission bits\"\"\"\n NONE = 0\n X = auto() # Execute (for directories: access)\n W = auto() # Write\n R = auto() # Read\n \n # Convenience combinations\n RW = R | W\n RX = R | X\n RWX = R | W | X\n\n\ndef parse_mode(mode: int) -> Dict[str, PermBits]:\n \"\"\"Parse octal mode to owner/group/other permissions\"\"\"\n return {\n \"owner\": PermBits((mode >> 6) & 0o7),\n \"group\": PermBits((mode >> 3) & 0o7),\n \"other\": PermBits(mode & 0o7),\n }\n\n\ndef mode_to_string(mode: int) -> str:\n \"\"\"Convert mode to rwxrwxrwx string\"\"\"\n chars = \"\"\n for shift in [6, 3, 0]:\n bits = (mode >> shift) & 0o7\n chars += \"r\" if bits & 0o4 else \"-\"\n chars += \"w\" if bits & 0o2 else \"-\"\n chars += \"x\" if bits & 0o1 else \"-\"\n return chars\n\n\ndef string_to_mode(s: str) -> int:\n \"\"\"Convert rwxrwxrwx string to mode\"\"\"\n if len(s) != 9:\n raise ValueError(f\"Invalid mode string: {s}\")\n \n mode = 0\n for i, c in enumerate(s):\n if c not in \"-rwx\":\n raise ValueError(f\"Invalid character in mode: {c}\")\n \n shift = 8 - i\n if c == \"r\":\n mode |= 0o4 \u003c\u003c (shift // 3 * 3)\n elif c == \"w\":\n mode |= 0o2 \u003c\u003c (shift // 3 * 3)\n elif c == \"x\":\n mode |= 0o1 \u003c\u003c (shift // 3 * 3)\n \n return mode\n\n\n# ═══════════════════════════════════════════════════════════════\n# Capabilities\n# ═══════════════════════════════════════════════════════════════\n\nclass Capability(Enum):\n \"\"\"System capabilities for fine-grained access control\"\"\"\n \n # Admin capabilities\n CAP_ADMIN = \"admin\" # Full system access\n CAP_MANAGE_USERS = \"manage_users\" # Create/delete users\n \n # Search capabilities\n CAP_SEARCH_ALL = \"search_all\" # Search any path\n CAP_SEARCH_OWN = \"search_own\" # Search only own paths\n \n # Write capabilities\n CAP_WRITE = \"write\" # Write to allowed paths\n CAP_DELETE = \"delete\" # Delete files\n CAP_SHARE = \"share\" # Share with others\n \n # Special capabilities\n CAP_SUDO = \"sudo\" # Temporary privilege elevation\n CAP_AUDIT = \"audit\" # View audit logs\n CAP_EXPORT = \"export\" # Export data\n\n\n# ═══════════════════════════════════════════════════════════════\n# User & Group\n# ═══════════════════════════════════════════════════════════════\n\n@dataclass\nclass User:\n \"\"\"User account\"\"\"\n name: str\n uid: int\n groups: List[str] = field(default_factory=list)\n capabilities: List[Capability] = field(default_factory=list)\n home: str = \"\"\n api_key: str = \"\"\n created_at: datetime = field(default_factory=utcnow)\n \n def __post_init__(self):\n if not self.home:\n self.home = f\"/memory/private/{self.name}\"\n \n @property\n def is_root(self) -> bool:\n return self.uid == 0\n \n def _capability(self, cap: Capability) -> bool:\n if self.is_root:\n return True\n return cap in self.capabilities or Capability.CAP_ADMIN in self.capabilities\n \n def in_group(self, group: str) -> bool:\n if self.is_root:\n return True\n return group in self.groups or \"*\" in self.groups\n \n def to_dict(self) -> Dict:\n return {\n \"name\": self.name,\n \"uid\": self.uid,\n \"groups\": self.groups,\n \"capabilities\": [c.value for c in self.capabilities],\n \"home\": self.home,\n }\n\n\n@dataclass\nclass Group:\n \"\"\"User group\"\"\"\n name: str\n gid: int\n members: List[str] = field(default_factory=list)\n \n def to_dict(self) -> Dict:\n return {\n \"name\": self.name,\n \"gid\": self.gid,\n \"members\": self.members,\n }\n\n\n# ═══════════════════════════════════════════════════════════════\n# Node Ownership\n# ═══════════════════════════════════════════════════════════════\n\n@dataclass\nclass NodeOwnership:\n \"\"\"Ownership metadata for a node\"\"\"\n owner: str = \"root\"\n group: str = \"root\"\n mode: int = 0o644 # rw-r--r--\n \n @classmethod\n def from_meta(cls, meta: Dict) -> \"NodeOwnership\":\n return cls(\n owner=meta.get(\"owner\", \"root\"),\n group=meta.get(\"group\", \"root\"),\n mode=meta.get(\"mode\", 0o644),\n )\n \n def to_meta(self) -> Dict:\n return {\n \"owner\": self.owner,\n \"group\": self.group,\n \"mode\": self.mode,\n }\n \n def mode_string(self) -> str:\n return mode_to_string(self.mode)\n \n def can_read(self, user: User) -> bool:\n \"\"\"Check if user can read\"\"\"\n if user.is_root:\n return True\n \n perms = parse_mode(self.mode)\n \n if self.owner == user.name:\n return bool(perms[\"owner\"] & PermBits.R)\n \n if user.in_group(self.group):\n return bool(perms[\"group\"] & PermBits.R)\n \n return bool(perms[\"other\"] & PermBits.R)\n \n def can_write(self, user: User) -> bool:\n \"\"\"Check if user can write\"\"\"\n if user.is_root:\n return True\n \n perms = parse_mode(self.mode)\n \n if self.owner == user.name:\n return bool(perms[\"owner\"] & PermBits.W)\n \n if user.in_group(self.group):\n return bool(perms[\"group\"] & PermBits.W)\n \n return bool(perms[\"other\"] & PermBits.W)\n \n def can_execute(self, user: User) -> bool:\n \"\"\"Check if user can execute (access directory)\"\"\"\n if user.is_root:\n return True\n \n perms = parse_mode(self.mode)\n \n if self.owner == user.name:\n return bool(perms[\"owner\"] & PermBits.X)\n \n if user.in_group(self.group):\n return bool(perms[\"group\"] & PermBits.X)\n \n return bool(perms[\"other\"] & PermBits.X)\n\n\n# ═══════════════════════════════════════════════════════════════\n# User Registry\n# ═══════════════════════════════════════════════════════════════\n\nclass UserRegistry:\n \"\"\"User and group management\"\"\"\n \n def __init__(self):\n self._users: Dict[str, User] = {}\n self._groups: Dict[str, Group] = {}\n self._api_keys: Dict[str, str] = {} # api_key -> username\n self._next_uid = 1000\n self._next_gid = 1000\n \n # Create root user\n self._create_root()\n \n def _create_root(self):\n \"\"\"Create root user\"\"\"\n root = User(\n name=\"root\",\n uid=0,\n groups=[\"root\", \"*\"],\n capabilities=list(Capability),\n )\n self._users[\"root\"] = root\n self._groups[\"root\"] = Group(name=\"root\", gid=0, members=[\"root\"])\n \n def create_user(self, name: str, \n groups: List[str] = None,\n capabilities: List[Capability] = None,\n generate_api_key: bool = True) -> User:\n \"\"\"Create a new user\"\"\"\n if name in self._users:\n raise ValueError(f\"User already exists: {name}\")\n \n uid = self._next_uid\n self._next_uid += 1\n \n user = User(\n name=name,\n uid=uid,\n groups=groups or [],\n capabilities=capabilities or [Capability.CAP_SEARCH_OWN, Capability.CAP_WRITE],\n )\n \n if generate_api_key:\n user.api_key = self._generate_api_key()\n self._api_keys[user.api_key] = name\n \n self._users[name] = user\n \n # Add to groups\n for group in user.groups:\n if group not in self._groups:\n self.create_group(group)\n self._groups[group].members.append(name)\n \n return user\n \n def create_group(self, name: str) -> Group:\n \"\"\"Create a new group\"\"\"\n if name in self._groups:\n return self._groups[name]\n \n gid = self._next_gid\n self._next_gid += 1\n \n group = Group(name=name, gid=gid)\n self._groups[name] = group\n \n return group\n \n def get_user(self, name: str) -> Optional[User]:\n \"\"\"Get user by name\"\"\"\n return self._users.get(name)\n \n def get_user_by_api_key(self, api_key: str) -> Optional[User]:\n \"\"\"Get user by API key\"\"\"\n username = self._api_keys.get(api_key)\n if username:\n return self._users.get(username)\n return None\n \n def authenticate(self, api_key: str) -> Optional[User]:\n \"\"\"Authenticate by API key\"\"\"\n return self.get_user_by_api_key(api_key)\n \n def _generate_api_key(self) -> str:\n \"\"\"Generate a secure API key\"\"\"\n return f\"vfs_{secrets.token_urlsafe(32)}\"\n \n def list_users(self) -> List[User]:\n \"\"\"List all users\"\"\"\n return list(self._users.values())\n \n def list_groups(self) -> List[Group]:\n \"\"\"List all groups\"\"\"\n return list(self._groups.values())\n \n def delete_user(self, name: str) -> bool:\n \"\"\"Delete a user\"\"\"\n if name == \"root\":\n raise ValueError(\"Cannot delete root user\")\n \n user = self._users.pop(name, None)\n if user:\n if user.api_key:\n self._api_keys.pop(user.api_key, None)\n for group in self._groups.values():\n if name in group.members:\n group.members.remove(name)\n return True\n return False\n \n def load_from_dict(self, data: Dict):\n \"\"\"Load users and groups from dict\"\"\"\n for name, user_data in data.get(\"users\", {}).items():\n if name == \"root\":\n # Update root capabilities\n root = self._users[\"root\"]\n if \"capabilities\" in user_data:\n root.capabilities = [\n Capability(c) for c in user_data[\"capabilities\"]\n ]\n continue\n \n caps = [Capability(c) for c in user_data.get(\"capabilities\", [])]\n self.create_user(\n name=name,\n groups=user_data.get(\"groups\", []),\n capabilities=caps,\n generate_api_key=user_data.get(\"generate_api_key\", True),\n )\n \n for name, group_data in data.get(\"groups\", {}).items():\n group = self.create_group(name)\n group.members = group_data.get(\"members\", [])\n\n\n# ═══════════════════════════════════════════════════════════════\n# Permission Manager\n# ═══════════════════════════════════════════════════════════════\n\nclass PermissionManager:\n \"\"\"\n Central permission management\n \n Combines Unix-style permissions with capabilities.\n \"\"\"\n \n def __init__(self, user_registry: UserRegistry = None):\n self.registry = user_registry or UserRegistry()\n self._sudo_sessions: Dict[str, datetime] = {} # user -> expiry\n \n def check_read(self, user: User, ownership: NodeOwnership) -> bool:\n \"\"\"Check read permission\"\"\"\n return ownership.can_read(user)\n \n def check_write(self, user: User, ownership: NodeOwnership) -> bool:\n \"\"\"Check write permission\"\"\"\n if not user._capability(Capability.CAP_WRITE):\n return False\n return ownership.can_write(user)\n \n def check_delete(self, user: User, ownership: NodeOwnership) -> bool:\n \"\"\"Check delete permission\"\"\"\n if not user._capability(Capability.CAP_DELETE):\n return False\n return ownership.can_write(user)\n \n def check_search(self, user: User, path: str) -> bool:\n \"\"\"Check if user can search this path\"\"\"\n if user._capability(Capability.CAP_SEARCH_ALL):\n return True\n \n if user._capability(Capability.CAP_SEARCH_OWN):\n # Can only search own home and shared\n return (path.startswith(user.home) or \n path.startswith(\"/memory/shared\"))\n \n return False\n \n def sudo(self, user: User, duration_minutes: int = 5) -> bool:\n \"\"\"Elevate privileges temporarily\"\"\"\n if not user._capability(Capability.CAP_SUDO):\n return False\n \n expiry = utcnow() + timedelta(minutes=duration_minutes)\n self._sudo_sessions[user.name] = expiry\n return True\n \n def is_sudo(self, user: User) -> bool:\n \"\"\"Check if user active sudo session\"\"\"\n expiry = self._sudo_sessions.get(user.name)\n if expiry and expiry > utcnow():\n return True\n return False\n \n def get_effective_user(self, user: User) -> User:\n \"\"\"Get effective user (root if sudo active)\"\"\"\n if self.is_sudo(user):\n return self.registry.get_user(\"root\")\n return user\n \n def chown(self, ownership: NodeOwnership, \n new_owner: str = None, new_group: str = None,\n user: User = None) -> bool:\n \"\"\"Change ownership (requires root or owner)\"\"\"\n if user and not user.is_root and ownership.owner != user.name:\n return False\n \n if new_owner:\n ownership.owner = new_owner\n if new_group:\n ownership.group = new_group\n \n return True\n \n def chmod(self, ownership: NodeOwnership,\n mode: int, user: User = None) -> bool:\n \"\"\"Change mode (requires root or owner)\"\"\"\n if user and not user.is_root and ownership.owner != user.name:\n return False\n \n ownership.mode = mode\n return True\n \n def get_default_ownership(self, user: User) -> NodeOwnership:\n \"\"\"Get default ownership for new files\"\"\"\n return NodeOwnership(\n owner=user.name,\n group=user.groups[0] if user.groups else \"users\",\n mode=0o644, # rw-r--r--\n )\n\n\n# ═══════════════════════════════════════════════════════════════\n# API Key Authentication (for Skills)\n# ═══════════════════════════════════════════════════════════════\n\n@dataclass\nclass APIKeyScope:\n \"\"\"Scope limitations for API key\"\"\"\n paths: List[str] = field(default_factory=lambda: [\"*\"])\n actions: List[str] = field(default_factory=lambda: [\"read\"])\n rate_limit: int = 1000 # requests per hour\n expires_at: Optional[datetime] = None\n\n\nclass APIKeyManager:\n \"\"\"\n Manage API keys for skill authentication\n \"\"\"\n \n def __init__(self, user_registry: UserRegistry):\n self.registry = user_registry\n self._scopes: Dict[str, APIKeyScope] = {} # api_key -> scope\n \n def create_key(self, user: User, \n scope: APIKeyScope = None,\n expires_days: int = None) -> str:\n \"\"\"Create a scoped API key\"\"\"\n key = f\"vfs_{secrets.token_urlsafe(32)}\"\n \n if scope is None:\n scope = APIKeyScope()\n \n if expires_days:\n scope.expires_at = utcnow() + timedelta(days=expires_days)\n \n self._scopes[key] = scope\n self.registry._api_keys[key] = user.name\n \n return key\n \n def validate_key(self, key: str, path: str = None, \n action: str = None) -> Optional[User]:\n \"\"\"Validate API key and check scope\"\"\"\n user = self.registry.authenticate(key)\n if not user:\n return None\n \n scope = self._scopes.get(key)\n if scope:\n # Check expiry\n if scope.expires_at and scope.expires_at \u003c utcnow():\n return None\n \n # Check path\n if path and scope.paths != [\"*\"]:\n if not any(path.startswith(p.rstrip(\"*\")) for p in scope.paths):\n return None\n \n # Check action\n if action and action not in scope.actions:\n return None\n \n return user\n \n def revoke_key(self, key: str) -> bool:\n \"\"\"Revoke an API key\"\"\"\n if key in self._scopes:\n del self._scopes[key]\n if key in self.registry._api_keys:\n del self.registry._api_keys[key]\n return True\n return False\n","content_type":"text/x-python; charset=utf-8","language":"python","size":18651,"content_sha256":"f81688014097fbe0a5821e76f63c159b4a1cf7f2a41ecc7e928e7a157fbab928"},{"filename":"avm/providers/__init__.py","content":"\"\"\"\nVFS Providers\n\"\"\"\n\nfrom .base import AVMProvider, LiveProvider, StaticProvider\nfrom .alpaca import AlpacaPositionsProvider, AlpacaOrdersProvider\nfrom .indicators import TechnicalIndicatorsProvider\nfrom .memory import MemoryProvider\nfrom .news import NewsProvider\nfrom .watchlist import WatchlistProvider\nfrom .http_json import HttpJsonProvider\n\n__all__ = [\n \"AVMProvider\",\n \"LiveProvider\",\n \"StaticProvider\",\n \"AlpacaPositionsProvider\",\n \"AlpacaOrdersProvider\",\n \"TechnicalIndicatorsProvider\",\n \"MemoryProvider\",\n \"NewsProvider\",\n \"WatchlistProvider\",\n \"HttpJsonProvider\",\n]\n","content_type":"text/x-python; charset=utf-8","language":"python","size":609,"content_sha256":"7d97668c4b09c4d699749ee39dfde5c700391cc00e94f5e6379d9d61741695d7"},{"filename":"avm/providers/alpaca.py","content":"\"\"\"\nvfs/providers/alpaca.py - Alpaca Trading API Provider\n\"\"\"\n\nimport json\nfrom datetime import datetime\nfrom typing import Optional, Any\n\nfrom .base import LiveProvider\nfrom ..node import AVMNode\nfrom ..store import AVMStore\nfrom ..utils import utcnow\n\n\nclass AlpacaPositionsProvider(LiveProvider):\n \"\"\"\n Alpaca positions data\n \n path:\n /live/positions.md - Positions overview\n /live/positions/account.md - accountinfo\n /live/positions/AAPL.md - singlepositions\n \"\"\"\n \n def __init__(self, store: AVMStore, \n api_key: str, secret_key: str,\n base_url: str = \"https://paper-api.alpaca.markets\",\n ttl_seconds: int = 60):\n super().__init__(store, \"/live/positions\", ttl_seconds)\n self.api_key = api_key\n self.secret_key = secret_key\n self.base_url = base_url\n \n def _api_request(self, endpoint: str) -> Any:\n import urllib.request\n \n req = urllib.request.Request(\n f\"{self.base_url}{endpoint}\",\n headers={\n \"APCA-API-KEY-ID\": self.api_key,\n \"APCA-API-SECRET-KEY\": self.secret_key,\n }\n )\n \n with urllib.request.urlopen(req, timeout=10) as r:\n return json.loads(r.read())\n \n def fetch(self, path: str) -> Optional[AVMNode]:\n try:\n if path == \"/live/positions.md\":\n return self._fetch_positions()\n elif path == \"/live/positions/account.md\":\n return self._fetch_account()\n elif path.startswith(\"/live/positions/\"):\n symbol = path.split(\"/\")[-1].replace(\".md\", \"\")\n return self._fetch_position(symbol)\n except Exception as e:\n return self._make_node(\n path,\n f\"# Error\\n\\nFailed to fetch: {e}\",\n {\"error\": str(e)}\n )\n return None\n \n def _fetch_positions(self) -> AVMNode:\n positions = self._api_request(\"/v2/positions\")\n account = self._api_request(\"/v2/account\")\n \n lines = [\n \"# Portfolio Positions\",\n \"\",\n f\"**Equity:** ${float(account.get('equity', 0)):,.2f}\",\n f\"**Cash:** ${float(account.get('cash', 0)):,.2f}\",\n f\"**Buying Power:** ${float(account.get('buying_power', 0)):,.2f}\",\n \"\",\n \"## Positions\",\n \"\",\n \"| Symbol | Qty | Avg Cost | Current | P/L | P/L % |\",\n \"|--------|-----|----------|---------|-----|-------|\",\n ]\n \n total_pl = 0\n for pos in positions:\n symbol = pos[\"symbol\"]\n qty = int(pos[\"qty\"])\n avg_cost = float(pos[\"avg_entry_price\"])\n current = float(pos[\"current_price\"])\n pl = float(pos[\"unrealized_pl\"])\n pl_pct = float(pos[\"unrealized_plpc\"]) * 100\n total_pl += pl\n \n lines.append(\n f\"| {symbol} | {qty} | ${avg_cost:.2f} | ${current:.2f} | \"\n f\"${pl:+,.2f} | {pl_pct:+.2f}% |\"\n )\n \n lines.extend([\n \"\",\n f\"**Total Unrealized P/L:** ${total_pl:+,.2f}\",\n \"\",\n f\"*Updated: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\",\n ])\n \n return self._make_node(\n \"/live/positions.md\",\n \"\\n\".join(lines),\n {\"position_count\": len(positions), \"total_pl\": total_pl}\n )\n \n def _fetch_account(self) -> AVMNode:\n account = self._api_request(\"/v2/account\")\n \n lines = [\n \"# Account Summary\",\n \"\",\n f\"- **Account ID:** {account.get('id', 'N/A')}\",\n f\"- **Status:** {account.get('status', 'N/A')}\",\n f\"- **Equity:** ${float(account.get('equity', 0)):,.2f}\",\n f\"- **Cash:** ${float(account.get('cash', 0)):,.2f}\",\n f\"- **Buying Power:** ${float(account.get('buying_power', 0)):,.2f}\",\n f\"- **Portfolio Value:** ${float(account.get('portfolio_value', 0)):,.2f}\",\n f\"- **Day Trade Count:** {account.get('daytrade_count', 0)}\",\n \"\",\n f\"*Updated: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\",\n ]\n \n return self._make_node(\n \"/live/positions/account.md\",\n \"\\n\".join(lines),\n {\"account_id\": account.get(\"id\")}\n )\n \n def _fetch_position(self, symbol: str) -> AVMNode:\n try:\n pos = self._api_request(f\"/v2/positions/{symbol}\")\n except Exception:\n return self._make_node(\n f\"/live/positions/{symbol}.md\",\n f\"# {symbol}\\n\\nNo position found.\",\n {\"symbol\": symbol, \"_position\": False}\n )\n \n lines = [\n f\"# {symbol} Position\",\n \"\",\n f\"- **Quantity:** {pos['qty']}\",\n f\"- **Avg Entry Price:** ${float(pos['avg_entry_price']):.2f}\",\n f\"- **Current Price:** ${float(pos['current_price']):.2f}\",\n f\"- **Market Value:** ${float(pos['market_value']):,.2f}\",\n f\"- **Unrealized P/L:** ${float(pos['unrealized_pl']):+,.2f}\",\n f\"- **Unrealized P/L %:** {float(pos['unrealized_plpc'])*100:+.2f}%\",\n \"\",\n f\"*Updated: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\",\n ]\n \n return self._make_node(\n f\"/live/positions/{symbol}.md\",\n \"\\n\".join(lines),\n {\n \"symbol\": symbol,\n \"_position\": True,\n \"qty\": int(pos[\"qty\"]),\n \"market_value\": float(pos[\"market_value\"]),\n }\n )\n\n\nclass AlpacaOrdersProvider(LiveProvider):\n \"\"\"\n Alpaca orders data\n \n path:\n /live/orders.md - allorders\n /live/orders/open.md - notfilledorders\n /live/orders/filled.md - alreadyfilledorders\n \"\"\"\n \n def __init__(self, store: AVMStore,\n api_key: str, secret_key: str,\n base_url: str = \"https://paper-api.alpaca.markets\",\n ttl_seconds: int = 30):\n super().__init__(store, \"/live/orders\", ttl_seconds)\n self.api_key = api_key\n self.secret_key = secret_key\n self.base_url = base_url\n \n def _api_request(self, endpoint: str) -> Any:\n import urllib.request\n \n req = urllib.request.Request(\n f\"{self.base_url}{endpoint}\",\n headers={\n \"APCA-API-KEY-ID\": self.api_key,\n \"APCA-API-SECRET-KEY\": self.secret_key,\n }\n )\n \n with urllib.request.urlopen(req, timeout=10) as r:\n return json.loads(r.read())\n \n def fetch(self, path: str) -> Optional[AVMNode]:\n try:\n if path == \"/live/orders.md\":\n return self._fetch_orders(\"all\")\n elif path == \"/live/orders/open.md\":\n return self._fetch_orders(\"open\")\n elif path == \"/live/orders/filled.md\":\n return self._fetch_orders(\"filled\")\n except Exception as e:\n return self._make_node(path, f\"# Error\\n\\n{e}\", {\"error\": str(e)})\n return None\n \n def _fetch_orders(self, status: str) -> AVMNode:\n endpoint = f\"/v2/orders?status={status}&limit=50\"\n orders = self._api_request(endpoint)\n \n lines = [\n f\"# Orders ({status.title()})\",\n \"\",\n \"| Symbol | Side | Qty | Type | Status | Created |\",\n \"|--------|------|-----|------|--------|---------|\",\n ]\n \n for o in orders:\n created = o.get(\"created_at\", \"\")[:10]\n lines.append(\n f\"| {o['symbol']} | {o['side']} | {o['qty']} | \"\n f\"{o['type']} | {o['status']} | {created} |\"\n )\n \n lines.extend([\n \"\",\n f\"**Total:** {len(orders)} orders\",\n \"\",\n f\"*Updated: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\",\n ])\n \n path = \"/live/orders.md\" if status == \"all\" else f\"/live/orders/{status}.md\"\n return self._make_node(path, \"\\n\".join(lines), {\"order_count\": len(orders)})\n","content_type":"text/x-python; charset=utf-8","language":"python","size":8327,"content_sha256":"591620bc571bb7078ff98ffd5a0e1b4d4d08049a44c271549cca09d47c04be95"},{"filename":"avm/providers/base.py","content":"\"\"\"\nvfs/providers/base.py - Provider base class\n\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom datetime import datetime\nfrom typing import Dict, List, Optional, Any\n\nfrom ..node import AVMNode, NodeType\nfrom ..store import AVMStore\n\n\nclass AVMProvider(ABC):\n \"\"\"\n Data provider base class\n \"\"\"\n \n def __init__(self, store: AVMStore, prefix: str):\n self.store = store\n self.prefix = prefix\n \n @abstractmethod\n def fetch(self, path: str) -> Optional[AVMNode]:\n \"\"\"Fetch data from source\"\"\"\n pass\n \n def get(self, path: str, force_refresh: bool = False) -> Optional[AVMNode]:\n \"\"\"Get node (with cache)\"\"\"\n if not path.startswith(self.prefix):\n return None\n \n cached = self.store.get_node(path)\n \n if cached and not force_refresh:\n if not cached.is_expired:\n return cached\n \n node = self.fetch(path)\n if node:\n self.store._put_node_internal(node, save_diff=True)\n \n return node\n \n def refresh_all(self) -> int:\n \"\"\"Refresh all nodes\"\"\"\n count = 0\n for node in self.store.list_nodes(self.prefix):\n refreshed = self.get(node.path, force_refresh=True)\n if refreshed:\n count += 1\n return count\n\n\nclass LiveProvider(AVMProvider):\n \"\"\"Live data provider (with TTL)\"\"\"\n \n def __init__(self, store: AVMStore, prefix: str, ttl_seconds: int = 300):\n super().__init__(store, prefix)\n self.ttl_seconds = ttl_seconds\n \n def _make_node(self, path: str, content: str, \n meta: Dict = None) -> AVMNode:\n node_meta = meta or {}\n node_meta[\"ttl_seconds\"] = self.ttl_seconds\n node_meta[\"provider\"] = self.__class__.__name__\n \n return AVMNode(\n path=path,\n content=content,\n meta=node_meta,\n node_type=NodeType.FILE,\n )\n\n\nclass StaticProvider(AVMProvider):\n \"\"\"Static data provider\"\"\"\n \n def _make_node(self, path: str, content: str,\n meta: Dict = None) -> AVMNode:\n node_meta = meta or {}\n node_meta[\"provider\"] = self.__class__.__name__\n \n return AVMNode(\n path=path,\n content=content,\n meta=node_meta,\n node_type=NodeType.FILE,\n )\n","content_type":"text/x-python; charset=utf-8","language":"python","size":2401,"content_sha256":"d501edac008e1af16fe5b36b62054be96aa73183fb339cad8c15cf69307e5d87"},{"filename":"avm/providers/http_json.py","content":"\"\"\"\navm/providers/http_json.py - Generic HTTP JSON Provider\n\nFetch JSON from HTTP API and format as Markdown\n\"\"\"\n\nimport json\nimport urllib.request\nfrom datetime import datetime\nfrom typing import Optional, Dict, Any\n\nfrom .base import LiveProvider\nfrom ..node import AVMNode\nfrom ..store import AVMStore\nfrom ..utils import utcnow\n\n\nclass HttpJsonProvider(LiveProvider):\n \"\"\"\n Generic HTTP JSON Provider\n \n Config:\n base_url: API base URL\n token: Bearer token (optional)\n headers: Custom request headers (optional)\n path_mapping: Path to API endpoint mapping (optional)\n \"\"\"\n \n def __init__(self, store: AVMStore, prefix: str, ttl_seconds: int = 60,\n base_url: str = \"\", token: str = \"\", \n headers: Dict[str, str] = None,\n path_mapping: Dict[str, str] = None):\n super().__init__(store, prefix, ttl_seconds)\n self.base_url = base_url.rstrip(\"/\")\n self.token = token\n self.extra_headers = headers or {}\n self.path_mapping = path_mapping or {}\n \n def _get_endpoint(self, path: str) -> str:\n \"\"\"Convert VFS path to API endpoint\"\"\"\n # removeprefix\n rel_path = path[len(self.prefix):].lstrip(\"/\")\n \n # checkmapping\n if path in self.path_mapping:\n return self.path_mapping[path]\n \n # default:directlyusepath\n return f\"/{rel_path}\".replace(\".md\", \"\")\n \n def _request(self, endpoint: str) -> Any:\n \"\"\"send HTTP request\"\"\"\n url = f\"{self.base_url}{endpoint}\"\n \n headers = {\"User-Agent\": \"VFS/1.0\"}\n headers.update(self.extra_headers)\n \n if self.token:\n headers[\"Authorization\"] = f\"Bearer {self.token}\"\n \n req = urllib.request.Request(url, headers=headers)\n \n with urllib.request.urlopen(req, timeout=10) as r:\n return json.loads(r.read())\n \n def _format_json_to_md(self, data: Any, title: str = \"\") -> str:\n \"\"\"Format JSON data as Markdown\"\"\"\n lines = []\n \n if title:\n lines.append(f\"# {title}\")\n lines.append(\"\")\n \n if isinstance(data, dict):\n for key, value in data.items():\n if isinstance(value, (dict, list)):\n lines.append(f\"## {key}\")\n lines.append(\"\")\n lines.append(\"```json\")\n lines.append(json.dumps(value, indent=2))\n lines.append(\"```\")\n else:\n lines.append(f\"- **{key}:** {value}\")\n elif isinstance(data, list):\n lines.append(\"| # | Value |\")\n lines.append(\"|---|-------|\")\n for i, item in enumerate(data[:50]): # Limit line count\n if isinstance(item, dict):\n lines.append(f\"| {i} | {json.dumps(item)} |\")\n else:\n lines.append(f\"| {i} | {item} |\")\n else:\n lines.append(str(data))\n \n lines.append(\"\")\n lines.append(f\"*Updated: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\")\n \n return \"\\n\".join(lines)\n \n def fetch(self, path: str) -> Optional[AVMNode]:\n \"\"\"getdata\"\"\"\n try:\n endpoint = self._get_endpoint(path)\n data = self._request(endpoint)\n \n # Format as Markdown\n title = path.split(\"/\")[-1].replace(\".md\", \"\").replace(\"_\", \" \").title()\n content = self._format_json_to_md(data, title)\n \n return self._make_node(path, content, {\"raw_data\": data})\n \n except Exception as e:\n return self._make_node(\n path,\n f\"# Error\\n\\nFailed to fetch: {e}\",\n {\"error\": str(e)}\n )\n","content_type":"text/x-python; charset=utf-8","language":"python","size":3866,"content_sha256":"262e164c8dac68bc45ab997befcd15ff15896a559b0bd0853fea64ee90ef484b"},{"filename":"avm/providers/indicators.py","content":"\"\"\"\nvfs/providers/indicators.py - technical indicators Provider\n\nFetch data from Yahoo Finance and calculate technical indicators\n\"\"\"\n\nimport json\nfrom datetime import datetime\nfrom typing import Optional, Dict, Any, List\nimport urllib.request\n\nfrom .base import LiveProvider\nfrom ..node import AVMNode\nfrom ..store import AVMStore\nfrom ..utils import utcnow\n\n\nclass TechnicalIndicatorsProvider(LiveProvider):\n \"\"\"\n technical indicatorsdata\n \n path:\n /live/indicators/AAPL.md - Single stock complete indicators\n /live/indicators/AAPL/rsi.md - RSI\n /live/indicators/AAPL/macd.md - MACD\n /live/indicators/AAPL/ma.md - Moving average lines\n /live/indicators/AAPL/bb.md - Bollinger Bands\n \"\"\"\n \n def __init__(self, store: AVMStore, ttl_seconds: int = 300):\n super().__init__(store, \"/live/indicators\", ttl_seconds)\n \n def fetch(self, path: str) -> Optional[AVMNode]:\n parts = path.replace(\"/live/indicators/\", \"\").split(\"/\")\n if not parts or not parts[0]:\n return None\n \n symbol = parts[0].replace(\".md\", \"\").upper()\n indicator = parts[1].replace(\".md\", \"\") if len(parts) > 1 else None\n \n try:\n data = self._fetch_yahoo_data(symbol)\n if indicator:\n return self._make_indicator_node(symbol, indicator, data)\n else:\n return self._make_full_report(symbol, data)\n except Exception as e:\n return self._make_node(\n path,\n f\"# Error\\n\\nFailed to fetch {symbol}: {e}\",\n {\"error\": str(e), \"symbol\": symbol}\n )\n \n def _fetch_yahoo_data(self, symbol: str, days: int = 120) -> Dict[str, Any]:\n \"\"\"Fetch historical data from Yahoo Finance\"\"\"\n end = int(datetime.now().timestamp())\n start = end - days * 86400\n \n url = (\n f\"https://query1.finance.yahoo.com/v8/finance/chart/{symbol}\"\n f\"?interval=1d&period1={start}&period2={end}\"\n )\n \n req = urllib.request.Request(url, headers={\"User-Agent\": \"Mozilla/5.0\"})\n with urllib.request.urlopen(req, timeout=10) as r:\n data = json.loads(r.read())\n \n result = data.get(\"chart\", {}).get(\"result\", [{}])[0]\n quote = result.get(\"indicators\", {}).get(\"quote\", [{}])[0]\n \n closes = [c for c in quote.get(\"close\", []) if c is not None]\n highs = [h for h in quote.get(\"high\", []) if h is not None]\n lows = [l for l in quote.get(\"low\", []) if l is not None]\n volumes = [v for v in quote.get(\"volume\", []) if v is not None]\n \n return {\n \"symbol\": symbol,\n \"closes\": closes,\n \"highs\": highs,\n \"lows\": lows,\n \"volumes\": volumes,\n \"current_price\": closes[-1] if closes else 0,\n }\n \n def _calc_rsi(self, closes: List[float], period: int = 14) -> Optional[float]:\n if len(closes) \u003c period + 1:\n return None\n \n gains = []\n losses = []\n for i in range(1, len(closes)):\n diff = closes[i] - closes[i-1]\n gains.append(diff if diff > 0 else 0)\n losses.append(-diff if diff \u003c 0 else 0)\n \n avg_gain = sum(gains[:period]) / period\n avg_loss = sum(losses[:period]) / period\n \n for i in range(period, len(gains)):\n avg_gain = (avg_gain * (period - 1) + gains[i]) / period\n avg_loss = (avg_loss * (period - 1) + losses[i]) / period\n \n if avg_loss == 0:\n return 100\n rs = avg_gain / avg_loss\n return 100 - (100 / (1 + rs))\n \n def _calc_ema(self, closes: List[float], period: int) -> Optional[float]:\n if len(closes) \u003c period:\n return None\n k = 2 / (period + 1)\n ema = sum(closes[:period]) / period\n for price in closes[period:]:\n ema = price * k + ema * (1 - k)\n return ema\n \n def _calc_sma(self, closes: List[float], period: int) -> Optional[float]:\n if len(closes) \u003c period:\n return None\n return sum(closes[-period:]) / period\n \n def _calc_macd(self, closes: List[float]) -> Optional[Dict[str, float]]:\n if len(closes) \u003c 35:\n return None\n \n ema12 = self._calc_ema(closes, 12)\n ema26 = self._calc_ema(closes, 26)\n \n if ema12 is None or ema26 is None:\n return None\n \n macd_line = ema12 - ema26\n \n # calculate MACD historyfor Signal\n macd_history = []\n for i in range(26, len(closes) + 1):\n e12 = self._calc_ema(closes[:i], 12)\n e26 = self._calc_ema(closes[:i], 26)\n if e12 and e26:\n macd_history.append(e12 - e26)\n \n if len(macd_history) \u003c 9:\n return None\n \n # Signal = EMA9 of MACD\n k = 2 / 10\n signal = sum(macd_history[:9]) / 9\n for m in macd_history[9:]:\n signal = m * k + signal * (1 - k)\n \n histogram = macd_line - signal\n \n # Detect golden cross/death cross\n prev_macd = macd_history[-2] if len(macd_history) > 1 else None\n prev_hist = None\n if prev_macd and len(macd_history) > 2:\n prev_k = 2 / 10\n prev_signal = sum(macd_history[:9]) / 9\n for m in macd_history[9:-1]:\n prev_signal = m * prev_k + prev_signal * (1 - prev_k)\n prev_hist = prev_macd - prev_signal\n \n cross = \"none\"\n if prev_hist is not None:\n if prev_hist \u003c 0 and histogram > 0:\n cross = \"golden\" # golden cross\n elif prev_hist > 0 and histogram \u003c 0:\n cross = \"death\" # death cross\n \n return {\n \"macd\": macd_line,\n \"signal\": signal,\n \"histogram\": histogram,\n \"cross\": cross,\n }\n \n def _calc_bollinger(self, closes: List[float], period: int = 20, \n num_std: float = 2) -> Optional[Dict[str, float]]:\n if len(closes) \u003c period:\n return None\n \n recent = closes[-period:]\n sma = sum(recent) / period\n variance = sum((x - sma) ** 2 for x in recent) / period\n std = variance ** 0.5\n \n upper = sma + num_std * std\n lower = sma - num_std * std\n \n price = closes[-1]\n bb_pct = (price - lower) / (upper - lower) if upper != lower else 0.5\n \n return {\n \"upper\": upper,\n \"middle\": sma,\n \"lower\": lower,\n \"width\": (upper - lower) / sma,\n \"percent\": bb_pct,\n }\n \n def _calc_atr(self, highs: List[float], lows: List[float], \n closes: List[float], period: int = 14) -> Optional[float]:\n if len(closes) \u003c period + 1:\n return None\n \n trs = []\n for i in range(1, len(closes)):\n tr = max(\n highs[i] - lows[i],\n abs(highs[i] - closes[i-1]),\n abs(lows[i] - closes[i-1])\n )\n trs.append(tr)\n \n if len(trs) \u003c period:\n return None\n \n atr = sum(trs[:period]) / period\n for tr in trs[period:]:\n atr = (atr * (period - 1) + tr) / period\n \n return atr\n \n def _make_full_report(self, symbol: str, data: Dict) -> AVMNode:\n closes = data[\"closes\"]\n highs = data[\"highs\"]\n lows = data[\"lows\"]\n price = data[\"current_price\"]\n \n rsi = self._calc_rsi(closes)\n macd = self._calc_macd(closes)\n bb = self._calc_bollinger(closes)\n atr = self._calc_atr(highs, lows, closes)\n sma20 = self._calc_sma(closes, 20)\n sma50 = self._calc_sma(closes, 50)\n ema12 = self._calc_ema(closes, 12)\n ema26 = self._calc_ema(closes, 26)\n \n lines = [\n f\"# {symbol} Technical Indicators\",\n \"\",\n f\"**Current Price:** ${price:.2f}\",\n \"\",\n \"## RSI (14)\",\n f\"- **Value:** {rsi:.1f}\" if rsi else \"- N/A\",\n ]\n \n if rsi:\n if rsi \u003c 30:\n lines.append(\"- **Signal:** 🟢 Oversold (potential buy)\")\n elif rsi > 70:\n lines.append(\"- **Signal:** 🔴 Overbought (potential sell)\")\n else:\n lines.append(\"- **Signal:** ⚪ Neutral\")\n \n lines.extend([\n \"\",\n \"## MACD\",\n ])\n \n if macd:\n lines.extend([\n f\"- **MACD Line:** {macd['macd']:.4f}\",\n f\"- **Signal Line:** {macd['signal']:.4f}\",\n f\"- **Histogram:** {macd['histogram']:.4f}\",\n ])\n if macd[\"cross\"] == \"golden\":\n lines.append(\"- **Signal:** 🟢 Golden Cross (bullish)\")\n elif macd[\"cross\"] == \"death\":\n lines.append(\"- **Signal:** 🔴 Death Cross (beenarish)\")\n else:\n lines.append(\"- **Signal:** ⚪ No crossover\")\n else:\n lines.append(\"- N/A\")\n \n lines.extend([\n \"\",\n \"## Moving Averages\",\n f\"- **SMA 20:** ${sma20:.2f}\" if sma20 else \"- SMA 20: N/A\",\n f\"- **SMA 50:** ${sma50:.2f}\" if sma50 else \"- SMA 50: N/A\",\n f\"- **EMA 12:** ${ema12:.2f}\" if ema12 else \"- EMA 12: N/A\",\n f\"- **EMA 26:** ${ema26:.2f}\" if ema26 else \"- EMA 26: N/A\",\n ])\n \n if sma20 and sma50:\n if sma20 > sma50:\n lines.append(\"- **Trend:** 🟢 SMA20 > SMA50 (bullish)\")\n else:\n lines.append(\"- **Trend:** 🔴 SMA20 \u003c SMA50 (beenarish)\")\n \n if price and sma50:\n if price > sma50:\n lines.append(f\"- **Price vs SMA50:** 🟢 Above (+{(price/sma50-1)*100:.1f}%)\")\n else:\n lines.append(f\"- **Price vs SMA50:** 🔴 Below ({(price/sma50-1)*100:.1f}%)\")\n \n lines.extend([\n \"\",\n \"## Bollinger Bands\",\n ])\n \n if bb:\n lines.extend([\n f\"- **Upper:** ${bb['upper']:.2f}\",\n f\"- **Middle:** ${bb['middle']:.2f}\",\n f\"- **Lower:** ${bb['lower']:.2f}\",\n f\"- **Width:** {bb['width']*100:.1f}%\",\n f\"- **%B:** {bb['percent']*100:.1f}%\",\n ])\n if bb[\"percent\"] \u003c 0.2:\n lines.append(\"- **Signal:** 🟢 Near lower band (potential bounce)\")\n elif bb[\"percent\"] > 0.8:\n lines.append(\"- **Signal:** 🔴 Near upper band (potential pullback)\")\n else:\n lines.append(\"- N/A\")\n \n lines.extend([\n \"\",\n \"## Volatility\",\n f\"- **ATR (14):** ${atr:.2f}\" if atr else \"- ATR: N/A\",\n ])\n \n if atr and price:\n lines.append(f\"- **ATR %:** {atr/price*100:.2f}%\")\n \n lines.extend([\n \"\",\n f\"*Updated: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\",\n ])\n \n return self._make_node(\n f\"/live/indicators/{symbol}.md\",\n \"\\n\".join(lines),\n {\n \"symbol\": symbol,\n \"price\": price,\n \"rsi\": rsi,\n \"macd\": macd,\n \"bb\": bb,\n \"atr\": atr,\n }\n )\n \n def _make_indicator_node(self, symbol: str, indicator: str, \n data: Dict) -> AVMNode:\n closes = data[\"closes\"]\n highs = data[\"highs\"]\n lows = data[\"lows\"]\n price = data[\"current_price\"]\n \n if indicator == \"rsi\":\n rsi = self._calc_rsi(closes)\n content = f\"# {symbol} RSI\\n\\n\"\n content += f\"**RSI (14):** {rsi:.1f}\\n\" if rsi else \"N/A\\n\"\n if rsi:\n if rsi \u003c 30:\n content += \"**Status:** Oversold\\n\"\n elif rsi > 70:\n content += \"**Status:** Overbought\\n\"\n else:\n content += \"**Status:** Neutral\\n\"\n meta = {\"rsi\": rsi}\n \n elif indicator == \"macd\":\n macd = self._calc_macd(closes)\n content = f\"# {symbol} MACD\\n\\n\"\n if macd:\n content += f\"**MACD:** {macd['macd']:.4f}\\n\"\n content += f\"**Signal:** {macd['signal']:.4f}\\n\"\n content += f\"**Histogram:** {macd['histogram']:.4f}\\n\"\n content += f\"**Cross:** {macd['cross']}\\n\"\n else:\n content += \"N/A\\n\"\n meta = {\"macd\": macd}\n \n elif indicator == \"ma\":\n sma20 = self._calc_sma(closes, 20)\n sma50 = self._calc_sma(closes, 50)\n ema12 = self._calc_ema(closes, 12)\n ema26 = self._calc_ema(closes, 26)\n content = f\"# {symbol} Moving Averages\\n\\n\"\n content += f\"**SMA 20:** ${sma20:.2f}\\n\" if sma20 else \"\"\n content += f\"**SMA 50:** ${sma50:.2f}\\n\" if sma50 else \"\"\n content += f\"**EMA 12:** ${ema12:.2f}\\n\" if ema12 else \"\"\n content += f\"**EMA 26:** ${ema26:.2f}\\n\" if ema26 else \"\"\n content += f\"**Price:** ${price:.2f}\\n\"\n meta = {\"sma20\": sma20, \"sma50\": sma50, \"ema12\": ema12, \"ema26\": ema26}\n \n elif indicator == \"bb\":\n bb = self._calc_bollinger(closes)\n content = f\"# {symbol} Bollinger Bands\\n\\n\"\n if bb:\n content += f\"**Upper:** ${bb['upper']:.2f}\\n\"\n content += f\"**Middle:** ${bb['middle']:.2f}\\n\"\n content += f\"**Lower:** ${bb['lower']:.2f}\\n\"\n content += f\"**%B:** {bb['percent']*100:.1f}%\\n\"\n else:\n content += \"N/A\\n\"\n meta = {\"bb\": bb}\n \n else:\n content = f\"# {symbol}\\n\\nUnknown indicator: {indicator}\\n\"\n meta = {}\n \n content += f\"\\n*Updated: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\\n\"\n \n return self._make_node(\n f\"/live/indicators/{symbol}/{indicator}.md\",\n content,\n {\"symbol\": symbol, \"indicator\": indicator, **meta}\n )\n","content_type":"text/x-python; charset=utf-8","language":"python","size":14465,"content_sha256":"78a2f5220bbef4f1b26a6dbb34bf2f89c993e153e6701bb1ee343e7cfd692e1f"},{"filename":"avm/providers/memory.py","content":"\"\"\"\navm/providers/memory.py - Bot memory provider\n\"\"\"\n\nfrom typing import Dict, Optional\n\nfrom .base import AVMProvider\nfrom ..node import AVMNode, NodeType\nfrom ..store import AVMStore\nfrom ..utils import utcnow\n\n\nclass MemoryProvider(AVMProvider):\n \"\"\"\n Bot memory zone\n \n path: /memory/*\n Read/write enabled\n \n usage:\n - Bot's own observations and learnings\n - Trading experience lessons\n - userpreferencerecord\n \"\"\"\n \n def __init__(self, store: AVMStore):\n super().__init__(store, \"/memory\")\n \n def fetch(self, path: str) -> Optional[AVMNode]:\n \"\"\"Memory zone reads directly from store, no external fetch required\"\"\"\n return self.store.get_node(path)\n \n def write(self, path: str, content: str, meta: Dict = None) -> AVMNode:\n \"\"\"writememory\"\"\"\n if not path.startswith(\"/memory\"):\n raise PermissionError(f\"Cannot write to {path}\")\n \n node = AVMNode(\n path=path,\n content=content,\n meta=meta or {},\n node_type=NodeType.FILE,\n )\n \n return self.store.put_node(node)\n \n def append(self, path: str, content: str) -> AVMNode:\n \"\"\"Append content to existing node\"\"\"\n existing = self.store.get_node(path)\n \n if existing:\n new_content = existing.content + \"\\n\" + content\n else:\n new_content = content\n \n return self.write(path, new_content, existing.meta if existing else None)\n \n def create_lesson(self, title: str, content: str, \n tags: list = None) -> AVMNode:\n \"\"\"Create an experience lesson\"\"\"\n from datetime import datetime\n \n # generatepath\n timestamp = utcnow().strftime(\"%Y%m%d_%H%M%S\")\n slug = title.lower().replace(\" \", \"_\")[:30]\n path = f\"/memory/lessons/{timestamp}_{slug}.md\"\n \n # Format content\n full_content = f\"# {title}\\n\\n\"\n full_content += f\"*Created: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\\n\\n\"\n \n if tags:\n full_content += f\"**Tags:** {', '.join(tags)}\\n\\n\"\n \n full_content += \"---\\n\\n\"\n full_content += content\n \n return self.write(path, full_content, {\"tags\": tags or [], \"title\": title})\n \n def create_observation(self, symbol: str, observation: str,\n category: str = \"general\") -> AVMNode:\n \"\"\"createmarketobservationrecord\"\"\"\n from datetime import datetime\n \n timestamp = utcnow().strftime(\"%Y%m%d_%H%M%S\")\n path = f\"/memory/observations/{symbol}/{timestamp}.md\"\n \n content = f\"# {symbol} Observation\\n\\n\"\n content += f\"*Time: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\\n\"\n content += f\"*Category: {category}*\\n\\n\"\n content += \"---\\n\\n\"\n content += observation\n \n return self.write(path, content, {\n \"symbol\": symbol, \n \"category\": category,\n \"timestamp\": timestamp,\n })\n","content_type":"text/x-python; charset=utf-8","language":"python","size":3095,"content_sha256":"52227eadb558cb502a28eaadf32aa53d98bd2dd74050eafdd8cd3efeaa5212f1"},{"filename":"avm/providers/news.py","content":"\"\"\"\nvfs/providers/news.py - news Provider\n\nFetch financial news from public RSS/API\n\"\"\"\n\nimport json\nimport xml.etree.ElementTree as ET\nfrom datetime import datetime\nfrom typing import Optional, List, Dict\nimport urllib.request\nimport urllib.parse\n\nfrom .base import LiveProvider\nfrom ..node import AVMNode\nfrom ..store import AVMStore\nfrom ..utils import utcnow\n\n\nclass NewsProvider(LiveProvider):\n \"\"\"\n financialnewsdata\n \n path:\n /live/news/market.md - marketnews\n /live/news/AAPL.md - stockrelatednews\n /live/news/crypto.md - cryptocurrencynews\n \"\"\"\n \n # RSS feeds\n RSS_SOURCES = {\n \"market\": [\n (\"Yahoo Finance\", \"https://feeds.finance.yahoo.com/rss/2.0/headline?s=^DJI,^GSPC,^IXIC®ion=US&lang=en-US\"),\n (\"Investing.com\", \"https://www.investing.com/rss/market_overview_Ede.rss\"),\n ],\n \"crypto\": [\n (\"CoinDesk\", \"https://www.coindesk.com/arc/outboundfeeds/rss/\"),\n ],\n }\n \n def __init__(self, store: AVMStore, ttl_seconds: int = 600):\n super().__init__(store, \"/live/news\", ttl_seconds)\n \n def fetch(self, path: str) -> Optional[AVMNode]:\n parts = path.replace(\"/live/news/\", \"\").replace(\".md\", \"\")\n \n try:\n if parts == \"market\":\n return self._fetch_market_news()\n elif parts == \"crypto\":\n return self._fetch_crypto_news()\n elif parts.isupper(): # Stock symbol\n return self._fetch_stock_news(parts)\n except Exception as e:\n return self._make_node(\n path,\n f\"# Error\\n\\nFailed to fetch news: {e}\",\n {\"error\": str(e)}\n )\n \n return None\n \n def _fetch_rss(self, url: str, limit: int = 10) -> List[Dict]:\n \"\"\"Fetch RSS content\"\"\"\n try:\n req = urllib.request.Request(url, headers={\n \"User-Agent\": \"Mozilla/5.0 (compatible; VFS/1.0)\",\n })\n with urllib.request.urlopen(req, timeout=10) as r:\n content = r.read()\n \n root = ET.fromstring(content)\n items = []\n \n for item in root.findall(\".//item\")[:limit]:\n title = item.findtext(\"title\", \"\")\n link = item.findtext(\"link\", \"\")\n pub_date = item.findtext(\"pubDate\", \"\")\n description = item.findtext(\"description\", \"\")\n \n # cleanup description\n if description:\n description = description[:200].replace(\"\u003c\", \"<\").replace(\">\", \">\")\n \n items.append({\n \"title\": title,\n \"link\": link,\n \"date\": pub_date,\n \"description\": description,\n })\n \n return items\n except Exception:\n return []\n \n def _fetch_market_news(self) -> AVMNode:\n \"\"\"getmarketnews\"\"\"\n all_items = []\n \n for source_name, url in self.RSS_SOURCES.get(\"market\", []):\n items = self._fetch_rss(url, limit=5)\n for item in items:\n item[\"source\"] = source_name\n all_items.extend(items)\n \n lines = [\n \"# Market News\",\n \"\",\n f\"*Updated: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\",\n \"\",\n ]\n \n for item in all_items[:15]:\n lines.append(f\"### {item['title']}\")\n lines.append(f\"*{item.get('source', 'Unknown')} | {item['date'][:25] if item['date'] else 'N/A'}*\")\n if item[\"description\"]:\n lines.append(f\"\\n{item['description']}...\")\n if item[\"link\"]:\n lines.append(f\"\\n[Read more]({item['link']})\")\n lines.append(\"\")\n \n return self._make_node(\n \"/live/news/market.md\",\n \"\\n\".join(lines),\n {\"item_count\": len(all_items)}\n )\n \n def _fetch_crypto_news(self) -> AVMNode:\n \"\"\"getcryptocurrencynews\"\"\"\n all_items = []\n \n for source_name, url in self.RSS_SOURCES.get(\"crypto\", []):\n items = self._fetch_rss(url, limit=10)\n for item in items:\n item[\"source\"] = source_name\n all_items.extend(items)\n \n lines = [\n \"# Crypto News\",\n \"\",\n f\"*Updated: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\",\n \"\",\n ]\n \n for item in all_items[:10]:\n lines.append(f\"### {item['title']}\")\n lines.append(f\"*{item.get('source', 'Unknown')} | {item['date'][:25] if item['date'] else 'N/A'}*\")\n if item[\"description\"]:\n lines.append(f\"\\n{item['description']}...\")\n lines.append(\"\")\n \n return self._make_node(\n \"/live/news/crypto.md\",\n \"\\n\".join(lines),\n {\"item_count\": len(all_items)}\n )\n \n def _fetch_stock_news(self, symbol: str) -> AVMNode:\n \"\"\"Fetch stock-related news (Yahoo Finance RSS)\"\"\"\n url = f\"https://feeds.finance.yahoo.com/rss/2.0/headline?s={symbol}®ion=US&lang=en-US\"\n items = self._fetch_rss(url, limit=10)\n \n lines = [\n f\"# {symbol} News\",\n \"\",\n f\"*Updated: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\",\n \"\",\n ]\n \n if not items:\n lines.append(\"No recent news found.\")\n else:\n for item in items:\n lines.append(f\"### {item['title']}\")\n lines.append(f\"*{item['date'][:25] if item['date'] else 'N/A'}*\")\n if item[\"description\"]:\n lines.append(f\"\\n{item['description']}...\")\n if item[\"link\"]:\n lines.append(f\"\\n[Read more]({item['link']})\")\n lines.append(\"\")\n \n return self._make_node(\n f\"/live/news/{symbol}.md\",\n \"\\n\".join(lines),\n {\"symbol\": symbol, \"item_count\": len(items)}\n )\n","content_type":"text/x-python; charset=utf-8","language":"python","size":6204,"content_sha256":"2047d6b5debe55aa67366ffbbf765d9fe3756934de51248b61f0e47ae7dda3d2"},{"filename":"avm/providers/watchlist.py","content":"\"\"\"\nvfs/providers/watchlist.py - watchlistcolumntable Provider\n\nAggregate multiple data sources, generate watchlist overview\n\"\"\"\n\nfrom datetime import datetime\nfrom typing import Optional, List, Dict\n\nfrom .base import LiveProvider\nfrom .indicators import TechnicalIndicatorsProvider\nfrom ..node import AVMNode\nfrom ..store import AVMStore\nfrom ..utils import utcnow\n\n\nclass WatchlistProvider(LiveProvider):\n \"\"\"\n Watchlist overview\n \n path:\n /live/watchlist.md - defaultcolumntable\n /live/watchlist/tech.md - tech stocks\n /live/watchlist/value.md - value stocks\n /live/watchlist/custom.md - custom\n \"\"\"\n \n # Preset watchlist\n WATCHLISTS = {\n \"default\": [\"SPY\", \"QQQ\", \"AAPL\", \"MSFT\", \"NVDA\", \"TSLA\", \"AMZN\", \"GOOGL\"],\n \"tech\": [\"AAPL\", \"MSFT\", \"NVDA\", \"AMD\", \"INTC\", \"AVGO\", \"QCOM\", \"TSM\"],\n \"value\": [\"BRK-B\", \"JPM\", \"JNJ\", \"PG\", \"KO\", \"WMT\", \"XOM\", \"CVX\"],\n \"crypto\": [\"COIN\", \"MARA\", \"RIOT\", \"MSTR\", \"SQ\", \"PYPL\"],\n }\n \n def __init__(self, store: AVMStore, \n custom_symbols: List[str] = None,\n ttl_seconds: int = 300):\n super().__init__(store, \"/live/watchlist\", ttl_seconds)\n self.indicators_provider = TechnicalIndicatorsProvider(store, ttl_seconds)\n self.custom_symbols = custom_symbols or []\n \n def fetch(self, path: str) -> Optional[AVMNode]:\n name = path.replace(\"/live/watchlist\", \"\").replace(\".md\", \"\").strip(\"/\")\n \n if not name:\n name = \"default\"\n \n if name == \"custom\":\n symbols = self.custom_symbols\n else:\n symbols = self.WATCHLISTS.get(name, self.WATCHLISTS[\"default\"])\n \n if not symbols:\n return self._make_node(\n path,\n f\"# Watchlist: {name}\\n\\nNo symbols configured.\",\n {\"name\": name, \"symbols\": []}\n )\n \n return self._fetch_watchlist(path, name, symbols)\n \n def _fetch_watchlist(self, path: str, name: str, \n symbols: List[str]) -> AVMNode:\n \"\"\"getwatchlistdata\"\"\"\n lines = [\n f\"# Watchlist: {name.title()}\",\n \"\",\n f\"*Updated: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\",\n \"\",\n \"## Quick Overview\",\n \"\",\n \"| Symbol | Price | RSI | MACD | Trend | Signal |\",\n \"|--------|-------|-----|------|-------|--------|\",\n ]\n \n symbol_data = []\n \n for symbol in symbols:\n try:\n data = self.indicators_provider._fetch_yahoo_data(symbol)\n closes = data[\"closes\"]\n price = data[\"current_price\"]\n \n rsi = self.indicators_provider._calc_rsi(closes)\n macd = self.indicators_provider._calc_macd(closes)\n sma20 = self.indicators_provider._calc_sma(closes, 20)\n sma50 = self.indicators_provider._calc_sma(closes, 50)\n \n # Trend judgment\n trend = \"—\"\n if sma20 and sma50:\n trend = \"🟢\" if sma20 > sma50 else \"🔴\"\n \n # Signals\n signals = []\n if rsi:\n if rsi \u003c 30:\n signals.append(\"oversold\")\n elif rsi > 70:\n signals.append(\"overbought\")\n \n if macd and macd[\"cross\"] == \"golden\":\n signals.append(\"golden cross\")\n elif macd and macd[\"cross\"] == \"death\":\n signals.append(\"death cross\")\n \n signal_str = \"/\".join(signals) if signals else \"—\"\n macd_emoji = \"\"\n if macd:\n if macd[\"histogram\"] > 0:\n macd_emoji = \"📈\"\n else:\n macd_emoji = \"📉\"\n \n rsi_str = f\"{rsi:.0f}\" if rsi else \"—\"\n lines.append(\n f\"| {symbol} | ${price:.2f} | {rsi_str} | {macd_emoji} | {trend} | {signal_str} |\"\n )\n \n symbol_data.append({\n \"symbol\": symbol,\n \"price\": price,\n \"rsi\": rsi,\n \"trend\": \"bullish\" if trend == \"🟢\" else \"beenarish\" if trend == \"🔴\" else \"neutral\",\n \"signals\": signals,\n })\n \n except Exception as e:\n lines.append(f\"| {symbol} | Error | — | — | — | {str(e)[:20]} |\")\n symbol_data.append({\"symbol\": symbol, \"error\": str(e)})\n \n # adddetailedanalysis\n lines.extend([\n \"\",\n \"## Alerts\",\n \"\",\n ])\n \n oversold = [s for s in symbol_data if \"oversold\" in s.get(\"signals\", [])]\n overbought = [s for s in symbol_data if \"overbought\" in s.get(\"signals\", [])]\n golden = [s for s in symbol_data if \"golden cross\" in s.get(\"signals\", [])]\n death = [s for s in symbol_data if \"death cross\" in s.get(\"signals\", [])]\n \n if oversold:\n lines.append(f\"🟢 **Oversold:** {', '.join(s['symbol'] for s in oversold)}\")\n if overbought:\n lines.append(f\"🔴 **Overbought:** {', '.join(s['symbol'] for s in overbought)}\")\n if golden:\n lines.append(f\"✅ **Golden Cross:** {', '.join(s['symbol'] for s in golden)}\")\n if death:\n lines.append(f\"❌ **Death Cross:** {', '.join(s['symbol'] for s in death)}\")\n \n if not any([oversold, overbought, golden, death]):\n lines.append(\"No significant alerts.\")\n \n return self._make_node(\n path,\n \"\\n\".join(lines),\n {\n \"name\": name,\n \"symbols\": symbols,\n \"data\": symbol_data,\n }\n )\n \n def set_custom_watchlist(self, symbols: List[str]):\n \"\"\"settingscustomwatchlist\"\"\"\n self.custom_symbols = symbols\n","content_type":"text/x-python; charset=utf-8","language":"python","size":6233,"content_sha256":"eed0f68d792e6d63accd205387c5deb10d5f6ed91064a5957360e834833f36c6"},{"filename":"avm/retrieval.py","content":"\"\"\"\navm/retrieval.py - Linked retrieval and dynamic document building\n\nfeatures:\n1. semanticsearch (embedding)\n2. graphextend (relatednode)\n3. Dynamic document synthesis\n\"\"\"\n\nfrom dataclasses import dataclass, field\nfrom datetime import datetime\nfrom typing import List, Dict, Any, Optional, Set, Tuple\n\nfrom .store import AVMStore\nfrom .node import AVMNode\nfrom .graph import EdgeType\nfrom .embedding import EmbeddingStore, EmbeddingBackend\nfrom .utils import utcnow\n\n\n@dataclass\nclass RetrievalResult:\n \"\"\"retrieveresult\"\"\"\n query: str\n nodes: List[AVMNode]\n scores: Dict[str, float] # path -> relevance score\n sources: Dict[str, str] # path -> source type (semantic/graph/fts)\n graph_edges: List[Tuple[str, str, str]] # (from, to, type)\n \n @property\n def paths(self) -> List[str]:\n return [n.path for n in self.nodes]\n \n def get_score(self, path: str) -> float:\n return self.scores.get(path, 0.0)\n \n def get_source(self, path: str) -> str:\n return self.sources.get(path, \"unknown\")\n\n\n@dataclass\nclass SynthesizedDocument:\n \"\"\"Dynamically synthesized document\"\"\"\n title: str\n content: str\n sections: List[Dict[str, Any]]\n sources: List[str]\n generated_at: datetime = field(default_factory=utcnow)\n \n def to_markdown(self) -> str:\n return self.content\n\n\nclass Retriever:\n \"\"\"\n Linked retriever\n \n supports:\n - semanticsearch (requires embedding)\n - FTS5 full-textsearch (fallback)\n - graphextend\n - resultfusion\n \"\"\"\n \n def __init__(self, store: AVMStore, \n embedding_store: EmbeddingStore = None):\n self.store = store\n self.embedding_store = embedding_store\n \n def retrieve(self, query: str,\n k: int = 5,\n expand_graph: bool = True,\n graph_depth: int = 1,\n prefix: str = None) -> RetrievalResult:\n \"\"\"\n Linked retrieval\n \n Args:\n query: Query text\n k: returncount\n expand_graph: whetherextendrelationgraph\n graph_depth: Graph expansion depth\n prefix: pathprefixfilter\n \"\"\"\n nodes = []\n scores = {}\n sources = {}\n seen_paths: Set[str] = set()\n \n # 1. semanticsearch (if embedding)\n if self.embedding_store:\n semantic_results = self.embedding_store.search(query, k=k, prefix=prefix)\n for node, score in semantic_results:\n if node.path not in seen_paths:\n nodes.append(node)\n scores[node.path] = score\n sources[node.path] = \"semantic\"\n seen_paths.add(node.path)\n \n # 2. FTS5 full-text search (supplement or fallback)\n fts_results = self.store.search(query, limit=k)\n for node, score in fts_results:\n if node.path not in seen_paths:\n nodes.append(node)\n # Normalize FTS score\n scores[node.path] = min(1.0, score / 10.0)\n sources[node.path] = \"fts\"\n seen_paths.add(node.path)\n \n # 3. graphextend\n graph_edges = []\n if expand_graph and nodes:\n expanded = self._expand_graph(\n [n.path for n in nodes],\n depth=graph_depth,\n max_expand=k\n )\n \n for path, edge_info in expanded.items():\n if path not in seen_paths:\n node = self.store.get_node(path)\n if node:\n nodes.append(node)\n # Score decay for graph expansion\n scores[path] = edge_info[\"score\"] * 0.5\n sources[path] = \"graph\"\n seen_paths.add(path)\n graph_edges.append((\n edge_info[\"from\"],\n path,\n edge_info[\"type\"]\n ))\n \n # 4. Sort by score\n nodes.sort(key=lambda n: scores.get(n.path, 0), reverse=True)\n \n return RetrievalResult(\n query=query,\n nodes=nodes[:k * 2], # Return more for synthesis\n scores=scores,\n sources=sources,\n graph_edges=graph_edges,\n )\n \n def _expand_graph(self, seed_paths: List[str], \n depth: int = 1,\n max_expand: int = 10) -> Dict[str, Dict]:\n \"\"\"\n fromseednodeextendrelationgraph\n \n Returns: {path: {\"from\": src, \"type\": edge_type, \"score\": weight}}\n \"\"\"\n expanded = {}\n visited = set(seed_paths)\n current_level = seed_paths\n \n for d in range(depth):\n next_level = []\n \n for path in current_level:\n edges = self.store.get_links(path, direction=\"both\")\n \n for edge in edges:\n other = edge.target if edge.source == path else edge.source\n \n if other not in visited and len(expanded) \u003c max_expand:\n visited.add(other)\n next_level.append(other)\n expanded[other] = {\n \"from\": path,\n \"type\": edge.edge_type.value,\n \"score\": edge.weight,\n }\n \n current_level = next_level\n if not current_level:\n break\n \n return expanded\n\n\nclass DocumentSynthesizer:\n \"\"\"\n Dynamic document synthesizer\n \n Aggregate multiple node contents into a structured document\n \"\"\"\n \n def __init__(self, store: AVMStore):\n self.store = store\n \n def synthesize(self, result: RetrievalResult,\n title: str = None,\n max_sections: int = 5,\n section_max_chars: int = 500) -> SynthesizedDocument:\n \"\"\"\n Synthesize dynamic document\n \n Args:\n result: retrieveresult\n title: documenttitle(defaultuse query)\n max_sections: Max section count\n section_max_chars: Max characters per section\n \"\"\"\n if not title:\n title = f\"{result.query} (auto-generated)\"\n \n sections = []\n sources = []\n \n # Group by category\n categorized = self._categorize_nodes(result.nodes)\n \n for category, nodes in categorized.items():\n if len(sections) >= max_sections:\n break\n \n section = self._build_section(\n category, nodes, result,\n max_chars=section_max_chars\n )\n sections.append(section)\n sources.extend([n.path for n in nodes])\n \n # build Markdown\n content = self._build_markdown(title, sections, result)\n \n return SynthesizedDocument(\n title=title,\n content=content,\n sections=sections,\n sources=list(set(sources)),\n )\n \n def _categorize_nodes(self, nodes: List[AVMNode]) -> Dict[str, List[AVMNode]]:\n \"\"\"Categorize node by path prefix\"\"\"\n categories = {}\n \n category_names = {\n \"/market/indicators\": \"technical indicators\",\n \"/market/news\": \"relatednews\",\n \"/market/watchlist\": \"Related assets\",\n \"/trading/positions\": \"currentpositions\",\n \"/memory/lessons\": \"historyexperience\",\n \"/memory\": \"Memory notes\",\n \"/research\": \"researchreport\",\n \"/live\": \"live data\",\n }\n \n for node in nodes:\n # Find longest matching prefix\n matched_prefix = None\n matched_name = \"other\"\n \n for prefix, name in category_names.items():\n if node.path.startswith(prefix):\n if matched_prefix is None or len(prefix) > len(matched_prefix):\n matched_prefix = prefix\n matched_name = name\n \n if matched_name not in categories:\n categories[matched_name] = []\n categories[matched_name].append(node)\n \n return categories\n \n def _build_section(self, category: str, \n nodes: List[AVMNode],\n result: RetrievalResult,\n max_chars: int = 500) -> Dict:\n \"\"\"buildsection\"\"\"\n items = []\n \n for node in nodes[:3]: # At most 3 per category\n # extractsummary\n content = node.content\n \n # Try to extract key info\n summary = self._extract_summary(content, max_chars // 3)\n \n items.append({\n \"path\": node.path,\n \"summary\": summary,\n \"score\": result.get_score(node.path),\n \"source_type\": result.get_source(node.path),\n })\n \n return {\n \"category\": category,\n \"items\": items,\n }\n \n def _extract_summary(self, content: str, max_chars: int) -> str:\n \"\"\"extractcontentsummary\"\"\"\n # remove Markdown title\n lines = content.split(\"\\n\")\n text_lines = []\n \n for line in lines:\n line = line.strip()\n if line and not line.startswith(\"#\") and not line.startswith(\"*Updated:\"):\n text_lines.append(line)\n \n text = \" \".join(text_lines)\n \n if len(text) > max_chars:\n text = text[:max_chars] + \"...\"\n \n return text\n \n def _build_markdown(self, title: str, \n sections: List[Dict],\n result: RetrievalResult) -> str:\n \"\"\"build Markdown document\"\"\"\n lines = [\n f\"# {title}\",\n \"\",\n f\"*Generated: {utcnow().strftime('%Y-%m-%d %H:%M:%S')} UTC*\",\n f\"*Query: \\\"{result.query}\\\"*\",\n \"\",\n ]\n \n for section in sections:\n lines.append(f\"## {section['category']}\")\n lines.append(\"\")\n \n for item in section[\"items\"]:\n # sourceannotation\n source_badge = \"\"\n if item[\"source_type\"] == \"semantic\":\n source_badge = \"🎯\"\n elif item[\"source_type\"] == \"graph\":\n source_badge = \"🔗\"\n else:\n source_badge = \"📝\"\n \n lines.append(f\"> {source_badge} source: `{item['path']}`\")\n lines.append(\"\")\n lines.append(item[\"summary\"])\n lines.append(\"\")\n \n # relatedgraph\n if result.graph_edges:\n lines.append(\"## relatedrelation\")\n lines.append(\"\")\n for src, tgt, etype in result.graph_edges:\n lines.append(f\"- {src} --[{etype}]--> {tgt}\")\n lines.append(\"\")\n \n return \"\\n\".join(lines)\n \n def quick_summary(self, query: str, \n retriever: Retriever,\n k: int = 5) -> str:\n \"\"\"\n Quickly generate query summary\n \n One-line call:\n synthesizer.quick_summary(\"NVDA risk analysis\", retriever)\n \"\"\"\n result = retriever.retrieve(query, k=k, expand_graph=True)\n doc = self.synthesize(result, max_sections=5)\n return doc.to_markdown()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":11737,"content_sha256":"00abe5e8ec07337e2222c228d4724e81c0187b812c099d81086c7e033aeab7f5"},{"filename":"avm/store.py","content":"\"\"\"\nvfs/store.py - SQLite storage layer (with FTS5 full-text search)\n\"\"\"\n\nimport os\nimport sqlite3\nimport json\nfrom datetime import datetime\nfrom pathlib import Path\nfrom typing import Dict, List, Optional, Any, Tuple\nfrom contextlib import contextmanager\nimport difflib\n\nfrom .node import AVMNode, NodeDiff, NodeType\nfrom .graph import KVGraph, Edge, EdgeType\nfrom .utils import utcnow\n\n\n# SQLite schema\nSCHEMA = \"\"\"\n-- Nodes table\nCREATE TABLE IF NOT EXISTS nodes (\n id INTEGER PRIMARY KEY AUTOINCREMENT,\n path TEXT UNIQUE NOT NULL,\n content TEXT NOT NULL DEFAULT '',\n meta TEXT NOT NULL DEFAULT '{}',\n node_type TEXT NOT NULL DEFAULT 'file',\n created_at TEXT NOT NULL,\n updated_at TEXT NOT NULL,\n version INTEGER NOT NULL DEFAULT 1,\n content_h TEXT\n);\n\nCREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);\n\n-- FTS5 full-text index (standalone table)\nCREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(\n path,\n content\n);\n\n-- Edges table (relation graph)\nCREATE TABLE IF NOT EXISTS edges (\n id INTEGER PRIMARY KEY AUTOINCREMENT,\n source TEXT NOT NULL,\n target TEXT NOT NULL,\n edge_type TEXT NOT NULL DEFAULT 'related',\n weight REAL NOT NULL DEFAULT 1.0,\n meta TEXT NOT NULL DEFAULT '{}',\n created_at TEXT NOT NULL,\n UNIQUE(source, target, edge_type)\n);\n\nCREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source);\nCREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target);\n\n-- Change history table\nCREATE TABLE IF NOT EXISTS diffs (\n id INTEGER PRIMARY KEY AUTOINCREMENT,\n node_path TEXT NOT NULL,\n version INTEGER NOT NULL,\n old_h TEXT,\n new_h TEXT NOT NULL,\n diff_content TEXT NOT NULL,\n changed_at TEXT NOT NULL,\n change_type TEXT NOT NULL DEFAULT 'update'\n);\n\nCREATE INDEX IF NOT EXISTS idx_diffs_path ON diffs(node_path);\nCREATE INDEX IF NOT EXISTS idx_diffs_version ON diffs(node_path, version);\n\n-- Vectors table (for embeddings)\nCREATE TABLE IF NOT EXISTS embeddings (\n path TEXT PRIMARY KEY,\n vector BLOB, -- Serialized float array\n model TEXT,\n updated_at TEXT\n);\n\"\"\"\n\n\nclass AVMStore:\n \"\"\"\n VFS SQLite storage\n \n Features:\n - Node CRUD\n - FTS5 full-text search\n - Relation graph storage\n - Change history\n \"\"\"\n \n def __init__(self, db_path: str = None):\n if db_path is None:\n # Default: use XDG data home or ~/.local/share/avm\n xdg_data = os.environ.get(\"XDG_DATA_HOME\", str(Path.home() / \".local\" / \"share\"))\n db_path = str(Path(xdg_data) / \"vfs\" / \"avm.db\")\n \n self.db_path = db_path\n Path(db_path).parent.mkdir(parents=True, exist_ok=True)\n \n self._init_db()\n \n def _init_db(self):\n \"\"\"Initialize database\"\"\"\n with self._conn() as conn:\n conn.executescript(SCHEMA)\n \n @contextmanager\n def _conn(self):\n \"\"\"Get database connection with configurable WAL mode\"\"\"\n conn = sqlite3.connect(self.db_path)\n conn.row_factory = sqlite3.Row\n \n # Configurable performance settings\n if getattr(self, '_wal_mode', True):\n conn.execute(\"PRAGMA journal_mode=WAL\")\n sync_mode = getattr(self, '_sync_mode', 'NORMAL')\n conn.execute(f\"PRAGMA synchronous={sync_mode}\")\n \n try:\n yield conn\n conn.commit()\n finally:\n conn.close()\n \n def configure_performance(self, wal_mode: bool = True, sync_mode: str = \"NORMAL\"):\n \"\"\"Configure performance settings (for ablation experiments)\"\"\"\n self._wal_mode = wal_mode\n self._sync_mode = sync_mode\n \n # ─── Node operations ─────────────────────────────────────────\n \n def get_node(self, path: str) -> Optional[AVMNode]:\n \"\"\"Read node\"\"\"\n with self._conn() as conn:\n row = conn.execute(\n \"SELECT * FROM nodes WHERE path = ?\", (path,)\n ).fetchone()\n \n if row is None:\n return None\n \n return AVMNode(\n path=row[\"path\"],\n content=row[\"content\"],\n meta=json.loads(row[\"meta\"]),\n node_type=NodeType(row[\"node_type\"]),\n created_at=datetime.fromisoformat(row[\"created_at\"]),\n updated_at=datetime.fromisoformat(row[\"updated_at\"]),\n version=row[\"version\"],\n )\n \n def put_node(self, node: AVMNode, save_diff: bool = True) -> AVMNode:\n \"\"\"\n Write node\n \n - Check write permission\n - Auto-increment version\n - Save diff\n \"\"\"\n if not node.is_writable:\n # Read-only path: only allow internal provider writes(via _put_node_internal)\n raise PermissionError(f\"Path {node.path} is read-only\")\n \n return self._put_node_internal(node, save_diff)\n \n def _put_node_internal(self, node: AVMNode, save_diff: bool = True) -> AVMNode:\n \"\"\"\n Internal write (bypass permission check, for providers)\n \"\"\"\n with self._conn() as conn:\n existing = self.get_node(node.path)\n \n now = utcnow()\n new_h = node.content_h\n \n if existing:\n # Update\n old_h = existing.content_h\n \n # Only bump version if content changed\n if old_h != new_h:\n new_version = existing.version + 1\n else:\n new_version = existing.version\n \n if save_diff and old_h != new_h:\n # Save diff\n diff = self._compute_diff(existing.content, node.content)\n self._save_diff(conn, NodeDiff(\n node_path=node.path,\n version=new_version,\n old_h=old_h,\n new_h=new_h,\n diff_content=diff,\n change_type=\"update\",\n ))\n \n conn.execute(\"\"\"\n UPDATE nodes SET \n content = ?, meta = ?, node_type = ?,\n updated_at = ?, version = ?, content_h = ?\n WHERE path = ?\n \"\"\", (\n node.content,\n json.dumps(node.meta),\n node.node_type.value,\n now.isoformat(),\n new_version,\n new_h,\n node.path,\n ))\n \n # Update FTS index\n conn.execute(\"DELETE FROM nodes_fts WHERE path = ?\", (node.path,))\n conn.execute(\n \"INSERT INTO nodes_fts (path, content) VALUES (?, ?)\",\n (node.path, node.content)\n )\n \n node.version = new_version\n node.updated_at = now\n else:\n # Create new\n if save_diff:\n self._save_diff(conn, NodeDiff(\n node_path=node.path,\n version=1,\n old_h=None,\n new_h=new_h,\n diff_content=node.content,\n change_type=\"create\",\n ))\n \n conn.execute(\"\"\"\n INSERT INTO nodes \n (path, content, meta, node_type, created_at, updated_at, version, content_h)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n \"\"\", (\n node.path,\n node.content,\n json.dumps(node.meta),\n node.node_type.value,\n now.isoformat(),\n now.isoformat(),\n 1,\n new_h,\n ))\n \n # Insert FTS index\n conn.execute(\n \"INSERT INTO nodes_fts (path, content) VALUES (?, ?)\",\n (node.path, node.content)\n )\n \n node.version = 1\n node.created_at = now\n node.updated_at = now\n \n return node\n \n def delete_node(self, path: str) -> bool:\n \"\"\"Delete node\"\"\"\n node = self.get_node(path)\n if node is None:\n return False\n \n if not node.is_writable:\n raise PermissionError(f\"Path {path} is read-only\")\n \n with self._conn() as conn:\n # Record deletion\n self._save_diff(conn, NodeDiff(\n node_path=path,\n version=node.version + 1,\n old_h=node.content_h,\n new_h=\"\",\n diff_content=\"\",\n change_type=\"delete\",\n ))\n \n conn.execute(\"DELETE FROM nodes WHERE path = ?\", (path,))\n conn.execute(\"DELETE FROM nodes_fts WHERE path = ?\", (path,))\n conn.execute(\"DELETE FROM edges WHERE source = ? OR target = ?\", (path, path))\n \n return True\n \n def list_nodes(self, prefix: str = \"/\", limit: int = 100) -> List[AVMNode]:\n \"\"\"List nodes\"\"\"\n with self._conn() as conn:\n rows = conn.execute(\n \"SELECT * FROM nodes WHERE path LIKE ? ORDER BY path LIMIT ?\",\n (prefix + \"%\", limit)\n ).fetchall()\n \n return [\n AVMNode(\n path=row[\"path\"],\n content=row[\"content\"],\n meta=json.loads(row[\"meta\"]),\n node_type=NodeType(row[\"node_type\"]),\n created_at=datetime.fromisoformat(row[\"created_at\"]),\n updated_at=datetime.fromisoformat(row[\"updated_at\"]),\n version=row[\"version\"],\n )\n for row in rows\n ]\n \n # ─── Search ─────────────────────────────────────────────\n \n def search(self, query: str, limit: int = 10) -> List[Tuple[AVMNode, float]]:\n \"\"\"\n FTS5 full-text search\n return [(node, score), ...]\n \n Auto-add prefix match (*) for mixed text\n \"\"\"\n # Add prefix match for each word, escape special chars\n import re\n # Remove FTS5 special characters\n clean_query = re.sub(r'[^\\w\\s]', ' ', query)\n terms = clean_query.split()\n if not terms:\n return []\n # Use OR to match any term (more inclusive)\n fts_query = \" OR \".join(f\"{term}*\" for term in terms if term)\n \n with self._conn() as conn:\n # FTS5 BM25 ranking\n rows = conn.execute(\"\"\"\n SELECT nodes.*, bm25(nodes_fts) as score\n FROM nodes_fts\n JOIN nodes ON nodes_fts.path = nodes.path\n WHERE nodes_fts MATCH ?\n ORDER BY score\n LIMIT ?\n \"\"\", (fts_query, limit)).fetchall()\n \n results = []\n for row in rows:\n node = AVMNode(\n path=row[\"path\"],\n content=row[\"content\"],\n meta=json.loads(row[\"meta\"]),\n node_type=NodeType(row[\"node_type\"]),\n created_at=datetime.fromisoformat(row[\"created_at\"]),\n updated_at=datetime.fromisoformat(row[\"updated_at\"]),\n version=row[\"version\"],\n )\n results.append((node, abs(row[\"score\"])))\n \n return results\n \n # ─── Relation graph ─────────────────────────────────────────────\n \n def add_edge(self, source: str, target: str,\n edge_type: EdgeType = EdgeType.RELATED,\n weight: float = 1.0,\n meta: Dict = None) -> Edge:\n \"\"\"Add edge\"\"\"\n edge = Edge(\n source=source,\n target=target,\n edge_type=edge_type,\n weight=weight,\n meta=meta or {},\n )\n \n with self._conn() as conn:\n conn.execute(\"\"\"\n INSERT OR REPLACE INTO edges \n (source, target, edge_type, weight, meta, created_at)\n VALUES (?, ?, ?, ?, ?, ?)\n \"\"\", (\n source, target, edge_type.value, weight,\n json.dumps(meta or {}),\n utcnow().isoformat(),\n ))\n \n return edge\n \n def get_links(self, path: str, \n direction: str = \"both\",\n edge_type: EdgeType = None) -> List[Edge]:\n \"\"\"Get edges for node\"\"\"\n with self._conn() as conn:\n edges = []\n \n if direction in (\"out\", \"both\"):\n sql = \"SELECT * FROM edges WHERE source = ?\"\n params = [path]\n if edge_type:\n sql += \" AND edge_type = ?\"\n params.append(edge_type.value)\n \n for row in conn.execute(sql, params):\n edges.append(Edge(\n source=row[\"source\"],\n target=row[\"target\"],\n edge_type=EdgeType(row[\"edge_type\"]),\n weight=row[\"weight\"],\n meta=json.loads(row[\"meta\"]),\n created_at=datetime.fromisoformat(row[\"created_at\"]),\n ))\n \n if direction in (\"in\", \"both\"):\n sql = \"SELECT * FROM edges WHERE target = ?\"\n params = [path]\n if edge_type:\n sql += \" AND edge_type = ?\"\n params.append(edge_type.value)\n \n for row in conn.execute(sql, params):\n edges.append(Edge(\n source=row[\"source\"],\n target=row[\"target\"],\n edge_type=EdgeType(row[\"edge_type\"]),\n weight=row[\"weight\"],\n meta=json.loads(row[\"meta\"]),\n created_at=datetime.fromisoformat(row[\"created_at\"]),\n ))\n \n return edges\n \n def load_graph(self) -> KVGraph:\n \"\"\"Load full graph to memory\"\"\"\n graph = KVGraph()\n \n with self._conn() as conn:\n for row in conn.execute(\"SELECT * FROM edges\"):\n graph.add_edge(\n row[\"source\"],\n row[\"target\"],\n EdgeType(row[\"edge_type\"]),\n row[\"weight\"],\n json.loads(row[\"meta\"]),\n )\n \n return graph\n \n # ─── Diff ─────────────────────────────────────────────\n \n def _compute_diff(self, old: str, new: str) -> str:\n \"\"\"Calculate unified diff\"\"\"\n diff = difflib.unified_diff(\n old.splitlines(keepends=True),\n new.splitlines(keepends=True),\n lineterm=\"\",\n )\n return \"\".join(diff)\n \n def _save_diff(self, conn, diff: NodeDiff):\n \"\"\"Save diff record\"\"\"\n conn.execute(\"\"\"\n INSERT INTO diffs \n (node_path, version, old_h, new_h, diff_content, changed_at, change_type)\n VALUES (?, ?, ?, ?, ?, ?, ?)\n \"\"\", (\n diff.node_path,\n diff.version,\n diff.old_h,\n diff.new_h,\n diff.diff_content,\n diff.changed_at.isoformat(),\n diff.change_type,\n ))\n \n def get_history(self, path: str, limit: int = 10) -> List[NodeDiff]:\n \"\"\"Get change history\"\"\"\n with self._conn() as conn:\n rows = conn.execute(\"\"\"\n SELECT * FROM diffs \n WHERE node_path = ? \n ORDER BY version DESC \n LIMIT ?\n \"\"\", (path, limit)).fetchall()\n \n return [\n NodeDiff(\n node_path=row[\"node_path\"],\n version=row[\"version\"],\n old_h=row[\"old_h\"],\n new_h=row[\"new_h\"],\n diff_content=row[\"diff_content\"],\n changed_at=datetime.fromisoformat(row[\"changed_at\"]),\n change_type=row[\"change_type\"],\n )\n for row in rows\n ]\n \n def get_node_at_version(self, path: str, version: int) -> Optional[AVMNode]:\n \"\"\"Get node content at a specific version (time travel)\"\"\"\n with self._conn() as conn:\n # Get current node\n current = self.get_node(path)\n if not current:\n return None\n \n if current.version \u003c= version:\n return current\n \n # Reconstruct by reverse-applying diffs\n content = current.content\n diffs = conn.execute(\"\"\"\n SELECT diff_content, version FROM diffs \n WHERE node_path = ? AND version > ?\n ORDER BY version DESC\n \"\"\", (path, version)).fetchall()\n \n # Note: This is a simplified approach\n # For full reconstruction, we'd need forward diffs\n # For now, return current with version marker\n node = AVMNode(\n path=path,\n content=content,\n version=version,\n meta={**current.meta, '_reconstructed': True, '_target_version': version},\n )\n return node\n \n def get_node_at_time(self, path: str, as_of: datetime) -> Optional[AVMNode]:\n \"\"\"Get node content at a specific point in time\"\"\"\n with self._conn() as conn:\n # Find the version that was current at that time\n row = conn.execute(\"\"\"\n SELECT version FROM diffs \n WHERE node_path = ? AND changed_at \u003c= ?\n ORDER BY changed_at DESC\n LIMIT 1\n \"\"\", (path, as_of.isoformat())).fetchone()\n \n if row:\n return self.get_node_at_version(path, row[0])\n \n # No diffs before that time, might be the original\n return self.get_node(path)\n \n # ─── Statistics ─────────────────────────────────────────────\n \n def stats(self) -> Dict[str, Any]:\n \"\"\"Get storage statistics\"\"\"\n with self._conn() as conn:\n node_count = conn.execute(\"SELECT COUNT(*) FROM nodes\").fetchone()[0]\n edge_count = conn.execute(\"SELECT COUNT(*) FROM edges\").fetchone()[0]\n diff_count = conn.execute(\"SELECT COUNT(*) FROM diffs\").fetchone()[0]\n \n # Stats by path prefix\n prefix_stats = {}\n for row in conn.execute(\"\"\"\n SELECT \n CASE \n WHEN path LIKE '/live%' THEN '/live'\n WHEN path LIKE '/research%' THEN '/research'\n WHEN path LIKE '/memory%' THEN '/memory'\n ELSE '/other'\n END as prefix,\n COUNT(*) as cnt\n FROM nodes GROUP BY prefix\n \"\"\"):\n prefix_stats[row[\"prefix\"]] = row[\"cnt\"]\n \n return {\n \"nodes\": node_count,\n \"edges\": edge_count,\n \"diffs\": diff_count,\n \"by_prefix\": prefix_stats,\n \"db_path\": self.db_path,\n }\n","content_type":"text/x-python; charset=utf-8","language":"python","size":20203,"content_sha256":"5123566bfd562387f735aebc0ad09f5097581759f83f12f9c1c4428a7e27bf48"},{"filename":"avm/subscriptions.py","content":"\"\"\"\navm/subscriptions.py - Persistent subscriptions with throttling/batching\n\nSubscription modes:\n- realtime: Push immediately via tell/hook\n- throttled: Aggregate within window, push at end\n- batched: No push, accumulate for /:inbox\n- digest: Scheduled summaries (e.g., every 2h)\n\nUsage:\n # FUSE\n echo \"pattern=/memory/shared/*;mode=throttled;throttle=60\" > avm/:subscribe\n cat avm/:subscriptions\n \n # CLI\n avm subscribe /memory/shared/* --mode throttled --throttle 60\n avm subscriptions list\n\"\"\"\n\nimport json\nimport time\nimport threading\nimport sqlite3\nfrom datetime import datetime, timedelta\nfrom typing import List, Dict, Optional, Callable\nfrom dataclasses import dataclass, field, asdict\nfrom pathlib import Path\nfrom enum import Enum\nimport fnmatch\n\nfrom .utils import utcnow\n\n\nclass SubscriptionMode(Enum):\n REALTIME = \"realtime\" # Push immediately\n THROTTLED = \"throttled\" # Aggregate within window\n BATCHED = \"batched\" # No push, wait for poll\n DIGEST = \"digest\" # Scheduled summary\n\n\n@dataclass\nclass Subscription:\n \"\"\"A subscription to path pattern changes\"\"\"\n id: int\n agent_id: str\n pattern: str\n mode: SubscriptionMode\n throttle_seconds: int = 60\n digest_cron: Optional[str] = None\n webhook_url: Optional[str] = None # HTTP POST endpoint\n enabled: bool = True\n created_at: str = \"\"\n \n def __post_init__(self):\n if isinstance(self.mode, str):\n self.mode = SubscriptionMode(self.mode)\n if not self.created_at:\n self.created_at = utcnow().isoformat()\n\n\n@dataclass\nclass PendingNotification:\n \"\"\"Accumulated notifications waiting to be sent\"\"\"\n subscription_id: int\n agent_id: str\n paths: List[str] = field(default_factory=list)\n first_event: str = \"\"\n last_event: str = \"\"\n count: int = 0\n\n\nclass SubscriptionStore:\n \"\"\"SQLite-backed subscription storage\"\"\"\n \n def __init__(self, db_path: str = None):\n if db_path is None:\n db_path = str(Path.home() / \".local\" / \"share\" / \"avm\" / \"subscriptions.db\")\n Path(db_path).parent.mkdir(parents=True, exist_ok=True)\n self.db_path = db_path\n self._init_tables()\n \n # In-memory state for throttling\n self._pending: Dict[int, PendingNotification] = {}\n self._throttle_timers: Dict[int, threading.Timer] = {}\n self._lock = threading.Lock()\n \n # Callback for sending notifications\n self._notify_callback: Optional[Callable] = None\n \n def _init_tables(self):\n with sqlite3.connect(self.db_path) as conn:\n conn.execute(\"\"\"\n CREATE TABLE IF NOT EXISTS subscriptions (\n id INTEGER PRIMARY KEY,\n agent_id TEXT NOT NULL,\n pattern TEXT NOT NULL,\n mode TEXT NOT NULL DEFAULT 'batched',\n throttle_seconds INTEGER DEFAULT 60,\n digest_cron TEXT,\n webhook_url TEXT,\n enabled INTEGER DEFAULT 1,\n created_at TEXT NOT NULL,\n UNIQUE(agent_id, pattern)\n )\n \"\"\")\n # Migration: add webhook_url if missing\n cols = [r[1] for r in conn.execute(\"PRAGMA table_info(subscriptions)\")]\n if 'webhook_url' not in cols:\n conn.execute(\"ALTER TABLE subscriptions ADD COLUMN webhook_url TEXT\")\n conn.execute(\"\"\"\n CREATE TABLE IF NOT EXISTS pending_events (\n id INTEGER PRIMARY KEY,\n subscription_id INTEGER NOT NULL,\n path TEXT NOT NULL,\n event_type TEXT NOT NULL,\n timestamp TEXT NOT NULL,\n delivered INTEGER DEFAULT 0,\n FOREIGN KEY(subscription_id) REFERENCES subscriptions(id)\n )\n \"\"\")\n conn.execute(\"CREATE INDEX IF NOT EXISTS idx_pending_sub ON pending_events(subscription_id, delivered)\")\n \n def set_notify_callback(self, callback: Callable):\n \"\"\"Set callback for sending notifications\"\"\"\n self._notify_callback = callback\n \n def subscribe(self, agent_id: str, pattern: str, \n mode: SubscriptionMode = SubscriptionMode.BATCHED,\n throttle_seconds: int = 60,\n digest_cron: str = None,\n webhook_url: str = None) -> Subscription:\n \"\"\"Create or update a subscription\"\"\"\n now = utcnow().isoformat()\n with sqlite3.connect(self.db_path) as conn:\n conn.execute(\"\"\"\n INSERT INTO subscriptions (agent_id, pattern, mode, throttle_seconds, digest_cron, webhook_url, created_at)\n VALUES (?, ?, ?, ?, ?, ?, ?)\n ON CONFLICT(agent_id, pattern) DO UPDATE SET\n mode = excluded.mode,\n throttle_seconds = excluded.throttle_seconds,\n digest_cron = excluded.digest_cron,\n webhook_url = excluded.webhook_url,\n enabled = 1\n \"\"\", (agent_id, pattern, mode.value, throttle_seconds, digest_cron, webhook_url, now))\n \n row = conn.execute(\n \"SELECT id, agent_id, pattern, mode, throttle_seconds, digest_cron, webhook_url, enabled, created_at FROM subscriptions WHERE agent_id = ? AND pattern = ?\",\n (agent_id, pattern)\n ).fetchone()\n \n return Subscription(\n id=row[0], agent_id=row[1], pattern=row[2],\n mode=row[3], throttle_seconds=row[4],\n digest_cron=row[5], webhook_url=row[6], enabled=bool(row[7]), created_at=row[8]\n )\n \n def unsubscribe(self, agent_id: str, pattern: str):\n \"\"\"Remove a subscription\"\"\"\n with sqlite3.connect(self.db_path) as conn:\n conn.execute(\n \"DELETE FROM subscriptions WHERE agent_id = ? AND pattern = ?\",\n (agent_id, pattern)\n )\n \n def list_subscriptions(self, agent_id: str = None) -> List[Subscription]:\n \"\"\"List subscriptions, optionally filtered by agent\"\"\"\n with sqlite3.connect(self.db_path) as conn:\n if agent_id:\n rows = conn.execute(\n \"SELECT id, agent_id, pattern, mode, throttle_seconds, digest_cron, webhook_url, enabled, created_at FROM subscriptions WHERE agent_id = ? AND enabled = 1\",\n (agent_id,)\n ).fetchall()\n else:\n rows = conn.execute(\n \"SELECT id, agent_id, pattern, mode, throttle_seconds, digest_cron, webhook_url, enabled, created_at FROM subscriptions WHERE enabled = 1\"\n ).fetchall()\n \n return [\n Subscription(id=r[0], agent_id=r[1], pattern=r[2], mode=r[3],\n throttle_seconds=r[4], digest_cron=r[5], webhook_url=r[6],\n enabled=bool(r[7]), created_at=r[8])\n for r in rows\n ]\n \n def get_matching_subscriptions(self, path: str) -> List[Subscription]:\n \"\"\"Find all subscriptions that match a given path\"\"\"\n all_subs = self.list_subscriptions()\n matching = []\n for sub in all_subs:\n if fnmatch.fnmatch(path, sub.pattern):\n matching.append(sub)\n return matching\n \n def on_write(self, path: str, author: str = None):\n \"\"\"Called when a path is written - triggers subscription notifications\"\"\"\n subs = self.get_matching_subscriptions(path)\n now = utcnow().isoformat()\n \n for sub in subs:\n # Don't notify the author\n if author and sub.agent_id == author:\n continue\n \n if sub.mode == SubscriptionMode.REALTIME:\n self._send_immediate(sub, path)\n \n elif sub.mode == SubscriptionMode.THROTTLED:\n self._add_to_throttle(sub, path, now)\n \n elif sub.mode == SubscriptionMode.BATCHED:\n self._store_pending(sub.id, path, now)\n \n elif sub.mode == SubscriptionMode.DIGEST:\n self._store_pending(sub.id, path, now)\n \n def _send_immediate(self, sub: Subscription, path: str):\n \"\"\"Send notification immediately via callback or webhook\"\"\"\n # Try webhook first if configured\n if sub.webhook_url:\n self._send_webhook(sub.webhook_url, {\n \"event\": \"write\",\n \"path\": path,\n \"pattern\": sub.pattern,\n \"agent_id\": sub.agent_id,\n \"timestamp\": utcnow().isoformat(),\n })\n elif self._notify_callback:\n self._notify_callback(sub.agent_id, f\"[update] {path}\")\n \n def _add_to_throttle(self, sub: Subscription, path: str, timestamp: str):\n \"\"\"Add to throttle buffer, schedule flush\"\"\"\n with self._lock:\n if sub.id not in self._pending:\n self._pending[sub.id] = PendingNotification(\n subscription_id=sub.id,\n agent_id=sub.agent_id,\n paths=[path],\n first_event=timestamp,\n last_event=timestamp,\n count=1\n )\n # Schedule flush\n timer = threading.Timer(sub.throttle_seconds, self._flush_throttle, args=[sub.id])\n timer.daemon = True\n timer.start()\n self._throttle_timers[sub.id] = timer\n else:\n pending = self._pending[sub.id]\n if path not in pending.paths:\n pending.paths.append(path)\n pending.last_event = timestamp\n pending.count += 1\n \n def _flush_throttle(self, sub_id: int):\n \"\"\"Flush throttled notifications\"\"\"\n with self._lock:\n pending = self._pending.pop(sub_id, None)\n self._throttle_timers.pop(sub_id, None)\n \n if not pending:\n return\n \n # Get subscription for webhook URL\n sub = self._get_subscription_by_id(sub_id)\n \n if sub and sub.webhook_url:\n # Send webhook with batched updates\n self._send_webhook(sub.webhook_url, {\n \"event\": \"batch_update\",\n \"paths\": pending.paths,\n \"count\": pending.count,\n \"pattern\": sub.pattern if sub else None,\n \"agent_id\": pending.agent_id,\n \"first_event\": pending.first_event,\n \"last_event\": pending.last_event,\n })\n elif self._notify_callback:\n if pending.count == 1:\n msg = f\"[update] {pending.paths[0]}\"\n else:\n msg = f\"[{pending.count} updates] {', '.join(pending.paths[:3])}\"\n if len(pending.paths) > 3:\n msg += f\" +{len(pending.paths) - 3} more\"\n self._notify_callback(pending.agent_id, msg)\n \n def _get_subscription_by_id(self, sub_id: int) -> Optional[Subscription]:\n \"\"\"Get subscription by ID\"\"\"\n with sqlite3.connect(self.db_path) as conn:\n row = conn.execute(\n \"SELECT id, agent_id, pattern, mode, throttle_seconds, digest_cron, webhook_url, enabled, created_at FROM subscriptions WHERE id = ?\",\n (sub_id,)\n ).fetchone()\n \n if row:\n return Subscription(\n id=row[0], agent_id=row[1], pattern=row[2], mode=row[3],\n throttle_seconds=row[4], digest_cron=row[5], webhook_url=row[6],\n enabled=bool(row[7]), created_at=row[8]\n )\n return None\n \n def _send_webhook(self, url: str, payload: dict, timeout: int = 10):\n \"\"\"Send webhook POST request (fire-and-forget in thread)\"\"\"\n import urllib.request\n import urllib.error\n \n def _send():\n try:\n data = json.dumps(payload).encode('utf-8')\n req = urllib.request.Request(\n url,\n data=data,\n headers={'Content-Type': 'application/json'},\n method='POST'\n )\n with urllib.request.urlopen(req, timeout=timeout) as resp:\n pass # Fire and forget\n except Exception as e:\n # Log but don't fail\n print(f\"[subscription webhook] Failed to POST to {url}: {e}\")\n \n # Send in background thread\n thread = threading.Thread(target=_send, daemon=True)\n thread.start()\n \n def _store_pending(self, sub_id: int, path: str, timestamp: str):\n \"\"\"Store for later retrieval (batched/digest)\"\"\"\n with sqlite3.connect(self.db_path) as conn:\n conn.execute(\n \"INSERT INTO pending_events (subscription_id, path, event_type, timestamp) VALUES (?, ?, ?, ?)\",\n (sub_id, path, \"write\", timestamp)\n )\n \n def get_pending(self, agent_id: str, mark_delivered: bool = False) -> List[Dict]:\n \"\"\"Get pending notifications for an agent\"\"\"\n with sqlite3.connect(self.db_path) as conn:\n rows = conn.execute(\"\"\"\n SELECT e.id, e.path, e.event_type, e.timestamp, s.pattern\n FROM pending_events e\n JOIN subscriptions s ON e.subscription_id = s.id\n WHERE s.agent_id = ? AND e.delivered = 0\n ORDER BY e.timestamp DESC\n \"\"\", (agent_id,)).fetchall()\n \n if mark_delivered and rows:\n ids = [r[0] for r in rows]\n placeholders = ','.join('?' * len(ids))\n conn.execute(f\"UPDATE pending_events SET delivered = 1 WHERE id IN ({placeholders})\", ids)\n \n return [\n {\"path\": r[1], \"event_type\": r[2], \"timestamp\": r[3], \"pattern\": r[4]}\n for r in rows\n ]\n \n def clear_pending(self, agent_id: str):\n \"\"\"Clear all pending notifications for an agent\"\"\"\n with sqlite3.connect(self.db_path) as conn:\n conn.execute(\"\"\"\n UPDATE pending_events SET delivered = 1\n WHERE subscription_id IN (SELECT id FROM subscriptions WHERE agent_id = ?)\n \"\"\", (agent_id,))\n\n\n# Singleton instance\n_subscription_store: Optional[SubscriptionStore] = None\n\ndef get_subscription_store() -> SubscriptionStore:\n global _subscription_store\n if _subscription_store is None:\n _subscription_store = SubscriptionStore()\n return _subscription_store\n","content_type":"text/x-python; charset=utf-8","language":"python","size":14745,"content_sha256":"2fdc4c0f1116c0293c5962733ae7f19d03156851240c4e966911c20534330c26"},{"filename":"avm/telemetry.py","content":"\"\"\"Telemetry and observability for AVM operations.\"\"\"\n\nimport json\nimport time\nimport sqlite3\nfrom datetime import datetime, timezone\nfrom typing import Dict, Any, Optional, List\nfrom pathlib import Path\nfrom contextlib import contextmanager\nfrom dataclasses import dataclass, asdict\n\n\n@dataclass\nclass OpLog:\n \"\"\"Single operation log entry.\"\"\"\n ts: str\n op: str\n agent: str\n path: Optional[str] = None\n query: Optional[str] = None\n tokens_in: Optional[int] = None\n tokens_out: Optional[int] = None\n results: Optional[int] = None\n latency_ms: Optional[float] = None\n success: bool = True\n error: Optional[str] = None\n meta: Optional[Dict] = None\n\n\nclass Telemetry:\n \"\"\"\n AVM telemetry collector.\n \n Logs operations to SQLite for analysis and benchmarking.\n \"\"\"\n \n def __init__(self, db_path: str = None):\n if db_path is None:\n data_dir = Path.home() / \".local\" / \"share\" / \"avm\"\n data_dir.mkdir(parents=True, exist_ok=True)\n db_path = str(data_dir / \"telemetry.db\")\n \n self.db_path = db_path\n self._init_db()\n \n def _init_db(self):\n \"\"\"Initialize telemetry table.\"\"\"\n with sqlite3.connect(self.db_path) as conn:\n conn.execute(\"\"\"\n CREATE TABLE IF NOT EXISTS op_logs (\n id INTEGER PRIMARY KEY AUTOINCREMENT,\n ts TEXT NOT NULL,\n op TEXT NOT NULL,\n agent TEXT NOT NULL,\n path TEXT,\n query TEXT,\n tokens_in INTEGER,\n tokens_out INTEGER,\n results INTEGER,\n latency_ms REAL,\n success INTEGER DEFAULT 1,\n error TEXT,\n meta TEXT\n )\n \"\"\")\n conn.execute(\"CREATE INDEX IF NOT EXISTS idx_op_ts ON op_logs(ts)\")\n conn.execute(\"CREATE INDEX IF NOT EXISTS idx_op_agent ON op_logs(agent)\")\n conn.execute(\"CREATE INDEX IF NOT EXISTS idx_op_op ON op_logs(op)\")\n \n def log(self, entry: OpLog):\n \"\"\"Log an operation.\"\"\"\n with sqlite3.connect(self.db_path) as conn:\n conn.execute(\"\"\"\n INSERT INTO op_logs \n (ts, op, agent, path, query, tokens_in, tokens_out, \n results, latency_ms, success, error, meta)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)\n \"\"\", (\n entry.ts,\n entry.op,\n entry.agent,\n entry.path,\n entry.query,\n entry.tokens_in,\n entry.tokens_out,\n entry.results,\n entry.latency_ms,\n 1 if entry.success else 0,\n entry.error,\n json.dumps(entry.meta) if entry.meta else None\n ))\n \n @contextmanager\n def track(self, op: str, agent: str, **kwargs):\n \"\"\"\n Context manager to track an operation.\n \n Usage:\n with telemetry.track(\"recall\", \"akashi\", query=\"test\") as t:\n result = do_recall()\n t[\"results\"] = len(result)\n t[\"tokens_out\"] = count_tokens(result)\n \"\"\"\n start = time.perf_counter()\n ctx = {\n \"op\": op,\n \"agent\": agent,\n \"success\": True,\n \"error\": None,\n **kwargs\n }\n \n try:\n yield ctx\n except Exception as e:\n ctx[\"success\"] = False\n ctx[\"error\"] = str(e)\n raise\n finally:\n latency_ms = (time.perf_counter() - start) * 1000\n \n entry = OpLog(\n ts=datetime.now(timezone.utc).isoformat(),\n op=ctx[\"op\"],\n agent=ctx[\"agent\"],\n path=ctx.get(\"path\"),\n query=ctx.get(\"query\"),\n tokens_in=ctx.get(\"tokens_in\"),\n tokens_out=ctx.get(\"tokens_out\"),\n results=ctx.get(\"results\"),\n latency_ms=latency_ms,\n success=ctx[\"success\"],\n error=ctx.get(\"error\"),\n meta=ctx.get(\"meta\")\n )\n self.log(entry)\n \n def query(\n self,\n agent: str = None,\n op: str = None,\n since: str = None,\n limit: int = 100\n ) -> List[Dict]:\n \"\"\"Query operation logs.\"\"\"\n sql = \"SELECT * FROM op_logs WHERE 1=1\"\n params = []\n \n if agent:\n sql += \" AND agent = ?\"\n params.append(agent)\n if op:\n sql += \" AND op = ?\"\n params.append(op)\n if since:\n sql += \" AND ts >= ?\"\n params.append(since)\n \n sql += \" ORDER BY ts DESC LIMIT ?\"\n params.append(limit)\n \n with sqlite3.connect(self.db_path) as conn:\n conn.row_factory = sqlite3.Row\n rows = conn.execute(sql, params).fetchall()\n return [dict(r) for r in rows]\n \n def stats(self, agent: str = None, since: str = None) -> Dict:\n \"\"\"Get aggregated statistics.\"\"\"\n where = \"WHERE 1=1\"\n params = []\n \n if agent:\n where += \" AND agent = ?\"\n params.append(agent)\n if since:\n where += \" AND ts >= ?\"\n params.append(since)\n \n with sqlite3.connect(self.db_path) as conn:\n # Op counts\n rows = conn.execute(f\"\"\"\n SELECT op, COUNT(*) as count,\n AVG(latency_ms) as avg_latency,\n SUM(tokens_in) as total_tokens_in,\n SUM(tokens_out) as total_tokens_out\n FROM op_logs {where}\n GROUP BY op\n \"\"\", params).fetchall()\n \n by_op = {}\n for row in rows:\n by_op[row[0]] = {\n \"count\": row[1],\n \"avg_latency_ms\": round(row[2], 2) if row[2] else None,\n \"total_tokens_in\": row[3],\n \"total_tokens_out\": row[4]\n }\n \n # Error rate\n total = conn.execute(\n f\"SELECT COUNT(*) FROM op_logs {where}\", params\n ).fetchone()[0]\n \n errors = conn.execute(\n f\"SELECT COUNT(*) FROM op_logs {where} AND success = 0\", params\n ).fetchone()[0]\n \n return {\n \"total_ops\": total,\n \"error_rate\": round(errors / total, 4) if total else 0,\n \"by_op\": by_op\n }\n \n def token_savings(self, agent: str = None, since: str = None) -> Dict:\n \"\"\"Calculate token savings from recall operations.\"\"\"\n where = \"WHERE op = 'recall'\"\n params = []\n \n if agent:\n where += \" AND agent = ?\"\n params.append(agent)\n if since:\n where += \" AND ts >= ?\"\n params.append(since)\n \n with sqlite3.connect(self.db_path) as conn:\n row = conn.execute(f\"\"\"\n SELECT \n COUNT(*) as recalls,\n SUM(tokens_in) as tokens_returned,\n SUM(tokens_out) as tokens_available\n FROM op_logs {where}\n \"\"\", params).fetchone()\n \n recalls = row[0] or 0\n tokens_returned = row[1] or 0\n tokens_available = row[2] or 0\n \n if tokens_available > 0:\n savings_pct = round((1 - tokens_returned / tokens_available) * 100, 1)\n else:\n savings_pct = 0\n \n return {\n \"recalls\": recalls,\n \"tokens_returned\": tokens_returned,\n \"tokens_available\": tokens_available,\n \"tokens_saved\": tokens_available - tokens_returned,\n \"savings_pct\": savings_pct\n }\n\n\n# Global instance\n_telemetry: Optional[Telemetry] = None\n\n\ndef get_telemetry() -> Telemetry:\n \"\"\"Get global telemetry instance.\"\"\"\n global _telemetry\n if _telemetry is None:\n _telemetry = Telemetry()\n return _telemetry\n","content_type":"text/x-python; charset=utf-8","language":"python","size":8339,"content_sha256":"1b3184b70557296ed840340b90fb1ac46d4dacc66fa5aa01368eb063ffc77026"},{"filename":"avm/tell.py","content":"\"\"\"\navm/tell.py - Cross-agent messaging system\n\nAllows agents to send important messages to each other that get\ninjected into the recipient's next read operation.\n\nPriority levels:\n- urgent: Injected into next read of ANY file\n- normal: Shown when reading /:inbox or /tell/@me\n- low: Only shown when explicitly reading /:inbox\n\nHooks:\n- Shell: Execute command when tell is sent\n- HTTP: POST to webhook URL\n- OpenClaw: Send via sessions_send\n\nUsage:\n # Write a tell\n echo \"important message\" > avm/tell/kearsarge?priority=urgent\n echo \"fyi\" > avm/tell/kearsarge\n echo \"message\" > avm/tell/@all # Broadcast\n\n # Read tells\n cat avm/:inbox # All unread tells\n cat avm/tell/@me # Same as /:inbox\n cat avm/tell/@me?mark=read # Mark all as read\n\"\"\"\n\nimport json\nimport sqlite3\nfrom datetime import datetime, timezone\nfrom typing import List, Dict, Optional, Any\nfrom dataclasses import dataclass, asdict\nfrom enum import Enum\n\n\nclass TellPriority(Enum):\n URGENT = \"urgent\" # Inject into next read\n NORMAL = \"normal\" # Show in inbox\n LOW = \"low\" # Only explicit inbox read\n\n\n@dataclass\nclass Tell:\n \"\"\"A message from one agent to another\"\"\"\n id: int\n from_agent: str\n to_agent: str # Can be specific agent or \"@all\"\n content: str\n priority: TellPriority\n created_at: str\n read_at: Optional[str] = None\n expires_at: Optional[str] = None\n ack_required: bool = False\n meta: Dict[str, Any] = None\n \n def __post_init__(self):\n if isinstance(self.priority, str):\n self.priority = TellPriority(self.priority)\n if self.meta is None:\n self.meta = {}\n \n def to_dict(self) -> Dict:\n d = asdict(self)\n d['priority'] = self.priority.value\n return d\n \n def format_header(self) -> str:\n \"\"\"Format as markdown header for injection\"\"\"\n priority_emoji = {\n TellPriority.URGENT: \"🔴\",\n TellPriority.NORMAL: \"🟡\", \n TellPriority.LOW: \"⚪\"\n }\n emoji = priority_emoji.get(self.priority, \"\")\n return f\"## {emoji} From: {self.from_agent} @ {self.created_at}\\n{self.content}\"\n\n\nclass TellStore:\n \"\"\"SQLite storage for tells\"\"\"\n \n SCHEMA = \"\"\"\n CREATE TABLE IF NOT EXISTS tells (\n id INTEGER PRIMARY KEY AUTOINCREMENT,\n from_agent TEXT NOT NULL,\n to_agent TEXT NOT NULL,\n content TEXT NOT NULL,\n priority TEXT NOT NULL DEFAULT 'normal',\n created_at TEXT NOT NULL,\n read_at TEXT,\n expires_at TEXT,\n ack_required INTEGER DEFAULT 0,\n meta TEXT DEFAULT '{}'\n );\n \n CREATE INDEX IF NOT EXISTS idx_tells_to_agent ON tells(to_agent);\n CREATE INDEX IF NOT EXISTS idx_tells_read_at ON tells(read_at);\n CREATE INDEX IF NOT EXISTS idx_tells_priority ON tells(priority);\n \"\"\"\n \n def __init__(self, db_path: str):\n self.db_path = db_path\n self._init_db()\n \n def _init_db(self):\n \"\"\"Initialize tell tables\"\"\"\n with sqlite3.connect(self.db_path) as conn:\n conn.executescript(self.SCHEMA)\n \n def _row_to_tell(self, row: tuple) -> Tell:\n \"\"\"Convert database row to Tell object\"\"\"\n return Tell(\n id=row[0],\n from_agent=row[1],\n to_agent=row[2],\n content=row[3],\n priority=TellPriority(row[4]),\n created_at=row[5],\n read_at=row[6],\n expires_at=row[7],\n ack_required=bool(row[8]),\n meta=json.loads(row[9]) if row[9] else {}\n )\n \n def send(self, from_agent: str, to_agent: str, content: str,\n priority: TellPriority = TellPriority.NORMAL,\n expires_at: str = None, ack_required: bool = False,\n meta: Dict = None) -> Tell:\n \"\"\"Send a tell to an agent\"\"\"\n now = datetime.now(timezone.utc).isoformat()\n meta_json = json.dumps(meta or {})\n \n with sqlite3.connect(self.db_path) as conn:\n cursor = conn.execute(\"\"\"\n INSERT INTO tells \n (from_agent, to_agent, content, priority, created_at, expires_at, ack_required, meta)\n VALUES (?, ?, ?, ?, ?, ?, ?, ?)\n \"\"\", (from_agent, to_agent, content, priority.value, now, expires_at, int(ack_required), meta_json))\n \n tell_id = cursor.lastrowid\n \n tell = Tell(\n id=tell_id,\n from_agent=from_agent,\n to_agent=to_agent,\n content=content,\n priority=priority,\n created_at=now,\n expires_at=expires_at,\n ack_required=ack_required,\n meta=meta or {}\n )\n \n # Trigger hooks (outside transaction)\n try:\n hook_manager = get_hook_manager()\n hook_manager.trigger(tell)\n except Exception:\n pass # Don't fail send if hook fails\n \n return tell\n \n def get_unread(self, agent_id: str, priority: TellPriority = None,\n include_broadcast: bool = True) -> List[Tell]:\n \"\"\"Get unread tells for an agent\"\"\"\n now = datetime.now(timezone.utc).isoformat()\n \n with sqlite3.connect(self.db_path) as conn:\n conn.row_factory = sqlite3.Row\n \n # Build query\n conditions = [\"read_at IS NULL\"]\n params = []\n \n # Agent filter (including @all broadcasts)\n if include_broadcast:\n conditions.append(\"(to_agent = ? OR to_agent = '@all')\")\n else:\n conditions.append(\"to_agent = ?\")\n params.append(agent_id)\n \n # Priority filter\n if priority:\n conditions.append(\"priority = ?\")\n params.append(priority.value)\n \n # Expiration filter\n conditions.append(\"(expires_at IS NULL OR expires_at > ?)\")\n params.append(now)\n \n query = f\"\"\"\n SELECT id, from_agent, to_agent, content, priority, \n created_at, read_at, expires_at, ack_required, meta\n FROM tells \n WHERE {' AND '.join(conditions)}\n ORDER BY \n CASE priority \n WHEN 'urgent' THEN 0 \n WHEN 'normal' THEN 1 \n ELSE 2 \n END,\n created_at DESC\n \"\"\"\n \n rows = conn.execute(query, params).fetchall()\n return [self._row_to_tell(tuple(row)) for row in rows]\n \n def get_urgent_unread(self, agent_id: str) -> List[Tell]:\n \"\"\"Get only urgent unread tells\"\"\"\n return self.get_unread(agent_id, priority=TellPriority.URGENT)\n \n def mark_read(self, tell_ids: List[int]) -> int:\n \"\"\"Mark tells as read\"\"\"\n if not tell_ids:\n return 0\n \n now = datetime.now(timezone.utc).isoformat()\n placeholders = ','.join('?' * len(tell_ids))\n \n with sqlite3.connect(self.db_path) as conn:\n cursor = conn.execute(f\"\"\"\n UPDATE tells SET read_at = ?\n WHERE id IN ({placeholders}) AND read_at IS NULL\n \"\"\", [now] + tell_ids)\n return cursor.rowcount\n \n def mark_all_read(self, agent_id: str) -> int:\n \"\"\"Mark all tells for an agent as read\"\"\"\n now = datetime.now(timezone.utc).isoformat()\n \n with sqlite3.connect(self.db_path) as conn:\n cursor = conn.execute(\"\"\"\n UPDATE tells SET read_at = ?\n WHERE (to_agent = ? OR to_agent = '@all') AND read_at IS NULL\n \"\"\", (now, agent_id))\n return cursor.rowcount\n \n def get_all(self, agent_id: str, limit: int = 50) -> List[Tell]:\n \"\"\"Get all tells for an agent (read and unread)\"\"\"\n with sqlite3.connect(self.db_path) as conn:\n rows = conn.execute(\"\"\"\n SELECT id, from_agent, to_agent, content, priority,\n created_at, read_at, expires_at, ack_required, meta\n FROM tells\n WHERE to_agent = ? OR to_agent = '@all'\n ORDER BY created_at DESC\n LIMIT ?\n \"\"\", (agent_id, limit)).fetchall()\n return [self._row_to_tell(row) for row in rows]\n \n def delete_expired(self) -> int:\n \"\"\"Delete expired tells\"\"\"\n now = datetime.now(timezone.utc).isoformat()\n with sqlite3.connect(self.db_path) as conn:\n cursor = conn.execute(\"\"\"\n DELETE FROM tells WHERE expires_at IS NOT NULL AND expires_at \u003c ?\n \"\"\", (now,))\n return cursor.rowcount\n \n def stats(self, agent_id: str = None) -> Dict:\n \"\"\"Get tell statistics\"\"\"\n with sqlite3.connect(self.db_path) as conn:\n if agent_id:\n total = conn.execute(\"\"\"\n SELECT COUNT(*) FROM tells WHERE to_agent = ? OR to_agent = '@all'\n \"\"\", (agent_id,)).fetchone()[0]\n unread = conn.execute(\"\"\"\n SELECT COUNT(*) FROM tells \n WHERE (to_agent = ? OR to_agent = '@all') AND read_at IS NULL\n \"\"\", (agent_id,)).fetchone()[0]\n else:\n total = conn.execute(\"SELECT COUNT(*) FROM tells\").fetchone()[0]\n unread = conn.execute(\"SELECT COUNT(*) FROM tells WHERE read_at IS NULL\").fetchone()[0]\n \n return {\n \"total\": total,\n \"unread\": unread,\n \"read\": total - unread\n }\n\n\ndef format_tells_for_injection(tells: List[Tell]) -> str:\n \"\"\"Format tells as a header block for file injection\"\"\"\n if not tells:\n return \"\"\n \n lines = [\n \"# ⚠️ UNREAD MESSAGES\",\n \"\"\n ]\n \n for tell in tells:\n lines.append(tell.format_header())\n lines.append(\"\")\n \n lines.append(\"---\")\n lines.append(\"\")\n \n return \"\\n\".join(lines)\n\n\ndef format_inbox(tells: List[Tell], show_read: bool = False) -> str:\n \"\"\"Format tells for inbox view\"\"\"\n if not tells:\n return \"# 📬 Inbox\\n\\nNo messages.\\n\"\n \n lines = [\"# 📬 Inbox\", \"\"]\n \n unread = [t for t in tells if not t.read_at]\n read = [t for t in tells if t.read_at]\n \n if unread:\n lines.append(f\"## Unread ({len(unread)})\")\n lines.append(\"\")\n for tell in unread:\n lines.append(tell.format_header())\n lines.append(\"\")\n \n if show_read and read:\n lines.append(f\"## Read ({len(read)})\")\n lines.append(\"\")\n for tell in read[:10]: # Limit read messages\n lines.append(tell.format_header())\n lines.append(\"\")\n \n return \"\\n\".join(lines)\n\n\n# ============================================================\n# Hook System\n# ============================================================\n\nclass HookType(Enum):\n \"\"\"Types of hooks that can be triggered\"\"\"\n SHELL = \"shell\" # Execute shell command\n HTTP = \"http\" # POST to webhook URL\n OPENCLAW = \"openclaw\" # Send via OpenClaw sessions_send\n\n\n@dataclass\nclass HookConfig:\n \"\"\"Configuration for a single hook\"\"\"\n type: HookType\n target: str # Command, URL, or session key\n enabled: bool = True\n timeout: int = 10 # seconds\n \n def __post_init__(self):\n if isinstance(self.type, str):\n self.type = HookType(self.type)\n\n\nclass HookManager:\n \"\"\"\n Manages hooks for tell notifications.\n \n Config example (in avm.yaml or hooks.yaml):\n ```yaml\n hooks:\n kearsarge:\n on_tell:\n type: shell\n target: \"openclaw notify kearsarge\"\n yuze:\n on_tell:\n type: http\n target: \"http://localhost:3000/webhook\"\n akashi:\n on_tell:\n type: openclaw\n target: \"agent:akashi\"\n ```\n \n Or via virtual files:\n ```bash\n echo \"shell:openclaw notify kearsarge\" > avm/hooks/kearsarge\n echo \"http:http://localhost:3000/webhook\" > avm/hooks/yuze\n ```\n \"\"\"\n \n def __init__(self, config: Dict[str, Dict] = None, db_path: str = None):\n self._hooks: Dict[str, HookConfig] = {}\n self._db_path = db_path\n if db_path:\n self._init_db()\n self._load_from_db()\n if config:\n self._load_config(config)\n \n def _init_db(self):\n \"\"\"Initialize hooks table\"\"\"\n with sqlite3.connect(self._db_path) as conn:\n conn.execute(\"\"\"\n CREATE TABLE IF NOT EXISTS hooks (\n agent_id TEXT PRIMARY KEY,\n type TEXT NOT NULL,\n target TEXT NOT NULL,\n enabled INTEGER DEFAULT 1,\n timeout INTEGER DEFAULT 10,\n created_at TEXT\n )\n \"\"\")\n \n def _load_from_db(self):\n \"\"\"Load hooks from database\"\"\"\n try:\n with sqlite3.connect(self._db_path) as conn:\n rows = conn.execute(\n \"SELECT agent_id, type, target, enabled, timeout FROM hooks\"\n ).fetchall()\n for row in rows:\n self._hooks[row[0]] = HookConfig(\n type=HookType(row[1]),\n target=row[2],\n enabled=bool(row[3]),\n timeout=row[4]\n )\n except Exception:\n pass # Table might not exist yet\n \n def _save_to_db(self, agent_id: str, hook: HookConfig):\n \"\"\"Save hook to database\"\"\"\n if not self._db_path:\n return\n now = datetime.now(timezone.utc).isoformat()\n with sqlite3.connect(self._db_path) as conn:\n conn.execute(\"\"\"\n INSERT OR REPLACE INTO hooks (agent_id, type, target, enabled, timeout, created_at)\n VALUES (?, ?, ?, ?, ?, ?)\n \"\"\", (agent_id, hook.type.value, hook.target, int(hook.enabled), hook.timeout, now))\n \n def _delete_from_db(self, agent_id: str):\n \"\"\"Delete hook from database\"\"\"\n if not self._db_path:\n return\n with sqlite3.connect(self._db_path) as conn:\n conn.execute(\"DELETE FROM hooks WHERE agent_id = ?\", (agent_id,))\n \n def _load_config(self, config: Dict):\n \"\"\"Load hooks from config dict\"\"\"\n hooks_config = config.get('hooks', {})\n for agent_id, agent_hooks in hooks_config.items():\n if 'on_tell' in agent_hooks:\n hook_data = agent_hooks['on_tell']\n if isinstance(hook_data, str):\n # Simple format: just a command\n self._hooks[agent_id] = HookConfig(\n type=HookType.SHELL,\n target=hook_data\n )\n elif isinstance(hook_data, dict):\n self._hooks[agent_id] = HookConfig(\n type=HookType(hook_data.get('type', 'shell')),\n target=hook_data['target'],\n enabled=hook_data.get('enabled', True),\n timeout=hook_data.get('timeout', 10)\n )\n \n def register(self, agent_id: str, hook: HookConfig):\n \"\"\"Register a hook for an agent\"\"\"\n self._hooks[agent_id] = hook\n self._save_to_db(agent_id, hook)\n \n def unregister(self, agent_id: str):\n \"\"\"Unregister a hook\"\"\"\n self._hooks.pop(agent_id, None)\n self._delete_from_db(agent_id)\n \n def list_hooks(self) -> Dict[str, HookConfig]:\n \"\"\"List all registered hooks\"\"\"\n return dict(self._hooks)\n \n @staticmethod\n def parse_hook_string(s: str) -> Optional[HookConfig]:\n \"\"\"\n Parse hook from string format.\n \n Formats:\n type:target\n type:target?enabled=true&timeout=10\n \n Examples:\n shell:echo hello\n http:http://localhost:3000/webhook\n openclaw:agent:akashi\n \"\"\"\n s = s.strip()\n if not s or ':' not in s:\n return None\n \n # Parse query params if present\n params = {}\n if '?' in s:\n s, query = s.split('?', 1)\n for part in query.split('&'):\n if '=' in part:\n k, v = part.split('=', 1)\n params[k] = v\n \n # Parse type:target\n type_str, target = s.split(':', 1)\n \n try:\n hook_type = HookType(type_str.lower())\n except ValueError:\n return None\n \n return HookConfig(\n type=hook_type,\n target=target,\n enabled=params.get('enabled', 'true').lower() == 'true',\n timeout=int(params.get('timeout', 10))\n )\n \n def format_hook(self, agent_id: str) -> str:\n \"\"\"Format hook config as string\"\"\"\n hook = self._hooks.get(agent_id)\n if not hook:\n return \"\"\n \n params = []\n if not hook.enabled:\n params.append(\"enabled=false\")\n if hook.timeout != 10:\n params.append(f\"timeout={hook.timeout}\")\n \n base = f\"{hook.type.value}:{hook.target}\"\n if params:\n base += \"?\" + \"&\".join(params)\n return base\n \n def get_hook(self, agent_id: str) -> Optional[HookConfig]:\n \"\"\"Get hook config for an agent\"\"\"\n return self._hooks.get(agent_id)\n \n def trigger(self, tell: Tell) -> Dict[str, Any]:\n \"\"\"\n Trigger hooks for a tell.\n Returns results for each triggered hook.\n \"\"\"\n results = {}\n \n # Get agents to notify\n agents_to_notify = []\n if tell.to_agent == '@all':\n # Trigger all registered hooks\n agents_to_notify = list(self._hooks.keys())\n elif tell.to_agent in self._hooks:\n agents_to_notify = [tell.to_agent]\n \n for agent_id in agents_to_notify:\n hook = self._hooks.get(agent_id)\n if not hook or not hook.enabled:\n continue\n \n try:\n result = self._execute_hook(hook, tell, agent_id)\n results[agent_id] = {\"success\": True, \"result\": result}\n except Exception as e:\n results[agent_id] = {\"success\": False, \"error\": str(e)}\n \n return results\n \n def _execute_hook(self, hook: HookConfig, tell: Tell, agent_id: str) -> Any:\n \"\"\"Execute a single hook\"\"\"\n if hook.type == HookType.SHELL:\n return self._execute_shell(hook, tell, agent_id)\n elif hook.type == HookType.HTTP:\n return self._execute_http(hook, tell)\n elif hook.type == HookType.OPENCLAW:\n return self._execute_openclaw(hook, tell, agent_id)\n else:\n raise ValueError(f\"Unknown hook type: {hook.type}\")\n \n def _execute_shell(self, hook: HookConfig, tell: Tell, agent_id: str) -> str:\n \"\"\"Execute shell command\"\"\"\n import subprocess\n import shlex\n \n # Expand variables in command\n cmd = hook.target\n cmd = cmd.replace('${from}', tell.from_agent)\n cmd = cmd.replace('${to}', agent_id)\n cmd = cmd.replace('${priority}', tell.priority.value)\n cmd = cmd.replace('${content}', shlex.quote(tell.content[:100]))\n \n result = subprocess.run(\n cmd,\n shell=True,\n capture_output=True,\n timeout=hook.timeout,\n text=True\n )\n \n if result.returncode != 0:\n raise RuntimeError(f\"Command failed: {result.stderr}\")\n \n return result.stdout.strip()\n \n def _execute_http(self, hook: HookConfig, tell: Tell) -> Dict:\n \"\"\"POST to webhook URL\"\"\"\n import urllib.request\n import urllib.error\n \n payload = json.dumps({\n \"type\": \"tell\",\n \"from\": tell.from_agent,\n \"to\": tell.to_agent,\n \"content\": tell.content,\n \"priority\": tell.priority.value,\n \"created_at\": tell.created_at\n }).encode('utf-8')\n \n req = urllib.request.Request(\n hook.target,\n data=payload,\n headers={'Content-Type': 'application/json'},\n method='POST'\n )\n \n try:\n with urllib.request.urlopen(req, timeout=hook.timeout) as resp:\n return {\"status\": resp.status, \"body\": resp.read().decode()}\n except urllib.error.HTTPError as e:\n raise RuntimeError(f\"HTTP {e.code}: {e.reason}\")\n except urllib.error.URLError as e:\n raise RuntimeError(f\"URL error: {e.reason}\")\n \n def _execute_openclaw(self, hook: HookConfig, tell: Tell, agent_id: str) -> str:\n \"\"\"\n Send notification via OpenClaw.\n Requires openclaw CLI to be available.\n \"\"\"\n import subprocess\n \n message = f\"📬 New message from {tell.from_agent}:\\n{tell.content}\"\n \n # Use openclaw CLI to send\n cmd = [\n \"openclaw\", \"send\",\n \"--to\", hook.target,\n \"--message\", message\n ]\n \n result = subprocess.run(\n cmd,\n capture_output=True,\n timeout=hook.timeout,\n text=True\n )\n \n if result.returncode != 0:\n # Fallback: try sessions_send via API if available\n raise RuntimeError(f\"OpenClaw send failed: {result.stderr}\")\n \n return \"sent\"\n\n\n# Global hook manager (can be configured at startup)\n_hook_manager: Optional[HookManager] = None\n\n\ndef get_hook_manager() -> HookManager:\n \"\"\"Get or create the global hook manager\"\"\"\n global _hook_manager\n if _hook_manager is None:\n _hook_manager = HookManager()\n return _hook_manager\n\n\ndef set_hook_manager(manager: HookManager):\n \"\"\"Set the global hook manager\"\"\"\n global _hook_manager\n _hook_manager = manager\n\n\ndef configure_hooks(config: Dict):\n \"\"\"Configure hooks from a config dict\"\"\"\n global _hook_manager\n _hook_manager = HookManager(config)\n","content_type":"text/x-python; charset=utf-8","language":"python","size":22511,"content_sha256":"70877fdebf83e1404c5ec907a23c0d1ebafa0a5c6fb4c6d1bcd55fb117337163"},{"filename":"avm/topic_index.py","content":"\"\"\"\navm/topic_index.py - Topic-Level Index for Fast Recall\n\nReduces recall hops from 4 to 1 by pre-computing topic→path mappings.\n\nArchitecture:\n- Topic extraction happens on write (async)\n- Topics stored in separate table with path references\n- Recall first checks topic index, then falls back to FTS\n\nThis enables:\n- 1-hop recall for known topics\n- Instant \"what do I know about X?\" queries\n- Topic clustering for discovery\n\"\"\"\n\nimport re\nimport hashlib\nfrom dataclasses import dataclass, field\nfrom datetime import datetime\nfrom typing import List, Dict, Set, Optional, Tuple\nfrom collections import defaultdict\n\nfrom .store import AVMStore\nfrom .utils import utcnow\nfrom .node import AVMNode\n\n\n# Common stop words to filter out\nSTOP_WORDS = {\n \"the\", \"a\", \"an\", \"and\", \"or\", \"but\", \"in\", \"on\", \"at\", \"to\", \"for\",\n \"of\", \"with\", \"by\", \"from\", \"as\", \"is\", \"was\", \"are\", \"were\", \"been\",\n \"be\", \"have\", \"has\", \"had\", \"do\", \"does\", \"did\", \"will\", \"would\",\n \"could\", \"should\", \"may\", \"might\", \"must\", \"shall\", \"can\", \"need\",\n \"this\", \"that\", \"these\", \"those\", \"i\", \"you\", \"he\", \"she\", \"it\",\n \"we\", \"they\", \"what\", \"which\", \"who\", \"whom\", \"when\", \"where\", \"why\",\n \"how\", \"all\", \"each\", \"every\", \"both\", \"few\", \"more\", \"most\", \"other\",\n \"some\", \"such\", \"no\", \"nor\", \"not\", \"only\", \"own\", \"same\", \"so\",\n \"than\", \"too\", \"very\", \"just\", \"also\", \"now\", \"here\", \"there\",\n}\n\n\n@dataclass\nclass TopicEntry:\n \"\"\"A topic with associated paths\"\"\"\n topic: str\n paths: Set[str] = field(default_factory=set)\n last_updated: datetime = field(default_factory=utcnow)\n \n def add_path(self, path: str):\n self.paths.add(path)\n self.last_updated = utcnow()\n \n def remove_path(self, path: str):\n self.paths.discard(path)\n self.last_updated = utcnow()\n\n\nclass TopicIndex:\n \"\"\"\n In-memory topic index backed by SQLite.\n \n Schema:\n topics(topic TEXT PRIMARY KEY, paths TEXT, updated_at INTEGER)\n \"\"\"\n \n def __init__(self, store: AVMStore):\n self.store = store\n self._cache: Dict[str, TopicEntry] = {}\n self._path_to_topics: Dict[str, Set[str]] = defaultdict(set)\n self._ensure_table()\n self._load_index()\n \n def _ensure_table(self):\n \"\"\"Create topics table if not exists\"\"\"\n with self.store._conn() as conn:\n conn.execute(\"\"\"\n CREATE TABLE IF NOT EXISTS topics (\n topic TEXT PRIMARY KEY,\n paths TEXT,\n updated_at INTEGER\n )\n \"\"\")\n conn.execute(\"\"\"\n CREATE INDEX IF NOT EXISTS idx_topics_updated \n ON topics(updated_at)\n \"\"\")\n \n def _load_index(self):\n \"\"\"Load index from SQLite into memory\"\"\"\n with self.store._conn() as conn:\n rows = conn.execute(\"SELECT topic, paths, updated_at FROM topics\").fetchall()\n for topic, paths_str, updated_at in rows:\n paths = set(paths_str.split(\"|\")) if paths_str else set()\n entry = TopicEntry(\n topic=topic,\n paths=paths,\n last_updated=datetime.fromtimestamp(updated_at / 1000) if updated_at else utcnow()\n )\n self._cache[topic] = entry\n for path in paths:\n self._path_to_topics[path].add(topic)\n \n def _save_topic(self, topic: str):\n \"\"\"Save a single topic to SQLite\"\"\"\n entry = self._cache.get(topic)\n if not entry:\n return\n \n paths_str = \"|\".join(entry.paths) if entry.paths else \"\"\n updated_at = int(entry.last_updated.timestamp() * 1000)\n \n with self.store._conn() as conn:\n conn.execute(\"\"\"\n INSERT OR REPLACE INTO topics (topic, paths, updated_at)\n VALUES (?, ?, ?)\n \"\"\", (topic, paths_str, updated_at))\n \n def extract_topics(self, content: str, title: str = \"\") -> List[str]:\n \"\"\"\n Extract topics from content using simple NLP.\n \n Strategy:\n 1. Split into words\n 2. Filter stop words\n 3. Extract noun phrases (simple: capitalized words)\n 4. Extract hashtags\n 5. Use title words as high-weight topics\n \"\"\"\n topics = set()\n \n # Title words are important topics\n if title:\n title_words = re.findall(r'\\b[a-zA-Z]{3,}\\b', title.lower())\n topics.update(w for w in title_words if w not in STOP_WORDS)\n \n # Extract hashtags\n hashtags = re.findall(r'#(\\w+)', content)\n topics.update(h.lower() for h in hashtags)\n \n # Extract capitalized words (likely proper nouns)\n proper_nouns = re.findall(r'\\b[A-Z][a-z]+\\b', content)\n topics.update(w.lower() for w in proper_nouns if len(w) > 2 and w.lower() not in STOP_WORDS)\n \n # Extract significant words (frequency-based)\n words = re.findall(r'\\b[a-zA-Z]{4,}\\b', content.lower())\n word_freq = defaultdict(int)\n for w in words:\n if w not in STOP_WORDS:\n word_freq[w] += 1\n \n # Top words by frequency\n sorted_words = sorted(word_freq.items(), key=lambda x: -x[1])\n topics.update(w for w, _ in sorted_words[:10])\n \n return list(topics)[:20] # Max 20 topics per document\n \n def index_path(self, path: str, content: str, title: str = \"\"):\n \"\"\"\n Index a path's topics.\n \n Called on write operations.\n \"\"\"\n # Remove old topics for this path\n old_topics = self._path_to_topics.get(path, set()).copy()\n for topic in old_topics:\n if topic in self._cache:\n self._cache[topic].remove_path(path)\n if not self._cache[topic].paths:\n del self._cache[topic]\n self._path_to_topics[path] = set()\n \n # Extract new topics\n topics = self.extract_topics(content, title)\n \n # Update index\n for topic in topics:\n if topic not in self._cache:\n self._cache[topic] = TopicEntry(topic=topic)\n self._cache[topic].add_path(path)\n self._path_to_topics[path].add(topic)\n self._save_topic(topic)\n \n def remove_path(self, path: str):\n \"\"\"Remove a path from the index\"\"\"\n topics = self._path_to_topics.get(path, set()).copy()\n for topic in topics:\n if topic in self._cache:\n self._cache[topic].remove_path(path)\n if not self._cache[topic].paths:\n del self._cache[topic]\n else:\n self._save_topic(topic)\n del self._path_to_topics[path]\n \n def query(self, query: str, limit: int = 50) -> List[Tuple[str, float]]:\n \"\"\"\n Query the topic index.\n \n Returns: List of (path, score) tuples\n \"\"\"\n # Extract query topics\n query_topics = set(self.extract_topics(query, \"\"))\n \n if not query_topics:\n return []\n \n # Score paths by topic overlap\n path_scores: Dict[str, float] = defaultdict(float)\n \n for topic in query_topics:\n if topic in self._cache:\n entry = self._cache[topic]\n # Score based on topic specificity (fewer paths = more specific)\n specificity = 1.0 / (len(entry.paths) + 1)\n for path in entry.paths:\n path_scores[path] += specificity\n \n # Normalize and sort\n if path_scores:\n max_score = max(path_scores.values())\n path_scores = {p: s / max_score for p, s in path_scores.items()}\n \n sorted_paths = sorted(path_scores.items(), key=lambda x: -x[1])\n return sorted_paths[:limit]\n \n def topics_for_path(self, path: str) -> List[str]:\n \"\"\"Get topics associated with a path\"\"\"\n return list(self._path_to_topics.get(path, set()))\n \n def paths_for_topic(self, topic: str) -> List[str]:\n \"\"\"Get paths associated with a topic\"\"\"\n entry = self._cache.get(topic.lower())\n return list(entry.paths) if entry else []\n \n def all_topics(self) -> Dict[str, int]:\n \"\"\"Get all topics with path counts\"\"\"\n return {topic: len(entry.paths) for topic, entry in self._cache.items()}\n \n def similar_topics(self, topic: str, limit: int = 10) -> List[Tuple[str, float]]:\n \"\"\"\n Find topics that often co-occur with the given topic.\n \n Uses Jaccard similarity on path sets.\n \"\"\"\n topic = topic.lower()\n if topic not in self._cache:\n return []\n \n target_paths = self._cache[topic].paths\n if not target_paths:\n return []\n \n similarities = []\n for other_topic, entry in self._cache.items():\n if other_topic == topic:\n continue\n \n intersection = len(target_paths & entry.paths)\n if intersection > 0:\n union = len(target_paths | entry.paths)\n jaccard = intersection / union\n similarities.append((other_topic, jaccard))\n \n similarities.sort(key=lambda x: -x[1])\n return similarities[:limit]\n \n def stats(self) -> Dict[str, int]:\n \"\"\"Get index statistics\"\"\"\n return {\n \"total_topics\": len(self._cache),\n \"total_paths\": len(self._path_to_topics),\n \"avg_paths_per_topic\": sum(len(e.paths) for e in self._cache.values()) / max(len(self._cache), 1),\n \"avg_topics_per_path\": sum(len(t) for t in self._path_to_topics.values()) / max(len(self._path_to_topics), 1),\n }\n\n\ndef integrate_with_recall(topic_index: TopicIndex, avm_store: AVMStore):\n \"\"\"\n Hook to integrate topic index with recall.\n \n Call this in agent_memory.recall() before FTS:\n \n 1. Query topic index (O(1) lookup)\n 2. If topics found → use those paths directly\n 3. Else → fall back to FTS\n \n This reduces hop count from 4 to 1 for known topics.\n \"\"\"\n pass # Integration point - see agent_memory.py\n","content_type":"text/x-python; charset=utf-8","language":"python","size":10312,"content_sha256":"a4ba678554e471c156706841ce7ce7b8a31e8b5e74c4cc463e7ad454bbe32bf0"},{"filename":"avm/utils.py","content":"\"\"\"\navm/utils.py - Utility functions\n\"\"\"\n\nfrom datetime import datetime, timezone\n\n\ndef utcnow() -> datetime:\n \"\"\"Get current UTC time (timezone-aware).\n \n Replacement for deprecated datetime.utcnow()\n \"\"\"\n return datetime.now(timezone.utc)\n","content_type":"text/x-python; charset=utf-8","language":"python","size":256,"content_sha256":"4fedf4a068d56b68be1e4cc3b8c9e540a3dab104ebf417759a7386985ba2feaf"},{"filename":"benchmarks/agent_executor.py","content":"#!/usr/bin/env python3\n\"\"\"\nAgent executor for AVM benchmarks.\nUses subprocess to call coding agents (Codex/Claude Code).\n\"\"\"\n\nimport json\nimport subprocess\nimport time\nimport tempfile\nfrom dataclasses import dataclass\nfrom pathlib import Path\nfrom typing import Callable\n\ntry:\n import tiktoken\n _encoder = tiktoken.encoding_for_model(\"gpt-4\")\n def count_tokens(text: str) -> int:\n return len(_encoder.encode(text))\nexcept ImportError:\n def count_tokens(text: str) -> int:\n # Fallback: rough estimate\n return int(len(text.split()) * 1.3)\n\n\n@dataclass\nclass AgentResponse:\n \"\"\"Response from an agent execution.\"\"\"\n success: bool\n output: str\n tokens_used: int\n latency_ms: float\n error: str = \"\"\n\n\ndef run_claude_code(task: str, workdir: str = None, timeout: int = 120) -> AgentResponse:\n \"\"\"\n Run Claude Code with a task.\n Uses --print mode for non-interactive execution.\n \"\"\"\n start = time.time()\n \n cmd = [\n \"claude\",\n \"--print\",\n \"--permission-mode\", \"bypassPermissions\",\n \"--model\", \"claude-sonnet-4-6\",\n task\n ]\n \n try:\n result = subprocess.run(\n cmd,\n capture_output=True,\n text=True,\n timeout=timeout,\n cwd=workdir,\n )\n \n latency = (time.time() - start) * 1000\n \n # Claude Code doesn't easily report tokens, estimate from output length\n tokens_estimate = len(result.stdout.split()) * 1.3\n \n return AgentResponse(\n success=result.returncode == 0,\n output=result.stdout,\n tokens_used=int(tokens_estimate),\n latency_ms=latency,\n error=result.stderr if result.returncode != 0 else \"\",\n )\n except subprocess.TimeoutExpired:\n return AgentResponse(\n success=False,\n output=\"\",\n tokens_used=0,\n latency_ms=timeout * 1000,\n error=\"Timeout\",\n )\n except Exception as e:\n return AgentResponse(\n success=False,\n output=\"\",\n tokens_used=0,\n latency_ms=(time.time() - start) * 1000,\n error=str(e),\n )\n\n\ndef run_codex(task: str, workdir: str = None, timeout: int = 120) -> AgentResponse:\n \"\"\"\n Run Codex CLI with a task using exec mode.\n Uses a clean workdir to avoid reading unrelated files.\n \"\"\"\n start = time.time()\n \n # Use workdir if provided, otherwise create temp\n if not workdir:\n workdir = tempfile.mkdtemp(prefix=\"codex_bench_\")\n \n cmd = [\n \"codex\", \"exec\",\n \"-c\", \"approval_policy=never\",\n \"--skip-git-repo-check\",\n task\n ]\n \n try:\n result = subprocess.run(\n cmd,\n capture_output=True,\n text=True,\n timeout=timeout,\n cwd=workdir,\n )\n \n latency = (time.time() - start) * 1000\n \n # Parse actual token count from Codex output or use tiktoken\n output = result.stdout.strip()\n tokens_used = 0\n \n # Try to parse from Codex output first\n if \"tokens used\" in output:\n lines = output.split('\\n')\n for i, line in enumerate(lines):\n if \"tokens used\" in line.lower() and i + 1 \u003c len(lines):\n try:\n tokens_used = int(lines[i + 1].replace(',', '').strip())\n output = '\\n'.join(lines[:i]).strip()\n except ValueError:\n pass\n break\n \n # Fallback: count with tiktoken (input + output estimate)\n if tokens_used == 0:\n tokens_used = count_tokens(task) + count_tokens(output)\n \n return AgentResponse(\n success=result.returncode == 0,\n output=output.strip(),\n tokens_used=tokens_used,\n latency_ms=latency,\n error=result.stderr if result.returncode != 0 else \"\",\n )\n except subprocess.TimeoutExpired:\n return AgentResponse(\n success=False,\n output=\"\",\n tokens_used=0,\n latency_ms=timeout * 1000,\n error=\"Timeout\",\n )\n except Exception as e:\n return AgentResponse(\n success=False,\n output=\"\",\n tokens_used=0,\n latency_ms=(time.time() - start) * 1000,\n error=str(e),\n )\n\n\nclass MultiAgentExecutor:\n \"\"\"Executes multi-agent scenarios.\"\"\"\n \n def __init__(self, agent_type: str = \"claude\"):\n self.agent_type = agent_type\n self.run_fn = run_claude_code if agent_type == \"claude\" else run_codex\n \n def run_scenario(self, scenario: dict, workdir: Path, log_callback: Callable = None) -> dict:\n \"\"\"\n Run a scenario with multiple agents.\n \n Args:\n scenario: The scenario definition\n workdir: Working directory for the scenario\n log_callback: Function to call for logging events\n \n Returns:\n Results dictionary\n \"\"\"\n results = {\n \"agents\": {},\n \"total_tokens\": 0,\n \"total_latency_ms\": 0,\n }\n \n # For simple scenarios, run agents sequentially\n agents = scenario.get(\"agents\", [])\n context = scenario.get(\"initial_context\", \"\")\n provided_code = scenario.get(\"provided_code\", \"\")\n \n # Build task context\n task_context = f\"\"\"\nContext: {context}\n\n{f'Provided Code:{chr(10)}{provided_code}' if provided_code else ''}\n\nAssertions to satisfy:\n{chr(10).join(f'- {a}' for a in scenario.get('assertions', []))}\n\"\"\"\n \n for agent in agents:\n agent_id = agent[\"id\"]\n agent_role = agent[\"role\"]\n \n # Build agent-specific task\n task = f\"\"\"You are {agent_id} with role: {agent_role}\n\n{task_context}\n\nComplete your part of this task. Be concise and produce working code/output.\n\"\"\"\n \n if log_callback:\n log_callback(agent_id, \"start\", {\"role\": agent_role})\n \n # Run agent\n response = self.run_fn(task, workdir=str(workdir), timeout=90)\n \n results[\"agents\"][agent_id] = {\n \"success\": response.success,\n \"output_length\": len(response.output),\n \"tokens\": response.tokens_used,\n \"latency_ms\": response.latency_ms,\n \"error\": response.error,\n }\n \n results[\"total_tokens\"] += response.tokens_used\n results[\"total_latency_ms\"] += response.latency_ms\n \n if log_callback:\n log_callback(agent_id, \"complete\", {\n \"success\": response.success,\n \"tokens\": response.tokens_used,\n \"latency_ms\": response.latency_ms,\n })\n \n # Save agent output for next agent's context\n output_file = workdir / f\"{agent_id}_output.txt\"\n output_file.write_text(response.output)\n \n # Update context for next agent\n task_context += f\"\\n\\n{agent_id}'s output:\\n{response.output[:2000]}\"\n \n return results\n\n\ndef test_simple():\n \"\"\"Quick test with a simple task.\"\"\"\n print(\"Testing Claude Code...\")\n response = run_claude_code(\"Print 'Hello from benchmark test'\", timeout=30)\n print(f\"Success: {response.success}\")\n print(f\"Latency: {response.latency_ms:.0f}ms\")\n print(f\"Output: {response.output[:200]}\")\n return response.success\n\n\ndef run_claude_opus(task: str, workdir: str = None, timeout: int = 120) -> AgentResponse:\n \"\"\"\n Run Claude Code (Opus) with a task.\n Uses --print mode for non-interactive execution.\n \"\"\"\n start = time.time()\n \n if not workdir:\n workdir = tempfile.mkdtemp(prefix=\"claude_bench_\")\n \n cmd = [\n \"claude\",\n \"--print\",\n \"--dangerously-skip-permissions\",\n \"--model\", \"claude-opus-4-5\", # Use Opus for complex tasks\n task\n ]\n \n try:\n result = subprocess.run(\n cmd,\n capture_output=True,\n text=True,\n timeout=timeout,\n cwd=workdir,\n )\n \n latency = (time.time() - start) * 1000\n output = result.stdout.strip()\n \n # Count tokens with tiktoken (approximate for Claude)\n tokens_used = count_tokens(task) + count_tokens(output)\n \n return AgentResponse(\n success=result.returncode == 0 and len(output) > 0,\n output=output,\n tokens_used=tokens_used,\n latency_ms=latency,\n error=result.stderr if result.returncode != 0 else \"\",\n )\n except subprocess.TimeoutExpired:\n return AgentResponse(\n success=False,\n output=\"\",\n tokens_used=0,\n latency_ms=timeout * 1000,\n error=\"Timeout\",\n )\n except Exception as e:\n return AgentResponse(\n success=False,\n output=\"\",\n tokens_used=0,\n latency_ms=(time.time() - start) * 1000,\n error=str(e),\n )\n\n\ndef run_agent(task: str, agent_type: str = \"codex\", workdir: str = None, timeout: int = 120) -> AgentResponse:\n \"\"\"\n Unified interface to run any agent type.\n \n Args:\n task: The task/prompt\n agent_type: \"codex\" or \"claude\"\n workdir: Working directory\n timeout: Timeout in seconds\n \n Returns:\n AgentResponse with results\n \"\"\"\n if agent_type == \"claude\":\n return run_claude_opus(task, workdir, timeout)\n else:\n return run_codex(task, workdir, timeout)\n\n\ndef test_codex():\n \"\"\"Quick test with Codex.\"\"\"\n print(\"Testing Codex...\")\n response = run_codex(\"Print 'Hello from Codex benchmark test'\", timeout=30)\n print(f\"Success: {response.success}\")\n print(f\"Latency: {response.latency_ms:.0f}ms\")\n print(f\"Output: {response.output[:500]}\")\n print(f\"Error: {response.error[:200] if response.error else 'none'}\")\n return response.success\n\n\ndef test_claude():\n \"\"\"Quick test with Claude.\"\"\"\n print(\"Testing Claude...\")\n response = run_claude_opus(\"Print 'Hello from Claude benchmark test'\", timeout=30)\n print(f\"Success: {response.success}\")\n print(f\"Latency: {response.latency_ms:.0f}ms\")\n print(f\"Output: {response.output[:500]}\")\n print(f\"Error: {response.error[:200] if response.error else 'none'}\")\n return response.success\n\n\nif __name__ == \"__main__\":\n import sys\n if len(sys.argv) > 1:\n if sys.argv[1] == \"codex\":\n test_codex()\n elif sys.argv[1] == \"claude\":\n test_claude()\n else:\n test_simple()\n else:\n test_simple()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":10966,"content_sha256":"86959864978ed12d8afb4d1786a624ae03497c3b9b67b0bdb2164068f6cdae79"},{"filename":"benchmarks/avm_integration.py","content":"#!/usr/bin/env python3\n\"\"\"\nAVM integration for multi-agent benchmarks.\n\nProvides recall/remember operations for agents to share knowledge.\n\"\"\"\n\nimport subprocess\nimport json\nimport time\nfrom dataclasses import dataclass\nfrom typing import Optional\n\n\n@dataclass\nclass AVMResult:\n \"\"\"Result from an AVM operation.\"\"\"\n success: bool\n data: str\n tokens_used: int # Estimated tokens for the retrieved/stored content\n latency_ms: float\n error: str = \"\"\n\n\ndef avm_recall(query: str, agent_id: str, max_tokens: int = 500) -> AVMResult:\n \"\"\"\n Recall relevant memories for an agent.\n \n Args:\n query: Search query for relevant memories\n agent_id: The agent performing the recall\n max_tokens: Maximum tokens to retrieve\n \n Returns:\n AVMResult with retrieved memories\n \"\"\"\n start = time.time()\n \n cmd = [\n \"avm\", \"recall\",\n \"-a\", agent_id,\n \"-t\", str(max_tokens),\n query\n ]\n \n try:\n result = subprocess.run(\n cmd,\n capture_output=True,\n text=True,\n timeout=30,\n )\n \n latency = (time.time() - start) * 1000\n output = result.stdout.strip()\n \n # Remove null bytes and other control characters\n output = output.replace('\\x00', '')\n output = ''.join(c for c in output if ord(c) >= 32 or c in '\\n\\r\\t')\n \n # Estimate tokens from output length\n tokens = len(output.split()) if output else 0\n \n return AVMResult(\n success=result.returncode == 0,\n data=output,\n tokens_used=tokens,\n latency_ms=latency,\n error=result.stderr if result.returncode != 0 else \"\",\n )\n except subprocess.TimeoutExpired:\n return AVMResult(\n success=False,\n data=\"\",\n tokens_used=0,\n latency_ms=30000,\n error=\"Timeout\",\n )\n except Exception as e:\n return AVMResult(\n success=False,\n data=\"\",\n tokens_used=0,\n latency_ms=(time.time() - start) * 1000,\n error=str(e),\n )\n\n\ndef avm_remember(content: str, agent_id: str, importance: float = 0.6, \n title: Optional[str] = None) -> AVMResult:\n \"\"\"\n Store a memory for an agent.\n \n Args:\n content: Content to remember\n agent_id: The agent storing the memory\n importance: Importance score (0-1)\n title: Optional title for the memory\n \n Returns:\n AVMResult with storage confirmation\n \"\"\"\n start = time.time()\n \n cmd = [\n \"avm\", \"remember\",\n \"-a\", agent_id,\n \"-i\", str(importance),\n \"-c\", content,\n ]\n \n if title:\n cmd.extend([\"-t\", title])\n \n try:\n result = subprocess.run(\n cmd,\n capture_output=True,\n text=True,\n timeout=30,\n )\n \n latency = (time.time() - start) * 1000\n \n # Tokens used is the content being stored\n tokens = len(content.split())\n \n return AVMResult(\n success=result.returncode == 0,\n data=result.stdout.strip(),\n tokens_used=tokens,\n latency_ms=latency,\n error=result.stderr if result.returncode != 0 else \"\",\n )\n except subprocess.TimeoutExpired:\n return AVMResult(\n success=False,\n data=\"\",\n tokens_used=0,\n latency_ms=30000,\n error=\"Timeout\",\n )\n except Exception as e:\n return AVMResult(\n success=False,\n data=\"\",\n tokens_used=0,\n latency_ms=(time.time() - start) * 1000,\n error=str(e),\n )\n\n\ndef avm_write(path: str, content: str) -> AVMResult:\n \"\"\"\n Write content to AVM shared storage.\n \n Args:\n path: Path in AVM (e.g., /shared/project/notes.md)\n content: Content to write\n \n Returns:\n AVMResult with write confirmation\n \"\"\"\n start = time.time()\n \n cmd = [\n \"avm\", \"write\",\n path,\n content,\n ]\n \n try:\n result = subprocess.run(\n cmd,\n capture_output=True,\n text=True,\n timeout=30,\n )\n \n latency = (time.time() - start) * 1000\n tokens = len(content.split())\n \n return AVMResult(\n success=result.returncode == 0,\n data=result.stdout.strip(),\n tokens_used=tokens,\n latency_ms=latency,\n error=result.stderr if result.returncode != 0 else \"\",\n )\n except Exception as e:\n return AVMResult(\n success=False,\n data=\"\",\n tokens_used=0,\n latency_ms=(time.time() - start) * 1000,\n error=str(e),\n )\n\n\ndef avm_read(path: str) -> AVMResult:\n \"\"\"\n Read content from AVM shared storage.\n \n Args:\n path: Path in AVM (e.g., /shared/project/notes.md)\n \n Returns:\n AVMResult with content\n \"\"\"\n start = time.time()\n \n cmd = [\n \"avm\", \"read\",\n path,\n ]\n \n try:\n result = subprocess.run(\n cmd,\n capture_output=True,\n text=True,\n timeout=30,\n )\n \n latency = (time.time() - start) * 1000\n output = result.stdout.strip()\n tokens = len(output.split()) if output else 0\n \n return AVMResult(\n success=result.returncode == 0,\n data=output,\n tokens_used=tokens,\n latency_ms=latency,\n error=result.stderr if result.returncode != 0 else \"\",\n )\n except Exception as e:\n return AVMResult(\n success=False,\n data=\"\",\n tokens_used=0,\n latency_ms=(time.time() - start) * 1000,\n error=str(e),\n )\n\n\ndef avm_memory_stats(agent_id: str) -> dict:\n \"\"\"Get memory statistics for an agent.\"\"\"\n try:\n result = subprocess.run(\n [\"avm\", \"memory-stats\", \"-a\", agent_id],\n capture_output=True,\n text=True,\n timeout=10,\n )\n \n # Parse output\n stats = {}\n for line in result.stdout.strip().split('\\n'):\n if ':' in line:\n key, value = line.split(':', 1)\n stats[key.strip().lower().replace(' ', '_')] = value.strip()\n \n return stats\n except Exception as e:\n return {\"error\": str(e)}\n\n\ndef test_avm():\n \"\"\"Test AVM integration.\"\"\"\n print(\"Testing AVM integration...\\n\")\n \n # Test remember\n print(\"1. Testing remember...\")\n result = avm_remember(\n content=\"Bug fix: use re.escape() to handle special characters in search queries\",\n agent_id=\"bench_debugger\",\n importance=0.7,\n title=\"regex_escape_fix\"\n )\n print(f\" Success: {result.success}\")\n print(f\" Latency: {result.latency_ms:.0f}ms\")\n print(f\" Output: {result.data[:100]}\")\n \n # Test recall\n print(\"\\n2. Testing recall...\")\n result = avm_recall(\n query=\"regex special characters bug fix\",\n agent_id=\"bench_fixer\",\n max_tokens=200\n )\n print(f\" Success: {result.success}\")\n print(f\" Latency: {result.latency_ms:.0f}ms\")\n print(f\" Tokens: {result.tokens_used}\")\n print(f\" Data preview: {result.data[:200] if result.data else '(empty)'}\")\n \n # Test memory stats\n print(\"\\n3. Testing memory stats...\")\n stats = avm_memory_stats(\"bench_debugger\")\n print(f\" Stats: {stats}\")\n \n return result.success\n\n\nif __name__ == \"__main__\":\n test_avm()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":7767,"content_sha256":"88d15e74bbacf635fa47d80542f6c3823fc9904980ddbbd793f2f881ede6ae36"},{"filename":"benchmarks/bench_ablation.py","content":"#!/usr/bin/env python3\n\"\"\"\nAVM Ablation Study Benchmark\n\nTests the impact of individual optimizations by enabling/disabling them.\n\nConfigurations tested:\n1. baseline: all optimizations OFF\n2. wal_only: only WAL mode ON\n3. cache_only: only hot cache ON\n4. async_only: only async embedding ON\n5. all_on: all optimizations ON (default)\n\nUsage:\n python benchmarks/bench_ablation.py\n python benchmarks/bench_ablation.py --json\n python benchmarks/bench_ablation.py --config wal_only\n\"\"\"\n\nimport time\nimport tempfile\nimport os\nimport json\nimport random\nimport argparse\nfrom pathlib import Path\nfrom dataclasses import dataclass, asdict, field\nfrom typing import List, Dict, Any\n\n\n# Ablation configurations\nCONFIGS = {\n \"baseline\": {\n \"wal_mode\": False,\n \"async_embedding\": False,\n \"hot_cache\": False,\n \"sync_mode\": \"FULL\",\n },\n \"wal_only\": {\n \"wal_mode\": True,\n \"async_embedding\": False,\n \"hot_cache\": False,\n \"sync_mode\": \"NORMAL\",\n },\n \"cache_only\": {\n \"wal_mode\": False,\n \"async_embedding\": False,\n \"hot_cache\": True,\n \"cache_size\": 100,\n \"sync_mode\": \"FULL\",\n },\n \"async_only\": {\n \"wal_mode\": False,\n \"async_embedding\": True,\n \"hot_cache\": False,\n \"sync_mode\": \"FULL\",\n },\n \"wal_cache\": {\n \"wal_mode\": True,\n \"async_embedding\": False,\n \"hot_cache\": True,\n \"cache_size\": 100,\n \"sync_mode\": \"NORMAL\",\n },\n \"wal_async\": {\n \"wal_mode\": True,\n \"async_embedding\": True,\n \"hot_cache\": False,\n \"sync_mode\": \"NORMAL\",\n },\n \"all_on\": {\n \"wal_mode\": True,\n \"async_embedding\": True,\n \"hot_cache\": True,\n \"cache_size\": 100,\n \"sync_mode\": \"NORMAL\",\n },\n}\n\n\n@dataclass\nclass AblationResult:\n config_name: str\n config: Dict[str, Any]\n write_ops_sec: float\n write_latency_ms: float\n read_ops_sec: float\n read_latency_ms: float\n search_ops_sec: float\n search_latency_ms: float\n\n\ndef generate_content(size: int = 300) -> str:\n words = [\"memory\", \"agent\", \"task\", \"project\", \"analysis\", \"data\", \"result\", \"market\", \"trading\"]\n return \" \".join(random.choice(words) for _ in range(size // 8))\n\n\ndef setup_avm(perf_config: Dict[str, Any]):\n \"\"\"Setup AVM with specific performance config\"\"\"\n tmpdir = tempfile.mkdtemp()\n os.environ['XDG_DATA_HOME'] = tmpdir\n \n from avm.config import AVMConfig, PermissionRule\n from avm import AVM\n \n config = AVMConfig(\n permissions=[\n PermissionRule(pattern=\"/memory/*\", access=\"rw\"),\n PermissionRule(pattern=\"/shared/*\", access=\"rw\"),\n ],\n embedding={\"enabled\": False}, # Disable embedding for fair comparison\n performance=perf_config,\n )\n \n avm = AVM(config=config, agent_id=\"ablation\")\n return avm, tmpdir\n\n\ndef run_benchmark(config_name: str, perf_config: Dict[str, Any], \n n_write: int = 100, n_read: int = 100, n_search: int = 20) -> AblationResult:\n \"\"\"Run benchmark with specific config\"\"\"\n print(f\" Testing: {config_name}...\")\n \n avm, tmpdir = setup_avm(perf_config)\n \n # Write benchmark\n write_latencies = []\n start = time.perf_counter()\n for i in range(n_write):\n content = generate_content()\n t0 = time.perf_counter()\n avm.write(f\"/memory/item_{i}.md\", content)\n write_latencies.append((time.perf_counter() - t0) * 1000)\n write_total = (time.perf_counter() - start) * 1000\n write_ops_sec = n_write / (write_total / 1000)\n write_latency_ms = sum(write_latencies) / len(write_latencies)\n \n # Read benchmark (first pass - cold)\n # Then warm reads\n read_latencies = []\n start = time.perf_counter()\n for i in range(n_read):\n t0 = time.perf_counter()\n avm.read(f\"/memory/item_{i % n_write}.md\")\n read_latencies.append((time.perf_counter() - t0) * 1000)\n read_total = (time.perf_counter() - start) * 1000\n read_ops_sec = n_read / (read_total / 1000)\n read_latency_ms = sum(read_latencies) / len(read_latencies)\n \n # Search benchmark\n search_latencies = []\n start = time.perf_counter()\n for i in range(n_search):\n t0 = time.perf_counter()\n avm.search(\"analysis project\", limit=10)\n search_latencies.append((time.perf_counter() - t0) * 1000)\n search_total = (time.perf_counter() - start) * 1000\n search_ops_sec = n_search / (search_total / 1000) if search_total > 0 else 0\n search_latency_ms = sum(search_latencies) / len(search_latencies) if search_latencies else 0\n \n return AblationResult(\n config_name=config_name,\n config=perf_config,\n write_ops_sec=write_ops_sec,\n write_latency_ms=write_latency_ms,\n read_ops_sec=read_ops_sec,\n read_latency_ms=read_latency_ms,\n search_ops_sec=search_ops_sec,\n search_latency_ms=search_latency_ms,\n )\n\n\ndef print_results(results: List[AblationResult], as_json: bool = False):\n if as_json:\n print(json.dumps([asdict(r) for r in results], indent=2))\n return\n \n print(\"\\n\" + \"=\" * 80)\n print(\"ABLATION STUDY RESULTS\")\n print(\"=\" * 80)\n \n # Find baseline for comparison\n baseline = next((r for r in results if r.config_name == \"baseline\"), results[0])\n \n print(f\"\\n{'Config':\u003c15} {'Write ops/s':\u003c12} {'Write lat':\u003c10} {'Read ops/s':\u003c12} {'Read lat':\u003c10} {'Search ops/s':\u003c12}\")\n print(\"-\" * 80)\n \n for r in results:\n write_delta = ((r.write_ops_sec / baseline.write_ops_sec) - 1) * 100 if baseline.write_ops_sec > 0 else 0\n read_delta = ((r.read_ops_sec / baseline.read_ops_sec) - 1) * 100 if baseline.read_ops_sec > 0 else 0\n \n write_ops_str = f\"{r.write_ops_sec:.1f}\"\n if r.config_name != \"baseline\":\n write_ops_str += f\" ({'+' if write_delta >= 0 else ''}{write_delta:.0f}%)\"\n \n read_ops_str = f\"{r.read_ops_sec:.1f}\"\n if r.config_name != \"baseline\":\n read_ops_str += f\" ({'+' if read_delta >= 0 else ''}{read_delta:.0f}%)\"\n \n print(f\"{r.config_name:\u003c15} {write_ops_str:\u003c12} {r.write_latency_ms:.2f}ms {read_ops_str:\u003c12} {r.read_latency_ms:.3f}ms {r.search_ops_sec:.1f}\")\n \n # Summary\n print(\"\\n\" + \"-\" * 80)\n print(\"OPTIMIZATION IMPACT SUMMARY\")\n print(\"-\" * 80)\n \n # Calculate individual contributions\n all_on = next((r for r in results if r.config_name == \"all_on\"), None)\n if all_on and baseline:\n total_write_improvement = all_on.write_ops_sec / baseline.write_ops_sec\n total_read_improvement = all_on.read_ops_sec / baseline.read_ops_sec\n \n print(f\"\\nTotal improvement (all_on vs baseline):\")\n print(f\" Write: {total_write_improvement:.1f}x faster\")\n print(f\" Read: {total_read_improvement:.1f}x faster\")\n \n # Individual contributions\n print(f\"\\nIndividual optimization contributions:\")\n \n for config_name in [\"wal_only\", \"cache_only\", \"async_only\"]:\n r = next((r for r in results if r.config_name == config_name), None)\n if r:\n write_contrib = (r.write_ops_sec / baseline.write_ops_sec - 1) * 100\n read_contrib = (r.read_ops_sec / baseline.read_ops_sec - 1) * 100\n \n opt_name = config_name.replace(\"_only\", \"\").upper()\n print(f\" {opt_name:\u003c15} Write: {'+' if write_contrib >= 0 else ''}{write_contrib:.0f}% Read: {'+' if read_contrib >= 0 else ''}{read_contrib:.0f}%\")\n\n\ndef main():\n parser = argparse.ArgumentParser(description=\"AVM Ablation Study\")\n parser.add_argument(\"--json\", action=\"store_true\", help=\"Output JSON\")\n parser.add_argument(\"--config\", help=\"Run specific config only\")\n parser.add_argument(\"--small\", action=\"store_true\", help=\"Smaller dataset\")\n args = parser.parse_args()\n \n scale = 0.5 if args.small else 1.0\n n_write = int(100 * scale)\n n_read = int(100 * scale)\n n_search = int(20 * scale)\n \n print(\"AVM Ablation Study\")\n print(f\" Write ops: {n_write}, Read ops: {n_read}, Search ops: {n_search}\")\n print()\n \n if args.config:\n configs_to_test = {args.config: CONFIGS[args.config]}\n else:\n configs_to_test = CONFIGS\n \n results = []\n for config_name, perf_config in configs_to_test.items():\n result = run_benchmark(config_name, perf_config, n_write, n_read, n_search)\n results.append(result)\n \n print_results(results, as_json=args.json)\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":8632,"content_sha256":"c98a8d6f35500708202ad3e5e1d993221fe6161c8ae9581b5837dc3d108858de"},{"filename":"benchmarks/bench_agent_efficiency.py","content":"#!/usr/bin/env python3\n\"\"\"\nAVM Agent Efficiency Benchmark\n\nMeasures metrics relevant to AI agent performance:\n1. Cache hit rate\n2. Recall precision/recall (semantic relevance)\n3. Hop count per operation\n4. Information efficiency (tokens saved)\n5. Scalability curves under different loads\n\nUsage:\n python benchmarks/bench_agent_efficiency.py\n python benchmarks/bench_agent_efficiency.py --json\n python benchmarks/bench_agent_efficiency.py --scenario cache_curve\n\"\"\"\n\nimport time\nimport tempfile\nimport os\nimport json\nimport random\nimport math\nfrom pathlib import Path\nfrom dataclasses import dataclass, asdict, field\nfrom typing import List, Dict, Any, Tuple\nimport argparse\n\n\n@dataclass\nclass CacheMetrics:\n \"\"\"Cache performance metrics\"\"\"\n total_reads: int = 0\n cache_hits: int = 0\n cache_misses: int = 0\n \n @property\n def hit_rate(self) -> float:\n return self.cache_hits / self.total_reads if self.total_reads > 0 else 0.0\n\n\n@dataclass\nclass RecallMetrics:\n \"\"\"Recall quality metrics\"\"\"\n query: str\n expected_paths: List[str]\n returned_paths: List[str]\n precision: float = 0.0\n recall: float = 0.0\n f1: float = 0.0\n latency_ms: float = 0.0\n\n\n@dataclass\nclass HopMetrics:\n \"\"\"Operation hop count\"\"\"\n operation: str\n hops: int\n breakdown: Dict[str, int] = field(default_factory=dict)\n\n\n@dataclass\nclass EfficiencyMetrics:\n \"\"\"Information efficiency\"\"\"\n total_content_tokens: int\n returned_tokens: int\n tokens_saved: int\n compression_ratio: float\n relevant_ratio: float # % of returned tokens that are relevant\n\n\n@dataclass\nclass ScalabilityPoint:\n \"\"\"Single point on scalability curve\"\"\"\n memory_count: int\n write_ops_sec: float\n read_ops_sec: float\n recall_latency_ms: float\n cache_hit_rate: float\n tokens_per_recall: float\n\n\ndef generate_topic_content(topic: str, size: int = 300) -> str:\n \"\"\"Generate content with specific topic keywords\"\"\"\n topic_words = {\n \"market\": [\"NVDA\", \"RSI\", \"overbought\", \"technical\", \"analysis\", \"price\", \"volume\"],\n \"trading\": [\"position\", \"stop-loss\", \"profit\", \"strategy\", \"risk\", \"entry\", \"exit\"],\n \"news\": [\"announced\", \"reported\", \"update\", \"breaking\", \"market\", \"impact\"],\n \"research\": [\"study\", \"analysis\", \"findings\", \"data\", \"conclusion\", \"evidence\"],\n \"memory\": [\"remembered\", \"learned\", \"experience\", \"lesson\", \"insight\"],\n }\n \n words = topic_words.get(topic, [\"general\", \"content\", \"data\"])\n filler = [\"the\", \"and\", \"with\", \"for\", \"this\", \"that\", \"about\"]\n \n content = []\n for _ in range(size // 8):\n if random.random() \u003c 0.4:\n content.append(random.choice(words))\n else:\n content.append(random.choice(filler))\n \n return \" \".join(content)\n\n\ndef estimate_tokens(text: str) -> int:\n \"\"\"Estimate token count (chars / 4)\"\"\"\n return len(text) // 4\n\n\ndef setup_avm_with_tracking():\n \"\"\"Setup AVM with hop tracking\"\"\"\n tmpdir = tempfile.mkdtemp()\n os.environ['XDG_DATA_HOME'] = tmpdir\n \n from avm import AVM\n from avm.config import AVMConfig, PermissionRule\n \n config = AVMConfig(\n permissions=[\n PermissionRule(pattern=\"/memory/*\", access=\"rw\"),\n ],\n embedding={\"enabled\": True, \"backend\": \"local\", \"model\": \"all-MiniLM-L6-v2\", \"auto_index\": True},\n performance={\"hot_cache\": True, \"cache_size\": 100, \"wal_mode\": True},\n )\n \n avm = AVM(config=config, agent_id=\"bench\")\n return avm, tmpdir\n\n\ndef setup_avm_minimal():\n \"\"\"Setup AVM without embedding for faster tests\"\"\"\n tmpdir = tempfile.mkdtemp()\n os.environ['XDG_DATA_HOME'] = tmpdir\n \n from avm import AVM\n from avm.config import AVMConfig, PermissionRule\n \n config = AVMConfig(\n permissions=[\n PermissionRule(pattern=\"/memory/*\", access=\"rw\"),\n ],\n embedding={\"enabled\": False},\n performance={\"hot_cache\": True, \"cache_size\": 100, \"wal_mode\": True},\n )\n \n avm = AVM(config=config, agent_id=\"bench\")\n return avm, tmpdir\n\n\n# ═══════════════════════════════════════════════════════════════\n# 1. Cache Hit Rate Analysis\n# ═══════════════════════════════════════════════════════════════\n\ndef bench_cache_hit_rate(n_memories: int = 200, n_reads: int = 500,\n access_pattern: str = \"zipf\") -> Dict[str, Any]:\n \"\"\"\n Measure cache hit rate under different access patterns\n \n Patterns:\n - uniform: equal probability for all items\n - zipf: power-law (some items accessed much more)\n - temporal: recent items accessed more\n - working_set: hot set + occasional cold\n \"\"\"\n print(f\" Cache hit rate ({access_pattern} pattern)...\")\n \n avm, tmpdir = setup_avm_minimal()\n \n # Populate memories\n for i in range(n_memories):\n avm.write(f\"/memory/item_{i}.md\", generate_topic_content(\"memory\", 200))\n \n # Clear cache stats\n avm._cache.clear()\n avm._cache_order.clear()\n \n # Access pattern generators\n def uniform_access():\n return random.randint(0, n_memories - 1)\n \n def zipf_access(alpha=1.5):\n # Zipf distribution: P(k) ∝ 1/k^alpha\n rank = int(random.paretovariate(alpha))\n return min(rank - 1, n_memories - 1)\n \n def temporal_access():\n # Prefer recent items (higher indices)\n return int(random.triangular(0, n_memories, n_memories * 0.9))\n \n def working_set_access(hot_size=20, hot_prob=0.8):\n if random.random() \u003c hot_prob:\n return random.randint(0, hot_size - 1)\n else:\n return random.randint(hot_size, n_memories - 1)\n \n access_fn = {\n \"uniform\": uniform_access,\n \"zipf\": zipf_access,\n \"temporal\": temporal_access,\n \"working_set\": working_set_access,\n }[access_pattern]\n \n # Track hits/misses\n hits = 0\n misses = 0\n hit_rate_over_time = []\n \n for i in range(n_reads):\n idx = access_fn()\n path = f\"/memory/item_{idx}.md\"\n \n # Check if in cache before read\n was_cached = path in avm._cache\n \n avm.read(path)\n \n if was_cached:\n hits += 1\n else:\n misses += 1\n \n # Record hit rate every 50 reads\n if (i + 1) % 50 == 0:\n hit_rate_over_time.append({\n \"reads\": i + 1,\n \"hit_rate\": hits / (i + 1),\n })\n \n return {\n \"pattern\": access_pattern,\n \"n_memories\": n_memories,\n \"n_reads\": n_reads,\n \"cache_size\": avm._cache_max_size,\n \"total_hits\": hits,\n \"total_misses\": misses,\n \"final_hit_rate\": hits / n_reads,\n \"hit_rate_curve\": hit_rate_over_time,\n }\n\n\n# ═══════════════════════════════════════════════════════════════\n# 2. Recall Quality (Precision/Recall)\n# ═══════════════════════════════════════════════════════════════\n\ndef bench_recall_quality(n_memories: int = 100) -> Dict[str, Any]:\n \"\"\"Measure recall precision and recall for semantic search\"\"\"\n print(\" Recall quality (precision/recall)...\")\n \n avm, tmpdir = setup_avm_with_tracking()\n \n # Create memories with known topics\n topics = [\"market\", \"trading\", \"news\", \"research\", \"memory\"]\n topic_paths = {t: [] for t in topics}\n \n for i in range(n_memories):\n topic = topics[i % len(topics)]\n path = f\"/memory/{topic}/item_{i}.md\"\n content = generate_topic_content(topic, 300)\n avm.write(path, content, meta={\"topic\": topic})\n topic_paths[topic].append(path)\n \n # Wait for embedding to complete\n time.sleep(2)\n \n # Test queries\n test_queries = [\n (\"NVDA RSI technical analysis\", \"market\"),\n (\"stop-loss trading strategy\", \"trading\"),\n (\"breaking news announcement\", \"news\"),\n (\"research study findings\", \"research\"),\n ]\n \n results = []\n \n for query, expected_topic in test_queries:\n expected = set(topic_paths[expected_topic])\n \n t0 = time.perf_counter()\n search_results = avm.search(query, limit=20)\n latency = (time.perf_counter() - t0) * 1000\n \n returned = set(node.path for node, _ in search_results)\n \n # Calculate precision/recall\n true_positives = len(returned & expected)\n precision = true_positives / len(returned) if returned else 0\n recall = true_positives / len(expected) if expected else 0\n f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0\n \n results.append({\n \"query\": query,\n \"expected_topic\": expected_topic,\n \"expected_count\": len(expected),\n \"returned_count\": len(returned),\n \"true_positives\": true_positives,\n \"precision\": precision,\n \"recall\": recall,\n \"f1\": f1,\n \"latency_ms\": latency,\n })\n \n # Aggregate\n avg_precision = sum(r[\"precision\"] for r in results) / len(results)\n avg_recall = sum(r[\"recall\"] for r in results) / len(results)\n avg_f1 = sum(r[\"f1\"] for r in results) / len(results)\n avg_latency = sum(r[\"latency_ms\"] for r in results) / len(results)\n \n return {\n \"n_memories\": n_memories,\n \"n_queries\": len(test_queries),\n \"avg_precision\": avg_precision,\n \"avg_recall\": avg_recall,\n \"avg_f1\": avg_f1,\n \"avg_latency_ms\": avg_latency,\n \"per_query\": results,\n }\n\n\n# ═══════════════════════════════════════════════════════════════\n# 3. Hop Count Analysis\n# ═══════════════════════════════════════════════════════════════\n\ndef bench_hop_count() -> Dict[str, Any]:\n \"\"\"Analyze hop count for different operations\"\"\"\n print(\" Hop count analysis...\")\n \n # Theoretical hop counts\n operations = {\n \"read_cold\": {\n \"description\": \"Read uncached node\",\n \"hops\": 2,\n \"breakdown\": {\"cache_check\": 1, \"sqlite_read\": 1},\n },\n \"read_hot\": {\n \"description\": \"Read cached node\",\n \"hops\": 1,\n \"breakdown\": {\"cache_check\": 1},\n },\n \"write\": {\n \"description\": \"Write with async embedding\",\n \"hops\": 1, # SQLite only, embedding is async\n \"breakdown\": {\"sqlite_write\": 1, \"cache_update\": 0, \"embedding_async\": 0},\n },\n \"search_embedding\": {\n \"description\": \"Semantic search\",\n \"hops\": 2,\n \"breakdown\": {\"embedding_search\": 1, \"batch_read\": 1},\n },\n \"search_fts\": {\n \"description\": \"Full-text search\",\n \"hops\": 2,\n \"breakdown\": {\"fts_search\": 1, \"batch_read\": 1},\n },\n \"recall_hot\": {\n \"description\": \"Recall with topic hit\",\n \"hops\": 1,\n \"breakdown\": {\"topic_index\": 1},\n \"note\": \"Future optimization\",\n },\n \"recall_cold\": {\n \"description\": \"Recall full search\",\n \"hops\": 4,\n \"breakdown\": {\"embedding\": 1, \"fts\": 1, \"graph_expand\": 1, \"batch_read\": 1},\n },\n }\n \n return {\n \"operations\": operations,\n \"summary\": {\n \"best_case\": 1,\n \"worst_case\": 4,\n \"optimization_target\": 2,\n },\n }\n\n\n# ═══════════════════════════════════════════════════════════════\n# 4. Information Efficiency (Token Savings)\n# ═══════════════════════════════════════════════════════════════\n\ndef bench_information_efficiency(n_memories: int = 100) -> Dict[str, Any]:\n \"\"\"Measure token savings from recall vs full read\"\"\"\n print(\" Information efficiency...\")\n \n avm, tmpdir = setup_avm_minimal()\n \n # Create memories of varying sizes\n total_content_tokens = 0\n for i in range(n_memories):\n size = random.randint(200, 1000)\n content = generate_topic_content(\"memory\", size)\n total_content_tokens += estimate_tokens(content)\n avm.write(f\"/memory/item_{i}.md\", content)\n \n # Simulate recall with different token budgets\n budgets = [500, 1000, 2000, 4000, 8000]\n results = []\n \n from avm.agent_memory import AgentMemory\n mem = AgentMemory(avm, \"bench\")\n \n for budget in budgets:\n # Recall returns token-limited context\n t0 = time.perf_counter()\n context = mem.recall(\"general content data\", max_tokens=budget)\n latency = (time.perf_counter() - t0) * 1000\n \n returned_tokens = estimate_tokens(context)\n \n results.append({\n \"budget\": budget,\n \"returned_tokens\": returned_tokens,\n \"total_available\": total_content_tokens,\n \"compression_ratio\": returned_tokens / total_content_tokens if total_content_tokens > 0 else 0,\n \"tokens_saved\": total_content_tokens - returned_tokens,\n \"savings_pct\": (1 - returned_tokens / total_content_tokens) * 100 if total_content_tokens > 0 else 0,\n \"latency_ms\": latency,\n })\n \n return {\n \"n_memories\": n_memories,\n \"total_content_tokens\": total_content_tokens,\n \"budget_results\": results,\n }\n\n\n# ═══════════════════════════════════════════════════════════════\n# 5. Scalability Curves\n# ═══════════════════════════════════════════════════════════════\n\ndef bench_scalability_curve(memory_counts: List[int] = None) -> Dict[str, Any]:\n \"\"\"Generate scalability curves for different memory counts\"\"\"\n print(\" Scalability curves...\")\n \n if memory_counts is None:\n memory_counts = [50, 100, 200, 500, 1000]\n \n curves = {\n \"write\": [],\n \"read\": [],\n \"search\": [],\n \"cache_hit_rate\": [],\n }\n \n for n in memory_counts:\n print(f\" Testing with {n} memories...\")\n \n avm, tmpdir = setup_avm_minimal()\n \n # Write benchmark\n write_latencies = []\n t0 = time.perf_counter()\n for i in range(n):\n tw = time.perf_counter()\n avm.write(f\"/memory/item_{i}.md\", generate_topic_content(\"memory\", 300))\n write_latencies.append((time.perf_counter() - tw) * 1000)\n write_total = time.perf_counter() - t0\n write_ops_sec = n / write_total\n \n curves[\"write\"].append({\"memory_count\": n, \"ops_sec\": write_ops_sec, \"avg_latency_ms\": sum(write_latencies) / len(write_latencies)})\n \n # Clear cache\n avm._cache.clear()\n avm._cache_order.clear()\n \n # Read benchmark (with cache warming)\n n_reads = min(n * 2, 500)\n hits = 0\n read_latencies = []\n t0 = time.perf_counter()\n for i in range(n_reads):\n idx = random.randint(0, n - 1)\n path = f\"/memory/item_{idx}.md\"\n was_cached = path in avm._cache\n \n tr = time.perf_counter()\n avm.read(path)\n read_latencies.append((time.perf_counter() - tr) * 1000)\n \n if was_cached:\n hits += 1\n read_total = time.perf_counter() - t0\n read_ops_sec = n_reads / read_total\n \n curves[\"read\"].append({\"memory_count\": n, \"ops_sec\": read_ops_sec, \"avg_latency_ms\": sum(read_latencies) / len(read_latencies)})\n curves[\"cache_hit_rate\"].append({\"memory_count\": n, \"hit_rate\": hits / n_reads})\n \n # Search benchmark\n n_searches = 20\n search_latencies = []\n t0 = time.perf_counter()\n for i in range(n_searches):\n ts = time.perf_counter()\n avm.search(\"memory content\", limit=10)\n search_latencies.append((time.perf_counter() - ts) * 1000)\n search_total = time.perf_counter() - t0\n search_ops_sec = n_searches / search_total\n \n curves[\"search\"].append({\"memory_count\": n, \"ops_sec\": search_ops_sec, \"avg_latency_ms\": sum(search_latencies) / len(search_latencies)})\n \n return {\n \"memory_counts\": memory_counts,\n \"curves\": curves,\n }\n\n\n# ═══════════════════════════════════════════════════════════════\n# Main\n# ═══════════════════════════════════════════════════════════════\n\ndef print_results(results: Dict[str, Any], as_json: bool = False):\n if as_json:\n print(json.dumps(results, indent=2, default=str))\n return\n \n print(\"\\n\" + \"=\" * 80)\n print(\"AGENT EFFICIENCY BENCHMARK RESULTS\")\n print(\"=\" * 80)\n \n # Cache hit rate\n if \"cache\" in results:\n cache = results[\"cache\"]\n print(f\"\\n### CACHE HIT RATE\")\n for pattern, data in cache.items():\n print(f\" {pattern}: {data['final_hit_rate']:.1%} hit rate\")\n \n # Recall quality\n if \"recall_quality\" in results:\n rq = results[\"recall_quality\"]\n print(f\"\\n### RECALL QUALITY\")\n print(f\" Avg Precision: {rq['avg_precision']:.2%}\")\n print(f\" Avg Recall: {rq['avg_recall']:.2%}\")\n print(f\" Avg F1: {rq['avg_f1']:.2%}\")\n print(f\" Avg Latency: {rq['avg_latency_ms']:.1f}ms\")\n \n # Hop count\n if \"hop_count\" in results:\n hc = results[\"hop_count\"]\n print(f\"\\n### HOP COUNT\")\n for op, data in hc[\"operations\"].items():\n print(f\" {op}: {data['hops']} hops - {data['description']}\")\n \n # Information efficiency\n if \"efficiency\" in results:\n eff = results[\"efficiency\"]\n print(f\"\\n### INFORMATION EFFICIENCY\")\n print(f\" Total content: {eff['total_content_tokens']:,} tokens\")\n for r in eff[\"budget_results\"]:\n print(f\" Budget {r['budget']:,}: {r['savings_pct']:.1f}% saved ({r['returned_tokens']:,} returned)\")\n \n # Scalability\n if \"scalability\" in results:\n sc = results[\"scalability\"]\n print(f\"\\n### SCALABILITY CURVES\")\n print(f\" Memory counts tested: {sc['memory_counts']}\")\n print(f\"\\n Write ops/sec:\")\n for p in sc[\"curves\"][\"write\"]:\n print(f\" {p['memory_count']:4d} memories: {p['ops_sec']:.1f} ops/s\")\n print(f\"\\n Cache hit rate:\")\n for p in sc[\"curves\"][\"cache_hit_rate\"]:\n print(f\" {p['memory_count']:4d} memories: {p['hit_rate']:.1%}\")\n\n\ndef main():\n parser = argparse.ArgumentParser(description=\"AVM Agent Efficiency Benchmark\")\n parser.add_argument(\"--json\", action=\"store_true\", help=\"Output JSON\")\n parser.add_argument(\"--scenario\", choices=[\"cache\", \"recall\", \"hops\", \"efficiency\", \"scale\", \"all\"], \n default=\"all\", help=\"Specific scenario to run\")\n parser.add_argument(\"--small\", action=\"store_true\", help=\"Smaller dataset\")\n args = parser.parse_args()\n \n scale = 0.5 if args.small else 1.0\n \n results = {}\n \n print(\"AVM Agent Efficiency Benchmark\")\n print()\n \n if args.scenario in (\"cache\", \"all\"):\n results[\"cache\"] = {}\n for pattern in [\"uniform\", \"zipf\", \"temporal\", \"working_set\"]:\n results[\"cache\"][pattern] = bench_cache_hit_rate(\n n_memories=int(200 * scale),\n n_reads=int(500 * scale),\n access_pattern=pattern\n )\n \n if args.scenario in (\"recall\", \"all\"):\n results[\"recall_quality\"] = bench_recall_quality(n_memories=int(100 * scale))\n \n if args.scenario in (\"hops\", \"all\"):\n results[\"hop_count\"] = bench_hop_count()\n \n if args.scenario in (\"efficiency\", \"all\"):\n results[\"efficiency\"] = bench_information_efficiency(n_memories=int(100 * scale))\n \n if args.scenario in (\"scale\", \"all\"):\n counts = [50, 100, 200] if args.small else [50, 100, 200, 500, 1000]\n results[\"scalability\"] = bench_scalability_curve(counts)\n \n print_results(results, as_json=args.json)\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":21302,"content_sha256":"ead0b50b5143c5888500408b44ae73c114b1b6b662ed91f974f9f64321cb3987"},{"filename":"benchmarks/bench_comprehensive.py","content":"#!/usr/bin/env python3\n\"\"\"\nAVM Comprehensive Benchmark\n\nCompares AVM vs raw file I/O across different scenarios:\n1. Single agent continuous work\n2. Multi-agent collaboration\n3. Cold start with large history\n4. Subscription notification latency\n\nUsage:\n python benchmarks/bench_comprehensive.py\n python benchmarks/bench_comprehensive.py --scenario single\n python benchmarks/bench_comprehensive.py --json\n\"\"\"\n\nimport time\nimport tempfile\nimport os\nimport json\nimport random\nimport string\nimport threading\nimport concurrent.futures\nfrom pathlib import Path\nfrom dataclasses import dataclass, asdict\nfrom typing import List, Dict, Any\nimport argparse\n\n\n@dataclass\nclass BenchmarkResult:\n scenario: str\n operation: str\n ops_count: int\n total_time_ms: float\n ops_per_sec: float\n avg_latency_ms: float\n p99_latency_ms: float = 0.0\n extra: Dict[str, Any] = None\n\n\ndef generate_content(size: int = 500) -> str:\n \"\"\"Generate random content\"\"\"\n words = [\"memory\", \"agent\", \"task\", \"project\", \"analysis\", \"data\", \"result\"]\n return \" \".join(random.choice(words) for _ in range(size // 7))\n\n\ndef setup_avm():\n \"\"\"Setup AVM with temp database\"\"\"\n tmpdir = tempfile.mkdtemp()\n os.environ['XDG_DATA_HOME'] = tmpdir\n \n from avm import AVM\n avm = AVM(agent_id=\"bench\")\n return avm, tmpdir\n\n\ndef setup_raw_files():\n \"\"\"Setup raw file directory\"\"\"\n tmpdir = tempfile.mkdtemp()\n return tmpdir\n\n\n# ═══════════════════════════════════════════════════════════════\n# Scenario 1: Single Agent Continuous Work\n# ═══════════════════════════════════════════════════════════════\n\ndef bench_single_agent_avm(n_memories: int = 500, n_recalls: int = 50) -> List[BenchmarkResult]:\n \"\"\"Benchmark single agent with AVM\"\"\"\n avm, tmpdir = setup_avm()\n results = []\n \n # Write memories\n latencies = []\n start = time.perf_counter()\n for i in range(n_memories):\n content = generate_content()\n t0 = time.perf_counter()\n avm.write(f\"/memory/item_{i}.md\", content, meta={\"importance\": random.random()})\n latencies.append((time.perf_counter() - t0) * 1000)\n total_time = (time.perf_counter() - start) * 1000\n \n results.append(BenchmarkResult(\n scenario=\"single_agent\",\n operation=\"write\",\n ops_count=n_memories,\n total_time_ms=total_time,\n ops_per_sec=n_memories / (total_time / 1000),\n avg_latency_ms=sum(latencies) / len(latencies),\n p99_latency_ms=sorted(latencies)[int(len(latencies) * 0.99)],\n ))\n \n # Read (with cache)\n latencies = []\n start = time.perf_counter()\n for i in range(n_memories):\n t0 = time.perf_counter()\n avm.read(f\"/memory/item_{i}.md\")\n latencies.append((time.perf_counter() - t0) * 1000)\n total_time = (time.perf_counter() - start) * 1000\n \n results.append(BenchmarkResult(\n scenario=\"single_agent\",\n operation=\"read\",\n ops_count=n_memories,\n total_time_ms=total_time,\n ops_per_sec=n_memories / (total_time / 1000),\n avg_latency_ms=sum(latencies) / len(latencies),\n p99_latency_ms=sorted(latencies)[int(len(latencies) * 0.99)],\n ))\n \n # Search\n latencies = []\n start = time.perf_counter()\n for i in range(n_recalls):\n t0 = time.perf_counter()\n avm.search(\"analysis project\", limit=10)\n latencies.append((time.perf_counter() - t0) * 1000)\n total_time = (time.perf_counter() - start) * 1000\n \n results.append(BenchmarkResult(\n scenario=\"single_agent\",\n operation=\"search\",\n ops_count=n_recalls,\n total_time_ms=total_time,\n ops_per_sec=n_recalls / (total_time / 1000),\n avg_latency_ms=sum(latencies) / len(latencies),\n p99_latency_ms=sorted(latencies)[int(len(latencies) * 0.99)] if latencies else 0,\n ))\n \n # Recall (AgentMemory)\n from avm.agent_memory import AgentMemory\n mem = AgentMemory(avm, \"bench\")\n \n # Remember some with proper format\n for i in range(20):\n mem.remember(f\"Important finding about topic {i}: \" + generate_content(200), \n title=f\"Topic {i}\", importance=0.8)\n \n latencies = []\n start = time.perf_counter()\n for i in range(n_recalls):\n t0 = time.perf_counter()\n mem.recall(f\"topic {i % 20}\", max_tokens=500)\n latencies.append((time.perf_counter() - t0) * 1000)\n total_time = (time.perf_counter() - start) * 1000\n \n results.append(BenchmarkResult(\n scenario=\"single_agent\",\n operation=\"recall\",\n ops_count=n_recalls,\n total_time_ms=total_time,\n ops_per_sec=n_recalls / (total_time / 1000),\n avg_latency_ms=sum(latencies) / len(latencies),\n p99_latency_ms=sorted(latencies)[int(len(latencies) * 0.99)] if latencies else 0,\n ))\n \n return results\n\n\ndef bench_single_agent_raw(n_memories: int = 500, n_recalls: int = 50) -> List[BenchmarkResult]:\n \"\"\"Benchmark single agent with raw files\"\"\"\n tmpdir = setup_raw_files()\n memory_dir = Path(tmpdir) / \"memory\"\n memory_dir.mkdir()\n results = []\n \n # Write\n latencies = []\n start = time.perf_counter()\n for i in range(n_memories):\n content = generate_content()\n t0 = time.perf_counter()\n (memory_dir / f\"item_{i}.md\").write_text(content)\n latencies.append((time.perf_counter() - t0) * 1000)\n total_time = (time.perf_counter() - start) * 1000\n \n results.append(BenchmarkResult(\n scenario=\"single_agent_raw\",\n operation=\"write\",\n ops_count=n_memories,\n total_time_ms=total_time,\n ops_per_sec=n_memories / (total_time / 1000),\n avg_latency_ms=sum(latencies) / len(latencies),\n p99_latency_ms=sorted(latencies)[int(len(latencies) * 0.99)],\n ))\n \n # Read\n latencies = []\n start = time.perf_counter()\n for i in range(n_memories):\n t0 = time.perf_counter()\n (memory_dir / f\"item_{i}.md\").read_text()\n latencies.append((time.perf_counter() - t0) * 1000)\n total_time = (time.perf_counter() - start) * 1000\n \n results.append(BenchmarkResult(\n scenario=\"single_agent_raw\",\n operation=\"read\",\n ops_count=n_memories,\n total_time_ms=total_time,\n ops_per_sec=n_memories / (total_time / 1000),\n avg_latency_ms=sum(latencies) / len(latencies),\n p99_latency_ms=sorted(latencies)[int(len(latencies) * 0.99)],\n ))\n \n # Search (grep simulation)\n import subprocess\n latencies = []\n start = time.perf_counter()\n for i in range(min(n_recalls, 20)): # grep is slow\n t0 = time.perf_counter()\n subprocess.run(\n [\"grep\", \"-l\", \"analysis\", str(memory_dir)],\n capture_output=True,\n timeout=5\n )\n latencies.append((time.perf_counter() - t0) * 1000)\n total_time = (time.perf_counter() - start) * 1000\n \n results.append(BenchmarkResult(\n scenario=\"single_agent_raw\",\n operation=\"search\",\n ops_count=len(latencies),\n total_time_ms=total_time,\n ops_per_sec=len(latencies) / (total_time / 1000) if total_time > 0 else 0,\n avg_latency_ms=sum(latencies) / len(latencies) if latencies else 0,\n p99_latency_ms=sorted(latencies)[int(len(latencies) * 0.99)] if latencies else 0,\n ))\n \n return results\n\n\n# ═══════════════════════════════════════════════════════════════\n# Scenario 2: Multi-Agent Collaboration\n# ═══════════════════════════════════════════════════════════════\n\ndef bench_multi_agent_avm(n_agents: int = 5, ops_per_agent: int = 100) -> List[BenchmarkResult]:\n \"\"\"Benchmark multi-agent concurrent access\"\"\"\n avm, tmpdir = setup_avm()\n results = []\n \n # Setup subscriptions\n from avm.subscriptions import get_subscription_store, SubscriptionMode\n sub_store = get_subscription_store()\n for i in range(n_agents):\n sub_store.subscribe(f\"agent_{i}\", \"/shared/*\", mode=SubscriptionMode.BATCHED)\n \n write_latencies = []\n read_latencies = []\n \n def agent_work(agent_id: int):\n \"\"\"Simulate agent work\"\"\"\n local_write_lat = []\n local_read_lat = []\n \n for j in range(ops_per_agent):\n # Write to shared\n content = generate_content(200)\n t0 = time.perf_counter()\n avm.write(f\"/shared/agent_{agent_id}_item_{j}.md\", content)\n local_write_lat.append((time.perf_counter() - t0) * 1000)\n \n # Read from another agent\n other = (agent_id + 1) % n_agents\n t0 = time.perf_counter()\n avm.read(f\"/shared/agent_{other}_item_{j % max(1, j)}.md\")\n local_read_lat.append((time.perf_counter() - t0) * 1000)\n \n return local_write_lat, local_read_lat\n \n start = time.perf_counter()\n with concurrent.futures.ThreadPoolExecutor(max_workers=n_agents) as executor:\n futures = [executor.submit(agent_work, i) for i in range(n_agents)]\n for future in concurrent.futures.as_completed(futures):\n w, r = future.result()\n write_latencies.extend(w)\n read_latencies.extend(r)\n total_time = (time.perf_counter() - start) * 1000\n \n total_ops = n_agents * ops_per_agent\n \n results.append(BenchmarkResult(\n scenario=\"multi_agent\",\n operation=\"concurrent_write\",\n ops_count=total_ops,\n total_time_ms=total_time,\n ops_per_sec=total_ops / (total_time / 1000),\n avg_latency_ms=sum(write_latencies) / len(write_latencies),\n p99_latency_ms=sorted(write_latencies)[int(len(write_latencies) * 0.99)],\n extra={\"n_agents\": n_agents},\n ))\n \n results.append(BenchmarkResult(\n scenario=\"multi_agent\",\n operation=\"concurrent_read\",\n ops_count=total_ops,\n total_time_ms=total_time,\n ops_per_sec=total_ops / (total_time / 1000),\n avg_latency_ms=sum(read_latencies) / len(read_latencies),\n p99_latency_ms=sorted(read_latencies)[int(len(read_latencies) * 0.99)],\n extra={\"n_agents\": n_agents},\n ))\n \n # Check pending notifications\n total_pending = sum(len(sub_store.get_pending(f\"agent_{i}\")) for i in range(n_agents))\n results.append(BenchmarkResult(\n scenario=\"multi_agent\",\n operation=\"subscription_accumulation\",\n ops_count=total_pending,\n total_time_ms=0,\n ops_per_sec=0,\n avg_latency_ms=0,\n extra={\"pending_per_agent\": total_pending / n_agents},\n ))\n \n return results\n\n\n# ═══════════════════════════════════════════════════════════════\n# Scenario 3: Cold Start with Large History\n# ═══════════════════════════════════════════════════════════════\n\ndef bench_cold_start(n_history: int = 2000, n_queries: int = 20) -> List[BenchmarkResult]:\n \"\"\"Benchmark cold start performance\"\"\"\n avm, tmpdir = setup_avm()\n results = []\n \n # Populate large history\n print(f\" Populating {n_history} historical memories...\")\n for i in range(n_history):\n avm.write(f\"/memory/history/item_{i}.md\", generate_content(300))\n \n # Cold start simulation: new AVM instance\n from avm import AVM\n avm2 = AVM(agent_id=\"cold_start\")\n \n # First query (cold)\n latencies = []\n start = time.perf_counter()\n for i in range(n_queries):\n t0 = time.perf_counter()\n avm2.search(\"analysis project data\", limit=10)\n latencies.append((time.perf_counter() - t0) * 1000)\n total_time = (time.perf_counter() - start) * 1000\n \n results.append(BenchmarkResult(\n scenario=\"cold_start\",\n operation=\"first_search\",\n ops_count=n_queries,\n total_time_ms=total_time,\n ops_per_sec=n_queries / (total_time / 1000),\n avg_latency_ms=sum(latencies) / len(latencies),\n p99_latency_ms=sorted(latencies)[int(len(latencies) * 0.99)],\n extra={\"history_size\": n_history},\n ))\n \n # Warm queries (after cache populated)\n latencies = []\n start = time.perf_counter()\n for i in range(n_queries):\n t0 = time.perf_counter()\n avm2.search(\"analysis project data\", limit=10)\n latencies.append((time.perf_counter() - t0) * 1000)\n total_time = (time.perf_counter() - start) * 1000\n \n results.append(BenchmarkResult(\n scenario=\"cold_start\",\n operation=\"warm_search\",\n ops_count=n_queries,\n total_time_ms=total_time,\n ops_per_sec=n_queries / (total_time / 1000),\n avg_latency_ms=sum(latencies) / len(latencies),\n p99_latency_ms=sorted(latencies)[int(len(latencies) * 0.99)],\n extra={\"history_size\": n_history},\n ))\n \n return results\n\n\n# ═══════════════════════════════════════════════════════════════\n# Main\n# ═══════════════════════════════════════════════════════════════\n\ndef print_results(results: List[BenchmarkResult], as_json: bool = False):\n if as_json:\n print(json.dumps([asdict(r) for r in results], indent=2))\n return\n \n print(\"\\n\" + \"=\" * 70)\n print(\"BENCHMARK RESULTS\")\n print(\"=\" * 70)\n \n current_scenario = None\n for r in results:\n if r.scenario != current_scenario:\n current_scenario = r.scenario\n print(f\"\\n### {r.scenario.upper().replace('_', ' ')}\")\n print(\"-\" * 50)\n \n print(f\" {r.operation}:\")\n print(f\" Ops: {r.ops_count:,}\")\n print(f\" Total: {r.total_time_ms:.1f} ms\")\n print(f\" Throughput: {r.ops_per_sec:.1f} ops/sec\")\n print(f\" Latency: avg={r.avg_latency_ms:.2f}ms p99={r.p99_latency_ms:.2f}ms\")\n if r.extra:\n print(f\" Extra: {r.extra}\")\n\n\ndef main():\n parser = argparse.ArgumentParser(description=\"AVM Comprehensive Benchmark\")\n parser.add_argument(\"--scenario\", choices=[\"single\", \"multi\", \"cold\", \"all\"], default=\"all\")\n parser.add_argument(\"--json\", action=\"store_true\", help=\"Output JSON\")\n parser.add_argument(\"--small\", action=\"store_true\", help=\"Smaller dataset for quick test\")\n args = parser.parse_args()\n \n all_results = []\n \n scale = 0.2 if args.small else 1.0\n \n if args.scenario in (\"single\", \"all\"):\n print(\"\\n[1/4] Single Agent (AVM)...\")\n all_results.extend(bench_single_agent_avm(\n n_memories=int(500 * scale),\n n_recalls=int(50 * scale)\n ))\n \n print(\"[2/4] Single Agent (Raw Files)...\")\n all_results.extend(bench_single_agent_raw(\n n_memories=int(500 * scale),\n n_recalls=int(20 * scale)\n ))\n \n if args.scenario in (\"multi\", \"all\"):\n print(\"[3/4] Multi-Agent Collaboration...\")\n all_results.extend(bench_multi_agent_avm(\n n_agents=5,\n ops_per_agent=int(100 * scale)\n ))\n \n if args.scenario in (\"cold\", \"all\"):\n print(\"[4/4] Cold Start...\")\n all_results.extend(bench_cold_start(\n n_history=int(2000 * scale),\n n_queries=int(20 * scale)\n ))\n \n print_results(all_results, as_json=args.json)\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":16160,"content_sha256":"f36312b9145e9ec7e7bc9f43c92d5cfc623392a4ce4250de485cac31f9756707"},{"filename":"benchmarks/bench_librarian.py","content":"#!/usr/bin/env python3\n\"\"\"\nLibrarian Performance Benchmark\n\nMeasures the impact of Librarian on multi-agent knowledge discovery.\n\"\"\"\n\nimport os\nimport time\nimport statistics\nimport tempfile\nfrom pathlib import Path\n\nimport sys\nsys.path.insert(0, str(Path(__file__).parent.parent))\n\n\ndef create_avm_for_agent(tmpdir: str, agent_id: str):\n \"\"\"Create AVM instance for a specific agent.\"\"\"\n os.environ['XDG_DATA_HOME'] = tmpdir\n \n from avm import AVM\n from avm.config import AVMConfig, PermissionRule\n \n config = AVMConfig(\n permissions=[\n PermissionRule(pattern=\"/memory/*\", access=\"rw\"),\n PermissionRule(pattern=\"/shared/*\", access=\"rw\"),\n ],\n embedding={\"enabled\": True, \"backend\": \"local\", \"model\": \"all-MiniLM-L6-v2\", \"auto_index\": True},\n performance={\"hot_cache\": True, \"cache_size\": 100, \"wal_mode\": True},\n )\n \n return AVM(config=config, agent_id=agent_id)\n\n\ndef setup_memories(tmpdir: str, agents: list, memories_per_agent: int = 10):\n \"\"\"Set up memories for each agent.\"\"\"\n topics = [\"trading\", \"research\", \"crypto\", \"macro\", \"technical\"]\n \n for agent in agents:\n avm = create_avm_for_agent(tmpdir, agent)\n mem = avm.agent_memory(agent)\n for j in range(memories_per_agent):\n topic = topics[j % len(topics)]\n content = f\"Agent {agent} observation about {topic}: data point #{j}\"\n mem.remember(content, importance=0.5 + (j % 5) * 0.1, tags=[topic])\n \n # Add shared memories\n avm = create_avm_for_agent(tmpdir, \"shared_writer\")\n for i in range(5):\n topic = topics[i % len(topics)]\n avm.write(f\"/memory/shared/{topic}_{i}.md\", f\"Shared knowledge about {topic}\")\n \n return avm # Return last one for vfs access\n\n\ndef bench_traditional_vs_librarian(avm, agents: list, iterations: int = 30):\n \"\"\"Compare traditional multi-agent recall vs Librarian single query.\"\"\"\n from avm.librarian import Librarian\n \n librarian = Librarian(avm.store)\n for agent in agents:\n librarian.register_agent(agent)\n \n query = \"trading market analysis\"\n \n # Traditional: query each agent separately\n trad_latencies = []\n for _ in range(iterations):\n start = time.perf_counter()\n for agent in agents:\n # Simulate: search + filter for agent\n avm.search(f\"{query} {agent}\", limit=10)\n trad_latencies.append((time.perf_counter() - start) * 1000)\n \n # Librarian: single query\n lib_latencies = []\n for _ in range(iterations):\n start = time.perf_counter()\n response = librarian.query(agents[0], query)\n lib_latencies.append((time.perf_counter() - start) * 1000)\n \n return {\n \"traditional\": {\n \"p50_ms\": statistics.median(trad_latencies),\n \"p99_ms\": sorted(trad_latencies)[int(len(trad_latencies) * 0.99)],\n \"hops\": 4 * len(agents), # Each search = 4 hops\n },\n \"librarian\": {\n \"p50_ms\": statistics.median(lib_latencies),\n \"p99_ms\": sorted(lib_latencies)[int(len(lib_latencies) * 0.99)],\n \"hops\": 1,\n },\n }\n\n\ndef bench_who_knows(avm, agents: list, iterations: int = 100):\n \"\"\"Measure who-knows lookup.\"\"\"\n from avm.librarian import Librarian\n \n librarian = Librarian(avm.store)\n for agent in agents:\n librarian.register_agent(agent)\n \n latencies = []\n for _ in range(iterations):\n start = time.perf_counter()\n result = librarian.who_knows(\"trading\")\n latencies.append((time.perf_counter() - start) * 1000)\n \n return {\n \"p50_ms\": statistics.median(latencies),\n \"p99_ms\": sorted(latencies)[int(len(latencies) * 0.99)],\n }\n\n\ndef bench_privacy_policy(avm, agents: list, iterations: int = 50):\n \"\"\"Measure privacy policy overhead.\"\"\"\n from avm.librarian import Librarian, PrivacyPolicy\n \n # No privacy\n lib_none = Librarian(avm.store, privacy_policy=PrivacyPolicy(\"full\"))\n for agent in agents:\n lib_none.register_agent(agent)\n \n none_latencies = []\n for _ in range(iterations):\n start = time.perf_counter()\n lib_none.query(agents[0], \"trading\")\n none_latencies.append((time.perf_counter() - start) * 1000)\n \n # With privacy\n lib_priv = Librarian(avm.store, privacy_policy=PrivacyPolicy(\"owner\"))\n for agent in agents:\n lib_priv.register_agent(agent)\n \n priv_latencies = []\n for _ in range(iterations):\n start = time.perf_counter()\n lib_priv.query(agents[0], \"trading\")\n priv_latencies.append((time.perf_counter() - start) * 1000)\n \n p50_none = statistics.median(none_latencies)\n p50_priv = statistics.median(priv_latencies)\n \n return {\n \"no_privacy_p50_ms\": p50_none,\n \"with_privacy_p50_ms\": p50_priv,\n \"overhead_ms\": p50_priv - p50_none,\n \"overhead_pct\": ((p50_priv - p50_none) / p50_none * 100) if p50_none > 0 else 0,\n }\n\n\ndef bench_scalability(tmpdir: str, max_agents: int = 16):\n \"\"\"Measure scalability with agent count.\"\"\"\n from avm.librarian import Librarian\n \n results = []\n \n for n in [2, 4, 8, 16]:\n if n > max_agents:\n break\n \n agents = [f\"scale_{i}\" for i in range(n)]\n avm = setup_memories(tmpdir, agents, memories_per_agent=5)\n \n librarian = Librarian(avm.store)\n for agent in agents:\n librarian.register_agent(agent)\n \n latencies = []\n for _ in range(30):\n start = time.perf_counter()\n librarian.who_knows(\"scale_test\")\n latencies.append((time.perf_counter() - start) * 1000)\n \n results.append({\n \"n_agents\": n,\n \"p50_ms\": statistics.median(latencies),\n \"p99_ms\": sorted(latencies)[int(len(latencies) * 0.99)],\n })\n \n return results\n\n\ndef main():\n print(\"=\" * 60)\n print(\"LIBRARIAN BENCHMARK\")\n print(\"=\" * 60)\n \n with tempfile.TemporaryDirectory() as tmpdir:\n agents = [\"akashi\", \"trader\", \"analyst\", \"researcher\", \"watcher\"]\n \n print(\"\\n[1] Setting up multi-agent environment...\")\n avm = setup_memories(tmpdir, agents, memories_per_agent=10)\n print(f\" Created {len(agents)} agents with 10 memories each\")\n \n print(\"\\n[2] Traditional vs Librarian Query...\")\n cmp = bench_traditional_vs_librarian(avm, agents)\n print(f\" Traditional (5 agents):\")\n print(f\" p50: {cmp['traditional']['p50_ms']:.2f}ms\")\n print(f\" Hops: {cmp['traditional']['hops']}\")\n print(f\" Librarian:\")\n print(f\" p50: {cmp['librarian']['p50_ms']:.2f}ms\")\n print(f\" Hops: {cmp['librarian']['hops']}\")\n \n hop_reduction = (1 - cmp['librarian']['hops'] / cmp['traditional']['hops']) * 100\n print(f\" → Hop reduction: {hop_reduction:.0f}%\")\n \n print(\"\\n[3] Who-Knows Lookup...\")\n wk = bench_who_knows(avm, agents)\n print(f\" p50: {wk['p50_ms']:.2f}ms, p99: {wk['p99_ms']:.2f}ms\")\n \n print(\"\\n[4] Privacy Policy Overhead...\")\n priv = bench_privacy_policy(avm, agents)\n print(f\" No privacy: {priv['no_privacy_p50_ms']:.2f}ms\")\n print(f\" With privacy: {priv['with_privacy_p50_ms']:.2f}ms\")\n print(f\" Overhead: {priv['overhead_ms']:.2f}ms ({priv['overhead_pct']:.1f}%)\")\n \n print(\"\\n[5] Scalability...\")\n scale = bench_scalability(tmpdir)\n for row in scale:\n print(f\" {row['n_agents']:2d} agents: p50={row['p50_ms']:.2f}ms\")\n \n # Summary\n print(\"\\n\" + \"=\" * 60)\n print(\"SUMMARY\")\n print(\"=\" * 60)\n print(f\" Hop Reduction: {hop_reduction:.0f}%\")\n print(f\" Librarian p50: {cmp['librarian']['p50_ms']:.2f}ms\")\n print(f\" Who-Knows p50: {wk['p50_ms']:.2f}ms\")\n print(f\" Privacy Overhead: {priv['overhead_pct']:.1f}%\")\n \n # Output for blog\n import json\n results = {\n \"comparison\": cmp,\n \"who_knows\": wk,\n \"privacy\": priv,\n \"scalability\": scale,\n \"hop_reduction_pct\": hop_reduction,\n }\n print(\"\\n\" + \"=\" * 60)\n print(\"JSON OUTPUT\")\n print(\"=\" * 60)\n print(json.dumps(results, indent=2))\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":8481,"content_sha256":"3ce66ed616f5ae886548b5c3f586a9c29f5d6f159cc9d2347d645537be0744b3"},{"filename":"benchmarks/bench_paper.py","content":"#!/usr/bin/env python3\n\"\"\"\nAVM Paper-Quality Benchmark Suite\n\nComplete experimental suite for academic paper:\n\n1. Latency Distribution (CDF/histogram)\n2. Throughput vs. Memory Count (scalability)\n3. Cache Size Sensitivity Analysis\n4. Embedding Model Comparison\n5. Multi-Agent Contention Analysis\n6. Cold Start Analysis\n7. Memory Decay Impact\n8. Token Budget vs. Recall Quality Trade-off\n\nEach experiment outputs data suitable for plotting.\n\nUsage:\n python benchmarks/bench_paper.py --all\n python benchmarks/bench_paper.py --exp latency_cdf\n python benchmarks/bench_paper.py --exp scalability --output results/\n\"\"\"\n\nimport time\nimport tempfile\nimport os\nimport json\nimport random\nimport math\nimport statistics\nimport argparse\nfrom pathlib import Path\nfrom dataclasses import dataclass, asdict, field\nfrom typing import List, Dict, Any, Tuple\nfrom concurrent.futures import ThreadPoolExecutor, as_completed\nimport threading\n\n\n# ═══════════════════════════════════════════════════════════════\n# Utilities\n# ═══════════════════════════════════════════════════════════════\n\ndef generate_content(topic: str = \"memory\", size: int = 300) -> str:\n topic_words = {\n \"market\": [\"NVDA\", \"RSI\", \"technical\", \"analysis\", \"price\", \"volume\", \"trend\"],\n \"trading\": [\"position\", \"stop-loss\", \"profit\", \"strategy\", \"risk\"],\n \"memory\": [\"remembered\", \"learned\", \"experience\", \"lesson\", \"insight\"],\n }\n words = topic_words.get(topic, [\"content\", \"data\", \"info\"])\n filler = [\"the\", \"and\", \"with\", \"for\", \"this\"]\n return \" \".join(random.choice(words) if random.random() \u003c 0.3 else random.choice(filler) for _ in range(size // 5))\n\n\ndef estimate_tokens(text: str) -> int:\n return len(text) // 4\n\n\ndef setup_avm(perf_config: Dict = None, embedding: bool = False):\n tmpdir = tempfile.mkdtemp()\n os.environ['XDG_DATA_HOME'] = tmpdir\n \n from avm import AVM\n from avm.config import AVMConfig, PermissionRule\n \n perf = perf_config or {\"hot_cache\": True, \"cache_size\": 100, \"wal_mode\": True}\n \n config = AVMConfig(\n permissions=[\n PermissionRule(pattern=\"/memory/*\", access=\"rw\"),\n PermissionRule(pattern=\"/shared/*\", access=\"rw\"),\n ],\n embedding={\"enabled\": embedding, \"backend\": \"local\", \"model\": \"all-MiniLM-L6-v2\", \"auto_index\": True} if embedding else {\"enabled\": False},\n performance=perf,\n )\n \n avm = AVM(config=config, agent_id=\"bench\")\n return avm, tmpdir\n\n\ndef percentile(data: List[float], p: float) -> float:\n if not data:\n return 0.0\n sorted_data = sorted(data)\n k = (len(sorted_data) - 1) * p / 100\n f = math.floor(k)\n c = math.ceil(k)\n if f == c:\n return sorted_data[int(k)]\n return sorted_data[f] * (c - k) + sorted_data[c] * (k - f)\n\n\n# ═══════════════════════════════════════════════════════════════\n# Experiment 1: Latency Distribution (CDF)\n# ═══════════════════════════════════════════════════════════════\n\ndef exp_latency_distribution(n_ops: int = 1000) -> Dict:\n \"\"\"\n Measure latency distributions for read/write/search operations.\n Output: histogram bins and CDF data for plotting.\n \"\"\"\n print(\" Experiment: Latency Distribution...\")\n \n avm, _ = setup_avm()\n \n # Populate\n for i in range(200):\n avm.write(f\"/memory/item_{i}.md\", generate_content(size=300))\n \n # Clear cache for cold reads\n avm._cache.clear()\n avm._cache_order.clear()\n \n results = {\"read_cold\": [], \"read_hot\": [], \"write\": [], \"search\": []}\n \n # Cold reads\n for i in range(min(n_ops, 200)):\n t0 = time.perf_counter()\n avm.read(f\"/memory/item_{i}.md\")\n results[\"read_cold\"].append((time.perf_counter() - t0) * 1000)\n \n # Hot reads (same items again)\n for i in range(n_ops):\n idx = i % 200\n t0 = time.perf_counter()\n avm.read(f\"/memory/item_{idx}.md\")\n results[\"read_hot\"].append((time.perf_counter() - t0) * 1000)\n \n # Writes\n for i in range(n_ops):\n content = generate_content(size=300)\n t0 = time.perf_counter()\n avm.write(f\"/memory/write_{i}.md\", content)\n results[\"write\"].append((time.perf_counter() - t0) * 1000)\n \n # Search\n for i in range(min(n_ops, 100)):\n t0 = time.perf_counter()\n avm.search(\"memory content\", limit=10)\n results[\"search\"].append((time.perf_counter() - t0) * 1000)\n \n # Calculate statistics\n output = {}\n for op, latencies in results.items():\n if not latencies:\n continue\n \n # CDF data points (for plotting)\n sorted_lat = sorted(latencies)\n n_lat = len(sorted_lat)\n cdf_points = [(sorted_lat[min(int(n_lat * p / 100), n_lat - 1)], p) for p in range(0, 101, 5)]\n \n output[op] = {\n \"n\": len(latencies),\n \"min\": min(latencies),\n \"max\": max(latencies),\n \"mean\": statistics.mean(latencies),\n \"median\": statistics.median(latencies),\n \"std\": statistics.stdev(latencies) if len(latencies) > 1 else 0,\n \"p50\": percentile(latencies, 50),\n \"p90\": percentile(latencies, 90),\n \"p95\": percentile(latencies, 95),\n \"p99\": percentile(latencies, 99),\n \"cdf\": cdf_points,\n \"histogram\": {\n \"bins\": list(range(0, int(max(latencies)) + 2)),\n \"counts\": [sum(1 for l in latencies if b \u003c= l \u003c b + 1) for b in range(int(max(latencies)) + 1)],\n },\n }\n \n return output\n\n\n# ═══════════════════════════════════════════════════════════════\n# Experiment 2: Scalability (Throughput vs. Memory Count)\n# ═══════════════════════════════════════════════════════════════\n\ndef exp_scalability(memory_counts: List[int] = None) -> Dict:\n \"\"\"\n Measure how throughput scales with memory count.\n \"\"\"\n print(\" Experiment: Scalability...\")\n \n if memory_counts is None:\n memory_counts = [10, 50, 100, 200, 500, 1000, 2000]\n \n results = []\n \n for n in memory_counts:\n print(f\" n={n}...\")\n avm, _ = setup_avm()\n \n # Write\n t0 = time.perf_counter()\n for i in range(n):\n avm.write(f\"/memory/item_{i}.md\", generate_content(size=300))\n write_time = time.perf_counter() - t0\n write_throughput = n / write_time\n \n # Read (warm)\n n_reads = min(n * 2, 1000)\n t0 = time.perf_counter()\n for i in range(n_reads):\n avm.read(f\"/memory/item_{i % n}.md\")\n read_time = time.perf_counter() - t0\n read_throughput = n_reads / read_time\n \n # Search\n n_search = 50\n t0 = time.perf_counter()\n for _ in range(n_search):\n avm.search(\"memory content\", limit=10)\n search_time = time.perf_counter() - t0\n search_throughput = n_search / search_time\n \n results.append({\n \"memory_count\": n,\n \"write_throughput\": write_throughput,\n \"read_throughput\": read_throughput,\n \"search_throughput\": search_throughput,\n })\n \n return {\"data\": results}\n\n\n# ═══════════════════════════════════════════════════════════════\n# Experiment 3: Cache Size Sensitivity\n# ═══════════════════════════════════════════════════════════════\n\ndef exp_cache_sensitivity(cache_sizes: List[int] = None, n_memories: int = 500) -> Dict:\n \"\"\"\n Measure hit rate and read latency for different cache sizes.\n \"\"\"\n print(\" Experiment: Cache Size Sensitivity...\")\n \n if cache_sizes is None:\n cache_sizes = [10, 25, 50, 100, 200, 500]\n \n results = []\n \n for cache_size in cache_sizes:\n print(f\" cache_size={cache_size}...\")\n \n perf = {\"hot_cache\": True, \"cache_size\": cache_size, \"wal_mode\": True}\n avm, _ = setup_avm(perf_config=perf)\n \n # Populate\n for i in range(n_memories):\n avm.write(f\"/memory/item_{i}.md\", generate_content(size=300))\n \n # Clear cache\n avm._cache.clear()\n avm._cache_order.clear()\n \n # Zipf access pattern\n hits = 0\n n_reads = 1000\n latencies = []\n \n for _ in range(n_reads):\n # Zipf distribution\n idx = min(int(random.paretovariate(1.5)) - 1, n_memories - 1)\n path = f\"/memory/item_{idx}.md\"\n \n was_cached = path in avm._cache\n \n t0 = time.perf_counter()\n avm.read(path)\n latencies.append((time.perf_counter() - t0) * 1000)\n \n if was_cached:\n hits += 1\n \n results.append({\n \"cache_size\": cache_size,\n \"hit_rate\": hits / n_reads,\n \"avg_latency_ms\": statistics.mean(latencies),\n \"p99_latency_ms\": percentile(latencies, 99),\n })\n \n return {\"n_memories\": n_memories, \"n_reads\": 1000, \"data\": results}\n\n\n# ═══════════════════════════════════════════════════════════════\n# Experiment 4: Multi-Agent Contention\n# ═══════════════════════════════════════════════════════════════\n\ndef exp_multi_agent_contention(agent_counts: List[int] = None) -> Dict:\n \"\"\"\n Measure throughput degradation with concurrent agents.\n \"\"\"\n print(\" Experiment: Multi-Agent Contention...\")\n \n if agent_counts is None:\n agent_counts = [1, 2, 4, 8, 16]\n \n results = []\n \n for n_agents in agent_counts:\n print(f\" agents={n_agents}...\")\n \n avm, _ = setup_avm()\n \n # Shared counter for completed ops\n completed = {\"count\": 0}\n lock = threading.Lock()\n \n def agent_workload(agent_id: int, n_ops: int = 100):\n local_latencies = []\n for i in range(n_ops):\n path = f\"/shared/agent_{agent_id}/item_{i}.md\"\n content = generate_content(size=200)\n \n t0 = time.perf_counter()\n avm.write(path, content)\n local_latencies.append((time.perf_counter() - t0) * 1000)\n \n with lock:\n completed[\"count\"] += 1\n \n return local_latencies\n \n n_ops_per_agent = 100\n all_latencies = []\n \n t0 = time.perf_counter()\n with ThreadPoolExecutor(max_workers=n_agents) as executor:\n futures = [executor.submit(agent_workload, i, n_ops_per_agent) for i in range(n_agents)]\n for f in as_completed(futures):\n all_latencies.extend(f.result())\n total_time = time.perf_counter() - t0\n \n total_ops = n_agents * n_ops_per_agent\n throughput = total_ops / total_time\n \n results.append({\n \"n_agents\": n_agents,\n \"total_ops\": total_ops,\n \"total_time_sec\": total_time,\n \"throughput\": throughput,\n \"avg_latency_ms\": statistics.mean(all_latencies),\n \"p99_latency_ms\": percentile(all_latencies, 99),\n \"throughput_per_agent\": throughput / n_agents,\n })\n \n return {\"data\": results}\n\n\n# ═══════════════════════════════════════════════════════════════\n# Experiment 5: Cold Start Analysis\n# ═══════════════════════════════════════════════════════════════\n\ndef exp_cold_start(history_sizes: List[int] = None) -> Dict:\n \"\"\"\n Measure first-query latency with different history sizes.\n \"\"\"\n print(\" Experiment: Cold Start...\")\n \n if history_sizes is None:\n history_sizes = [10, 50, 100, 500, 1000]\n \n results = []\n \n for n in history_sizes:\n print(f\" history_size={n}...\")\n \n avm, _ = setup_avm(embedding=True)\n \n # Populate\n for i in range(n):\n avm.write(f\"/memory/item_{i}.md\", generate_content(size=300))\n \n # Wait for embedding\n time.sleep(0.5)\n \n # Cold search (first query)\n t0 = time.perf_counter()\n avm.search(\"analysis memory\", limit=10)\n cold_latency = (time.perf_counter() - t0) * 1000\n \n # Warm searches\n warm_latencies = []\n for _ in range(10):\n t0 = time.perf_counter()\n avm.search(\"content data\", limit=10)\n warm_latencies.append((time.perf_counter() - t0) * 1000)\n \n results.append({\n \"history_size\": n,\n \"cold_latency_ms\": cold_latency,\n \"warm_avg_latency_ms\": statistics.mean(warm_latencies),\n \"warmup_factor\": cold_latency / statistics.mean(warm_latencies) if warm_latencies else 0,\n })\n \n return {\"data\": results}\n\n\n# ═══════════════════════════════════════════════════════════════\n# Experiment 6: Token Budget vs. Recall Quality\n# ═══════════════════════════════════════════════════════════════\n\ndef exp_token_quality_tradeoff(budgets: List[int] = None) -> Dict:\n \"\"\"\n Measure recall quality at different token budgets.\n \"\"\"\n print(\" Experiment: Token Budget vs. Quality...\")\n \n if budgets is None:\n budgets = [100, 250, 500, 1000, 2000, 4000, 8000]\n \n avm, _ = setup_avm(embedding=True)\n \n # Create memories with known topics\n topics = [\"market\", \"trading\", \"memory\"]\n topic_paths = {t: [] for t in topics}\n \n for i in range(150):\n topic = topics[i % len(topics)]\n path = f\"/memory/{topic}/item_{i}.md\"\n avm.write(path, generate_content(topic, 400))\n topic_paths[topic].append(path)\n \n time.sleep(1)\n \n from avm.agent_memory import AgentMemory\n mem = AgentMemory(avm, \"bench\")\n \n results = []\n \n for budget in budgets:\n print(f\" budget={budget}...\")\n \n # Query for market topic\n t0 = time.perf_counter()\n context = mem.recall(\"NVDA RSI technical analysis\", max_tokens=budget)\n latency = (time.perf_counter() - t0) * 1000\n \n returned_tokens = estimate_tokens(context)\n \n # Check how many market paths are mentioned\n market_mentions = sum(1 for p in topic_paths[\"market\"] if p in context or f\"item_{p.split('_')[-1].replace('.md', '')}\" in context)\n \n results.append({\n \"budget\": budget,\n \"returned_tokens\": returned_tokens,\n \"utilization\": returned_tokens / budget if budget > 0 else 0,\n \"latency_ms\": latency,\n \"relevant_mentions\": market_mentions,\n \"total_relevant\": len(topic_paths[\"market\"]),\n })\n \n return {\"data\": results}\n\n\n# ═══════════════════════════════════════════════════════════════\n# Experiment 7: Write Batch Size Impact\n# ═══════════════════════════════════════════════════════════════\n\ndef exp_write_batch_size(batch_sizes: List[int] = None) -> Dict:\n \"\"\"\n Measure throughput for different write batch sizes.\n \"\"\"\n print(\" Experiment: Write Batch Size...\")\n \n if batch_sizes is None:\n batch_sizes = [1, 5, 10, 25, 50, 100]\n \n results = []\n \n for batch_size in batch_sizes:\n print(f\" batch_size={batch_size}...\")\n \n avm, _ = setup_avm()\n \n n_batches = 20\n latencies = []\n \n for batch_idx in range(n_batches):\n t0 = time.perf_counter()\n for i in range(batch_size):\n avm.write(f\"/memory/batch_{batch_idx}/item_{i}.md\", generate_content(size=300))\n latencies.append((time.perf_counter() - t0) * 1000)\n \n total_ops = n_batches * batch_size\n total_time = sum(latencies) / 1000\n throughput = total_ops / total_time\n \n results.append({\n \"batch_size\": batch_size,\n \"total_ops\": total_ops,\n \"throughput\": throughput,\n \"avg_batch_latency_ms\": statistics.mean(latencies),\n \"per_op_latency_ms\": statistics.mean(latencies) / batch_size,\n })\n \n return {\"data\": results}\n\n\n# ═══════════════════════════════════════════════════════════════\n# Experiment 8: Memory Content Size Impact\n# ═══════════════════════════════════════════════════════════════\n\ndef exp_content_size_impact(sizes: List[int] = None) -> Dict:\n \"\"\"\n Measure how content size affects read/write performance.\n \"\"\"\n print(\" Experiment: Content Size Impact...\")\n \n if sizes is None:\n sizes = [100, 500, 1000, 2000, 5000, 10000]\n \n results = []\n \n for size in sizes:\n print(f\" size={size} chars...\")\n \n avm, _ = setup_avm()\n \n content = generate_content(size=size)\n \n # Write latency\n write_latencies = []\n for i in range(50):\n t0 = time.perf_counter()\n avm.write(f\"/memory/size_{size}/item_{i}.md\", content)\n write_latencies.append((time.perf_counter() - t0) * 1000)\n \n # Read latency (warm)\n read_latencies = []\n for i in range(50):\n t0 = time.perf_counter()\n avm.read(f\"/memory/size_{size}/item_{i % 50}.md\")\n read_latencies.append((time.perf_counter() - t0) * 1000)\n \n results.append({\n \"content_size_chars\": size,\n \"content_size_tokens\": estimate_tokens(content),\n \"write_avg_ms\": statistics.mean(write_latencies),\n \"write_p99_ms\": percentile(write_latencies, 99),\n \"read_avg_ms\": statistics.mean(read_latencies),\n \"read_p99_ms\": percentile(read_latencies, 99),\n })\n \n return {\"data\": results}\n\n\n# ═══════════════════════════════════════════════════════════════\n# Main\n# ═══════════════════════════════════════════════════════════════\n\nEXPERIMENTS = {\n \"latency_cdf\": exp_latency_distribution,\n \"scalability\": exp_scalability,\n \"cache_sensitivity\": exp_cache_sensitivity,\n \"multi_agent\": exp_multi_agent_contention,\n \"cold_start\": exp_cold_start,\n \"token_quality\": exp_token_quality_tradeoff,\n \"batch_size\": exp_write_batch_size,\n \"content_size\": exp_content_size_impact,\n}\n\n\ndef print_summary(results: Dict):\n print(\"\\n\" + \"=\" * 80)\n print(\"PAPER BENCHMARK RESULTS SUMMARY\")\n print(\"=\" * 80)\n \n for exp_name, data in results.items():\n print(f\"\\n### {exp_name.upper()}\")\n \n if \"data\" in data:\n for item in data[\"data\"][:3]: # First 3 rows\n print(f\" {item}\")\n if len(data[\"data\"]) > 3:\n print(f\" ... ({len(data['data'])} total rows)\")\n else:\n for key, val in list(data.items())[:3]:\n if isinstance(val, dict):\n print(f\" {key}: {list(val.keys())[:3]}...\")\n else:\n print(f\" {key}: {val}\")\n\n\ndef main():\n parser = argparse.ArgumentParser(description=\"AVM Paper Benchmark Suite\")\n parser.add_argument(\"--all\", action=\"store_true\", help=\"Run all experiments\")\n parser.add_argument(\"--exp\", choices=list(EXPERIMENTS.keys()), help=\"Run specific experiment\")\n parser.add_argument(\"--output\", type=str, help=\"Output directory for results\")\n parser.add_argument(\"--json\", action=\"store_true\", help=\"Output JSON\")\n parser.add_argument(\"--small\", action=\"store_true\", help=\"Smaller dataset\")\n args = parser.parse_args()\n \n print(\"AVM Paper Benchmark Suite\")\n print()\n \n if args.exp:\n experiments_to_run = {args.exp: EXPERIMENTS[args.exp]}\n elif args.all:\n experiments_to_run = EXPERIMENTS\n else:\n # Default: run a quick subset\n experiments_to_run = {\n \"latency_cdf\": EXPERIMENTS[\"latency_cdf\"],\n \"scalability\": EXPERIMENTS[\"scalability\"],\n \"cache_sensitivity\": EXPERIMENTS[\"cache_sensitivity\"],\n }\n \n results = {}\n \n for name, func in experiments_to_run.items():\n try:\n if args.small:\n # Reduced parameters for quick testing\n if name == \"scalability\":\n results[name] = func(memory_counts=[10, 50, 100])\n elif name == \"latency_cdf\":\n results[name] = func(n_ops=100)\n else:\n results[name] = func()\n else:\n results[name] = func()\n except Exception as e:\n print(f\" ERROR in {name}: {e}\")\n results[name] = {\"error\": str(e)}\n \n if args.json:\n print(json.dumps(results, indent=2, default=str))\n else:\n print_summary(results)\n \n if args.output:\n output_dir = Path(args.output)\n output_dir.mkdir(parents=True, exist_ok=True)\n \n for name, data in results.items():\n with open(output_dir / f\"{name}.json\", \"w\") as f:\n json.dump(data, f, indent=2, default=str)\n \n print(f\"\\nResults saved to {output_dir}/\")\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":23457,"content_sha256":"ece870ddb039812fb86f3cef8d75b8b304692d491e2b2b66cac74d501993b177"},{"filename":"benchmarks/bench_performance.py","content":"#!/usr/bin/env python3\n\"\"\"AVM Performance Benchmark\"\"\"\nimport time\nimport tempfile\nimport os\nfrom pathlib import Path\n\ndef setup():\n \"\"\"Setup benchmark environment\"\"\"\n tmpdir = tempfile.mkdtemp()\n os.environ['XDG_DATA_HOME'] = tmpdir\n \n from avm import AVM\n avm = AVM(agent_id=\"bench\")\n return avm, tmpdir\n\ndef bench_write(avm, n=100):\n \"\"\"Benchmark write operations\"\"\"\n start = time.perf_counter()\n for i in range(n):\n avm.write(f\"/memory/bench/file{i}.md\", f\"Content {i}\\n\" * 10)\n elapsed = time.perf_counter() - start\n return n / elapsed # ops/sec\n\ndef bench_read(avm, n=100):\n \"\"\"Benchmark read operations\"\"\"\n # Ensure files exist\n for i in range(n):\n path = f\"/memory/bench/read{i}.md\"\n if not avm.read(path):\n avm.write(path, f\"Read content {i}\")\n \n start = time.perf_counter()\n for i in range(n):\n avm.read(f\"/memory/bench/read{i}.md\")\n elapsed = time.perf_counter() - start\n return n / elapsed\n\ndef bench_list(avm, n=50):\n \"\"\"Benchmark list operations\"\"\"\n start = time.perf_counter()\n for _ in range(n):\n avm.list(\"/memory/bench\", limit=100)\n elapsed = time.perf_counter() - start\n return n / elapsed\n\ndef bench_search(avm, n=20):\n \"\"\"Benchmark search operations\"\"\"\n start = time.perf_counter()\n for i in range(n):\n avm.search(f\"content {i}\", limit=10)\n elapsed = time.perf_counter() - start\n return n / elapsed\n\ndef bench_recall(avm, n=10):\n \"\"\"Benchmark recall operations\"\"\"\n from avm.agent_memory import AgentMemory\n mem = AgentMemory(avm, \"bench\")\n \n # Write some memories\n for i in range(20):\n mem.remember(f\"Memory about topic {i} with details\", title=f\"Topic {i}\")\n \n start = time.perf_counter()\n for i in range(n):\n mem.recall(f\"topic {i % 20}\", max_tokens=500)\n elapsed = time.perf_counter() - start\n return n / elapsed\n\ndef main():\n print(\"=\" * 50)\n print(\"AVM Performance Benchmark\")\n print(\"=\" * 50)\n \n avm, tmpdir = setup()\n \n results = {}\n \n print(\"\\n[1/5] Write benchmark (100 ops)...\")\n results['write'] = bench_write(avm, 100)\n print(f\" {results['write']:.1f} ops/sec\")\n \n print(\"\\n[2/5] Read benchmark (100 ops)...\")\n results['read'] = bench_read(avm, 100)\n print(f\" {results['read']:.1f} ops/sec\")\n \n print(\"\\n[3/5] List benchmark (50 ops)...\")\n results['list'] = bench_list(avm, 50)\n print(f\" {results['list']:.1f} ops/sec\")\n \n print(\"\\n[4/5] Search benchmark (20 ops)...\")\n results['search'] = bench_search(avm, 20)\n print(f\" {results['search']:.1f} ops/sec\")\n \n print(\"\\n[5/5] Recall benchmark (10 ops)...\")\n results['recall'] = bench_recall(avm, 10)\n print(f\" {results['recall']:.1f} ops/sec\")\n \n print(\"\\n\" + \"=\" * 50)\n print(\"Summary\")\n print(\"=\" * 50)\n for op, rate in results.items():\n print(f\" {op:10} {rate:8.1f} ops/sec\")\n \n # Cleanup\n import shutil\n shutil.rmtree(tmpdir, ignore_errors=True)\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":3102,"content_sha256":"b19e7dfc347fe52b8f7359c5bc848f50ca840791108e5c9a60815db1ba72a1df"},{"filename":"benchmarks/bench_topic_index.py","content":"#!/usr/bin/env python3\n\"\"\"\nTopicIndex Performance Benchmark\n\nCompares TopicIndex vs FTS for recall operations.\n\"\"\"\n\nimport os\nimport time\nimport statistics\nimport tempfile\nfrom pathlib import Path\n\nimport sys\nsys.path.insert(0, str(Path(__file__).parent.parent))\n\n\ndef setup_env(tmpdir: str, n_memories: int = 500):\n \"\"\"Set up test environment with memories\"\"\"\n os.environ['XDG_DATA_HOME'] = tmpdir\n \n from avm import AVM\n from avm.config import AVMConfig, PermissionRule\n from avm.topic_index import TopicIndex\n \n config = AVMConfig(\n permissions=[PermissionRule(pattern=\"/memory/*\", access=\"rw\")],\n embedding={\"enabled\": True, \"backend\": \"local\", \"model\": \"all-MiniLM-L6-v2\", \"auto_index\": True},\n performance={\"hot_cache\": True, \"cache_size\": 100, \"wal_mode\": True},\n )\n \n avm = AVM(config=config, agent_id=\"bench\")\n topic_index = TopicIndex(avm.store)\n \n # Generate memories\n topics = [\"trading\", \"crypto\", \"bitcoin\", \"ethereum\", \"stocks\", \n \"analysis\", \"technical\", \"market\", \"price\", \"signal\"]\n \n for i in range(n_memories):\n t1 = topics[i % len(topics)]\n t2 = topics[(i * 3) % len(topics)]\n content = f\"Memory #{i} about {t1} and {t2}. Analysis of #{t1} patterns.\"\n path = f\"/memory/private/bench/mem_{i}.md\"\n \n avm.write(path, content, {\"importance\": 0.5 + (i % 5) * 0.1})\n topic_index.index_path(path, content)\n \n return avm, topic_index\n\n\ndef bench_index_speed(topic_index, n_docs: int = 100):\n \"\"\"Benchmark indexing speed\"\"\"\n latencies = []\n \n for i in range(n_docs):\n content = f\"Test document {i} about trading and crypto analysis\"\n path = f\"/memory/bench/speed_{i}.md\"\n \n start = time.perf_counter()\n topic_index.index_path(path, content)\n latencies.append((time.perf_counter() - start) * 1000)\n \n return {\n \"docs\": n_docs,\n \"total_ms\": sum(latencies),\n \"avg_ms\": statistics.mean(latencies),\n \"p50_ms\": statistics.median(latencies),\n \"p99_ms\": sorted(latencies)[int(len(latencies) * 0.99)],\n \"throughput_docs_per_sec\": n_docs / (sum(latencies) / 1000),\n }\n\n\ndef bench_query_speed(topic_index, avm, queries: list, iterations: int = 50):\n \"\"\"Benchmark query speed: TopicIndex vs FTS\"\"\"\n topic_latencies = []\n fts_latencies = []\n \n for _ in range(iterations):\n for query in queries:\n # TopicIndex\n start = time.perf_counter()\n topic_results = topic_index.query(query, limit=20)\n topic_latencies.append((time.perf_counter() - start) * 1000)\n \n # FTS\n start = time.perf_counter()\n fts_results = avm.search(query, limit=20)\n fts_latencies.append((time.perf_counter() - start) * 1000)\n \n return {\n \"topic_index\": {\n \"p50_ms\": statistics.median(topic_latencies),\n \"p99_ms\": sorted(topic_latencies)[int(len(topic_latencies) * 0.99)],\n \"avg_ms\": statistics.mean(topic_latencies),\n },\n \"fts\": {\n \"p50_ms\": statistics.median(fts_latencies),\n \"p99_ms\": sorted(fts_latencies)[int(len(fts_latencies) * 0.99)],\n \"avg_ms\": statistics.mean(fts_latencies),\n },\n \"speedup\": statistics.median(fts_latencies) / statistics.median(topic_latencies),\n }\n\n\ndef bench_recall_quality(topic_index, avm, test_cases: list):\n \"\"\"Measure recall quality (precision/recall)\"\"\"\n results = []\n \n for query, expected_topics in test_cases:\n # Get TopicIndex results\n topic_results = topic_index.query(query, limit=20)\n topic_paths = set(p for p, _ in topic_results)\n \n # Get FTS results\n fts_results = avm.search(query, limit=20)\n fts_paths = set(n.path for n in fts_results)\n \n # Calculate overlap\n overlap = len(topic_paths & fts_paths)\n \n results.append({\n \"query\": query,\n \"topic_count\": len(topic_paths),\n \"fts_count\": len(fts_paths),\n \"overlap\": overlap,\n \"jaccard\": overlap / max(len(topic_paths | fts_paths), 1),\n })\n \n avg_jaccard = statistics.mean(r[\"jaccard\"] for r in results)\n return {\n \"test_cases\": len(test_cases),\n \"avg_jaccard_similarity\": avg_jaccard,\n \"details\": results,\n }\n\n\ndef main():\n print(\"=\" * 60)\n print(\"TOPICINDEX BENCHMARK\")\n print(\"=\" * 60)\n \n with tempfile.TemporaryDirectory() as tmpdir:\n print(\"\\n[1] Setting up environment (500 memories)...\")\n avm, topic_index = setup_env(tmpdir, n_memories=500)\n print(f\" Topics indexed: {len(topic_index.all_topics())}\")\n \n print(\"\\n[2] Indexing Speed...\")\n idx_speed = bench_index_speed(topic_index, n_docs=100)\n print(f\" Throughput: {idx_speed['throughput_docs_per_sec']:.0f} docs/sec\")\n print(f\" p50: {idx_speed['p50_ms']:.3f}ms, p99: {idx_speed['p99_ms']:.3f}ms\")\n \n print(\"\\n[3] Query Speed (TopicIndex vs FTS)...\")\n queries = [\"trading analysis\", \"bitcoin price\", \"crypto market\", \"technical signal\"]\n query_speed = bench_query_speed(topic_index, avm, queries)\n print(f\" TopicIndex p50: {query_speed['topic_index']['p50_ms']:.3f}ms\")\n print(f\" FTS p50: {query_speed['fts']['p50_ms']:.3f}ms\")\n print(f\" Speedup: {query_speed['speedup']:.1f}x\")\n \n print(\"\\n[4] Recall Quality...\")\n test_cases = [\n (\"trading\", [\"trading\"]),\n (\"bitcoin analysis\", [\"bitcoin\", \"analysis\"]),\n (\"crypto market\", [\"crypto\", \"market\"]),\n ]\n quality = bench_recall_quality(topic_index, avm, test_cases)\n print(f\" Avg Jaccard similarity: {quality['avg_jaccard_similarity']:.2%}\")\n \n # Stats\n stats = topic_index.stats()\n print(\"\\n[5] Index Stats...\")\n print(f\" Total topics: {stats['total_topics']}\")\n print(f\" Total paths: {stats['total_paths']}\")\n print(f\" Avg paths/topic: {stats['avg_paths_per_topic']:.1f}\")\n \n # Summary\n print(\"\\n\" + \"=\" * 60)\n print(\"SUMMARY\")\n print(\"=\" * 60)\n print(f\" Index Speed: {idx_speed['throughput_docs_per_sec']:.0f} docs/sec\")\n print(f\" Query Speedup: {query_speed['speedup']:.1f}x vs FTS\")\n print(f\" Recall Quality: {quality['avg_jaccard_similarity']:.0%} overlap with FTS\")\n \n import json\n print(\"\\n\" + \"=\" * 60)\n print(\"JSON OUTPUT\")\n print(\"=\" * 60)\n print(json.dumps({\n \"index_speed\": idx_speed,\n \"query_speed\": query_speed,\n \"recall_quality\": {\"avg_jaccard\": quality[\"avg_jaccard_similarity\"]},\n \"stats\": stats,\n }, indent=2))\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":6951,"content_sha256":"0fe77e2697a2477f578288af0eaca6fa59154af262d3c5f7bb4bfc61c135debd"},{"filename":"benchmarks/embedding_bench.py","content":"#!/usr/bin/env python3\n\"\"\"\nEmbedding Store Benchmark\n\nCompares:\n1. SQLite-based EmbeddingStore (brute force cosine)\n2. FAISS Flat index (exact search)\n3. FAISS HNSW index (approximate search)\n\nMeasures:\n- Index build time\n- Query latency\n- Memory usage\n- Recall (for approximate methods)\n\"\"\"\n\nimport os\nimport sys\nimport time\nimport json\nimport random\nimport tempfile\nimport argparse\nfrom typing import List, Dict, Tuple\nfrom dataclasses import dataclass, field\n\nsys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\n\nfrom avm.store import AVMStore\nfrom avm.node import AVMNode\n\n\n@dataclass\nclass EmbeddingBenchConfig:\n \"\"\"Benchmark configuration\"\"\"\n num_documents: int = 1000\n embedding_dim: int = 384 # MiniLM dimension\n query_count: int = 100\n k: int = 10\n seed: int = 42\n\n\n@dataclass\nclass EmbeddingBenchResult:\n \"\"\"Benchmark results\"\"\"\n store_type: str = \"\"\n \n # Build\n build_time_sec: float = 0.0\n \n # Query\n avg_query_ms: float = 0.0\n p50_query_ms: float = 0.0\n p99_query_ms: float = 0.0\n \n # Accuracy (for approximate methods)\n recall_at_k: float = 1.0 # Compared to brute force\n \n # Scale\n num_documents: int = 0\n num_queries: int = 0\n \n def to_dict(self) -> Dict:\n return {\n \"store_type\": self.store_type,\n \"build_time_sec\": round(self.build_time_sec, 3),\n \"latency_ms\": {\n \"avg\": round(self.avg_query_ms, 3),\n \"p50\": round(self.p50_query_ms, 3),\n \"p99\": round(self.p99_query_ms, 3),\n },\n \"recall_at_k\": round(self.recall_at_k, 4),\n \"scale\": {\n \"documents\": self.num_documents,\n \"queries\": self.num_queries,\n }\n }\n\n\nclass MockEmbeddingBackend:\n \"\"\"Mock embedding backend for benchmarking (no actual model)\"\"\"\n \n def __init__(self, dimension: int = 384):\n self._dimension = dimension\n self._cache: Dict[str, List[float]] = {}\n \n @property\n def dimension(self) -> int:\n return self._dimension\n \n def embeend(self, text: str) -> List[float]:\n \"\"\"Generate deterministic pseudo-embedding based on text hash\"\"\"\n if text in self._cache:\n return self._cache[text]\n \n import hashlib\n h = hashlib.sha256(text.encode()).digest()\n random.seed(int.from_bytes(h[:4], 'little'))\n vec = [random.gauss(0, 1) for _ in range(self._dimension)]\n # Normalize\n norm = sum(x*x for x in vec) ** 0.5\n vec = [x / norm for x in vec]\n self._cache[text] = vec\n return vec\n \n def embeend_batch(self, texts: List[str]) -> List[List[float]]:\n return [self.embeend(t) for t in texts]\n\n\nclass EmbeddingBenchmark:\n \"\"\"Benchmark harness for embedding stores\"\"\"\n \n def __init__(self, config: EmbeddingBenchConfig):\n self.config = config\n self.tmpdir = tempfile.mkdtemp()\n self.store = AVMStore(os.path.join(self.tmpdir, \"bench.db\"))\n self.backend = MockEmbeddingBackend(config.embedding_dim)\n \n random.seed(config.seed)\n \n # Ground truth results (from brute force)\n self.ground_truth: Dict[str, List[str]] = {}\n \n # Generate documents\n self.documents: List[AVMNode] = []\n self.queries: List[str] = []\n \n def setup(self):\n \"\"\"Generate synthetic documents and queries\"\"\"\n print(f\"Generating {self.config.num_documents} documents...\")\n \n topics = [\"ai\", \"market\", \"code\", \"research\", \"personal\"]\n \n for i in range(self.config.num_documents):\n topic = random.choice(topics)\n content = f\"Document {i} about {topic}. \" + \" \".join(\n [random.choice([\"neural\", \"stock\", \"function\", \"paper\", \"note\"]) \n for _ in range(20)]\n )\n \n node = AVMNode(\n path=f\"/memory/doc_{i:05d}.md\",\n content=content,\n )\n self.documents.append(node)\n self.store.put_node(node)\n \n # Generate queries\n print(f\"Generating {self.config.query_count} queries...\")\n for i in range(self.config.query_count):\n query = f\"query {i} about \" + random.choice(topics)\n self.queries.append(query)\n \n def benchmark_sqlite(self) -> EmbeddingBenchResult:\n \"\"\"Benchmark SQLite-based embedding store\"\"\"\n from avm.embedding import EmbeddingStore\n \n result = EmbeddingBenchResult(\n store_type=\"sqlite\",\n num_documents=len(self.documents),\n num_queries=len(self.queries),\n )\n \n # Build index\n store = EmbeddingStore(self.store, self.backend)\n \n start = time.perf_counter()\n for node in self.documents:\n store.embeend_node(node)\n result.build_time_sec = time.perf_counter() - start\n \n # Query benchmark\n latencies = []\n for query in self.queries:\n start = time.perf_counter()\n results = store.search(query, k=self.config.k)\n elapsed_ms = (time.perf_counter() - start) * 1000\n latencies.append(elapsed_ms)\n \n # Store ground truth\n self.ground_truth[query] = [r[0].path for r in results]\n \n latencies.sort()\n result.avg_query_ms = sum(latencies) / len(latencies)\n result.p50_query_ms = latencies[len(latencies) // 2]\n result.p99_query_ms = latencies[int(len(latencies) * 0.99)]\n result.recall_at_k = 1.0 # Ground truth\n \n return result\n \n def benchmark_faiss(self, index_type: str = \"flat\") -> EmbeddingBenchResult:\n \"\"\"Benchmark FAISS-based embedding store\"\"\"\n from avm.faiss_store import FAISSEmbeddingStore\n \n result = EmbeddingBenchResult(\n store_type=f\"faiss-{index_type}\",\n num_documents=len(self.documents),\n num_queries=len(self.queries),\n )\n \n # Build index\n index_path = os.path.join(self.tmpdir, f\"faiss_{index_type}.bin\")\n store = FAISSEmbeddingStore(\n self.store, self.backend, \n index_type=index_type,\n index_path=index_path,\n )\n \n start = time.perf_counter()\n store.add_nodes(self.documents)\n result.build_time_sec = time.perf_counter() - start\n \n # Query benchmark\n latencies = []\n total_recall = 0\n \n for query in self.queries:\n start = time.perf_counter()\n results = store.search(query, k=self.config.k)\n elapsed_ms = (time.perf_counter() - start) * 1000\n latencies.append(elapsed_ms)\n \n # Calculate recall vs ground truth\n if query in self.ground_truth:\n result_paths = set(r[0].path for r in results)\n truth_paths = set(self.ground_truth[query])\n if truth_paths:\n recall = len(result_paths & truth_paths) / len(truth_paths)\n total_recall += recall\n \n latencies.sort()\n result.avg_query_ms = sum(latencies) / len(latencies)\n result.p50_query_ms = latencies[len(latencies) // 2]\n result.p99_query_ms = latencies[int(len(latencies) * 0.99)]\n result.recall_at_k = total_recall / len(self.queries) if self.queries else 1.0\n \n return result\n \n def run_all(self) -> List[EmbeddingBenchResult]:\n \"\"\"Run all benchmarks\"\"\"\n results = []\n \n # SQLite (baseline)\n print(\"\\n=== SQLite (brute force) ===\")\n results.append(self.benchmark_sqlite())\n print(f\" Build: {results[-1].build_time_sec:.2f}s, Query: {results[-1].avg_query_ms:.2f}ms\")\n \n # FAISS Flat\n print(\"\\n=== FAISS Flat (exact) ===\")\n results.append(self.benchmark_faiss(\"flat\"))\n print(f\" Build: {results[-1].build_time_sec:.2f}s, Query: {results[-1].avg_query_ms:.2f}ms, Recall: {results[-1].recall_at_k:.2%}\")\n \n # FAISS HNSW\n print(\"\\n=== FAISS HNSW (approximate) ===\")\n results.append(self.benchmark_faiss(\"hnsw\"))\n print(f\" Build: {results[-1].build_time_sec:.2f}s, Query: {results[-1].avg_query_ms:.2f}ms, Recall: {results[-1].recall_at_k:.2%}\")\n \n return results\n \n def cleanup(self):\n \"\"\"Cleanup\"\"\"\n import shutil\n shutil.rmtree(self.tmpdir, ignore_errors=True)\n\n\ndef main():\n parser = argparse.ArgumentParser(description=\"Embedding Store Benchmark\")\n parser.add_argument(\"--documents\", \"-d\", type=int, default=1000, help=\"Number of documents\")\n parser.add_argument(\"--queries\", \"-q\", type=int, default=100, help=\"Number of queries\")\n parser.add_argument(\"--k\", type=int, default=10, help=\"Top-k results\")\n parser.add_argument(\"--json\", action=\"store_true\", help=\"Output as JSON\")\n args = parser.parse_args()\n \n config = EmbeddingBenchConfig(\n num_documents=args.documents,\n query_count=args.queries,\n k=args.k,\n )\n \n bench = EmbeddingBenchmark(config)\n bench.setup()\n results = bench.run_all()\n bench.cleanup()\n \n if args.json:\n print(json.dumps([r.to_dict() for r in results], indent=2))\n else:\n print(\"\\n\" + \"=\" * 60)\n print(\"EMBEDDING STORE BENCHMARK RESULTS\")\n print(\"=\" * 60)\n print(f\"Documents: {config.num_documents}, Queries: {config.query_count}, K: {config.k}\")\n print()\n print(f\"{'Store':\u003c20} {'Build (s)':\u003c12} {'Query (ms)':\u003c12} {'Recall':\u003c10}\")\n print(\"-\" * 54)\n for r in results:\n print(f\"{r.store_type:\u003c20} {r.build_time_sec:\u003c12.3f} {r.avg_query_ms:\u003c12.3f} {r.recall_at_k:\u003c10.2%}\")\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":9935,"content_sha256":"de06a7c8f1cfcd415647759421fe56fff0a66318bdaa92ab6fe073a6efd7cd5b"},{"filename":"benchmarks/fileorg/__init__.py","content":"\"\"\"fileorg - Organize files by extension based on YAML config.\"\"\"\n\nfrom .fileorg import (\n build_extension_map,\n load_config,\n main,\n organize_files,\n)\n\n__version__ = \"0.1.0\"\n__all__ = [\"main\", \"load_config\", \"build_extension_map\", \"organize_files\"]\n","content_type":"text/x-python; charset=utf-8","language":"python","size":262,"content_sha256":"cf37ff731507b0fa9f8f4f260ee18a406e7f4651de2ab900bb0a7fb56fd05c48"},{"filename":"benchmarks/fileorg/__main__.py","content":"\"\"\"Allow running as `python -m fileorg`.\"\"\"\n\nfrom .fileorg import main\nimport sys\n\nif __name__ == \"__main__\":\n sys.exit(main())\n","content_type":"text/x-python; charset=utf-8","language":"python","size":131,"content_sha256":"d740f27e03c4f6964fdaa071d64a89d46776e8ee81e1146ee9961536d3a697d1"},{"filename":"benchmarks/fileorg/fileorg.py","content":"#!/usr/bin/env python3\n\"\"\"fileorg - Organize files by extension based on YAML config.\"\"\"\n\nimport argparse\nimport os\nimport shutil\nimport sys\nfrom pathlib import Path\nfrom typing import Optional\n\nimport yaml\n\n\nDEFAULT_CONFIG = \"\"\"# fileorg configuration\nsource: .\ndry_run: false\n\nrules:\n # Images\n - extensions: [jpg, jpeg, png, gif, webp, svg]\n target: Images\n\n # Documents\n - extensions: [pdf, doc, docx, txt, md]\n target: Documents\n\n # Code\n - extensions: [py, js, ts, go, rs, java]\n target: Code\n\n # Archives\n - extensions: [zip, tar, gz, 7z, rar]\n target: Archives\n\"\"\"\n\n\ndef load_config(config_path: Path) -> dict:\n \"\"\"Load and validate config from YAML file.\"\"\"\n if not config_path.exists():\n print(f\"Error: Config file not found: {config_path}\", file=sys.stderr)\n sys.exit(1)\n\n with open(config_path) as f:\n config = yaml.safe_load(f)\n\n if not config:\n print(\"Error: Empty config file\", file=sys.stderr)\n sys.exit(1)\n\n if \"rules\" not in config:\n print(\"Error: Config must contain 'rules' section\", file=sys.stderr)\n sys.exit(1)\n\n return config\n\n\ndef build_extension_map(rules: list) -> dict[str, str]:\n \"\"\"Build extension -> target folder mapping from rules.\"\"\"\n ext_map = {}\n for rule in rules:\n target = rule.get(\"target\")\n extensions = rule.get(\"extensions\", [])\n for ext in extensions:\n ext_lower = ext.lower().lstrip(\".\")\n ext_map[ext_lower] = target\n return ext_map\n\n\ndef organize_files(\n source: Path,\n ext_map: dict[str, str],\n dry_run: bool = False,\n verbose: bool = False,\n) -> dict:\n \"\"\"Organize files in source directory based on extension mapping.\"\"\"\n stats = {\"moved\": 0, \"skipped\": 0, \"errors\": 0}\n\n if not source.exists():\n print(f\"Error: Source directory not found: {source}\", file=sys.stderr)\n sys.exit(1)\n\n for item in source.iterdir():\n if item.is_dir():\n continue\n\n ext = item.suffix.lower().lstrip(\".\")\n if ext not in ext_map:\n if verbose:\n print(f\" Skip: {item.name} (no matching rule)\")\n stats[\"skipped\"] += 1\n continue\n\n target_folder = source / ext_map[ext]\n target_path = target_folder / item.name\n\n if dry_run:\n print(f\" [DRY-RUN] {item.name} -> {ext_map[ext]}/\")\n stats[\"moved\"] += 1\n else:\n try:\n target_folder.mkdir(exist_ok=True)\n if target_path.exists():\n print(f\" Warning: {target_path} already exists, skipping\")\n stats[\"skipped\"] += 1\n continue\n shutil.move(str(item), str(target_path))\n if verbose:\n print(f\" Moved: {item.name} -> {ext_map[ext]}/\")\n stats[\"moved\"] += 1\n except Exception as e:\n print(f\" Error moving {item.name}: {e}\", file=sys.stderr)\n stats[\"errors\"] += 1\n\n return stats\n\n\ndef cmd_run(args: argparse.Namespace) -> int:\n \"\"\"Execute file organization.\"\"\"\n config = load_config(args.config)\n\n source = args.source if args.source else Path(config.get(\"source\", \".\"))\n source = source.expanduser().resolve()\n dry_run = args.dry_run if args.dry_run else config.get(\"dry_run\", False)\n verbose = args.verbose\n\n ext_map = build_extension_map(config[\"rules\"])\n\n print(f\"Organizing files in: {source}\")\n if dry_run:\n print(\"Mode: DRY-RUN (no changes will be made)\")\n print()\n\n stats = organize_files(source, ext_map, dry_run=dry_run, verbose=verbose)\n\n print()\n print(f\"Done. Moved: {stats['moved']}, Skipped: {stats['skipped']}, Errors: {stats['errors']}\")\n\n return 0 if stats[\"errors\"] == 0 else 1\n\n\ndef cmd_init(args: argparse.Namespace) -> int:\n \"\"\"Generate default config file.\"\"\"\n config_path = args.output\n\n if config_path.exists() and not args.force:\n print(f\"Error: {config_path} already exists. Use --force to overwrite.\", file=sys.stderr)\n return 1\n\n with open(config_path, \"w\") as f:\n f.write(DEFAULT_CONFIG)\n\n print(f\"Created config file: {config_path}\")\n return 0\n\n\ndef cmd_validate(args: argparse.Namespace) -> int:\n \"\"\"Validate config file.\"\"\"\n config = load_config(args.config)\n\n source = Path(config.get(\"source\", \".\")).expanduser()\n rules = config.get(\"rules\", [])\n\n errors = []\n\n if not source.exists():\n errors.append(f\"Source directory does not exist: {source}\")\n\n if not rules:\n errors.append(\"No rules defined\")\n\n for i, rule in enumerate(rules):\n if \"target\" not in rule:\n errors.append(f\"Rule {i+1}: missing 'target'\")\n if \"extensions\" not in rule:\n errors.append(f\"Rule {i+1}: missing 'extensions'\")\n elif not isinstance(rule[\"extensions\"], list):\n errors.append(f\"Rule {i+1}: 'extensions' must be a list\")\n\n if errors:\n print(\"Config validation failed:\", file=sys.stderr)\n for err in errors:\n print(f\" - {err}\", file=sys.stderr)\n return 1\n\n print(\"Config is valid.\")\n print(f\" Source: {source}\")\n print(f\" Rules: {len(rules)}\")\n print(f\" Dry-run: {config.get('dry_run', False)}\")\n return 0\n\n\ndef cmd_list(args: argparse.Namespace) -> int:\n \"\"\"List configured rules.\"\"\"\n config = load_config(args.config)\n rules = config.get(\"rules\", [])\n\n if not rules:\n print(\"No rules defined.\")\n return 0\n\n print(\"Configured rules:\")\n for rule in rules:\n target = rule.get(\"target\", \"???\")\n extensions = rule.get(\"extensions\", [])\n ext_str = \", \".join(f\".{e}\" for e in extensions)\n print(f\" {ext_str} -> {target}/\")\n\n return 0\n\n\ndef add_config_arg(parser: argparse.ArgumentParser) -> None:\n \"\"\"Add common config argument to a parser.\"\"\"\n parser.add_argument(\n \"-c\", \"--config\",\n type=Path,\n default=Path(\"fileorg.yaml\"),\n help=\"Path to config file (default: fileorg.yaml)\",\n )\n\n\ndef main(argv: Optional[list[str]] = None) -> int:\n parser = argparse.ArgumentParser(\n prog=\"fileorg\",\n description=\"Organize files by extension based on YAML config\",\n )\n\n subparsers = parser.add_subparsers(dest=\"command\", help=\"Available commands\")\n\n # run command\n run_parser = subparsers.add_parser(\"run\", help=\"Execute file organization\")\n add_config_arg(run_parser)\n run_parser.add_argument(\n \"-n\", \"--dry-run\",\n action=\"store_true\",\n help=\"Show what would be done without making changes\",\n )\n run_parser.add_argument(\n \"-s\", \"--source\",\n type=Path,\n help=\"Override source directory from config\",\n )\n run_parser.add_argument(\n \"-v\", \"--verbose\",\n action=\"store_true\",\n help=\"Enable verbose output\",\n )\n\n # init command\n init_parser = subparsers.add_parser(\"init\", help=\"Generate default config file\")\n init_parser.add_argument(\n \"-o\", \"--output\",\n type=Path,\n default=Path(\"fileorg.yaml\"),\n help=\"Output path for config file (default: fileorg.yaml)\",\n )\n init_parser.add_argument(\n \"-f\", \"--force\",\n action=\"store_true\",\n help=\"Overwrite existing config file\",\n )\n\n # validate command\n validate_parser = subparsers.add_parser(\"validate\", help=\"Validate config file\")\n add_config_arg(validate_parser)\n\n # list command\n list_parser = subparsers.add_parser(\"list\", help=\"List configured rules\")\n add_config_arg(list_parser)\n\n args = parser.parse_args(argv)\n\n # Default to 'run' if no command specified\n if args.command is None:\n args.command = \"run\"\n args.config = Path(\"fileorg.yaml\")\n args.dry_run = False\n args.source = None\n args.verbose = False\n\n commands = {\n \"run\": cmd_run,\n \"init\": cmd_init,\n \"validate\": cmd_validate,\n \"list\": cmd_list,\n }\n\n return commands[args.command](args)\n\n\nif __name__ == \"__main__\":\n sys.exit(main())\n","content_type":"text/x-python; charset=utf-8","language":"python","size":8120,"content_sha256":"6add8828f2859b830523f58f92048882c57b6e90cf6a595e88882ab69cea3003"},{"filename":"benchmarks/fileorg/README.md","content":"# fileorg\n\nOrganize files in a directory by extension using YAML-based rules.\n\n## Table of Contents\n\n- [Installation](#installation)\n- [Quick Start](#quick-start)\n- [Usage](#usage)\n- [Configuration](#configuration)\n- [Examples](#examples)\n- [Behavior](#behavior)\n- [Exit Codes](#exit-codes)\n- [Troubleshooting](#troubleshooting)\n\n## Installation\n\n```bash\npip install pyyaml\n```\n\n## Quick Start\n\n```bash\n# Generate default config\nfileorg init\n\n# Preview changes (dry-run)\nfileorg run --dry-run\n\n# Organize files\nfileorg run\n```\n\n## Usage\n\n```\nfileorg [OPTIONS] [COMMAND]\n```\n\n### Global Options\n\n| Option | Description |\n|--------|-------------|\n| `-c, --config PATH` | Path to config file (default: `fileorg.yaml`) |\n\n### Commands\n\n#### `run` - Execute file organization (default)\n\n```bash\nfileorg run [OPTIONS]\n```\n\n| Option | Description |\n|--------|-------------|\n| `-n, --dry-run` | Show what would be done without making changes |\n| `-s, --source PATH` | Override source directory from config |\n| `-v, --verbose` | Enable verbose output |\n\nExamples:\n```bash\nfileorg run # Organize files using fileorg.yaml\nfileorg run -n # Preview changes\nfileorg run -v # Show all operations\nfileorg run -s ~/Downloads # Override source directory\nfileorg -c ~/rules.yaml run # Use custom config\n```\n\n#### `init` - Generate default config file\n\n```bash\nfileorg init [OPTIONS]\n```\n\n| Option | Description |\n|--------|-------------|\n| `-o, --output PATH` | Output path for config file (default: `fileorg.yaml`) |\n| `-f, --force` | Overwrite existing config file |\n\nExamples:\n```bash\nfileorg init # Create fileorg.yaml\nfileorg init -o custom.yaml # Create custom.yaml\nfileorg init -f # Overwrite existing config\n```\n\n#### `validate` - Validate config file\n\n```bash\nfileorg validate\n```\n\nChecks config for:\n- Valid YAML syntax\n- Required fields (`rules`)\n- Source directory existence\n- Rule structure (each rule has `target` and `extensions`)\n\n#### `list` - List configured rules\n\n```bash\nfileorg list\n```\n\nDisplays all extension-to-folder mappings from the config.\n\n## Configuration\n\nConfig file format (`fileorg.yaml`):\n\n```yaml\n# Source directory to organize (default: current directory)\nsource: .\n\n# Enable dry-run mode by default (default: false)\ndry_run: false\n\n# Organization rules\nrules:\n # Images\n - extensions: [jpg, jpeg, png, gif, webp, svg]\n target: Images\n\n # Documents\n - extensions: [pdf, doc, docx, txt, md]\n target: Documents\n\n # Code\n - extensions: [py, js, ts, go, rs, java]\n target: Code\n\n # Archives\n - extensions: [zip, tar, gz, 7z, rar]\n target: Archives\n```\n\n### Config Fields\n\n| Field | Type | Required | Default | Description |\n|-------|------|----------|---------|-------------|\n| `source` | string | No | `.` | Directory to organize |\n| `dry_run` | boolean | No | `false` | Default dry-run mode |\n| `rules` | list | Yes | - | Organization rules |\n\n### Rule Fields\n\n| Field | Type | Required | Description |\n|-------|------|----------|-------------|\n| `extensions` | list | Yes | File extensions (without dots) |\n| `target` | string | Yes | Target folder name |\n\n## Examples\n\n### Organize Downloads folder\n\n```yaml\n# ~/fileorg-downloads.yaml\nsource: ~/Downloads\ndry_run: false\n\nrules:\n - extensions: [jpg, jpeg, png, gif, heic]\n target: Images\n - extensions: [mp4, mov, avi, mkv]\n target: Videos\n - extensions: [pdf, doc, docx, xlsx]\n target: Documents\n - extensions: [dmg, pkg, exe, zip]\n target: Installers\n```\n\n```bash\nfileorg -c ~/fileorg-downloads.yaml run\n```\n\n### Preview before organizing\n\n```bash\n# Always preview first\nfileorg run --dry-run\n\n# Output:\n# [DRY-RUN] photo.jpg -> Images/\n# [DRY-RUN] report.pdf -> Documents/\n# [DRY-RUN] script.py -> Code/\n```\n\n## Running as Module\n\n```bash\npython -m fileorg run\npython -m fileorg init\n```\n\n## Behavior\n\n- **Directories are skipped** - only files in the source directory are processed\n- **Subdirectories are not scanned** - only top-level files are organized\n- **Collision handling** - if a file already exists at the target location, it is skipped with a warning\n- **Target folders are auto-created** - destination folders are created as needed\n- **Case-insensitive matching** - `.JPG` and `.jpg` match the same rule\n- **Extensions without dots** - config uses `jpg` not `.jpg` (dots are stripped automatically)\n\n## Exit Codes\n\n| Code | Meaning |\n|------|---------|\n| 0 | Success |\n| 1 | Error (config not found, validation failed, move errors) |\n\n## Troubleshooting\n\n### Config file not found\n\n```\nError: Config file not found: fileorg.yaml\n```\n\nRun `fileorg init` to create a default config, or specify a path with `-c`.\n\n### Source directory not found\n\n```\nError: Source directory not found: /path/to/dir\n```\n\nCheck that the `source` path in your config exists and is accessible.\n\n### No files moved\n\nIf `fileorg run` reports 0 files moved:\n- Run with `--verbose` to see which files are being skipped\n- Check that your rules include the extensions you want to organize\n- Verify files exist in the source directory (not subdirectories)\n\n### Permission denied\n\n```\nError moving file.txt: [Errno 13] Permission denied\n```\n\nCheck that you have write permission for both the source file and target directory.\n\n### Config validation failed\n\n```\nConfig validation failed:\n - Rule 1: missing 'target'\n```\n\nEnsure each rule in your config has both `extensions` (list) and `target` (string) fields.\n\n## API Usage\n\nYou can also use fileorg as a Python library:\n\n```python\nfrom fileorg import load_config, build_extension_map, organize_files\nfrom pathlib import Path\n\n# Load config\nconfig = load_config(Path(\"fileorg.yaml\"))\n\n# Build extension mapping\next_map = build_extension_map(config[\"rules\"])\n\n# Organize files\nsource = Path(config.get(\"source\", \".\")).expanduser().resolve()\nstats = organize_files(source, ext_map, dry_run=True, verbose=True)\n\nprint(f\"Would move {stats['moved']} files\")\n```\n\n### API Reference\n\n| Function | Description |\n|----------|-------------|\n| `load_config(path)` | Load and validate YAML config file, returns dict |\n| `build_extension_map(rules)` | Convert rules list to `{ext: target}` dict |\n| `organize_files(source, ext_map, dry_run, verbose)` | Move files, returns stats dict |\n| `main(argv)` | CLI entry point, returns exit code |\n\n### Stats Dictionary\n\nThe `organize_files` function returns:\n\n```python\n{\"moved\": int, \"skipped\": int, \"errors\": int}\n```\n\n## License\n\nMIT\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":6522,"content_sha256":"9a7d8e34bc53218b5b21b1e8002e4509eed3db2cebd04f8fa8cf97e3d9b2004a"},{"filename":"benchmarks/librarian_bench.py","content":"#!/usr/bin/env python3\n\"\"\"\nLibrarian Benchmark\n\nMeasures:\n1. Query routing accuracy\n2. Cross-agent discovery precision/recall\n3. Privacy policy effectiveness\n4. Latency at different scales\n\"\"\"\n\nimport os\nimport sys\nimport time\nimport json\nimport random\nimport tempfile\nimport argparse\nfrom typing import List, Dict, Tuple\nfrom dataclasses import dataclass, field\n\nsys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))\n\nfrom avm.store import AVMStore\nfrom avm.node import AVMNode\nfrom avm.librarian import Librarian, LibrarianResponse, PrivacyPolicy, AgentInfo\nfrom avm.topic_index import TopicIndex\nfrom avm.embedding import EmbeddingStore, LocalEmbedding\n\n\n@dataclass\nclass BenchmarkConfig:\n \"\"\"Benchmark configuration\"\"\"\n num_agents: int = 10\n memories_per_agent: int = 100\n topics: List[str] = field(default_factory=lambda: [\n \"trading\", \"market\", \"crypto\", \"ai\", \"research\",\n \"personal\", \"code\", \"bugs\", \"features\", \"meetings\"\n ])\n query_count: int = 100\n seed: int = 42\n use_embedding: bool = False # Enable semantic search\n\n\n@dataclass\nclass BenchmarkResult:\n \"\"\"Benchmark results\"\"\"\n # Routing accuracy\n precision: float = 0.0 # Correct accessible / total accessible\n recall: float = 0.0 # Correct accessible / should be accessible\n \n # Discovery\n suggestion_accuracy: float = 0.0 # Correct suggestions / total suggestions\n \n # Latency (ms)\n avg_latency_ms: float = 0.0\n p50_latency_ms: float = 0.0\n p99_latency_ms: float = 0.0\n \n # Scale\n total_memories: int = 0\n total_agents: int = 0\n queries_run: int = 0\n \n def to_dict(self) -> Dict:\n return {\n \"routing\": {\n \"precision\": round(self.precision, 4),\n \"recall\": round(self.recall, 4),\n \"f1\": round(2 * self.precision * self.recall / (self.precision + self.recall + 0.001), 4),\n },\n \"discovery\": {\n \"suggestion_accuracy\": round(self.suggestion_accuracy, 4),\n },\n \"latency_ms\": {\n \"avg\": round(self.avg_latency_ms, 2),\n \"p50\": round(self.p50_latency_ms, 2),\n \"p99\": round(self.p99_latency_ms, 2),\n },\n \"scale\": {\n \"total_memories\": self.total_memories,\n \"total_agents\": self.total_agents,\n \"queries_run\": self.queries_run,\n }\n }\n\n\nclass LibrarianBenchmark:\n \"\"\"Benchmark harness for Librarian\"\"\"\n \n def __init__(self, config: BenchmarkConfig):\n self.config = config\n self.tmpdir = tempfile.mkdtemp()\n self.store = AVMStore(os.path.join(self.tmpdir, \"bench.db\"))\n self.topic_index = TopicIndex(self.store)\n \n # Embedding store (optional)\n self.embedding_store = None\n if config.use_embedding:\n try:\n backend = LocalEmbedding(\"all-MiniLM-L6-v2\")\n self.embedding_store = EmbeddingStore(self.store, backend)\n print(\"Embedding enabled (all-MiniLM-L6-v2)\")\n except ImportError:\n print(\"Warning: sentence-transformers not installed, using FTS only\")\n \n self.librarian = Librarian(\n self.store, \n privacy_policy=PrivacyPolicy(\"full\"),\n embedding_store=self.embedding_store,\n )\n \n # Ground truth\n self.agent_topics: Dict[str, List[str]] = {}\n self.memory_topics: Dict[str, str] = {} # path -> topic\n \n random.seed(config.seed)\n \n def setup(self):\n \"\"\"Generate synthetic memories\"\"\"\n print(f\"Generating {self.config.num_agents} agents × {self.config.memories_per_agent} memories...\")\n \n for agent_idx in range(self.config.num_agents):\n agent_id = f\"agent_{agent_idx:03d}\"\n \n # Each agent specializes in 2-3 topics\n agent_topics = random.sample(self.config.topics, random.randint(2, 3))\n self.agent_topics[agent_id] = agent_topics\n \n for mem_idx in range(self.config.memories_per_agent):\n # 80% on-topic, 20% random\n if random.random() \u003c 0.8:\n topic = random.choice(agent_topics)\n else:\n topic = random.choice(self.config.topics)\n \n path = f\"/memory/private/{agent_id}/{topic}/{mem_idx:04d}.md\"\n content = self._generate_content(topic, mem_idx)\n \n node = AVMNode(\n path=path,\n content=content,\n meta={\"topic\": topic, \"importance\": random.random()}\n )\n self.store.put_node(node)\n self.topic_index.index_path(path, content, topic)\n self.memory_topics[path] = topic\n \n # Generate embedding if enabled\n if self.embedding_store:\n self.embedding_store.embeend_node(node)\n \n # Register agents\n for agent_id, topics in self.agent_topics.items():\n self.librarian.register_agent(agent_id, AgentInfo(\n id=agent_id,\n capabilities=topics,\n memory_count=self.config.memories_per_agent,\n ))\n \n print(f\"Setup complete: {len(self.memory_topics)} memories indexed\")\n \n def _generate_content(self, topic: str, idx: int) -> str:\n \"\"\"Generate synthetic content for a topic\"\"\"\n templates = {\n \"trading\": f\"Market analysis #{idx}: BTC showing bullish patterns. RSI at {random.randint(30, 70)}.\",\n \"market\": f\"Stock update #{idx}: NVDA up {random.randint(1, 10)}%. Volume {random.randint(10, 100)}M.\",\n \"crypto\": f\"Crypto news #{idx}: ETH gas fees at {random.randint(5, 50)} gwei. DEX volume rising.\",\n \"ai\": f\"AI research #{idx}: New transformer architecture with {random.randint(1, 100)}B params.\",\n \"research\": f\"Paper summary #{idx}: Novel approach to {random.choice(['NLP', 'CV', 'RL', 'ML'])}.\",\n \"personal\": f\"Personal note #{idx}: Remember to {random.choice(['call', 'email', 'meet'])} about project.\",\n \"code\": f\"Code review #{idx}: Fixed bug in {random.choice(['auth', 'api', 'db'])} module.\",\n \"bugs\": f\"Bug report #{idx}: Issue in {random.choice(['login', 'checkout', 'search'])} flow.\",\n \"features\": f\"Feature request #{idx}: Add {random.choice(['dark mode', 'export', 'filters'])}.\",\n \"meetings\": f\"Meeting notes #{idx}: Discussed Q{random.randint(1, 4)} roadmap with team.\",\n }\n return templates.get(topic, f\"Generic content #{idx} about {topic}\")\n \n def run_benchmark(self) -> BenchmarkResult:\n \"\"\"Run the benchmark\"\"\"\n result = BenchmarkResult()\n result.total_memories = len(self.memory_topics)\n result.total_agents = len(self.agent_topics)\n \n latencies = []\n correct_accessible = 0\n total_accessible = 0\n should_be_accessible = 0\n correct_suggestions = 0\n total_suggestions = 0\n \n # Generate queries\n queries = self._generate_queries()\n result.queries_run = len(queries)\n \n for query, expected_topic, requester in queries:\n # Time the query\n start = time.perf_counter()\n response = self.librarian.query(requester, query, limit=10)\n elapsed_ms = (time.perf_counter() - start) * 1000\n latencies.append(elapsed_ms)\n \n # Check routing accuracy\n total_accessible += len(response.accessible)\n \n # Calculate expected accessible count\n expected_paths = [\n p for p, t in self.memory_topics.items()\n if t == expected_topic and p.startswith(f\"/memory/private/{requester}/\")\n ]\n should_be_accessible += len(expected_paths)\n \n # Count correct accessible\n for node in response.accessible:\n actual_topic = self.memory_topics.get(node.path, \"\")\n if actual_topic == expected_topic:\n correct_accessible += 1\n \n # Check suggestion accuracy\n for suggestion in response.suggestions:\n total_suggestions += 1\n # Check if suggested agent actually has this topic\n if suggestion.agent in self.agent_topics:\n if expected_topic in self.agent_topics[suggestion.agent]:\n correct_suggestions += 1\n \n # Calculate metrics\n result.precision = correct_accessible / max(total_accessible, 1)\n result.recall = correct_accessible / max(should_be_accessible, 1)\n result.suggestion_accuracy = correct_suggestions / max(total_suggestions, 1)\n \n # Latency percentiles\n latencies.sort()\n result.avg_latency_ms = sum(latencies) / len(latencies)\n result.p50_latency_ms = latencies[len(latencies) // 2]\n result.p99_latency_ms = latencies[int(len(latencies) * 0.99)]\n \n return result\n \n def _generate_queries(self) -> List[Tuple[str, str, str]]:\n \"\"\"Generate test queries: (query, expected_topic, requester)\"\"\"\n queries = []\n \n # Direct keyword queries (FTS-friendly)\n topic_keywords = {\n \"trading\": [\"market analysis\", \"bullish\", \"RSI\", \"trading strategy\"],\n \"market\": [\"stock\", \"NVDA\", \"volume\", \"earnings\"],\n \"crypto\": [\"BTC\", \"ETH\", \"gas fees\", \"DEX\"],\n \"ai\": [\"transformer\", \"neural network\", \"GPT\", \"LLM\"],\n \"research\": [\"paper\", \"novel approach\", \"study\", \"findings\"],\n \"personal\": [\"remember\", \"call\", \"email\", \"meeting\"],\n \"code\": [\"code review\", \"bug fix\", \"refactor\", \"module\"],\n \"bugs\": [\"bug report\", \"issue\", \"error\", \"crash\"],\n \"features\": [\"feature request\", \"enhancement\", \"add support\"],\n \"meetings\": [\"meeting notes\", \"roadmap\", \"discussion\", \"team\"],\n }\n \n # Semantic queries (embedding-friendly, no exact keywords)\n topic_semantic = {\n \"trading\": [\"how should I invest\", \"what's the best strategy\", \"market timing\"],\n \"market\": [\"how are tech stocks doing\", \"latest equity news\", \"share price changes\"],\n \"crypto\": [\"blockchain developments\", \"digital currency trends\", \"decentralized finance\"],\n \"ai\": [\"machine learning progress\", \"artificial intelligence advances\", \"deep learning models\"],\n \"research\": [\"academic work\", \"scientific studies\", \"published findings\"],\n \"personal\": [\"things I need to do\", \"my schedule\", \"reminders\"],\n \"code\": [\"programming improvements\", \"software changes\", \"development work\"],\n \"bugs\": [\"problems in the system\", \"things that broke\", \"errors to fix\"],\n \"features\": [\"new functionality\", \"product improvements\", \"user requests\"],\n \"meetings\": [\"team discussions\", \"planning sessions\", \"group conversations\"],\n }\n \n agents = list(self.agent_topics.keys())\n \n for i in range(self.config.query_count):\n topic = random.choice(self.config.topics)\n requester = random.choice(agents)\n \n # Mix: 50% keyword queries, 50% semantic queries\n if i % 2 == 0:\n query = random.choice(topic_keywords[topic])\n else:\n query = random.choice(topic_semantic[topic])\n \n queries.append((query, topic, requester))\n \n return queries\n \n def cleanup(self):\n \"\"\"Cleanup temporary files\"\"\"\n import shutil\n shutil.rmtree(self.tmpdir, ignore_errors=True)\n\n\ndef run_scale_benchmark(max_agents: int = 50, step: int = 10):\n \"\"\"Run benchmark at different scales\"\"\"\n results = []\n \n for num_agents in range(10, max_agents + 1, step):\n config = BenchmarkConfig(\n num_agents=num_agents,\n memories_per_agent=100,\n query_count=100,\n )\n \n bench = LibrarianBenchmark(config)\n bench.setup()\n result = bench.run_benchmark()\n bench.cleanup()\n \n results.append({\n \"agents\": num_agents,\n \"memories\": num_agents * 100,\n **result.to_dict()\n })\n \n print(f\"[{num_agents} agents] Precision: {result.precision:.3f}, \"\n f\"Recall: {result.recall:.3f}, Latency: {result.avg_latency_ms:.2f}ms\")\n \n return results\n\n\ndef main():\n parser = argparse.ArgumentParser(description=\"Librarian Benchmark\")\n parser.add_argument(\"--agents\", \"-a\", type=int, default=10, help=\"Number of agents\")\n parser.add_argument(\"--memories\", \"-m\", type=int, default=100, help=\"Memories per agent\")\n parser.add_argument(\"--queries\", \"-q\", type=int, default=100, help=\"Number of queries\")\n parser.add_argument(\"--scale\", action=\"store_true\", help=\"Run scale benchmark\")\n parser.add_argument(\"--max-agents\", type=int, default=50, help=\"Max agents for scale test\")\n parser.add_argument(\"--embedding\", \"-e\", action=\"store_true\", help=\"Enable semantic search\")\n parser.add_argument(\"--json\", action=\"store_true\", help=\"Output as JSON\")\n args = parser.parse_args()\n \n if args.scale:\n results = run_scale_benchmark(args.max_agents)\n if args.json:\n print(json.dumps(results, indent=2))\n return\n \n config = BenchmarkConfig(\n num_agents=args.agents,\n memories_per_agent=args.memories,\n query_count=args.queries,\n use_embedding=args.embedding,\n )\n \n bench = LibrarianBenchmark(config)\n bench.setup()\n result = bench.run_benchmark()\n bench.cleanup()\n \n if args.json:\n print(json.dumps(result.to_dict(), indent=2))\n else:\n print(\"\\n\" + \"=\" * 50)\n print(\"LIBRARIAN BENCHMARK RESULTS\")\n print(\"=\" * 50)\n print(f\"\\nScale: {result.total_agents} agents, {result.total_memories} memories\")\n print(f\"Queries: {result.queries_run}\")\n print(f\"\\nRouting Accuracy:\")\n print(f\" Precision: {result.precision:.4f}\")\n print(f\" Recall: {result.recall:.4f}\")\n f1 = 2 * result.precision * result.recall / (result.precision + result.recall + 0.001)\n print(f\" F1 Score: {f1:.4f}\")\n print(f\"\\nDiscovery:\")\n print(f\" Suggestion Accuracy: {result.suggestion_accuracy:.4f}\")\n print(f\"\\nLatency:\")\n print(f\" Average: {result.avg_latency_ms:.2f}ms\")\n print(f\" P50: {result.p50_latency_ms:.2f}ms\")\n print(f\" P99: {result.p99_latency_ms:.2f}ms\")\n\n\nif __name__ == \"__main__\":\n main()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":14951,"content_sha256":"8cb1fbf9858da1790dc18aa6dae200912765838ab66bf0808f60941faccabb2f"},{"filename":"benchmarks/memory/shared/bench/shared_knowledge.md","content":"# Shared Knowledge Base\n\n## System Components\n\n### Logging\n- Structured logging with configurable levels (DEBUG, INFO, WARN, ERROR)\n- Log aggregation for distributed systems\n- Correlation IDs for request tracing\n\n### Configuration\n- Environment-based config (dev, staging, prod)\n- Feature flags for gradual rollouts\n- Config hot-reloading support\n\n### Monitoring & Metrics\n- System metrics: CPU, memory, disk, network\n- Application metrics: request latency, error rates, throughput\n- Custom business metrics\n\n### Tracing\n- Distributed tracing with span context propagation\n- OpenTelemetry integration\n- Trace sampling strategies\n\n### Deployment\n- Blue-green deployment support\n- Canary releases with traffic splitting\n- Rollback capabilities\n\n### Security\n- Authentication: JWT tokens with refresh mechanism\n- Authorization: Role-based access control (RBAC)\n- API rate limiting and throttling\n- Input validation and sanitization\n\n### Caching\n- Multi-layer caching (L1: in-memory, L2: Redis)\n- Cache invalidation strategies (TTL, event-driven)\n- Cache warming for critical paths\n\n### Database\n- Connection pooling for efficient resource usage\n- Read replicas for scaling reads\n- Query optimization and indexing strategies\n- Migration management with versioning\n\n### Error Handling\n- Structured error responses with error codes\n- Circuit breaker pattern for fault tolerance\n- Retry policies with exponential backoff\n- Graceful degradation strategies\n\n### Testing\n- Unit tests with mocking frameworks\n- Integration tests with test containers\n- End-to-end tests for critical flows\n- Performance/load testing benchmarks\n\n### API Design\n- RESTful conventions with proper HTTP verbs\n- GraphQL for flexible client queries\n- API versioning strategies (URL, header, query param)\n- OpenAPI/Swagger documentation\n\n### Message Queues\n- Async processing with message brokers (RabbitMQ, Kafka)\n- Dead letter queues for failed messages\n- Message ordering and deduplication\n- Consumer group management\n\n### Observability\n- SLIs, SLOs, and SLAs definition\n- Alerting with escalation policies\n- Dashboards for real-time visibility\n- Incident management workflows\n\n### Scalability Patterns\n- Horizontal scaling with load balancers\n- Sharding for data partitioning\n- Event sourcing for audit trails\n- CQRS for read/write optimization\n\n### DevOps & CI/CD\n- Pipeline automation (build, test, deploy)\n- Infrastructure as Code (Terraform, Pulumi)\n- Container orchestration (Kubernetes)\n- GitOps workflows\n\n## Keywords\nlog, config, scale, update, feature, trace, metric, monitor, optimize, deploy, fix, system, security, auth, cache, database, error, test, retry, circuit-breaker, jwt, rbac, api, graphql, queue, kafka, sli, slo, alert, dashboard, sharding, cqrs, kubernetes, terraform, gitops, cicd\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":2774,"content_sha256":"79e6c3d4241a45a00f5f9320f8c5f8c50b9c593fc2d8ba9b7ffa370ab3ce7a44"},{"filename":"benchmarks/memory/shared/bugs/auth_token_prefix.md","content":"# Auth Token Format\n\n## Correct Format\n```\nAuthorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...\n```\n\n## Wrong Format\n```\nAuthorization: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...\n```\n\n## Issue Details\n- The `Bearer` prefix is **required** for OAuth 2.0 Bearer Token authentication\n- Missing the prefix will cause 401 Unauthorized responses\n- This is defined in RFC 6750\n\n## Integration Points\n- All API clients must include the Bearer prefix\n- Backend validation expects `Bearer ` (with space) before the token\n- Token refresh flows must preserve the correct format\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":572,"content_sha256":"48701c282eb53758fac0269ed9602b78366f86762eb2b39f85704b47e98c3626"},{"filename":"benchmarks/memory/shared/consensus/decision_a.md","content":"# Decision A\n\n**Agent:** agent_a\n**Timestamp:** 2026-03-23\n**Decision:** APPROVE\n\n## Rationale\n\nBased on the shared knowledge base and system architecture:\n\n1. **System Readiness**: All core components (logging, monitoring, tracing, security) are properly documented and configured\n2. **Deployment Safety**: Blue-green deployment and rollback capabilities are in place\n3. **Observability**: Comprehensive metrics, SLIs/SLOs, and alerting are configured\n4. **Error Handling**: Circuit breaker patterns and graceful degradation strategies are implemented\n\nDecision A confirms readiness to proceed with the proposed action.\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":621,"content_sha256":"151fbbf40569286a94df3677ff1b023d992b901e1b24e221b8956363e977ad17"},{"filename":"benchmarks/memory/shared/consensus/decision_b.md","content":"# Decision B\n\n**Agent:** agent_b\n**Timestamp:** 2026-03-23\n**Decision:** REJECT\n\n## Rationale\n\nBased on my independent analysis of the system state and risk factors:\n\n1. **Incomplete Testing Coverage**: While infrastructure components are documented, there is no evidence of end-to-end integration testing under production load conditions\n2. **Missing Failure Mode Analysis**: The knowledge base lacks documentation of known failure modes and their mitigations beyond basic circuit breakers\n3. **Timing Concerns**: Deployment during current period introduces unnecessary risk without sufficient rollback validation\n4. **Dependency Risks**: External service dependencies have not been stress-tested for cascading failure scenarios\n\n## Conflict with Decision A\n\nThis decision directly conflicts with agent_a's APPROVE decision. While agent_a focused on the presence of infrastructure components, this analysis emphasizes the gaps in validation and testing that pose unacceptable risk.\n\n## Recommended Resolution\n\nBefore proceeding, require:\n- Production-equivalent load testing results\n- Documented failure mode analysis\n- Explicit sign-off on rollback procedures\n\nDecision B recommends deferring the proposed action until these concerns are addressed.\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":1251,"content_sha256":"543685a2a67139c88120282691a113a818e62a12786e3b14222c91bed26f8488"},{"filename":"benchmarks/memory/shared/consensus/resolution.md","content":"# Conflict Resolution\n\n**Resolver:** resolver\n**Timestamp:** 2026-03-23\n**Conflict:** Decision A (APPROVE) vs Decision B (REJECT)\n\n## Resolution: CONDITIONAL APPROVE\n\nNeither a full approval nor a full rejection is warranted. The resolution is a **CONDITIONAL APPROVE** with gating requirements.\n\n## Analysis\n\n### Decision A's Valid Points\n- Core infrastructure components are documented and in place\n- Observability, deployment safety, and error handling exist in the architecture\n- The system has foundational readiness\n\n### Decision B's Valid Points\n- Documentation of components ≠ validation under production conditions\n- The shared knowledge base confirms testing infrastructure exists but provides no evidence of execution results\n- Failure mode analysis is absent from the knowledge base\n- Risk mitigation requires validation, not just capability presence\n\n## Synthesized Position\n\nThe infrastructure is **ready** but **unvalidated**. Proceeding without validation introduces unquantified risk. However, a full rejection ignores the substantial work already completed.\n\n## Conditional Requirements\n\nBefore full approval, the following must be satisfied:\n\n1. **Load Test Evidence** - Execute and document production-equivalent load testing (referenced in knowledge base but no results shown)\n2. **Rollback Validation** - Perform one rollback drill and document the results\n3. **Failure Mode Catalog** - Document top 3 known failure modes and their mitigations\n\n## Decision\n\n| Condition | Status | Action |\n|-----------|--------|--------|\n| All 3 requirements met | APPROVE | Proceed with deployment |\n| 1-2 requirements met | DEFER | Complete remaining items first |\n| 0 requirements met | REJECT | Full validation needed |\n\n## Rationale\n\nThis resolution:\n- Acknowledges agent_a's infrastructure assessment as accurate\n- Addresses agent_b's risk concerns with concrete gates\n- Provides a clear path forward rather than deadlock\n- Balances progress with safety\n\nThe conflict arose from different evaluation criteria (presence vs. validation). Both are necessary; this resolution requires both.\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":2101,"content_sha256":"252fce65ceec0b536718f7c1eef77c4aefea69608f3a04b7831f8b37b48ff1dc"},{"filename":"benchmarks/memory/shared/consensus/voter_1_vote.md","content":"# Voter_1 Vote\n\n**Proposal:** `release_v2.1`\n**Vote:** ACCEPT\n**Timestamp:** 2026-03-23\n\n## Rationale\nThe proposed value `release_v2.1` is a valid and reasonable release version identifier. No objections.\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":205,"content_sha256":"f789f85a69ac1988f19dca0418ac7ba44d5319dba5efc4db17dca77ea9e3d7ff"},{"filename":"benchmarks/memory/shared/queue/high_priority.md","content":"# High Priority Queue\n## Producer: producer_1\n## Timestamp: 2026-03-23\n\n---\n\n### Item 1\n- **ID:** HP-001\n- **Priority:** HIGH\n- **Description:** Critical security patch for authentication module\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 2\n- **ID:** HP-002\n- **Priority:** HIGH\n- **Description:** Database migration for user schema v2\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 3\n- **ID:** HP-003\n- **Priority:** HIGH\n- **Description:** Performance fix for API rate limiting\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 4\n- **ID:** HP-004\n- **Priority:** HIGH\n- **Description:** Fix memory leak in worker pool causing OOM crashes\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 5\n- **ID:** HP-005\n- **Priority:** HIGH\n- **Description:** SSL certificate renewal before expiration\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 6\n- **ID:** HP-006\n- **Priority:** HIGH\n- **Description:** Hotfix for payment processing timeout errors\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 7\n- **ID:** HP-007\n- **Priority:** HIGH\n- **Description:** Critical auth token validation bypass in OAuth flow\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 8\n- **ID:** HP-008\n- **Priority:** HIGH\n- **Description:** Database connection pool exhaustion under load\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 9\n- **ID:** HP-009\n- **Priority:** HIGH\n- **Description:** Race condition in distributed lock acquisition\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 10\n- **ID:** HP-010\n- **Priority:** HIGH\n- **Description:** Critical webhook delivery failures causing data sync lag\n- **Status:** PROCESSED\n- **Processed By:** day_agent\n- **Processed At:** 2026-03-23\n\n### Item 11\n- **ID:** HP-011\n- **Priority:** HIGH\n- **Description:** Emergency fix for S3 bucket permission misconfiguration\n- **Status:** PROCESSED\n- **Processed By:** day_agent\n- **Processed At:** 2026-03-23\n\n### Item 12\n- **ID:** HP-012\n- **Priority:** HIGH\n- **Description:** Auth feature external API dependency blocking sprint delivery\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23","content_type":"text/markdown; charset=utf-8","language":"markdown","size":2501,"content_sha256":"4f0e5b0109570995fc678c48a4cb5a02abe620cbc0d539c418a8df48708d0280"},{"filename":"benchmarks/memory/shared/queue/low_priority.md","content":"# Low Priority Queue\n## Producer: producer_2\n## Timestamp: 2026-03-23\n\n---\n\n### Item 1\n- **ID:** LP-001\n- **Priority:** LOW\n- **Description:** Update documentation for API endpoints\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 2\n- **ID:** LP-002\n- **Priority:** LOW\n- **Description:** Refactor legacy logging utility\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 3\n- **ID:** LP-003\n- **Priority:** LOW\n- **Description:** Add unit tests for helper functions\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 4\n- **ID:** LP-004\n- **Priority:** LOW\n- **Description:** Add dark mode theme support to UI\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 5\n- **ID:** LP-005\n- **Priority:** LOW\n- **Description:** Implement data export feature (CSV/JSON)\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 6\n- **ID:** LP-006\n- **Priority:** LOW\n- **Description:** Improve search performance with indexing\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 7\n- **ID:** LP-007\n- **Priority:** LOW\n- **Description:** Add keyboard shortcuts for common actions\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 8\n- **ID:** LP-008\n- **Priority:** LOW\n- **Description:** Implement user activity audit logging\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 9\n- **ID:** LP-009\n- **Priority:** LOW\n- **Description:** Add bulk import feature for data migration\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 10\n- **ID:** LP-010\n- **Priority:** LOW\n- **Description:** Optimize database query caching strategy\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 11\n- **ID:** LP-011\n- **Priority:** LOW\n- **Description:** Add email template customization options\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23\n\n### Item 12\n- **ID:** LP-012\n- **Priority:** LOW\n- **Description:** Fix memory leak in worker process\n- **Status:** PROCESSED\n- **Processed By:** consumer\n- **Processed At:** 2026-03-23","content_type":"text/markdown; charset=utf-8","language":"markdown","size":2363,"content_sha256":"56c3a4fa84f3603f9837b73fae8cffba9a71100e1f347b97bb9ff26caee6d4a0"},{"filename":"benchmarks/MULTIAGENT_BENCH_PLAN.md","content":"# AVM Multi-Agent Collaboration Benchmark Plan\n\n## 目标\n\n测量真实场景下的:\n1. **Agent 效率** — 单 agent 任务完成速度/质量\n2. **AVM 效率** — 记忆检索/存储对任务的影响\n3. **协作效率** — 多 agent 协作 vs 单 agent 的提升\n4. **问题解决速度** — 端到端任务完成时间\n\n## 现有数据集参考\n\n### MARBLE (MultiAgentBench) - ACL 2025\n- GitHub: https://github.com/ulab-uiuc/MARBLE\n- 场景:狼人杀、研究协作、Web 任务\n- 指标:任务分数、里程碑完成率、协作质量\n- **适用性**:偏游戏/社交模拟,与 AVM 场景不完全匹配\n\n### AWS Multi-Agent Benchmark\n- GitHub: https://github.com/aws-samples/multiagent-collab-scenario-benchmark\n- 场景:旅行规划、抵押贷款、软件开发(30 个场景)\n- 指标:断言验证(assertions)\n- **适用性**:企业场景,数据格式简洁,可借鉴\n\n### AgentBench - ICLR 2024\n- GitHub: https://github.com/THUDM/AgentBench\n- 场景:代码、游戏、Web、数据库等\n- 指标:任务成功率\n- **适用性**:单 agent 为主,可作为 baseline\n\n## AVM 专属 Benchmark 设计\n\n### 场景类别\n\n1. **知识检索任务** (Memory Retrieval)\n - 从多 agent 共享记忆中检索信息\n - 测量 recall/precision/latency\n - 对比有/无 AVM 的表现\n\n2. **协作编码任务** (Collaborative Coding)\n - 多 agent 共同完成代码任务\n - Agent A 写框架,Agent B 写测试,Agent C review\n - 测量协作效率、代码质量\n\n3. **信息同步任务** (Information Sync)\n - Agent A 学到新知识,Agent B 需要使用\n - 测量 gossip protocol 传播效率\n - 对比直接通信 vs AVM 共享\n\n4. **上下文累积任务** (Context Accumulation)\n - 长对话中积累的知识点\n - 测量 consolidation 效果\n - memory decay 的影响\n\n### 指标体系\n\n| 指标 | 描述 | 测量方法 |\n|------|------|----------|\n| **Task Success Rate** | 任务完成率 | 二元判断 + LLM judge |\n| **Time to Complete** | 完成时间 | wall-clock time |\n| **Token Efficiency** | Token 使用效率 | tokens / task score |\n| **Memory Precision** | 记忆检索精度 | retrieved_relevant / retrieved_total |\n| **Memory Recall** | 记忆检索召回 | retrieved_relevant / total_relevant |\n| **Collaboration Score** | 协作质量 | 专家评分 / LLM judge |\n| **Knowledge Transfer** | 知识传递效率 | Agent B 使用 Agent A 知识的成功率 |\n\n### 数据集格式\n\n```json\n{\n \"scenario_id\": \"coding-001\",\n \"category\": \"collaborative_coding\",\n \"description\": \"Implement a REST API with tests\",\n \"agents\": [\n {\"id\": \"coder\", \"role\": \"Write implementation\"},\n {\"id\": \"tester\", \"role\": \"Write tests\"},\n {\"id\": \"reviewer\", \"role\": \"Code review\"}\n ],\n \"initial_context\": \"...\",\n \"assertions\": [\n \"API endpoints are functional\",\n \"Test coverage > 80%\",\n \"No critical review comments unaddressed\"\n ],\n \"expected_interactions\": 5,\n \"time_limit_seconds\": 600\n}\n```\n\n### 实验设计\n\n#### Ablation Study\n\n| 配置 | AVM | Gossip | Consolidation |\n|------|-----|--------|---------------|\n| Baseline | ❌ | ❌ | ❌ |\n| +AVM | ✅ | ❌ | ❌ |\n| +Gossip | ✅ | ✅ | ❌ |\n| Full | ✅ | ✅ | ✅ |\n\n#### 比较对象\n\n- No memory (baseline)\n- Simple key-value store\n- AVM (ours)\n- RAG-based memory\n\n### 日志格式\n\n```json\n{\n \"run_id\": \"uuid\",\n \"timestamp\": \"ISO8601\",\n \"scenario_id\": \"coding-001\",\n \"config\": {\n \"avm_enabled\": true,\n \"gossip_enabled\": true,\n \"model\": \"claude-sonnet-4\"\n },\n \"events\": [\n {\n \"timestamp\": \"...\",\n \"agent\": \"coder\",\n \"action\": \"memory_write\",\n \"path\": \"/shared/code/api.py\",\n \"tokens_used\": 150\n },\n {\n \"timestamp\": \"...\",\n \"agent\": \"tester\",\n \"action\": \"memory_read\",\n \"query\": \"api implementation\",\n \"results\": 3,\n \"latency_ms\": 45\n }\n ],\n \"result\": {\n \"success\": true,\n \"assertions_passed\": 3,\n \"assertions_total\": 3,\n \"time_seconds\": 342,\n \"total_tokens\": 15000\n }\n}\n```\n\n## 实现步骤\n\n### Phase 1: 数据集创建 (Week 1)\n- [ ] 设计 10 个协作编码场景\n- [ ] 设计 10 个知识检索场景\n- [ ] 设计 10 个信息同步场景\n- [ ] 编写 assertion 验证器\n\n### Phase 2: Benchmark 框架 (Week 2)\n- [ ] 实现 scenario runner\n- [ ] 实现日志记录器\n- [ ] 实现 LLM judge\n- [ ] 集成 AVM\n\n### Phase 3: 实验运行 (Week 3)\n- [ ] 运行 baseline 实验\n- [ ] 运行 ablation study\n- [ ] 收集结果\n\n### Phase 4: 分析报告 (Week 4)\n- [ ] 统计分析\n- [ ] 可视化\n- [ ] 撰写技术报告\n\n## 参考资料\n\n- MARBLE: https://arxiv.org/abs/2503.01935\n- AWS Multi-Agent: https://arxiv.org/abs/2412.05449\n- AgentBench: https://github.com/THUDM/AgentBench\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":4746,"content_sha256":"4d22caae494a9440d9339b47517346ac89a0ba18f119b4a7c05ac6b2846cf530"},{"filename":"benchmarks/notification_service/__init__.py","content":"\"\"\"Notification Service - Redis-based message processing.\"\"\"\n\nfrom .main import app\nfrom .service import notification_service, NotificationService\nfrom .models import (\n NotificationMessage,\n NotificationType,\n NotificationPriority,\n NotificationStatus,\n NotificationResult,\n EmailPayload,\n HealthStatus,\n)\nfrom .processor import processor\nfrom .redis_client import queue\n\n__all__ = [\n \"app\",\n \"notification_service\",\n \"NotificationService\",\n \"NotificationMessage\",\n \"NotificationType\",\n \"NotificationPriority\",\n \"NotificationStatus\",\n \"NotificationResult\",\n \"EmailPayload\",\n \"HealthStatus\",\n \"processor\",\n \"queue\",\n]\n","content_type":"text/x-python; charset=utf-8","language":"python","size":675,"content_sha256":"172e6ac5be702c35193ceee974739fc611c2df4a777ea44b333a10c34112b6f4"},{"filename":"benchmarks/notification_service/.github/workflows/ci.yaml","content":"name: CI\n\non:\n push:\n branches: [main]\n paths:\n - 'notification_service/**'\n pull_request:\n branches: [main]\n paths:\n - 'notification_service/**'\n\ndefaults:\n run:\n working-directory: notification_service\n\njobs:\n lint:\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v4\n\n - name: Set up Python\n uses: actions/setup-python@v5\n with:\n python-version: '3.11'\n cache: 'pip'\n\n - name: Install linting tools\n run: pip install ruff mypy\n\n - name: Run ruff\n run: ruff check .\n\n - name: Run mypy\n run: mypy . --ignore-missing-imports\n\n test:\n runs-on: ubuntu-latest\n services:\n redis:\n image: redis:7-alpine\n ports:\n - 6379:6379\n options: >-\n --health-cmd \"redis-cli ping\"\n --health-interval 10s\n --health-timeout 5s\n --health-retries 5\n\n steps:\n - uses: actions/checkout@v4\n\n - name: Set up Python\n uses: actions/setup-python@v5\n with:\n python-version: '3.11'\n cache: 'pip'\n\n - name: Install dependencies\n run: |\n pip install -r requirements.txt\n pip install pytest-cov\n\n - name: Run tests with coverage\n env:\n NOTIF_REDIS_HOST: localhost\n NOTIF_REDIS_PORT: 6379\n run: |\n pytest --cov=. --cov-report=xml --cov-report=term-missing\n\n - name: Upload coverage\n uses: codecov/codecov-action@v4\n with:\n files: notification_service/coverage.xml\n fail_ci_if_error: false\n\n build:\n runs-on: ubuntu-latest\n needs: [lint, test]\n steps:\n - uses: actions/checkout@v4\n\n - name: Set up Docker Buildx\n uses: docker/setup-buildx-action@v3\n\n - name: Build Docker image\n uses: docker/build-push-action@v5\n with:\n context: notification_service\n push: false\n tags: notification-service:${{ github.sha }}\n cache-from: type=gha\n cache-to: type=gha,mode=max\n\n deploy:\n runs-on: ubuntu-latest\n needs: build\n if: github.ref == 'refs/heads/main' && github.event_name == 'push'\n environment: production\n steps:\n - uses: actions/checkout@v4\n\n - name: Set up Docker Buildx\n uses: docker/setup-buildx-action@v3\n\n - name: Login to Container Registry\n uses: docker/login-action@v3\n with:\n registry: ${{ vars.REGISTRY }}\n username: ${{ secrets.REGISTRY_USERNAME }}\n password: ${{ secrets.REGISTRY_PASSWORD }}\n\n - name: Build and push\n uses: docker/build-push-action@v5\n with:\n context: notification_service\n push: true\n tags: |\n ${{ vars.REGISTRY }}/notification-service:${{ github.sha }}\n ${{ vars.REGISTRY }}/notification-service:latest\n\n - name: Deploy to Kubernetes\n env:\n KUBECONFIG_DATA: ${{ secrets.KUBECONFIG }}\n run: |\n echo \"$KUBECONFIG_DATA\" | base64 -d > /tmp/kubeconfig\n export KUBECONFIG=/tmp/kubeconfig\n cd k8s\n kubectl kustomize . | kubectl apply -f -\n kubectl -n notification-service set image deployment/notification-service \\\n notification-service=${{ vars.REGISTRY }}/notification-service:${{ github.sha }}\n kubectl -n notification-service rollout status deployment/notification-service\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":3454,"content_sha256":"3d310b5ab11d2ef37dce8d6b0ac5c431de23aa1be2ac8732a0b3cfcf6e4d7bfe"},{"filename":"benchmarks/notification_service/.github/workflows/pr-check.yaml","content":"name: PR Check\n\non:\n pull_request:\n branches: [main]\n paths:\n - 'notification_service/**'\n\ndefaults:\n run:\n working-directory: notification_service\n\njobs:\n validate-k8s:\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v4\n\n - name: Set up kubectl\n uses: azure/setup-kubectl@v3\n\n - name: Validate Kubernetes manifests\n run: |\n kubectl kustomize k8s/ > /tmp/manifests.yaml\n kubectl apply --dry-run=client -f /tmp/manifests.yaml\n\n docker-lint:\n runs-on: ubuntu-latest\n steps:\n - uses: actions/checkout@v4\n\n - name: Lint Dockerfile\n uses: hadolint/[email protected]\n with:\n dockerfile: notification_service/Dockerfile\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":738,"content_sha256":"1fca4a438f229754109a957b7dc894b20ece4e9302ba35c099d68b2534aaa50d"},{"filename":"benchmarks/notification_service/config.py","content":"\"\"\"Configuration for the Notification Service.\"\"\"\n\nimport os\nfrom pydantic_settings import BaseSettings\n\n\nclass Settings(BaseSettings):\n \"\"\"Service configuration from environment variables.\"\"\"\n\n # Redis settings\n redis_host: str = \"localhost\"\n redis_port: int = 6379\n redis_db: int = 0\n redis_password: str | None = None\n redis_queue_name: str = \"notifications\"\n redis_connect_timeout: float = 5.0\n redis_socket_timeout: float = 5.0\n redis_max_retries: int = 3\n redis_retry_delay: float = 0.5\n\n # Circuit breaker settings\n circuit_breaker_threshold: int = 5\n circuit_breaker_timeout: float = 30.0\n\n # Service settings\n service_name: str = \"notification-service\"\n service_version: str = \"1.0.0\"\n log_level: str = \"INFO\"\n\n # Processing settings\n poll_interval: float = 1.0 # seconds\n batch_size: int = 10\n max_retries: int = 3\n\n # Email settings (for future SMTP integration)\n smtp_host: str = \"localhost\"\n smtp_port: int = 587\n smtp_user: str | None = None\n smtp_password: str | None = None\n smtp_from: str = \"[email protected]\"\n\n class Config:\n env_prefix = \"NOTIF_\"\n\n\nsettings = Settings()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":1188,"content_sha256":"7a421b71259de2bb675d95f1a25d97c39146020e7af9a17abe8da7e6f2fb3e04"},{"filename":"benchmarks/notification_service/docker-compose.test.yml","content":"version: \"3.8\"\n\nservices:\n test-runner:\n build:\n context: .\n dockerfile: Dockerfile.test\n environment:\n - NOTIF_REDIS_HOST=redis\n - NOTIF_REDIS_PORT=6379\n depends_on:\n redis:\n condition: service_healthy\n volumes:\n - ./:/app\n - /app/__pycache__\n\n redis:\n image: redis:7-alpine\n healthcheck:\n test: [\"CMD\", \"redis-cli\", \"ping\"]\n interval: 5s\n timeout: 3s\n retries: 3\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":447,"content_sha256":"0749cc3654c5f18589409cb61cb5abbebe81ec8b030294353918c3e9577b91c2"},{"filename":"benchmarks/notification_service/docker-compose.yml","content":"version: \"3.8\"\n\nservices:\n notification-service:\n build:\n context: .\n dockerfile: Dockerfile\n ports:\n - \"8000:8000\"\n environment:\n - NOTIF_REDIS_HOST=redis\n - NOTIF_REDIS_PORT=6379\n - NOTIF_LOG_LEVEL=INFO\n - NOTIF_POLL_INTERVAL=1.0\n - NOTIF_BATCH_SIZE=10\n depends_on:\n redis:\n condition: service_healthy\n restart: unless-stopped\n healthcheck:\n test: [\"CMD\", \"python\", \"-c\", \"import urllib.request; urllib.request.urlopen('http://localhost:8000/health')\"]\n interval: 30s\n timeout: 10s\n retries: 3\n start_period: 10s\n\n redis:\n image: redis:7-alpine\n ports:\n - \"6379:6379\"\n volumes:\n - redis_data:/data\n healthcheck:\n test: [\"CMD\", \"redis-cli\", \"ping\"]\n interval: 10s\n timeout: 5s\n retries: 3\n restart: unless-stopped\n\nvolumes:\n redis_data:\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":881,"content_sha256":"84b5349c9fa6b915c3f20665d730649efca5e00198ee1ddd78f60cb73fecf20c"},{"filename":"benchmarks/notification_service/Dockerfile","content":"# Notification Service Dockerfile\nFROM python:3.11-slim\n\nWORKDIR /app\n\n# Install dependencies\nCOPY requirements.txt .\nRUN pip install --no-cache-dir -r requirements.txt\n\n# Copy application code\nCOPY . .\n\n# Create non-root user for security\nRUN useradd --create-home --shell /bin/bash appuser && \\\n chown -R appuser:appuser /app\nUSER appuser\n\n# Expose port\nEXPOSE 8000\n\n# Health check\nHEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \\\n CMD python -c \"import urllib.request; urllib.request.urlopen('http://localhost:8000/health')\" || exit 1\n\n# Run the application\nCMD [\"uvicorn\", \"notification_service.main:app\", \"--host\", \"0.0.0.0\", \"--port\", \"8000\"]\n","content_type":"text/plain; charset=utf-8","language":"docker","size":679,"content_sha256":"233179e91067bfd2d76f90789691d39443e77058147da493d387f8785e983494"},{"filename":"benchmarks/notification_service/handlers.py","content":"\"\"\"Notification type handlers.\"\"\"\n\nimport smtplib\nfrom abc import ABC, abstractmethod\nfrom email.mime.multipart import MIMEMultipart\nfrom email.mime.text import MIMEText\nfrom typing import Dict, Type, Optional\n\nfrom .models import (\n NotificationMessage,\n NotificationResult,\n NotificationStatus,\n NotificationType,\n EmailPayload,\n)\nfrom .config import settings\nfrom .logger import log_notification_event, log_error\n\n\nclass NotificationHandler(ABC):\n \"\"\"Base class for notification handlers.\"\"\"\n\n @abstractmethod\n def handle(self, message: NotificationMessage) -> NotificationResult:\n \"\"\"Process a notification message.\"\"\"\n pass\n\n @abstractmethod\n def validate_payload(self, payload: dict) -> bool:\n \"\"\"Validate the notification payload.\"\"\"\n pass\n\n\nclass EmailHandler(NotificationHandler):\n \"\"\"Handler for email notifications.\"\"\"\n\n def __init__(self):\n self._smtp_enabled = bool(settings.smtp_host and settings.smtp_port)\n\n def validate_payload(self, payload: dict) -> bool:\n \"\"\"Validate email payload structure.\"\"\"\n try:\n EmailPayload.model_validate(payload)\n return True\n except Exception:\n return False\n\n def handle(self, message: NotificationMessage) -> NotificationResult:\n \"\"\"Process an email notification.\"\"\"\n log_notification_event(\"EMAIL_PROCESSING\", message.id, {\"type\": \"email\"})\n\n if not self.validate_payload(message.payload):\n log_error(\"Invalid email payload\", message.id)\n return NotificationResult(\n notification_id=message.id,\n status=NotificationStatus.FAILED,\n error_message=\"Invalid email payload\",\n )\n\n email = EmailPayload.model_validate(message.payload)\n\n try:\n self._send_email(email, message.id)\n log_notification_event(\n \"EMAIL_SENT\",\n message.id,\n {\"to\": email.to, \"subject\": email.subject},\n )\n return NotificationResult(\n notification_id=message.id,\n status=NotificationStatus.SENT,\n details={\"to\": email.to, \"subject\": email.subject},\n )\n except Exception as e:\n log_error(\"Email send failed\", message.id, e)\n return NotificationResult(\n notification_id=message.id,\n status=NotificationStatus.FAILED,\n error_message=str(e),\n )\n\n def _send_email(self, email: EmailPayload, notification_id: str) -> None:\n \"\"\"Send email via SMTP or log in stub mode.\"\"\"\n if not self._smtp_enabled or settings.smtp_host == \"localhost\":\n # Stub mode: log email details without sending\n log_notification_event(\n \"EMAIL_STUB\",\n notification_id,\n {\n \"to\": email.to,\n \"subject\": email.subject,\n \"body_length\": len(email.body),\n },\n )\n return\n\n # Build MIME message\n msg = MIMEMultipart(\"alternative\")\n msg[\"Subject\"] = email.subject\n msg[\"From\"] = settings.smtp_from\n msg[\"To\"] = email.to\n\n if email.cc:\n msg[\"Cc\"] = \", \".join(email.cc)\n if email.bcc:\n msg[\"Bcc\"] = \", \".join(email.bcc)\n\n # Attach plain text body\n msg.attach(MIMEText(email.body, \"plain\"))\n\n # Attach HTML body if provided\n if email.html_body:\n msg.attach(MIMEText(email.html_body, \"html\"))\n\n # Build recipient list\n recipients = [email.to]\n if email.cc:\n recipients.extend(email.cc)\n if email.bcc:\n recipients.extend(email.bcc)\n\n # Send via SMTP\n with smtplib.SMTP(settings.smtp_host, settings.smtp_port) as server:\n if settings.smtp_user and settings.smtp_password:\n server.starttls()\n server.login(settings.smtp_user, settings.smtp_password)\n server.sendmail(settings.smtp_from, recipients, msg.as_string())\n\n\n# Handler registry for extensibility\nHANDLERS: Dict[NotificationType, Type[NotificationHandler]] = {\n NotificationType.EMAIL: EmailHandler,\n}\n\n\ndef get_handler(notification_type: NotificationType) -> NotificationHandler:\n \"\"\"Get the appropriate handler for a notification type.\"\"\"\n handler_class = HANDLERS.get(notification_type)\n if not handler_class:\n raise ValueError(f\"No handler for notification type: {notification_type}\")\n return handler_class()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":4639,"content_sha256":"51ad773c8b51ed51994bb74cfe2e021b9eb2c1dac523b5942622739fad5d4f25"},{"filename":"benchmarks/notification_service/k8s/configmap.yaml","content":"apiVersion: v1\nkind: ConfigMap\nmetadata:\n name: notification-service-config\n namespace: notification-service\n labels:\n app.kubernetes.io/name: notification-service\ndata:\n NOTIF_REDIS_HOST: \"redis\"\n NOTIF_REDIS_PORT: \"6379\"\n NOTIF_REDIS_DB: \"0\"\n NOTIF_REDIS_CONNECT_TIMEOUT: \"5.0\"\n NOTIF_REDIS_SOCKET_TIMEOUT: \"5.0\"\n NOTIF_REDIS_MAX_RETRIES: \"3\"\n NOTIF_REDIS_RETRY_DELAY: \"0.5\"\n NOTIF_CIRCUIT_BREAKER_THRESHOLD: \"5\"\n NOTIF_CIRCUIT_BREAKER_TIMEOUT: \"30.0\"\n NOTIF_LOG_LEVEL: \"INFO\"\n NOTIF_POLL_INTERVAL: \"1.0\"\n NOTIF_BATCH_SIZE: \"10\"\n NOTIF_MAX_RETRIES: \"3\"\n NOTIF_SMTP_PORT: \"587\"\n NOTIF_SMTP_FROM: \"[email protected]\"\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":640,"content_sha256":"9c836a43d33aace23a30bfdbc271ecb0be98b0d26c37308f2dfb0c82bf88b50d"},{"filename":"benchmarks/notification_service/k8s/deployment.yaml","content":"apiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: notification-service\n namespace: notification-service\n labels:\n app.kubernetes.io/name: notification-service\n app.kubernetes.io/component: api\nspec:\n replicas: 2\n selector:\n matchLabels:\n app.kubernetes.io/name: notification-service\n template:\n metadata:\n labels:\n app.kubernetes.io/name: notification-service\n app.kubernetes.io/component: api\n spec:\n securityContext:\n runAsNonRoot: true\n runAsUser: 1000\n fsGroup: 1000\n containers:\n - name: notification-service\n image: notification-service:latest\n imagePullPolicy: IfNotPresent\n ports:\n - name: http\n containerPort: 8000\n protocol: TCP\n envFrom:\n - configMapRef:\n name: notification-service-config\n - secretRef:\n name: notification-service-secrets\n resources:\n requests:\n memory: \"128Mi\"\n cpu: \"100m\"\n limits:\n memory: \"512Mi\"\n cpu: \"500m\"\n livenessProbe:\n httpGet:\n path: /health\n port: http\n initialDelaySeconds: 10\n periodSeconds: 30\n timeoutSeconds: 10\n failureThreshold: 3\n readinessProbe:\n httpGet:\n path: /health\n port: http\n initialDelaySeconds: 5\n periodSeconds: 10\n timeoutSeconds: 5\n failureThreshold: 3\n securityContext:\n allowPrivilegeEscalation: false\n readOnlyRootFilesystem: true\n capabilities:\n drop:\n - ALL\n restartPolicy: Always\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":1801,"content_sha256":"4194573bd0041700188390791ac254c8601cd3d713e381959f0dd9418b24ddb7"},{"filename":"benchmarks/notification_service/k8s/hpa.yaml","content":"apiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\nmetadata:\n name: notification-service\n namespace: notification-service\n labels:\n app.kubernetes.io/name: notification-service\nspec:\n scaleTargetRef:\n apiVersion: apps/v1\n kind: Deployment\n name: notification-service\n minReplicas: 2\n maxReplicas: 10\n metrics:\n - type: Resource\n resource:\n name: cpu\n target:\n type: Utilization\n averageUtilization: 70\n - type: Resource\n resource:\n name: memory\n target:\n type: Utilization\n averageUtilization: 80\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":602,"content_sha256":"0bd6ce22388934118991b5a88821be136cf39dde87fff4cf5febb49cdf4aa19d"},{"filename":"benchmarks/notification_service/k8s/kustomization.yaml","content":"apiVersion: kustomize.config.k8s.io/v1beta1\nkind: Kustomization\n\nnamespace: notification-service\n\nresources:\n - namespace.yaml\n - configmap.yaml\n - secret.yaml\n - redis.yaml\n - deployment.yaml\n - service.yaml\n - hpa.yaml\n - networkpolicy.yaml\n\ncommonLabels:\n app.kubernetes.io/part-of: notifications\n app.kubernetes.io/managed-by: kustomize\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":351,"content_sha256":"aa98835b01a6ba6af5ab331889c4d73bbb3bb12c0ce90d8df1b85d6fa4605ad4"},{"filename":"benchmarks/notification_service/k8s/namespace.yaml","content":"apiVersion: v1\nkind: Namespace\nmetadata:\n name: notification-service\n labels:\n app.kubernetes.io/name: notification-service\n app.kubernetes.io/part-of: notifications\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":174,"content_sha256":"af8a3be60e49df47f841c7187d92403662d44bd07728b554bcc10c4086368608"},{"filename":"benchmarks/notification_service/k8s/networkpolicy.yaml","content":"apiVersion: networking.k8s.io/v1\nkind: NetworkPolicy\nmetadata:\n name: notification-service-network\n namespace: notification-service\n labels:\n app.kubernetes.io/name: notification-service\nspec:\n podSelector:\n matchLabels:\n app.kubernetes.io/name: notification-service\n policyTypes:\n - Ingress\n - Egress\n ingress:\n # Allow inbound traffic on port 8000\n - from: []\n ports:\n - protocol: TCP\n port: 8000\n egress:\n # Allow outbound to Redis\n - to:\n - podSelector:\n matchLabels:\n app.kubernetes.io/name: redis\n ports:\n - protocol: TCP\n port: 6379\n # Allow DNS resolution\n - to: []\n ports:\n - protocol: UDP\n port: 53\n - protocol: TCP\n port: 53\n # Allow SMTP outbound (for email delivery)\n - to: []\n ports:\n - protocol: TCP\n port: 587\n - protocol: TCP\n port: 465\n - protocol: TCP\n port: 25\n---\napiVersion: networking.k8s.io/v1\nkind: NetworkPolicy\nmetadata:\n name: redis-network\n namespace: notification-service\n labels:\n app.kubernetes.io/name: redis\nspec:\n podSelector:\n matchLabels:\n app.kubernetes.io/name: redis\n policyTypes:\n - Ingress\n ingress:\n # Only allow inbound from notification-service pods\n - from:\n - podSelector:\n matchLabels:\n app.kubernetes.io/name: notification-service\n ports:\n - protocol: TCP\n port: 6379\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":1506,"content_sha256":"fa63dfa7e441ca08b95ecd0454743b5e77cef4752e2d7b96369c8761e0fd7ec7"},{"filename":"benchmarks/notification_service/k8s/redis.yaml","content":"apiVersion: apps/v1\nkind: Deployment\nmetadata:\n name: redis\n namespace: notification-service\n labels:\n app.kubernetes.io/name: redis\n app.kubernetes.io/component: cache\nspec:\n replicas: 1\n selector:\n matchLabels:\n app.kubernetes.io/name: redis\n template:\n metadata:\n labels:\n app.kubernetes.io/name: redis\n app.kubernetes.io/component: cache\n spec:\n containers:\n - name: redis\n image: redis:7-alpine\n ports:\n - name: redis\n containerPort: 6379\n protocol: TCP\n resources:\n requests:\n memory: \"64Mi\"\n cpu: \"50m\"\n limits:\n memory: \"256Mi\"\n cpu: \"200m\"\n livenessProbe:\n exec:\n command:\n - redis-cli\n - ping\n initialDelaySeconds: 5\n periodSeconds: 10\n readinessProbe:\n exec:\n command:\n - redis-cli\n - ping\n initialDelaySeconds: 3\n periodSeconds: 5\n volumeMounts:\n - name: redis-data\n mountPath: /data\n volumes:\n - name: redis-data\n emptyDir: {}\n---\napiVersion: v1\nkind: Service\nmetadata:\n name: redis\n namespace: notification-service\n labels:\n app.kubernetes.io/name: redis\nspec:\n type: ClusterIP\n ports:\n - port: 6379\n targetPort: redis\n protocol: TCP\n name: redis\n selector:\n app.kubernetes.io/name: redis\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":1552,"content_sha256":"6c06100b8a3d8cf6dee4c8173dd5017ad27fdee369ed8193ef55c7ba76963c74"},{"filename":"benchmarks/notification_service/k8s/secret.yaml","content":"apiVersion: v1\nkind: Secret\nmetadata:\n name: notification-service-secrets\n namespace: notification-service\n labels:\n app.kubernetes.io/name: notification-service\ntype: Opaque\nstringData:\n # Replace with actual values in production\n # Consider using external secret management (Vault, AWS Secrets Manager, etc.)\n NOTIF_REDIS_PASSWORD: \"\"\n NOTIF_SMTP_USER: \"\"\n NOTIF_SMTP_PASSWORD: \"\"\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":394,"content_sha256":"3db283021efd14fa9ea11b28d29d02511681b01cdbc7aa513b40d191110e0d13"},{"filename":"benchmarks/notification_service/k8s/service.yaml","content":"apiVersion: v1\nkind: Service\nmetadata:\n name: notification-service\n namespace: notification-service\n labels:\n app.kubernetes.io/name: notification-service\nspec:\n type: ClusterIP\n ports:\n - port: 80\n targetPort: http\n protocol: TCP\n name: http\n selector:\n app.kubernetes.io/name: notification-service\n","content_type":"application/yaml; charset=utf-8","language":"yaml","size":330,"content_sha256":"03d867e3c6fda74ebdf7f5cceae98b14e4ee908724a06ec6643456b0232ad049"},{"filename":"benchmarks/notification_service/logger.py","content":"\"\"\"Logging configuration for the Notification Service.\"\"\"\n\nimport logging\nimport sys\nfrom datetime import datetime\nfrom typing import Any\n\nfrom .config import settings\n\n\ndef setup_logger(name: str = \"notification_service\") -> logging.Logger:\n \"\"\"Set up and return a configured logger.\"\"\"\n logger = logging.getLogger(name)\n logger.setLevel(getattr(logging, settings.log_level.upper()))\n\n if not logger.handlers:\n handler = logging.StreamHandler(sys.stdout)\n handler.setLevel(getattr(logging, settings.log_level.upper()))\n\n formatter = logging.Formatter(\n \"%(asctime)s | %(levelname)-8s | %(name)s | %(message)s\",\n datefmt=\"%Y-%m-%d %H:%M:%S\"\n )\n handler.setFormatter(formatter)\n logger.addHandler(handler)\n\n return logger\n\n\nlogger = setup_logger()\n\n\ndef log_notification_event(\n event_type: str,\n notification_id: str,\n details: dict[str, Any] | None = None\n) -> None:\n \"\"\"Log a structured notification event.\"\"\"\n log_data = {\n \"event\": event_type,\n \"notification_id\": notification_id,\n \"timestamp\": datetime.utcnow().isoformat(),\n }\n if details:\n log_data.update(details)\n\n logger.info(f\"[{event_type}] notification_id={notification_id} {details or ''}\")\n\n\ndef log_error(\n message: str,\n notification_id: str | None = None,\n error: Exception | None = None\n) -> None:\n \"\"\"Log an error with context.\"\"\"\n extra = \"\"\n if notification_id:\n extra += f\" notification_id={notification_id}\"\n if error:\n extra += f\" error={type(error).__name__}: {error}\"\n\n logger.error(f\"{message}{extra}\")\n","content_type":"text/x-python; charset=utf-8","language":"python","size":1650,"content_sha256":"01a1bad23a3c50697120309e743074d297c054a146c347985c9f76549eb57269"},{"filename":"benchmarks/notification_service/main.py","content":"\"\"\"Notification Service REST API using FastAPI.\"\"\"\n\nimport asyncio\nimport uuid\nfrom contextlib import asynccontextmanager\nfrom typing import Optional\n\nfrom fastapi import FastAPI, HTTPException, status, BackgroundTasks\nfrom fastapi.responses import JSONResponse\n\nfrom .models import (\n NotificationMessage,\n NotificationType,\n NotificationPriority,\n HealthStatus,\n EmailPayload,\n)\nfrom .redis_client import queue\nfrom .processor import processor\nfrom .config import settings\nfrom .logger import logger\n\n\n@asynccontextmanager\nasync def lifespan(app: FastAPI):\n \"\"\"Manage application lifecycle.\"\"\"\n # Startup: start the processor in background\n logger.info(f\"Starting {settings.service_name} v{settings.service_version}\")\n task = asyncio.create_task(processor.run())\n yield\n # Shutdown: stop the processor\n processor.stop()\n task.cancel()\n try:\n await task\n except asyncio.CancelledError:\n pass\n logger.info(\"Service shutdown complete\")\n\n\napp = FastAPI(\n title=\"Notification Service\",\n description=\"A service for processing notification messages from Redis queue\",\n version=settings.service_version,\n lifespan=lifespan,\n)\n\n\[email protected](\"/health\", response_model=HealthStatus)\ndef health_check():\n \"\"\"Health check endpoint.\"\"\"\n redis_connected = queue.is_connected()\n queue_length = queue.queue_length() if redis_connected else -1\n\n status_str = \"healthy\" if redis_connected else \"degraded\"\n\n return HealthStatus(\n status=status_str,\n redis_connected=redis_connected,\n queue_length=queue_length,\n )\n\n\[email protected](\"/\")\ndef root():\n \"\"\"Root endpoint with service info.\"\"\"\n return {\n \"service\": settings.service_name,\n \"version\": settings.service_version,\n \"status\": \"running\",\n }\n\n\[email protected](\"/stats\")\ndef get_stats():\n \"\"\"Get processor statistics.\"\"\"\n return {\n \"processor\": processor.stats,\n \"queue_length\": queue.queue_length(),\n }\n\n\[email protected](\"/notifications/email\", status_code=status.HTTP_202_ACCEPTED)\ndef submit_email_notification(\n email: EmailPayload,\n priority: NotificationPriority = NotificationPriority.NORMAL,\n):\n \"\"\"Submit an email notification to the queue.\"\"\"\n notification_id = str(uuid.uuid4())\n\n message = NotificationMessage(\n id=notification_id,\n type=NotificationType.EMAIL,\n priority=priority,\n payload=email.model_dump(),\n )\n\n if queue.enqueue(message):\n return {\n \"notification_id\": notification_id,\n \"status\": \"queued\",\n \"type\": \"email\",\n }\n else:\n raise HTTPException(\n status_code=status.HTTP_503_SERVICE_UNAVAILABLE,\n detail=\"Failed to queue notification - Redis unavailable\",\n )\n\n\[email protected](\"/notifications\", status_code=status.HTTP_202_ACCEPTED)\ndef submit_notification(message: NotificationMessage):\n \"\"\"Submit a raw notification message to the queue.\"\"\"\n if queue.enqueue(message):\n return {\n \"notification_id\": message.id,\n \"status\": \"queued\",\n \"type\": message.type,\n }\n else:\n raise HTTPException(\n status_code=status.HTTP_503_SERVICE_UNAVAILABLE,\n detail=\"Failed to queue notification - Redis unavailable\",\n )\n","content_type":"text/x-python; charset=utf-8","language":"python","size":3329,"content_sha256":"9ee368dc99e75e2c277282f636198ccc6ed5ec2a52808f5eb86afb72159d0dfe"},{"filename":"benchmarks/notification_service/models.py","content":"\"\"\"Data models for the Notification Service.\"\"\"\n\nfrom datetime import datetime\nfrom enum import Enum\nfrom typing import Optional, Dict, Any\nfrom pydantic import BaseModel, Field, EmailStr\n\n\nclass NotificationType(str, Enum):\n \"\"\"Supported notification types.\"\"\"\n EMAIL = \"email\"\n # Extensible: add SMS, PUSH, WEBHOOK, etc.\n\n\nclass NotificationStatus(str, Enum):\n \"\"\"Notification processing status.\"\"\"\n PENDING = \"pending\"\n PROCESSING = \"processing\"\n SENT = \"sent\"\n FAILED = \"failed\"\n\n\nclass NotificationPriority(str, Enum):\n \"\"\"Notification priority levels.\"\"\"\n LOW = \"low\"\n NORMAL = \"normal\"\n HIGH = \"high\"\n URGENT = \"urgent\"\n\n\nclass EmailPayload(BaseModel):\n \"\"\"Email-specific notification payload.\"\"\"\n to: EmailStr\n subject: str = Field(..., min_length=1, max_length=500)\n body: str = Field(..., min_length=1)\n html_body: Optional[str] = None\n cc: Optional[list[EmailStr]] = None\n bcc: Optional[list[EmailStr]] = None\n\n\nclass NotificationMessage(BaseModel):\n \"\"\"Message schema for Redis queue.\"\"\"\n id: str = Field(..., description=\"Unique notification ID\")\n type: NotificationType\n priority: NotificationPriority = NotificationPriority.NORMAL\n payload: Dict[str, Any]\n created_at: datetime = Field(default_factory=datetime.utcnow)\n retry_count: int = 0\n max_retries: int = 3\n metadata: Optional[Dict[str, Any]] = None\n\n\nclass NotificationResult(BaseModel):\n \"\"\"Result of processing a notification.\"\"\"\n notification_id: str\n status: NotificationStatus\n processed_at: datetime = Field(default_factory=datetime.utcnow)\n error_message: Optional[str] = None\n details: Optional[Dict[str, Any]] = None\n\n\nclass HealthStatus(BaseModel):\n \"\"\"Health check response.\"\"\"\n status: str\n service: str = \"notification-service\"\n version: str = \"1.0.0\"\n redis_connected: bool\n queue_length: int = 0\n timestamp: datetime = Field(default_factory=datetime.utcnow)\n","content_type":"text/x-python; charset=utf-8","language":"python","size":1971,"content_sha256":"385a571edb8eb928e374ba701911da9398b7d05e2cfaf5a88133269dea9d0927"},{"filename":"benchmarks/notification_service/processor.py","content":"\"\"\"Notification processor - consumes messages from Redis queue.\"\"\"\n\nimport asyncio\nfrom typing import Callable, Optional\n\nfrom .redis_client import queue\nfrom .handlers import get_handler\nfrom .models import (\n NotificationMessage,\n NotificationResult,\n NotificationStatus,\n)\nfrom .config import settings\nfrom .logger import logger, log_notification_event, log_error\n\n\nclass NotificationProcessor:\n \"\"\"Processes notifications from the Redis queue.\"\"\"\n\n def __init__(self):\n self._running = False\n self._processed_count = 0\n self._failed_count = 0\n self._on_result: Optional[Callable[[NotificationResult], None]] = None\n\n @property\n def is_running(self) -> bool:\n return self._running\n\n @property\n def stats(self) -> dict:\n return {\n \"processed\": self._processed_count,\n \"failed\": self._failed_count,\n \"running\": self._running,\n }\n\n def set_result_callback(\n self, callback: Callable[[NotificationResult], None]\n ) -> None:\n \"\"\"Set a callback to be called with each processing result.\"\"\"\n self._on_result = callback\n\n def process_one(self, message: NotificationMessage) -> NotificationResult:\n \"\"\"Process a single notification message.\"\"\"\n log_notification_event(\"PROCESSING_START\", message.id, {\"type\": message.type})\n\n try:\n handler = get_handler(message.type)\n result = handler.handle(message)\n\n if result.status == NotificationStatus.SENT:\n self._processed_count += 1\n elif result.status == NotificationStatus.FAILED:\n self._handle_failure(message, result)\n\n if self._on_result:\n self._on_result(result)\n\n return result\n\n except Exception as e:\n log_error(\"Processing error\", message.id, e)\n self._failed_count += 1\n result = NotificationResult(\n notification_id=message.id,\n status=NotificationStatus.FAILED,\n error_message=str(e),\n )\n if self._on_result:\n self._on_result(result)\n return result\n\n def _handle_failure(\n self, message: NotificationMessage, result: NotificationResult\n ) -> None:\n \"\"\"Handle a failed notification - retry if possible.\"\"\"\n self._failed_count += 1\n\n if message.retry_count \u003c message.max_retries:\n log_notification_event(\n \"RETRY_SCHEDULED\",\n message.id,\n {\"attempt\": message.retry_count + 1, \"max\": message.max_retries},\n )\n queue.requeue(message)\n else:\n log_notification_event(\n \"MAX_RETRIES_EXCEEDED\",\n message.id,\n {\"attempts\": message.retry_count},\n )\n\n async def run(self) -> None:\n \"\"\"Run the processor loop (async).\"\"\"\n self._running = True\n logger.info(\"Notification processor started\")\n\n while self._running:\n message = queue.dequeue()\n if message:\n self.process_one(message)\n else:\n await asyncio.sleep(settings.poll_interval)\n\n logger.info(\"Notification processor stopped\")\n\n def stop(self) -> None:\n \"\"\"Signal the processor to stop.\"\"\"\n self._running = False\n logger.info(\"Processor stop requested\")\n\n\n# Global processor instance\nprocessor = NotificationProcessor()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":3527,"content_sha256":"9f82489e982ca7356fb9717e2c9e1b404c73928d8bf2da418aaf5ba06d61a4cb"},{"filename":"benchmarks/notification_service/pytest.ini","content":"[pytest]\ntestpaths = .\npython_files = test_*.py\npython_classes = Test*\npython_functions = test_*\naddopts = -v --tb=short\nasyncio_mode = auto\nfilterwarnings =\n ignore::DeprecationWarning\n","content_type":"text/plain; charset=utf-8","language":"ini","size":189,"content_sha256":"1bb57f9bf082f430aed19eeba5380a504acf0f19237b8b39a48477a858292c04"},{"filename":"benchmarks/notification_service/redis_client.py","content":"\"\"\"Redis client for queue operations.\"\"\"\n\nimport json\nimport time\nfrom typing import Optional\nimport redis\n\nfrom .config import settings\nfrom .models import NotificationMessage\nfrom .logger import logger\n\n\nclass CircuitBreaker:\n \"\"\"Simple circuit breaker to prevent cascading failures.\"\"\"\n\n def __init__(self, failure_threshold: int = 5, recovery_timeout: float = 30.0):\n self.failure_threshold = failure_threshold\n self.recovery_timeout = recovery_timeout\n self.failure_count = 0\n self.last_failure_time: Optional[float] = None\n self.state = \"closed\" # closed, open, half-open\n\n def record_failure(self) -> None:\n \"\"\"Record a failure and potentially open the circuit.\"\"\"\n self.last_failure_time = time.time()\n\n # In half-open state, any failure immediately re-opens the circuit\n if self.state == \"half-open\":\n self.state = \"open\"\n self._probe_in_progress = False\n logger.warning(\"Circuit breaker re-opened after failure in half-open state\")\n return\n\n self.failure_count += 1\n if self.failure_count >= self.failure_threshold:\n self.state = \"open\"\n logger.warning(f\"Circuit breaker opened after {self.failure_count} failures\")\n\n def record_success(self) -> None:\n \"\"\"Record a success and reset the circuit.\"\"\"\n if self.state == \"half-open\":\n logger.info(\"Circuit breaker closed after successful probe in half-open state\")\n self.failure_count = 0\n self.state = \"closed\"\n self._probe_in_progress = False\n\n def can_execute(self) -> bool:\n \"\"\"Check if we can attempt an operation.\"\"\"\n if self.state == \"closed\":\n return True\n if self.state == \"open\":\n if self.last_failure_time and (time.time() - self.last_failure_time) > self.recovery_timeout:\n self.state = \"half-open\"\n self._probe_in_progress = True\n return True\n return False\n # half-open: only allow if no probe in progress\n if self.state == \"half-open\":\n if getattr(self, '_probe_in_progress', False):\n return False # reject concurrent requests during probe\n return False # already probing, wait for result\n return False\n\n\nclass RedisQueue:\n \"\"\"Redis-based message queue for notifications.\"\"\"\n\n def __init__(self):\n self._client: Optional[redis.Redis] = None\n self._circuit_breaker = CircuitBreaker(\n failure_threshold=settings.circuit_breaker_threshold,\n recovery_timeout=settings.circuit_breaker_timeout,\n )\n\n @property\n def client(self) -> redis.Redis:\n \"\"\"Lazy initialization of Redis client.\"\"\"\n if self._client is None:\n self._client = redis.Redis(\n host=settings.redis_host,\n port=settings.redis_port,\n db=settings.redis_db,\n password=settings.redis_password,\n decode_responses=True,\n socket_connect_timeout=settings.redis_connect_timeout,\n socket_timeout=settings.redis_socket_timeout,\n retry_on_timeout=True,\n )\n return self._client\n\n def _reset_client(self) -> None:\n \"\"\"Reset the client connection on failures.\"\"\"\n if self._client is not None:\n try:\n self._client.close()\n except Exception:\n pass\n self._client = None\n\n def is_connected(self) -> bool:\n \"\"\"Check if Redis connection is healthy.\"\"\"\n if not self._circuit_breaker.can_execute():\n return False\n try:\n self.client.ping()\n self._circuit_breaker.record_success()\n return True\n except redis.ConnectionError:\n self._circuit_breaker.record_failure()\n self._reset_client()\n return False\n\n def enqueue(self, message: NotificationMessage) -> bool:\n \"\"\"Add a notification message to the queue with retry logic.\"\"\"\n if not self._circuit_breaker.can_execute():\n logger.warning(f\"Circuit breaker open, rejecting enqueue for: {message.id}\")\n return False\n\n for attempt in range(settings.redis_max_retries):\n try:\n data = message.model_dump_json()\n self.client.lpush(settings.redis_queue_name, data)\n logger.info(f\"Enqueued notification: {message.id}\")\n self._circuit_breaker.record_success()\n return True\n except redis.RedisError as e:\n logger.error(f\"Failed to enqueue message (attempt {attempt + 1}): {e}\")\n self._reset_client()\n if attempt \u003c settings.redis_max_retries - 1:\n time.sleep(settings.redis_retry_delay * (attempt + 1))\n\n # Only record failure to circuit breaker after all retries exhausted\n self._circuit_breaker.record_failure()\n return False\n\n def dequeue(self) -> Optional[NotificationMessage]:\n \"\"\"Remove and return a notification from the queue (FIFO).\"\"\"\n if not self._circuit_breaker.can_execute():\n return None\n\n try:\n data = self.client.rpop(settings.redis_queue_name)\n if data:\n self._circuit_breaker.record_success()\n return NotificationMessage.model_validate_json(data)\n return None\n except redis.RedisError as e:\n logger.error(f\"Failed to dequeue message: {e}\")\n self._circuit_breaker.record_failure()\n self._reset_client()\n return None\n\n def dequeue_blocking(self, timeout: int = 0) -> Optional[NotificationMessage]:\n \"\"\"Blocking dequeue with optional timeout.\"\"\"\n if not self._circuit_breaker.can_execute():\n return None\n\n try:\n result = self.client.brpop(settings.redis_queue_name, timeout=timeout)\n if result:\n _, data = result\n self._circuit_breaker.record_success()\n return NotificationMessage.model_validate_json(data)\n return None\n except redis.RedisError as e:\n logger.error(f\"Failed to dequeue message: {e}\")\n self._circuit_breaker.record_failure()\n self._reset_client()\n return None\n\n def requeue(self, message: NotificationMessage) -> bool:\n \"\"\"Re-add a message to the queue (for retries).\"\"\"\n message.retry_count += 1\n return self.enqueue(message)\n\n def queue_length(self) -> int:\n \"\"\"Get current queue length.\"\"\"\n if not self._circuit_breaker.can_execute():\n return -1\n\n try:\n length = self.client.llen(settings.redis_queue_name)\n self._circuit_breaker.record_success()\n return length\n except redis.RedisError:\n self._circuit_breaker.record_failure()\n self._reset_client()\n return -1\n\n def clear(self) -> bool:\n \"\"\"Clear the queue (for testing).\"\"\"\n try:\n self.client.delete(settings.redis_queue_name)\n return True\n except redis.RedisError:\n return False\n\n\n# Global queue instance\nqueue = RedisQueue()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":7354,"content_sha256":"7d317553110ea01d0b07bc6ece1c0ff72ba7c5504c232d7421a42f4ce5411993"},{"filename":"benchmarks/notification_service/requirements.txt","content":"fastapi>=0.100.0\nuvicorn>=0.23.0\npydantic[email]>=2.0.0\npydantic-settings>=2.0.0\nredis>=5.0.0\npytest>=7.0.0\npytest-asyncio>=0.21.0\nhttpx>=0.24.0\n","content_type":"text/plain; charset=utf-8","language":null,"size":145,"content_sha256":"5e36c24879fc3dce1a98806676b942c34bb1e5acaae15b2f7325bbab6803f1de"},{"filename":"benchmarks/notification_service/scripts/deploy.sh","content":"#!/bin/bash\nset -e\n\necho \"=== Notification Service Deployment ===\"\n\n# Colors\nRED='\\033[0;31m'\nGREEN='\\033[0;32m'\nYELLOW='\\033[1;33m'\nNC='\\033[0m'\n\nusage() {\n echo \"Usage: $0 [environment] [options]\"\n echo \"\"\n echo \"Environments:\"\n echo \" local Deploy to local Docker Compose\"\n echo \" k8s Deploy to Kubernetes cluster\"\n echo \"\"\n echo \"Options:\"\n echo \" --build Build images before deploying\"\n echo \" --dry-run Show what would be deployed (k8s only)\"\n echo \"\"\n exit 1\n}\n\nENVIRONMENT=\"${1:-local}\"\nBUILD=false\nDRY_RUN=false\n\nshift || true\nwhile [[ $# -gt 0 ]]; do\n case $1 in\n --build)\n BUILD=true\n shift\n ;;\n --dry-run)\n DRY_RUN=true\n shift\n ;;\n *)\n usage\n ;;\n esac\ndone\n\ncd \"$(dirname \"$0\")/..\"\n\ndeploy_local() {\n echo -e \"${YELLOW}Deploying to local Docker Compose...${NC}\"\n\n if [ \"$BUILD\" = true ]; then\n echo \"Building images...\"\n docker-compose build\n fi\n\n docker-compose up -d\n\n echo -e \"${GREEN}Service deployed!${NC}\"\n echo \"\"\n echo \"Endpoints:\"\n echo \" - API: http://localhost:8000\"\n echo \" - Health: http://localhost:8000/health\"\n echo \" - Stats: http://localhost:8000/stats\"\n echo \"\"\n echo \"Commands:\"\n echo \" - Logs: docker-compose logs -f notification-service\"\n echo \" - Stop: docker-compose down\"\n}\n\ndeploy_k8s() {\n echo -e \"${YELLOW}Deploying to Kubernetes...${NC}\"\n\n if ! command -v kubectl &> /dev/null; then\n echo -e \"${RED}kubectl not found. Please install kubectl first.${NC}\"\n exit 1\n fi\n\n if [ \"$DRY_RUN\" = true ]; then\n echo \"Dry run - showing manifests:\"\n kubectl kustomize k8s/\n echo \"\"\n echo \"To apply: kubectl apply -k k8s/\"\n return\n fi\n\n if [ \"$BUILD\" = true ]; then\n echo \"Building and pushing image...\"\n REGISTRY=\"${REGISTRY:-localhost:5000}\"\n TAG=\"${TAG:-latest}\"\n docker build -t \"$REGISTRY/notification-service:$TAG\" .\n docker push \"$REGISTRY/notification-service:$TAG\"\n\n # Update image in deployment\n kubectl -n notification-service set image deployment/notification-service \\\n notification-service=\"$REGISTRY/notification-service:$TAG\"\n fi\n\n echo \"Applying Kubernetes manifests...\"\n kubectl apply -k k8s/\n\n echo \"Waiting for deployment...\"\n kubectl -n notification-service rollout status deployment/notification-service --timeout=120s\n\n echo -e \"${GREEN}Deployment complete!${NC}\"\n echo \"\"\n echo \"Commands:\"\n echo \" - Logs: kubectl -n notification-service logs -f deployment/notification-service\"\n echo \" - Port-forward: kubectl -n notification-service port-forward svc/notification-service 8000:80\"\n echo \" - Status: kubectl -n notification-service get pods\"\n}\n\ncase \"$ENVIRONMENT\" in\n local)\n deploy_local\n ;;\n k8s)\n deploy_k8s\n ;;\n *)\n usage\n ;;\nesac\n","content_type":"application/x-sh; charset=utf-8","language":"bash","size":3034,"content_sha256":"cf948fb8e8682a02865530b1f54330fa4defec91f448b69d7e9e348977c064f1"},{"filename":"benchmarks/notification_service/scripts/test.sh","content":"#!/bin/bash\nset -e\n\necho \"=== Notification Service Test Runner ===\"\n\n# Colors for output\nRED='\\033[0;31m'\nGREEN='\\033[0;32m'\nYELLOW='\\033[1;33m'\nNC='\\033[0m' # No Color\n\nusage() {\n echo \"Usage: $0 [command]\"\n echo \"\"\n echo \"Commands:\"\n echo \" unit Run unit tests only (no Redis required)\"\n echo \" integration Run integration tests (requires Redis)\"\n echo \" all Run all tests (requires Redis)\"\n echo \" docker Run all tests in Docker containers\"\n echo \" coverage Run tests with coverage report\"\n echo \"\"\n exit 1\n}\n\nrun_unit_tests() {\n echo -e \"${YELLOW}Running unit tests...${NC}\"\n python -m pytest test_service.py -v --tb=short\n}\n\nrun_integration_tests() {\n echo -e \"${YELLOW}Running integration tests...${NC}\"\n python -m pytest test_integration.py -v --tb=short\n}\n\nrun_all_tests() {\n echo -e \"${YELLOW}Running all tests...${NC}\"\n python -m pytest . -v --tb=short\n}\n\nrun_docker_tests() {\n echo -e \"${YELLOW}Running tests in Docker...${NC}\"\n docker-compose -f docker-compose.test.yml build\n docker-compose -f docker-compose.test.yml run --rm test-runner\n docker-compose -f docker-compose.test.yml down\n}\n\nrun_coverage() {\n echo -e \"${YELLOW}Running tests with coverage...${NC}\"\n python -m pytest . -v --cov=. --cov-report=term-missing --cov-report=html\n echo -e \"${GREEN}Coverage report generated in htmlcov/${NC}\"\n}\n\ncd \"$(dirname \"$0\")/..\"\n\ncase \"${1:-all}\" in\n unit)\n run_unit_tests\n ;;\n integration)\n run_integration_tests\n ;;\n all)\n run_all_tests\n ;;\n docker)\n run_docker_tests\n ;;\n coverage)\n run_coverage\n ;;\n *)\n usage\n ;;\nesac\n\necho -e \"${GREEN}Tests completed!${NC}\"\n","content_type":"application/x-sh; charset=utf-8","language":"bash","size":1774,"content_sha256":"95c1e1b5569b2c84234771f3bc8d98ef74dc53c26a6c25fba0ca3eba64dcb685"},{"filename":"benchmarks/notification_service/service.py","content":"\"\"\"Service layer for notification operations.\"\"\"\n\nimport uuid\nfrom typing import Optional, List\nfrom datetime import datetime\n\nfrom .models import (\n NotificationMessage,\n NotificationType,\n NotificationPriority,\n NotificationStatus,\n NotificationResult,\n EmailPayload,\n)\nfrom .redis_client import queue\nfrom .processor import processor\nfrom .logger import logger, log_notification_event\n\n\nclass NotificationService:\n \"\"\"High-level service for managing notifications.\"\"\"\n\n def submit_email(\n self,\n to: str,\n subject: str,\n body: str,\n html_body: Optional[str] = None,\n cc: Optional[List[str]] = None,\n bcc: Optional[List[str]] = None,\n priority: NotificationPriority = NotificationPriority.NORMAL,\n metadata: Optional[dict] = None,\n ) -> Optional[str]:\n \"\"\"Submit an email notification.\n\n Returns the notification ID if queued successfully, None otherwise.\n \"\"\"\n notification_id = str(uuid.uuid4())\n\n payload = {\n \"to\": to,\n \"subject\": subject,\n \"body\": body,\n }\n if html_body:\n payload[\"html_body\"] = html_body\n if cc:\n payload[\"cc\"] = cc\n if bcc:\n payload[\"bcc\"] = bcc\n\n message = NotificationMessage(\n id=notification_id,\n type=NotificationType.EMAIL,\n priority=priority,\n payload=payload,\n metadata=metadata,\n )\n\n if queue.enqueue(message):\n log_notification_event(\"NOTIFICATION_SUBMITTED\", notification_id, {\n \"type\": \"email\",\n \"priority\": priority,\n \"to\": to,\n })\n return notification_id\n\n logger.error(f\"Failed to queue notification: {notification_id}\")\n return None\n\n def submit_notification(self, message: NotificationMessage) -> bool:\n \"\"\"Submit a pre-built notification message.\n\n Returns True if queued successfully.\n \"\"\"\n if queue.enqueue(message):\n log_notification_event(\"NOTIFICATION_SUBMITTED\", message.id, {\n \"type\": message.type,\n \"priority\": message.priority,\n })\n return True\n return False\n\n def get_queue_status(self) -> dict:\n \"\"\"Get current queue status.\"\"\"\n connected = queue.is_connected()\n length = queue.queue_length() if connected else -1\n\n return {\n \"connected\": connected,\n \"queue_length\": length,\n \"circuit_breaker_state\": queue._circuit_breaker.state,\n }\n\n def get_processor_stats(self) -> dict:\n \"\"\"Get processor statistics.\"\"\"\n return {\n **processor.stats,\n \"queue_length\": queue.queue_length(),\n }\n\n def is_healthy(self) -> bool:\n \"\"\"Check if the service is healthy.\"\"\"\n return queue.is_connected()\n\n def process_sync(self, message: NotificationMessage) -> NotificationResult:\n \"\"\"Process a notification synchronously (bypass queue).\n\n Useful for testing or immediate processing requirements.\n \"\"\"\n return processor.process_one(message)\n\n\n# Global service instance\nnotification_service = NotificationService()\n","content_type":"text/x-python; charset=utf-8","language":"python","size":3300,"content_sha256":"26a14a75b27824a521ce99e79d3c7f533f8a8a7960fbfb5e93ec86badd4afc36"},{"filename":"benchmarks/notification_service/test_integration.py","content":"\"\"\"Integration tests for the Notification Service.\n\nThese tests require a running Redis instance.\nRun with: docker-compose -f docker-compose.test.yml run test-runner\n\"\"\"\n\nimport os\nimport time\nimport pytest\nfrom fastapi.testclient import TestClient\n\n# Skip integration tests if Redis is not available\nREDIS_HOST = os.getenv(\"NOTIF_REDIS_HOST\", \"localhost\")\nREDIS_PORT = int(os.getenv(\"NOTIF_REDIS_PORT\", \"6379\"))\n\n\ndef redis_available():\n \"\"\"Check if Redis is available.\"\"\"\n try:\n import redis\n client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, socket_timeout=1)\n client.ping()\n return True\n except Exception:\n return False\n\n\npytestmark = pytest.mark.skipif(\n not redis_available(),\n reason=\"Redis not available\"\n)\n\n\[email protected](scope=\"module\")\ndef integration_client():\n \"\"\"Create test client with real Redis connection.\"\"\"\n from notification_service.main import app\n with TestClient(app) as client:\n yield client\n\n\[email protected](autouse=True)\ndef clear_queue():\n \"\"\"Clear the notification queue before each test.\"\"\"\n import redis\n client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT)\n client.delete(\"notifications\")\n yield\n client.delete(\"notifications\")\n\n\nclass TestHealthIntegration:\n \"\"\"Integration tests for health endpoint.\"\"\"\n\n def test_health_with_redis(self, integration_client):\n \"\"\"Health check should report healthy when Redis is connected.\"\"\"\n response = integration_client.get(\"/health\")\n assert response.status_code == 200\n data = response.json()\n assert data[\"status\"] == \"healthy\"\n assert data[\"redis_connected\"] is True\n\n\nclass TestNotificationFlow:\n \"\"\"Integration tests for the full notification flow.\"\"\"\n\n def test_submit_and_check_queue(self, integration_client):\n \"\"\"Submit notification and verify it's in the queue.\"\"\"\n email_data = {\n \"to\": \"[email protected]\",\n \"subject\": \"Integration Test\",\n \"body\": \"Testing the full notification flow\",\n }\n\n # Submit notification\n response = integration_client.post(\"/notifications/email\", json=email_data)\n assert response.status_code == 202\n notification_id = response.json()[\"notification_id\"]\n\n # Check queue has the message\n response = integration_client.get(\"/health\")\n assert response.json()[\"queue_length\"] >= 1\n\n def test_submit_multiple_notifications(self, integration_client):\n \"\"\"Submit multiple notifications and verify queue length.\"\"\"\n for i in range(5):\n email_data = {\n \"to\": f\"test{i}@example.com\",\n \"subject\": f\"Test {i}\",\n \"body\": f\"Body {i}\",\n }\n response = integration_client.post(\"/notifications/email\", json=email_data)\n assert response.status_code == 202\n\n # Check queue has all messages\n response = integration_client.get(\"/health\")\n assert response.json()[\"queue_length\"] >= 5\n\n def test_notification_with_priority(self, integration_client):\n \"\"\"Test submitting notifications with different priorities.\"\"\"\n priorities = [\"low\", \"normal\", \"high\", \"urgent\"]\n\n for priority in priorities:\n email_data = {\n \"to\": \"[email protected]\",\n \"subject\": f\"Priority: {priority}\",\n \"body\": \"Testing priority\",\n }\n response = integration_client.post(\n f\"/notifications/email?priority={priority}\",\n json=email_data\n )\n assert response.status_code == 202\n\n def test_raw_notification_submission(self, integration_client):\n \"\"\"Test submitting raw notification message.\"\"\"\n notification = {\n \"id\": \"integration-raw-001\",\n \"type\": \"email\",\n \"priority\": \"high\",\n \"payload\": {\n \"to\": \"[email protected]\",\n \"subject\": \"Raw Notification\",\n \"body\": \"Submitted via raw endpoint\",\n },\n }\n\n response = integration_client.post(\"/notifications\", json=notification)\n assert response.status_code == 202\n assert response.json()[\"notification_id\"] == \"integration-raw-001\"\n\n\nclass TestStatsIntegration:\n \"\"\"Integration tests for stats endpoint.\"\"\"\n\n def test_stats_endpoint(self, integration_client):\n \"\"\"Stats endpoint should return processor stats.\"\"\"\n response = integration_client.get(\"/stats\")\n assert response.status_code == 200\n data = response.json()\n assert \"processor\" in data\n assert \"queue_length\" in data\n assert isinstance(data[\"processor\"][\"processed\"], int)\n assert isinstance(data[\"processor\"][\"failed\"], int)\n\n\nclass TestErrorHandling:\n \"\"\"Integration tests for error handling.\"\"\"\n\n def test_invalid_email_format(self, integration_client):\n \"\"\"Invalid email should return validation error.\"\"\"\n email_data = {\n \"to\": \"not-an-email\",\n \"subject\": \"Test\",\n \"body\": \"Body\",\n }\n response = integration_client.post(\"/notifications/email\", json=email_data)\n assert response.status_code == 422\n\n def test_missing_required_fields(self, integration_client):\n \"\"\"Missing required fields should return validation error.\"\"\"\n email_data = {\n \"to\": \"[email protected]\",\n # missing subject and body\n }\n response = integration_client.post(\"/notifications/email\", json=email_data)\n assert response.status_code == 422\n\n def test_empty_subject(self, integration_client):\n \"\"\"Empty subject should return validation error.\"\"\"\n email_data = {\n \"to\": \"[email protected]\",\n \"subject\": \"\",\n \"body\": \"Body\",\n }\n response = integration_client.post(\"/notifications/email\", json=email_data)\n assert response.status_code == 422\n","content_type":"text/x-python; charset=utf-8","language":"python","size":6022,"content_sha256":"52c7ee4e7bd41c86224b9fa0123f2f339b4c18726638e6529799c5354b179e61"},{"filename":"benchmarks/notification_service/test_service.py","content":"\"\"\"Tests for the Notification Service.\"\"\"\n\nimport pytest\nfrom datetime import datetime\nfrom unittest.mock import patch, MagicMock, AsyncMock\nfrom fastapi.testclient import TestClient\n\nfrom notification_service.main import app\nfrom notification_service.models import (\n NotificationMessage,\n NotificationType,\n NotificationPriority,\n NotificationStatus,\n EmailPayload,\n NotificationResult,\n HealthStatus,\n)\nfrom notification_service.handlers import EmailHandler, get_handler, HANDLERS\nfrom notification_service.processor import NotificationProcessor\nfrom notification_service.redis_client import RedisQueue\nfrom notification_service.config import Settings\n\n\[email protected]\ndef client():\n \"\"\"Create a test client.\"\"\"\n return TestClient(app)\n\n\[email protected]\ndef mock_redis():\n \"\"\"Mock Redis queue.\"\"\"\n with patch(\"notification_service.main.queue\") as mock_queue:\n mock_queue.is_connected.return_value = True\n mock_queue.queue_length.return_value = 5\n mock_queue.enqueue.return_value = True\n yield mock_queue\n\n\[email protected]\ndef sample_email_payload():\n \"\"\"Create a sample email payload.\"\"\"\n return {\n \"to\": \"[email protected]\",\n \"subject\": \"Test Subject\",\n \"body\": \"Test body content\",\n }\n\n\[email protected]\ndef sample_notification_message():\n \"\"\"Create a sample notification message.\"\"\"\n return NotificationMessage(\n id=\"test-notification-123\",\n type=NotificationType.EMAIL,\n priority=NotificationPriority.NORMAL,\n payload={\n \"to\": \"[email protected]\",\n \"subject\": \"Test Subject\",\n \"body\": \"Test body content\",\n },\n )\n\n\nclass TestHealthEndpoint:\n \"\"\"Tests for health check endpoint.\"\"\"\n\n def test_health_check_healthy(self, client, mock_redis):\n response = client.get(\"/health\")\n assert response.status_code == 200\n data = response.json()\n assert data[\"status\"] == \"healthy\"\n assert data[\"redis_connected\"] is True\n assert data[\"service\"] == \"notification-service\"\n\n def test_health_check_degraded(self, client):\n with patch(\"notification_service.main.queue\") as mock_queue:\n mock_queue.is_connected.return_value = False\n mock_queue.queue_length.return_value = -1\n response = client.get(\"/health\")\n assert response.status_code == 200\n data = response.json()\n assert data[\"status\"] == \"degraded\"\n\n\nclass TestRootEndpoint:\n \"\"\"Tests for root endpoint.\"\"\"\n\n def test_root(self, client):\n response = client.get(\"/\")\n assert response.status_code == 200\n data = response.json()\n assert data[\"service\"] == \"notification-service\"\n assert data[\"status\"] == \"running\"\n\n\nclass TestEmailNotificationEndpoint:\n \"\"\"Tests for email notification submission.\"\"\"\n\n def test_submit_email_notification(self, client, mock_redis):\n email_data = {\n \"to\": \"[email protected]\",\n \"subject\": \"Test Subject\",\n \"body\": \"Test body content\",\n }\n response = client.post(\"/notifications/email\", json=email_data)\n assert response.status_code == 202\n data = response.json()\n assert \"notification_id\" in data\n assert data[\"status\"] == \"queued\"\n assert data[\"type\"] == \"email\"\n\n def test_submit_email_invalid(self, client, mock_redis):\n email_data = {\n \"to\": \"invalid-email\",\n \"subject\": \"Test\",\n \"body\": \"Body\",\n }\n response = client.post(\"/notifications/email\", json=email_data)\n assert response.status_code == 422\n\n def test_submit_email_redis_unavailable(self, client):\n with patch(\"notification_service.main.queue\") as mock_queue:\n mock_queue.enqueue.return_value = False\n email_data = {\n \"to\": \"[email protected]\",\n \"subject\": \"Test\",\n \"body\": \"Body\",\n }\n response = client.post(\"/notifications/email\", json=email_data)\n assert response.status_code == 503\n\n\nclass TestEmailHandler:\n \"\"\"Tests for email notification handler.\"\"\"\n\n def test_validate_payload_valid(self):\n handler = EmailHandler()\n payload = {\n \"to\": \"[email protected]\",\n \"subject\": \"Test\",\n \"body\": \"Body\",\n }\n assert handler.validate_payload(payload) is True\n\n def test_validate_payload_invalid(self):\n handler = EmailHandler()\n payload = {\"to\": \"invalid\", \"subject\": \"Test\"}\n assert handler.validate_payload(payload) is False\n\n def test_handle_email(self):\n handler = EmailHandler()\n message = NotificationMessage(\n id=\"test-123\",\n type=NotificationType.EMAIL,\n payload={\n \"to\": \"[email protected]\",\n \"subject\": \"Test Subject\",\n \"body\": \"Test body\",\n },\n )\n result = handler.handle(message)\n assert result.notification_id == \"test-123\"\n assert result.status == NotificationStatus.SENT\n\n\nclass TestGetHandler:\n \"\"\"Tests for handler registry.\"\"\"\n\n def test_get_email_handler(self):\n handler = get_handler(NotificationType.EMAIL)\n assert isinstance(handler, EmailHandler)\n\n\nclass TestNotificationProcessor:\n \"\"\"Tests for notification processor.\"\"\"\n\n def test_process_one(self):\n processor = NotificationProcessor()\n message = NotificationMessage(\n id=\"proc-test-123\",\n type=NotificationType.EMAIL,\n payload={\n \"to\": \"[email protected]\",\n \"subject\": \"Test\",\n \"body\": \"Body\",\n },\n )\n result = processor.process_one(message)\n assert result.status == NotificationStatus.SENT\n assert processor.stats[\"processed\"] == 1\n\n def test_stats(self):\n processor = NotificationProcessor()\n assert processor.stats[\"processed\"] == 0\n assert processor.stats[\"failed\"] == 0\n assert processor.stats[\"running\"] is False\n\n def test_process_invalid_payload(self):\n processor = NotificationProcessor()\n message = NotificationMessage(\n id=\"invalid-payload-test\",\n type=NotificationType.EMAIL,\n payload={\"invalid\": \"data\"},\n )\n result = processor.process_one(message)\n assert result.status == NotificationStatus.FAILED\n assert processor.stats[\"failed\"] == 1\n\n def test_result_callback(self):\n processor = NotificationProcessor()\n results = []\n processor.set_result_callback(lambda r: results.append(r))\n\n message = NotificationMessage(\n id=\"callback-test\",\n type=NotificationType.EMAIL,\n payload={\n \"to\": \"[email protected]\",\n \"subject\": \"Test\",\n \"body\": \"Body\",\n },\n )\n processor.process_one(message)\n assert len(results) == 1\n assert results[0].notification_id == \"callback-test\"\n\n\nclass TestModels:\n \"\"\"Tests for data models.\"\"\"\n\n def test_email_payload_valid(self):\n payload = EmailPayload(\n to=\"[email protected]\",\n subject=\"Test\",\n body=\"Body\",\n )\n assert payload.to == \"[email protected]\"\n\n def test_email_payload_with_optional_fields(self):\n payload = EmailPayload(\n to=\"[email protected]\",\n subject=\"Test\",\n body=\"Body\",\n html_body=\"\u003cp>HTML Body\u003c/p>\",\n cc=[\"[email protected]\"],\n bcc=[\"[email protected]\"],\n )\n assert payload.html_body == \"\u003cp>HTML Body\u003c/p>\"\n assert len(payload.cc) == 1\n assert len(payload.bcc) == 1\n\n def test_notification_message_defaults(self):\n msg = NotificationMessage(\n id=\"test-id\",\n type=NotificationType.EMAIL,\n payload={\"to\": \"[email protected]\"},\n )\n assert msg.priority == NotificationPriority.NORMAL\n assert msg.retry_count == 0\n assert msg.max_retries == 3\n assert isinstance(msg.created_at, datetime)\n\n def test_notification_result(self):\n result = NotificationResult(\n notification_id=\"test-id\",\n status=NotificationStatus.SENT,\n )\n assert result.error_message is None\n assert isinstance(result.processed_at, datetime)\n\n def test_health_status(self):\n health = HealthStatus(\n status=\"healthy\",\n redis_connected=True,\n queue_length=10,\n )\n assert health.service == \"notification-service\"\n assert health.version == \"1.0.0\"\n\n\nclass TestNotificationSubmission:\n \"\"\"Tests for notification submission endpoints.\"\"\"\n\n def test_submit_raw_notification(self, client, mock_redis):\n notification = {\n \"id\": \"raw-test-123\",\n \"type\": \"email\",\n \"payload\": {\n \"to\": \"[email protected]\",\n \"subject\": \"Test\",\n \"body\": \"Body\",\n },\n }\n response = client.post(\"/notifications\", json=notification)\n assert response.status_code == 202\n data = response.json()\n assert data[\"notification_id\"] == \"raw-test-123\"\n\n def test_submit_email_with_priority(self, client, mock_redis):\n email_data = {\n \"to\": \"[email protected]\",\n \"subject\": \"Urgent Test\",\n \"body\": \"Urgent body content\",\n }\n response = client.post(\n \"/notifications/email?priority=urgent\", json=email_data\n )\n assert response.status_code == 202\n\n\nclass TestStatsEndpoint:\n \"\"\"Tests for stats endpoint.\"\"\"\n\n def test_get_stats(self, client, mock_redis):\n with patch(\"notification_service.main.processor\") as mock_processor:\n mock_processor.stats = {\n \"processed\": 10,\n \"failed\": 2,\n \"running\": True,\n }\n response = client.get(\"/stats\")\n assert response.status_code == 200\n data = response.json()\n assert \"processor\" in data\n assert \"queue_length\" in data\n\n\nclass TestRedisQueue:\n \"\"\"Tests for Redis queue operations.\"\"\"\n\n def test_queue_initialization(self):\n queue = RedisQueue()\n assert queue._client is None\n\n @patch(\"notification_service.redis_client.redis.Redis\")\n def test_enqueue_message(self, mock_redis_class):\n mock_client = MagicMock()\n mock_redis_class.return_value = mock_client\n\n queue = RedisQueue()\n message = NotificationMessage(\n id=\"queue-test\",\n type=NotificationType.EMAIL,\n payload={\"to\": \"[email protected]\", \"subject\": \"Test\", \"body\": \"Body\"},\n )\n result = queue.enqueue(message)\n assert mock_client.lpush.called\n\n @patch(\"notification_service.redis_client.redis.Redis\")\n def test_dequeue_message(self, mock_redis_class):\n mock_client = MagicMock()\n mock_client.rpop.return_value = '{\"id\": \"test\", \"type\": \"email\", \"payload\": {\"to\": \"[email protected]\", \"subject\": \"s\", \"body\": \"b\"}, \"priority\": \"normal\", \"retry_count\": 0, \"max_retries\": 3}'\n mock_redis_class.return_value = mock_client\n\n queue = RedisQueue()\n message = queue.dequeue()\n assert message is not None\n assert message.id == \"test\"\n\n @patch(\"notification_service.redis_client.redis.Redis\")\n def test_queue_length(self, mock_redis_class):\n mock_client = MagicMock()\n mock_client.llen.return_value = 5\n mock_redis_class.return_value = mock_client\n\n queue = RedisQueue()\n assert queue.queue_length() == 5\n\n @patch(\"notification_service.redis_client.redis.Redis\")\n def test_requeue_increments_retry(self, mock_redis_class):\n mock_client = MagicMock()\n mock_redis_class.return_value = mock_client\n\n queue = RedisQueue()\n message = NotificationMessage(\n id=\"retry-test\",\n type=NotificationType.EMAIL,\n payload={\"to\": \"[email protected]\", \"subject\": \"Test\", \"body\": \"Body\"},\n retry_count=1,\n )\n queue.requeue(message)\n assert message.retry_count == 2\n\n\nclass TestConfig:\n \"\"\"Tests for configuration.\"\"\"\n\n def test_default_settings(self):\n settings = Settings()\n assert settings.redis_host == \"localhost\"\n assert settings.redis_port == 6379\n assert settings.poll_interval == 1.0\n assert settings.max_retries == 3\n\n @patch.dict(\"os.environ\", {\"NOTIF_REDIS_HOST\": \"redis-server\", \"NOTIF_REDIS_PORT\": \"6380\"})\n def test_settings_from_env(self):\n settings = Settings()\n assert settings.redis_host == \"redis-server\"\n assert settings.redis_port == 6380\n\n\nclass TestCircuitBreaker:\n \"\"\"Tests for circuit breaker functionality.\"\"\"\n\n def test_circuit_starts_closed(self):\n from notification_service.redis_client import CircuitBreaker\n\n cb = CircuitBreaker(failure_threshold=3, recovery_timeout=10.0)\n assert cb.state == \"closed\"\n assert cb.can_execute() is True\n\n def test_circuit_opens_after_threshold(self):\n from notification_service.redis_client import CircuitBreaker\n\n cb = CircuitBreaker(failure_threshold=3, recovery_timeout=10.0)\n cb.record_failure()\n cb.record_failure()\n assert cb.state == \"closed\"\n cb.record_failure()\n assert cb.state == \"open\"\n assert cb.can_execute() is False\n\n def test_circuit_resets_on_success(self):\n from notification_service.redis_client import CircuitBreaker\n\n cb = CircuitBreaker(failure_threshold=3, recovery_timeout=10.0)\n cb.record_failure()\n cb.record_failure()\n assert cb.failure_count == 2\n cb.record_success()\n assert cb.failure_count == 0\n assert cb.state == \"closed\"\n\n def test_circuit_half_open_after_timeout(self):\n from notification_service.redis_client import CircuitBreaker\n import time\n\n cb = CircuitBreaker(failure_threshold=2, recovery_timeout=0.1)\n cb.record_failure()\n cb.record_failure()\n assert cb.state == \"open\"\n time.sleep(0.15)\n assert cb.can_execute() is True\n assert cb.state == \"half-open\"\n\n def test_circuit_reopens_on_failure_in_half_open(self):\n from notification_service.redis_client import CircuitBreaker\n import time\n\n cb = CircuitBreaker(failure_threshold=2, recovery_timeout=0.1)\n cb.record_failure()\n cb.record_failure()\n time.sleep(0.15)\n cb.can_execute() # Transitions to half-open\n cb.record_failure()\n assert cb.state == \"open\"\n\n\nclass TestLogger:\n \"\"\"Tests for logging functionality.\"\"\"\n\n def test_setup_logger(self):\n from notification_service.logger import setup_logger\n import logging\n\n logger = setup_logger(\"test_logger\")\n assert isinstance(logger, logging.Logger)\n assert logger.name == \"test_logger\"\n\n def test_log_notification_event(self):\n from notification_service.logger import log_notification_event\n\n # Should not raise\n log_notification_event(\"TEST_EVENT\", \"test-123\", {\"key\": \"value\"})\n log_notification_event(\"TEST_EVENT_NO_DETAILS\", \"test-456\")\n\n def test_log_error(self):\n from notification_service.logger import log_error\n\n # Should not raise\n log_error(\"Test error message\")\n log_error(\"Error with ID\", notification_id=\"test-123\")\n log_error(\"Error with exception\", error=ValueError(\"test error\"))\n\n\nclass TestHandlerRegistry:\n \"\"\"Tests for notification handler registry.\"\"\"\n\n def test_email_handler_registered(self):\n assert NotificationType.EMAIL in HANDLERS\n assert HANDLERS[NotificationType.EMAIL] == EmailHandler\n\n def test_get_handler_unknown_type(self):\n with pytest.raises(ValueError, match=\"No handler for notification type\"):\n # Temporarily mock an unknown type\n get_handler(\"unknown\")\n\n\nclass TestNotificationService:\n \"\"\"Tests for the high-level notification service.\"\"\"\n\n @patch(\"notification_service.service.queue\")\n def test_submit_email(self, mock_queue):\n \"\"\"Test submitting an email via service layer.\"\"\"\n from notification_service.service import NotificationService\n\n mock_queue.enqueue.return_value = True\n\n service = NotificationService()\n notification_id = service.submit_email(\n to=\"[email protected]\",\n subject=\"Service Test\",\n body=\"Testing service layer\",\n )\n\n assert notification_id is not None\n assert mock_queue.enqueue.called\n call_args = mock_queue.enqueue.call_args[0][0]\n assert call_args.type == NotificationType.EMAIL\n assert call_args.payload[\"to\"] == \"[email protected]\"\n\n @patch(\"notification_service.service.queue\")\n def test_submit_email_with_options(self, mock_queue):\n \"\"\"Test email submission with all options.\"\"\"\n from notification_service.service import NotificationService\n\n mock_queue.enqueue.return_value = True\n\n service = NotificationService()\n notification_id = service.submit_email(\n to=\"[email protected]\",\n subject=\"Full Options\",\n body=\"Plain body\",\n html_body=\"\u003cp>HTML body\u003c/p>\",\n cc=[\"[email protected]\"],\n bcc=[\"[email protected]\"],\n priority=NotificationPriority.HIGH,\n metadata={\"source\": \"test\"},\n )\n\n assert notification_id is not None\n call_args = mock_queue.enqueue.call_args[0][0]\n assert call_args.priority == NotificationPriority.HIGH\n assert call_args.payload[\"html_body\"] == \"\u003cp>HTML body\u003c/p>\"\n assert call_args.metadata == {\"source\": \"test\"}\n\n @patch(\"notification_service.service.queue\")\n def test_submit_email_failure(self, mock_queue):\n \"\"\"Test email submission when queue fails.\"\"\"\n from notification_service.service import NotificationService\n\n mock_queue.enqueue.return_value = False\n\n service = NotificationService()\n notification_id = service.submit_email(\n to=\"[email protected]\",\n subject=\"Fail Test\",\n body=\"Should fail\",\n )\n\n assert notification_id is None\n\n @patch(\"notification_service.service.queue\")\n def test_get_queue_status(self, mock_queue):\n \"\"\"Test getting queue status.\"\"\"\n from notification_service.service import NotificationService\n\n mock_queue.is_connected.return_value = True\n mock_queue.queue_length.return_value = 10\n mock_queue._circuit_breaker.state = \"closed\"\n\n service = NotificationService()\n status = service.get_queue_status()\n\n assert status[\"connected\"] is True\n assert status[\"queue_length\"] == 10\n assert status[\"circuit_breaker_state\"] == \"closed\"\n\n @patch(\"notification_service.service.queue\")\n def test_is_healthy(self, mock_queue):\n \"\"\"Test health check via service.\"\"\"\n from notification_service.service import NotificationService\n\n mock_queue.is_connected.return_value = True\n\n service = NotificationService()\n assert service.is_healthy() is True\n\n mock_queue.is_connected.return_value = False\n assert service.is_healthy() is False\n\n @patch(\"notification_service.service.processor\")\n def test_process_sync(self, mock_processor):\n \"\"\"Test synchronous processing.\"\"\"\n from notification_service.service import NotificationService\n\n mock_result = NotificationResult(\n notification_id=\"sync-test\",\n status=NotificationStatus.SENT,\n )\n mock_processor.process_one.return_value = mock_result\n\n service = NotificationService()\n message = NotificationMessage(\n id=\"sync-test\",\n type=NotificationType.EMAIL,\n payload={\"to\": \"[email protected]\", \"subject\": \"Sync\", \"body\": \"Test\"},\n )\n\n result = service.process_sync(message)\n assert result.status == NotificationStatus.SENT\n mock_processor.process_one.assert_called_once_with(message)\n","content_type":"text/x-python; charset=utf-8","language":"python","size":20381,"content_sha256":"14c99b93a41748e5c24b37aff4a73f9a3d3b13d8de89054de3c1266d9bf8c1c0"},{"filename":"benchmarks/pipeline/__init__.py","content":"\"\"\"\nData Pipeline Package\n\nComplete ETL pipeline for benchmark data:\n1. Read JSON records from files\n2. Validate schema\n3. Transform data (normalize fields, calculate derived values)\n4. Output to CSV\n\nUsage:\n from pipeline import run_pipeline, DataPipeline, PipelineConfig\n\n # Simple usage\n result = run_pipeline(\"input.json\", \"output.csv\")\n print(result.summary())\n\n # Advanced usage\n config = PipelineConfig(\n input_path=\"input.json\",\n output_path=\"output.csv\",\n error_policy=ErrorPolicy.SKIP,\n )\n pipeline = DataPipeline(config)\n result = pipeline.run()\n\"\"\"\n\nfrom .pipeline import (\n DataPipeline,\n PipelineConfig,\n PipelineResult,\n run_pipeline,\n validate_file,\n)\nfrom .transform import (\n BatchTransformResult,\n BenchmarkTransformer,\n ErrorPolicy,\n TransformResult,\n transform_and_export,\n transform_file,\n)\nfrom .validator import (\n DataValidator,\n ValidationResult,\n ValidationSeverity,\n)\n\n__all__ = [\n # Pipeline\n \"DataPipeline\",\n \"PipelineConfig\",\n \"PipelineResult\",\n \"run_pipeline\",\n \"validate_file\",\n # Transform\n \"BatchTransformResult\",\n \"BenchmarkTransformer\",\n \"ErrorPolicy\",\n \"TransformResult\",\n \"transform_and_export\",\n \"transform_file\",\n # Validator\n \"DataValidator\",\n \"ValidationResult\",\n \"ValidationSeverity\",\n]\n","content_type":"text/x-python; charset=utf-8","language":"python","size":1372,"content_sha256":"3899f0e7ff0a093b60f7912a59f62005ebf560ad0880721d14288c603c4d0172"},{"filename":"benchmarks/pipeline/pipeline.py","content":"\"\"\"\nData Pipeline Orchestrator\n\nCoordinates the complete ETL pipeline:\n1. Read JSON records from file\n2. Validate schema\n3. Transform data (normalize fields, calculate derived values)\n4. Output to CSV\n\nHandles errors gracefully with configurable policies.\n\"\"\"\n\nimport csv\nimport json\nimport logging\nfrom dataclasses import dataclass, field\nfrom datetime import datetime\nfrom pathlib import Path\nfrom typing import Any, Iterator\n\nfrom .transform import (\n BatchTransformResult,\n BenchmarkTransformer,\n ErrorPolicy,\n TransformResult,\n flatten_record,\n records_to_csv_rows,\n)\nfrom .validator import DataValidator, ValidationResult, ValidationSeverity\n\nlogger = logging.getLogger(__name__)\n\n\n# =============================================================================\n# Pipeline Configuration\n# =============================================================================\n\n\n@dataclass\nclass PipelineConfig:\n \"\"\"Configuration for the data pipeline.\"\"\"\n\n # Input settings\n input_path: Path | str\n\n # Output settings\n output_path: Path | str | None = None\n output_format: str = \"csv\" # \"csv\" or \"json\"\n\n # Error handling\n error_policy: ErrorPolicy = ErrorPolicy.COLLECT\n max_errors: int = 100 # Stop after this many errors\n\n # Validation\n strict_schema: bool = True\n schema_name: str = \"benchmark_result\"\n\n # Processing\n batch_size: int = 1000\n include_aggregates: bool = True\n\n # Logging\n log_level: str = \"INFO\"\n error_log_path: Path | str | None = None\n\n def __post_init__(self):\n self.input_path = Path(self.input_path)\n if self.output_path:\n self.output_path = Path(self.output_path)\n if self.error_log_path:\n self.error_log_path = Path(self.error_log_path)\n\n\n# =============================================================================\n# Pipeline Results\n# =============================================================================\n\n\n@dataclass\nclass PipelineResult:\n \"\"\"Complete result of a pipeline run.\"\"\"\n\n success: bool\n input_path: Path\n output_path: Path | None = None\n\n # Stats\n records_read: int = 0\n records_valid: int = 0\n records_transformed: int = 0\n records_written: int = 0\n\n # Timing\n start_time: datetime | None = None\n end_time: datetime | None = None\n\n # Errors\n validation_errors: list[dict[str, Any]] = field(default_factory=list)\n transform_errors: list[dict[str, Any]] = field(default_factory=list)\n io_errors: list[str] = field(default_factory=list)\n\n # Aggregates (if computed)\n aggregates: dict[str, Any] = field(default_factory=dict)\n\n @property\n def duration_seconds(self) -> float:\n if self.start_time and self.end_time:\n return (self.end_time - self.start_time).total_seconds()\n return 0.0\n\n @property\n def error_count(self) -> int:\n return len(self.validation_errors) + len(self.transform_errors) + len(self.io_errors)\n\n def summary(self) -> str:\n \"\"\"Generate a human-readable summary.\"\"\"\n status = \"SUCCESS\" if self.success else \"FAILED\"\n lines = [\n f\"Pipeline {status}\",\n f\" Input: {self.input_path}\",\n f\" Output: {self.output_path or 'None'}\",\n f\" Duration: {self.duration_seconds:.2f}s\",\n \"\",\n \"Records:\",\n f\" Read: {self.records_read}\",\n f\" Valid: {self.records_valid}\",\n f\" Transformed: {self.records_transformed}\",\n f\" Written: {self.records_written}\",\n ]\n\n if self.error_count > 0:\n lines.extend([\n \"\",\n f\"Errors: {self.error_count}\",\n f\" Validation: {len(self.validation_errors)}\",\n f\" Transform: {len(self.transform_errors)}\",\n f\" I/O: {len(self.io_errors)}\",\n ])\n\n return \"\\n\".join(lines)\n\n\n# =============================================================================\n# File I/O\n# =============================================================================\n\n\ndef read_json_file(path: Path) -> tuple[Any, str | None]:\n \"\"\"\n Read and parse a JSON file.\n\n Returns:\n Tuple of (data, error_message)\n \"\"\"\n try:\n with open(path, \"r\", encoding=\"utf-8\") as f:\n return json.load(f), None\n except FileNotFoundError:\n return None, f\"File not found: {path}\"\n except json.JSONDecodeError as e:\n return None, f\"Invalid JSON at line {e.lineno}: {e.msg}\"\n except PermissionError:\n return None, f\"Permission denied: {path}\"\n except Exception as e:\n return None, f\"Error reading file: {e}\"\n\n\ndef read_json_lines(path: Path, batch_size: int = 1000) -> Iterator[list[dict]]:\n \"\"\"\n Read JSON lines format (one JSON object per line) in batches.\n\n Yields batches of records.\n \"\"\"\n batch = []\n try:\n with open(path, \"r\", encoding=\"utf-8\") as f:\n for line_num, line in enumerate(f, 1):\n line = line.strip()\n if not line:\n continue\n try:\n record = json.loads(line)\n batch.append(record)\n if len(batch) >= batch_size:\n yield batch\n batch = []\n except json.JSONDecodeError as e:\n logger.warning(f\"Skipping invalid JSON at line {line_num}: {e}\")\n\n if batch:\n yield batch\n except Exception as e:\n logger.error(f\"Error reading file: {e}\")\n raise\n\n\ndef write_csv(path: Path, headers: list[str], rows: list[list[Any]]) -> str | None:\n \"\"\"\n Write data to CSV file.\n\n Returns error message if failed, None on success.\n \"\"\"\n try:\n path.parent.mkdir(parents=True, exist_ok=True)\n with open(path, \"w\", newline=\"\", encoding=\"utf-8\") as f:\n writer = csv.writer(f)\n writer.writerow(headers)\n writer.writerows(rows)\n return None\n except PermissionError:\n return f\"Permission denied: {path}\"\n except Exception as e:\n return f\"Error writing CSV: {e}\"\n\n\ndef write_json(path: Path, data: Any, pretty: bool = True) -> str | None:\n \"\"\"\n Write data to JSON file.\n\n Returns error message if failed, None on success.\n \"\"\"\n try:\n path.parent.mkdir(parents=True, exist_ok=True)\n with open(path, \"w\", encoding=\"utf-8\") as f:\n if pretty:\n json.dump(data, f, indent=2, default=str)\n else:\n json.dump(data, f, default=str)\n return None\n except Exception as e:\n return f\"Error writing JSON: {e}\"\n\n\n# =============================================================================\n# Pipeline Implementation\n# =============================================================================\n\n\nclass DataPipeline:\n \"\"\"\n Main data pipeline that orchestrates the ETL process.\n\n Usage:\n config = PipelineConfig(\n input_path=\"data/input.json\",\n output_path=\"data/output.csv\"\n )\n pipeline = DataPipeline(config)\n result = pipeline.run()\n\n if result.success:\n print(f\"Processed {result.records_written} records\")\n else:\n for error in result.validation_errors:\n print(f\"Error: {error}\")\n \"\"\"\n\n def __init__(self, config: PipelineConfig):\n self.config = config\n self.validator = DataValidator()\n self.transformer = BenchmarkTransformer(error_policy=config.error_policy)\n\n # Set up logging\n logging.basicConfig(\n level=getattr(logging, config.log_level.upper()),\n format=\"%(asctime)s - %(name)s - %(levelname)s - %(message)s\"\n )\n\n def run(self) -> PipelineResult:\n \"\"\"Execute the complete pipeline.\"\"\"\n result = PipelineResult(\n success=False,\n input_path=self.config.input_path,\n output_path=self.config.output_path,\n start_time=datetime.now()\n )\n\n logger.info(f\"Starting pipeline: {self.config.input_path}\")\n\n # Step 1: Read input\n data, error = self._read_input()\n if error:\n result.io_errors.append(error)\n result.end_time = datetime.now()\n return result\n\n # Determine record type and extract records\n records = self._extract_records(data)\n result.records_read = len(records)\n logger.info(f\"Read {result.records_read} records\")\n\n # Step 2: Validate\n valid_records, validation_errors = self._validate_records(records)\n result.records_valid = len(valid_records)\n result.validation_errors = validation_errors\n\n if validation_errors:\n logger.warning(f\"Found {len(validation_errors)} validation errors\")\n if len(validation_errors) >= self.config.max_errors:\n logger.error(\"Max errors reached, stopping pipeline\")\n result.end_time = datetime.now()\n return result\n\n # Step 3: Transform\n transformed_records, transform_errors = self._transform_records(valid_records)\n result.records_transformed = len(transformed_records)\n result.transform_errors = transform_errors\n\n # Calculate aggregates if requested\n if self.config.include_aggregates and transformed_records:\n result.aggregates = self._calculate_aggregates(transformed_records)\n\n # Step 4: Write output\n if self.config.output_path and transformed_records:\n write_error = self._write_output(transformed_records, result.aggregates)\n if write_error:\n result.io_errors.append(write_error)\n else:\n result.records_written = len(transformed_records)\n logger.info(f\"Wrote {result.records_written} records to {self.config.output_path}\")\n\n # Write error log if configured\n if self.config.error_log_path and result.error_count > 0:\n self._write_error_log(result)\n\n result.success = result.error_count == 0 or (\n result.records_written > 0 and\n self.config.error_policy != ErrorPolicy.FAIL_FAST\n )\n result.end_time = datetime.now()\n\n logger.info(result.summary())\n return result\n\n def _read_input(self) -> tuple[Any, str | None]:\n \"\"\"Read the input file.\"\"\"\n if not self.config.input_path.exists():\n return None, f\"Input file not found: {self.config.input_path}\"\n\n suffix = self.config.input_path.suffix.lower()\n\n if suffix == \".jsonl\":\n # JSON lines format - read all into memory for now\n records = []\n try:\n for batch in read_json_lines(self.config.input_path, self.config.batch_size):\n records.extend(batch)\n return records, None\n except Exception as e:\n return None, str(e)\n else:\n # Regular JSON\n return read_json_file(self.config.input_path)\n\n def _extract_records(self, data: Any) -> list[dict[str, Any]]:\n \"\"\"Extract records from the input data structure.\"\"\"\n if isinstance(data, list):\n return data\n elif isinstance(data, dict):\n # Check for common wrapper patterns\n if \"results\" in data and isinstance(data[\"results\"], list):\n return data[\"results\"]\n elif \"data\" in data and isinstance(data[\"data\"], list):\n return data[\"data\"]\n elif \"records\" in data and isinstance(data[\"records\"], list):\n return data[\"records\"]\n else:\n # Single record\n return [data]\n else:\n return []\n\n def _validate_records(\n self, records: list[dict[str, Any]]\n ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:\n \"\"\"\n Validate all records against the schema.\n\n Returns:\n Tuple of (valid_records, error_details)\n \"\"\"\n valid_records = []\n errors = []\n\n for i, record in enumerate(records):\n validation_result = self.validator.validate(record, self.config.schema_name)\n\n if validation_result.valid:\n valid_records.append(record)\n else:\n for issue in validation_result.errors:\n errors.append({\n \"record_index\": i,\n \"field\": issue.field,\n \"message\": str(issue.message),\n \"value\": str(issue.value) if issue.value is not None else None,\n \"severity\": issue.severity.value,\n })\n\n if len(errors) >= self.config.max_errors:\n break\n\n # In SKIP mode, continue; otherwise stop\n if self.config.error_policy == ErrorPolicy.FAIL_FAST:\n break\n\n if len(errors) >= self.config.max_errors:\n break\n\n return valid_records, errors\n\n def _transform_records(\n self, records: list[dict[str, Any]]\n ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:\n \"\"\"\n Transform all records.\n\n Returns:\n Tuple of (transformed_records, error_details)\n \"\"\"\n batch_result = self.transformer.transform_batch(records, \"benchmark\")\n\n errors = []\n for idx, error in batch_result.errors:\n errors.append({\n \"record_index\": idx,\n \"type\": type(error).__name__,\n \"message\": str(error),\n \"field\": getattr(error, \"field\", None),\n })\n\n return batch_result.records, errors\n\n def _calculate_aggregates(\n self, records: list[dict[str, Any]]\n ) -> dict[str, Any]:\n \"\"\"Calculate aggregate statistics.\"\"\"\n from .transform import (\n aggregate_by_feature,\n aggregate_by_scale,\n calculate_success_rate,\n )\n\n return {\n \"total_records\": len(records),\n \"success_rate\": calculate_success_rate(records),\n \"by_feature\": aggregate_by_feature(records),\n \"by_scale\": aggregate_by_scale(records),\n \"total_duration_ms\": sum(r.get(\"duration_ms\", 0) for r in records),\n \"total_tokens\": sum(r.get(\"tokens_used\", 0) for r in records),\n }\n\n def _write_output(\n self, records: list[dict[str, Any]], aggregates: dict[str, Any]\n ) -> str | None:\n \"\"\"Write the output file.\"\"\"\n output_path = self.config.output_path\n\n if self.config.output_format == \"csv\":\n headers, rows = records_to_csv_rows(records)\n return write_csv(output_path, headers, rows)\n\n elif self.config.output_format == \"json\":\n output_data = {\n \"timestamp\": datetime.now().isoformat(),\n \"records\": records,\n }\n if aggregates:\n output_data[\"aggregates\"] = aggregates\n return write_json(output_path, output_data)\n\n else:\n return f\"Unknown output format: {self.config.output_format}\"\n\n def _write_error_log(self, result: PipelineResult) -> None:\n \"\"\"Write detailed error log.\"\"\"\n error_data = {\n \"timestamp\": datetime.now().isoformat(),\n \"input_path\": str(result.input_path),\n \"validation_errors\": result.validation_errors,\n \"transform_errors\": result.transform_errors,\n \"io_errors\": result.io_errors,\n }\n\n error = write_json(self.config.error_log_path, error_data)\n if error:\n logger.error(f\"Failed to write error log: {error}\")\n\n\n# =============================================================================\n# Convenience Functions\n# =============================================================================\n\n\ndef run_pipeline(\n input_path: str | Path,\n output_path: str | Path | None = None,\n output_format: str = \"csv\",\n error_policy: str = \"collect\",\n) -> PipelineResult:\n \"\"\"\n Run the data pipeline with sensible defaults.\n\n Args:\n input_path: Path to input JSON file\n output_path: Path for output file (optional)\n output_format: \"csv\" or \"json\"\n error_policy: \"fail_fast\", \"collect\", or \"skip\"\n\n Returns:\n PipelineResult with processing details\n\n Example:\n result = run_pipeline(\n \"data/benchmark.json\",\n \"data/output.csv\"\n )\n print(result.summary())\n \"\"\"\n policy_map = {\n \"fail_fast\": ErrorPolicy.FAIL_FAST,\n \"collect\": ErrorPolicy.COLLECT,\n \"skip\": ErrorPolicy.SKIP,\n }\n\n config = PipelineConfig(\n input_path=input_path,\n output_path=output_path,\n output_format=output_format,\n error_policy=policy_map.get(error_policy, ErrorPolicy.COLLECT),\n )\n\n pipeline = DataPipeline(config)\n return pipeline.run()\n\n\ndef validate_file(input_path: str | Path) -> ValidationResult:\n \"\"\"\n Validate a JSON file without transforming.\n\n Returns validation result with any issues found.\n \"\"\"\n path = Path(input_path)\n data, error = read_json_file(path)\n\n if error:\n from .validator import ValidationIssue\n return ValidationResult(\n valid=False,\n issues=[ValidationIssue(message=error, rule=\"io\")]\n )\n\n validator = DataValidator()\n\n # Auto-detect schema\n if isinstance(data, dict) and \"results\" in data:\n return validator.validate_core_benchmark(data)\n elif isinstance(data, dict) and \"run_id\" in data:\n return validator.validate_run_result(data)\n else:\n return validator.validate_benchmark_result(data)\n\n\n# =============================================================================\n# CLI Entry Point\n# =============================================================================\n\n\ndef main():\n \"\"\"Command-line interface for the pipeline.\"\"\"\n import argparse\n\n parser = argparse.ArgumentParser(\n description=\"Data pipeline for benchmark results\",\n formatter_class=argparse.RawDescriptionHelpFormatter,\n epilog=\"\"\"\nExamples:\n python -m pipeline.pipeline input.json -o output.csv\n python -m pipeline.pipeline input.json -o output.json --format json\n python -m pipeline.pipeline input.json --validate-only\n python -m pipeline.pipeline input.json -o output.csv --error-policy skip\n \"\"\"\n )\n\n parser.add_argument(\"input\", help=\"Input JSON file path\")\n parser.add_argument(\"-o\", \"--output\", help=\"Output file path\")\n parser.add_argument(\n \"--format\",\n choices=[\"csv\", \"json\"],\n default=\"csv\",\n help=\"Output format (default: csv)\"\n )\n parser.add_argument(\n \"--error-policy\",\n choices=[\"fail_fast\", \"collect\", \"skip\"],\n default=\"collect\",\n help=\"How to handle errors (default: collect)\"\n )\n parser.add_argument(\n \"--validate-only\",\n action=\"store_true\",\n help=\"Only validate, don't transform or output\"\n )\n parser.add_argument(\n \"--error-log\",\n help=\"Path to write error log\"\n )\n parser.add_argument(\n \"-v\", \"--verbose\",\n action=\"store_true\",\n help=\"Verbose output\"\n )\n\n args = parser.parse_args()\n\n if args.validate_only:\n result = validate_file(args.input)\n if result.valid:\n print(\"Validation: PASSED\")\n else:\n print(\"Validation: FAILED\")\n for issue in result.issues:\n print(f\" - {issue}\")\n return 0 if result.valid else 1\n\n config = PipelineConfig(\n input_path=args.input,\n output_path=args.output,\n output_format=args.format,\n error_policy=ErrorPolicy[args.error_policy.upper()],\n error_log_path=args.error_log,\n log_level=\"DEBUG\" if args.verbose else \"INFO\",\n )\n\n pipeline = DataPipeline(config)\n result = pipeline.run()\n\n print(result.summary())\n return 0 if result.success else 1\n\n\nif __name__ == \"__main__\":\n exit(main())\n","content_type":"text/x-python; charset=utf-8","language":"python","size":20268,"content_sha256":"7bee3248eaa81d7987cb6083c091f0f93724939a1e983da61fd30e39d7035e33"},{"filename":"benchmarks/pipeline/test_validator.py","content":"\"\"\"\nComprehensive tests for the data validation module.\n\nTests cover:\n- Schema validation (required fields, types, allowed values)\n- Field-level validation (range, length, pattern, custom validators)\n- Batch validation and error aggregation\n- Edge cases and error handling\n\"\"\"\n\nimport pytest\nfrom datetime import datetime\nfrom typing import Any\n\nfrom .validator import (\n DataValidator,\n FieldSchema,\n Schema,\n ValidationIssue,\n ValidationResult,\n ValidationSeverity,\n BENCHMARK_RESULT_SCHEMA,\n RUN_RESULT_SCHEMA,\n CORE_BENCHMARK_SCHEMA,\n is_iso_timestamp,\n is_uuid,\n validate_json_structure,\n validate_non_empty,\n validate_cross_field,\n)\n\n\n# =============================================================================\n# Test Fixtures\n# =============================================================================\n\n\[email protected]\ndef valid_benchmark_result() -> dict[str, Any]:\n \"\"\"A valid benchmark result record.\"\"\"\n return {\n \"feature\": \"remember_recall\",\n \"scale\": \"small\",\n \"operation\": \"write_all\",\n \"duration_ms\": 766.99,\n \"success\": True,\n \"items\": 10,\n \"tokens_used\": 150,\n \"details\": {\"avg_ms\": 76.7, \"total_writes\": 10},\n }\n\n\[email protected]\ndef valid_run_result() -> dict[str, Any]:\n \"\"\"A valid run result record.\"\"\"\n return {\n \"run_id\": \"run-001\",\n \"scenario_id\": \"sc-benchmark-01\",\n \"category\": \"performance\",\n \"start_time\": \"2026-03-23T10:00:00+00:00\",\n \"end_time\": \"2026-03-23T10:05:00+00:00\",\n \"config\": {\"timeout\": 300},\n \"events\": [],\n \"result\": {\"success\": True, \"metrics\": {}},\n }\n\n\[email protected]\ndef valid_core_benchmark() -> dict[str, Any]:\n \"\"\"A valid core benchmark record.\"\"\"\n return {\n \"timestamp\": \"2026-03-23T14:16:36+00:00\",\n \"scales\": {\"small\": 10, \"medium\": 100, \"large\": 500},\n \"results\": [\n {\n \"feature\": \"remember_recall\",\n \"scale\": \"small\",\n \"operation\": \"write_all\",\n \"duration_ms\": 766.99,\n \"success\": True,\n },\n {\n \"feature\": \"multi_agent\",\n \"scale\": \"medium\",\n \"operation\": \"coordinate\",\n \"duration_ms\": 1500.0,\n \"success\": True,\n },\n ],\n }\n\n\[email protected]\ndef validator() -> DataValidator:\n \"\"\"A DataValidator instance.\"\"\"\n return DataValidator()\n\n\n# =============================================================================\n# Test ValidationIssue\n# =============================================================================\n\n\nclass TestValidationIssue:\n def test_str_representation(self):\n issue = ValidationIssue(\n message=\"Field cannot be null\",\n severity=ValidationSeverity.ERROR,\n field=\"feature\",\n rule=\"nullable\",\n )\n result = str(issue)\n assert \"[ERROR]\" in result\n assert \"Field 'feature'\" in result\n assert \"Field cannot be null\" in result\n\n def test_str_with_suggestion(self):\n issue = ValidationIssue(\n message=\"Invalid type\",\n field=\"duration_ms\",\n suggestion=\"Convert to float\",\n )\n result = str(issue)\n assert \"Suggestion: Convert to float\" in result\n\n def test_warning_severity(self):\n issue = ValidationIssue(\n message=\"Unexpected field\",\n severity=ValidationSeverity.WARNING,\n )\n assert \"[WARNING]\" in str(issue)\n\n\n# =============================================================================\n# Test ValidationResult\n# =============================================================================\n\n\nclass TestValidationResult:\n def test_valid_result(self):\n result = ValidationResult(valid=True, validated_data={\"key\": \"value\"})\n assert result.valid\n assert result.errors == []\n assert result.warnings == []\n\n def test_invalid_result_with_errors(self):\n issues = [\n ValidationIssue(\"Error 1\", ValidationSeverity.ERROR),\n ValidationIssue(\"Warning 1\", ValidationSeverity.WARNING),\n ValidationIssue(\"Error 2\", ValidationSeverity.ERROR),\n ]\n result = ValidationResult(valid=False, issues=issues)\n assert not result.valid\n assert len(result.errors) == 2\n assert len(result.warnings) == 1\n\n def test_merge_results(self):\n result1 = ValidationResult(\n valid=True,\n issues=[ValidationIssue(\"Warning\", ValidationSeverity.WARNING)],\n )\n result2 = ValidationResult(\n valid=False,\n issues=[ValidationIssue(\"Error\", ValidationSeverity.ERROR)],\n validated_data={\"key\": \"value\"},\n )\n merged = result1.merge(result2)\n assert not merged.valid\n assert len(merged.issues) == 2\n assert merged.validated_data == {\"key\": \"value\"}\n\n\n# =============================================================================\n# Test FieldSchema Validation\n# =============================================================================\n\n\nclass TestFieldSchema:\n def test_required_field_missing(self):\n schema = FieldSchema(name=\"feature\", types=str, required=True)\n # Note: required check is at Schema level, not FieldSchema.validate\n issues = schema.validate(None)\n assert len(issues) == 1\n assert \"null\" in issues[0].message.lower()\n\n def test_type_check_single_type(self):\n schema = FieldSchema(name=\"count\", types=int, required=True)\n # Valid\n assert schema.validate(42) == []\n # Invalid\n issues = schema.validate(\"not_an_int\")\n assert len(issues) == 1\n assert \"type\" in issues[0].rule\n\n def test_type_check_union_types(self):\n schema = FieldSchema(name=\"duration\", types=(int, float), required=True)\n assert schema.validate(42) == []\n assert schema.validate(42.5) == []\n issues = schema.validate(\"not_a_number\")\n assert len(issues) == 1\n\n def test_allowed_values(self):\n schema = FieldSchema(\n name=\"scale\",\n types=str,\n allowed_values=[\"small\", \"medium\", \"large\"],\n )\n assert schema.validate(\"small\") == []\n issues = schema.validate(\"extra_large\")\n assert len(issues) == 1\n assert \"allowed values\" in issues[0].message\n\n def test_min_max_value(self):\n schema = FieldSchema(\n name=\"duration_ms\",\n types=(int, float),\n min_value=0,\n max_value=60000,\n )\n assert schema.validate(1000) == []\n assert schema.validate(0) == []\n assert schema.validate(60000) == []\n\n issues = schema.validate(-1)\n assert len(issues) == 1\n assert \"below minimum\" in issues[0].message\n\n issues = schema.validate(70000)\n assert len(issues) == 1\n assert \"exceeds maximum\" in issues[0].message\n\n def test_string_length(self):\n schema = FieldSchema(\n name=\"operation\",\n types=str,\n min_length=1,\n max_length=100,\n )\n assert schema.validate(\"write\") == []\n\n issues = schema.validate(\"\")\n assert len(issues) == 1\n assert \"below minimum\" in issues[0].message\n\n issues = schema.validate(\"x\" * 150)\n assert len(issues) == 1\n assert \"exceeds maximum\" in issues[0].message\n\n def test_pattern_validation(self):\n schema = FieldSchema(\n name=\"run_id\",\n types=str,\n pattern=r\"^run-\\d{3}$\",\n )\n assert schema.validate(\"run-001\") == []\n issues = schema.validate(\"invalid-id\")\n assert len(issues) == 1\n assert \"pattern\" in issues[0].rule\n\n def test_custom_validator(self):\n def is_positive(value: int) -> bool:\n return value > 0\n\n schema = FieldSchema(\n name=\"items\",\n types=int,\n custom_validator=is_positive,\n )\n assert schema.validate(10) == []\n issues = schema.validate(0)\n assert len(issues) == 1\n assert \"custom\" in issues[0].rule\n\n def test_custom_validator_exception(self):\n def bad_validator(value: Any) -> bool:\n raise ValueError(\"Validator crashed\")\n\n schema = FieldSchema(\n name=\"field\",\n types=str,\n custom_validator=bad_validator,\n )\n issues = schema.validate(\"test\")\n assert len(issues) == 1\n assert issues[0].severity == ValidationSeverity.WARNING\n assert \"exception\" in issues[0].message.lower()\n\n def test_nullable_field(self):\n schema = FieldSchema(name=\"details\", types=dict, nullable=True)\n assert schema.validate(None) == []\n assert schema.validate({\"key\": \"value\"}) == []\n\n\n# =============================================================================\n# Test Schema Validation\n# =============================================================================\n\n\nclass TestSchema:\n def test_missing_required_field(self):\n schema = Schema(\n name=\"test\",\n fields=[\n FieldSchema(name=\"required_field\", types=str, required=True),\n FieldSchema(name=\"optional_field\", types=str, required=False),\n ],\n )\n result = schema.validate({\"optional_field\": \"value\"})\n assert not result.valid\n assert any(\"required_field\" in str(i) for i in result.issues)\n\n def test_extra_fields_allowed(self):\n schema = Schema(\n name=\"test\",\n fields=[FieldSchema(name=\"known\", types=str)],\n allow_extra_fields=True,\n )\n result = schema.validate({\"known\": \"value\", \"unknown\": \"extra\"})\n assert result.valid\n\n def test_extra_fields_not_allowed(self):\n schema = Schema(\n name=\"test\",\n fields=[FieldSchema(name=\"known\", types=str)],\n allow_extra_fields=False,\n )\n result = schema.validate({\"known\": \"value\", \"unknown\": \"extra\"})\n # Extra fields generate warnings, not errors by default\n assert len(result.warnings) == 1\n assert \"unexpected\" in result.warnings[0].message.lower()\n\n def test_extra_validators(self):\n def check_consistency(data: dict) -> list[ValidationIssue]:\n if data.get(\"success\") is False and data.get(\"items\", 0) > 0:\n return [\n ValidationIssue(\n message=\"Failed operation should have 0 items\",\n severity=ValidationSeverity.WARNING,\n )\n ]\n return []\n\n schema = Schema(\n name=\"test\",\n fields=[\n FieldSchema(name=\"success\", types=bool),\n FieldSchema(name=\"items\", types=int),\n ],\n )\n schema.add_validator(check_consistency)\n\n result = schema.validate({\"success\": False, \"items\": 10})\n assert len(result.warnings) == 1\n\n def test_add_field_chaining(self):\n schema = Schema(name=\"test\")\n result = schema.add_field(FieldSchema(name=\"f1\", types=str))\n assert result is schema\n assert len(schema.fields) == 1\n\n\n# =============================================================================\n# Test Predefined Schemas\n# =============================================================================\n\n\nclass TestBenchmarkResultSchema:\n def test_valid_record(self, validator, valid_benchmark_result):\n result = validator.validate_benchmark_result(valid_benchmark_result)\n assert result.valid, f\"Errors: {result.errors}\"\n\n def test_missing_required_fields(self, validator):\n result = validator.validate_benchmark_result({})\n assert not result.valid\n error_fields = [e.field for e in result.errors]\n assert \"feature\" in error_fields\n assert \"scale\" in error_fields\n assert \"operation\" in error_fields\n assert \"duration_ms\" in error_fields\n assert \"success\" in error_fields\n\n def test_invalid_feature_value(self, validator, valid_benchmark_result):\n valid_benchmark_result[\"feature\"] = \"unknown_feature\"\n result = validator.validate_benchmark_result(valid_benchmark_result)\n assert not result.valid\n assert any(\"allowed values\" in str(e) for e in result.errors)\n\n def test_invalid_scale_value(self, validator, valid_benchmark_result):\n valid_benchmark_result[\"scale\"] = \"tiny\"\n result = validator.validate_benchmark_result(valid_benchmark_result)\n assert not result.valid\n\n def test_negative_duration(self, validator, valid_benchmark_result):\n valid_benchmark_result[\"duration_ms\"] = -100\n result = validator.validate_benchmark_result(valid_benchmark_result)\n assert not result.valid\n assert any(\"below minimum\" in str(e) for e in result.errors)\n\n def test_invalid_type_for_success(self, validator, valid_benchmark_result):\n valid_benchmark_result[\"success\"] = \"yes\" # Should be bool\n result = validator.validate_benchmark_result(valid_benchmark_result)\n assert not result.valid\n\n def test_optional_fields_missing_ok(self, validator):\n minimal = {\n \"feature\": \"remember_recall\",\n \"scale\": \"small\",\n \"operation\": \"test\",\n \"duration_ms\": 100,\n \"success\": True,\n }\n result = validator.validate_benchmark_result(minimal)\n assert result.valid\n\n\nclass TestRunResultSchema:\n def test_valid_record(self, validator, valid_run_result):\n result = validator.validate_run_result(valid_run_result)\n assert result.valid, f\"Errors: {result.errors}\"\n\n def test_invalid_timestamp(self, validator, valid_run_result):\n valid_run_result[\"start_time\"] = \"not-a-timestamp\"\n result = validator.validate_run_result(valid_run_result)\n assert not result.valid\n\n def test_missing_result_field(self, validator, valid_run_result):\n del valid_run_result[\"result\"]\n result = validator.validate_run_result(valid_run_result)\n assert not result.valid\n\n\nclass TestCoreBenchmarkSchema:\n def test_valid_record(self, validator, valid_core_benchmark):\n result = validator.validate_core_benchmark(valid_core_benchmark)\n assert result.valid, f\"Errors: {result.errors}\"\n\n def test_validates_nested_results(self, validator, valid_core_benchmark):\n # Add an invalid nested result\n valid_core_benchmark[\"results\"].append(\n {\"feature\": \"invalid\", \"scale\": \"huge\"} # Missing required fields\n )\n result = validator.validate_core_benchmark(valid_core_benchmark)\n assert not result.valid\n # Should have errors with proper path prefixes\n assert any(\"results[2]\" in str(e.field) for e in result.errors)\n\n\n# =============================================================================\n# Test DataValidator Class\n# =============================================================================\n\n\nclass TestDataValidator:\n def test_register_custom_schema(self, validator):\n custom_schema = Schema(\n name=\"custom\",\n fields=[FieldSchema(name=\"custom_field\", types=str, required=True)],\n )\n validator.register_schema(custom_schema)\n\n result = validator.validate({\"custom_field\": \"value\"}, \"custom\")\n assert result.valid\n\n def test_validate_unknown_schema(self, validator):\n result = validator.validate({}, \"nonexistent_schema\")\n assert not result.valid\n assert \"Unknown schema\" in result.issues[0].message\n\n def test_get_schema(self, validator):\n schema = validator.get_schema(\"benchmark_result\")\n assert schema is not None\n assert schema.name == \"benchmark_result\"\n\n assert validator.get_schema(\"nonexistent\") is None\n\n def test_batch_validation(self, validator, valid_benchmark_result):\n records = [\n valid_benchmark_result,\n valid_benchmark_result.copy(),\n {\"feature\": \"invalid\"}, # Invalid\n ]\n results, summary = validator.validate_batch(records, \"benchmark_result\")\n\n assert len(results) == 3\n assert summary[\"total\"] == 3\n assert summary[\"valid\"] == 2\n assert summary[\"invalid\"] == 1\n assert summary[\"validation_rate\"] == 2 / 3\n\n def test_batch_validation_empty(self, validator):\n results, summary = validator.validate_batch([], \"benchmark_result\")\n assert results == []\n assert summary[\"total\"] == 0\n assert summary[\"validation_rate\"] == 0\n\n\n# =============================================================================\n# Test Helper Functions\n# =============================================================================\n\n\nclass TestHelperFunctions:\n def test_is_iso_timestamp_valid(self):\n assert is_iso_timestamp(\"2026-03-23T14:16:36+00:00\")\n assert is_iso_timestamp(\"2026-03-23T14:16:36Z\")\n assert is_iso_timestamp(\"2026-03-23T14:16:36.123456+00:00\")\n assert is_iso_timestamp(\"2026-03-23\")\n\n def test_is_iso_timestamp_invalid(self):\n assert not is_iso_timestamp(\"not-a-date\")\n assert not is_iso_timestamp(\"23/03/2026\")\n assert not is_iso_timestamp(\"\")\n assert not is_iso_timestamp(None) # type: ignore\n\n def test_is_uuid_valid(self):\n assert is_uuid(\"550e8400-e29b-41d4-a716-446655440000\")\n assert is_uuid(\"550E8400-E29B-41D4-A716-446655440000\") # Uppercase\n\n def test_is_uuid_invalid(self):\n assert not is_uuid(\"not-a-uuid\")\n assert not is_uuid(\"550e8400-e29b-41d4-a716\") # Too short\n assert not is_uuid(\"\")\n\n def test_validate_json_structure_dict(self):\n result = validate_json_structure({\"key\": \"value\"}, dict)\n assert result.valid\n\n result = validate_json_structure([1, 2, 3], dict)\n assert not result.valid\n\n def test_validate_json_structure_list(self):\n result = validate_json_structure([1, 2, 3], list)\n assert result.valid\n\n def test_validate_non_empty(self):\n data = {\"name\": \"\", \"items\": [], \"config\": {}}\n issues = validate_non_empty(data, [\"name\", \"items\", \"config\"])\n assert len(issues) == 3\n\n data = {\"name\": \"test\", \"items\": [1], \"config\": {\"k\": \"v\"}}\n issues = validate_non_empty(data, [\"name\", \"items\", \"config\"])\n assert len(issues) == 0\n\n def test_validate_cross_field(self):\n data = {\"start_time\": 100, \"end_time\": 50}\n issues = validate_cross_field(\n data,\n \"start_time\",\n \"end_time\",\n lambda s, e: s \u003c e,\n \"end_time must be after start_time\",\n )\n assert len(issues) == 1\n\n data = {\"start_time\": 100, \"end_time\": 200}\n issues = validate_cross_field(\n data,\n \"start_time\",\n \"end_time\",\n lambda s, e: s \u003c e,\n \"end_time must be after start_time\",\n )\n assert len(issues) == 0\n\n\n# =============================================================================\n# Test Edge Cases\n# =============================================================================\n\n\nclass TestEdgeCases:\n def test_deeply_nested_path(self, validator):\n \"\"\"Test that nested validation paths are properly constructed.\"\"\"\n schema = Schema(\n name=\"outer\",\n fields=[FieldSchema(name=\"inner\", types=dict, required=True)],\n )\n validator.register_schema(schema)\n\n result = validator.validate({\"inner\": \"not_a_dict\"}, \"outer\")\n assert not result.valid\n\n def test_special_characters_in_field_names(self):\n schema = FieldSchema(name=\"field-with-dash\", types=str)\n assert schema.validate(\"value\") == []\n\n def test_unicode_values(self, validator, valid_benchmark_result):\n valid_benchmark_result[\"operation\"] = \"测试操作\"\n result = validator.validate_benchmark_result(valid_benchmark_result)\n assert result.valid\n\n def test_very_large_numbers(self, validator, valid_benchmark_result):\n valid_benchmark_result[\"duration_ms\"] = 1e15\n valid_benchmark_result[\"tokens_used\"] = 10**12\n result = validator.validate_benchmark_result(valid_benchmark_result)\n assert result.valid\n\n def test_float_precision(self, validator, valid_benchmark_result):\n valid_benchmark_result[\"duration_ms\"] = 0.0000001\n result = validator.validate_benchmark_result(valid_benchmark_result)\n assert result.valid\n\n def test_none_in_details(self, validator, valid_benchmark_result):\n valid_benchmark_result[\"details\"] = None\n result = validator.validate_benchmark_result(valid_benchmark_result)\n assert result.valid # details is nullable\n\n\n# =============================================================================\n# Test Validation Helper Functions (Extended)\n# =============================================================================\n\n\nclass TestValidateNumericRange:\n def test_within_range(self):\n from .validator import validate_numeric_range\n\n issues = validate_numeric_range(50, min_val=0, max_val=100, field_name=\"count\")\n assert len(issues) == 0\n\n def test_at_boundaries(self):\n from .validator import validate_numeric_range\n\n assert len(validate_numeric_range(0, min_val=0, max_val=100)) == 0\n assert len(validate_numeric_range(100, min_val=0, max_val=100)) == 0\n\n def test_below_minimum(self):\n from .validator import validate_numeric_range\n\n issues = validate_numeric_range(-5, min_val=0, field_name=\"items\")\n assert len(issues) == 1\n assert \"below minimum\" in issues[0].message\n assert issues[0].field == \"items\"\n\n def test_above_maximum(self):\n from .validator import validate_numeric_range\n\n issues = validate_numeric_range(150, max_val=100, field_name=\"score\")\n assert len(issues) == 1\n assert \"exceeds maximum\" in issues[0].message\n\n def test_float_values(self):\n from .validator import validate_numeric_range\n\n assert len(validate_numeric_range(0.5, min_val=0.0, max_val=1.0)) == 0\n assert len(validate_numeric_range(1.5, min_val=0.0, max_val=1.0)) == 1\n\n\nclass TestValidateStringFormat:\n def test_valid_pattern(self):\n from .validator import validate_string_format\n\n issues = validate_string_format(\n \"run-001\", pattern=r\"^run-\\d{3}$\", field_name=\"run_id\", format_name=\"run ID\"\n )\n assert len(issues) == 0\n\n def test_invalid_pattern(self):\n from .validator import validate_string_format\n\n issues = validate_string_format(\n \"invalid\", pattern=r\"^run-\\d{3}$\", field_name=\"run_id\", format_name=\"run ID\"\n )\n assert len(issues) == 1\n assert \"run ID format\" in issues[0].message\n assert issues[0].suggestion is not None\n\n\nclass TestValidateListItems:\n def test_all_valid_items(self):\n from .validator import validate_list_items, ValidationIssue\n\n def int_validator(item: Any, idx: int) -> list[ValidationIssue]:\n if not isinstance(item, int):\n return [ValidationIssue(message=f\"Expected int, got {type(item).__name__}\")]\n return []\n\n issues = validate_list_items([1, 2, 3], int_validator, field_name=\"numbers\")\n assert len(issues) == 0\n\n def test_some_invalid_items(self):\n from .validator import validate_list_items, ValidationIssue\n\n def positive_validator(item: Any, idx: int) -> list[ValidationIssue]:\n if item \u003c= 0:\n return [ValidationIssue(message=\"Must be positive\", field=\"value\")]\n return []\n\n issues = validate_list_items([1, -2, 3, -4], positive_validator, field_name=\"nums\")\n assert len(issues) == 2\n assert \"nums[1]\" in issues[0].field\n assert \"nums[3]\" in issues[1].field\n\n\nclass TestValidateConsistency:\n def test_all_rules_pass(self):\n from .validator import validate_consistency\n\n data = {\"success\": True, \"items\": 10, \"duration_ms\": 100}\n rules = [\n (\"positive_duration\", lambda d: d[\"duration_ms\"] > 0, \"Duration must be positive\"),\n (\"has_items_on_success\", lambda d: not d[\"success\"] or d[\"items\"] > 0, \"Success requires items\"),\n ]\n issues = validate_consistency(data, rules)\n assert len(issues) == 0\n\n def test_rule_fails(self):\n from .validator import validate_consistency, ValidationSeverity\n\n data = {\"success\": True, \"items\": 0}\n rules = [\n (\"has_items_on_success\", lambda d: not d[\"success\"] or d[\"items\"] > 0, \"Success requires items\"),\n ]\n issues = validate_consistency(data, rules)\n assert len(issues) == 1\n assert issues[0].severity == ValidationSeverity.WARNING\n\n def test_rule_raises_exception(self):\n from .validator import validate_consistency\n\n data = {}\n rules = [\n (\"bad_rule\", lambda d: d[\"nonexistent\"][\"key\"] > 0, \"Should handle exception\"),\n ]\n issues = validate_consistency(data, rules)\n assert len(issues) == 1\n assert \"failed\" in issues[0].message.lower()\n\n\nclass TestBenchmarkConsistencyValidator:\n def test_success_items_consistency(self):\n from .validator import create_benchmark_consistency_validator\n\n validator = create_benchmark_consistency_validator()\n\n # Valid: success with items\n issues = validator({\"success\": True, \"items\": 10, \"duration_ms\": 100, \"tokens_used\": 50})\n # This should pass all consistency checks\n assert all(\"success_items_consistency\" not in (i.rule or \"\") for i in issues if i.rule)\n\n def test_negative_duration_warning(self):\n from .validator import create_benchmark_consistency_validator\n\n validator = create_benchmark_consistency_validator()\n\n # Invalid: negative duration\n issues = validator({\"success\": True, \"items\": 10, \"duration_ms\": -100})\n duration_issues = [i for i in issues if i.rule == \"duration_positive\"]\n assert len(duration_issues) == 1\n\n def test_unreasonable_tokens_warning(self):\n from .validator import create_benchmark_consistency_validator\n\n validator = create_benchmark_consistency_validator()\n\n # Invalid: too many tokens\n issues = validator({\"success\": True, \"items\": 10, \"duration_ms\": 100, \"tokens_used\": 2_000_000})\n token_issues = [i for i in issues if i.rule == \"tokens_reasonable\"]\n assert len(token_issues) == 1\n\n\n# =============================================================================\n# Integration Tests\n# =============================================================================\n\n\nclass TestValidatorIntegration:\n \"\"\"Integration tests for validator with real data structures.\"\"\"\n\n def test_validate_actual_benchmark_file_structure(self, validator):\n \"\"\"Test validation against actual benchmark file structure.\"\"\"\n # Simulates the structure in core_benchmark.json\n data = {\n \"timestamp\": \"2026-03-23T14:16:36.318289+00:00\",\n \"scales\": {\"small\": 10, \"medium\": 100, \"large\": 500},\n \"results\": [\n {\n \"feature\": \"remember_recall\",\n \"scale\": \"small\",\n \"items\": 10,\n \"operation\": \"write_all\",\n \"duration_ms\": 766.99,\n \"success\": True,\n \"tokens_used\": 0,\n \"details\": {\"avg_ms\": 76.7, \"total_writes\": 10},\n },\n {\n \"feature\": \"multi_agent\",\n \"scale\": \"medium\",\n \"items\": 100,\n \"operation\": \"private_write\",\n \"duration_ms\": 7995.99,\n \"success\": True,\n \"tokens_used\": 0,\n \"details\": {\"agents\": 100},\n },\n ],\n }\n\n result = validator.validate_core_benchmark(data)\n assert result.valid, f\"Errors: {[str(e) for e in result.errors]}\"\n\n def test_validate_with_null_details(self, validator):\n \"\"\"Test that null details field is handled correctly.\"\"\"\n data = {\n \"feature\": \"delta_sync\",\n \"scale\": \"small\",\n \"items\": 10,\n \"operation\": \"full_read\",\n \"duration_ms\": 50.78,\n \"success\": True,\n \"tokens_used\": 0,\n \"details\": None, # Explicitly null\n }\n\n result = validator.validate_benchmark_result(data)\n assert result.valid\n\n def test_validate_all_feature_types(self, validator):\n \"\"\"Test all allowed feature values.\"\"\"\n features = [\n \"remember_recall\",\n \"multi_agent\",\n \"semantic_search\",\n \"token_aware\",\n \"delta_sync\",\n \"discovery\",\n ]\n\n for feature in features:\n data = {\n \"feature\": feature,\n \"scale\": \"small\",\n \"operation\": \"test\",\n \"duration_ms\": 100,\n \"success\": True,\n }\n result = validator.validate_benchmark_result(data)\n assert result.valid, f\"Feature '{feature}' should be valid\"\n\n def test_validate_all_scale_types(self, validator):\n \"\"\"Test all allowed scale values.\"\"\"\n for scale in [\"small\", \"medium\", \"large\"]:\n data = {\n \"feature\": \"remember_recall\",\n \"scale\": scale,\n \"operation\": \"test\",\n \"duration_ms\": 100,\n \"success\": True,\n }\n result = validator.validate_benchmark_result(data)\n assert result.valid, f\"Scale '{scale}' should be valid\"\n\n\n# =============================================================================\n# Error Aggregation Tests\n# =============================================================================\n\n\nclass TestErrorAggregation:\n def test_validation_result_merge_preserves_all_issues(self):\n \"\"\"Test that merging results preserves all issues.\"\"\"\n result1 = ValidationResult(\n valid=True,\n issues=[\n ValidationIssue(\"Warning 1\", ValidationSeverity.WARNING),\n ValidationIssue(\"Info 1\", ValidationSeverity.INFO),\n ],\n )\n result2 = ValidationResult(\n valid=False,\n issues=[\n ValidationIssue(\"Error 1\", ValidationSeverity.ERROR),\n ],\n )\n\n merged = result1.merge(result2)\n assert not merged.valid\n assert len(merged.issues) == 3\n assert len(merged.errors) == 1\n assert len(merged.warnings) == 1\n\n def test_batch_validation_aggregates_errors_correctly(self, validator):\n \"\"\"Test batch validation aggregates errors from all records.\"\"\"\n records = [\n # Valid\n {\"feature\": \"remember_recall\", \"scale\": \"small\", \"operation\": \"test\", \"duration_ms\": 100, \"success\": True},\n # Invalid feature\n {\"feature\": \"invalid_feature\", \"scale\": \"small\", \"operation\": \"test\", \"duration_ms\": 100, \"success\": True},\n # Missing required field\n {\"feature\": \"remember_recall\", \"scale\": \"small\", \"operation\": \"test\", \"success\": True},\n # Invalid scale\n {\"feature\": \"remember_recall\", \"scale\": \"tiny\", \"operation\": \"test\", \"duration_ms\": 100, \"success\": True},\n ]\n\n results, summary = validator.validate_batch(records, \"benchmark_result\")\n\n assert summary[\"total\"] == 4\n assert summary[\"valid\"] == 1\n assert summary[\"invalid\"] == 3\n assert summary[\"total_issues\"] >= 3\n\n\n# =============================================================================\n# Custom Schema Tests\n# =============================================================================\n\n\nclass TestCustomSchema:\n def test_create_and_use_custom_schema(self, validator):\n \"\"\"Test creating and registering a custom schema.\"\"\"\n from .validator import Schema, FieldSchema\n\n # Create a custom schema for a new data type\n custom_schema = Schema(\n name=\"agent_config\",\n fields=[\n FieldSchema(name=\"agent_id\", types=str, required=True, pattern=r\"^agent-\\d+$\"),\n FieldSchema(name=\"model\", types=str, required=True),\n FieldSchema(name=\"temperature\", types=float, required=False, min_value=0.0, max_value=2.0),\n FieldSchema(name=\"max_tokens\", types=int, required=False, min_value=1, max_value=100000),\n ],\n )\n\n validator.register_schema(custom_schema)\n\n # Valid data\n valid_data = {\n \"agent_id\": \"agent-001\",\n \"model\": \"claude-3-opus\",\n \"temperature\": 0.7,\n \"max_tokens\": 4096,\n }\n result = validator.validate(valid_data, \"agent_config\")\n assert result.valid\n\n # Invalid agent_id pattern\n invalid_data = {\n \"agent_id\": \"bad-id\",\n \"model\": \"claude-3-opus\",\n }\n result = validator.validate(invalid_data, \"agent_config\")\n assert not result.valid\n\n def test_schema_with_extra_validator(self, validator):\n \"\"\"Test schema with custom cross-field validation.\"\"\"\n from .validator import Schema, FieldSchema, ValidationIssue, ValidationSeverity\n\n def validate_time_range(data: dict) -> list[ValidationIssue]:\n \"\"\"Ensure end_time > start_time.\"\"\"\n if \"start_ms\" in data and \"end_ms\" in data:\n if data[\"end_ms\"] \u003c= data[\"start_ms\"]:\n return [ValidationIssue(\n message=\"end_ms must be greater than start_ms\",\n severity=ValidationSeverity.ERROR,\n rule=\"time_range\",\n )]\n return []\n\n schema = Schema(\n name=\"time_span\",\n fields=[\n FieldSchema(name=\"start_ms\", types=(int, float), required=True),\n FieldSchema(name=\"end_ms\", types=(int, float), required=True),\n ],\n )\n schema.add_validator(validate_time_range)\n\n validator.register_schema(schema)\n\n # Valid time range\n result = validator.validate({\"start_ms\": 100, \"end_ms\": 200}, \"time_span\")\n assert result.valid\n\n # Invalid: end before start\n result = validator.validate({\"start_ms\": 200, \"end_ms\": 100}, \"time_span\")\n assert not result.valid\n assert any(\"time_range\" in (i.rule or \"\") for i in result.issues)\n\n\n# =============================================================================\n# Performance / Stress Tests\n# =============================================================================\n\n\nclass TestValidationPerformance:\n def test_batch_validation_large_dataset(self, validator, valid_benchmark_result):\n \"\"\"Test batch validation handles large datasets.\"\"\"\n # Create 1000 records\n records = [valid_benchmark_result.copy() for _ in range(1000)]\n\n results, summary = validator.validate_batch(records, \"benchmark_result\")\n\n assert summary[\"total\"] == 1000\n assert summary[\"valid\"] == 1000\n assert summary[\"validation_rate\"] == 1.0\n\n def test_deeply_nested_details(self, validator, valid_benchmark_result):\n \"\"\"Test validation handles deeply nested details structures.\"\"\"\n valid_benchmark_result[\"details\"] = {\n \"level1\": {\n \"level2\": {\n \"level3\": {\n \"data\": [1, 2, 3],\n \"nested_list\": [{\"a\": 1}, {\"b\": 2}],\n }\n }\n }\n }\n\n result = validator.validate_benchmark_result(valid_benchmark_result)\n assert result.valid\n\n\n# =============================================================================\n# Run Tests\n# =============================================================================\n\n\nif __name__ == \"__main__\":\n pytest.main([__file__, \"-v\"])\n","content_type":"text/x-python; charset=utf-8","language":"python","size":36488,"content_sha256":"2d2b3faddc3ecf7474fa030edc3c6ff53ac95a0fe882325c01b730e4dfc206c3"},{"filename":"benchmarks/pipeline/transform.py","content":"\"\"\"\nData Transformation Module for Benchmark Pipeline\n\nHandles:\n- Schema validation\n- Field normalization (snake_case, date formats, units)\n- Derived value calculations (metrics, aggregations)\n- Graceful error handling with detailed error reports\n\"\"\"\n\nimport json\nimport re\nfrom dataclasses import dataclass, field\nfrom datetime import datetime\nfrom enum import Enum\nfrom pathlib import Path\nfrom typing import Any, Callable, TypeVar\n\nT = TypeVar(\"T\")\n\n\nclass TransformError(Exception):\n \"\"\"Base exception for transformation errors.\"\"\"\n\n def __init__(self, message: str, field: str | None = None, value: Any = None):\n self.field = field\n self.value = value\n super().__init__(message)\n\n\nclass ValidationError(TransformError):\n \"\"\"Schema validation failed.\"\"\"\n\n pass\n\n\nclass NormalizationError(TransformError):\n \"\"\"Field normalization failed.\"\"\"\n\n pass\n\n\nclass CalculationError(TransformError):\n \"\"\"Derived value calculation failed.\"\"\"\n\n pass\n\n\nclass ErrorPolicy(Enum):\n \"\"\"How to handle transformation errors.\"\"\"\n\n FAIL_FAST = \"fail_fast\" # Stop on first error\n COLLECT = \"collect\" # Collect all errors, continue processing\n SKIP = \"skip\" # Skip invalid records, log warning\n\n\n@dataclass\nclass TransformResult:\n \"\"\"Result of a transformation operation.\"\"\"\n\n success: bool\n data: dict[str, Any] | None = None\n errors: list[TransformError] = field(default_factory=list)\n warnings: list[str] = field(default_factory=list)\n metrics: dict[str, Any] = field(default_factory=dict)\n\n\n@dataclass\nclass BatchTransformResult:\n \"\"\"Result of transforming multiple records.\"\"\"\n\n total: int = 0\n successful: int = 0\n failed: int = 0\n skipped: int = 0\n records: list[dict[str, Any]] = field(default_factory=list)\n errors: list[tuple[int, TransformError]] = field(default_factory=list)\n aggregate_metrics: dict[str, Any] = field(default_factory=dict)\n\n\n# =============================================================================\n# Schema Definitions\n# =============================================================================\n\nBENCHMARK_RESULT_SCHEMA = {\n \"required\": [\"feature\", \"scale\", \"operation\", \"duration_ms\", \"success\"],\n \"types\": {\n \"feature\": str,\n \"scale\": str,\n \"items\": int,\n \"operation\": str,\n \"duration_ms\": (int, float),\n \"success\": bool,\n \"tokens_used\": int,\n \"details\": (dict, type(None)),\n },\n \"allowed_values\": {\n \"scale\": [\"small\", \"medium\", \"large\"],\n \"feature\": [\n \"remember_recall\",\n \"multi_agent\",\n \"semantic_search\",\n \"token_aware\",\n \"delta_sync\",\n \"discovery\",\n ],\n },\n}\n\nRUN_RESULT_SCHEMA = {\n \"required\": [\"run_id\", \"scenario_id\", \"start_time\", \"end_time\", \"result\"],\n \"types\": {\n \"run_id\": str,\n \"scenario_id\": str,\n \"category\": str,\n \"start_time\": str,\n \"end_time\": str,\n \"config\": dict,\n \"events\": list,\n \"result\": dict,\n },\n}\n\nCORE_BENCHMARK_SCHEMA = {\n \"required\": [\"timestamp\", \"results\"],\n \"types\": {\n \"timestamp\": str,\n \"scales\": dict,\n \"results\": list,\n },\n}\n\n\n# =============================================================================\n# Schema Validation\n# =============================================================================\n\n\ndef validate_schema(\n data: dict[str, Any], schema: dict[str, Any], path: str = \"\"\n) -> list[ValidationError]:\n \"\"\"\n Validate data against a schema definition.\n\n Args:\n data: The data to validate\n schema: Schema with 'required', 'types', and 'allowed_values' keys\n path: Current path for nested validation (for error messages)\n\n Returns:\n List of validation errors (empty if valid)\n \"\"\"\n errors = []\n\n # Check required fields\n for field_name in schema.get(\"required\", []):\n if field_name not in data:\n errors.append(\n ValidationError(\n f\"Missing required field: {path}{field_name}\",\n field=f\"{path}{field_name}\",\n )\n )\n\n # Check types\n for field_name, expected_type in schema.get(\"types\", {}).items():\n if field_name in data:\n value = data[field_name]\n if not isinstance(value, expected_type):\n errors.append(\n ValidationError(\n f\"Invalid type for {path}{field_name}: \"\n f\"expected {expected_type}, got {type(value).__name__}\",\n field=f\"{path}{field_name}\",\n value=value,\n )\n )\n\n # Check allowed values\n for field_name, allowed in schema.get(\"allowed_values\", {}).items():\n if field_name in data and data[field_name] not in allowed:\n errors.append(\n ValidationError(\n f\"Invalid value for {path}{field_name}: \"\n f\"'{data[field_name]}' not in {allowed}\",\n field=f\"{path}{field_name}\",\n value=data[field_name],\n )\n )\n\n return errors\n\n\n# =============================================================================\n# Field Normalization\n# =============================================================================\n\n\ndef to_snake_case(name: str) -> str:\n \"\"\"Convert camelCase or PascalCase to snake_case.\"\"\"\n s1 = re.sub(\"(.)([A-Z][a-z]+)\", r\"\\1_\\2\", name)\n return re.sub(\"([a-z0-9])([A-Z])\", r\"\\1_\\2\", s1).lower()\n\n\ndef normalize_keys(data: dict[str, Any]) -> dict[str, Any]:\n \"\"\"Recursively normalize all dictionary keys to snake_case.\"\"\"\n result = {}\n for key, value in data.items():\n new_key = to_snake_case(key)\n if isinstance(value, dict):\n result[new_key] = normalize_keys(value)\n elif isinstance(value, list):\n result[new_key] = [\n normalize_keys(item) if isinstance(item, dict) else item\n for item in value\n ]\n else:\n result[new_key] = value\n return result\n\n\ndef parse_iso_timestamp(ts: str) -> datetime | None:\n \"\"\"Parse ISO 8601 timestamp to datetime.\"\"\"\n if not ts:\n return None\n try:\n # Handle various ISO formats\n ts = ts.replace(\"Z\", \"+00:00\")\n return datetime.fromisoformat(ts)\n except ValueError:\n return None\n\n\ndef normalize_timestamp(data: dict[str, Any], fields: list[str]) -> dict[str, Any]:\n \"\"\"\n Normalize timestamp fields to ISO 8601 format.\n\n Adds '_parsed' suffix fields with datetime objects.\n \"\"\"\n result = data.copy()\n for field_name in fields:\n if field_name in result and result[field_name]:\n parsed = parse_iso_timestamp(str(result[field_name]))\n if parsed:\n result[f\"{field_name}_parsed\"] = parsed\n result[field_name] = parsed.isoformat()\n return result\n\n\ndef normalize_duration(\n value: int | float, from_unit: str = \"ms\", to_unit: str = \"s\"\n) -> float:\n \"\"\"Convert duration between units.\"\"\"\n conversions = {\n (\"ms\", \"s\"): lambda x: x / 1000,\n (\"s\", \"ms\"): lambda x: x * 1000,\n (\"ms\", \"ms\"): lambda x: x,\n (\"s\", \"s\"): lambda x: x,\n }\n key = (from_unit, to_unit)\n if key not in conversions:\n raise NormalizationError(f\"Unknown unit conversion: {from_unit} -> {to_unit}\")\n return conversions[key](value)\n\n\ndef normalize_benchmark_result(record: dict[str, Any]) -> dict[str, Any]:\n \"\"\"\n Normalize a single benchmark result record.\n\n - Converts keys to snake_case\n - Normalizes duration to seconds\n - Adds normalized scale factor\n \"\"\"\n result = normalize_keys(record)\n\n # Add duration in seconds\n if \"duration_ms\" in result:\n result[\"duration_s\"] = normalize_duration(result[\"duration_ms\"], \"ms\", \"s\")\n\n # Add scale factor\n scale_factors = {\"small\": 1, \"medium\": 10, \"large\": 50}\n if \"scale\" in result:\n result[\"scale_factor\"] = scale_factors.get(result[\"scale\"], 1)\n\n return result\n\n\ndef normalize_run_result(record: dict[str, Any]) -> dict[str, Any]:\n \"\"\"Normalize a run result record.\"\"\"\n result = normalize_keys(record)\n result = normalize_timestamp(result, [\"start_time\", \"end_time\"])\n\n # Calculate duration if both timestamps present\n if \"start_time_parsed\" in result and \"end_time_parsed\" in result:\n delta = result[\"end_time_parsed\"] - result[\"start_time_parsed\"]\n result[\"total_duration_s\"] = delta.total_seconds()\n\n return result\n\n\n# =============================================================================\n# Derived Value Calculations\n# =============================================================================\n\n\ndef calculate_throughput(duration_ms: float, items: int) -> float:\n \"\"\"Calculate items per second.\"\"\"\n if duration_ms \u003c= 0:\n return 0.0\n return items / (duration_ms / 1000)\n\n\ndef calculate_latency_stats(durations: list[float]) -> dict[str, float]:\n \"\"\"Calculate latency statistics from a list of durations.\"\"\"\n if not durations:\n return {\"min\": 0, \"max\": 0, \"avg\": 0, \"p50\": 0, \"p95\": 0, \"p99\": 0}\n\n sorted_d = sorted(durations)\n n = len(sorted_d)\n\n def percentile(p: float) -> float:\n k = (n - 1) * p\n f = int(k)\n c = f + 1 if f + 1 \u003c n else f\n return sorted_d[f] + (k - f) * (sorted_d[c] - sorted_d[f])\n\n return {\n \"min\": sorted_d[0],\n \"max\": sorted_d[-1],\n \"avg\": sum(sorted_d) / n,\n \"p50\": percentile(0.50),\n \"p95\": percentile(0.95),\n \"p99\": percentile(0.99),\n }\n\n\ndef calculate_success_rate(results: list[dict[str, Any]]) -> float:\n \"\"\"Calculate success rate from a list of results.\"\"\"\n if not results:\n return 0.0\n successes = sum(1 for r in results if r.get(\"success\", False))\n return successes / len(results)\n\n\ndef calculate_derived_metrics(record: dict[str, Any]) -> dict[str, Any]:\n \"\"\"\n Calculate derived metrics for a benchmark result.\n\n Adds:\n - throughput_per_s: items processed per second\n - efficiency_score: composite efficiency metric\n - cost_estimate: estimated token cost\n \"\"\"\n result = record.copy()\n\n # Throughput\n duration_ms = result.get(\"duration_ms\", 0)\n items = result.get(\"items\", 1)\n result[\"throughput_per_s\"] = calculate_throughput(duration_ms, items)\n\n # Efficiency score (higher is better)\n tokens = result.get(\"tokens_used\", 0)\n if duration_ms > 0:\n # Penalize high token usage and slow operations\n time_factor = 1000 / max(duration_ms, 1)\n token_factor = 1 / max(tokens + 1, 1)\n result[\"efficiency_score\"] = round(time_factor * token_factor * 100, 2)\n else:\n result[\"efficiency_score\"] = 0\n\n # Cost estimate (rough estimate: $0.00001 per token)\n result[\"cost_estimate_usd\"] = tokens * 0.00001\n\n return result\n\n\ndef aggregate_by_feature(results: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:\n \"\"\"\n Aggregate results by feature.\n\n Returns summary statistics per feature.\n \"\"\"\n by_feature: dict[str, list[dict[str, Any]]] = {}\n for r in results:\n feature = r.get(\"feature\", \"unknown\")\n by_feature.setdefault(feature, []).append(r)\n\n aggregated = {}\n for feature, records in by_feature.items():\n durations = [r[\"duration_ms\"] for r in records if \"duration_ms\" in r]\n tokens = [r.get(\"tokens_used\", 0) for r in records]\n\n aggregated[feature] = {\n \"count\": len(records),\n \"success_rate\": calculate_success_rate(records),\n \"latency_stats_ms\": calculate_latency_stats(durations),\n \"total_tokens\": sum(tokens),\n \"avg_tokens\": sum(tokens) / len(tokens) if tokens else 0,\n }\n\n return aggregated\n\n\ndef aggregate_by_scale(results: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:\n \"\"\"Aggregate results by scale.\"\"\"\n by_scale: dict[str, list[dict[str, Any]]] = {}\n for r in results:\n scale = r.get(\"scale\", \"unknown\")\n by_scale.setdefault(scale, []).append(r)\n\n aggregated = {}\n for scale, records in by_scale.items():\n durations = [r[\"duration_ms\"] for r in records if \"duration_ms\" in r]\n items = [r.get(\"items\", 0) for r in records]\n\n aggregated[scale] = {\n \"count\": len(records),\n \"success_rate\": calculate_success_rate(records),\n \"latency_stats_ms\": calculate_latency_stats(durations),\n \"total_items\": sum(items),\n \"throughput_per_s\": calculate_throughput(sum(durations), sum(items))\n if durations\n else 0,\n }\n\n return aggregated\n\n\n# =============================================================================\n# Main Transformation Pipeline\n# =============================================================================\n\n\nclass BenchmarkTransformer:\n \"\"\"\n Main transformer class for benchmark data.\n\n Usage:\n transformer = BenchmarkTransformer(error_policy=ErrorPolicy.COLLECT)\n result = transformer.transform_core_benchmark(data)\n if result.success:\n print(result.data)\n else:\n for error in result.errors:\n print(f\"Error: {error}\")\n \"\"\"\n\n def __init__(self, error_policy: ErrorPolicy = ErrorPolicy.COLLECT):\n self.error_policy = error_policy\n self._custom_validators: list[Callable[[dict], list[ValidationError]]] = []\n self._custom_transformers: list[Callable[[dict], dict]] = []\n\n def add_validator(\n self, validator: Callable[[dict[str, Any]], list[ValidationError]]\n ) -> \"BenchmarkTransformer\":\n \"\"\"Add a custom validation function.\"\"\"\n self._custom_validators.append(validator)\n return self\n\n def add_transformer(\n self, transformer: Callable[[dict[str, Any]], dict[str, Any]]\n ) -> \"BenchmarkTransformer\":\n \"\"\"Add a custom transformation function.\"\"\"\n self._custom_transformers.append(transformer)\n return self\n\n def _handle_error(\n self, error: TransformError, errors: list[TransformError]\n ) -> bool:\n \"\"\"\n Handle an error according to policy.\n\n Returns True if processing should continue, False if it should stop.\n \"\"\"\n if self.error_policy == ErrorPolicy.FAIL_FAST:\n raise error\n errors.append(error)\n return self.error_policy != ErrorPolicy.SKIP\n\n def transform_benchmark_result(\n self, record: dict[str, Any]\n ) -> TransformResult:\n \"\"\"Transform a single benchmark result record.\"\"\"\n errors: list[TransformError] = []\n warnings: list[str] = []\n\n # Validate schema\n schema_errors = validate_schema(record, BENCHMARK_RESULT_SCHEMA)\n for err in schema_errors:\n if not self._handle_error(err, errors):\n return TransformResult(success=False, errors=errors)\n\n # Custom validators\n for validator in self._custom_validators:\n try:\n custom_errors = validator(record)\n for err in custom_errors:\n if not self._handle_error(err, errors):\n return TransformResult(success=False, errors=errors)\n except Exception as e:\n warnings.append(f\"Custom validator failed: {e}\")\n\n # Normalize\n try:\n data = normalize_benchmark_result(record)\n except Exception as e:\n err = NormalizationError(f\"Normalization failed: {e}\")\n if not self._handle_error(err, errors):\n return TransformResult(success=False, errors=errors)\n data = record\n\n # Calculate derived metrics\n try:\n data = calculate_derived_metrics(data)\n except Exception as e:\n err = CalculationError(f\"Metric calculation failed: {e}\")\n if not self._handle_error(err, errors):\n return TransformResult(success=False, errors=errors)\n\n # Custom transformers\n for transformer in self._custom_transformers:\n try:\n data = transformer(data)\n except Exception as e:\n warnings.append(f\"Custom transformer failed: {e}\")\n\n return TransformResult(\n success=len(errors) == 0,\n data=data,\n errors=errors,\n warnings=warnings,\n metrics={\n \"duration_ms\": data.get(\"duration_ms\"),\n \"tokens_used\": data.get(\"tokens_used\", 0),\n },\n )\n\n def transform_core_benchmark(self, data: dict[str, Any]) -> TransformResult:\n \"\"\"Transform a complete core benchmark file.\"\"\"\n errors: list[TransformError] = []\n warnings: list[str] = []\n\n # Validate top-level schema\n schema_errors = validate_schema(data, CORE_BENCHMARK_SCHEMA)\n for err in schema_errors:\n if not self._handle_error(err, errors):\n return TransformResult(success=False, errors=errors)\n\n # Transform metadata\n result = normalize_keys(data)\n result = normalize_timestamp(result, [\"timestamp\"])\n\n # Transform each result record\n transformed_results = []\n for i, record in enumerate(data.get(\"results\", [])):\n record_result = self.transform_benchmark_result(record)\n if record_result.data:\n transformed_results.append(record_result.data)\n errors.extend(record_result.errors)\n warnings.extend(record_result.warnings)\n\n result[\"results\"] = transformed_results\n\n # Add aggregate metrics\n result[\"aggregates\"] = {\n \"by_feature\": aggregate_by_feature(transformed_results),\n \"by_scale\": aggregate_by_scale(transformed_results),\n \"overall\": {\n \"total_records\": len(transformed_results),\n \"success_rate\": calculate_success_rate(transformed_results),\n \"total_tokens\": sum(\n r.get(\"tokens_used\", 0) for r in transformed_results\n ),\n },\n }\n\n return TransformResult(\n success=len(errors) == 0,\n data=result,\n errors=errors,\n warnings=warnings,\n metrics=result[\"aggregates\"][\"overall\"],\n )\n\n def transform_run_result(self, data: dict[str, Any]) -> TransformResult:\n \"\"\"Transform a run result record.\"\"\"\n errors: list[TransformError] = []\n\n # Validate schema\n schema_errors = validate_schema(data, RUN_RESULT_SCHEMA)\n for err in schema_errors:\n if not self._handle_error(err, errors):\n return TransformResult(success=False, errors=errors)\n\n # Normalize\n result = normalize_run_result(data)\n\n # Transform events\n if \"events\" in result:\n result[\"events\"] = [normalize_keys(e) for e in result[\"events\"]]\n result[\"event_summary\"] = {\n \"count\": len(result[\"events\"]),\n \"agents\": list({e.get(\"agent\") for e in result[\"events\"]}),\n \"actions\": list({e.get(\"action\") for e in result[\"events\"]}),\n }\n\n return TransformResult(\n success=len(errors) == 0,\n data=result,\n errors=errors,\n )\n\n def transform_batch(\n self, records: list[dict[str, Any]], record_type: str = \"benchmark\"\n ) -> BatchTransformResult:\n \"\"\"\n Transform a batch of records.\n\n Args:\n records: List of records to transform\n record_type: Type of record ('benchmark' or 'run')\n \"\"\"\n batch_result = BatchTransformResult(total=len(records))\n\n transform_fn = (\n self.transform_benchmark_result\n if record_type == \"benchmark\"\n else self.transform_run_result\n )\n\n for i, record in enumerate(records):\n try:\n result = transform_fn(record)\n if result.success and result.data:\n batch_result.records.append(result.data)\n batch_result.successful += 1\n elif self.error_policy == ErrorPolicy.SKIP:\n batch_result.skipped += 1\n else:\n batch_result.failed += 1\n for err in result.errors:\n batch_result.errors.append((i, err))\n except TransformError as e:\n batch_result.failed += 1\n batch_result.errors.append((i, e))\n\n # Calculate aggregate metrics\n if batch_result.records:\n batch_result.aggregate_metrics = {\n \"success_rate\": batch_result.successful / batch_result.total,\n \"by_feature\": aggregate_by_feature(batch_result.records),\n \"by_scale\": aggregate_by_scale(batch_result.records),\n }\n\n return batch_result\n\n\n# =============================================================================\n# CSV Output Helpers\n# =============================================================================\n\n\ndef flatten_record(record: dict[str, Any], prefix: str = \"\") -> dict[str, Any]:\n \"\"\"Flatten nested dictionary for CSV output.\"\"\"\n flat = {}\n for key, value in record.items():\n full_key = f\"{prefix}{key}\" if prefix else key\n if isinstance(value, dict):\n flat.update(flatten_record(value, f\"{full_key}_\"))\n elif isinstance(value, list):\n flat[full_key] = json.dumps(value)\n elif isinstance(value, datetime):\n flat[full_key] = value.isoformat()\n else:\n flat[full_key] = value\n return flat\n\n\ndef records_to_csv_rows(records: list[dict[str, Any]]) -> tuple[list[str], list[list]]:\n \"\"\"\n Convert records to CSV-ready format.\n\n Returns:\n Tuple of (headers, rows)\n \"\"\"\n if not records:\n return [], []\n\n # Flatten all records\n flat_records = [flatten_record(r) for r in records]\n\n # Collect all unique keys\n all_keys = set()\n for r in flat_records:\n all_keys.update(r.keys())\n\n # Sort keys for consistent column order\n headers = sorted(all_keys)\n\n # Build rows\n rows = []\n for r in flat_records:\n row = [r.get(h, \"\") for h in headers]\n rows.append(row)\n\n return headers, rows\n\n\n# =============================================================================\n# Convenience Functions\n# =============================================================================\n\n\ndef transform_file(\n input_path: str | Path,\n error_policy: ErrorPolicy = ErrorPolicy.COLLECT,\n) -> TransformResult:\n \"\"\"\n Transform a JSON benchmark file.\n\n Auto-detects file type based on content structure.\n \"\"\"\n path = Path(input_path)\n if not path.exists():\n return TransformResult(\n success=False,\n errors=[ValidationError(f\"File not found: {path}\")],\n )\n\n try:\n with open(path) as f:\n data = json.load(f)\n except json.JSONDecodeError as e:\n return TransformResult(\n success=False,\n errors=[ValidationError(f\"Invalid JSON: {e}\")],\n )\n\n transformer = BenchmarkTransformer(error_policy=error_policy)\n\n # Auto-detect type\n if \"results\" in data and isinstance(data.get(\"results\"), list):\n # Core benchmark format\n return transformer.transform_core_benchmark(data)\n elif \"run_id\" in data:\n # Run result format\n return transformer.transform_run_result(data)\n else:\n # Try as single benchmark result\n return transformer.transform_benchmark_result(data)\n\n\ndef transform_and_export(\n input_path: str | Path,\n output_csv_path: str | Path | None = None,\n error_policy: ErrorPolicy = ErrorPolicy.COLLECT,\n) -> TransformResult:\n \"\"\"\n Transform a JSON file and optionally export to CSV.\n\n Args:\n input_path: Path to input JSON file\n output_csv_path: Optional path for CSV output\n error_policy: How to handle errors\n\n Returns:\n TransformResult with transformed data\n \"\"\"\n import csv\n\n result = transform_file(input_path, error_policy)\n\n if result.success and result.data and output_csv_path:\n # Get records to export\n records = result.data.get(\"results\", [result.data])\n\n headers, rows = records_to_csv_rows(records)\n\n output_path = Path(output_csv_path)\n output_path.parent.mkdir(parents=True, exist_ok=True)\n\n with open(output_path, \"w\", newline=\"\") as f:\n writer = csv.writer(f)\n writer.writerow(headers)\n writer.writerows(rows)\n\n return result\n","content_type":"text/x-python; charset=utf-8","language":"python","size":24895,"content_sha256":"4bf43dcc0dc4af34d847e58a3fc6624441458db59ee4cebc50947a53b705972f"},{"filename":"benchmarks/pipeline/validator.py","content":"\"\"\"\nData Validation Module for Benchmark Pipeline\n\nProvides comprehensive validation for benchmark data including:\n- Schema validation with detailed error reporting\n- Type checking with support for union types\n- Range and constraint validation\n- Custom validator registration\n- Batch validation with configurable error policies\n\"\"\"\n\nimport re\nfrom dataclasses import dataclass, field\nfrom datetime import datetime\nfrom enum import Enum\nfrom typing import Any, Callable, TypeVar\n\nT = TypeVar(\"T\")\n\n\n# =============================================================================\n# Validation Errors\n# =============================================================================\n\n\nclass ValidationSeverity(Enum):\n \"\"\"Severity level for validation issues.\"\"\"\n ERROR = \"error\" # Must be fixed, blocks processing\n WARNING = \"warning\" # Should be fixed, allows processing\n INFO = \"info\" # Informational, no action needed\n\n\n@dataclass\nclass ValidationIssue:\n \"\"\"A single validation issue.\"\"\"\n message: str\n severity: ValidationSeverity = ValidationSeverity.ERROR\n field: str | None = None\n value: Any = None\n rule: str | None = None\n suggestion: str | None = None\n\n def __str__(self) -> str:\n parts = [f\"[{self.severity.value.upper()}]\"]\n if self.field:\n parts.append(f\"Field '{self.field}':\")\n parts.append(self.message)\n if self.suggestion:\n parts.append(f\"(Suggestion: {self.suggestion})\")\n return \" \".join(parts)\n\n\n@dataclass\nclass ValidationResult:\n \"\"\"Result of validation operations.\"\"\"\n valid: bool\n issues: list[ValidationIssue] = field(default_factory=list)\n validated_data: dict[str, Any] | None = None\n\n @property\n def errors(self) -> list[ValidationIssue]:\n return [i for i in self.issues if i.severity == ValidationSeverity.ERROR]\n\n @property\n def warnings(self) -> list[ValidationIssue]:\n return [i for i in self.issues if i.severity == ValidationSeverity.WARNING]\n\n def merge(self, other: \"ValidationResult\") -> \"ValidationResult\":\n \"\"\"Merge another result into this one.\"\"\"\n return ValidationResult(\n valid=self.valid and other.valid,\n issues=self.issues + other.issues,\n validated_data=other.validated_data if other.validated_data else self.validated_data,\n )\n\n\n# =============================================================================\n# Schema Definitions\n# =============================================================================\n\n\n@dataclass\nclass FieldSchema:\n \"\"\"Schema definition for a single field.\"\"\"\n name: str\n types: tuple[type, ...] | type\n required: bool = False\n nullable: bool = False\n allowed_values: list[Any] | None = None\n min_value: float | None = None\n max_value: float | None = None\n min_length: int | None = None\n max_length: int | None = None\n pattern: str | None = None\n custom_validator: Callable[[Any], bool] | None = None\n\n def validate(self, value: Any, path: str = \"\") -> list[ValidationIssue]:\n \"\"\"Validate a value against this field schema.\"\"\"\n issues = []\n full_path = f\"{path}.{self.name}\" if path else self.name\n\n # Handle None values\n if value is None:\n if not self.nullable and self.required:\n issues.append(ValidationIssue(\n message=f\"Field cannot be null\",\n field=full_path,\n rule=\"nullable\",\n ))\n return issues\n\n # Type check\n expected_types = self.types if isinstance(self.types, tuple) else (self.types,)\n if not isinstance(value, expected_types):\n type_names = \", \".join(t.__name__ for t in expected_types)\n issues.append(ValidationIssue(\n message=f\"Expected type {type_names}, got {type(value).__name__}\",\n field=full_path,\n value=value,\n rule=\"type\",\n suggestion=f\"Convert to {expected_types[0].__name__}\",\n ))\n return issues # Skip further checks if type is wrong\n\n # Allowed values\n if self.allowed_values is not None and value not in self.allowed_values:\n issues.append(ValidationIssue(\n message=f\"Value '{value}' not in allowed values: {self.allowed_values}\",\n field=full_path,\n value=value,\n rule=\"allowed_values\",\n ))\n\n # Numeric range\n if isinstance(value, (int, float)):\n if self.min_value is not None and value \u003c self.min_value:\n issues.append(ValidationIssue(\n message=f\"Value {value} is below minimum {self.min_value}\",\n field=full_path,\n value=value,\n rule=\"min_value\",\n ))\n if self.max_value is not None and value > self.max_value:\n issues.append(ValidationIssue(\n message=f\"Value {value} exceeds maximum {self.max_value}\",\n field=full_path,\n value=value,\n rule=\"max_value\",\n ))\n\n # String length\n if isinstance(value, str):\n if self.min_length is not None and len(value) \u003c self.min_length:\n issues.append(ValidationIssue(\n message=f\"String length {len(value)} is below minimum {self.min_length}\",\n field=full_path,\n value=value,\n rule=\"min_length\",\n ))\n if self.max_length is not None and len(value) > self.max_length:\n issues.append(ValidationIssue(\n message=f\"String length {len(value)} exceeds maximum {self.max_length}\",\n field=full_path,\n value=value,\n rule=\"max_length\",\n ))\n if self.pattern is not None and not re.match(self.pattern, value):\n issues.append(ValidationIssue(\n message=f\"String does not match pattern '{self.pattern}'\",\n field=full_path,\n value=value,\n rule=\"pattern\",\n ))\n\n # Custom validator\n if self.custom_validator is not None:\n try:\n if not self.custom_validator(value):\n issues.append(ValidationIssue(\n message=\"Custom validation failed\",\n field=full_path,\n value=value,\n rule=\"custom\",\n ))\n except Exception as e:\n issues.append(ValidationIssue(\n message=f\"Custom validator raised exception: {e}\",\n field=full_path,\n value=value,\n rule=\"custom\",\n severity=ValidationSeverity.WARNING,\n ))\n\n return issues\n\n\n@dataclass\nclass Schema:\n \"\"\"Complete schema for validating records.\"\"\"\n name: str\n fields: list[FieldSchema] = field(default_factory=list)\n allow_extra_fields: bool = True\n extra_validators: list[Callable[[dict[str, Any]], list[ValidationIssue]]] = field(\n default_factory=list\n )\n\n def add_field(self, field_schema: FieldSchema) -> \"Schema\":\n \"\"\"Add a field to the schema.\"\"\"\n self.fields.append(field_schema)\n return self\n\n def add_validator(\n self, validator: Callable[[dict[str, Any]], list[ValidationIssue]]\n ) -> \"Schema\":\n \"\"\"Add a custom validator.\"\"\"\n self.extra_validators.append(validator)\n return self\n\n def validate(self, data: dict[str, Any], path: str = \"\") -> ValidationResult:\n \"\"\"Validate data against this schema.\"\"\"\n issues = []\n field_names = {f.name for f in self.fields}\n\n # Check required fields\n for field_schema in self.fields:\n if field_schema.required and field_schema.name not in data:\n issues.append(ValidationIssue(\n message=f\"Missing required field\",\n field=f\"{path}.{field_schema.name}\" if path else field_schema.name,\n rule=\"required\",\n ))\n\n # Validate each field\n for field_schema in self.fields:\n if field_schema.name in data:\n issues.extend(field_schema.validate(data[field_schema.name], path))\n\n # Check for extra fields\n if not self.allow_extra_fields:\n extra = set(data.keys()) - field_names\n for extra_field in extra:\n issues.append(ValidationIssue(\n message=f\"Unexpected field '{extra_field}'\",\n field=f\"{path}.{extra_field}\" if path else extra_field,\n rule=\"extra_field\",\n severity=ValidationSeverity.WARNING,\n ))\n\n # Run extra validators\n for validator in self.extra_validators:\n try:\n issues.extend(validator(data))\n except Exception as e:\n issues.append(ValidationIssue(\n message=f\"Extra validator raised exception: {e}\",\n rule=\"extra_validator\",\n severity=ValidationSeverity.WARNING,\n ))\n\n has_errors = any(i.severity == ValidationSeverity.ERROR for i in issues)\n return ValidationResult(\n valid=not has_errors,\n issues=issues,\n validated_data=data if not has_errors else None,\n )\n\n\n# =============================================================================\n# Pre-defined Schemas\n# =============================================================================\n\n\ndef is_iso_timestamp(value: str) -> bool:\n \"\"\"Check if string is a valid ISO 8601 timestamp.\"\"\"\n try:\n value = value.replace(\"Z\", \"+00:00\")\n datetime.fromisoformat(value)\n return True\n except (ValueError, AttributeError):\n return False\n\n\ndef is_uuid(value: str) -> bool:\n \"\"\"Check if string is a valid UUID format.\"\"\"\n uuid_pattern = r\"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$\"\n return bool(re.match(uuid_pattern, value.lower()))\n\n\nBENCHMARK_RESULT_SCHEMA = Schema(\n name=\"benchmark_result\",\n fields=[\n FieldSchema(\n name=\"feature\",\n types=str,\n required=True,\n allowed_values=[\n \"remember_recall\",\n \"multi_agent\",\n \"semantic_search\",\n \"token_aware\",\n \"delta_sync\",\n \"discovery\",\n ],\n ),\n FieldSchema(\n name=\"scale\",\n types=str,\n required=True,\n allowed_values=[\"small\", \"medium\", \"large\"],\n ),\n FieldSchema(\n name=\"operation\",\n types=str,\n required=True,\n min_length=1,\n max_length=100,\n ),\n FieldSchema(\n name=\"duration_ms\",\n types=(int, float),\n required=True,\n min_value=0,\n ),\n FieldSchema(\n name=\"success\",\n types=bool,\n required=True,\n ),\n FieldSchema(\n name=\"items\",\n types=int,\n required=False,\n min_value=0,\n ),\n FieldSchema(\n name=\"tokens_used\",\n types=int,\n required=False,\n min_value=0,\n ),\n FieldSchema(\n name=\"details\",\n types=(dict, type(None)),\n required=False,\n nullable=True,\n ),\n ],\n)\n\n\nRUN_RESULT_SCHEMA = Schema(\n name=\"run_result\",\n fields=[\n FieldSchema(\n name=\"run_id\",\n types=str,\n required=True,\n min_length=1,\n ),\n FieldSchema(\n name=\"scenario_id\",\n types=str,\n required=True,\n min_length=1,\n ),\n FieldSchema(\n name=\"category\",\n types=str,\n required=False,\n ),\n FieldSchema(\n name=\"start_time\",\n types=str,\n required=True,\n custom_validator=is_iso_timestamp,\n ),\n FieldSchema(\n name=\"end_time\",\n types=str,\n required=True,\n custom_validator=is_iso_timestamp,\n ),\n FieldSchema(\n name=\"config\",\n types=dict,\n required=False,\n ),\n FieldSchema(\n name=\"events\",\n types=list,\n required=False,\n ),\n FieldSchema(\n name=\"result\",\n types=dict,\n required=True,\n ),\n ],\n)\n\n\nCORE_BENCHMARK_SCHEMA = Schema(\n name=\"core_benchmark\",\n fields=[\n FieldSchema(\n name=\"timestamp\",\n types=str,\n required=True,\n custom_validator=is_iso_timestamp,\n ),\n FieldSchema(\n name=\"scales\",\n types=dict,\n required=False,\n ),\n FieldSchema(\n name=\"results\",\n types=list,\n required=True,\n ),\n ],\n)\n\n\n# =============================================================================\n# Validator Class\n# =============================================================================\n\n\nclass DataValidator:\n \"\"\"\n Main validator class for benchmark data.\n\n Usage:\n validator = DataValidator()\n result = validator.validate_benchmark_result(record)\n if result.valid:\n print(\"Valid!\")\n else:\n for issue in result.errors:\n print(issue)\n \"\"\"\n\n def __init__(self):\n self._schemas: dict[str, Schema] = {\n \"benchmark_result\": BENCHMARK_RESULT_SCHEMA,\n \"run_result\": RUN_RESULT_SCHEMA,\n \"core_benchmark\": CORE_BENCHMARK_SCHEMA,\n }\n\n def register_schema(self, schema: Schema) -> None:\n \"\"\"Register a custom schema.\"\"\"\n self._schemas[schema.name] = schema\n\n def get_schema(self, name: str) -> Schema | None:\n \"\"\"Get a schema by name.\"\"\"\n return self._schemas.get(name)\n\n def validate(self, data: dict[str, Any], schema_name: str) -> ValidationResult:\n \"\"\"Validate data against a named schema.\"\"\"\n schema = self._schemas.get(schema_name)\n if not schema:\n return ValidationResult(\n valid=False,\n issues=[ValidationIssue(\n message=f\"Unknown schema: {schema_name}\",\n rule=\"schema_lookup\",\n )],\n )\n return schema.validate(data)\n\n def validate_benchmark_result(self, data: dict[str, Any]) -> ValidationResult:\n \"\"\"Validate a single benchmark result.\"\"\"\n return self.validate(data, \"benchmark_result\")\n\n def validate_run_result(self, data: dict[str, Any]) -> ValidationResult:\n \"\"\"Validate a run result record.\"\"\"\n return self.validate(data, \"run_result\")\n\n def validate_core_benchmark(self, data: dict[str, Any]) -> ValidationResult:\n \"\"\"Validate a core benchmark file.\"\"\"\n result = self.validate(data, \"core_benchmark\")\n\n # Also validate nested results\n if \"results\" in data and isinstance(data[\"results\"], list):\n for i, record in enumerate(data[\"results\"]):\n nested_result = self.validate_benchmark_result(record)\n for issue in nested_result.issues:\n issue.field = f\"results[{i}].{issue.field}\" if issue.field else f\"results[{i}]\"\n result = result.merge(nested_result)\n\n return result\n\n def validate_batch(\n self,\n records: list[dict[str, Any]],\n schema_name: str\n ) -> tuple[list[ValidationResult], dict[str, Any]]:\n \"\"\"\n Validate a batch of records.\n\n Returns:\n Tuple of (list of results, summary stats)\n \"\"\"\n results = []\n valid_count = 0\n invalid_count = 0\n total_issues = 0\n\n for record in records:\n result = self.validate(record, schema_name)\n results.append(result)\n if result.valid:\n valid_count += 1\n else:\n invalid_count += 1\n total_issues += len(result.issues)\n\n summary = {\n \"total\": len(records),\n \"valid\": valid_count,\n \"invalid\": invalid_count,\n \"total_issues\": total_issues,\n \"validation_rate\": valid_count / len(records) if records else 0,\n }\n\n return results, summary\n\n\n# =============================================================================\n# Validation Helpers\n# =============================================================================\n\n\ndef validate_json_structure(data: Any, expected_type: type = dict) -> ValidationResult:\n \"\"\"Validate that data has the expected JSON structure.\"\"\"\n if not isinstance(data, expected_type):\n return ValidationResult(\n valid=False,\n issues=[ValidationIssue(\n message=f\"Expected {expected_type.__name__}, got {type(data).__name__}\",\n rule=\"structure\",\n )],\n )\n return ValidationResult(valid=True)\n\n\ndef validate_non_empty(data: dict[str, Any], fields: list[str]) -> list[ValidationIssue]:\n \"\"\"Validate that specified fields are non-empty.\"\"\"\n issues = []\n for field_name in fields:\n value = data.get(field_name)\n if value is not None and not value: # Empty string, list, dict, etc.\n issues.append(ValidationIssue(\n message=f\"Field cannot be empty\",\n field=field_name,\n value=value,\n rule=\"non_empty\",\n ))\n return issues\n\n\ndef validate_cross_field(\n data: dict[str, Any],\n field1: str,\n field2: str,\n comparator: Callable[[Any, Any], bool],\n error_message: str,\n) -> list[ValidationIssue]:\n \"\"\"Validate a relationship between two fields.\"\"\"\n if field1 in data and field2 in data:\n if not comparator(data[field1], data[field2]):\n return [ValidationIssue(\n message=error_message,\n rule=\"cross_field\",\n )]\n return []\n\n\ndef validate_numeric_range(\n value: int | float,\n min_val: float | None = None,\n max_val: float | None = None,\n field_name: str = \"value\",\n) -> list[ValidationIssue]:\n \"\"\"Validate that a numeric value is within a specified range.\"\"\"\n issues = []\n if min_val is not None and value \u003c min_val:\n issues.append(ValidationIssue(\n message=f\"Value {value} is below minimum {min_val}\",\n field=field_name,\n value=value,\n rule=\"range\",\n ))\n if max_val is not None and value > max_val:\n issues.append(ValidationIssue(\n message=f\"Value {value} exceeds maximum {max_val}\",\n field=field_name,\n value=value,\n rule=\"range\",\n ))\n return issues\n\n\ndef validate_string_format(\n value: str,\n pattern: str,\n field_name: str = \"value\",\n format_name: str = \"pattern\",\n) -> list[ValidationIssue]:\n \"\"\"Validate that a string matches a specific format pattern.\"\"\"\n if not re.match(pattern, value):\n return [ValidationIssue(\n message=f\"Value does not match expected {format_name} format\",\n field=field_name,\n value=value,\n rule=\"format\",\n suggestion=f\"Value should match pattern: {pattern}\",\n )]\n return []\n\n\ndef validate_list_items(\n items: list[Any],\n item_validator: Callable[[Any, int], list[ValidationIssue]],\n field_name: str = \"items\",\n) -> list[ValidationIssue]:\n \"\"\"Validate each item in a list using a custom validator.\"\"\"\n issues = []\n for i, item in enumerate(items):\n item_issues = item_validator(item, i)\n for issue in item_issues:\n issue.field = f\"{field_name}[{i}].{issue.field}\" if issue.field else f\"{field_name}[{i}]\"\n issues.extend(item_issues)\n return issues\n\n\ndef validate_consistency(\n data: dict[str, Any],\n rules: list[tuple[str, Callable[[dict[str, Any]], bool], str]],\n) -> list[ValidationIssue]:\n \"\"\"\n Validate data consistency using multiple rules.\n\n Args:\n data: The data to validate\n rules: List of (rule_name, check_function, error_message) tuples\n\n Returns:\n List of validation issues for failed rules\n \"\"\"\n issues = []\n for rule_name, check_fn, error_msg in rules:\n try:\n if not check_fn(data):\n issues.append(ValidationIssue(\n message=error_msg,\n rule=rule_name,\n severity=ValidationSeverity.WARNING,\n ))\n except Exception as e:\n issues.append(ValidationIssue(\n message=f\"Consistency check '{rule_name}' failed: {e}\",\n rule=rule_name,\n severity=ValidationSeverity.WARNING,\n ))\n return issues\n\n\ndef create_benchmark_consistency_validator() -> Callable[[dict[str, Any]], list[ValidationIssue]]:\n \"\"\"Create a validator for benchmark result consistency checks.\"\"\"\n def validator(data: dict[str, Any]) -> list[ValidationIssue]:\n rules = [\n (\n \"success_items_consistency\",\n lambda d: d.get(\"success\", True) or d.get(\"items\", 0) == 0,\n \"Failed operations should have 0 items processed\",\n ),\n (\n \"duration_positive\",\n lambda d: d.get(\"duration_ms\", 0) >= 0,\n \"Duration cannot be negative\",\n ),\n (\n \"tokens_reasonable\",\n lambda d: d.get(\"tokens_used\", 0) \u003c= 1_000_000,\n \"Token usage seems unreasonably high (>1M)\",\n ),\n (\n \"throughput_reasonable\",\n lambda d: (d.get(\"duration_ms\", 1) == 0 or\n d.get(\"items\", 0) / (d.get(\"duration_ms\", 1) / 1000) \u003c= 100_000),\n \"Throughput seems unreasonably high (>100K items/sec)\",\n ),\n ]\n return validate_consistency(data, rules)\n return validator\n","content_type":"text/x-python; charset=utf-8","language":"python","size":22560,"content_sha256":"18f6287463636616a3d651001a560f611dc255a5ee20cf237eed291899069ca7"},{"filename":"benchmarks/results/agent_a_decision.md","content":"# Decision A: Architecture Decision Record Summary\n\n## Retrieved ADR Context\n\nBased on the shared knowledge retrieval memory, I have identified the following relevant architecture decisions:\n\n### ADR-001: Database Selection\n- **Decision**: Use PostgreSQL over MySQL\n- **Context**: Need JSONB support for flexible schema requirements\n- **Date**: 2024-06\n- **Rationale**: PostgreSQL's native JSONB support provides better performance for semi-structured data queries compared to MySQL's JSON type\n\n### ADR-007: Architecture Pattern\n- **Decision**: Monolith architecture (for now)\n- **Context**: Team size \u003c 10 developers\n- **Date**: 2024-09\n- **Rationale**: With a small team, the operational overhead of microservices outweighs the benefits. Monolith allows faster iteration and simpler deployment.\n\n## Decision A Recommendation\n\nFor the current project state, I recommend:\n\n1. **Continue with PostgreSQL** - The JSONB capabilities are essential for our data model flexibility\n2. **Maintain monolith architecture** - Team size hasn't significantly changed; premature decomposition would add complexity without proportional benefit\n3. **Plan for future extraction** - Identify bounded contexts within the monolith that could become services when team scales past 15-20 developers\n\n---\n*Generated by agent_a based on AVM knowledge retrieval*\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":1339,"content_sha256":"2574afbc54b717c029193935d501aa8dfb83d84386d2a5e07cd9d05656032953"},{"filename":"benchmarks/results/agent_b_decision.md","content":"# Decision B: Alternative Architecture Recommendation\n\n## Retrieved ADRs Reviewed\n\n- **ADR-001**: PostgreSQL selected for JSONB support (2024-06)\n- **ADR-007**: Monolith chosen due to team size \u003c 10 (2024-09)\n\n## Conflicting Recommendation\n\nBased on the same ADRs, I recommend **reconsidering these decisions**:\n\n### Database: Consider MongoDB Instead\nWhile ADR-001 chose PostgreSQL for JSONB support, MongoDB provides:\n- Native document storage (no JSONB workaround needed)\n- Better horizontal scaling for future growth\n- More flexible schema evolution\n- The JSONB requirement suggests document-centric data that fits MongoDB naturally\n\n### Architecture: Prepare for Microservices\nADR-007's monolith decision may be short-sighted:\n- Team size is a temporary constraint\n- Monolith-to-microservices migration is expensive\n- Starting with well-defined service boundaries (even in a monolith) is prudent\n- Consider \"modular monolith\" as compromise\n\n## Rationale for Dissent\n\nThe original ADRs optimize for **current state** rather than **future trajectory**. If the team expects growth, locking in PostgreSQL and monolithic architecture creates technical debt.\n\n## Decision\n\n**Recommend revisiting ADR-001 and ADR-007** before major development milestones.\n\n---\n*Agent B - Conflicting Analysis*\n*Generated: 2026-03-23*\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":1316,"content_sha256":"47482c02a53a30470f05d86eac0737f66331baa6d632b4b4ae7b98d5ec1e5be3"},{"filename":"benchmarks/results/avm_features.json","content":"{\n \"timestamp\": \"2026-03-23T14:05:44.644970+00:00\",\n \"results\": {\n \"delta\": {\n \"baseline\": {\n \"tokens_read\": 425,\n \"task_tokens\": 623,\n \"total\": 1048,\n \"found_issues\": true,\n \"output_preview\": \"Current active issues in the last 24 hours:\\n\\n- `CRITICAL`: Database replication lag, logged on `2026-03-23`; status appears active because it is still marked \\u201cinvestigating\\u201d.\\n- `URGENT`: API rate limi\"\n },\n \"avm\": {\n \"tokens_read\": 123,\n \"task_tokens\": 421,\n \"total\": 544,\n \"found_issues\": true,\n \"output_preview\": \"- `CRITICAL`: Database replication lag detected on 2026-03-23, still marked as \\u201cinvestigating\\u201d.\\n\\nThe 2026-03-22 API rate limit incident is not clearly confirmed as still active from the provided logs,\"\n }\n },\n \"digest\": {\n \"baseline\": {\n \"tokens_read\": 471,\n \"task_tokens\": 678,\n \"total\": 1149,\n \"output_preview\": \"This project is a web application for task management, with user authentication, task CRUD, team collaboration, and notifications. The stack is React + TypeScript on the frontend, Python FastAPI on the backend, PostgreSQL for the database, Redis for caching, and RabbitMQ for queueing. The current pr\"\n },\n \"avm\": {\n \"tokens_read\": 191,\n \"task_tokens\": 583,\n \"total\": 774,\n \"output_preview\": \"This project is a web application for task management, with user authentication, task CRUD, team collaboration, and notifications. The stack is React + TypeScript on the frontend, Python FastAPI on the backend, with PostgreSQL, Redis, and RabbitMQ supporting data, caching, and queueing. The current \"\n }\n },\n \"compact_recovery\": {\n \"baseline\": {\n \"tokens_recalled\": 0,\n \"task_tokens\": 234,\n \"total\": 234,\n \"has_specific_answer\": false,\n \"details_found\": {\n \"file\": true,\n \"line\": false,\n \"fix\": false\n },\n \"output_preview\": \"I can\\u2019t specify the exact file line and patch from the current workspace because `/private/var/folders/2h/m6vg_6ps0nx5ldbj_xr55hlh0000gn/T/tmpv8gfuh_8` is empty.\\n\\nI checked:\\n- `pwd` -> `/private/var/folders/2h/m6vg_6ps0nx5ldbj_xr55hlh0000gn/T/tmpv8gfuh_8`\\n- `ls -la` -> only `.` and `..`\\n- no `connec\"\n },\n \"avm\": {\n \"tokens_recalled\": 286,\n \"task_tokens\": 619,\n \"total\": 905,\n \"has_specific_answer\": false,\n \"details_found\": {\n \"file\": false,\n \"line\": false,\n \"fix\": true\n },\n \"output_preview\": \"From the notes, the code change is:\\n\\n```python\\nexcept TimeoutError:\\n if conn:\\n conn.close()\\n raise\\n```\\n\\nThis belongs in the `get_connection()` method, inside the `except TimeoutError` block. The missing `conn.close()` guard is the leak fix.\\n\\nI can\\u2019t truthfully give an exact file path an\"\n }\n },\n \"semantic\": {\n \"baseline\": {\n \"tokens_read\": 0,\n \"task_tokens\": 484,\n \"total\": 484,\n \"found_relevant\": true,\n \"details\": {\n \"ssl\": true,\n \"jwt\": true,\n \"session\": true\n },\n \"output_preview\": \"I can\\u2019t base this on your actual notes because the workspace at [`/private/var/folders/2h/m6vg_6ps0nx5ldbj_xr55hlh0000gn/T/tmpv9p8i16b`](/private/var/folders/2h/m6vg_6ps0nx5ldbj_xr55hlh0000gn/T/tmpv9p8i16b) is empty, so there are no note files to inspect.\\n\\nCommon login failure causes and fixes:\\n\\n- W\"\n },\n \"avm\": {\n \"tokens_read\": 159,\n \"task_tokens\": 532,\n \"total\": 691,\n \"found_relevant\": true,\n \"details\": {\n \"ssl\": false,\n \"jwt\": true,\n \"session\": false\n },\n \"output_preview\": \"A likely cause is that the client is sending the JWT without the required `Bearer ` prefix in the `Authorization` header.\\n\\nCorrect:\\n```http\\nAuthorization: Bearer \u003cjwt>\\n```\\n\\nWrong:\\n```http\\nAuthorization: \u003cjwt>\\n```\\n\\nPossible causes and solutions:\\n- Missing `Bearer ` prefix\\n Solution: Update the clien\"\n }\n }\n }\n}","content_type":"application/json; charset=utf-8","language":"json","size":4149,"content_sha256":"312d23e453e7e5ecea9fc98022a091d05885b7c4ef1b944a05c39e5c6f03dfdf"},{"filename":"benchmarks/results/cc-001_051db298.json","content":"{\n \"run_id\": \"051db298-9f5a-4cf7-961f-5a60696bb65a\",\n \"scenario_id\": \"cc-001\",\n \"category\": \"unknown\",\n \"start_time\": \"2026-03-23T13:20:12.340419+00:00\",\n \"end_time\": \"2026-03-23T13:21:34.275241+00:00\",\n \"config\": {\n \"agent\": \"codex\",\n \"avm_enabled\": false\n },\n \"events\": [\n {\n \"timestamp\": \"2026-03-23T13:20:12.340872+00:00\",\n \"agent\": \"architect\",\n \"action\": \"start\",\n \"details\": {\n \"role\": \"Design API structure and data models\"\n },\n \"tokens_used\": 0,\n \"latency_ms\": 0\n },\n {\n \"timestamp\": \"2026-03-23T13:20:41.933962+00:00\",\n \"agent\": \"architect\",\n \"action\": \"llm_call\",\n \"details\": {\n \"output_preview\": \"I couldn\\u2019t write to [code.py](/private/var/folders/2h/m6vg_6ps0nx5ldbj_xr55hlh0000gn/T/bench_q1q2b58i/code.py) because the workspace is read-only. Thi\",\n \"success\": true\n },\n \"tokens_used\": 645,\n \"latency_ms\": 29576.38692855835\n },\n {\n \"timestamp\": \"2026-03-23T13:20:41.934008+00:00\",\n \"agent\": \"implementer\",\n \"action\": \"start\",\n \"details\": {\n \"role\": \"Write the actual code\"\n },\n \"tokens_used\": 0,\n \"latency_ms\": 0\n },\n {\n \"timestamp\": \"2026-03-23T13:21:13.211638+00:00\",\n \"agent\": \"implementer\",\n \"action\": \"llm_call\",\n \"details\": {\n \"output_preview\": \"I couldn\\u2019t write to [code.py](/private/var/folders/2h/m6vg_6ps0nx5ldbj_xr55hlh0000gn/T/bench_q1q2b58i/code.py) because the workspace is read-only. Thi\",\n \"success\": true\n },\n \"tokens_used\": 668,\n \"latency_ms\": 31276.795148849487\n },\n {\n \"timestamp\": \"2026-03-23T13:21:13.211697+00:00\",\n \"agent\": \"tester\",\n \"action\": \"start\",\n \"details\": {\n \"role\": \"Write unit tests\"\n },\n \"tokens_used\": 0,\n \"latency_ms\": 0\n },\n {\n \"timestamp\": \"2026-03-23T13:21:34.274735+00:00\",\n \"agent\": \"tester\",\n \"action\": \"llm_call\",\n \"details\": {\n \"output_preview\": \"I couldn\\u2019t write tests in this read-only workspace. Put this in `test_code.py`:\\n\\n```python\\nimport importlib\\n\\nimport pytest\\n\\n\\[email protected]\\ndef clien\",\n \"success\": true\n },\n \"tokens_used\": 679,\n \"latency_ms\": 21062.40487098694\n }\n ],\n \"result\": {\n \"success\": true,\n \"mode\": \"baseline\",\n \"token_breakdown\": {\n \"avm_overhead\": 0,\n \"task_tokens\": 1992,\n \"total\": 1992\n },\n \"summary\": {\n \"total_tokens\": 1992,\n \"total_latency_ms\": 81915.58694839478,\n \"memory_operations\": 0,\n \"event_count\": 6\n }\n }\n}","content_type":"application/json; charset=utf-8","language":"json","size":2607,"content_sha256":"348547fb64ae1218c873c98d19f7592366ed30883ee9d5b451a08476a90be62c"},{"filename":"benchmarks/results/cc-001_ec20cb28.json","content":"{\n \"run_id\": \"ec20cb28-bcd4-433e-9bc9-73cd94b4dbff\",\n \"scenario_id\": \"cc-001\",\n \"category\": \"unknown\",\n \"start_time\": \"2026-03-23T11:08:37.756662+00:00\",\n \"end_time\": \"2026-03-23T11:08:37.756842+00:00\",\n \"config\": {\n \"avm_enabled\": true,\n \"gossip_enabled\": false,\n \"dry_run\": true\n },\n \"events\": [\n {\n \"timestamp\": \"2026-03-23T11:08:37.756837+00:00\",\n \"agent\": \"system\",\n \"action\": \"dry_run\",\n \"details\": {\n \"scenario\": \"cc-001\"\n },\n \"tokens_used\": 0,\n \"latency_ms\": 0\n }\n ],\n \"result\": {\n \"success\": true,\n \"dry_run\": true,\n \"summary\": {\n \"total_tokens\": 0,\n \"total_latency_ms\": 0,\n \"memory_operations\": 0,\n \"event_count\": 1\n }\n }\n}","content_type":"application/json; charset=utf-8","language":"json","size":733,"content_sha256":"a86adb7b185e17a319f32d283812afcb4e434e07fb78fcf5cda794077f8140b4"},{"filename":"benchmarks/results/conflict_resolution.md","content":"# Conflict Resolution\n\n## Summary\nResolved conflict between Agent A (uphold ADRs) and Agent B (revisit ADRs).\n\n## Decisions\n\n| Topic | Resolution | Rationale |\n|-------|------------|-----------|\n| **Database (ADR-001)** | **Uphold PostgreSQL** | Migration cost outweighs benefits; JSONB is sufficient for current needs |\n| **Architecture (ADR-007)** | **Adopt modular monolith** | Compromise: maintains simplicity while preparing service boundaries |\n\n## Reasoning\n\n- **PostgreSQL stays**: Agent B's MongoDB argument assumes future scale that isn't guaranteed. PostgreSQL handles document workloads well.\n- **Modular monolith**: Agent B's valid concern about migration cost is addressed by defining bounded contexts now, not full microservices.\n\n---\n*Resolved by resolver agent | 2026-03-23*\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":792,"content_sha256":"6731b9ba18f5000bb90a5b47aa9b5d8844a9f5df6e9a76f854c3a1958eb315e9"},{"filename":"benchmarks/results/PROJECT_DECISIONS.md","content":"# AVM Benchmark Project Decisions\n\n**Created**: 2026-03-23\n**Last Updated**: 2026-03-23\n**Status**: Active Development\n\n## Project Overview\n\nThis benchmark suite evaluates the AVM (Agent Virtual Memory) system for multi-agent collaboration scenarios. The project measures agent efficiency, memory retrieval performance, and collaboration quality.\n\n## Key Architectural Decisions\n\n### 1. Benchmark Categories\n\n**Decision**: Focus on four core benchmark categories:\n- Knowledge Retrieval - Memory precision/recall testing\n- Collaborative Coding - Multi-agent code development\n- Information Sync - Gossip protocol and knowledge transfer\n- Context Accumulation - Long-context memory consolidation\n\n**Rationale**: These categories cover the primary use cases for AVM and allow comparison with existing benchmarks (MARBLE, AWS Multi-Agent, AgentBench).\n\n### 2. Scenario Dataset Structure\n\n**Decision**: Use JSON-based scenario definitions with the following structure:\n- `scenario_id`: Unique identifier\n- `category`: Benchmark category\n- `agents`: List of participating agents with roles\n- `assertions`: Validation criteria\n- `time_limit_seconds`: Execution timeout\n\n**Current State**: 48 scenarios across 8 JSON files in `/scenarios/`\n\n### 3. Metrics System\n\n**Decision**: Track these primary metrics:\n| Metric | Purpose |\n|--------|---------|\n| Task Success Rate | Binary completion + LLM judge |\n| Time to Complete | Wall-clock time |\n| Token Efficiency | tokens / task score |\n| Memory Precision | retrieved_relevant / retrieved_total |\n| Memory Recall | retrieved_relevant / total_relevant |\n\n### 4. Ablation Study Design\n\n**Decision**: Test four configurations:\n1. Baseline (no AVM, no Gossip, no Consolidation)\n2. +AVM only\n3. +AVM +Gossip\n4. Full (AVM + Gossip + Consolidation)\n\n**Rationale**: Isolates the contribution of each AVM component.\n\n### 5. Agent Execution Framework\n\n**Decision**: Use `agent_executor.py` as the central execution harness with support for:\n- Heterogeneous agents (Claude, Codex, etc.)\n- Parallel and sequential execution modes\n- Event logging with JSON format\n\n### 6. Notification Service Architecture\n\n**Decision**: Implement a Redis-backed notification service with:\n- Pydantic-based configuration (`config.py`) with `NOTIF_` env prefix\n- Circuit breaker pattern for fault tolerance (5 failure threshold, 30s recovery)\n- Retry logic with exponential backoff (3 retries, 0.5s base delay)\n- Lazy Redis client initialization with connection pooling\n\n**Rationale**: Production-grade resilience for multi-agent communication infrastructure.\n\n## Current Progress\n\n### Completed\n- Comprehensive benchmark dataset (48 scenarios)\n- Core benchmark suite (6 features x 3 scales)\n- AVM advanced features tests (delta sync, token-aware recall, semantic search)\n- Extreme 4-agent collaboration test (Codex + Claude Opus)\n- Heterogeneous agent support\n- Notification service with circuit breaker (`redis_client.py`)\n- Configuration system with Pydantic validation (`config.py`)\n- Claude-only benchmark (10 scenarios, baseline vs AVM comparison)\n\n### In Progress\n- `notification_service/config.py` - Configuration updates (staged)\n- `notification_service/redis_client.py` - Redis queue client with circuit breaker (modified)\n\n### Pending\n- Full ablation study execution\n- Statistical analysis\n- Visualization and reporting\n\n## Technical Debt\n\n1. Modified files not yet committed:\n - `notification_service/config.py` (staged)\n - `notification_service/redis_client.py` (modified)\n\n2. **Multi-agent isolation failure** - Core benchmark reveals agents can see other agents' private memories:\n - `isolation_check` tests fail at all scales (small/medium/large)\n - `found_other_secret: true` in all isolation tests\n - **Priority: HIGH** - Security concern for production use\n\n3. **Semantic search limited accuracy** - Only 1/3 queries finding related content:\n - \"machine is slow\" → not finding \"performance issues\"\n - \"customer cannot sign in\" → not finding \"authentication errors\"\n - \"too many API calls\" → correctly finding \"rate limiting\"\n\n4. **Discovery list_private failures** - `list_private` operation fails at all scales\n\n## Integration Points\n\n### External Systems\n- Redis for message queue operations\n- LLM APIs (Claude, Codex) for agent execution\n- AVM system for memory operations\n\n### Configuration\n- Environment variables with `NOTIF_` prefix\n- Pydantic-based settings validation\n- Circuit breaker pattern for fault tolerance\n\n## Key Findings from Claude-Only Benchmark\n\n**Test Run**: 10 scenarios, 2026-03-23\n\n| Metric | Baseline | AVM | Analysis |\n|--------|----------|-----|----------|\n| Success Rate | 9/10 (90%) | 7/10 (70%) | AVM introduces complexity |\n| Total Tokens | 7,262 | 4,338 | **40% reduction with AVM** |\n| AVM Overhead | - | 5,298 | Memory operations cost |\n\n**Notable Results**:\n- **cc-005 (Bug Fix)**: AVM saved 37% tokens (1446→913) while maintaining success\n- **cc-002 (CLI Tool)**: AVM saved 51% tokens (1301→633) with equal quality\n- **is-006 (Breaking News)**: AVM saved 38% tokens (2082→1280) - best collaboration scenario\n- **cc-007 (Legacy Refactoring)**: AVM failed (311 tokens vs baseline 1304) - complex multi-step task\n\n**Conclusion**: AVM shows significant token efficiency gains (40% average) but may reduce reliability on complex multi-step collaborative tasks.\n\n## Open Questions\n\n1. ~~Should we expand the scenario dataset beyond 48 scenarios?~~ **Deferred** - Current dataset sufficient for initial analysis\n2. What's the target for memory precision/recall metrics? **Suggested**: >80% for production readiness\n3. How should we weight different metrics in the final score? **Proposed**: Success rate (50%), Token efficiency (30%), Time (20%)\n4. **NEW**: How to address multi-agent isolation failure before production use?\n5. **NEW**: Should semantic search use embedding similarity thresholds?\n\n## Next Steps & Priorities\n\n### High Priority\n1. **Fix multi-agent isolation** - Critical security issue (found_other_secret=true)\n2. **Commit pending changes** - `notification_service/config.py` and `redis_client.py`\n3. **Run full ablation study** - Execute all 4 configurations across 48 scenarios\n\n### Medium Priority\n4. **Improve semantic search** - Currently 33% hit rate on related concepts\n5. **Statistical analysis** - Calculate significance tests for AVM vs baseline\n6. **Generate visualizations** - Create charts for benchmark results\n\n### Low Priority\n7. **Fix discovery list_private** - Non-critical feature failure\n8. **Document findings** - Write technical report summarizing outcomes\n9. **Optimize execution** - Reduce benchmark runtime if needed\n\n## Benchmark Results Summary\n\n### Core Benchmark (6 features x 3 scales)\n\n| Feature | Small | Medium | Large | Status |\n|---------|-------|--------|-------|--------|\n| remember_recall | ✅ 77ms avg write | ✅ 76ms avg write | ✅ 80ms avg write | Working |\n| multi_agent | ❌ isolation fail | ❌ isolation fail | ❌ isolation fail | **BROKEN** |\n| semantic_search | ⚠️ 33% hit rate | ⚠️ 33% hit rate | ⚠️ 33% hit rate | Limited |\n| token_aware | ✅ within budget | ✅ within budget | ✅ within budget | Working |\n| delta_sync | ✅ working | ✅ working | ✅ working | Working |\n| discovery | ❌ list_private fail | ❌ list_private fail | ❌ list_private fail | Partial |\n\n### AVM vs Baseline Token Comparison\n\n| Scenario | Baseline Tokens | AVM Tokens | Change | Note |\n|----------|-----------------|------------|--------|------|\n| cc-001 (REST API) | 1,992 | 3,591 | +80% | AVM overhead dominates |\n| cc-005 (Bug Fix) | 1,169 | 2,662 | +128% | Early benchmark |\n| cc-005 (Claude-only) | 1,446 | 913 | **-37%** | Improved with context |\n| cc-002 (CLI Tool) | 1,301 | 633 | **-51%** | Best AVM efficiency |\n| is-006 (News Prop) | 2,082 | 1,280 | **-38%** | Strong collaboration |\n| kr-001 (Handoff) | 578 | 1,604 | +177% | Simple task overhead |\n| kr-004 (Error Pattern) | 238 | 686 | +188% | Simple task overhead |\n\n**Pattern**: AVM shows token savings on complex collaborative tasks but adds overhead on simple tasks.\n\n---\n\n*Last updated by agent_a - 2026-03-23*\n\n---\n\n## Agent B Continuation Notes (2026-03-23)\n\n### Work Received from Agent A:\n\n1. **PROJECT_DECISIONS.md** - Comprehensive documentation of:\n - 4 benchmark categories with rationale\n - 48 scenarios across 8 JSON files\n - Metrics system (success rate, tokens, memory precision/recall)\n - Ablation study design (4 configurations)\n - Claude-only benchmark results showing 40% token reduction with AVM\n\n2. **Notification Service** - Production-ready components:\n - `config.py` - Pydantic settings with `NOTIF_` prefix (staged)\n - `redis_client.py` - Circuit breaker + retry logic (modified)\n\n3. **Critical Issues Identified**:\n - Multi-agent isolation failure (`found_other_secret: true`)\n - Semantic search 33% hit rate\n - Discovery `list_private` failures\n\n### Agent B Actions:\n\n1. Reviewed all benchmark results (48 scenarios, core benchmark, claude-only)\n2. Verified notification service implementation quality\n3. Confirmed circuit breaker pattern implementation is correct:\n - States: closed → open → half-open → closed\n - Threshold: 5 failures\n - Recovery timeout: 30 seconds\n - Exponential backoff on retries\n\n### Recommendations for Next Agent:\n\n1. **Commit the pending changes** - Both `config.py` and `redis_client.py` are complete\n2. **Address isolation failure** - This is a blocking issue for production\n3. **Consider semantic search improvements** - Embedding similarity thresholds may help\n\n*Updated by agent_b - 2026-03-23*\n\n---\n\n## Night Agent Shift Summary (2026-03-23)\n\n### Shift Handoff Review\n\nVerified all prior work from agent_a and agent_b:\n\n1. **Code Review Complete**:\n - `redis_client.py` (192 lines) - Circuit breaker implementation is production-ready:\n - Three states properly implemented: closed → open → half-open → closed\n - Failure threshold (5) and recovery timeout (30s) are configurable\n - Exponential backoff on retries: `delay * (attempt + 1)`\n - Client reset on failures to prevent stale connections\n - All queue operations (enqueue/dequeue/blocking/requeue) protected by circuit breaker\n\n - `config.py` (47 lines) - Clean Pydantic settings with `NOTIF_` prefix:\n - New settings: `redis_connect_timeout`, `redis_socket_timeout`, `redis_max_retries`, `redis_retry_delay`\n - Circuit breaker settings: `circuit_breaker_threshold`, `circuit_breaker_timeout`\n\n - `k8s/configmap.yaml` - All 6 new environment variables added\n\n2. **Pending Changes Status**:\n - 3 modified files ready for commit\n - 1 untracked file (this document)\n - All changes are coherent and implement the same feature (circuit breaker + resilience)\n\n### Recommendations for Day Agent\n\n**Immediate Actions**:\n1. **Stage and commit** the pending changes:\n ```bash\n git add notification_service/config.py notification_service/k8s/configmap.yaml notification_service/redis_client.py results/PROJECT_DECISIONS.md\n git commit -m \"feat(notification): add circuit breaker and retry logic for Redis resilience\"\n ```\n\n2. **High-priority bug**: Multi-agent isolation failure needs investigation in AVM core, not benchmark code\n\n3. **Ready for ablation study**: All 48 scenarios and execution framework are in place\n\n**Technical Notes**:\n- Circuit breaker does NOT cover `clear()` method - intentional for testing purposes\n- `queue_length()` returns -1 when circuit is open (allows monitoring to detect degraded state)\n- Retry delay uses linear backoff, not exponential (0.5s, 1.0s, 1.5s) - sufficient for Redis\n\n*End of night shift - 2026-03-23*\n*Signed: night_agent*\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":11687,"content_sha256":"c52660f3b57426657f2d49423658bdabe0cf266ad9b398d72681344c1b1ab605"},{"filename":"benchmarks/todo_api/requirements.txt","content":"fastapi>=0.100.0\nuvicorn>=0.23.0\npydantic>=2.0.0\npytest>=7.0.0\nhttpx>=0.24.0\n","content_type":"text/plain; charset=utf-8","language":null,"size":77,"content_sha256":"5dc9b36766626e4868d7222f0a049a596a28b366e8ae34090fb137fb57812428"},{"filename":"CHANGELOG.md","content":"# Changelog\n\nAll notable changes to AVM will be documented in this file.\n\nThe format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),\nand this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).\n\n## [1.1.0] - 2026-03-09\n\n### Added\n- **Tell System**: Cross-agent messaging for important notifications\n - Priority levels: `urgent` (inject into next read), `normal`, `low`\n - Broadcast to all agents via `@all`\n - Automatic injection of urgent messages into file reads\n - `/:inbox` virtual file to view all messages\n - `/tell/\u003cagent>` path for sending messages\n - Expiration support and read tracking\n- **Hook System**: Notifications when tells are sent\n - Shell hooks: Execute command on tell\n - HTTP hooks: POST to webhook URL\n - OpenClaw hooks: Send via sessions_send\n - Config-driven via `hooks.yaml`\n- **Virtual Hook Files**: Configure hooks via filesystem\n - `/hooks/\u003cagent>` - Read/write hook configuration\n - `/hooks/:list` - List all registered hooks\n - `rm /hooks/\u003cagent>` - Delete a hook\n - Format: `type:target?enabled=true&timeout=10`\n- 35 new tests for tell + hook functionality (227 total)\n\n### Usage\n```bash\n# Send a tell (as akashi)\necho \"DB schema changed\" > avm/tell/kearsarge?priority=urgent\necho \"Team meeting\" > avm/tell/@all\n\n# Read inbox\ncat avm/:inbox\n\n# Mark all as read\ncat avm/:inbox?mark=read\n```\n\n### Hook Config Example\n```yaml\nhooks:\n kearsarge:\n on_tell:\n type: shell\n target: \"openclaw notify kearsarge\"\n yuze:\n on_tell:\n type: http\n target: \"http://localhost:3000/webhook\"\n```\n\n## [1.0.0] - 2026-03-06\n\n### Added\n- **Index Handler**: Structured project indexing with status tracking\n - Code signature extraction (Python, JS, Go, Rust)\n - Watch mode for auto-updates\n - Status tracking: clean/dirty/missing\n- **Config Handler**: Agent-writable configuration\n - Layered config: defaults → user → runtime\n - `/.config/` for settings, `/.meta/` for system info\n- **Duplicate Detection**: Write-time similarity check\n - `RememberResult` with `similar` field\n - Jaccard word overlap with FTS candidate retrieval\n- **Mount Daemon**: Background FUSE mount management\n - `avm-mount --daemon`, `stop`, `status`, `restart`\n\n### Changed\n- 69 tests (13 new handler tests)\n- Benchmark results: 89% token savings\n\n## [0.9.0] - 2026-03-05\n\n### Added\n- **FUSE Mount**: Mount AVM as a filesystem with `avm-mount`\n- **Virtual Nodes**: Access metadata via `:meta`, `:links`, `:tags`, `:search`, `:recall`\n- **Renamed**: Project renamed from VFS to AVM\n- **CLI**: New commands `avm`, `avm-mcp`, `avm-mount`\n\n### Changed\n- Package renamed from `vfs` to `avm`\n- Default DB path: `~/.local/share/avm/avm.db` (XDG standard)\n\n## [0.8.0] - 2026-03-05\n\n### Added\n- **Two-pe retrieval**: `avm_browse` + `avm_fetch` for token efficiency\n- 75% token savings on large result sets\n\n## [0.7.0] - 2026-03-05\n\n### Added\n- **MCP Server**: 10 tools for AI agent integration\n- **Linux-style permissions**: rwx bits, ownership, capabilities\n- **API key authentication** for skills\n\n## [0.6.0] - 2026-03-05\n\n### Added\n- Advanced features: subscriptions, decay, compaction\n- Semantic deduplication\n- Derived links\n- Time queries\n- Tag system\n- Access statistics\n- Export/import (JSONL, Markdown)\n- Snapshots\n- Sync to directory\n\n## [0.5.0] - 2026-03-05\n\n### Added\n- Multi-agent support\n- Append-only versioning\n- Audit logging\n- Quota enforcement\n- Namespace permissions\n\n## [0.4.0] - 2026-03-05\n\n### Added\n- Agent Memory with token-aware recall\n- Scoring strategies (balanced, importance, recency, relevance)\n- Compact markdown synthesis\n\n## [0.3.0] - 2026-03-05\n\n### Added\n- Linked retrieval\n- Document synthesis\n- Semantic + FTS + graph expansion\n\n## [0.2.0] - 2026-03-05\n\n### Added\n- Config-driven architecture\n- YAML configuration\n- Pluggable handlers\n\n## [0.1.0] - 2026-03-05\n\n### Added\n- Core AVM functionality\n- SQLite storage with FTS5\n- Knowledge graph (edges)\n- Read/write/search/link operations\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":3999,"content_sha256":"38986a62cfe0e403885f569ec2a775b1a6c304428396ee53a3fc43881a0f534a"},{"filename":"CODE_OF_CONDUCT.md","content":"# Code of Conduct\n\n## Our Pledge\n\nWe pledge to make participation in our project a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.\n\n## Our Standards\n\n**Positive behavior:**\n- Using welcoming and inclusive language\n- Being respectful of differing viewpoints\n- Gracefully accepting constructive criticism\n- Focusing on what is best for the community\n\n**Unacceptable behavior:**\n- Trolling, insulting comments, and personal attacks\n- Public or private harassment\n- Publishing others' private information without permission\n- Other conduct which could reasonably been considered inappropriate\n\n## Enforcement\n\nProject maintainers are responsible for clarifying standards of acceptable behavior and will take appropriate action in response to any instances of unacceptable behavior.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 2.0.\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":1082,"content_sha256":"6fdc419bb0e8ba274559545839da6a3e04a5efaceab227d0ba9adb777b6dcdd8"},{"filename":"CONTRIBUTING.md","content":"# Contributing to AVM\n\nThank you for your interest in contributing to AVM! 🎉\n\n## Getting Started\n\n1. Fork the repository\n2. Clone your fork:\n ```bash\n git clone https://github.com/YOUR_USERNAME/avm.git\n cd avm\n ```\n3. Install in development mode:\n ```bash\n pip install -e \".[dev]\"\n ```\n4. Run tests:\n ```bash\n pytest\n ```\n\n## Development Workflow\n\n1. Create a branch for your feature:\n ```bash\n git checkout -b feature/your-feature-name\n ```\n\n2. Make your changes and write tests\n\n3. Run tests and ensure they pass:\n ```bash\n pytest -v\n ```\n\n4. Commit with a clear message:\n ```bash\n git commit -m \"feat: add your feature description\"\n ```\n\n5. Push and create a Pull Request\n\n## Commit Messages\n\nWe follow [Conventional Commits](https://www.conventionalcommits.org/):\n\n- `feat:` New feature\n- `fix:` Bug fix\n- `docs:` Documentation only\n- `refactor:` Code refactoring\n- `test:` Adding tests\n- `chore:` Maintenance\n\n## Code Style\n\n- Use type hints\n- Write docstrings for public functions\n- Keep functions focused and small\n- Comments in English\n\n## Project Structure\n\n```\navm/\n├── __init__.py # Public API\n├── core.py # VFS core class\n├── store.py # SQLite storage\n├── agent_memory.py # Token-aware recall\n├── fuse_mount.py # FUSE filesystem\n├── mcp_server.py # MCP protocol server\n├── handlers.py # Pluggable handlers\n├── permissions.py # Linux-style permissions\n└── providers/ # Data providers\n```\n\n## Adding a New Handler\n\n```python\nfrom avm import BaseHandler, register_handler\n\nclass MyHandler(BaseHandler):\n def read(self, path, context):\n # Your implementation\n return content\n \n def write(self, path, content, context):\n # Your implementation\n return True\n\nregister_handler('myhandler', MyHandler)\n```\n\n## Adding a New MCP Tool\n\nEdit `avm/mcp_server.py`:\n\n1. Add to `self.tools` in `__init__`\n2. Add tool definition in `get_tool_definitions()`\n3. Implement `_tool_yourname()` method\n\n## Questions?\n\nOpen an issue or start a discussion. We're happy to help!\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":2138,"content_sha256":"cfaff7b39a81da6c3519fba06c128e73a8b42b50670120f735c4d4f4db80429e"},{"filename":"Dockerfile","content":"FROM python:3.12-slim\n\nWORKDIR /app\n\n# Install FUSE (for Linux FUSE support)\nRUN apt-get update && apt-get install -y \\\n fuse3 \\\n libfuse3-dev \\\n && rm -rf /var/lib/apt/lists/*\n\n# Install AVM\nCOPY . .\nRUN pip install -e \".[server]\"\n\n# Default: run HTTP API server (no FUSE in Docker)\nEXPOSE 8765\nENV AVM_AGENT=default\n\nCMD [\"avm-serve\", \"--host\", \"0.0.0.0\", \"--port\", \"8765\"]\n","content_type":"text/plain; charset=utf-8","language":"docker","size":385,"content_sha256":"eb95cfe4e241fe9a7745a189ada63aefa19fec6cf148f08d143fd8c54b5c21bb"},{"filename":"README.md","content":"# AVM - AI Virtual Memory\n\n> **AVM** — 面向多 Agent 的本地零成本共享记忆系统。语义搜索,FUSE 挂载,私有+共享隔离。\n\n## Core Value\n\n- **面向多 Agent 的本地共享记忆** — 多个 Agent 共享同一记忆层,私有空间互不干扰\n- **零成本** — 本地 sentence-transformers (all-MiniLM-L6-v2),无需任何 API key,无网络依赖\n- **语义搜索** — 不是关键词匹配,是向量相似度。\"伊朗军事冲突\" 能找到 \"中东局势紧张\"\n- **FUSE 挂载** — `cat`/`echo`/`ls` 直接操作记忆,shell 脚本和任何工具都能用\n- **多 Agent 隔离** — 私有空间 (`/private/`) + 共享空间 (`/memory/shared/`),协作不混淆\n\n## Why You Need AVM\n\n**The Problem:** LLMs forget everything between sessions. Context windows are limited. RAG retrieves chunks, not structured knowledge.\n\n**AVM solves this:**\n\n| Challenge | Without AVM | With AVM |\n|-----------|-------------|----------|\n| **Multi-agent sync** | Copy-paste, version chaos | Shared namespaces, `:delta` for changes |\n| **Memory isolation** | All-or-nothing access | Private + shared, per-agent permissions |\n| **Context limits** | Fixed window, truncate | Token-aware recall, fit any budget |\n| **Knowledge structure** | Flat vector chunks | Linked graph, typed relationships |\n| **Discovery** | Need exact keywords | Semantic search + browse/explore/timeline |\n\n**Real examples:**\n\n```python\n# Trading agent remembers across sessions\ntrader.remember(\"NVDA RSI at 72, overbought\", importance=0.9, tags=[\"market\"])\n# 3 months later...\ntrader.recall(\"what did I observe about NVDA?\", max_tokens=500)\n\n# Agent forgets what it knows\ntrader.topics() # \"technical: 12, crypto: 8, macro: 5\"\ntrader.timeline(7) # \"Mon: BTC signal, Tue: Fed notes...\"\n\n# Multi-agent collaboration\nanalyst.remember(\"SPY pattern\", namespace=\"shared\")\ntrader.recall(\"market patterns\") # sees analyst's shared memory\n```\n\n## When to Use AVM\n\n**Best for:**\n- 📦 **Shared knowledge** — Company docs, cron configs, market analysis that multiple agents access\n- 🤝 **Multi-agent collaboration** — Agent A writes analysis, Agent B recalls it\n- 🔄 **Incremental sync** — Read only changes since last read with `:delta`\n- 🗂️ **External references** — Paths, schedules, entity descriptions (not file content itself)\n\n**Not needed for:**\n- 🔒 **Private agent memory** — Most agent frameworks have built-in memory tools\n- 📄 **Code indexing** — IDEs and LSP do this better\n- 📝 **Ephemeral notes** — Use TTL or just don't store\n\n**Rule of thumb:** If only one agent needs it, use `/private/` (auto-scoped to your agent). If multiple agents need it, put it in `/memory/shared/`.\n\n## AVM vs MemGPT\n\n| | **MemGPT/Letta** | **AVM** |\n|---|---|---|\n| **Philosophy** | LLM manages its own memory | Explicit API, you control |\n| **Memory decisions** | LLM decides when to store/retrieve | Agent calls `remember()`/`recall()` |\n| **Architecture** | Agent framework | Pure storage layer |\n| **LLM dependency** | Needs LLM for every memory op | No LLM needed |\n| **Multi-agent** | Single agent focus | Built-in isolation + sharing |\n| **Interface** | Python SDK | FUSE mount, MCP, CLI, Python |\n| **Integration** | Self-contained | Works with shell, editors, any tool |\n\n**Analogy:**\n- MemGPT = **Autopilot** (LLM drives)\n- AVM = **Manual transmission** (you drive)\n\n**When to use which:**\n- **MemGPT**: Want autonomous memory, single agent, hands-off\n- **AVM**: Want explicit control, multi-agent, integrate with existing tools\n\n**They can work together:** Use AVM as storage backend, add MemGPT-style logic on top for automatic memory management.\n\n\u003cdetails>\n\u003csummary>\u003cb>🎮 See it in action (click to expand)\u003c/b>\u003c/summary>\n\n```\n ╔═══════════════════════════════════════════════════════════╗\n ║ █████╗ ██╗ ██╗███╗ ███╗ ║\n ║ ██╔══██╗██║ ██║████╗ ████║ ║\n ║ ███████║██║ ██║██╔████╔██║ ║\n ║ ██╔══██║╚██╗ ██╔╝██║╚██╔╝██║ ║\n ║ ██║ ██║ ╚████╔╝ ██║ ╚═╝ ██║ ║\n ║ AI Virtual Memory - Playground ║\n ╚═══════════════════════════════════════════════════════════╝\n\n============================================================\n 1. BASIC READ/WRITE\n============================================================\n✓ Written: /memory/lessons/risk_management.md\n✓ Written: /memory/market/NVDA_analysis.md\n\n📌 Read content:\n # Risk Management Rules\n ## Position Sizing\n - Never risk more than 2% of portfolio on a single trade\n - Use stop-loss orders religiously\n\n============================================================\n 2. FULL-TEXT SEARCH\n============================================================\n📌 Search: 'RSI overbought':\n [0.85] /memory/lessons/risk_management.md\n [0.72] /memory/market/NVDA_analysis.md\n\n============================================================\n 3. KNOWLEDGE GRAPH (LINKING)\n============================================================\n✓ Linked: NVDA_analysis → risk_management (related)\n\n📌 Links from risk_management.md:\n → /memory/market/NVDA_analysis.md (related)\n\n============================================================\n 4. AGENT MEMORY (TOKEN-AWARE RECALL)\n============================================================\n✓ Remembered: NVDA warning (importance: 0.9)\n✓ Remembered: BTC observation (importance: 0.7)\n\n📌 Recall: 'NVDA risk' (max 500 tokens):\n ## Relevant Memory (2 items, ~120 tokens)\n [/memory/private/trader/nvda_warning.md] (0.92)\n NVDA showing weakness. RSI at 72, reduce exposure.\n\n============================================================\n 5. MULTI-AGENT ISOLATION\n============================================================\n✓ Analyst stored: SPY pattern (private to analyst)\n\n📌 Trader tries to recall analyst's memory:\n Cannot access - private to analyst\n\n📌 Trader stats: Private: 3\n📌 Analyst stats: Private: 1\n\n============================================================\n 6. INCREMENTAL COLLABORATION\n============================================================\n# Analyst updates shared report\n$ echo \"New finding\" >> /shared/report.md\n\n# Trader reads only the changes\n$ cat /shared/report.md:delta\n# v3 (2026-03-07 10:30)\n--- +++ @@ -5 +5,2 @@\n+New finding\n\n# Next read shows no changes\n$ cat /shared/report.md:delta\n(no changes)\n\n============================================================\n 6. METADATA & TAGS\n============================================================\n📌 Tag Cloud:\n market: 2, nvda: 1, warning: 1, btc: 1\n\n============================================================\n 7. NAVIGATION & DISCOVERY\n============================================================\n📌 Topics:\n 📁 private: 3 memories\n 🏷️ market: 2, technical: 1, crypto: 1\n\n📌 Timeline (today):\n [14:30] nvda_alert: NVDA RSI at 72...\n [14:25] btc_note: BTC holding $65K...\n\n📌 Workflow: topics() → browse() → explore() → recall()\n\n============================================================\n DEMO COMPLETE 🎉\n============================================================\n```\n\n**Run it yourself:**\n```bash\npip install -e .\npython playground.py\n```\n\n\u003c/details>\n\n## Performance\n\nBenchmarked on Apple M2 Pro, 16GB RAM, macOS 15.7, Python 3.13, SQLite 3.45 (WAL mode).\n\n| Metric | Value | Notes |\n|--------|-------|-------|\n| Write throughput | 468 ops/s | WAL + async embedding |\n| Read throughput (hot) | 724,000 ops/s | LRU cache hit |\n| Read throughput (cold) | 3,300 ops/s | Cache miss → SQLite |\n| Search throughput | 2,000 ops/s | FTS5 full-text |\n| Cache hit rate | 95% | Zipf access pattern |\n| Token savings | 97%+ | vs. loading all memories |\n\n**Key findings:**\n- **LRU cache is the dominant optimization** — 420x read improvement\n- **Multi-agent contention** — SQLite write lock serializes writes; per-agent throughput drops linearly with agent count\n- **Cold start** — First query ~6x slower due to embedding model initialization\n\nSee [detailed benchmarks and ablation study](https://bkmashiro.moe/posts/projects/avm-performance-analysis) for full analysis.\n\n### Multi-Agent Discovery\n\n| Method | Hops | Latency | Architecture |\n|--------|------|---------|--------------|\n| Traditional recall | 4 | ~3.5ms | Per-agent search |\n| TopicIndex | 1 | ~0.5ms | Pre-computed index |\n| Librarian | 1 | ~1.7ms | Centralized router |\n| Gossip | 1 | ~0.5ms | Decentralized bloom filters |\n\n## Features\n\n- **FUSE Mount** - Mount as filesystem, use `ls`, `cat`, `echo`\n- **Virtual Nodes** - Access metadata via `:meta`, `:links`, `:tags`\n- **MCP Server** - Integrate with AI agents via MCP protocol\n- **Agent Memory** - Token-aware recall with scoring strategies\n- **Multi-Agent** - Permissions, quotas, audit logging\n- **Tell System** - Cross-agent messaging with priority levels (urgent/normal/low), webhook delivery\n- **Full-Text Search** - FTS5 (English recommended; Chinese lacks tokenizer support)\n- **Semantic Search** - Local embedding (all-MiniLM-L6-v2), zero API cost, auto-index on write\n- **FAISS Index** - High-performance vector search (21x faster than SQLite brute force)\n- **Hybrid Search** - Combines FTS + semantic for best precision/recall tradeoff\n- **TopicIndex** - O(1) recall for known topics, reduces hop count from 4 to 1\n- **Librarian** - Global knowledge router for multi-agent discovery (95% hop reduction)\n- **Gossip Protocol** - Decentralized agent discovery using bloom filter digests\n- **Memory Consolidation** - Sleep-like memory processing: decay, merge, summarize\n- **Subscriptions** - Path pattern monitoring with webhook push notifications\n- **Memory Digest** - Daily/on-demand summaries of recent activity\n\n## Install\n\n```bash\npip install -e .\n\n# For FUSE mount (optional)\npip install fusepy\n# macOS: brew install macfuse\n# Linux: apt install fuse3\n```\n\n## Quick Start\n\n### Python API\n\n```python\nfrom avm import AVM\n\navm = AVM()\n\n# Read/Write\navm.write(\"/memory/lesson.md\", \"# Trading Lesson\\n\\nRSI > 70 = overbought\")\nnode = avm.read(\"/memory/lesson.md\")\n\n# Search\nresults = avm.search(\"RSI\")\n\n# Agent Memory\nmem = avm.agent_memory(\"akashi\")\nmem.remember(\"NVDA showing weakness\", tags=[\"market\", \"nvda\"])\ncontext = mem.recall(\"NVDA risk\", max_tokens=4000)\n```\n\n### CLI\n\n```bash\n# Read/Write\navm read /memory/lesson.md\navm write /memory/lesson.md --content \"New lesson\"\n\n# Full-text search\navm search \"RSI\"\n\n# Move / rename (DB-level, no FUSE required)\navm mv /memory/old-name.md /memory/new-name.md # single node\navm mv /memory/news- /memory/archive/news- # prefix tree (all children)\navm mv /memory/2024/ /archive/2024/ # directory-style move\n\n# Semantic search (embedding)\navm semantic \"Iran conflict news\" # semantic similarity\navm semantic \"BTC market\" --limit 5 # limit results\navm semantic \"trading\" --agent akashi # agent context\n\n# Agent Memory (token-aware recall, hybrid FTS+embedding)\navm recall \"NVDA risk\" --agent akashi --max-tokens 4000\n```\n\n### FUSE Mount\n\nMount AVM as a filesystem for shell access.\n\n**Requirements:**\n- macOS: `brew install macfuse` (approve system extension in System Settings → Privacy & Security)\n- Linux: `apt install fuse3`\n\n```bash\n# Configure mounts in ~/.config/avm/mounts.yaml\n# Example:\n# mounts:\n# - mountpoint: ~/.openclaw/workspace/avm\n# agent_id: myagent\n\n# Start daemon (manages all mounts)\n# Recommended: use launchd/systemd for auto-start on login\navm-daemon start --daemon # background (double-fork)\navm-daemon start # foreground (for launchd/systemd managed processes)\n\n# Check status\navm-daemon status\n\n# Reload config\navm-daemon reload\n\n# Stop daemon\navm-daemon stop\n\n# Use standard shell commands\nls /mnt/avm/memory/\ncat /mnt/avm/memory/lesson.md\necho \"New insight\" > /mnt/avm/memory/log.md\n\n# Virtual nodes (append suffix to any file path)\ncat /mnt/avm/memory/lesson.md:meta # Metadata (JSON)\ncat /mnt/avm/memory/lesson.md:links # Related nodes\ncat /mnt/avm/memory/lesson.md:tags # Tags\ncat /mnt/avm/memory/lesson.md:ttl # Time-to-live\ncat /mnt/avm/memory/lesson.md:history # Version history\ncat /mnt/avm/memory/:list # Directory listing\ncat '/mnt/avm/memory/:list?limit=10' # Paginated\ncat '/mnt/avm/memory/:list?tag=work' # Filter by tag\ncat '/mnt/avm/memory/:changes?minutes=5' # Recent changes\ncat /mnt/avm/memory/:stats # Statistics\ncat \"/mnt/avm/:search?q=RSI\" # Search\ncat \"/mnt/avm/:recall?q=NVDA\" # Token-aware recall\n\n# Shortcuts - quick access via @xxx prefix\ncat /mnt/avm/memory/:list # Shows: @abc lesson.md Risk management...\ncat /mnt/avm/@abc # Access file by shortcut\ncat /mnt/avm/@abc:meta # Works with suffixes too\n```\n\n### MCP Server\n\n```bash\n# Start MCP server\navm-mcp --user akashi\n```\n\n```yaml\n# mcp_servers.yaml\navm-memory:\n command: avm-mcp\n args: [\"--user\", \"akashi\"]\n```\n\n**MCP Tools:**\n\n| Tool | Description |\n|------|-------------|\n| `avm_recall` | Token-controlled memory retrieval |\n| `avm_browse` | Get paths + summaries (two-pe) |\n| `avm_fetch` | Get full content of selected paths |\n| `avm_remember` | Store memory with tags/importance |\n| `avm_search` | Full-text search |\n| `avm_list` | List by prefix |\n| `avm_read` | Read specific path |\n| `avm_tags` | Tag cloud |\n| `avm_recent` | Time-based queries |\n| `avm_stats` | Statistics |\n\n## Navigation & Discovery\n\nWhen an agent forgets context or doesn't know keywords, use navigation methods:\n\n```python\nmem = avm.agent_memory(\"trader\")\n\n# 1. Topic overview - see what's in memory\nmem.topics()\n# ## Memory Topics\n# ### By Category:\n# 📁 private: 15 memories\n# ### By Tag:\n# 🏷️ technical: 4 occurrences\n# 🏷️ crypto: 3 occurrences\n\n# 2. Browse tree - drill down without keywords\nmem.browse(\"/memory\", depth=2)\n# 📁 private (15)\n# 📁 trader (15)\n\n# 3. Timeline - \"what did I observe recently?\"\nmem.timeline(days=7, limit=10)\n# ## Timeline (last 7 days)\n# ### 2026-03-05\n# [14:30] nvda_rsi: NVDA RSI at 72...\n# [14:25] btc_support: BTC holding $65K...\n\n# 4. Graph exploration - follow links\nmem.explore(\"/memory/private/trader/nvda.md\", depth=2)\n# ## Starting from: .../nvda.md\n# ### Hop 1:\n# [related] .../macd_analysis.md\n# ### Hop 2:\n# [derived] .../trading_signal.md\n```\n\n**Workflow:** topics() → browse() → explore() → recall()\n\n## Configuration\n\n```yaml\n# config.yaml\nproviders:\n # HTTP API\n - pattern: \"/live/prices/{symbol}\"\n handler: http\n config:\n url: \"https://api.example.com/prices/${symbol}\"\n headers:\n Authorization: \"Bearer ${API_KEY}\"\n ttl: 60\n\n # Script\n - pattern: \"/system/status\"\n handler: script\n config:\n command: \"uptime\"\n\n # Plugin\n - pattern: \"/live/indicators/*\"\n handler: plugin\n config:\n plugin: \"my_plugins.talib\"\n\npermissions:\n - pattern: \"/memory/*\"\n access: rw\n - pattern: \"/live/*\"\n access: ro\n\ndefault_access: ro\n```\n\n### Handlers\n\n| Handler | Description |\n|---------|-------------|\n| `file` | Local filesystem |\n| `http` | REST API calls |\n| `script` | Execute commands |\n| `plugin` | Python plugins |\n| `sqlite` | Database queries |\n| `index` | Structured index with status tracking |\n\n### Index Handler (CLI/MCP only)\n\nTrack project files and extract code signatures:\n\n```bash\n# Via CLI\navm index scan myapp /path/to/project\navm index status myapp\navm index sigs myapp\n```\n\n> Note: Index handler not exposed via FUSE mount, use CLI or MCP.\n\n### Custom Handlers\n\n```python\nfrom avm import BaseHandler, register_handler\n\nclass RedisHandler(BaseHandler):\n def read(self, path, context):\n return self.redis.get(path)\n\nregister_handler('redis', RedisHandler)\n```\n\n## Virtual Nodes\n\nAccess metadata via special suffixes:\n\n| Suffix | Read | Write |\n|--------|------|-------|\n| `:meta` | JSON metadata | Update metadata |\n| `:links` | Related nodes | Add links |\n| `:tags` | Tags (comma-separated) | Set tags |\n| `:shared` | Shared-with agents | Set agents |\n| `:ttl` | Time remaining | Set expiration (5m/2h/1d/never) |\n| `:history` | Change history (version, time, type) | - |\n| `:path` | Relative path | - |\n| `:info` | Available suffixes | - |\n| `:data` | Raw content | - |\n| `:list` | Directory listing | - |\n| `:list?limit=N&offset=M` | Paginated listing | - |\n| `:list?q=keyword` | Search + list | - |\n| `:list?tag=xxx` | Filter by tag | - |\n| `:changes?minutes=N` | Recently modified files | - |\n| `:delta` | Diff since last read (auto-marks) | - |\n| `:mark` | Read position (version) | Update marker |\n| `:stats` | Statistics | - |\n| `:search?q=` | Search results | - |\n| `:recall?q=` | Token-aware recall | - |\n| `:inbox` | Unread messages | Mark all read |\n\n## High-Performance Vector Search\n\nAVM supports multiple vector storage backends for semantic search:\n\n### SQLite (default)\nBrute-force cosine similarity, good for \u003c5k documents:\n```python\nfrom avm.embedding import EmbeddingStore, LocalEmbedding\nstore = EmbeddingStore(avm_store, LocalEmbedding())\n```\n\n### FAISS (recommended for scale)\n21x faster than SQLite, supports exact and approximate search:\n```python\nfrom avm.faiss_store import FAISSEmbeddingStore, get_faiss_store\nfrom avm.embedding import LocalEmbedding\n\n# Flat index (exact, \u003c10k docs)\nstore = FAISSEmbeddingStore(avm_store, LocalEmbedding(), index_type=\"flat\")\n\n# HNSW index (approximate, >10k docs)\nstore = FAISSEmbeddingStore(avm_store, LocalEmbedding(), index_type=\"hnsw\")\n\n# Batch index documents\nstore.add_nodes(nodes)\nstore.save()\n\n# Search\nresults = store.search(\"market analysis\", k=5)\n```\n\n**Benchmark (2000 documents):**\n| Backend | Query Latency | Recall |\n|---------|--------------|--------|\n| SQLite | 58ms | 100% |\n| FAISS Flat | 2.7ms | 100% |\n| FAISS HNSW | 2.7ms | ~90% |\n\n## Subscriptions & Webhooks\n\nMonitor path patterns for changes:\n\n```bash\n# Subscribe with webhook\navm subscribe \"/memory/shared/market/*\" -a trader -m realtime -w http://localhost:3000/hook\n\n# Subscribe with throttling (batches updates)\navm subscribe \"/memory/shared/*\" -a analyst -m throttled -t 60\n\n# List subscriptions\navm subscriptions --agent trader\n\n# Unsubscribe\navm unsubscribe \"/memory/shared/market/*\" -a trader\n```\n\nWebhook payload:\n```json\n{\n \"event\": \"write\",\n \"path\": \"/memory/shared/market/nvda.md\",\n \"pattern\": \"/memory/shared/market/*\",\n \"agent_id\": \"trader\",\n \"timestamp\": \"2026-03-23T09:15:00Z\"\n}\n```\n\n## Cross-Agent Messaging (Tell)\n\nSend important messages to other agents:\n\n```bash\n# Send urgent message (injected into recipient's next read)\necho \"DB schema changed!\" > /mnt/avm/tell/kearsarge?priority=urgent\n\n# Send normal message\necho \"FYI: New API deployed\" > /mnt/avm/tell/kearsarge\n\n# Broadcast to all agents\necho \"Team meeting at 3pm\" > /mnt/avm/tell/@all\n\n# Check your inbox\ncat /mnt/avm/:inbox\n\n# Mark all as read\ncat \"/mnt/avm/:inbox?mark=read\"\n```\n\n**Priority levels:**\n- `urgent` - Injected into next file read (any file)\n- `normal` - Shown in `:inbox`\n- `low` - Only shown when explicitly reading `:inbox`\n\n## Two-Phase Retrieval\n\nFor large result sets, use two-pe retrieval to save tokens:\n\n```bash\n# Phase 1: Get paths + summaries (~200 tokens)\ncat \"/mnt/avm/memory/:search?q=NVDA\"\n# → [0.85] /memory/market/NVDA.md\n# → RSI overbought warning...\n# → [0.72] /memory/lessons/nvda_q4.md\n# → Down 15% after Q4 earnings...\n\n# Phase 2: Get selected content (~300 tokens)\ncat /mnt/avm/memory/market/NVDA.md\n\n# Total: 500 tokens vs 2000 tokens (75% saved)\n```\n\n## Linux-Style Permissions\n\n```python\navm.init_permissions({\n \"users\": {\n \"akashi\": {\n \"groups\": [\"trading\", \"admin\"],\n \"capabilities\": [\"search_all\", \"write\", \"sudo\"]\n },\n \"guest\": {\n \"groups\": [],\n \"capabilities\": []\n }\n }\n})\n\n# Check permissions\nuser = avm.get_user(\"akashi\")\navm.check_permission(user, \"/memory/private/akashi/note.md\", \"write\")\n\n# API keys for skills\nkey = avm.create_api_key(user, paths=[\"/memory/*\"], actions=[\"read\"])\n```\n\n## Multi-Bot Architecture\n\n```\n┌─────────────────────────────────────────┐\n│ Application │\n├─────────────────────────────────────────┤\n│ Akashi → avm-mcp --user akashi ─┐ │\n│ Yuze → avm-mcp --user yuze ─┼─→ DB │\n│ Laffey → avm-mcp --user laffey ─┘ │\n└─────────────────────────────────────────┘\n```\n\n- Each bot its own MCP process\n- Shared database for cross-bot memory\n- Auth at startup, no token per request\n\n## Database\n\nDefault location: `~/.local/share/avm/avm.db`\n\nOverride:\n```bash\navm --db /path/to/custom.db read /memory/note.md\nXDG_DATA_HOME=/custom/path avm read /memory/note.md\n```\n\n## FUSE Daemon Architecture\n\nThe daemon manages multiple FUSE mounts as separate `fork()`ed child processes.\nEach child gets its own `/dev/macfuseN` slot and is started serially (the parent\npolls `stat().st_dev` to confirm the previous mount is live before forking the next).\n\n**GPU Embedding (macOS MPS)**\n\n`os.fork()` invalidates the Apple GPU (MPS/XPC) context in the child. AVM\nsolves this with a per-child `multiprocessing.Pipe` proxy:\n\n```\nparent (MPS GPU) child(akashi)\n LocalEmbedding(MPS) ←────────── PipeEmbeddingProxy(child_conn)\n EmbeddingPipeServer ──send/recv── encode(\"text\") → [0.1, -0.3, ...]\n```\n\n- Model loaded **once** in the parent, shared across all children\n- Each child has its own isolated Pipe fd pair — no cross-agent access\n- `avm recall` / `avm semantic` run in the main process and use MPS directly\n\n## Versions\n\n- **v1.3.0** - GPU Pipe proxy, `avm mv`, fork-based daemon with st_dev polling\n- **v1.2.0** - FAISS vector search (21x speedup), webhooks, hybrid search\n- **v1.1.0** - TopicIndex O(1) recall, Gossip protocol, memory consolidation\n- **v0.9.0** - Rename to AVM, FUSE mount with virtual nodes\n- **v0.8.0** - Two-phase retrieval (browse + fetch)\n- **v0.7.0** - Linux-style permissions, MCP server\n- **v0.6.0** - Advanced features (sync, tags, export)\n- **v0.5.0** - Multi-agent support\n- **v0.4.0** - Agent Memory (token-aware recall)\n- **v0.3.0** - Linked Retrieval + Document Synthesis\n- **v0.2.0** - Config-driven providers/permissions\n- **v0.1.0** - Core VFS\n\n## License\n\nMIT\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":23180,"content_sha256":"77373a14615aa17d8c9ca24883bfcae991d1de63dbdbfaf65aab2701d5ff91c1"},{"filename":"ROADMAP-v2.md","content":"# AVM v2 Roadmap — 效能提升计划\n\n> Created: 2026-03-22\n> Target: 完整的多 agent 协作 + 性能优化 + 通用 benchmark\n\n---\n\n## Phase 1: 核心优化(今日完成)\n\n### 1.1 热记忆缓存 ✅ → 待实现\n- LRU 缓存最近访问的 100 个节点\n- 写入时 invalidate\n- 配置: `cache.max_size`, `cache.ttl`\n\n### 1.2 持久化订阅 ✅\n- `avm/subscriptions.py` — 完成\n- 四种模式: realtime/throttled/batched/digest\n- CLI + FUSE 支持\n\n### 1.3 Activity Feed ✅\n- `/:feed` 虚拟文件 — 完成\n- 显示全局活动流\n\n---\n\n## Phase 2: 多 Agent 协作\n\n### 2.1 任务上下文打包\n```bash\navm bundle /task/project-x --since 7d > handoff.md\n```\n- 收集相关记忆 + 时间线 + 依赖\n- 输出 markdown 或 JSON\n\n### 2.2 知识图谱可视化\n```bash\navm graph /task/project-x --depth 2 --format mermaid\n```\n- 输出 Mermaid/DOT 格式\n- 显示节点关系\n\n### 2.3 软删除 + 垃圾桶\n- 删除移到 `/trash/`\n- 30 天后自动清理\n- `avm restore /trash/file.md`\n\n---\n\n## Phase 3: Benchmark 框架\n\n### 3.1 场景定义\n1. **单 Agent 持续工作**\n - 1000 条记忆写入\n - 100 次 recall\n - 测量: 延迟、token 节省\n\n2. **多 Agent 协作**\n - 5 个 agent 并发读写\n - 订阅通知延迟\n - 冲突检测\n\n3. **冷启动**\n - 大量历史记忆(10k 条)\n - recall 首次查询延迟\n\n### 3.2 对比基线\n- **无 AVM**: 直接文件读写 + grep 搜索\n- **有 AVM**: SQLite + embedding + 衰减\n\n### 3.3 指标\n- ops/sec (读/写/搜索/recall)\n- token 节省率 (recall vs 全量读取)\n- 通知延迟 (订阅 → 收到)\n- 内存占用\n\n---\n\n## 实现顺序\n\n| 优先级 | 功能 | 预计时间 |\n|--------|------|----------|\n| 1 | 热记忆缓存 | 30min |\n| 2 | 软删除/垃圾桶 | 20min |\n| 3 | 任务打包 | 40min |\n| 4 | 知识图谱 | 30min |\n| 5 | Benchmark 重构 | 60min |\n| 6 | 测试 + 文档 | 30min |\n\n---\n\n## 开始!\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":1912,"content_sha256":"d8e949f66e327e2945fe22129448bd27d62c3c14b702c2a67fd35664b7a8190f"},{"filename":"SECURITY.md","content":"# Security Policy\n\n## Supported Versions\n\n| Version | Supported |\n| ------- | ------------------ |\n| 0.9.x | :white_check_mark: |\n| \u003c 0.9 | :x: |\n\n## Reporting a Vulnerability\n\nIf you discover a security vulnerability, please report it by emailing the maintainers directly rather than opening a public issue.\n\n**Do not disclose security vulnerabilities publicly until they have been addressed.**\n\nWe will acknowledge receipt within 48 hours and provide a detailed response within 7 days.\n\n## Security Considerations\n\nAVM handles potentially sensitive data. Please consider:\n\n- **Database Security**: The SQLite database may contain sensitive information. Ensure proper file permissions.\n- **API Keys**: Store API keys in environment variables, not in code.\n- **FUSE Mount**: When using `avm-mount`, be aware of file system permissions.\n- **MCP Server**: The MCP server authenticates via `--user` flag. Ensure proper process isolation.\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":963,"content_sha256":"6d8a3dcc0aa60e86b047337c6a87c2cf13965860fcaf8f6026033d87cdac83bf"},{"filename":"tests/__init__.py","content":"# AVM tests\n","content_type":"text/x-python; charset=utf-8","language":"python","size":12,"content_sha256":"13fffe1beb6a60d085088e29fbecadc9ceda168de5e7e7dc2b7d17fa2438253e"},{"filename":"trading/__init__.py","content":"# trading package — VFS providers for trading bots\n","content_type":"text/x-python; charset=utf-8","language":"python","size":53,"content_sha256":"6bae11c993600cdbabb8edc5334be7a1aa10dacaa623dd4e244cc34c2f943908"}],"content_json":{"type":"doc","content":[{"type":"heading","attrs":{"level":1},"content":[{"text":"AVM Memory Skill","type":"text"}]},{"type":"blockquote","content":[{"type":"paragraph","content":[{"text":"AI Virtual Memory — 多 Agent 共享记忆系统","type":"text"}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"核心能力","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"语义搜索","type":"text","marks":[{"type":"strong"}]},{"text":":embedding + FTS5 混合检索","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Token 感知","type":"text","marks":[{"type":"strong"}]},{"text":":自动截断到 token 预算","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"多 Agent","type":"text","marks":[{"type":"strong"}]},{"text":":私有/共享空间隔离 + 订阅通知","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"生命周期","type":"text","marks":[{"type":"strong"}]},{"text":":自动衰减、归档、垃圾清理","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"TopicIndex","type":"text","marks":[{"type":"strong"}]},{"text":":O(1) recall,已知 topic 1 hop 完成","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Librarian","type":"text","marks":[{"type":"strong"}]},{"text":":多 Agent 知识路由,95% hop 减少","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Gossip Protocol","type":"text","marks":[{"type":"strong"}]},{"text":":去中心化发现,bloom filter digest","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Memory Consolidation","type":"text","marks":[{"type":"strong"}]},{"text":":睡眠式记忆整合","type":"text"}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"快速开始","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"CLI 方式","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# 记忆\navm remember \"NVDA RSI at 72\" --importance 0.8\n\n# 回忆(token 限制)\navm recall \"NVDA analysis\" --max-tokens 2000\n\n# 语义搜索\navm semantic \"technical indicators\"\n\n# 时间旅行\navm read /memory/notes.md --as-of 2026-03-20","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"FUSE 方式","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# 挂载\navm-mount ~/avm --agent myagent\n\n# 读写\ncat ~/avm/memory/notes.md\necho \"New insight\" > ~/avm/memory/insight.md\n\n# 虚拟文件\ncat ~/avm/:search?q=analysis\ncat ~/avm/:recall?q=trading&max_tokens=1000","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Python API","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from avm import AVM\nfrom avm.agent_memory import AgentMemory\n\navm = AVM(agent_id=\"myagent\")\nmem = AgentMemory(avm, \"myagent\")\n\n# 记忆\nmem.remember(\"RSI at 72\", importance=0.8, tags=[\"market\", \"nvda\"])\n\n# 回忆\ncontext = mem.recall(\"technical analysis\", max_tokens=2000)","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"🆕 多 Agent 发现","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"方式 1: Librarian(中心化)","type":"text"}]},{"type":"paragraph","content":[{"text":"当你想知道\"谁知道某个话题\":","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# CLI\navm ask \"who knows about bitcoin trading?\"\navm who-knows \"market analysis\"\navm agents # 列出所有 agent\n\n# Python\nfrom avm.librarian import Librarian\n\nlibrarian = Librarian(avm.store)\nresponse = librarian.query(\"trader\", \"bitcoin analysis\")\n# response.matches → 可访问的内容\n# response.collaboration_suggestions → 建议去问谁","type":"text"}]},{"type":"paragraph","content":[{"text":"延迟","type":"text","marks":[{"type":"strong"}]},{"text":":~1.7ms,95% hop 减少","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"方式 2: Gossip Protocol(去中心化)","type":"text"}]},{"type":"paragraph","content":[{"text":"每个 agent 维护一个 digest(bloom filter),周期性交换:","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# 发布自己的 digest(agent 启动时调用)\navm gossip publish\n\n# 刷新已知 agent 的 digest\navm gossip refresh\n\n# 查询谁可能知道某话题\navm gossip who-knows \"bitcoin\"\n\n# 查看协议状态\navm gossip stats","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from avm.gossip import GossipProtocol\n\n# 启动 gossip(后台线程,每 60 秒交换)\nprotocol = GossipProtocol(avm.store, topic_index, \"my_agent\")\nprotocol.start(interval_seconds=60)\n\n# 查询\nexperts = protocol.who_knows(\"bitcoin\")\n# → [(\"trader\", 0.95), (\"analyst\", 0.82)]\n\n# 手动发布\nprotocol.publish()","type":"text"}]},{"type":"paragraph","content":[{"text":"特点","type":"text","marks":[{"type":"strong"}]},{"text":":","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"无单点故障","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"本地查询 O(1)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"假阳性 \u003c15%,假阴性 0%","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"每 agent 只需 128 bytes digest","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"何时用哪个?","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"场景","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"推荐","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"需要精确结果","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Librarian","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"需要容错","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Gossip","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"离线环境","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Gossip","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"简单部署","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Librarian","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"隐私敏感","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Gossip(只暴露 topic 存在性)","type":"text"}]}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"🆕 TopicIndex(O(1) Recall)","type":"text"}]},{"type":"paragraph","content":[{"text":"写入时自动索引 topics,recall 时先查索引:","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# 自动触发:写入时 TopicIndex.index_path() 被异步调用\navm.write(\"/memory/btc.md\", \"Bitcoin analysis #trading\")\n\n# 回忆时:已知 topic → 1 hop,未知 topic → 4 hops\nmem.recall(\"bitcoin\") # 直接从索引取,1 hop\nmem.recall(\"xyz123\") # 回退到 FTS+embedding,4 hops","type":"text"}]},{"type":"paragraph","content":[{"text":"手动使用","type":"text","marks":[{"type":"strong"}]},{"text":":","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from avm.topic_index import TopicIndex\n\nidx = TopicIndex(avm.store)\n\n# 查询\nresults = idx.query(\"bitcoin trading\", limit=20)\n# → [(\"/memory/btc.md\", 0.85), ...]\n\n# 查看某 topic 的所有路径\nidx.paths_for_topic(\"bitcoin\")\n\n# 相似 topic\nidx.similar_topics(\"bitcoin\")\n# → [(\"crypto\", 0.7), (\"trading\", 0.5)]\n\n# 统计\nidx.stats()\n# → {\"total_topics\": 150, \"total_paths\": 500, ...}","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"🆕 Memory Consolidation(记忆整合)","type":"text"}]},{"type":"paragraph","content":[{"text":"像人睡觉一样整理记忆:","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from avm.consolidation import MemoryConsolidator\n\nconsolidator = MemoryConsolidator(avm.store)\n\n# 完整运行\nresult = consolidator.run(agent_id=\"trader\")\n# result.importance_decayed → 衰减了多少条\n# result.memories_merged → 合并了多少条\n# result.summaries_created → 生成了多少摘要\n\n# 单独操作\nconsolidator.decay_importance() # 衰减旧记忆\nconsolidator.merge_similar() # 合并相似记忆\nconsolidator.extract_summaries() # 提取摘要","type":"text"}]},{"type":"paragraph","content":[{"text":"定时运行","type":"text","marks":[{"type":"strong"}]},{"text":"(cron job):","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from avm.consolidation import schedule_consolidation\n\n# 每 24 小时运行一次\nschedule_consolidation(avm.store, interval_hours=24)","type":"text"}]},{"type":"paragraph","content":[{"text":"配置","type":"text","marks":[{"type":"strong"}]},{"text":":","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from avm.consolidation import ConsolidationConfig\n\nconfig = ConsolidationConfig(\n decay_half_life_days=30.0, # 30天后重要性减半\n min_importance=0.1, # 最低重要性\n similarity_threshold=0.8, # Jaccard 相似度阈值\n min_age_for_merge_days=7.0, # 7天内的不合并\n min_cluster_size=3, # 至少3条才生成摘要\n)","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"订阅协作","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# 订阅共享空间\navm subscribe \"/shared/market/*\" --agent kearsarge --mode throttled --throttle 60\n\n# 跨 agent 消息\necho \"DB changed\" > ~/avm/tell/akashi?priority=urgent","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"生命周期管理","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# 冷记忆\navm cold --threshold 0.3\n\n# 归档\navm archive --threshold 0.2\n\n# 软删除\navm delete /memory/old.md\n\n# 恢复\navm restore /trash/memory/old.md","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"MCP Server","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"avm-mcp --user akashi","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"MCP Tools","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Tool","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"描述","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"avm_recall","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Token 感知记忆检索","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"avm_remember","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"存储新记忆","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"avm_search","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"语义搜索","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"avm_ask","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Librarian 查询","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"avm_who_knows","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"找相关 agent","type":"text"}]}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"最佳实践","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"Agent 启动时","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# 1. 启动 gossip(发布自己的 digest)\nprotocol = GossipProtocol(store, topic_index, agent_id)\nprotocol.start()\n\n# 2. 加载近期 context\ncontext = mem.recall(\"recent work\", max_tokens=2000)","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"定期维护(heartbeat/cron)","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# 1. 刷新 gossip digest\nprotocol.publish()\n\n# 2. 运行 consolidation(每周一次)\nif is_weekly_maintenance:\n consolidator.run()","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"发现其他 agent","type":"text"}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# 快速本地查询(gossip)\nexperts = protocol.who_knows(\"bitcoin\")\n\n# 精确跨域查询(librarian)\nresponse = librarian.query(my_agent, \"bitcoin trading strategies\")\nfor suggestion in response.collaboration_suggestions:\n print(f\"Ask {suggestion.agent_id} about {suggestion.topic}\")","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"性能数据","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"操作","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"延迟","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"说明","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Write","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"2.1ms","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"含异步 TopicIndex","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Read (cached)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"0.001ms","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"LRU 缓存命中","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Recall (TopicIndex)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"0.5ms","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"已知 topic","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Recall (FTS)","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"18ms","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"未知 topic","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Librarian query","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"1.7ms","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"中心化路由","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Gossip who_knows","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"0.5ms","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"本地 bloom filter","type":"text"}]}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"⚠️ 安全注意事项","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"隐私隔离","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"私有空间","type":"text","marks":[{"type":"strong"}]},{"text":":","type":"text"},{"text":"/memory/private/{agent_id}/","type":"text","marks":[{"type":"code_inline"}]},{"text":" 只有 owner 可访问","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"共享空间","type":"text","marks":[{"type":"strong"}]},{"text":":","type":"text"},{"text":"/memory/shared/","type":"text","marks":[{"type":"code_inline"}]},{"text":" 所有 agent 可访问","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Gossip digest","type":"text","marks":[{"type":"strong"}]},{"text":" 只暴露 topic 存在性,不暴露具体内容","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"权限检查","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"写入前检查 ","type":"text"},{"text":"_check_private_access()","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Librarian 返回结果前检查 ","type":"text"},{"text":"_can_access()","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"4 级隐私策略:","type":"text"},{"text":"full","type":"text","marks":[{"type":"code_inline"}]},{"text":"、","type":"text"},{"text":"owner","type":"text","marks":[{"type":"code_inline"}]},{"text":"、","type":"text"},{"text":"existence","type":"text","marks":[{"type":"code_inline"}]},{"text":"、","type":"text"},{"text":"none","type":"text","marks":[{"type":"code_inline"}]}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"建议","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"敏感信息用高 importance(不易被归档)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"跨 agent 共享前检查内容","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"定期 ","type":"text"},{"text":"avm cold","type":"text","marks":[{"type":"code_inline"}]},{"text":" 检查低活跃记忆","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"consolidation 前 ","type":"text"},{"text":"--dry-run","type":"text","marks":[{"type":"code_inline"}]},{"text":" 预览","type":"text"}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"更多信息","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"性能分析博客","type":"text","marks":[{"type":"link","attrs":{"href":"https://bkmashiro.moe/posts/projects/avm-performance-analysis","title":null}}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"技术报告","type":"text","marks":[{"type":"link","attrs":{"href":"docs/TECHNICAL-REPORT-2026-03-22.md","title":null}}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"源码","type":"text","marks":[{"type":"link","attrs":{"href":"https://github.com/aivmem/avm","title":null}}]}]}]}]},{"type":"hr","attrs":{"markup":"---"}}]},"metadata":{"date":"2026-06-05","author":"@skillopedia","source":{"stars":0,"repo_name":"avm","origin_url":"https://github.com/aivmem/avm/blob/HEAD/SKILL.md","repo_owner":"aivmem","body_sha256":"231888fc355896ad5fd3204a4117f3b02d33ecc671368d75137d17cf8ae35d50","cluster_key":"fd2274a3c712ce8ca759b2a91ad9087fdd0c5d3349dfc3d62b4c3f2a0888e290","clean_bundle":{"format":"clean-skill-bundle-v1","source":"aivmem/avm/SKILL.md","attachments":[{"id":"073e574f-0b2c-52f5-8b43-359bf05693b3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/073e574f-0b2c-52f5-8b43-359bf05693b3/attachment.md","path":".github/ISSUE_TEMPLATE/bug_report.md","size":466,"sha256":"0c07d64619c203aa7f0b287ca4470a886f628641bf8afd2e01e1b1730fe938cf","contentType":"text/markdown; charset=utf-8"},{"id":"4b242032-0a13-5844-a2f5-e74914cece61","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/4b242032-0a13-5844-a2f5-e74914cece61/attachment.yml","path":".github/ISSUE_TEMPLATE/config.yml","size":283,"sha256":"3167363ea7ccc572d918c75059c01fef7d2bf1a85facf5c52b191cbeb0623941","contentType":"application/yaml; charset=utf-8"},{"id":"73b63b1d-780b-5acf-90ce-121dd5e6beb6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/73b63b1d-780b-5acf-90ce-121dd5e6beb6/attachment.md","path":".github/ISSUE_TEMPLATE/feature_request.md","size":397,"sha256":"0fef59efce098c6d0827ef40fa96593b53c60363dc1d3c9297852a85dd971c07","contentType":"text/markdown; charset=utf-8"},{"id":"040c1f83-6154-5af2-aa6c-4723759f585f","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/040c1f83-6154-5af2-aa6c-4723759f585f/attachment.md","path":".github/PULL_REQUEST_TEMPLATE.md","size":403,"sha256":"97c64c9c58f970c49f5efbf6009a01b909a8fbf0d679b2fd6f9208eacb172d50","contentType":"text/markdown; charset=utf-8"},{"id":"eeeb5468-5b27-55e6-8109-23e4e2990df8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/eeeb5468-5b27-55e6-8109-23e4e2990df8/attachment.yml","path":".github/workflows/ci.yml","size":941,"sha256":"c711dc5ef6536498d65873808ae96225de2148d0b1ef2fab37a2bc0a29151204","contentType":"application/yaml; charset=utf-8"},{"id":"9fdeb5da-63ab-58d0-b75c-96c068c29920","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/9fdeb5da-63ab-58d0-b75c-96c068c29920/attachment","path":".gitignore","size":413,"sha256":"cb1efd4ff828cd42463030b82b7f1a80f52e12553e8fa49189c8e471cfca06df","contentType":"text/plain; charset=utf-8"},{"id":"05b14033-7e41-5c19-8a71-277bd78bf575","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/05b14033-7e41-5c19-8a71-277bd78bf575/attachment.md","path":"CHANGELOG.md","size":3999,"sha256":"38986a62cfe0e403885f569ec2a775b1a6c304428396ee53a3fc43881a0f534a","contentType":"text/markdown; charset=utf-8"},{"id":"f89647ed-55ff-5dad-8307-9cfa5982d009","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f89647ed-55ff-5dad-8307-9cfa5982d009/attachment.md","path":"CODE_OF_CONDUCT.md","size":1082,"sha256":"6fdc419bb0e8ba274559545839da6a3e04a5efaceab227d0ba9adb777b6dcdd8","contentType":"text/markdown; charset=utf-8"},{"id":"ac3a60de-5b8d-546c-82ea-7766edf1d33e","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/ac3a60de-5b8d-546c-82ea-7766edf1d33e/attachment.md","path":"CONTRIBUTING.md","size":2138,"sha256":"cfaff7b39a81da6c3519fba06c128e73a8b42b50670120f735c4d4f4db80429e","contentType":"text/markdown; charset=utf-8"},{"id":"fcb059e3-dd57-5352-b500-8c7760978174","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/fcb059e3-dd57-5352-b500-8c7760978174/attachment","path":"Dockerfile","size":385,"sha256":"eb95cfe4e241fe9a7745a189ada63aefa19fec6cf148f08d143fd8c54b5c21bb","contentType":"text/plain; charset=utf-8"},{"id":"c5c6b1e3-47e3-5789-9f11-01d75973453c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c5c6b1e3-47e3-5789-9f11-01d75973453c/attachment.md","path":"README.md","size":23180,"sha256":"77373a14615aa17d8c9ca24883bfcae991d1de63dbdbfaf65aab2701d5ff91c1","contentType":"text/markdown; charset=utf-8"},{"id":"d9d4c06e-4505-5fde-bbfc-6ed72dad2eb6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/d9d4c06e-4505-5fde-bbfc-6ed72dad2eb6/attachment.md","path":"ROADMAP-v2.md","size":1912,"sha256":"d8e949f66e327e2945fe22129448bd27d62c3c14b702c2a67fd35664b7a8190f","contentType":"text/markdown; charset=utf-8"},{"id":"4d3a1375-8057-5788-8bf7-0ae246f628a5","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/4d3a1375-8057-5788-8bf7-0ae246f628a5/attachment.md","path":"SECURITY.md","size":963,"sha256":"6d8a3dcc0aa60e86b047337c6a87c2cf13965860fcaf8f6026033d87cdac83bf","contentType":"text/markdown; charset=utf-8"},{"id":"856bb368-ccc5-5f0b-bf31-cfb15d321526","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/856bb368-ccc5-5f0b-bf31-cfb15d321526/attachment.py","path":"avm/__init__.py","size":3193,"sha256":"b46523bd3c37c9a6d9a627afc68c97f2cefa9db7f113aee074c859006248d45c","contentType":"text/x-python; charset=utf-8"},{"id":"434f4110-3342-578d-9221-e3dcd107262d","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/434f4110-3342-578d-9221-e3dcd107262d/attachment.py","path":"avm/advanced.py","size":42310,"sha256":"62a9d1aa71c39c143a534a05cd537ecfffa7e6e7932582b58f9aca7d0151d1f1","contentType":"text/x-python; charset=utf-8"},{"id":"e1332157-095d-5fbd-800f-f69ffc563cfb","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e1332157-095d-5fbd-800f-f69ffc563cfb/attachment.py","path":"avm/agent_memory.py","size":44562,"sha256":"f892ed7c9f5371d5de3138ac337a8088afa0dafd67af5763bcc0760d7f8e107a","contentType":"text/x-python; charset=utf-8"},{"id":"f113bf42-1900-5587-a7cf-42b16d460b4f","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f113bf42-1900-5587-a7cf-42b16d460b4f/attachment.py","path":"avm/api_server.py","size":2416,"sha256":"50d0c69d1684c2507b0d9d313ab98a7219b6a3c206b69dd776fca23c2789ad01","contentType":"text/x-python; charset=utf-8"},{"id":"2c5b7dba-0fbd-5462-8790-d7fc30ec6c05","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/2c5b7dba-0fbd-5462-8790-d7fc30ec6c05/attachment.py","path":"avm/cli.py","size":61202,"sha256":"ceb17620c6b887c6e11bcb2e328242f8f78e66a8b4686b46ff4f2adee0e07bfa","contentType":"text/x-python; charset=utf-8"},{"id":"9b5e4f1b-31cc-5b1d-af72-1c9ccaa1afc8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/9b5e4f1b-31cc-5b1d-af72-1c9ccaa1afc8/attachment.py","path":"avm/config.py","size":8074,"sha256":"a1bf8defefd0c5e6b2db691391ac273cdd00cd7c87bcab5bb1e8cd799e888b1c","contentType":"text/x-python; charset=utf-8"},{"id":"bdc3491e-d6d7-541d-bb1d-b61b01befee9","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/bdc3491e-d6d7-541d-bb1d-b61b01befee9/attachment.py","path":"avm/config_handler.py","size":10604,"sha256":"112c6a03264b692939d2f63d285bb2028c52b76aa9c5784585383c1cee693cab","contentType":"text/x-python; charset=utf-8"},{"id":"21ecc831-1142-516b-9112-833f4c9ba096","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/21ecc831-1142-516b-9112-833f4c9ba096/attachment.py","path":"avm/consolidation.py","size":22187,"sha256":"d100974166df4e104cdffc2e054571362d1018892ffc4a282e9fc5e90567cfc6","contentType":"text/x-python; charset=utf-8"},{"id":"a93bf74c-ec4a-54a7-9162-a8eae12d2e15","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a93bf74c-ec4a-54a7-9162-a8eae12d2e15/attachment.py","path":"avm/core.py","size":34918,"sha256":"6e559c3cad6bdc3502af8acd42f5c2a29147f36875d5131ee173210488893e4e","contentType":"text/x-python; charset=utf-8"},{"id":"f4eaad03-d844-5592-935d-799cf39c5f99","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f4eaad03-d844-5592-935d-799cf39c5f99/attachment.py","path":"avm/daemon.py","size":35456,"sha256":"8afb571ae8d6f23cc6d3bca8ca63d3e4953657791730d357b858b7b4e9a11b70","contentType":"text/x-python; charset=utf-8"},{"id":"0682743a-fdbb-5cc1-8db2-fb918f991d45","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/0682743a-fdbb-5cc1-8db2-fb918f991d45/attachment.py","path":"avm/embedding.py","size":21793,"sha256":"7a424182ebbb3fd1458a015e5cc2d1410545f851570c93268a7348bd040bcc99","contentType":"text/x-python; charset=utf-8"},{"id":"18692738-8a94-5150-9599-0eea90c567b6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/18692738-8a94-5150-9599-0eea90c567b6/attachment.yaml","path":"avm/exec_config.yaml","size":4042,"sha256":"fde09a6f407f771e838fcf969658451e37cc24988b5f4883c1ad3e8c01602bcd","contentType":"application/yaml; charset=utf-8"},{"id":"13a4241f-934b-5b7a-a732-ace340e728d5","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/13a4241f-934b-5b7a-a732-ace340e728d5/attachment.py","path":"avm/exec_handler.py","size":16757,"sha256":"8a79b7c09054a65af296d34b2448d020f4b0f8afb07f05fc32be471796fe170c","contentType":"text/x-python; charset=utf-8"},{"id":"bab7cedf-df06-55ed-816f-f7ea239c1c8c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/bab7cedf-df06-55ed-816f-f7ea239c1c8c/attachment.py","path":"avm/faiss_store.py","size":10332,"sha256":"eba70bfdfc94886be8173fda5d4a15b8fe04861c2e660c4173a165f68ca6e897","contentType":"text/x-python; charset=utf-8"},{"id":"ca4ab5fd-2503-5310-845d-c4997b414f04","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/ca4ab5fd-2503-5310-845d-c4997b414f04/attachment.py","path":"avm/fuse_mount.py","size":58798,"sha256":"58a7f1f65d62fc4ab22cce74c590f258a1f0c0f6d66848bafc1e2ad18081c9cf","contentType":"text/x-python; charset=utf-8"},{"id":"febdfc91-cb82-57ac-bc52-910f59ee748f","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/febdfc91-cb82-57ac-bc52-910f59ee748f/attachment.py","path":"avm/gossip.py","size":12028,"sha256":"1740d372b5f4d1bc824373422c6937ccaf0da9f964ca8eaafdfa6f3bc6bc828c","contentType":"text/x-python; charset=utf-8"},{"id":"6ae4685c-72dd-5ea4-bb3e-d008967efe8f","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/6ae4685c-72dd-5ea4-bb3e-d008967efe8f/attachment.py","path":"avm/graph.py","size":6787,"sha256":"b8869d3d8776ee5396ac3745d09bd4724f17d7f149b217c5ca7780e158438e06","contentType":"text/x-python; charset=utf-8"},{"id":"8a79eda1-95da-5ed9-916c-7fa56061b508","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/8a79eda1-95da-5ed9-916c-7fa56061b508/attachment.py","path":"avm/handlers.py","size":24202,"sha256":"0bed198451159b721a3cbf7f6bf512d5a794d3d1f824da234a19a7e42a86d535","contentType":"text/x-python; charset=utf-8"},{"id":"fc142f19-9bd9-5d69-8f67-e640e44a56d3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/fc142f19-9bd9-5d69-8f67-e640e44a56d3/attachment.py","path":"avm/http_client.py","size":867,"sha256":"2a49d47f733cb73f1d4b5b43f88b59dc2c43d5cb95007bb20242326c5f0f28e6","contentType":"text/x-python; charset=utf-8"},{"id":"2a91e3ed-23e9-5321-a35b-1abbf61a6755","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/2a91e3ed-23e9-5321-a35b-1abbf61a6755/attachment.py","path":"avm/index_handler.py","size":20731,"sha256":"cf3f4f0868156a97f4864299831d3270713491b35d6a8a2e7acb3b7802ade530","contentType":"text/x-python; charset=utf-8"},{"id":"0f06b305-a08f-568a-9864-841a4fc565bf","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/0f06b305-a08f-568a-9864-841a4fc565bf/attachment.py","path":"avm/librarian.py","size":15273,"sha256":"6ef5e80eca64a08aeaec83ba3fe3a4bb71dc4eb69ee8c726e882a48094af0665","contentType":"text/x-python; charset=utf-8"},{"id":"1944bf31-d47c-543b-88e6-bbcde19a0578","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/1944bf31-d47c-543b-88e6-bbcde19a0578/attachment.py","path":"avm/mcp_server.py","size":21622,"sha256":"0b359068b1032ebd538013e2c90ce562d2ae7468e0682a74f226d287a9712b2d","contentType":"text/x-python; charset=utf-8"},{"id":"96f4a15b-3f62-5d56-bcb8-68dc5f77724b","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/96f4a15b-3f62-5d56-bcb8-68dc5f77724b/attachment.py","path":"avm/multi_agent.py","size":13429,"sha256":"f1f68569bfb10e0b06488664892763650554e6ff2fb9a32d276b94034b5df423","contentType":"text/x-python; charset=utf-8"},{"id":"6a521b99-7477-555d-939c-f85b408c5eec","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/6a521b99-7477-555d-939c-f85b408c5eec/attachment.py","path":"avm/node.py","size":3932,"sha256":"586a9786de181c09712a4a9e86d5c843bf7c685644de013db5331d89f72142f1","contentType":"text/x-python; charset=utf-8"},{"id":"c1729d85-2cb1-510e-99ca-f992da30ef1b","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c1729d85-2cb1-510e-99ca-f992da30ef1b/attachment.py","path":"avm/permissions.py","size":18651,"sha256":"f81688014097fbe0a5821e76f63c159b4a1cf7f2a41ecc7e928e7a157fbab928","contentType":"text/x-python; charset=utf-8"},{"id":"20864275-a5b3-5138-870b-9e16541abfd9","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/20864275-a5b3-5138-870b-9e16541abfd9/attachment.py","path":"avm/providers/__init__.py","size":609,"sha256":"7d97668c4b09c4d699749ee39dfde5c700391cc00e94f5e6379d9d61741695d7","contentType":"text/x-python; charset=utf-8"},{"id":"64ce7036-cf8d-5374-bfc0-1a6cdecdc8d3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/64ce7036-cf8d-5374-bfc0-1a6cdecdc8d3/attachment.py","path":"avm/providers/alpaca.py","size":8327,"sha256":"591620bc571bb7078ff98ffd5a0e1b4d4d08049a44c271549cca09d47c04be95","contentType":"text/x-python; charset=utf-8"},{"id":"d129890c-dd37-5128-99bc-1250092c2ff7","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/d129890c-dd37-5128-99bc-1250092c2ff7/attachment.py","path":"avm/providers/base.py","size":2401,"sha256":"d501edac008e1af16fe5b36b62054be96aa73183fb339cad8c15cf69307e5d87","contentType":"text/x-python; charset=utf-8"},{"id":"073d0546-edd2-5d91-88dc-e30dceae924d","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/073d0546-edd2-5d91-88dc-e30dceae924d/attachment.py","path":"avm/providers/http_json.py","size":3866,"sha256":"262e164c8dac68bc45ab997befcd15ff15896a559b0bd0853fea64ee90ef484b","contentType":"text/x-python; charset=utf-8"},{"id":"99a1dfd3-5e47-5627-b0c0-7e668ff6442d","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/99a1dfd3-5e47-5627-b0c0-7e668ff6442d/attachment.py","path":"avm/providers/indicators.py","size":14465,"sha256":"78a2f5220bbef4f1b26a6dbb34bf2f89c993e153e6701bb1ee343e7cfd692e1f","contentType":"text/x-python; charset=utf-8"},{"id":"3f87fa3b-208c-5ecc-ad6b-19ccee1a234f","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/3f87fa3b-208c-5ecc-ad6b-19ccee1a234f/attachment.py","path":"avm/providers/memory.py","size":3095,"sha256":"52227eadb558cb502a28eaadf32aa53d98bd2dd74050eafdd8cd3efeaa5212f1","contentType":"text/x-python; charset=utf-8"},{"id":"43a1b32f-8f01-5e4e-ad00-185b618ee039","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/43a1b32f-8f01-5e4e-ad00-185b618ee039/attachment.py","path":"avm/providers/news.py","size":6204,"sha256":"2047d6b5debe55aa67366ffbbf765d9fe3756934de51248b61f0e47ae7dda3d2","contentType":"text/x-python; charset=utf-8"},{"id":"05706296-5133-5981-8b68-34f91c6348e2","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/05706296-5133-5981-8b68-34f91c6348e2/attachment.py","path":"avm/providers/watchlist.py","size":6233,"sha256":"eed0f68d792e6d63accd205387c5deb10d5f6ed91064a5957360e834833f36c6","contentType":"text/x-python; charset=utf-8"},{"id":"87c46471-e11f-51a5-b296-de5bc8f8307e","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/87c46471-e11f-51a5-b296-de5bc8f8307e/attachment.py","path":"avm/retrieval.py","size":11737,"sha256":"00abe5e8ec07337e2222c228d4724e81c0187b812c099d81086c7e033aeab7f5","contentType":"text/x-python; charset=utf-8"},{"id":"a8fb0152-40e6-52d6-9fde-aade067f40c9","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a8fb0152-40e6-52d6-9fde-aade067f40c9/attachment.py","path":"avm/store.py","size":20203,"sha256":"5123566bfd562387f735aebc0ad09f5097581759f83f12f9c1c4428a7e27bf48","contentType":"text/x-python; charset=utf-8"},{"id":"e0da804f-0892-5181-aa3c-1840595e891e","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e0da804f-0892-5181-aa3c-1840595e891e/attachment.py","path":"avm/subscriptions.py","size":14745,"sha256":"2fdc4c0f1116c0293c5962733ae7f19d03156851240c4e966911c20534330c26","contentType":"text/x-python; charset=utf-8"},{"id":"9d8e65d2-4733-5079-b374-13beae23e7cb","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/9d8e65d2-4733-5079-b374-13beae23e7cb/attachment.py","path":"avm/telemetry.py","size":8339,"sha256":"1b3184b70557296ed840340b90fb1ac46d4dacc66fa5aa01368eb063ffc77026","contentType":"text/x-python; charset=utf-8"},{"id":"d37965b2-00b0-543e-93cf-ec7c3dc85d7a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/d37965b2-00b0-543e-93cf-ec7c3dc85d7a/attachment.py","path":"avm/tell.py","size":22511,"sha256":"70877fdebf83e1404c5ec907a23c0d1ebafa0a5c6fb4c6d1bcd55fb117337163","contentType":"text/x-python; charset=utf-8"},{"id":"b70611d4-b5bb-542f-b19c-3e53979677a1","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b70611d4-b5bb-542f-b19c-3e53979677a1/attachment.py","path":"avm/topic_index.py","size":10312,"sha256":"a4ba678554e471c156706841ce7ce7b8a31e8b5e74c4cc463e7ad454bbe32bf0","contentType":"text/x-python; charset=utf-8"},{"id":"514db0c5-4ff1-5ce8-92d7-9e6c14f141e8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/514db0c5-4ff1-5ce8-92d7-9e6c14f141e8/attachment.py","path":"avm/utils.py","size":256,"sha256":"4fedf4a068d56b68be1e4cc3b8c9e540a3dab104ebf417759a7386985ba2feaf","contentType":"text/x-python; charset=utf-8"},{"id":"0c8f1a88-935f-566b-a894-fbe61f882a85","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/0c8f1a88-935f-566b-a894-fbe61f882a85/attachment.md","path":"benchmarks/MULTIAGENT_BENCH_PLAN.md","size":4746,"sha256":"4d22caae494a9440d9339b47517346ac89a0ba18f119b4a7c05ac6b2846cf530","contentType":"text/markdown; charset=utf-8"},{"id":"656d1d8e-07af-5e94-8f56-5da9819c9f12","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/656d1d8e-07af-5e94-8f56-5da9819c9f12/attachment.py","path":"benchmarks/agent_executor.py","size":10966,"sha256":"86959864978ed12d8afb4d1786a624ae03497c3b9b67b0bdb2164068f6cdae79","contentType":"text/x-python; charset=utf-8"},{"id":"abb9ef09-1c4e-5212-947a-d9e90408bc16","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/abb9ef09-1c4e-5212-947a-d9e90408bc16/attachment.py","path":"benchmarks/avm_integration.py","size":7767,"sha256":"88d15e74bbacf635fa47d80542f6c3823fc9904980ddbbd793f2f881ede6ae36","contentType":"text/x-python; charset=utf-8"},{"id":"e0f38e02-58f9-576f-b907-7ffd3770c737","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e0f38e02-58f9-576f-b907-7ffd3770c737/attachment.py","path":"benchmarks/bench_ablation.py","size":8632,"sha256":"c98a8d6f35500708202ad3e5e1d993221fe6161c8ae9581b5837dc3d108858de","contentType":"text/x-python; charset=utf-8"},{"id":"e5d8df49-da57-5445-8d95-646bc51524e2","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e5d8df49-da57-5445-8d95-646bc51524e2/attachment.py","path":"benchmarks/bench_agent_efficiency.py","size":21302,"sha256":"ead0b50b5143c5888500408b44ae73c114b1b6b662ed91f974f9f64321cb3987","contentType":"text/x-python; charset=utf-8"},{"id":"a1eb22f7-92bf-5f5d-8efa-88fd6d6a4f58","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a1eb22f7-92bf-5f5d-8efa-88fd6d6a4f58/attachment.py","path":"benchmarks/bench_comprehensive.py","size":16160,"sha256":"f36312b9145e9ec7e7bc9f43c92d5cfc623392a4ce4250de485cac31f9756707","contentType":"text/x-python; charset=utf-8"},{"id":"71a77878-c6c1-5af1-849e-bdd885ad81b8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/71a77878-c6c1-5af1-849e-bdd885ad81b8/attachment.py","path":"benchmarks/bench_librarian.py","size":8481,"sha256":"3ce66ed616f5ae886548b5c3f586a9c29f5d6f159cc9d2347d645537be0744b3","contentType":"text/x-python; charset=utf-8"},{"id":"afc7e63d-1c8c-5d56-ac22-ae65bbf9f5e3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/afc7e63d-1c8c-5d56-ac22-ae65bbf9f5e3/attachment.py","path":"benchmarks/bench_paper.py","size":23457,"sha256":"ece870ddb039812fb86f3cef8d75b8b304692d491e2b2b66cac74d501993b177","contentType":"text/x-python; charset=utf-8"},{"id":"899eccc7-ff00-55ea-814c-8099451bac51","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/899eccc7-ff00-55ea-814c-8099451bac51/attachment.py","path":"benchmarks/bench_performance.py","size":3102,"sha256":"b19e7dfc347fe52b8f7359c5bc848f50ca840791108e5c9a60815db1ba72a1df","contentType":"text/x-python; charset=utf-8"},{"id":"cb680a85-2b16-5449-9d97-bcb264af5ea6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/cb680a85-2b16-5449-9d97-bcb264af5ea6/attachment.py","path":"benchmarks/bench_topic_index.py","size":6951,"sha256":"0fe77e2697a2477f578288af0eaca6fa59154af262d3c5f7bb4bfc61c135debd","contentType":"text/x-python; charset=utf-8"},{"id":"3995b07d-5ff9-57cf-97ba-c1f5d8724814","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/3995b07d-5ff9-57cf-97ba-c1f5d8724814/attachment.py","path":"benchmarks/embedding_bench.py","size":9935,"sha256":"de06a7c8f1cfcd415647759421fe56fff0a66318bdaa92ab6fe073a6efd7cd5b","contentType":"text/x-python; charset=utf-8"},{"id":"5849df70-e0bd-567e-ae3a-9b0aaa1045e3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/5849df70-e0bd-567e-ae3a-9b0aaa1045e3/attachment.md","path":"benchmarks/fileorg/README.md","size":6522,"sha256":"9a7d8e34bc53218b5b21b1e8002e4509eed3db2cebd04f8fa8cf97e3d9b2004a","contentType":"text/markdown; charset=utf-8"},{"id":"a706618b-bf0f-567b-b3ac-d6c51af906a2","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a706618b-bf0f-567b-b3ac-d6c51af906a2/attachment.py","path":"benchmarks/fileorg/__init__.py","size":262,"sha256":"cf37ff731507b0fa9f8f4f260ee18a406e7f4651de2ab900bb0a7fb56fd05c48","contentType":"text/x-python; charset=utf-8"},{"id":"96d6b977-8cc3-573e-83f4-4b8503eb2148","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/96d6b977-8cc3-573e-83f4-4b8503eb2148/attachment.py","path":"benchmarks/fileorg/__main__.py","size":131,"sha256":"d740f27e03c4f6964fdaa071d64a89d46776e8ee81e1146ee9961536d3a697d1","contentType":"text/x-python; charset=utf-8"},{"id":"69c2f9bb-05c7-51d6-937f-59495b923c85","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/69c2f9bb-05c7-51d6-937f-59495b923c85/attachment.py","path":"benchmarks/fileorg/fileorg.py","size":8120,"sha256":"6add8828f2859b830523f58f92048882c57b6e90cf6a595e88882ab69cea3003","contentType":"text/x-python; charset=utf-8"},{"id":"cfc60be5-ac29-585a-9c4e-2e39bd50bb11","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/cfc60be5-ac29-585a-9c4e-2e39bd50bb11/attachment.py","path":"benchmarks/librarian_bench.py","size":14951,"sha256":"8cb1fbf9858da1790dc18aa6dae200912765838ab66bf0808f60941faccabb2f","contentType":"text/x-python; charset=utf-8"},{"id":"7cc49270-13a6-5d0b-b495-44288b0ba5d0","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/7cc49270-13a6-5d0b-b495-44288b0ba5d0/attachment.md","path":"benchmarks/memory/shared/bench/shared_knowledge.md","size":2774,"sha256":"79e6c3d4241a45a00f5f9320f8c5f8c50b9c593fc2d8ba9b7ffa370ab3ce7a44","contentType":"text/markdown; charset=utf-8"},{"id":"dbbbcb67-f7a7-5f25-9b8f-812fa755d6b3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/dbbbcb67-f7a7-5f25-9b8f-812fa755d6b3/attachment.md","path":"benchmarks/memory/shared/bugs/auth_token_prefix.md","size":572,"sha256":"48701c282eb53758fac0269ed9602b78366f86762eb2b39f85704b47e98c3626","contentType":"text/markdown; charset=utf-8"},{"id":"04c7675e-bd46-5f76-9b15-5499cd21579e","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/04c7675e-bd46-5f76-9b15-5499cd21579e/attachment.md","path":"benchmarks/memory/shared/consensus/decision_a.md","size":621,"sha256":"151fbbf40569286a94df3677ff1b023d992b901e1b24e221b8956363e977ad17","contentType":"text/markdown; charset=utf-8"},{"id":"547979a3-6e65-5ab4-a5ed-eddd8a1fc99b","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/547979a3-6e65-5ab4-a5ed-eddd8a1fc99b/attachment.md","path":"benchmarks/memory/shared/consensus/decision_b.md","size":1251,"sha256":"543685a2a67139c88120282691a113a818e62a12786e3b14222c91bed26f8488","contentType":"text/markdown; charset=utf-8"},{"id":"5ba65403-4a95-5ca1-bfb0-92d1e7254b41","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/5ba65403-4a95-5ca1-bfb0-92d1e7254b41/attachment.md","path":"benchmarks/memory/shared/consensus/resolution.md","size":2101,"sha256":"252fce65ceec0b536718f7c1eef77c4aefea69608f3a04b7831f8b37b48ff1dc","contentType":"text/markdown; charset=utf-8"},{"id":"01fa948b-be57-5b3b-8054-505b7bfa2e81","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/01fa948b-be57-5b3b-8054-505b7bfa2e81/attachment.md","path":"benchmarks/memory/shared/consensus/voter_1_vote.md","size":205,"sha256":"f789f85a69ac1988f19dca0418ac7ba44d5319dba5efc4db17dca77ea9e3d7ff","contentType":"text/markdown; charset=utf-8"},{"id":"3d0017fa-101b-57b4-8baa-909d8dc4be7b","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/3d0017fa-101b-57b4-8baa-909d8dc4be7b/attachment.md","path":"benchmarks/memory/shared/queue/high_priority.md","size":2501,"sha256":"4f0e5b0109570995fc678c48a4cb5a02abe620cbc0d539c418a8df48708d0280","contentType":"text/markdown; charset=utf-8"},{"id":"6288e37b-6f47-515a-9d13-876cf63bc3df","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/6288e37b-6f47-515a-9d13-876cf63bc3df/attachment.md","path":"benchmarks/memory/shared/queue/low_priority.md","size":2363,"sha256":"56c3a4fa84f3603f9837b73fae8cffba9a71100e1f347b97bb9ff26caee6d4a0","contentType":"text/markdown; charset=utf-8"},{"id":"7a6f49f8-cf19-58cf-826c-6120da5f7304","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/7a6f49f8-cf19-58cf-826c-6120da5f7304/attachment","path":"benchmarks/notification_service/.dockerignore","size":142,"sha256":"8528123963b0507f0111bb3a240adc6c660dabaf981b95ffde2ed7d963332920","contentType":"text/plain; charset=utf-8"},{"id":"6eb3057d-86b7-5e4b-8843-c86147a058dd","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/6eb3057d-86b7-5e4b-8843-c86147a058dd/attachment.example","path":"benchmarks/notification_service/.env.example","size":827,"sha256":"be5351f19b1dbfded6c085cb403b39366c9e6cac5d129a9f9a2b21be55620605","contentType":"text/plain; charset=utf-8"},{"id":"73db9bd9-c340-5255-b197-9f294d3a80df","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/73db9bd9-c340-5255-b197-9f294d3a80df/attachment.yaml","path":"benchmarks/notification_service/.github/workflows/ci.yaml","size":3454,"sha256":"3d310b5ab11d2ef37dce8d6b0ac5c431de23aa1be2ac8732a0b3cfcf6e4d7bfe","contentType":"application/yaml; charset=utf-8"},{"id":"0f38cf0b-908f-5f7a-9a1b-75c1420a9cd8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/0f38cf0b-908f-5f7a-9a1b-75c1420a9cd8/attachment.yaml","path":"benchmarks/notification_service/.github/workflows/pr-check.yaml","size":738,"sha256":"1fca4a438f229754109a957b7dc894b20ece4e9302ba35c099d68b2534aaa50d","contentType":"application/yaml; charset=utf-8"},{"id":"d4d9093d-0007-52d5-8651-b7bc944f2cb6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/d4d9093d-0007-52d5-8651-b7bc944f2cb6/attachment","path":"benchmarks/notification_service/Dockerfile","size":679,"sha256":"233179e91067bfd2d76f90789691d39443e77058147da493d387f8785e983494","contentType":"text/plain; charset=utf-8"},{"id":"cb1eae86-2258-581f-a3f4-42d916a20640","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/cb1eae86-2258-581f-a3f4-42d916a20640/attachment.test","path":"benchmarks/notification_service/Dockerfile.test","size":183,"sha256":"187ecf0a6d7a8ad4da5fc8b81ac6f4d3a4a27a7c6ea40fd4be69c5a0b438b327","contentType":"text/plain; charset=utf-8"},{"id":"c503be06-4d11-5124-aad3-b11cc3ce0aec","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c503be06-4d11-5124-aad3-b11cc3ce0aec/attachment.py","path":"benchmarks/notification_service/__init__.py","size":675,"sha256":"172e6ac5be702c35193ceee974739fc611c2df4a777ea44b333a10c34112b6f4","contentType":"text/x-python; charset=utf-8"},{"id":"1b0e0942-bbf4-5970-994d-410cad8fcdc2","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/1b0e0942-bbf4-5970-994d-410cad8fcdc2/attachment.py","path":"benchmarks/notification_service/config.py","size":1188,"sha256":"7a421b71259de2bb675d95f1a25d97c39146020e7af9a17abe8da7e6f2fb3e04","contentType":"text/x-python; charset=utf-8"},{"id":"eb7dd6ee-7f81-54c2-aa74-6e87cd7397e5","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/eb7dd6ee-7f81-54c2-aa74-6e87cd7397e5/attachment.yml","path":"benchmarks/notification_service/docker-compose.test.yml","size":447,"sha256":"0749cc3654c5f18589409cb61cb5abbebe81ec8b030294353918c3e9577b91c2","contentType":"application/yaml; charset=utf-8"},{"id":"be467e7a-5287-589f-ad44-ce35fe37493a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/be467e7a-5287-589f-ad44-ce35fe37493a/attachment.yml","path":"benchmarks/notification_service/docker-compose.yml","size":881,"sha256":"84b5349c9fa6b915c3f20665d730649efca5e00198ee1ddd78f60cb73fecf20c","contentType":"application/yaml; charset=utf-8"},{"id":"167ecf4a-1113-5505-a9f5-05d5cc0a6ae9","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/167ecf4a-1113-5505-a9f5-05d5cc0a6ae9/attachment.py","path":"benchmarks/notification_service/handlers.py","size":4639,"sha256":"51ad773c8b51ed51994bb74cfe2e021b9eb2c1dac523b5942622739fad5d4f25","contentType":"text/x-python; charset=utf-8"},{"id":"b32a365a-d63c-59c4-bfc8-87e358b22832","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b32a365a-d63c-59c4-bfc8-87e358b22832/attachment.yaml","path":"benchmarks/notification_service/k8s/configmap.yaml","size":640,"sha256":"9c836a43d33aace23a30bfdbc271ecb0be98b0d26c37308f2dfb0c82bf88b50d","contentType":"application/yaml; charset=utf-8"},{"id":"7d8fbd35-fbb1-5252-b462-9087597e8b79","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/7d8fbd35-fbb1-5252-b462-9087597e8b79/attachment.yaml","path":"benchmarks/notification_service/k8s/deployment.yaml","size":1801,"sha256":"4194573bd0041700188390791ac254c8601cd3d713e381959f0dd9418b24ddb7","contentType":"application/yaml; charset=utf-8"},{"id":"5bafcaf2-7883-5463-9303-b40bec5462f3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/5bafcaf2-7883-5463-9303-b40bec5462f3/attachment.yaml","path":"benchmarks/notification_service/k8s/hpa.yaml","size":602,"sha256":"0bd6ce22388934118991b5a88821be136cf39dde87fff4cf5febb49cdf4aa19d","contentType":"application/yaml; charset=utf-8"},{"id":"dcf30c7b-4fdd-54b4-88b4-bade9acf9c91","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/dcf30c7b-4fdd-54b4-88b4-bade9acf9c91/attachment.yaml","path":"benchmarks/notification_service/k8s/kustomization.yaml","size":351,"sha256":"aa98835b01a6ba6af5ab331889c4d73bbb3bb12c0ce90d8df1b85d6fa4605ad4","contentType":"application/yaml; charset=utf-8"},{"id":"18fe469c-f731-570b-a29f-022a0a4cdcd3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/18fe469c-f731-570b-a29f-022a0a4cdcd3/attachment.yaml","path":"benchmarks/notification_service/k8s/namespace.yaml","size":174,"sha256":"af8a3be60e49df47f841c7187d92403662d44bd07728b554bcc10c4086368608","contentType":"application/yaml; charset=utf-8"},{"id":"bb99add5-0135-50f8-b826-5f6dbcaf1cab","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/bb99add5-0135-50f8-b826-5f6dbcaf1cab/attachment.yaml","path":"benchmarks/notification_service/k8s/networkpolicy.yaml","size":1506,"sha256":"fa63dfa7e441ca08b95ecd0454743b5e77cef4752e2d7b96369c8761e0fd7ec7","contentType":"application/yaml; charset=utf-8"},{"id":"6c46b96c-d090-5c45-af43-83f756a79b8e","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/6c46b96c-d090-5c45-af43-83f756a79b8e/attachment.yaml","path":"benchmarks/notification_service/k8s/redis.yaml","size":1552,"sha256":"6c06100b8a3d8cf6dee4c8173dd5017ad27fdee369ed8193ef55c7ba76963c74","contentType":"application/yaml; charset=utf-8"},{"id":"b18fe3bd-0fa1-5804-88ff-be4e3bd8138d","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b18fe3bd-0fa1-5804-88ff-be4e3bd8138d/attachment.yaml","path":"benchmarks/notification_service/k8s/secret.yaml","size":394,"sha256":"3db283021efd14fa9ea11b28d29d02511681b01cdbc7aa513b40d191110e0d13","contentType":"application/yaml; charset=utf-8"},{"id":"83f8dbae-0fa9-5e1a-9fb6-2c059a726e9f","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/83f8dbae-0fa9-5e1a-9fb6-2c059a726e9f/attachment.yaml","path":"benchmarks/notification_service/k8s/service.yaml","size":330,"sha256":"03d867e3c6fda74ebdf7f5cceae98b14e4ee908724a06ec6643456b0232ad049","contentType":"application/yaml; charset=utf-8"},{"id":"8b13cb31-ed26-50da-bebb-e199b00952d5","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/8b13cb31-ed26-50da-bebb-e199b00952d5/attachment.py","path":"benchmarks/notification_service/logger.py","size":1650,"sha256":"01a1bad23a3c50697120309e743074d297c054a146c347985c9f76549eb57269","contentType":"text/x-python; charset=utf-8"},{"id":"41ec45b6-5339-5961-9d4f-099a73f8dcc7","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/41ec45b6-5339-5961-9d4f-099a73f8dcc7/attachment.py","path":"benchmarks/notification_service/main.py","size":3329,"sha256":"9ee368dc99e75e2c277282f636198ccc6ed5ec2a52808f5eb86afb72159d0dfe","contentType":"text/x-python; charset=utf-8"},{"id":"d872bc99-ff15-515c-9ce2-cc17f92322a3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/d872bc99-ff15-515c-9ce2-cc17f92322a3/attachment.py","path":"benchmarks/notification_service/models.py","size":1971,"sha256":"385a571edb8eb928e374ba701911da9398b7d05e2cfaf5a88133269dea9d0927","contentType":"text/x-python; charset=utf-8"},{"id":"62356476-ed6f-5501-9d6b-83ad54631b31","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/62356476-ed6f-5501-9d6b-83ad54631b31/attachment.py","path":"benchmarks/notification_service/processor.py","size":3527,"sha256":"9f82489e982ca7356fb9717e2c9e1b404c73928d8bf2da418aaf5ba06d61a4cb","contentType":"text/x-python; charset=utf-8"},{"id":"1d06ee7c-02d3-54da-83be-6661af35263d","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/1d06ee7c-02d3-54da-83be-6661af35263d/attachment.ini","path":"benchmarks/notification_service/pytest.ini","size":189,"sha256":"1bb57f9bf082f430aed19eeba5380a504acf0f19237b8b39a48477a858292c04","contentType":"text/plain; charset=utf-8"},{"id":"d30a6f8e-921b-5dd2-94a6-d36443fa4b82","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/d30a6f8e-921b-5dd2-94a6-d36443fa4b82/attachment.py","path":"benchmarks/notification_service/redis_client.py","size":7354,"sha256":"7d317553110ea01d0b07bc6ece1c0ff72ba7c5504c232d7421a42f4ce5411993","contentType":"text/x-python; charset=utf-8"},{"id":"bdae8ba4-3d34-5760-9510-0be4ac3a8da8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/bdae8ba4-3d34-5760-9510-0be4ac3a8da8/attachment.txt","path":"benchmarks/notification_service/requirements.txt","size":145,"sha256":"5e36c24879fc3dce1a98806676b942c34bb1e5acaae15b2f7325bbab6803f1de","contentType":"text/plain; charset=utf-8"},{"id":"fc8c39bd-5c66-5e55-99f3-498e9cb4f672","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/fc8c39bd-5c66-5e55-99f3-498e9cb4f672/attachment.sh","path":"benchmarks/notification_service/scripts/deploy.sh","size":3034,"sha256":"cf948fb8e8682a02865530b1f54330fa4defec91f448b69d7e9e348977c064f1","contentType":"application/x-sh; charset=utf-8"},{"id":"fe79166b-f133-5e42-9994-c98a9c508ae6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/fe79166b-f133-5e42-9994-c98a9c508ae6/attachment.sh","path":"benchmarks/notification_service/scripts/test.sh","size":1774,"sha256":"95c1e1b5569b2c84234771f3bc8d98ef74dc53c26a6c25fba0ca3eba64dcb685","contentType":"application/x-sh; charset=utf-8"},{"id":"db46e9a3-9fcf-54d4-a9ec-9a922ad4437a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/db46e9a3-9fcf-54d4-a9ec-9a922ad4437a/attachment.py","path":"benchmarks/notification_service/service.py","size":3300,"sha256":"26a14a75b27824a521ce99e79d3c7f533f8a8a7960fbfb5e93ec86badd4afc36","contentType":"text/x-python; charset=utf-8"},{"id":"99d3c344-ca4d-560a-8b5e-5d8c81ab082a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/99d3c344-ca4d-560a-8b5e-5d8c81ab082a/attachment.py","path":"benchmarks/notification_service/test_integration.py","size":6022,"sha256":"52c7ee4e7bd41c86224b9fa0123f2f339b4c18726638e6529799c5354b179e61","contentType":"text/x-python; charset=utf-8"},{"id":"a89e3c84-4ed9-5181-ac59-3b9693e941de","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a89e3c84-4ed9-5181-ac59-3b9693e941de/attachment.py","path":"benchmarks/notification_service/test_service.py","size":20381,"sha256":"14c99b93a41748e5c24b37aff4a73f9a3d3b13d8de89054de3c1266d9bf8c1c0","contentType":"text/x-python; charset=utf-8"},{"id":"59bc295e-1c2d-5812-8cd8-7678734d7343","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/59bc295e-1c2d-5812-8cd8-7678734d7343/attachment.py","path":"benchmarks/pipeline/__init__.py","size":1372,"sha256":"3899f0e7ff0a093b60f7912a59f62005ebf560ad0880721d14288c603c4d0172","contentType":"text/x-python; charset=utf-8"},{"id":"a10cb34c-8606-5867-a3e8-cd01a5ac01c4","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a10cb34c-8606-5867-a3e8-cd01a5ac01c4/attachment.py","path":"benchmarks/pipeline/pipeline.py","size":20268,"sha256":"7bee3248eaa81d7987cb6083c091f0f93724939a1e983da61fd30e39d7035e33","contentType":"text/x-python; charset=utf-8"},{"id":"f85ccdc2-27d2-54ec-8bda-d5f62b32fdda","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f85ccdc2-27d2-54ec-8bda-d5f62b32fdda/attachment.py","path":"benchmarks/pipeline/test_validator.py","size":36488,"sha256":"2d2b3faddc3ecf7474fa030edc3c6ff53ac95a0fe882325c01b730e4dfc206c3","contentType":"text/x-python; charset=utf-8"},{"id":"816cd231-4999-5e57-82a3-adcfd4acdabc","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/816cd231-4999-5e57-82a3-adcfd4acdabc/attachment.py","path":"benchmarks/pipeline/transform.py","size":24895,"sha256":"4bf43dcc0dc4af34d847e58a3fc6624441458db59ee4cebc50947a53b705972f","contentType":"text/x-python; charset=utf-8"},{"id":"b9bc4b74-8d44-5954-af1c-6aa04e99e776","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b9bc4b74-8d44-5954-af1c-6aa04e99e776/attachment.py","path":"benchmarks/pipeline/validator.py","size":22560,"sha256":"18f6287463636616a3d651001a560f611dc255a5ee20cf237eed291899069ca7","contentType":"text/x-python; charset=utf-8"},{"id":"b9a4baf7-4bc9-5870-9714-b41bf6069a6b","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b9a4baf7-4bc9-5870-9714-b41bf6069a6b/attachment.md","path":"benchmarks/results/PROJECT_DECISIONS.md","size":11687,"sha256":"c52660f3b57426657f2d49423658bdabe0cf266ad9b398d72681344c1b1ab605","contentType":"text/markdown; charset=utf-8"},{"id":"6cb5ced8-fb84-527c-ba4a-18c6fe832081","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/6cb5ced8-fb84-527c-ba4a-18c6fe832081/attachment.md","path":"benchmarks/results/agent_a_decision.md","size":1339,"sha256":"2574afbc54b717c029193935d501aa8dfb83d84386d2a5e07cd9d05656032953","contentType":"text/markdown; charset=utf-8"},{"id":"5efb05fe-4a24-5779-93dd-35f937c4146a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/5efb05fe-4a24-5779-93dd-35f937c4146a/attachment.md","path":"benchmarks/results/agent_b_decision.md","size":1316,"sha256":"47482c02a53a30470f05d86eac0737f66331baa6d632b4b4ae7b98d5ec1e5be3","contentType":"text/markdown; charset=utf-8"},{"id":"e53328bb-e6ff-5385-94fd-be10d180a4a5","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e53328bb-e6ff-5385-94fd-be10d180a4a5/attachment.json","path":"benchmarks/results/avm_features.json","size":4149,"sha256":"312d23e453e7e5ecea9fc98022a091d05885b7c4ef1b944a05c39e5c6f03dfdf","contentType":"application/json; charset=utf-8"},{"id":"9372a9d8-984a-5d76-8cc4-eebcda637321","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/9372a9d8-984a-5d76-8cc4-eebcda637321/attachment.json","path":"benchmarks/results/cc-001_051db298.json","size":2607,"sha256":"348547fb64ae1218c873c98d19f7592366ed30883ee9d5b451a08476a90be62c","contentType":"application/json; charset=utf-8"},{"id":"df054bb1-9929-530a-be88-568c9c8cd83c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/df054bb1-9929-530a-be88-568c9c8cd83c/attachment.json","path":"benchmarks/results/cc-001_0f40221a.json","size":4184,"sha256":"e0a7c8619b86ec4ddaad4d4165f82e1c20b621513958c53c1a893d59bc246b44","contentType":"application/json; charset=utf-8"},{"id":"c0e9ec88-0ba7-5618-8532-e0b5b5082049","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c0e9ec88-0ba7-5618-8532-e0b5b5082049/attachment.json","path":"benchmarks/results/cc-001_ec20cb28.json","size":733,"sha256":"a86adb7b185e17a319f32d283812afcb4e434e07fb78fcf5cda794077f8140b4","contentType":"application/json; charset=utf-8"},{"id":"a73fdefe-b768-5384-8437-c3c9b3b85f78","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a73fdefe-b768-5384-8437-c3c9b3b85f78/attachment.json","path":"benchmarks/results/cc-005_0702fd53.json","size":3977,"sha256":"6a929a32b166b89ab6025851f1c6fe6e638b6fee5c71c7852d2b691da5491988","contentType":"application/json; charset=utf-8"},{"id":"7fc8a255-4344-585b-a690-1ce4e7057493","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/7fc8a255-4344-585b-a690-1ce4e7057493/attachment.json","path":"benchmarks/results/cc-005_0fdffbd2.json","size":4021,"sha256":"fde71b8f00a8e3c9238fa865787c02ecfbb3750ef304288fef8fa57b08c0ac0d","contentType":"application/json; charset=utf-8"},{"id":"c04230ac-bb2c-5bda-b27d-b5a1f27cfa6a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c04230ac-bb2c-5bda-b27d-b5a1f27cfa6a/attachment.json","path":"benchmarks/results/cc-005_3a68a694.json","size":4494,"sha256":"23866e3e634eba2b0bb25798cde47254562e4f119509c6b43a06caa3ed65b469","contentType":"application/json; charset=utf-8"},{"id":"c4b9b354-2e21-5ad5-beb1-07b8e4db6a8d","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c4b9b354-2e21-5ad5-beb1-07b8e4db6a8d/attachment.json","path":"benchmarks/results/cc-005_3cbe4ed1.json","size":3402,"sha256":"3886c05e3b4b3669164535745ce25bc741c215444cd5f1a2a2b24a8f6b69cbb1","contentType":"application/json; charset=utf-8"},{"id":"833fc8c9-5a16-5fe9-8151-434111c118b9","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/833fc8c9-5a16-5fe9-8151-434111c118b9/attachment.json","path":"benchmarks/results/cc-005_4213fcdd.json","size":2707,"sha256":"731e43bbb5fb41579624eedf04063bc940c9581b3ac3506ac52ecc2cffbaff57","contentType":"application/json; charset=utf-8"},{"id":"28a2ff1f-c86d-5680-82f3-e0437bb373a3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/28a2ff1f-c86d-5680-82f3-e0437bb373a3/attachment.json","path":"benchmarks/results/cc-005_591de9dc.json","size":3542,"sha256":"1953db4e71da82041a6e4eb4c24930eb58e63a76255a8da249e8a23e9e88f49a","contentType":"application/json; charset=utf-8"},{"id":"6ddec33b-5e9a-5a0c-ae1f-f012f329f174","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/6ddec33b-5e9a-5a0c-ae1f-f012f329f174/attachment.json","path":"benchmarks/results/cc-005_5a2779c2.json","size":4490,"sha256":"0741cbc9dca6b79a1d9c538926cea49cb8abd00fe5e70b79a6482f3431c6781a","contentType":"application/json; charset=utf-8"},{"id":"232a70e4-247d-5ba6-8b38-154645f2fb7a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/232a70e4-247d-5ba6-8b38-154645f2fb7a/attachment.json","path":"benchmarks/results/cc-005_97b2202e.json","size":4029,"sha256":"cd9fb360af57ededc0bc9de0d1f70762b5126184790595e1e9515172f15c109d","contentType":"application/json; charset=utf-8"},{"id":"b93f0aeb-e238-5bd6-9cb8-596382ef6491","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b93f0aeb-e238-5bd6-9cb8-596382ef6491/attachment.json","path":"benchmarks/results/cc-005_a90cd835.json","size":3956,"sha256":"5356cfe8734544515b811616602b5093965d1ff375380a9d5dd162a08aea509f","contentType":"application/json; charset=utf-8"},{"id":"f5482ce5-dcb8-5999-a09f-92a93b31de6b","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f5482ce5-dcb8-5999-a09f-92a93b31de6b/attachment.json","path":"benchmarks/results/cc-005_b5009d2b.json","size":3537,"sha256":"14bfc7f281f9f6cbdfe3a9e402ffc6dd8f3113f9fc2afb3c0135d34c2f4218f2","contentType":"application/json; charset=utf-8"},{"id":"646c86b9-1dc0-5d13-adf6-1bc2359d7007","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/646c86b9-1dc0-5d13-adf6-1bc2359d7007/attachment.json","path":"benchmarks/results/cc-005_b8a3136d.json","size":3929,"sha256":"151d0f536c7d1f25bb39ad19489320ea439efbdd77bf28e65cb946036e506646","contentType":"application/json; charset=utf-8"},{"id":"1876cde9-b925-5619-beee-5f32cdc0a831","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/1876cde9-b925-5619-beee-5f32cdc0a831/attachment.json","path":"benchmarks/results/cc-005_cf324946.json","size":4176,"sha256":"06a77a2781dfd0d90a624fdafc9d4998fe29d92ccb28d4b6a115eec701c231a7","contentType":"application/json; charset=utf-8"},{"id":"fcdd29b4-fcad-5664-8265-d8e9e8bb6489","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/fcdd29b4-fcad-5664-8265-d8e9e8bb6489/attachment.json","path":"benchmarks/results/cc-005_ddb72178.json","size":2592,"sha256":"7eef121926f2546b5402eb89b1fd3e8e855337dfc275cdcb0251170361e73c30","contentType":"application/json; charset=utf-8"},{"id":"164fd0bf-8e99-548f-926c-9698a73062bc","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/164fd0bf-8e99-548f-926c-9698a73062bc/attachment.json","path":"benchmarks/results/cc-005_fcaa31f9.json","size":2720,"sha256":"c65ca2593ad3a35358715828c437b0e11da156ee91a435fd22866fc449d47607","contentType":"application/json; charset=utf-8"},{"id":"1eb78685-7f9c-5bc0-b277-c00a44335944","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/1eb78685-7f9c-5bc0-b277-c00a44335944/attachment.json","path":"benchmarks/results/claude_only_benchmark.json","size":21621,"sha256":"3c8359582b4fc66c79cb8e3dd843d1b4cc9fd41646559a817293df16cf0fa114","contentType":"application/json; charset=utf-8"},{"id":"bfb2afb3-dab0-5330-bd3c-5e57f4fe5789","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/bfb2afb3-dab0-5330-bd3c-5e57f4fe5789/attachment.md","path":"benchmarks/results/conflict_resolution.md","size":792,"sha256":"6731b9ba18f5000bb90a5b47aa9b5d8844a9f5df6e9a76f854c3a1958eb315e9","contentType":"text/markdown; charset=utf-8"},{"id":"06b6a57b-c305-55f2-875f-839afc30beaa","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/06b6a57b-c305-55f2-875f-839afc30beaa/attachment.json","path":"benchmarks/results/context_overflow_benchmark.json","size":9586,"sha256":"858fa7578199df8e79d10b25f45d904edd3740517b612a32e70a66b9dbd15bbb","contentType":"application/json; charset=utf-8"},{"id":"23e78208-d1ba-53f7-8c78-80aa0d8879f0","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/23e78208-d1ba-53f7-8c78-80aa0d8879f0/attachment.json","path":"benchmarks/results/core_benchmark.json","size":14349,"sha256":"877b94472dd70d0a0bc94a8b5fe2385997a2a1d194b2d90b3a333e118ae45413","contentType":"application/json; charset=utf-8"},{"id":"9d063a44-baf9-53a7-ab43-8e35659bca40","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/9d063a44-baf9-53a7-ab43-8e35659bca40/attachment.csv","path":"benchmarks/results/core_benchmark_transformed.csv","size":8182,"sha256":"b20300177c0c30a3fb51eb6c8bce8c5bba6ab24500fc9345ee71d64fcfeefaa8","contentType":"text/csv; charset=utf-8"},{"id":"74eeada8-618c-5d9f-92b0-556d8aa86742","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/74eeada8-618c-5d9f-92b0-556d8aa86742/attachment.json","path":"benchmarks/results/cross_session.json","size":942,"sha256":"9b2def16cc26db86a18d7b967c73b527e739fcb11950b65155c3f91fe64a084f","contentType":"application/json; charset=utf-8"},{"id":"4945e820-e018-5ffa-aff3-2ec7b7dce8df","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/4945e820-e018-5ffa-aff3-2ec7b7dce8df/attachment.json","path":"benchmarks/results/domain-avm_2e9f0a91.json","size":1302,"sha256":"2a830cbfd141e3af2b0c68990783377f9798ad764b42da4184435050490337d0","contentType":"application/json; charset=utf-8"},{"id":"10443d37-3743-535c-b9cd-b51e1cd4d95c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/10443d37-3743-535c-b9cd-b51e1cd4d95c/attachment.json","path":"benchmarks/results/domain-avm_80fd98fc.json","size":1298,"sha256":"e29f8b95b3e092cfc7d121223edeb7d372a5566d11c8e558cad2f17ad225aff3","contentType":"application/json; charset=utf-8"},{"id":"9e9719ae-ab5a-512b-ba5c-e104901a3d56","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/9e9719ae-ab5a-512b-ba5c-e104901a3d56/attachment.json","path":"benchmarks/results/domain-avm_920150cd.json","size":1290,"sha256":"f4e0ff5d4dbc61efacf65b4c0f72333865084a8e7c082992161f7afea9f59cac","contentType":"application/json; charset=utf-8"},{"id":"7de464ed-523b-53df-a475-4b4eb47d3c14","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/7de464ed-523b-53df-a475-4b4eb47d3c14/attachment.json","path":"benchmarks/results/domain-baseline_72a25863.json","size":1061,"sha256":"73f3b4278ac383e94a35ea9c49e227f2e81df1d83d4a1b64d6b75975836622bf","contentType":"application/json; charset=utf-8"},{"id":"0983f516-6ec8-5735-9dcf-b2ac7c739c85","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/0983f516-6ec8-5735-9dcf-b2ac7c739c85/attachment.json","path":"benchmarks/results/domain-baseline_b326bd58.json","size":1071,"sha256":"8d062c2f7cbd9e7e768e12861e04cff5a5299c551b7ec181a35e575b6144d9e5","contentType":"application/json; charset=utf-8"},{"id":"21c8dee4-6feb-5e82-935f-add75561e398","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/21c8dee4-6feb-5e82-935f-add75561e398/attachment.json","path":"benchmarks/results/domain-baseline_e7330d57.json","size":1069,"sha256":"0f8eb410219d0b2cc93a4b242e27c2f5c52773b5e0172220ecce9d36b1f85ac4","contentType":"application/json; charset=utf-8"},{"id":"b0ef8477-89e2-586c-a5a7-70636c12f8ca","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b0ef8477-89e2-586c-a5a7-70636c12f8ca/attachment.json","path":"benchmarks/results/domain_knowledge.json","size":749,"sha256":"c60c0d1e11a2d15374a7ff8a6154b6b78a97805ce50cc45d5ce858af4efa1054","contentType":"application/json; charset=utf-8"},{"id":"2877db0a-8e98-5487-aa41-b45b6bc4574a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/2877db0a-8e98-5487-aa41-b45b6bc4574a/attachment.json","path":"benchmarks/results/extreme_collab.json","size":6768,"sha256":"9ed562ebc432747ef425b78b15a7294b5adac96f2ff4983538bc848d81a84282","contentType":"application/json; charset=utf-8"},{"id":"655f1956-692c-591d-8233-4480fb5007b4","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/655f1956-692c-591d-8233-4480fb5007b4/attachment.json","path":"benchmarks/results/forced_collab.json","size":529,"sha256":"64d2f540cdc6ac63a582c16301b6f2a46ec7868515ee57ab4092685535d004ca","contentType":"application/json; charset=utf-8"},{"id":"5dbf3a6f-7810-572d-98fb-c3aae48850b7","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/5dbf3a6f-7810-572d-98fb-c3aae48850b7/attachment.json","path":"benchmarks/results/heterogeneous.json","size":3216,"sha256":"8af3024b227c1dea24765d26edc5447ffcf9b67f1417ff55a0f640c6aadee8d4","contentType":"application/json; charset=utf-8"},{"id":"c59f5900-249a-5f3a-9bf0-38845321a054","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c59f5900-249a-5f3a-9bf0-38845321a054/attachment.json","path":"benchmarks/results/knowledge_retrieval_benchmark.json","size":14927,"sha256":"50a9d1bce6915d679cd52a9c15d01ad0f8ffecba03b8a8e1b31ffb9d65a02f3c","contentType":"application/json; charset=utf-8"},{"id":"fb4b5e46-4d76-5db6-a402-5b6596c25fd2","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/fb4b5e46-4d76-5db6-a402-5b6596c25fd2/attachment.json","path":"benchmarks/results/knowledge_transfer.json","size":689,"sha256":"826935d7dba2ec421747a87cabe485bbb433e5128e31366b2126d9ec014d400c","contentType":"application/json; charset=utf-8"},{"id":"ead8aff9-1722-58c3-9edf-faf8f4c5ecba","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/ead8aff9-1722-58c3-9edf-faf8f4c5ecba/attachment.json","path":"benchmarks/results/kr-001_205b0d0e.json","size":3006,"sha256":"262e27eca50ae289e6f8fa1930fbc108e756e9a35f9bff20f738e3ea9c650536","contentType":"application/json; charset=utf-8"},{"id":"73b51a1d-9ef8-51d5-8b0b-d1a4e2666ed0","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/73b51a1d-9ef8-51d5-8b0b-d1a4e2666ed0/attachment.json","path":"benchmarks/results/kr-001_54a07bdd.json","size":1955,"sha256":"8099d24fffafb870f56f6c76da562fe6f314a42e60ceda2ba3a32bc924cfaa4a","contentType":"application/json; charset=utf-8"},{"id":"39c4db3a-8de3-5f25-9079-522116c3df10","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/39c4db3a-8de3-5f25-9079-522116c3df10/attachment.json","path":"benchmarks/results/kr-004_9d1b24ca.json","size":1289,"sha256":"6a185ace055b4cb0ccaccaed088cd7cd2e636bbc86bcf504be34ce5af82b992b","contentType":"application/json; charset=utf-8"},{"id":"7ef61167-6924-5a1e-89a4-40c99003ea73","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/7ef61167-6924-5a1e-89a4-40c99003ea73/attachment.json","path":"benchmarks/results/kr-004_e8f6cb7e.json","size":1828,"sha256":"3a9c62587fe6549d80309513a83f6842c44a6f5926a40577ad69e24418aa0b28","contentType":"application/json; charset=utf-8"},{"id":"1f3ebe08-1ef0-5486-9e9a-2d9cfb868677","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/1f3ebe08-1ef0-5486-9e9a-2d9cfb868677/attachment.json","path":"benchmarks/results/kt-avm_2e2cf554.json","size":1317,"sha256":"d575b0727a7b66066522ed1e732b7d0a71b7b1b781ae41cc5489d9f06c3a801f","contentType":"application/json; charset=utf-8"},{"id":"81f19504-5d96-5b7c-ab00-853515584aa6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/81f19504-5d96-5b7c-ab00-853515584aa6/attachment.json","path":"benchmarks/results/kt-baseline_40deed74.json","size":1032,"sha256":"bb5cbd63d1c50c75573eda95bb3ed4ca29e1e515c3a7c3e806409b2ff40e633c","contentType":"application/json; charset=utf-8"},{"id":"7d35ad58-dded-5beb-957d-b302e6ad609e","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/7d35ad58-dded-5beb-957d-b302e6ad609e/attachment.json","path":"benchmarks/results/parallel_benchmark.json","size":43897,"sha256":"a940c90659bebbca159a79ecfc24e90436a05f7d4883ba34a7a895ab009f77f0","contentType":"application/json; charset=utf-8"},{"id":"2c43ae56-6d54-5098-9ff6-cbe1d1e45c01","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/2c43ae56-6d54-5098-9ff6-cbe1d1e45c01/attachment.csv","path":"benchmarks/results/pipeline_output.csv","size":8182,"sha256":"b20300177c0c30a3fb51eb6c8bce8c5bba6ab24500fc9345ee71d64fcfeefaa8","contentType":"text/csv; charset=utf-8"},{"id":"b9453717-13cb-599a-a93d-d2e07b2f334c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b9453717-13cb-599a-a93d-d2e07b2f334c/attachment.md","path":"benchmarks/results/resolver_decision.md","size":2111,"sha256":"c7f9e54975b7cf2affa1dd3ee8a23bae78c74195ad0ccfb1780862a27db6cce3","contentType":"text/markdown; charset=utf-8"},{"id":"1d2ead79-5058-5864-a958-1eacf2d6bcd0","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/1d2ead79-5058-5864-a958-1eacf2d6bcd0/attachment.json","path":"benchmarks/results/smart_avm.json","size":1535,"sha256":"924c1bf104393be83254a6c0dec789646bfe30f5d3287117875492c50b4d8781","contentType":"application/json; charset=utf-8"},{"id":"ec5a8290-cf4e-5b4a-b6f6-bb175aef0e4c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/ec5a8290-cf4e-5b4a-b6f6-bb175aef0e4c/attachment.json","path":"benchmarks/results/summary.json","size":1238,"sha256":"5c6b133ac6e0e18a86d42ad936c14763b6b4a39b2b5a7912f619fbfa4bfaf276","contentType":"application/json; charset=utf-8"},{"id":"a562e921-2d56-59e1-8460-805847db9867","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a562e921-2d56-59e1-8460-805847db9867/attachment.json","path":"benchmarks/results/unit_benchmark.json","size":2655,"sha256":"6eb406e56cb70625253a11f1b60e89d1dd2fdc4009589ecc83dbd1e05f2f2d3d","contentType":"application/json; charset=utf-8"},{"id":"37ad7df7-51c3-5d63-b50b-a049080fb521","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/37ad7df7-51c3-5d63-b50b-a049080fb521/attachment.py","path":"benchmarks/run_all.py","size":6586,"sha256":"04d191c0c1135a94d59b3e920435852c606d8706e5c5c9bb4f53dea5c1c9e59b","contentType":"text/x-python; charset=utf-8"},{"id":"61463eb4-a86d-5af8-a119-16b9b0515239","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/61463eb4-a86d-5af8-a119-16b9b0515239/attachment.py","path":"benchmarks/run_avm_features.py","size":17823,"sha256":"c7c214ca761d8e7a4121da4c95e066c66b1b02892cbb41cc9cc08a4fbb7995f7","contentType":"text/x-python; charset=utf-8"},{"id":"c7adfec0-2da0-5a9a-b521-d0df69945aa5","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c7adfec0-2da0-5a9a-b521-d0df69945aa5/attachment.py","path":"benchmarks/run_claude_only.py","size":7830,"sha256":"9c2bf7ed634a1b9a68d763c5cd1396fc636b332fc5302d396e4e80400ab0d780","contentType":"text/x-python; charset=utf-8"},{"id":"409af374-8f29-52bb-8716-089982f51d99","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/409af374-8f29-52bb-8716-089982f51d99/attachment.py","path":"benchmarks/run_context_overflow.py","size":7154,"sha256":"e307f146d98f281a6a6b5756d75c27d16613024e66e2888d15e208d94a5cd375","contentType":"text/x-python; charset=utf-8"},{"id":"a3d6eb24-8362-50f9-a491-fe14bc9b12f7","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a3d6eb24-8362-50f9-a491-fe14bc9b12f7/attachment.py","path":"benchmarks/run_core_benchmark.py","size":17306,"sha256":"cf4e0e075a987543c0ef2ddff2d4edbdbb829d44b9ce16c41c387df359872405","contentType":"text/x-python; charset=utf-8"},{"id":"f5001d33-f0ac-5285-8625-9d642b3fc7b3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f5001d33-f0ac-5285-8625-9d642b3fc7b3/attachment.py","path":"benchmarks/run_cross_session.py","size":9623,"sha256":"f41597c4ce1a008769a3375ee8ca8142225d017acbeabbd3778d9a5a22a67a59","contentType":"text/x-python; charset=utf-8"},{"id":"9ae94fdd-ec90-577f-b046-8f8c7ec939d3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/9ae94fdd-ec90-577f-b046-8f8c7ec939d3/attachment.py","path":"benchmarks/run_domain_knowledge.py","size":7114,"sha256":"3c37000827ff0b152732f7d35f712d7cd0f32528b6a60b3480d7d3dcd1dd0ac6","contentType":"text/x-python; charset=utf-8"},{"id":"da911321-79b7-5fd3-bcdc-e4a0b553c082","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/da911321-79b7-5fd3-bcdc-e4a0b553c082/attachment.py","path":"benchmarks/run_extreme_collab.py","size":10237,"sha256":"98460eb807b37e6314fb346820db77bd0bba27cd91b5b533414667f074daba31","contentType":"text/x-python; charset=utf-8"},{"id":"09110519-4f66-5cb9-83ab-b9c9fee5228c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/09110519-4f66-5cb9-83ab-b9c9fee5228c/attachment.py","path":"benchmarks/run_forced_collab.py","size":13607,"sha256":"bf5866b25b9e759e1ae1f0f988140a4f36f9027cae20574743f8aff5636abdbd","contentType":"text/x-python; charset=utf-8"},{"id":"850055fc-9783-5ad4-9b47-7bc08e52fd2c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/850055fc-9783-5ad4-9b47-7bc08e52fd2c/attachment.py","path":"benchmarks/run_heterogeneous.py","size":6357,"sha256":"894033b1cb9bc4220b5595216fc9576724244468907287df8779e4074370757f","contentType":"text/x-python; charset=utf-8"},{"id":"01baa586-4a12-52ea-82b0-7f1cfaa6cd05","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/01baa586-4a12-52ea-82b0-7f1cfaa6cd05/attachment.py","path":"benchmarks/run_knowledge_retrieval.py","size":6281,"sha256":"585511d34cbe101538b6b64a00a7c3bec500429307345b768ce3c1fe41938b0f","contentType":"text/x-python; charset=utf-8"},{"id":"847b78f4-888a-5f67-873f-ec615eb7faa1","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/847b78f4-888a-5f67-873f-ec615eb7faa1/attachment.py","path":"benchmarks/run_knowledge_transfer.py","size":7821,"sha256":"ff124ecbcc06065597b963db8124b7c547462a7748bb44d325e106407537e8f4","contentType":"text/x-python; charset=utf-8"},{"id":"64e70663-1609-5c1c-b2dd-55ff7fe77c48","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/64e70663-1609-5c1c-b2dd-55ff7fe77c48/attachment.py","path":"benchmarks/run_parallel.py","size":7060,"sha256":"3ff7c6c3d7a8f19fc49f278f290fc4f50b0be59ca640bb37bca1f5f99ec54e9d","contentType":"text/x-python; charset=utf-8"},{"id":"57f40c97-4be2-5473-a2a2-3f81415ed3f3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/57f40c97-4be2-5473-a2a2-3f81415ed3f3/attachment.py","path":"benchmarks/run_single.py","size":3831,"sha256":"2eb0af6441c2451404fe13dee08b8e067831066a296da6bbca7005b353113564","contentType":"text/x-python; charset=utf-8"},{"id":"f9b8b541-9946-5585-abeb-e294390725b9","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f9b8b541-9946-5585-abeb-e294390725b9/attachment.py","path":"benchmarks/run_smart_avm.py","size":8322,"sha256":"28591d82ef724f656f8c18e6f1b0a7dc77d91ca66c129360f954e2bf8983eea3","contentType":"text/x-python; charset=utf-8"},{"id":"ac1309d2-b6b1-5108-825e-bde25b748a4d","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/ac1309d2-b6b1-5108-825e-bde25b748a4d/attachment.py","path":"benchmarks/run_unit_benchmark.py","size":9128,"sha256":"3aa47c820b8825a804b02140c182d6ec86b8e644da70608ac247681a256726f2","contentType":"text/x-python; charset=utf-8"},{"id":"86bdc0b8-5bbf-59c1-8efa-1a26c5f3623c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/86bdc0b8-5bbf-59c1-8efa-1a26c5f3623c/attachment.py","path":"benchmarks/run_with_avm.py","size":9211,"sha256":"32ebc366b0eb9abe3c03d6d385ce2b7f9be02604a3331e6fed29ff4ca412e672","contentType":"text/x-python; charset=utf-8"},{"id":"21f8f2c1-abc1-50ea-8d22-5f530a33add0","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/21f8f2c1-abc1-50ea-8d22-5f530a33add0/attachment.py","path":"benchmarks/runner.py","size":10497,"sha256":"5f19f7d3dc0ba616220139e945b472faab2e7b6310d73448a6da156833b729af","contentType":"text/x-python; charset=utf-8"},{"id":"676609fd-9e19-5e1b-94e8-85734015ee01","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/676609fd-9e19-5e1b-94e8-85734015ee01/attachment.json","path":"benchmarks/scenarios/collaborative_coding.json","size":5647,"sha256":"2f2d67482f0d8d38220cdf80b93a7dd7ef63f316b600f2d38ec40333d91f1f82","contentType":"application/json; charset=utf-8"},{"id":"420367e1-0d1b-56b7-aaad-b017f376b772","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/420367e1-0d1b-56b7-aaad-b017f376b772/attachment.json","path":"benchmarks/scenarios/collaborative_coding_extended.json","size":4803,"sha256":"cc15d9cbead8113f954dba5b3d337c1f59317413a7f3bd7015c15a828ad7e646","contentType":"application/json; charset=utf-8"},{"id":"f86e9342-8b62-5fbe-9256-1a2a55390ed6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f86e9342-8b62-5fbe-9256-1a2a55390ed6/attachment.json","path":"benchmarks/scenarios/context_overflow.json","size":8157,"sha256":"4eb82ed64c6ae780faf643f077359cde9632139b3b7fc62b6f3fe95ff7d32268","contentType":"application/json; charset=utf-8"},{"id":"36256cbf-ccb6-5782-9ea3-9dae9c9927d5","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/36256cbf-ccb6-5782-9ea3-9dae9c9927d5/attachment.json","path":"benchmarks/scenarios/information_sync.json","size":5477,"sha256":"5baa576bed6fb022cd718ceb9abef24eafac9a4256d2026c25ed3868a65c945a","contentType":"application/json; charset=utf-8"},{"id":"df4ec439-bde0-54b8-9f1f-d265ee50dfc0","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/df4ec439-bde0-54b8-9f1f-d265ee50dfc0/attachment.json","path":"benchmarks/scenarios/information_sync_extended.json","size":4406,"sha256":"ddc14d3f160d5de2cf1c0132e129e0c333c3297fe38bb46f239a5c811345ac2a","contentType":"application/json; charset=utf-8"},{"id":"c8a63a5b-6e93-547e-ae26-fcc370891e7a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c8a63a5b-6e93-547e-ae26-fcc370891e7a/attachment.json","path":"benchmarks/scenarios/knowledge_retrieval.json","size":6729,"sha256":"01fa66a92c071953cb09c42185f30d51c1c76aa6a6e3158f0ca7cc689ff105de","contentType":"application/json; charset=utf-8"},{"id":"4d47f927-7f6f-58eb-b7d2-7090fb080333","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/4d47f927-7f6f-58eb-b7d2-7090fb080333/attachment.json","path":"benchmarks/scenarios/knowledge_retrieval_extended.json","size":13920,"sha256":"ab3010014ad625891d1fd640c9d3f9e618145e13e03e9d54c67836beb76d34c6","contentType":"application/json; charset=utf-8"},{"id":"a9dcc391-8e35-53c3-83c2-c01ba0f9bf34","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a9dcc391-8e35-53c3-83c2-c01ba0f9bf34/attachment.json","path":"benchmarks/scenarios/real_world_cases.json","size":5281,"sha256":"7daf622a2b6e126268f3740e29c0c94f7a206d604beb10ef03b68f20da546693","contentType":"application/json; charset=utf-8"},{"id":"d136c058-7d00-57c4-a310-55cc407683ca","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/d136c058-7d00-57c4-a310-55cc407683ca/attachment.py","path":"benchmarks/tell_bench.py","size":9870,"sha256":"0a5b75e4b84eae195f70fc09adf44ec4e8e66ef6071ec8ef2cd5206f4ccb901e","contentType":"text/x-python; charset=utf-8"},{"id":"e261e609-c4c0-5f19-b223-3c1c7ffb052a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e261e609-c4c0-5f19-b223-3c1c7ffb052a/attachment.py","path":"benchmarks/todo_api/__init__.py","size":280,"sha256":"e3803d3721475d82e48c7c2de8697b18963d060e4a65ebfefdaf3aaba2b8a6d7","contentType":"text/x-python; charset=utf-8"},{"id":"f7e96866-ce60-5e41-b171-fb7484c8b519","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f7e96866-ce60-5e41-b171-fb7484c8b519/attachment.py","path":"benchmarks/todo_api/database.py","size":2107,"sha256":"5326e80e914168a0c433e5a5a95659ac7796c97540bd1dd6824b669ac862aac0","contentType":"text/x-python; charset=utf-8"},{"id":"195e7578-9545-5ee5-bafd-9081afaae84a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/195e7578-9545-5ee5-bafd-9081afaae84a/attachment.py","path":"benchmarks/todo_api/main.py","size":2715,"sha256":"056a1ae5b4e3806fe76937d590a82cb0238b757da6d798b3b40c839f29289b96","contentType":"text/x-python; charset=utf-8"},{"id":"4b86be08-39de-5d3e-a184-989887628fbb","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/4b86be08-39de-5d3e-a184-989887628fbb/attachment.py","path":"benchmarks/todo_api/models.py","size":1544,"sha256":"82027e76a0fc92197fdf8f7d6d617a1ecf0bfae9e6c7a92edf9d0a4b5ccea037","contentType":"text/x-python; charset=utf-8"},{"id":"328be1b9-492d-555b-be3c-44d2691636c4","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/328be1b9-492d-555b-be3c-44d2691636c4/attachment.txt","path":"benchmarks/todo_api/requirements.txt","size":77,"sha256":"5dc9b36766626e4868d7222f0a049a596a28b366e8ae34090fb137fb57812428","contentType":"text/plain; charset=utf-8"},{"id":"7e75e2a5-7284-5ff3-80da-14cb38e764bb","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/7e75e2a5-7284-5ff3-80da-14cb38e764bb/attachment.py","path":"benchmarks/todo_api/test_api.py","size":17381,"sha256":"8900f16a0b23963e89ff53354ada49b436d79d1d69fae20c8fcaab28dd5f13a5","contentType":"text/x-python; charset=utf-8"},{"id":"c6c95dbd-8ebd-51b5-b023-838e3be937b6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c6c95dbd-8ebd-51b5-b023-838e3be937b6/attachment.py","path":"cli.py","size":8338,"sha256":"a46e751f6755aa3bd005da3cc004b3bdff4a469ccf7d96ec07c2bd956495217d","contentType":"text/x-python; charset=utf-8"},{"id":"31a7317f-bfda-5a69-b039-e788125c6119","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/31a7317f-bfda-5a69-b039-e788125c6119/attachment.yaml","path":"config.yaml","size":1239,"sha256":"57ca321869c436e7e4b892fb6f60f90ed9af969987436202cbc0511ec4000ca2","contentType":"application/yaml; charset=utf-8"},{"id":"2f235307-f828-57b3-9abd-ded56f329d37","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/2f235307-f828-57b3-9abd-ded56f329d37/attachment.service","path":"deploy/avm-daemon.service","size":345,"sha256":"13df017fd433bad2e87e629eedf7b2c1d5629482353f232d2a9a3d99143316f5","contentType":"text/plain; charset=utf-8"},{"id":"40931d84-ea2a-56b8-8392-5c5536c9fc26","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/40931d84-ea2a-56b8-8392-5c5536c9fc26/attachment.sh","path":"deploy/install-linux.sh","size":344,"sha256":"7398c97bd677886ff940a88532e7eea12c843b2f2c4cc9fe603da8cd8cfb13fa","contentType":"application/x-sh; charset=utf-8"},{"id":"7d066a14-24ba-5c94-b150-6a7a5ee87bf7","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/7d066a14-24ba-5c94-b150-6a7a5ee87bf7/attachment.yml","path":"docker-compose.yml","size":222,"sha256":"9cb533fbf0f7bce6190ee3f424b1b363d15c8daa9d9938446bc9ba0840c94d03","contentType":"application/yaml; charset=utf-8"},{"id":"9830f486-cc14-5346-9ca3-3240f4a15548","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/9830f486-cc14-5346-9ca3-3240f4a15548/attachment.md","path":"docs/AGENT-ADOPTION.md","size":5629,"sha256":"1332c6bd8611ab2098eb6634e3ba541c37f6a9826faff192c831e80bd042b6ab","contentType":"text/markdown; charset=utf-8"},{"id":"3e95d57c-acde-5908-958f-04873a8b2261","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/3e95d57c-acde-5908-958f-04873a8b2261/attachment.md","path":"docs/ARCHITECTURE-ANALYSIS.md","size":10973,"sha256":"23a89ab652a16650472e9a370129b064f558c35e7e18a34e70b23ab6e6456c90","contentType":"text/markdown; charset=utf-8"},{"id":"26ff28e5-b1f1-5d3b-9ec5-405f9fe4fd35","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/26ff28e5-b1f1-5d3b-9ec5-405f9fe4fd35/attachment.md","path":"docs/DEPLOYMENT.md","size":4132,"sha256":"ece52cb6dd0befc47fac940d69e635ff7fff2ab94a93a2bb42cb7a830c1e9fb4","contentType":"text/markdown; charset=utf-8"},{"id":"f38ea649-7f42-5e2a-871b-120ce8d5dd6b","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f38ea649-7f42-5e2a-871b-120ce8d5dd6b/attachment.md","path":"docs/OPENCLAW_INTEGRATION.md","size":5528,"sha256":"4b24a03d9ba4d52401f488314a5bdbcd4171b601fcc56bd1317b4eb8fc370711","contentType":"text/markdown; charset=utf-8"},{"id":"f6c32b9a-0bc7-55ec-aec3-9e4017813502","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f6c32b9a-0bc7-55ec-aec3-9e4017813502/attachment.md","path":"docs/PAPER-RESULTS.md","size":6556,"sha256":"379bb18aa127953a38bae572be3877fec2fc03d11b74a268aefdde88d61c016f","contentType":"text/markdown; charset=utf-8"},{"id":"5542fff5-43f1-58a4-abd8-97f4d708caa0","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/5542fff5-43f1-58a4-abd8-97f4d708caa0/attachment.md","path":"docs/TECHNICAL-REPORT-2026-03-22.md","size":9691,"sha256":"f214d936f6ae7cb50a14d594035759dfdb4cefd02751236cfe7802f45f779596","contentType":"text/markdown; charset=utf-8"},{"id":"44cbad07-e196-5376-827b-94a35f22dc87","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/44cbad07-e196-5376-827b-94a35f22dc87/attachment.md","path":"docs/avm-benchmark-2026.md","size":4642,"sha256":"ee31696c1b3236355646acf31a374c93daaf6c81d27924c292a58f036683477d","contentType":"text/markdown; charset=utf-8"},{"id":"9b8d70c4-309e-551d-a6dc-a6ab8f288796","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/9b8d70c4-309e-551d-a6dc-a6ab8f288796/attachment.yaml","path":"examples/agents.yaml","size":1748,"sha256":"d8f1c34cb48d58460951653df952f976d2f1c7bd4c5efac79f2b87755ab12b59","contentType":"application/yaml; charset=utf-8"},{"id":"20a1c0de-6e49-5179-a2b0-fdea7686e343","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/20a1c0de-6e49-5179-a2b0-fdea7686e343/attachment.yaml","path":"examples/home_assistant.yaml","size":915,"sha256":"93f6246293b340cdcf7f7c5cabe6f8c9ad8ae15f32ae6074e8a93263883eac1e","contentType":"application/yaml; charset=utf-8"},{"id":"fcd3c8db-e77d-5224-a171-6b09391d31a6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/fcd3c8db-e77d-5224-a171-6b09391d31a6/attachment.yaml","path":"examples/permissions.yaml","size":1528,"sha256":"e699ac025973c5f033be27f6942a27e6db56d179f68d5ba6754f3a3b9c54debd","contentType":"application/yaml; charset=utf-8"},{"id":"f3857dbe-32ae-5030-b2c2-d349ce574d08","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f3857dbe-32ae-5030-b2c2-d349ce574d08/attachment.yaml","path":"examples/trading_bot.yaml","size":1111,"sha256":"dc87cdc91ffb10a3666c2ac9529eca2ca5ffc9b1ec0731354ef059a1b7e613d4","contentType":"application/yaml; charset=utf-8"},{"id":"915405f9-dcb8-5ff2-b6f3-cfd3f3c81b4b","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/915405f9-dcb8-5ff2-b6f3-cfd3f3c81b4b/attachment.py","path":"playground.py","size":17231,"sha256":"96ba58367485a00e641282e691f245d385cad7263eb894d644107b928b93eb63","contentType":"text/x-python; charset=utf-8"},{"id":"f0c282e0-153e-53ae-ae56-3050a9b08a48","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f0c282e0-153e-53ae-ae56-3050a9b08a48/attachment.py","path":"providers.py","size":5905,"sha256":"d88c56f087c7fa8a3b405a63a375cfa6920e8116510a1db19b3e1501a21aebc0","contentType":"text/x-python; charset=utf-8"},{"id":"923d8181-cbf8-581c-80fc-04c7607ac5a7","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/923d8181-cbf8-581c-80fc-04c7607ac5a7/attachment.toml","path":"pyproject.toml","size":1203,"sha256":"a8d9552ba25a62db8e38470745180e5a0c7b03dafcd4c6c2c8487e5980cf9acb","contentType":"text/plain; charset=utf-8"},{"id":"14ac7f95-a216-5790-900a-e8a10e1d39f8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/14ac7f95-a216-5790-900a-e8a10e1d39f8/attachment.py","path":"scripts/ab_benchmark.py","size":7146,"sha256":"85fbda3e55f820f0c119be215b900daeebeff47bead1c5e4e240451716d0f20f","contentType":"text/x-python; charset=utf-8"},{"id":"8f8ce4da-d887-53e2-a219-ae80ae99a127","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/8f8ce4da-d887-53e2-a219-ae80ae99a127/attachment.py","path":"scripts/agent_benchmark.py","size":7180,"sha256":"7da1ae15fd0099e073fd97b2efe64bcf55974485841e238e52ff4f7b824dbbd3","contentType":"text/x-python; charset=utf-8"},{"id":"797b4907-39a5-5eed-bfcf-0ac3aa6b5cb9","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/797b4907-39a5-5eed-bfcf-0ac3aa6b5cb9/attachment.py","path":"scripts/agent_benchmark_local.py","size":4033,"sha256":"10b9b5c3c8e26e0d33e8ab15fda5c24ecf1c0fb60bc73425d0171a85b0ec12d3","contentType":"text/x-python; charset=utf-8"},{"id":"49863c1b-970a-5650-8862-04512820ccd9","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/49863c1b-970a-5650-8862-04512820ccd9/attachment.py","path":"scripts/avm_direct_test.py","size":2258,"sha256":"bb1eb3a3b2e312359f4f39335cfd56108449c44d4864009b2f8b811f232a2a31","contentType":"text/x-python; charset=utf-8"},{"id":"32920b88-20ce-5765-b8f3-c06bdb3820ac","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/32920b88-20ce-5765-b8f3-c06bdb3820ac/attachment.py","path":"scripts/fuse_minimal_test.py","size":4298,"sha256":"830e12225b09345763e8c8df2c8860b21e53fb9febc243f4192622b823343da6","contentType":"text/x-python; charset=utf-8"},{"id":"4071bed7-131f-52c4-8523-1fcbd21002f9","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/4071bed7-131f-52c4-8523-1fcbd21002f9/attachment.py","path":"scripts/quality_benchmark.py","size":5829,"sha256":"eb2f9d9acd8e76fb414c5e969b0877dbb675fb292223eb2bf264afed40b13e1b","contentType":"text/x-python; charset=utf-8"},{"id":"4fe376d9-90dc-5160-8d9a-09580f7ae2af","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/4fe376d9-90dc-5160-8d9a-09580f7ae2af/attachment.py","path":"scripts/real_bot_test.py","size":4304,"sha256":"2652b5993bebabe6c679cb705a01a97ae5c837b485d5a88efc683a1e1bdf4727","contentType":"text/x-python; charset=utf-8"},{"id":"c9fda77d-f843-56d8-947d-0f38eefa678b","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c9fda77d-f843-56d8-947d-0f38eefa678b/attachment.py","path":"scripts/scenario_benchmark.py","size":25258,"sha256":"2ae02873a0b3a03e242511294968d19cb42399529625a3c824d84bdc59725323","contentType":"text/x-python; charset=utf-8"},{"id":"7ae495ba-12f4-5b73-bc98-e71bf90273b3","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/7ae495ba-12f4-5b73-bc98-e71bf90273b3/attachment.py","path":"tests/__init__.py","size":12,"sha256":"13fffe1beb6a60d085088e29fbecadc9ceda168de5e7e7dc2b7d17fa2438253e","contentType":"text/x-python; charset=utf-8"},{"id":"77dedcd3-714e-5c84-97f6-74ccb63e0612","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/77dedcd3-714e-5c84-97f6-74ccb63e0612/attachment.py","path":"tests/test_advanced.py","size":11890,"sha256":"875a784464c914cf42539d6613809efcb6468532b2401519e4eb2beecae15911","contentType":"text/x-python; charset=utf-8"},{"id":"628669c2-797c-5bfd-ad91-f3ed5278c22d","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/628669c2-797c-5bfd-ad91-f3ed5278c22d/attachment.py","path":"tests/test_cli.py","size":5511,"sha256":"4cc444135a6af069dbeea2acb1ccf442b9e8337779a4f9142ed4a0b19c675d90","contentType":"text/x-python; charset=utf-8"},{"id":"f1e9209c-bb33-5048-8967-46487d8ceef5","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f1e9209c-bb33-5048-8967-46487d8ceef5/attachment.py","path":"tests/test_config.py","size":3725,"sha256":"b831bd1fff055b54d448175fa38d9840ed7ca3c677319a90498a649ce5e4eedd","contentType":"text/x-python; charset=utf-8"},{"id":"918aea11-7e98-5312-a748-60fc4d3f2ea4","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/918aea11-7e98-5312-a748-60fc4d3f2ea4/attachment.py","path":"tests/test_consolidation.py","size":5353,"sha256":"e36b19c5b2b567bd4b925af19959962548308f15885611bbfe92f929b010f9a6","contentType":"text/x-python; charset=utf-8"},{"id":"2eb67321-91d4-5223-a001-032b484b1bf8","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/2eb67321-91d4-5223-a001-032b484b1bf8/attachment.py","path":"tests/test_core.py","size":7611,"sha256":"44fe3712cb626e41def99fbe218b692f197dab9566c98489defd58dbfc38202c","contentType":"text/x-python; charset=utf-8"},{"id":"fedc0068-5155-503d-bac6-e3c442f1c4b6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/fedc0068-5155-503d-bac6-e3c442f1c4b6/attachment.py","path":"tests/test_daemon.py","size":4121,"sha256":"05c5ae334a1c0341812d10cd9dbe1eef630fff37e198ea974e49393168228f8b","contentType":"text/x-python; charset=utf-8"},{"id":"c533feed-e9cd-5f44-94c7-ea797d10a498","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/c533feed-e9cd-5f44-94c7-ea797d10a498/attachment.py","path":"tests/test_embedding.py","size":4745,"sha256":"64a8591d21dcff79ca501f992a1dd7eccc8040b5918d4af09010e924b9af6aa9","contentType":"text/x-python; charset=utf-8"},{"id":"4bad38ae-5955-55ae-87f9-f3f397d9360f","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/4bad38ae-5955-55ae-87f9-f3f397d9360f/attachment.py","path":"tests/test_faiss.py","size":7170,"sha256":"aacd8a42a8f8f09f8671daaac2a28597975cbaf7e02c73e8487dbdd7803bd5a6","contentType":"text/x-python; charset=utf-8"},{"id":"f7b586f8-84dc-544f-9fc1-b0c37c718832","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f7b586f8-84dc-544f-9fc1-b0c37c718832/attachment.py","path":"tests/test_fuse.py","size":28215,"sha256":"3d4a0064d6bba501ef6faca945b2e3f79f45247a33dca753dad90372bd9d48a8","contentType":"text/x-python; charset=utf-8"},{"id":"72e076fb-e711-5d25-a649-1580a41cd41c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/72e076fb-e711-5d25-a649-1580a41cd41c/attachment.py","path":"tests/test_gossip.py","size":8857,"sha256":"01c34ca7c74fab85887e73c47cdd8bc54312adf7e03856988b4196c838a91126","contentType":"text/x-python; charset=utf-8"},{"id":"e32fd7b0-ec85-5b66-b421-4eb438af1895","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e32fd7b0-ec85-5b66-b421-4eb438af1895/attachment.py","path":"tests/test_graph.py","size":4056,"sha256":"2ea2056328301971b0128505ea52b2b65b527a4685c6f1452a6918782e574e44","contentType":"text/x-python; charset=utf-8"},{"id":"ae71d16a-3261-5088-b7cd-1778c93e6a53","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/ae71d16a-3261-5088-b7cd-1778c93e6a53/attachment.py","path":"tests/test_handlers.py","size":7110,"sha256":"b67a8a18bb18400365d4dc0516e855c69cb0a5ebfb56d6ea2115c47c6e756d3b","contentType":"text/x-python; charset=utf-8"},{"id":"38baa8bb-9ff4-525e-8abc-b1b8bff3aa7e","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/38baa8bb-9ff4-525e-8abc-b1b8bff3aa7e/attachment.py","path":"tests/test_handlers_more.py","size":4506,"sha256":"bade715475678a6f08ed244715388bfef4a6cd4bc6135018141e44e2c563e647","contentType":"text/x-python; charset=utf-8"},{"id":"7c69dab6-d348-548a-8f67-1d41f4236583","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/7c69dab6-d348-548a-8f67-1d41f4236583/attachment.py","path":"tests/test_librarian.py","size":7795,"sha256":"e9702b0c41e5dc527351628bd76f6f69581dea1c311145c364f42422afc0dd41","contentType":"text/x-python; charset=utf-8"},{"id":"e5a034c0-45dd-532c-b2cc-82a0c4fe489b","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e5a034c0-45dd-532c-b2cc-82a0c4fe489b/attachment.py","path":"tests/test_mcp.py","size":2473,"sha256":"2aed1acfc1068eb3fc49d288a65d3a0bb3b184a4115345519e48bba824a39786","contentType":"text/x-python; charset=utf-8"},{"id":"f4b22870-63b0-5d9d-96e0-dee768a9c254","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/f4b22870-63b0-5d9d-96e0-dee768a9c254/attachment.py","path":"tests/test_more_handlers.py","size":4033,"sha256":"efe1243f1b2fcc42292d73970c3aae55eed8158a8fcf30f580cf784e4bf5858c","contentType":"text/x-python; charset=utf-8"},{"id":"339b51cc-4db5-579e-a1e6-c05b83b519ba","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/339b51cc-4db5-579e-a1e6-c05b83b519ba/attachment.py","path":"tests/test_node.py","size":3143,"sha256":"d6ac9e0f35709a6d3f63e08644364bb981150a0afbd15816d725a2fb3ef07ebd","contentType":"text/x-python; charset=utf-8"},{"id":"95003c6f-78da-5370-bae8-856fea4e09f6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/95003c6f-78da-5370-bae8-856fea4e09f6/attachment.py","path":"tests/test_permissions.py","size":2990,"sha256":"cc93263fcd2fc27dd75054851835f6af6c17b21d48feedeee3f7f6bbd9ad3244","contentType":"text/x-python; charset=utf-8"},{"id":"b5e724d0-6133-5625-baca-35ee3e762a5a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b5e724d0-6133-5625-baca-35ee3e762a5a/attachment.py","path":"tests/test_providers.py","size":1547,"sha256":"74703a9ec047a0e54e7eca364479a33c7104888a541e576e0b28fedc8124f4d0","contentType":"text/x-python; charset=utf-8"},{"id":"0a95d182-2743-5157-8905-e3c35b73bfc6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/0a95d182-2743-5157-8905-e3c35b73bfc6/attachment.py","path":"tests/test_retrieval.py","size":3032,"sha256":"be7877bf5e2a060ee05160e0ceb24531e86c54988e0344d9f32b6f7669d0cabc","contentType":"text/x-python; charset=utf-8"},{"id":"8c8a4a05-3770-5e49-8ae3-aa66a644d1d0","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/8c8a4a05-3770-5e49-8ae3-aa66a644d1d0/attachment.py","path":"tests/test_store.py","size":6360,"sha256":"3d1cff987f6146ea490a1df0b1266729b934ee1c10c181bfe5993792478166f8","contentType":"text/x-python; charset=utf-8"},{"id":"1f6f30de-153c-5c84-a371-bf683d011bef","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/1f6f30de-153c-5c84-a371-bf683d011bef/attachment.py","path":"tests/test_subscriptions.py","size":8164,"sha256":"3b542efcf9f07e6dddb05cf293e4263a427ef77e561f6653fe1297af7d955bb6","contentType":"text/x-python; charset=utf-8"},{"id":"b50dfefb-eeaa-53dd-aecf-ecc88bf83785","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/b50dfefb-eeaa-53dd-aecf-ecc88bf83785/attachment.py","path":"tests/test_tell.py","size":16611,"sha256":"b499bcacfe9de91422591d62981c011c8a96dae8f9ac1ab5ba069aa69c33cea3","contentType":"text/x-python; charset=utf-8"},{"id":"bad6a226-e451-5891-9b00-3a1a02266721","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/bad6a226-e451-5891-9b00-3a1a02266721/attachment.py","path":"tests/test_topic_index.py","size":7204,"sha256":"bd1854c3a51af6e686e607c6f31ca5b4e2ef1bc0bc6ce111f3275e915e3d0d09","contentType":"text/x-python; charset=utf-8"},{"id":"1caf3013-e9ce-58b1-a109-0e030616d6f7","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/1caf3013-e9ce-58b1-a109-0e030616d6f7/attachment.py","path":"trading/__init__.py","size":53,"sha256":"6bae11c993600cdbabb8edc5334be7a1aa10dacaa623dd4e244cc34c2f943908","contentType":"text/x-python; charset=utf-8"},{"id":"ba29f076-2682-5e82-ad64-75a50c5bb3de","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/ba29f076-2682-5e82-ad64-75a50c5bb3de/attachment.py","path":"trading/providers.py","size":7631,"sha256":"8a6c4b560cb9a1722a4b086da4d1869c48fb8426434e75928ff43b044fd540f0","contentType":"text/x-python; charset=utf-8"}],"bundle_sha256":"9fd28506042fa0d4419e1ff8319b5d0caf8c4ecf4cad1d7b7ff92fbd464200d2","attachment_count":252,"text_attachments":249,"attachment_storage":"skillopedia-attachments-v1","binary_attachments":3,"excluded_attachments":[]},"cluster_size":1,"skill_md_path":"SKILL.md","import_metadata":{"date":"2026-06-05","author":"@skillopedia","version":"v1","category":"general","category_label":"General"},"exact_dupes_collapsed_into_this":0},"version":"v1","category":"general","import_tag":"clean-skills-v1"}},"renderedAt":1782980045732}

AVM Memory Skill AI Virtual Memory — 多 Agent 共享记忆系统 核心能力 - 语义搜索 :embedding + FTS5 混合检索 - Token 感知 :自动截断到 token 预算 - 多 Agent :私有/共享空间隔离 + 订阅通知 - 生命周期 :自动衰减、归档、垃圾清理 - TopicIndex :O(1) recall,已知 topic 1 hop 完成 - Librarian :多 Agent 知识路由,95% hop 减少 - Gossip Protocol :去中心化发现,bloom filter digest - Memory Consolidation :睡眠式记忆整合 --- 快速开始 CLI 方式 FUSE 方式 Python API --- 🆕 多 Agent 发现 方式 1: Librarian(中心化) 当你想知道"谁知道某个话题": 延迟 : 1.7ms,95% hop 减少 方式 2: Gossip Protocol(去中心化) 每个 agent 维护一个 digest(bloom filter),周期性交换: 特点 : - 无单点故障 - 本地查询 O(1) - 假阳性 <15%,假阴性 0% - 每 agent 只需 128 bytes digest 何时用哪个? | 场景 | 推荐 | |--…