python-sandbox — Skillopedia

Python沙盒工具使用指南 v2.5 (与后端完全匹配版) 🎯 核心能力概览 Python沙盒是一个多功能的代码执行环境，支持： | 功能领域 | 主要用途 | 关键库 | |---------|---------|-------| | 数据分析 | 数据清洗、转换、聚合 | Pandas, Polars | | 高性能计算 | 内存SQL、表达式加速 | DuckDB, Numexpr, Bottleneck | | 可视化 | 图表生成与自动捕获 | Matplotlib, Seaborn | | 文档自动化 | Excel/Word/PDF/PPT生成 | python-docx, reportlab, openpyxl | | 机器学习 | 模型训练与评估 | scikit-learn, LightGBM | | 符号数学 | 公式证明、方程求解 | SymPy | | 科学计算 | 优化、积分、信号处理 | SciPy | | 流程图生成 | 架构图、流程图 | Graphviz, NetworkX | | 文本分析 | HTML解析、数据提取 | BeautifulSoup4, lxml | | 性能优化 | 机械硬盘优化、异步IO | aiofiles, joblib | --- 📁 文件处理指南 - 两种模式必须分清模式A: 工作区文件 ( 目录) 用…

, cell_content):\n cell_type = \"number\"\n elif re.search(r'^\\d{4}-\\d{2}-\\d{2}

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.

, cell_content):\n cell_type = \"date\"\n \n if cell_content:\n cells.append({\n \"content\": cell_content[:200],\n \"type\": cell_type\n })\n \n if cells:\n rows.append(cells)\n row_count += 1\n \n if rows and row_count > 0:\n # 深度研究分析：判断表格类型\n table_purpose = \"unknown\"\n headers = rows[0] if rows else []\n \n # 根据表头内容判断表格用途\n header_text = \" \".join([cell[\"content\"] for cell in headers])\n if any(keyword in header_text.lower() for keyword in ['价格', 'price', 'cost', '￥', '

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.

]):\n table_purpose = \"pricing\"\n elif any(keyword in header_text.lower() for keyword in ['规格', 'spec', '参数', 'parameter']):\n table_purpose = \"specifications\"\n elif any(keyword in header_text.lower() for keyword in ['对比', 'compare', 'vs', '差异']):\n table_purpose = \"comparison\"\n elif any(keyword in header_text.lower() for keyword in ['时间', 'date', '日期', 'schedule']):\n table_purpose = \"timeline\"\n \n tables.append({\n \"table_index\": table_count,\n \"table_type\": table_type,\n \"purpose\": table_purpose,\n \"row_count\": len(rows),\n \"col_count\": len(rows[0]) if rows else 0,\n \"headers\": [cell[\"content\"] for cell in headers] if headers else [],\n \"data_sample\": [[cell[\"content\"] for cell in row] for row in rows[:5]], # 前5行样本\n \"data_types\": list(set([cell[\"type\"] for row in rows[:3] for cell in row])) if rows else []\n })\n \n table_count += 1\n \n # 构建深度研究友好的输出\n return {\n \"type\": \"html_table_extraction\",\n \"title\": \"HTML表格提取报告\",\n \"timestamp\": datetime.now().isoformat(),\n \"metadata\": {\n \"html_length\": len(html_content),\n \"tables_found\": len(tables),\n \"extraction_method\": \"research_optimized_regex\",\n \"scenario\": \"deep_research_data_extraction\",\n \"limitations\": \"仅支持简单表格，嵌套表格可能无法正确处理\"\n },\n \"data\": {\n \"tables\": tables,\n \"summary\": {\n \"total_tables\": len(tables),\n \"pricing_tables\": sum(1 for table in tables if table[\"purpose\"] == \"pricing\"),\n \"spec_tables\": sum(1 for table in tables if table[\"purpose\"] == \"specifications\"),\n \"comparison_tables\": sum(1 for table in tables if table[\"purpose\"] == \"comparison\"),\n \"total_rows\": sum(table[\"row_count\"] for table in tables),\n \"total_columns\": sum(table[\"col_count\"] for table in tables)\n }\n }\n }\n \n except Exception as e:\n return {\n \"type\": \"table_extraction_error\",\n \"title\": \"表格提取失败\",\n \"error_message\": str(e),\n \"timestamp\": datetime.now().isoformat(),\n \"metadata\": {\n \"scenario\": 
\"deep_research_fallback\",\n \"recommendation\": \"建议使用结构化数据源或简化表格结构\"\n }\n }\n\ndef research_html_analysis(html_content: str) -> dict:\n \"\"\"\n 深度研究HTML分析 - 专为深度研究场景优化\n 结合链接和表格提取，提供研究洞察\n \"\"\"\n # 深度研究：限制输入大小，关注质量而非数量\n if len(html_content) > 200000:\n html_content = html_content[:200000] + \"\\n[HTML内容过长，已截断用于深度分析]\"\n \n # 并行提取（深度研究需要多方面信息）\n title_links = extract_html_title_and_links(html_content)\n tables = extract_simple_table_data(html_content)\n \n # 深度研究分析：提取关键洞察\n research_insights = []\n \n # 基于链接的洞察\n if title_links.get(\"type\") != \"html_extraction_error\":\n links_data = title_links.get(\"data\", {})\n if links_data.get(\"statistics\", {}).get(\"product_links\", 0) > 0:\n research_insights.append(\"页面包含产品相关链接，可能是电商或产品页面\")\n if links_data.get(\"statistics\", {}).get(\"external_links\", 0) > 5:\n research_insights.append(\"页面包含多个外部链接，可能是资源聚合或引用页面\")\n \n # 基于表格的洞察\n if tables.get(\"type\") != \"table_extraction_error\":\n tables_data = tables.get(\"data\", {})\n if tables_data.get(\"summary\", {}).get(\"pricing_tables\", 0) > 0:\n research_insights.append(\"页面包含价格表格，适合价格分析研究\")\n if tables_data.get(\"summary\", {}).get(\"comparison_tables\", 0) > 0:\n research_insights.append(\"页面包含对比表格，适合产品对比研究\")\n \n # 构建深度研究报告\n return {\n \"type\": \"deep_research_html_analysis\",\n \"title\": \"深度研究HTML分析报告\",\n \"timestamp\": datetime.now().isoformat(),\n \"metadata\": {\n \"original_length\": len(html_content),\n \"analysis_focus\": \"research_data_extraction\",\n \"version\": \"v3.1-research-optimized\",\n \"primary_use_cases\": [\n \"产品规格对比研究\",\n \"价格策略分析\", \n \"竞品分析\",\n \"技术文档解析\"\n ]\n },\n \"components\": {\n \"title_and_links\": title_links,\n \"tables\": tables\n },\n \"research_insights\": research_insights if research_insights else [\"需要进一步分析以获得深度洞察\"],\n \"recommendations\": [\n \"对于复杂页面，建议使用crawl4ai预处理\",\n \"关注页面中的结构化数据（表格、列表）\",\n \"结合文本内容进行综合分析\"\n ]\n }\n\n# ===================== 深度研究使用示例 =====================\nif __name__ == 
\"__main__\":\n # 示例HTML - 模拟深度研究场景\n html_content = \"\"\"\n \u003chtml>\n \u003chead>\n \u003ctitle>深度研究示例：AI模型对比分析\u003c/title>\n \u003cmeta name=\"description\" content=\"对比GPT-4, Claude 3, Gemini Pro等主流AI模型\">\n \u003c/head>\n \u003cbody>\n \u003ch1>主流AI模型对比分析\u003c/h1>\n \n \u003cnav>\n \u003ca href=\"#pricing\">价格对比\u003c/a>\n \u003ca href=\"#specs\">技术规格\u003c/a>\n \u003ca href=\"#performance\">性能测试\u003c/a>\n \u003ca href=\"https://openai.com\">OpenAI官网\u003c/a>\n \u003ca href=\"https://anthropic.com\">Anthropic官网\u003c/a>\n \u003c/nav>\n \n \u003csection id=\"pricing\">\n \u003ch2>价格对比表\u003c/h2>\n \u003ctable border=\"1\" class=\"pricing-table\">\n \u003ctr>\n \u003cth>模型\u003c/th>\u003cth>输入价格 ($/1M tokens)\u003c/th>\u003cth>输出价格 ($/1M tokens)\u003c/th>\n \u003c/tr>\n \u003ctr>\n \u003ctd>GPT-4 Turbo\u003c/td>\u003ctd>$10.00\u003c/td>\u003ctd>$30.00\u003c/td>\n \u003c/tr>\n \u003ctr>\n \u003ctd>Claude 3 Opus\u003c/td>\u003ctd>$15.00\u003c/td>\u003ctd>$75.00\u003c/td>\n \u003c/tr>\n \u003ctr>\n \u003ctd>Gemini Pro\u003c/td>\u003ctd>$0.50\u003c/td>\u003ctd>$1.50\u003c/td>\n \u003c/tr>\n \u003c/table>\n \u003c/section>\n \n \u003csection id=\"specs\">\n \u003ch2>技术规格对比\u003c/h2>\n \u003ctable class=\"spec-table\">\n \u003ctr>\u003cth>模型\u003c/th>\u003cth>上下文长度\u003c/th>\u003cth>参数规模\u003c/th>\u003cth>MMLU分数\u003c/th>\u003c/tr>\n \u003ctr>\u003ctd>GPT-4\u003c/td>\u003ctd>128K\u003c/td>\u003ctd>1.8万亿\u003c/td>\u003ctd>86.4\u003c/td>\u003c/tr>\n \u003ctr>\u003ctd>Claude 3\u003c/td>\u003ctd>200K\u003c/td>\u003ctd>未知\u003c/td>\u003ctd>87.5\u003c/td>\u003c/tr>\n \u003c/table>\n \u003c/section>\n \n \u003cdiv class=\"resources\">\n \u003ch3>相关资源\u003c/h3>\n \u003ca href=\"/whitepaper.pdf\">技术白皮书下载\u003c/a>\n \u003ca href=\"/api-docs\">API文档\u003c/a>\n \u003ca href=\"mailto:[email protected]\">联系研究团队\u003c/a>\n \u003c/div>\n \u003c/body>\n \u003c/html>\n \"\"\"\n \n # 执行深度研究分析\n print(\"=== 深度研究HTML分析 ===\")\n result = research_html_analysis(html_content)\n 
print(json.dumps(result, ensure_ascii=False, indent=2))\n```\n\n---\n\n## 🎯 AI使用指南（与中间件配合）\n\n### 步骤一：识别分析需求（中间件感知）\n当用户请求分析文本时，AI应：\n1. ✅ 确认文本内容是否已提供（来自data_context）\n2. ✅ 识别分析目标（价格、规格、分类等）\n3. ✅ 选择合适的提取器组合\n4. ✅ **避免使用类定义，使用函数式编程**\n5. ✅ **确保代码中没有中文标点符号**\n6. ✅ **确保输出包含type字段**\n\n### 步骤二：生成执行代码（中间件兼容）\n```python\ndef generate_analysis_code_for_ai(user_text: str, analysis_type: str) -> str:\n \"\"\"\n AI调用此函数生成可执行的沙盒代码 - 中间件兼容版\n 注意：这是给AI看的模板，不是直接在沙盒中执行的代码\n \"\"\"\n # 示例代码模板 - 注意使用英文标点\n code_template = f'''\nimport json\nimport re\nfrom datetime import datetime\n\n# 用户提供的分析文本\nTEXT_TO_ANALYZE = \"\"\"{user_text}\"\"\"\n\ndef analyze_content(text):\n \"\"\"分析函数 - 函数式版本（中间件兼容）\"\"\"\n result = {{\n \"type\": \"analysis_report\", # 🚨 必须字段\n \"title\": \"{analysis_type} analysis result\",\n \"timestamp\": datetime.now().isoformat(),\n \"metadata\": {{\n \"analysis_method\": \"regex_extraction\",\n \"input_length\": len(text)\n }},\n \"data\": {{}}\n }}\n \n # 价格提取 - 使用英文标点\n price_match = re.search(r'\\\\$\\\\s*(\\\\d+[,\\\\d]*\\\\.?\\\\d*)', text)\n if price_match:\n result[\"data\"][\"price_usd\"] = price_match.group(1)\n \n # 规格提取 - 使用英文标点\n height_match = re.search(r'(\\\\d+(?:\\\\.\\\\d+)?)\\\\s*(cm|mm|m)\\\\s*高', text, re.IGNORECASE)\n width_match = re.search(r'(\\\\d+(?:\\\\.\\\\d+)?)\\\\s*(cm|mm|m)\\\\s*宽', text, re.IGNORECASE)\n \n dimensions = {{}}\n if height_match:\n dimensions[\"height\"] = height_match.group(1) + (height_match.group(2) or \"\")\n if width_match:\n dimensions[\"width\"] = width_match.group(1) + (width_match.group(2) or \"\")\n \n if dimensions:\n result[\"data\"][\"dimensions\"] = dimensions\n \n # 确保数据不为空\n if not result[\"data\"]:\n result[\"data\"][\"status\"] = \"no_data_extracted\"\n \n return result\n\n# 执行分析\ntry:\n analysis_result = analyze_content(TEXT_TO_ANALYZE)\n \n # 🚨 必须：以JSON格式输出，ensure_ascii=False支持中文\n print(json.dumps(analysis_result, ensure_ascii=False, indent=2))\n \nexcept Exception as e:\n # 错误处理 - 中间件要求返回标准格式\n 
error_result = {{\n \"type\": \"analysis_error\",\n \"error_message\": str(e),\n \"timestamp\": datetime.now().isoformat(),\n \"input_sample\": TEXT_TO_ANALYZE[:200]\n }}\n print(json.dumps(error_result, ensure_ascii=False, indent=2))\n'''\n return code_template\n```\n\n### 步骤三：处理返回结果（中间件集成）\nAI收到沙盒执行结果后：\n1. ✅ 验证输出格式是否正确（type字段存在）\n2. ✅ 提取关键信息呈现给用户\n3. ✅ 提供进一步分析建议\n4. ✅ 如果失败，利用中间件的备用方案\n\n---\n\n## 🔧 故障排除与最佳实践（中间件优化版）\n\n### 常见问题解决方案（针对中间件优化）\n\n| 问题 | 原因 | 解决方案 |\n|------|------|----------|\n| 无输出 | 代码未执行print | 确保最后一行是print(json.dumps(...)) |\n| 格式错误 | 非JSON输出 | 使用json.dumps()而非str() |\n| 提取为空 | 文本格式不匹配 | 添加更灵活的正则表达式 |\n| 编码问题 | 中文字符乱码 | 使用ensure_ascii=False参数 |\n| 类定义错误 | 中间件不支持类 | 使用函数式编程替代 |\n| **中文标点错误** | **代码包含中文标点** | **全部替换为英文标点** |\n| **缺少type字段** | **中间件无法识别输出** | **必须包含type字段** |\n| **输出过长** | **中间件可能截断** | **限制输出长度，使用data_bus存储** |\n\n### 中间件特定优化建议\n1. **类型字段优先**：所有输出必须包含type字段，这是中间件识别的关键\n2. **错误处理标准化**：使用try-except包裹，返回标准错误格式\n3. **长度限制**：限制提取结果数量，避免中间件处理超长数据\n4. **时间戳添加**：为每次分析添加时间戳，便于中间件追踪\n5. **元数据丰富**：添加metadata字段，包含分析方法、版本等信息\n6. **数据总线兼容**：如果数据量大，考虑使用中间件的数据总线存储机制\n\n---\n\n## 📋 完整工作流示例（中间件兼容版）\n\n```python\n# ===================== 完整分析工作流（中间件兼容版）=====================\nimport json\nimport re\nfrom datetime import datetime\n\ndef complete_analysis_workflow(data_context: str) -> str:\n \"\"\"\n 端到端的文本分析工作流 - 中间件兼容版\n 输入：爬虫获取的文本数据\n 输出：标准化的分析报告\n \"\"\"\n \n try:\n # 1. 并行提取各类信息（使用函数而非类）\n price_info = extract_price_info(data_context)\n dimensions = extract_dimensions(data_context)\n categories = categorize_with_confidence(data_context)\n \n # 2. 
构建结果 - 符合中间件要求\n report = {\n \"type\": \"comprehensive_analysis\", # 🚨 关键字段\n \"title\": \"综合文本分析报告\",\n \"timestamp\": datetime.now().isoformat(),\n \"metadata\": {\n \"analysis_tools\": \"middleware_compatible_suite\",\n \"analysis_time\": datetime.now().isoformat(),\n \"confidence\": calculate_confidence(price_info, dimensions),\n \"version\": \"v3.1-middleware-optimized\"\n },\n \"data\": {\n \"price_information\": price_info,\n \"specifications\": dimensions,\n \"content_categorization\": categories,\n \"text_statistics\": {\n \"total_length\": len(data_context),\n \"line_count\": data_context.count('\\n'),\n \"key_sentences\": extract_key_sentences(data_context, 3)\n }\n }\n }\n \n return json.dumps(report, ensure_ascii=False, indent=2)\n \n except Exception as e:\n # 错误处理 - 中间件兼容格式\n error_report = {\n \"type\": \"workflow_error\",\n \"error_message\": str(e),\n \"timestamp\": datetime.now().isoformat(),\n \"input_sample\": data_context[:500] if len(data_context) > 500 else data_context\n }\n return json.dumps(error_report, ensure_ascii=False, indent=2)\n\n# 辅助函数 - 注意使用英文标点\ndef extract_key_sentences(text: str, max_sentences: int = 3) -> list:\n \"\"\"提取关键句子 - 中间件兼容版\"\"\"\n sentences = []\n current = \"\"\n \n for char in text:\n current += char\n if char in '.!?。！？': # 中英文句末标点\n sentence = current.strip()\n if len(sentence) > 10:\n sentences.append(sentence)\n current = \"\"\n \n if len(sentences) >= max_sentences:\n break\n \n # 如果没找到足够句子，按换行分割\n if len(sentences) \u003c max_sentences:\n lines = [line.strip() for line in text.split('\\n') if len(line.strip()) > 10]\n sentences.extend(lines[:max_sentences - len(sentences)])\n \n return sentences[:max_sentences]\n\ndef calculate_confidence(price_info: dict, dimensions: dict) -> str:\n \"\"\"计算分析置信度 - 中间件兼容版\"\"\"\n price_matches = price_info.get('price_matches', [])\n has_dimensions = bool(dimensions)\n \n if price_matches and has_dimensions:\n return \"high\"\n elif price_matches or has_dimensions:\n 
return \"medium\"\n else:\n return \"low\"\n\n# 主执行逻辑 - 注意使用英文标点\nif __name__ == \"__main__\":\n # 示例文本 - 注意使用英文标点\n sample_text = \"\"\"\n 产品: 高端智能手表\n 价格: $299.99\n 尺寸: 高度45mm, 宽度38mm\n 材质: 不锈钢表壳, 蓝宝石玻璃\n 功能: 心率监测, GPS定位\n \"\"\"\n \n result = complete_analysis_workflow(sample_text)\n print(result)\n```\n\n---\n\n## ✅ 验证测试（中间件兼容）\n\n运行以下代码验证您的分析器：\n\n```python\n# 测试用例 - 中间件兼容版\nimport json\n\ndef run_middleware_compatible_tests():\n \"\"\"运行中间件兼容性测试\"\"\"\n test_cases = [\n {\n \"text\": \"Jimmy Choo DIDI 45 价格 $299.99 材质皮革高度45mm\",\n \"expected_type\": \"product_page_analysis\",\n \"has_price\": True,\n \"has_dimensions\": True\n },\n {\n \"text\": \"iPhone 15 Pro Max 售价 ¥9999 重量 221g 宽度78mm\",\n \"expected_type\": \"electronics_analysis\",\n \"has_price\": True,\n \"has_dimensions\": True\n },\n {\n \"text\": \"实木餐桌尺寸 180x90cm 价格 €459 高度75cm\",\n \"expected_type\": \"home_goods_analysis\",\n \"has_price\": True,\n \"has_dimensions\": True\n }\n ]\n \n test_results = []\n \n for i, test_case in enumerate(test_cases):\n # 使用函数式分析器\n dimensions = extract_dimensions(test_case[\"text\"])\n categories = categorize_content(test_case[\"text\"])\n \n result = {\n \"type\": \"test_result\",\n \"test_id\": i + 1,\n \"test_case\": test_case[\"expected_type\"],\n \"dimensions\": dimensions,\n \"categories\": categories,\n \"has_price\": \"$\" in test_case[\"text\"] or \"¥\" in test_case[\"text\"] or \"€\" in test_case[\"text\"],\n \"passed_basic_checks\": bool(dimensions) or bool(categories),\n \"middleware_compatible\": True # 标记为中间件兼容\n }\n \n test_results.append(result)\n \n # 输出汇总报告\n summary = {\n \"type\": \"test_summary\",\n \"total_tests\": len(test_results),\n \"passed_tests\": sum(1 for r in test_results if r[\"passed_basic_checks\"]),\n \"all_middleware_compatible\": all(r[\"middleware_compatible\"] for r in test_results),\n \"test_results\": test_results\n }\n \n return summary\n\n# 执行测试\nif __name__ == \"__main__\":\n summary = 
run_middleware_compatible_tests()\n print(json.dumps(summary, ensure_ascii=False, indent=2))\n```\n\n---\n\n## 📌 总结要点（中间件优化版）\n\n1. **与中间件完全兼容**：所有代码设计考虑了ToolExecutionMiddleware的要求\n2. **类型字段优先**：输出必须包含type字段，这是中间件识别的关键\n3. **中文标点规避**：代码中禁止使用中文标点符号，只使用英文标点\n4. **函数式编程**：避免类定义，与中间件优化保持一致\n5. **错误处理标准化**：使用try-except，返回中间件可解析的错误格式\n6. **输出格式严格**：必须使用print(json.dumps(...))格式\n7. **元数据丰富**：添加时间戳、版本号等元数据\n8. **长度限制**：控制输出长度，避免中间件处理问题\n\n## 🔄 从类到函数的转换指南（中间件要求）\n\n| 原类定义 | 转换后的函数 | 使用方式 | 中间件兼容性 |\n|---------|------------|---------|-------------|\n| `class Extractor:`\u003cbr>`def extract(self, text):` | `def extract_data(text):` | `result = extract_data(text)` | ✅ |\n| `obj = Extractor()`\u003cbr>`obj.extract(text)` | 直接调用函数 | `extract_data(text)` | ✅ |\n| 类属性（`self.config`） | 函数参数或全局常量 | `def func(text, config={})` | ✅ |\n| 多个相关方法 | 多个独立函数或主函数调用子函数 | `def main_func():`\u003cbr>`data1 = func1()`\u003cbr>`data2 = func2()` | ✅ |\n| **使用中文标点** | **全部替换为英文标点** | **result = {\"price\": \"100\"}** | ✅ |\n| **缺少type字段** | **必须添加type字段** | **{\"type\": \"analysis\", \"data\": {}}** | ✅ |\n\n## 🎯 最终检查清单（中间件优化版）\n\n在生成沙盒代码前，请确认：\n- [ ] 没有`class`关键字（函数式编程）\n- [ ] 所有功能都是函数\n- [ ] 输出包含`type`字段（中间件必需）\n- [ ] 使用`json.dumps()`输出\n- [ ] 没有网络请求或文件系统访问\n- [ ] 正则表达式有限制（避免ReDoS）\n- [ ] **代码中没有中文标点符号**\n- [ ] **使用英文标点（逗号, 句号. 
冒号:）**\n- [ ] **添加时间戳和元数据**\n- [ ] **包含错误处理机制**\n- [ ] **控制输出长度**\n","content_type":"text/markdown; charset=utf-8","language":"markdown","size":45511,"content_sha256":"6ab62fd3888738fe0d27acfa1ae7ca829447b9c0be230017eeedf53f74c8d328"},{"filename":"skill-report.json","content":"{\n \"schema_version\": \"2.0\",\n \"meta\": {\n \"generated_at\": \"2026-01-16T20:24:03.910Z\",\n \"slug\": \"ck991357-python-sandbox\",\n \"source_url\": \"https://github.com/CK991357/gemini-chat/tree/main/src/skills/python_sandbox\",\n \"source_ref\": \"main\",\n \"model\": \"claude\",\n \"analysis_version\": \"3.0.0\",\n \"source_type\": \"community\",\n \"content_hash\": \"a3cadac243b051738090319c82a2313762fdba787696c285c1770c9012dc8dcc\",\n \"tree_hash\": \"65616686a3f58b544b26c47c4e580d430196bc1c7dc28f731a077a70c1439d37\"\n },\n \"skill\": {\n \"name\": \"python-sandbox\",\n \"description\": \"在沙盒环境中执行Python代码，用于数据分析、可视化和生成Excel、Word、PDF等文件。支持数据清洗、统计分析、机器学习、图表生成、文档自动化等复杂工作流。\",\n \"summary\": \"在沙盒环境中执行Python代码，用于数据分析、可视化和生成Excel、Word、PDF等文件。支持数据清洗、统计分析、机器学习、图表生成、文档自动化等复杂工作流。\",\n \"icon\": \"🐍\",\n \"version\": \"2.5\",\n \"author\": \"CK991357\",\n \"license\": \"MIT\",\n \"category\": \"code\",\n \"tags\": [\n \"python\",\n \"code\",\n \"visualization\",\n \"data-analysis\",\n \"chart\",\n \"document\",\n \"automation\",\n \"machine-learning\",\n \"reporting\",\n \"excel\",\n \"word\",\n \"pdf\",\n \"ppt\"\n ],\n \"supported_tools\": [\n \"claude\",\n \"codex\",\n \"claude-code\"\n ],\n \"risk_factors\": [\n \"external_commands\",\n \"filesystem\"\n ]\n },\n \"security_audit\": {\n \"risk_level\": \"low\",\n \"is_blocked\": false,\n \"safe_to_publish\": true,\n \"summary\": \"This is a legitimate Python code execution sandbox skill. Network is disabled, filesystem access is restricted to /data directory, and code runs in an isolated Docker container with 6GB memory limits and 90-second timeout. 
All 444 static findings are false positives triggered by documentation patterns (markdown code blocks, base64 encoding examples, and prohibited operation warnings). No malicious behavior detected.\",\n \"risk_factor_evidence\": [\n {\n \"factor\": \"external_commands\",\n \"evidence\": [\n {\n \"file\": \"SKILL.md\",\n \"line_start\": 36,\n \"line_end\": 40\n },\n {\n \"file\": \"SKILL.md\",\n \"line_start\": 208,\n \"line_end\": 218\n }\n ]\n },\n {\n \"factor\": \"filesystem\",\n \"evidence\": [\n {\n \"file\": \"SKILL.md\",\n \"line_start\": 36,\n \"line_end\": 43\n },\n {\n \"file\": \"SKILL.md\",\n \"line_start\": 114,\n \"line_end\": 117\n }\n ]\n }\n ],\n \"critical_findings\": [],\n \"high_findings\": [],\n \"medium_findings\": [],\n \"low_findings\": [],\n \"dangerous_patterns\": [],\n \"files_scanned\": 9,\n \"total_lines\": 6671,\n \"audit_model\": \"claude\",\n \"audited_at\": \"2026-01-16T20:24:03.910Z\"\n },\n \"content\": {\n \"user_title\": \"Execute Python for data analysis and visualization\",\n \"value_statement\": \"Need to analyze data, create charts, or generate reports? 
This sandbox lets AI assistants run Python code securely with full library support for Pandas, Matplotlib, scikit-learn, and document generation tools.\",\n \"seo_keywords\": [\n \"python sandbox\",\n \"code interpreter\",\n \"data analysis\",\n \"machine learning\",\n \"data visualization\",\n \"chart generation\",\n \"document automation\",\n \"pandas\",\n \"matplotlib\",\n \"claude code\",\n \"ai coding\"\n ],\n \"actual_capabilities\": [\n \"Execute Python code in a secure, isolated Docker container\",\n \"Data analysis with Pandas, NumPy, DuckDB for SQL-like queries\",\n \"Create charts and visualizations with Matplotlib, Seaborn, Graphviz\",\n \"Generate Excel, Word, PDF, and PowerPoint documents\",\n \"Build machine learning models with scikit-learn and LightGBM\",\n \"Scientific computing with SciPy and symbolic math with SymPy\"\n ],\n \"limitations\": [\n \"90-second execution timeout per code block\",\n \"6GB maximum memory allocation with swap disabled\",\n \"Filesystem access restricted to /data directory only\",\n \"Network access is completely disabled for security\"\n ],\n \"use_cases\": [\n {\n \"target_user\": \"Data Analysts\",\n \"title\": \"Data Analysis\",\n \"description\": \"Clean datasets, perform statistical analysis, and create visualizations for business insights.\"\n },\n {\n \"target_user\": \"Machine Learning Engineers\",\n \"title\": \"ML Model Building\",\n \"description\": \"Train classification, regression, and time series models with scikit-learn and LightGBM.\"\n },\n {\n \"target_user\": \"Business Professionals\",\n \"title\": \"Report Generation\",\n \"description\": \"Generate automated Excel reports, Word documents, and PDF presentations from data.\"\n }\n ],\n \"prompt_templates\": [\n {\n \"title\": \"Quick Chart\",\n \"scenario\": \"Create a bar chart\",\n \"prompt\": \"Read data.csv from /data and create a bar chart showing sales by category using Matplotlib. 
Save the chart and display it.\"\n },\n {\n \"title\": \"Data Summary\",\n \"scenario\": \"Analyze dataset\",\n \"prompt\": \"Load the Excel file from /data, show basic statistics, and create a summary report with key metrics.\"\n },\n {\n \"title\": \"ML Pipeline\",\n \"scenario\": \"Train prediction model\",\n \"prompt\": \"Build a classification model using the uploaded dataset. Split into train/test, evaluate accuracy, and show feature importance.\"\n },\n {\n \"title\": \"Document Report\",\n \"scenario\": \"Generate PDF report\",\n \"prompt\": \"Create a professional PDF report with analysis results, include charts from the data analysis, and output as downloadable file.\"\n }\n ],\n \"output_examples\": [\n {\n \"input\": \"Analyze the sales data and create a chart\",\n \"output\": [\n \"Data loaded successfully: 1,250 rows x 8 columns\",\n \"Key metrics: Total sales 2.3M, Average 1,840\",\n \"Top category: Electronics (35% of sales)\",\n \"Chart generated showing monthly sales trend\"\n ]\n },\n {\n \"input\": \"Build a classification model\",\n \"output\": [\n \"Dataset loaded: 500 samples, 20 features\",\n \"Training accuracy: 94.2%, Test accuracy: 91.8%\",\n \"Feature importance: top 3 features identified\",\n \"Model saved to /data/model.joblib\"\n ]\n },\n {\n \"input\": \"Generate a Word report\",\n \"output\": [\n \"Report created with 5 sections\",\n \"Summary statistics table included\",\n \"3 charts embedded from analysis\",\n \"File saved: /data/sales_report.docx\"\n ]\n }\n ],\n \"best_practices\": [\n \"Use DuckDB for large datasets to avoid loading entire file into memory\",\n \"Save intermediate results to /data for multi-step workflows\",\n \"Always call plt.show() to trigger chart capture and display\",\n \"Break complex analyses into multiple sequential code blocks\"\n ],\n \"anti_patterns\": [\n \"Loading entire 100MB+ CSV into Pandas without chunking\",\n \"Missing plt.show() call after creating charts\",\n \"Generating multiple output 
files in a single code execution\",\n \"Using class definitions which are not supported in sandbox environment\"\n ],\n \"faq\": [\n {\n \"question\": \"What Python libraries are available?\",\n \"answer\": \"Pandas, NumPy, SciPy, Matplotlib, scikit-learn, LightGBM, SymPy, and many more for data science tasks.\"\n },\n {\n \"question\": \"How large files can I process?\",\n \"answer\": \"Files up to 50MB work directly. For larger files, use chunked reading or DuckDB for memory-efficient SQL queries.\"\n },\n {\n \"question\": \"Can this access my local files?\",\n \"answer\": \"No. The sandbox only accesses files uploaded to the /data directory. System files are read-only and network is disabled.\"\n },\n {\n \"question\": \"Is my data secure?\",\n \"answer\": \"Yes. The sandbox runs in an isolated Docker container, network is disabled, and sessions are cleared after 24 hours of inactivity.\"\n },\n {\n \"question\": \"Why is my code timing out?\",\n \"answer\": \"Code execution has a 90-second limit. 
Optimize by using efficient libraries like DuckDB, processing in chunks, or splitting into steps.\"\n },\n {\n \"question\": \"How is this different from local Python?\",\n \"answer\": \"This sandbox provides consistent, reproducible environments with pre-installed libraries and automatic chart capture for AI assistant workflows.\"\n }\n ]\n },\n \"file_structure\": [\n {\n \"name\": \"references\",\n \"type\": \"dir\",\n \"path\": \"references\",\n \"children\": [\n {\n \"name\": \"matplotlib_cookbook.md\",\n \"type\": \"file\",\n \"path\": \"references/matplotlib_cookbook.md\",\n \"lines\": 544\n },\n {\n \"name\": \"ml_workflow.md\",\n \"type\": \"file\",\n \"path\": \"references/ml_workflow.md\",\n \"lines\": 1581\n },\n {\n \"name\": \"pandas_cheatsheet.md\",\n \"type\": \"file\",\n \"path\": \"references/pandas_cheatsheet.md\",\n \"lines\": 387\n },\n {\n \"name\": \"report_generator_workflow.md\",\n \"type\": \"file\",\n \"path\": \"references/report_generator_workflow.md\",\n \"lines\": 683\n },\n {\n \"name\": \"scipy_cookbook.md\",\n \"type\": \"file\",\n \"path\": \"references/scipy_cookbook.md\",\n \"lines\": 818\n },\n {\n \"name\": \"sympy_cookbook.md\",\n \"type\": \"file\",\n \"path\": \"references/sympy_cookbook.md\",\n \"lines\": 529\n },\n {\n \"name\": \"text_analysis_cookbook.md\",\n \"type\": \"file\",\n \"path\": \"references/text_analysis_cookbook.md\",\n \"lines\": 1173\n }\n ]\n },\n {\n \"name\": \"SKILL.md\",\n \"type\": \"file\",\n \"path\": \"SKILL.md\",\n \"lines\": 692\n }\n ]\n}\n","content_type":"application/json; charset=utf-8","language":"json","size":10270,"content_sha256":"9c38e4fcaee0624ed5e657d418a55f49a189f9d37bafa34f6f23535e967c3017"}],"content_json":{"type":"doc","content":[{"type":"heading","attrs":{"level":1},"content":[{"text":"Python沙盒工具使用指南 v2.5 (与后端完全匹配版)","type":"text"}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"🎯 
","type":"text"},{"text":"核心能力概览","type":"text","marks":[{"type":"strong"}]}]},{"type":"paragraph","content":[{"text":"Python沙盒是一个","type":"text"},{"text":"多功能的代码执行环境","type":"text","marks":[{"type":"strong"}]},{"text":"，支持：","type":"text"}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"功能领域","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"主要用途","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"关键库","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"数据分析","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"数据清洗、转换、聚合","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Pandas, Polars","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"高性能计算","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"内存SQL、表达式加速","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"DuckDB, Numexpr, 
Bottleneck","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"可视化","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"图表生成与自动捕获","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Matplotlib, Seaborn","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"文档自动化","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Excel/Word/PDF/PPT生成","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"python-docx, reportlab, openpyxl","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"机器学习","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"模型训练与评估","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"scikit-learn, 
LightGBM","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"符号数学","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"公式证明、方程求解","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"SymPy","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"科学计算","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"优化、积分、信号处理","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"SciPy","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"流程图生成","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"架构图、流程图","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"Graphviz, 
NetworkX","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"文本分析","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"HTML解析、数据提取","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"BeautifulSoup4, lxml","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"性能优化","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"机械硬盘优化、异步IO","type":"text"}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"aiofiles, joblib","type":"text"}]}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"📁 ","type":"text"},{"text":"文件处理指南 - 两种模式必须分清","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"模式A: 工作区文件 (","type":"text","marks":[{"type":"strong"}]},{"text":"/data","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" 目录)","type":"text","marks":[{"type":"strong"}]}]},{"type":"paragraph","content":[{"text":"用途","type":"text","marks":[{"type":"strong"}]},{"text":": 数据分析、处理、持久化存储","type":"text"},{"type":"br"},{"text":"支持格式","type":"text","marks":[{"type":"strong"}]},{"text":": ","type":"text"},{"text":".csv","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":".xlsx","type":"text","marks":[{"type":"code_inline"}]},{"text":", 
","type":"text"},{"text":".xls","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":".parquet","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":".json","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":".txt","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":".feather","type":"text","marks":[{"type":"code_inline"}]},{"type":"br"},{"text":"访问方式","type":"text","marks":[{"type":"strong"}]},{"text":": 绝对路径 ","type":"text"},{"text":"/data/文件名","type":"text","marks":[{"type":"code_inline"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"import pandas as pd\ndf = pd.read_csv('/data/sales.csv') # ✅ 正确","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"模式B: 上下文文件 (Base64嵌入)","type":"text","marks":[{"type":"strong"}]}]},{"type":"paragraph","content":[{"text":"用途","type":"text","marks":[{"type":"strong"}]},{"text":": 图片识别、PDF内容提取","type":"text"},{"type":"br"},{"text":"支持格式","type":"text","marks":[{"type":"strong"}]},{"text":": ","type":"text"},{"text":".png","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":".jpg","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":".jpeg","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":".pdf","type":"text","marks":[{"type":"code_inline"}]},{"text":", ","type":"text"},{"text":".txt","type":"text","marks":[{"type":"code_inline"}]},{"text":"(小文件)","type":"text"},{"type":"br"},{"text":"特点","type":"text","marks":[{"type":"strong"}]},{"text":": 文件内容直接嵌入对话，","type":"text"},{"text":"不在 ","type":"text","marks":[{"type":"strong"}]},{"text":"/data","type":"text","marks":[{"type":"code_inline"},{"type":"strong"}]},{"text":" 目录","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# 
❌ 错误：无法从/data读取上传的图片\n# img = Image.open('/data/uploaded_image.png') # 会失败","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"🚀 ","type":"text"},{"text":"输出规范 - 后端实际支持的格式","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"1. 图表输出 - 系统自动捕获","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"import matplotlib.pyplot as plt\nplt.plot([1,2,3], [4,5,6])\nplt.title('示例图表')\nplt.show() # 🎯 关键：自动捕获，无需手动处理\n\n# 支持以下图表库的自动捕获：\n# - Matplotlib (使用 plt.show() 触发)\n# - Graphviz (创建 Digraph 对象自动捕获)\n# - NetworkX (通过 Matplotlib 渲染)","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"2. 可下载文件 - 必须使用JSON格式","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"import base64\nimport json\n\n# 生成文件内容后...\nfile_data = base64.b64encode(content).decode('utf-8')\n\n# 🎯 后端实际支持的输出类型：\noutput = {\n \"type\": \"excel\", # 或 \"word\", \"pdf\", \"ppt\"\n \"title\": \"销售报告.xlsx\",\n \"data_base64\": file_data # 注意：只有image类型用\"image_base64\"\n}\n\n# 对于图片输出，后端自动生成：\n# {\n# \"type\": \"image\",\n# \"title\": \"图表标题\",\n# \"image_base64\": \"base64字符串\"\n# }\n\nprint(json.dumps(output)) # 🎯 必须用JSON格式打印","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"3. 
文本/数据 - 直接print","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"print(\"分析结果:\")\nprint(f\"总计: {total}\")\nprint(df.describe()) # Pandas DataFrame自动美化显示","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"后端实际支持的输出类型列表：","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"\"image\"","type":"text","marks":[{"type":"code_inline"}]},{"text":" - 图表、流程图（自动捕获）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"\"excel\"","type":"text","marks":[{"type":"code_inline"}]},{"text":" - Excel文件","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"\"word\"","type":"text","marks":[{"type":"code_inline"}]},{"text":" - Word文档","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"\"pdf\"","type":"text","marks":[{"type":"code_inline"}]},{"text":" - PDF文件","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"\"ppt\"","type":"text","marks":[{"type":"code_inline"}]},{"text":" - PowerPoint演示文稿","type":"text"}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"💾 ","type":"text"},{"text":"会话持久化 - 跨代码执行的文件共享","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"会话机制：","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"会话ID","type":"text","marks":[{"type":"strong"}]},{"text":": 每个会话有唯一ID，文件按会话隔离","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"超时时间","type":"text","marks":[{"type":"strong"}]},{"text":": 
24小时无活动自动清理","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"工作目录","type":"text","marks":[{"type":"strong"}]},{"text":": ","type":"text"},{"text":"/data","type":"text","marks":[{"type":"code_inline"}]},{"text":" 目录对应会话工作区","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"工作流示例：","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# 第一步：处理数据并保存\nimport pandas as pd\ndf = pd.read_excel('/data/原始数据.xlsx')\nprocessed = df.groupby('部门')['销售额'].sum()\nprocessed.to_csv('/data/部门汇总.csv') # ✅ 保存中间结果\nprint(\"已保存部门汇总数据\")\n\n# 第二步：读取中间结果继续分析\ndf_summary = pd.read_csv('/data/部门汇总.csv')\nprint(f\"读取到 {len(df_summary)} 个部门的汇总数据\")","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"重要提醒：","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"✅ 同一会话内文件持久化（24小时超时）","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"✅ 新会话开始时 ","type":"text"},{"text":"/data","type":"text","marks":[{"type":"code_inline"}]},{"text":" 目录为空","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"✅ 建议保存中间结果避免重复计算","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"✅ 使用同一session_id可跨多次代码执行共享文件","type":"text"}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"📚 ","type":"text"},{"text":"工作流参考 - 
按需查阅","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"快速查找表：","type":"text","marks":[{"type":"strong"}]}]},{"type":"table","attrs":{"layout":null},"content":[{"type":"tr","content":[{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"任务类型","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"参考文件","type":"text"}]}]},{"type":"th","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"核心库","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"创建图表","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"matplotlib_cookbook.md","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"matplotlib, seaborn","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"数据处理","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"pandas_cheatsheet.md","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"pandas, 
duckdb","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"生成报告","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"report_generator_workflow.md","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"python-docx, reportlab","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"机器学习","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"ml_workflow.md","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"scikit-learn, 
lightgbm","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"符号数学","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"sympy_cookbook.md","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"sympy","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"科学计算","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"scipy_cookbook.md","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"scipy","type":"text"}]}]}]},{"type":"tr","content":[{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"文本解析","type":"text","marks":[{"type":"strong"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"text_analysis_cookbook.md","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"td","attrs":{"colspan":1,"rowspan":1,"colwidth":null,"alignment":""},"content":[{"type":"paragraph","content":[{"text":"beautifulsoup4, lxml","type":"text"}]}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"示例工作流：","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"A. 
公式证明工作流","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# 1. 定义符号\nimport sympy as sp\nx, y = sp.symbols('x y')\n\n# 2. 构建表达式\nlhs = (x + y)**2\nrhs = x**2 + 2*x*y + y**2\n\n# 3. 验证恒等\ndifference = sp.simplify(lhs - rhs)\nprint(f\"差值: {difference}\")\nprint(f\"是否恒等: {difference == 0}\")","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"B. ETL数据分析工作流","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# Extract\ndf = pd.read_csv('/data/raw.csv')\n\n# Transform\ndf_clean = (df\n .dropna()\n .drop_duplicates()\n .assign(profit = lambda d: d['revenue'] - d['cost']))\n\n# Load\ndf_clean.to_csv('/data/cleaned.csv', index=False)\nprint(df_clean.describe())","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"C. Graphviz流程图生成","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from graphviz import Digraph\n\n# 创建流程图\ndot = Digraph(comment='工作流程', format='png')\ndot.node('A', '数据采集')\ndot.node('B', '数据清洗')\ndot.node('C', '数据分析')\ndot.node('D', '报告生成')\n\ndot.edges(['AB', 'BC', 'CD'])\ndot.attr(rankdir='LR') # 从左到右布局\n\n# 🎯 自动捕获：Graphviz图表会被后端自动捕获并输出为图片","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"⚡ ","type":"text"},{"text":"性能优化指南 (与后端完全匹配)","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"1. 
后端资源配置","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"yaml"},"content":[{"text":"内存限制: 6GB (mem_limit: \"6g\")\n预留内存: 4GB (mem_reservation: \"4g\")\nSwap限制: 禁用 (memswap_limit: \"0\") # 🔥 避免机械硬盘swap死机\nCPU限制: 75%配额 (cpu_quota: 75_000, cpu_period: 100_000)\n超时时间: 90秒\n文件系统: 只读根目录，/data可写，/tmp为tmpfs\n网络: 完全禁用 (network_disabled: true)","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"2. 大文件处理策略","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"分块读取 (50MB+文件)","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"chunks = []\nfor chunk in pd.read_csv('/data/large.csv', chunksize=50000):\n processed = process_chunk(chunk) # 自定义处理函数\n chunks.append(processed)\nfinal_df = pd.concat(chunks, ignore_index=True)","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"格式转换加速","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# 转换CSV为Feather格式 (提速10-100倍)\nimport pyarrow.feather as feather\ndf = pd.read_csv('/data/slow.csv')\nfeather.write_feather(df, '/data/fast.feather') # 保存\n\n# 后续读取极快\ndf_fast = feather.read_feather('/data/fast.feather')","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"3. 
内存外计算 (避免OOM)","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"DuckDB内存SQL","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"import duckdb\n\n# 直接查询CSV，不加载到内存\nresult = duckdb.sql(\"\"\"\n SELECT department, \n AVG(salary) as avg_salary,\n COUNT(*) as count\n FROM read_csv_auto('/data/employees.csv')\n WHERE hire_date > '2024-01-01'\n GROUP BY department\n ORDER BY avg_salary DESC\n LIMIT 10\n\"\"\").df() # 最后转为DataFrame\nprint(result)","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"Numexpr表达式加速","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"import numexpr as ne\n\n# 传统方式（慢）\ndf['result'] = df['A'] * 2 + df['B'] ** 2 - df['C'] / 3\n\n# Numexpr方式（快3-5倍）\ndf['result'] = ne.evaluate(\n \"A * 2 + B ** 2 - C / 3\",\n local_dict={k: df[k].values for k in ['A', 'B', 'C']}\n)","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"4. 
高级优化技巧 (后端已安装支持)","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"异步文件操作 - aiofiles","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"import aiofiles\nimport asyncio\n\nasync def process_large_file():\n # 异步读取，不阻塞主线程（机械硬盘特别受益）\n async with aiofiles.open('/data/large_file.csv', 'r') as f:\n content = await f.read()\n \n # 处理数据...\n \n # 异步写入\n async with aiofiles.open('/data/processed.csv', 'w') as f:\n await f.write(processed_content)\n\n# 在异步环境中调用\nawait process_large_file()","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"内存缓存与并行计算 - joblib","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from joblib import Memory\nimport time\n\n# 创建内存缓存（可配置到磁盘）\ncachedir = '/data/cache'\nmemory = Memory(cachedir, verbose=0)\n\n@memory.cache\ndef expensive_computation(x, y):\n \"\"\"计算结果会被缓存到磁盘\"\"\"\n time.sleep(2) # 模拟耗时计算\n return x * y + x**2\n\n# 第一次计算慢，后续从磁盘读取快\nresult1 = expensive_computation(10, 20) # 慢\nresult2 = expensive_computation(10, 20) # 快（从缓存）","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"DuckDB替代Pandas重操作","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"import duckdb\n\n# ❌ 耗内存的Pandas操作\n# df = pd.read_csv('/data/large.csv')\n# result = df.groupby('category').agg({'value': ['mean', 'sum', 'count']})\n\n# ✅ 内存友好的DuckDB操作\nresult = duckdb.sql(\"\"\"\n SELECT category, \n AVG(value) as mean_value,\n SUM(value) as sum_value,\n COUNT(value) as count_value\n FROM read_csv('/data/large.csv')\n GROUP BY category\n\"\"\").df()","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"📋 ","type":"text"},{"text":"可用库快速参考 
(与Dockerfile完全一致)","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"数据处理核心","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"import pandas as pd # 数据分析 (v2.2.2)\nimport numpy as np # 数值计算 (v1.26.4)\nimport duckdb # 内存SQL (v0.10.2)\nimport numexpr as ne # 表达式加速 (v2.10.0)\nimport bottleneck as bn # 滚动统计加速 (v1.3.8)\nimport pyarrow.feather as feather # Feather格式支持 (v14.0.2)\nimport polars as pl # 高性能DataFrame (v0.20.3)","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"机器学习增强","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from sklearn.ensemble import RandomForestClassifier # scikit-learn v1.5.0\nimport lightgbm as lgb # 梯度提升树 (v4.3.0)\nimport category_encoders as ce # 分类编码 (v2.6.3)\nfrom skopt import BayesSearchCV # 贝叶斯优化 (v0.9.0)\nimport statsmodels.api as sm # 统计模型 (v0.14.1)","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"可视化与图表","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"import matplotlib.pyplot as plt # 基础绘图 (v3.8.4)\nimport seaborn as sns # 统计可视化 (v0.13.2)\nimport graphviz # 流程图 (自动布局) - 系统安装\nimport networkx as nx # 网络图","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"文档生成","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from docx import Document # Word文档 (v1.1.2)\nfrom reportlab.lib.pagesizes import letter # PDF生成 (v4.0.7)\nfrom pptx import Presentation # PPT演示文稿 (v0.6.23)\nimport openpyxl # Excel操作 
(v3.1.2)","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"科学计算与数学","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"import sympy as sp # 符号数学 (v1.12)\nimport scipy # 科学计算 (v1.14.1)\nimport scipy.optimize as opt # 优化算法","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"网页内容处理","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from bs4 import BeautifulSoup # HTML解析 (v4.12.3)\nimport lxml # 高性能解析器 (v5.2.2)\nfrom tabulate import tabulate # 格式化表格 (v0.9.0)","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"性能优化与工具","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from tqdm import tqdm # 进度条显示 (v4.66.4)\nfrom joblib import Memory # 磁盘缓存和并行 (v1.3.2)\nimport aiofiles # 异步文件操作 (v24.1.0)","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"后端框架依赖","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# 以下库已在后端安装，但用户代码通常不需要直接使用\n# fastapi, uvicorn, docker, pydot 等","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"🚨 ","type":"text"},{"text":"重要限制与最佳实践","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"✅ 必须遵守的规则","type":"text","marks":[{"type":"strong"}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"图表输出","type":"text","marks":[{"type":"strong"}]},{"text":": 总是使用 
","type":"text"},{"text":"plt.show()","type":"text","marks":[{"type":"code_inline"}]},{"text":"，系统自动捕获","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"文件生成","type":"text","marks":[{"type":"strong"}]},{"text":": 必须输出特定JSON格式给可下载文件","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"文件访问","type":"text","marks":[{"type":"strong"}]},{"text":": 数据文件在 ","type":"text"},{"text":"/data","type":"text","marks":[{"type":"code_inline"}]},{"text":" 目录，媒体文件在上下文中","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"内存管理","type":"text","marks":[{"type":"strong"}]},{"text":": 容器限制6GB，","type":"text"},{"text":"Swap已禁用","type":"text","marks":[{"type":"strong"}]},{"text":"，避免使用swap","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"会话管理","type":"text","marks":[{"type":"strong"}]},{"text":": 使用session_id保持文件持久性","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"代码结构","type":"text","marks":[{"type":"strong"}]},{"text":": 避免类定义，使用纯函数式编程","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"❌ 禁止的操作","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# 以下操作会被阻止：\nexec(\"危险代码\") # ❌ 动态执行（后端限制exec_globals）\n__import__('os').system('rm') # ❌ 系统命令（网络禁用）\nopen('/etc/passwd') # ❌ 访问系统文件（根目录只读）\nclass MyClass: # ❌ 类定义（sandbox限制）\n pass\n# 访问网络资源 # ❌ 网络完全禁用","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"⚠️ 性能警告","type":"text","marks":[{"type":"strong"}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"大文件","type":"text","marks":[{"type":"strong"}]},{"text":": 
>50MB时使用分块处理","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"复杂计算","type":"text","marks":[{"type":"strong"}]},{"text":": 使用DuckDB或Numexpr加速","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"重复操作","type":"text","marks":[{"type":"strong"}]},{"text":": 使用Feather格式缓存中间结果","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"内存监控","type":"text","marks":[{"type":"strong"}]},{"text":": 及时删除大变量 ","type":"text"},{"text":"del large_df","type":"text","marks":[{"type":"code_inline"}]}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"Swap已禁用","type":"text","marks":[{"type":"strong"}]},{"text":": 内存超限直接崩溃，注意内存使用","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"🔧 高级使用建议","type":"text","marks":[{"type":"strong"}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"纯函数式编程","type":"text","marks":[{"type":"strong"}]},{"text":": 使用字典和列表组织数据，避免类定义","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"复杂逻辑拆分","type":"text","marks":[{"type":"strong"}]},{"text":": 将复杂任务拆分为多个小函数","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"分步骤执行","type":"text","marks":[{"type":"strong"}]},{"text":": 利用会话持久化，分步执行复杂分析","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"字体支持","type":"text","marks":[{"type":"strong"}]},{"text":": 已安装中文字体（文泉驿微米黑/正黑），图表支持中文","type":"text"}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"🔧 
","type":"text"},{"text":"故障排除与调试","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"常见问题解决","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"问题1: 内存不足","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# ❌ 错误做法\ndf = pd.read_csv('/data/huge.csv') # 可能崩溃\n\n# ✅ 正确做法\n# 方案A: 分块处理\nfor chunk in pd.read_csv('/data/huge.csv', chunksize=50000):\n process(chunk)\n\n# 方案B: 使用DuckDB内存外查询\nresult = duckdb.sql(\"SELECT * FROM read_csv_auto('/data/huge.csv') LIMIT 10000\").df()\n\n# 方案C: 转换为Feather格式\nimport pyarrow.feather as feather\ndf = pd.read_csv('/data/huge.csv')\nfeather.write_feather(df, '/data/huge.feather') # 保存为高效格式\ndf_fast = feather.read_feather('/data/huge.feather') # 快速读取","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"问题2: 处理速度慢","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# ❌ 慢速Pandas操作\ndf['result'] = df['A'] * 2 + df['B'] ** 2 - df['C'] / 3\n\n# ✅ 使用Numexpr加速\ndf['result'] = ne.evaluate(\"A * 2 + B ** 2 - C / 3\", \n {k: df[k].values for k in ['A', 'B', 'C']})\n\n# ✅ 使用Bottleneck加速滚动统计\nimport bottleneck as bn\ndf['rolling_mean'] = bn.move_mean(df['value'], window=20)","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"问题3: 图表不显示","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# ❌ 缺少show()\nplt.plot(x, y)\nplt.title('图表')\n\n# ✅ 必须调用show()\nplt.plot(x, y)\nplt.title('图表')\nplt.show() # 🎯 关键！\n\n# ✅ Graphviz图表自动捕获（无需额外调用）\ndot = Digraph()\ndot.node('A', '开始')\n# 创建对象即自动捕获","type":"text"}]},{"type":"heading","attrs":{"level":4},"content":[{"text":"问题4: 
大型文件IO慢","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# ❌ 同步IO阻塞\nwith open('/data/large.txt', 'r') as f:\n content = f.read() # 阻塞主线程\n\n# ✅ 异步IO (机械硬盘特别有效)\nimport aiofiles\nimport asyncio\n\nasync def read_file_async():\n async with aiofiles.open('/data/large.txt', 'r') as f:\n return await f.read()","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"性能监控命令 (完整版补充)","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"bash"},"content":[{"text":"# 监控内存使用\nwatch -n 2 \"free -h | grep -E 'Mem|Swap'\"\n\n# 监控磁盘IO（机械硬盘关键指标）\niostat -x 2\n\n# 监控Docker容器\ndocker stats --format \"table {{.Name}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.MemPerc}}\"","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"📈 ","type":"text"},{"text":"版本更新日志","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"v2.5 核心升级 (当前版本)","type":"text","marks":[{"type":"strong"}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"性能库新增","type":"text","marks":[{"type":"strong"}]},{"text":": DuckDB (内存SQL)、Numexpr (表达式加速)、Bottleneck (滚动统计)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"ML增强","type":"text","marks":[{"type":"strong"}]},{"text":": LightGBM、Category Encoders、scikit-optimize (贝叶斯优化)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"工具完善","type":"text","marks":[{"type":"strong"}]},{"text":": tqdm进度条、joblib缓存、aiofiles异步IO","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"机械硬盘优化","type":"text","marks":[{"type":"strong"}]},{"text":": 
Swap禁用防止死机，Feather格式支持","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"库版本升级","type":"text","marks":[{"type":"strong"}]},{"text":":","type":"text"}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"scikit-learn升级到1.5.0","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"pandas升级到2.2.2","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"新增polars-lts-cpu==0.20.3","type":"text"}]}]}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"v2.4 主要功能","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"文本分析能力 (BeautifulSoup4 + lxml)","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"图表自动捕获系统完善","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"会话文件管理优化","type":"text"}]}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"v2.3 及更早","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"基础沙盒功能","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"图表自动捕获","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"文件上传支持","type":"text"}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"🎯 ","type":"text"},{"text":"快速开始模板","type":"text","marks":[{"type":"strong"}]}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"模板1: 基础数据分析","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"import pandas as pd\nimport matplotlib.pyplot as plt\n\n# 1. 读取数据\ndf = pd.read_csv('/data/data.csv')\n\n# 2. 
快速分析\nprint(f\"数据形状: {df.shape}\")\nprint(df.describe())\n\n# 3. 简单可视化\ndf.groupby('category')['value'].mean().plot(kind='bar')\nplt.title('各分类平均值')\nplt.tight_layout()\nplt.show() # 🎯 自动捕获图表","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"模板2: 完整报告生成","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# 参考: report_generator_workflow.md\n# 包含数据读取、分析、图表、文档生成全流程\n\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom docx import Document\nimport base64, json\n\n# 1. 数据读取与分析\ndf = pd.read_excel('/data/sales_data.xlsx')\nsummary = df.groupby('region')['sales'].sum()\n\n# 2. 创建图表\nsummary.plot(kind='bar')\nplt.title('各地区销售总额')\nplt.tight_layout()\nplt.show() # 🎯 自动捕获\n\n# 3. 生成Word报告\ndoc = Document()\ndoc.add_heading('销售分析报告', 0)\ndoc.add_paragraph(f\"总销售额: ${df['sales'].sum():,.2f}\")\ndoc.add_paragraph(f\"平均销售额: ${df['sales'].mean():,.2f}\")\n\n# 4. 保存并输出\ndoc.save('/data/report.docx')\nwith open('/data/report.docx', 'rb') as f:\n file_data = base64.b64encode(f.read()).decode('utf-8')\n\n# 🎯 后端实际支持的输出格式\noutput = {\n \"type\": \"word\",\n \"title\": \"销售分析报告.docx\",\n \"data_base64\": file_data\n}\nprint(json.dumps(output))","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"模板3: 机器学习建模","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"# 参考: ml_workflow.md\n# 包含数据预处理、特征工程、模型训练、评估\n\nimport pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import classification_report\n\n# 1. 加载数据\ndf = pd.read_csv('/data/iris.csv')\n\n# 2. 特征与标签\nX = df.drop('species', axis=1)\ny = df['species']\n\n# 3. 划分数据集\nX_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=0.2, random_state=42\n)\n\n# 4. 
训练模型\nmodel = RandomForestClassifier(n_estimators=100, random_state=42)\nmodel.fit(X_train, y_train)\n\n# 5. 评估\ny_pred = model.predict(X_test)\nprint(classification_report(y_test, y_pred))","type":"text"}]},{"type":"heading","attrs":{"level":3},"content":[{"text":"模板4: Graphviz流程图","type":"text","marks":[{"type":"strong"}]}]},{"type":"code_block","attrs":{"wrap":false,"language":"python"},"content":[{"text":"from graphviz import Digraph\n\n# 创建工作流程图\nworkflow = Digraph('工作流程', format='png')\nworkflow.attr(rankdir='LR', size='8,5')\n\n# 添加节点\nworkflow.node('start', '开始', shape='ellipse')\nworkflow.node('collect', '数据采集', shape='box')\nworkflow.node('clean', '数据清洗', shape='box')\nworkflow.node('analyze', '数据分析', shape='box')\nworkflow.node('report', '报告生成', shape='box')\nworkflow.node('end', '结束', shape='ellipse')\n\n# 添加边\nworkflow.edges([\n ('start', 'collect'), ('collect', 'clean'), \n ('clean', 'analyze'), ('analyze', 'report'), ('report', 'end')\n])\n\n# 🎯 自动捕获：Graphviz对象创建后自动渲染为图片\n# 无需调用render()或show()","type":"text"}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"💡 ","type":"text"},{"text":"终极提示","type":"text","marks":[{"type":"strong"}]}]},{"type":"ordered_list","attrs":{"order":1,"listStyle":"number"},"content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"优先查阅参考文件","type":"text","marks":[{"type":"strong"}]},{"text":" - 不要重新发明轮子","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"利用会话持久化","type":"text","marks":[{"type":"strong"}]},{"text":" - 使用session_id保存中间结果，分步执行复杂任务","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"信任自动化系统","type":"text","marks":[{"type":"strong"}]},{"text":" - 图表、输出格式等交给后端处理","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"性能敏感用优化库","type":"text","marks":[{"type":"strong"}]},{"text":" - 
大文件用DuckDB，复杂计算用Numexpr","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"测试代码片段","type":"text","marks":[{"type":"strong"}]},{"text":" - 复杂逻辑先小规模测试","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"注意内存限制","type":"text","marks":[{"type":"strong"}]},{"text":" - Swap已禁用，内存超限直接崩溃","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"使用正确输出格式","type":"text","marks":[{"type":"strong"}]},{"text":" - 只使用后端支持的JSON输出类型","type":"text"}]}]}]},{"type":"hr","attrs":{"markup":"---"}},{"type":"heading","attrs":{"level":2},"content":[{"text":"🔗 ","type":"text"},{"text":"相关资源","type":"text","marks":[{"type":"strong"}]}]},{"type":"bullet_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"完整示例库","type":"text","marks":[{"type":"strong"}]},{"text":": 所有参考文件中的代码示例","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"性能测试","type":"text","marks":[{"type":"strong"}]},{"text":": 对比不同方法的执行效率","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"最佳实践","type":"text","marks":[{"type":"strong"}]},{"text":": 各领域的标准化工作流","type":"text"}]}]},{"type":"list_item","content":[{"type":"paragraph","content":[{"text":"故障案例","type":"text","marks":[{"type":"strong"}]},{"text":": 常见问题及解决方案","type":"text"}]}]}]},{"type":"heading","attrs":{"level":2},"content":[{"text":"记住","type":"text","marks":[{"type":"strong"}]},{"text":": 
这个沙盒环境已经预配置了所有库和优化，你只需要专注于业务逻辑！系统会自动处理技术细节，让你像在本地环境一样顺畅工作。","type":"text"}]}]},"metadata":{"date":"2026-06-05","name":"python-sandbox","tags":["python","code","visualization","data-analysis","chart","document","automation","machine-learning","reporting","excel","word","pdf","ppt"],"author":"@skillopedia","source":{"stars":336,"repo_name":"marketplace","origin_url":"https://github.com/aiskillstore/marketplace/blob/HEAD/skills/ck991357/python-sandbox/SKILL.md","repo_owner":"aiskillstore","body_sha256":"5a7816aa04f8c75476c035f6c87557e00623bd5ac1e81bb12c8f99bc23065bb5","cluster_key":"cde4808564a5336e9a9081e71736df2b62c472dfb84ed26bdf4a2a11e5fb3cd0","clean_bundle":{"format":"clean-skill-bundle-v1","source":"aiskillstore/marketplace/skills/ck991357/python-sandbox/SKILL.md","attachments":[{"id":"e7a0c1d9-31eb-5ce5-964a-e83766f7fccf","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/e7a0c1d9-31eb-5ce5-964a-e83766f7fccf/attachment.md","path":"references/matplotlib_cookbook.md","size":18886,"sha256":"3e231f81f5845400db27792cdf7c6f8555bf60f04590ba80e60663f02897be10","contentType":"text/markdown; charset=utf-8"},{"id":"46bb220e-88e0-5148-b4c4-403a10080893","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/46bb220e-88e0-5148-b4c4-403a10080893/attachment.md","path":"references/ml_workflow.md","size":49846,"sha256":"3917b287e4ab91667240f038d6a29f7b1b0bfe5a70117f1b03c366fd789142ed","contentType":"text/markdown; charset=utf-8"},{"id":"a7703680-fa70-58b5-8f3d-e93b945f2585","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/a7703680-fa70-58b5-8f3d-e93b945f2585/attachment.md","path":"references/pandas_cheatsheet.md","size":10489,"sha256":"44c4574eb62a854631f05d01b68c63f3db1c36bc922c3fc2d724fb55704e4b05","contentType":"text/markdown; 
charset=utf-8"},{"id":"3f7d734c-1481-5caf-8822-6ff0f4771821","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/3f7d734c-1481-5caf-8822-6ff0f4771821/attachment.md","path":"references/report_generator_workflow.md","size":22526,"sha256":"e1407a3410963e793645aa85918d01060fd41b59b3636d1c2b85eafbbe2aad86","contentType":"text/markdown; charset=utf-8"},{"id":"8c7acc50-652a-5d4a-bbd6-28e5c3d030c6","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/8c7acc50-652a-5d4a-bbd6-28e5c3d030c6/attachment.md","path":"references/scipy_cookbook.md","size":23379,"sha256":"3731f2ad54ae44c723eacd211009f603d70edb065408b229c180ada567a2af86","contentType":"text/markdown; charset=utf-8"},{"id":"9893caa5-93b3-54bb-8ad6-2638a822492a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/9893caa5-93b3-54bb-8ad6-2638a822492a/attachment.md","path":"references/sympy_cookbook.md","size":12529,"sha256":"c7a102d75e7b9c7c1cff5cdd31b9e5b047c3ef321e9786bca3d6849c52fb0825","contentType":"text/markdown; charset=utf-8"},{"id":"13affbad-f8ef-5c97-902f-e77154e98a7a","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/13affbad-f8ef-5c97-902f-e77154e98a7a/attachment.md","path":"references/text_analysis_cookbook.md","size":45511,"sha256":"6ab62fd3888738fe0d27acfa1ae7ca829447b9c0be230017eeedf53f74c8d328","contentType":"text/markdown; charset=utf-8"},{"id":"682aeac6-dded-5eec-88bc-37fc27a7501c","key":"uploads/10433ee7-ad12-4ae0-b34e-97553e46c6c8/682aeac6-dded-5eec-88bc-37fc27a7501c/attachment.json","path":"skill-report.json","size":10270,"sha256":"9c38e4fcaee0624ed5e657d418a55f49a189f9d37bafa34f6f23535e967c3017","contentType":"application/json; 
charset=utf-8"}],"bundle_sha256":"6c1b931c2dfc17c2ef05754a8be77970dfa891809453004234fb9a95233e2c9a","attachment_count":8,"text_attachments":8,"attachment_storage":"skillopedia-attachments-v1","binary_attachments":0,"excluded_attachments":[]},"cluster_size":1,"skill_md_path":"skills/ck991357/python-sandbox/SKILL.md","import_metadata":{"date":"2026-06-05","author":"@skillopedia","version":"v1","category":"data-analytics","category_label":"Data"},"exact_dupes_collapsed_into_this":0},"version":"v1","category":"data-analytics","priority":10,"tool_name":"python_sandbox","import_tag":"clean-skills-v1","references":["matplotlib_cookbook.md","pandas_cheatsheet.md","report_generator_workflow.md","ml_workflow.md","sympy_cookbook.md","scipy_cookbook.md","text_analysis_cookbook.md"],"description":"在沙盒环境中执行Python代码，用于数据分析、可视化和生成Excel、Word、PDF等文件。支持数据清洗、统计分析、机器学习、图表生成、文档自动化等复杂工作流。"}},"renderedAt":1782979230662}

Important: agents should read /llm.txt, /llms.txt, or /.well-known/skills.json to discover the public Skillopedia API.