diff --git a/.gitignore b/.gitignore index 416fb38..28144b5 100644 --- a/.gitignore +++ b/.gitignore @@ -60,6 +60,11 @@ memory/* # Plan SOP !memory/plan_sop.md +# Skill Search SOP +!memory/skill_search/ +!memory/skill_search/** +memory/skill_search/**/__pycache__/ + # ADB UI tool !memory/adb_ui.py diff --git a/memory/skill_search/SKILL.md b/memory/skill_search/SKILL.md new file mode 100644 index 0000000..9844b3e --- /dev/null +++ b/memory/skill_search/SKILL.md @@ -0,0 +1,116 @@ +# Skill Search — API 客户端 + +> 从 10 万+ 技能卡中智能检索最适合当前环境的 skill。 + +## 架构 + +``` +┌─────────────┐ HTTPS/JSON ┌──────────────────┐ +│ 客户端 CLI │ ──────────────────▶ │ Skill Search │ +│ (本项目) │ ◀────────────────── │ API Server │ +└─────────────┘ └──────────────────┘ + • 环境检测 • 105K+ 技能卡索引 + • 结果格式化 • 四层漏斗检索引擎 + • 零数据依赖 • 环境过滤 → 安全标注 + → 语义匹配 → 质量排序 +``` + +## 快速开始 + +```bash +# 设置 API 地址 +export SKILL_SEARCH_API="https://your-server.com/api" +export SKILL_SEARCH_KEY="your-api-key" # 可选 + +# 搜索 +python -m skill_search "python testing" + +# 限定类别 +python -m skill_search "docker deploy" --category devops + +# JSON 输出(适合程序集成) +python -m skill_search "git workflow" --json + +# 查看环境信息 +python -m skill_search --env + +# 查看索引统计 +python -m skill_search --stats +``` + +## 编程接口 + +```python +from skill_search import search, detect_environment + +env = detect_environment() +results = search(query="python testing", env=env, top_k=5) + +for r in results: + print(f"{r.skill.name} (score: {r.final_score:.2f})") + print(f" {r.skill.one_line_summary}") +``` + +## 文件结构 + +``` +skill_search/ +├── SKILL.md # 本文档 +└── skill_search/ # Python 包 + ├── __init__.py # 公开 API + ├── __main__.py # CLI 入口 + ├── engine.py # HTTP 客户端(替代本地检索) + ├── index.py # SkillIndex 数据模型 + ├── env_detect.py # 本地环境检测 + └── formatter.py # 结果格式化输出 +``` + +## 环境变量 + +| 变量 | 说明 | 默认值 | +|------|------|--------| +| `SKILL_SEARCH_API` | API 服务地址 | `https://skill-search.example.com/api` | +| `SKILL_SEARCH_KEY` | API 密钥(可选) | 无 | + +## API 协议 + +### 
# ── Formatting ───────────────────────────────────────────

_SCORE_BAR_WIDTH = 10


def _score_bar(score, width=_SCORE_BAR_WIDTH):
    """Render *score* (nominally 0.0-1.0) as a bar of exactly *width* cells."""
    # Clamp so an out-of-range score cannot produce a bar that is longer
    # (score > 1) or otherwise wider (score < 0) than *width* cells.
    filled = max(0, min(width, int(score * width)))
    return "█" * filled + "░" * (width - filled)


def format_results(results: list[SearchResult], env: dict, query: str) -> str:
    """Render search results as a human-readable, multi-line report.

    Args:
        results: Ranked results; each needs ``skill``, ``final_score``,
            ``relevance``, ``quality``, ``match_reasons`` and ``warnings``.
        env: Environment dict; ``os``, ``shell`` and ``runtimes`` are read
            with fallbacks, so missing keys are tolerated.
        query: The query string echoed in the header.

    Returns:
        The report as a single newline-joined string.
    """
    lines = [
        f'🔍 搜索: "{query}"',
        f"🖥️ 环境: {env.get('os','?')} / {env.get('shell','?')} / {', '.join(env.get('runtimes',[]))}",
        f"📊 找到 {len(results)} 个匹配结果\n",
    ]
    if not results:
        lines.append("未找到匹配的 skill。试试其他关键词?")
        return "\n".join(lines)

    separator = f"{'─'*60}"
    for i, r in enumerate(results, 1):
        s = r.skill
        safe_icon = "🟢" if s.autonomous_safe else "🔴"
        score_bar = _score_bar(r.final_score)
        lines += [
            separator,
            f"#{i} {safe_icon} {s.name}",
            f" 路径: {s.key}",
            f" 类别: {s.category} | 标签: {', '.join(s.tags[:5])}",
            f" 摘要: {s.one_line_summary}",
            f" 评分: [{score_bar}] {r.final_score:.2f} (相关={r.relevance:.2f} 质量={r.quality:.1f})",
            f" 清晰={s.clarity} 完整={s.completeness} 可操作={s.actionability} | 形式={s.form}",
        ]
        if r.match_reasons:
            # Only the first three reasons are shown to keep each card short.
            lines.append(f" 匹配: {' | '.join(r.match_reasons[:3])}")
        if r.warnings:
            lines.extend(f" {w}" for w in r.warnings)
        lines.append("")
    lines.append(separator)
    return "\n".join(lines)


def format_results_json(results: list[SearchResult]) -> list[dict]:
    """Convert search results into JSON-serialisable dicts.

    Each dict carries a 1-based ``rank`` plus the skill's identity fields and
    grouped ``scores``, ``safety`` and ``platform`` sub-dicts. Scores are
    rounded (final/relevance to 3 places, quality to 1).
    """
    out = []
    for rank, r in enumerate(results, 1):
        s = r.skill
        out.append({
            "rank": rank, "key": s.key, "name": s.name,
            "category": s.category, "tags": s.tags,
            "description": s.description, "one_line_summary": s.one_line_summary,
            "scores": {"final": round(r.final_score, 3), "relevance": round(r.relevance, 3),
                       "quality": round(r.quality, 1), "clarity": s.clarity,
                       "completeness": s.completeness, "actionability": s.actionability},
            "safety": {"autonomous_safe": s.autonomous_safe, "blast_radius": s.blast_radius,
                       "requires_credentials": s.requires_credentials,
                       "data_exposure": s.data_exposure, "effect_scope": s.effect_scope},
            "platform": {"os": s.os, "runtimes": s.runtimes, "tools": s.tools, "services": s.services},
            "warnings": r.warnings, "match_reasons": r.match_reasons,
        })
    return out


# ── CLI ──────────────────────────────────────────────────

def _exit_with_error(exc):
    """Print *exc* to stderr in the CLI's error format and exit with status 1."""
    print(f"❌ {exc}", file=sys.stderr)
    sys.exit(1)


def main(argv=None):
    """Run the ``skill_search`` command-line interface.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the ``python -m`` entry
            point relies on; passing a list allows programmatic use.

    Exits with status 1 (after printing to stderr) when the API call raises
    ``SkillSearchError``. With no query and no mode flag, prints the help
    text and returns.
    """
    parser = argparse.ArgumentParser(
        prog="skill_search",
        description="Skill 检索系统 — 根据环境和需求智能推荐 skill(API 客户端)")
    parser.add_argument("query", nargs="?", help="搜索关键词(如: 'python testing')")
    parser.add_argument("--category", "-cat", help="限定类别")
    parser.add_argument("--top", "-k", type=int, default=10, help="返回结果数(默认 10)")
    parser.add_argument("--json", action="store_true", help="JSON 格式输出")
    parser.add_argument("--env", action="store_true", help="仅显示检测到的环境信息")
    parser.add_argument("--stats", action="store_true", help="显示索引统计信息")
    parser.add_argument("--api-url", help="指定 API 地址(也可用 SKILL_SEARCH_API 环境变量)")
    args = parser.parse_args(argv)

    if args.api_url:
        # The engine reads the endpoint from the environment at request time,
        # so the flag is forwarded by setting the same variable.
        import os
        os.environ["SKILL_SEARCH_API"] = args.api_url

    env = detect_environment()

    if args.env:
        print("🖥️ 当前环境:")
        print(f" OS: {env['os']}")
        print(f" Shell: {env['shell']}")
        print(f" 运行时: {', '.join(env['runtimes'])}")
        print(f" 工具: {', '.join(env['tools'])}")
        print(f" 模型能力: tool_calling={env['model']['tool_calling']}, "
              f"reasoning={env['model']['reasoning']}, context={env['model']['context_window']}")
        return

    if args.stats:
        # Only the API call can raise SkillSearchError; keep the try minimal.
        try:
            stats = get_stats(env)
        except SkillSearchError as e:
            _exit_with_error(e)
        print("📊 索引统计:")
        print(f" 总计: {stats.get('total', '?')} 个 skills")
        print(f" 自动安全: {stats.get('safe_count', '?')} 个")
        if 'categories' in stats:
            print(" 类别分布:")
            # Largest categories first.
            for cat, cnt in sorted(stats['categories'].items(), key=lambda x: -x[1]):
                print(f" {cat:15s} {cnt:4d}")
        return

    if not args.query:
        parser.print_help()
        return

    try:
        results = search(query=args.query, env=env, category=args.category, top_k=args.top)
    except SkillSearchError as e:
        _exit_with_error(e)

    if args.json:
        print(json.dumps(format_results_json(results), indent=2, ensure_ascii=False))
    else:
        print(format_results(results, env, args.query))


if __name__ == "__main__":
    main()
# ── Data models ──────────────────────────────────────────

@dataclass
class SkillIndex:
    """Skill index entry (kept aligned with the server-side record structure).

    Only ``key`` is required; every other field has a default so partial
    server records can still be constructed.
    """
    key: str
    name: str = ""
    description: str = ""
    one_line_summary: str = ""
    category: str = ""
    tags: list[str] = field(default_factory=list)
    language: str = "en"
    os: list[str] = field(default_factory=list)
    shell: list[str] = field(default_factory=list)
    runtimes: list[str] = field(default_factory=list)
    tools: list[str] = field(default_factory=list)
    services: list[str] = field(default_factory=list)
    needs_tool_calling: bool = False
    needs_reasoning: bool = False
    min_context_window: str = "standard"
    decay_risk: str = "low"
    clarity: int = 0
    completeness: int = 0
    actionability: int = 0
    autonomous_safe: bool = True
    blast_radius: str = "low"
    requires_credentials: bool = False
    data_exposure: str = "none"
    effect_scope: str = "local"
    form: str = ""
    estimated_tokens: str = "medium"
    capabilities: list[str] = field(default_factory=list)
    github_stars: int = 0
    github_url: str = ""

    @property
    def quality_score(self) -> float:
        """Weighted sum: 0.3 * clarity + 0.3 * completeness + 0.4 * actionability."""
        return self.clarity * 0.3 + self.completeness * 0.3 + self.actionability * 0.4

    @classmethod
    def from_dict(cls, d: dict) -> "SkillIndex":
        """Build an entry from a decoded JSON object.

        Unknown keys are ignored. A ``None`` value (JSON ``null``) for any
        field that has a default is treated as absent, so the default applies
        instead of leaking ``None`` into list/number fields that callers
        slice, join or format.

        Raises:
            TypeError: If the required ``key`` entry is missing.
        """
        known = cls.__dataclass_fields__
        kwargs = {}
        for name, value in d.items():
            if name not in known:
                continue
            # ``key`` is the only field without a default, so it is passed
            # through unchanged; for the rest, null falls back to the default.
            if value is None and name != "key":
                continue
            kwargs[name] = value
        return cls(**kwargs)


@dataclass
class SearchResult:
    """A single search hit: the skill plus its scores and annotations."""
    skill: SkillIndex
    relevance: float = 0.0
    quality: float = 0.0
    final_score: float = 0.0
    match_reasons: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)

    @classmethod
    def from_dict(cls, d: dict) -> "SearchResult":
        """Build a result from a decoded JSON object.

        The skill record is read from ``d["skill"]``; when that entry is
        missing or null, ``d`` itself is treated as the (flat) skill record.
        Missing or null scores become ``0.0`` and missing or null
        ``match_reasons`` / ``warnings`` become empty lists.
        """
        skill = SkillIndex.from_dict(d.get("skill") or d)
        return cls(
            skill=skill,
            relevance=d.get("relevance") or 0.0,
            quality=d.get("quality") or 0.0,
            final_score=d.get("final_score") or 0.0,
            match_reasons=d.get("match_reasons") or [],
            warnings=d.get("warnings") or [],
        )
# ── Environment detection ────────────────────────────────

def _run(cmd):
    """Run *cmd* (split on whitespace) and return its stripped stdout.

    Returns "" when the command exits non-zero or cannot be run at all.
    The 5-second timeout bounds how long a probe can block.
    """
    try:
        r = subprocess.run(cmd.split(), capture_output=True, text=True, timeout=5)
        return r.stdout.strip() if r.returncode == 0 else ""
    except Exception:
        # Deliberately best-effort: any failure to run the probe (missing
        # binary, timeout, bad arguments) is reported as "no output".
        return ""


def _detect_os():
    """Return the OS name, mapping Darwin to "macos"; unknown systems pass through lower-cased."""
    s = platform.system().lower()
    return {"darwin": "macos", "linux": "linux", "windows": "windows"}.get(s, s)


def _detect_shell():
    """Guess the shell from ``$SHELL``, falling back to "powershell" on Windows."""
    shell = os.environ.get("SHELL", "")
    if "zsh" in shell:
        return "zsh"
    if "bash" in shell:
        return "bash"
    if platform.system() == "Windows":
        return "powershell"
    return os.path.basename(shell) if shell else "unknown"


def _detect_runtimes():
    """Return the names of language runtimes that have an executable on PATH."""
    checks = {"python": ["python3", "python"], "node": ["node"], "go": ["go"],
              "rust": ["rustc"], "java": ["java"], "ruby": ["ruby"],
              "php": ["php"], "dotnet": ["dotnet"]}
    # A runtime counts as present if any of its candidate executables resolves.
    return [name for name, cmds in checks.items()
            if any(shutil.which(cmd) for cmd in cmds)]


def _detect_tools():
    """Return the subset of a fixed tool list that has an executable on PATH."""
    tools = ["git", "docker", "npm", "pip", "curl", "wget", "kubectl",
             "terraform", "aws", "gcloud", "az", "brew", "cargo", "make", "cmake"]
    return [t for t in tools if shutil.which(t)]


def detect_environment():
    """Collect the environment description sent along with API requests.

    Returns:
        A dict with keys ``os``, ``shell``, ``runtimes``, ``tools`` and
        ``model``. The ``model`` entry is a fixed set of capability flags;
        it is not probed.
    """
    return {"os": _detect_os(), "shell": _detect_shell(),
            "runtimes": _detect_runtimes(), "tools": _detect_tools(),
            "model": {"tool_calling": True, "reasoning": True, "context_window": "large"}}


# ── API configuration and requests ───────────────────────

# NOTE(review): this default is plain HTTP, so the request body and any
# bearer key from SKILL_SEARCH_KEY travel unencrypted — confirm whether an
# HTTPS endpoint is available.
DEFAULT_API_URL = "http://www.fudankw.cn:58787"


def _get_api_url():
    """Return the API base URL: ``SKILL_SEARCH_API`` if set and non-empty, else the default."""
    return os.environ.get("SKILL_SEARCH_API") or DEFAULT_API_URL


def _get_api_key():
    """Return the optional API key from ``SKILL_SEARCH_KEY`` (``None`` if unset)."""
    return os.environ.get("SKILL_SEARCH_KEY")


class SkillSearchError(Exception):
    """Raised for any failure while talking to the Skill Search API."""


def _api_request(endpoint, payload):
    """POST *payload* as JSON to ``<base-url>/<endpoint>`` and return the decoded object.

    Args:
        endpoint: Path segment appended to the base URL (e.g. ``"search"``).
        payload: JSON-serialisable request body.

    Returns:
        The decoded JSON response, guaranteed to be a dict.

    Raises:
        SkillSearchError: On a malformed URL, HTTP error status, connection
            failure, undecodable body, or a response that is not a JSON object.
    """
    # Tolerate a trailing slash on the configured base URL.
    url = f"{_get_api_url().rstrip('/')}/{endpoint}"
    data = json.dumps(payload).encode("utf-8")
    headers = {"Content-Type": "application/json"}
    api_key = _get_api_key()
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    try:
        # Request() itself raises ValueError for a URL without a scheme, so
        # it must sit inside the try to be reported as SkillSearchError.
        req = urllib.request.Request(url, data=data, headers=headers, method="POST")
        with urllib.request.urlopen(req, timeout=30) as resp:
            result = json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as e:
        # HTTPError is a URLError subclass, so it has to be handled first.
        body = e.read().decode("utf-8", errors="replace")
        raise SkillSearchError(f"API 错误 {e.code}: {body}") from e
    except urllib.error.URLError as e:
        raise SkillSearchError(f"无法连接服务: {e.reason}") from e
    except Exception as e:
        raise SkillSearchError(f"请求失败: {e}") from e
    if not isinstance(result, dict):
        # Callers use .get() on the result; reject arrays/scalars up front.
        raise SkillSearchError(f"请求失败: 响应不是 JSON 对象 ({type(result).__name__})")
    return result


# ── Public interface ─────────────────────────────────────

def search(query, env=None, category=None, top_k=10):
    """Search for skills matching *query*.

    Args:
        query: Free-text search query.
        env: Environment dict to send; detected automatically when ``None``.
        category: Optional category filter; omitted from the request when falsy.
        top_k: Number of results requested from the server.

    Returns:
        A list of ``SearchResult`` built from the response's ``results``
        entry (empty when that entry is missing or null).

    Raises:
        SkillSearchError: If the request fails or a result entry is malformed.
    """
    if env is None:
        env = detect_environment()
    payload = {"query": query, "env": env, "top_k": top_k}
    if category:
        payload["category"] = category
    resp = _api_request("search", payload)
    try:
        return [SearchResult.from_dict(item) for item in resp.get("results") or []]
    except (TypeError, AttributeError) as e:
        # A non-list "results" or an entry lacking required fields would
        # otherwise escape as a bare TypeError/AttributeError.
        raise SkillSearchError(f"请求失败: 响应格式无效: {e}") from e


def get_stats(env=None):
    """Fetch index statistics from the server.

    Args:
        env: Environment dict to send; detected automatically when ``None``.

    Returns:
        The decoded response dict, returned as-is.

    Raises:
        SkillSearchError: If the request fails.
    """
    if env is None:
        env = detect_environment()
    return _api_request("stats", {"env": env})