add skill_search SOP to tracked memory files
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -60,6 +60,11 @@ memory/*
|
||||
# Plan SOP
|
||||
!memory/plan_sop.md
|
||||
|
||||
# Skill Search SOP
|
||||
!memory/skill_search/
|
||||
!memory/skill_search/**
|
||||
memory/skill_search/**/__pycache__/
|
||||
|
||||
# ADB UI tool
|
||||
!memory/adb_ui.py
|
||||
|
||||
|
||||
116
memory/skill_search/SKILL.md
Normal file
116
memory/skill_search/SKILL.md
Normal file
@@ -0,0 +1,116 @@
|
||||
# Skill Search — API 客户端
|
||||
|
||||
> 从 10 万+ 技能卡中智能检索最适合当前环境的 skill。
|
||||
|
||||
## 架构
|
||||
|
||||
```
|
||||
┌─────────────┐ HTTPS/JSON ┌──────────────────┐
|
||||
│ 客户端 CLI │ ──────────────────▶ │ Skill Search │
|
||||
│ (本项目) │ ◀────────────────── │ API Server │
|
||||
└─────────────┘ └──────────────────┘
|
||||
• 环境检测 • 105K+ 技能卡索引
|
||||
• 结果格式化 • 四层漏斗检索引擎
|
||||
• 零数据依赖 • 环境过滤 → 安全标注
|
||||
→ 语义匹配 → 质量排序
|
||||
```
|
||||
|
||||
## 快速开始
|
||||
|
||||
```bash
|
||||
# 设置 API 地址
|
||||
export SKILL_SEARCH_API="https://your-server.com/api"
|
||||
export SKILL_SEARCH_KEY="your-api-key" # 可选
|
||||
|
||||
# 搜索
|
||||
python -m skill_search "python testing"
|
||||
|
||||
# 限定类别
|
||||
python -m skill_search "docker deploy" --category devops
|
||||
|
||||
# JSON 输出(适合程序集成)
|
||||
python -m skill_search "git workflow" --json
|
||||
|
||||
# 查看环境信息
|
||||
python -m skill_search --env
|
||||
|
||||
# 查看索引统计
|
||||
python -m skill_search --stats
|
||||
```
|
||||
|
||||
## 编程接口
|
||||
|
||||
```python
|
||||
from skill_search import search, detect_environment
|
||||
|
||||
env = detect_environment()
|
||||
results = search(query="python testing", env=env, top_k=5)
|
||||
|
||||
for r in results:
|
||||
print(f"{r.skill.name} (score: {r.final_score:.2f})")
|
||||
print(f" {r.skill.one_line_summary}")
|
||||
```
|
||||
|
||||
## 文件结构
|
||||
|
||||
```
|
||||
skill_search/
|
||||
├── SKILL.md # 本文档
|
||||
└── skill_search/ # Python 包
|
||||
├── __init__.py # 公开 API
|
||||
├── __main__.py # CLI 入口
|
||||
├── engine.py # HTTP 客户端(替代本地检索)
|
||||
├── index.py # SkillIndex 数据模型
|
||||
├── env_detect.py # 本地环境检测
|
||||
└── formatter.py # 结果格式化输出
|
||||
```
|
||||
|
||||
## 环境变量
|
||||
|
||||
| 变量 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| `SKILL_SEARCH_API` | API 服务地址 | `http://www.fudankw.cn:58787` |
|
||||
| `SKILL_SEARCH_KEY` | API 密钥(可选) | 无 |
|
||||
|
||||
## API 协议
|
||||
|
||||
### POST /search
|
||||
|
||||
请求:
|
||||
```json
|
||||
{
|
||||
"query": "python testing",
|
||||
"env": { "os": "windows", "shell": "powershell", ... },
|
||||
"category": "coding",
|
||||
"top_k": 10
|
||||
}
|
||||
```
|
||||
|
||||
响应:
|
||||
```json
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"skill": { "key": "org/repo/skill", "name": "...", ... },
|
||||
"relevance": 0.85,
|
||||
"quality": 7.2,
|
||||
"final_score": 0.78,
|
||||
"match_reasons": ["完整短语匹配", "标签匹配: python"],
|
||||
"warnings": []
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### POST /stats
|
||||
|
||||
请求: `{}` 或 `{"env": {...}}`
|
||||
|
||||
响应:
|
||||
```json
|
||||
{
|
||||
"total": 105586,
|
||||
"safe_count": 98234,
|
||||
"categories": { "coding": 45000, "devops": 12000, ... }
|
||||
}
|
||||
```
|
||||
8
memory/skill_search/skill_search/__init__.py
Normal file
8
memory/skill_search/skill_search/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""skill_search — Skill 检索 API 客户端"""
|
||||
# Re-export the public client API from the engine module.
from .engine import (
    SkillIndex,
    SearchResult,
    SkillSearchError,
    search,
    get_stats,
    detect_environment,
)

# Names exposed by ``from skill_search import *``.
__all__ = [
    "SkillIndex",
    "SearchResult",
    "SkillSearchError",
    "search",
    "get_stats",
    "detect_environment",
]
|
||||
116
memory/skill_search/skill_search/__main__.py
Normal file
116
memory/skill_search/skill_search/__main__.py
Normal file
@@ -0,0 +1,116 @@
|
||||
"""CLI 入口: python -m skill_search"""
|
||||
from __future__ import annotations
|
||||
import argparse, json, sys
|
||||
from .engine import SearchResult, SkillSearchError, detect_environment, search, get_stats
|
||||
|
||||
|
||||
# ── 格式化 ───────────────────────────────────────────────
|
||||
|
||||
def format_results(results: list[SearchResult], env: dict, query: str) -> str:
    """Render search results as a human-readable terminal report.

    Args:
        results: Ranked hits. Each one exposes ``skill``, ``final_score``,
            ``relevance``, ``quality``, ``match_reasons`` and ``warnings``.
        env: Detected environment; the ``os``, ``shell`` and ``runtimes``
            keys are shown in the header when present.
        query: The query string, echoed in the header.

    Returns:
        The whole report as a single newline-joined string.
    """
    bar_width = 10
    rule = "─" * 60
    lines = [f'🔍 搜索: "{query}"',
             f"🖥️ 环境: {env.get('os','?')} / {env.get('shell','?')} / {', '.join(env.get('runtimes',[]))}",
             f"📊 找到 {len(results)} 个匹配结果\n"]
    if not results:
        lines.append("未找到匹配的 skill。试试其他关键词?")
        return "\n".join(lines)
    for i, r in enumerate(results, 1):
        s = r.skill
        safe_icon = "🟢" if s.autonomous_safe else "🔴"
        # Clamp the filled cell count: a score outside [0, 1] must not make
        # the bar wider (or emptier) than ``bar_width`` cells.
        filled = max(0, min(bar_width, int(r.final_score * 10)))
        score_bar = "█" * filled + "░" * (bar_width - filled)
        lines += [
            rule,
            f"#{i} {safe_icon} {s.name}",
            f" 路径: {s.key}",
            f" 类别: {s.category} | 标签: {', '.join(s.tags[:5])}",
            f" 摘要: {s.one_line_summary}",
            f" 评分: [{score_bar}] {r.final_score:.2f} (相关={r.relevance:.2f} 质量={r.quality:.1f})",
            f" 清晰={s.clarity} 完整={s.completeness} 可操作={s.actionability} | 形式={s.form}",
        ]
        if r.match_reasons:
            # Only the first three reasons are shown to keep the entry short.
            lines.append(f" 匹配: {' | '.join(r.match_reasons[:3])}")
        if r.warnings:
            lines.extend(f" {w}" for w in r.warnings)
        lines.append("")
    lines.append(rule)
    return "\n".join(lines)
|
||||
|
||||
|
||||
def format_results_json(results: list[SearchResult]) -> list[dict]:
    """Convert search results into JSON-serialisable dictionaries.

    Each entry carries a 1-based ``rank`` followed by the skill's identity,
    grouped ``scores``, ``safety`` and ``platform`` sections, and the
    result's warnings and match reasons.
    """
    entries = []
    for rank, result in enumerate(results, start=1):
        skill = result.skill
        scores = {
            "final": round(result.final_score, 3),
            "relevance": round(result.relevance, 3),
            "quality": round(result.quality, 1),
            "clarity": skill.clarity,
            "completeness": skill.completeness,
            "actionability": skill.actionability,
        }
        safety = {
            "autonomous_safe": skill.autonomous_safe,
            "blast_radius": skill.blast_radius,
            "requires_credentials": skill.requires_credentials,
            "data_exposure": skill.data_exposure,
            "effect_scope": skill.effect_scope,
        }
        platform_info = {
            "os": skill.os,
            "runtimes": skill.runtimes,
            "tools": skill.tools,
            "services": skill.services,
        }
        entries.append({
            "rank": rank,
            "key": skill.key,
            "name": skill.name,
            "category": skill.category,
            "tags": skill.tags,
            "description": skill.description,
            "one_line_summary": skill.one_line_summary,
            "scores": scores,
            "safety": safety,
            "platform": platform_info,
            "warnings": result.warnings,
            "match_reasons": result.match_reasons,
        })
    return entries
|
||||
|
||||
|
||||
# ── CLI ──────────────────────────────────────────────────
|
||||
|
||||
def _build_parser():
    """Create the argument parser for the ``skill_search`` command line."""
    parser = argparse.ArgumentParser(
        prog="skill_search",
        description="Skill 检索系统 — 根据环境和需求智能推荐 skill(API 客户端)",
    )
    parser.add_argument("query", nargs="?", help="搜索关键词(如: 'python testing')")
    parser.add_argument("--category", "-cat", help="限定类别")
    parser.add_argument("--top", "-k", type=int, default=10, help="返回结果数(默认 10)")
    parser.add_argument("--json", action="store_true", help="JSON 格式输出")
    parser.add_argument("--env", action="store_true", help="仅显示检测到的环境信息")
    parser.add_argument("--stats", action="store_true", help="显示索引统计信息")
    parser.add_argument("--api-url", help="指定 API 地址(也可用 SKILL_SEARCH_API 环境变量)")
    return parser


def _print_environment(env):
    """Print the detected environment to stdout."""
    print("🖥️ 当前环境:")
    print(f" OS: {env['os']}")
    print(f" Shell: {env['shell']}")
    print(f" 运行时: {', '.join(env['runtimes'])}")
    print(f" 工具: {', '.join(env['tools'])}")
    print(f" 模型能力: tool_calling={env['model']['tool_calling']}, "
          f"reasoning={env['model']['reasoning']}, context={env['model']['context_window']}")


def _print_stats(env):
    """Fetch and print index statistics; exit with status 1 on an API error."""
    try:
        stats = get_stats(env)
        print("📊 索引统计:")
        print(f" 总计: {stats.get('total', '?')} 个 skills")
        print(f" 自动安全: {stats.get('safe_count', '?')} 个")
        if 'categories' in stats:
            print(" 类别分布:")
            # Largest categories first.
            ranked = sorted(stats['categories'].items(), key=lambda pair: -pair[1])
            for cat, cnt in ranked:
                print(f" {cat:15s} {cnt:4d}")
    except SkillSearchError as exc:
        print(f"❌ {exc}", file=sys.stderr)
        sys.exit(1)


def main():
    """Command-line entry point.

    Handles ``--env`` and ``--stats`` as standalone modes. Otherwise runs a
    search for the given query and prints it as text or, with ``--json``, as
    JSON. Prints the help text when no query is given. API failures are
    reported on stderr and exit with status 1.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.api_url:
        # The engine reads the endpoint from the environment, so the CLI
        # override is passed on through the same variable.
        import os
        os.environ["SKILL_SEARCH_API"] = args.api_url

    env = detect_environment()

    if args.env:
        _print_environment(env)
        return

    if args.stats:
        _print_stats(env)
        return

    if not args.query:
        parser.print_help()
        return

    try:
        results = search(query=args.query, env=env, category=args.category, top_k=args.top)
    except SkillSearchError as exc:
        print(f"❌ {exc}", file=sys.stderr)
        sys.exit(1)

    if args.json:
        rendered = json.dumps(format_results_json(results), indent=2, ensure_ascii=False)
    else:
        rendered = format_results(results, env, args.query)
    print(rendered)


if __name__ == "__main__":
    main()
|
||||
156
memory/skill_search/skill_search/engine.py
Normal file
156
memory/skill_search/skill_search/engine.py
Normal file
@@ -0,0 +1,156 @@
|
||||
"""Skill 检索引擎 — API 客户端(含数据模型与环境检测)"""
|
||||
from __future__ import annotations
|
||||
import json, os, platform, shutil, subprocess, urllib.request, urllib.error
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# ── 数据模型 ─────────────────────────────────────────────
|
||||
|
||||
@dataclass
class SkillIndex:
    """One skill entry as returned by the search service."""

    # Identity and description
    key: str
    name: str = ""
    description: str = ""
    one_line_summary: str = ""
    category: str = ""
    tags: list[str] = field(default_factory=list)
    language: str = "en"

    # Platform requirements
    os: list[str] = field(default_factory=list)
    shell: list[str] = field(default_factory=list)
    runtimes: list[str] = field(default_factory=list)
    tools: list[str] = field(default_factory=list)
    services: list[str] = field(default_factory=list)

    # Model requirements
    needs_tool_calling: bool = False
    needs_reasoning: bool = False
    min_context_window: str = "standard"

    # Quality signals
    decay_risk: str = "low"
    clarity: int = 0
    completeness: int = 0
    actionability: int = 0

    # Safety metadata
    autonomous_safe: bool = True
    blast_radius: str = "low"
    requires_credentials: bool = False
    data_exposure: str = "none"
    effect_scope: str = "local"

    # Miscellaneous
    form: str = ""
    estimated_tokens: str = "medium"
    capabilities: list[str] = field(default_factory=list)
    github_stars: int = 0
    github_url: str = ""

    @property
    def quality_score(self):
        """Weighted sum of clarity (0.3), completeness (0.3) and actionability (0.4)."""
        clarity_part = self.clarity * 0.3
        completeness_part = self.completeness * 0.3
        actionability_part = self.actionability * 0.4
        return clarity_part + completeness_part + actionability_part

    @classmethod
    def from_dict(cls, d):
        """Build an instance from a mapping, ignoring keys that are not fields."""
        declared = cls.__dataclass_fields__
        accepted = {name: value for name, value in d.items() if name in declared}
        return cls(**accepted)
|
||||
|
||||
|
||||
@dataclass
class SearchResult:
    """A single search hit: the skill plus its scores and annotations."""

    skill: SkillIndex
    relevance: float = 0.0
    quality: float = 0.0
    final_score: float = 0.0
    match_reasons: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)

    @classmethod
    def from_dict(cls, d):
        """Build a result from a mapping.

        The skill is read from the ``"skill"`` entry; when that key is
        absent, the mapping itself is used as the skill data.
        """
        skill_data = d.get("skill", d)
        return cls(
            skill=SkillIndex.from_dict(skill_data),
            relevance=d.get("relevance", 0.0),
            quality=d.get("quality", 0.0),
            final_score=d.get("final_score", 0.0),
            match_reasons=d.get("match_reasons", []),
            warnings=d.get("warnings", []),
        )
|
||||
|
||||
|
||||
# ── 环境检测 ─────────────────────────────────────────────
|
||||
|
||||
def _run(cmd):
    """Run a whitespace-separated command and return its stripped stdout.

    Returns an empty string when the command exits non-zero or anything
    goes wrong, including a missing executable or the 5-second timeout.
    """
    try:
        completed = subprocess.run(
            cmd.split(),
            capture_output=True,
            text=True,
            timeout=5,
        )
        if completed.returncode != 0:
            return ""
        return completed.stdout.strip()
    except Exception:
        # Best-effort probe: any failure simply means "no output".
        return ""
|
||||
|
||||
def _detect_os():
    """Return the lower-cased platform name, reporting Darwin as ``"macos"``."""
    system = platform.system().lower()
    if system == "darwin":
        return "macos"
    # "linux" and "windows" map to themselves; other names pass through.
    return system
|
||||
|
||||
def _detect_shell():
    """Guess the user's shell from the ``SHELL`` environment variable.

    Returns:
        ``"zsh"`` or ``"bash"`` when the shell executable's name contains
        that word; ``"powershell"`` on Windows otherwise; else the
        executable's base name, or ``"unknown"`` when none can be found.
    """
    shell = os.environ.get("SHELL", "")
    # Match on the executable name only. Testing the whole path would
    # misreport e.g. "/opt/zsh-tools/bin/fish" as zsh.
    shell_name = os.path.basename(shell.rstrip("/\\"))
    if "zsh" in shell_name:
        return "zsh"
    if "bash" in shell_name:
        return "bash"
    if platform.system() == "Windows":
        return "powershell"
    return shell_name or "unknown"
|
||||
|
||||
def _detect_runtimes():
    """List the language runtimes that have an executable on ``PATH``.

    A runtime counts as present as soon as any of its candidate executable
    names is found; results keep the order of the candidate table.
    """
    candidates = (
        ("python", ("python3", "python")),
        ("node", ("node",)),
        ("go", ("go",)),
        ("rust", ("rustc",)),
        ("java", ("java",)),
        ("ruby", ("ruby",)),
        ("php", ("php",)),
        ("dotnet", ("dotnet",)),
    )
    return [
        runtime
        for runtime, executables in candidates
        if any(shutil.which(exe) for exe in executables)
    ]
|
||||
|
||||
def _detect_tools():
    """List the known command-line tools that are found on ``PATH``."""
    known_tools = (
        "git", "docker", "npm", "pip", "curl", "wget", "kubectl",
        "terraform", "aws", "gcloud", "az", "brew", "cargo", "make", "cmake",
    )
    # ``shutil.which`` returns None for a missing tool, so filter() drops it.
    return list(filter(shutil.which, known_tools))
|
||||
|
||||
def detect_environment():
    """Collect a description of the local environment.

    Returns:
        A dict with ``os``, ``shell``, ``runtimes`` and ``tools`` taken from
        the detection helpers, plus a ``model`` entry whose capability
        values are fixed constants rather than detected.
    """
    environment = {}
    environment["os"] = _detect_os()
    environment["shell"] = _detect_shell()
    environment["runtimes"] = _detect_runtimes()
    environment["tools"] = _detect_tools()
    environment["model"] = {
        "tool_calling": True,
        "reasoning": True,
        "context_window": "large",
    }
    return environment
|
||||
|
||||
|
||||
# ── API 配置与调用 ────────────────────────────────────────
|
||||
|
||||
# Endpoint used when SKILL_SEARCH_API is not set.
# NOTE(review): this is plain HTTP, so the request payload and any Bearer
# key travel unencrypted with the default; confirm whether HTTPS is available.
DEFAULT_API_URL = "http://www.fudankw.cn:58787"


def _get_api_url():
    """Return the API base URL without a trailing slash.

    Reads ``SKILL_SEARCH_API`` and falls back to ``DEFAULT_API_URL`` when
    the variable is unset or blank. Trailing slashes are stripped so that
    callers can append ``/<endpoint>`` without producing ``//``.
    """
    configured = os.environ.get("SKILL_SEARCH_API", "").strip()
    return (configured or DEFAULT_API_URL).rstrip("/")
|
||||
|
||||
def _get_api_key():
    """Return the value of ``SKILL_SEARCH_KEY``, or ``None`` when it is unset."""
    return os.getenv("SKILL_SEARCH_KEY")
|
||||
|
||||
class SkillSearchError(Exception):
    """Raised when a request to the skill search API fails."""
|
||||
|
||||
def _api_request(endpoint, payload):
    """POST ``payload`` as JSON to an API endpoint and return the decoded reply.

    Args:
        endpoint: Path appended to the configured base URL, e.g. ``"search"``.
        payload: JSON-serialisable request body.

    Returns:
        The response body decoded into a dict.

    Raises:
        SkillSearchError: On an HTTP error status, a connection failure, a
            malformed URL or payload, an undecodable response, or a response
            that is not a JSON object.
    """
    # Normalise the slashes so a base URL ending in "/" does not yield "//".
    url = f"{_get_api_url().rstrip('/')}/{endpoint.lstrip('/')}"
    headers = {"Content-Type": "application/json"}
    api_key = _get_api_key()
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    try:
        # Serialising and building the request happen inside the try block:
        # a bad payload or malformed URL raises here and must surface as
        # SkillSearchError, not as a raw TypeError/ValueError.
        data = json.dumps(payload).encode("utf-8")
        req = urllib.request.Request(url, data=data, headers=headers, method="POST")
        with urllib.request.urlopen(req, timeout=30) as resp:
            result = json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as e:
        # HTTPError is a URLError subclass, so it must be handled first.
        body = e.read().decode("utf-8", errors="replace")
        raise SkillSearchError(f"API 错误 {e.code}: {body}") from e
    except urllib.error.URLError as e:
        raise SkillSearchError(f"无法连接服务: {e.reason}") from e
    except Exception as e:
        raise SkillSearchError(f"请求失败: {e}") from e
    if not isinstance(result, dict):
        # Callers use ``.get`` on the reply; reject anything but an object.
        raise SkillSearchError(
            f"请求失败: 响应不是 JSON 对象 ({type(result).__name__})")
    return result
|
||||
|
||||
|
||||
# ── 公开接口 ─────────────────────────────────────────────
|
||||
|
||||
def search(query, env=None, category=None, top_k=10):
    """Query the search API and return the hits as ``SearchResult`` objects.

    Args:
        query: Free-text search query.
        env: Environment description; detected automatically when ``None``.
        category: Optional category filter, sent only when truthy.
        top_k: Number of results requested from the server.

    Returns:
        A list of ``SearchResult`` built from the response's ``results``
        entry, or an empty list when that entry is absent.
    """
    request_env = detect_environment() if env is None else env
    payload = {"query": query, "env": request_env, "top_k": top_k}
    if category:
        payload["category"] = category
    response = _api_request("search", payload)
    return list(map(SearchResult.from_dict, response.get("results", [])))
|
||||
|
||||
def get_stats(env=None):
    """Fetch index statistics from the API.

    Args:
        env: Environment description; detected automatically when ``None``.

    Returns:
        The decoded response of the ``stats`` endpoint.
    """
    request_env = detect_environment() if env is None else env
    return _api_request("stats", {"env": request_env})
|
||||
Reference in New Issue
Block a user