Spaces:

ANJD
/

Ody

Runtime error

App Files Files Community

Ody / services /memory /skill_format.py

ANJD

Upload 387 files

4ac029f verified 21 days ago

Raw

History Blame Contribute Delete

15.8 kB

	"""SKILL.md parser & writer.

	Reads/writes a single skill from a `SKILL.md` file with YAML frontmatter
	and a structured markdown body. Inspired by Hermes' skills format
	(https://hermes-agent.nousresearch.com/docs/user-guide/features/skills).

	Frontmatter shape (YAML):

	---
	name: open-pr-from-branch
	description: One-line summary surfaced in the skills index.
	version: 1.0.0
	category: dev
	tags: [git, github]
	platforms: [linux, macos] # optional
	requires_toolsets: [] # optional
	fallback_for_toolsets: [] # optional
	status: published # draft | published
	confidence: 0.8 # 0..1
	source: learned # learned | taught | imported
	teacher_model: claude-opus-4-7 # optional
	created: 2026-05-09T21:43:00Z
	---

	Body sections (any subset; rendered as headings):

	## When to Use
	Trigger conditions in plain English.

	## Procedure
	1. First step
	2. Second step

	## Pitfalls
	- Common failure mode + how to recover

	## Verification
	- How to confirm success

	Anything else (raw paragraphs after the last known section) is preserved
	in `body_extra` and round-trips on save.

	Usage counters (`uses`, `last_used`) live in a sidecar `_usage.json` keyed
	by skill name, so the SKILL.md file doesn't churn on every retrieval.
	"""

	from __future__ import annotations

	import json
	import logging
	import re
	from dataclasses import dataclass, field
	from datetime import datetime
	from typing import Any, Dict, List, Optional

	# Module-level logger, named after this module via `__name__`.
	logger = logging.getLogger(__name__)

	# ---------------------------------------------------------------------------
	# Slugify
	# ---------------------------------------------------------------------------

	# Any run of characters outside [a-z0-9] collapses to a single dash.
	_SLUG_RE = re.compile(r"[^a-z0-9]+")
	# Upper bound on slug length (slugs double as directory names).
	_SLUG_MAX_LEN = 60


	def slugify(text: str, fallback: str = "skill") -> str:
	    """Convert a free-form title to a kebab-case slug.

	    The input is lower-cased, every run of non-alphanumeric characters is
	    collapsed to one dash, and leading/trailing dashes are trimmed. The
	    result is capped at 60 characters.

	    Args:
	        text: Title to convert; falsy values are treated as empty.
	        fallback: Returned (truncated to 60 chars) when nothing
	            alphanumeric survives.

	    Returns:
	        The slug, or the fallback when the slug would be empty.
	    """
	    slug = _SLUG_RE.sub("-", str(text or "").strip().lower()).strip("-")
	    if not slug:
	        return fallback[:_SLUG_MAX_LEN]
	    # Truncation can cut right after a dash; trim it so the "no trailing
	    # dash" guarantee still holds for long titles.
	    return slug[:_SLUG_MAX_LEN].rstrip("-")


	# ---------------------------------------------------------------------------
	# Frontmatter (minimal YAML — we don't pull in PyYAML for one feature)
	# ---------------------------------------------------------------------------

	# We accept a tiny subset of YAML: scalar `key: value`, inline lists `[a, b]`,
	# and block lists with `-`. That covers everything in our schema and avoids
	# a new dependency.

	_FM_KEY_RE = re.compile(r"^([a-z_][a-z0-9_]):\s(.*)$", re.IGNORECASE)
	_FM_BLOCK_LIST_RE = re.compile(r"^\s-\s(.*)$")


	def _parse_scalar(raw: str) -> Any:
	raw = raw.strip()
	if raw == "":
	return ""
	if raw.startswith("[") and raw.endswith("]"):
	inner = raw[1:-1].strip()
	if not inner:
	return []
	return [_parse_scalar(p) for p in _split_top_level(inner, ",")]
	if raw.lower() in ("true", "yes"):
	return True
	if raw.lower() in ("false", "no"):
	return False
	if raw.lower() in ("null", "none", "~"):
	return None
	if (raw[0] == raw[-1]) and raw[0] in ("'", '"'):
	return raw[1:-1]
	# Try number
	try:
	if "." in raw:
	return float(raw)
	return int(raw)
	except ValueError:
	pass
	return raw


	def _split_top_level(s: str, sep: str) -> List[str]:
	"""Split `s` on `sep` ignoring separators inside [] or quotes."""
	out, buf, depth, quote = [], [], 0, None
	for ch in s:
	if quote:
	buf.append(ch)
	if ch == quote:
	quote = None
	continue
	if ch in ("'", '"'):
	quote = ch
	buf.append(ch)
	continue
	if ch == "[":
	depth += 1
	elif ch == "]":
	depth = max(0, depth - 1)
	if ch == sep and depth == 0:
	out.append("".join(buf).strip())
	buf = []
	continue
	buf.append(ch)
	if buf:
	out.append("".join(buf).strip())
	return out


	# Closing fence: a line holding only `---` (trailing blanks allowed).
	_FM_CLOSE_RE = re.compile(r"\n---[ \t]*(?:\r?\n|\Z)")


	def parse_frontmatter(text: str) -> tuple[Dict[str, Any], str]:
	    """Pull the YAML frontmatter out of a SKILL.md and return (fm, body).

	    Args:
	        text: Full file contents. A leading UTF-8 BOM is tolerated.

	    Returns:
	        `(fm, body)`, where `fm` maps frontmatter keys to parsed values and
	        `body` is everything after the closing fence with leading newlines
	        removed. When the text does not start with `---` or has no closing
	        fence, returns `({}, text)` with `text` unchanged.
	    """
	    src = text[1:] if text.startswith("\ufeff") else text
	    if not src.startswith("---"):
	        return {}, text
	    closing = _FM_CLOSE_RE.search(src, 3)
	    if closing is None:
	        return {}, text
	    fm_text = src[3:closing.start()]
	    # Strip "\r" too so CRLF files do not leave a stray carriage return.
	    body = src[closing.end():].lstrip("\r\n")

	    fm: Dict[str, Any] = {}
	    # Key whose value was empty; following `- item` lines belong to it.
	    pending_key: Optional[str] = None
	    for line in fm_text.splitlines():
	        stripped = line.strip()
	        if not stripped or stripped.startswith("#"):
	            continue
	        key_match = _FM_KEY_RE.match(line)
	        if key_match:
	            key, val = key_match.group(1), key_match.group(2)
	            # A value that is only a comment counts as empty.
	            if val.strip() == "" or val.lstrip().startswith("#"):
	                pending_key = key
	                fm[key] = []
	            else:
	                fm[key] = _parse_scalar(val)
	                pending_key = None
	            continue
	        item_match = _FM_BLOCK_LIST_RE.match(line)
	        if item_match and pending_key:
	            if not isinstance(fm.get(pending_key), list):
	                fm[pending_key] = []
	            fm[pending_key].append(_parse_scalar(item_match.group(1)))
	    return fm, body


	def _emit_scalar(v: Any) -> str:
	if v is None:
	return "null"
	if isinstance(v, bool):
	return "true" if v else "false"
	if isinstance(v, (int, float)):
	return str(v)
	if isinstance(v, list):
	return "[" + ", ".join(_emit_scalar(x) for x in v) + "]"
	s = str(v)
	if any(c in s for c in (":", "#", "\n", "[", "]", "{", "}", ",", "&", "*", "!", "\|", ">", "'", '"', "%", "@")):
	return json.dumps(s)
	return s


	def _as_list(v: Any) -> List[str]:
	if v is None:
	return []
	if isinstance(v, list):
	return [str(x) for x in v if x not in (None, "")]
	return [str(v)]


	def _as_float(v: Any, default: float = 0.8) -> float:
	try:
	return float(v)
	except (TypeError, ValueError):
	return default


	def emit_frontmatter(fm: Dict[str, Any]) -> str:
	    """Render a frontmatter mapping as newline-joined `key: value` lines.

	    Entries whose value is None, an empty list or an empty string are
	    omitted. The surrounding `---` fences are not included.
	    """
	    return "\n".join(
	        f"{key}: {_emit_scalar(value)}"
	        for key, value in fm.items()
	        if not (value is None or value == [] or value == "")
	    )


	# ---------------------------------------------------------------------------
	# Skill body sections
	# ---------------------------------------------------------------------------

	_KNOWN_SECTIONS = ("when_to_use", "procedure", "pitfalls", "verification")
	_HEADING_TO_KEY = {
	"when to use": "when_to_use",
	"procedure": "procedure",
	"steps": "procedure",
	"pitfalls": "pitfalls",
	"verification": "verification",
	}
	_KEY_TO_HEADING = {
	"when_to_use": "When to Use",
	"procedure": "Procedure",
	"pitfalls": "Pitfalls",
	"verification": "Verification",
	}


	# Level-2 markdown heading; group 1 is the heading text.
	_H2_RE = re.compile(r"^##\s+(.*?)\s*$")


	def parse_body(body: str) -> Dict[str, Any]:
	    """Split a SKILL.md body into known sections.

	    Text before the first heading and text under headings that are not
	    recognised go to `body_extra`; an unrecognised heading line is kept
	    with its text. A known section that appears more than once is merged
	    rather than overwritten.

	    Returns:
	        {
	            "when_to_use": str,
	            "procedure": list[str],     # numbered/bulleted lines
	            "pitfalls": list[str],
	            "verification": list[str],
	            "body_extra": str,          # anything not under a known heading
	        }
	    """
	    out: Dict[str, Any] = {k: ([] if k != "when_to_use" else "") for k in _KNOWN_SECTIONS}
	    out["body_extra"] = ""
	    if not body or not body.strip():
	        return out

	    # (section key or None, lines); the first entry collects the preamble.
	    sections: List[tuple[Optional[str], List[str]]] = [(None, [])]
	    for line in body.splitlines():
	        heading = _H2_RE.match(line)
	        if heading:
	            key = _HEADING_TO_KEY.get(heading.group(1).strip().lower())
	            # Keep an unknown heading's own line so it survives a save.
	            sections.append((key, [] if key else [line]))
	            continue
	        sections[-1][1].append(line)

	    for key, lines in sections:
	        text = "\n".join(lines).strip("\n")
	        if key is None:
	            extras = text.strip()
	            if extras:
	                out["body_extra"] = (out["body_extra"] + "\n\n" + extras).strip()
	        elif key == "when_to_use":
	            paragraphs = [p for p in (out["when_to_use"], text.strip()) if p]
	            out["when_to_use"] = "\n\n".join(paragraphs)
	        else:
	            out[key].extend(_parse_list_lines(text))
	    return out


	def _parse_list_lines(text: str) -> List[str]:
	"""Pull bullet/numbered lines out of a section body. Plain paragraphs are
	treated as a single entry."""
	items: List[str] = []
	for line in (text or "").splitlines():
	s = line.strip()
	if not s:
	continue
	m = re.match(r"^(?:[-]\|\d+[.)])\s+(.)$", s)
	if m:
	items.append(m.group(1).strip())
	elif items:
	# continuation of previous bullet
	items[-1] = items[-1] + " " + s
	else:
	items.append(s)
	return items


	def emit_body(sections: Dict[str, Any]) -> str:
	    """Render the markdown body of a SKILL.md from its sections.

	    Writes, in order: "When to Use" as a paragraph, "Procedure" as a
	    numbered list, "Pitfalls" and "Verification" as bullet lists, then
	    `body_extra` verbatim. Empty sections are omitted. Parts are separated
	    by a blank line; a non-empty result ends with one newline.
	    """
	    chunks: List[str] = []

	    intro = (sections.get("when_to_use") or "").strip()
	    if intro:
	        chunks.append(f"## {_KEY_TO_HEADING['when_to_use']}\n\n{intro}")

	    for key in ("procedure", "pitfalls", "verification"):
	        entries = sections.get(key) or []
	        if not entries:
	            continue
	        title = _KEY_TO_HEADING[key]
	        if key == "procedure":
	            rendered = [f"{n}. {entry}" for n, entry in enumerate(entries, start=1)]
	        else:
	            rendered = [f"- {entry}" for entry in entries]
	        listing = "\n".join(rendered)
	        chunks.append(f"## {title}\n\n{listing}")

	    tail = (sections.get("body_extra") or "").strip()
	    if tail:
	        chunks.append(tail)

	    if not chunks:
	        return ""
	    return "\n\n".join(chunks) + "\n"


	# ---------------------------------------------------------------------------
	# Skill record
	# ---------------------------------------------------------------------------


	@dataclass
	class Skill:
	    """One skill: frontmatter metadata, body sections and usage counters."""

	    name: str  # slug, dir name
	    description: str = ""
	    version: str = "1.0.0"
	    category: str = "general"
	    tags: List[str] = field(default_factory=list)
	    platforms: List[str] = field(default_factory=list)
	    requires_toolsets: List[str] = field(default_factory=list)
	    fallback_for_toolsets: List[str] = field(default_factory=list)
	    status: str = "draft"  # draft | published
	    confidence: float = 0.8
	    source: str = "learned"
	    teacher_model: Optional[str] = None
	    owner: Optional[str] = None
	    created: str = ""  # ISO8601
	    when_to_use: str = ""
	    procedure: List[str] = field(default_factory=list)
	    pitfalls: List[str] = field(default_factory=list)
	    verification: List[str] = field(default_factory=list)
	    body_extra: str = ""
	    # Sidecar (not persisted in SKILL.md)
	    uses: int = 0
	    last_used: Optional[int] = None
	    # File path on disk (set when read)
	    path: Optional[str] = None

	    # ----------------------------------------------------------------------
	    # Serialization
	    # ----------------------------------------------------------------------

	    def to_frontmatter(self) -> Dict[str, Any]:
	        """Build the frontmatter mapping, in the order keys are written.

	        Empty list fields and unset `teacher_model`/`owner` are left out.
	        `confidence` is rounded to three decimals and `created` falls back
	        to the current time when blank.
	        """
	        fm: Dict[str, Any] = dict(
	            name=self.name,
	            description=self.description,
	            version=self.version,
	            category=self.category,
	        )
	        for attr in ("tags", "platforms", "requires_toolsets", "fallback_for_toolsets"):
	            values = getattr(self, attr)
	            if values:
	                fm[attr] = list(values)
	        fm.update(
	            status=self.status,
	            confidence=round(float(self.confidence), 3),
	            source=self.source,
	        )
	        for attr in ("teacher_model", "owner"):
	            value = getattr(self, attr)
	            if value:
	                fm[attr] = value
	        fm["created"] = self.created or _now_iso()
	        return fm

	    def to_dict(self) -> Dict[str, Any]:
	        """Return a plain-dict view of the skill, including usage counters.

	        Besides the dataclass fields it carries `id` (same as `name`) and
	        the back-compat aliases `title`, `problem`, `solution`, `steps`.
	        """
	        record: Dict[str, Any] = {"id": self.name}  # slug doubles as id
	        for attr in ("name", "description", "version", "category"):
	            record[attr] = getattr(self, attr)
	        for attr in ("tags", "platforms", "requires_toolsets", "fallback_for_toolsets"):
	            record[attr] = list(getattr(self, attr))
	        record["status"] = self.status
	        record["confidence"] = round(float(self.confidence), 3)
	        for attr in ("source", "teacher_model", "owner", "created", "when_to_use"):
	            record[attr] = getattr(self, attr)
	        for attr in ("procedure", "pitfalls", "verification"):
	            record[attr] = list(getattr(self, attr))
	        record["body_extra"] = self.body_extra
	        record["uses"] = int(self.uses or 0)
	        record["last_used"] = self.last_used
	        record["path"] = self.path

	        # Back-compat aliases for the old API/UI
	        record["title"] = self.description or self.name.replace("-", " ").title()
	        record["problem"] = self.when_to_use
	        if self.body_extra:
	            solution = self.body_extra
	        elif self.procedure:
	            solution = self.procedure[0]
	        else:
	            solution = ""
	        record["solution"] = solution
	        record["steps"] = list(self.procedure)
	        return record

	    @classmethod
	    def from_markdown(cls, text: str, *, path: Optional[str] = None) -> "Skill":
	        """Build a Skill from the full text of a SKILL.md file.

	        The slug comes from the `name` key, or from `description` when
	        `name` is missing or empty. Missing keys take the dataclass
	        defaults, and a missing `created` is set to the current time.
	        """
	        fm, body = parse_frontmatter(text)
	        sections = parse_body(body)

	        def text_field(key: str, default: str) -> str:
	            return str(fm.get(key, default) or default)

	        def optional_field(key: str) -> Optional[str]:
	            return str(fm.get(key)) if fm.get(key) else None

	        slug_source = fm.get("name")
	        if slug_source in (None, ""):
	            slug_source = fm.get("description", "")

	        return cls(
	            name=slugify(slug_source, fallback="skill"),
	            description=text_field("description", ""),
	            version=text_field("version", "1.0.0"),
	            category=text_field("category", "general"),
	            tags=_as_list(fm.get("tags")),
	            platforms=_as_list(fm.get("platforms")),
	            requires_toolsets=_as_list(fm.get("requires_toolsets")),
	            fallback_for_toolsets=_as_list(fm.get("fallback_for_toolsets")),
	            status=text_field("status", "draft"),
	            confidence=_as_float(fm.get("confidence", 0.8), 0.8),
	            source=text_field("source", "learned"),
	            teacher_model=optional_field("teacher_model"),
	            owner=optional_field("owner"),
	            created=str(fm.get("created") or _now_iso()),
	            when_to_use=sections["when_to_use"],
	            procedure=list(sections["procedure"]),
	            pitfalls=list(sections["pitfalls"]),
	            verification=list(sections["verification"]),
	            body_extra=sections["body_extra"],
	            path=path,
	        )

	    def to_markdown(self) -> str:
	        """Render the skill as SKILL.md text: fenced frontmatter, then body."""
	        header = emit_frontmatter(self.to_frontmatter())
	        section_keys = ("when_to_use", "procedure", "pitfalls", "verification", "body_extra")
	        rendered_body = emit_body({key: getattr(self, key) for key in section_keys})
	        return "---\n" + header + "\n---\n\n" + rendered_body


	def _now_iso() -> str:
	return datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")