#!/usr/bin/env python3
"""Generate skills.jsonl from trajectory JSON files."""

import json
import re
from pathlib import Path

PROJECT_ROOT = Path(__file__).resolve().parent.parent
TRAJECTORIES_DIR = PROJECT_ROOT / "trajectories"
OUTPUT_FILE = PROJECT_ROOT / "data" / "skills.jsonl"

# Words skipped when building a fallback skill name from free-form task text.
_STOP_WORDS = frozenset({
    "a", "an", "the", "on", "to", "for", "in", "and", "my", "it",
    "using", "saying", "from", "at", "with", "am", "pm",
})

# Runs of lower-case ASCII letters; digits and punctuation act as separators.
_WORD_RE = re.compile(r"[a-z]+")


def generate_skill(task: str, app: str) -> str:
    """Derive a snake_case skill name from the task description and app package.

    Rules are evaluated top to bottom and the first match wins, so their
    order is significant. Task keywords are matched as case-insensitive
    substrings; app package names are compared exactly (case-sensitive).

    Args:
        task: Free-form task description.
        app: Android package name associated with the task ("" if unknown).

    Returns:
        One of the predefined skill names, or a name built from the first
        meaningful words of the task when no rule applies.
    """
    t = task.lower()

    if "whatsapp" in t or app == "com.whatsapp":
        return "whatsapp_send_message"
    if app == "com.spotify.music" or "spotify" in t:
        # Spotify is the only app with several skills; pick by action verb.
        if "pause" in t:
            return "spotify_pause"
        if "search" in t and "play" in t:
            return "spotify_search_play"
        return "spotify_play_playlist"
    if "alarm" in t or app == "com.google.android.deskclock":
        return "create_alarm"
    if "linkedin" in t or app == "com.linkedin.android":
        return "linkedin_search_person"
    if app == "com.ubercab" or "uber" in t:
        return "uber_request_ride"
    if "youtube" in t or app == "com.google.android.youtube":
        return "youtube_search"
    # "calender" is accepted as a common misspelling of "calendar".
    if "calendar" in t or "calender" in t or app == "com.google.android.calendar":
        return "calendar_create_event"
    if "slack" in t or app == "com.Slack":
        return "slack_open_channel"
    if "bluetooth" in t:
        return "bluetooth_enable"
    if "wifi" in t:
        return "wifi_enable"
    if app == "com.google.android.gm" or "gmail" in t or ("mail" in t and "send" in t):
        return "gmail_send_email"
    if "camera" in t or app == "com.motorola.camera5":
        return "camera_take_photo"
    if "contacts" in t or app == "com.google.android.contacts":
        return "contacts_search"

    return _fallback_skill(t)


def _fallback_skill(task: str) -> str:
    """Build a skill name from the first few meaningful words in the task.

    The text is lower-cased here so the helper also works when called with
    mixed-case input; without that, the lower-case-only word pattern would
    silently drop capital letters (e.g. "Open" -> "pen").

    Args:
        task: Task description in any letter case.

    Returns:
        Up to three non-stop-words joined with underscores, or
        "unknown_skill" when no usable word is found.
    """
    words = _WORD_RE.findall(task.lower())
    meaningful = [w for w in words if w not in _STOP_WORDS][:3]
    return "_".join(meaningful) if meaningful else "unknown_skill"


def main() -> None:
    """Read every trajectory JSON file and write one skill record per file.

    Files in TRAJECTORIES_DIR matching "*.json" are processed in sorted path
    order. Each must hold a JSON object with a "task" key; "app" is optional.
    The records are written to OUTPUT_FILE as JSON Lines.

    Raises:
        KeyError: If a trajectory file has no "task" entry.
    """
    records = []
    for path in sorted(TRAJECTORIES_DIR.glob("*.json")):
        with path.open(encoding="utf-8") as f:
            data = json.load(f)
        task = data["task"]
        # A missing or null "app" is treated as "no app information".
        app = data.get("app") or ""
        records.append({"skill": generate_skill(task, app), "task": task})

    # Create the output directory on first run instead of failing on open().
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with OUTPUT_FILE.open("w", encoding="utf-8") as f:
        f.writelines(json.dumps(record) + "\n" for record in records)

    print(f"Wrote {len(records)} skills to {OUTPUT_FILE}")


if __name__ == "__main__":
    main()