| |
| """Generate skills.jsonl from trajectory JSON files.""" |
|
|
| import json |
| import re |
| from pathlib import Path |
|
|
# Two levels up from this file: the directory that contains this script's
# own directory.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# Directory scanned for input trajectory ``*.json`` files.
TRAJECTORIES_DIR = PROJECT_ROOT.joinpath("trajectories")
# Destination file; one JSON record is written per line.
OUTPUT_FILE = PROJECT_ROOT.joinpath("data", "skills.jsonl")
|
|
|
|
def generate_skill(task: str, app: str) -> str:
    """Derive a snake_case skill name from the task description and app package.

    Rules are tried in a fixed order and the first match wins, so a task that
    mentions several apps is labelled by the earliest matching rule.  Most
    rules fire on either a keyword found in the lower-cased task text or an
    exact, case-sensitive match on the app package name.

    Args:
        task: Free-text task description; keywords are matched
            case-insensitively.
        app: Package name of the app for the trajectory; may be empty.

    Returns:
        A fixed skill name for a recognised app or setting, otherwise the
        name that ``_fallback_skill`` builds from the lower-cased task text.
    """
    t = task.lower()

    if "whatsapp" in t or app == "com.whatsapp":
        return "whatsapp_send_message"

    # Spotify is the only app with sub-skills, chosen from verbs in the task.
    if app == "com.spotify.music" or "spotify" in t:
        if "pause" in t:
            return "spotify_pause"
        if "search" in t and "play" in t:
            return "spotify_search_play"
        return "spotify_play_playlist"

    if "alarm" in t or app == "com.google.android.deskclock":
        return "create_alarm"

    if "linkedin" in t or app == "com.linkedin.android":
        return "linkedin_search_person"

    # Require "uber" to start a word.  A plain substring test also fires on
    # words such as "youtuber" or "exuberant", and because this rule runs
    # before the YouTube rule it would label those tasks as ride requests.
    # "uberx" / "ubereats" still match, as they did before.
    if app == "com.ubercab" or re.search(r"\buber", t):
        return "uber_request_ride"

    if "youtube" in t or app == "com.google.android.youtube":
        return "youtube_search"

    # "calender" is accepted as a common misspelling of "calendar".
    if "calendar" in t or "calender" in t or app == "com.google.android.calendar":
        return "calendar_create_event"

    if "slack" in t or app == "com.Slack":
        return "slack_open_channel"

    # The two settings rules below look at the task text only.
    if "bluetooth" in t:
        return "bluetooth_enable"

    if "wifi" in t:
        return "wifi_enable"

    # A generic "send ... mail" task is treated as Gmail as well.
    if app == "com.google.android.gm" or "gmail" in t or ("mail" in t and "send" in t):
        return "gmail_send_email"

    if "camera" in t or app == "com.motorola.camera5":
        return "camera_take_photo"

    if "contacts" in t or app == "com.google.android.contacts":
        return "contacts_search"

    # Nothing recognised: derive a name from the task wording itself.
    return _fallback_skill(t)
|
|
|
|
| def _fallback_skill(task: str) -> str: |
| """Build a skill name from the first few meaningful words in the task.""" |
| stop_words = { |
| "a", "an", "the", "on", "to", "for", "in", "and", "my", "it", |
| "using", "saying", "from", "at", "with", "am", "pm", |
| } |
| words = re.findall(r"[a-z]+", task) |
| meaningful = [w for w in words if w not in stop_words][:3] |
| return "_".join(meaningful) if meaningful else "unknown_skill" |
|
|
|
|
def main() -> None:
    """Convert every trajectory JSON file into one line of the skills file.

    Reads each ``*.json`` file in ``TRAJECTORIES_DIR`` in sorted path order,
    derives a skill name from its ``"task"`` value and its optional ``"app"``
    value (empty string when absent), and writes one JSON object with the
    keys ``"skill"`` and ``"task"`` per line to ``OUTPUT_FILE``, replacing any
    existing content.  A summary line is printed when done.

    Raises:
        KeyError: If a loaded trajectory object has no ``"task"`` key.
    """
    records = []

    for path in sorted(TRAJECTORIES_DIR.glob("*.json")):
        with path.open(encoding="utf-8") as f:
            data = json.load(f)

        task = data["task"]
        app = data.get("app", "")
        records.append({"skill": generate_skill(task, app), "task": task})

    # Create the output directory on demand: opening the file for writing
    # raises FileNotFoundError when its parent directory does not exist yet.
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with OUTPUT_FILE.open("w", encoding="utf-8") as f:
        for record in records:
            f.write(json.dumps(record) + "\n")

    print(f"Wrote {len(records)} skills to {OUTPUT_FILE}")
|
|
|
|
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|
|