android-skill-router / scripts /generate_skill_dataset.py
kriyanshi's picture
Prepare Android Skill Router for Build Small hackathon submission.
6524169
Raw
History Blame Contribute Delete
2.78 kB
#!/usr/bin/env python3
"""Generate skills.jsonl from trajectory JSON files."""
import json
import re
from pathlib import Path
# Project root is two directory levels above this script file.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# Input directory scanned for trajectory ``*.json`` files.
TRAJECTORIES_DIR = PROJECT_ROOT.joinpath("trajectories")
# Output JSON Lines file: one {"skill", "task"} record per trajectory.
OUTPUT_FILE = PROJECT_ROOT.joinpath("data", "skills.jsonl")
def generate_skill(task: str, app: str) -> str:
    """Map a task description and app package to a snake_case skill name.

    Rules are checked in a fixed priority order; the first rule whose
    keyword occurs in the lowercased task text, or whose package equals
    ``app``, decides the result. When no rule applies, the name is built
    from the task text by ``_fallback_skill``.

    Args:
        task: Free-text task description; matched case-insensitively.
        app: Android package name of the app, or "" when unknown.

    Returns:
        The skill name as a snake_case string.
    """
    lowered = task.lower()

    def mentions(*keywords: str) -> bool:
        """Return True when any keyword is a substring of the task text."""
        return any(keyword in lowered for keyword in keywords)

    # WhatsApp has the highest priority, ahead of every other rule.
    if app == "com.whatsapp" or mentions("whatsapp"):
        return "whatsapp_send_message"

    # Spotify is the only app with several skills, chosen by the verb used.
    if app == "com.spotify.music" or mentions("spotify"):
        if mentions("pause"):
            return "spotify_pause"
        wants_search_and_play = mentions("search") and mentions("play")
        if wants_search_and_play:
            return "spotify_search_play"
        return "spotify_play_playlist"

    # Remaining rules as (skill, package or None, keyword hit); order matters.
    ordered_rules = (
        ("create_alarm", "com.google.android.deskclock", mentions("alarm")),
        ("linkedin_search_person", "com.linkedin.android", mentions("linkedin")),
        ("uber_request_ride", "com.ubercab", mentions("uber")),
        ("youtube_search", "com.google.android.youtube", mentions("youtube")),
        (
            "calendar_create_event",
            "com.google.android.calendar",
            # "calender" is matched on purpose to catch the common misspelling.
            mentions("calendar", "calender"),
        ),
        ("slack_open_channel", "com.Slack", mentions("slack")),
        # Bluetooth and wifi are matched on task text only (no package).
        ("bluetooth_enable", None, mentions("bluetooth")),
        ("wifi_enable", None, mentions("wifi")),
        (
            "gmail_send_email",
            "com.google.android.gm",
            mentions("gmail") or (mentions("mail") and mentions("send")),
        ),
        ("camera_take_photo", "com.motorola.camera5", mentions("camera")),
        ("contacts_search", "com.google.android.contacts", mentions("contacts")),
    )
    for skill, package, keyword_hit in ordered_rules:
        if keyword_hit or (package is not None and app == package):
            return skill

    return _fallback_skill(lowered)
def _fallback_skill(task: str) -> str:
    """Build a skill name from the first few meaningful words in the task.

    The task is lowercased, split into runs of ASCII letters, stripped of
    stop words, and the first three remaining words are joined with
    underscores.

    Args:
        task: Task description in any letter case.

    Returns:
        A snake_case name of up to three words, or "unknown_skill" when no
        meaningful word remains (empty text, only stop words, only digits).
    """
    stop_words = {
        "a", "an", "the", "on", "to", "for", "in", "and", "my", "it",
        "using", "saying", "from", "at", "with", "am", "pm",
    }
    # Lowercase here rather than relying on the caller: the pattern only
    # matches a-z, so capitals would otherwise be dropped from words
    # ("Open" -> "pen").
    words = re.findall(r"[a-z]+", task.lower())
    meaningful = [w for w in words if w not in stop_words][:3]
    return "_".join(meaningful) if meaningful else "unknown_skill"
def main() -> None:
    """Convert every trajectory JSON file into one line of skills.jsonl.

    Reads each ``*.json`` file in TRAJECTORIES_DIR in sorted filename
    order, derives a skill name from its "task" and optional "app" fields,
    and writes the resulting {"skill", "task"} records to OUTPUT_FILE as
    JSON Lines, replacing any previous contents.

    Raises:
        KeyError: If a trajectory file has no "task" field.
        json.JSONDecodeError: If a trajectory file is not valid JSON.
    """
    records = []
    for path in sorted(TRAJECTORIES_DIR.glob("*.json")):
        with path.open(encoding="utf-8") as f:
            data = json.load(f)
        task = data["task"]
        # "app" is optional; an empty string leaves only keyword matching.
        app = data.get("app", "")
        records.append({"skill": generate_skill(task, app), "task": task})

    # The output directory may not exist yet (e.g. on a fresh checkout);
    # without this, opening the file for writing raises FileNotFoundError.
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with OUTPUT_FILE.open("w", encoding="utf-8") as f:
        f.writelines(json.dumps(record) + "\n" for record in records)
    print(f"Wrote {len(records)} skills to {OUTPUT_FILE}")


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()