Spaces:

Vijayadhith7
/

AURA-Backend

Running

App Files Files Community

AURA-Backend / python_backend /services /llama_touch_dataset.py

Vijayadhith7

Upload 22 files

188709e verified 9 days ago

raw

history blame contribute delete

5.5 kB

	import os
	import json
	import shutil
	import logging
	import subprocess
	from typing import Iterator, Dict, Any, List

	# Configure logging
	# NOTE(review): basicConfig runs at import time and configures the process-wide
	# root logger; it does nothing when the root logger already has handlers.
	logging.basicConfig(level=logging.INFO)
	# Shared module-level logger for the LlamaTouch dataset code below.
	logger = logging.getLogger("LlamaTouchDataset")

	class LlamaTouchDatasetManager:
	    """
	    Manager to clone, load, and parse the LlamaTouch dataset.

	    Used to train Aura Assist's mobile workflow automation, task completion
	    planning, and action prediction engines.

	    Repository Reference: https://github.com/LlamaTouch/LlamaTouch

	    Attributes:
	        workspace_dir: Directory that holds everything this manager downloads.
	        repo_dir: ``<workspace_dir>/repo``, the git clone target.
	        dataset_dir: ``<repo_dir>/dataset``, searched first for trace files.
	    """

	    def __init__(self, workspace_dir: str = "./memory/datasets/llama_touch"):
	        """
	        Record the workspace paths and create the workspace directory if missing.

	        Args:
	            workspace_dir: Directory under which the repository is cloned.
	        """
	        self.workspace_dir = workspace_dir
	        self.repo_dir = os.path.join(self.workspace_dir, "repo")
	        self.dataset_dir = os.path.join(self.repo_dir, "dataset")  # LlamaTouch dataset location
	        os.makedirs(self.workspace_dir, exist_ok=True)

	    @staticmethod
	    def _git_error_detail(exc: BaseException) -> str:
	        """Return ``str(exc)`` plus git's captured stderr, when there is any."""
	        stderr = getattr(exc, "stderr", None)
	        if isinstance(stderr, bytes):
	            # capture_output without text=True hands back raw bytes.
	            stderr = stderr.decode("utf-8", errors="replace")
	        stderr = (stderr or "").strip()
	        return f"{exc} ({stderr})" if stderr else str(exc)

	    def clone_repository(self, repo_url: str = "https://github.com/LlamaTouch/LlamaTouch.git") -> str:
	        """
	        Clone the LlamaTouch git repository, or refresh an existing checkout.

	        Args:
	            repo_url: Git URL to clone from when no local checkout exists.

	        Returns:
	            ``self.repo_dir`` (a relative path when ``workspace_dir`` was relative).

	        Raises:
	            RuntimeError: If a fresh clone is needed and ``git clone`` fails.
	        """
	        if os.path.exists(os.path.join(self.repo_dir, ".git")):
	            logger.info("LlamaTouch repository already exists. Pulling latest updates...")
	            try:
	                subprocess.run(["git", "pull"], cwd=self.repo_dir, check=True, capture_output=True)
	            except (subprocess.SubprocessError, OSError) as e:
	                # Best effort: a stale checkout is still usable (e.g. when offline).
	                logger.warning(f"Git pull failed, using cached files: {self._git_error_detail(e)}")
	            return self.repo_dir

	        if os.path.isdir(self.repo_dir) and os.listdir(self.repo_dir):
	            # A populated directory without its own .git must not be pulled:
	            # git would search upwards and update whichever repository
	            # happens to enclose the workspace instead.
	            logger.warning(
	                f"{self.repo_dir} exists but is not a git checkout; using its files as-is"
	            )
	            return self.repo_dir

	        logger.info(f"Cloning LlamaTouch dataset repository from {repo_url}...")
	        try:
	            subprocess.run(["git", "clone", repo_url, self.repo_dir], check=True, capture_output=True)
	        except Exception as e:
	            # Boundary: every clone failure is reported to callers as RuntimeError.
	            detail = self._git_error_detail(e)
	            logger.error(f"Failed to clone LlamaTouch repository: {detail}")
	            raise RuntimeError(f"Cloning failed: {detail}") from e
	        logger.info("Repository cloned successfully!")
	        return self.repo_dir

	    @staticmethod
	    def _extract_xy(coords: Any) -> List[Any]:
	        """Normalise a raw coordinate value to ``[x, y]``, using 0 for missing parts."""
	        if isinstance(coords, (list, tuple)):
	            x = coords[0] if len(coords) > 0 else 0
	            y = coords[1] if len(coords) > 1 else 0
	            return [x, y]
	        if isinstance(coords, dict):
	            return [coords.get("x", 0), coords.get("y", 0)]
	        # null or any other scalar carries no usable position.
	        return [0, 0]

	    def _parse_task_json(self, file_path: str) -> List[Dict[str, Any]]:
	        """
	        Parse a single LlamaTouch task trace file.

	        The file may hold one trace object or a list of them. Each trace is
	        read through the keys ``task_description``/``goal``, ``app_name``,
	        ``steps``/``actions``, ``package_name`` and ``category``; each step
	        through ``action_type``/``type``, ``coordinates``/``coords``, ``text``
	        and ``element_label``. Missing keys fall back to defaults.

	        Malformed entries (non-object traces or steps, null or non-list
	        coordinates) are skipped or defaulted rather than raised, so one bad
	        entry does not discard the other traces in the same file.

	        Args:
	            file_path: Path of the JSON file to read (UTF-8).

	        Returns:
	            One dict per trace with the keys ``task_description``, ``app``,
	            ``steps`` and ``metadata``.

	        Raises:
	            OSError: If the file cannot be opened.
	            ValueError: If the file is not valid UTF-8 JSON.
	        """
	        with open(file_path, "r", encoding="utf-8") as f:
	            data = json.load(f)

	        parsed_traces = []

	        # LlamaTouch files typically contain list of multi-step task traces
	        traces = data if isinstance(data, list) else [data]
	        for trace in traces:
	            if not isinstance(trace, dict):
	                logger.warning(f"Skipping non-object trace entry in {file_path}")
	                continue

	            raw_steps = trace.get("steps", trace.get("actions", []))
	            if not isinstance(raw_steps, list):
	                # e.g. "steps": null -- treat as a trace without steps.
	                raw_steps = []

	            steps = []
	            for step in raw_steps:
	                if not isinstance(step, dict):
	                    logger.warning(f"Skipping non-object step in {file_path}")
	                    continue
	                x, y = self._extract_xy(
	                    step.get("coordinates", step.get("coords", [0, 0]))
	                )
	                steps.append({
	                    "action": step.get("action_type", step.get("type", "tap")),
	                    "target_coordinates": {"x": x, "y": y},
	                    "input_text": step.get("text", ""),
	                    "target_element": step.get("element_label", ""),
	                })

	            parsed_traces.append({
	                "task_description": trace.get("task_description", trace.get("goal", "")),
	                "app": trace.get("app_name", "Android App"),
	                "steps": steps,
	                "metadata": {
	                    "package": trace.get("package_name", ""),
	                    "category": trace.get("category", "utility"),
	                },
	            })

	        return parsed_traces

	    def load_traces(self) -> Iterator[Dict[str, Any]]:
	        """
	        Yield parsed task-action sequences from the dataset folder.

	        Searches ``dataset_dir`` when it exists, otherwise ``repo_dir``, for
	        non-hidden ``*.json`` files and yields every trace parsed from them.
	        Files are visited in sorted order so repeated runs yield the same
	        sequence. Files that fail to parse are logged and skipped.

	        This is a generator: nothing is scanned or logged until it is iterated.

	        Yields:
	            Trace dicts as produced by ``_parse_task_json``.
	        """
	        search_path = self.dataset_dir if os.path.exists(self.dataset_dir) else self.repo_dir

	        found_files = []
	        for root, dirs, files in os.walk(search_path):
	            # In-place edit steers os.walk: skip git internals, fix the visit order.
	            dirs[:] = sorted(d for d in dirs if d != ".git")
	            for name in sorted(files):
	                if name.endswith(".json") and not name.startswith("."):
	                    found_files.append(os.path.join(root, name))

	        if not found_files:
	            logger.warning(f"No task trace files (.json) found in {search_path}")
	            return

	        logger.info(f"Loading {len(found_files)} LlamaTouch task trace files...")
	        for file_path in found_files:
	            try:
	                parsed_list = self._parse_task_json(file_path)
	            except Exception as e:
	                # Best effort per file: one unreadable file must not end the stream.
	                logger.error(f"Error parsing task trace file {file_path}: {e}")
	                continue
	            # Yield outside the try so only parsing errors are handled above.
	            yield from parsed_list

	    def clear_cache(self):
	        """
	        Remove the cloned dataset repo to free up local disk space.

	        Only ``repo_dir`` is deleted; ``workspace_dir`` itself is kept so that
	        unrelated files in a caller-supplied workspace are never removed.
	        """
	        if os.path.exists(self.repo_dir):
	            shutil.rmtree(self.repo_dir)
	            logger.info("LlamaTouch cache cleared.")

	# Quick test execution scaffold: running this file directly only builds a
	# manager (which creates the default workspace directory) and reports that
	# it is ready. Nothing is cloned or parsed here.
	if __name__ == "__main__":
	    manager = LlamaTouchDatasetManager()
	    logger.info("LlamaTouch Dataset Manager Scaffolding initialized.")
	    print("LlamaTouch Dataset Ready for mobile workflow action prediction training!")