# Source: Hugging Face Space "Vijayadhith7/AURA-Backend" (status: Running)
# File size: 5,063 Bytes | Revision: 188709e

import os
import json
import shutil
import logging
import subprocess
from typing import Iterator, Dict, Any, List

# Module-wide logger shared by every method of the dataset manager below.
logger = logging.getLogger("ScreenAnnotationDataset")
# Root-logger setup at INFO level; basicConfig is a no-op when the root
# logger already has handlers installed by the host application.
logging.basicConfig(level=logging.INFO)

class ScreenAnnotationDatasetManager:
    """
    Manager to clone, load, and parse Google Research's Screen Annotation Dataset.
    Used to train Aura Assist's UI element detection, icon labeling, and layout contextual reasoning.

    Repository Reference: https://github.com/google-research-datasets/screen_annotation

    On-disk layout (all paths derived from ``workspace_dir``):
        <workspace_dir>/repo              - the git checkout
        <workspace_dir>/repo/annotations  - preferred location of the JSON files
    """

    def __init__(self, workspace_dir: str = "./memory/datasets/screen_annotation"):
        """Record the workspace paths and create the workspace directory if missing."""
        self.workspace_dir = workspace_dir
        self.repo_dir = os.path.join(self.workspace_dir, "repo")
        self.annotations_dir = os.path.join(self.repo_dir, "annotations")
        os.makedirs(self.workspace_dir, exist_ok=True)

    @staticmethod
    def _describe_git_failure(error: Exception) -> str:
        """Return ``str(error)`` extended with git's captured stderr, if any.

        ``subprocess.run(..., capture_output=True)`` hides git's own message
        inside ``CalledProcessError.stderr``; without it the log only shows
        the exit status, which is rarely enough to diagnose the failure.
        """
        detail = str(error)
        stderr = getattr(error, "stderr", None)
        if isinstance(stderr, bytes):
            stderr = stderr.decode("utf-8", errors="replace")
        if isinstance(stderr, str) and stderr.strip():
            detail = f"{detail} | git stderr: {stderr.strip()}"
        return detail

    @staticmethod
    def _normalize_bounds(bounds: Any) -> List[Any]:
        """Return the first four entries of *bounds*, or ``[0, 0, 0, 0]``.

        Anything that is not a list/tuple with at least four entries (missing,
        ``null``, too short, wrong type) is treated exactly like an absent
        ``bounds`` key so one bad element cannot invalidate a whole screen.
        """
        if isinstance(bounds, (list, tuple)) and len(bounds) >= 4:
            return list(bounds[:4])
        return [0, 0, 0, 0]

    def clone_repository(self, repo_url: str = "https://github.com/google-research-datasets/screen_annotation.git") -> str:
        """
        Clones the Google Screen Annotation git repository if not already present.

        When the checkout directory already exists a ``git pull`` is attempted
        instead; a failing pull is only logged and the cached files are used.

        Args:
            repo_url: Git URL to clone from.

        Returns:
            The path to the repository checkout (``self.repo_dir``). It is
            absolute only if the ``workspace_dir`` given to the constructor was.

        Raises:
            RuntimeError: If the initial clone fails (chained to the original error).
        """
        if os.path.exists(self.repo_dir):
            logger.info("Screen Annotation repository already exists. Pulling latest updates...")
            try:
                subprocess.run(["git", "pull"], cwd=self.repo_dir, check=True, capture_output=True)
            except Exception as e:
                # Deliberately best-effort: a stale checkout is still usable.
                logger.warning(f"Git pull failed, using cached files: {self._describe_git_failure(e)}")
            return self.repo_dir

        logger.info(f"Cloning Screen Annotation dataset repository from {repo_url}...")
        try:
            subprocess.run(["git", "clone", repo_url, self.repo_dir], check=True, capture_output=True)
        except Exception as e:
            # Boundary handler: every failure mode (git missing, bad URL,
            # non-zero exit) is surfaced to callers as RuntimeError.
            detail = self._describe_git_failure(e)
            logger.error(f"Failed to clone Screen Annotation repository: {detail}")
            raise RuntimeError(f"Cloning failed: {detail}") from e

        logger.info("Repository cloned successfully!")
        return self.repo_dir

    def _parse_annotation_file(self, file_path: str) -> Dict[str, Any]:
        """
        Parses a single JSON screen annotation file.
        Extracts bounding boxes, element types (button, icon, text, checkbox), and text values.

        Args:
            file_path: Path to a UTF-8 encoded JSON file.

        Returns:
            A dict with ``screen_id`` (from the file, else the file name up to
            its first dot), ``app_name`` (default ``"Unknown"``) and
            ``elements``: a list of ``{"type", "text", "bounds"}`` dicts where
            ``bounds`` has ``left``/``top``/``right``/``bottom`` keys.

        Raises:
            OSError: If the file cannot be read.
            json.JSONDecodeError: If the file is not valid JSON.
            ValueError: If the top-level JSON value is not an object.
        """
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        if not isinstance(data, dict):
            raise ValueError(
                f"Expected a JSON object at the top level of {file_path}, "
                f"got {type(data).__name__}"
            )

        screen_id = data.get("screen_id", os.path.basename(file_path).split(".")[0])

        # A missing, null or non-list "annotations" value means "no elements".
        annotations = data.get("annotations")
        if not isinstance(annotations, list):
            annotations = []

        elements = []
        for item in annotations:
            if not isinstance(item, dict):
                # Skip stray non-object entries instead of losing the screen.
                continue
            # bounds order: [left, top, right, bottom]
            left, top, right, bottom = self._normalize_bounds(item.get("bounds"))
            elements.append({
                "type": item.get("label", "unknown"),
                "text": item.get("text", ""),
                "bounds": {
                    "left": left,
                    "top": top,
                    "right": right,
                    "bottom": bottom,
                },
            })

        return {
            "screen_id": screen_id,
            "app_name": data.get("app_name", "Unknown"),
            "elements": elements,
        }

    def load_annotations(self) -> Iterator[Dict[str, Any]]:
        """
        Loads and yields parsed screen annotations from the annotations directory.
        These are formatted directly into layouts compatible with Aura Assist.

        Every ``*.json`` file below the annotations directory is parsed; if
        that directory does not exist the whole repository checkout is scanned
        instead. Files are visited in sorted path order so results are
        reproducible across filesystems, and git's own ``.git`` metadata
        directory is never scanned. Files that fail to parse are logged and
        skipped.

        Yields:
            One parsed annotation dict per readable file
            (see ``_parse_annotation_file``).
        """
        if not os.path.exists(self.annotations_dir):
            # Fallback check if directories are nested differently
            logger.warning(f"Annotations path {self.annotations_dir} does not exist. Scanning repository root...")
            search_path = self.repo_dir
        else:
            search_path = self.annotations_dir

        found_files = []
        for root, dirs, files in os.walk(search_path):
            # In-place edit steers os.walk: prune .git and fix traversal order.
            dirs[:] = sorted(d for d in dirs if d != ".git")
            found_files.extend(
                os.path.join(root, name)
                for name in sorted(files)
                if name.endswith(".json")
            )

        if not found_files:
            logger.warning(f"No JSON annotation files found in {search_path}")
            return

        logger.info(f"Loading {len(found_files)} JSON screen annotations...")
        for file_path in found_files:
            try:
                yield self._parse_annotation_file(file_path)
            except Exception as e:
                # One unreadable file must not abort the whole dataset load.
                logger.error(f"Error parsing annotation file {file_path}: {e}")

    def clear_cache(self):
        """Removes the cloned dataset repo to free up local disk space.

        Only ``self.repo_dir`` is deleted. The workspace directory itself is
        left in place because it may be a caller-supplied location that holds
        unrelated files.
        """
        if os.path.exists(self.repo_dir):
            shutil.rmtree(self.repo_dir)
            logger.info("Screen Annotation cache cleared.")

# Quick test execution scaffold
def _run_scaffold() -> None:
    """Instantiate the dataset manager with its defaults and announce readiness."""
    ScreenAnnotationDatasetManager()
    logger.info("Screen Annotation Dataset Parser Scaffolding initialized.")
    print("Screen Annotation Dataset Ready for element-level layout training!")


if __name__ == "__main__":
    _run_scaffold()