File size: 5,063 Bytes
188709e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
import json
import shutil
import logging
import subprocess
from typing import Iterator, Dict, Any, List

# Configure logging.
# NOTE: basicConfig() runs at import time and configures the *root* logger at
# INFO level, so importing this module affects process-wide logging output
# unless the root logger already has handlers.
logging.basicConfig(level=logging.INFO)
# Module-wide logger; every message emitted by this file goes through it.
logger = logging.getLogger("ScreenAnnotationDataset")

class ScreenAnnotationDatasetManager:
    """
    Manager to clone, load, and parse Google Research's Screen Annotation Dataset.
    Used to train Aura Assist's UI element detection, icon labeling, and layout contextual reasoning.

    Repository Reference: https://github.com/google-research-datasets/screen_annotation

    Paths managed by an instance:
        workspace_dir    -- root folder owned by the caller (created on construction)
        repo_dir         -- <workspace_dir>/repo, the git checkout
        annotations_dir  -- <repo_dir>/annotations, preferred location of JSON files

    NOTE(review): the JSON schema read by _parse_annotation_file
    ("screen_id", "app_name", "annotations" -> "bounds"/"label"/"text") is what
    this parser expects; confirm it against the actual repository contents.
    """

    def __init__(self, workspace_dir: str = "./memory/datasets/screen_annotation"):
        """
        Records the workspace paths and creates the workspace directory if it is missing.

        Args:
            workspace_dir: Folder under which the dataset checkout is stored.
        """
        self.workspace_dir = workspace_dir
        self.repo_dir = os.path.join(self.workspace_dir, "repo")
        self.annotations_dir = os.path.join(self.repo_dir, "annotations")
        os.makedirs(self.workspace_dir, exist_ok=True)

    @staticmethod
    def _describe_git_failure(exc: Exception) -> str:
        """
        Builds a log-friendly description of a failed git invocation.

        Git is run with capture_output=True, so its stderr is only available on
        the exception object; append it so the real cause is not lost.
        """
        stderr = getattr(exc, "stderr", None)
        if isinstance(stderr, bytes):
            stderr = stderr.decode("utf-8", errors="replace")
        detail = stderr.strip() if isinstance(stderr, str) else ""
        return f"{exc} ({detail})" if detail else str(exc)

    @staticmethod
    def _normalize_bounds(bounds: Any) -> List[Any]:
        """
        Returns exactly four values [left, top, right, bottom].

        Anything that is not a list/tuple with at least four entries (missing,
        null, too short, wrong type) falls back to [0, 0, 0, 0], the same
        default used when the "bounds" key is absent.
        """
        if isinstance(bounds, (list, tuple)) and len(bounds) >= 4:
            return list(bounds[:4])
        return [0, 0, 0, 0]

    def clone_repository(self, repo_url: str = "https://github.com/google-research-datasets/screen_annotation.git") -> str:
        """
        Clones the Google Screen Annotation git repository if not already present.

        Behaviour by state of ``repo_dir``:
          * contains ``.git``      -> best-effort ``git pull``; failures are logged
                                      and the cached checkout is used.
          * exists, non-empty,
            but has no ``.git``    -> used as-is (git is NOT invoked, because git
                                      would otherwise walk up and operate on an
                                      enclosing repository).
          * missing or empty       -> ``git clone`` into it.

        Args:
            repo_url: Git URL to clone from.

        Returns:
            ``self.repo_dir`` (relative if ``workspace_dir`` was given as a relative path).

        Raises:
            RuntimeError: If the clone fails.
        """
        if os.path.exists(self.repo_dir):
            if os.path.exists(os.path.join(self.repo_dir, ".git")):
                logger.info("Screen Annotation repository already exists. Pulling latest updates...")
                try:
                    subprocess.run(["git", "pull"], cwd=self.repo_dir, check=True, capture_output=True)
                except Exception as e:
                    # Deliberate best-effort: a stale checkout is still usable.
                    logger.warning("Git pull failed, using cached files: %s", self._describe_git_failure(e))
                return self.repo_dir

            is_empty_dir = os.path.isdir(self.repo_dir) and not os.listdir(self.repo_dir)
            if not is_empty_dir:
                logger.warning(
                    "%s exists but is not a git checkout; using its contents as-is without pulling.",
                    self.repo_dir,
                )
                return self.repo_dir
            # An empty directory (e.g. left behind by an aborted run) is a valid clone target.

        logger.info("Cloning Screen Annotation dataset repository from %s...", repo_url)
        try:
            subprocess.run(["git", "clone", repo_url, self.repo_dir], check=True, capture_output=True)
        except Exception as e:
            detail = self._describe_git_failure(e)
            logger.error("Failed to clone Screen Annotation repository: %s", detail)
            raise RuntimeError(f"Cloning failed: {detail}") from e
        logger.info("Repository cloned successfully!")
        return self.repo_dir

    def _parse_annotation_file(self, file_path: str) -> Dict[str, Any]:
        """
        Parses a single JSON screen annotation file.
        Extracts bounding boxes, element types (the "label" field), and text values.

        Args:
            file_path: Path to a UTF-8 encoded JSON file.

        Returns:
            A dict with "screen_id" (falls back to the file name up to its first
            dot), "app_name" (falls back to "Unknown") and "elements", a list of
            {"type", "text", "bounds": {"left", "top", "right", "bottom"}}.

        Raises:
            OSError: If the file cannot be read.
            ValueError: If the content is not valid JSON, the top level is not a
                JSON object, or "annotations" is not a list.
        """
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        if not isinstance(data, dict):
            raise ValueError(f"expected a JSON object at top level, got {type(data).__name__}")

        # An explicit null is treated the same as a missing key.
        raw_items = data.get("annotations") or []
        if not isinstance(raw_items, list):
            raise ValueError(f"'annotations' must be a list, got {type(raw_items).__name__}")

        screen_id = data.get("screen_id", os.path.basename(file_path).split(".")[0])
        elements = []

        # Parse annotation list (coordinates and semantics)
        for item in raw_items:
            if not isinstance(item, dict):
                logger.warning("Skipping non-object annotation entry in %s", file_path)
                continue

            # One malformed box must not discard the rest of the screen.
            left, top, right, bottom = self._normalize_bounds(item.get("bounds"))
            elements.append({
                "type": item.get("label", "unknown"),
                "text": item.get("text", ""),
                "bounds": {
                    "left": left,
                    "top": top,
                    "right": right,
                    "bottom": bottom
                }
            })

        return {
            "screen_id": screen_id,
            "app_name": data.get("app_name", "Unknown"),
            "elements": elements
        }

    def load_annotations(self) -> Iterator[Dict[str, Any]]:
        """
        Loads and yields parsed screen annotations from the annotations directory.

        Searches ``annotations_dir`` recursively for ``*.json`` files; if that
        directory does not exist, the whole ``repo_dir`` is scanned instead
        (skipping ``.git``). Files are visited in sorted order so repeated runs
        yield the same sequence. Files that fail to parse are logged and skipped.

        Yields:
            One dict per successfully parsed file, as returned by
            ``_parse_annotation_file``.
        """
        if os.path.exists(self.annotations_dir):
            search_path = self.annotations_dir
        else:
            # Fallback check if directories are nested differently
            logger.warning(
                "Annotations path %s does not exist. Scanning repository root...",
                self.annotations_dir,
            )
            search_path = self.repo_dir

        found_files: List[str] = []
        for root, dirnames, filenames in os.walk(search_path):
            # In-place edit controls os.walk's descent: fixed order, no git internals.
            dirnames[:] = sorted(d for d in dirnames if d != ".git")
            found_files.extend(
                os.path.join(root, name)
                for name in sorted(filenames)
                if name.endswith(".json")
            )

        if not found_files:
            logger.warning("No JSON annotation files found in %s", search_path)
            return

        logger.info("Loading %d JSON screen annotations...", len(found_files))
        for file_path in found_files:
            try:
                parsed = self._parse_annotation_file(file_path)
            except Exception as e:
                # Best-effort loading: one bad file must not stop the stream.
                logger.error("Error parsing annotation file %s: %s", file_path, e)
            else:
                # Yield outside the try so exceptions raised by the consumer
                # are not misreported as parse errors.
                yield parsed

    def clear_cache(self) -> None:
        """
        Removes the cloned dataset repo to free up local disk space.

        Only ``repo_dir`` is deleted; ``workspace_dir`` itself and any other
        files stored in it are left in place, so the manager stays usable and
        a later ``clone_repository`` call can re-create the checkout.
        """
        if os.path.exists(self.repo_dir):
            shutil.rmtree(self.repo_dir)
            logger.info("Screen Annotation cache cleared.")

# Quick test execution scaffold: runs only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    # Constructing the manager creates the workspace directory; nothing is
    # cloned or parsed here.
    manager = ScreenAnnotationDatasetManager()
    logger.info("Screen Annotation Dataset Parser Scaffolding initialized.")
    print("Screen Annotation Dataset Ready for element-level layout training!")