File size: 3,781 Bytes
2a729e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
"""

Image processing helper functions for OmniParser

"""
from pathlib import Path
from typing import List, Optional, Tuple

import cv2
import numpy as np
from PIL import Image


def load_image(image_path: str) -> Image.Image:
    """Open an image file with Pillow.

    Args:
        image_path: Location of the image file, handed to ``Image.open``.

    Returns:
        The image object produced by ``Image.open`` for that path.
    """
    opened = Image.open(image_path)
    return opened


def resize_image(image: Image.Image, max_size: Tuple[int, int] = (1920, 1080)) -> Image.Image:
    """Return a copy of ``image`` shrunk to fit within ``max_size``.

    The aspect ratio is preserved. The work is done by ``Image.thumbnail``,
    which only ever reduces an image, so an input that already fits inside
    ``max_size`` comes back at its original dimensions.

    Args:
        image: Source image. It is left untouched; resizing happens on a copy.
        max_size: ``(max_width, max_height)`` bounding box.

    Returns:
        A new image no larger than ``max_size`` in either dimension.
    """
    # Pillow >= 9.1 groups the filters under ``Image.Resampling``; older
    # releases only expose them as module-level constants on ``Image``.
    lanczos = getattr(Image, "Resampling", Image).LANCZOS

    # ``thumbnail`` resizes in place, so work on a copy to avoid silently
    # shrinking the image the caller still holds.
    resized = image.copy()
    resized.thumbnail(max_size, lanczos)
    return resized


def capture_screenshot() -> Optional[Image.Image]:
    """Grab the primary monitor and return it as an RGB image.

    ``mss`` is an optional dependency, so it is imported lazily here rather
    than at module import time.

    Returns:
        The captured screen as an RGB ``PIL.Image.Image``, or ``None`` (after
        printing an install hint) when ``mss`` is not installed.
    """
    # Keep the ``try`` limited to the import so that an ImportError raised
    # for any other reason during the grab is not reported as "mss missing".
    try:
        import mss
    except ImportError:
        print("⚠️  mss not installed. Install with: pip install mss")
        return None

    with mss.mss() as sct:
        # In mss, index 0 is the virtual screen spanning every monitor;
        # index 1 is the first physical (primary) monitor.
        monitor = sct.monitors[1]
        screenshot = sct.grab(monitor)
        return Image.frombytes('RGB', screenshot.size, screenshot.rgb)


def annotate_image(image: Image.Image, elements: List[dict]) -> Image.Image:
    """Draw labelled bounding boxes on a copy of ``image`` for visualization.

    Each element is a dict read with these keys:

    * ``"bbox"``: sequence whose first four items are ``x1, y1, x2, y2``,
      passed straight to ``ImageDraw.rectangle``. Elements with fewer than
      four values (or no ``"bbox"`` key) are skipped.
    * ``"element_type"``: selects the outline colour; unknown or missing
      types are drawn in white.
    * ``"label"``: caption text, defaulting to ``"elem"``.
    * ``"confidence"``: number shown with two decimals, defaulting to 0.

    Args:
        image: Image to annotate. It is not modified.
        elements: Detected elements as described above.

    Returns:
        An annotated copy of ``image``.
    """
    from PIL import ImageDraw

    annotated = image.copy()
    draw = ImageDraw.Draw(annotated)

    colors = {
        "button": "red",
        "textfield": "blue",
        "icon": "green",
        "text": "yellow",
        "image": "purple",
    }

    for elem in elements:
        bbox = elem.get("bbox", [])
        if len(bbox) < 4:
            continue

        x1, y1, x2, y2 = bbox[:4]
        color = colors.get(elem.get("element_type", "unknown"), "white")

        draw.rectangle([x1, y1, x2, y2], outline=color, width=2)

        label = f"{elem.get('label', 'elem')} ({elem.get('confidence', 0):.2f})"
        # The caption normally sits just above the box. Clamp it so a box
        # touching the top edge still gets a visible label instead of one
        # drawn off-canvas at a negative y.
        draw.text((x1, max(y1 - 10, 0)), label, fill=color)

    return annotated


def save_image(image: Image.Image, output_path: str) -> None:
    """Write ``image`` to ``output_path`` and print a confirmation line.

    Args:
        image: Image to persist.
        output_path: Destination, passed unchanged to ``Image.save``.
    """
    image.save(output_path)
    print(f"✅ Image saved: {output_path}")


def create_sample_screenshot() -> Image.Image:
    """Build a synthetic 800x600 UI mock-up to use as test input.

    The white canvas holds a button, a search box, three stacked menu
    entries and a large status panel. Each one is an outlined rectangle
    with a text caption.

    Returns:
        The freshly drawn RGB image.
    """
    from PIL import ImageDraw

    canvas = Image.new('RGB', (800, 600), color='white')
    pen = ImageDraw.Draw(canvas)

    # One row per widget:
    # (box, fill, outline, outline width, caption position, caption, caption colour)
    widgets = [
        # Button
        ([50, 50, 200, 100], 'lightblue', 'blue', 2, (80, 65), "Click Me", 'black'),
        # Search box
        ([250, 50, 700, 100], 'white', 'gray', 2, (260, 65), "Search...", 'gray'),
    ]

    # Menu items: a 40px-tall entry every 50px, starting at y=150.
    menu_captions = ['Home', 'About', 'Contact']
    for top, caption in zip(range(150, 150 + 50 * len(menu_captions), 50), menu_captions):
        widgets.append(
            ([50, top, 200, top + 40], 'lightgray', 'black', 1, (70, top + 10), caption, 'black')
        )

    # Status area
    widgets.append(
        ([250, 150, 700, 500], 'lightyellow', 'orange', 2, (260, 160), "Status Area", 'black')
    )

    for box, fill, outline, width, caption_at, caption, caption_fill in widgets:
        pen.rectangle(box, fill=fill, outline=outline, width=width)
        pen.text(caption_at, caption, fill=caption_fill)

    return canvas


def _run_examples() -> None:
    """Run the demo: build the sample screenshot, save it, then resize it."""
    print("Image Processing Examples")
    print("=" * 60)

    # Draw the synthetic screenshot and write it to the working directory.
    print("๐Ÿ“ท Creating sample screenshot...")
    demo_image = create_sample_screenshot()
    demo_image.save("sample_screenshot.png")
    print("โœ… Sample saved as: sample_screenshot.png")

    # Fit the same image into a 640x480 box and report the resulting size.
    print("\n๐Ÿ“ Resizing image...")
    shrunk = resize_image(demo_image, (640, 480))
    print(f"โœ… Resized to: {shrunk.size}")

    print("\nโœ… All examples completed!")


if __name__ == "__main__":
    _run_examples()