# File size: 7,121 Bytes
# 5669b22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# pip install pyautogui pillow   (plus pyobjc on macOS, pywin32 on Windows)
import sys
import os
import uuid
import shutil
import base64
import subprocess
import asyncio
import os
import json
import base64
import pyautogui
import ctypes
import time

if sys.platform == "darwin":
    # The computer-use loop and its Anthropic types are only imported (and
    # control_computer only defined) when running on macOS.
    from computer.loop import sampling_loop, APIProvider
    from computer.tools import ToolResult
    from anthropic.types.beta import BetaMessage, BetaMessageParam
    from anthropic import APIResponse

    async def control_computer(api_key: str, instruction: str, api_response_callback=None):
        """Run a computer-use sampling loop for a single user instruction.

        Args:
            api_key: API key forwarded to ``sampling_loop``.
            instruction: Text sent as the sole initial ``user`` message.
            api_response_callback: Optional callable invoked with each raw API
                response. When omitted, a default that pretty-prints the
                response's ``content`` field as JSON is used.

        Returns:
            Whatever ``sampling_loop`` returns (presumably the updated message
            list -- confirm against ``computer.loop``). Earlier versions
            discarded this value and returned ``None``.

        Side effects:
            Prints assistant text and tool output/errors to stdout, and writes
            every screenshot returned by a tool to
            ``cache/screenshot_<tool_use_id>.png``.
        """
        messages: list[BetaMessageParam] = [
            {
                "role": "user",
                "content": instruction,
            }
        ]

        provider = APIProvider.ANTHROPIC

        def output_callback(content_block):
            # Only plain-text blocks are echoed; other block types are ignored.
            if isinstance(content_block, dict) and content_block.get("type") == "text":
                print("Assistant:", content_block.get("text"))

        def tool_output_callback(result: ToolResult, tool_use_id: str):
            if result.output:
                print(f"> Tool Output [{tool_use_id}]:", result.output)
            if result.error:
                print(f"!!! Tool Error [{tool_use_id}]:", result.error)
            if result.base64_image:
                # Persist the screenshot so it can be inspected after the run.
                os.makedirs("cache", exist_ok=True)
                with open(f"cache/screenshot_{tool_use_id}.png", "wb") as f:
                    f.write(base64.b64decode(result.base64_image))
                print(f"Took screenshot screenshot_{tool_use_id}.png")

        # Fall back to a default response logger when the caller gave none.
        if api_response_callback is None:
            def api_response_callback(response: APIResponse[BetaMessage]):
                print(
                    "\n---------------\nAPI Response:\n",
                    json.dumps(json.loads(response.text)["content"], indent=4),  # type: ignore
                    "\n",
                )

        messages = await sampling_loop(
            model="claude-3-5-sonnet-20241022",
            provider=provider,
            system_prompt_suffix="",
            messages=messages,
            output_callback=output_callback,
            tool_output_callback=tool_output_callback,
            api_response_callback=api_response_callback,
            api_key=api_key,
            only_n_most_recent_images=10,
            max_tokens=4096,
        )
        # Hand the result back instead of silently dropping it.
        return messages


def get_clipboard_content():
    """Read the system clipboard and return its text and/or image content.

    Returns:
        dict: may contain
            ``'text'``  -- the clipboard text, and/or
            ``'image'`` -- the clipboard image as a base64-encoded PNG string.
        A key is present only when that kind of data was found. On platforms
        other than Windows and macOS a message is printed and an empty dict
        is returned.

    On Windows, errors raised while reading the clipboard are printed rather
    than propagated (best effort); the clipboard is always closed afterwards.
    """
    content = {}

    if sys.platform.startswith('win'):  # Windows
        import win32clipboard
        import win32con
        from PIL import Image
        import io

        win32clipboard.OpenClipboard()
        try:
            if win32clipboard.IsClipboardFormatAvailable(win32con.CF_UNICODETEXT):
                content['text'] = win32clipboard.GetClipboardData(win32con.CF_UNICODETEXT)
            if win32clipboard.IsClipboardFormatAvailable(win32clipboard.CF_DIB):
                dib = win32clipboard.GetClipboardData(win32clipboard.CF_DIB)
                # Re-encode the device-independent bitmap as PNG before base64.
                image = Image.open(io.BytesIO(dib))
                buffered = io.BytesIO()
                image.save(buffered, format="PNG")
                content['image'] = base64.b64encode(buffered.getvalue()).decode('utf-8')
        except Exception as e:
            print(f"Error accessing clipboard: {e}")
        finally:
            win32clipboard.CloseClipboard()
    elif sys.platform == 'darwin':  # MacOS
        from AppKit import NSPasteboard, NSPasteboardTypePNG, NSPasteboardTypeString

        pasteboard = NSPasteboard.generalPasteboard()
        # types() can be nil (None) when the pasteboard holds nothing; fall
        # back to an empty tuple so the membership tests below cannot raise.
        available_types = pasteboard.types() or ()

        if NSPasteboardTypeString in available_types:
            text = pasteboard.stringForType_(NSPasteboardTypeString)
            # Mirror the image branch: only report data that was actually read.
            if text is not None:
                content['text'] = text

        if NSPasteboardTypePNG in available_types:
            data = pasteboard.dataForType_(NSPasteboardTypePNG)
            if data:
                content['image'] = base64.b64encode(data.bytes()).decode('utf-8')
    else:
        print("Unsupported platform for clipboard operations.")
    return content

def copy_selected_content():
    """Send the platform's "copy" shortcut to the active application.

    Windows: presses Ctrl+C through pyautogui.
    macOS: asks System Events (via osascript) to press Cmd+C.
    Any other platform: prints a message and does nothing.
    """
    platform_name = sys.platform

    if platform_name.startswith('win'):
        pyautogui.hotkey('ctrl', 'c')
        return

    if platform_name == 'darwin':
        apple_script = 'tell application "System Events" to keystroke "c" using {command down}'
        subprocess.run(['osascript', '-e', apple_script])
        return

    print("Unsupported platform for copy operation.")

import sys
import subprocess

def screenshot_and_copy():
    """Capture the screen and place the image on the system clipboard.

    Windows: grabs the screen with Pillow and stores it as ``CF_DIB``.
    macOS: runs ``screencapture -c``.

    Raises:
        NotImplementedError: on any other platform.
    """
    if sys.platform.startswith('win'):
        from PIL import ImageGrab
        import win32clipboard
        from io import BytesIO

        img = ImageGrab.grab()
        with BytesIO() as output:
            img.save(output, 'BMP')
            # CF_DIB expects the bitmap without the 14-byte BMP file header.
            data = output.getvalue()[14:]

        win32clipboard.OpenClipboard()
        try:
            win32clipboard.EmptyClipboard()
            win32clipboard.SetClipboardData(win32clipboard.CF_DIB, data)
        finally:
            # Always release the clipboard, even if setting the data failed;
            # otherwise it stays locked by this process.
            win32clipboard.CloseClipboard()
    elif sys.platform == 'darwin':
        subprocess.run(['screencapture', '-c'])
    else:
        raise NotImplementedError("Unsupported OS")


def input_text(text):
    """Type ``text`` into the active window by pasting it via the clipboard.

    The text is first placed on the system clipboard (overwriting whatever
    was there) and then a paste is triggered in the foreground application.

    Args:
        text: The string to insert.

    Raises:
        NotImplementedError: on platforms other than Windows and macOS.
    """
    if sys.platform.startswith('win'):
        # Windows implementation
        import win32clipboard
        import win32con
        import win32gui
        import win32api

        def set_clipboard_text(value):
            win32clipboard.OpenClipboard()
            try:
                win32clipboard.EmptyClipboard()
                win32clipboard.SetClipboardData(win32con.CF_UNICODETEXT, value)
            finally:
                # Release the clipboard even on failure so it is not left
                # locked by this process.
                win32clipboard.CloseClipboard()

        def paste_to_active_window():
            hwnd = win32gui.GetForegroundWindow()
            if hwnd:
                win32api.PostMessage(hwnd, win32con.WM_PASTE, 0, 0)

        set_clipboard_text(text)
        paste_to_active_window()

    elif sys.platform == 'darwin':
        def set_clipboard_text_mac(value):
            # pbcopy reads the new clipboard content from stdin.
            subprocess.run(['pbcopy'], input=value.encode('utf-8'))

        def paste_in_active_app():
            script = '''

            tell application "System Events"

                keystroke "v" using command down

            end tell

            '''
            subprocess.call(['osascript', '-e', script])

        set_clipboard_text_mac(text)
        paste_in_active_app()
    else:
        raise NotImplementedError("Unsupported OS")

if __name__ == "__main__":
    # Manual smoke test, run only when this file is executed as a script.
    # Exactly one helper is active at a time; the others are left commented
    # out so they can be toggled by hand.

    # copy_selected_content()
    
    # clipboard_content = get_clipboard_content()
    # print(clipboard_content)
    
    # Currently active check: capture the screen onto the clipboard.
    screenshot_and_copy()

    # Paste test; the Chinese part repeats the English sentence so that
    # non-ASCII input is exercised as well.
    # time.sleep(2)
    # input_text("Hello, this is a test. 你好,这是一个测试。")