File size: 2,347 Bytes
3508f42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from utils.screen_utils import ScreenController
from models.vision_model import VisionModel
import time

class Controller:
    """Translate a natural-language command into one on-screen action.

    Uses a ``VisionModel`` to interpret the command and to locate the target
    element in a screenshot, and a ``ScreenController`` to capture the screen
    and to emit the resulting mouse/keyboard action.
    """

    def __init__(self):
        self.screen_controller = ScreenController()
        self.vision_model = VisionModel()

    def execute_command(self, command):
        """Execute a natural-language command.

        Args:
            command: The command handed to ``VisionModel.interpret_command``.

        Returns:
            A ``(success, message)`` tuple. ``success`` is ``False`` when the
            command cannot be interpreted, the target element cannot be
            located, or any exception is raised along the way; otherwise it
            is whatever ``_perform_action`` returned. ``message`` is a
            user-facing status string.
        """
        try:
            # 1. Interpret the command into an action plan. The plan is read
            #    below via the keys 'target', 'action' and (optional) 'params'.
            action_plan = self.vision_model.interpret_command(command)
            if not action_plan:
                return False, "无法解析命令"

            # 2. Capture the current screen.
            screen = self.screen_controller.capture_screen()

            # 3. Locate the target element on that screenshot.
            location = self.vision_model.get_element_location(screen, action_plan['target'])
            if not location:
                return False, f"无法找到目标元素:{action_plan['target']}"

            # 4. Perform the action at the located position.
            success = self._perform_action(action_plan['action'], location, action_plan.get('params', {}))

            return success, ("操作执行成功" if success else "操作执行失败")

        except Exception as e:
            # Top-level boundary: callers get a failure tuple instead of an
            # exception (this also covers a plan missing 'target'/'action').
            return False, f"执行命令时出错:{str(e)}"

    def _perform_action(self, action, location, params):
        """Dispatch a single action to the screen controller.

        Args:
            action: One of 'click', 'type', 'press', 'move', 'drag', 'scroll'.
            location: Mapping read via ``.get('x')`` and ``.get('y')``.
            params: Mapping of action-specific options: 'text' for 'type',
                'key' for 'press', 'target_x'/'target_y' for 'drag',
                'clicks' for 'scroll'.

        Returns:
            The screen controller's result for the action, or ``False`` when
            a coordinate is missing, the drag target is missing, or the
            action is not recognised.
        """
        x, y = location.get('x'), location.get('y')

        # Compare against None rather than relying on truthiness: 0 is a
        # valid coordinate (left/top screen edge) and must not be rejected.
        if x is None or y is None:
            return False

        if action == 'click':
            return self.screen_controller.click_position(x, y)
        elif action == 'type':
            # Click first so the typed text goes to the target element.
            self.screen_controller.click_position(x, y)
            return self.screen_controller.type_text(params.get('text', ''))
        elif action == 'press':
            return self.screen_controller.press_key(params.get('key', ''))
        elif action == 'move':
            return self.screen_controller.move_to(x, y)
        elif action == 'drag':
            target_x = params.get('target_x')
            target_y = params.get('target_y')
            # Same None-vs-zero distinction as above for the drop position.
            if target_x is not None and target_y is not None:
                self.screen_controller.move_to(x, y)
                return self.screen_controller.drag_to(target_x, target_y)
        elif action == 'scroll':
            return self.screen_controller.scroll(params.get('clicks', 0))

        # Unknown action, or a drag without a complete target.
        return False