| | from utils.screen_utils import ScreenController |
| | from models.vision_model import VisionModel |
| | import time |
| |
|
class Controller:
    """Drive the screen from natural-language commands.

    Combines a ``VisionModel`` (interprets the command and locates the target
    element on a screenshot) with a ``ScreenController`` (captures the screen
    and performs the mouse/keyboard action).
    """

    def __init__(self):
        self.screen_controller = ScreenController()
        self.vision_model = VisionModel()

    def execute_command(self, command):
        """Execute a natural-language command.

        The command is interpreted into an action plan, the plan's target is
        located on a fresh screenshot, and the planned action is performed at
        that location.

        Args:
            command: The command passed to ``VisionModel.interpret_command``.

        Returns:
            A ``(success, message)`` tuple. ``success`` is falsy when the
            command could not be interpreted, the target could not be found,
            the action failed, or any exception was raised along the way.
        """
        try:
            # Step 1: turn the command into a plan. The plan is read below via
            # the keys 'target', 'action' and (optionally) 'params'.
            action_plan = self.vision_model.interpret_command(command)
            if not action_plan:
                return False, "无法解析命令"

            # Step 2: grab the current screen contents.
            screen = self.screen_controller.capture_screen()

            # Step 3: find where the plan's target is on that screenshot.
            target = action_plan['target']
            location = self.vision_model.get_element_location(screen, target)
            if not location:
                return False, f"无法找到目标元素:{target}"

            # Step 4: carry out the action at the resolved location.
            success = self._perform_action(
                action_plan['action'],
                location,
                action_plan.get('params', {}),
            )

            return success, "操作执行成功" if success else "操作执行失败"

        except Exception as e:
            # Boundary handler: every failure (including a plan missing a
            # required key) is reported to the caller as a message rather
            # than propagated.
            return False, f"执行命令时出错:{str(e)}"

    def _perform_action(self, action, location, params):
        """Perform a single concrete action.

        Args:
            action: One of 'click', 'type', 'press', 'move', 'drag', 'scroll'.
            location: Mapping read with ``.get('x')`` / ``.get('y')``.
            params: Mapping of action-specific options: 'text' for 'type',
                'key' for 'press', 'target_x'/'target_y' for 'drag',
                'clicks' for 'scroll'.

        Returns:
            Whatever the underlying ``ScreenController`` call returns, or
            ``False`` when a coordinate is missing, the drag destination is
            missing, or the action name is not recognised.
        """
        x, y = location.get('x'), location.get('y')

        # Compare against None explicitly: 0 is a valid coordinate (the
        # top/left edge), so a truthiness test would wrongly reject it.
        if x is None or y is None:
            return False

        controller = self.screen_controller

        if action == 'click':
            return controller.click_position(x, y)

        if action == 'type':
            # Click first so the typed text goes to the target element.
            controller.click_position(x, y)
            return controller.type_text(params.get('text', ''))

        if action == 'press':
            return controller.press_key(params.get('key', ''))

        if action == 'move':
            return controller.move_to(x, y)

        if action == 'drag':
            target_x = params.get('target_x')
            target_y = params.get('target_y')
            # Same reasoning as above: a destination of 0 is legitimate.
            if target_x is not None and target_y is not None:
                controller.move_to(x, y)
                return controller.drag_to(target_x, target_y)
            return False

        if action == 'scroll':
            return controller.scroll(params.get('clicks', 0))

        # Unknown action name.
        return False