File size: 2,347 Bytes
3508f42 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | from utils.screen_utils import ScreenController
from models.vision_model import VisionModel
import time
class Controller:
    """Execute natural-language commands against the screen.

    Delegates command interpretation and target lookup to a ``VisionModel``
    and screen capture / input actions to a ``ScreenController``.
    """

    def __init__(self):
        self.screen_controller = ScreenController()
        self.vision_model = VisionModel()

    def execute_command(self, command):
        """Interpret ``command``, locate its target on screen and act on it.

        Args:
            command: The natural-language command to execute.

        Returns:
            A ``(success, message)`` tuple. ``success`` is whatever
            ``_perform_action`` returned, or ``False`` when the command could
            not be interpreted, the target could not be located, or any
            exception was raised along the way. ``message`` is a
            user-facing status string.
        """
        try:
            # 1. Interpret the command into an action plan.
            action_plan = self.vision_model.interpret_command(command)
            if not action_plan:
                return False, "无法解析命令"

            # 2. Capture the current screen.
            screen = self.screen_controller.capture_screen()

            # 3. Locate the target element on that screenshot.
            location = self.vision_model.get_element_location(
                screen, action_plan['target']
            )
            if not location:
                return False, f"无法找到目标元素:{action_plan['target']}"

            # 4. Perform the requested action at that location.
            success = self._perform_action(
                action_plan['action'], location, action_plan.get('params', {})
            )
            return success, ("操作执行成功" if success else "操作执行失败")
        except Exception as e:
            # Top-level boundary: report every failure (including a plan that
            # lacks the 'target' or 'action' key) as a message, never raise.
            return False, f"执行命令时出错:{str(e)}"

    def _perform_action(self, action, location, params):
        """Dispatch a single action to the screen controller.

        Args:
            action: One of ``'click'``, ``'type'``, ``'press'``, ``'move'``,
                ``'drag'`` or ``'scroll'``.
            location: Mapping that provides the ``'x'`` and ``'y'``
                coordinates of the target.
            params: Mapping of action-specific options: ``'text'`` for type,
                ``'key'`` for press, ``'target_x'`` / ``'target_y'`` for drag
                and ``'clicks'`` for scroll.

        Returns:
            The screen controller's result for the action, or ``False`` when
            a coordinate is missing, the drag destination is incomplete, or
            the action is not recognised.
        """
        x, y = location.get('x'), location.get('y')
        # Compare against None rather than truthiness: 0 is a valid
        # coordinate (top/left screen edge) and must not be rejected.
        if x is None or y is None:
            return False

        if action == 'click':
            return self.screen_controller.click_position(x, y)
        if action == 'type':
            # Click the target first, then type into it.
            self.screen_controller.click_position(x, y)
            return self.screen_controller.type_text(params.get('text', ''))
        if action == 'press':
            return self.screen_controller.press_key(params.get('key', ''))
        if action == 'move':
            return self.screen_controller.move_to(x, y)
        if action == 'drag':
            target_x = params.get('target_x')
            target_y = params.get('target_y')
            # Same rule as above: a destination coordinate of 0 is valid.
            if target_x is not None and target_y is not None:
                self.screen_controller.move_to(x, y)
                return self.screen_controller.drag_to(target_x, target_y)
            return False
        if action == 'scroll':
            return self.screen_controller.scroll(params.get('clicks', 0))

        # Unknown action.
        return False