Upload agent.py

3921f4a verified 9 days ago

7.36 kB

	# C:\agent\agent.py
	from smolagents import CodeAgent, TransformersModel, GradioUI, tool
	import subprocess
	import json
	from pathlib import Path
	from datetime import datetime

	# =============================================================================
	# TOOLS
	# =============================================================================

	@tool
	def execute_command(command: str) -> str:
	    """
	    Execute a shell command and return the output. Be careful with destructive commands.

	    The result lists the exit code, STDOUT and STDERR. It is capped at 4000
	    characters; a trailing "..." marks output that was cut off. Commands that
	    run longer than 60 seconds are stopped and reported as an error.

	    Args:
	        command: The shell command string to execute.
	    """
	    max_chars = 4000
	    try:
	        # shell=True is deliberate: the tool's contract is "run this shell
	        # command line", including pipes and redirects.
	        # errors="replace" keeps bytes that are invalid in the locale encoding
	        # from raising a decode error and hiding the command's real output.
	        result = subprocess.run(
	            command,
	            shell=True,
	            capture_output=True,
	            text=True,
	            errors="replace",
	            timeout=60,
	        )
	        output = f"[Exit code: {result.returncode}]\n\nSTDOUT:\n{result.stdout or '(no output)'}\n\nSTDERR:\n{result.stderr or '(no errors)'}"
	        # Mark truncation explicitly (same convention as read_file) so the
	        # caller can tell the output is incomplete.
	        if len(output) > max_chars:
	            return output[:max_chars] + "..."
	        return output
	    except Exception as e:
	        # Tool boundary: failures (including timeouts) are returned as text
	        # instead of propagating to the caller.
	        return f"Error: {str(e)}"

	@tool
	def read_file(file_path: str) -> str:
	    """
	    Read a file from disk and return its contents.

	    The file is decoded as UTF-8 (undecodable bytes are replaced). At most the
	    first 6000 characters are returned, followed by "..." when the file is longer.

	    Args:
	        file_path: Absolute or relative path to the file to read.
	    """
	    max_chars = 6000
	    try:
	        path = Path(file_path).expanduser().resolve()
	        if not path.exists():
	            return f"File not found: {path}"
	        # Decode as UTF-8 to match what write_file produces; relying on the
	        # locale default garbles non-ASCII text on platforms where it differs.
	        with path.open(encoding="utf-8", errors="replace") as handle:
	            # One extra character is enough to detect truncation without
	            # loading an arbitrarily large file into memory.
	            content = handle.read(max_chars + 1)
	        if len(content) > max_chars:
	            return content[:max_chars] + "..."
	        return content
	    except Exception as e:
	        # Tool boundary: report the failure as text (e.g. path is a directory,
	        # permission denied) instead of raising.
	        return f"Error: {str(e)}"

	@tool
	def write_file(file_path: str, content: str) -> str:
	    """
	    Write text to a file. Creates parent directories if needed. Overwrites existing files.

	    Args:
	        file_path: Absolute or relative path where the file should be written.
	        content: The text content to write into the file.
	    """
	    try:
	        # Normalise the path first so the confirmation message shows the
	        # absolute location that was actually written.
	        target = Path(file_path).expanduser().resolve()
	        target.parent.mkdir(parents=True, exist_ok=True)
	        target.write_text(content, encoding="utf-8")
	    except Exception as err:
	        # Failures are reported as text instead of being raised.
	        return f"Error: {str(err)}"
	    else:
	        return f"Wrote {len(content)} chars to {target}"

	@tool
	def list_directory(dir_path: str = ".") -> str:
	    """
	    List files and folders in a directory.

	    Args:
	        dir_path: Path to the directory to list. Defaults to current directory.
	    """
	    try:
	        folder = Path(dir_path).expanduser().resolve()
	        if not folder.is_dir():
	            return f"Not a directory: {folder}"

	        # One "[DIR] name" / "[FILE] name" line per entry, in sorted path order.
	        lines = []
	        for child in sorted(folder.iterdir()):
	            tag = "[DIR]" if child.is_dir() else "[FILE]"
	            lines.append(f"{tag} {child.name}")

	        header = f"Contents of {folder} ({len(lines)} items):\n"
	        return header + "\n".join(lines)
	    except Exception as err:
	        # Failures are reported as text instead of being raised.
	        return f"Error: {str(err)}"

	@tool
	def get_system_info() -> str:
	    """
	    Get system info (OS, Python version, working directory) and GPU status via nvidia-smi.
	    No arguments needed.

	    Returns a JSON object with the keys "os", "python", "cwd" and "gpu". When
	    nvidia-smi is missing, times out or exits with an error, "gpu" is set to
	    "nvidia-smi failed" and the other fields are still reported.
	    """
	    try:
	        import platform
	        info = {"os": platform.platform(), "python": platform.python_version(), "cwd": str(Path.cwd())}
	        try:
	            result = subprocess.run(
	                ["nvidia-smi", "--query-gpu=name,memory.used,memory.total,temperature.gpu", "--format=csv,noheader"],
	                capture_output=True,
	                text=True,
	                errors="replace",
	                timeout=15,  # a hung driver query must not block the tool forever
	            )
	            gpu = result.stdout.strip() if result.returncode == 0 else "nvidia-smi failed"
	        except (OSError, subprocess.SubprocessError):
	            # OSError covers "nvidia-smi is not installed"; SubprocessError
	            # covers the timeout. Neither should discard the OS/Python info.
	            gpu = "nvidia-smi failed"
	        info["gpu"] = gpu
	        return json.dumps(info, indent=2)
	    except Exception as e:
	        # Tool boundary: report unexpected failures as text instead of raising.
	        return f"Error: {str(e)}"

	@tool
	def get_time() -> str:
	    """
	    Get the current local date and time.
	    No arguments needed.
	    """
	    # Naive local time rendered as "YYYY-MM-DD HH:MM:SS".
	    stamp_format = "%Y-%m-%d %H:%M:%S"
	    now = datetime.now()
	    return now.strftime(stamp_format)

	# =============================================================================
	# OPTIONAL DESKTOP TOOLS (mouse, keyboard, screenshots)
	# =============================================================================

	# pyautogui is optional: when it cannot be imported, the desktop tools below
	# are simply not defined and HAS_PYAUTO tells the rest of the file to skip them.
	try:
	    import pyautogui
	    HAS_PYAUTO = True
	except ImportError:
	    HAS_PYAUTO = False

	if HAS_PYAUTO:
	    import io, base64
	    from PIL import Image  # not referenced in this section; kept so the module-level name stays available

	    @tool
	    def screenshot() -> str:
	        """
	        Take a screenshot of the current screen and return it as a base64 PNG data URI.
	        No arguments needed.
	        """
	        img = pyautogui.screenshot()
	        # Encode the image in memory; nothing is written to disk.
	        buf = io.BytesIO()
	        img.save(buf, format="PNG")
	        return f"data:image/png;base64,{base64.b64encode(buf.getvalue()).decode()}"

	    @tool
	    def click(x: int, y: int) -> str:
	        """
	        Click the left mouse button at the specified screen coordinates.

	        Args:
	            x: Horizontal screen coordinate in pixels from the left edge.
	            y: Vertical screen coordinate in pixels from the top edge.
	        """
	        pyautogui.click(x, y)
	        return f"Clicked at ({x}, {y})"

	    @tool
	    def type_text(text: str) -> str:
	        """
	        Type the given text as keyboard input.

	        Args:
	            text: The text string to type.
	        """
	        pyautogui.typewrite(text, interval=0.01)
	        # Only echo a short preview so long inputs do not flood the result.
	        return f"Typed: {text[:50]}{'...' if len(text) > 50 else ''}"

	    @tool
	    def press_key(key: str) -> str:
	        """
	        Press a single keyboard key or a key combination.

	        Args:
	            key: The key or combination to press, e.g. 'enter', 'ctrl+c', 'alt+tab'. Combinations are written with '+' between the keys.
	        """
	        # pyautogui.press() only understands single key names, so a string
	        # like 'ctrl+c' has to be split and sent through hotkey(), which
	        # holds the keys down together. A lone '+' yields no parts and is
	        # still sent through press() as the plus key.
	        combo = [part.strip() for part in key.split("+") if part.strip()]
	        if len(combo) > 1:
	            pyautogui.hotkey(*combo)
	        else:
	            pyautogui.press(key)
	        return f"Pressed: {key}"

	    @tool
	    def screen_size() -> str:
	        """
	        Get the current screen resolution as width and height.
	        No arguments needed.
	        """
	        w, h = pyautogui.size()
	        return json.dumps({"width": w, "height": h})
	else:
	    print("[!] pyautogui not installed — desktop control tools disabled")

	# =============================================================================
	# LOAD MODEL
	# =============================================================================

	# Startup banner: loading happens at import time, so say up front that the
	# first run may be slow.
	print(
	    "[*] Loading Qwen2.5-7B-Instruct...",
	    "[*] First run downloads ~15GB to HuggingFace cache. This takes time.",
	    "[*] After that, it loads instantly from disk.\n",
	    sep="\n",
	)

	# Local Transformers-backed model; device placement and dtype are left to "auto".
	model = TransformersModel(
	    model_id="Qwen/Qwen2.5-7B-Instruct",
	    max_new_tokens=4096,
	    torch_dtype="auto",
	    device_map="auto",
	)

	# Core tools are always registered; the desktop tools exist only when
	# pyautogui could be imported, so they are referenced only in that case.
	tools = [execute_command, read_file, write_file, list_directory, get_system_info, get_time] + (
	    [screenshot, click, type_text, press_key, screen_size] if HAS_PYAUTO else []
	)

	agent = CodeAgent(
	    model=model,
	    tools=tools,
	    additional_authorized_imports=["os", "sys", "json", "pathlib", "subprocess", "re", "datetime", "random", "math"],
	    max_steps=15,
	)

	print("[*] Agent ready!\n")

	# =============================================================================
	# RUN
	# =============================================================================

	if __name__ == "__main__":
	    # Interactive console loop: each non-empty line is either a control word
	    # ('exit'/'quit'/'q', 'ui') or a task handed to the agent.
	    print("=" * 50)
	    print(" AGENT READY")
	    print(" Type a task and hit Enter")
	    print(" 'exit' to quit | 'ui' for web interface")
	    print("=" * 50)

	    while True:
	        try:
	            user_input = input("\n> ").strip()
	        except (EOFError, KeyboardInterrupt):
	            # End-of-input or Ctrl+C at the prompt is a normal way to leave;
	            # exit cleanly instead of dying with a traceback.
	            print("\nShutting down.")
	            break

	        if not user_input:
	            continue

	        command = user_input.lower()
	        if command in ("exit", "quit", "q"):
	            print("Shutting down.")
	            break
	        if command == "ui":
	            print("[*] Launching Gradio UI at http://localhost:7860 ...")
	            # NOTE(review): server_name="0.0.0.0" listens on every network
	            # interface, not just localhost, and this agent can run shell
	            # commands. Confirm that LAN exposure is intended; otherwise
	            # bind to "127.0.0.1".
	            GradioUI(agent).launch(server_name="0.0.0.0", server_port=7860, share=False, inbrowser=True)
	            continue

	        print("\n[*] Agent thinking...")
	        try:
	            result = agent.run(user_input)
	            print(f"\n[Result]\n{result}\n")
	        except KeyboardInterrupt:
	            # Ctrl+C cancels the current task but keeps the session alive.
	            print("\n[!] Interrupted.")
	        except Exception as e:
	            print(f"\n[!] Error: {e}")