# File size: 7,357 Bytes
#
# 3921f4a

# C:\agent\agent.py
from smolagents import CodeAgent, TransformersModel, GradioUI, tool
import subprocess
import json
from pathlib import Path
from datetime import datetime

# =============================================================================
# TOOLS
# =============================================================================

@tool
def execute_command(command: str) -> str:
    """
    Execute a shell command and return the output. Be careful with destructive commands.

    The command runs through the system shell with a 60 second timeout. The
    report is capped at 4000 characters and a trailing "..." marks a report
    that was cut short.

    Args:
        command: The shell command string to execute.

    Returns:
        A text report holding the exit code, STDOUT and STDERR, or a message starting with the word Error when the command could not be run.
    """
    max_chars = 4000  # cap on the report length handed back to the caller
    try:
        result = subprocess.run(
            command,
            shell=True,  # the tool's purpose is to run arbitrary shell strings
            capture_output=True,
            text=True,
            # Without this a single undecodable byte in the child's output
            # raises UnicodeDecodeError and the whole report is lost.
            errors="replace",
            timeout=60,
        )
    except Exception as e:
        # Tool boundary: failures (including the timeout) are reported as text.
        return f"Error: {str(e)}"

    stdout = result.stdout or "(no output)"
    stderr = result.stderr or "(no errors)"
    output = f"[Exit code: {result.returncode}]\n\nSTDOUT:\n{stdout}\n\nSTDERR:\n{stderr}"
    if len(output) > max_chars:
        # Mark the cut the same way read_file does so it is not mistaken
        # for the complete output.
        return output[:max_chars] + "..."
    return output

@tool
def read_file(file_path: str) -> str:
    """
    Read a text file from disk and return its contents.

    The file is decoded as UTF-8 (undecodable bytes are replaced) and at most
    the first 6000 characters are returned, followed by "..." when the file
    is longer than that.

    Args:
        file_path: Absolute or relative path to the file to read.

    Returns:
        The file text, or a short message when the path is missing, is a directory, or cannot be read.
    """
    max_chars = 6000  # cap on the amount of text handed back to the caller
    try:
        path = Path(file_path).expanduser().resolve()
        if not path.exists():
            return f"File not found: {path}"
        if path.is_dir():
            # Otherwise the open() below fails with a platform-specific OS error.
            return f"Not a file: {path}"
        # UTF-8 matches what write_file produces; the platform default
        # encoding would garble non-ASCII text on Windows.
        with path.open(encoding="utf-8", errors="replace") as handle:
            # One extra character is enough to detect truncation without
            # loading an arbitrarily large file into memory.
            content = handle.read(max_chars + 1)
        if len(content) > max_chars:
            return content[:max_chars] + "..."
        return content
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"

@tool
def write_file(file_path: str, content: str) -> str:
    """
    Write text to a file as UTF-8. Missing parent directories are created and an existing file is overwritten.

    Args:
        file_path: Absolute or relative path where the file should be written.
        content: The text content to write into the file.

    Returns:
        A confirmation with the character count and resolved path, or a message starting with the word Error on failure.
    """
    try:
        destination = Path(file_path).expanduser().resolve()
        folder = destination.parent
        folder.mkdir(parents=True, exist_ok=True)
        destination.write_text(content, encoding="utf-8")
        char_count = len(content)
    except Exception as err:
        # Failures are reported as text rather than raised.
        return f"Error: {err}"
    return f"Wrote {char_count} chars to {destination}"

@tool
def list_directory(dir_path: str = ".") -> str:
    """
    List the files and folders inside a directory, sorted by path.

    Args:
        dir_path: Path to the directory to list. Defaults to current directory.

    Returns:
        A header with the resolved path and item count followed by one tagged line per entry, or a short message when the path is not a directory or listing fails.
    """
    try:
        folder = Path(dir_path).expanduser().resolve()
        if not folder.is_dir():
            return f"Not a directory: {folder}"

        lines = []
        for child in sorted(folder.iterdir()):
            label = "[DIR]" if child.is_dir() else "[FILE]"
            lines.append(f"{label} {child.name}")

        header = f"Contents of {folder} ({len(lines)} items):\n"
        return header + "\n".join(lines)
    except Exception as err:
        # Failures are reported as text rather than raised.
        return f"Error: {err}"

@tool
def get_system_info() -> str:
    """
    Get Windows system info and GPU status via nvidia-smi.
    No arguments needed.

    Returns:
        A JSON object with the keys os, python, cwd and gpu. The gpu value is the nvidia-smi CSV output, or a short note when nvidia-smi is missing or fails.
    """
    try:
        import platform
        info = {"os": platform.platform(), "python": platform.python_version(), "cwd": str(Path.cwd())}
        try:
            result = subprocess.run(
                ["nvidia-smi", "--query-gpu=name,memory.used,memory.total,temperature.gpu", "--format=csv,noheader"],
                capture_output=True,
                text=True,
                errors="replace",
                timeout=15,  # a wedged driver must not hang the tool forever
            )
        except FileNotFoundError:
            # No NVIDIA tooling on this machine: still report the rest of the
            # system info instead of failing the whole call.
            info["gpu"] = "nvidia-smi not found"
        except (OSError, subprocess.TimeoutExpired):
            info["gpu"] = "nvidia-smi failed"
        else:
            info["gpu"] = result.stdout.strip() if result.returncode == 0 else "nvidia-smi failed"
        return json.dumps(info, indent=2)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"

@tool
def get_time() -> str:
    """
    Return the current local date and time formatted as YYYY-MM-DD HH:MM:SS.
    No arguments needed.
    """
    current = datetime.now()
    return f"{current:%Y-%m-%d %H:%M:%S}"

# =============================================================================
# OPTIONAL DESKTOP TOOLS (mouse, keyboard, screenshots)
# =============================================================================

# Desktop control is optional: without pyautogui the agent still works,
# it just does not get the mouse/keyboard/screenshot tools.
try:
    import pyautogui
    HAS_PYAUTO = True
except ImportError:
    HAS_PYAUTO = False


def split_key_combo(key: str) -> list:
    """
    Split a key description such as 'ctrl+c' into its individual key names.

    A single character (including '+') is returned unchanged as a one-item
    list. A trailing '++' is read as "... plus the literal '+' key", so
    'ctrl++' yields ['ctrl', '+']. Whitespace around names is dropped.
    """
    if len(key) <= 1:
        return [key]
    literal_plus = key.endswith("++")
    head = key[:-2] if literal_plus else key
    names = [name.strip() for name in head.split("+")]
    names = [name for name in names if name]
    if literal_plus:
        names.append("+")
    return names


if HAS_PYAUTO:
    import io
    import base64
    from PIL import Image  # not referenced in this section; kept so nothing else relying on it breaks

    @tool
    def screenshot() -> str:
        """
        Take a screenshot of the current screen and return it as a base64 PNG data URI.
        No arguments needed.
        """
        # NOTE(review): a full-screen PNG in base64 can be a very long string
        # for a text-only model context — confirm this is the intended output.
        img = pyautogui.screenshot()
        buf = io.BytesIO()
        img.save(buf, format="PNG")
        return f"data:image/png;base64,{base64.b64encode(buf.getvalue()).decode()}"

    @tool
    def click(x: int, y: int) -> str:
        """
        Click the left mouse button at the specified screen coordinates.

        Args:
            x: Horizontal screen coordinate in pixels from the left edge.
            y: Vertical screen coordinate in pixels from the top edge.
        """
        pyautogui.click(x, y)
        return f"Clicked at ({x}, {y})"

    @tool
    def type_text(text: str) -> str:
        """
        Type the given text as keyboard input.

        Args:
            text: The text string to type.
        """
        pyautogui.typewrite(text, interval=0.01)
        # Only a short preview is echoed back to keep the reply compact.
        return f"Typed: {text[:50]}{'...' if len(text) > 50 else ''}"

    @tool
    def press_key(key: str) -> str:
        """
        Press a single keyboard key or a key combination.

        Args:
            key: The key or combination to press, e.g. 'enter', 'ctrl+c', 'alt+tab'.
        """
        keys = split_key_combo(key)
        if len(keys) > 1:
            # pyautogui.press() treats 'ctrl+c' as one (unknown) key name and
            # does nothing; combinations have to go through hotkey().
            pyautogui.hotkey(*keys)
        else:
            pyautogui.press(keys[0] if keys else key)
        return f"Pressed: {key}"

    @tool
    def screen_size() -> str:
        """
        Get the current screen resolution as width and height.
        No arguments needed.
        """
        w, h = pyautogui.size()
        return json.dumps({"width": w, "height": h})
else:
    print("[!] pyautogui not installed — desktop control tools disabled")

# =============================================================================
# LOAD MODEL
# =============================================================================

# Tell the user what is about to happen before the (potentially slow) load.
print(
    "[*] Loading Qwen2.5-7B-Instruct...",
    "[*] First run downloads ~15GB to HuggingFace cache. This takes time.",
    "[*] After that, it loads instantly from disk.\n",
    sep="\n",
)

# Local language model that drives the agent.
model = TransformersModel(
    model_id="Qwen/Qwen2.5-7B-Instruct",
    device_map="auto",
    torch_dtype="auto",
    max_new_tokens=4096,
)

# Core tools are always available; desktop tools only when pyautogui imported.
tools = [
    execute_command,
    read_file,
    write_file,
    list_directory,
    get_system_info,
    get_time,
]
if HAS_PYAUTO:
    tools += [screenshot, click, type_text, press_key, screen_size]

# Code-writing agent, limited to 15 steps per task, with the listed modules
# importable from the code it generates.
agent = CodeAgent(
    tools=tools,
    model=model,
    max_steps=15,
    additional_authorized_imports=[
        "os",
        "sys",
        "json",
        "pathlib",
        "subprocess",
        "re",
        "datetime",
        "random",
        "math",
    ],
)

print("[*] Agent ready!\n")

# =============================================================================
# RUN
# =============================================================================

if __name__ == "__main__":
    import os  # only the interactive entry point needs it

    print("=" * 50)
    print("  AGENT READY")
    print("  Type a task and hit Enter")
    print("  'exit' to quit | 'ui' for web interface")
    print("=" * 50)

    # The agent can run arbitrary shell commands, so the web UI is bound to
    # the loopback interface by default (matching the "localhost" message
    # below). Set AGENT_UI_HOST=0.0.0.0 to deliberately expose it on the
    # network.
    ui_host = os.environ.get("AGENT_UI_HOST", "127.0.0.1")

    # Simple REPL: one line of input is one task for the agent.
    while True:
        try:
            user_input = input("\n> ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-Z / Ctrl-C at the prompt is a normal way to leave.
            print("\nShutting down.")
            break

        command = user_input.lower()
        if command in ("exit", "quit", "q"):
            print("Shutting down.")
            break
        if command == "ui":
            print("[*] Launching Gradio UI at http://localhost:7860 ...")
            GradioUI(agent).launch(server_name=ui_host, server_port=7860, share=False, inbrowser=True)
            continue
        if not user_input:
            continue

        print("\n[*] Agent thinking...")
        try:
            result = agent.run(user_input)
            print(f"\n[Result]\n{result}\n")
        except KeyboardInterrupt:
            # Let the user cancel a long-running task without ending the session.
            print("\n[!] Interrupted.")
        except Exception as e:
            # Top-level boundary: report the failure and keep the session alive.
            print(f"\n[!] Error: {e}")