File size: 3,308 Bytes
baa41dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
"""
SmolVLM UI Automation Agent - Test Script
Your trained model is ready!
"""
import torch
from transformers import Idefics3ForConditionalGeneration, AutoProcessor
from PIL import Image
import os
# Default location of the merged checkpoint. Pass ``model_path`` to
# ``load_model`` to load from somewhere else.
_DEFAULT_MODEL_PATH = r"C:\Users\keith\OneDrive\Desktop\admin.trac.jobs-DATA\LLaMA-Factory_local\smolvlm_final_merged"


def load_model(model_path: str = _DEFAULT_MODEL_PATH):
    """Load the fine-tuned SmolVLM model together with its processor.

    Args:
        model_path: Location handed to ``from_pretrained`` for both the
            model and the processor. Defaults to the merged checkpoint
            path this script was originally written for.

    Returns:
        A ``(model, processor)`` tuple.
    """
    print("Loading your trained SmolVLM UI automation agent...")
    model = Idefics3ForConditionalGeneration.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
    processor = AutoProcessor.from_pretrained(model_path)
    print("Model loaded successfully!")
    return model, processor
def analyze_screenshot(image_path: str, model, processor):
    """Ask the model to describe UI-automation targets in a screenshot.

    Args:
        image_path: Path of the image file to analyze.
        model: Generation model; must provide ``generate`` and ``device``.
        processor: Processor used to build the model inputs and to decode
            the generated token ids.

    Returns:
        The newly generated text, stripped of surrounding whitespace. If
        the text contains ``"Assistant:"``, only the part after the last
        occurrence is returned.
    """
    # Convert inside the ``with`` block so the pixel data is fully read
    # before the underlying file is closed.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    prompt = "<image>\nAnalyze this interface for UI automation opportunities. Identify clickable elements and automation targets."

    inputs = processor(text=prompt, images=[image], return_tensors="pt")
    # load_model() uses device_map="auto", so the weights may not live on
    # the CPU; the input tensors have to be on the same device as the model.
    inputs = inputs.to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )

    # The generated sequence starts with the prompt tokens. Decode only what
    # comes after them so the prompt text is not echoed back to the caller.
    generated_ids = outputs[0][prompt_length:]
    response = processor.decode(generated_ids, skip_special_tokens=True)

    # Fallback for outputs that still carry a chat-style role marker.
    if "Assistant:" in response:
        response = response.split("Assistant:")[-1]
    return response.strip()
def main():
    """Run the interactive console menu for screenshot analysis.

    Loads the model once, then repeatedly offers two options: analyze a
    screenshot whose path is typed by the user, or quit. A failure while
    loading the model is reported and ends the program; a failure while
    analyzing one screenshot is reported and the menu is shown again.
    End-of-input or Ctrl+C at a prompt exits the menu.
    """
    # NOTE(review): the leading glyphs in several messages below look like
    # mis-decoded emoji - confirm the file's encoding. They are kept as-is.
    print("๐ค SmolVLM UI Automation Agent")
    print("=" * 50)
    print("Your custom-trained model for TRAC administration!")
    print()

    # Only the load step is guarded by the "Error loading model" handler, so
    # unrelated failures later on are not misreported as load errors.
    try:
        model, processor = load_model()
    except Exception as e:
        print(f"โ Error loading model: {e}")
        print("Make sure the model was merged successfully.")
        return

    try:
        while True:
            print("\nOptions:")
            print("1. Analyze a screenshot")
            print("2. Quit")

            choice = input("\nEnter choice (1-2): ").strip()

            if choice == "2":
                print("๐ Goodbye!")
                break

            if choice != "1":
                print("โ Invalid choice!")
                continue

            # Paths pasted from a file manager often arrive wrapped in
            # double quotes; drop them before touching the filesystem.
            image_path = input("Enter path to screenshot: ").strip().strip('"')

            if not os.path.exists(image_path):
                print("โ Image file not found!")
                continue

            print("\n๐ Analyzing screenshot...")
            try:
                result = analyze_screenshot(image_path, model, processor)
                print("\n๐ฏ Analysis Result:")
                print("-" * 30)
                print(result)
                print("-" * 30)
            except Exception as e:
                # One bad screenshot must not end the session.
                print(f"โ Analysis error: {e}")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl+C: leave the menu without a traceback.
        print()
        print("๐ Goodbye!")
# Start the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|