Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from playwright.sync_api import sync_playwright | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoProcessor | |
| from PIL import Image | |
| import os | |
| import subprocess | |
| import spaces | |
# Download the Chromium build that Playwright drives. This runs at import time,
# before the model is loaded; check=True raises CalledProcessError (aborting
# startup) if the install command exits non-zero.
# NOTE(review): `playwright install chromium` fetches the browser only. OS-level
# libraries are installed by `playwright install --with-deps` / `install-deps`;
# presumably the Space image already provides them -- confirm.
subprocess.run(["playwright", "install", "chromium"], check=True)
# Checkpoint that both the processor and the model are loaded from.
model_id = "allenai/Molmo-7B-D-0924"

# trust_remote_code=True allows the loading code shipped with the checkpoint
# repository to be executed.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Load the weights once at startup so every request reuses the same model
# object, then switch it to inference mode straight away.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).eval()
def _normalize_url(url):
    """Return *url* stripped, with ``https://`` prepended if it has no HTTP(S) scheme.

    Raises:
        ValueError: if *url* is empty or only whitespace.
    """
    cleaned = (url or "").strip()
    if not cleaned:
        raise ValueError("URL не указан")
    # Check for the full scheme prefix: a bare "http" test would also match
    # host names such as "httpbin.org" and leave them without a scheme.
    if cleaned.lower().startswith(("http://", "https://")):
        return cleaned
    return f"https://{cleaned}"


def _capture_screenshot(url):
    """Open *url* in headless Chromium and return the 1280x720 viewport as an RGB PIL image."""
    # Function-scope import keeps this block self-contained.
    import io

    target_url = _normalize_url(url)
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-setuid-sandbox", "--disable-dev-shm-usage"],
        )
        try:
            context = browser.new_context(viewport={"width": 1280, "height": 720})
            page = context.new_page()
            page.goto(target_url, timeout=60000)
            # Fixed 3 s pause after navigation before the capture.
            page.wait_for_timeout(3000)
            # Take the PNG as bytes instead of writing a fixed /tmp path, so
            # concurrent requests cannot overwrite each other's screenshot.
            png_bytes = page.screenshot()
        finally:
            # Close the browser on success and on failure alike, and before
            # the (slow) model call starts.
            browser.close()
    return Image.open(io.BytesIO(png_bytes)).convert("RGB")


def _describe_image(image, prompt):
    """Run the module-level Molmo model on *image* with *prompt*; return only the newly generated text."""
    # NOTE(review): `spaces` is imported at file level but no `@spaces.GPU`
    # decorator is applied anywhere; if this Space runs on ZeroGPU hardware the
    # decorator is presumably required -- confirm.
    inputs = processor.process(images=[image], text=prompt)

    # processor.process returns one unbatched example (per the Molmo model card
    # usage snippet); generate_from_batch needs a leading batch dimension.
    # Use the model's own device rather than a hard-coded "cuda", because the
    # weights were placed with device_map="auto".
    device = model.device
    batch = {
        key: value.to(device).unsqueeze(0) if torch.is_tensor(value) else value
        for key, value in inputs.items()
    }
    # Match pixel values to the parameter dtype; the model card calls for this
    # when the weights are bfloat16.
    if torch.is_tensor(batch.get("images")):
        batch["images"] = batch["images"].to(model.dtype)

    with torch.no_grad():
        output = model.generate_from_batch(
            batch,
            max_new_tokens=200,
            stop_strings=["<|endoftext|>"],
            # generate() refuses stop_strings unless it is given the tokenizer.
            tokenizer=processor.tokenizer,
        )

    # The returned sequence starts with the prompt tokens; decode only what
    # was generated after them.
    prompt_length = batch["input_ids"].size(1)
    return processor.tokenizer.decode(
        output[0, prompt_length:], skip_special_tokens=True
    )


def run_agent(url: str, prompt: str):
    """Screenshot a web page and ask the Molmo model about it.

    Args:
        url: Page address; ``https://`` is prepended when no scheme is given.
        prompt: Instruction passed to the model together with the screenshot.

    Returns:
        ``(image, text)`` on success, where ``image`` is the RGB PIL screenshot
        and ``text`` is the model's reply. On any failure returns
        ``(None, "Ошибка: <message>")`` so the UI shows the error instead of
        raising.
    """
    try:
        image = _capture_screenshot(url)
        generated_text = _describe_image(image, prompt)
    except Exception as e:  # UI boundary: report the failure in the output textbox.
        return None, f"Ошибка: {e}"
    return image, generated_text
# Gradio UI: a URL and an instruction go in; the screenshot and the model's
# reply come out.
# NOTE(review): the nesting below is reconstructed from a source whose
# indentation was lost -- confirm the button belongs inside the column.
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Molmo AI Web Agent")

    # Inputs and the trigger button.
    with gr.Row():
        with gr.Column():
            url_input = gr.Textbox(value="google.com", label="URL")
            prompt_input = gr.Textbox(value="Point to the search bar", label="Запрос")
            btn = gr.Button("Запустить")

    # Outputs: the captured page and the generated answer.
    with gr.Row():
        out_img = gr.Image(type="pil", label="Скриншот")
        out_txt = gr.Textbox(label="Ответ")

    # run_agent returns (image, text), matching the two output components in order.
    btn.click(run_agent, [url_input, prompt_input], [out_img, out_txt])

demo.launch()