# Captured from a Hugging Face Space page whose status banner read "Runtime error".
import functools

import gradio as gr
import jax
import jax.numpy as jnp
from flax import linen as nn
from jax import random
from playwright.sync_api import sync_playwright
from transformers import AutoTokenizer, AutoModelForCausalLM
# Text-generation settings for the Hugging Face causal LM named by MODEL_NAME
# (Zephyr 7B beta -- not LLaVA, as an earlier comment claimed).
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
# Passed to generate() as max_length.
MAX_LENGTH = 512
# Passed to generate() as num_beams.
NUM_BEAMS = 5
# Define Flax model for action generation
class ActionModel(nn.Module):
    """Embedding -> stacked LSTM -> dense projection back onto the vocabulary.

    Attributes:
        vocab_size: Size of the embedding table and of the output logits.
        hidden_size: Embedding width and LSTM feature size.
        num_layers: Number of stacked LSTM layers.
    """

    vocab_size: int
    hidden_size: int
    num_layers: int

    def setup(self):
        self.embedding = nn.Embed(self.vocab_size, self.hidden_size)
        # flax.linen has no multi-layer ``nn.LSTM``; a stack is built from one
        # ``nn.RNN`` wrapper around an ``nn.LSTMCell`` per layer.
        self.lstm_layers = [
            nn.RNN(nn.LSTMCell(features=self.hidden_size), return_carry=True)
            for _ in range(self.num_layers)
        ]
        self.dense = nn.Dense(self.vocab_size)

    def __call__(self, inputs, init_state=None):
        """Run the model over a sequence of token ids.

        Args:
            inputs: Integer token ids fed to the embedding layer
                (assumes a (batch, time) layout -- TODO confirm with callers).
            init_state: Sequence with one LSTM carry per layer, or ``None``
                to let every layer create its own initial carry.

        Returns:
            ``(logits, new_state)`` where ``new_state`` is a tuple holding the
            final carry of each layer, in layer order.

        Raises:
            ValueError: If ``init_state`` does not have ``num_layers`` entries.
        """
        if init_state is None:
            init_state = (None,) * self.num_layers
        elif len(init_state) != self.num_layers:
            raise ValueError(
                f"init_state must hold {self.num_layers} carries, "
                f"got {len(init_state)}"
            )

        hidden = self.embedding(inputs)
        final_carries = []
        for layer, carry in zip(self.lstm_layers, init_state):
            # With return_carry=True the RNN returns (final_carry, outputs).
            carry, hidden = layer(hidden, initial_carry=carry)
            final_carries.append(carry)

        logits = self.dense(hidden)
        return logits, tuple(final_carries)
# Initialize Flax model
vocab_size = 50257
hidden_size = 1024
num_layers = 2
key = random.PRNGKey(0)
model = ActionModel(vocab_size, hidden_size, num_layers)
# Submodules assigned in setup() cannot be read from an unbound module, so
# `model.lstm.initialize_carry(...)` fails at import time. Build the initial
# state directly instead: one zero (c, h) carry per LSTM layer, batch size 1.
init_state = tuple(
    (jnp.zeros((1, hidden_size)), jnp.zeros((1, hidden_size)))
    for _ in range(num_layers)
)
# Action keywords, checked in this order (same priority as the original
# if/elif chain: the first keyword found anywhere in the line wins).
_ACTION_KEYWORDS = ("open website", "click", "type", "submit")


@functools.lru_cache(maxsize=1)
def _load_llm():
    """Load the tokenizer and causal LM for MODEL_NAME once per process."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    llm = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    return tokenizer, llm


def _parse_action(line):
    """Split a response line into (keyword, argument); keyword is None if unknown."""
    for keyword in _ACTION_KEYWORDS:
        _, found, remainder = line.partition(keyword)
        if found:
            return keyword, remainder.strip()
    return None, line


def _perform_action(page, keyword, argument):
    """Execute one parsed action against the Playwright page."""
    if keyword == "open website":
        # Prefix bare hosts such as "example.com" with a scheme before
        # navigating.
        url = argument if "://" in argument else f"https://{argument}"
        page.goto(url)
    elif keyword == "click":
        page.click(argument)
    elif keyword == "type":
        # page.type() requires a selector as its first argument; typing into
        # whatever currently has focus goes through the keyboard API.
        page.keyboard.type(argument)
    elif keyword == "submit":
        # page.press() likewise requires a selector; press Enter on the
        # focused element instead.
        page.keyboard.press("Enter")


# Function to generate actions using the language model
def generate_actions(input_text, browser, page):
    """Ask the language model for actions and replay them in the browser.

    The model's continuation of ``input_text`` is split into lines; each
    non-blank line is matched against the known keywords ("open website",
    "click", "type", "submit") and executed on ``page``. Lines that match no
    keyword, or that lack the argument their keyword needs, are reported on
    stdout and skipped.

    Args:
        input_text: Prompt text sent to the model.
        browser: Browser that owns ``page``; accepted for interface
            compatibility and not used here.
        page: Playwright page the actions are performed on.

    Returns:
        None.
    """
    # Loading a 7B checkpoint is expensive, so the loader is cached.
    tokenizer, llm = _load_llm()

    inputs = tokenizer(input_text, return_tensors="pt").to(llm.device)

    # `temperature` only affects sampling; with beam search and no sampling
    # it is ignored, so it is not passed. Unpacking `inputs` also forwards
    # the attention mask.
    outputs = llm.generate(
        **inputs,
        max_length=MAX_LENGTH,
        num_beams=NUM_BEAMS,
    )

    # A causal LM returns prompt + continuation; drop the prompt tokens so
    # the user's own text is not replayed as browser actions.
    prompt_length = inputs.input_ids.shape[1]
    response = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    for raw_line in response.split("\n"):
        action = raw_line.strip()
        if not action:
            continue
        keyword, argument = _parse_action(action)
        needs_argument = keyword != "submit"
        if keyword is None or (needs_argument and not argument):
            print(f"Action not recognized: {action}")
            continue
        _perform_action(page, keyword, argument)
# Function to initialize browser and page
def initialize_browser(playwright=None):
    """Launch Chromium and open a new page.

    Args:
        playwright: An already-started Playwright driver, e.g. the object
            yielded by ``with sync_playwright() as p``. The caller keeps
            ownership of its lifetime. When omitted, a new driver is started
            here and left running, because stopping it would invalidate the
            returned objects; prefer passing a driver you control.

    Returns:
        A ``(browser, page)`` tuple.
    """
    if playwright is None:
        # Launching inside `with sync_playwright()` and returning afterwards
        # hands back a browser whose driver has already been stopped, so the
        # driver is started without a context manager here.
        playwright = sync_playwright().start()
    browser = playwright.chromium.launch()
    page = browser.new_page()
    return browser, page
# Gradio interface
def run_agent(input_text):
    """Gradio callback: run the agent for one request in a fresh browser.

    Args:
        input_text: The task or instruction typed by the user.

    Returns:
        A confirmation message that echoes the request.
    """
    with sync_playwright() as p:
        # Launch from this driver directly: starting a second
        # sync_playwright() while this one is active is not supported, and
        # the browser must stay alive for as long as actions are running.
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            generate_actions(input_text, browser, page)
        finally:
            browser.close()
    return f"Successfully executed actions based on: {input_text}"
# Wire run_agent into a single text-in / text-out Gradio form and serve it.
request_box = gr.Textbox(label="Enter your request")
response_box = gr.Textbox(label="Response")

iface = gr.Interface(
    fn=run_agent,
    inputs=request_box,
    outputs=response_box,
    title="Automated Agent",
    description="Enter a task or instruction for the agent to perform.",
)

iface.launch()