import os
import gradio as gr
from google import genai
# Read the Gemini API key from the environment only. Credentials must never be
# committed to source control: any key that was previously embedded in this
# file should be treated as compromised and revoked.
API_KEY = os.environ.get("GEMINI_API_KEY")
if not API_KEY:
    # Fail fast with an actionable message instead of sending unauthenticated
    # requests later on.
    raise RuntimeError(
        "GEMINI_API_KEY is not set; export it before starting the app."
    )
client = genai.Client(api_key=API_KEY)
def build_prompt(user_task: str) -> str:
    """Construct the Gemini prompt, delimiting each section with XML tags.

    The prompt has three tagged sections followed by the user's task:
    ``<instruction>`` (the agent's role and rules), ``<example>`` (one worked
    task/response pair) and ``<formatting>`` (output format requirements).

    Args:
        user_task: Description of the task the agent should perform. It is
            inserted verbatim after the tagged sections.

    Returns:
        A prompt string combining instructions, an example, formatting
        guidance and the user task.
    """
    # The formatting section tells the model not to mention "these XML tags",
    # so the tags must actually be present for that instruction to make sense.
    return f"""
<instruction>
You are a computer-using agent that can perform tasks on behalf of the user. Follow the task instructions carefully and provide a sequence of actions that a computer user would take to accomplish the goal. Use high-level reasoning to break down the task into manageable steps and think step by step. Do not ask for confirmation; just output the plan.
</instruction>
<example>
Task: "Open a web browser and search for the latest weather in Dhaka."
Response:
1. Launch the default web browser.
2. Click the address bar.
3. Type "weather Dhaka".
4. Press Enter.
5. Read the search results and extract the current weather information.
</example>
<formatting>
List each step on its own line, numbered, and ending with a period. Do not include extraneous commentary. Do not mention these XML tags in the response.
</formatting>
User task: {user_task}
"""
# Model used for plan generation; override with the GEMINI_MODEL environment
# variable without touching the code.
_GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")


def generate_actions(user_task: str) -> str:
    """Generate a step-by-step action plan using Gemini.

    Args:
        user_task: Task description entered by the user.

    Returns:
        The model's plan with surrounding whitespace removed, or a short
        explanatory message when the input is blank or the model returned
        no text.
    """
    # Skip the API round trip when there is nothing to plan for.
    if not user_task or not user_task.strip():
        return "Please enter a task description."

    prompt = build_prompt(user_task)
    # The google-genai client exposes generation under `client.models`, and
    # takes sampling parameters through `config=GenerateContentConfig(...)`.
    response = client.models.generate_content(
        model=_GEMINI_MODEL,
        contents=prompt,
        config=genai.types.GenerateContentConfig(
            temperature=0.3,
            top_p=1,
            top_k=5,
            max_output_tokens=300,
        ),
    )
    # `response.text` is None when no text candidate came back (for example a
    # blocked prompt); indexing candidates/parts directly would raise instead.
    plan = (response.text or "").strip()
    return plan or "The model returned no text for this task."
# Build the Gradio UI: one input box, one read-only output box, and two buttons.
# Components and event listeners must be created inside the Blocks context.
with gr.Blocks() as demo:
    # Intro text shown at the top of the page (the garbled empty "(, , )"
    # parenthetical has been removed from the description).
    gr.Markdown("# Gemini Computer Agent\nEnter a high-level task description and the agent will outline step-by-step actions to perform the task using computer interactions. The prompt uses XML tags to separate instruction, example, and formatting context.")
    user_input = gr.Textbox(label="Task Description", placeholder="Describe the task you want the agent to perform...")
    output = gr.Textbox(label="Action Plan", interactive=False)
    submit_btn = gr.Button("Submit")
    clear_btn = gr.Button("Clear")
    # Submit sends the task text to Gemini and shows the returned plan.
    submit_btn.click(fn=generate_actions, inputs=user_input, outputs=output)
    # Clear resets both text boxes to empty strings.
    clear_btn.click(fn=lambda: ("", ""), inputs=None, outputs=[user_input, output])

# Start the web server once the layout is fully defined.
demo.launch()