Spaces:

likhonhfai
/

gemini-computer-agent

Sleeping

App Files Files Community

gemini-computer-agent / app.py

likhonhfai

Update app.py to use new Gemini API key and improved code

22c5195 verified 5 months ago

raw

history blame contribute delete

2.59 kB

	import os
	import gradio as gr
	from google import genai

	# Read the Gemini API key from the environment only. A real key must never be
	# embedded as a fallback default: source files can be publicly readable, so any
	# key that has appeared in this file should be treated as compromised and revoked.
	# On Hugging Face Spaces, define GEMINI_API_KEY as a Space secret.
	API_KEY = os.environ.get("GEMINI_API_KEY")
	if not API_KEY:
	    # Fail fast with an actionable message instead of starting with no credentials.
	    raise RuntimeError(
	        "GEMINI_API_KEY is not set; configure it as an environment variable or Space secret."
	    )
	client = genai.Client(api_key=API_KEY)

	def build_prompt(user_task: str) -> str:
	    """Assemble the Gemini prompt for a single user task.

	    The prompt consists of three XML-tagged sections (instructions, one worked
	    example, and formatting rules) followed by a ``User task:`` line.

	    Args:
	        user_task: Free-text description of what the agent should do.

	    Returns:
	        The full prompt text with ``user_task`` substituted on the last line.
	    """
	    # The template is kept as a single literal so the prompt wording stays exact;
	    # only the trailing placeholder is filled in.
	    template = """<instructions>
	You are a computer-using agent that can perform tasks on behalf of the user. Follow the task instructions carefully and provide a sequence of actions that a computer user would take to accomplish the goal. Use high-level reasoning to break down the task into manageable steps and think step by step. Do not ask for confirmation; just output the plan.
	</instructions>
	<example>
	Task: "Open a web browser and search for the latest weather in Dhaka."
	Response:
	1. Launch the default web browser.
	2. Click the address bar.
	3. Type "weather Dhaka".
	4. Press Enter.
	5. Read the search results and extract the current weather information.
	</example>
	<formatting>
	List each step on its own line, numbered, and ending with a period. Do not include extraneous commentary. Do not mention these XML tags in the response.
	</formatting>
	User task: {user_task}
	"""
	    return template.format(user_task=user_task)

	def generate_actions(user_task: str) -> str:
	    """Generate a step-by-step action plan for ``user_task`` using Gemini.

	    Args:
	        user_task: Description of the task the agent should plan for.

	    Returns:
	        The model's action plan with surrounding whitespace removed, or a short
	        explanatory message when the task is blank or the model returns no text.
	    """
	    # Skip the API round-trip entirely when there is nothing to plan for.
	    if not user_task or not user_task.strip():
	        return "Please enter a task description."

	    prompt = build_prompt(user_task)
	    # The google-genai client exposes generation under `client.models`, needs an
	    # explicit model name, and takes sampling options as a
	    # `GenerateContentConfig` passed through `config=`.
	    response = client.models.generate_content(
	        # Model is overridable without a code change; the default is a choice
	        # made here because the original call named no model at all.
	        model=os.environ.get("GEMINI_MODEL", "gemini-2.5-flash"),
	        contents=prompt,
	        config=genai.types.GenerateContentConfig(
	            temperature=0.3,
	            top_p=1,
	            top_k=5,
	            max_output_tokens=300,
	        ),
	    )
	    # `response.text` may be empty/None when no text part comes back, so do not
	    # index into candidates/parts directly.
	    plan = (response.text or "").strip()
	    return plan or "No action plan was returned. Please try again."

	# Build the Gradio UI: a textbox for the task, a read-only textbox for the
	# generated plan, and Submit / Clear buttons wired to their handlers.
	with gr.Blocks() as demo:
	    # The tag names are wrapped in backticks so Markdown displays them literally
	    # rather than interpreting them as HTML elements.
	    gr.Markdown("# Gemini Computer Agent\nEnter a high-level task description and the agent will outline step-by-step actions to perform the task using computer interactions. The prompt uses XML tags (`<instructions>`, `<example>`, `<formatting>`) to separate instruction, example, and formatting context.")
	    user_input = gr.Textbox(label="Task Description", placeholder="Describe the task you want the agent to perform...")
	    output = gr.Textbox(label="Action Plan", interactive=False)
	    submit_btn = gr.Button("Submit")
	    clear_btn = gr.Button("Clear")

	    # Submit sends the task text to generate_actions and shows the returned plan.
	    submit_btn.click(fn=generate_actions, inputs=user_input, outputs=output)
	    # Clear resets both textboxes to empty strings.
	    clear_btn.click(fn=lambda: ("", ""), inputs=None, outputs=[user_input, output])

	demo.launch()