# app.py
"""Small Flask service that runs natural-language commands through Skyvern.

A POST to ``/execute`` is first sent to the Gemini API for interpretation; the
text Gemini returns is then submitted to Skyvern as a task.

Configuration is read from environment variables:

* ``SKYVERN_API_KEY`` - credential for the Skyvern API (required).
* ``GEMINI_API_KEY``  - credential for the Gemini API (required).
* ``GEMINI_MODEL``    - Gemini model name (optional, defaults to ``gemini-pro``).
* ``FLASK_DEBUG``     - set to ``1``/``true`` to enable Flask debug mode when
  the module is run directly (optional, off by default).
"""
import os
import json

import requests
from flask import Flask, request, jsonify

app = Flask(__name__)

# ---- API KEYS ----
# Secrets must never live in source control; they are read from the
# environment. Any key that was previously committed in this file should be
# treated as compromised and rotated.
SKYVERN_API_KEY = os.environ.get("SKYVERN_API_KEY", "")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")

# Skyvern endpoint
# NOTE(review): the base URL, the "/tasks" path, the Bearer auth scheme and the
# payload keys used in run_skyvern_task were carried over unchanged and have
# not been checked against the Skyvern API reference - confirm before relying
# on them.
SKYVERN_BASE_URL = "https://api.skyvern.com/v1"

# Gemini endpoint (Google Generative Language REST API)
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-pro")

# Upper bound, in seconds, for each outbound HTTP call. Without a timeout
# `requests` waits forever on a stalled upstream and ties up the worker.
REQUEST_TIMEOUT_SECONDS = 60


def _failure(error):
    """Build the error dict shape shared by both upstream helpers."""
    return {"error": error, "success": False}


def _post_json(url, headers, payload):
    """POST ``payload`` as JSON and decode a JSON-object response.

    Returns:
        A ``(data, failure)`` pair. On success ``data`` is the decoded dict
        and ``failure`` is ``None``. On any problem ``data`` is ``None`` and
        ``failure`` is a ``{"error": ..., "success": False}`` dict.
    """
    try:
        response = requests.post(
            url,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        return None, _failure(str(exc))

    try:
        body = response.json()
    except ValueError:
        # Upstream sent something that is not JSON (e.g. an HTML error page).
        body = None

    if not response.ok:
        # Prefer the structured error body; fall back to the raw text so the
        # real upstream message is not hidden behind a JSON decode error.
        return None, _failure(body if body is not None else response.text)

    if not isinstance(body, dict):
        return None, _failure("Upstream response was not a JSON object")

    return body, None


def _first_screenshot_url(data):
    """Return the URL of the first screenshot/image artifact in ``data``, if any."""
    artifacts = data.get("artifacts")
    if not isinstance(artifacts, list):
        return None
    return next(
        (
            artifact.get("url")
            for artifact in artifacts
            if isinstance(artifact, dict)
            and artifact.get("type") in ("screenshot", "image")
        ),
        None,
    )


def _extract_gemini_text(data):
    """Return the text of the first non-empty candidate in a Gemini response.

    Expects the ``generateContent`` response layout
    ``candidates[].content.parts[].text``. Returns ``None`` when no text is
    present, for example when the response carries no candidates.
    """
    candidates = data.get("candidates")
    if not isinstance(candidates, list):
        return None
    for candidate in candidates:
        content = candidate.get("content") if isinstance(candidate, dict) else None
        parts = content.get("parts") if isinstance(content, dict) else None
        if not isinstance(parts, list):
            continue
        text = "".join(
            part["text"]
            for part in parts
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ).strip()
        if text:
            return text
    return None


def run_skyvern_task(task_instruction):
    """Send a task to the Skyvern API for execution.

    Args:
        task_instruction: Instruction text to submit as the task.

    Returns:
        On success, a dict with ``success`` (True), ``result`` (the response's
        ``result`` field, or ``"Task completed"`` when that is missing or
        empty), ``screenshot`` (URL of the first screenshot/image artifact, or
        ``None``) and ``raw`` (the full decoded response).
        On failure, ``{"error": ..., "success": False}``.
    """
    if not SKYVERN_API_KEY:
        return _failure("SKYVERN_API_KEY is not configured")

    headers = {
        "Authorization": f"Bearer {SKYVERN_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "instructions": task_instruction,
        "returnArtifacts": True,
    }

    data, failure = _post_json(f"{SKYVERN_BASE_URL}/tasks", headers, payload)
    if failure is not None:
        return failure

    return {
        "success": True,
        "result": data.get("result") or "Task completed",
        "screenshot": _first_screenshot_url(data),
        "raw": data,
    }


def get_gemini_response(prompt_text):
    """Send the user's natural-language command to the Gemini API.

    Args:
        prompt_text: The raw user command.

    Returns:
        On success, ``{"success": True, "text": ...}`` where ``text`` is the
        text Gemini produced, or ``prompt_text`` itself when the response
        contains no text. On failure, ``{"error": ..., "success": False}``.
    """
    if not GEMINI_API_KEY:
        return _failure("GEMINI_API_KEY is not configured")

    headers = {
        # The key is passed as a header rather than a ``?key=`` query
        # parameter so it cannot leak through URLs quoted in error messages.
        "x-goog-api-key": GEMINI_API_KEY,
        "Content-Type": "application/json",
    }
    payload = {
        "contents": [
            {"parts": [{"text": prompt_text}]},
        ],
    }
    url = f"{GEMINI_BASE_URL}/models/{GEMINI_MODEL}:generateContent"

    data, failure = _post_json(url, headers, payload)
    if failure is not None:
        return failure

    return {"success": True, "text": _extract_gemini_text(data) or prompt_text}


@app.route("/execute", methods=["POST"])
def execute_command():
    """Interpret a command with Gemini, run it on Skyvern, return the outcome.

    Accepts a JSON payload ``{"command": "user command in natural language"}``.

    Responses:
        200 - JSON with ``success``, ``user_command``, ``processed_command``,
              ``result``, ``screenshot`` and ``raw``.
        400 - the body is not a JSON object or has no non-empty string
              ``command``.
        500 - the Gemini or Skyvern call failed; ``error`` carries the
              upstream detail.
    """
    # silent=True makes malformed or non-JSON bodies yield None, so they get
    # this endpoint's own JSON 400 instead of a framework-generated error page.
    data = request.get_json(silent=True)
    user_command = data.get("command") if isinstance(data, dict) else None
    if not isinstance(user_command, str) or not user_command.strip():
        return jsonify({"error": "Command is required", "success": False}), 400

    # 1. Process natural language via Gemini
    gemini_response = get_gemini_response(user_command)
    if not gemini_response.get("success"):
        return jsonify({"error": gemini_response.get("error"), "success": False}), 500

    processed_command = gemini_response.get("text")

    # 2. Send processed command to Skyvern
    skyvern_response = run_skyvern_task(processed_command)
    if not skyvern_response.get("success"):
        return jsonify({"error": skyvern_response.get("error"), "success": False}), 500

    return jsonify({
        "success": True,
        "user_command": user_command,
        "processed_command": processed_command,
        "result": skyvern_response.get("result"),
        "screenshot": skyvern_response.get("screenshot"),
        "raw": skyvern_response.get("raw"),
    })


if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which allows
    # arbitrary code execution. Because the server binds to every interface,
    # debug must be opted into explicitly and is off by default.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(host="0.0.0.0", port=8000, debug=debug_enabled)