"""FastAPI + Gemini voice-chat demo (Hugging Face Space)."""
import os
import asyncio
import json
import base64
from fastapi import FastAPI, WebSocket
from fastapi.responses import HTMLResponse
from google import generativeai as genai

# Gemini API key is read from the environment; the string literal is a
# placeholder fallback only — export GEMINI_API_KEY (or replace the literal)
# before running, otherwise genai calls will fail to authenticate.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "YOUR_API_KEY_HERE")
genai.configure(api_key=GEMINI_API_KEY)

app = FastAPI()

# Persona/system prompt for the Gemini model. NOTE(review): defined here but
# not visibly passed to the model in this file — presumably wired in by the
# (eventual) live-session setup; confirm before relying on it.
SYSTEM_PROMPT = """
You are a high-fashion drag queen embodying Willam Belli (S4) through the precise RP British diction of The Vivienne.
CORE VOICE: Extremely articulate, theatrical, blunt, and narcissistic. Responses must be concise (50% shorter than standard).
THE PEG LEG: You have a wooden peg leg on the left. It's a constant grievance (splinters, silhouette issues).
LORE: Superbowl Sunday, Holiday Inn, Times Square. Two neurodivergent Dads (Bolivian social designer & handsome prince prince).
Keep lore rare; avoid direct questions about origins at first.
"""
@app.get("/")
async def get():
    """Serve the single-page client UI from index.html.

    Returns:
        HTMLResponse with the file's contents.

    Raises:
        FileNotFoundError: if index.html is absent from the working directory.
    """
    # Fix: the route decorator was missing, so this handler was never
    # registered with the app. Explicit encoding avoids platform-dependent
    # default decoding of the HTML file.
    with open("index.html", "r", encoding="utf-8") as f:
        return HTMLResponse(f.read())
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Accept a client WebSocket and set up a Gemini chat session.

    NOTE(review): this is still placeholder plumbing — the inner receive
    loop is defined but never started, so the socket is accepted, a chat
    session is created, and the connection is closed immediately. A real
    implementation should bridge audio to the Gemini Multimodal Live API
    (or a Web Speech API -> Gemini -> TTS flow, per the original notes).
    """
    await websocket.accept()

    # Initialize a Gemini 2.0 Flash chat session (experimental model).
    # For true "Live" voice-to-voice, the Multimodal Live API (WebSocket)
    # is preferred but requires more complex async chunk handling.
    model = genai.GenerativeModel("gemini-2.0-flash-exp")
    chat_session = model.start_chat(history=[])

    async def receive_audio():
        # Placeholder consumer: drains incoming binary audio frames.
        try:
            while True:
                data = await websocket.receive_bytes()
                # TODO: forward `data` to Gemini Live or a speech-to-text
                # bridge; currently the bytes are discarded.
                pass
        except Exception as e:
            # Broad catch is deliberate best-effort: a client disconnect
            # surfaces here and ends the loop with a log line.
            print(f"Error: {e}")

    # Fix: the endpoint decorator was missing, so this handler was never
    # registered with the app. The early close below is preserved from the
    # original placeholder behavior.
    await websocket.close()
if __name__ == "__main__":
    # Launch the ASGI app with uvicorn when this file is run directly.
    import uvicorn

    bind_host = "0.0.0.0"
    bind_port = 8000
    uvicorn.run(app, host=bind_host, port=bind_port)