# willammonster / willam_live.py
# Author: artecnosomatic
# Deploy Willam-viv-peg (commit 6077461)
import os
import asyncio
import json
import base64
from fastapi import FastAPI, WebSocket
from fastapi.responses import HTMLResponse
from google import generativeai as genai
# Setup Gemini API
# Replace with your actual key or set it in your environment
# (the "YOUR_API_KEY_HERE" fallback will fail authentication at request time).
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "YOUR_API_KEY_HERE")
genai.configure(api_key=GEMINI_API_KEY)
# FastAPI application serving the frontend page ("/") and the audio websocket ("/ws").
app = FastAPI()
SYSTEM_PROMPT = """
You are a high-fashion drag queen embodying Willam Belli (S4) through the precise RP British diction of The Vivienne.
CORE VOICE: Extremely articulate, theatrical, blunt, and narcissistic. Responses must be concise (50% shorter than standard).
THE PEG LEG: You have a wooden peg leg on the left. It's a constant grievance (splinters, silhouette issues).
LORE: Superbowl Sunday, Holiday Inn, Times Square. Two neurodivergent Dads (Bolivian social designer & handsome prince prince).
Keep lore rare; avoid direct questions about origins at first.
"""
@app.get("/")
async def get():
    """Serve the single-page frontend.

    Reads ``index.html`` from the current working directory and returns it
    as an HTML response.
    """
    # Explicit encoding avoids platform-dependent defaults (e.g. cp1252 on
    # Windows mangling non-ASCII characters in the page).
    with open("index.html", "r", encoding="utf-8") as f:
        return HTMLResponse(f.read())
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """WebSocket endpoint for the live chat.

    Accepts the connection, starts a Gemini chat session, then pumps incoming
    binary (audio) frames from the client until it disconnects, after which
    the socket is closed.
    """
    await websocket.accept()

    # Initialize Gemini 2.0 Flash session for this connection.
    # NOTE(review): true voice-to-voice needs the Multimodal Live API with
    # async chunk handling; this text chat session is a placeholder and is
    # not yet wired to the audio frames below.
    model = genai.GenerativeModel("gemini-2.0-flash-exp")
    chat_session = model.start_chat(history=[])

    async def receive_audio():
        """Drain binary frames from the client until it disconnects."""
        try:
            while True:
                data = await websocket.receive_bytes()
                # TODO: forward `data` to the Gemini session and relay the
                # synthesized response back. Frames are currently discarded.
        except Exception as e:
            # receive_bytes raises when the client disconnects; log and
            # fall through so the endpoint can close the socket cleanly.
            print(f"Error: {e}")

    # BUG FIX: the original defined receive_audio() but never awaited it,
    # so the socket was closed immediately after the handshake. Run the
    # receive loop so the connection stays open until the client leaves.
    await receive_audio()
    await websocket.close()
if __name__ == "__main__":
    import uvicorn

    # Allow the port to be overridden by the environment (e.g. on a PaaS
    # that injects PORT), defaulting to the original hard-coded 8000.
    port = int(os.environ.get("PORT", "8000"))
    uvicorn.run(app, host="0.0.0.0", port=port)