Spaces:

SalexAI
/

public-airo-api

Sleeping

App Files Files Community

SalexAI committed on Sep 7, 2025

Commit

978c3bc

verified ·

1 Parent(s): c132b4a

Update app.py

Browse files

Files changed (1) hide show

app.py +131 -44

app.py CHANGED Viewed

@@ -1,81 +1,168 @@
 import os
 import requests
-from fastapi import FastAPI, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 app = FastAPI()
-# Allow ScratchX / PenguinMod
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # 🔒 restrict later if desired
     allow_credentials=True,
-    allow_methods=["GET", "POST", "OPTIONS"],
     allow_headers=["*"],
 )
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
-OPENAI_REALTIME_URL = "https://api.openai.com/v1/realtime/sessions"
-def _mint_ephemeral(model: str, voice: str):
-    """Helper to call OpenAI and mint ephemeral token."""
-    if not OPENAI_API_KEY:
-        return JSONResponse(
-            status_code=500,
-            content={"error": "OPENAI_API_KEY not set in environment"},
-        )
-    headers = {
         "Authorization": f"Bearer {OPENAI_API_KEY}",
-        "Content-Type": "application/json",
-        "OpenAI-Beta": "realtime=v1",
     }
-    body = {"model": model, "voice": voice}
-    try:
-        r = requests.post(OPENAI_REALTIME_URL, headers=headers, json=body)
-        r.raise_for_status()
-        return r.json()
-    except Exception as e:
-        return JSONResponse(status_code=500, content={"error": str(e)})
-# --- Health endpoints ---
 @app.get("/health")
 @app.get("/health/")
-@app.get("/proxy/health")
-@app.get("/proxy/health/")
 def health():
-    return {"status": "ok"}
-@app.middleware("http")
-async def log_requests(request: Request, call_next):
-    print(f"[DEBUG] Incoming: {request.method} {request.url.path}")
-    response = await call_next(request)
-    return response
-# --- Ephemeral endpoints ---
 @app.get("/ephemeral")
 @app.get("/ephemeral/")
-@app.get("/proxy/ephemeral")
-@app.get("/proxy/ephemeral/")
-def ephemeral_get(model: str = "gpt-4o-realtime-preview", voice: str = "verse"):
-    return _mint_ephemeral(model, voice)
 @app.post("/ephemeral")
 @app.post("/ephemeral/")
-@app.post("/proxy/ephemeral")
-@app.post("/proxy/ephemeral/")
 async def ephemeral_post(request: Request):
     try:
         data = await request.json()
-        model = data.get("model", "gpt-4o-realtime-preview")
-        voice = data.get("voice", "verse")
     except Exception:
-        model, voice = "gpt-4o-realtime-preview", "verse"
-    return _mint_ephemeral(model, voice)

 import os
 import requests
+from typing import Optional
+from fastapi import FastAPI, Request, UploadFile, File, Form, Response
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 app = FastAPI()
+# CORS: deliberately wide open for development; restrict allow_origins before
+# exposing this service to untrusted pages.
+# NOTE(review): allow_origins=["*"] is combined with allow_credentials=True.
+# The CORS spec does not permit a literal "*" origin on credentialed requests,
+# and no handler visible here reads cookies — confirm whether credentials are
+# needed at all.
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],
     allow_credentials=True,
+    allow_methods=["GET", "POST", "OPTIONS", "HEAD"],
     allow_headers=["*"],
+    expose_headers=["*"],
 )
+# Server-side OpenAI key, read once at import time. It is None when the
+# variable is unset, which the handlers check for before calling OpenAI.
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+# Upstream OpenAI endpoints this service proxies to.
+REALTIME_SESSION_URL = "https://api.openai.com/v1/realtime/sessions"
+AUDIO_TRANSCRIBE_URL = "https://api.openai.com/v1/audio/transcriptions"
+# Defaults used when the caller does not supply a model / voice (tunable).
+DEFAULT_REALTIME_MODEL = "gpt-realtime"
+DEFAULT_VOICE = "verse"
+# Default speech-to-text model — fast + cheap; switch to gpt-4o-transcribe for peak accuracy
+DEFAULT_STT_MODEL = "gpt-4o-mini-transcribe"
+# ---------- helpers ----------
+def _json_err(msg: str, code: int = 500):
+    """Build a JSON error response shaped as {"error": msg} with HTTP status `code`.
+
+    The response always carries a wildcard Access-Control-Allow-Origin header
+    and an explicit JSON content type.
+    """
+    payload = {"error": msg}
+    extra_headers = {
+        "Access-Control-Allow-Origin": "*",
+        "Content-Type": "application/json",
+    }
+    return JSONResponse(content=payload, status_code=code, headers=extra_headers)
+def _auth_headers(beta_realtime: bool = False):
+    """Return the HTTP headers used to authenticate a call to OpenAI.
+
+    Always contains a Bearer Authorization header built from OPENAI_API_KEY.
+    When `beta_realtime` is true, the `OpenAI-Beta: realtime=v1` header is
+    added as well.
+    """
+    h = {
         "Authorization": f"Bearer {OPENAI_API_KEY}",
     }
+    if beta_realtime:
+        # required for Realtime session creation
+        h["OpenAI-Beta"] = "realtime=v1"
+    return h
+# ---------- health ----------
 @app.get("/health")
 @app.get("/health/")
 def health():
+    """Health check: always answers {"status": "ok"} with a wildcard CORS header."""
+    return JSONResponse({"status": "ok"}, headers={"Access-Control-Allow-Origin": "*"})
+# ---------- realtime ephemeral ----------
+def mint_ephemeral(model: str = DEFAULT_REALTIME_MODEL, voice: str = DEFAULT_VOICE):
+    """
+    Create a Realtime session at OpenAI and relay the upstream JSON body.
+
+    Args:
+        model: Realtime model name sent to OpenAI.
+        voice: Voice name sent to OpenAI.
+
+    Returns:
+        A JSONResponse. On success: HTTP 200 with OpenAI's JSON payload.
+        On failure: {"error": "..."} with
+        - 500 when OPENAI_API_KEY is not configured,
+        - OpenAI's own status code when OpenAI answered with 4xx/5xx,
+        - 500 for anything else (network error, timeout, non-JSON body).
+    """
+    if not OPENAI_API_KEY:
+        return _json_err("OPENAI_API_KEY not set in environment", 500)
+    try:
+        r = requests.post(
+            REALTIME_SESSION_URL,
+            headers={**_auth_headers(beta_realtime=True), "Content-Type": "application/json"},
+            json={"model": model, "voice": voice},
+            timeout=15,
+        )
+        r.raise_for_status()
+        return JSONResponse(
+            status_code=200,
+            content=r.json(),
+            headers={"Access-Control-Allow-Origin": "*", "Content-Type": "application/json"},
+        )
+    except requests.HTTPError as e:
+        # OpenAI answered with 4xx/5xx: keep its status code (as /transcribe
+        # does) so callers can tell auth, rate-limit and bad-model failures
+        # apart instead of receiving a blanket 500.
+        upstream = e.response
+        return _json_err(str(e), upstream.status_code if upstream is not None else 500)
+    except Exception as e:
+        # Boundary handler: network errors, timeouts, undecodable bodies.
+        return _json_err(str(e), 500)
 @app.get("/ephemeral")
 @app.get("/ephemeral/")
+def ephemeral_get(model: str = DEFAULT_REALTIME_MODEL, voice: str = DEFAULT_VOICE):
+    """GET variant: `model` and `voice` are optional query parameters; the result of mint_ephemeral is returned as-is."""
+    return mint_ephemeral(model, voice)
 @app.post("/ephemeral")
 @app.post("/ephemeral/")
 async def ephemeral_post(request: Request):
+    """POST variant: reads optional "model" and "voice" keys from a JSON body and returns the result of mint_ephemeral."""
     try:
         data = await request.json()
+        model = data.get("model", DEFAULT_REALTIME_MODEL)
+        voice = data.get("voice", DEFAULT_VOICE)
     except Exception:
+        # Best-effort: any failure while reading the body (missing or invalid
+        # JSON, or a JSON value without .get such as a list) falls back to the defaults.
+        model, voice = DEFAULT_REALTIME_MODEL, DEFAULT_VOICE
+    return mint_ephemeral(model, voice)
+# ---------- speech-to-text via OpenAI ----------
+# NOTE: this route is registered BEFORE the catch-all further down. Routes are
+# matched in registration order, so a POST-capable "/{_path:path}" declared
+# first would swallow POST /transcribe and this handler would never run.
+@app.post("/transcribe")
+@app.post("/transcribe/")
+async def transcribe(
+    file: UploadFile = File(..., description="Audio file (wav/mp3/m4a/webm/ogg)"),
+    model: str = Form(DEFAULT_STT_MODEL),
+    language: Optional[str] = Form(None),
+    response_format: str = Form("json"),  # "json" | "text" | "srt" | "verbose_json" | "vtt" (model dependent)
+):
+    """
+    Proxy to OpenAI audio/transcriptions.
+    - Default model: gpt-4o-mini-transcribe (fast). Use gpt-4o-transcribe for max accuracy.
+    - Send multipart/form-data with 'file' plus optional fields.
+
+    Args:
+        file: Uploaded audio; read fully into memory and forwarded upstream.
+        model: Transcription model name forwarded to OpenAI.
+        language: Optional language hint; omitted upstream when empty.
+        response_format: Output format forwarded to OpenAI.
+
+    Returns:
+        - text/plain body when response_format == "text" and OpenAI succeeded,
+        - OpenAI's JSON when it replies with application/json,
+        - {"text": "..."} for other successful payloads (srt, vtt),
+        - {"error": "..."} with OpenAI's status code when OpenAI fails,
+          or with 500 for a missing API key or any local failure.
+    """
+    if not OPENAI_API_KEY:
+        return _json_err("OPENAI_API_KEY not set in environment", 500)
+    try:
+        # read bytes once
+        data_bytes = await file.read()
+        files = {
+            "file": (file.filename or "audio", data_bytes, file.content_type or "application/octet-stream")
+        }
+        form = {"model": model}
+        if language:
+            form["language"] = language
+        if response_format:
+            form["response_format"] = response_format
+        # NOTE(review): requests.post is a blocking call inside an async
+        # handler; consider moving it to a worker thread if latency matters.
+        r = requests.post(
+            AUDIO_TRANSCRIBE_URL,
+            headers=_auth_headers(),
+            files=files,
+            data=form,  # multipart form fields
+            timeout=60,
+        )
+        # Check the upstream status BEFORE branching on response_format, so a
+        # failed call is never relayed as a 200 text/plain "transcript".
+        if not r.ok:
+            # bubble up OpenAI's error text and status
+            return _json_err(r.text or "Transcription failed", r.status_code or 500)
+        # If text/plain was requested, forward as text response
+        if response_format == "text":
+            return Response(content=r.text, media_type="text/plain", headers={"Access-Control-Allow-Origin": "*"})
+        ct = r.headers.get("content-type", "")
+        if "application/json" in ct:
+            return JSONResponse(r.json(), headers={"Access-Control-Allow-Origin": "*"})
+        # Non-JSON payloads (srt, vtt) — wrap as {text: "..."}
+        return JSONResponse({"text": r.text}, headers={"Access-Control-Allow-Origin": "*"})
+    except Exception as e:
+        # Boundary handler: upload read errors, network errors, undecodable JSON.
+        return _json_err(str(e), 500)
+# Catch-all (helps when callers accidentally hit "/" with signed proxy params).
+# Keep this the LAST route registered so it cannot shadow specific endpoints.
+@app.api_route("/", methods=["GET", "POST", "OPTIONS", "HEAD"])
+@app.api_route("/{_path:path}", methods=["GET", "POST", "OPTIONS", "HEAD"])
+async def catch_all(request: Request, _path: str = ""):
+    """
+    Fallback for any path/method not handled by a more specific route.
+
+    - Paths starting with "/transcribe" get a 405 pointing at POST /transcribe.
+    - OPTIONS requests get an empty 204 with permissive CORS headers.
+    - Everything else mints a realtime token with the default model and voice.
+    """
+    # Serve ephemeral token everywhere except the explicit /transcribe path
+    if request.url.path.startswith("/transcribe"):
+        return JSONResponse(
+            {"error": "use POST /transcribe for audio"}, status_code=405,
+            headers={"Access-Control-Allow-Origin": "*"}
+        )
+    if request.method == "OPTIONS":
+        return Response(
+            status_code=204,
+            headers={
+                "Access-Control-Allow-Origin": "*",
+                "Access-Control-Allow-Methods": "GET, POST, OPTIONS, HEAD",
+                "Access-Control-Allow-Headers": "*",
+            },
+        )
+    # default: mint realtime token (handy for clients that strip paths)
+    return mint_ephemeral(DEFAULT_REALTIME_MODEL, DEFAULT_VOICE)