Update main.py
Browse files
main.py
CHANGED
|
@@ -4,6 +4,7 @@ import uuid
|
|
| 4 |
from datetime import datetime
|
| 5 |
from typing import Optional, List, Literal
|
| 6 |
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
|
|
|
| 7 |
from pydantic import BaseModel, Field
|
| 8 |
import logging
|
| 9 |
import os
|
|
@@ -13,13 +14,14 @@ logging.basicConfig(level=logging.INFO)
|
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
| 15 |
app = FastAPI(
|
| 16 |
-
title="OpenAI Compatible
|
| 17 |
-
description="OpenAI-compatible API for image generation using Captions backend",
|
| 18 |
version="1.0.0"
|
| 19 |
)
|
| 20 |
|
| 21 |
# Configuration
|
| 22 |
CAPTIONS_BASE_URL = "https://core.captions-web-api.xyz/proxy/v1/gen-ai/image"
|
|
|
|
| 23 |
BEARER_TOKEN = os.getenv("CAPTIONS_BEARER_TOKEN", "eyJhbGciOiJSUzI1NiIsImtpZCI6IjU3YmZiMmExMWRkZmZjMGFkMmU2ODE0YzY4NzYzYjhjNjg3NTgxZDgiLCJ0eXAiOiJKV1QifQ.eyJnb29nbGUiOnRydWUsImlzcyI6Imh0dHBzOi8vc2VjdXJldG9rZW4uZ29vZ2xlLmNvbS9jYXB0aW9ucy1mNmRlOSIsImF1ZCI6ImNhcHRpb25zLWY2ZGU5IiwiYXV0aF90aW1lIjoxNzU1MzYyODEzLCJ1c2VyX2lkIjoic3hWek5XaUYyempXYmUxTjNjd3UiLCJzdWIiOiJzeFZ6TldpRjJ6aldiZTFOM2N3dSIsImlhdCI6MTc1NTM2MjgxMywiZXhwIjoxNzU1MzY2NDEzLCJmaXJlYmFzZSI6eyJpZGVudGl0aWVzIjp7fSwic2lnbl9pbl9wcm92aWRlciI6ImN1c3RvbSJ9fQ.jGuhWp-w8jlGy8xmMjqOyig_LVcr53udFgMjrQTJtKtE_J_iVkvMLncO2TnJ2BquoEp9pwVlZIG-imlFe6Uhtz95-t1oHENf5yzUWu3HocFsNVeAZh9avi_iObSYM_pFOT9lwRNzk1oMa6LbwViuVgTXvHDse9T4_nDfmCBbWngWksh1_JGtnrK2qPb5YD8Hr26itDRMx8mzUr2cQqtU9mU0R910CROqsNaQ9ovemeGe-2RT-hZku4VVYAMDOdvcFsgcf_BJTLRikmc3T7Ekx8T0KM6ZpTgr34wtnl7rpDBNOX0cOSYu3NEUDBnhNJKmPl5qL08gcYEur1ijP2mcTA")
|
| 24 |
|
| 25 |
# Model mappings from OpenAI model names to Captions model IDs
|
|
@@ -42,6 +44,142 @@ MODEL_MAPPINGS = {
|
|
| 42 |
"stable-diffusion": "stable-diffusion-3-5-large"
|
| 43 |
}
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
# Available models information
|
| 46 |
AVAILABLE_MODELS = {
|
| 47 |
"google-imagen-3": {"name": "Imagen 3", "provider": "Google"},
|
|
@@ -65,6 +203,14 @@ class ImageGenerationRequest(BaseModel):
|
|
| 65 |
style: Optional[Literal["vivid", "natural"]] = Field("vivid", description="Style of the generated images")
|
| 66 |
user: Optional[str] = Field(None, description="A unique identifier representing your end-user")
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
# OpenAI-compatible response models
|
| 69 |
class ImageData(BaseModel):
|
| 70 |
url: Optional[str] = None
|
|
@@ -86,6 +232,16 @@ class CaptionsSubmitRequest(BaseModel):
|
|
| 86 |
class CaptionsStatusRequest(BaseModel):
|
| 87 |
operationId: str
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
# In-memory storage for operation tracking (use Redis in production)
|
| 90 |
operations_store = {}
|
| 91 |
|
|
@@ -104,6 +260,10 @@ def get_aspect_ratio_from_size(size: str) -> int:
|
|
| 104 |
}
|
| 105 |
return size_map.get(size, 1)
|
| 106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
async def submit_image_generation(prompt: str, model: str = "dall-e-3", size: str = "1024x1024") -> str:
|
| 108 |
"""Submit image generation request to Captions API"""
|
| 109 |
headers = {
|
|
@@ -420,6 +580,286 @@ async def get_generation_status(operation_id: str):
|
|
| 420 |
logger.error(f"Error checking generation status: {e}")
|
| 421 |
raise HTTPException(status_code=500, detail="Failed to check generation status")
|
| 422 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
@app.get("/health")
|
| 424 |
async def health_check():
|
| 425 |
"""Health check endpoint"""
|
|
@@ -429,21 +869,30 @@ async def health_check():
|
|
| 429 |
async def root():
|
| 430 |
"""Root endpoint with API information"""
|
| 431 |
return {
|
| 432 |
-
"message": "OpenAI Compatible Image Generation API",
|
| 433 |
"version": "1.0.0",
|
| 434 |
"supported_models": list(AVAILABLE_MODELS.keys()),
|
| 435 |
"openai_aliases": list(MODEL_MAPPINGS.keys()),
|
|
|
|
|
|
|
| 436 |
"endpoints": {
|
| 437 |
"models": "/v1/models",
|
|
|
|
| 438 |
"image_generation": "/v1/images/generations",
|
| 439 |
"async_generation": "/v1/images/generations/async",
|
| 440 |
"status_check": "/v1/images/generations/status/{operation_id}",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
"health": "/health",
|
| 442 |
"docs": "/docs"
|
| 443 |
},
|
| 444 |
"example_curl": {
|
| 445 |
"generate_image": "curl -X POST 'http://localhost:8000/v1/images/generations' -H 'Content-Type: application/json' -d '{\"prompt\": \"a cat\", \"model\": \"dall-e-3\", \"size\": \"1024x1024\"}'",
|
| 446 |
-
"list_models": "curl -X GET 'http://localhost:8000/v1/models'"
|
|
|
|
|
|
|
| 447 |
}
|
| 448 |
}
|
| 449 |
|
|
|
|
| 4 |
from datetime import datetime
|
| 5 |
from typing import Optional, List, Literal
|
| 6 |
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
| 7 |
+
from fastapi.responses import StreamingResponse
|
| 8 |
from pydantic import BaseModel, Field
|
| 9 |
import logging
|
| 10 |
import os
|
|
|
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
| 16 |
app = FastAPI(
    title="OpenAI Compatible API - Images & TTS",
    description="OpenAI-compatible API for image generation and text-to-speech using Captions backend",
    version="1.0.0"
)

# Configuration
CAPTIONS_BASE_URL = "https://core.captions-web-api.xyz/proxy/v1/gen-ai/image"
CAPTIONS_TTS_BASE_URL = "https://core.captions-web-api.xyz/proxy/v1/voiceover/tts"
# SECURITY: the token must come from the environment. Previously a real JWT
# was committed here as the getenv() fallback default — a leaked credential.
# That hardcoded token has been removed; treat it as compromised and rotate it.
BEARER_TOKEN = os.getenv("CAPTIONS_BEARER_TOKEN", "")
if not BEARER_TOKEN:
    logger.warning("CAPTIONS_BEARER_TOKEN is not set; requests to the Captions backend will fail")
|
| 26 |
|
| 27 |
# Model mappings from OpenAI model names to Captions model IDs
|
|
|
|
| 44 |
"stable-diffusion": "stable-diffusion-3-5-large"
|
| 45 |
}
|
| 46 |
|
| 47 |
+
# TTS Voice mappings from OpenAI voice names to Captions voice IDs
# Keys are lowercase alias names accepted in TTSRequest.voice; values are the
# opaque Captions voice IDs sent upstream. The first six entries cover the
# canonical OpenAI voice names; the rest are extra aliases by speaker name.
# NOTE: "alloy" and "brandon" intentionally map to the same voice ID.
VOICE_MAPPINGS = {
    "alloy": "0s0tckZNA4EDjsNWIGpn",  # Brandon (OpenAI)
    "echo": "VfJEoIjcuedwbnVocfwS",  # John (OpenAI)
    "fable": "aIJGQIEdPBlV4bWoLgiC",  # Jordan (OpenAI)
    "onyx": "NkxXZNRZuGVagP3gLTlk",  # James (OpenAI)
    "nova": "dEcutGbESImg8uIOJOb3",  # Julie (OpenAI)
    "shimmer": "OsLeLksKZUcYFR6Rj3AV",  # Lea (OpenAI)
    # Additional popular voices
    "brandon": "0s0tckZNA4EDjsNWIGpn",
    "nicole": "2OMmjuvizlUUkgCLYrEU",
    "jamal": "4VCohb9n7kc8qQAMbC9T",
    "xavier": "6LVJ04FKnALQY4vuI3xi",
    "emma": "7pjl1PlCtijY5E7k9nex",
    "alexandra": "8OwpkBz4OXvyOgg6uSVM",
    "josh": "9H5PLh8sHyc4NiQba2sO",
    "vincent": "A6YwaBVPdqMuPU5guI31",
    "bella": "DVkGI1gOEQwhI9D98kgV",
    "sophia": "Dw4Y69nCUd0lijzanffn",
    "ethan": "FNrD9UXPRmnlfELyZfOH",
    "greg": "GFvARbVuizGj4jkdG1iN",
    "isabella": "GNliQ6gOp8Y96hz0uPSY",
    "mason": "Jc5LFEs9ONmW3vilHdpg",
    "justin": "LWoskltOczE5nVUCPFCl",
    "bradford": "Lvu57Tdi6WU0LrCkf3W0",
    "ally": "NJSANg1RFfytiL3apSc0",
    "maddy": "NX9RZUSep3h9RzDoipkJ",
    "george": "NmypOAkKcWovPSbjMJPk",
    "brian": "Pt04qYLGmK9HateRrrdh",
    "taylor": "QQ0vIwK2AgVtbHZk3wYq",
    "samara": "QyFFVFY5hzA5T7sVv9JI",
    "linda": "RzrSQgnXwblMgDyOeOuy",
    "liam": "SveSw38zJT860NRIeiVk",
    "hope": "UfOKaDAlzOMjZnyEhPH1",
    "william": "VesROIDY8lJS6zz8xTRb",
    "dwight": "W76fVeloaQcuN71bIQF6",
    "lisa": "ZbuIjlIzHpIc8oO17kWW",
    "arial": "aCWKe1NzicFCAkohj7TY",
    "elliot": "arGkfQC5Z0yNlNrYLlE8",
    "rhea": "blo9kiIBaFNr0UCI2gpA",
    "leo": "bqvJyFf80waIYPYiv6zX",
    "eve": "cQ0q3hcj9Bm4IccGDY9C",
    "serena": "e3zFWWHHfNk6vOh5kbBX",
    "domi": "eSojoW8lMv5whHRCJugk",
    "alex": "eXjri1H442qcs35pWaTr",
    "blondie": "fHmK4z2cR0VXxvQmd7ei",
    "nathan": "gO0Do5f1lCvLoIvbl6dx",
    "daniel": "grqhFog58KWjgcO6t4ya",
    "tara": "iBsjG6Kk8tmO0ldX7Aho",
    "maya": "iWBJcyi2qdFpXYRGt42f",
    "ashley": "j51tO8Upz9wEVIUkynCJ",
    "matthew": "lJQLBnDNpkkc4RIgqhIZ",
    "andrew": "lQS5Hszd1P0W2m18M4ME",
    "olivia": "ltYBSrCwVJp0I99DmLfq",
    "adam": "m1t6JeyI9DXRhnCg8kuX",
    "mark": "okc8JAt7Vb3u20k4soKB",
    "micah": "r0ZdS6QBWDxmcRN7HxWq",
    "elli": "r4gww888sYU82aKZSUHy",
    "sylvia": "rJmVxgRa6YI9bALBqvtC",
    "noah": "rgqCbvqWKIaxYs54d7xS",
    "kayla": "s1YBw3dmanbLNCq7MXI8",
    "carla": "sUXCiUMyEVHBC7sRlPZY",
    "owen": "tijk10imWq7nGRawDD62",
    "lila": "wjOnivHr3V1ZGNuCMZJI",
    "sam": "xpkvvHUyS37s3f84MObW",
    "antoni": "y5nGwtfzvQ2OhrBXZnj5",
    "ava": "zYqKDc8tFTIsAhJFpTaC"
}
|
| 115 |
+
|
| 116 |
+
# Available voices information
# Keyed by Captions voice ID; values carry display metadata returned by the
# /v1/voices endpoint. "provider" is the upstream TTS vendor label as exposed
# by the Captions backend. Should stay in sync with VOICE_MAPPINGS values.
AVAILABLE_VOICES = {
    "0s0tckZNA4EDjsNWIGpn": {"name": "Brandon", "gender": "male", "accent": "american", "provider": "OpenAI"},
    "2OMmjuvizlUUkgCLYrEU": {"name": "Nicole", "gender": "female", "accent": "australian", "provider": "Cartesia"},
    "4VCohb9n7kc8qQAMbC9T": {"name": "Jamal", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "6LVJ04FKnALQY4vuI3xi": {"name": "Xavier", "gender": "male", "accent": "american", "provider": "PlayHT"},
    "7pjl1PlCtijY5E7k9nex": {"name": "Emma", "gender": "female", "accent": "american", "provider": "Google"},
    "8OwpkBz4OXvyOgg6uSVM": {"name": "Alexandra", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "9H5PLh8sHyc4NiQba2sO": {"name": "Josh", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "A6YwaBVPdqMuPU5guI31": {"name": "Vincent", "gender": "male", "accent": "american", "provider": "PlayHT"},
    "DVkGI1gOEQwhI9D98kgV": {"name": "Bella", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "Dw4Y69nCUd0lijzanffn": {"name": "Sophia", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "FNrD9UXPRmnlfELyZfOH": {"name": "Ethan", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "GFvARbVuizGj4jkdG1iN": {"name": "Greg", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "GNliQ6gOp8Y96hz0uPSY": {"name": "Isabella", "gender": "female", "accent": "american", "provider": "Google"},
    "Jc5LFEs9ONmW3vilHdpg": {"name": "Mason", "gender": "male", "accent": "american", "provider": "Google"},
    "LWoskltOczE5nVUCPFCl": {"name": "Justin", "gender": "male", "accent": "american", "provider": "Cartesia"},
    "Lvu57Tdi6WU0LrCkf3W0": {"name": "Bradford", "gender": "male", "accent": "british", "provider": "ElevenLabs"},
    "NJSANg1RFfytiL3apSc0": {"name": "Ally", "gender": "female", "accent": "american", "provider": "PlayHT"},
    "NX9RZUSep3h9RzDoipkJ": {"name": "Maddy", "gender": "female", "accent": "american", "provider": "PlayHT"},
    "NkxXZNRZuGVagP3gLTlk": {"name": "James", "gender": "male", "accent": "british", "provider": "OpenAI"},
    "NmypOAkKcWovPSbjMJPk": {"name": "George", "gender": "male", "accent": "british", "provider": "Cartesia"},
    "OsLeLksKZUcYFR6Rj3AV": {"name": "Lea", "gender": "female", "accent": "american", "provider": "OpenAI"},
    "Pt04qYLGmK9HateRrrdh": {"name": "Brian", "gender": "male", "accent": "american", "provider": "Cartesia"},
    "QQ0vIwK2AgVtbHZk3wYq": {"name": "Taylor", "gender": "female", "accent": "british", "provider": "ElevenLabs"},
    "QyFFVFY5hzA5T7sVv9JI": {"name": "Samara", "gender": "female", "accent": "british", "provider": "ElevenLabs"},
    "RzrSQgnXwblMgDyOeOuy": {"name": "Linda", "gender": "female", "accent": "british", "provider": "PlayHT"},
    "SveSw38zJT860NRIeiVk": {"name": "Liam", "gender": "male", "accent": "american", "provider": "Google"},
    "UfOKaDAlzOMjZnyEhPH1": {"name": "Hope", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "VesROIDY8lJS6zz8xTRb": {"name": "William", "gender": "male", "accent": "american", "provider": "Google"},
    "VfJEoIjcuedwbnVocfwS": {"name": "John", "gender": "male", "accent": "american", "provider": "OpenAI"},
    "W76fVeloaQcuN71bIQF6": {"name": "Dwight", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "ZbuIjlIzHpIc8oO17kWW": {"name": "Lisa", "gender": "female", "accent": "american", "provider": "PlayHT"},
    "aCWKe1NzicFCAkohj7TY": {"name": "Arial", "gender": "female", "accent": "american", "provider": "Cartesia"},
    "aIJGQIEdPBlV4bWoLgiC": {"name": "Jordan", "gender": "male", "accent": "american", "provider": "OpenAI"},
    "arGkfQC5Z0yNlNrYLlE8": {"name": "Elliot", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "blo9kiIBaFNr0UCI2gpA": {"name": "Rhea", "gender": "female", "accent": "australian", "provider": "PlayHT"},
    "bqvJyFf80waIYPYiv6zX": {"name": "Leo", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "cQ0q3hcj9Bm4IccGDY9C": {"name": "Eve", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "dEcutGbESImg8uIOJOb3": {"name": "Julie", "gender": "female", "accent": "american", "provider": "OpenAI"},
    "e3zFWWHHfNk6vOh5kbBX": {"name": "Serena", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "eSojoW8lMv5whHRCJugk": {"name": "Domi", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "eXjri1H442qcs35pWaTr": {"name": "Alex", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "fHmK4z2cR0VXxvQmd7ei": {"name": "Blondie", "gender": "female", "accent": "british", "provider": "ElevenLabs"},
    "gO0Do5f1lCvLoIvbl6dx": {"name": "Nathan", "gender": "male", "accent": "british", "provider": "PlayHT"},
    "grqhFog58KWjgcO6t4ya": {"name": "Daniel", "gender": "male", "accent": "american", "provider": "PlayHT"},
    "iBsjG6Kk8tmO0ldX7Aho": {"name": "Tara", "gender": "female", "accent": "american", "provider": "Cartesia"},
    "iWBJcyi2qdFpXYRGt42f": {"name": "Maya", "gender": "female", "accent": "american", "provider": "Cartesia"},
    "j51tO8Upz9wEVIUkynCJ": {"name": "Ashley", "gender": "female", "accent": "american", "provider": "OpenAI"},
    "lJQLBnDNpkkc4RIgqhIZ": {"name": "Matthew", "gender": "male", "accent": "australian", "provider": "Cartesia"},
    "lQS5Hszd1P0W2m18M4ME": {"name": "Andrew", "gender": "male", "accent": "american", "provider": "Cartesia"},
    "ltYBSrCwVJp0I99DmLfq": {"name": "Olivia", "gender": "female", "accent": "american", "provider": "Google"},
    "m1t6JeyI9DXRhnCg8kuX": {"name": "Adam", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "okc8JAt7Vb3u20k4soKB": {"name": "Mark", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "r0ZdS6QBWDxmcRN7HxWq": {"name": "Micah", "gender": "male", "accent": "british", "provider": "ElevenLabs"},
    "r4gww888sYU82aKZSUHy": {"name": "Elli", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "rJmVxgRa6YI9bALBqvtC": {"name": "Sylvia", "gender": "female", "accent": "american", "provider": "OpenAI"},
    "rgqCbvqWKIaxYs54d7xS": {"name": "Noah", "gender": "male", "accent": "australian", "provider": "ElevenLabs"},
    "s1YBw3dmanbLNCq7MXI8": {"name": "Kayla", "gender": "female", "accent": "american", "provider": "OpenAI"},
    "sUXCiUMyEVHBC7sRlPZY": {"name": "Carla", "gender": "female", "accent": "american", "provider": "Cartesia"},
    "tijk10imWq7nGRawDD62": {"name": "Owen", "gender": "male", "accent": "american", "provider": "Google"},
    "wjOnivHr3V1ZGNuCMZJI": {"name": "Lila", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "xpkvvHUyS37s3f84MObW": {"name": "Sam", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "y5nGwtfzvQ2OhrBXZnj5": {"name": "Antoni", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "zYqKDc8tFTIsAhJFpTaC": {"name": "Ava", "gender": "female", "accent": "american", "provider": "Google"}
}
|
| 182 |
+
|
| 183 |
# Available models information
|
| 184 |
AVAILABLE_MODELS = {
|
| 185 |
"google-imagen-3": {"name": "Imagen 3", "provider": "Google"},
|
|
|
|
| 203 |
style: Optional[Literal["vivid", "natural"]] = Field("vivid", description="Style of the generated images")
|
| 204 |
user: Optional[str] = Field(None, description="A unique identifier representing your end-user")
|
| 205 |
|
| 206 |
+
# TTS request models
class TTSRequest(BaseModel):
    """OpenAI-compatible request body for the /v1/audio/speech endpoints."""

    model: str = Field("tts-1", description="The TTS model to use")
    # min_length=1 rejects empty text up front instead of failing upstream.
    input: str = Field(..., min_length=1, description="The text to generate audio for")
    voice: str = Field("alloy", description="The voice to use for generation")
    # Fixed garbled description ("The format to audio in").
    response_format: Optional[Literal["mp3", "opus", "aac", "flac"]] = Field(
        "mp3", description="The format to return the audio in"
    )
    # NOTE(review): speed is validated but does not appear to be forwarded to
    # the Captions backend anywhere in this file — confirm intended behavior.
    speed: Optional[float] = Field(1.0, ge=0.25, le=4.0, description="The speed of the generated audio")
|
| 213 |
+
|
| 214 |
# OpenAI-compatible response models
|
| 215 |
class ImageData(BaseModel):
|
| 216 |
url: Optional[str] = None
|
|
|
|
| 232 |
class CaptionsStatusRequest(BaseModel):
|
| 233 |
operationId: str
|
| 234 |
|
| 235 |
+
# TTS models for Captions API
class CaptionsTTSSubmitRequest(BaseModel):
    """Payload for POST {CAPTIONS_TTS_BASE_URL}/generate/submit."""

    text: str
    voiceId: str = "4VCohb9n7kc8qQAMbC9T"  # Default to Jamal
    modelId: str = "QHwZJt6xARgiV04YqEFY"  # Default TTS model
    # Client-generated project identifier sent with the submission.
    optimisticProjectId: str

class CaptionsTTSStatusRequest(BaseModel):
    """Payload for POST {CAPTIONS_TTS_BASE_URL}/generate/status."""

    operationId: str
|
| 244 |
+
|
| 245 |
# In-memory storage for operation tracking (use Redis in production)
|
| 246 |
operations_store = {}
|
| 247 |
|
|
|
|
| 260 |
}
|
| 261 |
return size_map.get(size, 1)
|
| 262 |
|
| 263 |
+
def get_captions_voice_id(openai_voice: str) -> str:
    """Translate an OpenAI voice alias (case-insensitive) to a Captions voice ID.

    Unknown aliases fall back to Brandon, the default voice.
    """
    alias = openai_voice.lower()
    if alias in VOICE_MAPPINGS:
        return VOICE_MAPPINGS[alias]
    return "0s0tckZNA4EDjsNWIGpn"  # Brandon
|
| 266 |
+
|
| 267 |
async def submit_image_generation(prompt: str, model: str = "dall-e-3", size: str = "1024x1024") -> str:
|
| 268 |
"""Submit image generation request to Captions API"""
|
| 269 |
headers = {
|
|
|
|
| 580 |
logger.error(f"Error checking generation status: {e}")
|
| 581 |
raise HTTPException(status_code=500, detail="Failed to check generation status")
|
| 582 |
|
| 583 |
+
# TTS Endpoints
@app.post("/v1/audio/speech")
async def create_speech(request: TTSRequest):
    """
    Generate speech from text using an OpenAI-compatible API.

    Submits the text to the Captions TTS backend, polls the status endpoint
    once per second (up to 60 s), then streams the finished audio back.

    Raises:
        HTTPException: upstream submit failure, generation failure (500),
            timeout (408), or any unexpected error (500).
    """
    # Correct media type per supported format; previously every response was
    # labeled "audio/mpeg" even when opus/aac/flac was requested.
    media_types = {
        "mp3": "audio/mpeg",
        "opus": "audio/opus",
        "aac": "audio/aac",
        "flac": "audio/flac",
    }
    try:
        # Convert OpenAI voice to Captions voice ID
        voice_id = get_captions_voice_id(request.voice)

        # Prepare the request for Captions API
        captions_request = CaptionsTTSSubmitRequest(
            text=request.input,
            voiceId=voice_id,
            modelId="QHwZJt6xARgiV04YqEFY",  # Default TTS model
            optimisticProjectId=f"tts-{uuid.uuid4().hex[:8]}"
        )

        # Shared headers for every Captions call in this request
        # (previously duplicated verbatim for submit and each status poll).
        headers = {
            "Authorization": f"Bearer {BEARER_TOKEN}",
            "Content-Type": "application/json",
            "x-app-version": "1.0.0",
            "x-device-id": "api-client"
        }

        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{CAPTIONS_TTS_BASE_URL}/generate/submit",
                json=captions_request.dict(),
                headers=headers,
                timeout=30.0
            )

            if response.status_code != 200:
                logger.error(f"TTS submit failed: {response.text}")
                raise HTTPException(status_code=response.status_code, detail="TTS generation failed")

            result = response.json()
            operation_id = result["data"]["operationId"]

            # Store operation details (in-memory operations_store)
            operations_store[operation_id] = {
                "type": "tts",
                "voice_id": voice_id,
                "text": request.input,
                "format": request.response_format,
                "created_at": datetime.now()
            }

            # Poll for completion: 1 s interval, 60 attempts max (~60 s).
            max_retries = 60
            for _ in range(max_retries):
                status_response = await client.post(
                    f"{CAPTIONS_TTS_BASE_URL}/generate/status",
                    json={"operationId": operation_id},
                    headers=headers,
                    timeout=30.0
                )

                if status_response.status_code != 200:
                    # Transient status failure: retry after a short wait.
                    await asyncio.sleep(1)
                    continue

                status_result = status_response.json()
                state = status_result["data"]["state"]

                if state == "COMPLETE":
                    audio_url = status_result["data"]["url"]

                    # Fetch the finished audio and stream it to the caller.
                    audio_response = await client.get(audio_url)
                    if audio_response.status_code != 200:
                        raise HTTPException(status_code=500, detail="Failed to fetch generated audio")
                    return StreamingResponse(
                        iter([audio_response.content]),
                        media_type=media_types.get(request.response_format, "audio/mpeg"),
                        headers={
                            "Content-Disposition": f"attachment; filename=speech.{request.response_format}"
                        }
                    )

                if state == "FAILED":
                    raise HTTPException(status_code=500, detail="TTS generation failed")

                # Still processing, wait and retry
                await asyncio.sleep(1)

            # Timeout
            raise HTTPException(status_code=408, detail="TTS generation timed out")

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in TTS generation: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
| 688 |
+
|
| 689 |
+
@app.post("/v1/audio/speech/async")
async def create_speech_async(request: TTSRequest, background_tasks: BackgroundTasks):
    """
    Start async TTS generation and return operation ID
    """
    try:
        # Resolve the OpenAI voice alias to the upstream Captions voice ID.
        captions_voice = get_captions_voice_id(request.voice)

        # Build the upstream submission payload.
        submit_payload = CaptionsTTSSubmitRequest(
            text=request.input,
            voiceId=captions_voice,
            modelId="QHwZJt6xARgiV04YqEFY",  # Default TTS model
            optimisticProjectId=f"tts-{uuid.uuid4().hex[:8]}"
        )

        request_headers = {
            "Authorization": f"Bearer {BEARER_TOKEN}",
            "Content-Type": "application/json",
            "x-app-version": "1.0.0",
            "x-device-id": "api-client"
        }

        async with httpx.AsyncClient() as client:
            submit_response = await client.post(
                f"{CAPTIONS_TTS_BASE_URL}/generate/submit",
                json=submit_payload.dict(),
                headers=request_headers,
                timeout=30.0
            )

            if submit_response.status_code != 200:
                logger.error(f"TTS submit failed: {submit_response.text}")
                raise HTTPException(status_code=submit_response.status_code, detail="TTS generation failed")

            operation_id = submit_response.json()["data"]["operationId"]

            # Record the pending operation so the status/download endpoints
            # can find it later.
            operations_store[operation_id] = {
                "type": "tts",
                "voice_id": captions_voice,
                "text": request.input,
                "format": request.response_format,
                "created_at": datetime.now(),
                "status": "processing"
            }

            return {"operation_id": operation_id, "status": "processing"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in async TTS generation: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
| 744 |
+
|
| 745 |
+
@app.get("/v1/audio/speech/status/{operation_id}")
async def get_tts_status(operation_id: str):
    """
    Check the status of a TTS generation operation
    """
    if operation_id not in operations_store:
        raise HTTPException(status_code=404, detail="Operation not found")

    operation = operations_store[operation_id]
    if operation["type"] != "tts":
        raise HTTPException(status_code=400, detail="Invalid operation type")

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{CAPTIONS_TTS_BASE_URL}/generate/status",
                json={"operationId": operation_id},
                headers={
                    "Authorization": f"Bearer {BEARER_TOKEN}",
                    "Content-Type": "application/json",
                    "x-app-version": "1.0.0",
                    "x-device-id": "api-client"
                },
                timeout=30.0
            )

            if response.status_code != 200:
                # NOTE: returns a 200 with an error payload (rather than
                # raising) so pollers can keep polling — confirm intended.
                return {"status": "error", "error": "Failed to check status"}

            result = response.json()
            state = result["data"]["state"]

            if state == "COMPLETE":
                audio_url = result["data"]["url"]
                # Cache completion status and audio URL so the
                # /v1/audio/speech/download endpoint can serve the file.
                operations_store[operation_id]["status"] = "completed"
                operations_store[operation_id]["url"] = audio_url
                return {
                    "status": "completed",
                    "url": audio_url,
                    "operation_id": operation_id
                }
            elif state == "FAILED":
                operations_store[operation_id]["status"] = "failed"
                return {"status": "failed", "operation_id": operation_id}
            else:
                # Any other upstream state is treated as still in progress.
                operations_store[operation_id]["status"] = "processing"
                return {"status": "processing", "operation_id": operation_id}

    except Exception as e:
        logger.error(f"Error checking TTS status: {e}")
        raise HTTPException(status_code=500, detail="Failed to check TTS status")
|
| 796 |
+
|
| 797 |
+
@app.get("/v1/audio/speech/download/{operation_id}")
async def download_tts_audio(operation_id: str):
    """
    Download the generated audio file for a completed TTS operation.

    The operation must have been marked "completed" (with a cached URL) by a
    prior call to the status endpoint.

    Raises:
        HTTPException: 404 if the operation or URL is unknown, 400 if the
            operation is not a completed TTS job, 500 on fetch failure.
    """
    if operation_id not in operations_store:
        raise HTTPException(status_code=404, detail="Operation not found")

    operation = operations_store[operation_id]
    if operation["type"] != "tts":
        raise HTTPException(status_code=400, detail="Invalid operation type")

    if operation.get("status") != "completed":
        raise HTTPException(status_code=400, detail="Audio not ready yet")

    audio_url = operation.get("url")
    if not audio_url:
        raise HTTPException(status_code=404, detail="Audio URL not found")

    try:
        async with httpx.AsyncClient() as client:
            audio_response = await client.get(audio_url)
            if audio_response.status_code != 200:
                raise HTTPException(status_code=500, detail="Failed to fetch generated audio")

            format_type = operation.get("format", "mp3")
            # Report the media type matching the stored format; previously
            # every download was labeled "audio/mpeg" even for opus/aac/flac.
            media_type = {
                "mp3": "audio/mpeg",
                "opus": "audio/opus",
                "aac": "audio/aac",
                "flac": "audio/flac",
            }.get(format_type, "audio/mpeg")
            return StreamingResponse(
                iter([audio_response.content]),
                media_type=media_type,
                headers={
                    "Content-Disposition": f"attachment; filename=speech.{format_type}"
                }
            )

    # Previously the broad `except Exception` swallowed the fetch-failure
    # HTTPException above and re-raised it with the wrong detail message.
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error downloading TTS audio: {e}")
        raise HTTPException(status_code=500, detail="Failed to download audio")
|
| 834 |
+
|
| 835 |
+
@app.get("/v1/voices")
async def list_voices():
    """
    List available TTS voices with their OpenAI-compatible alias (if any),
    gender, accent, and upstream provider.
    """
    # Invert VOICE_MAPPINGS once (O(V + M)) instead of scanning the whole
    # mapping for every voice (O(V * M)). setdefault keeps the FIRST alias
    # in declaration order, matching the old per-voice scan's behavior when
    # several aliases share one ID (e.g. "alloy" and "brandon").
    alias_by_id = {}
    for name, mapped_id in VOICE_MAPPINGS.items():
        alias_by_id.setdefault(mapped_id, name)

    voices = [
        {
            "id": voice_id,
            "name": voice_info["name"],
            "openai_name": alias_by_id.get(voice_id),
            "gender": voice_info["gender"],
            "accent": voice_info["accent"],
            "provider": voice_info["provider"]
        }
        for voice_id, voice_info in AVAILABLE_VOICES.items()
    ]

    return {
        "voices": voices,
        "openai_compatible": ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
    }
|
| 862 |
+
|
| 863 |
@app.get("/health")
|
| 864 |
async def health_check():
|
| 865 |
"""Health check endpoint"""
|
|
|
|
| 869 |
async def root():
    """Root endpoint with API information."""
    # Advertise only the canonical OpenAI voice aliases that actually have a
    # Captions mapping. The old `list(set([...]))` round-trip produced the
    # same elements in nondeterministic order; this is deterministic.
    openai_voice_aliases = [
        v for v in ("alloy", "echo", "fable", "onyx", "nova", "shimmer")
        if v in VOICE_MAPPINGS
    ]
    return {
        "message": "OpenAI Compatible Image Generation & TTS API",
        "version": "1.0.0",
        "supported_models": list(AVAILABLE_MODELS.keys()),
        "openai_aliases": list(MODEL_MAPPINGS.keys()),
        "supported_voices": len(AVAILABLE_VOICES),
        "openai_voice_aliases": openai_voice_aliases,
        "endpoints": {
            "models": "/v1/models",
            "voices": "/v1/voices",
            "image_generation": "/v1/images/generations",
            "async_generation": "/v1/images/generations/async",
            "status_check": "/v1/images/generations/status/{operation_id}",
            "tts": "/v1/audio/speech",
            "tts_async": "/v1/audio/speech/async",
            "tts_status": "/v1/audio/speech/status/{operation_id}",
            "tts_download": "/v1/audio/speech/download/{operation_id}",
            "health": "/health",
            "docs": "/docs"
        },
        "example_curl": {
            "generate_image": "curl -X POST 'http://localhost:8000/v1/images/generations' -H 'Content-Type: application/json' -d '{\"prompt\": \"a cat\", \"model\": \"dall-e-3\", \"size\": \"1024x1024\"}'",
            "list_models": "curl -X GET 'http://localhost:8000/v1/models'",
            "generate_speech": "curl -X POST 'http://localhost:8000/v1/audio/speech' -H 'Content-Type: application/json' -d '{\"model\": \"tts-1\", \"input\": \"Hello world\", \"voice\": \"alloy\"}' --output speech.mp3",
            "list_voices": "curl -X GET 'http://localhost:8000/v1/voices'"
        }
    }
|
| 898 |
|