|
|
import asyncio |
|
|
import httpx |
|
|
import uuid |
|
|
from datetime import datetime |
|
|
from typing import Optional, List, Literal |
|
|
from fastapi import FastAPI, HTTPException, BackgroundTasks |
|
|
from fastapi.responses import StreamingResponse |
|
|
from pydantic import BaseModel, Field |
|
|
import logging |
|
|
import os |
|
|
|
|
|
|
|
|
# Configure the root logger at INFO level and create the logger used by the
# functions and route handlers in this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
|
|
|
|
# FastAPI application object; the @app.get / @app.post decorators in this file
# register their routes on it. The strings below are the API's title,
# description and version metadata.
app = FastAPI(
    title="OpenAI Compatible API - Images & TTS",
    description="OpenAI-compatible API for image generation and text-to-speech using Captions backend",
    version="1.0.0"
)
|
|
|
|
|
|
|
|
# Base URLs of the upstream Captions endpoints. The image and TTS helpers
# append "/generate/submit" and "/generate/status" to these.
CAPTIONS_BASE_URL = "https://core.captions-web-api.xyz/proxy/v1/gen-ai/image"
CAPTIONS_TTS_BASE_URL = "https://core.captions-web-api.xyz/proxy/v1/voiceover/tts"

# Bearer token sent in the Authorization header of every upstream request.
# It is read from the CAPTIONS_BEARER_TOKEN environment variable only: a
# credential must never be committed to source control, so there is no
# built-in fallback. When the variable is unset the token is an empty string
# and upstream calls will be rejected by the backend.
BEARER_TOKEN = os.getenv("CAPTIONS_BEARER_TOKEN", "")
|
|
|
|
|
|
|
|
# Maps model names accepted from API clients (keys) to Captions model IDs
# (values). Several keys are short aliases for the same Captions model.
# Note that "dall-e-2" is mapped to the DALL-E 3 model ID.
# Insertion order matters: list_models iterates this dict when emitting aliases.
MODEL_MAPPINGS = {
    "dall-e-3": "openai-dalle-3",
    "dall-e-2": "openai-dalle-3",
    "gpt-4o": "openai-gpt-4o-image",
    "google-imagen-3": "google-imagen-3",
    "imagen-3": "google-imagen-3",
    "luma-photon": "luma-photon",
    "photon": "luma-photon",
    "flux-1-1-pro": "bfl-flux-1-1-pro",
    "flux": "bfl-flux-1-1-pro",
    "ideogram-v1": "ideogram-v1",
    "ideogram": "ideogram-v1",
    "recraft-v3": "recraft-v3",
    "recraft": "recraft-v3",
    "stable-diffusion-3-5": "stable-diffusion-3-5-large",
    "sd-3-5": "stable-diffusion-3-5-large",
    "stable-diffusion": "stable-diffusion-3-5-large"
}
|
|
|
|
|
|
|
|
# Maps lowercase voice names accepted from API clients (keys) to Captions
# voice IDs (values). get_captions_voice_id lowercases its input before
# looking it up here.
VOICE_MAPPINGS = {
    # OpenAI-style voice names.
    "alloy": "0s0tckZNA4EDjsNWIGpn",
    "echo": "VfJEoIjcuedwbnVocfwS",
    "fable": "aIJGQIEdPBlV4bWoLgiC",
    "onyx": "NkxXZNRZuGVagP3gLTlk",
    "nova": "dEcutGbESImg8uIOJOb3",
    "shimmer": "OsLeLksKZUcYFR6Rj3AV",

    # Lowercase forms of the display names listed in AVAILABLE_VOICES.
    # "brandon" resolves to the same ID as "alloy".
    "brandon": "0s0tckZNA4EDjsNWIGpn",
    "nicole": "2OMmjuvizlUUkgCLYrEU",
    "jamal": "4VCohb9n7kc8qQAMbC9T",
    "xavier": "6LVJ04FKnALQY4vuI3xi",
    "emma": "7pjl1PlCtijY5E7k9nex",
    "alexandra": "8OwpkBz4OXvyOgg6uSVM",
    "josh": "9H5PLh8sHyc4NiQba2sO",
    "vincent": "A6YwaBVPdqMuPU5guI31",
    "bella": "DVkGI1gOEQwhI9D98kgV",
    "sophia": "Dw4Y69nCUd0lijzanffn",
    "ethan": "FNrD9UXPRmnlfELyZfOH",
    "greg": "GFvARbVuizGj4jkdG1iN",
    "isabella": "GNliQ6gOp8Y96hz0uPSY",
    "mason": "Jc5LFEs9ONmW3vilHdpg",
    "justin": "LWoskltOczE5nVUCPFCl",
    "bradford": "Lvu57Tdi6WU0LrCkf3W0",
    "ally": "NJSANg1RFfytiL3apSc0",
    "maddy": "NX9RZUSep3h9RzDoipkJ",
    "george": "NmypOAkKcWovPSbjMJPk",
    "brian": "Pt04qYLGmK9HateRrrdh",
    "taylor": "QQ0vIwK2AgVtbHZk3wYq",
    "samara": "QyFFVFY5hzA5T7sVv9JI",
    "linda": "RzrSQgnXwblMgDyOeOuy",
    "liam": "SveSw38zJT860NRIeiVk",
    "hope": "UfOKaDAlzOMjZnyEhPH1",
    "william": "VesROIDY8lJS6zz8xTRb",
    "dwight": "W76fVeloaQcuN71bIQF6",
    "lisa": "ZbuIjlIzHpIc8oO17kWW",
    "arial": "aCWKe1NzicFCAkohj7TY",
    "elliot": "arGkfQC5Z0yNlNrYLlE8",
    "rhea": "blo9kiIBaFNr0UCI2gpA",
    "leo": "bqvJyFf80waIYPYiv6zX",
    "eve": "cQ0q3hcj9Bm4IccGDY9C",
    "serena": "e3zFWWHHfNk6vOh5kbBX",
    "domi": "eSojoW8lMv5whHRCJugk",
    "alex": "eXjri1H442qcs35pWaTr",
    "blondie": "fHmK4z2cR0VXxvQmd7ei",
    "nathan": "gO0Do5f1lCvLoIvbl6dx",
    "daniel": "grqhFog58KWjgcO6t4ya",
    "tara": "iBsjG6Kk8tmO0ldX7Aho",
    "maya": "iWBJcyi2qdFpXYRGt42f",
    "ashley": "j51tO8Upz9wEVIUkynCJ",
    "matthew": "lJQLBnDNpkkc4RIgqhIZ",
    "andrew": "lQS5Hszd1P0W2m18M4ME",
    "olivia": "ltYBSrCwVJp0I99DmLfq",
    "adam": "m1t6JeyI9DXRhnCg8kuX",
    "mark": "okc8JAt7Vb3u20k4soKB",
    "micah": "r0ZdS6QBWDxmcRN7HxWq",
    "elli": "r4gww888sYU82aKZSUHy",
    "sylvia": "rJmVxgRa6YI9bALBqvtC",
    "noah": "rgqCbvqWKIaxYs54d7xS",
    "kayla": "s1YBw3dmanbLNCq7MXI8",
    "carla": "sUXCiUMyEVHBC7sRlPZY",
    "owen": "tijk10imWq7nGRawDD62",
    "lila": "wjOnivHr3V1ZGNuCMZJI",
    "sam": "xpkvvHUyS37s3f84MObW",
    "antoni": "y5nGwtfzvQ2OhrBXZnj5",
    "ava": "zYqKDc8tFTIsAhJFpTaC"
}
|
|
|
|
|
|
|
|
# Catalogue of Captions voices keyed by voice ID. Each value records the
# voice's display name, gender, accent and provider. The IDs are the same
# ones that appear as values in VOICE_MAPPINGS.
AVAILABLE_VOICES = {
    "0s0tckZNA4EDjsNWIGpn": {"name": "Brandon", "gender": "male", "accent": "american", "provider": "OpenAI"},
    "2OMmjuvizlUUkgCLYrEU": {"name": "Nicole", "gender": "female", "accent": "australian", "provider": "Cartesia"},
    "4VCohb9n7kc8qQAMbC9T": {"name": "Jamal", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "6LVJ04FKnALQY4vuI3xi": {"name": "Xavier", "gender": "male", "accent": "american", "provider": "PlayHT"},
    "7pjl1PlCtijY5E7k9nex": {"name": "Emma", "gender": "female", "accent": "american", "provider": "Google"},
    "8OwpkBz4OXvyOgg6uSVM": {"name": "Alexandra", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "9H5PLh8sHyc4NiQba2sO": {"name": "Josh", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "A6YwaBVPdqMuPU5guI31": {"name": "Vincent", "gender": "male", "accent": "american", "provider": "PlayHT"},
    "DVkGI1gOEQwhI9D98kgV": {"name": "Bella", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "Dw4Y69nCUd0lijzanffn": {"name": "Sophia", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "FNrD9UXPRmnlfELyZfOH": {"name": "Ethan", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "GFvARbVuizGj4jkdG1iN": {"name": "Greg", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "GNliQ6gOp8Y96hz0uPSY": {"name": "Isabella", "gender": "female", "accent": "american", "provider": "Google"},
    "Jc5LFEs9ONmW3vilHdpg": {"name": "Mason", "gender": "male", "accent": "american", "provider": "Google"},
    "LWoskltOczE5nVUCPFCl": {"name": "Justin", "gender": "male", "accent": "american", "provider": "Cartesia"},
    "Lvu57Tdi6WU0LrCkf3W0": {"name": "Bradford", "gender": "male", "accent": "british", "provider": "ElevenLabs"},
    "NJSANg1RFfytiL3apSc0": {"name": "Ally", "gender": "female", "accent": "american", "provider": "PlayHT"},
    "NX9RZUSep3h9RzDoipkJ": {"name": "Maddy", "gender": "female", "accent": "american", "provider": "PlayHT"},
    "NkxXZNRZuGVagP3gLTlk": {"name": "James", "gender": "male", "accent": "british", "provider": "OpenAI"},
    "NmypOAkKcWovPSbjMJPk": {"name": "George", "gender": "male", "accent": "british", "provider": "Cartesia"},
    "OsLeLksKZUcYFR6Rj3AV": {"name": "Lea", "gender": "female", "accent": "american", "provider": "OpenAI"},
    "Pt04qYLGmK9HateRrrdh": {"name": "Brian", "gender": "male", "accent": "american", "provider": "Cartesia"},
    "QQ0vIwK2AgVtbHZk3wYq": {"name": "Taylor", "gender": "female", "accent": "british", "provider": "ElevenLabs"},
    "QyFFVFY5hzA5T7sVv9JI": {"name": "Samara", "gender": "female", "accent": "british", "provider": "ElevenLabs"},
    "RzrSQgnXwblMgDyOeOuy": {"name": "Linda", "gender": "female", "accent": "british", "provider": "PlayHT"},
    "SveSw38zJT860NRIeiVk": {"name": "Liam", "gender": "male", "accent": "american", "provider": "Google"},
    "UfOKaDAlzOMjZnyEhPH1": {"name": "Hope", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "VesROIDY8lJS6zz8xTRb": {"name": "William", "gender": "male", "accent": "american", "provider": "Google"},
    "VfJEoIjcuedwbnVocfwS": {"name": "John", "gender": "male", "accent": "american", "provider": "OpenAI"},
    "W76fVeloaQcuN71bIQF6": {"name": "Dwight", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "ZbuIjlIzHpIc8oO17kWW": {"name": "Lisa", "gender": "female", "accent": "american", "provider": "PlayHT"},
    "aCWKe1NzicFCAkohj7TY": {"name": "Arial", "gender": "female", "accent": "american", "provider": "Cartesia"},
    "aIJGQIEdPBlV4bWoLgiC": {"name": "Jordan", "gender": "male", "accent": "american", "provider": "OpenAI"},
    "arGkfQC5Z0yNlNrYLlE8": {"name": "Elliot", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "blo9kiIBaFNr0UCI2gpA": {"name": "Rhea", "gender": "female", "accent": "australian", "provider": "PlayHT"},
    "bqvJyFf80waIYPYiv6zX": {"name": "Leo", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "cQ0q3hcj9Bm4IccGDY9C": {"name": "Eve", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "dEcutGbESImg8uIOJOb3": {"name": "Julie", "gender": "female", "accent": "american", "provider": "OpenAI"},
    "e3zFWWHHfNk6vOh5kbBX": {"name": "Serena", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "eSojoW8lMv5whHRCJugk": {"name": "Domi", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "eXjri1H442qcs35pWaTr": {"name": "Alex", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "fHmK4z2cR0VXxvQmd7ei": {"name": "Blondie", "gender": "female", "accent": "british", "provider": "ElevenLabs"},
    "gO0Do5f1lCvLoIvbl6dx": {"name": "Nathan", "gender": "male", "accent": "british", "provider": "PlayHT"},
    "grqhFog58KWjgcO6t4ya": {"name": "Daniel", "gender": "male", "accent": "american", "provider": "PlayHT"},
    "iBsjG6Kk8tmO0ldX7Aho": {"name": "Tara", "gender": "female", "accent": "american", "provider": "Cartesia"},
    "iWBJcyi2qdFpXYRGt42f": {"name": "Maya", "gender": "female", "accent": "american", "provider": "Cartesia"},
    "j51tO8Upz9wEVIUkynCJ": {"name": "Ashley", "gender": "female", "accent": "american", "provider": "OpenAI"},
    "lJQLBnDNpkkc4RIgqhIZ": {"name": "Matthew", "gender": "male", "accent": "australian", "provider": "Cartesia"},
    "lQS5Hszd1P0W2m18M4ME": {"name": "Andrew", "gender": "male", "accent": "american", "provider": "Cartesia"},
    "ltYBSrCwVJp0I99DmLfq": {"name": "Olivia", "gender": "female", "accent": "american", "provider": "Google"},
    "m1t6JeyI9DXRhnCg8kuX": {"name": "Adam", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "okc8JAt7Vb3u20k4soKB": {"name": "Mark", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "r0ZdS6QBWDxmcRN7HxWq": {"name": "Micah", "gender": "male", "accent": "british", "provider": "ElevenLabs"},
    "r4gww888sYU82aKZSUHy": {"name": "Elli", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "rJmVxgRa6YI9bALBqvtC": {"name": "Sylvia", "gender": "female", "accent": "american", "provider": "OpenAI"},
    "rgqCbvqWKIaxYs54d7xS": {"name": "Noah", "gender": "male", "accent": "australian", "provider": "ElevenLabs"},
    "s1YBw3dmanbLNCq7MXI8": {"name": "Kayla", "gender": "female", "accent": "american", "provider": "OpenAI"},
    "sUXCiUMyEVHBC7sRlPZY": {"name": "Carla", "gender": "female", "accent": "american", "provider": "Cartesia"},
    "tijk10imWq7nGRawDD62": {"name": "Owen", "gender": "male", "accent": "american", "provider": "Google"},
    "wjOnivHr3V1ZGNuCMZJI": {"name": "Lila", "gender": "female", "accent": "american", "provider": "ElevenLabs"},
    "xpkvvHUyS37s3f84MObW": {"name": "Sam", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "y5nGwtfzvQ2OhrBXZnj5": {"name": "Antoni", "gender": "male", "accent": "american", "provider": "ElevenLabs"},
    "zYqKDc8tFTIsAhJFpTaC": {"name": "Ava", "gender": "female", "accent": "american", "provider": "Google"}
}
|
|
|
|
|
|
|
|
# Captions image models keyed by Captions model ID, with a display name and
# provider for each. list_models builds its response from this table and the
# image handlers check resolved model IDs against its keys.
AVAILABLE_MODELS = {
    "google-imagen-3": {"name": "Imagen 3", "provider": "Google"},
    "openai-gpt-4o-image": {"name": "GPT-4o", "provider": "OpenAI"},
    "luma-photon": {"name": "Photon", "provider": "Luma AI"},
    "bfl-flux-1-1-pro": {"name": "Flux 1.1 Pro", "provider": "Black Forest Labs"},
    "ideogram-v1": {"name": "Ideogram V1", "provider": "Ideogram"},
    "openai-dalle-3": {"name": "DALL-E 3 HD", "provider": "OpenAI"},
    "recraft-v3": {"name": "Recraft V3", "provider": "Recraft"},
    "stable-diffusion-3-5-large": {"name": "SD 3.5", "provider": "Stability AI"}
}
|
|
|
|
|
|
|
|
class ImageGenerationRequest(BaseModel):
    """Request body accepted by the image generation endpoints.

    Only ``prompt`` is required; every other field has a default. ``n`` is
    constrained to 1..10 and ``quality``, ``response_format``, ``size`` and
    ``style`` are restricted to the literal values listed on each field.
    """

    prompt: str = Field(..., description="A text description of the desired image(s)")
    model: Optional[str] = Field("dall-e-3", description="The model to use for image generation")
    n: Optional[int] = Field(1, ge=1, le=10, description="Number of images to generate")
    quality: Optional[Literal["standard", "hd"]] = Field("standard", description="Quality of the image")
    response_format: Optional[Literal["url", "b64_json"]] = Field("url", description="Response format")
    size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] = Field("1024x1024", description="Size of the generated images")
    style: Optional[Literal["vivid", "natural"]] = Field("vivid", description="Style of the generated images")
    user: Optional[str] = Field(None, description="A unique identifier representing your end-user")
|
|
|
|
|
|
|
|
class TTSRequest(BaseModel):
    """Request body accepted by the text-to-speech endpoints.

    Only ``input`` is required. ``voice`` is a free-form string that the
    handlers resolve through ``get_captions_voice_id``; ``speed`` is
    constrained to 0.25..4.0.
    """

    model: str = Field("tts-1", description="The TTS model to use")
    input: str = Field(..., description="The text to generate audio for")
    voice: str = Field("alloy", description="The voice to use for generation")
    response_format: Optional[Literal["mp3", "opus", "aac", "flac"]] = Field("mp3", description="The format to audio in")
    speed: Optional[float] = Field(1.0, ge=0.25, le=4.0, description="The speed of the generated audio")
|
|
|
|
|
|
|
|
class ImageData(BaseModel):
    """One generated image as returned to the client; all fields are optional."""

    # URL of the generated image.
    url: Optional[str] = None
    # Base64-encoded image payload.
    b64_json: Optional[str] = None
    # Prompt reported back to the client; the handlers in this file set it to
    # the prompt the client sent.
    revised_prompt: Optional[str] = None
|
|
|
|
|
class ImageGenerationResponse(BaseModel):
    """Response body of the synchronous image generation endpoint."""

    # Integer timestamp taken when the response is built.
    created: int
    # Generated images.
    data: List[ImageData]
|
|
|
|
|
|
|
|
class CaptionsSubmitRequest(BaseModel):
    """Schema mirroring the JSON payload sent to the Captions image submit endpoint.

    NOTE(review): the image helpers visible in this file build the payload as
    a plain dict with these same keys and do not instantiate this model.
    """

    modelId: str = "openai-gpt-4o-image"
    prompt: str
    aspectRatio: int = 2
    magicPrompt: bool = False
    optimisticProjectId: str
|
|
|
|
|
class CaptionsStatusRequest(BaseModel):
    """Schema mirroring the JSON payload sent to the Captions image status endpoint."""

    # Operation ID returned by the submit call.
    operationId: str
|
|
|
|
|
|
|
|
class CaptionsTTSSubmitRequest(BaseModel):
    """JSON payload sent to the Captions TTS submit endpoint.

    The TTS handlers instantiate this model and post its ``dict()`` form.
    """

    # Text to synthesize.
    text: str
    # Captions voice ID; the default is the ID that VOICE_MAPPINGS lists for "jamal".
    voiceId: str = "4VCohb9n7kc8qQAMbC9T"
    # Captions TTS model ID; the handlers always pass this same value explicitly.
    modelId: str = "QHwZJt6xARgiV04YqEFY"
    # Client-generated project identifier (the handlers use "tts-" plus 8 hex characters).
    optimisticProjectId: str
|
|
|
|
|
class CaptionsTTSStatusRequest(BaseModel):
    """Schema mirroring the JSON payload sent to the Captions TTS status endpoint."""

    # Operation ID returned by the TTS submit call.
    operationId: str
|
|
|
|
|
|
|
|
# In-memory registry of submitted operations, keyed by the upstream operation
# ID. The image and TTS handlers add and update entries; nothing in the code
# visible here removes them, and the contents are lost when the process exits.
operations_store: dict = {}
|
|
|
|
|
def get_captions_model_id(openai_model: str) -> str:
    """Resolve a client-supplied model name to a Captions model ID.

    Resolution order:

    1. an alias listed in ``MODEL_MAPPINGS`` (matched case-insensitively,
       ignoring surrounding whitespace);
    2. a native Captions model ID listed in ``AVAILABLE_MODELS`` -- these are
       the IDs advertised by ``/v1/models``, so they must resolve to
       themselves rather than to the fallback;
    3. the fallback ``"openai-dalle-3"`` for anything else, including an
       empty or ``None`` name.

    Args:
        openai_model: Model name from the request; may be ``None`` or empty.

    Returns:
        The Captions model ID to send upstream.
    """
    default_model_id = "openai-dalle-3"
    if not openai_model:
        return default_model_id

    # Every key in both tables is lowercase, so normalizing the input only
    # widens what is accepted; exact matches behave as before.
    key = openai_model.strip().lower()
    if key in MODEL_MAPPINGS:
        return MODEL_MAPPINGS[key]
    if key in AVAILABLE_MODELS:
        return key
    return default_model_id
|
|
|
|
|
def get_aspect_ratio_from_size(size: str) -> int:
    """Translate an OpenAI ``WIDTHxHEIGHT`` size string into a Captions aspect-ratio code.

    ``"1792x1024"`` yields 2 and ``"1024x1792"`` yields 3. The square sizes,
    and any value that is not recognised, yield 1.
    """
    square_code = 1
    # Only the two non-square sizes need an explicit entry; everything else
    # shares the square code.
    non_square_codes = {"1792x1024": 2, "1024x1792": 3}
    return non_square_codes.get(size, square_code)
|
|
|
|
|
def get_captions_voice_id(openai_voice: str) -> str:
    """Look up the Captions voice ID for a client-supplied voice name.

    The name is lowercased before it is looked up in ``VOICE_MAPPINGS``.
    Names that are not listed there resolve to the fallback ID below, which
    is the same ID that "alloy" maps to.
    """
    fallback_voice_id = "0s0tckZNA4EDjsNWIGpn"
    normalized_name = openai_voice.lower()
    return VOICE_MAPPINGS.get(normalized_name, fallback_voice_id)
|
|
|
|
|
async def submit_image_generation(prompt: str, model: str = "dall-e-3", size: str = "1024x1024") -> str:
    """Submit an image generation job to the Captions API.

    Args:
        prompt: Text description of the image.
        model: Client-facing model name; resolved with ``get_captions_model_id``.
        size: OpenAI-style size string; resolved with ``get_aspect_ratio_from_size``.

    Returns:
        The operation ID reported by the upstream service.

    Raises:
        HTTPException: 502 when the upstream service answers with an HTTP
            error status; 500 when it cannot be reached, when it reports an
            unsuccessful submission, or on any other unexpected failure.
    """
    headers = {
        "accept": "application/json, text/plain, */*",
        "authorization": f"Bearer {BEARER_TOKEN}",
        "content-type": "application/json",
        "origin": "https://desktop.captions.ai",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "x-app-version": "1.0.0",
        "x-captions-user-timezone": "UTC",
        # A fresh random device ID is generated for every request.
        "x-device-id": str(uuid.uuid4()).replace("-", "")
    }

    payload = {
        "modelId": get_captions_model_id(model),
        "prompt": prompt,
        "aspectRatio": get_aspect_ratio_from_size(size),
        "magicPrompt": False,
        "optimisticProjectId": f"API-{uuid.uuid4()}"
    }

    # Only the network round-trip and JSON decoding sit inside the try block.
    # The HTTPExceptions raised further down therefore reach the caller with
    # their own detail instead of being re-labelled by a blanket handler.
    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{CAPTIONS_BASE_URL}/generate/submit",
                headers=headers,
                json=payload,
                timeout=30.0
            )
        response.raise_for_status()
        result = response.json()
    except httpx.HTTPStatusError as e:
        logger.error(f"Image generation service returned HTTP {e.response.status_code}")
        raise HTTPException(status_code=502, detail="Image generation service returned an error") from e
    except httpx.RequestError as e:
        logger.error(f"Request error: {e}")
        raise HTTPException(status_code=500, detail="Failed to connect to image generation service") from e
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e

    if not isinstance(result, dict) or not result.get("success"):
        raise HTTPException(status_code=500, detail="Failed to submit image generation")

    try:
        operation_id = result["data"]["operationId"]
    except (KeyError, TypeError) as e:
        # A "successful" reply without an operation ID cannot be polled.
        logger.error(f"Submit response is missing the operation ID: {e}")
        raise HTTPException(status_code=500, detail="Failed to submit image generation") from e

    logger.info(f"Image generation submitted with operation ID: {operation_id}")
    return operation_id
|
|
|
|
|
async def check_generation_status(operation_id: str) -> dict:
    """Query the Captions API for the status of an image generation job.

    Args:
        operation_id: Operation ID returned by the submit call.

    Returns:
        The ``data`` member of the upstream status response.

    Raises:
        HTTPException: 502 when the upstream service answers with an HTTP
            error status; 500 when it cannot be reached, when it reports an
            unsuccessful status check, or on any other unexpected failure.
    """
    headers = {
        "accept": "application/json, text/plain, */*",
        "authorization": f"Bearer {BEARER_TOKEN}",
        "content-type": "application/json",
        "origin": "https://desktop.captions.ai",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "x-app-version": "1.0.0",
        "x-captions-user-timezone": "UTC",
        # A fresh random device ID is generated for every request.
        "x-device-id": str(uuid.uuid4()).replace("-", "")
    }

    payload = {"operationId": operation_id}

    # Only the network round-trip and JSON decoding sit inside the try block,
    # so the HTTPExceptions raised below keep their specific detail.
    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{CAPTIONS_BASE_URL}/generate/status",
                headers=headers,
                json=payload,
                timeout=30.0
            )
        response.raise_for_status()
        result = response.json()
    except httpx.HTTPStatusError as e:
        logger.error(f"Status service returned HTTP {e.response.status_code}")
        raise HTTPException(status_code=502, detail="Status service returned an error") from e
    except httpx.RequestError as e:
        logger.error(f"Request error: {e}")
        raise HTTPException(status_code=500, detail="Failed to connect to status service") from e
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e

    if not isinstance(result, dict) or not result.get("success"):
        raise HTTPException(status_code=500, detail="Failed to check generation status")

    try:
        return result["data"]
    except KeyError as e:
        logger.error(f"Status response is missing its data member: {e}")
        raise HTTPException(status_code=500, detail="Failed to check generation status") from e
|
|
|
|
|
async def wait_for_completion(operation_id: str, max_wait_time: int = 300) -> dict:
    """Poll the image generation status until the job finishes.

    A status poll that fails is retried with exponential backoff (2 s, then
    4 s); three consecutive failures abort the wait. Terminal outcomes
    (completed, failed, timed out) are raised or returned immediately and
    are never retried.

    Args:
        operation_id: Operation ID returned by the submit call.
        max_wait_time: Maximum number of seconds to keep polling.

    Returns:
        The ``complete`` member of the status payload once ``state`` is 2.

    Raises:
        HTTPException: 500 when the job fails (``state`` 3), when it
            completes without result data, or when status checks keep
            failing; 408 when ``max_wait_time`` is exceeded.
    """
    # The event-loop clock is monotonic, so the deadline is not affected by
    # wall-clock adjustments.
    loop = asyncio.get_running_loop()
    start_time = loop.time()
    retry_count = 0
    max_retries = 3

    while True:
        # Only the poll itself is retried. check_generation_status reports
        # every failure as an HTTPException, so the retry has to catch it
        # here rather than let it abort the wait on the first hiccup.
        try:
            status_data = await check_generation_status(operation_id)
            if not isinstance(status_data, dict):
                raise TypeError(f"unexpected status payload: {status_data!r}")
        except Exception as e:
            retry_count += 1
            if retry_count >= max_retries:
                logger.error(f"Max retries exceeded for operation {operation_id}: {e}")
                raise HTTPException(
                    status_code=500,
                    detail="Failed to check generation status after multiple retries"
                ) from e

            logger.warning(f"Retry {retry_count}/{max_retries} for operation {operation_id}: {e}")
            await asyncio.sleep(2 ** retry_count)
            continue

        retry_count = 0
        state = status_data.get("state")

        if state == 2:
            if "complete" in status_data:
                return status_data["complete"]
            raise HTTPException(status_code=500, detail="Generation completed but no result data")

        if state == 3:
            raise HTTPException(status_code=500, detail="Image generation failed")

        elapsed = loop.time() - start_time
        if elapsed > max_wait_time:
            raise HTTPException(status_code=408, detail="Image generation timeout")

        if state == 1:
            logger.info(f"Operation {operation_id} still processing...")

        # Poll interval starts at 2 s and grows with elapsed time, capped at 5 s.
        wait_time = min(5, 2 + (elapsed / 60))
        await asyncio.sleep(wait_time)
|
|
|
|
|
@app.get("/v1/models")
async def list_models():
    """List the available image models in OpenAI's list format.

    Each Captions model in ``AVAILABLE_MODELS`` is listed under its own ID,
    followed by every alias from ``MODEL_MAPPINGS`` that resolves to it and
    has not been listed already.

    Returns:
        A dict of the form ``{"object": "list", "data": [...]}``.
    """

    def _entry(identifier: str, info: dict) -> dict:
        # One entry in the OpenAI "model" object shape.
        return {
            "id": identifier,
            "object": "model",
            "created": 1234567890,
            "owned_by": info["provider"].lower().replace(" ", "-"),
            "name": info["name"],
            "provider": info["provider"]
        }

    models = []
    # IDs emitted so far; a set keeps the duplicate check constant-time
    # instead of rebuilding a list of IDs for every alias.
    seen_ids = set()

    for model_id, info in AVAILABLE_MODELS.items():
        models.append(_entry(model_id, info))
        seen_ids.add(model_id)

        for alias, captions_id in MODEL_MAPPINGS.items():
            if captions_id == model_id and alias not in seen_ids:
                models.append(_entry(alias, info))
                seen_ids.add(alias)

    return {"object": "list", "data": models}
|
|
|
|
|
@app.post("/v1/images/generations", response_model=ImageGenerationResponse)
async def create_image(request: ImageGenerationRequest):
    """Create one or more images from a text prompt.

    Compatible with OpenAI's image generation API. The request is validated,
    ``request.n`` generation jobs are submitted upstream and awaited
    concurrently, and one ``ImageData`` per job is returned.

    NOTE(review): ``response_format="b64_json"`` is accepted by the request
    model, but this handler always returns image URLs.

    Raises:
        HTTPException: 400 for an unsupported model, an empty prompt or a
            prompt longer than 1000 characters; 408 or 5xx when generation
            times out or fails.
    """
    try:
        logger.info(f"Received image generation request: prompt='{request.prompt[:100]}...', model='{request.model}', size='{request.size}'")

        captions_model_id = get_captions_model_id(request.model)
        if captions_model_id not in AVAILABLE_MODELS:
            raise HTTPException(status_code=400, detail=f"Model '{request.model}' is not supported")

        if not request.prompt or not request.prompt.strip():
            raise HTTPException(status_code=400, detail="Prompt cannot be empty")

        if len(request.prompt) > 1000:
            raise HTTPException(status_code=400, detail="Prompt exceeds maximum length of 1000 characters")

        async def _generate_one() -> ImageData:
            # Submit a single job, wait for it, and wrap the resulting URL.
            # submit_image_generation already logs the operation ID.
            operation_id = await submit_image_generation(request.prompt, request.model, request.size)
            completion_data = await wait_for_completion(operation_id)

            image_url = completion_data.get("assetResolvedUrl")
            if not image_url:
                raise HTTPException(status_code=500, detail="Generation completed but no image URL received")

            logger.info(f"Image generation completed successfully for operation: {operation_id}")
            return ImageData(url=image_url, revised_prompt=request.prompt)

        # `n` is Optional in the request model; an explicit null means one image.
        image_count = request.n or 1
        # The first failing job aborts the request with its own error.
        images = await asyncio.gather(*(_generate_one() for _ in range(image_count)))

        return ImageGenerationResponse(
            created=int(datetime.now().timestamp()),
            data=list(images)
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Unexpected error in image generation: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
|
|
|
@app.post("/v1/images/generations/async")
async def create_image_async(request: ImageGenerationRequest):
    """Start an image generation job and return its operation ID.

    Non-blocking variant of the generation API: the job is submitted
    upstream, recorded in ``operations_store`` and the client is pointed at
    the status URL to poll.

    Returns:
        A dict with ``operation_id``, ``status`` ("submitted"), ``created``
        and ``status_url``.

    Raises:
        HTTPException: 400 for an unsupported model, an empty prompt or a
            prompt longer than 1000 characters; 5xx when submission fails.
    """
    try:
        logger.info(f"Received async image generation request: prompt='{request.prompt[:100]}...', model='{request.model}', size='{request.size}'")

        captions_model_id = get_captions_model_id(request.model)
        if captions_model_id not in AVAILABLE_MODELS:
            raise HTTPException(status_code=400, detail=f"Model '{request.model}' is not supported")

        if not request.prompt or not request.prompt.strip():
            raise HTTPException(status_code=400, detail="Prompt cannot be empty")

        if len(request.prompt) > 1000:
            raise HTTPException(status_code=400, detail="Prompt exceeds maximum length of 1000 characters")

        operation_id = await submit_image_generation(request.prompt, request.model, request.size)

        # Take the timestamp once so the stored record and the response agree.
        created = int(datetime.now().timestamp())

        operations_store[operation_id] = {
            # Tagged so the TTS endpoints, which read entry["type"], can tell
            # image operations apart instead of failing on a missing key.
            "type": "image",
            "created": created,
            "prompt": request.prompt,
            "model": request.model,
            "size": request.size,
            "status": "processing"
        }

        return {
            "operation_id": operation_id,
            "status": "submitted",
            "created": created,
            "status_url": f"/v1/images/generations/status/{operation_id}"
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Unexpected error in async image generation: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
|
|
|
@app.get("/v1/images/generations/status/{operation_id}")
async def get_generation_status(operation_id: str):
    """Check the status of an image generation operation.

    Args:
        operation_id: Operation ID returned by the async generation endpoint.

    Returns:
        A dict with ``operation_id``, ``status`` ("processing", "completed",
        "failed" or "unknown") and ``created``, plus ``data`` on completion
        or ``error`` on failure.

    Raises:
        HTTPException: 404 when the operation is unknown; 400 when the ID
            belongs to a non-image operation; 500 when the upstream status
            check fails.
    """
    try:
        if operation_id not in operations_store:
            raise HTTPException(status_code=404, detail="Operation ID not found")

        operation_info = operations_store[operation_id]

        # TTS operations live in the same store but carry different fields
        # (no "created" or "prompt"), so reject them before polling the image
        # endpoint. Entries without a "type" key are image operations.
        if operation_info.get("type", "image") != "image":
            raise HTTPException(status_code=400, detail="Invalid operation type")

        status_data = await check_generation_status(operation_id)
        state = status_data.get("state")

        if state == 1:
            return {
                "operation_id": operation_id,
                "status": "processing",
                "created": operation_info["created"],
                "estimated_completion": None
            }

        if state == 2:
            operations_store[operation_id]["status"] = "completed"

            image_data = ImageData(
                url=status_data["complete"].get("assetResolvedUrl"),
                revised_prompt=operation_info["prompt"]
            )

            return {
                "operation_id": operation_id,
                "status": "completed",
                "created": operation_info["created"],
                "data": [image_data.dict()]
            }

        if state == 3:
            operations_store[operation_id]["status"] = "failed"
            return {
                "operation_id": operation_id,
                "status": "failed",
                "created": operation_info["created"],
                "error": "Image generation failed"
            }

        return {
            "operation_id": operation_id,
            "status": "unknown",
            "created": operation_info["created"],
            "error": "Unknown status"
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error checking generation status: {e}")
        raise HTTPException(status_code=500, detail="Failed to check generation status")
|
|
|
|
|
|
|
|
@app.post("/v1/audio/speech")
async def create_speech(request: TTSRequest):
    """Generate speech from text and return the audio.

    OpenAI-compatible endpoint. A TTS job is submitted upstream and polled
    once per second (up to 60 polls); on completion the audio is downloaded
    and returned in the response body.

    NOTE(review): the response is always labelled ``audio/mpeg`` and the
    downloaded bytes are passed through unchanged; ``response_format`` only
    affects the suggested filename. ``speed`` is not forwarded upstream.

    Raises:
        HTTPException: the upstream status code when submission is rejected;
            500 when generation fails or the audio cannot be fetched; 408
            when the job does not finish within the polling budget.
    """
    try:
        voice_id = get_captions_voice_id(request.voice)

        captions_request = CaptionsTTSSubmitRequest(
            text=request.input,
            voiceId=voice_id,
            modelId="QHwZJt6xARgiV04YqEFY",
            optimisticProjectId=f"tts-{uuid.uuid4().hex[:8]}"
        )

        # The same headers are used for the submit call and every status poll.
        upstream_headers = {
            "Authorization": f"Bearer {BEARER_TOKEN}",
            "Content-Type": "application/json",
            "x-app-version": "1.0.0",
            "x-device-id": "api-client"
        }

        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{CAPTIONS_TTS_BASE_URL}/generate/submit",
                json=captions_request.dict(),
                headers=upstream_headers,
                timeout=30.0
            )

            if response.status_code != 200:
                logger.error(f"TTS submit failed: {response.text}")
                raise HTTPException(status_code=response.status_code, detail="TTS generation failed")

            result = response.json()
            operation_id = result["data"]["operationId"]

            # Record the operation with an explicit status. The outcome is
            # written back below so the TTS status and download endpoints
            # see a consistent record.
            operations_store[operation_id] = {
                "type": "tts",
                "voice_id": voice_id,
                "text": request.input,
                "format": request.response_format,
                "created_at": datetime.now(),
                "status": "processing"
            }

            max_retries = 60

            for _ in range(max_retries):
                status_response = await client.post(
                    f"{CAPTIONS_TTS_BASE_URL}/generate/status",
                    json={"operationId": operation_id},
                    headers=upstream_headers,
                    timeout=30.0
                )

                if status_response.status_code != 200:
                    # A failed poll still counts against the polling budget.
                    await asyncio.sleep(1)
                    continue

                status_result = status_response.json()
                state = status_result["data"]["state"]

                if state == "COMPLETE":
                    audio_url = status_result["data"]["url"]
                    operations_store[operation_id]["status"] = "completed"
                    operations_store[operation_id]["url"] = audio_url

                    # Explicit timeout, matching the other upstream calls;
                    # without it httpx applies its shorter default.
                    audio_response = await client.get(audio_url, timeout=30.0)
                    if audio_response.status_code != 200:
                        raise HTTPException(status_code=500, detail="Failed to fetch generated audio")

                    return StreamingResponse(
                        iter([audio_response.content]),
                        media_type="audio/mpeg",
                        headers={
                            "Content-Disposition": f"attachment; filename=speech.{request.response_format}"
                        }
                    )

                if state == "FAILED":
                    operations_store[operation_id]["status"] = "failed"
                    raise HTTPException(status_code=500, detail="TTS generation failed")

                await asyncio.sleep(1)

            raise HTTPException(status_code=408, detail="TTS generation timed out")

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in TTS generation: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
|
|
|
@app.post("/v1/audio/speech/async")
async def create_speech_async(request: TTSRequest, background_tasks: BackgroundTasks):
    """Submit a TTS job upstream and hand back its operation ID without waiting.

    The operation is recorded in ``operations_store`` with status
    "processing". ``background_tasks`` is accepted but not used by this
    handler.

    Raises:
        HTTPException: the upstream status code when submission is rejected;
            500 on any other failure.
    """
    try:
        voice_id = get_captions_voice_id(request.voice)

        submit_body = CaptionsTTSSubmitRequest(
            text=request.input,
            voiceId=voice_id,
            modelId="QHwZJt6xARgiV04YqEFY",
            optimisticProjectId=f"tts-{uuid.uuid4().hex[:8]}"
        ).dict()

        submit_url = f"{CAPTIONS_TTS_BASE_URL}/generate/submit"
        submit_headers = {
            "Authorization": f"Bearer {BEARER_TOKEN}",
            "Content-Type": "application/json",
            "x-app-version": "1.0.0",
            "x-device-id": "api-client"
        }

        async with httpx.AsyncClient() as http:
            upstream = await http.post(
                submit_url,
                json=submit_body,
                headers=submit_headers,
                timeout=30.0
            )

        # The response is fully read at this point, so the rest can run after
        # the client has been closed.
        if upstream.status_code != 200:
            logger.error(f"TTS submit failed: {upstream.text}")
            raise HTTPException(status_code=upstream.status_code, detail="TTS generation failed")

        operation_id = upstream.json()["data"]["operationId"]

        record = {
            "type": "tts",
            "voice_id": voice_id,
            "text": request.input,
            "format": request.response_format,
            "created_at": datetime.now(),
            "status": "processing"
        }
        operations_store[operation_id] = record

        return {"operation_id": operation_id, "status": "processing"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in async TTS generation: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
|
|
|
@app.get("/v1/audio/speech/status/{operation_id}")
async def get_tts_status(operation_id: str):
    """Check the status of a TTS generation operation.

    Args:
        operation_id: Operation ID returned by a TTS endpoint.

    Returns:
        A dict whose ``status`` is "completed" (with ``url``), "failed",
        "processing", or "error" when the upstream status call does not
        return HTTP 200.

    Raises:
        HTTPException: 404 when the operation is unknown; 400 when the ID
            belongs to a non-TTS operation; 500 when the status check fails
            unexpectedly.
    """
    if operation_id not in operations_store:
        raise HTTPException(status_code=404, detail="Operation not found")

    operation = operations_store[operation_id]
    # Image operations share the store and may have no "type" key at all;
    # .get() turns that case into a clean 400 instead of an unhandled KeyError.
    if operation.get("type") != "tts":
        raise HTTPException(status_code=400, detail="Invalid operation type")

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{CAPTIONS_TTS_BASE_URL}/generate/status",
                json={"operationId": operation_id},
                headers={
                    "Authorization": f"Bearer {BEARER_TOKEN}",
                    "Content-Type": "application/json",
                    "x-app-version": "1.0.0",
                    "x-device-id": "api-client"
                },
                timeout=30.0
            )

        if response.status_code != 200:
            return {"status": "error", "error": "Failed to check status"}

        result = response.json()
        state = result["data"]["state"]

        if state == "COMPLETE":
            audio_url = result["data"]["url"]
            # Remember the outcome so the download endpoint can serve the audio.
            operation["status"] = "completed"
            operation["url"] = audio_url
            return {
                "status": "completed",
                "url": audio_url,
                "operation_id": operation_id
            }

        if state == "FAILED":
            operation["status"] = "failed"
            return {"status": "failed", "operation_id": operation_id}

        operation["status"] = "processing"
        return {"status": "processing", "operation_id": operation_id}

    except Exception as e:
        logger.error(f"Error checking TTS status: {e}")
        raise HTTPException(status_code=500, detail="Failed to check TTS status")
|
|
|
|
|
@app.get("/v1/audio/speech/download/{operation_id}")
async def download_tts_audio(operation_id: str):
    """
    Download the generated audio file.

    Serves the audio for an operation whose stored entry is marked
    ``"completed"`` and carries a ``url`` (both are written by the status
    endpoint once the backend reports ``COMPLETE``).

    Args:
        operation_id: Key of the operation in ``operations_store``.

    Returns:
        A ``StreamingResponse`` with the fetched bytes, served as
        ``audio/mpeg`` with an attachment filename ``speech.<format>``.

    Raises:
        HTTPException: 404 if the operation or its URL is missing, 400 if the
            entry is not a TTS operation or is not completed yet, 500 if the
            audio cannot be fetched.
    """
    if operation_id not in operations_store:
        raise HTTPException(status_code=404, detail="Operation not found")

    operation = operations_store[operation_id]
    if operation["type"] != "tts":
        raise HTTPException(status_code=400, detail="Invalid operation type")

    if operation.get("status") != "completed":
        raise HTTPException(status_code=400, detail="Audio not ready yet")

    audio_url = operation.get("url")
    if not audio_url:
        raise HTTPException(status_code=404, detail="Audio URL not found")

    try:
        async with httpx.AsyncClient() as client:
            audio_response = await client.get(audio_url)

        if audio_response.status_code != 200:
            raise HTTPException(status_code=500, detail="Failed to fetch generated audio")

        # The key can be present with a None value, in which case
        # .get("format", "mp3") would not fall back; `or` covers both cases.
        format_type = operation.get("format") or "mp3"

        # NOTE(review): media type is fixed to audio/mpeg regardless of
        # format_type; confirm the backend always returns MP3 data.
        return StreamingResponse(
            iter([audio_response.content]),
            media_type="audio/mpeg",
            headers={
                "Content-Disposition": f"attachment; filename=speech.{format_type}"
            },
        )

    except HTTPException:
        # Let deliberate HTTP errors through untouched; otherwise the blanket
        # handler below would replace their detail message.
        raise
    except Exception as e:
        logger.error(f"Error downloading TTS audio: {e}")
        raise HTTPException(status_code=500, detail="Failed to download audio")
|
|
|
|
|
@app.get("/v1/voices")
async def list_voices():
    """
    List available TTS voices.

    Emits one entry per item in ``AVAILABLE_VOICES``. Each entry's
    ``openai_name`` is the first key in ``VOICE_MAPPINGS`` that maps to that
    voice id, or ``None`` when no key does.
    """

    def first_alias(target_id):
        # First VOICE_MAPPINGS key whose value equals target_id, else None.
        return next(
            (alias for alias, mapped in VOICE_MAPPINGS.items() if mapped == target_id),
            None,
        )

    catalogue = [
        {
            "id": vid,
            "name": info["name"],
            "openai_name": first_alias(vid),
            "gender": info["gender"],
            "accent": info["accent"],
            "provider": info["provider"],
        }
        for vid, info in AVAILABLE_VOICES.items()
    ]

    return {
        "voices": catalogue,
        "openai_compatible": ["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
    }
|
|
|
|
|
@app.get("/health")
async def health_check():
    """Health check endpoint: reports "healthy" plus the current timestamp."""
    # Naive datetime.now() rendered in ISO 8601 form.
    now_iso = datetime.now().isoformat()
    return {"status": "healthy", "timestamp": now_iso}
|
|
|
|
|
@app.get("/")
async def root():
    """
    Root endpoint with API information.

    Returns a static description of the service: version, the model and
    voice names it accepts, the route of every endpoint, and example curl
    commands.
    """
    openai_voice_names = ("alloy", "echo", "fable", "onyx", "nova", "shimmer")

    # Mapping keys are already unique, so no set() is needed; iterating the
    # mapping directly also keeps the alias order stable between processes.
    openai_voice_aliases = [
        alias for alias in VOICE_MAPPINGS if alias in openai_voice_names
    ]

    return {
        "message": "OpenAI Compatible Image Generation & TTS API",
        "version": "1.0.0",
        "supported_models": list(AVAILABLE_MODELS.keys()),
        "openai_aliases": list(MODEL_MAPPINGS.keys()),
        "supported_voices": len(AVAILABLE_VOICES),
        "openai_voice_aliases": openai_voice_aliases,
        "endpoints": {
            "models": "/v1/models",
            "voices": "/v1/voices",
            "image_generation": "/v1/images/generations",
            "async_generation": "/v1/images/generations/async",
            "status_check": "/v1/images/generations/status/{operation_id}",
            "tts": "/v1/audio/speech",
            "tts_async": "/v1/audio/speech/async",
            "tts_status": "/v1/audio/speech/status/{operation_id}",
            "tts_download": "/v1/audio/speech/download/{operation_id}",
            "health": "/health",
            "docs": "/docs",
        },
        "example_curl": {
            "generate_image": "curl -X POST 'http://localhost:8000/v1/images/generations' -H 'Content-Type: application/json' -d '{\"prompt\": \"a cat\", \"model\": \"dall-e-3\", \"size\": \"1024x1024\"}'",
            "list_models": "curl -X GET 'http://localhost:8000/v1/models'",
            "generate_speech": "curl -X POST 'http://localhost:8000/v1/audio/speech' -H 'Content-Type: application/json' -d '{\"model\": \"tts-1\", \"input\": \"Hello world\", \"voice\": \"alloy\"}' --output speech.mp3",
            "list_voices": "curl -X GET 'http://localhost:8000/v1/voices'",
        },
    }
|
|
|
|
|
if __name__ == "__main__":
    # uvicorn is imported only when this file is executed as a script.
    import uvicorn

    # Listen on all interfaces, port 8000.
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)