"""Gradio front-end that turns text into a talking-avatar video via the D-ID API.

The app posts a "talk" creation request to D-ID, polls the talk until it
reports ``done`` or ``error`` (or the attempt budget is exhausted), and streams
human-readable status messages to the UI while it waits.
"""

import json
import os
import time
from typing import Iterator, Optional, Tuple

import gradio as gr
import requests

# --- D-ID API Configuration ---
D_ID_API_BASE_URL = "https://api.d-id.com"
D_ID_TALKS_ENDPOINT = f"{D_ID_API_BASE_URL}/talks"
DEFAULT_AVATAR_IMAGE_URL = "https://placehold.co/256x256/ADD8E6/000000?text=Avatar"

# --- Network / polling behaviour ---
# `requests` applies no timeout unless one is passed explicitly; without it a
# stalled connection would block the generator (and the UI) indefinitely.
REQUEST_TIMEOUT_SECONDS = 30
# Delay between two status polls, and how many polls are made before giving up.
POLL_INTERVAL_SECONDS = 5
MAX_POLL_ATTEMPTS = 30

# --- Available Voices (expandable) ---
AVAILABLE_VOICES = [
    "en-US-JennyNeural",
    "en-US-GuyNeural",
    "en-GB-RyanNeural",
    "en-GB-SoniaNeural",
    "es-ES-ElviraNeural",
    "fr-FR-DeniseNeural",
]


# --- Function to call D-ID API ---
def generate_did_video(
    api_key: str,
    text_input: str,
    avatar_image_url: str,
    voice_id: str,
) -> Iterator[Tuple[Optional[str], str]]:
    """Create a D-ID talk and stream its progress until a video URL is available.

    This is a generator so the UI can show intermediate status updates. Every
    yielded item is a ``(video_url, status_message)`` pair; ``video_url`` is
    ``None`` for every update except the final successful one.

    Args:
        api_key: Credential placed verbatim after ``Basic`` in the
            ``Authorization`` header. Surrounding whitespace is stripped.
        text_input: Text the avatar should speak. Empty or whitespace-only
            input is rejected before any request is made.
        avatar_image_url: URL of the source image. Falls back to
            ``DEFAULT_AVATAR_IMAGE_URL`` when empty or blank.
        voice_id: Voice identifier sent as the script provider's ``voice_id``
            (provider type ``microsoft``).

    Yields:
        ``(None, message)`` for progress and error updates, and
        ``(result_url, message)`` once the talk status is ``done``.

    Note:
        No exception propagates to the caller: request failures and unexpected
        errors are reported as a final ``(None, "❌ ...")`` update instead.
    """
    # A key pasted with a trailing newline/space would yield a malformed
    # Authorization header, so normalise it before validating.
    api_key = (api_key or "").strip()
    if not api_key:
        yield None, "❌ Error: D-ID API Key is required."
        return

    # Whitespace-only text carries nothing to speak; treat it as missing.
    if not (text_input or "").strip():
        yield None, "❌ Error: Text input is required."
        return

    avatar_image_url = (avatar_image_url or "").strip() or DEFAULT_AVATAR_IMAGE_URL

    headers = {
        "Authorization": f"Basic {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "script": {
            "type": "text",
            "input": text_input,
            "provider": {
                "type": "microsoft",
                "voice_id": voice_id,
            },
        },
        "source_url": avatar_image_url,
    }

    try:
        yield None, "📤 Sending request to D-ID API..."
        response = requests.post(
            D_ID_TALKS_ENDPOINT,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()

        talk_id = response.json().get("id")
        if not talk_id:
            yield None, "❌ Error: No talk ID returned."
            return

        yield None, f"⏳ Talk created (ID: {talk_id}). Polling status..."

        talk_url = f"{D_ID_TALKS_ENDPOINT}/{talk_id}"
        for attempt in range(1, MAX_POLL_ATTEMPTS + 1):
            # Wait before every poll, including the first one.
            time.sleep(POLL_INTERVAL_SECONDS)

            poll_response = requests.get(
                talk_url,
                headers=headers,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            poll_response.raise_for_status()
            status_data = poll_response.json()
            status = status_data.get("status", "")

            if status == "done":
                video_url = status_data.get("result_url")
                if video_url:
                    yield video_url, "✅ Video generation complete!"
                else:
                    yield None, "❌ Error: No result URL found."
                return

            if status == "error":
                error_message = status_data.get("error", "Unknown error")
                yield None, f"❌ Error: {error_message}"
                return

            # Any other status is reported as-is and polling continues.
            yield None, f"🔄 Status: {status} (Attempt {attempt}/{MAX_POLL_ATTEMPTS})"

        # The attempt budget ran out without reaching "done" or "error".
        yield None, "❌ Error: Timeout. Try again later."

    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and non-2xx responses
        # (raised by raise_for_status()).
        yield None, f"❌ Request error: {e}"
    except Exception as e:
        # UI boundary: surface anything unexpected (e.g. a non-JSON response
        # body) as a status message rather than an unhandled exception.
        yield None, f"❌ Unexpected error: {e}"


# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ D-ID Talking Avatar Video Generator")

    with gr.Row():
        api_key_input = gr.Textbox(
            label="🔐 D-ID API Key (Base64)",
            type="password",
            placeholder="e.g., aGVsbG86cGFzc3dvcmQ=",
            info="Base64 of `username:password`",
        )
        avatar_image_url_input = gr.Textbox(
            label="🖼️ Avatar Image URL (Optional)",
            placeholder=DEFAULT_AVATAR_IMAGE_URL,
            info="Public image URL. Default avatar will be used if blank.",
        )

    with gr.Row():
        text_input = gr.Textbox(
            label="📝 Text for Avatar",
            lines=4,
            placeholder="Type what you want your avatar to say...",
        )
        voice_dropdown = gr.Dropdown(
            label="🎤 Voice Selection",
            choices=AVAILABLE_VOICES,
            value=AVAILABLE_VOICES[0],
        )

    generate_btn = gr.Button("🚀 Generate Video")
    output_video = gr.Video(label="🎬 Output Video")
    status_output = gr.Markdown("")

    # The handler is a generator: each yielded (video, status) pair is mapped
    # onto the two output components in this order.
    generate_btn.click(
        fn=generate_did_video,
        inputs=[api_key_input, text_input, avatar_image_url_input, voice_dropdown],
        outputs=[output_video, status_output],
    )

    # Example rows leave the API key blank on purpose; the user supplies it.
    gr.Examples(
        examples=[
            ["", "Hello! I'm your AI avatar.", DEFAULT_AVATAR_IMAGE_URL, "en-US-JennyNeural"],
            ["", "This is a demo of D-ID + Hugging Face!", DEFAULT_AVATAR_IMAGE_URL, "en-US-GuyNeural"],
        ],
        inputs=[api_key_input, text_input, avatar_image_url_input, voice_dropdown],
    )

demo.launch()