import gradio as gr
import anthropic
import base64
import json
import os
import tempfile
import subprocess
import textwrap
import torch
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageChops
import io
import re
# ── TTS ───────────────────────────────────────────────────────────────────────
from TTS.api import TTS as CoquiTTS

# ── Diffusion ─────────────────────────────────────────────────────────────────
from diffusers import (
    AnimateDiffPipeline,
    MotionAdapter,
    DDIMScheduler,
)
from diffusers.utils import export_to_video

# ── Anthropic client (reads ANTHROPIC_API_KEY from env) ──────────────────────
client = anthropic.Anthropic()
# ─────────────────────────────────────────────────────────────────────────────
# Constants
# ─────────────────────────────────────────────────────────────────────────────
VIDEO_W, VIDEO_H = 1080, 1920  # vertical 1080×1920 Shorts frame (9:16)
FPS = 30
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SD_MODEL = "runwayml/stable-diffusion-v1-5"
MOTION_ADAPTER = "guoyww/animatediff-motion-adapter-v1-5-2"

# ─────────────────────────────────────────────────────────────────────────────
# Lazy-loaded singletons
# ─────────────────────────────────────────────────────────────────────────────
_tts_model = None
_anim_pipe = None
def get_tts():
    global _tts_model
    if _tts_model is None:
        _tts_model = CoquiTTS("tts_models/en/ljspeech/tacotron2-DDC").to(DEVICE)
    return _tts_model
def get_anim_pipe():
    global _anim_pipe
    if _anim_pipe is None:
        adapter = MotionAdapter.from_pretrained(MOTION_ADAPTER)
        scheduler = DDIMScheduler.from_pretrained(
            SD_MODEL, subfolder="scheduler",
            clip_sample=False, timestep_spacing="linspace",
            beta_schedule="linear", steps_offset=1,
        )
        _anim_pipe = AnimateDiffPipeline.from_pretrained(
            SD_MODEL,
            motion_adapter=adapter,
            scheduler=scheduler,
            torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
        )
        _anim_pipe.enable_attention_slicing()
        if DEVICE == "cuda":
            # CPU offload manages device placement itself; calling .to("cuda")
            # first would move everything to the GPU only to be re-hooked.
            _anim_pipe.enable_model_cpu_offload()
        else:
            _anim_pipe.to(DEVICE)
    return _anim_pipe
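# Both models are lazy-loaded so the Space can bind the UI before the multi-GB
# SD + motion-adapter weights are fetched; the first generation request pays
# the one-time load cost. A minimal smoke test of the pipe (illustrative
# values, not part of the app flow):
#
#   pipe = get_anim_pipe()
#   out = pipe(prompt="a cat walking", num_frames=8, num_inference_steps=10)
#   print(len(out.frames[0]))  # -> 8 PIL frames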
# ─────────────────────────────────────────────────────────────────────────────
# Step 1 – Extract characters from uploaded images via Claude Vision
# ─────────────────────────────────────────────────────────────────────────────
def extract_characters(images: list) -> list:
    """Send images to Claude and get structured character descriptions."""
    content = []
    for img in images:
        if img is None:
            continue
        # gr.Gallery yields (image, caption) tuples when used as an input
        if isinstance(img, tuple):
            img = img[0]
        if isinstance(img, np.ndarray):
            pil_img = Image.fromarray(img)
        else:
            pil_img = img
        # JPEG cannot store alpha, so normalize to RGB before saving
        buf = io.BytesIO()
        pil_img.convert("RGB").save(buf, format="JPEG", quality=90)
        b64 = base64.standard_b64encode(buf.getvalue()).decode()
        content.append({
            "type": "image",
            "source": {"type": "base64", "media_type": "image/jpeg", "data": b64},
        })
    content.append({
        "type": "text",
        "text": (
            "Analyze every person/character visible in these images. "
            "Return ONLY a valid JSON array (no markdown) where each element has:\n"
            "  name – inferred or generic name (e.g. 'Hero', 'Villain')\n"
            "  appearance – detailed physical description for image generation\n"
            "  personality – 2-sentence personality inference\n"
            "  voice_style – one of: male_deep, male_mid, female_soft, female_strong, child\n"
            "Example: [{\"name\":\"Hero\",\"appearance\":\"...\",\"personality\":\"...\",\"voice_style\":\"male_mid\"}]"
        ),
    })
    response = client.messages.create(
        model="claude-opus-4-5",
        max_tokens=1500,
        messages=[{"role": "user", "content": content}],
    )
    raw = response.content[0].text.strip()
    # Strip possible markdown fences
    raw = re.sub(r"^```[a-z]*\n?", "", raw)
    raw = re.sub(r"\n?```$", "", raw)
    return json.loads(raw)
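# Shape of the parsed result, per the prompt above (field values here are
# illustrative, not real model output):
#
#   [{"name": "Hero",
#     "appearance": "tall man in a red jacket, short black hair",
#     "personality": "Confident and impulsive. Acts first, thinks later.",
#     "voice_style": "male_mid"}]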
# ─────────────────────────────────────────────────────────────────────────────
# Step 2 – Generate script / storyboard via Claude
# ─────────────────────────────────────────────────────────────────────────────
def generate_script(characters: list, user_prompt: str) -> dict:
    """Generate a short-video script with scenes, dialogue, and actions."""
    char_summary = json.dumps(
        [{"name": c["name"], "personality": c["personality"]} for c in characters],
        indent=2,
    )
    system = (
        "You are a creative director specializing in viral short-form video content. "
        "Write punchy, engaging scripts for 30–60 second YouTube / TikTok Shorts."
    )
    prompt = (
        f"Characters:\n{char_summary}\n\n"
        f"User concept: {user_prompt}\n\n"
        "Create a script. Return ONLY valid JSON (no markdown) with this schema:\n"
        "{\n"
        "  \"title\": \"...\",\n"
        "  \"style\": \"cinematic | anime | cartoon | realistic\",\n"
        "  \"scenes\": [\n"
        "    {\n"
        "      \"scene_id\": 1,\n"
        "      \"setting\": \"brief scene description\",\n"
        "      \"action\": \"what characters physically do\",\n"
        "      \"dialogue\": [{\"character\": \"Name\", \"line\": \"...\"}],\n"
        "      \"visual_fx\": \"e.g. slow-motion, glitch, zoom, none\",\n"
        "      \"duration_sec\": 8\n"
        "    }\n"
        "  ]\n"
        "}"
    )
    response = client.messages.create(
        model="claude-opus-4-5",
        max_tokens=2000,
        system=system,
        messages=[{"role": "user", "content": prompt}],
    )
    raw = response.content[0].text.strip()
    raw = re.sub(r"^```[a-z]*\n?", "", raw)
    raw = re.sub(r"\n?```$", "", raw)
    return json.loads(raw)
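# Example of a parsed script, instantiating the schema in the prompt above
# (values illustrative):
#
#   {"title": "The Glowing Box",
#    "style": "cinematic",
#    "scenes": [{"scene_id": 1,
#                "setting": "misty forest at dusk",
#                "action": "Hero kneels and opens the box",
#                "dialogue": [{"character": "Hero", "line": "What is this?"}],
#                "visual_fx": "slow-motion",
#                "duration_sec": 8}]}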
# ─────────────────────────────────────────────────────────────────────────────
# Step 3 – Generate animated clip for one scene with AnimateDiff
# ─────────────────────────────────────────────────────────────────────────────
def generate_scene_clip(scene: dict, characters: list, style: str, tmp_dir: str) -> str:
    """Render a short animated clip for the given scene, return path to MP4."""
    char_map = {c["name"]: c["appearance"] for c in characters}
    # Build SD prompt
    char_descriptions = " and ".join(
        [char_map.get(d["character"], d["character"]) for d in scene.get("dialogue", [])]
        or [c["appearance"] for c in characters[:2]]
    )
    style_tag = {
        "cinematic": "cinematic photography, film grain, dramatic lighting",
        "anime": "anime style, cel shading, vibrant colors",
        "cartoon": "cartoon style, bold outlines, saturated colors",
        "realistic": "photorealistic, ultra detailed, 8k",
    }.get(style, "cinematic photography")
    sd_prompt = (
        f"{style_tag}, {char_descriptions}, "
        f"{scene['setting']}, {scene['action']}, "
        "masterpiece, best quality, vertical composition 9:16"
    )
    neg_prompt = (
        "low quality, blurry, deformed, ugly, watermark, text, "
        "nsfw, bad anatomy, extra limbs"
    )
    duration = float(scene.get("duration_sec", 8) or 8)
    num_frames = int(min(16, max(8, duration * 2)))
    pipe = get_anim_pipe()
    with torch.inference_mode():
        output = pipe(
            prompt=sd_prompt,
            negative_prompt=neg_prompt,
            num_frames=num_frames,
            guidance_scale=7.5,
            num_inference_steps=25,
            width=512,
            height=896,  # ≈9:16, divisible by 8
            generator=torch.Generator(device=DEVICE).manual_seed(42),
        )
    frames = output.frames[0]  # list of PIL images
    # Scale to 1080×1920
    frames_resized = [f.resize((VIDEO_W, VIDEO_H), Image.LANCZOS) for f in frames]
    # Apply visual FX
    frames_fx = apply_visual_fx(frames_resized, scene.get("visual_fx", "none"))
    # Export to temporary MP4
    scene_path = os.path.join(tmp_dir, f"scene_{scene['scene_id']:02d}_raw.mp4")
    export_to_video(frames_fx, scene_path, fps=FPS // 2)
    # Loop / extend to fill the duration using ffmpeg, adding a silent audio
    # track so every clip carries matching streams for the final concat
    final_path = os.path.join(tmp_dir, f"scene_{scene['scene_id']:02d}.mp4")
    subprocess.run(
        [
            "ffmpeg", "-y", "-stream_loop", "-1", "-i", scene_path,
            "-f", "lavfi", "-i", "anullsrc=channel_layout=stereo:sample_rate=44100",
            "-t", str(duration), "-c:v", "libx264", "-c:a", "aac",
            "-vf", f"scale={VIDEO_W}:{VIDEO_H}",
            "-pix_fmt", "yuv420p", final_path,
        ],
        check=True, capture_output=True,
    )
    return final_path
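# Timing sketch: the pipeline emits at most 16 frames, exported at
# FPS // 2 = 15 fps, i.e. roughly a one-second raw clip (16 / 15 ≈ 1.07 s).
# The -stream_loop/-t pass above then loops that clip ~7.5 times to fill
# an 8-second scene.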
# ─────────────────────────────────────────────────────────────────────────────
# Visual FX helpers
# ─────────────────────────────────────────────────────────────────────────────
def apply_visual_fx(frames: list, fx: str) -> list:
    fx = (fx or "none").lower()
    if "slow" in fx:
        frames = [f for f in frames for _ in range(2)]  # duplicate each frame
    if "zoom" in fx:
        frames = zoom_effect(frames)
    if "glitch" in fx:
        frames = glitch_effect(frames)
    if "vignette" in fx:
        frames = [add_vignette(f) for f in frames]
    return frames
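# The fx string is matched by substring, so a value such as "slow-motion zoom"
# applies both effects: frames are duplicated first (halving apparent speed),
# then the zoom ramp runs over the doubled frame list.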
def zoom_effect(frames):
    result = []
    n = len(frames)
    for i, f in enumerate(frames):
        scale = 1.0 + 0.15 * (i / max(n - 1, 1))
        w, h = f.size
        new_w, new_h = int(w * scale), int(h * scale)
        f2 = f.resize((new_w, new_h), Image.LANCZOS)
        left = (new_w - w) // 2
        top = (new_h - h) // 2
        result.append(f2.crop((left, top, left + w, top + h)))
    return result
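# Zoom arithmetic: scale ramps linearly from 1.0 on the first frame to 1.15
# on the last; with 16 frames, frame 8 gets 1.0 + 0.15 * (8 / 15) ≈ 1.08.
# Cropping the enlarged frame back to (w, h) keeps the resolution constant
# while the content appears to push in.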
def glitch_effect(frames):
    result = []
    for i, f in enumerate(frames):
        if i % 4 == 0:
            # Shift the red and blue channels in opposite directions for an
            # RGB-split glitch on every fourth frame
            arr = np.array(f)
            shift = np.random.randint(5, 20)
            arr[:, :, 0] = np.roll(arr[:, :, 0], shift, axis=1)
            arr[:, :, 2] = np.roll(arr[:, :, 2], -shift, axis=1)
            result.append(Image.fromarray(arr))
        else:
            result.append(f)
    return result
def add_vignette(img: Image.Image) -> Image.Image:
    w, h = img.size
    mask = Image.new("L", (w, h), 0)
    draw = ImageDraw.Draw(mask)
    for i in range(100):
        alpha = int(255 * (i / 100) ** 2)
        draw.ellipse([i * w // 200, i * h // 200,
                      w - i * w // 200, h - i * h // 200], fill=alpha)
    vig = Image.new("RGB", (w, h), (0, 0, 0))
    img_copy = img.copy()
    img_copy.paste(vig, mask=ImageChops.invert(mask))
    return img_copy
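# Mask arithmetic: alpha grows quadratically toward the center, e.g. ring
# i=50 gets int(255 * 0.5**2) = 63 and ring i=99 gets 249, so after
# ImageChops.invert the frame edges are the most opaque and receive the
# black paste.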
# ─────────────────────────────────────────────────────────────────────────────
# Step 4 – Text-to-Speech for dialogue lines
# ─────────────────────────────────────────────────────────────────────────────
def synthesize_dialogue(scene: dict, tmp_dir: str) -> list:
    """Return a list of (audio_path, duration, character, line) tuples."""
    tts = get_tts()
    audio_files = []
    for idx, d in enumerate(scene.get("dialogue", [])):
        line = d.get("line", "").strip()
        if not line:
            continue
        out_path = os.path.join(tmp_dir, f"s{scene['scene_id']:02d}_d{idx:02d}.wav")
        tts.tts_to_file(text=line, file_path=out_path)
        # Measure the rendered duration with ffprobe
        probe = subprocess.run(
            ["ffprobe", "-v", "error", "-show_entries",
             "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", out_path],
            capture_output=True, text=True,
        )
        try:
            dur = float(probe.stdout.strip())
        except ValueError:
            dur = 1.5  # fall back if ffprobe could not report a duration
        audio_files.append((out_path, dur, d.get("character", ""), line))
    return audio_files
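# Note: tacotron2-DDC is a single-speaker model, so every character currently
# speaks with the same voice and the voice_style field from Step 1 goes
# unused. A hedged sketch of per-character voices with a multi-speaker Coqui
# model (model id and speaker names are assumptions; check the installed TTS
# version's model list):
#
#   tts = CoquiTTS("tts_models/en/vctk/vits").to(DEVICE)
#   tts.tts_to_file(text=line, speaker="p225", file_path=out_path)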
# ─────────────────────────────────────────────────────────────────────────────
# Step 5 – Burn subtitles / captions onto scene clip
# ─────────────────────────────────────────────────────────────────────────────
def _drawtext_escape(s: str) -> str:
    """Escape characters that are special inside an ffmpeg drawtext value."""
    for ch in ("\\", "'", ":", ",", "%"):
        s = s.replace(ch, "\\" + ch)
    return s

def burn_subtitles(video_path: str, dialogue_audio: list, tmp_dir: str) -> str:
    """Use ffmpeg to mix voice-overs and burn subtitle text onto the video."""
    if not dialogue_audio:
        return video_path
    out_path = re.sub(r"\.mp4$", "_sub.mp4", video_path)
    # Delay each voice line so they play back-to-back
    audio_inputs = []
    audio_streams = []
    inputs = ["-i", video_path]
    offset = 0.0
    for i, (wav, dur, char, line) in enumerate(dialogue_audio):
        inputs += ["-i", wav]
        delay_ms = int(offset * 1000)
        audio_inputs.append(f"[{i+1}:a]adelay={delay_ms}|{delay_ms}[a{i}]")
        audio_streams.append(f"[a{i}]")
        offset += dur
    # Subtitle drawtext entries
    drawtext = []
    t_offset = 0.0
    for wav, dur, char, line in dialogue_audio:
        wrapped = _drawtext_escape(textwrap.fill(line, 28))
        safe_char = _drawtext_escape(char)
        drawtext.append(
            f"drawtext=text='{safe_char}\\: {wrapped}'"
            f":fontcolor=white:fontsize=42:borderw=3:bordercolor=black"
            f":x=(w-text_w)/2:y=h-200"
            f":enable='between(t,{t_offset:.2f},{t_offset+dur:.2f})'"
        )
        t_offset += dur
    vf = ",".join(drawtext) if drawtext else "null"
    # Mix the clip's own (silent) track with the delayed voice lines;
    # duration=first keeps the mix as long as the scene itself
    filter_complex = ";".join(audio_inputs)
    filter_complex += (
        f";[0:a]{''.join(audio_streams)}"
        f"amix=inputs={len(audio_streams) + 1}:duration=first[amix]"
    )
    cmd = ["ffmpeg", "-y"] + inputs + [
        "-filter_complex", filter_complex,
        "-vf", vf,
        "-map", "0:v",
        "-map", "[amix]",
        "-c:v", "libx264", "-c:a", "aac",
        "-pix_fmt", "yuv420p", "-shortest", out_path,
    ]
    subprocess.run(cmd, check=True, capture_output=True)
    return out_path
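# Example of the graph built above for two voice lines of 2.0 s and 1.5 s
# (timings illustrative):
#
#   -filter_complex "[1:a]adelay=0|0[a0];[2:a]adelay=2000|2000[a1];
#                    [0:a][a0][a1]amix=inputs=3:duration=first[amix]"
#   -vf "drawtext=...enable='between(t,0.00,2.00)',
#        drawtext=...enable='between(t,2.00,3.50)'"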
# ─────────────────────────────────────────────────────────────────────────────
# Step 6 – Add intro title card and outro
# ─────────────────────────────────────────────────────────────────────────────
def make_title_card(title: str, tmp_dir: str, duration: float = 2.5) -> str:
    """Generate a stylish title card image and convert it to a short clip."""
    img = Image.new("RGB", (VIDEO_W, VIDEO_H), color=(10, 10, 20))
    draw = ImageDraw.Draw(img)
    # Gradient background
    for y in range(VIDEO_H):
        r = int(10 + 40 * y / VIDEO_H)
        g = int(10 + 20 * y / VIDEO_H)
        b = int(20 + 60 * y / VIDEO_H)
        draw.line([(0, y), (VIDEO_W, y)], fill=(r, g, b))
    # Try to load a bold font, fall back to default
    try:
        font_big = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 90)
        font_sm = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 45)
    except Exception:
        font_big = ImageFont.load_default()
        font_sm = font_big
    # Glow effect: draw the title several times with small offsets
    for dx, dy in [(-3, -3), (3, -3), (-3, 3), (3, 3)]:
        draw.text((VIDEO_W // 2 + dx, VIDEO_H // 2 + dy), title,
                  font=font_big, fill=(100, 80, 200), anchor="mm")
    draw.text((VIDEO_W // 2, VIDEO_H // 2), title,
              font=font_big, fill=(255, 255, 255), anchor="mm")
    draw.text((VIDEO_W // 2, VIDEO_H // 2 + 110), "AI Short Film",
              font=font_sm, fill=(160, 140, 255), anchor="mm")
    card_img = os.path.join(tmp_dir, "title_card.jpg")
    card_clip = os.path.join(tmp_dir, "title_card.mp4")
    img.save(card_img, quality=95)
    # A silent audio track keeps the card's streams aligned with the scene clips
    subprocess.run(
        ["ffmpeg", "-y", "-loop", "1", "-i", card_img,
         "-f", "lavfi", "-i", "anullsrc=channel_layout=stereo:sample_rate=44100",
         "-t", str(duration), "-c:v", "libx264", "-c:a", "aac",
         "-vf", f"scale={VIDEO_W}:{VIDEO_H},fade=t=out:st={duration - 0.5}:d=0.5",
         "-pix_fmt", "yuv420p", card_clip],
        check=True, capture_output=True,
    )
    return card_clip
# ─────────────────────────────────────────────────────────────────────────────
# Step 7 – Concatenate all clips into the final Short
# ─────────────────────────────────────────────────────────────────────────────
def concatenate_clips(clip_paths: list, tmp_dir: str, output_path: str) -> str:
    list_file = os.path.join(tmp_dir, "concat_list.txt")
    with open(list_file, "w") as f:
        for p in clip_paths:
            f.write(f"file '{p}'\n")
    subprocess.run(
        ["ffmpeg", "-y", "-f", "concat", "-safe", "0",
         "-i", list_file, "-c:v", "libx264", "-c:a", "aac",
         "-vf", f"scale={VIDEO_W}:{VIDEO_H}",
         "-pix_fmt", "yuv420p", output_path],
        check=True, capture_output=True,
    )
    return output_path
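# concat_list.txt ends up looking like this (paths illustrative):
#
#   file '/tmp/short_vid_abc123/title_card.mp4'
#   file '/tmp/short_vid_abc123/scene_01_sub.mp4'
#   file '/tmp/short_vid_abc123/scene_02.mp4'
#
# The concat demuxer expects every listed clip to carry the same stream
# layout, which is why the title card and looped scenes get silent audio.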
# ─────────────────────────────────────────────────────────────────────────────
# Master pipeline
# ─────────────────────────────────────────────────────────────────────────────
def generate_short_video(images, user_prompt, progress=gr.Progress(track_tqdm=True)):
    # This is a generator, so validation errors must be yielded; a plain
    # `return value` from a generator never reaches the UI
    if not images:
        yield None, "❌ Please upload at least one image."
        return
    if not user_prompt.strip():
        yield None, "❌ Please enter a prompt / scene description."
        return
    tmp_dir = tempfile.mkdtemp(prefix="short_vid_")
    log_lines = []
    def log(msg):
        log_lines.append(msg)
        return "\n".join(log_lines)
    try:
        # ── 1. Extract characters ──────────────────────────────────────────
        progress(0.05, desc="🔍 Extracting characters from images…")
        yield None, log("🔍 Extracting characters from images…")
        characters = extract_characters(images)
        char_names = [c["name"] for c in characters]
        yield None, log(f"✅ Found characters: {', '.join(char_names)}")
        # ── 2. Generate script ─────────────────────────────────────────────
        progress(0.15, desc="📝 Generating script…")
        yield None, log("📝 Generating script with Claude…")
        script = generate_script(characters, user_prompt)
        title = script.get("title", "AI Short")
        style = script.get("style", "cinematic")
        scenes = script.get("scenes", [])
        yield None, log(f"✅ Script ready: '{title}' | Style: {style} | Scenes: {len(scenes)}")
        # ── 3. Title card ──────────────────────────────────────────────────
        progress(0.20, desc="🎬 Creating title card…")
        yield None, log("🎬 Creating title card…")
        title_clip = make_title_card(title, tmp_dir)
        all_clips = [title_clip]
        # ── 4. Scene loop ──────────────────────────────────────────────────
        total_scenes = max(len(scenes), 1)  # guard against an empty script
        for idx, scene in enumerate(scenes):
            pct_base = 0.20 + 0.65 * (idx / total_scenes)
            progress(pct_base, desc=f"🎥 Rendering scene {idx+1}/{len(scenes)}…")
            yield None, log(f"\n🎥 Scene {idx+1}/{len(scenes)}: {scene.get('setting', '')}")
            # 4a. Animate the scene
            yield None, log("   ⏳ Generating animation…")
            scene_clip = generate_scene_clip(scene, characters, style, tmp_dir)
            # 4b. TTS for dialogue
            yield None, log("   ⏳ Synthesising dialogue audio…")
            audio_lines = synthesize_dialogue(scene, tmp_dir)
            # 4c. Burn subtitles
            if audio_lines:
                yield None, log("   ⏳ Burning subtitles…")
                scene_clip = burn_subtitles(scene_clip, audio_lines, tmp_dir)
            all_clips.append(scene_clip)
            yield None, log(f"   ✅ Scene {idx+1} done.")
        # ── 5. Concatenate ─────────────────────────────────────────────────
        progress(0.90, desc="✂️ Assembling final video…")
        yield None, log("\n✂️ Assembling final Short…")
        output_path = os.path.join(tmp_dir, "final_short.mp4")
        concatenate_clips(all_clips, tmp_dir, output_path)
        progress(1.0, desc="✅ Done!")
        yield None, log(f"\n🎉 Done! Video saved: {output_path}")
        yield output_path, log("🎬 Your AI Short is ready below!")
    except Exception as e:
        import traceback
        yield None, log(f"\n❌ Error: {e}\n{traceback.format_exc()}")
# ─────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ─────────────────────────────────────────────────────────────────────────────
CSS = """
body { background: #0d0d1a !important; }
.gradio-container { max-width: 900px; margin: auto; }
#title { text-align:center; font-size:2.2rem; font-weight:800;
         background: linear-gradient(135deg,#a78bfa,#60a5fa,#f472b6);
         -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
#subtitle { text-align:center; color:#94a3b8; margin-bottom:1.5rem; }
.generate-btn { background: linear-gradient(135deg,#7c3aed,#2563eb) !important;
                color:white !important; font-size:1.1rem !important;
                padding:0.9rem 2rem !important; border-radius:12px !important; }
"""

with gr.Blocks(css=CSS, title="AI Short Video Generator") as demo:
    gr.HTML("<h1 id='title'>🎬 AI Short Video Generator</h1>")
    gr.HTML("<p id='subtitle'>Upload character images → describe a scene → get a 1080×1920 Shorts-ready video</p>")
    with gr.Row():
        with gr.Column(scale=1):
            img_input = gr.Gallery(
                label="📷 Upload Character Images",
                type="pil",
                columns=3,
                height=300,
                interactive=True,
            )
            prompt_input = gr.Textbox(
                label="📝 Scene / Story Prompt",
                placeholder="e.g. Two friends find a mysterious glowing box in the forest…",
                lines=4,
            )
            gen_btn = gr.Button("🚀 Generate Short Video", elem_classes="generate-btn")
        with gr.Column(scale=1):
            log_output = gr.Textbox(label="📋 Generation Log", lines=20, interactive=False)
            video_output = gr.Video(label="🎬 Your AI Short", height=500)
    gr.HTML("""
    <div style='text-align:center;color:#475569;font-size:0.85rem;margin-top:1rem'>
      ⚡ Powered by Claude Vision · AnimateDiff · Stable Diffusion · Coqui TTS · FFmpeg
    </div>
    """)
    gen_btn.click(
        fn=generate_short_video,
        inputs=[img_input, prompt_input],
        outputs=[video_output, log_output],
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
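# Dependency sketch inferred from the imports above (package names are
# assumptions; exact pins were not part of this file): gradio, anthropic,
# torch, numpy, Pillow, diffusers (with transformers/accelerate), TTS
# (Coqui), plus ffmpeg/ffprobe binaries on PATH.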