Spaces:
Running
Running
| """ | |
| CineStory AI β Image β Interactive Branching Story β Cinematic Narrated Video | |
| Architecture (all $0.00): | |
| Vision: Groq free API (Llama 4 Scout) β rich scene understanding | |
| Story: Together AI free tier (Llama 3.1 8B) β branching narratives | |
| Images: Together AI Flux Schnell-Free β stylised chapter keyframes | |
| TTS: Kokoro 82M on CPU β #1 ranked TTS, zero cost | |
| Composer: ffmpeg Ken Burns β audio-synced storyboard video, CPU only | |
| No video generation APIs. No GPU. Total cost per story: $0.00. | |
| """ | |
| import os | |
| import json | |
| import time | |
| import tempfile | |
| import logging | |
| import socket | |
| import gradio as gr | |
| from vision import analyze_scene, scene_to_story_prompt | |
| from story import ( | |
| generate_opening, continue_story, generate_linear_story, StoryState, | |
| ) | |
| from tts import generate_speech, VOICE_MAP | |
| from composer import ( | |
| create_cinematic_story_video, get_style_names, STYLE_PRESETS, | |
| get_image_gen_errors, | |
| ) | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger("cinestory") | |
| def _load_local_env(env_file: str = ".env") -> None: | |
| if not os.path.exists(env_file): | |
| return | |
| try: | |
| with open(env_file, "r", encoding="utf-8") as f: | |
| for raw_line in f: | |
| line = raw_line.strip() | |
| if not line or line.startswith("#") or "=" not in line: | |
| continue | |
| key, value = line.split("=", 1) | |
| key = key.strip() | |
| value = value.strip().strip('"').strip("'") | |
| if key and key not in os.environ: | |
| os.environ[key] = value | |
| except Exception as e: | |
| logger.warning(f"Failed to load {env_file}: {e}") | |
# Populate os.environ from a local .env before any API clients read keys.
_load_local_env()
# Scratch directory for all generated audio/video artifacts (one per process;
# not cleaned up here — the OS temp reaper or platform handles it).
WORK_DIR = tempfile.mkdtemp(prefix="cinestory_")
| def _find_free_port(default_port: int = 7860) -> int: | |
| try: | |
| with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: | |
| sock.bind(("0.0.0.0", default_port)) | |
| return default_port | |
| except OSError: | |
| pass | |
| try: | |
| with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: | |
| sock.bind(("0.0.0.0", 0)) | |
| return int(sock.getsockname()[1]) | |
| except OSError: | |
| return default_port | |
| # ββ Pipeline Functions ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def process_image(image_path):
    """Analyze the uploaded image with Groq vision.

    Returns a pair ``(markdown_summary, scene_json_string)``.  On failure
    the summary carries the error message and the JSON payload is ``"{}"``.
    """
    if image_path is None:
        return "Please upload an image first.", "{}"
    try:
        scene = analyze_scene(image_path)
        field = scene.get
        sections = [
            f"**Scene:** {field('scene_description', 'N/A')}",
            f"**Mood:** {field('mood', 'N/A')} | "
            f"**Atmosphere:** {field('atmosphere', 'N/A')}",
            f"**Setting:** {field('setting', 'N/A')} ({field('time_of_day', '')})",
            f"**Narrative Potential:** {field('narrative_potential', 'N/A')}",
            f"**Sensory Details:** {field('sensory_details', 'N/A')}",
        ]
        return "\n\n".join(sections), json.dumps(scene)
    except Exception as e:
        logger.error(f"Scene analysis failed: {e}")
        return f"Error analyzing image: {str(e)}", "{}"
def generate_story_opening(scene_json, genre, tone, theme, conflict, ending):
    """Generate the branching story opening (chapter one plus choices).

    Returns ``(story_markdown, serialized_state_json, gr.update)`` where the
    update toggles the choice-button group's visibility.
    """
    try:
        scene = json.loads(scene_json) if scene_json else {}
    except json.JSONDecodeError:
        # Not valid JSON -> treat the raw text as the scene description.
        scene = {"scene_description": scene_json}
    prompt = scene_to_story_prompt(scene, {
        "genre": genre,
        "tone": tone,
        "theme": theme,
        "conflict": conflict,
        "ending": ending,
    })
    try:
        state = generate_opening(prompt)
        choices_text = ""
        if state.choices:
            options = "".join(
                f"\n**Option {idx}:** {choice}"
                for idx, choice in enumerate(state.choices, start=1)
            )
            choices_text = "\n\n---\n**What happens next?**\n" + options
        serialized = json.dumps({
            "scene_context": state.scene_context,
            "chapters": state.chapters,
            "current_text": state.current_text,
            "choices": state.choices,
            "branch_depth": state.branch_depth,
            "max_branches": state.max_branches,
        })
        return (
            state.current_text + choices_text,
            serialized,
            gr.update(visible=bool(state.choices)),
        )
    except Exception as e:
        logger.error(f"Story generation failed: {e}")
        return f"Error: {str(e)}", "{}", gr.update(visible=False)
def make_choice(choice_num, state_json):
    """Advance the story along the branch the user picked.

    Rebuilds a StoryState from the serialized JSON, continues the story,
    and returns ``(full_story_markdown, updated_state_json, gr.update)``.
    On failure the incoming state JSON is returned unchanged so the UI
    keeps its last good state.
    """
    try:
        sd = json.loads(state_json)
        state = StoryState(
            scene_context=sd["scene_context"],
            chapters=sd["chapters"],
            current_text=sd["current_text"],
            choices=sd["choices"],
            branch_depth=sd["branch_depth"],
            max_branches=sd.get("max_branches", 2),
        )
        new_state = continue_story(state, choice_num)
        full_story = "\n\n---\n\n".join(new_state.chapters)
        if new_state.choices:
            options = "".join(
                f"\n**Option {idx}:** {choice}"
                for idx, choice in enumerate(new_state.choices, start=1)
            )
            choices_text = "\n\n---\n**What happens next?**\n" + options
        elif new_state.branch_depth >= new_state.max_branches:
            # No more branches to take: the story is finished.
            choices_text = "\n\n---\n*🎬 Story Complete! Generate your cinematic video below.*"
        else:
            choices_text = ""
        serialized = json.dumps({
            "scene_context": new_state.scene_context,
            "chapters": new_state.chapters,
            "current_text": new_state.current_text,
            "choices": new_state.choices,
            "branch_depth": new_state.branch_depth,
            "max_branches": new_state.max_branches,
        })
        return (
            full_story + choices_text,
            serialized,
            gr.update(visible=bool(new_state.choices)),
        )
    except Exception as e:
        logger.error(f"Story continuation failed: {e}")
        return f"Error: {str(e)}", state_json, gr.update(visible=False)
def generate_audio_only(state_json, voice_name, speed):
    """Synthesize the full story narration as a quick WAV preview.

    Joins all chapters into one text, runs Kokoro TTS, and returns
    ``(audio_file_path_or_None, status_markdown)``.
    """
    try:
        sd = json.loads(state_json)
        narration = "\n\n".join(sd.get("chapters", []))
        if not narration.strip():
            return None, "No story text to narrate."
        voice_id = VOICE_MAP.get(voice_name, "af_heart")
        output_path = os.path.join(WORK_DIR, "narration_preview.wav")
        started = time.time()
        generate_speech(narration, voice=voice_id, speed=speed, output_path=output_path)
        took = time.time() - started
        return output_path, f"Audio generated in {took:.1f}s using Kokoro ({voice_name})"
    except Exception as e:
        logger.error(f"TTS failed: {e}")
        return None, f"Error: {str(e)}"
def generate_cinematic_video(
    image_path, state_json, scene_json, voice_name, speed, style_name,
    progress=gr.Progress(track_tqdm=False),
):
    """
    End-to-end: story chapters → stylised images → per-chapter audio →
    Ken Burns storyboard video synced to narration.
    Each image displays for exactly as long as its chapter is narrated.
    Total cost: $0.00.

    Returns ``(video_path, audio_path, status_markdown)``; the two paths
    are None on failure and the status then carries the error details.
    """
    if image_path is None:
        return None, None, "Upload an image first."
    try:
        sd = json.loads(state_json)
        chapters = sd.get("chapters", [])
        if not chapters:
            return None, None, "Generate a story first."
        scene = json.loads(scene_json) if scene_json else {}
    except json.JSONDecodeError:
        # NOTE(review): a malformed scene_json also lands here, so this
        # message is slightly misleading in that case.
        return None, None, "Invalid story state."
    voice_id = VOICE_MAP.get(voice_name, "af_heart")
    output_path = os.path.join(WORK_DIR, "cinestory_final.mp4")
    try:
        progress(0.1, desc="Generating chapter images...")
        result = create_cinematic_story_video(
            chapters=chapters,
            scene_json=scene,
            original_image_path=image_path,
            style_name=style_name,
            voice=voice_id,
            speed=speed,
            output_path=output_path,
        )
        # Human-readable per-chapter timing summary for the status panel.
        durations = ", ".join(
            f"Ch{d['chapter']}: {d['duration_s']}s" for d in result.chapter_durations
        )
        # Check if any image gen errors occurred (partial fallbacks)
        img_errors = get_image_gen_errors()
        error_note = ""
        if img_errors:
            # Show at most the last five warnings, truncated to 120 chars each.
            error_note = (
                f"\n\n⚠️ **Image generation warnings** ({len(img_errors)}):\n"
                + "\n".join(f"- {e[:120]}" for e in img_errors[-5:])
            )
        status = (
            f"**Video created in {result.generation_time}s** | "
            f"Duration: {result.total_duration}s | "
            f"Chapters: {result.num_chapters}\n\n"
            f"Timing: {durations}\n\n"
            f"Cost: **${result.cost_usd:.2f}**"
            f"{error_note}"
        )
        # Also extract audio for the audio player
        audio_path = os.path.join(WORK_DIR, "narration_combined.wav")
        # Combine chapter audios if they exist in temp dir
        import glob
        # NOTE(review): this assumes the composer writes ch_audio_*.wav into
        # a sibling "cinestory_vid_*" temp dir — confirm against composer.py.
        ch_audios = sorted(glob.glob(os.path.join(
            os.path.dirname(output_path), "..", "cinestory_vid_*", "ch_audio_*.wav"
        )))
        if not ch_audios:
            # Generate a single combined audio as fallback
            full_text = "\n\n".join(chapters)
            generate_speech(full_text, voice=voice_id, speed=speed, output_path=audio_path)
        else:
            from tts import concatenate_audio
            concatenate_audio(ch_audios, audio_path, pause_seconds=0.5)
        return result.video_path, audio_path, status
    except Exception as e:
        logger.error(f"Cinematic video failed: {e}", exc_info=True)
        # Surface any collected image-generation errors alongside the failure.
        img_errors = get_image_gen_errors()
        detail = ""
        if img_errors:
            detail = "\n\n**Image generation errors:**\n" + "\n".join(
                f"- {err[:150]}" for err in img_errors[-5:]
            )
        return None, None, f"Error: {str(e)}{detail}"
| # ββ Gradio UI βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| CUSTOM_CSS = """ | |
| @import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&family=Fraunces:opsz,wght,SOFT@9..144,500,50&display=swap'); | |
| /* ββ Force light mode via CSS variables βββββββββββββββββββ */ | |
| :root, .dark { | |
| --block-background-fill: white !important; | |
| --panel-background-fill: white !important; | |
| --body-background-fill: #f8faf6 !important; | |
| --background-fill-primary: white !important; | |
| --background-fill-secondary: #f8faf6 !important; | |
| --border-color-primary: #d1d5db !important; | |
| --block-border-color: #e5e7eb !important; | |
| --input-background-fill: white !important; | |
| --body-text-color: #1f2937 !important; | |
| --block-label-text-color: #1e293b !important; | |
| --block-title-text-color: #1e293b !important; | |
| } | |
| .gradio-container { | |
| background: | |
| radial-gradient(1200px 600px at 0% -10%, #d9efe9 0%, transparent 60%), | |
| radial-gradient(1000px 500px at 100% 0%, #ffe7cc 0%, transparent 55%), | |
| linear-gradient(140deg, #f5f7f2, #fefcf8) !important; | |
| font-family: "Space Grotesk", ui-sans-serif, system-ui, sans-serif; | |
| color: #1f2937; | |
| } | |
| /* ββ All text dark ββββββββββββββββββββββββββββββββββββββββ */ | |
| .gradio-container .prose, .gradio-container .prose *, | |
| .gradio-container .markdown-text, .gradio-container .markdown-text *, | |
| .gradio-container label, .gradio-container label span, | |
| .gradio-container p, .gradio-container h1, | |
| .gradio-container h2, .gradio-container h3 { | |
| color: #1f2937 !important; | |
| } | |
| /* ββ Media player controls: leave untouched βββββββββββββββ */ | |
| .gradio-container audio, .gradio-container audio *, | |
| .gradio-container video, .gradio-container video *, | |
| .gradio-container button svg, .gradio-container button path { | |
| color: unset !important; | |
| fill: unset !important; | |
| } | |
| /* ββ Accordion header βββββββββββββββββββββββββββββββββββββ */ | |
| .gradio-container .label-wrap { | |
| background: linear-gradient(135deg, #eef7f4, #fdf5ec) !important; | |
| color: #1e293b !important; | |
| } | |
| .gradio-container .label-wrap * { color: #1e293b !important; } | |
| /* ββ Dropdown labels: no colored background βββββββββββββββ */ | |
| .gradio-container label > span { background: transparent !important; } | |
| /* ββ Inline code: light teal instead of dark block ββββββββ */ | |
| .gradio-container code, | |
| .gradio-container .prose code, | |
| .gradio-container .markdown-text code { | |
| background: rgba(15, 118, 110, 0.08) !important; | |
| color: #0f766e !important; | |
| padding: 0.15em 0.4em; | |
| border-radius: 4px; | |
| } | |
| /* ββ Label badges (Genre, Tone etc): no colored bg ββββββββ */ | |
| .gradio-container .block label span, | |
| .gradio-container span[data-testid], | |
| .gradio-container .gr-input-label { | |
| background: transparent !important; | |
| background-color: transparent !important; | |
| color: #1e293b !important; | |
| } | |
| /* ββ Hero ββββββββββββββββββββββββββββββββββββββββββββββββββ */ | |
| .hero { | |
| border: 1px solid rgba(20,30,24,0.10); | |
| background: linear-gradient(120deg, #ffffff, #f8fffc) !important; | |
| border-radius: 18px; padding: 1.1rem 1.2rem; | |
| box-shadow: 0 10px 28px rgba(16,24,40,0.06); | |
| margin-bottom: 0.8rem; | |
| } | |
| .hero h1 { | |
| margin: 0; color: #1e293b !important; | |
| font-family: "Fraunces", Georgia, serif; | |
| font-size: clamp(1.8rem, 3.2vw, 2.45rem); | |
| } | |
| .hero p { margin: 0.55rem 0 0; color: #5b6472 !important; font-size: 0.99rem; } | |
| /* ββ Step chips ββββββββββββββββββββββββββββββββββββββββββββ */ | |
| .flow-guide { | |
| display: grid; | |
| grid-template-columns: repeat(4, minmax(130px, 1fr)); | |
| gap: 0.55rem; margin: 0.5rem 0 1rem; | |
| } | |
| .guide-chip { | |
| border: 1px solid rgba(15,118,110,0.18); | |
| background: linear-gradient(140deg, #ffffff, #f2fbf8) !important; | |
| border-radius: 12px; padding: 0.6rem 0.72rem; | |
| font-size: 0.88rem; color: #1e293b !important; | |
| box-shadow: 0 4px 14px rgba(15,118,110,0.06); | |
| } | |
| .guide-chip * { color: #1e293b !important; } | |
| .guide-chip b { color: #0f766e !important; font-weight: 700; } | |
| /* ββ Helpers βββββββββββββββββββββββββββββββββββββββββββββββ */ | |
| .panel-title { color: #1e293b !important; font-weight: 700; font-size: 1rem; } | |
| .helper-note { color: #5b6472 !important; font-size: 0.88rem; margin-bottom: 0.4rem; } | |
| .cost-tag { | |
| font-family: ui-monospace, monospace; font-size: 0.86em; | |
| color: #047857 !important; | |
| border: 1px solid rgba(4,120,87,0.2); | |
| background: rgba(236,253,245,0.7) !important; | |
| border-radius: 10px; padding: 0.6rem 0.7rem; | |
| } | |
| button.primary, button.primary * { color: white !important; } | |
| @media (max-width: 960px) { | |
| .flow-guide { grid-template-columns: repeat(2, minmax(120px, 1fr)); } | |
| } | |
| """ | |
| CUSTOM_THEME = gr.themes.Soft(primary_hue="emerald", secondary_hue="orange") | |
def build_app():
    """Construct and return the CineStory Gradio Blocks UI.

    Lays out the four-step pipeline (analyze image → branching story →
    narration preview → cinematic video) and wires each button to the
    pipeline functions above.  Story and scene data travel between steps
    as JSON strings held in gr.State components.
    """
    # Gradio 6 moved theme/css from Blocks() to launch().
    # We try Blocks() first (works in Gradio 5), fall back to bare Blocks.
    try:
        app_context = gr.Blocks(
            title="CineStory AI",
            theme=CUSTOM_THEME,
            css=CUSTOM_CSS,
        )
    except TypeError:
        # Gradio 6: theme/css not accepted in constructor
        app_context = gr.Blocks(title="CineStory AI")
    with app_context as app:
        # Hero banner + step-guide chips; classes styled by CUSTOM_CSS.
        gr.HTML(
            "<div class='hero'>"
            "<h1>CineStory AI</h1>"
            "<p>Turn one image into a short interactive story, then export a narrated cinematic video.</p>"
            "</div>"
            "<div class='flow-guide'>"
            "<div class='guide-chip'><b>Step 1</b><br>Upload and analyze your image</div>"
            "<div class='guide-chip'><b>Step 2</b><br>Generate story and choose branches</div>"
            "<div class='guide-chip'><b>Step 3</b><br>Preview narration voice</div>"
            "<div class='guide-chip'><b>Step 4</b><br>Create the final cinematic video</div>"
            "</div>"
        )
        # Hidden per-session state: JSON strings produced/consumed by the
        # pipeline functions (scene analysis and serialized StoryState).
        scene_json = gr.State("{}")
        story_state = gr.State("{}")
        with gr.Row():
            # ── Left: inputs ──────────────────────────────────────────────
            with gr.Column(scale=2):
                gr.HTML(
                    "<div class='panel-title'>1) Image and story controls</div>"
                    "<div class='helper-note'>Start by uploading one image, then tune story direction and voice.</div>"
                )
                image_input = gr.Image(type="filepath", label="Choose an image")
                gr.Markdown("### Story Preferences")
                with gr.Row():
                    genre = gr.Dropdown(
                        ["Fantasy", "Science Fiction", "Mystery",
                         "Romance", "Horror", "Adventure"],
                        value="Fantasy", label="Genre",
                    )
                    tone = gr.Dropdown(
                        ["Serious", "Light-hearted", "Humorous",
                         "Dark", "Whimsical"],
                        value="Serious", label="Tone",
                    )
                with gr.Row():
                    theme = gr.Dropdown(
                        ["Self-discovery", "Redemption", "Love",
                         "Justice", "Survival", "Freedom"],
                        value="Self-discovery", label="Theme",
                    )
                    conflict = gr.Dropdown(
                        ["Internal struggle", "Person vs. Society",
                         "Person vs. Nature", "Person vs. Person"],
                        value="Internal struggle", label="Conflict",
                    )
                ending = gr.Dropdown(
                    ["Happy", "Bittersweet", "Open-ended", "Tragic", "Twist"],
                    value="Open-ended", label="Ending",
                )
                analyze_btn = gr.Button(
                    "Step 1: Analyze Image", variant="primary", size="lg",
                )
                gr.Markdown("### Visual Style and Voice")
                with gr.Row():
                    style_select = gr.Dropdown(
                        get_style_names(),
                        value="Watercolor Storybook",
                        label="Art Style",
                    )
                    voice_select = gr.Dropdown(
                        list(VOICE_MAP.keys()),
                        value="Narrator (Female, Warm)",
                        label="Narrator Voice",
                    )
                speed_slider = gr.Slider(
                    0.5, 1.5, value=1.0, step=0.1, label="Narration Speed",
                )
            # ── Right: outputs ────────────────────────────────────────────
            with gr.Column(scale=3):
                with gr.Accordion("Step 1 Output: Scene Analysis", open=True):
                    scene_display = gr.Markdown(
                        "*Click **Step 1: Analyze Image** after uploading your image.*"
                    )
                with gr.Accordion("Step 2: Story", open=True):
                    story_display = gr.Markdown(
                        "*After analysis, click **Step 2: Generate Story** and pick your branch options.*"
                    )
                    generate_story_btn = gr.Button(
                        "Step 2: Generate Story", variant="primary", size="lg",
                    )
                    # Branch buttons stay hidden until a story offers choices.
                    with gr.Group(visible=False) as choice_group:
                        gr.Markdown("**Choose what happens next (up to 2 rounds):**")
                        with gr.Row():
                            choice_1_btn = gr.Button("Option 1", variant="secondary")
                            choice_2_btn = gr.Button("Option 2", variant="secondary")
                            choice_3_btn = gr.Button("Option 3", variant="secondary")
                with gr.Accordion("Optional Step 3: Audio Preview", open=False):
                    audio_btn = gr.Button(
                        "Step 3: Preview Narration Audio", variant="secondary",
                    )
                    audio_output = gr.Audio(
                        label="Narration Preview", type="filepath",
                    )
                    audio_status = gr.Markdown("")
                with gr.Accordion("Step 4: Cinematic Video", open=True):
                    gr.Markdown(
                        "*Creates stylized chapter images, narration, and a stitched video. "
                        "Run this after you are happy with the story choices.*"
                    )
                    video_btn = gr.Button(
                        "Step 4: Create Cinematic Story Video",
                        variant="primary", size="lg",
                    )
                    video_output = gr.Video(label="Story Video")
                    video_audio = gr.Audio(
                        label="Full Narration", type="filepath", visible=True,
                    )
                    video_status = gr.Markdown("", elem_classes="cost-tag")
        # ── Wiring ────────────────────────────────────────────────────────
        analyze_btn.click(
            fn=process_image,
            inputs=[image_input],
            outputs=[scene_display, scene_json],
        )
        generate_story_btn.click(
            fn=generate_story_opening,
            inputs=[scene_json, genre, tone, theme, conflict, ending],
            outputs=[story_display, story_state, choice_group],
        )
        # Each branch button passes its zero-based choice index to make_choice.
        choice_1_btn.click(
            fn=lambda s: make_choice(0, s),
            inputs=[story_state],
            outputs=[story_display, story_state, choice_group],
        )
        choice_2_btn.click(
            fn=lambda s: make_choice(1, s),
            inputs=[story_state],
            outputs=[story_display, story_state, choice_group],
        )
        choice_3_btn.click(
            fn=lambda s: make_choice(2, s),
            inputs=[story_state],
            outputs=[story_display, story_state, choice_group],
        )
        audio_btn.click(
            fn=generate_audio_only,
            inputs=[story_state, voice_select, speed_slider],
            outputs=[audio_output, audio_status],
        )
        video_btn.click(
            fn=generate_cinematic_video,
            inputs=[
                image_input, story_state, scene_json,
                voice_select, speed_slider, style_select,
            ],
            outputs=[video_output, video_audio, video_status],
        )
    return app
| # ββ Entry Point βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| if __name__ == "__main__": | |
| app = build_app() | |
| port = int(os.environ.get("GRADIO_SERVER_PORT", "7860")) | |
| server_port = _find_free_port(default_port=port) | |
| logger.info(f"Launching on port {server_port}") | |
| launch_kwargs = dict( | |
| share=False, | |
| server_port=server_port, | |
| ) | |
| # Gradio 6 accepts theme/css in launch() | |
| try: | |
| app.launch( | |
| pwa=True, | |
| favicon_path=( | |
| "./assets/favicon.png" | |
| if os.path.exists("./assets/favicon.png") else None | |
| ), | |
| theme=CUSTOM_THEME, | |
| css=CUSTOM_CSS, | |
| **launch_kwargs, | |
| ) | |
| except TypeError: | |
| # Gradio 5: theme/css already set in Blocks(), launch doesn't accept them | |
| app.launch( | |
| pwa=True, | |
| favicon_path=( | |
| "./assets/favicon.png" | |
| if os.path.exists("./assets/favicon.png") else None | |
| ), | |
| **launch_kwargs, | |
| ) |