# CineStoryAI — app.py (Hugging Face Space by adi-123, commit 751d179, verified)
"""
CineStory AI β€” Image β†’ Interactive Branching Story β†’ Cinematic Narrated Video
Architecture (all $0.00):
Vision: Groq free API (Llama 4 Scout) β€” rich scene understanding
Story: Together AI free tier (Llama 3.1 8B) β€” branching narratives
Images: Together AI Flux Schnell-Free β€” stylised chapter keyframes
TTS: Kokoro 82M on CPU β€” #1 ranked TTS, zero cost
Composer: ffmpeg Ken Burns β€” audio-synced storyboard video, CPU only
No video generation APIs. No GPU. Total cost per story: $0.00.
"""
import os
import json
import time
import tempfile
import logging
import socket
import gradio as gr
from vision import analyze_scene, scene_to_story_prompt
from story import (
generate_opening, continue_story, generate_linear_story, StoryState,
)
from tts import generate_speech, VOICE_MAP
from composer import (
create_cinematic_story_video, get_style_names, STYLE_PRESETS,
get_image_gen_errors,
)
# Root logging at INFO; every pipeline stage below logs through "cinestory".
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("cinestory")
def _load_local_env(env_file: str = ".env") -> None:
if not os.path.exists(env_file):
return
try:
with open(env_file, "r", encoding="utf-8") as f:
for raw_line in f:
line = raw_line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
key, value = line.split("=", 1)
key = key.strip()
value = value.strip().strip('"').strip("'")
if key and key not in os.environ:
os.environ[key] = value
except Exception as e:
logger.warning(f"Failed to load {env_file}: {e}")
# Populate os.environ from a local .env before any API client reads its keys.
_load_local_env()
# Per-process scratch directory for generated narration/video artifacts.
WORK_DIR = tempfile.mkdtemp(prefix="cinestory_")
def _find_free_port(default_port: int = 7860) -> int:
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.bind(("0.0.0.0", default_port))
return default_port
except OSError:
pass
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.bind(("0.0.0.0", 0))
return int(sock.getsockname()[1])
except OSError:
return default_port
# ── Pipeline Functions ────────────────────────────────────────────────────────
def process_image(image_path):
    """Run Groq vision analysis on the uploaded image.

    Returns a markdown summary for display plus the raw scene dict serialized
    as JSON (kept in gr.State for the later story/video stages). On failure,
    the first element carries the error text and the JSON payload is "{}".
    """
    if image_path is None:
        return "Please upload an image first.", "{}"
    try:
        scene = analyze_scene(image_path)
        sections = [
            f"**Scene:** {scene.get('scene_description', 'N/A')}",
            (
                f"**Mood:** {scene.get('mood', 'N/A')} | "
                f"**Atmosphere:** {scene.get('atmosphere', 'N/A')}"
            ),
            (
                f"**Setting:** {scene.get('setting', 'N/A')} "
                f"({scene.get('time_of_day', '')})"
            ),
            f"**Narrative Potential:** {scene.get('narrative_potential', 'N/A')}",
            f"**Sensory Details:** {scene.get('sensory_details', 'N/A')}",
        ]
        return "\n\n".join(sections), json.dumps(scene)
    except Exception as e:
        logger.error(f"Scene analysis failed: {e}")
        return f"Error analyzing image: {str(e)}", "{}"
def generate_story_opening(scene_json, genre, tone, theme, conflict, ending):
    """Create the story opening plus up to three branch choices.

    Returns (story_markdown, serialized_state_json, choice_group_visibility).
    If the scene JSON is malformed, the raw string is treated as a plain
    scene description rather than failing.
    """
    try:
        scene = json.loads(scene_json) if scene_json else {}
    except json.JSONDecodeError:
        scene = {"scene_description": scene_json}
    prompt = scene_to_story_prompt(scene, {
        "genre": genre, "tone": tone, "theme": theme,
        "conflict": conflict, "ending": ending,
    })
    try:
        state = generate_opening(prompt)
        if state.choices:
            options = "".join(
                f"\n**Option {i + 1}:** {c}" for i, c in enumerate(state.choices)
            )
            choices_text = "\n\n---\n**What happens next?**\n" + options
        else:
            choices_text = ""
        # Snapshot the mutable story state as JSON for gr.State storage.
        payload = json.dumps({
            "scene_context": state.scene_context,
            "chapters": state.chapters,
            "current_text": state.current_text,
            "choices": state.choices,
            "branch_depth": state.branch_depth,
            "max_branches": state.max_branches,
        })
        return (
            state.current_text + choices_text,
            payload,
            gr.update(visible=bool(state.choices)),
        )
    except Exception as e:
        logger.error(f"Story generation failed: {e}")
        return f"Error: {str(e)}", "{}", gr.update(visible=False)
def make_choice(choice_num, state_json):
    """Advance the story down the branch the user picked (0-based index).

    Rebuilds a StoryState from the serialized snapshot, continues one round,
    and returns (full_story_markdown, new_state_json, choice_visibility).
    On any failure the prior state_json is echoed back unchanged.
    """
    try:
        snapshot = json.loads(state_json)
        prior = StoryState(
            scene_context=snapshot["scene_context"],
            chapters=snapshot["chapters"],
            current_text=snapshot["current_text"],
            choices=snapshot["choices"],
            branch_depth=snapshot["branch_depth"],
            max_branches=snapshot.get("max_branches", 2),
        )
        advanced = continue_story(prior, choice_num)
        story_md = "\n\n---\n\n".join(advanced.chapters)
        if advanced.choices:
            tail = "\n\n---\n**What happens next?**\n" + "".join(
                f"\n**Option {i + 1}:** {c}" for i, c in enumerate(advanced.choices)
            )
        elif advanced.branch_depth >= advanced.max_branches:
            tail = "\n\n---\n*🎬 Story Complete! Generate your cinematic video below.*"
        else:
            tail = ""
        serialized = json.dumps({
            "scene_context": advanced.scene_context,
            "chapters": advanced.chapters,
            "current_text": advanced.current_text,
            "choices": advanced.choices,
            "branch_depth": advanced.branch_depth,
            "max_branches": advanced.max_branches,
        })
        return (
            story_md + tail,
            serialized,
            gr.update(visible=bool(advanced.choices)),
        )
    except Exception as e:
        logger.error(f"Story continuation failed: {e}")
        return f"Error: {str(e)}", state_json, gr.update(visible=False)
def generate_audio_only(state_json, voice_name, speed):
    """Render the full narration as a standalone WAV (quick voice preview).

    Returns (audio_path_or_None, status_markdown). Bails out early with no
    audio when the story state contains no chapter text yet.
    """
    try:
        snapshot = json.loads(state_json)
        narration = "\n\n".join(snapshot.get("chapters", []))
        if not narration.strip():
            return None, "No story text to narrate."
        out_file = os.path.join(WORK_DIR, "narration_preview.wav")
        started = time.time()
        generate_speech(
            narration,
            voice=VOICE_MAP.get(voice_name, "af_heart"),
            speed=speed,
            output_path=out_file,
        )
        took = time.time() - started
        return out_file, f"Audio generated in {took:.1f}s using Kokoro ({voice_name})"
    except Exception as e:
        logger.error(f"TTS failed: {e}")
        return None, f"Error: {str(e)}"
def generate_cinematic_video(
    image_path, state_json, scene_json, voice_name, speed, style_name,
    progress=gr.Progress(track_tqdm=False),
):
    """
    End-to-end: story chapters → stylised images → per-chapter audio →
    Ken Burns storyboard video synced to narration.
    Each image displays for exactly as long as its chapter is narrated.
    Total cost: $0.00.

    Returns (video_path, audio_path, status_markdown); the first two are
    None on failure, with the error described in the status string.
    """
    if image_path is None:
        return None, None, "Upload an image first."
    # Rehydrate story and scene state from the JSON strings held in gr.State.
    try:
        sd = json.loads(state_json)
        chapters = sd.get("chapters", [])
        if not chapters:
            return None, None, "Generate a story first."
        scene = json.loads(scene_json) if scene_json else {}
    except json.JSONDecodeError:
        return None, None, "Invalid story state."
    voice_id = VOICE_MAP.get(voice_name, "af_heart")
    output_path = os.path.join(WORK_DIR, "cinestory_final.mp4")
    try:
        progress(0.1, desc="Generating chapter images...")
        result = create_cinematic_story_video(
            chapters=chapters,
            scene_json=scene,
            original_image_path=image_path,
            style_name=style_name,
            voice=voice_id,
            speed=speed,
            output_path=output_path,
        )
        durations = ", ".join(
            f"Ch{d['chapter']}: {d['duration_s']}s" for d in result.chapter_durations
        )
        # Check if any image gen errors occurred (partial fallbacks)
        img_errors = get_image_gen_errors()
        error_note = ""
        if img_errors:
            error_note = (
                f"\n\n⚠️ **Image generation warnings** ({len(img_errors)}):\n"
                + "\n".join(f"- {e[:120]}" for e in img_errors[-5:])
            )
        status = (
            f"**Video created in {result.generation_time}s** | "
            f"Duration: {result.total_duration}s | "
            f"Chapters: {result.num_chapters}\n\n"
            f"Timing: {durations}\n\n"
            f"Cost: **${result.cost_usd:.2f}**"
            f"{error_note}"
        )
        # Also extract audio for the audio player.
        audio_path = os.path.join(WORK_DIR, "narration_combined.wav")
        # Collect per-chapter audio files composer left in its scratch dir.
        # NOTE(review): relies on composer naming its temp dirs
        # "cinestory_vid_*" and files "ch_audio_<n>.wav" — confirm if the
        # composer module changes its layout.
        import glob
        ch_audios = glob.glob(os.path.join(
            os.path.dirname(output_path), "..", "cinestory_vid_*", "ch_audio_*.wav"
        ))

        def _chapter_order(path):
            # FIX: plain sorted() compared filenames lexicographically, which
            # misorders "ch_audio_10.wav" before "ch_audio_2.wav" for stories
            # with 10+ chapters; sort on the numeric suffix instead. Files
            # without a numeric suffix keep a deterministic path-based order.
            stem = os.path.splitext(os.path.basename(path))[0]
            suffix = stem.rsplit("_", 1)[-1]
            return (0, int(suffix), "") if suffix.isdigit() else (1, 0, path)

        ch_audios.sort(key=_chapter_order)
        if not ch_audios:
            # Generate a single combined audio as fallback
            full_text = "\n\n".join(chapters)
            generate_speech(full_text, voice=voice_id, speed=speed, output_path=audio_path)
        else:
            from tts import concatenate_audio
            concatenate_audio(ch_audios, audio_path, pause_seconds=0.5)
        return result.video_path, audio_path, status
    except Exception as e:
        logger.error(f"Cinematic video failed: {e}", exc_info=True)
        img_errors = get_image_gen_errors()
        detail = ""
        if img_errors:
            detail = "\n\n**Image generation errors:**\n" + "\n".join(
                f"- {err[:150]}" for err in img_errors[-5:]
            )
        return None, None, f"Error: {str(e)}{detail}"
# ── Gradio UI ─────────────────────────────────────────────────────────────────
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&family=Fraunces:opsz,wght,SOFT@9..144,500,50&display=swap');
/* ── Force light mode via CSS variables ─────────────────── */
:root, .dark {
--block-background-fill: white !important;
--panel-background-fill: white !important;
--body-background-fill: #f8faf6 !important;
--background-fill-primary: white !important;
--background-fill-secondary: #f8faf6 !important;
--border-color-primary: #d1d5db !important;
--block-border-color: #e5e7eb !important;
--input-background-fill: white !important;
--body-text-color: #1f2937 !important;
--block-label-text-color: #1e293b !important;
--block-title-text-color: #1e293b !important;
}
.gradio-container {
background:
radial-gradient(1200px 600px at 0% -10%, #d9efe9 0%, transparent 60%),
radial-gradient(1000px 500px at 100% 0%, #ffe7cc 0%, transparent 55%),
linear-gradient(140deg, #f5f7f2, #fefcf8) !important;
font-family: "Space Grotesk", ui-sans-serif, system-ui, sans-serif;
color: #1f2937;
}
/* ── All text dark ──────────────────────────────────────── */
.gradio-container .prose, .gradio-container .prose *,
.gradio-container .markdown-text, .gradio-container .markdown-text *,
.gradio-container label, .gradio-container label span,
.gradio-container p, .gradio-container h1,
.gradio-container h2, .gradio-container h3 {
color: #1f2937 !important;
}
/* ── Media player controls: leave untouched ─────────────── */
.gradio-container audio, .gradio-container audio *,
.gradio-container video, .gradio-container video *,
.gradio-container button svg, .gradio-container button path {
color: unset !important;
fill: unset !important;
}
/* ── Accordion header ───────────────────────────────────── */
.gradio-container .label-wrap {
background: linear-gradient(135deg, #eef7f4, #fdf5ec) !important;
color: #1e293b !important;
}
.gradio-container .label-wrap * { color: #1e293b !important; }
/* ── Dropdown labels: no colored background ─────────────── */
.gradio-container label > span { background: transparent !important; }
/* ── Inline code: light teal instead of dark block ──────── */
.gradio-container code,
.gradio-container .prose code,
.gradio-container .markdown-text code {
background: rgba(15, 118, 110, 0.08) !important;
color: #0f766e !important;
padding: 0.15em 0.4em;
border-radius: 4px;
}
/* ── Label badges (Genre, Tone etc): no colored bg ──────── */
.gradio-container .block label span,
.gradio-container span[data-testid],
.gradio-container .gr-input-label {
background: transparent !important;
background-color: transparent !important;
color: #1e293b !important;
}
/* ── Hero ────────────────────────────────────────────────── */
.hero {
border: 1px solid rgba(20,30,24,0.10);
background: linear-gradient(120deg, #ffffff, #f8fffc) !important;
border-radius: 18px; padding: 1.1rem 1.2rem;
box-shadow: 0 10px 28px rgba(16,24,40,0.06);
margin-bottom: 0.8rem;
}
.hero h1 {
margin: 0; color: #1e293b !important;
font-family: "Fraunces", Georgia, serif;
font-size: clamp(1.8rem, 3.2vw, 2.45rem);
}
.hero p { margin: 0.55rem 0 0; color: #5b6472 !important; font-size: 0.99rem; }
/* ── Step chips ──────────────────────────────────────────── */
.flow-guide {
display: grid;
grid-template-columns: repeat(4, minmax(130px, 1fr));
gap: 0.55rem; margin: 0.5rem 0 1rem;
}
.guide-chip {
border: 1px solid rgba(15,118,110,0.18);
background: linear-gradient(140deg, #ffffff, #f2fbf8) !important;
border-radius: 12px; padding: 0.6rem 0.72rem;
font-size: 0.88rem; color: #1e293b !important;
box-shadow: 0 4px 14px rgba(15,118,110,0.06);
}
.guide-chip * { color: #1e293b !important; }
.guide-chip b { color: #0f766e !important; font-weight: 700; }
/* ── Helpers ─────────────────────────────────────────────── */
.panel-title { color: #1e293b !important; font-weight: 700; font-size: 1rem; }
.helper-note { color: #5b6472 !important; font-size: 0.88rem; margin-bottom: 0.4rem; }
.cost-tag {
font-family: ui-monospace, monospace; font-size: 0.86em;
color: #047857 !important;
border: 1px solid rgba(4,120,87,0.2);
background: rgba(236,253,245,0.7) !important;
border-radius: 10px; padding: 0.6rem 0.7rem;
}
button.primary, button.primary * { color: white !important; }
@media (max-width: 960px) {
.flow-guide { grid-template-columns: repeat(2, minmax(120px, 1fr)); }
}
"""
CUSTOM_THEME = gr.themes.Soft(primary_hue="emerald", secondary_hue="orange")
def build_app():
    """Assemble the CineStory Gradio UI and wire all pipeline callbacks.

    Returns the un-launched ``gr.Blocks`` app. Scene analysis and story
    progress are held as JSON strings in ``gr.State`` so each click handler
    stays stateless.
    """
    # Gradio 6 moved theme/css from Blocks() to launch().
    # We try Blocks() first (works in Gradio 5), fall back to bare Blocks.
    try:
        app_context = gr.Blocks(
            title="CineStory AI",
            theme=CUSTOM_THEME,
            css=CUSTOM_CSS,
        )
    except TypeError:
        # Gradio 6: theme/css not accepted in constructor
        app_context = gr.Blocks(title="CineStory AI")
    with app_context as app:
        # Hero banner + four-step guide (classes styled by CUSTOM_CSS).
        gr.HTML(
            "<div class='hero'>"
            "<h1>CineStory AI</h1>"
            "<p>Turn one image into a short interactive story, then export a narrated cinematic video.</p>"
            "</div>"
            "<div class='flow-guide'>"
            "<div class='guide-chip'><b>Step 1</b><br>Upload and analyze your image</div>"
            "<div class='guide-chip'><b>Step 2</b><br>Generate story and choose branches</div>"
            "<div class='guide-chip'><b>Step 3</b><br>Preview narration voice</div>"
            "<div class='guide-chip'><b>Step 4</b><br>Create the final cinematic video</div>"
            "</div>"
        )
        # Hidden per-session state: serialized scene dict and story snapshot.
        scene_json = gr.State("{}")
        story_state = gr.State("{}")
        with gr.Row():
            # ── Left: inputs ──────────────────────────────────────────────
            with gr.Column(scale=2):
                gr.HTML(
                    "<div class='panel-title'>1) Image and story controls</div>"
                    "<div class='helper-note'>Start by uploading one image, then tune story direction and voice.</div>"
                )
                image_input = gr.Image(type="filepath", label="Choose an image")
                gr.Markdown("### Story Preferences")
                with gr.Row():
                    genre = gr.Dropdown(
                        ["Fantasy", "Science Fiction", "Mystery",
                         "Romance", "Horror", "Adventure"],
                        value="Fantasy", label="Genre",
                    )
                    tone = gr.Dropdown(
                        ["Serious", "Light-hearted", "Humorous",
                         "Dark", "Whimsical"],
                        value="Serious", label="Tone",
                    )
                with gr.Row():
                    theme = gr.Dropdown(
                        ["Self-discovery", "Redemption", "Love",
                         "Justice", "Survival", "Freedom"],
                        value="Self-discovery", label="Theme",
                    )
                    conflict = gr.Dropdown(
                        ["Internal struggle", "Person vs. Society",
                         "Person vs. Nature", "Person vs. Person"],
                        value="Internal struggle", label="Conflict",
                    )
                ending = gr.Dropdown(
                    ["Happy", "Bittersweet", "Open-ended", "Tragic", "Twist"],
                    value="Open-ended", label="Ending",
                )
                analyze_btn = gr.Button(
                    "Step 1: Analyze Image", variant="primary", size="lg",
                )
                gr.Markdown("### Visual Style and Voice")
                with gr.Row():
                    style_select = gr.Dropdown(
                        get_style_names(),
                        value="Watercolor Storybook",
                        label="Art Style",
                    )
                    voice_select = gr.Dropdown(
                        list(VOICE_MAP.keys()),
                        value="Narrator (Female, Warm)",
                        label="Narrator Voice",
                    )
                speed_slider = gr.Slider(
                    0.5, 1.5, value=1.0, step=0.1, label="Narration Speed",
                )
            # ── Right: outputs ────────────────────────────────────────────
            with gr.Column(scale=3):
                with gr.Accordion("Step 1 Output: Scene Analysis", open=True):
                    scene_display = gr.Markdown(
                        "*Click **Step 1: Analyze Image** after uploading your image.*"
                    )
                with gr.Accordion("Step 2: Story", open=True):
                    story_display = gr.Markdown(
                        "*After analysis, click **Step 2: Generate Story** and pick your branch options.*"
                    )
                    generate_story_btn = gr.Button(
                        "Step 2: Generate Story", variant="primary", size="lg",
                    )
                    # Branch buttons stay hidden until the story offers choices.
                    with gr.Group(visible=False) as choice_group:
                        gr.Markdown("**Choose what happens next (up to 2 rounds):**")
                        with gr.Row():
                            choice_1_btn = gr.Button("Option 1", variant="secondary")
                            choice_2_btn = gr.Button("Option 2", variant="secondary")
                            choice_3_btn = gr.Button("Option 3", variant="secondary")
                with gr.Accordion("Optional Step 3: Audio Preview", open=False):
                    audio_btn = gr.Button(
                        "Step 3: Preview Narration Audio", variant="secondary",
                    )
                    audio_output = gr.Audio(
                        label="Narration Preview", type="filepath",
                    )
                    audio_status = gr.Markdown("")
                with gr.Accordion("Step 4: Cinematic Video", open=True):
                    gr.Markdown(
                        "*Creates stylized chapter images, narration, and a stitched video. "
                        "Run this after you are happy with the story choices.*"
                    )
                    video_btn = gr.Button(
                        "Step 4: Create Cinematic Story Video",
                        variant="primary", size="lg",
                    )
                    video_output = gr.Video(label="Story Video")
                    video_audio = gr.Audio(
                        label="Full Narration", type="filepath", visible=True,
                    )
                    video_status = gr.Markdown("", elem_classes="cost-tag")
        # ── Wiring ────────────────────────────────────────────────────────
        analyze_btn.click(
            fn=process_image,
            inputs=[image_input],
            outputs=[scene_display, scene_json],
        )
        generate_story_btn.click(
            fn=generate_story_opening,
            inputs=[scene_json, genre, tone, theme, conflict, ending],
            outputs=[story_display, story_state, choice_group],
        )
        # Each branch button forwards a fixed 0-based choice index.
        choice_1_btn.click(
            fn=lambda s: make_choice(0, s),
            inputs=[story_state],
            outputs=[story_display, story_state, choice_group],
        )
        choice_2_btn.click(
            fn=lambda s: make_choice(1, s),
            inputs=[story_state],
            outputs=[story_display, story_state, choice_group],
        )
        choice_3_btn.click(
            fn=lambda s: make_choice(2, s),
            inputs=[story_state],
            outputs=[story_display, story_state, choice_group],
        )
        audio_btn.click(
            fn=generate_audio_only,
            inputs=[story_state, voice_select, speed_slider],
            outputs=[audio_output, audio_status],
        )
        video_btn.click(
            fn=generate_cinematic_video,
            inputs=[
                image_input, story_state, scene_json,
                voice_select, speed_slider, style_select,
            ],
            outputs=[video_output, video_audio, video_status],
        )
    return app
# ── Entry Point ───────────────────────────────────────────────────────────────
if __name__ == "__main__":
    app = build_app()
    # Honor GRADIO_SERVER_PORT if set, but fall back to any free port so a
    # stale process on 7860 does not block startup.
    port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
    server_port = _find_free_port(default_port=port)
    logger.info(f"Launching on port {server_port}")
    # Shared launch configuration (previously duplicated across both launch
    # branches, including the favicon existence check — computed once here).
    launch_kwargs = dict(
        pwa=True,
        favicon_path=(
            "./assets/favicon.png"
            if os.path.exists("./assets/favicon.png") else None
        ),
        share=False,
        server_port=server_port,
    )
    # Gradio 6 accepts theme/css in launch(); Gradio 5 already took them on
    # Blocks() and its launch() raises TypeError on the extra kwargs.
    try:
        app.launch(theme=CUSTOM_THEME, css=CUSTOM_CSS, **launch_kwargs)
    except TypeError:
        app.launch(**launch_kwargs)