|
|
import streamlit as st |
|
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
from diffusers import StableDiffusionPipeline |
|
|
import torch |
|
|
import json |
|
|
import textwrap |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Page chrome: browser-tab metadata, headline and a short description of the
# three-step workflow. set_page_config is issued before any other Streamlit
# command in this script.
st.set_page_config(
    page_title="AI Story to Movie Scene Generator",
    page_icon="🎬",
    layout="wide",
)

st.title("🎬 AI Story → Movie Scene Generator")

# The intro is rendered as Markdown; the two style bullets are nested under
# step 3 by their indentation.
st.write(
    """
Paste a short story, and this app will:
1. Break it into **cinematic scenes** (title, setting, characters, mood, summary).
2. Generate a **visual prompt** for each scene.
3. Turn prompts into **AI images** in either:
   - 🧪 Anime-style visuals
   - 🎥 Realistic cinematic visuals
"""
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sidebar control that picks the rendering style. The selected label is kept
# in the module-level name `style` and is compared against "Anime" when the
# image prompt is assembled.
st.sidebar.header("Visual Style Settings")

style = st.sidebar.selectbox(
    label="Choose visual style for images:",
    options=["Anime", "Cinematic Realistic"],
)
|
|
|
|
|
|
|
|
def build_styled_prompt(base_prompt: str, style: str) -> str:
    """Append style keywords to a scene's visual prompt.

    Args:
        base_prompt: Scene description; surrounding whitespace is stripped.
        style: ``"Anime"`` selects the anime keyword set; any other value
            selects the realistic/cinematic keyword set.

    Returns:
        The stripped prompt followed by a comma-separated style suffix.
    """
    anime_suffix = (
        ", anime style, detailed 2D illustration, clean line art, vibrant colors, "
        "studio anime, keyframe, sharp focus, highly detailed, dramatic lighting"
    )
    realistic_suffix = (
        ", ultra realistic, cinematic lighting, 35mm film, depth of field, 4k, "
        "high detail, dramatic shadows, film still, volumetric light, highly detailed"
    )

    suffix = anime_suffix if style == "Anime" else realistic_suffix
    return base_prompt.strip() + suffix
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_resource
def load_scene_model():
    """Load the Flan-T5 tokenizer and seq2seq model used for scene extraction.

    Wrapped in ``st.cache_resource`` so the returned objects are reused
    instead of being rebuilt on each call.

    Returns:
        A ``(tokenizer, model)`` tuple for ``google/flan-t5-base``.
    """
    checkpoint = "google/flan-t5-base"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )


# Module-level handles used by the text-generation helper below.
tokenizer, scene_model = load_scene_model()
|
|
|
|
|
|
|
|
def generate_text(prompt: str, max_new_tokens: int = 256) -> str:
    """Run an instruction-style prompt through the Flan-T5 model.

    Decoding uses 4-beam search. Sampling knobs (``temperature``, ``top_p``)
    are deliberately not passed: without ``do_sample=True`` they have no
    effect on beam search and only trigger generation-config warnings.

    Args:
        prompt: Full instruction text; it is truncated by the tokenizer if it
            exceeds the tokenizer's maximum input length.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded top beam with special tokens removed.
    """
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    output_ids = scene_model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_beams=4,
        early_stopping=True,
    )
    # generate() returns a batch; there is exactly one input sequence here.
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def split_story_into_chunks(story_text: str, max_chars_per_chunk: int = 600):
    """Group a story's paragraphs into chunks of bounded length.

    Each non-blank line of ``story_text`` is treated as a paragraph.
    Consecutive paragraphs are joined with a newline for as long as the
    joined text stays within ``max_chars_per_chunk`` characters. A paragraph
    that is longer than the limit on its own is first wrapped at word
    boundaries into pieces that fit, so no chunk exceeds the limit (when the
    limit is positive).

    Args:
        story_text: Raw story text.
        max_chars_per_chunk: Maximum length of a chunk in characters. With a
            non-positive value no wrapping is attempted and every paragraph
            becomes its own chunk.

    Returns:
        A list of chunk strings in story order; empty if the story contains
        no non-blank lines.
    """
    pieces = []
    for line in story_text.split("\n"):
        paragraph = line.strip()
        if not paragraph:
            continue
        if 0 < max_chars_per_chunk < len(paragraph):
            # Over-long paragraph: break it up so it cannot blow the limit.
            pieces.extend(textwrap.wrap(paragraph, width=max_chars_per_chunk))
        else:
            pieces.append(paragraph)

    chunks = []
    current = []      # pieces collected for the chunk being built
    current_len = 0   # length of "\n".join(current)
    for piece in pieces:
        # Length the chunk would have if this piece were added; the joining
        # newline is only counted when there is something to join to.
        joined_len = current_len + 1 + len(piece) if current else len(piece)
        if current and joined_len > max_chars_per_chunk:
            chunks.append("\n".join(current))
            current = [piece]
            current_len = len(piece)
        else:
            current.append(piece)
            current_len = joined_len

    if current:
        chunks.append("\n".join(current))

    return chunks
|
|
|
|
|
|
|
|
def chunk_to_scene(chunk_text: str, scene_id: int):
    """Turn one story chunk into a scene dictionary using the language model.

    The model is asked for a JSON object describing the scene. If its reply
    is not valid JSON, or is valid JSON but not an object, a fallback scene is
    built that reuses the raw reply as both summary and visual prompt.

    Args:
        chunk_text: The part of the story to convert.
        scene_id: 1-based scene number; always written into the result,
            overriding whatever the model returned.

    Returns:
        A dict with at least ``scene_id`` and ``characters`` (a list of
        strings). The fallback additionally carries ``title``, ``setting``,
        ``mood``, ``summary`` and ``visual_prompt``.
    """
    prompt = (
        "\n"
        "You are a movie director's assistant.\n"
        "\n"
        "Read the following part of a story and extract a SINGLE movie scene in structured JSON.\n"
        "\n"
        "Story chunk:\n"
        f'"""{chunk_text}"""\n'
        "\n"
        "Return JSON with the following keys:\n"
        "- scene_id (integer)\n"
        "- title (short scene title)\n"
        "- setting (where, when)\n"
        "- characters (list of names)\n"
        "- mood (emotional tone, e.g. tense, hopeful)\n"
        "- summary (2-3 sentences)\n"
        "- visual_prompt (a single detailed description to be used for generating a cinematic image, including lighting, style, camera angle)\n"
        "\n"
        "Only output valid JSON, nothing else.\n"
    )
    raw = generate_text(prompt, max_new_tokens=256)

    try:
        data = json.loads(raw)
    except ValueError:
        # json.JSONDecodeError is a ValueError: the reply was not JSON at all.
        data = None

    if not isinstance(data, dict):
        # Either unparsable or a bare JSON value (string, number, list...):
        # keep the model's text so the scene is still usable downstream.
        text = raw.strip()
        data = {
            "scene_id": scene_id,
            "title": f"Scene {scene_id}",
            "setting": "",
            "characters": [],
            "mood": "",
            "summary": text,
            "visual_prompt": text,
        }

    # The model may return a single name or mixed types for "characters";
    # normalise to a list of strings.
    characters = data.get("characters", [])
    if isinstance(characters, str):
        characters = [characters] if characters.strip() else []
    elif isinstance(characters, (list, tuple)):
        characters = [str(name) for name in characters]
    else:
        characters = []
    data["characters"] = characters

    # The caller's numbering is authoritative, not the model's.
    data["scene_id"] = scene_id
    return data
|
|
|
|
|
|
|
|
def story_to_scenes(story_text: str):
    """Convert a whole story into an ordered list of scene dicts.

    The story is split into chunks of at most 600 characters and each chunk
    is converted to a scene, numbered from 1 in story order.
    """
    story_chunks = split_story_into_chunks(story_text, max_chars_per_chunk=600)
    return [
        chunk_to_scene(text, scene_id=number)
        for number, text in enumerate(story_chunks, start=1)
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_resource
def load_image_model():
    """Build the Stable Diffusion v1.5 text-to-image pipeline.

    When CUDA is available the pipeline is loaded in float16 and moved to the
    GPU; otherwise it is loaded in float32 and placed on the CPU. The
    pipeline is created with ``safety_checker=None``. Wrapped in
    ``st.cache_resource`` so the pipeline is reused between calls.

    Returns:
        The ``StableDiffusionPipeline`` on its target device.
    """
    use_gpu = torch.cuda.is_available()

    pipeline = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=torch.float16 if use_gpu else torch.float32,
        safety_checker=None,
    )
    return pipeline.to("cuda" if use_gpu else "cpu")
|
|
|
|
|
|
|
|
def generate_scene_image(prompt: str):
    """Render one image for ``prompt`` with the cached diffusion pipeline.

    Uses 25 inference steps and a guidance scale of 7.5, and returns the
    first image of the pipeline's output.
    """
    result = load_image_model()(
        prompt,
        num_inference_steps=25,
        guidance_scale=7.5,
    )
    return result.images[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Story input area, pre-filled with a three-paragraph sample so the app can
# be tried without typing anything.
st.subheader("📝 Paste Your Story")

# The trailing backslash after the opening quotes suppresses the leading
# newline; blank lines separate the sample's paragraphs.
default_story = """\
Once upon a time in a neon city, Aarav wandered the alleys alone.
He had lost track of time after the government AI marked his family as 'non-compliant'.

One night, while standing on a rooftop, he noticed a masked stranger watching him.
The stranger claimed to know the truth about the city’s AI and its hidden rules.

Aarav followed reluctantly, unaware that every step was being monitored by invisible drones.
"""

story_text = st.text_area(
    "Paste a short story (3–15 paragraphs works best):",
    value=default_story,
    height=260,
)
|
|
|
|
|
generate_clicked = st.button("🎬 Generate Scenes")

# Scenes are kept in session state so they are still available on reruns
# where the button was not the widget that triggered the run.
if "scenes" not in st.session_state:
    st.session_state["scenes"] = None

if generate_clicked:
    if story_text.strip():
        with st.spinner("Breaking story into scenes..."):
            scenes = story_to_scenes(story_text)
            st.session_state["scenes"] = scenes
        st.success(f"Generated {len(scenes)} scene(s).")
    else:
        # Nothing but whitespace was submitted; previously generated scenes
        # (if any) are left untouched.
        st.error("Please paste a story first.")

# Whatever is stored (fresh or from an earlier run) is what gets rendered.
scenes = st.session_state.get("scenes", None)
|
|
|
|
|
# Render every stored scene with its metadata, prompts and an on-demand
# image. Without scenes, show a hint instead.
if scenes:
    st.markdown("---")
    st.subheader("📚 Generated Scenes & Visuals")

    # A button only reports True on the single rerun caused by its click, so
    # generated images are kept in session state; otherwise an image would
    # disappear as soon as any other widget is used.
    if "scene_images" not in st.session_state:
        st.session_state["scene_images"] = {}
    scene_images = st.session_state["scene_images"]

    for scene in scenes:
        scene_id = scene.get("scene_id", "?")
        title = scene.get("title", f"Scene {scene_id}")
        setting = scene.get("setting", "")
        mood = scene.get("mood", "")
        summary = scene.get("summary", "")

        # Scene fields come from model output, so their types are not
        # guaranteed: accept a single name or a list of arbitrary values.
        characters = scene.get("characters", [])
        if isinstance(characters, str):
            characters = [characters]
        elif not isinstance(characters, (list, tuple)):
            characters = []
        character_names = ", ".join(str(name) for name in characters) or "N/A"

        raw_prompt = scene.get("visual_prompt", "")
        base_prompt = raw_prompt if isinstance(raw_prompt, str) else str(raw_prompt or "")
        styled_prompt = build_styled_prompt(base_prompt, style)

        with st.expander(f"Scene {scene_id}: {title}", expanded=True):
            st.markdown(f"**Setting:** {setting}")
            st.markdown(f"**Mood:** {mood}")
            st.markdown(f"**Characters:** {character_names}")

            st.markdown("**Summary:**")
            st.write(summary)

            st.markdown("**Base Visual Prompt:**")
            st.code(textwrap.fill(base_prompt, width=90), language="text")

            st.markdown(f"**Styled Prompt for {style} Image:**")
            st.code(textwrap.fill(styled_prompt, width=90), language="text")

            img_btn = st.button(
                f"🖼 Generate {style} Image for Scene {scene_id}",
                key=f"img_btn_{scene_id}",
            )

            # Keyed by the exact prompt as well as the scene number, so an
            # image made for another story or another style is never shown.
            image_key = (scene_id, styled_prompt)
            if img_btn:
                with st.spinner("Generating image... This may take some time."):
                    scene_images[image_key] = generate_scene_image(styled_prompt)

            if image_key in scene_images:
                # NOTE(review): use_column_width is deprecated in recent
                # Streamlit releases - confirm the pinned version before
                # switching to its replacement.
                st.image(
                    scene_images[image_key],
                    caption=f"Scene {scene_id} – {title} ({style})",
                    use_column_width=True,
                )
else:
    st.info("Paste a story and click **Generate Scenes** to begin.")
|
|
|