Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| from diffusers import StableDiffusionPipeline | |
| import torch | |
| import json | |
| import textwrap | |
# =========================
# 1. PAGE CONFIG
# =========================
# Page-level settings are applied before any other Streamlit call is made.
PAGE_SETTINGS = {
    "page_title": "AI Story to Movie Scene Generator",
    "page_icon": "🎬",
    "layout": "wide",
}
st.set_page_config(**PAGE_SETTINGS)

st.title("🎬 AI Story → Movie Scene Generator")

# Markdown overview rendered directly under the title. The body lines are
# kept flush-left so no indentation ends up inside the rendered text.
INTRO_MARKDOWN = """
Paste a short story, and this app will:
1. Break it into **cinematic scenes** (title, setting, characters, mood, summary).
2. Generate a **visual prompt** for each scene.
3. Turn prompts into **AI images** in either:
- 🧪 Anime-style visuals
- 🎥 Realistic cinematic visuals
"""
st.write(INTRO_MARKDOWN)
# =========================
# 2. SIDEBAR: VISUAL STYLE
# =========================
# The two visual styles the user can pick between.
STYLE_CHOICES = ["Anime", "Cinematic Realistic"]

sidebar = st.sidebar
sidebar.header("Visual Style Settings")
# `style` is read further down when prompts are styled and images captioned.
style = sidebar.selectbox("Choose visual style for images:", STYLE_CHOICES)
def build_styled_prompt(base_prompt: str, style: str) -> str:
    """
    Append style-specific rendering keywords to a scene's visual prompt.

    The base prompt is stripped of surrounding whitespace first. The style
    "Anime" selects the anime keyword set; any other value is treated as
    "Cinematic Realistic".
    """
    anime_suffix = (
        ", anime style, detailed 2D illustration, clean line art, vibrant colors, "
        "studio anime, keyframe, sharp focus, highly detailed, dramatic lighting"
    )
    cinematic_suffix = (
        ", ultra realistic, cinematic lighting, 35mm film, depth of field, 4k, "
        "high detail, dramatic shadows, film still, volumetric light, highly detailed"
    )
    cleaned = base_prompt.strip()
    chosen_suffix = anime_suffix if style == "Anime" else cinematic_suffix
    return cleaned + chosen_suffix
# =========================
# 3. LOAD LLM (FLAN-T5) - CACHED
# =========================
@st.cache_resource
def load_scene_model():
    """
    Load the FLAN-T5 tokenizer and seq2seq model used for scene extraction.

    Streamlit re-executes this script on every widget interaction. Without
    the cache decorator the weights would be loaded again on each rerun;
    with it, the loaded objects are created once per server process and
    reused.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    model_name = "google/flan-t5-base"  # good starting point; can upgrade to -large later
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tokenizer, model


# Module-level handles used by generate_text().
tokenizer, scene_model = load_scene_model()
def generate_text(prompt: str, max_new_tokens: int = 256) -> str:
    """
    Helper to generate text from Flan-T5 given an instruction-style prompt.

    Decoding is plain beam search (4 beams), so the output is deterministic
    for a given prompt.

    Args:
        prompt: Instruction text fed to the model; the tokenizer is asked to
            truncate it if it is too long.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The best beam decoded to a string, without special tokens.
    """
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    # NOTE: `temperature` / `top_p` were previously passed here, but they only
    # take effect when `do_sample=True`. With beam search they were ignored
    # (and trigger a warning on recent transformers), so they are dropped.
    output_ids = scene_model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
# =========================
# 4. STORY → CHUNKS → SCENES LOGIC
# =========================
def split_story_into_chunks(story_text: str, max_chars_per_chunk: int = 600):
    """
    Split the story into rough chunks based on paragraphs and length
    so each chunk can be turned into a scene by the model.

    Paragraphs are the non-blank lines of ``story_text`` (whitespace
    stripped). Consecutive paragraphs are joined with a newline for as long
    as the joined text stays within ``max_chars_per_chunk`` characters. A
    single paragraph that is longer than the limit is kept whole as its own
    chunk rather than being cut mid-sentence.

    Args:
        story_text: Full story text.
        max_chars_per_chunk: Maximum length of a chunk built from several
            paragraphs.

    Returns:
        List of chunk strings, in story order; empty if the story has no
        non-blank lines.
    """
    paragraphs = [p.strip() for p in story_text.split("\n") if p.strip()]
    chunks = []
    current = ""
    for p in paragraphs:
        if not current:
            # First paragraph of a chunk always starts it, whatever its size.
            current = p
        elif len(current) + 1 + len(p) <= max_chars_per_chunk:
            # +1 accounts for the newline joining the two paragraphs; the
            # same budget now applies to every chunk, including the first.
            current = f"{current}\n{p}"
        else:
            chunks.append(current)
            current = p
    if current:
        chunks.append(current)
    return chunks
def _normalize_scene(parsed, raw_text: str, scene_id: int) -> dict:
    """
    Coerce decoded model output into the scene dict shape the UI reads.
    """
    if not isinstance(parsed, dict):
        # Not JSON at all, or valid JSON that is not an object (list, string,
        # number, null): fall back to wrapping the raw text.
        text = raw_text.strip()
        return {
            "scene_id": scene_id,
            "title": f"Scene {scene_id}",
            "setting": "",
            "characters": [],
            "mood": "",
            "summary": text,
            "visual_prompt": text,
        }

    scene = dict(parsed)

    # The UI joins character names with ", ", so this must be a list of str.
    characters = scene.get("characters")
    if isinstance(characters, str):
        characters = [characters] if characters.strip() else []
    elif isinstance(characters, (list, tuple)):
        characters = [str(name) for name in characters if name is not None]
    else:
        characters = []
    scene["characters"] = characters

    # Text fields are displayed and string-processed downstream.
    if scene.get("title") is None:
        scene["title"] = f"Scene {scene_id}"
    for key in ("title", "setting", "mood", "summary", "visual_prompt"):
        value = scene.get(key)
        if value is None:
            scene[key] = ""
        elif not isinstance(value, str):
            scene[key] = str(value)

    # Ensure scene_id is set correctly, regardless of what the model wrote.
    scene["scene_id"] = scene_id
    return scene


def chunk_to_scene(chunk_text: str, scene_id: int):
    """
    Convert one story chunk into a structured scene JSON using the LLM.

    The model is asked for a JSON object. If its reply is not valid JSON, or
    is JSON but not an object, the raw reply is wrapped into a basic scene
    whose summary and visual prompt are the reply text.

    Args:
        chunk_text: The part of the story to turn into a scene.
        scene_id: Identifier stored in the result, overriding any value the
            model produced.

    Returns:
        A dict with keys ``scene_id``, ``title``, ``setting``,
        ``characters`` (list of str), ``mood``, ``summary`` and
        ``visual_prompt`` (plus any extra keys the model returned).
    """
    # Prompt body is flush-left so no indentation is sent to the model.
    prompt = f"""
You are a movie director's assistant.
Read the following part of a story and extract a SINGLE movie scene in structured JSON.
Story chunk:
\"\"\"{chunk_text}\"\"\"
Return JSON with the following keys:
- scene_id (integer)
- title (short scene title)
- setting (where, when)
- characters (list of names)
- mood (emotional tone, e.g. tense, hopeful)
- summary (2-3 sentences)
- visual_prompt (a single detailed description to be used for generating a cinematic image, including lighting, style, camera angle)
Only output valid JSON, nothing else.
"""
    raw = generate_text(prompt, max_new_tokens=256)
    try:
        parsed = json.loads(raw)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        parsed = None
    return _normalize_scene(parsed, raw, scene_id)
def story_to_scenes(story_text: str):
    """
    Run the whole text pipeline: story -> chunks -> one scene dict per chunk.

    Scenes are numbered from 1 in the order their chunks appear.
    """
    story_chunks = split_story_into_chunks(story_text, max_chars_per_chunk=600)
    return [
        chunk_to_scene(piece, scene_id=number)
        for number, piece in enumerate(story_chunks, start=1)
    ]
# =========================
# 5. LOAD STABLE DIFFUSION PIPELINE (IMAGE MODEL)
# =========================
@st.cache_resource
def load_image_model():
    """
    Load Stable Diffusion pipeline for image generation.

    Uses the GPU with float16 weights when CUDA is available, otherwise the
    CPU with float32 weights.

    The pipeline is cached: generate_scene_image() calls this on every image
    request, and without caching the full pipeline would be rebuilt for each
    button click.

    Returns:
        The StableDiffusionPipeline, already moved to its target device.
    """
    model_id = "runwayml/stable-diffusion-v1-5"
    use_cuda = torch.cuda.is_available()
    pipe = StableDiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
        # NOTE(review): passing None turns the pipeline's safety checker off;
        # can be customized if needed.
        safety_checker=None,
    )
    # NOTE(review): the cached pipeline object is shared by all sessions of
    # this app; confirm that is acceptable if concurrent use is expected.
    return pipe.to("cuda" if use_cuda else "cpu")
def generate_scene_image(prompt: str):
    """
    Render one image for a text prompt with the Stable Diffusion pipeline.

    Returns the first image of the pipeline's output.
    """
    diffusion = load_image_model()
    # num_inference_steps and guidance_scale trade quality against speed.
    result = diffusion(prompt, num_inference_steps=25, guidance_scale=7.5)
    return result.images[0]
# =========================
# 6. STREAMLIT UI
# =========================
# Scenes survive script reruns by living in session state.
if "scenes" not in st.session_state:
    st.session_state["scenes"] = None

st.subheader("📝 Paste Your Story")

# Sample text pre-filled into the input box.
default_story = """\
Once upon a time in a neon city, Aarav wandered the alleys alone.
He had lost track of time after the government AI marked his family as 'non-compliant'.
One night, while standing on a rooftop, he noticed a masked stranger watching him.
The stranger claimed to know the truth about the city’s AI and its hidden rules.
Aarav followed reluctantly, unaware that every step was being monitored by invisible drones.
"""

story_text = st.text_area(
    "Paste a short story (3–15 paragraphs works best):",
    value=default_story,
    height=260,
)
generate_clicked = st.button("🎬 Generate Scenes")

if generate_clicked and not story_text.strip():
    # Nothing but whitespace was submitted.
    st.error("Please paste a story first.")
elif generate_clicked:
    with st.spinner("Breaking story into scenes..."):
        scenes = story_to_scenes(story_text)
    st.session_state["scenes"] = scenes
    st.success(f"Generated {len(scenes)} scene(s).")
scenes = st.session_state.get("scenes", None)

# Images generated so far in this session, keyed by (scene_id, styled prompt).
# A button is only "clicked" on the single rerun right after the click, so
# without this store an image would vanish on the next widget interaction.
if "scene_images" not in st.session_state:
    st.session_state["scene_images"] = {}
scene_images = st.session_state["scene_images"]

if scenes:
    st.markdown("---")
    st.subheader("📚 Generated Scenes & Visuals")
    for scene in scenes:
        scene_id = scene.get("scene_id", "?")
        title = scene.get("title", f"Scene {scene_id}")
        setting = scene.get("setting", "")
        mood = scene.get("mood", "")
        summary = scene.get("summary", "")

        # Model output is not guaranteed to be well-typed: coerce the prompt
        # to text and the characters to a list of strings before using them.
        base_prompt = str(scene.get("visual_prompt") or "")
        characters = scene.get("characters") or []
        if not isinstance(characters, (list, tuple)):
            characters = [characters]
        character_names = [str(name) for name in characters]

        styled_prompt = build_styled_prompt(base_prompt, style)
        image_key = (scene_id, styled_prompt)

        with st.expander(f"Scene {scene_id}: {title}", expanded=True):
            st.markdown(f"**Setting:** {setting}")
            st.markdown(f"**Mood:** {mood}")
            st.markdown(f"**Characters:** {', '.join(character_names) or 'N/A'}")
            st.markdown("**Summary:**")
            st.write(summary)
            st.markdown("**Base Visual Prompt:**")
            st.code(textwrap.fill(base_prompt, width=90), language="text")
            st.markdown(f"**Styled Prompt for {style} Image:**")
            st.code(textwrap.fill(styled_prompt, width=90), language="text")
            img_btn = st.button(
                f"🖼 Generate {style} Image for Scene {scene_id}",
                key=f"img_btn_{scene_id}",
            )
            if img_btn:
                with st.spinner("Generating image... This may take some time."):
                    scene_images[image_key] = generate_scene_image(styled_prompt)

            img = scene_images.get(image_key)
            if img is not None:
                # NOTE(review): newer Streamlit releases deprecate
                # use_column_width in favour of use_container_width; confirm
                # against the installed version before switching.
                st.image(
                    img,
                    caption=f"Scene {scene_id} – {title} ({style})",
                    use_column_width=True,
                )
else:
    st.info("Paste a story and click **Generate Scenes** to begin.")