File size: 35,581 Bytes
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
cfa934c
3e86d86
 
cfa934c
3e86d86
 
 
cfa934c
3e86d86
 
 
 
cfa934c
3e86d86
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
cfa934c
3e86d86
 
 
 
cfa934c
3e86d86
cfa934c
3e86d86
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
cfa934c
3e86d86
 
 
cfa934c
3e86d86
 
cfa934c
3e86d86
cfa934c
3e86d86
 
cfa934c
3e86d86
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
cfa934c
3e86d86
 
 
cfa934c
3e86d86
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
cfa934c
3e86d86
 
 
cfa934c
3e86d86
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
cfa934c
3e86d86
 
 
cfa934c
3e86d86
cfa934c
3e86d86
 
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
cfa934c
3e86d86
 
cfa934c
3e86d86
 
 
 
 
cfa934c
3e86d86
 
cfa934c
3e86d86
 
 
cfa934c
3e86d86
 
 
 
cfa934c
3e86d86
cfa934c
3e86d86
 
 
 
 
cfa934c
 
 
 
3e86d86
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
cfa934c
3e86d86
cfa934c
3e86d86
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
cfa934c
3e86d86
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
cfa934c
3e86d86
 
 
cfa934c
3e86d86
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
cfa934c
3e86d86
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
cfa934c
3e86d86
 
cfa934c
3e86d86
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
cfa934c
3e86d86
 
cfa934c
3e86d86
 
cfa934c
3e86d86
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
cfa934c
3e86d86
 
 
cfa934c
3e86d86
 
 
 
cfa934c
3e86d86
 
 
cfa934c
3e86d86
 
 
 
 
 
cfa934c
3e86d86
cfa934c
3e86d86
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
 
3e86d86
 
 
cfa934c
 
 
 
 
 
 
3e86d86
cfa934c
3e86d86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfa934c
 
3e86d86
 
cfa934c
 
3e86d86
 
 
 
 
 
 
 
 
cfa934c
3e86d86
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
import gc
import hashlib
import os
import re
import shutil  # For robust directory cleanup
import tempfile
import time
import zipfile
from datetime import datetime  # For unique zip filenames
from typing import List, Tuple, Dict

import cv2  # Used internally by moviepy/pillow if needed, good to have it
import gradio as gr
import nltk
import numpy as np
import torch
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from moviepy.editor import ImageClip, CompositeVideoClip, TextClip, vfx  # vfx for effects
from PIL import Image, ImageDraw, ImageFont  # PIL for image handling

# Download required NLTK tokenizer data on first run.
# NLTK >= 3.8.2 renamed the sentence-tokenizer resource to "punkt_tab";
# fetch whichever is missing so nltk.sent_tokenize works on old and new NLTK.
for _resource in ("punkt", "punkt_tab"):
    try:
        nltk.data.find(f"tokenizers/{_resource}")
    except LookupError:
        nltk.download(_resource, quiet=True)

class StoryVideoGenerator:
    """Turn a text story into a sequence of captioned ~10-second video clips.

    Pipeline per segment: text -> Stable Diffusion image -> Ken Burns
    animation + caption overlay -> MP4 clip. Designed to run on CPU-only
    hosts (e.g. free-tier Hugging Face Spaces).
    """

    def __init__(self):
        self.device = "cpu"  # Explicitly set to CPU for free-tier compatibility
        self.pipe = None  # StableDiffusionPipeline, loaded lazily by load_model()
        self.temp_dir = tempfile.mkdtemp()  # Unique scratch dir for images/clips
        self.current_seed = 42  # Base seed; callers may overwrite per request

        # Ensure outputs directory exists (for the final ZIP file)
        os.makedirs("outputs", exist_ok=True)

    def load_model(self):
        """Load the Stable Diffusion model optimized for CPU (idempotent)."""
        if self.pipe is None:
            print("Loading Stable Diffusion model...")
            model_id = "runwayml/stable-diffusion-v1-5"  # Standard Stable Diffusion v1.5
            self.pipe = StableDiffusionPipeline.from_pretrained(
                model_id,
                torch_dtype=torch.float32,     # Crucial for CPU performance and memory
                safety_checker=None,           # Disable for speed and reduced memory
                requires_safety_checker=False  # Explicitly ensure safety checker is off
            )
            self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(self.pipe.scheduler.config)
            self.pipe = self.pipe.to(self.device)
            self.pipe.enable_attention_slicing()  # Memory optimization
            self.pipe.enable_vae_slicing()        # Memory optimization
            # BUGFIX: sequential CPU offload shuttles weights between an
            # accelerator and the CPU; enabling it on a CPU-only pipeline
            # breaks inference, so only enable it when CUDA is available.
            if torch.cuda.is_available():
                self.pipe.enable_sequential_cpu_offload()
            print("Stable Diffusion model loaded successfully on CPU.")

    def segment_story(self, story: str, max_segments: int = 10) -> List[str]:
        """Break a story into ~30-word segments, one per 10-second clip.

        Sentences are never split across segments; segments are greedily
        packed until the word target is exceeded. At most ``max_segments``
        segments are returned.
        """
        # Collapse all whitespace so sentence tokenization is clean
        story = re.sub(r'\s+', ' ', story.strip())

        # Split into sentences
        sentences = nltk.sent_tokenize(story)

        segments = []
        current_segment_sentences = []
        current_word_count = 0

        # ~120-150 spoken words/min => ~20-25 words per 10 seconds;
        # 30 words gives some slack per segment.
        words_per_segment_target = 30

        for sentence in sentences:
            sentence_word_count = len(sentence.split())

            # If adding this sentence exceeds the target AND we already have
            # content, finalize the current segment and start a new one.
            if current_word_count + sentence_word_count > words_per_segment_target and current_segment_sentences:
                segments.append(' '.join(current_segment_sentences))
                current_segment_sentences = [sentence]
                current_word_count = sentence_word_count
            else:
                current_segment_sentences.append(sentence)
                current_word_count += sentence_word_count

        # Flush the trailing segment
        if current_segment_sentences:
            segments.append(' '.join(current_segment_sentences))

        # Cap segment count to bound total generation time
        if len(segments) > max_segments:
            print(f"Warning: Story has {len(segments)} segments, truncating to {max_segments}.")
            segments = segments[:max_segments]

        return segments

    def create_prompt(self, segment_text: str, character_desc: str, style: str, segment_num: int) -> Tuple[str, str]:
        """Build (prompt, negative_prompt) for image generation.

        Combines the character description, extracted action/location/mood
        cues, the raw segment text, and style boilerplate.
        """
        # Extract key visual cues from the segment
        actions = self.extract_actions(segment_text)
        location = self.extract_location(segment_text)
        mood = self.extract_mood(segment_text)

        # Style keyword expansions for diverse visuals
        style_map = {
            "cartoon": "vibrant cartoon style, clean lines, expressive, playful, children's book illustration",
            "realistic": "photorealistic, cinematic still, highly detailed, realistic textures, natural volumetric lighting, lifelike",
            "fantasy": "epic fantasy art, magical realism, ethereal, grand, mythical creatures, enchanted forest, dramatic lighting",
            "digital_art": "digital painting, concept art, rich textures, detailed brushwork, high resolution, professional illustration",
            "anime": "anime style, expressive, dynamic poses, cel-shaded, vibrant colors, Japanese animation aesthetic, detailed eyes"
        }
        selected_style = style_map.get(style, "highly detailed, artistic, professional illustration")

        prompt_parts = []

        # Character description first: it is the core subject and leading
        # tokens carry the most weight for consistency
        if character_desc:
            prompt_parts.append(f"A single {character_desc}")
        else:
            prompt_parts.append(f"A person")

        # Action and location blend with the character clause
        if actions:
            prompt_parts.append(f"is {actions}")
        if location:
            prompt_parts.append(f"in {location}")

        # Raw segment text gives the model additional context
        prompt_parts.append(f"Scene depicts: {segment_text}")

        # Mood last, to influence overall atmosphere
        if mood:
            prompt_parts.append(f"with a {mood} atmosphere")

        prompt_parts.append(selected_style)
        prompt_parts.append("masterpiece, best quality, ultra detailed, 8k, volumetric lighting, rich color, film still, professional")

        final_prompt = ", ".join([p for p in prompt_parts if p and p.strip() != ''])

        # Comprehensive negative prompt to avoid common Stable Diffusion flaws
        negative_prompt = "blurry, low quality, distorted, deformed, ugly, bad anatomy, extra limbs, missing limbs, poorly drawn hands, poorly drawn feet, out of frame, tiling, watermark, signature, text, noisy, grainy, blurred, disfigured, monochrome, grayscale, low resolution, bad composition, amateur, multiple characters, crowd, duplicate, unrealistic, abstract, painting, drawing, cartoon, sketch, render, CGI, 3D"

        return final_prompt, negative_prompt

    def extract_actions(self, text: str) -> str:
        """Return up to three action descriptions found in *text* (keyword match)."""
        action_keywords = {
            'walk': 'walking gracefully', 'run': 'running swiftly', 'sit': 'sitting peacefully',
            'stand': 'standing still', 'look': 'looking intently', 'smile': 'smiling brightly',
            'cry': 'crying sadly', 'laugh': 'laughing joyfully', 'jump': 'jumping high',
            'sleep': 'sleeping soundly', 'eat': 'eating thoughtfully', 'drink': 'drinking refreshing water',
            'fight': 'fighting bravely', 'talk': 'talking animatedly', 'discover': 'discovering something new',
            'explore': 'exploring cautiously', 'fly': 'flying majestically', 'venture': 'venturing forward',
            'encounter': 'encountering a challenge', 'approach': 'approaching with caution',
            'read': 'reading a book', 'write': 'writing in a journal', 'think': 'thinking deeply',
            'observe': 'observing quietly', 'listen': 'listening attentively', 'create': 'creating something',
            'destroy': 'destroying something', 'hide': 'hiding stealthily', 'search': 'searching diligently'
        }

        text_lower = text.lower()
        found_actions = []

        # NOTE: substring match, so e.g. "walked" matches 'walk'
        for keyword, description in action_keywords.items():
            if keyword in text_lower:
                found_actions.append(description)

        return ', '.join(found_actions[:3]) if found_actions else "engaging with the environment"  # Limit to 3 actions

    def extract_location(self, text: str) -> str:
        """Return a descriptive setting phrase for the first location keyword in *text*."""
        location_keywords = {
            'forest': 'a dense, ancient forest', 'castle': 'a grand, medieval castle', 'room': 'a cozy, well-lit room',
            'kitchen': 'a rustic, warm kitchen', 'garden': 'a vibrant, blooming garden', 'street': 'a bustling city street',
            'house': 'a quaint, welcoming house', 'mountain': 'a majestic, snow-capped mountain peak', 'beach': 'a serene, sandy beach at sunset',
            'city': 'a futuristic, neon-lit city', 'village': 'a charming, peaceful village', 'school': 'a busy school hallway',
            'office': 'a modern, minimalist office', 'park': 'a green, expansive park with trees', 'library': 'a quiet, old library filled with books',
            'store': 'a busy, colorful general store', 'restaurant': 'a lively, elegant restaurant', 'hospital': 'a sterile hospital corridor',
            'church': 'an old, gothic church', 'bridge': 'an ancient stone bridge over a river', 'cave': 'a mysterious, dimly lit cave',
            'desert': 'a vast, arid desert landscape', 'ocean': 'a deep blue ocean surface', 'space': 'the vastness of outer space',
            'ship': 'a large sailing ship on the sea', 'train': 'inside a moving train carriage', 'plane': 'inside an airplane cockpit'
        }

        text_lower = text.lower()

        for keyword, description in location_keywords.items():
            if keyword in text_lower:
                return description

        return "a richly detailed background setting"  # Descriptive default when no keyword matches

    def extract_mood(self, text: str) -> str:
        """Return an atmosphere description for the first mood keyword in *text*."""
        mood_keywords = {
            'happy': 'joyful and uplifting, vibrant and cheerful lighting', 'sad': 'somber and melancholic, muted colors, soft lighting',
            'scary': 'ominous and frightening, dark shadows, dramatic contrasts', 'exciting': 'energetic and thrilling, dynamic motion, vibrant colors',
            'peaceful': 'serene and tranquil, soft, diffused lighting, calm atmosphere', 'angry': 'tense and dramatic, strong contrasts, dark clouds',
            'mysterious': 'enigmatic and suspenseful, foggy, low key lighting, hidden elements', 'calm': 'calm and quiet, still, harmonious',
            'gloomy': 'dark and oppressive, rainy, desolate', 'joyful': 'radiant with happiness, sparkling light',
            'adventure': 'adventurous and daring, sense of discovery, wide open spaces'
        }

        text_lower = text.lower()

        for mood, description in mood_keywords.items():
            if mood in text_lower:
                return description

        return "a fitting atmosphere"  # Default for a general mood

    def generate_image(self, prompt: str, negative_prompt: str, segment_num: int) -> Image.Image:
        """Generate one 512x512 image for a story segment.

        Seeding with base seed + segment index keeps the character's look
        related across segments while varying composition.
        """
        seed = self.current_seed + segment_num
        generator = torch.Generator(device=self.device).manual_seed(seed)

        print(f"Generating image with prompt: {prompt[:150]}...")
        # BUGFIX: removed torch.autocast(device_type="cpu") — CPU autocast
        # runs in bfloat16, which conflicts with the float32 pipeline loaded
        # in load_model() and can corrupt SD v1.5 VAE output.
        with torch.no_grad():  # Inference only; no gradients needed
            result = self.pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_inference_steps=25,  # Moderate step count: quality vs CPU time
                guidance_scale=8.0,      # Increased for stronger prompt adherence
                generator=generator,
                height=512,
                width=512
            )

        return result.images[0]

    def _make_caption(self, text: str, video_width: int, duration: int, font, fontsize: int) -> TextClip:
        """Build the bottom-centered caption clip used by create_video_clip."""
        return (
            TextClip(
                text,
                fontsize=fontsize,
                color='white',
                stroke_color='black',
                stroke_width=2,       # Outline keeps text readable on any image
                font=font,
                method='caption',     # Enables automatic word wrapping
                size=(video_width * 0.9, None)  # Wrap at 90% of the video width
            )
            .set_duration(duration)
            # BUGFIX: MoviePy clips have no set_margin(); the margin fx with a
            # fully transparent border lifts the caption 30px off the edge.
            .margin(bottom=30, opacity=0)
            .set_position(('center', 'bottom'))
        )

    def create_video_clip(self, image: Image.Image, text: str, duration: int = 10) -> str:
        """Create an MP4 clip from *image* with a caption overlay and motion.

        Returns the path of the written video file inside ``self.temp_dir``.
        """
        # Normalize to 512x512 so every clip has identical dimensions
        image = image.resize((512, 512), Image.Resampling.LANCZOS)  # LANCZOS for high-quality resizing

        # MoviePy consumes NumPy arrays, not PIL images
        img_array = np.array(image)
        clip = ImageClip(img_array, duration=duration)

        # Ken Burns effect: zoom from 1.0 to 1.15 over the clip duration
        clip = clip.fx(vfx.resize, lambda t: 1 + 0.15 * t / duration)

        # Subtle diagonal pan over the clip duration.
        # BUGFIX: vfx.scroll multiplies x_speed/y_speed by t itself, so the
        # speeds must be plain numbers (pixels per second), not callables.
        start_x_offset = 0.05
        start_y_offset = 0.05
        clip = clip.fx(
            vfx.scroll,
            w=clip.w,
            h=clip.h,
            x_speed=start_x_offset * clip.w / duration,
            y_speed=start_y_offset * clip.h / duration,
        )

        # Build the caption, preferring a real TTF file for consistent rendering
        try:
            font_path_for_moviepy = None
            # Common font locations on Linux systems
            for p in ["/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
                      "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
                      "/usr/share/fonts/truetype/msttcorefonts/Arial.ttf",
                      "/usr/share/fonts/truetype/arial.ttf"]:
                if os.path.exists(p):
                    font_path_for_moviepy = p
                    break

            if font_path_for_moviepy:
                # Larger font size for readability on a 512x512 frame
                text_clip = self._make_caption(text, clip.w, duration, font_path_for_moviepy, 30)
            else:
                print("Warning: Could not find system font for MoviePy, using default 'sans' font.")
                text_clip = self._make_caption(text, clip.w, duration, 'sans', 26)
        except Exception as e:
            print(f"Error creating MoviePy TextClip with specific font: {e}. Falling back to generic font.")
            text_clip = self._make_caption(text, clip.w, duration, 'sans', 26)

        # Composite the caption over the animated image
        final_clip = CompositeVideoClip([clip, text_clip])

        # Unique filename (timestamp + PID) avoids collisions between fast runs
        output_path = os.path.join(self.temp_dir, f"segment_{int(time.time())}_{os.getpid()}.mp4")

        print(f"Exporting video to {output_path}...")
        final_clip.write_videofile(
            output_path,
            fps=24,            # Standard FPS for smooth playback
            codec='libx264',   # Common MP4 codec, broad compatibility
            audio=False,       # No audio track, saves CPU
            verbose=False,     # Suppress verbose MoviePy output
            logger=None,       # Suppress MoviePy logger output
            preset='medium'    # Speed/quality balance for CPU encoding
        )
        print(f"Video exported to {output_path}")

        # MoviePy holds subprocess/file handles; close everything explicitly
        clip.close()
        text_clip.close()
        final_clip.close()

        return output_path

    def cleanup(self):
        """Remove the temp dir (recreating a fresh one) and prune old ZIPs."""
        print(f"Cleaning up temporary directory: {self.temp_dir}")
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)
        self.temp_dir = tempfile.mkdtemp()  # Fresh scratch dir for the next run

        # Prune ZIPs in 'outputs' to prevent disk overuse.
        # BUGFIX: guard against the directory not existing (os.listdir would raise).
        # NOTE(review): this also removes a ZIP produced moments earlier in the
        # same process; callers must be done serving it first — confirm.
        if os.path.isdir("outputs"):
            for f in os.listdir("outputs"):
                if f.endswith(".zip"):
                    file_path = os.path.join("outputs", f)
                    try:
                        os.remove(file_path)
                        print(f"Removed old zip file: {file_path}")
                    except Exception as e:
                        print(f"Error removing old zip file {file_path}: {e}")

        # Release cached memory (relevant for CUDA only; gc helps either way)
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()  # Trigger Python's garbage collector for general memory cleanup

# Initialize generator globally: one shared instance per process so the
# Stable Diffusion pipeline is loaded once and reused across Gradio requests.
# NOTE(review): shared mutable state (temp_dir, current_seed) is presumably
# unsafe under concurrent requests — confirm Gradio queueing serializes calls.
generator = StoryVideoGenerator()

def process_story_gradio(story_text: str, character_description: str, style: str, progress=gr.Progress()):
    """Gradio-compatible generator wrapping the story-to-video pipeline.

    Yields 5-tuples for the UI on each step:
    (status message, details HTML, latest video path or None,
     list of gallery image paths, ZIP path or None).
    """

    generator.cleanup()  # Clean up temp files from previous runs at the start of a new request

    if not story_text.strip():
        # Yield initial empty state and error message
        yield (
            "Please enter a story to generate videos.",
            "<p>No story provided.</p>",
            None, # No video
            [],   # Empty gallery
            None  # No zip
        )
        return

    try:
        # Load model if not already loaded (runs once per Space lifecycle)
        progress(0, desc="Initializing AI model... (This happens once after Space starts or resets)")
        generator.load_model()

        # Segment the story
        progress(0.05, desc="Analyzing story structure and preparing segments...")
        segments = generator.segment_story(story_text)

        if not segments:
            yield (
                "Error: Could not segment the story. Please try a longer or more detailed story.",
                "<p>Story could not be segmented. Please ensure it has enough content.</p>",
                None,
                [],
                None
            )
            return

        total_segments = len(segments)
        initial_status_message = f"Story analyzed! Will generate {total_segments} video segments (approx. {total_segments * 10} seconds total)."

        # Initial yield: show segment count
        yield (
            initial_status_message,
            f"""
            <div style='background-color: #e0f7fa; padding: 15px; border-radius: 8px; margin-top: 10px;'>
                <p>Story will be broken into <strong>{total_segments} segments</strong>.</p>
                <p>Starting video generation now...</p>
            </div>
            """,
            None, # No video yet
            [],   # Empty gallery
            None  # No zip yet
        )

        # Derive the base seed from character description + story text so the
        # same inputs always produce the same character look.
        # BUGFIX: the built-in hash() is salted per process (PYTHONHASHSEED
        # randomization), so it cannot give reproducible seeds across runs;
        # sha256 is deterministic everywhere.
        seed_material = (character_description.strip() + story_text.strip()).encode("utf-8")
        generator.current_seed = int(hashlib.sha256(seed_material).hexdigest(), 16) % (2**32 - 1)

        generated_video_paths = []
        generated_image_paths_for_gallery = []

        for i, segment_text in enumerate(segments):
            segment_idx = i + 1

            # --- Step 1: Update status and show current prompt details ---
            current_status_message = f"Processing segment {segment_idx} of {total_segments}..."
            progress(0.1 + (0.8 * (i / total_segments)), desc=current_status_message) # Progress from 10% to 90%

            prompt, negative_prompt = generator.create_prompt(
                segment_text, character_description, style, i
            )

            # HTML card describing the current segment and its prompt
            segment_details_html = f"""
            <div style='background-color: #e0f7fa; padding: 15px; border-radius: 8px; margin-top: 10px;'>
                <h4>Current Story Segment ({segment_idx}/{total_segments}):</h4>
                <p><strong>{segment_text}</strong></p>
                <h4>AI Prompt Used:</h4>
                <p><em>{prompt}</em></p>
            </div>
            """

            yield (
                current_status_message,
                segment_details_html,
                None, # Still no video for this segment yet
                generated_image_paths_for_gallery.copy(), # Send a copy so Gradio sees a fresh list
                None
            )
            time.sleep(0.1) # Small delay to let the Gradio UI refresh

            # --- Step 2: Generate Image ---
            progress(0.1 + (0.8 * (i / total_segments)) + 0.02, desc=f"Generating image for segment {segment_idx}...")
            image = generator.generate_image(prompt, negative_prompt, i)

            # Persist the image so the gallery can reference it by path
            img_filename = f"segment_{segment_idx}_image_{int(time.time())}.png"
            img_path = os.path.join(generator.temp_dir, img_filename)
            image.save(img_path)
            generated_image_paths_for_gallery.append(img_path)

            # --- Step 3: Create Video Clip ---
            progress(0.1 + (0.8 * (i / total_segments)) + 0.05, desc=f"Creating video for segment {segment_idx}...")
            video_path = generator.create_video_clip(image, segment_text)
            generated_video_paths.append(video_path)

            # --- Step 4: Yield current segment's video and updated gallery ---
            current_status_message = f"Segment {segment_idx} of {total_segments} completed! Video ready."
            yield (
                current_status_message,
                segment_details_html, # Retain details HTML
                video_path, # Path to the latest video
                generated_image_paths_for_gallery.copy(), # Updated gallery (copy)
                None
            )
            time.sleep(0.1) # Small delay for UI update

        # --- Final Step: Generate ZIP file and update final status ---
        progress(0.95, desc="All segments generated. Compiling into a downloadable ZIP file...")

        # Unique zip file name in the 'outputs' directory
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        zip_filename = f"story_videos_{timestamp}.zip"
        final_zip_path = os.path.join("outputs", zip_filename)

        with zipfile.ZipFile(final_zip_path, 'w') as zipf:
            # Add each video/image by bare filename; skip anything that vanished
            for vid_path in generated_video_paths:
                if os.path.isfile(vid_path):
                    zipf.write(vid_path, os.path.basename(vid_path))
            for img_path in generated_image_paths_for_gallery:
                if os.path.isfile(img_path):
                    zipf.write(img_path, os.path.basename(img_path))

        final_status_message = f"βœ… Story video generation complete! All {total_segments} segments generated and available for download."
        progress(1.0, desc="Complete!")

        yield (
            final_status_message,
            "<p>All segments have been processed. Download the complete ZIP file below!</p>",
            generated_video_paths[-1] if generated_video_paths else None, # Last video as final preview
            generated_image_paths_for_gallery, # Final state of the gallery
            final_zip_path # Path to the downloadable ZIP
        )

    except Exception as e:
        import traceback
        print(f"An unexpected error occurred: {e}")
        traceback.print_exc() # Full traceback to the logs for debugging
        yield (
            f"An error occurred during generation: {str(e)}. Please check your input and try again.",
            "<p>Error during processing. Check logs for details.</p>",
            None,
            [],
            None
        )
    finally:
        # NOTE(review): cleanup() deletes the temp dir AND prunes ZIPs in
        # 'outputs' — if Gradio has not cached the just-yielded files when the
        # generator closes, the preview/download links may break. Confirm.
        generator.cleanup()

# --- Gradio Interface Definition ---
def create_interface():
    """Build and return the Gradio Blocks UI for the story-to-video generator.

    Layout:
      - Inputs: story text, main-character description, and an art-style
        dropdown (left column), with usage tips alongside (right column).
      - Outputs: a status textbox, per-segment detail HTML, a live video
        preview of the most recent segment, a cumulative image gallery, and
        a downloadable ZIP of all generated videos and images.

    The generate button streams incremental updates from
    ``process_story_gradio`` (a generator defined earlier in this file); the
    ``outputs`` list below must match the tuple order that generator yields.

    Returns:
        gr.Blocks: the assembled (not yet launched) interface.
    """

    with gr.Blocks(title="AI Text-to-Video Story Generator", theme=gr.themes.Soft()) as interface:

        gr.Markdown("""
        # 🎬 AI Text-to-Video Story Generator
        
        Transform your written stories into animated video sequences! This tool breaks your story into segments 
        and creates a 10-second video clip for each part, maintaining character consistency throughout.
        
        **Features:**
        - ✨ Converts text stories to video sequences
        - 🎭 Maintains character consistency across segments  
        - 🎨 Multiple art styles available
        - πŸ“± Optimized for free-tier CPU processing
        - πŸ“¦ Download individual clips or complete ZIP package
        """)

        with gr.Row():
            with gr.Column(scale=2):
                story_input = gr.Textbox(
                    label="πŸ“– Your Story",
                    placeholder="Enter your story here... (e.g., 'Once upon a time, there was a brave knight named Sir Arthur who lived in a magnificent castle...')",
                    lines=8,
                    max_lines=15,
                    info="Write your complete story here. It will be split into 10-second video segments. Keep it concise for quicker results (e.g., 3-10 sentences)."
                )

                character_input = gr.Textbox(
                    label="πŸ‘€ Main Character Description",
                    placeholder="Describe your main character's appearance (e.g., 'a young woman with long brown hair, wearing a blue dress, kind eyes')",
                    lines=3,
                    info="Provide a detailed description of your main character to help the AI maintain their consistent appearance throughout the video. This is crucial for consistency!"
                )

                # Dropdown choices are (display label, internal value) pairs;
                # the internal value is what process_story_gradio receives.
                style_dropdown = gr.Dropdown(
                    label="🎨 Art Style",
                    choices=[
                        ("Cartoon", "cartoon"),
                        ("Realistic", "realistic"), 
                        ("Fantasy Art", "fantasy"),
                        ("Digital Art", "digital_art"),
                        ("Anime", "anime")
                    ],
                    value="digital_art", # Default to digital art
                    info="Select the artistic style for your video segments. This affects the overall visual look."
                )

                generate_btn = gr.Button("🎬 Generate Story Videos", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("""
                ### πŸ’‘ Tips for Best Results:
                
                **Story Writing:**
                -   Aim for **3-10 sentences** in your story. Each will likely become a 10-second segment.
                -   Include **clear actions and locations** for your character (e.g., "walking in the forest").
                -   Describe **scenes vividly** to help the AI generate relevant visuals.
                
                **Character Description:**
                -   Be **specific** about appearance (e.g., "blue eyes," "red cloak," "short stature").
                -   Include **clothing or distinctive features** for better consistency across videos.
                
                **Processing Time:**
                -   This application runs on **free-tier CPU hardware**.
                -   Each 10-second segment can take **1-3 minutes** to generate.
                -   Please be patient! **Progress updates** will keep you informed.
                -   If it seems stuck, check the logs in the "Logs" tab of your Space.
                """)

        gr.Markdown("---")

        # --- Output components (order matters: see generate_btn.click below) ---
        status_output = gr.Textbox(
            label="πŸ“Š Generation Status",
            lines=2, # Shorter to reduce space, but dynamically resizes based on content
            interactive=False,
            value="Enter your story and click 'Generate' to begin!"
        )

        # HTML output for detailed current segment text and AI prompt
        current_segment_details_html = gr.HTML(
            label="Current Segment Details & AI Prompt",
            value="<p>Details for the current segment will appear here as it's processed.</p>"
        )

        with gr.Row():
            # Live video preview for the currently generated 10-second segment
            current_video_preview = gr.Video(
                label="πŸŽ₯ Live Segment Preview",
                width=512,
                height=512,
                interactive=False,
                autoplay=True, # Auto-play the new segment when it loads
                show_share_button=False # Hide share button
            )

            # Gallery to show generated images cumulatively
            image_gallery = gr.Gallery(
                label="πŸ–ΌοΈ Generated Images (Overall Story Visuals)",
                show_label=True,
                elem_id="image_gallery", # Unique ID for potential CSS styling
                columns=3, # More columns for a compact view
                rows=2,
                object_fit="contain",
                height="auto",
                allow_preview=False # Prevents pop-up on click, if desired
            )

        # Final downloadable ZIP file.
        # FIX: gr.File does not accept an `info` kwarg (that parameter exists
        # only on form components such as Textbox/Dropdown); passing it raised
        # a TypeError at interface-construction time. The guidance text is
        # shown via a Markdown caption instead.
        gr.Markdown("*Your complete story video and individual images will be available here.*")
        download_zip_file = gr.File(
            label="⬇️ Download All Videos & Images (ZIP)",
            file_count="single", # Only one file can be downloaded
            interactive=False,   # User cannot upload, only download
            type="filepath"      # Gradio expects a file path to enable download
        )

        # Wire up the generate button. The outputs list must match, in order,
        # the 5-tuple yielded by process_story_gradio:
        # (status, segment HTML, video path, gallery images, zip path).
        generate_btn.click(
            fn=process_story_gradio,
            inputs=[
                story_input,
                character_input,
                style_dropdown
            ],
            outputs=[
                status_output,
                current_segment_details_html,
                current_video_preview,
                image_gallery,
                download_zip_file
            ],
            api_name="generate_story_video",
            concurrency_limit=1 # CRUCIAL: only one generation at a time on free-tier hardware
        )

        # Pre-filled example inputs for quick testing (inputs only; clicking an
        # example does not auto-run generation).
        gr.Examples(
            examples=[
                [
                    "Elara, a young witch with a curious spirit, lived in a cottage nestled deep within the Whispering Woods. One day, while gathering herbs, she stumbled upon an ancient, glowing rune carved into a tree. As she touched it, a shimmering portal opened, revealing a world of floating islands and crystalline creatures. Elara, filled with wonder, stepped through, eager to explore this new magical realm.",
                    "A young witch with long, wavy red hair, green eyes, wearing a flowing dark green robe and carrying a small wooden staff.",
                    "fantasy"
                ],
                [
                    "Detective Miles Corbin adjusted his fedora as he walked down the rain-slicked alley. The neon signs of the city cast long, distorted shadows around him. He pulled out his worn notebook, reviewing the cryptic message left at the crime scene. A black cat darted past his feet, vanishing into the darkness, a faint echo of a distant siren barely audible. He knew this case would be tougher than he thought.",
                    "A grizzled detective in a classic trench coat and a wide-brimmed fedora, with a stern, tired expression.",
                    "realistic"
                ],
                [
                    "Zorp, the friendly alien, landed his spaceship in a bustling city park. Children gasped and pointed, but Zorp merely offered a three-fingered wave and a glowing smile. He then pulled out a device that projected vibrant holographic images of his home planet, a world of purple skies and bouncing jelly-like creatures. Everyone cheered, delighted by the unexpected show. Zorp felt happy to share his culture.",
                    "A small, green alien with large, friendly black eyes, three long fingers, and a shiny silver jumpsuit.",
                    "cartoon"
                ]
            ],
            inputs=[
                story_input,
                character_input,
                style_dropdown
            ],
            label="Try these example stories!"
        )

    return interface

# Launch the Gradio app
if __name__ == "__main__":
    app = create_interface()
    # Throttle for free-tier hardware: allow at most one queued request and
    # one concurrent worker.
    # FIX: `concurrency_count` was removed in Gradio 4 (this file already
    # uses the Gradio-4-only `concurrency_limit=` kwarg on the click event),
    # so the old call raised a TypeError; `default_concurrency_limit` is the
    # Gradio 4 replacement.
    app.queue(max_size=1, default_concurrency_limit=1)
    app.launch()