Munaf1987 committed on
Commit
1ab3619
·
verified ·
1 Parent(s): 8cd6e88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -663
app.py CHANGED
@@ -1,663 +1 @@
1
- import gradio as gr
2
- import torch
3
- import numpy as np
4
- import cv2
5
- from PIL import Image
6
- import json
7
- import os
8
- from typing import List, Dict, Any
9
- import tempfile
10
- import subprocess
11
- from pathlib import Path
12
- import spaces
13
- import gc
14
-
15
- # All open-source HuggingFace models
16
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
17
- from diffusers import (
18
- StableDiffusionPipeline,
19
- StableDiffusionXLPipeline,
20
- StableVideoDiffusionPipeline,
21
- AnimateDiffPipeline,
22
- MotionAdapter,
23
- DDIMScheduler
24
- )
25
- from diffusers.utils import export_to_video
26
- import soundfile as sf
27
- from TTS.api import TTS
28
-
29
- class CartoonFilmGenerator:
30
- def __init__(self):
31
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
32
- self.temp_dir = tempfile.mkdtemp()
33
-
34
- # Model configurations for ZeroGPU optimization
35
- self.models_loaded = False
36
-
37
- @spaces.GPU
38
- def load_models(self):
39
- """Load models on-demand for ZeroGPU efficiency"""
40
- if self.models_loaded:
41
- return
42
-
43
- print("Loading open-source models...")
44
-
45
- try:
46
- # 1. Text generation for script enhancement (Open source)
47
- self.text_generator = pipeline(
48
- "text-generation",
49
- model="microsoft/DialoGPT-large",
50
- tokenizer="microsoft/DialoGPT-large",
51
- device=0 if self.device == "cuda" else -1,
52
- torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
53
- )
54
- print("✅ Text generator loaded")
55
-
56
- except Exception as e:
57
- print(f"❌ Text generator failed: {e}")
58
- self.text_generator = None
59
-
60
- try:
61
- # 2. Image generation - SDXL (fully open source)
62
- self.image_generator = StableDiffusionXLPipeline.from_pretrained(
63
- "stabilityai/stable-diffusion-xl-base-1.0",
64
- torch_dtype=torch.float16,
65
- use_safetensors=True,
66
- variant="fp16"
67
- ).to(self.device)
68
-
69
- # Enable memory optimizations (updated methods)
70
- self.image_generator.enable_vae_slicing()
71
- self.image_generator.enable_vae_tiling()
72
- if hasattr(self.image_generator, 'enable_memory_efficient_attention'):
73
- self.image_generator.enable_memory_efficient_attention()
74
- elif hasattr(self.image_generator, 'enable_xformers_memory_efficient_attention'):
75
- try:
76
- self.image_generator.enable_xformers_memory_efficient_attention()
77
- except:
78
- print("XFormers not available, using default attention")
79
-
80
- print("✅ Image generator (SDXL) loaded")
81
-
82
- except Exception as e:
83
- print(f"❌ SDXL failed, trying SD 1.5: {e}")
84
- try:
85
- # Fallback to SD 1.5
86
- self.image_generator = StableDiffusionPipeline.from_pretrained(
87
- "runwayml/stable-diffusion-v1-5",
88
- torch_dtype=torch.float16,
89
- use_safetensors=True
90
- ).to(self.device)
91
-
92
- # Enable memory optimizations for SD 1.5
93
- self.image_generator.enable_vae_slicing()
94
- if hasattr(self.image_generator, 'enable_vae_tiling'):
95
- self.image_generator.enable_vae_tiling()
96
- if hasattr(self.image_generator, 'enable_xformers_memory_efficient_attention'):
97
- try:
98
- self.image_generator.enable_xformers_memory_efficient_attention()
99
- except:
100
- print("XFormers not available")
101
-
102
- print("✅ Image generator (SD 1.5) loaded")
103
-
104
- except Exception as e2:
105
- print(f"❌ All image generators failed: {e2}")
106
- self.image_generator = None
107
-
108
- try:
109
- # 3. Video generation - AnimateDiff (open source)
110
- adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
111
- self.video_generator = AnimateDiffPipeline.from_pretrained(
112
- "runwayml/stable-diffusion-v1-5",
113
- motion_adapter=adapter,
114
- torch_dtype=torch.float16
115
- ).to(self.device)
116
-
117
- self.video_generator.scheduler = DDIMScheduler.from_pretrained(
118
- "runwayml/stable-diffusion-v1-5",
119
- subfolder="scheduler",
120
- clip_sample=False,
121
- timestep_spacing="linspace",
122
- beta_schedule="linear",
123
- steps_offset=1,
124
- )
125
-
126
- # Enable memory optimizations
127
- self.video_generator.enable_vae_slicing()
128
- if hasattr(self.video_generator, 'enable_vae_tiling'):
129
- self.video_generator.enable_vae_tiling()
130
- if hasattr(self.video_generator, 'enable_xformers_memory_efficient_attention'):
131
- try:
132
- self.video_generator.enable_xformers_memory_efficient_attention()
133
- except:
134
- print("XFormers not available for video generator")
135
-
136
- print("✅ Video generator (AnimateDiff) loaded")
137
-
138
- except Exception as e:
139
- print(f"❌ Video generator failed: {e}")
140
- self.video_generator = None
141
-
142
- try:
143
- # 4. Text-to-Speech (Open source XTTS)
144
- self.tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(self.device)
145
- print("✅ TTS model loaded")
146
-
147
- except Exception as e:
148
- print(f"❌ TTS model failed: {e}")
149
- self.tts_model = None
150
-
151
- self.models_loaded = True
152
- print("🎬 Model loading completed!")
153
-
154
- def clear_gpu_memory(self):
155
- """Clear GPU memory between operations"""
156
- if torch.cuda.is_available():
157
- torch.cuda.empty_cache()
158
- gc.collect()
159
-
160
- def enhance_script_with_llm(self, raw_script: str) -> Dict[str, Any]:
161
- """Use open-source LLM to enhance the script"""
162
-
163
- # Always return structured script (fallback method)
164
- return self.create_structured_script(raw_script, raw_script)
165
-
166
- def create_structured_script(self, original: str, enhanced: str) -> Dict[str, Any]:
167
- """Create structured script data"""
168
-
169
- # Extract key elements from the script
170
- words = original.lower().split()
171
-
172
- # Determine main character and setting
173
- if any(word in words for word in ['boy', 'man', 'hero', 'prince']):
174
- main_char = "brave young hero"
175
- elif any(word in words for word in ['girl', 'woman', 'princess', 'heroine']):
176
- main_char = "brave young heroine"
177
- else:
178
- main_char = "friendly protagonist"
179
-
180
- # Determine setting
181
- if any(word in words for word in ['forest', 'woods', 'trees']):
182
- setting = "magical forest"
183
- elif any(word in words for word in ['city', 'town', 'urban']):
184
- setting = "bustling city"
185
- elif any(word in words for word in ['space', 'stars', 'planet']):
186
- setting = "cosmic space"
187
- else:
188
- setting = "colorful fantasy world"
189
-
190
- # Create 8 scenes for shorter processing time
191
- scenes = []
192
- scene_templates = [
193
- "Introduction of the main character",
194
- "Character discovers the challenge",
195
- "Meeting helpful friends",
196
- "First obstacle appears",
197
- "Character shows determination",
198
- "Final confrontation",
199
- "Resolution and victory",
200
- "Happy ending celebration"
201
- ]
202
-
203
- for i, template in enumerate(scene_templates):
204
- scenes.append({
205
- "scene_number": i + 1,
206
- "description": f"{template} in the {setting}",
207
- "characters_present": [main_char] if i % 3 != 0 else [main_char, "supporting character"],
208
- "dialogue": [
209
- {"character": main_char, "text": f"This is scene {i+1} where {template.lower()}."}
210
- ],
211
- "background": f"{setting} with {['sunrise', 'daylight', 'sunset', 'moonlight'][i % 4]} lighting",
212
- "mood": ["hopeful", "determined", "friendly", "tense", "brave", "exciting", "triumphant", "joyful"][i],
213
- "duration": "30"
214
- })
215
-
216
- return {
217
- "characters": [
218
- {
219
- "name": main_char,
220
- "description": f"Cartoon-style {main_char} with expressive eyes, friendly smile, colorful outfit, animated style",
221
- "personality": "brave, kind, determined"
222
- },
223
- {
224
- "name": "supporting character",
225
- "description": "Helpful cartoon companion with warm colors, friendly appearance, supporting role",
226
- "personality": "loyal, wise, encouraging"
227
- }
228
- ],
229
- "scenes": scenes,
230
- "style": "Modern 2D cartoon animation, bright colors, expressive characters, family-friendly"
231
- }
232
-
233
- @spaces.GPU
234
- def generate_character_images(self, characters: List[Dict]) -> Dict[str, str]:
235
- """Generate character images using available image generator"""
236
- self.load_models()
237
- character_images = {}
238
-
239
- if not self.image_generator:
240
- print("❌ No image generator available")
241
- return character_images
242
-
243
- for character in characters:
244
- prompt = f"cartoon character sheet, {character['description']}, multiple poses, clean white background, 2D animation style, colorful, expressive, high quality"
245
- negative_prompt = "realistic, 3D, dark, scary, inappropriate, low quality, blurry"
246
-
247
- try:
248
- image = self.image_generator(
249
- prompt=prompt,
250
- negative_prompt=negative_prompt,
251
- num_inference_steps=20, # Reduced for speed
252
- guidance_scale=7.5,
253
- height=768, # Smaller for memory efficiency
254
- width=768
255
- ).images[0]
256
-
257
- char_path = f"{self.temp_dir}/character_{character['name'].replace(' ', '_')}.png"
258
- image.save(char_path)
259
- character_images[character['name']] = char_path
260
- print(f"✅ Generated character: {character['name']}")
261
-
262
- # Clear memory after each character
263
- self.clear_gpu_memory()
264
-
265
- except Exception as e:
266
- print(f"❌ Error generating character {character['name']}: {e}")
267
-
268
- return character_images
269
-
270
- @spaces.GPU
271
- def generate_background_images(self, scenes: List[Dict]) -> Dict[int, str]:
272
- """Generate background images for each scene"""
273
- self.load_models()
274
- background_images = {}
275
-
276
- if not self.image_generator:
277
- print("❌ No image generator available")
278
- return background_images
279
-
280
- for scene in scenes:
281
- prompt = f"cartoon background, {scene['background']}, {scene['mood']} atmosphere, animated style, no characters, detailed environment, bright colors, 2D animation"
282
- negative_prompt = "characters, people, realistic, dark, scary, low quality"
283
-
284
- try:
285
- image = self.image_generator(
286
- prompt=prompt,
287
- negative_prompt=negative_prompt,
288
- num_inference_steps=15, # Reduced for speed
289
- guidance_scale=7.0,
290
- height=512, # 16:9 aspect ratio
291
- width=768
292
- ).images[0]
293
-
294
- bg_path = f"{self.temp_dir}/background_scene_{scene['scene_number']}.png"
295
- image.save(bg_path)
296
- background_images[scene['scene_number']] = bg_path
297
- print(f"✅ Generated background for scene {scene['scene_number']}")
298
-
299
- # Clear memory after each background
300
- self.clear_gpu_memory()
301
-
302
- except Exception as e:
303
- print(f"❌ Error generating background for scene {scene['scene_number']}: {e}")
304
-
305
- return background_images
306
-
307
- @spaces.GPU
308
- def generate_scene_videos(self, scenes: List[Dict], character_images: Dict, background_images: Dict) -> List[str]:
309
- """Generate animated videos for each scene"""
310
- self.load_models()
311
- scene_videos = []
312
-
313
- for scene in scenes:
314
- try:
315
- if self.video_generator:
316
- # Create prompt for scene animation
317
- characters_text = ", ".join(scene['characters_present'])
318
- prompt = f"cartoon animation, {characters_text} in {scene['background']}, {scene['mood']} mood, 2D animated style, smooth motion, family friendly"
319
- negative_prompt = "realistic, 3D, static, blurry, low quality, scary"
320
-
321
- # Generate animated video using AnimateDiff
322
- video_frames = self.video_generator(
323
- prompt=prompt,
324
- negative_prompt=negative_prompt,
325
- num_frames=12, # Reduced frames for speed
326
- guidance_scale=7.5,
327
- num_inference_steps=15, # Reduced steps
328
- height=512,
329
- width=768
330
- ).frames[0]
331
-
332
- # Save video
333
- video_path = f"{self.temp_dir}/scene_{scene['scene_number']}.mp4"
334
- export_to_video(video_frames, video_path, fps=6)
335
- scene_videos.append(video_path)
336
- print(f"✅ Generated video for scene {scene['scene_number']}")
337
-
338
- # Clear GPU memory
339
- self.clear_gpu_memory()
340
-
341
- else:
342
- # Fallback: create static video
343
- if scene['scene_number'] in background_images:
344
- video_path = self.create_static_video(
345
- Image.open(background_images[scene['scene_number']]),
346
- int(scene.get('duration', 30)),
347
- scene['scene_number']
348
- )
349
- scene_videos.append(video_path)
350
- print(f"✅ Created static video for scene {scene['scene_number']}")
351
-
352
- except Exception as e:
353
- print(f"❌ Error generating video for scene {scene['scene_number']}: {e}")
354
- # Create fallback static video
355
- if scene['scene_number'] in background_images:
356
- try:
357
- video_path = self.create_static_video(
358
- Image.open(background_images[scene['scene_number']]),
359
- int(scene.get('duration', 30)),
360
- scene['scene_number']
361
- )
362
- scene_videos.append(video_path)
363
- print(f"✅ Created fallback video for scene {scene['scene_number']}")
364
- except Exception as e2:
365
- print(f"❌ Fallback video creation failed: {e2}")
366
-
367
- return scene_videos
368
-
369
- def create_static_video(self, image: Image.Image, duration: int, scene_num: int) -> str:
370
- """Create video from static image with simple effects"""
371
- video_path = f"{self.temp_dir}/scene_{scene_num}.mp4"
372
-
373
- try:
374
- # Convert PIL to OpenCV
375
- img_array = np.array(image.resize((768, 512)))
376
- img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
377
-
378
- # Create video writer
379
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
380
- fps = 24
381
- out = cv2.VideoWriter(video_path, fourcc, fps, (768, 512))
382
-
383
- # Add simple zoom effect
384
- for i in range(duration * fps):
385
- scale = 1.0 + (i / (duration * fps)) * 0.05 # Slight zoom
386
- h, w = img_array.shape[:2]
387
- center_x, center_y = w // 2, h // 2
388
-
389
- # Create zoom matrix
390
- M = cv2.getRotationMatrix2D((center_x, center_y), 0, scale)
391
- zoomed = cv2.warpAffine(img_array, M, (w, h))
392
-
393
- out.write(zoomed)
394
-
395
- out.release()
396
- return video_path
397
-
398
- except Exception as e:
399
- print(f"❌ Static video creation failed: {e}")
400
- return None
401
-
402
- @spaces.GPU
403
- def generate_audio(self, scenes: List[Dict]) -> str:
404
- """Generate audio using open-source XTTS"""
405
- if not self.tts_model:
406
- print("❌ No TTS model available")
407
- return None
408
-
409
- self.load_models()
410
-
411
- try:
412
- audio_segments = []
413
- sample_rate = 22050
414
-
415
- for scene in scenes:
416
- scene_audio = []
417
-
418
- # Generate speech for dialogue
419
- for dialogue in scene.get('dialogue', []):
420
- text = dialogue['text']
421
-
422
- # Generate audio using XTTS
423
- audio = self.tts_model.tts(
424
- text=text,
425
- language="en"
426
- )
427
-
428
- scene_audio.extend(audio)
429
-
430
- # Add pause between scenes
431
- pause = np.zeros(int(sample_rate * 1.0)) # 1 second pause
432
- scene_audio.extend(pause)
433
- audio_segments.extend(scene_audio)
434
-
435
- # Save combined audio
436
- audio_path = f"{self.temp_dir}/film_audio.wav"
437
- sf.write(audio_path, audio_segments, sample_rate)
438
-
439
- self.clear_gpu_memory()
440
- return audio_path
441
-
442
- except Exception as e:
443
- print(f"❌ Audio generation failed: {e}")
444
- return None
445
-
446
- def merge_videos_with_ffmpeg(self, scene_videos: List[str], audio_path: str = None) -> str:
447
- """Merge videos using ffmpeg"""
448
- if not scene_videos:
449
- return None
450
-
451
- final_video_path = f"{self.temp_dir}/final_cartoon_film.mp4"
452
-
453
- try:
454
- # Create concat file
455
- concat_file = f"{self.temp_dir}/concat_list.txt"
456
- with open(concat_file, 'w') as f:
457
- for video in scene_videos:
458
- if os.path.exists(video):
459
- f.write(f"file '{os.path.abspath(video)}'\n")
460
-
461
- if audio_path and os.path.exists(audio_path):
462
- # Merge videos with audio
463
- cmd = [
464
- 'ffmpeg', '-f', 'concat', '-safe', '0', '-i', concat_file,
465
- '-i', audio_path,
466
- '-c:v', 'libx264', '-c:a', 'aac',
467
- '-shortest', '-y', final_video_path
468
- ]
469
- else:
470
- # Merge videos without audio
471
- cmd = [
472
- 'ffmpeg', '-f', 'concat', '-safe', '0', '-i', concat_file,
473
- '-c', 'copy', '-y', final_video_path
474
- ]
475
-
476
- result = subprocess.run(cmd, capture_output=True, text=True)
477
- if result.returncode == 0:
478
- print("✅ Video merging successful")
479
- return final_video_path
480
- else:
481
- print(f"❌ FFmpeg error: {result.stderr}")
482
- return None
483
-
484
- except Exception as e:
485
- print(f"❌ Video merging failed: {e}")
486
- return None
487
-
488
- @spaces.GPU
489
- def generate_cartoon_film(self, script: str, include_audio: bool = True) -> tuple:
490
- """Main function to generate complete cartoon film"""
491
- try:
492
- print("🎬 Starting cartoon film generation...")
493
-
494
- # Step 1: Enhance script
495
- print("📝 Processing script...")
496
- processed_script = self.enhance_script_with_llm(script)
497
-
498
- # Step 2: Generate characters
499
- print("👥 Creating characters...")
500
- character_images = self.generate_character_images(processed_script['characters'])
501
-
502
- # Step 3: Generate backgrounds
503
- print("🏞️ Creating backgrounds...")
504
- background_images = self.generate_background_images(processed_script['scenes'])
505
-
506
- # Step 4: Generate scene videos
507
- print("🎥 Creating videos...")
508
- scene_videos = self.generate_scene_videos(
509
- processed_script['scenes'],
510
- character_images,
511
- background_images
512
- )
513
-
514
- # Step 5: Generate audio
515
- audio_path = None
516
- if include_audio:
517
- print("🎵 Creating audio...")
518
- audio_path = self.generate_audio(processed_script['scenes'])
519
-
520
- # Step 6: Merge final video
521
- print("🎞️ Finalizing film...")
522
- final_video = self.merge_videos_with_ffmpeg(scene_videos, audio_path)
523
-
524
- if final_video and os.path.exists(final_video):
525
- print("✅ Film generation complete!")
526
- return final_video, processed_script, "✅ Cartoon film generated successfully!"
527
- else:
528
- print("⚠️ Partial success - some steps may have failed")
529
- return None, processed_script, "⚠️ Partial generation - check individual steps"
530
-
531
- except Exception as e:
532
- print(f"❌ Generation failed: {e}")
533
- # Return error information in proper format
534
- error_info = {
535
- "error": True,
536
- "message": str(e),
537
- "characters": [],
538
- "scenes": [],
539
- "style": "Error occurred during generation"
540
- }
541
- return None, error_info, f"❌ Generation failed: {str(e)}"
542
-
543
- # Initialize generator
544
- generator = CartoonFilmGenerator()
545
-
546
- @spaces.GPU
547
- def create_cartoon_film(script, include_audio):
548
- """Gradio interface function"""
549
- if not script.strip():
550
- empty_response = {
551
- "error": True,
552
- "message": "No script provided",
553
- "characters": [],
554
- "scenes": [],
555
- "style": "Please enter a script"
556
- }
557
- return None, empty_response, "❌ Please enter a script"
558
-
559
- return generator.generate_cartoon_film(script, include_audio)
560
-
561
- # Gradio Interface optimized for ZeroGPU
562
- with gr.Blocks(
563
- title="🎬 AI Cartoon Film Generator",
564
- theme=gr.themes.Soft(),
565
- css="""
566
- .gradio-container {
567
- max-width: 1200px !important;
568
- }
569
- """
570
- ) as demo:
571
-
572
- gr.Markdown("""
573
- # 🎬 AI Cartoon Film Generator (100% Open Source)
574
-
575
- Transform your script into a complete cartoon film using only open-source models!
576
-
577
- **🔥 Features:**
578
- - **Stable Diffusion XL/1.5** for character & background generation
579
- - **AnimateDiff** for video animation
580
- - **XTTS** for voice synthesis
581
- - **ZeroGPU optimized** - completely free!
582
- - **No API keys required** - everything is open source
583
-
584
- **⚡ Fixed compatibility issues and memory optimization**
585
- """)
586
-
587
- with gr.Row():
588
- with gr.Column(scale=1):
589
- script_input = gr.Textbox(
590
- label="📝 Your Script",
591
- placeholder="Enter your story idea here! Can be just a few sentences - the AI will expand it into a full cartoon film.\n\nExample: 'A young explorer discovers a magical forest where animals can talk and help find a lost treasure.'",
592
- lines=8,
593
- max_lines=15
594
- )
595
-
596
- with gr.Row():
597
- include_audio = gr.Checkbox(
598
- label="🎵 Include AI-Generated Voices",
599
- value=False, # Default to False for faster testing
600
- info="Generate speech for character dialogue"
601
- )
602
-
603
- generate_btn = gr.Button(
604
- "🎬 Generate Cartoon Film",
605
- variant="primary",
606
- size="lg"
607
- )
608
-
609
- gr.Markdown("""
610
- **⏱️ Processing Time:** 5-10 minutes
611
- **🎥 Output:** 4-5 minute MP4 film
612
- **📱 Models:** SDXL + AnimateDiff + XTTS
613
- """)
614
-
615
- with gr.Column(scale=1):
616
- video_output = gr.Video(
617
- label="🎬 Generated Cartoon Film",
618
- height=400
619
- )
620
-
621
- status_output = gr.Textbox(
622
- label="📊 Status",
623
- lines=3
624
- )
625
-
626
- script_details = gr.JSON(
627
- label="📋 Generated Script Details",
628
- visible=True
629
- )
630
-
631
- # Event handlers
632
- generate_btn.click(
633
- fn=create_cartoon_film,
634
- inputs=[script_input, include_audio],
635
- outputs=[video_output, script_details, status_output],
636
- show_progress=True
637
- )
638
-
639
- # Example scripts
640
- gr.Examples(
641
- examples=[
642
- ["A brave young explorer discovers a magical forest where talking animals help her find a lost treasure.", False],
643
- ["Two best friends go on a space adventure to help a friendly alien return home.", False],
644
- ["A small robot learns about emotions when it meets a lonely child in the city.", False],
645
- ["A young artist discovers their drawings come to life and must help solve problems.", False]
646
- ],
647
- inputs=[script_input, include_audio],
648
- label="💡 Try these example scripts:"
649
- )
650
-
651
- gr.Markdown("""
652
- ---
653
- **🔧 Fixed Issues:**
654
- - ✅ Memory optimization methods updated for latest diffusers
655
- - ✅ Fallback models for compatibility
656
- - ✅ Better error handling and logging
657
- - ✅ Reduced parameters for ZeroGPU efficiency
658
-
659
- **💝 Completely free and open source!** No API keys required.
660
- """)
661
-
662
- if __name__ == "__main__":
663
- demo.queue(max_size=3).launch()
 
1
+ 14,21