Munaf1987 commited on
Commit
8cd6e88
Β·
verified Β·
1 Parent(s): caaaf21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +248 -180
app.py CHANGED
@@ -16,6 +16,7 @@ import gc
16
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
17
  from diffusers import (
18
  StableDiffusionPipeline,
 
19
  StableVideoDiffusionPipeline,
20
  AnimateDiffPipeline,
21
  MotionAdapter,
@@ -41,51 +42,114 @@ class CartoonFilmGenerator:
41
 
42
  print("Loading open-source models...")
43
 
44
- # 1. Text generation for script enhancement (Open source)
45
- self.text_generator = pipeline(
46
- "text-generation",
47
- model="microsoft/DialoGPT-large",
48
- tokenizer="microsoft/DialoGPT-large",
49
- device=0 if self.device == "cuda" else -1,
50
- torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
51
- )
52
-
53
- # 2. Image generation - SDXL (fully open source)
54
- self.image_generator = StableDiffusionPipeline.from_pretrained(
55
- "stabilityai/stable-diffusion-xl-base-1.0",
56
- torch_dtype=torch.float16,
57
- use_safetensors=True,
58
- variant="fp16"
59
- ).to(self.device)
60
-
61
- # Enable memory efficient attention
62
- self.image_generator.enable_memory_efficient_attention()
63
- self.image_generator.enable_vae_slicing()
64
 
65
- # 3. Video generation - AnimateDiff (open source)
66
- adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
67
- self.video_generator = AnimateDiffPipeline.from_pretrained(
68
- "runwayml/stable-diffusion-v1-5",
69
- motion_adapter=adapter,
70
- torch_dtype=torch.float16
71
- ).to(self.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
- self.video_generator.scheduler = DDIMScheduler.from_pretrained(
74
- "runwayml/stable-diffusion-v1-5",
75
- subfolder="scheduler",
76
- clip_sample=False,
77
- timestep_spacing="linspace",
78
- beta_schedule="linear",
79
- steps_offset=1,
80
- )
81
- self.video_generator.enable_vae_slicing()
82
- self.video_generator.enable_memory_efficient_attention()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- # 4. Text-to-Speech (Open source XTTS)
85
- self.tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(self.device)
 
 
 
 
 
 
86
 
87
  self.models_loaded = True
88
- print("All open-source models loaded successfully!")
89
 
90
  def clear_gpu_memory(self):
91
  """Clear GPU memory between operations"""
@@ -96,39 +160,8 @@ class CartoonFilmGenerator:
96
  def enhance_script_with_llm(self, raw_script: str) -> Dict[str, Any]:
97
  """Use open-source LLM to enhance the script"""
98
 
99
- # Structured prompt for script enhancement
100
- enhancement_prompt = f"""
101
- Original script: {raw_script}
102
-
103
- Transform this into a detailed 8-minute cartoon film with:
104
- - 12 scenes (40 seconds each)
105
- - Consistent characters
106
- - Clear scene descriptions
107
- - Simple dialogue
108
- - Visual descriptions for animation
109
-
110
- Create a story structure with beginning, middle, and end.
111
- """
112
-
113
- try:
114
- # Use the text generation pipeline
115
- response = self.text_generator(
116
- enhancement_prompt,
117
- max_length=1000,
118
- num_return_sequences=1,
119
- temperature=0.7,
120
- do_sample=True,
121
- pad_token_id=self.text_generator.tokenizer.eos_token_id
122
- )
123
-
124
- enhanced_script = response[0]['generated_text']
125
-
126
- except Exception as e:
127
- print(f"LLM enhancement failed: {e}")
128
- enhanced_script = raw_script
129
-
130
- # Create structured output (fallback method)
131
- return self.create_structured_script(raw_script, enhanced_script)
132
 
133
  def create_structured_script(self, original: str, enhanced: str) -> Dict[str, Any]:
134
  """Create structured script data"""
@@ -154,18 +187,14 @@ class CartoonFilmGenerator:
154
  else:
155
  setting = "colorful fantasy world"
156
 
157
- # Create 12 scenes for 8-minute film
158
  scenes = []
159
  scene_templates = [
160
  "Introduction of the main character",
161
- "Character discovers the challenge",
162
  "Meeting helpful friends",
163
  "First obstacle appears",
164
  "Character shows determination",
165
- "Meeting the antagonist",
166
- "Major challenge or conflict",
167
- "Character feels doubt",
168
- "Friends provide support",
169
  "Final confrontation",
170
  "Resolution and victory",
171
  "Happy ending celebration"
@@ -177,11 +206,11 @@ class CartoonFilmGenerator:
177
  "description": f"{template} in the {setting}",
178
  "characters_present": [main_char] if i % 3 != 0 else [main_char, "supporting character"],
179
  "dialogue": [
180
- {"character": main_char, "text": f"Scene {i+1} dialogue based on: {template}"}
181
  ],
182
  "background": f"{setting} with {['sunrise', 'daylight', 'sunset', 'moonlight'][i % 4]} lighting",
183
- "mood": ["hopeful", "determined", "friendly", "tense", "brave", "worried", "dramatic", "uncertain", "supportive", "exciting", "triumphant", "joyful"][i],
184
- "duration": "40"
185
  })
186
 
187
  return {
@@ -203,10 +232,14 @@ class CartoonFilmGenerator:
203
 
204
  @spaces.GPU
205
  def generate_character_images(self, characters: List[Dict]) -> Dict[str, str]:
206
- """Generate character images using SDXL"""
207
  self.load_models()
208
  character_images = {}
209
 
 
 
 
 
210
  for character in characters:
211
  prompt = f"cartoon character sheet, {character['description']}, multiple poses, clean white background, 2D animation style, colorful, expressive, high quality"
212
  negative_prompt = "realistic, 3D, dark, scary, inappropriate, low quality, blurry"
@@ -215,21 +248,22 @@ class CartoonFilmGenerator:
215
  image = self.image_generator(
216
  prompt=prompt,
217
  negative_prompt=negative_prompt,
218
- num_inference_steps=25,
219
  guidance_scale=7.5,
220
- height=1024,
221
- width=1024
222
  ).images[0]
223
 
224
  char_path = f"{self.temp_dir}/character_{character['name'].replace(' ', '_')}.png"
225
  image.save(char_path)
226
  character_images[character['name']] = char_path
 
227
 
228
  # Clear memory after each character
229
  self.clear_gpu_memory()
230
 
231
  except Exception as e:
232
- print(f"Error generating character {character['name']}: {e}")
233
 
234
  return character_images
235
 
@@ -239,6 +273,10 @@ class CartoonFilmGenerator:
239
  self.load_models()
240
  background_images = {}
241
 
 
 
 
 
242
  for scene in scenes:
243
  prompt = f"cartoon background, {scene['background']}, {scene['mood']} atmosphere, animated style, no characters, detailed environment, bright colors, 2D animation"
244
  negative_prompt = "characters, people, realistic, dark, scary, low quality"
@@ -247,100 +285,127 @@ class CartoonFilmGenerator:
247
  image = self.image_generator(
248
  prompt=prompt,
249
  negative_prompt=negative_prompt,
250
- num_inference_steps=20,
251
  guidance_scale=7.0,
252
- height=576,
253
- width=1024 # 16:9 aspect ratio
254
  ).images[0]
255
 
256
  bg_path = f"{self.temp_dir}/background_scene_{scene['scene_number']}.png"
257
  image.save(bg_path)
258
  background_images[scene['scene_number']] = bg_path
 
259
 
260
  # Clear memory after each background
261
  self.clear_gpu_memory()
262
 
263
  except Exception as e:
264
- print(f"Error generating background for scene {scene['scene_number']}: {e}")
265
 
266
  return background_images
267
 
268
  @spaces.GPU
269
  def generate_scene_videos(self, scenes: List[Dict], character_images: Dict, background_images: Dict) -> List[str]:
270
- """Generate animated videos for each scene using AnimateDiff"""
271
  self.load_models()
272
  scene_videos = []
273
 
274
  for scene in scenes:
275
  try:
276
- # Create prompt for scene animation
277
- characters_text = ", ".join(scene['characters_present'])
278
- prompt = f"cartoon animation, {characters_text} in {scene['background']}, {scene['mood']} mood, 2D animated style, smooth motion, family friendly"
279
- negative_prompt = "realistic, 3D, static, blurry, low quality, scary"
280
-
281
- # Generate animated video using AnimateDiff
282
- video_frames = self.video_generator(
283
- prompt=prompt,
284
- negative_prompt=negative_prompt,
285
- num_frames=16, # 16 frames for smooth motion
286
- guidance_scale=7.5,
287
- num_inference_steps=20,
288
- height=576,
289
- width=1024
290
- ).frames[0]
291
-
292
- # Save video
293
- video_path = f"{self.temp_dir}/scene_{scene['scene_number']}.mp4"
294
- export_to_video(video_frames, video_path, fps=8)
295
- scene_videos.append(video_path)
296
-
297
- # Clear GPU memory
298
- self.clear_gpu_memory()
 
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
  except Exception as e:
301
- print(f"Error generating video for scene {scene['scene_number']}: {e}")
302
- # Fallback: create static video
303
  if scene['scene_number'] in background_images:
304
- video_path = self.create_static_video(
305
- Image.open(background_images[scene['scene_number']]),
306
- int(scene.get('duration', 40)),
307
- scene['scene_number']
308
- )
309
- scene_videos.append(video_path)
 
 
 
 
310
 
311
  return scene_videos
312
 
313
  def create_static_video(self, image: Image.Image, duration: int, scene_num: int) -> str:
314
- """Fallback: Create video from static image"""
315
  video_path = f"{self.temp_dir}/scene_{scene_num}.mp4"
316
 
317
- # Convert PIL to OpenCV
318
- img_array = np.array(image.resize((1024, 576)))
319
- img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
320
-
321
- # Create video writer
322
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
323
- fps = 24
324
- out = cv2.VideoWriter(video_path, fourcc, fps, (1024, 576))
325
-
326
- # Add simple zoom effect
327
- for i in range(duration * fps):
328
- scale = 1.0 + (i / (duration * fps)) * 0.1 # Slight zoom
329
- h, w = img_array.shape[:2]
330
- center_x, center_y = w // 2, h // 2
331
 
332
- # Create zoom matrix
333
- M = cv2.getRotationMatrix2D((center_x, center_y), 0, scale)
334
- zoomed = cv2.warpAffine(img_array, M, (w, h))
 
335
 
336
- out.write(zoomed)
337
-
338
- out.release()
339
- return video_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
 
341
  @spaces.GPU
342
  def generate_audio(self, scenes: List[Dict]) -> str:
343
  """Generate audio using open-source XTTS"""
 
 
 
 
344
  self.load_models()
345
 
346
  try:
@@ -375,7 +440,7 @@ class CartoonFilmGenerator:
375
  return audio_path
376
 
377
  except Exception as e:
378
- print(f"Audio generation failed: {e}")
379
  return None
380
 
381
  def merge_videos_with_ffmpeg(self, scene_videos: List[str], audio_path: str = None) -> str:
@@ -410,35 +475,36 @@ class CartoonFilmGenerator:
410
 
411
  result = subprocess.run(cmd, capture_output=True, text=True)
412
  if result.returncode == 0:
 
413
  return final_video_path
414
  else:
415
- print(f"FFmpeg error: {result.stderr}")
416
  return None
417
 
418
  except Exception as e:
419
- print(f"Video merging failed: {e}")
420
  return None
421
 
422
  @spaces.GPU
423
  def generate_cartoon_film(self, script: str, include_audio: bool = True) -> tuple:
424
  """Main function to generate complete cartoon film"""
425
  try:
426
- progress_updates = []
427
 
428
  # Step 1: Enhance script
429
- progress_updates.append("🎬 Processing and enhancing script...")
430
  processed_script = self.enhance_script_with_llm(script)
431
 
432
  # Step 2: Generate characters
433
- progress_updates.append("πŸ‘₯ Creating character designs...")
434
  character_images = self.generate_character_images(processed_script['characters'])
435
 
436
  # Step 3: Generate backgrounds
437
- progress_updates.append("🏞️ Generating scene backgrounds...")
438
  background_images = self.generate_background_images(processed_script['scenes'])
439
 
440
  # Step 4: Generate scene videos
441
- progress_updates.append("πŸŽ₯ Creating animated scenes...")
442
  scene_videos = self.generate_scene_videos(
443
  processed_script['scenes'],
444
  character_images,
@@ -448,20 +514,23 @@ class CartoonFilmGenerator:
448
  # Step 5: Generate audio
449
  audio_path = None
450
  if include_audio:
451
- progress_updates.append("🎡 Generating audio and voices...")
452
  audio_path = self.generate_audio(processed_script['scenes'])
453
 
454
  # Step 6: Merge final video
455
- progress_updates.append("🎞️ Merging final cartoon film...")
456
  final_video = self.merge_videos_with_ffmpeg(scene_videos, audio_path)
457
 
458
  if final_video and os.path.exists(final_video):
 
459
  return final_video, processed_script, "βœ… Cartoon film generated successfully!"
460
  else:
461
- return None, processed_script, "❌ Error in final video generation"
 
462
 
463
  except Exception as e:
464
- # Return error information in proper JSON format
 
465
  error_info = {
466
  "error": True,
467
  "message": str(e),
@@ -503,16 +572,16 @@ with gr.Blocks(
503
  gr.Markdown("""
504
  # 🎬 AI Cartoon Film Generator (100% Open Source)
505
 
506
- Transform your script into a complete 7-10 minute cartoon film using only open-source models!
507
 
508
  **πŸ”₯ Features:**
509
- - **Stable Diffusion XL** for high-quality character & background generation
510
- - **AnimateDiff** for smooth video animation
511
- - **XTTS** for multilingual voice synthesis
512
- - **All models run on ZeroGPU** - completely free!
513
  - **No API keys required** - everything is open source
514
 
515
- **⚑ Optimized for Hugging Face ZeroGPU**
516
  """)
517
 
518
  with gr.Row():
@@ -527,7 +596,7 @@ with gr.Blocks(
527
  with gr.Row():
528
  include_audio = gr.Checkbox(
529
  label="🎡 Include AI-Generated Voices",
530
- value=True,
531
  info="Generate speech for character dialogue"
532
  )
533
 
@@ -538,9 +607,9 @@ with gr.Blocks(
538
  )
539
 
540
  gr.Markdown("""
541
- **⏱️ Processing Time:** 10-15 minutes
542
- **πŸŽ₯ Output:** 7-10 minute MP4 film
543
- **πŸ“± All models:** 100% open source & free
544
  """)
545
 
546
  with gr.Column(scale=1):
@@ -551,12 +620,12 @@ with gr.Blocks(
551
 
552
  status_output = gr.Textbox(
553
  label="πŸ“Š Status",
554
- lines=2
555
  )
556
 
557
  script_details = gr.JSON(
558
  label="πŸ“‹ Generated Script Details",
559
- visible=False
560
  )
561
 
562
  # Event handlers
@@ -570,10 +639,10 @@ with gr.Blocks(
570
  # Example scripts
571
  gr.Examples(
572
  examples=[
573
- ["A brave young explorer discovers a magical forest where talking animals help her find a lost treasure that will save her village.", True],
574
- ["Two best friends embark on a space adventure to help a friendly alien return home while learning about friendship and courage.", True],
575
- ["A small robot in a big city learns about emotions and friendship when it meets a lonely child who needs a companion.", False],
576
- ["A young artist discovers their drawings come to life and must help the characters solve problems in both the real and drawn worlds.", True]
577
  ],
578
  inputs=[script_input, include_audio],
579
  label="πŸ’‘ Try these example scripts:"
@@ -581,14 +650,13 @@ with gr.Blocks(
581
 
582
  gr.Markdown("""
583
  ---
584
- **πŸ”§ Technical Details:**
585
- - **Image Generation:** Stable Diffusion XL (open source)
586
- - **Video Animation:** AnimateDiff (open source)
587
- - **Voice Synthesis:** XTTS v2 (open source)
588
- - **Script Enhancement:** DialoGPT (open source)
589
- - **Infrastructure:** Hugging Face ZeroGPU (free)
590
 
591
- **πŸ’ Completely free and open source!** No API keys or subscriptions required.
592
  """)
593
 
594
  if __name__ == "__main__":
 
16
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
17
  from diffusers import (
18
  StableDiffusionPipeline,
19
+ StableDiffusionXLPipeline,
20
  StableVideoDiffusionPipeline,
21
  AnimateDiffPipeline,
22
  MotionAdapter,
 
42
 
43
  print("Loading open-source models...")
44
 
45
+ try:
46
+ # 1. Text generation for script enhancement (Open source)
47
+ self.text_generator = pipeline(
48
+ "text-generation",
49
+ model="microsoft/DialoGPT-large",
50
+ tokenizer="microsoft/DialoGPT-large",
51
+ device=0 if self.device == "cuda" else -1,
52
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
53
+ )
54
+ print("βœ… Text generator loaded")
55
+
56
+ except Exception as e:
57
+ print(f"❌ Text generator failed: {e}")
58
+ self.text_generator = None
 
 
 
 
 
 
59
 
60
+ try:
61
+ # 2. Image generation - SDXL (fully open source)
62
+ self.image_generator = StableDiffusionXLPipeline.from_pretrained(
63
+ "stabilityai/stable-diffusion-xl-base-1.0",
64
+ torch_dtype=torch.float16,
65
+ use_safetensors=True,
66
+ variant="fp16"
67
+ ).to(self.device)
68
+
69
+ # Enable memory optimizations (updated methods)
70
+ self.image_generator.enable_vae_slicing()
71
+ self.image_generator.enable_vae_tiling()
72
+ if hasattr(self.image_generator, 'enable_memory_efficient_attention'):
73
+ self.image_generator.enable_memory_efficient_attention()
74
+ elif hasattr(self.image_generator, 'enable_xformers_memory_efficient_attention'):
75
+ try:
76
+ self.image_generator.enable_xformers_memory_efficient_attention()
77
+ except:
78
+ print("XFormers not available, using default attention")
79
+
80
+ print("βœ… Image generator (SDXL) loaded")
81
+
82
+ except Exception as e:
83
+ print(f"❌ SDXL failed, trying SD 1.5: {e}")
84
+ try:
85
+ # Fallback to SD 1.5
86
+ self.image_generator = StableDiffusionPipeline.from_pretrained(
87
+ "runwayml/stable-diffusion-v1-5",
88
+ torch_dtype=torch.float16,
89
+ use_safetensors=True
90
+ ).to(self.device)
91
+
92
+ # Enable memory optimizations for SD 1.5
93
+ self.image_generator.enable_vae_slicing()
94
+ if hasattr(self.image_generator, 'enable_vae_tiling'):
95
+ self.image_generator.enable_vae_tiling()
96
+ if hasattr(self.image_generator, 'enable_xformers_memory_efficient_attention'):
97
+ try:
98
+ self.image_generator.enable_xformers_memory_efficient_attention()
99
+ except:
100
+ print("XFormers not available")
101
+
102
+ print("βœ… Image generator (SD 1.5) loaded")
103
+
104
+ except Exception as e2:
105
+ print(f"❌ All image generators failed: {e2}")
106
+ self.image_generator = None
107
 
108
+ try:
109
+ # 3. Video generation - AnimateDiff (open source)
110
+ adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
111
+ self.video_generator = AnimateDiffPipeline.from_pretrained(
112
+ "runwayml/stable-diffusion-v1-5",
113
+ motion_adapter=adapter,
114
+ torch_dtype=torch.float16
115
+ ).to(self.device)
116
+
117
+ self.video_generator.scheduler = DDIMScheduler.from_pretrained(
118
+ "runwayml/stable-diffusion-v1-5",
119
+ subfolder="scheduler",
120
+ clip_sample=False,
121
+ timestep_spacing="linspace",
122
+ beta_schedule="linear",
123
+ steps_offset=1,
124
+ )
125
+
126
+ # Enable memory optimizations
127
+ self.video_generator.enable_vae_slicing()
128
+ if hasattr(self.video_generator, 'enable_vae_tiling'):
129
+ self.video_generator.enable_vae_tiling()
130
+ if hasattr(self.video_generator, 'enable_xformers_memory_efficient_attention'):
131
+ try:
132
+ self.video_generator.enable_xformers_memory_efficient_attention()
133
+ except:
134
+ print("XFormers not available for video generator")
135
+
136
+ print("βœ… Video generator (AnimateDiff) loaded")
137
+
138
+ except Exception as e:
139
+ print(f"❌ Video generator failed: {e}")
140
+ self.video_generator = None
141
 
142
+ try:
143
+ # 4. Text-to-Speech (Open source XTTS)
144
+ self.tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(self.device)
145
+ print("βœ… TTS model loaded")
146
+
147
+ except Exception as e:
148
+ print(f"❌ TTS model failed: {e}")
149
+ self.tts_model = None
150
 
151
  self.models_loaded = True
152
+ print("🎬 Model loading completed!")
153
 
154
  def clear_gpu_memory(self):
155
  """Clear GPU memory between operations"""
 
160
  def enhance_script_with_llm(self, raw_script: str) -> Dict[str, Any]:
161
  """Use open-source LLM to enhance the script"""
162
 
163
+ # Always return structured script (fallback method)
164
+ return self.create_structured_script(raw_script, raw_script)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  def create_structured_script(self, original: str, enhanced: str) -> Dict[str, Any]:
167
  """Create structured script data"""
 
187
  else:
188
  setting = "colorful fantasy world"
189
 
190
+ # Create 8 scenes for shorter processing time
191
  scenes = []
192
  scene_templates = [
193
  "Introduction of the main character",
194
+ "Character discovers the challenge",
195
  "Meeting helpful friends",
196
  "First obstacle appears",
197
  "Character shows determination",
 
 
 
 
198
  "Final confrontation",
199
  "Resolution and victory",
200
  "Happy ending celebration"
 
206
  "description": f"{template} in the {setting}",
207
  "characters_present": [main_char] if i % 3 != 0 else [main_char, "supporting character"],
208
  "dialogue": [
209
+ {"character": main_char, "text": f"This is scene {i+1} where {template.lower()}."}
210
  ],
211
  "background": f"{setting} with {['sunrise', 'daylight', 'sunset', 'moonlight'][i % 4]} lighting",
212
+ "mood": ["hopeful", "determined", "friendly", "tense", "brave", "exciting", "triumphant", "joyful"][i],
213
+ "duration": "30"
214
  })
215
 
216
  return {
 
232
 
233
  @spaces.GPU
234
  def generate_character_images(self, characters: List[Dict]) -> Dict[str, str]:
235
+ """Generate character images using available image generator"""
236
  self.load_models()
237
  character_images = {}
238
 
239
+ if not self.image_generator:
240
+ print("❌ No image generator available")
241
+ return character_images
242
+
243
  for character in characters:
244
  prompt = f"cartoon character sheet, {character['description']}, multiple poses, clean white background, 2D animation style, colorful, expressive, high quality"
245
  negative_prompt = "realistic, 3D, dark, scary, inappropriate, low quality, blurry"
 
248
  image = self.image_generator(
249
  prompt=prompt,
250
  negative_prompt=negative_prompt,
251
+ num_inference_steps=20, # Reduced for speed
252
  guidance_scale=7.5,
253
+ height=768, # Smaller for memory efficiency
254
+ width=768
255
  ).images[0]
256
 
257
  char_path = f"{self.temp_dir}/character_{character['name'].replace(' ', '_')}.png"
258
  image.save(char_path)
259
  character_images[character['name']] = char_path
260
+ print(f"βœ… Generated character: {character['name']}")
261
 
262
  # Clear memory after each character
263
  self.clear_gpu_memory()
264
 
265
  except Exception as e:
266
+ print(f"❌ Error generating character {character['name']}: {e}")
267
 
268
  return character_images
269
 
 
273
  self.load_models()
274
  background_images = {}
275
 
276
+ if not self.image_generator:
277
+ print("❌ No image generator available")
278
+ return background_images
279
+
280
  for scene in scenes:
281
  prompt = f"cartoon background, {scene['background']}, {scene['mood']} atmosphere, animated style, no characters, detailed environment, bright colors, 2D animation"
282
  negative_prompt = "characters, people, realistic, dark, scary, low quality"
 
285
  image = self.image_generator(
286
  prompt=prompt,
287
  negative_prompt=negative_prompt,
288
+ num_inference_steps=15, # Reduced for speed
289
  guidance_scale=7.0,
290
+ height=512, # 16:9 aspect ratio
291
+ width=768
292
  ).images[0]
293
 
294
  bg_path = f"{self.temp_dir}/background_scene_{scene['scene_number']}.png"
295
  image.save(bg_path)
296
  background_images[scene['scene_number']] = bg_path
297
+ print(f"βœ… Generated background for scene {scene['scene_number']}")
298
 
299
  # Clear memory after each background
300
  self.clear_gpu_memory()
301
 
302
  except Exception as e:
303
+ print(f"❌ Error generating background for scene {scene['scene_number']}: {e}")
304
 
305
  return background_images
306
 
307
  @spaces.GPU
308
  def generate_scene_videos(self, scenes: List[Dict], character_images: Dict, background_images: Dict) -> List[str]:
309
+ """Generate animated videos for each scene"""
310
  self.load_models()
311
  scene_videos = []
312
 
313
  for scene in scenes:
314
  try:
315
+ if self.video_generator:
316
+ # Create prompt for scene animation
317
+ characters_text = ", ".join(scene['characters_present'])
318
+ prompt = f"cartoon animation, {characters_text} in {scene['background']}, {scene['mood']} mood, 2D animated style, smooth motion, family friendly"
319
+ negative_prompt = "realistic, 3D, static, blurry, low quality, scary"
320
+
321
+ # Generate animated video using AnimateDiff
322
+ video_frames = self.video_generator(
323
+ prompt=prompt,
324
+ negative_prompt=negative_prompt,
325
+ num_frames=12, # Reduced frames for speed
326
+ guidance_scale=7.5,
327
+ num_inference_steps=15, # Reduced steps
328
+ height=512,
329
+ width=768
330
+ ).frames[0]
331
+
332
+ # Save video
333
+ video_path = f"{self.temp_dir}/scene_{scene['scene_number']}.mp4"
334
+ export_to_video(video_frames, video_path, fps=6)
335
+ scene_videos.append(video_path)
336
+ print(f"βœ… Generated video for scene {scene['scene_number']}")
337
+
338
+ # Clear GPU memory
339
+ self.clear_gpu_memory()
340
+
341
+ else:
342
+ # Fallback: create static video
343
+ if scene['scene_number'] in background_images:
344
+ video_path = self.create_static_video(
345
+ Image.open(background_images[scene['scene_number']]),
346
+ int(scene.get('duration', 30)),
347
+ scene['scene_number']
348
+ )
349
+ scene_videos.append(video_path)
350
+ print(f"βœ… Created static video for scene {scene['scene_number']}")
351
 
352
  except Exception as e:
353
+ print(f"❌ Error generating video for scene {scene['scene_number']}: {e}")
354
+ # Create fallback static video
355
  if scene['scene_number'] in background_images:
356
+ try:
357
+ video_path = self.create_static_video(
358
+ Image.open(background_images[scene['scene_number']]),
359
+ int(scene.get('duration', 30)),
360
+ scene['scene_number']
361
+ )
362
+ scene_videos.append(video_path)
363
+ print(f"βœ… Created fallback video for scene {scene['scene_number']}")
364
+ except Exception as e2:
365
+ print(f"❌ Fallback video creation failed: {e2}")
366
 
367
  return scene_videos
368
 
369
  def create_static_video(self, image: Image.Image, duration: int, scene_num: int) -> str:
370
+ """Create video from static image with simple effects"""
371
  video_path = f"{self.temp_dir}/scene_{scene_num}.mp4"
372
 
373
+ try:
374
+ # Convert PIL to OpenCV
375
+ img_array = np.array(image.resize((768, 512)))
376
+ img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
 
 
 
 
 
 
 
 
 
 
377
 
378
+ # Create video writer
379
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
380
+ fps = 24
381
+ out = cv2.VideoWriter(video_path, fourcc, fps, (768, 512))
382
 
383
+ # Add simple zoom effect
384
+ for i in range(duration * fps):
385
+ scale = 1.0 + (i / (duration * fps)) * 0.05 # Slight zoom
386
+ h, w = img_array.shape[:2]
387
+ center_x, center_y = w // 2, h // 2
388
+
389
+ # Create zoom matrix
390
+ M = cv2.getRotationMatrix2D((center_x, center_y), 0, scale)
391
+ zoomed = cv2.warpAffine(img_array, M, (w, h))
392
+
393
+ out.write(zoomed)
394
+
395
+ out.release()
396
+ return video_path
397
+
398
+ except Exception as e:
399
+ print(f"❌ Static video creation failed: {e}")
400
+ return None
401
 
402
  @spaces.GPU
403
  def generate_audio(self, scenes: List[Dict]) -> str:
404
  """Generate audio using open-source XTTS"""
405
+ if not self.tts_model:
406
+ print("❌ No TTS model available")
407
+ return None
408
+
409
  self.load_models()
410
 
411
  try:
 
440
  return audio_path
441
 
442
  except Exception as e:
443
+ print(f"❌ Audio generation failed: {e}")
444
  return None
445
 
446
  def merge_videos_with_ffmpeg(self, scene_videos: List[str], audio_path: str = None) -> str:
 
475
 
476
  result = subprocess.run(cmd, capture_output=True, text=True)
477
  if result.returncode == 0:
478
+ print("βœ… Video merging successful")
479
  return final_video_path
480
  else:
481
+ print(f"❌ FFmpeg error: {result.stderr}")
482
  return None
483
 
484
  except Exception as e:
485
+ print(f"❌ Video merging failed: {e}")
486
  return None
487
 
488
  @spaces.GPU
489
  def generate_cartoon_film(self, script: str, include_audio: bool = True) -> tuple:
490
  """Main function to generate complete cartoon film"""
491
  try:
492
+ print("🎬 Starting cartoon film generation...")
493
 
494
  # Step 1: Enhance script
495
+ print("πŸ“ Processing script...")
496
  processed_script = self.enhance_script_with_llm(script)
497
 
498
  # Step 2: Generate characters
499
+ print("πŸ‘₯ Creating characters...")
500
  character_images = self.generate_character_images(processed_script['characters'])
501
 
502
  # Step 3: Generate backgrounds
503
+ print("🏞️ Creating backgrounds...")
504
  background_images = self.generate_background_images(processed_script['scenes'])
505
 
506
  # Step 4: Generate scene videos
507
+ print("πŸŽ₯ Creating videos...")
508
  scene_videos = self.generate_scene_videos(
509
  processed_script['scenes'],
510
  character_images,
 
514
  # Step 5: Generate audio
515
  audio_path = None
516
  if include_audio:
517
+ print("🎡 Creating audio...")
518
  audio_path = self.generate_audio(processed_script['scenes'])
519
 
520
  # Step 6: Merge final video
521
+ print("🎞️ Finalizing film...")
522
  final_video = self.merge_videos_with_ffmpeg(scene_videos, audio_path)
523
 
524
  if final_video and os.path.exists(final_video):
525
+ print("βœ… Film generation complete!")
526
  return final_video, processed_script, "βœ… Cartoon film generated successfully!"
527
  else:
528
+ print("⚠️ Partial success - some steps may have failed")
529
+ return None, processed_script, "⚠️ Partial generation - check individual steps"
530
 
531
  except Exception as e:
532
+ print(f"❌ Generation failed: {e}")
533
+ # Return error information in proper format
534
  error_info = {
535
  "error": True,
536
  "message": str(e),
 
572
  gr.Markdown("""
573
  # 🎬 AI Cartoon Film Generator (100% Open Source)
574
 
575
+ Transform your script into a complete cartoon film using only open-source models!
576
 
577
  **πŸ”₯ Features:**
578
+ - **Stable Diffusion XL/1.5** for character & background generation
579
+ - **AnimateDiff** for video animation
580
+ - **XTTS** for voice synthesis
581
+ - **ZeroGPU optimized** - completely free!
582
  - **No API keys required** - everything is open source
583
 
584
+ **⚑ Fixed compatibility issues and memory optimization**
585
  """)
586
 
587
  with gr.Row():
 
596
  with gr.Row():
597
  include_audio = gr.Checkbox(
598
  label="🎡 Include AI-Generated Voices",
599
+ value=False, # Default to False for faster testing
600
  info="Generate speech for character dialogue"
601
  )
602
 
 
607
  )
608
 
609
  gr.Markdown("""
610
+ **⏱️ Processing Time:** 5-10 minutes
611
+ **πŸŽ₯ Output:** 4-5 minute MP4 film
612
+ **πŸ“± Models:** SDXL + AnimateDiff + XTTS
613
  """)
614
 
615
  with gr.Column(scale=1):
 
620
 
621
  status_output = gr.Textbox(
622
  label="πŸ“Š Status",
623
+ lines=3
624
  )
625
 
626
  script_details = gr.JSON(
627
  label="πŸ“‹ Generated Script Details",
628
+ visible=True
629
  )
630
 
631
  # Event handlers
 
639
  # Example scripts
640
  gr.Examples(
641
  examples=[
642
+ ["A brave young explorer discovers a magical forest where talking animals help her find a lost treasure.", False],
643
+ ["Two best friends go on a space adventure to help a friendly alien return home.", False],
644
+ ["A small robot learns about emotions when it meets a lonely child in the city.", False],
645
+ ["A young artist discovers their drawings come to life and must help solve problems.", False]
646
  ],
647
  inputs=[script_input, include_audio],
648
  label="πŸ’‘ Try these example scripts:"
 
650
 
651
  gr.Markdown("""
652
  ---
653
+ **πŸ”§ Fixed Issues:**
654
+ - βœ… Memory optimization methods updated for latest diffusers
655
+ - βœ… Fallback models for compatibility
656
+ - βœ… Better error handling and logging
657
+ - βœ… Reduced parameters for ZeroGPU efficiency
 
658
 
659
+ **πŸ’ Completely free and open source!** No API keys required.
660
  """)
661
 
662
  if __name__ == "__main__":