Spaces:

dream2589632147
/

Dream-MultiStyle-Video-Colorizer

Running on Zero

App Files Files Community

dream2589632147 committed on Dec 10, 2025

Commit

fd6068a

verified ·

1 Parent(s): b4aa6ab

Update app.py

Browse files

Files changed (1) hide show

app.py +117 -58

app.py CHANGED Viewed

@@ -3,48 +3,71 @@ import torch
 import cv2
 import numpy as np
 from PIL import Image
-from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, DDIMScheduler
 from controlnet_aux import CannyDetector
-from diffusers.utils import load_image
 import os
 import shutil
 import tempfile
 import datetime
 import ffmpeg
-# 1. تهيئة النموذج
 device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if device == "cuda" else torch.float32
 try:
-    print(f"Loading models on: {device}...")
     controlnet_model = ControlNetModel.from_pretrained(
         "lllyasviel/sd-controlnet-canny", torch_dtype=torch_dtype
     )
     model_id = "runwayml/stable-diffusion-v1-5"
     pipe = StableDiffusionControlNetPipeline.from_pretrained(
         model_id, controlnet=controlnet_model, torch_dtype=torch_dtype
     )
-    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
     pipe.to(device)
-    print("Models loaded successfully.")
 except Exception as e:
-    print(f"Error loading models on CUDA: {e}. Switching to CPU.")
-    controlnet_model = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
-    pipe = StableDiffusionControlNetPipeline.from_pretrained(model_id, controlnet=controlnet_model)
-    pipe.to("cpu")
 canny_processor = CannyDetector()
-# 2. دالة معالجة الفيديو والنموذج
-def colorize_video_multistyle(video_file, reference_image_path, prompt, style_choice, steps=25):
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    output_temp_video_no_audio = os.path.join(tempfile.gettempdir(), f"temp_colored_{timestamp}_no_audio.mp4")
-    final_output_name = f"colored_output_{timestamp}.mp4"
-    # === 1. استخراج الإطارات والصوت ===
-    # 1.1 استخراج الصوت
     audio_path = os.path.join(tempfile.gettempdir(), f"temp_audio_{timestamp}.aac")
     try:
         (
             ffmpeg
@@ -54,57 +77,76 @@ def colorize_video_multistyle(video_file, reference_image_path, prompt, style_ch
         )
         audio_exists = True
     except ffmpeg.Error:
-        audio_exists = False
-        print("No audio found or extraction failed. Proceeding without audio.")
-    # 1.2 قراءة الفيديو للإطارات
     cap = cv2.VideoCapture(video_file)
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    # 2. تجهيز المدخلات للنموذج
     style_prompts = {
-        "Auto Color": "photorealistic color photo, cinematic, detailed, masterpiece",
-        "Vivid": "highly saturated, vibrant color photo, pop art colors",
-        "Vintage": "sepia tone, old film grain, 1940s vintage look",
     }
-    final_prompt = f"{prompt}, {style_prompts.get(style_choice, '')}"
-    negative_prompt = "lowres, bad anatomy, bad hands, blurry, distorted, nsfw, frame, border, changed details, monochrome"
     colored_frames = []
-    # 3. معالجة الإطارات (التلوين)
-    while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break
         pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
         canny_image = canny_processor(pil_image)
-        image_out = pipe(
-            prompt=final_prompt,
-            negative_prompt=negative_prompt,
-            image=canny_image,
-            num_inference_steps=steps,
-            guidance_scale=7.5
-        ).images[0]
         colored_frames.append(np.array(image_out))
     cap.release()
-    # 4. تجميع الإطارات في فيديو مؤقت (MP4) باستخدام OpenCV
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_temp_video_no_audio, fourcc, fps, (width, height))
     for frame in colored_frames:
         out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
     out.release()
-    # 5. دمج الفيديو الملون مع الصوت الأصلي باستخدام FFMPEG-Python
     if audio_exists:
         try:
             (
@@ -113,32 +155,49 @@ def colorize_video_multistyle(video_file, reference_image_path, prompt, style_ch
                 .output(ffmpeg.input(audio_path).audio, final_output_name, vcodec='copy', acodec='copy')
                 .run(overwrite_output=True, quiet=True)
             )
-        except ffmpeg.Error as e:
-            print(f"FFMPEG merge failed: {e.stderr.decode('utf8')}")
-            shutil.copy(output_temp_video_no_audio, final_output_name) # العودة إلى الفيديو بدون صوت
     else:
         shutil.copy(output_temp_video_no_audio, final_output_name)
-    # 6. تنظيف الملفات المؤقتة
-    if os.path.exists(audio_path):
-        os.remove(audio_path)
     return final_output_name
-# 3. واجهة Gradio النهائية
-iface = gr.Interface(
-    fn=colorize_video_multistyle,
-    inputs=[
-        gr.Video(label="ملف الفيديو (إلزامي)"),
-        gr.Image(label="الصورة المرجعية (لنقل الألوان)", type="filepath"),
-        gr.Textbox(label="المطالبة النصية (لتوجيه التلوين)", value=""),
-        gr.Dropdown(["Auto Color", "Vivid", "Vintage"], label="اختيار النمط المسبق", value="Auto Color"),
-        gr.Slider(minimum=10, maximum=50, step=5, value=25, label="خطوات التوليد")
-    ],
-    outputs=gr.Video(label="الفيديو الملون (MP4)"),
-    title="🎨 Multi-Style Video Colorizer",
-    description="تلوين احترافي للفيديو باستخدام ControlNet."
-)
 if __name__ == "__main__":
-    iface.launch(server_name="0.0.0.0", server_port=7860)

 import cv2
 import numpy as np
 from PIL import Image
+from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, LCMScheduler
 from controlnet_aux import CannyDetector
 import os
 import shutil
 import tempfile
 import datetime
 import ffmpeg
+# ==========================================
+# 1. إعدادات الجهاز والنموذج (مع تسريع LCM)
+# ==========================================
 device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if device == "cuda" else torch.float32
+print(f"🚀 Running on: {device}")
+if device == "cpu":
+    print("⚠️ تحذير: المعالجة ستكون بطيئة جداً على CPU. يرجى تفعيل GPU من إعدادات Hugging Face.")
 try:
+    print("⏳ Loading ControlNet & SD Models...")
+    # تحميل ControlNet
     controlnet_model = ControlNetModel.from_pretrained(
         "lllyasviel/sd-controlnet-canny", torch_dtype=torch_dtype
     )
+    # تحميل Stable Diffusion 1.5
     model_id = "runwayml/stable-diffusion-v1-5"
     pipe = StableDiffusionControlNetPipeline.from_pretrained(
         model_id, controlnet=controlnet_model, torch_dtype=torch_dtype
     )
+    # === الخطوة السحرية للسرعة (LCM-LoRA) ===
+    print("⚡ Injecting LCM-LoRA for speed...")
+    pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
+    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
+    # =========================================
     pipe.to(device)
+    # تفعيل تحسينات الذاكرة إذا كنا على GPU
+    if device == "cuda":
+        pipe.enable_attention_slicing()
+    print("✅ Models loaded successfully with LCM Acceleration.")
 except Exception as e:
+    print(f"❌ Error loading models: {e}")
+    raise e
 canny_processor = CannyDetector()
+# ==========================================
+# 2. دالة المعالجة الأساسية
+# ==========================================
+def colorize_video_multistyle(video_file, prompt, style_choice, steps=5):
+    if not video_file:
+        return None
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    output_temp_video_no_audio = os.path.join(tempfile.gettempdir(), f"temp_vis_{timestamp}.mp4")
+    final_output_name = f"colored_lcm_{timestamp}.mp4"
+    # --- استخراج الصوت ---
     audio_path = os.path.join(tempfile.gettempdir(), f"temp_audio_{timestamp}.aac")
+    audio_exists = False
     try:
         (
             ffmpeg
         )
         audio_exists = True
     except ffmpeg.Error:
+        print("⚠️ Warning: No audio found or extraction failed.")
+    # --- قراءة الفيديو ---
     cap = cv2.VideoCapture(video_file)
+    if not cap.isOpened():
+        return None
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    # تحديد عدد الإطارات للمعالجة (لتجنب التوقف في الفيديوهات الطويلة جداً في النسخة التجريبية)
+    # total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    # تجهيز البرومبت
     style_prompts = {
+        "Auto Color": "photorealistic, 8k, masterpiece, cinematic lighting",
+        "Vivid": "vibrant colors, high saturation, pop art style, colorful",
+        "Vintage": "sepia, 1950s film look, grain, nostalgia",
+        "Cyberpunk": "neon lights, cyberpunk, blue and pink hues, futuristic"
     }
+    # إضافة "colorized" للبرومبت لتعزيز التلوين
+    full_prompt = f"{prompt}, {style_prompts.get(style_choice, '')}, colorized video"
+    # البرومبت السلبي (مهم جداً)
+    negative_prompt = "black and white, monochrome, greyscale, low quality, blurry, distorted, nsfw, watermark"
     colored_frames = []
+    print("🎬 Starting Frame Processing...")
+    while True:
         ret, frame = cap.read()
         if not ret:
             break
+        # تحويل من BGR (OpenCV) إلى RGB (PIL)
         pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        # استخراج الحواف (Canny)
         canny_image = canny_processor(pil_image)
+        # المعالجة باستخدام LCM (لاحظ guidance_scale المنخفض)
+        with torch.inference_mode():
+            image_out = pipe(
+                prompt=full_prompt,
+                negative_prompt=negative_prompt,
+                image=canny_image,
+                num_inference_steps=steps, # LCM يحتاج خطوات قليلة (4-8)
+                guidance_scale=1.5,        # LCM يحتاج قيمة منخفضة (بين 1.0 و 2.0)
+                controlnet_conditioning_scale=1.0
+            ).images[0]
+        # تحويل النتيجة لـ NumPy
         colored_frames.append(np.array(image_out))
     cap.release()
+    # --- تجميع الفيديو ---
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_temp_video_no_audio, fourcc, fps, (width, height))
     for frame in colored_frames:
+        # تحويل من RGB إلى BGR للكتابة
         out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
     out.release()
+    # --- دمج الصوت (إن وجد) ---
     if audio_exists:
         try:
             (
                 .output(ffmpeg.input(audio_path).audio, final_output_name, vcodec='copy', acodec='copy')
                 .run(overwrite_output=True, quiet=True)
             )
+        except ffmpeg.Error:
+            shutil.copy(output_temp_video_no_audio, final_output_name)
     else:
         shutil.copy(output_temp_video_no_audio, final_output_name)
+    # تنظيف
+    if os.path.exists(audio_path): os.remove(audio_path)
     return final_output_name
+# ==========================================
+# 3. واجهة Gradio
+# ==========================================
+custom_css = """
+#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
+"""
+with gr.Blocks(css=custom_css, title="Turbo Video Colorizer") as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.Markdown("# ⚡ Turbo Video Colorizer (LCM Accelerated)")
+        gr.Markdown("تلوين الفيديو بسرعة عالية باستخدام تقنية LCM-LoRA.")
+        with gr.Row():
+            video_input = gr.Video(label="رفع الفيديو (يفضل فيديو قصير)")
+        with gr.Row():
+            prompt_input = gr.Textbox(label="وصف المشهد (اختياري)", placeholder="مثال: A man walking in the street")
+            style_dropdown = gr.Dropdown(
+                ["Auto Color", "Vivid", "Vintage", "Cyberpunk"],
+                label="النمط", value="Auto Color"
+            )
+        # LCM يحتاج خطوات قليلة، لذا قللنا الحد الأقصى
+        steps_slider = gr.Slider(minimum=4, maximum=12, step=1, value=5, label="الخطوات (Steps) - 5 هي الأفضل للسرعة")
+        submit_btn = gr.Button("🎨 تلوين الفيديو الآن", variant="primary")
+        video_output = gr.Video(label="النتيجة")
+        submit_btn.click(
+            fn=colorize_video_multistyle,
+            inputs=[video_input, prompt_input, style_dropdown, steps_slider],
+            outputs=video_output
+        )
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)