Spaces:

dream2589632147
/

Dream-MultiStyle-Video-Colorizer

Sleeping

App Files Files Community

dream2589632147 committed on Nov 13, 2025

Commit

2e2a632

verified ·

1 Parent(s): 1c4e450

Update app.py

Browse files

Files changed (1) hide show

app.py +129 -139

app.py CHANGED Viewed

@@ -1,139 +1,129 @@
-import gradio as gr
-import torch
-import cv2
-import numpy as np
-from PIL import Image
-from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, DDIMScheduler
-from controlnet_aux import CannyDetector
-from diffusers.utils import load_image
-from moviepy.editor import VideoFileClip
-import os
-import shutil
-import tempfile # لاستخدام مجلدات مؤقتة آمنة
-# 1. تهيئة النموذج
-# تحديد جهاز التشغيل (يجب أن يكون CUDA في بيئة GPU Docker)
-device = "cuda" if torch.cuda.is_available() else "cpu"
-torch_dtype = torch.float16 if device == "cuda" else torch.float32
-# تحميل ControlNet (نموذج Canny)
-try:
-    controlnet_model = ControlNetModel.from_pretrained(
-        "lllyasviel/sd-controlnet-canny", torch_dtype=torch_dtype
-    )
-    # تحميل الـ Pipeline الرئيسية
-    model_id = "runwayml/stable-diffusion-v1-5"
-    pipe = StableDiffusionControlNetPipeline.from_pretrained(
-        model_id, controlnet=controlnet_model, torch_dtype=torch_dtype
-    )
-    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
-    pipe.to(device)
-    print("Models loaded successfully.")
-except Exception as e:
-    print(f"Error loading models: {e}. Running on CPU (slow).")
-    # محاولة التحميل بدون GPU إذا فشل CUDA
-    pipe = StableDiffusionControlNetPipeline.from_pretrained(model_id, controlnet=controlnet_model)
-    pipe.to("cpu")
-# تهيئة مُعالِج Canny
-canny_processor = CannyDetector()
-# 2. دالة معالجة الفيديو والنموذج
-def colorize_video_multistyle(video_file, reference_image_path, prompt, style_choice, steps=25):
-    # استخدام مجلد مؤقت لضمان التنظيف
-    with tempfile.TemporaryDirectory() as temp_dir:
-        # 1. استخراج الإطارات والصوت
-        clip = VideoFileClip(video_file)
-        audio_file = None
-        if clip.audio:
-            audio_file = os.path.join(temp_dir, "temp_audio.mp3")
-            clip.audio.write_audiofile(audio_file, verbose=False, logger=None)
-        fps = clip.fps
-        # 2. تجهيز المدخلات للنموذج
-        style_prompts = {
-            "Auto Color": "photorealistic color photo, cinematic, detailed, masterpiece",
-            "Vivid": "highly saturated, vibrant color photo, pop art colors",
-            "Vintage": "sepia tone, old film grain, 1940s vintage look",
-        }
-        final_prompt = f"{prompt}, {style_prompts.get(style_choice, '')}"
-        negative_prompt = "lowres, bad anatomy, bad hands, blurry, distorted, nsfw, frame, border, changed details"
-        # تجهيز الصورة المرجعية (IP-Adapter - يُفترض أنه محمل إذا كان مطلوباً)
-        # ip_adapter_images = []
-        # if reference_image_path:
-        #     ref_image = load_image(reference_image_path).convert("RGB")
-        #     ip_adapter_images.append(ref_image)
-        #     pipe.set_ip_adapter_images(ip_adapter_images) # يجب إلغاء التعليق إذا تم تحميل IP-Adapter
-        colored_frames = []
-        # 3. معالجة الإطارات (التلوين باستخدام ControlNet)
-        for i, frame in enumerate(clip.iter_frames(fps=fps, dtype='uint8')):
-            pil_image = Image.fromarray(frame).convert("RGB")
-            # الخطوة الحاسمة: استخراج خريطة Canny للحفاظ على الهيكل
-            canny_image = canny_processor(pil_image)
-            # تمرير خريطة Canny للنموذج
-            image_out = pipe(
-                prompt=final_prompt,
-                negative_prompt=negative_prompt,
-                image=canny_image, # ControlNet Canny Input
-                num_inference_steps=steps,
-                guidance_scale=7.5
-            ).images[0]
-            colored_frames.append(np.array(image_out))
-        # 4. تجميع الإطارات في فيديو MP4
-        output_video_path = os.path.join(temp_dir, "colored_temp_video.mp4")
-        height, width, layers = colored_frames[0].shape
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
-        for frame in colored_frames:
-            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
-        out.release()
-        # 5. إضافة الصوت الأصلي
-        final_video_path = "final_colored_video.mp4" # يتم إنشاء هذا الملف في المجلد الحالي
-        if audio_file and os.path.exists(audio_file):
-            video_clip = VideoFileClip(output_video_path)
-            # التأكد من أن مدة الفيديو الملون تتطابق مع مدة الصوت
-            final_clip = video_clip.set_audio(clip.audio)
-            final_clip.write_videofile(final_video_path, codec='libx264', audio_codec='aac', verbose=False, logger=None)
-        else:
-            shutil.copy(output_video_path, final_video_path)
-        clip.close()
-        return final_video_path
-# 3. واجهة Gradio للإنتاج
-iface = gr.Interface(
-    fn=colorize_video_multistyle,
-    inputs=[
-        gr.Video(label="ملف الفيديو (إلزامي)"),
-        gr.Image(label="الصورة المرجعية (لنقل الألوان)", type="filepath", required=False),
-        gr.Textbox(label="المطالبة النصية (لتوجيه التلوين)", required=False, value=""),
-        gr.Dropdown(["Auto Color", "Vivid", "Vintage"], label="اختيار النمط المسبق", value="Auto Color"),
-        gr.Slider(minimum=10, maximum=50, step=5, value=25, label="خطوات التوليد (للسرعة/الجودة)")
-    ],
-    outputs=gr.Video(label="الفيديو الملون (MP4)"),
-    title="🎨 Multi-Style Video Colorizer (Dockerized)",
-    description="تلوين الفيديو مع الحفاظ على الهيكل باستخدام ControlNet."
-)
-if __name__ == "__main__":
-    # تشغيل Gradio على 0.0.0.0 والمنفذ 7860 للسماح بالوصول من خارج الحاوية
-    iface.launch(server_name="0.0.0.0", server_port=7860)

+import gradio as gr
+import torch
+import cv2
+import numpy as np
+from PIL import Image
+from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, DDIMScheduler
+from controlnet_aux import CannyDetector
+from diffusers.utils import load_image
+from moviepy.editor import VideoFileClip
+import os
+import shutil
+import tempfile
+import datetime
+# 1. Model initialisation
+# Use the GPU when one is available locally; half precision is only used on CUDA.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if device == "cuda" else torch.float32
+# The checkpoint ids are defined before the try block so that the CPU fallback
+# below can always reference them, even if the very first load call fails.
+controlnet_id = "lllyasviel/sd-controlnet-canny"
+model_id = "runwayml/stable-diffusion-v1-5"
+try:
+    controlnet_model = ControlNetModel.from_pretrained(
+        controlnet_id, torch_dtype=torch_dtype
+    )
+    pipe = StableDiffusionControlNetPipeline.from_pretrained(
+        model_id, controlnet=controlnet_model, torch_dtype=torch_dtype
+    )
+    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
+    pipe.to(device)
+    print("Models loaded successfully on:", device)
+except Exception as e:
+    # Best-effort fallback: reload both models with their default dtype on the CPU.
+    print(f"Error loading models on CUDA: {e}. Switching to CPU.")
+    # Keep the module-level settings in sync with what is actually loaded.
+    device = "cpu"
+    torch_dtype = torch.float32
+    controlnet_model = ControlNetModel.from_pretrained(controlnet_id)
+    pipe = StableDiffusionControlNetPipeline.from_pretrained(model_id, controlnet=controlnet_model)
+    # Use the same scheduler as the primary path so both paths sample identically.
+    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
+    pipe.to("cpu")
+# Canny edge detector used to build the ControlNet conditioning image for each frame.
+canny_processor = CannyDetector()
+# 2. Video processing function
+def colorize_video_multistyle(video_file, reference_image_path, prompt, style_choice, steps=25):
+    """Colorize every frame of a video with the ControlNet pipeline.
+
+    Each frame is converted to a Canny edge map, passed through the
+    module-level ``pipe`` together with the text prompt, and the generated
+    frames are written to an MP4. If the input clip has an audio track it is
+    attached to the result.
+
+    Args:
+        video_file: Path of the input video.
+        reference_image_path: Optional reference image path. It is currently
+            ignored (no IP-Adapter is loaded in this file); the parameter is
+            kept so the Gradio input list keeps matching the signature.
+        prompt: Free-text prompt; may be empty or None.
+        style_choice: Preset name ("Auto Color", "Vivid" or "Vintage"). An
+            unknown name adds no style text.
+        steps: Number of inference steps per frame.
+
+    Returns:
+        The name of the written MP4 file, created in the current working
+        directory.
+
+    Raises:
+        ValueError: If no video was provided or the video yields no frames.
+        RuntimeError: If the intermediate video writer cannot be opened.
+    """
+    if not video_file:
+        raise ValueError("A video file is required.")
+    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    final_output_name = f"colored_output_{timestamp}.mp4"
+    style_prompts = {
+        "Auto Color": "photorealistic color photo, cinematic, detailed, masterpiece",
+        "Vivid": "highly saturated, vibrant color photo, pop art colors",
+        "Vintage": "sepia tone, old film grain, 1940s vintage look",
+    }
+    # Join only the non-empty parts so an empty prompt does not leave a stray ", ".
+    prompt_parts = ((prompt or "").strip(), style_prompts.get(style_choice, ""))
+    final_prompt = ", ".join(part for part in prompt_parts if part)
+    negative_prompt = "lowres, bad anatomy, bad hands, blurry, distorted, nsfw, frame, border, changed details, monochrome"
+    # Sliders may deliver a float; the pipeline expects an integer step count.
+    num_steps = int(steps)
+    with tempfile.TemporaryDirectory() as temp_dir:
+        output_video_path = os.path.join(temp_dir, "colored_temp_video.mp4")
+        clip = VideoFileClip(video_file)
+        writer = None
+        silent_clip = None
+        try:
+            fps = clip.fps
+            for frame in clip.iter_frames(fps=fps, dtype='uint8'):
+                pil_image = Image.fromarray(frame).convert("RGB")
+                # The edge map is what lets ControlNet keep the frame's structure.
+                canny_image = canny_processor(pil_image)
+                image_out = pipe(
+                    prompt=final_prompt,
+                    negative_prompt=negative_prompt,
+                    image=canny_image,
+                    num_inference_steps=num_steps,
+                    guidance_scale=7.5
+                ).images[0]
+                colored = np.array(image_out)
+                if writer is None:
+                    # Open the writer lazily: the output size is only known once
+                    # the first frame has been generated.
+                    height, width = colored.shape[:2]
+                    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+                    writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
+                    if not writer.isOpened():
+                        raise RuntimeError("Could not open the intermediate video writer.")
+                # Stream each frame to disk instead of buffering the whole video in RAM.
+                writer.write(cv2.cvtColor(colored, cv2.COLOR_RGB2BGR))
+            if writer is None:
+                raise ValueError("The input video contains no frames.")
+            # Finalize the intermediate file before it is read back or copied.
+            writer.release()
+            writer = None
+            if clip.audio is not None:
+                # Attach the original audio track directly; no intermediate
+                # audio file is needed.
+                silent_clip = VideoFileClip(output_video_path)
+                final_clip = silent_clip.set_audio(clip.audio)
+                final_clip.write_videofile(final_output_name, codec='libx264', audio_codec='aac', verbose=False, logger=None)
+            else:
+                shutil.copy(output_video_path, final_output_name)
+        finally:
+            # Release every handle even on failure so the temporary directory
+            # can be removed cleanly.
+            if writer is not None:
+                writer.release()
+            if silent_clip is not None:
+                silent_clip.close()
+            clip.close()
+    return final_output_name
+# 3. Final Gradio interface
+# The input order must match the positional parameters of colorize_video_multistyle.
+# No `required` keyword is passed: it is not a documented parameter of these
+# components, and inputs are optional by default anyway.
+iface = gr.Interface(
+    fn=colorize_video_multistyle,
+    inputs=[
+        gr.Video(label="ملف الفيديو (إلزامي)"),
+        gr.Image(label="الصورة المرجعية (لنقل الألوان)", type="filepath"),
+        gr.Textbox(label="المطالبة النصية (لتوجيه التلوين)", value=""),
+        gr.Dropdown(["Auto Color", "Vivid", "Vintage"], label="اختيار النمط المسبق", value="Auto Color"),
+        gr.Slider(minimum=10, maximum=50, step=5, value=25, label="خطوات التوليد (للسرعة/الجودة)")
+    ],
+    outputs=gr.Video(label="الفيديو الملون (MP4)"),
+    title="🎨 Multi-Style Video Colorizer (Gradio)",
+    description="تلوين احترافي للفيديو باستخدام ControlNet: يُحافظ على الهيكل وتتغير الألوان فقط."
+)
+if __name__ == "__main__":
+    # This line starts the interface on a local address reachable from the browser.
+    # You can pass share=True to share the interface temporarily over the internet.
+    iface.launch() # server_name and server_port were removed to rely on Gradio's default settings