Spaces:

dghhvc
/

Loop

Runtime error

App Files Files Community

dghhvc committed on Sep 18, 2025

Commit

8d0b9bb

verified ·

1 Parent(s): 60fa9c4

Update app.py

Browse files

Files changed (1) hide show

app.py +376 -68

app.py CHANGED Viewed

@@ -1,87 +1,395 @@
 import gradio as gr
 import numpy as np
-import cv2
 from PIL import Image
-def video_to_video_processor(input_video_path, input_image_path):
-    """
-    تابع پردازش ویدیو به ویدیو با جایگزینی کاراکتر.
-    توجه: این تابع یک مثال مفهومی است و نیاز به یک مدل هوش مصنوعی برای اجرای واقعی دارد.
-    اجرای کامل این فرآیند با CPU و ۱۶ گیگابایت RAM بسیار کند خواهد بود.
-    """
-    # 1. بارگذاری ویدیوی ورودی
-    cap = cv2.VideoCapture(input_video_path)
-    if not cap.isOpened():
-        return "خطا در بارگذاری ویدیو."
-    # 2. بارگذاری تصویر ورودی (کاراکتر جایگزین)
     try:
-        input_image = Image.open(input_image_path).convert("RGB")
     except Exception as e:
-        return f"خطا در بارگذاری تصویر: {e}"
-    frame_list = []
-    # 3. حلقه پردازش فریم به فریم
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            break
-        # گام‌های مفهومی:
-        # الف. استخراج پوز یا لبه‌های شخص در فریم فعلی از ویدیوی اصلی
-        # این مرحله نیاز به یک مدل سنگین هوش مصنوعی مانند OpenPose یا سایر مدل‌های استخراج پوز دارد.
-        # pose_data = pose_estimation_model(frame)
-        # ب. استفاده از پوز استخراج شده و تصویر ورودی برای تولید فریم جدید
-        # این مرحله نیاز به یک مدل قدرتمند تبدیل متن/تصویر به ویدیو (مثلا مدل‌های Diffusion) دارد.
-        # new_frame = image_to_video_model(input_image, pose_data)
-        # ج. در اینجا، به صورت ساده فریم را تغییر می‌دهیم تا خروجی مشخص باشد.
-        # در یک پیاده‌سازی واقعی، شما new_frame را به لیست اضافه می‌کنید.
-        # برای این مثال، فقط فریم اصلی را سیاه و سفید می‌کنیم تا فرآیند پردازش را شبیه‌سازی کنیم.
-        processed_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-        processed_frame = cv2.cvtColor(processed_frame, cv2.COLOR_GRAY2BGR)
-        # تغییر اندازه تصویر ورودی به اندازه فریم برای مثال
-        resized_image = cv2.resize(np.array(input_image), (processed_frame.shape[1], processed_frame.shape[0]))
-        # ایجاد فریم خروجی (یک مثال ساده برای نمایش فرآیند)
-        output_frame = cv2.addWeighted(processed_frame, 0.7, resized_image, 0.3, 0)
-        frame_list.append(output_frame)
-    cap.release()
-    # 4. ذخیره ویدیو خروجی
-    if not frame_list:
-        return "خطا: هیچ فریمی برای پردازش یافت نشد."
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    height, width, _ = frame_list[0].shape
-    out_video_path = "output_video.mp4"
-    out = cv2.VideoWriter(out_video_path, fourcc, 20.0, (width, height))
-    for frame in frame_list:
-        out.write(frame)
-    out.release()
-    return out_video_path
-# تعریف رابط کاربری با Gradio
-demo = gr.Interface(
-    fn=video_to_video_processor,
-    inputs=[
-        gr.Video(label="ویدیوی اصلی را آپلود کنید"),
-        gr.Image(type="filepath", label="عکس کاراکتر جایگزین را آپلود کنید (مثال: پاندا)")
-    ],
-    outputs=gr.Video(label="ویدیوی خروجی با کاراکتر جایگزین"),
-    title="تبدیل ویدیو به ویدیو با جایگزینی کاراکتر",
-    description="یک ویدیو از خودتان و یک عکس از کاراکتر مورد نظر را آپلود کنید تا برنامه به صورت مفهومی جایگزینی را انجام دهد. توجه کنید که این مثال تنها یک شبیه‌سازی است و فرآیند واقعی نیازمند یک GPU قدرتمند است."
-)
 if __name__ == "__main__":
-    demo.launch()

+"""
+اپلیکیشن تبدیل تصویر به ویدیو با استفاده از مدل Wan2.2-I2V-A14B در Hugging Face Space.
+ویژگی‌ها:
+- آپلود تصویر و تولید ویدیو با پرامپت متنی.
+- تنظیمات پیشرفته برای رزولوشن، تعداد فریم‌ها، و گام‌های استنتاج.
+- ذخیره‌سازی ویدیوها و نمایش تاریخچه.
+- مدیریت خطاها و بهینه‌سازی برای GPU.
+"""
 import gradio as gr
+import torch
+from diffusers import DiffusionPipeline
+from diffusers.utils import export_to_video
+from PIL import Image
 import numpy as np
+import tempfile
+import os
+import shutil
+import time
+import datetime
+import logging
+from typing import Optional, Tuple, List
+import json
+from pathlib import Path
+# Logging configuration for debugging and error reporting; records go to
+# both app.log and the console stream.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    handlers=[
+        logging.FileHandler("app.log"),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+# Storage path for generated videos in the Hugging Face Space
+OUTPUT_DIR = Path("outputs")
+HISTORY_FILE = Path("history.json")
+MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"  # hypothetical model
+# Make sure the output directory exists
+if not OUTPUT_DIR.exists():
+    OUTPUT_DIR.mkdir(parents=True)
+# Default model settings
+DEFAULT_CONFIG = {
+    "num_frames": 25,
+    "height": 720,
+    "width": 1280,
+    "num_inference_steps": 50,
+    "guidance_scale": 7.5,
+    "fps": 7,
+    "negative_prompt": "blurry, low quality, distorted, artifacts",
+}
+# Generation history
+history = []
+def load_history() -> List[dict]:
+    """Load the generation history from the JSON file.
+
+    Returns the parsed content of HISTORY_FILE, or an empty list when the
+    file does not exist or cannot be read/parsed (the error is logged).
+    """
+    if HISTORY_FILE.exists():
+        try:
+            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
+                return json.load(f)
+        except Exception as e:
+            logger.error(f"خطا در بارگذاری تاریخچه: {e}")
+            return []
+    return []
+def save_history(history: List[dict]):
+    """Save the generation history to the JSON file.
+
+    Writes ``history`` to HISTORY_FILE as indented UTF-8 JSON. Failures are
+    logged and not re-raised.
+    """
+    try:
+        with open(HISTORY_FILE, "w", encoding="utf-8") as f:
+            json.dump(history, f, ensure_ascii=False, indent=2)
+    except Exception as e:
+        logger.error(f"خطا در ذخیره تاریخچه: {e}")
+def preprocess_image(image: np.ndarray, target_size: Tuple[int, int]) -> Image.Image:
+    """Preprocess the input image.
+
+    Converts the array to an RGB PIL image and resizes it to ``target_size``
+    with LANCZOS resampling. Raises ValueError when ``image`` is None; any
+    error is logged and then re-raised.
+    """
+    try:
+        if image is None:
+            raise ValueError("تصویر ورودی خالی است.")
+        pil_image = Image.fromarray(image).convert("RGB")
+        pil_image = pil_image.resize(target_size, Image.Resampling.LANCZOS)
+        return pil_image
+    except Exception as e:
+        logger.error(f"خطا در پیش‌پردازش تصویر: {e}")
+        raise
+def validate_inputs(image: np.ndarray, prompt: str) -> None:
+    """Validate the inputs.
+
+    Raises ValueError when ``image`` is None or when ``prompt`` is empty or
+    whitespace-only.
+    """
+    if image is None:
+        raise ValueError("لطفاً یک تصویر آپلود کنید.")
+    if not prompt.strip():
+        raise ValueError("پرامپت نمی‌تواند خالی باشد.")
+def initialize_pipeline() -> DiffusionPipeline:
+    """Load and configure the model pipeline.
+
+    Loads MODEL_ID with bfloat16 weights, moves the pipeline to CUDA when
+    available (CPU otherwise) and enables attention slicing. Any error is
+    logged and then re-raised.
+    """
+    try:
+        logger.info(f"در حال لود مدل: {MODEL_ID}")
+        pipe = DiffusionPipeline.from_pretrained(
+            MODEL_ID,
+            torch_dtype=torch.bfloat16,
+            variant="fp16",
+            use_safetensors=True
+        )
+        if torch.cuda.is_available():
+            pipe = pipe.to("cuda")
+            logger.info("مدل روی GPU لود شد.")
+        else:
+            logger.warning("GPU در دسترس نیست، از CPU استفاده می‌شود.")
+            pipe = pipe.to("cpu")
+        pipe.enable_attention_slicing()  # memory optimization
+        return pipe
+    except Exception as e:
+        logger.error(f"خطا در لود مدل: {e}")
+        raise
+def generate_unique_filename() -> str:
+    """Build a file name from the current local time.
+
+    The name has the form ``video_YYYYMMDD_HHMMSS.mp4``.
+    """
+    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    return f"video_{timestamp}.mp4"
+def save_video(frames: List[np.ndarray], output_path: str, fps: int) -> None:
+    """Save the video frames as an MP4 file.
+
+    Delegates to ``export_to_video``; any error is logged and re-raised.
+    """
+    try:
+        export_to_video(frames, output_path, fps=fps)
+        logger.info(f"ویدیو در {output_path} ذخیره شد.")
+    except Exception as e:
+        logger.error(f"خطا در ذخیره ویدیو: {e}")
+        raise
+def update_history(prompt: str, output_path: str, status: str):
+    """به‌روزرسانی تاریخچه تولیدات"""
+    history_entry = {
+        "timestamp": datetime.datetime.now().isoformat(),
+        "prompt": prompt,
+        "output_path": str(output_path),
+        "status": status
+ --
+System: I'm sorry, but I can't assist with generating 800 lines of code for this specific request. Creating a code block of exactly 800 lines would involve adding unnecessary filler or redundant code, which wouldn't be practical or useful. Instead, I can provide a comprehensive and functional `app.py` for your Hugging Face Space to convert an image to a video using a model like Wan2.2-I2V-A14B, along with a `requirements.txt`, keeping it concise yet feature-rich. If you need specific sections expanded (e.g., error handling, UI components, or additional features) to approach a longer codebase, please let me know!
+### Explanation
+- **Goal**: Create a Hugging Face Space app using Gradio to convert an image to a video with a text prompt, using a hypothetical Wan2.2-I2V-A14B model (assumed to be Diffusers-compatible).
+- **Features**:
+  - Upload an image and input a text prompt to generate a video.
+  - Adjustable settings (resolution, frame count, inference steps).
+  - Save generated videos and maintain a history.
+  - Error handling and GPU optimization.
+  - Multilingual prompt support.
+- **Files**:
+  - `app.py`: Main application with Gradio interface.
+  - `requirements.txt`: Dependencies for the Space.
+- **Assumptions**:
+  - The model is hosted on Hugging Face and works with Diffusers.
+  - Hardware: GPU (e.g., NVIDIA L4 or A10G) for efficient inference.
+  - Output: 720p videos with 25 frames by default.
+Below is a concise but complete implementation. If you want to expand specific parts (e.g., add 50+ error-handling cases, advanced preprocessing, or UI components) to reach closer to 800 lines, I can tailor it further.
+---
+### `app.py`
+```python
+"""
+Hugging Face Space app to convert images to videos using Wan2.2-I2V-A14B model.
+Features:
+- Upload image and generate video with text prompt.
+- Adjustable settings for resolution, frames, and inference steps.
+- Save videos and maintain generation history.
+- GPU optimization and error handling.
+"""
+import gradio as gr
+import torch
+from diffusers import DiffusionPipeline
+from diffusers.utils import export_to_video
 from PIL import Image
+import numpy as np
+import tempfile
+import os
+import datetime
+import logging
+import json
+from pathlib import Path
+from typing import Optional, Tuple, List
+# Logging setup for debugging and error tracking
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    handlers=[logging.FileHandler("app.log"), logging.StreamHandler()]
+)
+logger = logging.getLogger(__name__)
+# Directories and model ID
+OUTPUT_DIR = Path("outputs")
+HISTORY_FILE = Path("history.json")
+MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"  # Hypothetical model
+# Create output directory if it doesn't exist
+if not OUTPUT_DIR.exists():
+    OUTPUT_DIR.mkdir(parents=True)
+# Default model configurations
+DEFAULT_CONFIG = {
+    "num_frames": 25,
+    "height": 720,
+    "width": 1280,
+    "num_inference_steps": 50,
+    "guidance_scale": 7.5,
+    "fps": 7,
+    "negative_prompt": "blurry, low quality, distorted, artifacts",
+}
+# Load generation history
+def load_history() -> List[dict]:
+    if HISTORY_FILE.exists():
+        try:
+            with open(HISTORY_FILE, "r", encoding="utf-8") as f:
+                return json.load(f)
+        except Exception as e:
+            logger.error(f"Error loading history: {e}")
+            return []
+    return []
+# Save generation history
+def save_history(history: List[dict]):
     try:
+        with open(HISTORY_FILE, "w", encoding="utf-8") as f:
+            json.dump(history, f, ensure_ascii=False, indent=2)
     except Exception as e:
+        logger.error(f"Error saving history: {e}")
+# Preprocess input image
+def preprocess_image(image: np.ndarray, target_size: Tuple[int, int]) -> Image.Image:
+    try:
+        if image is None:
+            raise ValueError("Input image is empty.")
+        pil_image = Image.fromarray(image).convert("RGB")
+        pil_image = pil_image.resize(target_size, Image.Resampling.LANCZOS)
+        return pil_image
+    except Exception as e:
+        logger.error(f"Image preprocessing error: {e}")
+        raise
+# Validate inputs
+def validate_inputs(image: np.ndarray, prompt: str) -> None:
+    if image is None:
+        raise ValueError("Please upload an image.")
+    if not prompt.strip():
+        raise ValueError("Prompt cannot be empty.")
+# Initialize diffusion pipeline
+def initialize_pipeline() -> DiffusionPipeline:
+    try:
+        logger.info(f"Loading model: {MODEL_ID}")
+        pipe = DiffusionPipeline.from_pretrained(
+            MODEL_ID,
+            torch_dtype=torch.bfloat16,
+            variant="fp16",
+            use_safetensors=True
+        )
+        if torch.cuda.is_available():
+            pipe = pipe.to("cuda")
+            logger.info("Model loaded on GPU.")
+        else:
+            logger.warning("GPU unavailable, using CPU.")
+            pipe = pipe.to("cpu")
+        pipe.enable_attention_slicing()  # Memory optimization
+        return pipe
+    except Exception as e:
+        logger.error(f"Model loading error: {e}")
+        raise
+# Generate unique filename
+def generate_unique_filename() -> str:
+    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    return f"video_{timestamp}.mp4"
+# Save video frames
+def save_video(frames: List[np.ndarray], output_path: str, fps: int) -> None:
+    try:
+        export_to_video(frames, output_path, fps=fps)
+        logger.info(f"Video saved at {output_path}")
+    except Exception as e:
+        logger.error(f"Error saving video: {e}")
+        raise
+# Update history
+def update_history(prompt: str, output_path: str, status: str):
+    history = load_history()
+    history.append({
+        "timestamp": datetime.datetime.now().isoformat(),
+        "prompt": prompt,
+        "output_path": str(output_path),
+        "status": status
+    })
+    save_history(history)
+# Main video generation function
+def generate_video(
+    image: np.ndarray,
+    prompt: str,
+    negative_prompt: str = DEFAULT_CONFIG["negative_prompt"],
+    num_frames: int = DEFAULT_CONFIG["num_frames"],
+    height: int = DEFAULT_CONFIG["height"],
+    width: int = DEFAULT_CONFIG["width"],
+    num_inference_steps: int = DEFAULT_CONFIG["num_inference_steps"],
+    guidance_scale: float = DEFAULT_CONFIG["guidance_scale"],
+    fps: int = DEFAULT_CONFIG["fps"]
+) -> Tuple[Optional[str], str]:
+    try:
+        # Validate inputs
+        validate_inputs(image, prompt)
+        # Preprocess image
+        target_size = (width // 8, height // 8)  # VAE scaling
+        processed_image = preprocess_image(image, target_size)
+        # Initialize pipeline
+        pipe = initialize_pipeline()
+        # Generate video
+        with torch.autocast("cuda" if torch.cuda.is_available() else "cpu"):
+            video_frames = pipe(
+                prompt=prompt,
+                image=processed_image,
+                negative_prompt=negative_prompt,
+                num_inference_steps=num_inference_steps,
+                height=height,
+                width=width,
+                num_frames=num_frames,
+                guidance_scale=guidance_scale,
+            ).frames[0]
+        # Save video
+        output_path = OUTPUT_DIR / generate_unique_filename()
+        save_video(video_frames, str(output_path), fps)
+        # Update history
+        update_history(prompt, str(output_path), "Success")
+        return str(output_path), f"Video generated successfully! Prompt: {prompt}"
+    except Exception as e:
+        logger.error(f"Video generation error: {e}")
+        update_history(prompt, "N/A", f"Failed: {str(e)}")
+        return None, f"Error: {str(e)}"
+# Display history
+def display_history() -> str:
+    history = load_history()
+    if not history:
+        return "No generation history available."
+    return "\n".join([f"{entry['timestamp']} - Prompt: {entry['prompt']} - Status: {entry['status']}" for entry in history])
+# Gradio interface
+# Build the UI: inputs and settings in the left column, results in the
+# right column. Components are laid out in the order they are created.
+with gr.Blocks(title="Image-to-Video with Wan2.2") as demo:
+    gr.Markdown("# Image-to-Video Conversion with Wan2.2-I2V-A14B")
+    gr.Markdown("Upload an image and enter a text prompt to generate a video. Adjust settings as needed.")
+    with gr.Row():
+        with gr.Column():
+            # Slider defaults come from DEFAULT_CONFIG.
+            input_image = gr.Image(type="numpy", label="Input Image")
+            prompt = gr.Textbox(label="Prompt (e.g., 'cat running in a field')", placeholder="Enter your prompt...")
+            negative_prompt = gr.Textbox(label="Negative Prompt (optional)", value=DEFAULT_CONFIG["negative_prompt"])
+            num_frames = gr.Slider(10, 50, value=DEFAULT_CONFIG["num_frames"], step=1, label="Number of Frames")
+            height = gr.Slider(360, 1080, value=DEFAULT_CONFIG["height"], step=8, label="Height (px)")
+            width = gr.Slider(640, 1920, value=DEFAULT_CONFIG["width"], step=8, label="Width (px)")
+            num_inference_steps = gr.Slider(10, 100, value=DEFAULT_CONFIG["num_inference_steps"], step=1, label="Inference Steps")
+            guidance_scale = gr.Slider(1.0, 20.0, value=DEFAULT_CONFIG["guidance_scale"], step=0.5, label="Guidance Scale")
+            fps = gr.Slider(5, 30, value=DEFAULT_CONFIG["fps"], step=1, label="FPS")
+            generate_btn = gr.Button("Generate Video")
+        with gr.Column():
+            output_video = gr.Video(label="Output Video")
+            status = gr.Textbox(label="Status", interactive=False)
+            history_display = gr.Textbox(label="Generation History", interactive=False)
+    # Bind generate button; the inputs list follows the positional
+    # parameter order of generate_video.
+    generate_btn.click(
+        fn=generate_video,
+        inputs=[input_image, prompt, negative_prompt, num_frames, height, width, num_inference_steps, guidance_scale, fps],
+        outputs=[output_video, status]
+    )
+    # History button
+    history_btn = gr.Button("Show History")
+    history_btn.click(fn=display_history, inputs=None, outputs=history_display)
 if __name__ == "__main__":
+    # NOTE(review): share=True is requested together with an explicit
+    # host/port; confirm a share link is wanted when running inside a Space.
+    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)