File size: 4,379 Bytes
8bcbcec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import base64
import os
import tempfile
import time
from io import BytesIO

import gradio as gr
import requests

# ---------- CONFIG ----------
# Prefer the HF_API_TOKEN environment variable so the secret is never
# committed to source control; the placeholder is kept as a fallback.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "YOUR_HF_API_KEY")  # <-- put your Hugging Face token here (or export HF_API_TOKEN)
# Example image->video model (change if you prefer another)
IMAGE_TO_VIDEO_MODEL = "ali-vilab/image-to-video"
API_URL = f"https://api-inference.huggingface.co/models/{IMAGE_TO_VIDEO_MODEL}"
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
# ----------------------------

def call_image_to_video_api(image_bytes, prompt, steps=20):
    """
    Call the HF image->video inference endpoint as multipart (image file + prompt).

    Parameters:
        image_bytes: PNG-encoded image payload (bytes).
        prompt: text description of the desired animation.
        steps: number of inference steps forwarded to the model.

    Returns:
        A 2-tuple: (video_bytes, None) on success, or (None, error_message)
        on any failure.  Exactly one element is always None.
    """
    files = {
        "image": ("upload.png", image_bytes, "image/png")
    }
    data = {
        "prompt": prompt,
        # optional params: model-specific, keep conservative defaults
        "num_inference_steps": str(steps),
    }
    try:
        resp = requests.post(API_URL, headers=HEADERS, files=files, data=data, timeout=120)
    except Exception as e:  # boundary: network errors, timeouts, DNS failures
        return None, f"Request failed: {e}"

    if resp.status_code != 200:
        # Surface the model's error payload (JSON if parseable, raw text otherwise).
        try:
            err = resp.json()
        except Exception:
            err = resp.text
        return None, f"Model error {resp.status_code}: {err}"

    # Some HF inference endpoints return raw bytes, some return JSON with base64.
    # Try to detect common cases.
    content_type = resp.headers.get("content-type", "")
    if "video" in content_type or content_type.startswith("application/octet-stream"):
        return resp.content, None
    # If JSON with a base64-encoded "video" field
    try:
        js = resp.json()
        if isinstance(js, dict) and "video" in js:
            return base64.b64decode(js["video"]), None
    except Exception:
        pass
    return None, f"Unexpected response content-type: {content_type}"

def generate_animation(image, prompt, steps, progress=gr.Progress()):
    """
    Gradio handler: upload image + prompt -> remote image->video model.

    Parameters:
        image: PIL image (anything exposing ``.save``); None is rejected.
        prompt: animation description; blank/whitespace input is rejected.
        steps: inference-step count (coerced to int before the API call).
        progress: progress tracker injected by Gradio.

    Returns:
        (path_to_mp4, None) on success, or (None, error_message) on failure.
    """
    if image is None or prompt is None or prompt.strip() == "":
        return None, "Please upload an image and write a descriptive animation prompt."

    # convert PIL image to PNG bytes for the multipart upload
    if not hasattr(image, "save"):
        return None, "Invalid image format."
    buf = BytesIO()
    image.save(buf, format="PNG")
    img_bytes = buf.getvalue()

    progress(0, desc="Sending to model...")
    # call external HF model (this may take time)
    video_bytes, error = call_image_to_video_api(img_bytes, prompt, steps=int(steps))
    if error:
        return None, f"Failed: {error}"

    # gr.Progress expects a 0..1 fraction, not a percentage.
    progress(1.0, desc="Done")
    # gr.Video renders from a file path, not an in-memory stream, so persist
    # the bytes to a temporary .mp4 and hand its path back to the UI.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        tmp.write(video_bytes)
        return tmp.name, None

# ----- Gradio UI ----- (standalone tab / app)
with gr.Blocks() as demo:
    gr.Markdown("<h2 style='text-align:center'>✨ Image Animator — SaEdit MultiAi</h2>")
    with gr.Row():
        # Left column: all user inputs plus the trigger button.
        with gr.Column(scale=2):
            image_input = gr.Image(
                type="pil",
                label="Upload image (face/portrait/scene)",
            )
            prompt_box = gr.Textbox(
                label="Describe animation (e.g. 'slow zoom-in, gentle sparkles, 3s loop')",
                lines=3,
            )
            steps_slider = gr.Slider(
                minimum=5,
                maximum=50,
                step=1,
                value=18,
                label="Inference steps (quality → speed)",
            )
            run_button = gr.Button("Generate Animation")
            usage_note = gr.Markdown(
                "⚠️ Note: This uses a remote image→video model. Processing may take from 30s to a few minutes depending on model and load."
            )
        # Right column: rendered result and a status/error readout.
        with gr.Column(scale=3):
            video_output = gr.Video(label="Animated output (mp4)")
            status_box = gr.Textbox(label="Status / Errors", interactive=False)

    def on_click_generate(img, txt, steps_val):
        # Delegate to the handler and translate its (result, error) pair
        # into the (video, status-message) outputs the components expect.
        result, failure = generate_animation(img, txt, steps_val)
        if failure:
            return None, str(failure)
        return result, "Success — download below."

    run_button.click(
        on_click_generate,
        [image_input, prompt_box, steps_slider],
        [video_output, status_box],
    )

# If you want to use this as a module and import the Blocks into your main app,
# you can import ``demo`` and place it inside a ``gr.Tab``.
# The guard ensures a local server is launched only when run as a script,
# never as a side effect of importing this module.
if __name__ == "__main__":
    demo.launch()