# SaEdit-MultiAi / image_animator.py
# Image Animator (image-to-video) feature
import os
import time
from io import BytesIO

import gradio as gr
import requests
# ---------- CONFIG ----------
# Prefer the HF_API_TOKEN environment variable so the secret is not committed
# to source control; fall back to the inline placeholder for quick local edits.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "YOUR_HF_API_KEY")  # <-- put your Hugging Face token here
# Example image->video model (change if you prefer another)
IMAGE_TO_VIDEO_MODEL = "ali-vilab/image-to-video"
API_URL = f"https://api-inference.huggingface.co/models/{IMAGE_TO_VIDEO_MODEL}"
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
# ----------------------------
def call_image_to_video_api(image_bytes, prompt, steps=20):
    """
    Call the Hugging Face image->video inference endpoint.

    Sends the image as a multipart file upload together with the prompt and
    the inference-step count as form fields.

    Args:
        image_bytes: Raw PNG-encoded image bytes.
        prompt: Text description of the desired animation.
        steps: Number of inference steps (model-specific, conservative default).

    Returns:
        A ``(video_bytes, error)`` tuple: on success ``video_bytes`` holds the
        raw video payload and ``error`` is None; on failure ``video_bytes`` is
        None and ``error`` is a human-readable message.
    """
    files = {
        "image": ("upload.png", image_bytes, "image/png")
    }
    data = {
        "prompt": prompt,
        # Optional, model-specific parameter; form fields must be strings.
        "num_inference_steps": str(steps)
    }
    try:
        resp = requests.post(API_URL, headers=HEADERS, files=files, data=data, timeout=120)
    except requests.RequestException as e:  # connection errors, timeouts, etc.
        return None, f"Request failed: {e}"
    if resp.status_code != 200:
        # Surface the model's error payload: JSON when available, raw text otherwise.
        try:
            err = resp.json()
        except ValueError:
            err = resp.text
        return None, f"Model error {resp.status_code}: {err}"
    # Some HF inference endpoints return raw bytes, some return JSON with base64.
    # Try to detect common cases.
    content_type = resp.headers.get("content-type", "")
    if "video" in content_type or content_type.startswith("application/octet-stream"):
        return resp.content, None
    # If JSON with a base64-encoded "video" field.
    try:
        js = resp.json()
        if isinstance(js, dict) and "video" in js:
            import base64
            return base64.b64decode(js["video"]), None
    except ValueError:  # covers json decode errors and binascii.Error (a ValueError)
        pass
    return None, f"Unexpected response content-type: {content_type}"
def generate_animation(image, prompt, steps, progress=gr.Progress()):
    """
    Gradio handler: animate an uploaded image according to a text prompt.

    Args:
        image: Uploaded image (anything with a PIL-style ``save`` method).
        prompt: Text description of the animation.
        steps: Inference-step count (coerced to int before the API call).
        progress: Gradio progress tracker (injected by Gradio at call time).

    Returns:
        ``(video_path, error)``: path to a temporary .mp4 file and None on
        success, or None and an error message on failure.
    """
    if image is None or prompt is None or prompt.strip() == "":
        return None, "Please upload an image and write a descriptive animation prompt."
    # Encode the uploaded image as PNG bytes for the multipart upload.
    if not hasattr(image, "save"):
        return None, "Invalid image format."
    buf = BytesIO()
    image.save(buf, format="PNG")
    img_bytes = buf.getvalue()
    progress(0, desc="Sending to model...")
    # Call the external HF model (this may take a long time).
    video_bytes, error = call_image_to_video_api(img_bytes, prompt, steps=int(steps))
    if error:
        return None, f"Failed: {error}"
    progress(1.0, desc="Done")  # Gradio progress expects a fraction in [0, 1], not 100
    # gr.Video renders from a filepath (or URL), not a file-like object, so
    # persist the payload to a temp .mp4 and return its path.
    import tempfile
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        tmp.write(video_bytes)
        return tmp.name, None
# ----- Gradio UI ----- (standalone tab / app)
# Layout: a two-column row — inputs (image, prompt, steps, button) on the left,
# outputs (video, status textbox) on the right.
with gr.Blocks() as demo:
    gr.Markdown("<h2 style='text-align:center'>✨ Image Animator — SaEdit MultiAi</h2>")
    with gr.Row():
        with gr.Column(scale=2):
            upload = gr.Image(type="pil", label="Upload image (face/portrait/scene)")
            prompt = gr.Textbox(label="Describe animation (e.g. 'slow zoom-in, gentle sparkles, 3s loop')", lines=3)
            steps = gr.Slider(minimum=5, maximum=50, step=1, value=18, label="Inference steps (quality → speed)")
            generate_btn = gr.Button("Generate Animation")
            note = gr.Markdown(
                "⚠️ Note: This uses a remote image→video model. Processing may take from 30s to a few minutes depending on model and load."
            )
        with gr.Column(scale=3):
            out_video = gr.Video(label="Animated output (mp4)")
            status = gr.Textbox(label="Status / Errors", interactive=False)
    def on_click_generate(img, txt, steps_val):
        """Button callback: delegate to generate_animation and map its
        (result, error) pair onto the (out_video, status) components."""
        video_file, err = generate_animation(img, txt, steps_val)
        if err:
            return None, str(err)
        # return an IO object readable by Gradio as a video
        # NOTE(review): gr.Video is documented to take a filepath/URL, not a
        # BytesIO — confirm this renders with the installed Gradio version.
        return video_file, "Success — download below."
    generate_btn.click(on_click_generate, [upload, prompt, steps], [out_video, status])
# If you want to use this as a module and import the Blocks into your main app,
# you can import demo and place it into a Tab.
if __name__ == "__main__":
    # Launch the standalone Gradio server when run as a script.
    demo.launch()