Spaces:
Sleeping
Sleeping
File size: 5,645 Bytes
0d7b5a8 0e9675b 0d7b5a8 0e9675b 0d7b5a8 0e9675b 0d7b5a8 0e9675b 0d7b5a8 0e9675b 0d7b5a8 0e9675b 0d7b5a8 0e9675b 0d7b5a8 0e9675b 0d7b5a8 0e9675b 0d7b5a8 0e9675b 0d7b5a8 0e9675b 0d7b5a8 0e9675b 0d7b5a8 0e9675b 0d7b5a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
import base64
import io
import os
import random
import tempfile
import uuid

import requests
from PIL import Image

import gradio as gr
# ====== CONFIG ======
# Modal inference endpoint. Contract (see call_modal_backend): POST with an
# optional multipart image body; prompt and seed travel in the querystring.
MODAL_URL = "https://moonmath-ai--moonmath-i2v-backend-moonmathinference-run.modal.run"
REQUEST_TIMEOUT_SEC = 600 # seconds; video generation is slow — raise if the backend needs longer
# ====== BACKEND CALLER ======
def call_modal_backend(prompt: str, image: Image.Image | None, seed: int | None):
    """
    Send prompt + optional image to the Modal backend and return its video.

    The backend may answer in any of three shapes, all handled here:
      1. raw MP4 bytes (content-type ``video/mp4`` or ``application/octet-stream``)
      2. JSON with a video URL (``video_url`` / ``url`` / ``result.video_url``)
      3. JSON with base64 video (``video_b64`` / ``video_bytes`` / ``result.video_b64``)

    Returns:
        A local ``.mp4`` file path or a remote URL — both usable by ``gr.Video``.

    Raises:
        gr.Error: if neither prompt nor image is given, or a JSON response
            carries no recognizable video field.
        requests.HTTPError: if the backend returns a non-2xx status.
    """
    if not prompt and image is None:
        raise gr.Error("Please provide a prompt or upload an image.")

    # Build a multipart body only when an image was provided.
    files = None
    if image is not None:
        buf = io.BytesIO()
        image.save(buf, format="PNG")  # change to JPEG if your backend expects it
        buf.seek(0)
        files = {"image_bytes": ("input.png", buf, "image/png")}

    # Prompt/seed go in the query string (backend contract, see CONFIG note).
    params = {}
    if prompt:
        params["prompt"] = prompt
    if seed is not None:
        params["seed"] = str(seed)

    res = requests.post(
        MODAL_URL,
        params=params,
        files=files,
        headers={"accept": "application/json"},
        timeout=REQUEST_TIMEOUT_SEC,
    )
    res.raise_for_status()

    ctype = (res.headers.get("content-type") or "").lower()

    # 1) Raw MP4 bytes returned directly.
    if "video/mp4" in ctype or ctype.startswith("application/octet-stream"):
        return _write_mp4(res.content)

    # 2) JSON carrying either a URL or base64-encoded video.
    if "application/json" in ctype:
        data = res.json()
        # Defensive: "result" may be absent, null, or (unexpectedly) a non-dict;
        # the old chained .get() crashed with AttributeError on null.
        result = data.get("result")
        if not isinstance(result, dict):
            result = {}

        url = data.get("video_url") or data.get("url") or result.get("video_url")
        if url:
            return url  # gr.Video can stream a URL

        b64 = data.get("video_b64") or data.get("video_bytes") or result.get("video_b64")
        if b64:
            if "," in b64:  # strip a "data:video/mp4;base64," style header
                b64 = b64.split(",", 1)[1]
            return _write_mp4(base64.b64decode(b64))

        raise gr.Error(f"Backend JSON did not contain a video field. Keys: {list(data.keys())}")

    # 3) Fallback: assume the payload is MP4 bytes.
    return _write_mp4(res.content)


def _write_mp4(blob: bytes) -> str:
    """Write *blob* to a unique temporary .mp4 file and return its path."""
    # tempfile instead of the CWD: the working directory may be read-only in
    # deployed containers, and temp files don't accumulate next to the app.
    fd, path = tempfile.mkstemp(prefix="out_", suffix=".mp4")
    with os.fdopen(fd, "wb") as f:
        f.write(blob)
    return path
# ====== UI CALLBACK ======
def on_generate(prompt, image, seed, lock_longshot):
    """
    UI callback: resolve the seed, optionally reinforce long-shot wording in
    the prompt, then forward everything to the Modal backend.

    lock_longshot stays in the signature so constraints can later be enforced
    server-side; for now it only augments the prompt text client-side.
    """
    # Resolve the seed: a blank or unparsable value falls back to a random
    # 31-bit seed (gr.Number may hand us None, a float, or a string).
    try:
        seed_val = int(seed)
    except (TypeError, ValueError):
        seed_val = random.randint(0, 2**31 - 1)

    # Optionally append long-shot camera constraints to the prompt
    # (harmless no-op for backends that ignore the extra wording).
    if lock_longshot and prompt:
        constraints = "; ".join([
            "single continuous long shot",
            "no cuts, no new shot, no angle switch",
            "smooth camera motion (pan/tilt/zoom only)",
            "unbroken continuity",
        ])
        prompt = f"{prompt.strip()}. {constraints}"

    result = call_modal_backend(prompt, image, seed_val)
    return result, f"Seed: {seed_val}"
# ====== STYLE ======
# CSS injected into gr.Blocks(css=...). Each #id selector below targets a
# component bound via elem_id= in the layout section.
CUSTOM_CSS = """
.gradio-container { padding: 24px; }
/* Big rounded prompt box */
#prompt-box textarea {
border-radius: 28px !important;
min-height: 180px;
font-size: 18px;
line-height: 1.45;
padding: 18px 22px;
}
/* Rounded square image card */
#add-image .wrap,
#add-image .input-image,
#add-image .empty {
border-radius: 28px !important;
min-width: 240px;
min-height: 240px;
}
/* Pill generate button */
#gen-btn button {
border-radius: 999px !important;
padding: 12px 24px;
font-size: 18px;
}
"""
# ====== APP ======
# Layout: prompt textbox on top, image card + controls row below, video output
# last. Component creation order defines the rendered order inside Blocks.
with gr.Blocks(css=CUSTOM_CSS, title="Stitch UI – Modal Hook") as demo:
    gr.Markdown("### Stitch – turn prompt/image into a generated video (Modal backend)")
    # Row 1: big rounded prompt input (styled via #prompt-box in CUSTOM_CSS).
    # NOTE(review): label=None may still render a default label; Gradio's
    # documented way to hide a label is show_label=False — confirm against
    # the Gradio version in use.
    prompt_tb = gr.Textbox(
        label=None,
        placeholder="Prompt input",
        lines=8,
        elem_id="prompt-box"
    )
    # Row 2: image card on the left, seed/lock controls + generate on the right.
    with gr.Row():
        with gr.Column(scale=1, min_width=300):
            # type="pil" so on_generate receives a PIL.Image (or None).
            img_in = gr.Image(label="Add Image", type="pil", elem_id="add-image")
        with gr.Column(scale=3, min_width=300):
            with gr.Row():
                # Blank seed -> on_generate picks a random one.
                seed_in = gr.Number(value=None, label="Seed (optional)")
                lock_long = gr.Checkbox(value=True, label="Lock camera (long shot, no cuts)")
            gen_btn = gr.Button("Generate", elem_id="gen-btn")
    # Output row: generated video plus a markdown line reporting the seed used.
    with gr.Row():
        video_out = gr.Video(label="Output Video", interactive=False, autoplay=True)
        info_out = gr.Markdown("")
    # Wire the button: inputs/outputs order must match on_generate's
    # signature and its (video, info) return tuple.
    gen_btn.click(
        fn=on_generate,
        inputs=[prompt_tb, img_in, seed_in, lock_long],
        outputs=[video_out, info_out]
    )
# Launch only when run as a script (not when imported, e.g. by a Space runner).
if __name__ == "__main__":
    demo.launch()