Spaces:

Nomnommish
/

xAI_Imagine_Studio

Running

App Files Files Community

Nomnommish committed on Mar 14

Commit

5efedb6

verified ·

1 Parent(s): 453bc8d

Update app.py

Browse files

Files changed (1) hide show

app.py +239 -23

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import json
 import base64
 import mimetypes
 import tempfile
 from pathlib import Path
 from urllib.parse import quote
@@ -22,7 +23,7 @@ IMAGE_RESOLUTIONS = ["1k", "2k"]
 VIDEO_ASPECT_RATIOS = ["16:9", "9:16", "1:1", "4:3", "3:4", "3:2", "2:3"]
 VIDEO_RESOLUTIONS = ["480p", "720p"]
-APP_TITLE = "xAI Imagine Studio — T2I + I2I + I2V + V2V"
 def auth_headers(api_key: str) -> dict:
@@ -49,6 +50,25 @@ def file_to_data_uri(filepath: str) -> str:
     return f"data:{mime};base64,{b64}"
 def download_bytes_to_temp(content: bytes, suffix: str) -> str:
     fd, out_path = tempfile.mkstemp(suffix=suffix)
     os.close(fd)
@@ -100,6 +120,31 @@ def extract_video_path(video_input):
     return str(video_input)
 def generate_t2i(api_key, model, prompt, n, aspect_ratio, resolution, progress=gr.Progress(track_tqdm=False)):
     headers = auth_headers(api_key)
     payload = {
@@ -146,28 +191,12 @@ def generate_t2i(api_key, model, prompt, n, aspect_ratio, resolution, progress=g
     return gallery, paths[0], paths, f"Generated {len(paths)} image(s)."
-def edit_like_i2i(api_key, model, prompt, input_image_path, aspect_ratio, progress=gr.Progress(track_tqdm=False)):
     headers = auth_headers(api_key)
-    if not input_image_path:
-        raise gr.Error("Please upload an image.")
-    if not (prompt or "").strip():
-        raise gr.Error("Please enter a prompt.")
-    payload = {
-        "model": model or DEFAULT_IMAGE_MODEL,
-        "prompt": prompt.strip(),
-        "image": {
-            "url": file_to_data_uri(input_image_path),
-            "type": "image_url",
-        },
-        "response_format": "b64_json",
-    }
-    if aspect_ratio and aspect_ratio != "auto":
-        payload["aspect_ratio"] = aspect_ratio
-    progress(0.2, desc="Editing image...")
     resp = requests.post(
         f"{API_BASE}/images/edits",
         headers=headers,
@@ -332,6 +361,98 @@ def generate_v2v(
     return out, out, f"V2V complete. Request ID: {request_id}. Duration: {actual_duration}s"
 css_path = Path("style.css")
 css = css_path.read_text(encoding="utf-8") if css_path.exists() else ""
@@ -343,6 +464,9 @@ with gr.Blocks(title=APP_TITLE, css=css, theme=gr.themes.Soft()) as demo:
     t2i_first_image_state = gr.State(None)
     t2i_all_images_state = gr.State([])
     with gr.Row():
         with gr.Column(scale=2):
@@ -404,7 +528,12 @@ with gr.Blocks(title=APP_TITLE, css=css, theme=gr.themes.Soft()) as demo:
         with gr.Tab("Image → Image"):
             with gr.Row():
                 with gr.Column():
-                    i2i_input = gr.Image(label="Upload Source Image", type="filepath")
                     i2i_prompt = gr.Textbox(label="Transformation Prompt", lines=6)
                     i2i_aspect = gr.Dropdown(label="Aspect Ratio Override", choices=IMAGE_ASPECT_RATIOS, value="auto")
                     i2i_btn = gr.Button("Generate I2I", variant="primary")
@@ -417,7 +546,12 @@ with gr.Blocks(title=APP_TITLE, css=css, theme=gr.themes.Soft()) as demo:
         with gr.Tab("Image Edit"):
             with gr.Row():
                 with gr.Column():
-                    edit_input = gr.Image(label="Upload Image", type="filepath")
                     edit_prompt = gr.Textbox(label="Edit Prompt", lines=6)
                     edit_aspect = gr.Dropdown(label="Aspect Ratio Override", choices=IMAGE_ASPECT_RATIOS, value="auto")
                     edit_btn = gr.Button("Edit Image", variant="primary")
@@ -456,6 +590,37 @@ with gr.Blocks(title=APP_TITLE, css=css, theme=gr.themes.Soft()) as demo:
                     v2v_video_out = gr.Video(label="Generated V2V Video")
                     v2v_download = gr.File(label="Download V2V Video")
     t2i_btn.click(
         fn=generate_t2i,
         inputs=[api_key, image_model, t2i_prompt, t2i_n, t2i_aspect, t2i_resolution],
@@ -499,6 +664,11 @@ with gr.Blocks(title=APP_TITLE, css=css, theme=gr.themes.Soft()) as demo:
         ],
         outputs=[i2v_video, i2v_download, i2v_status],
         api_name=False,
     )
     v2v_btn.click(
@@ -506,6 +676,52 @@ with gr.Blocks(title=APP_TITLE, css=css, theme=gr.themes.Soft()) as demo:
         inputs=[api_key, video_model, v2v_prompt, v2v_video_in, poll_timeout, poll_interval],
         outputs=[v2v_video_out, v2v_download, v2v_status],
         api_name=False,
     )
 if __name__ == "__main__":

 import base64
 import mimetypes
 import tempfile
+import subprocess
 from pathlib import Path
 from urllib.parse import quote
 VIDEO_ASPECT_RATIOS = ["16:9", "9:16", "1:1", "4:3", "3:4", "3:2", "2:3"]
 VIDEO_RESOLUTIONS = ["480p", "720p"]
+APP_TITLE = "xAI Imagine Studio — T2I + I2I + I2V + V2V + Video Expand"
 def auth_headers(api_key: str) -> dict:
     return f"data:{mime};base64,{b64}"
+def normalize_uploaded_files(files):
+    """Coerce an upload value into a flat list of path strings.
+
+    Accepts nothing (``None``/empty), a single path (``str`` or ``Path``),
+    a single payload dict, or an iterable mixing paths, payload dicts
+    (path read from ``"path"``, then ``"name"``) and other objects.
+    Entries that yield no usable path (``None``, ``""``, dicts without a
+    path) are skipped instead of being turned into bogus paths.
+
+    Returns:
+        list[str]: one path string per usable entry, in input order.
+    """
+    if not files:
+        return []
+    # A lone upload is wrapped so the loop below handles every shape;
+    # iterating a dict directly would walk its keys, not its path.
+    if isinstance(files, (str, Path, dict)):
+        files = [files]
+    normalized = []
+    for item in files:
+        if item is None:
+            # str(None) would yield the literal path "None".
+            continue
+        if isinstance(item, dict):
+            path = item.get("path") or item.get("name")
+        elif isinstance(item, (str, Path)):
+            path = item
+        else:
+            # NOTE(review): file-like upload wrappers are assumed to expose
+            # their on-disk location as a string ``.name`` - confirm against
+            # the Gradio version in use. Anything else is stringified as before.
+            name = getattr(item, "name", None)
+            path = name if isinstance(name, str) and name else str(item)
+        if path:
+            normalized.append(str(path))
+    return normalized
 def download_bytes_to_temp(content: bytes, suffix: str) -> str:
     fd, out_path = tempfile.mkstemp(suffix=suffix)
     os.close(fd)
     return str(video_input)
+def build_image_edit_payload(prompt: str, image_paths: list[str], model: str, aspect_ratio: str):
+    """Assemble the JSON body for an image-edit request.
+
+    Every path in ``image_paths`` is embedded as a data URI. A single image
+    is sent under the ``"image"`` key, several under ``"images"``. The
+    aspect ratio is only included when it is set and not ``"auto"``.
+
+    Raises:
+        gr.Error: when no image path is given or the prompt is blank.
+    """
+    if not image_paths:
+        raise gr.Error("Please upload at least one image.")
+    cleaned_prompt = (prompt or "").strip()
+    if not cleaned_prompt:
+        raise gr.Error("Please enter a prompt.")
+    encoded = []
+    for path in image_paths:
+        encoded.append({"url": file_to_data_uri(path), "type": "image_url"})
+    request_body = {
+        "model": model or DEFAULT_IMAGE_MODEL,
+        "prompt": cleaned_prompt,
+        "response_format": "b64_json",
+    }
+    # One image goes under the singular key; several go under the plural key.
+    if len(encoded) == 1:
+        request_body["image"] = encoded[0]
+    else:
+        request_body["images"] = encoded
+    if aspect_ratio and aspect_ratio != "auto":
+        request_body["aspect_ratio"] = aspect_ratio
+    return request_body
 def generate_t2i(api_key, model, prompt, n, aspect_ratio, resolution, progress=gr.Progress(track_tqdm=False)):
     headers = auth_headers(api_key)
     payload = {
     return gallery, paths[0], paths, f"Generated {len(paths)} image(s)."
+def edit_like_i2i(api_key, model, prompt, input_images, aspect_ratio, progress=gr.Progress(track_tqdm=False)):
     headers = auth_headers(api_key)
+    image_paths = normalize_uploaded_files(input_images)
+    payload = build_image_edit_payload(prompt, image_paths, model, aspect_ratio)
+    progress(0.2, desc="Editing image(s)...")
     resp = requests.post(
         f"{API_BASE}/images/edits",
         headers=headers,
     return out, out, f"V2V complete. Request ID: {request_id}. Duration: {actual_duration}s"
+def get_video_duration(video_path: str) -> float:
+    """Return the duration of ``video_path`` in seconds, as printed by ffprobe.
+
+    ffprobe is asked for ``format=duration`` only, with wrappers and keys
+    suppressed, so its stdout is expected to be a bare number.
+
+    Raises:
+        gr.Error: if ffprobe cannot be started, exits with a non-zero
+            status, or prints something that is not a number.
+    """
+    cmd = [
+        "ffprobe",
+        "-v", "error",
+        "-show_entries", "format=duration",
+        "-of", "default=noprint_wrappers=1:nokey=1",
+        video_path,
+    ]
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True)
+    except OSError as exc:
+        # Raised when the executable is missing or not runnable; surface it
+        # as a UI error instead of an unhandled traceback.
+        raise gr.Error(f"Could not run ffprobe: {exc}") from exc
+    if result.returncode != 0:
+        raise gr.Error(f"Could not read video duration:\n{result.stderr}")
+    try:
+        return float(result.stdout.strip())
+    except ValueError as exc:
+        # Empty or non-numeric output.
+        raise gr.Error("Could not parse video duration.") from exc
+def extract_frame_from_video(video_path: str, seconds: float) -> str:
+    """Write the frame at ``seconds`` of ``video_path`` to a temporary PNG.
+
+    Returns:
+        str: path of the PNG; the caller owns the file.
+
+    Raises:
+        gr.Error: if ffmpeg cannot be started, exits with a non-zero
+            status, or produces no frame (for example when seeking past
+            the end of the video). The temporary file is removed first.
+    """
+    fd, frame_path = tempfile.mkstemp(suffix=".png")
+    os.close(fd)
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-ss", str(seconds),
+        "-i", video_path,
+        "-frames:v", "1",
+        frame_path,
+    ]
+    try:
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True)
+        except OSError as exc:
+            # Executable missing or not runnable.
+            raise gr.Error(f"Could not run ffmpeg: {exc}") from exc
+        if result.returncode != 0:
+            raise gr.Error(f"Frame extraction failed:\n{result.stderr}")
+        # ffmpeg can exit 0 without encoding anything, which leaves only the
+        # empty placeholder created by mkstemp.
+        if not os.path.exists(frame_path) or os.path.getsize(frame_path) == 0:
+            raise gr.Error(f"Frame extraction failed:\nNo frame found at {seconds}s.")
+    except Exception:
+        # Best-effort cleanup so failed extractions do not pile up temp files.
+        try:
+            os.remove(frame_path)
+        except OSError:
+            pass
+        raise
+    return frame_path
+def prepare_expand_video(video_input, use_last_generated_video, last_generated_video_path):
+    """Pick the source video and size the timestamp slider to its duration.
+
+    The last generated video is used when the checkbox is on and a path is
+    stored; otherwise the uploaded video is used.
+
+    Returns:
+        tuple: (video path, slider update with new maximum and value,
+        status message).
+
+    Raises:
+        gr.Error: when neither source yields a video path.
+    """
+    if use_last_generated_video and last_generated_video_path:
+        source = last_generated_video_path
+    else:
+        source = extract_video_path(video_input)
+    if not source:
+        raise gr.Error("Upload a video or enable 'Use last generated video'.")
+    duration = get_video_duration(source)
+    # Keep the slider range non-degenerate even for a zero-length clip.
+    slider_max = max(0.1, duration)
+    slider_update = gr.update(maximum=slider_max, value=min(slider_max / 2, slider_max))
+    status = f"Loaded video. Duration: {duration:.2f}s"
+    return source, slider_update, status
+def extract_expand_frame(
+    video_input,
+    use_last_generated_video,
+    last_generated_video_path,
+    timestamp_seconds,
+):
+    """Extract one frame from the selected video at a clamped timestamp.
+
+    The source is the last generated video when the checkbox is on and a
+    path is stored, otherwise the uploaded video. The timestamp is clamped
+    to the range from 0 to (duration - 0.01), never below 0.
+
+    Returns:
+        tuple: (frame path for the preview, frame path for the download,
+        status message).
+
+    Raises:
+        gr.Error: when neither source yields a video path.
+    """
+    if use_last_generated_video and last_generated_video_path:
+        source = last_generated_video_path
+    else:
+        source = extract_video_path(video_input)
+    if not source:
+        raise gr.Error("Upload a video or enable 'Use last generated video'.")
+    duration = get_video_duration(source)
+    requested = float(timestamp_seconds)
+    latest_allowed = max(duration - 0.01, 0.0)
+    ts = max(0.0, min(requested, latest_allowed))
+    frame_path = extract_frame_from_video(source, ts)
+    status = f"Extracted frame at {ts:.2f}s"
+    return frame_path, frame_path, status
+def continue_video_from_frame(
+    api_key,
+    model,
+    prompt,
+    extracted_frame_path,
+    duration,
+    aspect_ratio,
+    resolution,
+    timeout_seconds,
+    poll_interval,
+    progress=gr.Progress(track_tqdm=False),
+):
+    """Generate the next video segment from a previously extracted frame.
+
+    Delegates to ``generate_i2v`` with the extracted frame as the uploaded
+    image and the "last T2I image" option switched off, and returns
+    whatever that call returns.
+
+    Raises:
+        gr.Error: when no frame has been extracted yet.
+    """
+    if not extracted_frame_path:
+        raise gr.Error("Extract a frame first.")
+    i2v_arguments = {
+        "api_key": api_key,
+        "model": model,
+        "prompt": prompt,
+        "uploaded_image_path": extracted_frame_path,
+        # The extracted frame is always the seed image here.
+        "use_last_t2i_image": False,
+        "last_t2i_first_image": None,
+        "duration": duration,
+        "aspect_ratio": aspect_ratio,
+        "resolution": resolution,
+        "timeout_seconds": timeout_seconds,
+        "poll_interval": poll_interval,
+        "progress": progress,
+    }
+    return generate_i2v(**i2v_arguments)
 css_path = Path("style.css")
 css = css_path.read_text(encoding="utf-8") if css_path.exists() else ""
     t2i_first_image_state = gr.State(None)
     t2i_all_images_state = gr.State([])
+    last_generated_video_state = gr.State(None)
+    expand_source_video_state = gr.State(None)
+    expand_frame_state = gr.State(None)
     with gr.Row():
         with gr.Column(scale=2):
         with gr.Tab("Image → Image"):
             with gr.Row():
                 with gr.Column():
+                    i2i_input = gr.File(
+                        label="Upload Source Image(s)",
+                        file_count="multiple",
+                        file_types=["image"],
+                        type="filepath",
+                    )
                     i2i_prompt = gr.Textbox(label="Transformation Prompt", lines=6)
                     i2i_aspect = gr.Dropdown(label="Aspect Ratio Override", choices=IMAGE_ASPECT_RATIOS, value="auto")
                     i2i_btn = gr.Button("Generate I2I", variant="primary")
         with gr.Tab("Image Edit"):
             with gr.Row():
                 with gr.Column():
+                    edit_input = gr.File(
+                        label="Upload Image(s)",
+                        file_count="multiple",
+                        file_types=["image"],
+                        type="filepath",
+                    )
                     edit_prompt = gr.Textbox(label="Edit Prompt", lines=6)
                     edit_aspect = gr.Dropdown(label="Aspect Ratio Override", choices=IMAGE_ASPECT_RATIOS, value="auto")
                     edit_btn = gr.Button("Edit Image", variant="primary")
                     v2v_video_out = gr.Video(label="Generated V2V Video")
                     v2v_download = gr.File(label="Download V2V Video")
+        with gr.Tab("Video Expand"):
+            with gr.Row():
+                with gr.Column():
+                    expand_video_input = gr.Video(label="Upload Source Video")
+                    use_last_generated_video = gr.Checkbox(label="Use last generated video", value=True)
+                    expand_load_btn = gr.Button("Load Video", variant="secondary")
+                    expand_video_status = gr.Textbox(label="Video Status", interactive=False, lines=3)
+                    expand_timestamp = gr.Slider(
+                        label="Frame timestamp (seconds)",
+                        minimum=0,
+                        maximum=10,
+                        step=0.1,
+                        value=0,
+                    )
+                    expand_extract_btn = gr.Button("Extract Frame", variant="secondary")
+                    expand_frame_status = gr.Textbox(label="Frame Status", interactive=False, lines=3)
+                    expand_prompt = gr.Textbox(label="Continuation Prompt", lines=6)
+                    expand_duration = gr.Slider(label="Next Segment Duration", minimum=1, maximum=15, step=1, value=5)
+                    expand_aspect = gr.Dropdown(label="Aspect Ratio", choices=VIDEO_ASPECT_RATIOS, value="16:9")
+                    expand_resolution = gr.Dropdown(label="Resolution", choices=VIDEO_RESOLUTIONS, value="480p")
+                    expand_btn = gr.Button("Generate Next Video Segment", variant="primary")
+                    expand_status = gr.Textbox(label="Expand Status", interactive=False, lines=5)
+                with gr.Column():
+                    expand_frame_preview = gr.Image(label="Extracted Frame", type="filepath")
+                    expand_frame_download = gr.File(label="Download Extracted Frame")
+                    expand_video_out = gr.Video(label="Expanded Video")
+                    expand_video_download = gr.File(label="Download Expanded Video")
     t2i_btn.click(
         fn=generate_t2i,
         inputs=[api_key, image_model, t2i_prompt, t2i_n, t2i_aspect, t2i_resolution],
         ],
         outputs=[i2v_video, i2v_download, i2v_status],
         api_name=False,
+    ).then(
+        fn=lambda p: p,
+        inputs=[i2v_video],
+        outputs=[last_generated_video_state],
+        api_name=False,
     )
     v2v_btn.click(
         inputs=[api_key, video_model, v2v_prompt, v2v_video_in, poll_timeout, poll_interval],
         outputs=[v2v_video_out, v2v_download, v2v_status],
         api_name=False,
+    ).then(
+        fn=lambda p: p,
+        inputs=[v2v_video_out],
+        outputs=[last_generated_video_state],
+        api_name=False,
+    )
+    expand_load_btn.click(
+        fn=prepare_expand_video,
+        inputs=[expand_video_input, use_last_generated_video, last_generated_video_state],
+        outputs=[expand_source_video_state, expand_timestamp, expand_video_status],
+        api_name=False,
+    )
+    expand_extract_btn.click(
+        fn=extract_expand_frame,
+        inputs=[expand_video_input, use_last_generated_video, last_generated_video_state, expand_timestamp],
+        outputs=[expand_frame_preview, expand_frame_download, expand_frame_status],
+        api_name=False,
+    ).then(
+        fn=lambda p: p,
+        inputs=[expand_frame_preview],
+        outputs=[expand_frame_state],
+        api_name=False,
+    )
+    expand_btn.click(
+        fn=continue_video_from_frame,
+        inputs=[
+            api_key,
+            video_model,
+            expand_prompt,
+            expand_frame_state,
+            expand_duration,
+            expand_aspect,
+            expand_resolution,
+            poll_timeout,
+            poll_interval,
+        ],
+        outputs=[expand_video_out, expand_video_download, expand_status],
+        api_name=False,
+    ).then(
+        fn=lambda p: p,
+        inputs=[expand_video_out],
+        outputs=[last_generated_video_state],
+        api_name=False,
     )
 if __name__ == "__main__":