3v324v23's picture
Refactor application to use Gradio for the frontend and remove Dockerfile and unused dependencies
3660521
import os
import sys
import warnings
# Silence one specific, noisy FutureWarning seen on HF Spaces: a vendored
# dependency warns that `torch.distributed.reduce_op` is deprecated.
# The filter is scoped by category, message prefix and emitting module so
# that no other warning is hidden.
warnings.filterwarnings(
    "ignore",
    module=r"spaces\._vendor\.codefind\.registry",
    message=r"`torch\.distributed\.reduce_op` is deprecated.*",
    category=FutureWarning,
)
import gradio as gr
from caption_blip import UserOptions, caption_image_path, caption_video
# In some environments (containers in particular) interpreter shutdown logs
# unraisable asyncio errors such as "ValueError: Invalid file descriptor: -1".
# They are harmless, but they clutter logs and can confuse users.
def _unraisablehook_filter_asyncio_fd(unraisable: "sys.UnraisableHookArgs") -> None:  # type: ignore[name-defined]
    """Unraisable-exception hook that drops "Invalid file descriptor" noise.

    Any unraisable ``ValueError`` whose text contains "Invalid file descriptor"
    is silently discarded (the match is on the message only, not on where the
    error came from). Everything else is forwarded to the interpreter's
    default hook, ``sys.__unraisablehook__``.

    Args:
        unraisable: The hook argument supplied by the interpreter; only its
            ``exc_value`` attribute is inspected here.
    """
    err = unraisable.exc_value
    is_fd_noise = isinstance(err, ValueError) and "Invalid file descriptor" in str(err)
    if not is_fd_noise:
        sys.__unraisablehook__(unraisable)


# Install the filter process-wide at import time.
sys.unraisablehook = _unraisablehook_filter_asyncio_fd
def _require_openrouter() -> None:
    """Ensure the ``OPENROUTER_API_KEY`` environment variable is set and non-empty.

    Raises:
        gr.Error: If the variable is missing or empty; the message tells the
            operator how to add the secret in Hugging Face Spaces.
    """
    if os.environ.get("OPENROUTER_API_KEY"):
        return
    raise gr.Error(
        "Missing server secret OPENROUTER_API_KEY. "
        "In Hugging Face Spaces: Settings → Secrets → add OPENROUTER_API_KEY, then restart the Space."
    )
def _run_image(
    image_path: str,
    text_style: str,
    platform: str,
    keywords: str,
    description: str,
    hashtags: bool,
    emojis: bool,
    language: str,
    caption_length: str,
) -> str:
    """Gradio click handler: produce a caption for an uploaded image.

    The image is first passed to ``caption_image_path`` and its result is then
    handed, together with the user's options, to ``compose_image_caption``
    with OpenRouter enabled.

    Args:
        image_path: Filesystem path of the uploaded image. Gradio passes
            ``None`` when nothing has been uploaded; that case is rejected
            with a user-facing error.
        text_style: Selected writing style.
        platform: Selected target platform.
        keywords: Optional keywords; an empty string is forwarded as ``None``.
        description: Optional extra context; an empty string is forwarded as
            ``None``.
        hashtags: Whether hashtags were requested.
        emojis: Whether emojis were requested.
        language: Selected output language.
        caption_length: Selected caption length.

    Returns:
        The caption returned by ``compose_image_caption``.

    Raises:
        gr.Error: If ``OPENROUTER_API_KEY`` is not configured, if no image was
            uploaded, or if composing the caption raised a ``RuntimeError``
            whose message mentions OpenRouter or a timeout.
        RuntimeError: Any other ``RuntimeError`` from caption composition is
            re-raised unchanged.
    """
    _require_openrouter()

    # An empty upload arrives as None; fail with a clear message instead of
    # letting the captioning backend crash on a missing path.
    if not image_path:
        raise gr.Error("Please upload an image before generating a caption.")

    opts = UserOptions(
        text_style=text_style,
        platform=platform,
        keywords=keywords or None,
        description=description or None,
        hashtags=hashtags,
        emojis=emojis,
        language=language,
        caption_length=caption_length,
    )
    base = caption_image_path(image_path)

    from caption_blip import compose_image_caption

    try:
        return compose_image_caption(base, opts, use_openrouter=True)
    except RuntimeError as e:
        msg = str(e).lower()
        if any(marker in msg for marker in ("openrouter", "timed out", "timeout")):
            # Chain the original error so the server log keeps the real cause.
            raise gr.Error(
                "The AI service (OpenRouter) timed out. Please try again in a moment. "
                "If this keeps happening, switch to a faster OpenRouter model or increase OPENROUTER_TIMEOUT_S."
            ) from e
        raise
def _run_video(
    video_path: str,
    text_style: str,
    platform: str,
    keywords: str,
    description: str,
    hashtags: bool,
    emojis: bool,
    language: str,
    caption_length: str,
) -> str:
    """Gradio click handler: produce a caption for an uploaded video.

    Builds a ``UserOptions`` from the form values and delegates to
    ``caption_video`` with OpenRouter enabled.

    Args:
        video_path: Filesystem path of the uploaded video. Gradio passes
            ``None`` when nothing has been uploaded; that case is rejected
            with a user-facing error.
        text_style: Selected writing style.
        platform: Selected target platform.
        keywords: Optional keywords; an empty string is forwarded as ``None``.
        description: Optional extra context; an empty string is forwarded as
            ``None``.
        hashtags: Whether hashtags were requested.
        emojis: Whether emojis were requested.
        language: Selected output language.
        caption_length: Selected caption length.

    Returns:
        The caption returned by ``caption_video``.

    Raises:
        gr.Error: If ``OPENROUTER_API_KEY`` is not configured, if no video was
            uploaded, or if ``caption_video`` raised a ``RuntimeError`` whose
            message mentions OpenRouter or a timeout.
        RuntimeError: Any other ``RuntimeError`` from ``caption_video`` is
            re-raised unchanged.
    """
    _require_openrouter()

    # An empty upload arrives as None; fail with a clear message instead of
    # letting the captioning backend crash on a missing path.
    if not video_path:
        raise gr.Error("Please upload a video before generating a caption.")

    opts = UserOptions(
        text_style=text_style,
        platform=platform,
        keywords=keywords or None,
        description=description or None,
        hashtags=hashtags,
        emojis=emojis,
        language=language,
        caption_length=caption_length,
    )

    try:
        return caption_video(video_path, opts, use_openrouter=True)
    except RuntimeError as e:
        msg = str(e).lower()
        if any(marker in msg for marker in ("openrouter", "timed out", "timeout")):
            # Chain the original error so the server log keeps the real cause.
            raise gr.Error(
                "The AI service (OpenRouter) timed out. Please try again in a moment. "
                "If this keeps happening, switch to a faster OpenRouter model or increase OPENROUTER_TIMEOUT_S."
            ) from e
        raise
def build_demo() -> gr.Blocks:
    """Build the caption-generator UI with an "Image" tab and a "Video" tab.

    Both tabs expose the same option controls (text style, platform, keywords,
    description, language, caption length, hashtags, emojis). They differ only
    in the upload component, the height of the output box, the keywords
    placeholder, the click handler and the API endpoint name, so a single
    local helper builds both and the two tabs cannot drift apart.

    Returns:
        The assembled ``gr.Blocks`` app. It is neither queued nor launched
        here.
    """
    text_styles = [
        "casual",
        "formal",
        "educational",
        "funny",
        "dry",
        "direct",
        "Storytelling",
        "Emotional",
        "Hook-first",
        "Motivational",
        "Call-to-Action",
    ]
    platforms = ["Instagram", "LinkedIn", "X(Twitter)", "TikTok", "Facebook", "Pinterest", "Threads"]
    languages = ["English", "French", "Spanish", "German", "Italian", "Arabic", "Chinese"]
    lengths = ["small", "medium", "large"]

    def add_caption_tab(tab_title, make_media, output_lines, keywords_hint, handler, endpoint):
        """Create one tab: upload widget, caption output, options, and button.

        ``make_media`` is called inside the tab context so the upload
        component is rendered in this tab. Components are created in the
        order: media, output box, option controls, button.
        """
        # NOTE(review): the grouping of controls into rows is reconstructed
        # from source whose indentation was lost -- confirm against the
        # intended layout.
        with gr.Tab(tab_title):
            media = make_media()
            result = gr.Textbox(label="Caption", lines=output_lines)
            with gr.Row():
                text_style = gr.Dropdown(text_styles, value="educational", label="Text style")
                platform = gr.Dropdown(platforms, value="LinkedIn", label="Platform")
            keywords = gr.Textbox(label="Keywords (optional)", placeholder=keywords_hint)
            description = gr.Textbox(
                label="Description / additional context (optional)",
                lines=3,
                placeholder="Add any extra info you want reflected in the caption",
            )
            with gr.Row():
                language = gr.Dropdown(languages, value="English", label="Language")
                caption_length = gr.Dropdown(lengths, value="medium", label="Caption length")
            with gr.Row():
                hashtags = gr.Checkbox(value=False, label="Add hashtags")
                emojis = gr.Checkbox(value=False, label="Use emojis")
            button = gr.Button("Generate caption")
            # Input order must match the handler's positional parameters.
            button.click(
                handler,
                inputs=[
                    media,
                    text_style,
                    platform,
                    keywords,
                    description,
                    hashtags,
                    emojis,
                    language,
                    caption_length,
                ],
                outputs=[result],
                api_name=endpoint,
            )

    with gr.Blocks(title="AI Caption Generator") as demo:
        gr.Markdown("# AI Caption Generator\nUpload an image or a short video and get a caption.")

        add_caption_tab(
            "Image",
            lambda: gr.Image(type="filepath", label="Upload image"),
            4,
            "e.g. AI, startups, productivity",
            _run_image,
            "caption_image",
        )
        add_caption_tab(
            "Video",
            lambda: gr.Video(label="Upload video"),
            5,
            "e.g. product launch, demo",
            _run_video,
            "caption_video",
        )

        gr.Markdown(
            "**Notes**\n"
            "- First run downloads the BLIP model from Hugging Face unless `BLIP_OFFLINE=1`.\n"
            "- This demo requires the server-side secret `OPENROUTER_API_KEY`."
        )
    return demo
# Hugging Face Spaces expects a top-level variable named `demo` (or `app`),
# so the UI is built and its queue enabled at import time.
demo = build_demo()
demo.queue()

if __name__ == "__main__":
    # Direct execution: listen on all interfaces, on $PORT if set (default 7860).
    listen_port = int(os.getenv("PORT", "7860"))
    demo.launch(server_name="0.0.0.0", server_port=listen_port, ssr_mode=False)