"""Gradio front end for the AI caption generator.

Builds a two-tab UI (image / video) whose handlers delegate to the
``caption_blip`` module, and exposes the resulting ``gr.Blocks`` object as the
module-level ``demo`` variable.
"""

import os
import sys
import warnings
from typing import Optional

# HF Spaces currently emits a noisy FutureWarning from a vendored dependency:
# `torch.distributed.reduce_op` is deprecated ...
# Kept ahead of the third-party imports so the filter is already active while
# they load.
warnings.filterwarnings(
    "ignore",
    category=FutureWarning,
    message=r"`torch\.distributed\.reduce_op` is deprecated.*",
    module=r"spaces\._vendor\.codefind\.registry",
)

import gradio as gr

from caption_blip import (
    UserOptions,
    caption_image_path,
    caption_video,
    compose_image_caption,
)


# Some environments (notably containers) can emit noisy asyncio unraisable exceptions
# on shutdown like: ValueError: Invalid file descriptor: -1
# This is harmless but can confuse users and clutter logs.
def _unraisablehook_filter_asyncio_fd(unraisable: "sys.UnraisableHookArgs") -> None:  # type: ignore[name-defined]
    """Drop "Invalid file descriptor" ValueErrors; forward everything else.

    Args:
        unraisable: The hook argument object supplied by the interpreter.
    """
    exc = unraisable.exc_value
    if isinstance(exc, ValueError) and "Invalid file descriptor" in str(exc):
        return
    # Anything else goes to the interpreter's original hook unchanged.
    sys.__unraisablehook__(unraisable)


sys.unraisablehook = _unraisablehook_filter_asyncio_fd


# Choices offered by the dropdowns in both tabs.
_TEXT_STYLES = [
    "casual",
    "formal",
    "educational",
    "funny",
    "dry",
    "direct",
    "Storytelling",
    "Emotional",
    "Hook-first",
    "Motivational",
    "Call-to-Action",
]
_PLATFORMS = ["Instagram", "LinkedIn", "X(Twitter)", "TikTok", "Facebook", "Pinterest", "Threads"]
_LANGUAGES = ["English", "French", "Spanish", "German", "Italian", "Arabic", "Chinese"]
_CAPTION_LENGTHS = ["small", "medium", "large"]

# User-facing messages for failures of the OpenRouter-backed captioning step.
_TIMEOUT_MESSAGE = (
    "The AI service (OpenRouter) timed out. Please try again in a moment. "
    "If this keeps happening, switch to a faster OpenRouter model or increase OPENROUTER_TIMEOUT_S."
)
_SERVICE_ERROR_MESSAGE = (
    "The AI service (OpenRouter) returned an error. Please try again in a moment."
)


def _require_openrouter() -> None:
    """Ensure the OpenRouter API key is configured.

    Raises:
        gr.Error: If ``OPENROUTER_API_KEY`` is unset or empty in the environment.
    """
    if not os.environ.get("OPENROUTER_API_KEY"):
        raise gr.Error(
            "Missing server secret OPENROUTER_API_KEY. "
            "In Hugging Face Spaces: Settings → Secrets → add OPENROUTER_API_KEY, then restart the Space."
        )


def _build_options(
    text_style: str,
    platform: str,
    keywords: str,
    description: str,
    hashtags: bool,
    emojis: bool,
    language: str,
    caption_length: str,
) -> UserOptions:
    """Pack the UI values into a ``UserOptions``; blank text fields become ``None``."""
    return UserOptions(
        text_style=text_style,
        platform=platform,
        keywords=keywords or None,
        description=description or None,
        hashtags=hashtags,
        emojis=emojis,
        language=language,
        caption_length=caption_length,
    )


def _friendly_openrouter_error(exc: RuntimeError) -> Optional[gr.Error]:
    """Map a captioning ``RuntimeError`` to a user-facing ``gr.Error``.

    Returns:
        The timeout message when the error text mentions a timeout, a generic
        service-error message when it only mentions OpenRouter, and ``None``
        when the error is unrelated and should propagate untouched.
    """
    text = str(exc).lower()
    if "timed out" in text or "timeout" in text:
        return gr.Error(_TIMEOUT_MESSAGE)
    if "openrouter" in text:
        # Not a timeout: do not send the user off to tune OPENROUTER_TIMEOUT_S.
        return gr.Error(_SERVICE_ERROR_MESSAGE)
    return None


def _run_image(
    image_path: str,
    text_style: str,
    platform: str,
    keywords: str,
    description: str,
    hashtags: bool,
    emojis: bool,
    language: str,
    caption_length: str,
) -> str:
    """Generate a caption for an uploaded image.

    Args:
        image_path: Path of the uploaded image; empty/None when nothing was uploaded.
        text_style, platform, keywords, description, hashtags, emojis,
        language, caption_length: Values of the option controls, forwarded
            to ``UserOptions``.

    Returns:
        The caption produced by ``compose_image_caption``.

    Raises:
        gr.Error: If the API key is missing, no image was provided, or the
            captioning step fails with an OpenRouter/timeout ``RuntimeError``.
        RuntimeError: Any other ``RuntimeError`` from the captioning step.
    """
    _require_openrouter()
    if not image_path:
        raise gr.Error("Please upload an image before generating a caption.")

    opts = _build_options(
        text_style, platform, keywords, description, hashtags, emojis, language, caption_length
    )
    base = caption_image_path(image_path)
    try:
        return compose_image_caption(base, opts, use_openrouter=True)
    except RuntimeError as e:
        friendly = _friendly_openrouter_error(e)
        if friendly is None:
            raise
        # Chain the cause so the original failure stays visible in server logs.
        raise friendly from e


def _run_video(
    video_path: str,
    text_style: str,
    platform: str,
    keywords: str,
    description: str,
    hashtags: bool,
    emojis: bool,
    language: str,
    caption_length: str,
) -> str:
    """Generate a caption for an uploaded video.

    Args:
        video_path: Path of the uploaded video; empty/None when nothing was uploaded.
        text_style, platform, keywords, description, hashtags, emojis,
        language, caption_length: Values of the option controls, forwarded
            to ``UserOptions``.

    Returns:
        The caption produced by ``caption_video``.

    Raises:
        gr.Error: If the API key is missing, no video was provided, or the
            captioning step fails with an OpenRouter/timeout ``RuntimeError``.
        RuntimeError: Any other ``RuntimeError`` from the captioning step.
    """
    _require_openrouter()
    if not video_path:
        raise gr.Error("Please upload a video before generating a caption.")

    opts = _build_options(
        text_style, platform, keywords, description, hashtags, emojis, language, caption_length
    )
    try:
        return caption_video(video_path, opts, use_openrouter=True)
    except RuntimeError as e:
        friendly = _friendly_openrouter_error(e)
        if friendly is None:
            raise
        # Chain the cause so the original failure stays visible in server logs.
        raise friendly from e


def _build_option_controls(keywords_placeholder: str) -> list:
    """Create the caption option widgets inside the current Gradio context.

    Args:
        keywords_placeholder: Placeholder text for the keywords box (the only
            option control that differs between the two tabs).

    Returns:
        The components in the positional order the ``_run_*`` handlers expect
        after the media path: text style, platform, keywords, description,
        hashtags, emojis, language, caption length.
    """
    with gr.Row():
        text_style = gr.Dropdown(_TEXT_STYLES, value="educational", label="Text style")
        platform = gr.Dropdown(_PLATFORMS, value="LinkedIn", label="Platform")
    keywords = gr.Textbox(label="Keywords (optional)", placeholder=keywords_placeholder)
    description = gr.Textbox(
        label="Description / additional context (optional)",
        lines=3,
        placeholder="Add any extra info you want reflected in the caption",
    )
    with gr.Row():
        language = gr.Dropdown(_LANGUAGES, value="English", label="Language")
        caption_length = gr.Dropdown(_CAPTION_LENGTHS, value="medium", label="Caption length")
    with gr.Row():
        hashtags = gr.Checkbox(value=False, label="Add hashtags")
        emojis = gr.Checkbox(value=False, label="Use emojis")
    # Handler argument order differs from on-screen order: checkboxes come
    # before language / caption length.
    return [text_style, platform, keywords, description, hashtags, emojis, language, caption_length]


def build_demo() -> gr.Blocks:
    """Assemble the two-tab (Image / Video) caption generator UI.

    Returns:
        The ``gr.Blocks`` app with the ``caption_image`` and ``caption_video``
        API endpoints wired to ``_run_image`` and ``_run_video``.
    """
    with gr.Blocks(title="AI Caption Generator") as demo:
        gr.Markdown("# AI Caption Generator\nUpload an image or a short video and get a caption.")

        with gr.Tab("Image"):
            image = gr.Image(type="filepath", label="Upload image")
            out = gr.Textbox(label="Caption", lines=4)
            image_options = _build_option_controls("e.g. AI, startups, productivity")
            btn = gr.Button("Generate caption")
            btn.click(
                _run_image,
                inputs=[image, *image_options],
                outputs=[out],
                api_name="caption_image",
            )

        with gr.Tab("Video"):
            video = gr.Video(label="Upload video")
            outv = gr.Textbox(label="Caption", lines=5)
            video_options = _build_option_controls("e.g. product launch, demo")
            btnv = gr.Button("Generate caption")
            btnv.click(
                _run_video,
                inputs=[video, *video_options],
                outputs=[outv],
                api_name="caption_video",
            )

        gr.Markdown(
            "**Notes**\n"
            "- First run downloads the BLIP model from Hugging Face unless `BLIP_OFFLINE=1`.\n"
            "- This demo requires the server-side secret `OPENROUTER_API_KEY`."
        )
    return demo


# Hugging Face Spaces expects a top-level variable named `demo` (or `app`).
demo = build_demo()
demo.queue()


if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", "7860")),
        ssr_mode=False,
    )