Spaces:

xornyv3
/

caption_generator

Running

App Files Files Community

caption_generator / app.py

3v324v23

Refactor application to use Gradio for the frontend and remove Dockerfile and unused dependencies

3660521 3 days ago

raw

history blame contribute delete

7.99 kB

	import os
	import sys
	import warnings

	# HF Spaces currently emits a noisy FutureWarning from a vendored dependency:
	# `torch.distributed.reduce_op` is deprecated ...
	# The filter is narrowed by category, message prefix and issuing module, so
	# unrelated FutureWarnings are not affected by it.
	# NOTE(review): registered before the `gradio` import below, presumably so the
	# filter is already active when that import triggers the warning — confirm.
	warnings.filterwarnings(
	"ignore",
	category=FutureWarning,
	message=r"`torch\.distributed\.reduce_op` is deprecated.*",
	module=r"spaces\._vendor\.codefind\.registry",
	)
	import gradio as gr

	from caption_blip import UserOptions, caption_image_path, caption_video


	def _unraisablehook_filter_asyncio_fd(unraisable: "sys.UnraisableHookArgs") -> None:  # type: ignore[name-defined]
	    """Unraisable-exception hook that hides one known-harmless asyncio message.

	    Some environments (notably containers) report
	    ``ValueError: Invalid file descriptor: -1`` as an unraisable exception
	    during shutdown. It is harmless, but it clutters logs and can confuse
	    users, so it is dropped here. Every other unraisable exception is passed
	    on to the interpreter's default hook unchanged.

	    Args:
	        unraisable: The hook argument supplied by the interpreter; only its
	            ``exc_value`` attribute is inspected.
	    """
	    error = unraisable.exc_value
	    is_fd_noise = isinstance(error, ValueError) and "Invalid file descriptor" in str(error)
	    if not is_fd_noise:
	        sys.__unraisablehook__(unraisable)


	# Install the filter above as the interpreter's hook for unraisable exceptions.
	sys.unraisablehook = _unraisablehook_filter_asyncio_fd


	def _require_openrouter() -> None:
	    """Abort the current request unless the OpenRouter API key is configured.

	    Raises:
	        gr.Error: If the ``OPENROUTER_API_KEY`` environment variable is unset
	            or empty. The message tells the Space owner where to add it.
	    """
	    if os.environ.get("OPENROUTER_API_KEY"):
	        return

	    missing_secret_help = (
	        "Missing server secret OPENROUTER_API_KEY. "
	        "In Hugging Face Spaces: Settings → Secrets → add OPENROUTER_API_KEY, then restart the Space."
	    )
	    raise gr.Error(missing_secret_help)


	def _run_image(
	    image_path: str,
	    text_style: str,
	    platform: str,
	    keywords: str,
	    description: str,
	    hashtags: bool,
	    emojis: bool,
	    language: str,
	    caption_length: str,
	) -> str:
	    """Generate a caption for an uploaded image (Gradio click handler).

	    Args:
	        image_path: Filesystem path of the uploaded image. Falsy (``None`` or
	            empty) when the button is clicked without an upload.
	        text_style: Writing style selected in the UI.
	        platform: Target social platform selected in the UI.
	        keywords: Optional keywords; an empty string is forwarded as ``None``.
	        description: Optional extra context; an empty string is forwarded as ``None``.
	        hashtags: Whether the caption should include hashtags.
	        emojis: Whether the caption should include emojis.
	        language: Output language selected in the UI.
	        caption_length: Requested caption length selected in the UI.

	    Returns:
	        The caption produced by ``compose_image_caption``.

	    Raises:
	        gr.Error: If the API key is missing, no image was provided, or the
	            caption step raised a ``RuntimeError`` that mentions a timeout or
	            OpenRouter.
	        RuntimeError: Any other ``RuntimeError`` from the caption step is
	            re-raised unchanged.
	    """
	    _require_openrouter()

	    # Fail with a readable message instead of handing ``None`` to the captioner.
	    if not image_path:
	        raise gr.Error("Please upload an image before generating a caption.")

	    from caption_blip import compose_image_caption

	    opts = UserOptions(
	        text_style=text_style,
	        platform=platform,
	        keywords=keywords or None,
	        description=description or None,
	        hashtags=hashtags,
	        emojis=emojis,
	        language=language,
	        caption_length=caption_length,
	    )
	    base = caption_image_path(image_path)

	    try:
	        return compose_image_caption(base, opts, use_openrouter=True)
	    except RuntimeError as e:
	        msg = str(e).lower()
	        if "timed out" in msg or "timeout" in msg:
	            raise gr.Error(
	                "The AI service (OpenRouter) timed out. Please try again in a moment. "
	                "If this keeps happening, switch to a faster OpenRouter model or increase OPENROUTER_TIMEOUT_S."
	            ) from e
	        if "openrouter" in msg:
	            # Not a timeout: do not tell the user it was one. The underlying
	            # text is kept out of the UI and stays in the chained traceback.
	            raise gr.Error(
	                "The AI service (OpenRouter) returned an error. Please try again in a moment. "
	                "If this keeps happening, check the Space logs for details."
	            ) from e
	        raise


	def _run_video(
	    video_path: str,
	    text_style: str,
	    platform: str,
	    keywords: str,
	    description: str,
	    hashtags: bool,
	    emojis: bool,
	    language: str,
	    caption_length: str,
	) -> str:
	    """Generate a caption for an uploaded video (Gradio click handler).

	    Args:
	        video_path: Filesystem path of the uploaded video. Falsy (``None`` or
	            empty) when the button is clicked without an upload.
	        text_style: Writing style selected in the UI.
	        platform: Target social platform selected in the UI.
	        keywords: Optional keywords; an empty string is forwarded as ``None``.
	        description: Optional extra context; an empty string is forwarded as ``None``.
	        hashtags: Whether the caption should include hashtags.
	        emojis: Whether the caption should include emojis.
	        language: Output language selected in the UI.
	        caption_length: Requested caption length selected in the UI.

	    Returns:
	        The caption produced by ``caption_video``.

	    Raises:
	        gr.Error: If the API key is missing, no video was provided, or the
	            caption step raised a ``RuntimeError`` that mentions a timeout or
	            OpenRouter.
	        RuntimeError: Any other ``RuntimeError`` from the caption step is
	            re-raised unchanged.
	    """
	    _require_openrouter()

	    # Fail with a readable message instead of handing ``None`` to the captioner.
	    if not video_path:
	        raise gr.Error("Please upload a video before generating a caption.")

	    opts = UserOptions(
	        text_style=text_style,
	        platform=platform,
	        keywords=keywords or None,
	        description=description or None,
	        hashtags=hashtags,
	        emojis=emojis,
	        language=language,
	        caption_length=caption_length,
	    )

	    try:
	        return caption_video(video_path, opts, use_openrouter=True)
	    except RuntimeError as e:
	        msg = str(e).lower()
	        if "timed out" in msg or "timeout" in msg:
	            raise gr.Error(
	                "The AI service (OpenRouter) timed out. Please try again in a moment. "
	                "If this keeps happening, switch to a faster OpenRouter model or increase OPENROUTER_TIMEOUT_S."
	            ) from e
	        if "openrouter" in msg:
	            # Not a timeout: do not tell the user it was one. The underlying
	            # text is kept out of the UI and stays in the chained traceback.
	            raise gr.Error(
	                "The AI service (OpenRouter) returned an error. Please try again in a moment. "
	                "If this keeps happening, check the Space logs for details."
	            ) from e
	        raise


	def build_demo() -> gr.Blocks:
	    """Build the caption generator UI with an "Image" tab and a "Video" tab.

	    Both tabs expose the same caption options (style, platform, keywords,
	    description, language, length, hashtags, emojis). They differ only in the
	    media input, the height of the output box, the keywords placeholder, and
	    the handler / API name wired to the button.

	    Returns:
	        The assembled ``gr.Blocks`` app. It is neither queued nor launched here.
	    """
	    # NOTE(review): the layout nesting below was reconstructed from the widget
	    # grouping of the previous version — verify it renders as intended.
	    platforms = ["Instagram", "LinkedIn", "X(Twitter)", "TikTok", "Facebook", "Pinterest", "Threads"]
	    languages = ["English", "French", "Spanish", "German", "Italian", "Arabic", "Chinese"]
	    lengths = ["small", "medium", "large"]
	    # Single source of truth for both tabs so the two dropdowns cannot drift apart.
	    text_styles = [
	        "casual",
	        "formal",
	        "educational",
	        "funny",
	        "dry",
	        "direct",
	        "Storytelling",
	        "Emotional",
	        "Hook-first",
	        "Motivational",
	        "Call-to-Action",
	    ]

	    def _caption_options(keywords_placeholder: str) -> list:
	        """Create the option widgets shared by both tabs in the active layout context.

	        Returns the components in the positional order that ``_run_image`` and
	        ``_run_video`` take after their media-path argument.
	        """
	        with gr.Row():
	            text_style = gr.Dropdown(text_styles, value="educational", label="Text style")
	            platform = gr.Dropdown(platforms, value="LinkedIn", label="Platform")

	        keywords = gr.Textbox(label="Keywords (optional)", placeholder=keywords_placeholder)
	        description = gr.Textbox(
	            label="Description / additional context (optional)",
	            lines=3,
	            placeholder="Add any extra info you want reflected in the caption",
	        )

	        with gr.Row():
	            language = gr.Dropdown(languages, value="English", label="Language")
	            caption_length = gr.Dropdown(lengths, value="medium", label="Caption length")

	        with gr.Row():
	            hashtags = gr.Checkbox(value=False, label="Add hashtags")
	            emojis = gr.Checkbox(value=False, label="Use emojis")

	        return [text_style, platform, keywords, description, hashtags, emojis, language, caption_length]

	    with gr.Blocks(title="AI Caption Generator") as demo:
	        gr.Markdown("# AI Caption Generator\nUpload an image or a short video and get a caption.")

	        with gr.Tab("Image"):
	            image = gr.Image(type="filepath", label="Upload image")
	            out = gr.Textbox(label="Caption", lines=4)
	            image_options = _caption_options("e.g. AI, startups, productivity")

	            btn = gr.Button("Generate caption")
	            btn.click(
	                _run_image,
	                inputs=[image, *image_options],
	                outputs=[out],
	                api_name="caption_image",
	            )

	        with gr.Tab("Video"):
	            video = gr.Video(label="Upload video")
	            outv = gr.Textbox(label="Caption", lines=5)
	            video_options = _caption_options("e.g. product launch, demo")

	            btnv = gr.Button("Generate caption")
	            btnv.click(
	                _run_video,
	                inputs=[video, *video_options],
	                outputs=[outv],
	                api_name="caption_video",
	            )

	        gr.Markdown(
	            "Notes\n"
	            "- First run downloads the BLIP model from Hugging Face unless `BLIP_OFFLINE=1`.\n"
	            "- This demo requires the server-side secret `OPENROUTER_API_KEY`."
	        )

	    return demo


	# Hugging Face Spaces expects a top-level variable named `demo` (or `app`).
	# The UI is therefore built at import time, not only under the `__main__` guard.
	demo = build_demo()
	# Turn on Gradio's request queue for the app, with its default settings.
	demo.queue()


	if __name__ == "__main__":
	    # Direct execution: listen on all interfaces, on the port given by the
	    # PORT environment variable (7860 when it is not set).
	    listen_port = int(os.environ.get("PORT", "7860"))
	    demo.launch(server_name="0.0.0.0", server_port=listen_port, ssr_mode=False)