Spaces:
Running
Running
| import math | |
| import os | |
| import logging | |
| import time | |
| import warnings | |
| from dataclasses import dataclass | |
| from functools import lru_cache | |
| from typing import Any, Iterable, Mapping, Optional | |
| import requests | |
| from PIL import Image | |
| import torch | |
# Prevent repeated warning spam:
# FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated ... Use `HF_HOME` instead.
# Migrate the legacy variable to HF_HOME (only when HF_HOME is not already set),
# then silence the FutureWarning in case another copy of the var exists anywhere.
if "TRANSFORMERS_CACHE" in os.environ and "HF_HOME" not in os.environ:
    os.environ["HF_HOME"] = os.environ["TRANSFORMERS_CACHE"]
    os.environ.pop("TRANSFORMERS_CACHE", None)
warnings.filterwarnings(
    "ignore",
    category=FutureWarning,
    message=r"Using `TRANSFORMERS_CACHE` is deprecated.*",
)
# --- BLIP offline mode ---
# If True, BLIP model/processor will ONLY load from local cache and will never
# attempt to contact huggingface.co (no timeouts/retries). If the model isn't
# cached yet, you'll get a fast error telling you to run once online.
#
# For deployment, keep this OFF unless you have a pre-populated model cache.
BLIP_OFFLINE = os.environ.get("BLIP_OFFLINE", "0").strip() in ("1", "true", "True")
if BLIP_OFFLINE:
    # HF_HUB_OFFLINE is read by huggingface_hub; TRANSFORMERS_OFFLINE by transformers.
    os.environ["HF_HUB_OFFLINE"] = "1"
    os.environ["TRANSFORMERS_OFFLINE"] = "1"
| # If your network is slow, Hugging Face Hub's default 10s read timeout can cause | |
| # repeated retries when resolving files (HEAD/ETag). Bump timeouts to be more tolerant. | |
| def _bump_env_timeout(name: str, minimum_seconds: int) -> None: | |
| raw = os.environ.get(name) | |
| try: | |
| current = int(raw) if raw is not None else None | |
| except ValueError: | |
| current = None | |
| if current is None or current < minimum_seconds: | |
| os.environ[name] = str(minimum_seconds) | |
| _bump_env_timeout("HF_HUB_ETAG_TIMEOUT", 60) | |
| _bump_env_timeout("HF_HUB_DOWNLOAD_TIMEOUT", 300) | |
| # Reduce noisy hub retry logs (optional). Comment these out if you want detailed logs. | |
| logging.getLogger("huggingface_hub").setLevel(logging.ERROR) | |
| from transformers import BlipForConditionalGeneration, BlipProcessor | |
| MODEL_ID = "Salesforce/blip-image-captioning-base" | |
| IMAGE_PATH = "test3.jpg" | |
| # text_style examples: casual, formal, genz, funny, dry, educational, gen alpha, inspirational, mysterious, direct | |
| # caption_length examples: short, medium, long, or a number of words like "20" | |
| USER_OPTIONS = { | |
| "text_style": "funny", | |
| "platform": "LinkedIn", | |
| "keywords": "", | |
| "hashtags": False, | |
| "language": "English", | |
| "caption_length": "medium", | |
| } | |
| SHOW_PROGRESS = True | |
| def _progress_printer(enabled: bool = True): | |
| last_bucket = {"v": -1} | |
| def log(percent: int, message: str = "") -> None: | |
| if not enabled: | |
| return | |
| percent = max(0, min(int(percent), 100)) | |
| bucket = (percent // 10) * 10 | |
| if bucket != last_bucket["v"]: | |
| last_bucket["v"] = bucket | |
| if message: | |
| print(f"[{bucket:>3}%] {message}") | |
| else: | |
| print(f"[{bucket:>3}%]") | |
| return log | |
@dataclass
class UserOptions:
    """Normalized caption preferences consumed by the composer functions.

    NOTE: the @dataclass decorator is required — _normalize_user_options
    constructs this class with keyword arguments (UserOptions(text_style=...)),
    which a plain class would reject with a TypeError.
    """

    text_style: str = "casual"  # tone of the generated caption
    platform: str = "Instagram"  # target social platform
    keywords: Optional[str] = None  # keywords to weave into the caption
    description: Optional[str] = None  # extra free-form context
    hashtags: bool = True  # include hashtags in output
    emojis: bool = False  # allow emojis in output
    language: str = "English"  # output language
    caption_length: str = "short"  # length preference (see _caption_length_instruction)
def _normalize_user_options(user_options: Any) -> UserOptions:
    """Coerce None / UserOptions / dict-like input into a UserOptions.

    Dict input accepts legacy aliases (tone_style, add_hashtags, use_emojis,
    length); when "keywords" is absent it falls back to "description".
    Empty-string keywords/description are normalized to None.

    Raises:
        TypeError: input is none of the accepted shapes.
    """
    if user_options is None:
        return UserOptions()
    if isinstance(user_options, UserOptions):
        return user_options
    if not isinstance(user_options, Mapping):
        raise TypeError("user_options must be a UserOptions, dict-like, or None")

    def _opt_str(value: Any) -> Optional[str]:
        # Treat missing/empty values as "not provided".
        return None if value in (None, "") else str(value)

    opts = user_options
    return UserOptions(
        text_style=str(opts.get("text_style", opts.get("tone_style", "casual"))),
        platform=str(opts.get("platform", "Instagram")),
        keywords=_opt_str(opts.get("keywords", opts.get("description"))),
        description=_opt_str(opts.get("description")),
        hashtags=bool(opts.get("hashtags", opts.get("add_hashtags", True))),
        emojis=bool(opts.get("emojis", opts.get("use_emojis", False))),
        language=str(opts.get("language", "English")),
        caption_length=str(opts.get("caption_length", opts.get("length", "short"))),
    )
| def _caption_length_instruction(caption_length: str) -> str: | |
| value = (caption_length or "").strip().lower() | |
| if not value: | |
| return "" | |
| if value.isdigit(): | |
| return f"Target length: about {int(value)} words." | |
| if value in {"small", "short", "brief"}: | |
| return "Target length: short (1–2 sentences)." | |
| if value in {"medium", "normal"}: | |
| return "Target length: medium (2–4 sentences)." | |
| if value in {"large", "long", "detailed"}: | |
| return "Target length: long (a short paragraph)." | |
| return f"Target length: {caption_length}." | |
def _additional_context_instruction(user_options: UserOptions) -> str:
    """Format user-provided context so the LLM actually uses it.

    Emits one line per provided field (keywords, description); returns ""
    when neither is set.
    """
    context_lines = []
    if user_options.keywords:
        context_lines.append(f"Keywords to incorporate: {user_options.keywords}")
    if user_options.description:
        context_lines.append(f"Additional description/context: {user_options.description}")
    # "\n".join of an empty list is already "" — no special-case needed.
    return "\n".join(context_lines)
| def _get_device() -> str: | |
| return "cuda" if torch.cuda.is_available() else "cpu" | |
@lru_cache(maxsize=1)
def _load_blip(model_id: str = MODEL_ID):
    """Load and memoize the BLIP processor/model pair.

    Returns (processor, model, device). The result is cached via lru_cache
    (imported at top-of-file but previously unused) so repeated calls —
    e.g. once per video frame from caption_image — reuse the already-loaded
    model instead of re-reading the weights from disk every time.
    Exceptions are not cached, so a failed load is retried on the next call.

    Raises:
        RuntimeError: BLIP_OFFLINE is set but the model is missing from the
            local cache (with an actionable message).
    """
    device = _get_device()
    local_only = bool(BLIP_OFFLINE)
    try:
        processor = BlipProcessor.from_pretrained(
            model_id,
            # Avoid torchvision dependency in minimal environments (e.g. HF Spaces)
            # by forcing the "slow" image processor.
            use_fast=False,
            local_files_only=local_only,
        )
        model = BlipForConditionalGeneration.from_pretrained(
            model_id,
            local_files_only=local_only,
        ).to(device)
    except Exception as e:
        if local_only:
            raise RuntimeError(
                "BLIP_OFFLINE is enabled but the BLIP model isn't available in the local cache yet. "
                "Temporarily set BLIP_OFFLINE = False and run once online to download the model, "
                "then set BLIP_OFFLINE back to True."
            ) from e
        raise
    model.eval()  # inference-only: disable dropout etc.
    return processor, model, device
def caption_image(image: Image.Image) -> str:
    """Generate a short caption for a PIL image using BLIP.

    Keeps the existing generation settings intact.
    """
    processor, model, device = _load_blip(MODEL_ID)
    rgb = image.convert("RGB")
    inputs = processor(images=rgb, return_tensors="pt").to(device)
    with torch.no_grad():
        generated = model.generate(
            **inputs,
            max_new_tokens=20,  # lower = faster
            num_beams=3,  # lower = faster (5 -> 3 is a good tradeoff)
        )
    return processor.decode(generated[0], skip_special_tokens=True)
def caption_image_path(image_path: str) -> str:
    """Open the image at *image_path* and return its BLIP caption."""
    img = Image.open(image_path)
    return caption_image(img.convert("RGB"))
| _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp", ".tif", ".tiff"} | |
| _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".wmv", ".m4v"} | |
| def _looks_like_video(path: str) -> bool: | |
| ext = os.path.splitext(path)[1].lower() | |
| if ext in _VIDEO_EXTS: | |
| return True | |
| if ext in _IMAGE_EXTS: | |
| return False | |
| # Unknown extension: try opening as image; if that fails, treat as video. | |
| try: | |
| Image.open(path) | |
| return False | |
| except Exception: | |
| return True | |
def extract_frames(video_path: str, frames_per_minute: int = 8, min_frames: int = 8) -> list[Image.Image]:
    """Extract frames from a video as PIL images.

    Sampling rules:
    - Target 8 frames per minute.
    - Ensure a minimum of 8 frames total.
    - If video < 1 minute: pick 8 evenly spaced frames across whole duration.
    - If video >= 1 minute: pick 8 evenly spaced frames within each minute.

    Raises:
        FileNotFoundError: video_path does not exist.
        RuntimeError: OpenCV missing, video unreadable, duration unknown,
            or fewer than min_frames frames could be decoded.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video not found: {video_path}")
    # Imported lazily so image-only installs don't need opencv-python.
    try:
        import cv2  # type: ignore
    except Exception as e:  # pragma: no cover
        raise RuntimeError(
            "opencv-python is required for video support. Install with: pip install opencv-python"
        ) from e
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Failed to open video: {video_path}")
    fps = float(cap.get(cv2.CAP_PROP_FPS) or 0.0)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
    if fps <= 0.0:
        # Some containers report 0 fps; assume a common default so sampling still works.
        fps = 30.0
    # Prefer frame_count/fps; fall back to the capture's position clock.
    duration_sec = frame_count / fps if frame_count > 0 else float(cap.get(cv2.CAP_PROP_POS_MSEC) or 0.0) / 1000.0
    if duration_sec <= 0.0 and frame_count > 0:
        duration_sec = frame_count / fps
    if duration_sec <= 0.0:
        cap.release()
        raise RuntimeError("Could not determine video duration.")
    minutes = int(math.ceil(duration_sec / 60.0))
    sample_times: list[float] = []
    if duration_sec < 60.0:
        total = max(min_frames, frames_per_minute)
        # Evenly spaced across full duration.
        for i in range(total):
            t = (duration_sec * i) / total
            sample_times.append(t)
    else:
        # 8 frames per minute, evenly spaced within each minute.
        for m in range(minutes):
            start = 60.0 * m
            end = min(60.0 * (m + 1), duration_sec)
            if end <= start:
                continue
            for i in range(frames_per_minute):
                t = start + (end - start) * (i / frames_per_minute)
                sample_times.append(t)
    # Convert to frame indices and dedupe while preserving order.
    seen: set[int] = set()
    frame_indices: list[int] = []
    for t in sample_times:
        idx = int(t * fps)
        if frame_count > 0:
            idx = max(0, min(idx, frame_count - 1))
        if idx not in seen:
            seen.add(idx)
            frame_indices.append(idx)
    images: list[Image.Image] = []
    for idx in frame_indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ok, frame = cap.read()
        if not ok or frame is None:
            # Seek/decode failures are skipped; the min_frames check below compensates.
            continue
        # OpenCV gives BGR; convert to RGB.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        images.append(Image.fromarray(frame_rgb))
    cap.release()
    if not images:
        raise RuntimeError("No frames extracted from video.")
    # Enforce minimum frames if possible by re-sampling across entire duration.
    if len(images) < min_frames and frame_count > 0:
        cap = cv2.VideoCapture(video_path)
        extra_indices: list[int] = []
        for i in range(min_frames):
            idx = int((frame_count * i) / min_frames)
            idx = max(0, min(idx, frame_count - 1))
            if idx not in seen:
                extra_indices.append(idx)
                seen.add(idx)
        for idx in extra_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ok, frame = cap.read()
            if not ok or frame is None:
                continue
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            images.append(Image.fromarray(frame_rgb))
        cap.release()
    if len(images) < min_frames:
        raise RuntimeError(f"Extracted {len(images)} frames; expected at least {min_frames}.")
    return images
def generate_frame_captions(images: Iterable[Image.Image]) -> list[str]:
    """Caption each image with BLIP, recording failures inline.

    A frame whose captioning raises contributes "[caption_failed: <err>]"
    instead of aborting the batch. Progress goes to stdout when
    SHOW_PROGRESS is enabled.
    """
    frames = list(images)
    count = len(frames)
    report = _progress_printer(SHOW_PROGRESS)
    if count:
        report(0, f"Captioning {count} frames...")
    results: list[str] = []
    for position, frame in enumerate(frames, start=1):
        try:
            text = caption_image(frame)
        except Exception as e:
            text = f"[caption_failed: {e}]"
        results.append(text)
        if count:
            report(int((position / count) * 100), f"Captioned {position}/{count} frames")
    return results
| def _openrouter_chat(messages: list[dict], model: str, timeout_s: int = 60) -> str: | |
| api_key = os.environ.get("OPENROUTER_API_KEY") | |
| if not api_key: | |
| raise RuntimeError("Missing OPENROUTER_API_KEY environment variable.") | |
| # HF Spaces (and some networks) can be bursty/slow. Allow tuning via env. | |
| try: | |
| timeout_s = int(os.environ.get("OPENROUTER_TIMEOUT_S", str(timeout_s))) | |
| except ValueError: | |
| pass | |
| try: | |
| max_retries = int(os.environ.get("OPENROUTER_MAX_RETRIES", "2")) | |
| except ValueError: | |
| max_retries = 2 | |
| url = "https://openrouter.ai/api/v1/chat/completions" | |
| headers = { | |
| "Authorization": f"Bearer {api_key}", | |
| "Content-Type": "application/json", | |
| } | |
| payload = { | |
| "model": model, | |
| "messages": messages, | |
| # Keep responses snappy to reduce timeouts. | |
| "max_tokens": 350, | |
| } | |
| last_err: Exception | None = None | |
| for attempt in range(max_retries + 1): | |
| # Progressive backoff + longer timeout per attempt. | |
| attempt_timeout = timeout_s + (attempt * 45) | |
| try: | |
| resp = requests.post( | |
| url, | |
| headers=headers, | |
| json=payload, | |
| # (connect timeout, read timeout) | |
| timeout=(10, attempt_timeout), | |
| ) | |
| break | |
| except (requests.Timeout, requests.RequestException) as e: | |
| last_err = e | |
| if attempt >= max_retries: | |
| raise RuntimeError(f"OpenRouter request failed: {e}") from e | |
| time.sleep(0.8 * (attempt + 1)) | |
| if resp.status_code >= 400: | |
| raise RuntimeError(f"OpenRouter API error {resp.status_code}: {resp.text}") | |
| try: | |
| data = resp.json() | |
| except ValueError as e: | |
| raise RuntimeError(f"OpenRouter returned non-JSON response: {resp.text}") from e | |
| try: | |
| return data["choices"][0]["message"]["content"].strip() | |
| except Exception as e: | |
| raise RuntimeError(f"Unexpected OpenRouter response format: {data}") from e | |
def _clean_caption_output(text: str) -> str:
    """Extract only the final caption text from an LLM response.

    Some reasoning-style models occasionally append analysis/metadata (e.g. bullet lists like
    "- Maintains casual French tone..."). We defensively strip those so the UI only shows the caption.

    Returns "" when nothing survives the cleanup; callers fall back to a base caption.
    """
    t = (text or "").strip()
    if not t:
        return ""
    # Remove common reasoning blocks (DeepSeek R1 style).
    if "<think>" in t and "</think>" in t:
        t = t.split("</think>")[-1].strip()
    # Unwrap fenced blocks if the model returns ```text ...```.
    if t.startswith("```"):
        parts = t.split("```")
        # parts[0] is empty; parts[1] contains optional language + content.
        if len(parts) >= 3:
            inner = parts[1]
            inner_lines = inner.splitlines()
            # A short purely-alphabetic first line is treated as a language tag and dropped.
            if inner_lines and inner_lines[0].strip().isalpha() and len(inner_lines[0].strip()) <= 12:
                inner = "\n".join(inner_lines[1:])
            t = inner.strip() or t
    # Drop leading labels. (Only the first matching prefix is removed.)
    for prefix in (
        "final caption:",
        "final:",
        "caption:",
        "output:",
        "réponse finale:",
        "réponse:",
        "résultat:",
    ):
        if t.lower().startswith(prefix):
            t = t[len(prefix) :].strip()
            break
    # Hard cut at explicit meta sections.
    lower = t.lower()
    for marker in (
        "with this caption:",
        "with this caption",
        "explanation:",
        "analysis:",
        "notes:",
        "justification:",
        "raison:",
        "pourquoi:",
    ):
        idx = lower.find(marker)
        if idx != -1:
            t = t[:idx].strip()
            lower = t.lower()
            break
    # If there's a meta bullet section appended, cut it off.
    # NOTE(review): this heuristic can truncate a legitimate caption whose own
    # bullet line contains one of these words — accepted trade-off.
    meta_keywords = (
        "maintain",
        "maintains",
        "tone",
        "style",
        "language",
        "hashtags",
        "emoji",
        "length",
        "platform",
        "casual",
        "formal",
        "explication",
        "analyse",
    )
    lines = t.splitlines()
    for i, line in enumerate(lines):
        s = line.strip()
        if not s.startswith("-"):
            continue
        s_lower = s.lower()
        if any(k in s_lower for k in meta_keywords):
            # Keep everything before the first meta-looking bullet line.
            t = "\n".join(lines[:i]).strip()
            break
    # Remove surrounding quotes (common in model outputs).
    if len(t) >= 2 and ((t[0] == t[-1] == '"') or (t[0] == t[-1] == "'")):
        t = t[1:-1].strip()
    return t
def compose_video_caption(frame_captions: list[str], user_options: Any, use_openrouter: bool = True) -> str:
    """Fuse per-frame BLIP captions into one coherent video caption.

    Uses OpenRouter when enabled; otherwise (or when the LLM output is
    empty after cleaning) returns a deterministic summary built from the
    unique frame captions.

    Raises:
        ValueError: frame_captions is empty.
    """
    if not frame_captions:
        raise ValueError("frame_captions is empty")
    user_options = _normalize_user_options(user_options)

    def _fallback_summary() -> str:
        # Cheap deterministic fallback: return a compact summary based on a few unique frame captions.
        uniq: list[str] = []
        seen: set[str] = set()
        for c in frame_captions:
            c2 = (c or "").strip()
            # Skip blanks and the "[caption_failed: ...]" sentinels.
            if not c2 or c2.startswith("[caption_failed"):
                continue
            k = c2.lower()
            if k in seen:
                continue
            seen.add(k)
            uniq.append(c2)
            # Six distinct captions is plenty for a one-line summary.
            if len(uniq) >= 6:
                break
        if not uniq:
            return "A short video clip."
        return "Video shows: " + "; ".join(uniq)

    if not use_openrouter:
        return _fallback_summary()
    model = "deepseek/deepseek-r1-0528:free"
    captions_text = "\n".join(f"- {c}" for c in frame_captions)
    system = (
        "You are a video caption summarizer. "
        "Given short captions of sampled frames from a video, infer a single coherent caption "
        "that describes the overall video. "
        "Do NOT list frames one-by-one. Output ONLY the final caption text, no quotes."
    )
    extra_ctx = _additional_context_instruction(user_options)
    user = (
        "Requirements:\n"
        f"- Output language: {user_options.language}\n"
        f"- Target platform: {user_options.platform}\n"
        f"- Text style: {user_options.text_style}\n"
        f"- Emojis: {'allowed' if user_options.emojis else 'do not use'}\n"
        f"- Hashtags: {'include' if user_options.hashtags else 'do not include'}\n"
        f"- {_caption_length_instruction(user_options.caption_length)}\n"
        + (f"\nAdditional context (use this to steer the caption; incorporate keywords if provided):\n{extra_ctx}\n" if extra_ctx else "\n")
        + f"\nFrame captions:\n{captions_text}"
    )
    out = _openrouter_chat(
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        model=model,
    )
    cleaned = _clean_caption_output(out)
    # Empty/fully-stripped LLM output falls back to the deterministic summary.
    return cleaned if cleaned else _fallback_summary()
def compose_image_caption(base_caption: str, user_options: Any, use_openrouter: bool = True) -> str:
    """Polish a raw BLIP caption into a final social-media caption.

    Returns the stripped base caption when OpenRouter is disabled or when
    the model's reply is empty after cleaning.

    Raises:
        ValueError: base_caption is empty/whitespace.
    """
    stripped = base_caption.strip()
    if not stripped:
        raise ValueError("base_caption is empty")
    opts = _normalize_user_options(user_options)
    if not use_openrouter:
        return stripped
    system_prompt = (
        "You are a caption fuser/editor for social platforms. "
        "Given a base caption and preferences, output a polished final caption. "
        "If additional context/keywords are provided, incorporate them naturally and consistently. "
        "Output ONLY the final caption text, no quotes, no explanations."
    )
    extra_ctx = _additional_context_instruction(opts)
    context_block = (
        f"\nAdditional context (use this to steer the caption; incorporate keywords if provided):\n{extra_ctx}\n"
        if extra_ctx
        else ""
    )
    user_prompt = (
        f"Base caption: {base_caption}\n"
        "Requirements:\n"
        f"- Text style: {opts.text_style}\n"
        f"- Target platform: {opts.platform}\n"
        f"- Output language: {opts.language}\n"
        f"- Emojis: {'allowed' if opts.emojis else 'do not use'}\n"
        f"- Hashtags: {'include' if opts.hashtags else 'do not include'}\n"
        f"- {_caption_length_instruction(opts.caption_length)}\n"
        + context_block
    )
    raw_reply = _openrouter_chat(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        model="deepseek/deepseek-r1-0528:free",
    )
    cleaned = _clean_caption_output(raw_reply)
    # If the model returns an empty/fully-stripped response, fall back to the base caption.
    return cleaned if cleaned else stripped
def caption_video(video_path: str, user_options: Any = None, use_openrouter: bool = True) -> str:
    """Full video pipeline: sample frames, caption each, summarize, polish.

    Per-frame captioning progress is mapped into the 10%..80% range so the
    summarize/polish steps still have room at the end of the progress bar.
    """
    opts = _normalize_user_options(user_options)
    report = _progress_printer(SHOW_PROGRESS)
    report(0, "Starting video captioning")
    frames = extract_frames(video_path)
    report(10, f"Extracted {len(frames)} frames")
    per_frame: list[str] = []
    count = len(frames)
    for n, frame in enumerate(frames, start=1):
        try:
            per_frame.append(caption_image(frame))
        except Exception as e:
            # Record the failure inline rather than aborting the whole video.
            per_frame.append(f"[caption_failed: {e}]")
        if count:
            report(10 + int((n / count) * 70), f"Captioned {n}/{count} frames")
    report(80, "Summarizing video")
    summary = compose_video_caption(per_frame, opts, use_openrouter=use_openrouter)
    report(90, "Polishing final caption")
    polished = compose_image_caption(summary, opts, use_openrouter=use_openrouter)
    report(100, "Done")
    return polished
def main():
    """Simple manual entrypoint.

    - No args: captions IMAGE_PATH (image or video auto-detected).
    - One arg: captions that path (image or video auto-detected).
    """
    import sys

    media_path = sys.argv[1] if len(sys.argv) > 1 else IMAGE_PATH
    opts = _normalize_user_options(USER_OPTIONS)
    if _looks_like_video(media_path):
        try:
            final = caption_video(media_path, opts, use_openrouter=True)
        except RuntimeError as e:
            # If OpenRouter isn't configured, still return a deterministic fallback.
            if "OPENROUTER_API_KEY" not in str(e):
                raise
            final = caption_video(media_path, opts, use_openrouter=False)
        print(final)
        return
    progress = _progress_printer(SHOW_PROGRESS)
    progress(0, "Starting image captioning")
    base = caption_image_path(media_path)
    progress(50, "Polishing caption with AI")
    try:
        final = compose_image_caption(base, opts, use_openrouter=True)
    except RuntimeError as e:
        # If OpenRouter isn't configured, still return the raw BLIP caption.
        if "OPENROUTER_API_KEY" not in str(e):
            raise
        final = base
    progress(100, "Done")
    print(final)
# Script entrypoint guard: keeps imports side-effect-light for library use.
if __name__ == "__main__":
    main()