Spaces:
Sleeping
Sleeping
| import os | |
| import requests | |
| import base64 | |
| from openai import OpenAI | |
def image_caption(image_url: str) -> str:
    """Describe the image at *image_url* and return the description as text.

    The image is downloaded once, base64-encoded, and then sent to one of
    two back ends:

    * If ``OPENAI_API_KEY`` is set and ``VLM_MODEL_ID`` (default
      ``"gpt-4-vision-preview"``) starts with ``"gpt-4"``, the image is sent
      to the OpenAI chat-completions API with a fixed prompt that asks for a
      chess-position analysis (best move for black) or, for non-chess
      images, a plain description.
    * Otherwise the image is sent to the Hugging Face Inference API
      ``Salesforce/blip-image-captioning-base`` model.

    ``HF_TOKEN`` is attached to the download request only when the image is
    hosted on ``huggingface.co`` (or a subdomain), so the token is never
    disclosed to third-party hosts.

    Args:
        image_url: An ``http``/``https`` URL pointing at the image.

    Returns:
        The model's answer or caption on success. This function never
        raises: failures are reported as a string prefixed with
        ``"VLM error: "`` (OpenAI path), ``"Image processing error: "``
        (BLIP path) or ``"Caption API error: "`` (non-200 BLIP response).
    """
    # Function-scope import so the block does not depend on a file-level
    # import that may not exist.
    from urllib.parse import urlparse

    openai_api_key = os.getenv("OPENAI_API_KEY")
    vlm_model = os.getenv("VLM_MODEL_ID", "gpt-4-vision-preview")
    hf_token = os.getenv("HF_TOKEN")

    # Use the OpenAI VLM only when a key is configured and the model is GPT-4.
    use_openai = bool(openai_api_key) and vlm_model.startswith("gpt-4")
    error_label = "VLM error" if use_openai else "Image processing error"

    try:
        # Reject anything that is not a plain web URL before doing any I/O.
        parsed = urlparse(image_url)
        if parsed.scheme not in ("http", "https") or not parsed.hostname:
            raise ValueError(f"unsupported image URL: {image_url!r}")

        # Match on the parsed hostname, not a substring of the whole URL;
        # otherwise "https://evil.example/huggingface.co/x.png" would be
        # handed the token.
        host = parsed.hostname.lower()
        is_hf_host = host == "huggingface.co" or host.endswith(".huggingface.co")
        download_headers = (
            {"Authorization": f"Bearer {hf_token}"}
            if hf_token and is_hf_host
            else {}
        )

        # Download the image and convert it to base64.
        img_response = requests.get(
            image_url,
            headers=download_headers,
            timeout=15 if use_openai else 10,
        )
        # Fail fast instead of captioning an HTTP error page.
        img_response.raise_for_status()
        img_b64 = base64.b64encode(img_response.content).decode()

        if use_openai:
            # Prefer the server-reported image type; fall back to PNG when
            # the header is missing or not an image type.
            content_type = (
                img_response.headers.get("Content-Type", "")
                .split(";")[0]
                .strip()
                .lower()
            )
            mime_type = (
                content_type if content_type.startswith("image/") else "image/png"
            )

            client = OpenAI(api_key=openai_api_key)
            response = client.chat.completions.create(
                model=vlm_model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "Describe the chess position and provide the best next move for black (algebraic notation). If it's not chess, just describe the image."},
                            {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{img_b64}"}},
                        ],
                    }
                ],
                max_tokens=256,
                temperature=0.0,
            )
            # ``content`` may be None (e.g. refusals); treat it as empty.
            answer = (response.choices[0].message.content or "").strip()
            return answer if answer else "No answer from VLM."

        # Fallback: BLIP captioning through the Hugging Face Inference API.
        api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
        # Send no Authorization header at all rather than "Bearer None".
        api_headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
        response = requests.post(
            api_url,
            headers=api_headers,
            json={"inputs": img_b64},
            timeout=30,
        )
        if response.status_code != 200:
            return f"Caption API error: {response.status_code}"

        result = response.json()
        if isinstance(result, list) and result and isinstance(result[0], dict):
            return result[0].get('generated_text', 'No caption')
        return str(result)
    except Exception as e:
        # Top-level boundary: callers expect a string, never an exception.
        return f"{error_label}: {e}"