import base64
import os
from urllib.parse import urlparse

import requests
from openai import OpenAI

# Hosted inference endpoint used when the OpenAI path is not enabled.
_BLIP_API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"

# Fixed instruction sent to the vision model together with the image.
_VLM_PROMPT = (
    "Describe the chess position and provide the best next move for black "
    "(algebraic notation). If it's not chess, just describe the image."
)


def _hf_auth_headers(url: str) -> dict:
    """Return a bearer-token header for *url* only if it is a huggingface.co host.

    The host is taken from the parsed URL rather than searched for as a
    substring, so the token is never attached to a third-party URL that merely
    contains "huggingface.co" somewhere in its path or query. No header is
    produced when HF_TOKEN is unset (avoids sending "Bearer None").
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        return {}
    host = (urlparse(url).hostname or "").lower()
    if host == "huggingface.co" or host.endswith(".huggingface.co"):
        return {"Authorization": f"Bearer {token}"}
    return {}


def _download_image(url: str, timeout: float) -> bytes:
    """Fetch *url* and return the raw response body, raising on HTTP errors."""
    response = requests.get(url, headers=_hf_auth_headers(url), timeout=timeout)
    # Fail here instead of forwarding an HTML error page as if it were an image.
    response.raise_for_status()
    return response.content


def _caption_with_openai(image_url: str, api_key: str, model: str) -> str:
    """Send the image plus the fixed prompt to an OpenAI chat model."""
    img_b64 = base64.b64encode(_download_image(image_url, timeout=15)).decode()

    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": _VLM_PROMPT},
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_b64}"}},
                ],
            }
        ],
        max_tokens=256,
        temperature=0.0,
    )
    # `content` may be None; treat that the same as an empty answer.
    answer = (response.choices[0].message.content or "").strip()
    return answer if answer else "No answer from VLM."


def _caption_with_blip(image_url: str) -> str:
    """Caption the image through the Hugging Face hosted BLIP endpoint."""
    img_b64 = base64.b64encode(_download_image(image_url, timeout=10)).decode()

    response = requests.post(
        _BLIP_API_URL,
        headers=_hf_auth_headers(_BLIP_API_URL),
        json={"inputs": img_b64},
        timeout=30,
    )
    if response.status_code != 200:
        return f"Caption API error: {response.status_code}"

    result = response.json()
    if isinstance(result, list) and result:
        return result[0].get("generated_text", "No caption")
    return str(result)


def image_caption(image_url: str) -> str:
    """Describe the image at *image_url*.

    Two backends are available:

    * OpenAI vision model - used when OPENAI_API_KEY is set and VLM_MODEL_ID
      (default "gpt-4-vision-preview") starts with "gpt-4". The model receives
      a fixed prompt asking for a chess-position analysis, or a plain
      description when the image is not a chess board.
    * Hugging Face BLIP captioning - used in every other case.

    The HF_TOKEN environment variable, when set, is sent only to
    huggingface.co hosts.

    Args:
        image_url: HTTP(S) URL of the image to download and describe.

    Returns:
        The model's answer or caption. Failures are not raised; they are
        reported as a string starting with "VLM error:",
        "Caption API error:" or "Image processing error:".
    """
    openai_api_key = os.getenv("OPENAI_API_KEY")
    vlm_model = os.getenv("VLM_MODEL_ID", "gpt-4-vision-preview")

    # Decide whether the OpenAI VLM path is enabled.
    if openai_api_key and vlm_model.startswith("gpt-4"):
        try:
            return _caption_with_openai(image_url, openai_api_key, vlm_model)
        except Exception as e:  # tool boundary: report the failure as text
            return f"VLM error: {e}"

    # Fall back to BLIP captioning.
    try:
        return _caption_with_blip(image_url)
    except Exception as e:  # tool boundary: report the failure as text
        return f"Image processing error: {e}"