Spaces:
Runtime error
Runtime error
| import os | |
| from fastapi import Request | |
| from starlette.responses import JSONResponse | |
| import gradio as gr | |
| from transformers import AutoProcessor, AutoModelForVision2Seq | |
| from PIL import Image | |
| # Get the secret key from Hugging Face secrets | |
| API_KEY = os.environ.get("API_KEY") | |
| # Load model and processor | |
| processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct") | |
| model = AutoModelForVision2Seq.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", device_map="auto") | |
| # Prompt for spine recognition | |
| prompt = "What is the album title and artist on this CD spine?" | |
| # Auth wrapper | |
| def with_auth(fn): | |
| async def wrapper(image: Image.Image, request: Request): | |
| if request.headers.get("x-api-key") != API_KEY: | |
| return JSONResponse({"error": "unauthorized"}, status_code=401) | |
| return fn(image) | |
| return wrapper | |
| # Inference function | |
| def extract_text(image: Image.Image): | |
| inputs = processor(prompt=prompt, images=image, return_tensors="pt").to(model.device) | |
| generated_ids = model.generate(**inputs, max_new_tokens=128) | |
| generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] | |
| return {"text": generated_text} | |
| # Gradio app | |
| iface = gr.Interface(fn=extract_text, inputs="image", outputs="json") | |
| iface.launch( | |
| server_name="0.0.0.0", server_port=7860, share=True, api_name="extract_text" | |
| ) | |