cd-bookshelf / app.py
adcp's picture
Update app.py
6ff79ed verified
import os
from fastapi import Request
from starlette.responses import JSONResponse
import gradio as gr
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image
# Get the secret key from Hugging Face secrets
API_KEY = os.environ.get("API_KEY")
# Load model and processor
processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
model = AutoModelForVision2Seq.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", device_map="auto")
# Prompt for spine recognition
prompt = "What is the album title and artist on this CD spine?"
# Auth wrapper
def with_auth(fn):
async def wrapper(image: Image.Image, request: Request):
if request.headers.get("x-api-key") != API_KEY:
return JSONResponse({"error": "unauthorized"}, status_code=401)
return fn(image)
return wrapper
# Inference function
@with_auth
def extract_text(image: Image.Image):
inputs = processor(prompt=prompt, images=image, return_tensors="pt").to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=128)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
return {"text": generated_text}
# Gradio app
iface = gr.Interface(fn=extract_text, inputs="image", outputs="json")
iface.launch(
server_name="0.0.0.0", server_port=7860, share=True, api_name="extract_text"
)