# app.py — FastAPI service: convert an uploaded chart image to an HTML table
# via a GLM vision-language model. (Header reconstructed from Hugging Face
# Space page residue: repo "paddleocr", commit 9ceea25 "Update app.py".)
from fastapi import FastAPI, UploadFile, File
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
import io
app = FastAPI()

# Load the model once at import time so every request reuses it.
# CPU-only, .eval() disables dropout/batchnorm updates for inference.
model_id = "THUDM/glm-4v-9b"  # Or the specific 0.9B GLM-OCR variant
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# BUG FIX: the original called model_id.from_pretrained(...) — model_id is a
# plain str and has no from_pretrained attribute, so startup crashed with
# AttributeError. The correct loader is the model class imported above.
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).cpu().eval()
@app.post("/convert")
async def convert_image(file: UploadFile = File(...)):
    """Convert an uploaded chart image into an HTML data table.

    Reads the uploaded file, runs the vision-language model with a
    chart-to-HTML prompt, and returns the generated markup.

    Args:
        file: Multipart image upload (any format PIL can open).

    Returns:
        dict: {"html_result": <model-generated HTML table as a string>}.
    """
    image_data = await file.read()
    image = Image.open(io.BytesIO(image_data)).convert("RGB")
    # Specific prompt to trigger Chart-to-HTML behavior.
    prompt = "Read this chart and output the data as a clean HTML table with headers."
    # BUG FIX: return_dict=True is required so the result is a mapping that can
    # be splatted into generate(); without it apply_chat_template returns a
    # bare tensor and **inputs raises TypeError. Matches THUDM's official
    # GLM-4V usage example.
    inputs = tokenizer.apply_chat_template(
        [{"role": "user", "image": image, "content": prompt}],
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True,
    )
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=1000)
    # BUG FIX: decode only the newly generated tokens — outputs[0] includes the
    # echoed prompt tokens — and strip special tokens so the client receives
    # clean HTML rather than template markers.
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    response = tokenizer.decode(generated, skip_special_tokens=True)
    return {"html_result": response}