from fastapi import FastAPI, UploadFile, File
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
import io

app = FastAPI()

# Load model once at startup (CPU inference; eval mode disables dropout etc.).
model_id = "THUDM/glm-4v-9b"  # Or the specific 0.9B GLM-OCR variant
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# BUG FIX: the original called `model_id.from_pretrained(...)` on the string
# itself (AttributeError). The model class must load the checkpoint.
model = (
    AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
    .cpu()
    .eval()
)


@app.post("/convert")
async def convert_image(file: UploadFile = File(...)) -> dict:
    """Convert an uploaded chart image into an HTML table.

    Reads the uploaded file into a PIL image, prompts the vision-language
    model to transcribe the chart as an HTML table, and returns the decoded
    model output.

    Args:
        file: Uploaded image file (any format PIL can open).

    Returns:
        {"html_result": <model-generated HTML table text>}
    """
    image_data = await file.read()
    # Force RGB: uploads may be RGBA/palette, which the model does not expect.
    image = Image.open(io.BytesIO(image_data)).convert("RGB")

    # Specific prompt to trigger Chart-to-HTML
    prompt = "Read this chart and output the data as a clean HTML table with headers."

    # BUG FIX: without return_dict=True, apply_chat_template returns a bare
    # tensor and `model.generate(**inputs)` fails — a mapping is required.
    inputs = tokenizer.apply_chat_template(
        [{"role": "user", "image": image, "content": prompt}],
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True,
    )
    inputs = inputs.to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=1000)

    # BUG FIX: decode only the newly generated tokens (the prompt tokens are
    # echoed at the front of `outputs`) and drop special tokens so the
    # response contains just the HTML table text.
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    response = tokenizer.decode(generated, skip_special_tokens=True)
    return {"html_result": response}