Spaces:
Sleeping
Sleeping
| # from fastapi import FastAPI, UploadFile, File | |
| # # from transformers import AutoProcessor, AutoModelForVision2Seq,AutoModel | |
| # from transformers import AutoProcessor,AutoModel | |
| # from PIL import Image | |
| # import torch | |
| # import io | |
| # app = FastAPI() | |
| # MODEL_ID = "zai-org/GLM-OCR" | |
| # print("Loading GLM-OCR model...") | |
| # # processor = AutoProcessor.from_pretrained(MODEL_ID) | |
| # # model = AutoModelForVision2Seq.from_pretrained( | |
| # # MODEL_ID, | |
| # # torch_dtype=torch.float32 | |
| # # ) | |
| # # processor = AutoProcessor.from_pretrained( | |
| # # MODEL_ID, | |
| # # trust_remote_code=True | |
| # # ) | |
| # # model = AutoModelForVision2Seq.from_pretrained( | |
| # # MODEL_ID, | |
| # # trust_remote_code=True, | |
| # # torch_dtype=torch.float32 | |
| # # ) | |
| # processor = AutoProcessor.from_pretrained( | |
| # MODEL_ID, | |
| # trust_remote_code=True | |
| # ) | |
| # model = AutoModel.from_pretrained( | |
| # MODEL_ID, | |
| # trust_remote_code=True | |
| # ) | |
| # @app.get("/") | |
| # async def root(): | |
| # return {"status": "GLM-OCR API is running"} | |
| # @app.post("/ocr") | |
| # async def extract_text(file: UploadFile = File(...)): | |
| # try: | |
| # contents = await file.read() | |
| # image = Image.open(io.BytesIO(contents)).convert("RGB") | |
| # # inputs = processor(images=image, return_tensors="pt") | |
| # inputs = processor( | |
| # text="Extract all text from the document", | |
| # images=image, | |
| # return_tensors="pt" | |
| # ) | |
| # with torch.no_grad(): | |
| # outputs = model.generate(**inputs, max_new_tokens=1024) | |
| # text = processor.batch_decode(outputs, skip_special_tokens=True)[0] | |
| # return { | |
| # "success": True, | |
| # "text": text | |
| # } | |
| # except Exception as e: | |
| # return { | |
| # "success": False, | |
| # "error": str(e) | |
| # } | |
| from fastapi import FastAPI, UploadFile, File | |
| from transformers import AutoProcessor, GlmOcrForConditionalGeneration | |
| from PIL import Image | |
| import torch | |
| import io | |
# FastAPI application instance for this service.
app = FastAPI()

# Hugging Face Hub identifier of the checkpoint loaded below.
MODEL_ID = "zai-org/GLM-OCR"

print("Loading GLM-OCR model...")

# The processor and model are created once at import time and kept as
# module globals so they are not reloaded per call.
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

# Weights are requested in float32 here; the original note suggests
# torch.bfloat16 when a GPU is available.
model = GlmOcrForConditionalGeneration.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype=torch.float32,
)
# Switch to evaluation mode (eval() returns the same module object, so
# calling it as a separate statement leaves `model` bound identically).
model.eval()
@app.get("/")
async def root():
    """Report that the service is up.

    Registered as ``GET /`` so the status message is actually reachable;
    without the route decorator the function is never attached to ``app``.

    Returns:
        A dict with a single ``"status"`` key holding a fixed message.
    """
    return {"status": "GLM-OCR API is running"}
@app.post("/ocr")
async def extract_text(file: UploadFile = File(...)):
    """Run the GLM-OCR model on an uploaded image and return its text.

    Registered as ``POST /ocr``; without the route decorator the handler is
    never attached to ``app``.

    Args:
        file: Uploaded image. Its bytes are decoded with PIL and converted
            to RGB before being passed to the processor.

    Returns:
        ``{"success": True, "text": ...}`` with the whitespace-stripped
        model output, or ``{"success": False, "error": ...}`` carrying the
        exception message if any step fails. Failures are reported in the
        response body rather than raised.
    """
    try:
        contents = await file.read()

        # convert() returns a separate copy, so the decoder handle opened
        # on the upload can be released as soon as the RGB image exists.
        with Image.open(io.BytesIO(contents)) as uploaded:
            image = uploaded.convert("RGB")

        # Single-turn conversation: the image plus the extraction request.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": "Extract all text from this image."},
                ],
            }
        ]

        # The chat template tokenizes the conversation and returns a dict
        # of tensors, including the `input_ids` used for slicing below.
        inputs = processor.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
        )
        # Keep the inputs on the same device as the model so generation
        # also works when the model has been placed on a GPU.
        inputs = inputs.to(model.device)

        # Greedy decoding, no gradient tracking needed for inference.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=1024,
                do_sample=False,
            )

        # Drop the prompt tokens so only the newly generated ones are decoded.
        prompt_length = inputs["input_ids"].shape[1]
        generated_ids = outputs[:, prompt_length:]
        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return {
            "success": True,
            "text": text.strip(),
        }
    except Exception as e:
        # Deliberate catch-all at the request boundary: callers receive a
        # structured error payload instead of an unhandled server error.
        return {
            "success": False,
            "error": str(e),
        }