Spaces:

Nick-2x
/

Lab_report_analysis

Sleeping

App Files Community

Nick-2x committed on Apr 17

Commit

6785323

verified ·

1 Parent(s): f398029

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -33

app.py CHANGED Viewed

@@ -1,42 +1,97 @@
-from fastapi import FastAPI, UploadFile, File
-# from transformers import AutoProcessor, AutoModelForVision2Seq,AutoModel
-from transformers import AutoProcessor,AutoModel
-from PIL import Image
-import torch
-import io
-app = FastAPI()
-MODEL_ID = "zai-org/GLM-OCR"
-print("Loading GLM-OCR model...")
-# processor = AutoProcessor.from_pretrained(MODEL_ID)
-# model = AutoModelForVision2Seq.from_pretrained(
-#     MODEL_ID,
-#     torch_dtype=torch.float32
-# )
 # processor = AutoProcessor.from_pretrained(
 #     MODEL_ID,
 #     trust_remote_code=True
 # )
-# model = AutoModelForVision2Seq.from_pretrained(
 #     MODEL_ID,
-#     trust_remote_code=True,
-#     torch_dtype=torch.float32
 # )
-processor = AutoProcessor.from_pretrained(
-    MODEL_ID,
-    trust_remote_code=True
-)
-model = AutoModel.from_pretrained(
     MODEL_ID,
-    trust_remote_code=True
-)
 @app.get("/")
 async def root():
@@ -45,24 +100,48 @@ async def root():
 @app.post("/ocr")
 async def extract_text(file: UploadFile = File(...)):
     try:
         contents = await file.read()
         image = Image.open(io.BytesIO(contents)).convert("RGB")
-        # inputs = processor(images=image, return_tensors="pt")
-        inputs = processor(
-        text="Extract all text from the document",
-        images=image,
-        return_tensors="pt"
-)
         with torch.no_grad():
-            outputs = model.generate(**inputs, max_new_tokens=1024)
-        text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
         return {
             "success": True,
-            "text": text
         }
     except Exception as e:

+# from fastapi import FastAPI, UploadFile, File
+# # from transformers import AutoProcessor, AutoModelForVision2Seq,AutoModel
+# from transformers import AutoProcessor,AutoModel
+# from PIL import Image
+# import torch
+# import io
+# app = FastAPI()
+# MODEL_ID = "zai-org/GLM-OCR"
+# print("Loading GLM-OCR model...")
+# # processor = AutoProcessor.from_pretrained(MODEL_ID)
+# # model = AutoModelForVision2Seq.from_pretrained(
+# #     MODEL_ID,
+# #     torch_dtype=torch.float32
+# # )
+# # processor = AutoProcessor.from_pretrained(
+# #     MODEL_ID,
+# #     trust_remote_code=True
+# # )
+# # model = AutoModelForVision2Seq.from_pretrained(
+# #     MODEL_ID,
+# #     trust_remote_code=True,
+# #     torch_dtype=torch.float32
+# # )
 # processor = AutoProcessor.from_pretrained(
 #     MODEL_ID,
 #     trust_remote_code=True
 # )
+# model = AutoModel.from_pretrained(
 #     MODEL_ID,
+#     trust_remote_code=True
 # )
+# @app.get("/")
+# async def root():
+#     return {"status": "GLM-OCR API is running"}
+# @app.post("/ocr")
+# async def extract_text(file: UploadFile = File(...)):
+#     try:
+#         contents = await file.read()
+#         image = Image.open(io.BytesIO(contents)).convert("RGB")
+#         # inputs = processor(images=image, return_tensors="pt")
+#         inputs = processor(
+#         text="Extract all text from the document",
+#         images=image,
+#         return_tensors="pt"
+# )
+#         with torch.no_grad():
+#             outputs = model.generate(**inputs, max_new_tokens=1024)
+#         text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
+#         return {
+#             "success": True,
+#             "text": text
+#         }
+#     except Exception as e:
+#         return {
+#             "success": False,
+#             "error": str(e)
+#         }
+from fastapi import FastAPI, UploadFile, File
+from transformers import AutoProcessor, GlmOcrForConditionalGeneration
+from PIL import Image
+import torch
+import io
+app = FastAPI()
+MODEL_ID = "zai-org/GLM-OCR"
+print("Loading GLM-OCR model...")
+# Initialize Processor and Model specifically for GLM-OCR
+processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+model = GlmOcrForConditionalGeneration.from_pretrained(
     MODEL_ID,
+    trust_remote_code=True,
+    torch_dtype=torch.float32 # Use torch.bfloat16 if you have a GPU
+).eval()
 @app.get("/")
 async def root():
 @app.post("/ocr")
 async def extract_text(file: UploadFile = File(...)):
     try:
+        # Read and prepare image
         contents = await file.read()
         image = Image.open(io.BytesIO(contents)).convert("RGB")
+        # 1. Define the conversation structure
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image"},
+                    {"type": "text", "text": "Extract all text from this image."}
+                ],
+            }
+        ]
+        # 2. Use the chat template to prepare inputs
+        # This fixes the 'NoneType' error by providing valid input_ids
+        inputs = processor.apply_chat_template(
+            messages,
+            images=[image],
+            tokenize=True,
+            add_generation_prompt=True,
+            return_dict=True,
+            return_tensors="pt"
+        )
+        # 3. Generate
         with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=1024,
+                do_sample=False
+            )
+        # 4. Decode the result
+        # We slice the output to remove the prompt tokens and keep only the response
+        generated_ids = outputs[:, inputs['input_ids'].shape[1]:]
+        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
         return {
             "success": True,
+            "text": text.strip()
         }
     except Exception as e: