Update app.py
app.py
CHANGED
import spaces
from transformers import AutoProcessor, AutoModelForImageTextToText
import torch
import gradio as gr

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the OCR model once at startup; device_map="auto" places it on the GPU when available.
MODEL_PATH = "zai-org/GLM-OCR"
processor = AutoProcessor.from_pretrained(MODEL_PATH)
model = AutoModelForImageTextToText.from_pretrained(
    pretrained_model_name_or_path=MODEL_PATH,
    torch_dtype="auto",
    device_map="auto",
)


@spaces.GPU  # request ZeroGPU hardware for the duration of each call
def read_img(img):
    """
    Takes in an image file and returns the text recognized from the image.

    Args:
        img: the input image file
    Returns:
        output_text: a string of the text recognized from the image
    """
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "url": img},
                {"type": "text", "text": "Text Recognition:"},
            ],
        }
    ]

    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device)  # move the tokenized inputs to the same device as the model

    inputs.pop("token_type_ids", None)
    generated_ids = model.generate(**inputs, max_new_tokens=8192)
    # Decode only the newly generated tokens, skipping the prompt.
    output_text = processor.decode(
        generated_ids[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=False,
    )

    return output_text


with gr.Blocks() as imgsmiles:
    top = gr.Markdown(
        """
        # OCR with ZAI GLM
        """
    )

    # Reserved for the commented-out agent workflow below.
    agent_flag_choice = gr.Radio(
        choices=["True", "False"],
        label="Are you an Agent?",
        interactive=True,
        value="False",
        scale=2,
    )
    with gr.Row():
        inputs = gr.Image(type="filepath")
        text_out = gr.Textbox(lines=2, label="Text Output")

    submit_button = gr.Button("Submit")
    clear_button = gr.ClearButton([inputs, text_out], value="Clear")
    # agent_button = gr.Button("Agent use only")

    submit_button.click(read_img, [inputs], [text_out])
    # agent_button.click(agent_read_img, [agent_flag_choice, inputs], [text_out, None])

imgsmiles.launch(mcp_server=True)
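
Since the app launches with mcp_server=True, the read_img handler is also exposed as a callable endpoint. Below is a minimal sketch of querying it with gradio_client; the Space id "your-username/glm-ocr-demo" and the file name "sample_page.png" are placeholders, not names from this repo.

from gradio_client import Client, handle_file

# Hypothetical Space id; substitute the actual Space once deployed.
client = Client("your-username/glm-ocr-demo")
result = client.predict(
    img=handle_file("sample_page.png"),  # placeholder path to any local image
    api_name="/read_img",                # Gradio's default endpoint name for the read_img handler
)
print(result)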