Antharee committed on
Commit
eaa18ed
·
verified ·
1 Parent(s): ef180e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -16
app.py CHANGED
@@ -1,28 +1,36 @@
 
1
  import torch
2
- from PIL import Image
3
  import gradio as gr
 
4
  from transformers import AutoProcessor, AutoModelForVision2Seq
5
 
# Hub repository that provides both the processor and the model weights.
MODEL_ID = "scb10x/typhoon-ocr-3b"

# `token=False` requests an anonymous download; it replaces the deprecated
# `use_auth_token=False` spelling of the same option.
processor = AutoProcessor.from_pretrained(MODEL_ID, token=False)

# Half-precision weights; `device_map="auto"` leaves weight placement to the
# loader instead of pinning a device here.
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    token=False,
)
13
 
def ocr_infer(image):
    """Run OCR on one image and return the decoded text or an error string.

    Args:
        image: Object exposing ``convert`` (a PIL image from the UI), or
            ``None`` when nothing was supplied.

    Returns:
        The first decoded sequence, or a message starting with "❌ Error".
        Exceptions raised while processing are caught and reported through
        the returned string.
    """
    # The missing-input check cannot raise, so it sits ahead of the try.
    if image is None:
        return "❌ Error: No image provided"

    try:
        rgb = image.convert("RGB")
        encoded = processor(images=rgb, return_tensors="pt")
        if encoded is None or "pixel_values" not in encoded:
            return "❌ Error: Invalid processor output"

        # Move every processor output to the device the model reports.
        on_device = {}
        for name, tensor in encoded.items():
            on_device[name] = tensor.to(model.device)

        token_ids = model.generate(**on_device, max_new_tokens=256)
        texts = processor.batch_decode(token_ids, skip_special_tokens=True)
        return texts[0]
    except Exception as err:
        return f"❌ Error during inference: {err}"
32
 
# Gradio UI: a PIL image goes in, ocr_infer's text comes out.
iface = gr.Interface(fn=ocr_infer, inputs=gr.Image(type="pil"), outputs="text", title="Typhoon OCR 3B")

# Start serving the interface.
iface.launch()
 
1
+ import os
2
  import torch
 
3
  import gradio as gr
4
+ from PIL import Image
5
  from transformers import AutoProcessor, AutoModelForVision2Seq
6
 
# Hub repository that provides both the processor and the model weights.
MODEL_ID = "scb10x/typhoon-ocr-3b"

# Optional access token; when the variable is unset or empty, no credential
# keyword is passed at all.
hf_token = os.getenv("HUGGINGFACE_TOKEN")

# `token` replaces the deprecated `use_auth_token` keyword of from_pretrained.
token_args = {"token": hf_token} if hf_token else {}

processor = AutoProcessor.from_pretrained(MODEL_ID, **token_args)

# Half-precision weights; `device_map="auto"` leaves weight placement to the
# loader instead of pinning a device here.
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    **token_args,
)

# Read the device back from the loaded model rather than guessing it from
# CUDA availability, so input tensors follow the placement the loader chose.
device = model.device
22
 
def ocr_infer(image):
    """Run OCR on a single image and return the decoded text.

    Args:
        image: Object exposing ``convert`` (the UI input is declared with
            ``type="pil"``), or ``None`` when nothing was uploaded.

    Returns:
        The first decoded sequence produced by the model, or a message
        starting with "❌ Error" describing the failure. Exceptions raised
        while processing are caught and reported through the returned string.
    """
    if image is None:
        return "❌ Error: No image provided"

    try:
        image = image.convert("RGB")
        inputs = processor(images=image, return_tensors="pt")

        if inputs is None or "pixel_values" not in inputs:
            return "❌ Error: Invalid processor output"

        # The model is loaded with device_map="auto", so ask the model where
        # it lives instead of relying on a separately guessed device.
        target_device = model.device
        inputs = {k: v.to(target_device) for k, v in inputs.items()}

        generated_ids = model.generate(**inputs, max_new_tokens=256)
        decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
        return decoded[0]
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing.
        return f"❌ Error during inference: {e}"
40
 
# Gradio UI: a PIL image goes in, ocr_infer's text comes out.
iface = gr.Interface(
    fn=ocr_infer,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Typhoon OCR 3B",
)

# Start serving the interface.
iface.launch()