mrrtmob committed on
Commit
5789876
·
verified ·
1 Parent(s): 0f5e761

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -40
app.py CHANGED
@@ -1,45 +1,19 @@
1
  import gradio as gr
2
- try:
3
- import spaces
4
- except ImportError:
5
- class spaces:
6
- @staticmethod
7
- def GPU(func):
8
- return func
9
-
10
  from kiri_ocr import OCR
11
  from PIL import Image, ImageDraw
12
  import numpy as np
13
- import torch
14
 
15
- # Initialize OCR and force FP32
16
  try:
17
  print("Loading Kiri OCR model...")
18
- print(f"PyTorch version: {torch.__version__}")
19
- print(f"CUDA available: {torch.cuda.is_available()}")
20
-
21
- # Load model normally
22
- ocr = OCR(verbose=True, device="cuda")
23
-
24
- # FORCE FP32 after loading
25
- if hasattr(ocr, 'model') and ocr.model is not None:
26
- print("Converting model to FP32...")
27
- ocr.model = ocr.model.float()
28
-
29
- # Also update the config to prevent future conversions
30
- if hasattr(ocr, 'transformer_cfg'):
31
- ocr.transformer_cfg.USE_FP16 = False
32
- print("✓ Disabled FP16 in config")
33
-
34
- print("✓ Model loaded successfully on GPU with FP32")
35
-
36
  except Exception as e:
37
  print(f"Error loading model: {e}")
38
- import traceback
39
- traceback.print_exc()
40
  ocr = None
41
 
42
- @spaces.GPU
43
  def process_image(image_path):
44
  if ocr is None:
45
  return None, "Error: OCR model failed to load."
@@ -49,14 +23,8 @@ def process_image(image_path):
49
 
50
  try:
51
  print(f"Processing image: {image_path}")
52
-
53
- # Ensure model is in FP32 mode before processing
54
- if hasattr(ocr, 'model'):
55
- ocr.model = ocr.model.float()
56
-
57
- # Disable autocast to prevent automatic FP16
58
- with torch.cuda.amp.autocast(enabled=False):
59
- text, results = ocr.extract_text(image_path, verbose=True)
60
 
61
  print(f"Extracted {len(results)} regions.")
62
 
@@ -71,6 +39,7 @@ def process_image(image_path):
71
  for item in results:
72
  if 'box' in item:
73
  x, y, w, h = item['box']
 
74
  x, y, w, h = int(x), int(y), int(w), int(h)
75
  draw.rectangle([x, y, x + w, y + h], outline="red", width=3)
76
 
@@ -95,4 +64,4 @@ demo = gr.Interface(
95
  )
96
 
97
  if __name__ == "__main__":
98
- demo.launch()
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
2
  from kiri_ocr import OCR
3
  from PIL import Image, ImageDraw
4
  import numpy as np
5
+ import os
6
 
7
+ # Initialize OCR
8
  try:
9
  print("Loading Kiri OCR model...")
10
+ # Use verbose=True to see what's happening
11
+ ocr = OCR(verbose=True)
12
+ print("Model loaded successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  except Exception as e:
14
  print(f"Error loading model: {e}")
 
 
15
  ocr = None
16
 
 
17
  def process_image(image_path):
18
  if ocr is None:
19
  return None, "Error: OCR model failed to load."
 
23
 
24
  try:
25
  print(f"Processing image: {image_path}")
26
+ # extract_text returns (text, results)
27
+ text, results = ocr.extract_text(image_path, verbose=True)
 
 
 
 
 
 
28
 
29
  print(f"Extracted {len(results)} regions.")
30
 
 
39
  for item in results:
40
  if 'box' in item:
41
  x, y, w, h = item['box']
42
+ # Ensure coordinates are ints
43
  x, y, w, h = int(x), int(y), int(w), int(h)
44
  draw.rectangle([x, y, x + w, y + h], outline="red", width=3)
45
 
 
64
  )
65
 
66
  if __name__ == "__main__":
67
+ demo.launch(server_name="0.0.0.0", server_port=7860, share=False)