Alfonso Velasco committed on
Commit
67b16f3
·
1 Parent(s): f7708ca

Fix Tesseract version parsing and OMP_NUM_THREADS error

Browse files
Files changed (2) hide show
  1. app.py +71 -26
  2. requirements.txt +1 -1
app.py CHANGED
@@ -8,20 +8,38 @@ import io
8
  import base64
9
  import fitz # PyMuPDF
10
  import tempfile
 
 
 
 
11
 
12
  app = FastAPI()
13
 
14
- # Initialize model on startup
15
- processor = LayoutLMv3Processor.from_pretrained(
16
- "microsoft/layoutlmv3-base",
17
- apply_ocr=True # OCR will work with Tesseract installed
18
- )
19
- model = LayoutLMv3ForTokenClassification.from_pretrained(
20
- "microsoft/layoutlmv3-base"
21
- )
22
- model.eval()
23
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24
- model.to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  class DocumentRequest(BaseModel):
27
  pdf: str = None
@@ -68,14 +86,27 @@ def process_pdf(pdf_bytes):
68
  img_data = pix.tobytes("png")
69
  image = Image.open(io.BytesIO(img_data)).convert("RGB")
70
 
71
- # Process with LayoutLMv3
72
- encoding = processor(
73
- image,
74
- truncation=True,
75
- padding="max_length",
76
- max_length=512,
77
- return_tensors="pt"
78
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  encoding = {k: v.to(device) for k, v in encoding.items() if isinstance(v, torch.Tensor)}
81
 
@@ -104,6 +135,7 @@ def process_pdf(pdf_bytes):
104
  })
105
 
106
  pdf_document.close()
 
107
 
108
  return {
109
  "document_type": "pdf",
@@ -115,13 +147,26 @@ def process_image(image_bytes):
115
  """Process single image"""
116
  image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
117
 
118
- encoding = processor(
119
- image,
120
- truncation=True,
121
- padding="max_length",
122
- max_length=512,
123
- return_tensors="pt"
124
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  encoding = {k: v.to(device) for k, v in encoding.items() if isinstance(v, torch.Tensor)}
127
 
 
8
  import base64
9
  import fitz # PyMuPDF
10
  import tempfile
11
+ import os
12
+
13
+ # Fix the OMP_NUM_THREADS issue
14
+ os.environ['OMP_NUM_THREADS'] = '1'
15
 
16
  app = FastAPI()
17
 
18
+ # Initialize model on startup with error handling
19
+ try:
20
+ processor = LayoutLMv3Processor.from_pretrained(
21
+ "microsoft/layoutlmv3-base",
22
+ apply_ocr=True
23
+ )
24
+ model = LayoutLMv3ForTokenClassification.from_pretrained(
25
+ "microsoft/layoutlmv3-base"
26
+ )
27
+ model.eval()
28
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
29
+ model.to(device)
30
+ except Exception as e:
31
+ print(f"Error loading model: {e}")
32
+ # Fallback to no OCR if there's an issue
33
+ processor = LayoutLMv3Processor.from_pretrained(
34
+ "microsoft/layoutlmv3-base",
35
+ apply_ocr=False
36
+ )
37
+ model = LayoutLMv3ForTokenClassification.from_pretrained(
38
+ "microsoft/layoutlmv3-base"
39
+ )
40
+ model.eval()
41
+ device = torch.device("cpu")
42
+ model.to(device)
43
 
44
  class DocumentRequest(BaseModel):
45
  pdf: str = None
 
86
  img_data = pix.tobytes("png")
87
  image = Image.open(io.BytesIO(img_data)).convert("RGB")
88
 
89
+ try:
90
+ # Try with OCR
91
+ encoding = processor(
92
+ image,
93
+ truncation=True,
94
+ padding="max_length",
95
+ max_length=512,
96
+ return_tensors="pt"
97
+ )
98
+ except Exception as ocr_error:
99
+ print(f"OCR failed: {ocr_error}, using fallback")
100
+ # Fallback: process without OCR
101
+ encoding = processor(
102
+ image,
103
+ text=[""] * 512, # Dummy text
104
+ boxes=[[0, 0, 0, 0]] * 512, # Dummy boxes
105
+ truncation=True,
106
+ padding="max_length",
107
+ max_length=512,
108
+ return_tensors="pt"
109
+ )
110
 
111
  encoding = {k: v.to(device) for k, v in encoding.items() if isinstance(v, torch.Tensor)}
112
 
 
135
  })
136
 
137
  pdf_document.close()
138
+ os.unlink(tmp_file.name) # Clean up temp file
139
 
140
  return {
141
  "document_type": "pdf",
 
147
  """Process single image"""
148
  image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
149
 
150
+ try:
151
+ encoding = processor(
152
+ image,
153
+ truncation=True,
154
+ padding="max_length",
155
+ max_length=512,
156
+ return_tensors="pt"
157
+ )
158
+ except Exception as e:
159
+ print(f"OCR failed: {e}, using fallback")
160
+ # Fallback without OCR
161
+ encoding = processor(
162
+ image,
163
+ text=[""] * 512,
164
+ boxes=[[0, 0, 0, 0]] * 512,
165
+ truncation=True,
166
+ padding="max_length",
167
+ max_length=512,
168
+ return_tensors="pt"
169
+ )
170
 
171
  encoding = {k: v.to(device) for k, v in encoding.items() if isinstance(v, torch.Tensor)}
172
 
requirements.txt CHANGED
@@ -3,6 +3,6 @@ uvicorn[standard]
3
  transformers>=4.35.0
4
  torch>=2.0.0
5
  pillow>=9.0.0
6
- pytesseract>=0.3.10
7
  pymupdf>=1.23.0
8
  pydantic
 
3
  transformers>=4.35.0
4
  torch>=2.0.0
5
  pillow>=9.0.0
6
+ pytesseract==0.3.10
7
  pymupdf>=1.23.0
8
  pydantic