Soumik Bose committed on
Commit
3ddb265
·
1 Parent(s): 8be8190
Files changed (4) hide show
  1. .gitignore +126 -0
  2. Dockerfile +9 -12
  3. main.py +262 -22
  4. requirements.txt +3 -2
.gitignore ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment Variables
2
+ .env
3
+ .env.local
4
+ .env.*.local
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+ *.so
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # Virtual Environments
31
+ venv/
32
+ env/
33
+ ENV/
34
+ env.bak/
35
+ venv.bak/
36
+ .venv
37
+
38
+ # IDEs
39
+ .vscode/
40
+ .idea/
41
+ *.swp
42
+ *.swo
43
+ *~
44
+ .DS_Store
45
+
46
+ # Jupyter Notebook
47
+ .ipynb_checkpoints
48
+
49
+ # PyCharm
50
+ .idea/
51
+
52
+ # Testing
53
+ .pytest_cache/
54
+ .coverage
55
+ .coverage.*
56
+ htmlcov/
57
+ .tox/
58
+ .nox/
59
+ coverage.xml
60
+ *.cover
61
+ .hypothesis/
62
+
63
+ # Logs
64
+ *.log
65
+ logs/
66
+ *.log.*
67
+
68
+ # Temporary files
69
+ *.tmp
70
+ *.temp
71
+ temp/
72
+ tmp/
73
+ *.bak
74
+
75
+ # OCR Processing files
76
+ uploads/
77
+ processed/
78
+ output/
79
+ samples/
80
+ *.pdf
81
+ *.jpg
82
+ *.jpeg
83
+ *.png
84
+ *.bmp
85
+ *.webp
86
+ *.tiff
87
+
88
+ # Docker
89
+ *.env.docker
90
+ docker-compose.override.yml
91
+
92
+ # OS
93
+ Thumbs.db
94
+ .DS_Store
95
+ *.swp
96
+
97
+ # Database
98
+ *.db
99
+ *.sqlite
100
+ *.sqlite3
101
+
102
+ # Cache
103
+ .cache/
104
+ *.cache
105
+ __pycache__/
106
+ .mypy_cache/
107
+ .dmypy.json
108
+ dmypy.json
109
+
110
+ # Model files (if downloading OCR models)
111
+ *.onnx
112
+ models/
113
+ weights/
114
+
115
+ # Hugging Face cache
116
+ .huggingface/
117
+
118
+ # Node modules (if using any JS tooling)
119
+ node_modules/
120
+
121
+ # Secrets and certificates
122
+ *.pem
123
+ *.key
124
+ *.crt
125
+ secrets/
126
+ credentials/
Dockerfile CHANGED
@@ -2,23 +2,15 @@ FROM python:3.11-slim
2
 
3
  WORKDIR /app
4
 
5
- # Install system dependencies
6
  RUN apt-get update && apt-get install -y \
7
  curl \
8
- tesseract-ocr \
9
- tesseract-ocr-eng \
10
- tesseract-ocr-deu \
11
- tesseract-ocr-fra \
12
- tesseract-ocr-spa \
13
- tesseract-ocr-por \
14
- tesseract-ocr-ita \
15
- tesseract-ocr-rus \
16
- tesseract-ocr-chi-sim \
17
- tesseract-ocr-jpn \
18
- tesseract-ocr-kor \
19
  poppler-utils \
20
  libgl1 \
21
  libglib2.0-0 \
 
 
 
22
  && rm -rf /var/lib/apt/lists/*
23
 
24
  # Fix: Ensure logs appear immediately in the console
@@ -27,19 +19,24 @@ ENV PYTHONIOENCODING=UTF-8
27
  ENV HF_HOME=/tmp/cache
28
  ENV PORT=7860
29
 
 
30
  COPY requirements.txt .
31
  RUN pip install --upgrade pip setuptools wheel \
32
  && pip install --default-timeout=100 --retries=10 --no-cache-dir -r requirements.txt
33
 
 
34
  COPY . .
35
 
 
36
  RUN useradd -m appuser && chown -R appuser /app
37
  USER appuser
38
 
 
39
  RUN mkdir -p ${HF_HOME} && chmod 777 ${HF_HOME}
40
 
41
  EXPOSE $PORT
42
 
 
43
  CMD bash -c "\
44
  (while true; do curl -s https://xce009-ocr-api.hf.space >/dev/null; sleep 300; done) & \
45
  uvicorn main:app --host 0.0.0.0 --port ${PORT} --workers 4"
 
2
 
3
  WORKDIR /app
4
 
5
+ # Install system dependencies for RapidOCR and PDF processing
6
  RUN apt-get update && apt-get install -y \
7
  curl \
 
 
 
 
 
 
 
 
 
 
 
8
  poppler-utils \
9
  libgl1 \
10
  libglib2.0-0 \
11
+ libgomp1 \
12
+ gcc \
13
+ g++ \
14
  && rm -rf /var/lib/apt/lists/*
15
 
16
  # Fix: Ensure logs appear immediately in the console
 
19
  ENV HF_HOME=/tmp/cache
20
  ENV PORT=7860
21
 
22
+ # Copy requirements and install dependencies
23
  COPY requirements.txt .
24
  RUN pip install --upgrade pip setuptools wheel \
25
  && pip install --default-timeout=100 --retries=10 --no-cache-dir -r requirements.txt
26
 
27
+ # Copy application files
28
  COPY . .
29
 
30
+ # Create non-root user
31
  RUN useradd -m appuser && chown -R appuser /app
32
  USER appuser
33
 
34
+ # Create cache directory
35
  RUN mkdir -p ${HF_HOME} && chmod 777 ${HF_HOME}
36
 
37
  EXPOSE $PORT
38
 
39
+ # Start the application
40
  CMD bash -c "\
41
  (while true; do curl -s https://xce009-ocr-api.hf.space >/dev/null; sleep 300; done) & \
42
  uvicorn main:app --host 0.0.0.0 --port ${PORT} --workers 4"
main.py CHANGED
@@ -11,7 +11,9 @@ from contextvars import ContextVar
11
 
12
  # Third-party imports
13
  import uvicorn
14
- import pytesseract
 
 
15
  from fastapi import (
16
  FastAPI, File, UploadFile, Depends,
17
  HTTPException, Request, status
@@ -39,10 +41,16 @@ class Config:
39
  MAX_SIZE = int(os.getenv("MAX_FILE_SIZE", 52428800)) # 50MB
40
  ALLOWED_ORIGINS = [o.strip() for o in os.getenv("ALLOWED_ORIGINS", "").split(",") if o.strip()]
41
  ALLOWED_TYPES = ["image/jpeg", "image/png", "image/bmp", "image/webp", "application/pdf"]
 
 
 
 
 
 
 
42
 
43
  class RequestIdFilter(logging.Filter):
44
  def filter(self, record):
45
- # Automatically pull request_id from the context variable
46
  record.request_id = request_id_ctx.get()
47
  return True
48
 
@@ -50,7 +58,7 @@ logging.basicConfig(
50
  level=logging.INFO,
51
  format='%(asctime)s | %(levelname)s | ReqID:%(request_id)s | %(message)s',
52
  datefmt='%Y-%m-%d %H:%M:%S',
53
- force=True # Ensures our config is applied
54
  )
55
  logger = logging.getLogger("ocr_api")
56
  logger.addFilter(RequestIdFilter())
@@ -72,6 +80,8 @@ class PageResult(BaseModel):
72
  index: int
73
  page_number: int
74
  text: str
 
 
75
 
76
  class OCRResult(BaseModel):
77
  filename: str
@@ -79,6 +89,7 @@ class OCRResult(BaseModel):
79
  saved_file_path: str
80
  total_pages: int
81
  pages_content: List[PageResult]
 
82
 
83
  class APIResponse(BaseResponse):
84
  data: Optional[OCRResult] = None
@@ -116,12 +127,146 @@ class FileValidator:
116
  raise HTTPException(413, "File too large")
117
  return tmp_path
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  class OCRProcessor:
120
- @classmethod
121
- def process_file(cls, file_path: str, content_type: str) -> dict:
122
- """Note: No longer passing request_id; logger picks it up from contextvars automatically."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  start = time.perf_counter()
124
  pages_content = []
 
125
 
126
  try:
127
  logger.info(f"Processing File: {file_path}")
@@ -134,16 +279,54 @@ class OCRProcessor:
134
  for idx, img in enumerate(images):
135
  page_num = idx + 1
136
  logger.info(f"Scanning Page {page_num}/{total}")
137
- text = pytesseract.image_to_string(img).strip()
138
- pages_content.append({"index": idx, "page_number": page_num, "text": text})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  else:
140
  logger.info("Scanning Single Image...")
141
- img = Image.open(file_path)
142
- text = pytesseract.image_to_string(img).strip()
143
- pages_content.append({"index": 0, "page_number": 1, "text": text})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
- logger.info(f"OCR Complete in {(time.perf_counter()-start)*1000:.2f}ms")
146
- return {"total_pages": len(pages_content), "pages_content": pages_content}
 
 
 
 
 
 
 
147
 
148
  except Exception as e:
149
  logger.error(f"OCR Logic Failure: {str(e)}")
@@ -180,7 +363,14 @@ async def request_context_middleware(request: Request, call_next):
180
  return response
181
  except Exception as e:
182
  logger.exception("Middleware caught crash")
183
- return JSONResponse(status_code=500, content={"status":"error","message":"Internal Server Error","request_id":req_id})
 
 
 
 
 
 
 
184
  finally:
185
  # 3. Clean up Context
186
  request_id_ctx.reset(token)
@@ -195,9 +385,34 @@ async def root(request: Request):
195
  "request_id": request.state.request_id,
196
  "process_time_ms": 0,
197
  "status": StatusEnum.SUCCESS,
198
- "message": "OCR API Active"
 
 
199
  }
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  @app.post("/api/v1/get_data", response_model=APIResponse)
202
  async def extract_data(
203
  request: Request,
@@ -212,10 +427,11 @@ async def extract_data(
212
  FileValidator.validate(file)
213
  tmp_path = FileValidator.check_size_and_save(file)
214
 
215
- # CPU heavy task run in thread pool.
216
- # ContextVars are automatically copied to the thread.
 
217
  result = await run_in_threadpool(
218
- OCRProcessor.process_file,
219
  tmp_path,
220
  file.content_type
221
  )
@@ -230,7 +446,8 @@ async def extract_data(
230
  "content_type": file.content_type,
231
  "saved_file_path": tmp_path,
232
  "total_pages": result["total_pages"],
233
- "pages_content": result["pages_content"]
 
234
  }
235
  }
236
 
@@ -250,7 +467,30 @@ async def extract_data(
250
  if tmp_path:
251
  logger.info(f"File preserved at: {tmp_path}")
252
  try:
253
- os.remove(tmp_path)
254
- logger.info(f"Temporary file deleted: {tmp_path}")
255
  except Exception as e:
256
- logger.warning(f"Failed to delete temp file: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Third-party imports
13
  import uvicorn
14
+ import cv2
15
+ import numpy as np
16
+ from rapidocr_onnxruntime import RapidOCR
17
  from fastapi import (
18
  FastAPI, File, UploadFile, Depends,
19
  HTTPException, Request, status
 
41
  MAX_SIZE = int(os.getenv("MAX_FILE_SIZE", 52428800)) # 50MB
42
  ALLOWED_ORIGINS = [o.strip() for o in os.getenv("ALLOWED_ORIGINS", "").split(",") if o.strip()]
43
  ALLOWED_TYPES = ["image/jpeg", "image/png", "image/bmp", "image/webp", "application/pdf"]
44
+
45
+ # RapidOCR Settings
46
+ USE_ANGLE_CLS = os.getenv("OCR_USE_ANGLE_CLS", "true").lower() == "true"
47
+ USE_TEXT_SCORE = os.getenv("OCR_USE_TEXT_SCORE", "true").lower() == "true"
48
+ MIN_HEIGHT = int(os.getenv("OCR_MIN_HEIGHT", "30"))
49
+ TEXT_SCORE_THRESHOLD = float(os.getenv("OCR_TEXT_SCORE", "0.5"))
50
+ ENABLE_PREPROCESSING = os.getenv("OCR_PREPROCESSING", "true").lower() == "true"
51
 
52
  class RequestIdFilter(logging.Filter):
53
  def filter(self, record):
 
54
  record.request_id = request_id_ctx.get()
55
  return True
56
 
 
58
  level=logging.INFO,
59
  format='%(asctime)s | %(levelname)s | ReqID:%(request_id)s | %(message)s',
60
  datefmt='%Y-%m-%d %H:%M:%S',
61
+ force=True
62
  )
63
  logger = logging.getLogger("ocr_api")
64
  logger.addFilter(RequestIdFilter())
 
80
  index: int
81
  page_number: int
82
  text: str
83
+ confidence: Optional[float] = None
84
+ lines_detected: Optional[int] = None
85
 
86
  class OCRResult(BaseModel):
87
  filename: str
 
89
  saved_file_path: str
90
  total_pages: int
91
  pages_content: List[PageResult]
92
+ average_confidence: Optional[float] = None
93
 
94
  class APIResponse(BaseResponse):
95
  data: Optional[OCRResult] = None
 
127
  raise HTTPException(413, "File too large")
128
  return tmp_path
129
 
130
class RapidOCREngine:
    """Process-wide holder for a single shared RapidOCR engine.

    The first successful ``RapidOCREngine()`` call builds the engine from the
    OCR settings on ``Config``; every later call returns that same instance.
    """

    _instance = None
    _engine = None

    def __new__(cls):
        """Return the shared instance, creating and initializing it on first use.

        Raises:
            Exception: whatever ``_initialize_engine`` raised; in that case no
                instance is cached, so the next call attempts initialization
                again.
        """
        if cls._instance is None:
            # Publish the instance only after the engine has been built.
            # Assigning cls._instance first would cache a half-initialized
            # object (engine still None) whenever initialization fails.
            instance = super().__new__(cls)
            instance._initialize_engine()
            cls._instance = instance
        return cls._instance

    def _initialize_engine(self):
        """Build the CPU-only RapidOCR engine from ``Config`` settings.

        Raises:
            Exception: re-raised unchanged after being logged.
        """
        try:
            self._engine = RapidOCR(
                det_use_cuda=False,
                cls_use_cuda=False,
                rec_use_cuda=False,
                use_angle_cls=Config.USE_ANGLE_CLS,
                use_text_score=Config.USE_TEXT_SCORE,
                print_verbose=False,
                min_height=Config.MIN_HEIGHT,
                text_score=Config.TEXT_SCORE_THRESHOLD
            )
            logger.info("RapidOCR engine initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize RapidOCR: {str(e)}")
            raise

    def get_engine(self):
        """Return the underlying RapidOCR engine object."""
        return self._engine

    @staticmethod
    def preprocess_image(img_array):
        """Clean up an image before OCR.

        Pipeline: grayscale -> non-local-means denoise -> CLAHE contrast
        enhancement -> 3x3 sharpen -> adaptive Gaussian threshold.

        Args:
            img_array: image as a NumPy array; a 3-dimensional array is
                converted with ``COLOR_BGR2GRAY``, anything else is used
                as-is for the grayscale stage.

        Returns:
            The binarized image, or ``img_array`` unchanged when
            ``Config.ENABLE_PREPROCESSING`` is false or any step fails.
        """
        if not Config.ENABLE_PREPROCESSING:
            return img_array

        try:
            # Convert to grayscale if needed
            if len(img_array.shape) == 3:
                gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
            else:
                gray = img_array

            # Denoise
            denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

            # Enhance contrast using CLAHE
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            contrast = clahe.apply(denoised)

            # Sharpen
            kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
            sharpened = cv2.filter2D(contrast, -1, kernel)

            # Adaptive threshold
            processed = cv2.adaptiveThreshold(
                sharpened, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY, 11, 2
            )

            return processed
        except Exception as e:
            # Best effort: OCR can still run on the untouched image.
            logger.warning(f"Preprocessing failed, using original image: {str(e)}")
            return img_array
196
+
197
  class OCRProcessor:
198
def __init__(self):
    """Bind this processor to the engine held by the RapidOCREngine singleton."""
    shared_holder = RapidOCREngine()
    self.ocr_engine = shared_holder.get_engine()
200
+
201
def _extract_from_image(self, img_array) -> dict:
    """Extract text from a single image using RapidOCR.

    Args:
        img_array: image array; it is passed through
            ``RapidOCREngine.preprocess_image`` before recognition.

    Returns:
        A dict with ``text`` (recognized lines joined by newlines),
        ``confidence`` (mean of the per-line scores) and ``lines_detected``
        (number of lines kept). All three are empty/zero when nothing is
        recognized.

    Raises:
        ValueError: if preprocessing or the OCR call fails; the original
            exception is chained as the cause.
    """
    try:
        # Preprocess image
        processed_img = RapidOCREngine.preprocess_image(img_array)

        # Perform OCR (the second element of the return value is not used)
        result, _elapse = self.ocr_engine(processed_img)

        if result is None or len(result) == 0:
            return {
                "text": "",
                "confidence": 0.0,
                "lines_detected": 0
            }

        # Parse results
        texts = []
        confidences = []

        for line in result:
            try:
                # Skip anything that is not a sequence of at least two items.
                # Without this guard a non-sequence entry would fall through
                # and re-append the text/confidence left over from the
                # previous iteration.
                if not isinstance(line, (list, tuple)) or len(line) < 2:
                    continue

                if len(line) == 2:
                    # [box, text] or [text, confidence]
                    if isinstance(line[0], (list, tuple)):
                        text, confidence = line[1], 1.0
                    else:
                        text, confidence = line
                else:
                    # [box, text, confidence, ...]; extra items are ignored
                    text, confidence = line[1], line[2]

                texts.append(str(text))
                confidences.append(float(confidence) if confidence is not None else 1.0)
            except Exception as e:
                logger.debug(f"Skipping malformed line: {e}")
                continue

        if not texts:
            return {
                "text": "",
                "confidence": 0.0,
                "lines_detected": 0
            }

        combined_text = '\n'.join(texts)
        avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0

        return {
            "text": combined_text,
            "confidence": avg_confidence,
            "lines_detected": len(texts)
        }

    except Exception as e:
        logger.error(f"Image OCR extraction failed: {str(e)}")
        raise ValueError(f"OCR extraction error: {str(e)}") from e
264
+
265
+ def process_file(self, file_path: str, content_type: str) -> dict:
266
+ """Process PDF or image file and extract text"""
267
  start = time.perf_counter()
268
  pages_content = []
269
+ all_confidences = []
270
 
271
  try:
272
  logger.info(f"Processing File: {file_path}")
 
279
  for idx, img in enumerate(images):
280
  page_num = idx + 1
281
  logger.info(f"Scanning Page {page_num}/{total}")
282
+
283
+ # Convert PIL Image to numpy array for OpenCV
284
+ img_array = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
285
+
286
+ # Extract text
287
+ ocr_result = self._extract_from_image(img_array)
288
+
289
+ pages_content.append({
290
+ "index": idx,
291
+ "page_number": page_num,
292
+ "text": ocr_result["text"],
293
+ "confidence": ocr_result["confidence"],
294
+ "lines_detected": ocr_result["lines_detected"]
295
+ })
296
+
297
+ if ocr_result["confidence"] > 0:
298
+ all_confidences.append(ocr_result["confidence"])
299
  else:
300
  logger.info("Scanning Single Image...")
301
+
302
+ # Read image with OpenCV
303
+ img_array = cv2.imread(file_path)
304
+ if img_array is None:
305
+ raise ValueError("Failed to load image file")
306
+
307
+ # Extract text
308
+ ocr_result = self._extract_from_image(img_array)
309
+
310
+ pages_content.append({
311
+ "index": 0,
312
+ "page_number": 1,
313
+ "text": ocr_result["text"],
314
+ "confidence": ocr_result["confidence"],
315
+ "lines_detected": ocr_result["lines_detected"]
316
+ })
317
+
318
+ if ocr_result["confidence"] > 0:
319
+ all_confidences.append(ocr_result["confidence"])
320
 
321
+ avg_confidence = sum(all_confidences) / len(all_confidences) if all_confidences else 0.0
322
+
323
+ logger.info(f"OCR Complete in {(time.perf_counter()-start)*1000:.2f}ms | Avg Confidence: {avg_confidence:.2%}")
324
+
325
+ return {
326
+ "total_pages": len(pages_content),
327
+ "pages_content": pages_content,
328
+ "average_confidence": avg_confidence
329
+ }
330
 
331
  except Exception as e:
332
  logger.error(f"OCR Logic Failure: {str(e)}")
 
363
  return response
364
  except Exception as e:
365
  logger.exception("Middleware caught crash")
366
+ return JSONResponse(
367
+ status_code=500,
368
+ content={
369
+ "status": "error",
370
+ "message": "Internal Server Error",
371
+ "request_id": req_id
372
+ }
373
+ )
374
  finally:
375
  # 3. Clean up Context
376
  request_id_ctx.reset(token)
 
385
  "request_id": request.state.request_id,
386
  "process_time_ms": 0,
387
  "status": StatusEnum.SUCCESS,
388
+ "message": "RapidOCR API Active",
389
+ "engine": "RapidOCR",
390
+ "version": "1.0.0"
391
  }
392
 
393
+ @app.get("/health")
394
+ async def health_check(request: Request):
395
+ """Health check endpoint"""
396
+ try:
397
+ # Verify OCR engine is initialized
398
+ engine = RapidOCREngine().get_engine()
399
+ return {
400
+ "request_id": request.state.request_id,
401
+ "status": StatusEnum.SUCCESS,
402
+ "message": "Service healthy",
403
+ "ocr_engine": "ready"
404
+ }
405
+ except Exception as e:
406
+ return JSONResponse(
407
+ status_code=503,
408
+ content={
409
+ "request_id": request.state.request_id,
410
+ "status": StatusEnum.ERROR,
411
+ "message": "Service unhealthy",
412
+ "error": str(e)
413
+ }
414
+ )
415
+
416
  @app.post("/api/v1/get_data", response_model=APIResponse)
417
  async def extract_data(
418
  request: Request,
 
427
  FileValidator.validate(file)
428
  tmp_path = FileValidator.check_size_and_save(file)
429
 
430
+ # CPU heavy task run in thread pool
431
+ # ContextVars are automatically copied to the thread
432
+ processor = OCRProcessor()
433
  result = await run_in_threadpool(
434
+ processor.process_file,
435
  tmp_path,
436
  file.content_type
437
  )
 
446
  "content_type": file.content_type,
447
  "saved_file_path": tmp_path,
448
  "total_pages": result["total_pages"],
449
+ "pages_content": result["pages_content"],
450
+ "average_confidence": result.get("average_confidence", 0.0)
451
  }
452
  }
453
 
 
467
  if tmp_path:
468
  logger.info(f"File preserved at: {tmp_path}")
469
  try:
470
+ os.remove(tmp_path)
471
+ logger.info(f"Temporary file deleted: {tmp_path}")
472
  except Exception as e:
473
+ logger.warning(f"Failed to delete temp file: {str(e)}")
474
+
475
+ # ==========================================
476
+ # 6. STARTUP
477
+ # ==========================================
478
+
479
@app.on_event("startup")
async def startup_event():
    """Build the shared OCR engine when the application starts.

    Any initialization failure is logged and re-raised.
    """
    logger.info("Starting OCR API with RapidOCR engine...")
    try:
        # Instantiating the singleton is what triggers engine construction.
        RapidOCREngine()
        logger.info("RapidOCR engine ready")
    except Exception as init_error:
        logger.error(f"Failed to initialize OCR engine: {init_error}")
        raise
489
+
490
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on PORT (default 7860).
    listen_port = int(os.getenv("PORT", 7860))
    uvicorn.run("main:app", host="0.0.0.0", port=listen_port, workers=4)
requirements.txt CHANGED
@@ -5,8 +5,9 @@ python-dotenv>=1.0
5
  aiohttp==3.11.13
6
  requests==2.32.3
7
  pypdf==5.1.0
8
- pytesseract==0.3.13
9
  opencv-python-headless==4.12.0.88
10
  numpy<2.3.0
11
  pdf2image==1.17.0
12
- Pillow==11.2.1
 
 
 
5
  aiohttp==3.11.13
6
  requests==2.32.3
7
  pypdf==5.1.0
 
8
  opencv-python-headless==4.12.0.88
9
  numpy<2.3.0
10
  pdf2image==1.17.0
11
+ Pillow==11.2.1
12
+ rapidocr-onnxruntime>=1.3.0
13
+ onnxruntime>=1.16.0