sharshar1 committed on
Commit
df154dc
·
verified ·
1 Parent(s): d139163

Upload 4 files

Browse files
Files changed (3) hide show
  1. Dockerfile +1 -1
  2. main.py +9 -12
  3. requirements.txt +3 -1
Dockerfile CHANGED
@@ -8,13 +8,13 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
8
  WORKDIR /app
9
 
10
  RUN apt-get update && apt-get install -y --no-install-recommends \
 
11
  libgl1 \
12
  libglib2.0-0 \
13
  libsm6 \
14
  libxext6 \
15
  libxrender1 \
16
  libgomp1 \
17
- poppler-utils \
18
  && rm -rf /var/lib/apt/lists/*
19
 
20
  COPY requirements.txt .
 
8
  WORKDIR /app
9
 
10
  RUN apt-get update && apt-get install -y --no-install-recommends \
11
+ poppler-utils \
12
  libgl1 \
13
  libglib2.0-0 \
14
  libsm6 \
15
  libxext6 \
16
  libxrender1 \
17
  libgomp1 \
 
18
  && rm -rf /var/lib/apt/lists/*
19
 
20
  COPY requirements.txt .
main.py CHANGED
@@ -31,28 +31,25 @@ app.add_middleware(
31
 
32
  @app.on_event("startup")
33
  async def startup_event():
34
- """Preload models on startup to avoid delays on first request."""
35
- try:
36
- print("Loading OCR models...")
37
- get_models()
38
- print("Models loaded successfully!")
39
- except Exception as e:
40
- print(f"Warning: Could not preload models: {e}")
41
- print("Models will be loaded on first request.")
42
 
43
 
44
  def get_models():
45
- """Lazy load PaddleX models."""
46
  global paddle_detector, paddle_recognizer
47
-
48
  if paddle_detector is None or paddle_recognizer is None:
49
  try:
50
  from paddlex import create_model
 
51
  paddle_detector = create_model("PP-OCRv5_server_det")
52
  paddle_recognizer = create_model("arabic_PP-OCRv5_mobile_rec")
 
53
  except Exception as e:
54
- raise RuntimeError(f"Failed to load OCR models: {str(e)}")
55
-
 
 
 
56
  return paddle_detector, paddle_recognizer
57
 
58
 
 
31
 
32
  @app.on_event("startup")
33
  async def startup_event():
34
+ print("Server started. OCR models will be loaded lazily on first request.")
 
 
 
 
 
 
 
35
 
36
 
37
  def get_models():
 
38
  global paddle_detector, paddle_recognizer
39
+
40
  if paddle_detector is None or paddle_recognizer is None:
41
  try:
42
  from paddlex import create_model
43
+ print("Loading PaddleX OCR models...")
44
  paddle_detector = create_model("PP-OCRv5_server_det")
45
  paddle_recognizer = create_model("arabic_PP-OCRv5_mobile_rec")
46
+ print("Models loaded.")
47
  except Exception as e:
48
+ raise HTTPException(
49
+ status_code=500,
50
+ detail=f"OCR models failed to load: {str(e)}"
51
+ )
52
+
53
  return paddle_detector, paddle_recognizer
54
 
55
 
requirements.txt CHANGED
@@ -8,4 +8,6 @@ pdf2image
8
  paddlepaddle
9
  paddlex
10
  opencv-contrib-python
11
- pypdfium2
 
 
 
8
  paddlepaddle
9
  paddlex
10
  opencv-contrib-python
11
+ pypdfium2
12
+ pyclipper
13
+ shapely