marcosremar2 committed on
Commit
b50c572
·
1 Parent(s): 85c59d5

Simplify Docker setup and dependencies for minimal API version

Browse files
Files changed (3) hide show
  1. Dockerfile +2 -33
  2. entrypoint.sh +3 -108
  3. requirements.txt +1 -9
Dockerfile CHANGED
@@ -3,14 +3,7 @@ FROM python:3.10-slim
3
  # Install system dependencies
4
  RUN apt-get update && \
5
  apt-get install -y --no-install-recommends \
6
- build-essential \
7
  curl \
8
- libgl1-mesa-glx \
9
- libglib2.0-0 \
10
- wget \
11
- git \
12
- libopenblas-dev \
13
- libgomp1 \
14
  && rm -rf /var/lib/apt/lists/*
15
 
16
  # Set working directory
@@ -20,20 +13,11 @@ WORKDIR /app
20
  ENV PYTHONUNBUFFERED=1 \
21
  PYTHONDONTWRITEBYTECODE=1 \
22
  UPLOAD_FOLDER=/tmp/pdf_uploads \
23
- OUTPUT_FOLDER=/tmp/pdf_output \
24
- HF_HUB_DISABLE_PROGRESS_BARS=1 \
25
- HF_HUB_ENABLE_HF_TRANSFER=0
26
 
27
  # Create necessary directories and set permissions
28
  RUN mkdir -p /tmp/pdf_uploads /tmp/pdf_output \
29
- && mkdir -p /tmp/models/MFD/YOLO \
30
- && mkdir -p /tmp/models/MFR/unimernet \
31
- && mkdir -p /tmp/models/table/rapid \
32
- && mkdir -p /tmp/models/layout/doclayout \
33
- && chmod -R 777 /tmp/pdf_uploads /tmp/pdf_output /tmp/models
34
-
35
- # Create magic-pdf config directories
36
- RUN mkdir -p /root/.config/magic_pdf
37
 
38
  # Copy requirements and app files
39
  COPY requirements.txt /app/
@@ -42,23 +26,8 @@ COPY entrypoint.sh /app/
42
  RUN chmod +x /app/entrypoint.sh
43
 
44
  # Install Python dependencies
45
- RUN pip install --no-cache-dir --upgrade pip setuptools wheel
46
  RUN pip install --no-cache-dir -r requirements.txt
47
 
48
- # Copy default magic-pdf config
49
- RUN echo '{"device-mode":"cpu","layout-config":{"model":"doclayout_yolo","enable":true},"formula-config":{"mfd_model":"yolo_v8_mfd","mfr_model":"unimernet_small","enable":false},"table-config":{"model":"rapid_table","sub_model":"slanet_plus","enable":false}}' > /root/.config/magic_pdf/magic-pdf.json
50
-
51
- # Download model files directly
52
- RUN curl -L https://huggingface.co/marcosremar2/mineru-models/resolve/main/doclayout_yolo.pt -o /tmp/models/layout/doclayout/doclayout_yolo.pt
53
- RUN curl -L https://huggingface.co/marcosremar2/mineru-models/resolve/main/yolo_v8_ft.pt -o /tmp/models/MFD/YOLO/yolo_v8_ft.pt
54
-
55
- # Verify model files were downloaded correctly
56
- RUN ls -la /tmp/models/layout/doclayout/doclayout_yolo.pt
57
- RUN ls -la /tmp/models/MFD/YOLO/yolo_v8_ft.pt
58
-
59
- # Check if magic-pdf exists but don't require it to work yet
60
- RUN pip show minerupdf || echo "MinerU PDF will be installed at runtime"
61
-
62
  # Expose port
63
  EXPOSE 7860
64
 
 
3
  # Install system dependencies
4
  RUN apt-get update && \
5
  apt-get install -y --no-install-recommends \
 
6
  curl \
 
 
 
 
 
 
7
  && rm -rf /var/lib/apt/lists/*
8
 
9
  # Set working directory
 
13
  ENV PYTHONUNBUFFERED=1 \
14
  PYTHONDONTWRITEBYTECODE=1 \
15
  UPLOAD_FOLDER=/tmp/pdf_uploads \
16
+ OUTPUT_FOLDER=/tmp/pdf_output
 
 
17
 
18
  # Create necessary directories and set permissions
19
  RUN mkdir -p /tmp/pdf_uploads /tmp/pdf_output \
20
+ && chmod -R 777 /tmp/pdf_uploads /tmp/pdf_output
 
 
 
 
 
 
 
21
 
22
  # Copy requirements and app files
23
  COPY requirements.txt /app/
 
26
  RUN chmod +x /app/entrypoint.sh
27
 
28
  # Install Python dependencies
 
29
  RUN pip install --no-cache-dir -r requirements.txt
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # Expose port
32
  EXPOSE 7860
33
 
entrypoint.sh CHANGED
@@ -5,115 +5,10 @@ set -e
5
  echo "Starting entrypoint.sh..."
6
  echo "User: $(whoami)"
7
  echo "Current directory: $(pwd)"
8
- echo "Directory listing:"
9
- ls -la
10
 
11
- # Set environment variables to disable HF progress bars (prevents hanging)
12
- export HF_HUB_DISABLE_PROGRESS_BARS=1
13
- export HF_HUB_ENABLE_HF_TRANSFER=0
14
-
15
- # Verify directories existence and permissions
16
- echo "Checking directories..."
17
- for dir in /tmp/pdf_uploads /tmp/pdf_output /tmp/models/MFD/YOLO /tmp/models/MFR/unimernet /tmp/models/table/rapid /tmp/models/layout/doclayout
18
- do
19
- if [ ! -d "$dir" ]; then
20
- echo "Creating directory: $dir"
21
- mkdir -p "$dir"
22
- fi
23
- chmod -R 777 "$dir"
24
- echo "Directory $dir is ready"
25
- done
26
-
27
- # Copy config file to all possible locations to ensure it's found
28
- echo "Setting up magic-pdf config..."
29
- CONFIG_CONTENT='{
30
- "device-mode": "cpu",
31
- "layout-config": {
32
- "model": "doclayout_yolo",
33
- "enable": true
34
- },
35
- "formula-config": {
36
- "mfd_model": "yolo_v8_mfd",
37
- "mfr_model": "unimernet_small",
38
- "enable": false
39
- },
40
- "table-config": {
41
- "model": "rapid_table",
42
- "sub_model": "slanet_plus",
43
- "enable": false
44
- }
45
- }'
46
-
47
- # Create config in all possible locations
48
- mkdir -p ~/.config/magic_pdf
49
- echo "$CONFIG_CONTENT" > ~/.config/magic_pdf/magic-pdf.json
50
- echo "$CONFIG_CONTENT" > ~/magic-pdf.json
51
- echo "$CONFIG_CONTENT" > /app/magic-pdf.json
52
- echo "$CONFIG_CONTENT" > /home/user/magic-pdf.json
53
- echo "$CONFIG_CONTENT" > /root/.config/magic_pdf/magic-pdf.json
54
-
55
- # Download model function with validation
56
- function download_model_with_validation() {
57
- local model_path=$1
58
- local model_url=$2
59
- local max_attempts=3
60
- local attempt=1
61
-
62
- echo "Checking model file: $model_path"
63
-
64
- if [ ! -f "$model_path" ] || [ ! -s "$model_path" ]; then
65
- while [ $attempt -le $max_attempts ]; do
66
- echo "Downloading model attempt $attempt/$max_attempts: $model_url"
67
- curl -L --retry 5 --retry-delay 2 "$model_url" -o "$model_path"
68
-
69
- # Verify file exists and has content
70
- if [ -f "$model_path" ] && [ -s "$model_path" ]; then
71
- size=$(du -h "$model_path" | cut -f1)
72
- echo "✅ Model downloaded successfully ($size): $model_path"
73
- return 0
74
- else
75
- echo "❌ Download failed or file is empty. Retrying..."
76
- rm -f "$model_path" 2>/dev/null
77
- attempt=$((attempt+1))
78
- sleep 2
79
- fi
80
- done
81
-
82
- echo "❌ Failed to download model after $max_attempts attempts: $model_url"
83
- exit 1
84
- else
85
- size=$(du -h "$model_path" | cut -f1)
86
- echo "✅ Model already exists ($size): $model_path"
87
- fi
88
- }
89
-
90
- # Download and verify all required models
91
- echo "Verifying model files..."
92
- download_model_with_validation "/tmp/models/layout/doclayout/doclayout_yolo.pt" "https://huggingface.co/marcosremar2/mineru-models/resolve/main/doclayout_yolo.pt"
93
- download_model_with_validation "/tmp/models/MFD/YOLO/yolo_v8_ft.pt" "https://huggingface.co/marcosremar2/mineru-models/resolve/main/yolo_v8_ft.pt"
94
-
95
- # List all model files for verification
96
- echo "Model files verification:"
97
- find /tmp/models -type f -exec ls -la {} \;
98
-
99
- # Verify magic-pdf exists and is executable
100
- echo "Checking magic-pdf installation..."
101
- if command -v magic-pdf &> /dev/null; then
102
- echo "magic-pdf found. Testing version:"
103
- magic-pdf --version || echo "Warning: magic-pdf command exists but might not be functioning properly."
104
- else
105
- echo "magic-pdf command not found. Installing..."
106
- pip install --no-cache-dir minerupdf==1.3.6
107
- if command -v magic-pdf &> /dev/null; then
108
- echo "magic-pdf installed successfully. Testing version:"
109
- magic-pdf --version || echo "Warning: magic-pdf command was installed but might not be functioning properly."
110
- else
111
- echo "Warning: Failed to install magic-pdf."
112
- echo "PATH: $PATH"
113
- pip list | grep miner
114
- # Continue anyway
115
- fi
116
- fi
117
 
118
  # Start the Flask application
119
  echo "Starting Flask application..."
 
5
  echo "Starting entrypoint.sh..."
6
  echo "User: $(whoami)"
7
  echo "Current directory: $(pwd)"
 
 
8
 
9
+ # Create necessary directories
10
+ mkdir -p /tmp/pdf_uploads /tmp/pdf_output
11
+ chmod -R 777 /tmp/pdf_uploads /tmp/pdf_output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # Start the Flask application
14
  echo "Starting Flask application..."
requirements.txt CHANGED
@@ -1,12 +1,4 @@
1
  flask==2.3.3
2
  flask-cors==4.0.0
3
  werkzeug==2.3.7
4
- Pillow>=9.0.0
5
- numpy>=1.20.0
6
- requests>=2.25.0
7
- opencv-python-headless>=4.5.0
8
- torch>=1.8.0
9
- torchvision>=0.9.0
10
- transformers>=4.15.0
11
- huggingface_hub>=0.11.0
12
- minerupdf==1.3.6
 
1
  flask==2.3.3
2
  flask-cors==4.0.0
3
  werkzeug==2.3.7
4
+ requests>=2.25.0