Spaces:
Sleeping
Sleeping
Commit ·
ca54b04
1
Parent(s): 1dc37e8
Enhance Dockerfile and Streamlit app for Hugging Face directory management
Browse files
- Updated Dockerfile to create additional directories for Hugging Face caching with appropriate permissions.
- Enhanced Streamlit app to configure environment variables for Hugging Face and implement robust directory creation with error handling and logging.
- Improved fallback mechanisms for directory creation to ensure stability in various environments.
- Dockerfile +6 -2
- TROUBLESHOOTING.md +31 -1
- src/streamlit_app.py +23 -0
Dockerfile
CHANGED
|
@@ -10,13 +10,17 @@ RUN apt-get update && apt-get install -y \
|
|
| 10 |
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
|
| 12 |
# Create necessary directories with proper permissions
|
| 13 |
-
RUN mkdir -p /app/.streamlit /tmp/docling_temp /tmp/easyocr_models /tmp/cache /tmp/config /tmp/data && \
|
| 14 |
chmod 755 /app/.streamlit && \
|
| 15 |
chmod 777 /tmp/docling_temp && \
|
| 16 |
chmod 777 /tmp/easyocr_models && \
|
| 17 |
chmod 777 /tmp/cache && \
|
| 18 |
chmod 777 /tmp/config && \
|
| 19 |
-
chmod 777 /tmp/data
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
COPY requirements.txt ./
|
| 22 |
COPY src/ ./src/
|
|
|
|
| 10 |
&& rm -rf /var/lib/apt/lists/*
|
| 11 |
|
| 12 |
# Create necessary directories with proper permissions
|
| 13 |
+
RUN mkdir -p /app/.streamlit /tmp/docling_temp /tmp/easyocr_models /tmp/cache /tmp/config /tmp/data /tmp/huggingface /tmp/huggingface_cache /tmp/transformers_cache /tmp/datasets_cache && \
|
| 14 |
chmod 755 /app/.streamlit && \
|
| 15 |
chmod 777 /tmp/docling_temp && \
|
| 16 |
chmod 777 /tmp/easyocr_models && \
|
| 17 |
chmod 777 /tmp/cache && \
|
| 18 |
chmod 777 /tmp/config && \
|
| 19 |
+
chmod 777 /tmp/data && \
|
| 20 |
+
chmod 777 /tmp/huggingface && \
|
| 21 |
+
chmod 777 /tmp/huggingface_cache && \
|
| 22 |
+
chmod 777 /tmp/transformers_cache && \
|
| 23 |
+
chmod 777 /tmp/datasets_cache
|
| 24 |
|
| 25 |
COPY requirements.txt ./
|
| 26 |
COPY src/ ./src/
|
TROUBLESHOOTING.md
CHANGED
|
@@ -95,4 +95,34 @@ The app automatically sets these environment variables:
|
|
| 95 |
- `HOME=/tmp/docling_temp` (or fallback)
|
| 96 |
- `XDG_CACHE_HOME=/tmp/cache` (or fallback)
|
| 97 |
- `XDG_CONFIG_HOME=/tmp/config` (or fallback)
|
| 98 |
-
- `XDG_DATA_HOME=/tmp/data` (or fallback)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
- `HOME=/tmp/docling_temp` (or fallback)
|
| 96 |
- `XDG_CACHE_HOME=/tmp/cache` (or fallback)
|
| 97 |
- `XDG_CONFIG_HOME=/tmp/config` (or fallback)
|
| 98 |
+
- `XDG_DATA_HOME=/tmp/data` (or fallback)
|
| 99 |
+
|
| 100 |
+
### Hugging Face Hub Permission Errors
|
| 101 |
+
|
| 102 |
+
If you encounter Hugging Face Hub permission errors like:
|
| 103 |
+
```
|
| 104 |
+
PermissionError: [Errno 13] Permission denied: '/.cache'
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
The app now handles these by:
|
| 108 |
+
1. Setting `HF_HOME`, `HF_CACHE_HOME`, `TRANSFORMERS_CACHE`, and `HF_DATASETS_CACHE` to writable directories
|
| 109 |
+
2. Creating all necessary Hugging Face cache directories with proper permissions
|
| 110 |
+
3. Using fallback directories if the primary ones fail
|
| 111 |
+
|
| 112 |
+
### Environment Variables
|
| 113 |
+
|
| 114 |
+
The app automatically sets these environment variables:
|
| 115 |
+
- `STREAMLIT_SERVER_FILE_WATCHER_TYPE=none`
|
| 116 |
+
- `STREAMLIT_SERVER_HEADLESS=true`
|
| 117 |
+
- `STREAMLIT_BROWSER_GATHER_USAGE_STATS=false`
|
| 118 |
+
- `STREAMLIT_SERVER_ENABLE_CORS=false`
|
| 119 |
+
- `STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=false`
|
| 120 |
+
- `EASYOCR_MODULE_PATH=/tmp/easyocr_models` (or fallback)
|
| 121 |
+
- `HOME=/tmp/docling_temp` (or fallback)
|
| 122 |
+
- `XDG_CACHE_HOME=/tmp/cache` (or fallback)
|
| 123 |
+
- `XDG_CONFIG_HOME=/tmp/config` (or fallback)
|
| 124 |
+
- `XDG_DATA_HOME=/tmp/data` (or fallback)
|
| 125 |
+
- `HF_HOME=/tmp/huggingface` (or fallback)
|
| 126 |
+
- `HF_CACHE_HOME=/tmp/huggingface_cache` (or fallback)
|
| 127 |
+
- `TRANSFORMERS_CACHE=/tmp/transformers_cache` (or fallback)
|
| 128 |
+
- `HF_DATASETS_CACHE=/tmp/datasets_cache` (or fallback)
|
src/streamlit_app.py
CHANGED
|
@@ -97,6 +97,29 @@ for env_var in ['XDG_CACHE_HOME', 'XDG_CONFIG_HOME', 'XDG_DATA_HOME']:
|
|
| 97 |
except Exception as e:
|
| 98 |
logging.warning(f"Could not create directory for {env_var}: {e}")
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
# Log startup information
|
| 101 |
logging.info("=" * 50)
|
| 102 |
logging.info("Docling Streamlit App Starting")
|
|
|
|
| 97 |
except Exception as e:
|
| 98 |
logging.warning(f"Could not create directory for {env_var}: {e}")
|
| 99 |
|
| 100 |
+
# Configure Hugging Face Hub to use writable directories
|
| 101 |
+
os.environ['HF_HOME'] = os.path.join(TEMP_DIR, 'huggingface')
|
| 102 |
+
os.environ['HF_CACHE_HOME'] = os.path.join(TEMP_DIR, 'huggingface_cache')
|
| 103 |
+
os.environ['TRANSFORMERS_CACHE'] = os.path.join(TEMP_DIR, 'transformers_cache')
|
| 104 |
+
os.environ['HF_DATASETS_CACHE'] = os.path.join(TEMP_DIR, 'datasets_cache')
|
| 105 |
+
|
| 106 |
+
# Create Hugging Face directories
|
| 107 |
+
hf_dirs = ['HF_HOME', 'HF_CACHE_HOME', 'TRANSFORMERS_CACHE', 'HF_DATASETS_CACHE']
|
| 108 |
+
for env_var in hf_dirs:
|
| 109 |
+
try:
|
| 110 |
+
os.makedirs(os.environ[env_var], exist_ok=True)
|
| 111 |
+
logging.info(f"Created Hugging Face directory for {env_var}: {os.environ[env_var]}")
|
| 112 |
+
except Exception as e:
|
| 113 |
+
logging.warning(f"Could not create Hugging Face directory for {env_var}: {e}")
|
| 114 |
+
# Fallback to /tmp if the temp directory fails
|
| 115 |
+
fallback_path = os.path.join('/tmp', env_var.lower())
|
| 116 |
+
os.environ[env_var] = fallback_path
|
| 117 |
+
try:
|
| 118 |
+
os.makedirs(fallback_path, exist_ok=True)
|
| 119 |
+
logging.info(f"Using fallback Hugging Face directory for {env_var}: {fallback_path}")
|
| 120 |
+
except Exception as e2:
|
| 121 |
+
logging.error(f"Failed to create fallback Hugging Face directory for {env_var}: {e2}")
|
| 122 |
+
|
| 123 |
# Log startup information
|
| 124 |
logging.info("=" * 50)
|
| 125 |
logging.info("Docling Streamlit App Starting")
|