Spaces:
Sleeping
Sleeping
# setup.sh - Setup script for Hugging Face Spaces
#
# Installs Tesseract OCR (with Bengali and English language packs) and
# poppler-utils through apt-get, then makes sure ben/eng trained-data files
# are present in the tessdata directory of the installed Tesseract package.
#
# Usage: bash setup.sh
#   Uses bash-only syntax. Needs permission to run apt-get and to write to
#   the tessdata directory.
#
# Exit status: non-zero if apt-get or the initial tesseract checks fail.
# The language-data download and the final language check are best-effort:
# they print a warning to stderr but do not fail the script.
set -euo pipefail # Exit on errors, unset variables and failed pipeline stages

# Languages that must show up in `tesseract --list-langs` at the end.
readonly REQUIRED_LANGS=(ben eng)

# Base URL of the upstream *.traineddata files (download fallback).
readonly TESSDATA_URL="https://github.com/tesseract-ocr/tessdata/raw/main"

# Directory used when no packaged tessdata directory can be located.
readonly DEFAULT_TESSDATA_DIR="/usr/share/tesseract-ocr/4.00/tessdata"

# Resolved in main() once the packages are installed.
TESSDATA_DIR=""

#######################################
# Print a warning on stderr so it stays visible next to normal progress output.
# Arguments: $* - message text
#######################################
warn() {
  printf 'Warning: %s\n' "$*" >&2
}

#######################################
# Print the tessdata directory that the installed Tesseract package uses.
# The version component of the path differs between distro releases
# (e.g. ".../4.00/tessdata" vs ".../5/tessdata"), so it is looked up instead
# of being hard-coded.
# Globals:   DEFAULT_TESSDATA_DIR (read)
# Outputs:   the directory path on stdout
#######################################
find_tessdata_dir() {
  local data_file candidate

  # Preferred: ask dpkg where the English language package put its data file.
  # No `grep -m1` here: stopping early could kill dpkg with SIGPIPE, which
  # pipefail would turn into a failed lookup.
  data_file="$(dpkg -L tesseract-ocr-eng 2>/dev/null \
    | grep '/tessdata/eng\.traineddata$')" || data_file=""
  data_file="${data_file%%$'\n'*}" # keep the first match only
  if [[ -n "$data_file" ]]; then
    printf '%s\n' "${data_file%/*}"
    return 0
  fi

  # Fallback: first existing versioned tessdata directory. An unmatched glob
  # stays literal and is rejected by the -d test.
  for candidate in /usr/share/tesseract-ocr/*/tessdata; do
    if [[ -d "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done

  printf '%s\n' "$DEFAULT_TESSDATA_DIR"
}

#######################################
# Make sure <code>.traineddata exists in TESSDATA_DIR, downloading it if not.
# Best-effort: a failed download prints a warning and returns 0.
# Globals:   TESSDATA_DIR (read), TESSDATA_URL (read)
# Arguments: $1 - Tesseract language code (e.g. "ben")
#            $2 - human-readable language name used in messages
#######################################
install_traineddata() {
  local code=$1 name=$2
  local target="${TESSDATA_DIR}/${code}.traineddata"
  local partial="${target}.part"

  # -s rather than -f: an empty file left behind by an earlier failed
  # download must not count as installed.
  if [[ -s "$target" ]]; then
    return 0
  fi

  echo "Installing ${name} language data..."

  # Download to a side file and rename only on success, so a failed or
  # interrupted transfer never leaves a broken file at the final path.
  if wget -q -O "$partial" "${TESSDATA_URL}/${code}.traineddata" \
    && [[ -s "$partial" ]] \
    && mv -f -- "$partial" "$target"; then
    return 0
  fi

  # Cleanup is best-effort too; the warning below is what matters.
  rm -f -- "$partial" || true
  warn "Could not download ${name} language data"
}

#######################################
# Run the whole setup: install packages, locate tessdata, ensure language
# data, fix permissions and verify the required languages.
# Globals:   TESSDATA_DIR (written), REQUIRED_LANGS (read)
#######################################
main() {
  local lang available_langs
  local -a missing=()

  # Messages are plain ASCII so they render the same in any build-log locale.
  echo "Setting up OCR dependencies for Hugging Face Spaces..."

  # Update package list
  echo "Updating package list..."
  apt-get update -qq

  # Install system dependencies for OCR
  echo "Installing system dependencies..."
  apt-get install -y -qq tesseract-ocr tesseract-ocr-ben tesseract-ocr-eng poppler-utils

  # Verify Tesseract installation
  echo "Verifying Tesseract installation..."
  tesseract --version

  # Check available languages
  echo "Available Tesseract languages:"
  tesseract --list-langs

  # Create tessdata directory if it doesn't exist
  TESSDATA_DIR="$(find_tessdata_dir)"
  if [[ ! -d "$TESSDATA_DIR" ]]; then
    echo "Creating tessdata directory..."
    mkdir -p "$TESSDATA_DIR"
  fi

  # Download additional language data if needed
  echo "Ensuring language data is available..."
  install_traineddata ben Bengali
  install_traineddata eng English

  # Set permissions: directories become 755 and data files 644 (capital X
  # adds execute only to directories and already-executable files).
  # Deliberately best-effort, so errors are ignored.
  chmod -R u=rwX,go=rX "$TESSDATA_DIR" 2>/dev/null || true

  # Final verification: every required language must be listed by name.
  echo "Final verification:"
  available_langs="$(tesseract --list-langs)" || available_langs=""
  for lang in "${REQUIRED_LANGS[@]}"; do
    # -x matches the whole line, so e.g. "eng" cannot match a longer name.
    if grep -qx -- "$lang" <<<"$available_langs"; then
      echo "  ${lang}"
    else
      missing+=("$lang")
    fi
  done

  if ((${#missing[@]} > 0)); then
    warn "Some languages may not be available: ${missing[*]}"
  else
    echo "Setup completed successfully!"
  fi
}

main "$@"