#!/bin/bash
#
# setup.sh - Setup script for Hugging Face Spaces
#
# Installs the system packages needed for OCR (Tesseract with the Bengali and
# English language packs, plus poppler-utils), then makes sure the
# corresponding *.traineddata files exist in the tessdata directory,
# downloading any that are missing.
#
# Environment:
#   TESSDATA_DIR  Optional. Directory that must contain the *.traineddata
#                 files. Defaults to the path this script has always used;
#                 override it if the installed Tesseract keeps its data
#                 elsewhere.
#
# Exit status: non-zero if a package-manager step or a tesseract invocation
# fails. Failed language-data downloads only produce a warning (best effort).

set -euo pipefail

readonly TESSDATA_DIR="${TESSDATA_DIR:-/usr/share/tesseract-ocr/4.00/tessdata}"
readonly TESSDATA_URL="https://github.com/tesseract-ocr/tessdata/raw/main"

# Print a warning line on stderr so it is not mixed into regular output.
warn() {
  printf '%s\n' "⚠️ $*" >&2
}

#######################################
# Ensure <lang>.traineddata is present in TESSDATA_DIR, downloading it if not.
# Globals:   TESSDATA_DIR (read), TESSDATA_URL (read)
# Arguments: $1 - Tesseract language code (e.g. "ben")
#            $2 - human-readable language name used in messages
#            $3 - flag emoji printed in front of the progress message
# Outputs:   progress on stdout, warning on stderr when the download fails
# Returns:   always 0; a failed download is deliberately non-fatal
#######################################
install_language_data() {
  local lang=$1
  local name=$2
  local flag=$3
  local target="${TESSDATA_DIR}/${lang}.traineddata"
  local tmp

  # -s rather than -f: an empty file is not usable language data and should
  # be replaced instead of being treated as "already installed".
  if [[ -s "$target" ]]; then
    return 0
  fi

  echo "${flag} Installing ${name} language data..."

  # wget -O truncates its output file before the transfer starts, so writing
  # straight to $target would leave a broken file behind on failure. Download
  # next to the target and only move it into place once it is complete.
  if ! tmp=$(mktemp "${target}.XXXXXX"); then
    warn "Warning: Could not download ${name} language data"
    return 0
  fi

  if wget -q -O "$tmp" "${TESSDATA_URL}/${lang}.traineddata" \
    && [[ -s "$tmp" ]] \
    && chmod 644 "$tmp" \
    && mv -f -- "$tmp" "$target"; then
    return 0
  fi

  rm -f -- "$tmp"
  warn "Warning: Could not download ${name} language data"
  return 0
}

main() {
  echo "🚀 Setting up OCR dependencies for Hugging Face Spaces..."

  # Update package list
  echo "📦 Updating package list..."
  apt-get update -qq

  # Install system dependencies for OCR. DEBIAN_FRONTEND keeps package
  # configuration from prompting, since nobody is there to answer.
  echo "🔧 Installing system dependencies..."
  DEBIAN_FRONTEND=noninteractive apt-get install -y -qq \
    tesseract-ocr \
    tesseract-ocr-ben \
    tesseract-ocr-eng \
    poppler-utils

  # Verify Tesseract installation
  echo "✅ Verifying Tesseract installation..."
  tesseract --version

  # Check available languages
  echo "🌐 Available Tesseract languages:"
  tesseract --list-langs

  # Create tessdata directory if it doesn't exist
  if [[ ! -d "$TESSDATA_DIR" ]]; then
    echo "📁 Creating tessdata directory..."
    mkdir -p -- "$TESSDATA_DIR"
  fi

  # Download additional language data if needed
  echo "📥 Ensuring language data is available..."
  install_language_data ben Bengali "🇧🇩"
  install_language_data eng English "🇺🇸"

  # Set permissions. Best effort on purpose: a read-only or foreign-owned
  # tessdata directory must not abort an otherwise working setup.
  chmod -R 755 "$TESSDATA_DIR" 2>/dev/null || true

  echo "🎉 Setup completed successfully!"
  echo "📊 Final verification:"
  tesseract --list-langs | grep -E "(ben|eng)" \
    || warn "Some languages may not be available"
}

main "$@"