Ash2749's picture
Update setup.sh
3b879b4 verified
#!/bin/bash
# setup.sh - Setup script for Hugging Face Spaces
#
# Installs the system packages used for OCR (Tesseract with the Bengali and
# English language packs, plus poppler-utils), then prints the Tesseract
# version and language list as a smoke test.
# Requires permission to run apt-get; any failing command aborts the script.
set -e # Exit on any error

echo "🚀 Setting up OCR dependencies for Hugging Face Spaces..."

# Update package list
echo "📦 Updating package list..."
apt-get update -qq

# Install system dependencies for OCR.
# - DEBIAN_FRONTEND=noninteractive keeps debconf from waiting on a prompt in an
#   unattended build.
# - wget and ca-certificates are installed because the language-data fallback
#   later in this script downloads over HTTPS with wget.
echo "🔧 Installing system dependencies..."
DEBIAN_FRONTEND=noninteractive apt-get install -y -qq \
  tesseract-ocr tesseract-ocr-ben tesseract-ocr-eng poppler-utils \
  wget ca-certificates

# Verify Tesseract installation (a missing binary aborts here via set -e)
echo "✅ Verifying Tesseract installation..."
tesseract --version

# Check available languages
echo "🌐 Available Tesseract languages:"
tesseract --list-langs
# Resolve the tessdata directory and create it if it doesn't exist.
# Resolution order:
#   1. TESSDATA_DIR already set in the environment (explicit override);
#   2. an existing /usr/share/tesseract-ocr/<version>/tessdata directory;
#   3. the historical default /usr/share/tesseract-ocr/4.00/tessdata.
# NOTE(review): the <version> segment depends on the installed Tesseract
# package (newer packages appear to use e.g. "5") — confirm for the base image.
TESSDATA_DIR="${TESSDATA_DIR:-}"
if [ -z "$TESSDATA_DIR" ]; then
  for tessdata_candidate in /usr/share/tesseract-ocr/*/tessdata; do
    # An unmatched glob stays literal and simply fails the -d test.
    # No break: the last (highest-sorting) existing match wins, so an empty
    # 4.00 directory left by an earlier run does not shadow a newer one.
    if [ -d "$tessdata_candidate" ]; then
      TESSDATA_DIR="$tessdata_candidate"
    fi
  done
  unset tessdata_candidate
fi
TESSDATA_DIR="${TESSDATA_DIR:-/usr/share/tesseract-ocr/4.00/tessdata}"

if [ ! -d "$TESSDATA_DIR" ]; then
  echo "📁 Creating tessdata directory..."
  mkdir -p "$TESSDATA_DIR"
fi
# Download additional language data if needed
echo "📥 Ensuring language data is available..."

#######################################
# Fetch one <lang>.traineddata file into TESSDATA_DIR unless a non-empty copy
# is already there.
# The download goes to a temporary file that is renamed into place only after
# wget succeeds with non-empty output. "wget -O" creates its output file even
# when the transfer fails; writing straight to the target would leave a stub
# that breaks the language and makes every later run skip the download.
# Globals:   TESSDATA_DIR (read)
# Arguments: $1 - language code, e.g. ben
#            $2 - flag emoji shown in the progress message
#            $3 - language name used in messages, e.g. Bengali
# Outputs:   progress and warning messages on stdout
# Returns:   0 if the file is present afterwards, 1 otherwise
#######################################
fetch_traineddata() {
  local lang="$1" flag="$2" label="$3"
  local dir="${TESSDATA_DIR:-}"
  local target="$dir/$lang.traineddata"
  local url="https://github.com/tesseract-ocr/tessdata/raw/main/$lang.traineddata"
  local tmp=""

  # -s rather than -f: an empty file left by an earlier failed download must
  # not count as installed.
  if [ -n "$dir" ] && [ -s "$target" ]; then
    return 0
  fi

  echo "$flag Installing $label language data..."
  # An empty TESSDATA_DIR would make the target resolve to the filesystem
  # root, so it is treated as a failure instead of being written to.
  if [ -n "$dir" ] \
    && tmp="$(mktemp "$target.XXXXXX" 2>/dev/null)" \
    && wget -q --tries=3 --timeout=30 -O "$tmp" "$url" \
    && [ -s "$tmp" ] \
    && chmod 644 "$tmp" \
    && mv -f "$tmp" "$target"; then
    return 0
  fi

  # Never leave a partial download behind.
  if [ -n "$tmp" ]; then
    rm -f "$tmp"
  fi
  echo "⚠️ Warning: Could not download $label language data"
  return 1
}

# Downloads are best-effort: a failure only warns and must not abort the
# script under set -e, hence the explicit "|| true".
# Bengali language support
fetch_traineddata ben "🇧🇩" "Bengali" || true
# English language support
fetch_traineddata eng "🇺🇸" "English" || true
#######################################
# Check that every requested language appears as its own line in the output
# of "tesseract --list-langs".
# Each language is matched exactly and individually, so one present language
# cannot hide another that is missing.
# Arguments: language codes to look for, e.g. ben eng
# Outputs:   each language found, one per line; then a warning naming the
#            missing ones, if any (all on stdout)
# Returns:   0 if all languages were found, 1 otherwise
#######################################
verify_languages() {
  local available="" lang missing=""
  # 2>&1 captures the list whichever stream tesseract writes it to; if
  # tesseract itself fails, nothing is treated as available.
  available="$(tesseract --list-langs 2>&1)" || available=""
  for lang in "$@"; do
    if grep -qx -- "$lang" <<<"$available"; then
      echo "$lang"
    else
      missing="$missing $lang"
    fi
  done
  if [ -n "$missing" ]; then
    echo "⚠️ Some languages may not be available:$missing"
    return 1
  fi
  return 0
}

# Set permissions (best-effort: errors are deliberately ignored)
chmod -R 755 "$TESSDATA_DIR" 2>/dev/null || true

# Report success only after verification. A missing language produces the
# warning above but, as before, does not change the script's exit status.
echo "📊 Final verification:"
if verify_languages ben eng; then
  echo "🎉 Setup completed successfully!"
fi