Spaces:
Sleeping
Sleeping
File size: 1,835 Bytes
#!/bin/bash
# setup.sh - Setup script for Hugging Face Spaces
#
# Installs Tesseract OCR with Bengali and English language packs plus
# poppler-utils through apt-get, then makes sure ben/eng traineddata files
# exist in the tessdata directory, downloading any that are missing from the
# tesseract-ocr/tessdata repository. Needs enough privileges to run apt-get
# and to write below /usr/share.
#
# Package installation failures abort the script; a failed language-data
# download or a missing language only produces a warning on stderr.
set -euo pipefail # Exit on errors, unset variables and failed pipeline stages

readonly TESSDATA_BASE_URL="https://github.com/tesseract-ocr/tessdata/raw/main"
# Used when no populated tessdata directory can be found after installation.
readonly FALLBACK_TESSDATA_DIR="/usr/share/tesseract-ocr/4.00/tessdata"

#######################################
# Print a warning message on stderr.
# Arguments: $* - message text
#######################################
warn() {
  printf 'Warning: %s\n' "$*" >&2
}

#######################################
# Locate the tessdata directory the installed packages populated.
# The version component of the path differs between releases, so probe for a
# directory that already holds *.traineddata files instead of assuming 4.00.
# Outputs: the directory path on stdout (the 4.00 fallback if none is found)
#######################################
find_tessdata_dir() {
  local candidate
  for candidate in /usr/share/tesseract-ocr/*/tessdata /usr/share/tessdata; do
    if compgen -G "${candidate}/*.traineddata" >/dev/null; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  printf '%s\n' "$FALLBACK_TESSDATA_DIR"
}

#######################################
# Download <lang>.traineddata into a directory.
# The data is fetched into a temporary file and only moved into place when
# the transfer succeeded and produced a non-empty file, so a failed download
# never leaves a truncated traineddata file behind.
# Arguments: $1 - language code (e.g. ben), $2 - tessdata directory
# Returns:   0 on success, 1 on any failure
#######################################
download_traineddata() {
  local lang=$1 dest_dir=$2
  local dest="${dest_dir}/${lang}.traineddata"
  local url="${TESSDATA_BASE_URL}/${lang}.traineddata"
  local tmp rc=0

  tmp=$(mktemp "${dest}.XXXXXX") || return 1

  if command -v wget >/dev/null 2>&1; then
    wget -q -O "$tmp" "$url" || rc=$?
  elif command -v curl >/dev/null 2>&1; then
    curl -fsSL -o "$tmp" "$url" || rc=$?
  else
    rc=127 # neither downloader is installed
  fi

  # mktemp creates the file with mode 600; make it world-readable before
  # publishing it under its final name.
  if ((rc == 0)) && [[ -s "$tmp" ]] \
    && chmod 644 -- "$tmp" && mv -f -- "$tmp" "$dest"; then
    return 0
  fi

  rm -f -- "$tmp"
  return 1
}

#######################################
# Make sure <lang>.traineddata exists and is non-empty, downloading it if not.
# A failed download is reported as a warning and does not abort the script.
# Arguments: $1 - language code, $2 - human-readable name, $3 - tessdata dir
#######################################
ensure_traineddata() {
  local lang=$1 label=$2 dir=$3

  # -s rather than -f: an empty file left by an earlier failed download must
  # not count as installed.
  if [[ -s "${dir}/${lang}.traineddata" ]]; then
    return 0
  fi

  echo "Installing ${label} language data..."
  if ! download_traineddata "$lang" "$dir"; then
    warn "Could not download ${label} language data"
  fi
}

#######################################
# Report, per language, whether Tesseract lists it as available.
# Arguments: language codes to check
# Returns:   0 if every language is listed, 1 otherwise
#######################################
verify_languages() {
  local installed lang missing=0

  installed=$(tesseract --list-langs 2>&1) || installed=""
  for lang in "$@"; do
    # -x: match the whole line so a code cannot match inside another name.
    if grep -qx -- "$lang" <<<"$installed"; then
      echo "  ${lang}: available"
    else
      echo "  ${lang}: MISSING"
      missing=1
    fi
  done
  return "$missing"
}

main() {
  local tessdata_dir

  echo "Setting up OCR dependencies for Hugging Face Spaces..."

  # Update package list
  echo "Updating package list..."
  apt-get update -qq

  # Install system dependencies for OCR
  echo "Installing system dependencies..."
  DEBIAN_FRONTEND=noninteractive apt-get install -y -qq \
    tesseract-ocr tesseract-ocr-ben tesseract-ocr-eng poppler-utils

  # Verify Tesseract installation
  echo "Verifying Tesseract installation..."
  tesseract --version

  # Check available languages
  echo "Available Tesseract languages:"
  tesseract --list-langs

  # Create tessdata directory if it doesn't exist
  tessdata_dir=$(find_tessdata_dir)
  if [[ ! -d "$tessdata_dir" ]]; then
    echo "Creating tessdata directory..."
    mkdir -p -- "$tessdata_dir"
  fi

  # Download additional language data if needed
  echo "Ensuring language data is available..."
  ensure_traineddata ben "Bengali" "$tessdata_dir"
  ensure_traineddata eng "English" "$tessdata_dir"

  # Set permissions. Best effort on purpose: a read-only or foreign-owned
  # tessdata directory must not fail the whole setup.
  chmod -R 755 "$tessdata_dir" 2>/dev/null || true

  echo "Final verification:"
  if verify_languages ben eng; then
    echo "Setup completed successfully!"
  else
    warn "Some languages may not be available"
  fi
}

main