Spaces:
Runtime error
Runtime error
# Master Course Scraper - Bash Wrapper
#
# Convenience front-end for the Python master scraper: it collects a few
# command line options and forwards them to master_scraper.py.

# Abort as soon as any command fails.
set -e

# ANSI escape sequences for colored output. They are stored as literal
# backslash sequences and only become real escapes when printed by `echo -e`.
NC='\033[0m' # reset / no color
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Absolute directory holding this script, and the directory one level above
# it, which the rest of the script treats as the project root.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_ROOT=$(dirname "$SCRIPT_DIR")

# Option defaults; the command line parser may override any of them.
DRY_RUN=false
TOPIC=
PLATFORM=
CONFIG_FILE="${PROJECT_ROOT}/config/scraping_config.yaml"
# Print the help text (title, option list and example invocations) to stdout.
# Reads the BLUE and NC color variables for the title and shows $0 as the
# program name. Takes no arguments.
show_usage() {
    # Only the title carries color codes, so only it needs escape processing.
    printf '%b\n' "${BLUE}Master Course Scraper${NC}"

    # Everything else is plain text, one argument per output line.
    printf '%s\n' \
        "" \
        "Usage: $0 [OPTIONS]" \
        "" \
        "Options:" \
        " -c, --config FILE Use custom configuration file (default: config/scraping_config.yaml)" \
        " -d, --dry-run Show what would be scraped without actually running" \
        " -t, --topic TOPIC Only scrape courses for this topic (partial match)" \
        " -p, --platform PLATFORM Only scrape from this platform (coursera, udemy, edx)" \
        " -h, --help Show this help message" \
        "" \
        "Examples:" \
        " $0 # Scrape everything in config" \
        " $0 --dry-run # Show what would be scraped" \
        " $0 --topic \"machine learning\" # Only scrape ML courses" \
        " $0 --platform coursera # Only scrape from Coursera" \
        " $0 --topic python --platform udemy # Only Python courses from Udemy" \
        ""
}
# Parse command line arguments.
#
# Recognized options: -c/--config FILE, -d/--dry-run, -t/--topic TOPIC,
# -p/--platform PLATFORM and -h/--help. Results are stored in CONFIG_FILE,
# DRY_RUN, TOPIC and PLATFORM. --help prints the usage text and exits 0; an
# unknown option, or a value-taking option with no value, prints an error
# followed by the usage text and exits 1.
while [[ $# -gt 0 ]]; do
    # Options that take a value need a second word. Without this check,
    # `shift 2` fails when the value is missing, and because the script runs
    # under `set -e` it would then stop without printing any explanation.
    case "$1" in
        -c|--config|-t|--topic|-p|--platform)
            if [[ $# -lt 2 ]]; then
                echo -e "${RED}Error: Option $1 requires an argument${NC}"
                show_usage
                exit 1
            fi
            ;;
    esac

    case "$1" in
        -c|--config)
            CONFIG_FILE="$2"
            shift 2
            ;;
        -d|--dry-run)
            DRY_RUN=true
            shift
            ;;
        -t|--topic)
            TOPIC="$2"
            shift 2
            ;;
        -p|--platform)
            PLATFORM="$2"
            shift 2
            ;;
        -h|--help)
            show_usage
            exit 0
            ;;
        *)
            echo -e "${RED}Error: Unknown option $1${NC}"
            show_usage
            exit 1
            ;;
    esac
done
# Preflight checks: each one prints an error and exits with status 1 when it
# fails, so nothing is launched with a broken setup.

# The configuration file must exist as a regular file.
[[ -f "$CONFIG_FILE" ]] || {
    echo -e "${RED}Error: Configuration file not found: $CONFIG_FILE${NC}"
    exit 1
}

# The scraper is started through pipenv, so that command must be resolvable.
command -v pipenv > /dev/null 2>&1 || {
    echo -e "${RED}Error: pipenv not found. Please install pipenv first.${NC}"
    echo "Install with: pip install pipenv"
    exit 1
}
# Assemble the scraper invocation as an array so that arguments containing
# spaces (a topic such as "machine learning", or a path) stay single words.
CMD=(pipenv run python "$SCRIPT_DIR/master_scraper.py" --config "$CONFIG_FILE")

# Optional flags are appended only when the matching option was supplied.
[[ "$DRY_RUN" != true ]] || CMD+=(--dry-run)
[[ -z "$TOPIC" ]] || CMD+=(--topic "$TOPIC")
[[ -z "$PLATFORM" ]] || CMD+=(--platform "$PLATFORM")
# Show what is about to run: the config file in use plus any active mode or
# filters, followed by a blank line.
#
# Only the colored labels go through escape processing (%b). The values come
# from the command line and are printed verbatim (%s), so a backslash in a
# path or topic (e.g. "C:\new" or "a\tb") is shown as typed instead of being
# turned into a newline or tab.
printf '%b\n' "${BLUE}Running master scraper...${NC}"
printf '%b %s\n' "${YELLOW}Config:${NC}" "$CONFIG_FILE"
if [[ "$DRY_RUN" == true ]]; then
    printf '%b %s\n' "${YELLOW}Mode:${NC}" "DRY RUN"
fi
if [[ -n "$TOPIC" ]]; then
    printf '%b %s\n' "${YELLOW}Topic filter:${NC}" "$TOPIC"
fi
if [[ -n "$PLATFORM" ]]; then
    printf '%b %s\n' "${YELLOW}Platform filter:${NC}" "$PLATFORM"
fi
printf '\n'
# Run from the project root, then execute the assembled command.
cd "$PROJECT_ROOT"

# Any failure of the command is reported and mapped to exit status 1;
# otherwise a success line is printed.
if ! "${CMD[@]}"; then
    echo -e "${RED}❌ Master scraper failed${NC}"
    exit 1
fi
echo -e "${GREEN}✅ Master scraper completed successfully${NC}"