TREA_2.0_codebase / run_pipeline.sh
malay-36's picture
Upload folder using huggingface_hub
fec9168 verified
#!/bin/bash
################################################################################
# Temporal Reasoning Audio Dataset Generation Pipeline
#
# This script orchestrates the entire dataset creation process for all tasks.
################################################################################
set -o errexit # Abort on the first unhandled command failure

# Default settings; the option parser further down may overwrite each of them
PYTHON_CMD="python"
CONFIG_FILE="config.yaml"
TASKS=""
OUTPUT_DIR=""

# ANSI colour codes, stored as literal backslash sequences that are only
# interpreted when a message is printed
NC='\033[0m' # Reset / no colour
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Function to print colored messages
#######################################
# Print an informational message with a blue [INFO] tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text (backslash escapes in it are interpreted)
# Outputs:   the tagged message on stdout
#######################################
print_info() {
  local message=$1
  printf '%b\n' "${BLUE}[INFO]${NC} ${message}"
}
#######################################
# Print a success message with a green [SUCCESS] tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes in it are interpreted)
# Outputs:   the tagged message on stdout
#######################################
print_success() {
  local message=$1
  printf '%b\n' "${GREEN}[SUCCESS]${NC} ${message}"
}
#######################################
# Print a warning message with a yellow [WARNING] tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (backslash escapes in it are interpreted)
# Outputs:   the tagged message on stdout
#######################################
print_warning() {
  local message=$1
  printf '%b\n' "${YELLOW}[WARNING]${NC} ${message}"
}
#######################################
# Print an error message with a red [ERROR] tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text (backslash escapes in it are interpreted)
# Outputs:   the tagged message on stdout
#######################################
print_error() {
  local message=$1
  printf '%b\n' "${RED}[ERROR]${NC} ${message}"
}
# Function to print usage
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   usage summary, option list and examples on stdout; the
#            script's own invocation name ($0) is substituted in
#######################################
usage() {
  local script_name=$0
  cat <<USAGE_TEXT
Usage: ${script_name} [OPTIONS]
Temporal Reasoning Audio Dataset Generation Pipeline
OPTIONS:
-c, --config FILE Configuration file (default: config.yaml)
-o, --output DIR Output directory (overrides config)
-t, --tasks TASKS Specific tasks to run: count,duration,order,volume
(default: all enabled tasks)
-p, --python CMD Python command to use (default: python)
-h, --help Display this help message
EXAMPLES:
# Run all tasks with default config
${script_name}
# Run with custom config
${script_name} --config my_config.yaml
# Run specific tasks only
${script_name} --tasks count,duration
# Use custom output directory
${script_name} --output /path/to/output
# Combine options
${script_name} --config custom.yaml --tasks count,order --output ./my_dataset
USAGE_TEXT
}
# Parse command line arguments

#######################################
# Abort with a diagnostic when an option is the last word on the command
# line and therefore has no value following it.
# Arguments: the remaining positional parameters, option first
# Outputs:   error message and usage text (via print_error / usage)
# Returns:   0 when a value is present; otherwise exits the script with 1
#######################################
require_option_value() {
  if [[ $# -lt 2 ]]; then
    print_error "Option $1 requires an argument"
    usage
    exit 1
  fi
}

#######################################
# Translate command-line options into the pipeline's global settings.
# Globals:   CONFIG_FILE, OUTPUT_DIR, TASKS, PYTHON_CMD (written)
# Arguments: the script's positional parameters ("$@")
# Outputs:   error message and/or usage text for --help or invalid input
# Returns:   0 once all arguments are consumed; exits 0 after --help and
#            exits 1 on an unknown option or an option missing its value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -c|--config)
        # Checked explicitly: with only one word left, `shift 2` fails and
        # `set -e` would end the script without any explanation.
        require_option_value "$@"
        CONFIG_FILE="$2"
        shift 2
        ;;
      -o|--output)
        require_option_value "$@"
        OUTPUT_DIR="$2"
        shift 2
        ;;
      -t|--tasks)
        require_option_value "$@"
        TASKS="$2"
        shift 2
        ;;
      -p|--python)
        require_option_value "$@"
        PYTHON_CMD="$2"
        shift 2
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        print_error "Unknown option: $1"
        usage
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Absolute path of the directory that holds this script
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# The config file is looked up relative to the script's directory;
# stop right away when it is not a regular file there
[[ -f "${SCRIPT_DIR}/${CONFIG_FILE}" ]] || {
  print_error "Config file not found: $CONFIG_FILE"
  exit 1
}
# Banner: blank line, rule, title, rule, blank line
printf '%s\n' \
  "" \
  "================================================================================" \
  " TEMPORAL REASONING AUDIO DATASET GENERATION PIPELINE" \
  "================================================================================" \
  ""

# Summary of the effective settings; optional ones are shown only when given
print_info "Configuration: $CONFIG_FILE"
print_info "Python command: $PYTHON_CMD"
if [[ -n "$OUTPUT_DIR" ]]; then
  print_info "Output directory: $OUTPUT_DIR"
fi
if [[ -n "$TASKS" ]]; then
  print_info "Tasks to run: $TASKS"
fi
printf '\n'
# Check Python dependencies
print_info "Checking Python dependencies..."

# Split PYTHON_CMD on whitespace, as the previous unquoted expansion did,
# but without subjecting the words to pathname expansion.
read -r -a python_probe_cmd <<< "$PYTHON_CMD"

# The import probe runs inside the `if` condition on purpose: `set -e` is
# active, so a bare failing command would terminate the script before the
# installation hint below could be printed. stderr is discarded because the
# Python traceback is replaced by that hint.
if ! "${python_probe_cmd[@]}" -c "import yaml, pandas, pydub" 2>/dev/null; then
  print_error "Missing required Python packages. Please install:"
  echo " pip install pyyaml pandas pydub"
  exit 1
fi
print_success "Dependencies OK"
echo ""
# Build Python command arguments
# The command line is assembled as an array so that every argument reaches
# Python as exactly one word, even when the script directory, config path or
# output directory contains spaces or glob characters.

# PYTHON_CMD is split on whitespace (as the previous unquoted expansion did)
# but is not glob-expanded.
read -r -a pipeline_cmd <<< "$PYTHON_CMD"
pipeline_cmd+=("$SCRIPT_DIR/main.py" --config "$SCRIPT_DIR/$CONFIG_FILE")

if [[ -n "$OUTPUT_DIR" ]]; then
  pipeline_cmd+=(--output "$OUTPUT_DIR")
fi

if [[ -n "$TASKS" ]]; then
  # Convert the comma-separated list into separate words for Python argparse
  read -r -a task_list <<< "${TASKS//,/ }"
  pipeline_cmd+=(--tasks "${task_list[@]}")
fi

# Run the pipeline
print_info "Starting pipeline..."
echo ""

# The command is the `if` condition itself: under `set -e` a bare failing
# command would end the script immediately and the failure banner in the
# else-branch would never be shown.
if "${pipeline_cmd[@]}"; then
  echo ""
  echo "================================================================================"
  print_success "PIPELINE COMPLETED SUCCESSFULLY!"
  echo "================================================================================"
  echo ""
else
  echo ""
  echo "================================================================================"
  print_error "PIPELINE FAILED!"
  echo "================================================================================"
  echo ""
  exit 1
fi