#!/bin/bash
#
# Extract every dataset archive found in the processed-datasets directory.
#
# Required environment (the first non-empty one is used):
#   ROBOMETER_PROCESSED_DATASETS_PATH
#   RBM_PROCESSED_DATASETS_PATH
#
# Steps, all performed inside that directory:
#   1. Concatenate and extract split archives whose first part is named
#      *.tar.partaa or *.tar.part-aa.
#   2. Extract plain *.tar archives that were not already handled in step 1.
#   3. If a processed_datasets/ subdirectory exists, move the directories it
#      contains up one level and remove it.
#   4. Report every archive that could not be extracted.
#
# An archive (or its parts) is deleted only after it extracted successfully.
# Exit status: 0 when every step succeeded, 1 otherwise.

# Set of archive names (e.g. "foo.tar") that were extracted successfully.
declare -A processed_archives=()
# Set of archive names whose extraction failed.
declare -A failed_archives=()

#######################################
# Extract all split archives whose first part ends in the given suffix.
# Globals:   processed_archives, failed_archives (written)
# Arguments: $1 - suffix of the first part, e.g. ".partaa" or ".part-aa"
# Outputs:   progress on stdout, failures on stderr
#######################################
extract_split_archives() {
  local first_suffix=$1
  local first_part base_name
  local -a pipe_status

  echo "Processing split archives..."
  for first_part in *.tar"${first_suffix}"; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -f "$first_part" ]] || continue

    base_name="${first_part%"$first_suffix"}"
    echo "Extracting split archive: $base_name"

    # The glob expands in sorted order, which is the order the parts must be
    # concatenated in.
    cat -- "${base_name}.part"* | tar -xvf -
    pipe_status=("${PIPESTATUS[@]}")

    # Both stages must succeed: checking tar alone could miss a part that cat
    # failed to read. A cat status of 141 (SIGPIPE) alongside a successful tar
    # only means tar stopped reading after the end-of-archive marker.
    if (( pipe_status[1] == 0 )) \
      && (( pipe_status[0] == 0 || pipe_status[0] == 141 )); then
      rm -- "${base_name}.part"*
      processed_archives["$base_name"]=1
    else
      echo "Failed to extract $base_name, will need to retry and remove the failed parts" >&2
      failed_archives["$base_name"]=1
    fi
  done
}

#######################################
# Extract every plain *.tar file that was not already handled as a split
# archive, deleting each one after a successful extraction.
# Globals:   processed_archives (read/written), failed_archives (written)
# Outputs:   progress on stdout, failures on stderr
#######################################
extract_regular_archives() {
  local archive

  echo "Processing regular tar files..."
  for archive in *.tar; do
    [[ -f "$archive" ]] || continue
    # Skip names that were already extracted from split parts.
    [[ -z "${processed_archives[$archive]:-}" ]] || continue

    echo "Extracting: $archive"
    if tar -xvf "$archive"; then
      processed_archives["$archive"]=1
      rm -- "$archive"
    else
      echo "Failed to extract $archive, will need to retry and remove the failed tar file" >&2
      failed_archives["$archive"]=1
    fi
  done
}

#######################################
# Move the directories inside processed_datasets/ into the current directory
# and then delete processed_datasets/.
# Outputs:   progress on stdout, failures on stderr
# Returns:   0 on success or when there is nothing to do, 1 if a move failed
#######################################
flatten_processed_datasets() {
  local dir
  local move_failed=0

  [[ -d "processed_datasets" ]] || return 0

  echo "Moving datasets out of processed_datasets subdirectory..."
  for dir in processed_datasets/*; do
    [[ -d "$dir" ]] || continue
    if ! mv -- "$dir" .; then
      echo "Failed to move $dir out of processed_datasets" >&2
      move_failed=1
    fi
  done

  # Never delete the subdirectory while it still holds datasets that could
  # not be moved; that would destroy the freshly extracted data.
  if (( move_failed )); then
    echo "Keeping processed_datasets because some datasets could not be moved" >&2
    return 1
  fi

  # Anything left at this point is not a directory matched by the glob above
  # and is discarded together with the subdirectory.
  rm -rf -- processed_datasets
  echo "Done moving datasets out of processed_datasets subdirectory!"
}

#######################################
# Print one line per archive that failed to extract, including any *.tar
# file still present that was never marked as processed.
# Globals:   processed_archives (read), failed_archives (read/written)
# Outputs:   failure lines on stderr
# Returns:   0 if nothing failed, 1 otherwise
#######################################
report_failed_archives() {
  local archive

  for archive in *.tar; do
    # Without this check an unmatched glob would be reported as a failed
    # archive literally named "*.tar" after a fully successful run.
    [[ -e "$archive" || -L "$archive" ]] || continue
    if [[ -z "${processed_archives[$archive]:-}" ]]; then
      failed_archives["$archive"]=1
    fi
  done

  for archive in "${!failed_archives[@]}"; do
    echo "Failed to extract $archive" >&2
  done

  (( ${#failed_archives[@]} == 0 ))
}

#######################################
# Entry point: validate the environment, run every step, report the result.
# Returns:   0 when everything succeeded, 1 otherwise
#######################################
main() {
  local datasets_dir="${ROBOMETER_PROCESSED_DATASETS_PATH:-$RBM_PROCESSED_DATASETS_PATH}"
  local status=0

  if [[ -z "$datasets_dir" ]]; then
    echo "ROBOMETER_PROCESSED_DATASETS_PATH (or RBM_PROCESSED_DATASETS_PATH) is not set" >&2
    exit 1
  fi

  cd -- "$datasets_dir" || exit 1

  extract_split_archives ".partaa"
  extract_split_archives ".part-aa"
  extract_regular_archives

  flatten_processed_datasets || status=1
  report_failed_archives || status=1

  cd ..
  echo "Done extracting all archives!"
  return "$status"
}

main "$@"