|
|
import os |
|
|
import json |
|
|
import time |
|
|
import re |
|
|
import uuid |
|
|
import torch |
|
|
import soundfile as sf |
|
|
import sys |
|
|
from datasets import load_from_disk, Dataset, Features, Audio, Value |
|
|
from dotenv import load_dotenv |
|
|
import datetime |
|
|
from tqdm import tqdm |
|
|
import traceback |
|
|
|
|
|
|
|
|
|
|
|
# Local checkout of the Kimi-Audio project; the `kimia_infer` package is
# imported from here rather than from an installed distribution.
kimia_project_parent_dir = "/home/chenyifu/audio-r1/r1-a/response_generation/Kimi-Audio"

if os.path.isdir(kimia_project_parent_dir):
    # Prepend so this checkout shadows any installed copy of kimia_infer.
    if kimia_project_parent_dir not in sys.path:
        sys.path.insert(0, kimia_project_parent_dir)
        print(f"Added '{kimia_project_parent_dir}' to Python path.")

    try:
        from kimia_infer.api.kimia import KimiAudio
    except ImportError as import_err:
        print(f"Error: Could not import KimiAudio from '{kimia_project_parent_dir}'.")
        print(f"ImportError: {import_err}")
        print("Please ensure the 'kimia_infer' directory exists within the specified path and check dependencies.")
        exit(1)
else:
    # Hard-fail early: nothing below can run without the Kimi-Audio checkout.
    print(f"Error: Kimi project parent directory not found: '{kimia_project_parent_dir}'")
    print("Please update the 'kimia_project_parent_dir' variable in the script.")
    exit(1)
|
|
|
|
|
|
|
|
# Load environment overrides (e.g. API keys) from a local .env file, if present.
load_dotenv()

# --- Model configuration ---
# Value matched against the dataset's model_{i} columns to find this model's slots.
KIMI_MODEL_NAME = "kimi_audio"
KIMI_MODEL_PATH = "/home/chenyifu/audio-r1/r1-a/response_generation/Kimi-Audio/checkpoint/Kimi-Audio-7B-Instruct"

# --- Dataset locations ---
INPUT_DATASET_DIR = "/home/chenyifu/audio-r1/r1-a/dataset/preference_sampling_tasks"
OUTPUT_DATASET_DIR = "/home/chenyifu/audio-r1/r1-a/dataset/preference_tasks_with_kimi"

# --- Generated-audio output settings ---
OUTPUT_AUDIO_ROOT_DIR = "/home/chenyifu/audio-r1/r1-a/generated_audio/kimi"
OUTPUT_AUDIO_FORMAT = "wav"
# Hz; presumably matches Kimi's detokenizer output rate -- TODO confirm.
OUTPUT_AUDIO_SAMPLERATE = 24000

# Sampling parameters forwarded verbatim as keyword args to KimiAudio.generate().
KIMI_SAMPLING_PARAMS = {
    "audio_temperature": 0.8,
    "audio_top_k": 10,
    "text_temperature": 0.0,
    "text_top_k": 5,
    "audio_repetition_penalty": 1.0,
    "audio_repetition_window_size": 64,
    "text_repetition_penalty": 1.0,
    "text_repetition_window_size": 16,
}
# Request both a waveform and a text transcript from generate().
KIMI_OUTPUT_TYPE = "both"

# Checkpoint the full dataset to disk every N processed tasks.
SAVE_EVERY_N_SAMPLES = 50
|
|
|
|
|
|
|
|
|
|
|
def format_time(seconds):
    """Render a duration in seconds as an H:MM:SS string ("N/A" for negative input)."""
    return "N/A" if seconds < 0 else str(datetime.timedelta(seconds=int(seconds)))
|
|
|
|
|
|
|
|
|
|
|
def parse_ultra_history(history_str):
    """Parse the ultra-metadata conversation history string into Kimi message dicts.

    Returns a list of {"role", "message_type", "content"} dicts. Primary format
    is bracketed uppercase turns ("[USER] ... [ASSISTANT] ..."); a best-effort
    fallback handles a single "user:"/"assistant:" prefixed message.
    """
    parsed = []

    # Each turn runs from its bracketed tag up to the next tag or end of string.
    turn_re = re.compile(r"\[\s*(USER|ASSISTANT)\s*\]\s*([\s\S]*?)(?=\s*\[\s*(?:USER|ASSISTANT)\s*\]|$)")
    turns = turn_re.findall(history_str)

    if not turns and history_str and history_str.strip():
        # No bracketed tags found — try a single prefixed message instead.
        lowered = history_str.lower()
        if lowered.startswith("user:") or lowered.startswith("[user]"):
            body = re.sub(r"^(user:|\[user\])\s*", "", history_str, flags=re.IGNORECASE).strip()
            if body:
                parsed.append({"role": "user", "message_type": "text", "content": body})
        elif lowered.startswith("assistant:") or lowered.startswith("[assistant]"):
            body = re.sub(r"^(assistant:|\[assistant\])\s*", "", history_str, flags=re.IGNORECASE).strip()
            if body:
                parsed.append({"role": "assistant", "message_type": "text", "content": body})
        else:
            print(f"Warning: Could not parse history string format: {history_str[:100]}...")
        return parsed

    # Normal path: one message per captured turn, skipping empty bodies.
    for tag, body in turns:
        body = body.strip()
        if body:
            parsed.append({"role": tag.strip().lower(), "message_type": "text", "content": body})
    return parsed
|
|
|
|
|
|
|
|
|
|
|
def call_kimi_model(model, messages_input, sampling_params, output_audio_filepath, output_sample_rate):
    """Calls the Kimi-Audio model, saves audio, returns text and audio path.

    Args:
        model: Loaded KimiAudio instance exposing `generate(...)`.
        messages_input: List of Kimi message dicts ({"role", "message_type", "content"}).
        sampling_params: Keyword arguments forwarded verbatim to `model.generate`.
        output_audio_filepath: Destination path for the generated waveform.
        output_sample_rate: Sample rate (Hz) used when writing the wav file.

    Returns:
        (response_text, saved_audio_path). `saved_audio_path` is None when no
        usable audio was produced; `response_text` starts with "[ERROR" on failure.
    """
    try:
        # Make sure the target directory exists before the (potentially long) generate call.
        os.makedirs(os.path.dirname(output_audio_filepath), exist_ok=True)

        wav_output, text_output = model.generate(
            messages_input,
            **sampling_params,
            output_type=KIMI_OUTPUT_TYPE
        )

        saved_audio_path = None
        if wav_output is not None and isinstance(wav_output, torch.Tensor) and wav_output.numel() > 0:
            try:
                # BUGFIX: numpy was never imported anywhere in this file, so the
                # `np.*` calls below raised NameError and the ImportError handler
                # never fired. Importing here makes that handler meaningful.
                import numpy as np

                # Flatten to mono 1-D and move to host memory for soundfile.
                audio_data = wav_output.detach().cpu().view(-1).numpy()

                # soundfile expects float32; any other dtype (int or otherwise)
                # was cast identically in the original, so one cast suffices.
                if audio_data.dtype != 'float32':
                    audio_data = audio_data.astype(np.float32)

                sf.write(output_audio_filepath, audio_data, output_sample_rate)

                # Sanity check: a valid wav file should be more than a bare header.
                if os.path.exists(output_audio_filepath) and os.path.getsize(output_audio_filepath) > 100:
                    saved_audio_path = output_audio_filepath
                else:
                    print(f" Error: Kimi generate finished but output audio file seems empty or too small at {output_audio_filepath}")
                    if os.path.exists(output_audio_filepath):
                        try:
                            os.remove(output_audio_filepath)
                        except OSError as rm_err:
                            print(f" Warning: Could not remove empty/small file {output_audio_filepath}: {rm_err}")
            except ImportError:
                print("Error: NumPy library not found. Please install it (`pip install numpy`)")
                return "[ERROR: NumPy Missing]", None
            except Exception as sf_err:
                print(f" Error saving Kimi audio output to {output_audio_filepath}: {sf_err}")
                traceback.print_exc()
                # Remove any partially written (possibly corrupt) file.
                if os.path.exists(output_audio_filepath):
                    try:
                        os.remove(output_audio_filepath)
                    except OSError as rm_err:
                        print(f" Warning: Could not remove potentially corrupt file {output_audio_filepath}: {rm_err}")
        elif wav_output is None:
            print(" Warning: Kimi model did not return an audio tensor (wav_output is None).")
        elif isinstance(wav_output, torch.Tensor) and wav_output.numel() == 0:
            print(" Warning: Kimi model returned an empty audio tensor.")
        else:
            print(f" Warning: Kimi model returned unexpected audio output type: {type(wav_output)}. Expected torch.Tensor.")

        # Normalise the text output; fall back to sentinel strings when absent.
        if isinstance(text_output, str):
            response_text_cleaned = text_output.strip()
        elif text_output is not None:
            response_text_cleaned = str(text_output).strip()
        elif saved_audio_path:
            response_text_cleaned = "[Audio Generated, No Text Output]"
        else:
            response_text_cleaned = "[ERROR: No Text Output]"

        return response_text_cleaned, saved_audio_path

    except Exception as e:
        print(f"\n --- Error during Kimi model call ---")
        # Log a bounded preview of the conversation for debugging.
        first_message = messages_input[0] if messages_input else "N/A"
        last_message_content = messages_input[-1]['content'] if messages_input else "N/A"
        if isinstance(last_message_content, str) and len(last_message_content) > 100:
            last_message_preview = last_message_content[:100] + "..."
        else:
            last_message_preview = last_message_content

        print(f" Input Messages Info: Count={len(messages_input)}, First={first_message}, Last Content Preview='{last_message_preview}'")
        print(f" Exception Type: {type(e).__name__}")
        print(f" Error Details: {e}")
        print(" Traceback:")
        traceback.print_exc()
        print(" --- End Error Details ---")

        # Best-effort cleanup of a partially written audio file.
        # (output_audio_filepath is a parameter, so it is always bound here.)
        if os.path.exists(output_audio_filepath):
            try:
                os.remove(output_audio_filepath)
            except OSError as rm_err:
                print(f" Warning: Could not remove file {output_audio_filepath} after error: {rm_err}")

        return "[ERROR: Kimi Model Call Failed]", None
|
|
|
|
|
|
|
|
def save_checkpoint(data_list, features, output_dir, fallback_dir=None):
    """Saves the current state of the data list as a Hugging Face Dataset.

    Args:
        data_list: List of row dicts (the full working copy of the dataset).
        features: `datasets.Features` schema the rebuilt dataset is cast to,
            so column types stay stable across resume cycles.
        output_dir: Directory passed to `Dataset.save_to_disk`.
        fallback_dir: Optional directory for a JSON-Lines dump used when the
            Arrow save fails for any reason.
    """
    if not data_list:
        print("\nSkipping checkpoint save: data list is empty.")
        return

    print(f"\nSaving checkpoint with {len(data_list)} rows to {output_dir}...")
    # BUGFIX: copy rows *before* the try so the JSONL fallback always has
    # `data_to_save` bound, even if the Arrow conversion is what fails.
    data_to_save = [dict(item) for item in data_list]
    try:
        temp_dataset = Dataset.from_list(data_to_save)
        # Cast back to the original schema (types may have loosened in the list copy).
        updated_dataset = temp_dataset.cast(features)

        os.makedirs(output_dir, exist_ok=True)
        updated_dataset.save_to_disk(output_dir)
        print("Checkpoint saved successfully.")

    except Exception as e:
        print(f"Error saving checkpoint dataset using save_to_disk to {output_dir}: {e}")
        traceback.print_exc()
        if fallback_dir:
            # Timestamped filename so repeated failures never overwrite each other.
            fallback_path = os.path.join(fallback_dir, f"updated_{KIMI_MODEL_NAME}_data_checkpoint_{int(time.time())}.jsonl")
            print(f"Attempting to save data as JSON Lines fallback to: {fallback_path}")
            try:
                os.makedirs(fallback_dir, exist_ok=True)
                with open(fallback_path, 'w', encoding='utf-8') as f:
                    for item in data_to_save:
                        # Coerce non-JSON-native values so each row serializes cleanly.
                        serializable_item = {}
                        for k, v in item.items():
                            if isinstance(v, (datetime.datetime, datetime.date)):
                                serializable_item[k] = v.isoformat()
                            elif isinstance(v, bytes):
                                serializable_item[k] = v.decode('utf-8', errors='ignore')
                            elif isinstance(v, torch.Tensor):
                                print(f" Warning: Found unexpected Tensor for key '{k}' in fallback save. Converting to list.")
                                serializable_item[k] = v.tolist()
                            elif not isinstance(v, (str, int, float, bool, list, dict, type(None))):
                                print(f" Warning: Converting non-standard type {type(v)} for key '{k}' to string for JSON fallback.")
                                serializable_item[k] = str(v)
                            else:
                                serializable_item[k] = v
                        try:
                            f.write(json.dumps(serializable_item, ensure_ascii=False) + '\n')
                        except TypeError as json_type_err:
                            print(f" Skipping row due to JSON serialization error: {json_type_err} in item part: {k}={v}")
                print("Fallback JSON Lines checkpoint saved successfully.")
            except Exception as json_e:
                print(f"Error saving fallback JSON Lines checkpoint: {json_e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("="*30)
print("STEP 1: Loading Dataset")
print("="*30)
dataset = None
original_features = None

# Resume support: prefer a previously checkpointed output dataset so rows
# already processed in an earlier run are not regenerated.
if os.path.exists(OUTPUT_DATASET_DIR):
    print(f"Found existing Kimi processed dataset directory at: {OUTPUT_DATASET_DIR}")
    print("Attempting to load it to resume processing...")
    try:
        dataset = load_from_disk(OUTPUT_DATASET_DIR)
        original_features = dataset.features
        print(f"Resumed Kimi dataset loaded successfully with {len(dataset)} rows.")
        print(f"Features from resumed dataset: {original_features}")
    except Exception as e:
        print(f"Warning: Error loading existing Kimi dataset from {OUTPUT_DATASET_DIR}: {e}")
        traceback.print_exc()
        print("Will attempt to load the original input dataset instead.")
        dataset = None  # fall through to the fresh-input path below
else:
    print(f"No existing Kimi processed dataset found at {OUTPUT_DATASET_DIR}.")
    print("Will attempt to load the original input dataset.")

# Fresh start (or the resume load failed): load the unprocessed input dataset.
if dataset is None:
    print(f"\nLoading original input dataset from: {INPUT_DATASET_DIR}")
    if not os.path.exists(INPUT_DATASET_DIR):
        print(f"FATAL: Original input dataset directory not found at {INPUT_DATASET_DIR}")
        exit(1)
    try:
        dataset = load_from_disk(INPUT_DATASET_DIR)
        original_features = dataset.features
        print(f"Original input dataset loaded successfully with {len(dataset)} rows.")
        print(f"Features from input dataset: {original_features}")
    except Exception as e:
        print(f"FATAL: Error loading original input dataset from {INPUT_DATASET_DIR}: {e}")
        traceback.print_exc()
        exit(1)

# Both the rows and the schema are required by the processing loop below.
if dataset is None or original_features is None:
    print("FATAL: Failed to load any dataset. Exiting.")
    exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("\n" + "="*30)
print(f"STEP 2: Identifying '{KIMI_MODEL_NAME}' Tasks to Process")
print("="*30)
# Tasks are (row_index, slot_index) pairs; 'pkusafe' rows are queued separately
# so they can be processed first.
pkusafe_tasks_indices = []
other_tasks_indices = []

for idx, row in enumerate(dataset):
    source_dataset = row.get('source_dataset')
    processed_in_row = False  # take at most one unfilled Kimi slot per row
    for i in range(1, 4):  # each row has three response slots: model_1..model_3
        model_key = f"model_{i}"
        response_text_key = f"response_text_{i}"

        is_target_model_task = row.get(model_key) == KIMI_MODEL_NAME
        # Empty/None response text marks a slot that still needs generation.
        is_unfilled = not row.get(response_text_key)

        if is_target_model_task and is_unfilled and not processed_in_row:
            task_info = (idx, i)
            if source_dataset == 'pkusafe':
                pkusafe_tasks_indices.append(task_info)
            else:
                other_tasks_indices.append(task_info)
            processed_in_row = True

# Process all 'pkusafe' tasks before everything else.
tasks_to_process_indices = pkusafe_tasks_indices + other_tasks_indices
total_tasks_to_process = len(tasks_to_process_indices)

print(f"Found {len(pkusafe_tasks_indices)} 'pkusafe' tasks and {len(other_tasks_indices)} other tasks requiring '{KIMI_MODEL_NAME}' processing in the loaded dataset.")
print(f"Total tasks remaining to process: {total_tasks_to_process}")

# Nothing left to do (e.g. a fully completed resume) — exit cleanly.
if total_tasks_to_process == 0:
    print(f"\nNo remaining tasks to process for {KIMI_MODEL_NAME} based on the loaded dataset.")
    print("Exiting.")
    exit(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("\n" + "="*30)
print(f"STEP 3: Loading {KIMI_MODEL_NAME} Model")
print("="*30)
try:
    # load_detokenizer=True is needed to obtain waveform output from generate().
    model = KimiAudio(model_path=KIMI_MODEL_PATH, load_detokenizer=True)
    print(f"{KIMI_MODEL_NAME} model loaded successfully from {KIMI_MODEL_PATH}.")
except NameError:
    # KimiAudio only exists if the sys.path import at the top of the script succeeded.
    print("FATAL: KimiAudio class not defined. Import likely failed earlier.")
    exit(1)
except Exception as e:
    print(f"Error loading {KIMI_MODEL_NAME} model from {KIMI_MODEL_PATH}: {e}")
    traceback.print_exc()
    exit(1)
|
|
|
|
|
|
|
|
|
|
|
print("\n" + "="*30)
print(f"STEP 4: Preparing for {KIMI_MODEL_NAME} Processing")
print("="*30)

os.makedirs(OUTPUT_AUDIO_ROOT_DIR, exist_ok=True)
os.makedirs(OUTPUT_DATASET_DIR, exist_ok=True)

# JSONL fallback directory sits next to OUTPUT_DATASET_DIR; used by
# save_checkpoint when the Arrow save fails.
fallback_save_dir = os.path.join(os.path.dirname(OUTPUT_DATASET_DIR), f"{KIMI_MODEL_NAME}_checkpoints_fallback")
os.makedirs(fallback_save_dir, exist_ok=True)
print(f"Audio outputs will be saved in: {OUTPUT_AUDIO_ROOT_DIR}")
print(f"Dataset checkpoints will be saved in: {OUTPUT_DATASET_DIR}")
print(f"Fallback checkpoints (JSONL) in: {fallback_save_dir}")

# Work on a mutable list-of-dicts copy; HF Dataset rows cannot be updated in place.
updated_data = [dict(row) for row in dataset]

tasks_processed_count = 0
start_total_time = time.time()
|
|
|
|
|
|
|
|
|
|
|
print("\n" + "="*30)
print(f"STEP 5: Starting {KIMI_MODEL_NAME} Processing Loop ({total_tasks_to_process} Tasks)")
print("="*30)

pbar = tqdm(enumerate(tasks_to_process_indices), total=total_tasks_to_process, desc=f"Processing {KIMI_MODEL_NAME} Tasks")
for loop_idx, (row_idx, slot_i) in pbar:
    # Read from the working copy (it reflects results written earlier this run).
    row = updated_data[row_idx]

    pbar.set_description(f"Processing Row {row_idx}, Slot {slot_i}")

    # Column names for the slot being filled by this task.
    prompt_text_key = f"prompt_text_{slot_i}"
    response_text_key = f"response_text_{slot_i}"
    response_audio_key = f"response_audio_path_{slot_i}"
    model_key = f"model_{slot_i}"

    # Defensive re-check against the working copy; the slot may have changed
    # since it was queued in STEP 2 (e.g. after a resume).
    if row.get(model_key) != KIMI_MODEL_NAME:
        tqdm.write(f" Skipping Row {row_idx}, Slot {slot_i}: Model is '{row.get(model_key)}', not '{KIMI_MODEL_NAME}'.")
        continue
    if row.get(response_text_key):
        tqdm.write(f" Skipping Row {row_idx}, Slot {slot_i}: Already has response text '{str(row.get(response_text_key))[:50]}...'.")
        continue

    prompt_text = row.get(prompt_text_key, "")
    question_audio_path = row.get('question_audio')
    metadata_str = row.get('metadata', "{}")
    source_dataset = row.get('source_dataset')

    # The question audio is mandatory input; record an error sentinel and move on.
    if not question_audio_path or not os.path.exists(question_audio_path):
        tqdm.write(f" Error: Input audio path missing or invalid for Row {row_idx}: '{question_audio_path}'. Skipping model call.")
        updated_data[row_idx][response_text_key] = "[ERROR: Missing Input Audio]"
        updated_data[row_idx][response_audio_key] = None
        continue

    kimi_messages = []

    # 'ultra' rows carry a serialized conversation history in their metadata;
    # prepend it as prior turns (best-effort — failures only log a warning).
    if source_dataset == 'ultra' and metadata_str:
        try:
            metadata = json.loads(metadata_str)
            history_str = metadata.get('history', '')
            if history_str:
                history_messages_parsed = parse_ultra_history(history_str)
                kimi_messages.extend(history_messages_parsed)
        except json.JSONDecodeError:
            tqdm.write(f" Warning: Could not parse metadata JSON for row {row_idx}")
        except Exception as hist_e:
            tqdm.write(f" Warning: Error processing history for row {row_idx}: {hist_e}")

    # Current turn: optional text prompt followed by the question audio clip.
    if prompt_text and prompt_text.strip():
        kimi_messages.append({"role": "user", "message_type": "text", "content": prompt_text.strip()})

    kimi_messages.append({"role": "user", "message_type": "audio", "content": question_audio_path})

    # Unique filename avoids collisions if the same slot is ever regenerated.
    unique_id = str(uuid.uuid4())
    output_audio_filename = f"{KIMI_MODEL_NAME}_row{row_idx}_slot{slot_i}_{unique_id}.{OUTPUT_AUDIO_FORMAT}"
    output_audio_filepath = os.path.join(OUTPUT_AUDIO_ROOT_DIR, output_audio_filename)

    call_start_time = time.time()
    response_text, saved_audio_path = call_kimi_model(
        model,
        kimi_messages,
        KIMI_SAMPLING_PARAMS,
        output_audio_filepath,
        OUTPUT_AUDIO_SAMPLERATE
    )
    call_end_time = time.time()
    audio_basename = os.path.basename(str(saved_audio_path)) if saved_audio_path else "None"
    tqdm.write(f" Row {row_idx}, Slot {slot_i}: Finished in {call_end_time - call_start_time:.2f}s. Text: '{str(response_text)[:50]}...', Audio: {audio_basename}")

    # Record results (including "[ERROR...]" sentinels) into the working copy.
    updated_data[row_idx][response_text_key] = response_text
    updated_data[row_idx][response_audio_key] = saved_audio_path

    # Only genuinely successful generations count toward the summary stats.
    if response_text is not None and not response_text.startswith("[ERROR"):
        tasks_processed_count += 1

    # Periodic checkpoint, plus a guaranteed save on the final task.
    processed_count_in_loop = loop_idx + 1
    if processed_count_in_loop % SAVE_EVERY_N_SAMPLES == 0 or processed_count_in_loop == total_tasks_to_process:
        save_checkpoint(updated_data, original_features, OUTPUT_DATASET_DIR, fallback_save_dir)
|
|
|
|
|
|
|
|
end_total_time = time.time()
print("\n" + "="*30)
print(f"STEP 6: {KIMI_MODEL_NAME} Processing Complete - Summary")
print("="*30)
print(f"Total tasks identified for processing in this run: {total_tasks_to_process}")
print(f"Total tasks successfully processed (generated text): {tasks_processed_count}")
total_duration = end_total_time - start_total_time
print(f"Total processing time for this run: {format_time(total_duration)}")
if tasks_processed_count > 0:
    avg_time = total_duration / tasks_processed_count
    print(f"Average time per successfully processed task in this run: {avg_time:.2f} seconds")
else:
    print("Average time per task: N/A (no tasks successfully processed in this run)")

# Final save catches any rows processed since the last periodic checkpoint.
print("\nPerforming final save of the dataset...")
save_checkpoint(updated_data, original_features, OUTPUT_DATASET_DIR, fallback_save_dir)

print("\nScript finished.")