| import os |
| import pandas as pd |
|
|
| |
# Folder, relative to the current working directory, that is scanned for
# the unzipped CSV files.
data_dir = "data"
|
|
def process_tokamak_data(directory, *, output_file='real_tokamak_data_merged.csv'):
    """Merge the CSV files found in *directory* into one shuffled dataset.

    Every non-hidden ``*.csv`` file whose name does not contain ``'Sample'``
    is read with pandas, tagged with a ``machine`` column (the part of the
    file name before the first dot), concatenated, shuffled with a fixed
    seed, and written to *output_file*.

    Args:
        directory: Path of the folder to scan (not searched recursively).
        output_file: Path the merged CSV is written to. If this file lives
            inside *directory* it is skipped on input, so re-running the
            function does not ingest its own previous output.

    Returns:
        The merged ``pandas.DataFrame``, or ``None`` when the folder cannot
        be listed or no CSV file could be loaded.
    """
    try:
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # concatenation order stable, so the seeded shuffle is reproducible.
        files = sorted(f for f in os.listdir(directory) if not f.startswith('.'))
    except FileNotFoundError:
        print(f"❌ Error: Folder '{directory}' not found. Please create it and unzip files there.")
        return None
    except NotADirectoryError:
        print(f"❌ Error: '{directory}' is not a folder.")
        return None
    print(f"📂 Files found in '{directory}': {files}")

    output_path = os.path.abspath(output_file)
    data_frames = []

    for name in files:
        file_path = os.path.join(directory, name)

        if name.endswith('.zip'):
            print(f" ℹ️ Found nested zip {name}. Please unzip this one too.")
            continue

        if not name.endswith('.csv') or 'Sample' in name:
            continue

        if os.path.abspath(file_path) == output_path:
            # A previous run wrote its result here; reading it back would
            # duplicate every row.
            print(f" ℹ️ Skipping {name}: it is the merged output file.")
            continue

        # Best effort: one unreadable file must not abort the whole merge.
        try:
            print(f" Reading {name}...")
            df = pd.read_csv(file_path)
            df['machine'] = name.split('.')[0]
            data_frames.append(df)
            print(f" ✅ Loaded {name}: {df.shape}")
        except Exception as e:
            print(f" ⚠️ Could not read {name}: {e}")

    if not data_frames:
        print("\n❌ No usable CSV data found. Check if the zips contained sub-folders or HDF5 files.")
        return None

    full_df = pd.concat(data_frames, ignore_index=True)
    # Shuffle all rows with a fixed seed so machines are interleaved
    # identically on every run.
    full_df = full_df.sample(frac=1, random_state=42).reset_index(drop=True)

    full_df.to_csv(output_file, index=False)
    print(f"\n🎉 Success! Combined dataset saved to '{output_file}'")
    print(f"Total shape: {full_df.shape}")
    return full_df
|
|
| |
# Run the merge as soon as the module executes; `df` holds the combined
# DataFrame, or None when nothing could be loaded.
df = process_tokamak_data(directory=data_dir)