1bnjmn3
/

QGAN_Project

Model card Files Files and versions

QGAN_Project / vG.0.1 /data-detect

1bnjmn3's picture

Add files using upload-large-folder tool

0f755ec verified 6 months ago

history blame contribute delete

1.98 kB

	import os
	import pandas as pd

	# Folder that holds the unzipped dataset files
	data_dir = "data"

	def process_tokamak_data(directory, output_file='real_tokamak_data_merged.csv'):
	    """Merge every usable CSV in *directory* into one shuffled dataset.

	    Each non-hidden file whose name ends in ``.csv`` (any letter case) and
	    does not contain ``'Sample'`` is read with pandas and tagged with a
	    ``machine`` column holding the file name up to its first dot. The
	    frames are concatenated, shuffled with a fixed seed and written to
	    *output_file*. Files ending in ``.zip`` are only reported, not opened.

	    Args:
	        directory: Folder holding the unzipped data files.
	        output_file: Path of the merged CSV to write. Defaults to
	            ``'real_tokamak_data_merged.csv'`` in the current working
	            directory.

	    Returns:
	        The merged, shuffled DataFrame, or ``None`` when the folder does
	        not exist or no CSV could be loaded.
	    """
	    try:
	        # Sorted so the concatenation order -- and therefore the seeded
	        # shuffle below -- does not depend on the filesystem's listing order.
	        files = sorted(f for f in os.listdir(directory) if not f.startswith('.'))
	    except FileNotFoundError:
	        print(f"❌ Error: Folder '{directory}' not found. Please create it and unzip files there.")
	        return None
	    print(f"📂 Files found in '{directory}': {files}")

	    output_path = os.path.abspath(output_file)
	    data_frames = []

	    for f in files:
	        file_path = os.path.join(directory, f)
	        lower_name = f.lower()

	        # Case A: It's a CSV (likely feature vectors); 'Sample' files are excluded
	        if lower_name.endswith('.csv') and 'Sample' not in f:
	            if os.path.abspath(file_path) == output_path:
	                # A previous run's output sitting in the input folder must
	                # not be merged back in, or every rerun duplicates the rows.
	                print(f" ℹ️ Skipping {f}: it is the merged output file.")
	                continue

	            print(f" Reading {f}...")
	            try:
	                df = pd.read_csv(file_path)
	                # Track which file each row came from (crucial for analysis)
	                df['machine'] = f.split('.')[0]
	            except Exception as e:
	                # Deliberately best-effort: one unreadable file must not
	                # abort the whole merge, so report it and move on.
	                print(f" ⚠️ Could not read {f}: {e}")
	            else:
	                data_frames.append(df)
	                print(f" ✅ Loaded {f}: {df.shape}")

	        # Case B: It's a ZIP inside a ZIP (common in Zindi)
	        elif lower_name.endswith('.zip'):
	            print(f" ℹ️ Found nested zip {f}. Please unzip this one too.")

	    if not data_frames:
	        print("\n❌ No usable CSV data found. Check if the zips contained sub-folders or HDF5 files.")
	        return None

	    # Merge everything, then shuffle so rows from different files are mixed
	    full_df = pd.concat(data_frames, ignore_index=True)
	    full_df = full_df.sample(frac=1, random_state=42).reset_index(drop=True)

	    full_df.to_csv(output_file, index=False)
	    print(f"\n🎉 Success! Combined dataset saved to '{output_file}'")
	    print(f"Total shape: {full_df.shape}")
	    return full_df

	# Build the merged dataset from the configured data folder
	df = process_tokamak_data(directory=data_dir)