krystv

Fix NameError: make dataset module self-contained with its own CharTokenizer

Duplicates the minimal CharTokenizer in the dataset module to avoid import dependency issues.

9d170ee verified 2 days ago

raw

history blame contribute delete

1.68 kB

	# setup.py - Run this first to fix the import issue in neurolex_v4_dataset.py
	# Usage: python setup.py

	"""
	This script fixes the missing import in neurolex_v4_dataset.py.
	Run once after cloning the repo:
	python setup.py
	"""

	def fix_dataset_import(filepath='neurolex_v4_dataset.py'):
	    """Add the missing CharTokenizer import to the dataset module.

	    Reads ``filepath`` and, unless the text
	    ``from neurolex_v4_model import CharTokenizer`` is already present,
	    inserts that import (preceded by a blank line) right after the first
	    top-level ``import math`` line and writes the file back. The outcome is
	    reported with ``print``. When no suitable ``import math`` line exists,
	    the file is left untouched and manual instructions are printed instead.

	    Args:
	        filepath: Path of the module to patch. Defaults to
	            ``'neurolex_v4_dataset.py'``, resolved against the current
	            working directory.

	    Raises:
	        OSError: If ``filepath`` cannot be opened for reading or writing
	            (e.g. ``FileNotFoundError`` when the file does not exist).
	        UnicodeDecodeError: If ``filepath`` is not valid UTF-8.
	    """
	    import re

	    import_line = 'from neurolex_v4_model import CharTokenizer'

	    # Python source files are UTF-8 by default, so read and write with an
	    # explicit encoding instead of the platform-dependent locale default.
	    with open(filepath, 'r', encoding='utf-8') as f:
	        content = f.read()

	    # Check if fix is already applied
	    if import_line in content:
	        print("✅ Import already present - no fix needed")
	        return

	    # Only a column-0 ``import math`` line is a safe insertion point. A plain
	    # substring search would also hit an indented import inside a function
	    # (or the tail of ``from x import math``), and inserting an unindented
	    # import there would break the file.
	    anchor = re.search(r'^import math\n', content, flags=re.MULTILINE)
	    if anchor is None:
	        print("⚠️ Could not find insertion point. Please manually add:")
	        print(f" {import_line}")
	        print(f" after the 'import math' line in {filepath}")
	        return

	    insert_at = anchor.end()
	    patched = content[:insert_at] + '\n' + import_line + '\n' + content[insert_at:]
	    with open(filepath, 'w', encoding='utf-8') as f:
	        f.write(patched)
	    print(f"✅ Fixed: Added '{import_line}' to {filepath}")


	if __name__ == '__main__':
	    # Script entry point: patch the dataset module, then confirm that both
	    # project modules can actually be imported.
	    fix_dataset_import()

	    # Verify the fix works
	    print("\nVerifying imports...")
	    try:
	        from neurolex_v4_model import CharTokenizer, NeuroLexV4, create_model
	        from neurolex_v4_dataset import NeuroLexDataset, create_dataloaders
	        print("✅ All imports successful!")
	    except Exception as e:
	        # Importing a module runs its top-level code, which can raise any
	        # exception (not only ImportError), so catch broadly at this boundary.
	        print(f"❌ Import error: {e}")
	        # Exit non-zero so shell chains and CI can detect the failed
	        # verification instead of seeing a "successful" run.
	        raise SystemExit(1) from None