# enhanced-advanced-tokenizer / simple_working_test.py
#
# Upload folder using huggingface_hub
#
# 498ff31 verified 3 months ago
#
# 1.89 kB

	#!/usr/bin/env python3
"""
Simple working test for enhanced tokenizer.

Running this script prints a two-part status report:

1. Dependency check: tries to import each third-party package listed in
   ``_DEPENDENCIES`` and prints one line per package saying whether it is
   available (including its version for the packages flagged to show one).
2. Basic functionality: three standard-library-only sanity checks
   (whitespace tokenization, ``$$...$$`` math detection and a simple
   capitalised-word-pair entity pattern).

A missing package is reported, not raised, so the script always reaches
the basic functionality checks.
"""

import importlib
import re

# One row per dependency:
# (importable module name, label shown in the report, print ``__version__``?)
_DEPENDENCIES = (
    ("torch", "PyTorch", True),
    ("transformers", "Transformers", True),
    ("sentence_transformers", "Sentence Transformers", False),
    ("spacy", "spaCy", False),
    ("sympy", "SymPy", True),
    ("scipy", "SciPy", True),
    ("sklearn", "scikit-learn", True),
)


def _report_dependency(module_name, label, show_version):
    """Try to import ``module_name`` and print one status line for it.

    Args:
        module_name: Name passed to ``importlib.import_module``.
        label: Human-readable package name used in the printed line.
        show_version: When true, the success line includes the module's
            ``__version__`` (or ``"unknown"`` if the attribute is absent).

    Returns:
        True if the import succeeded, False if it raised ImportError.
    """
    # Keep the try body to the import itself so that only a failed import
    # is reported as "not available".
    try:
        module = importlib.import_module(module_name)
    except ImportError:
        print(f"❌ {label} not available")
        return False

    if show_version:
        print(f"✅ {label}:", getattr(module, "__version__", "unknown"))
    else:
        print(f"✅ {label} available")
    return True


# Test imports
print("Testing imports...")

for _dependency in _DEPENDENCIES:
    _report_dependency(*_dependency)

print("\nTesting basic functionality...")

# Test basic tokenization: plain whitespace split, punctuation stays attached.
text = "Hello world! This is a test."
tokens = text.split()
print(f"✅ Basic tokenization: {len(tokens)} tokens")

# Test mathematical expression detection: "$$", one or more non-"$"
# characters, then a closing "$$".
math_pattern = r'\$\$[^$]+\$\$'
math_text = "The equation $$x^2 + y^2 = z^2$$ is fundamental."
math_matches = re.findall(math_pattern, math_text)
print(f"✅ Math detection: {len(math_matches)} expressions found")

# Test entity detection (simple pattern-based): two consecutive capitalised
# words separated by a single space. On the sample sentence this matches
# both "John Smith" and "Google Inc".
entity_pattern = r'\b[A-Z][a-z]+ [A-Z][a-z]+\b'
entity_text = "John Smith works at Google Inc."
entity_matches = re.findall(entity_pattern, entity_text)
print(f"✅ Entity detection: {len(entity_matches)} entities found")

print("\n🎉 Enhanced tokenizer dependencies test complete!")
print("✅ All basic functionality working!")