enhanced-advanced-tokenizer / simple_working_test.py
9x25dillon's picture
Upload folder using huggingface_hub
498ff31 verified
#!/usr/bin/env python3
"""
Simple working test for enhanced tokenizer
"""
# Test imports
import importlib


def describe_dependency(module_name, label, show_version=True):
    """Try to import *module_name* and build a one-line status message.

    Args:
        module_name: Importable module name, e.g. ``"sklearn"``.
        label: Human-readable package name used in the message.
        show_version: When true, the success message includes the module's
            ``__version__`` (or ``"unknown"`` if the attribute is absent);
            otherwise it just says the package is available.

    Returns:
        A ``(available, message)`` tuple. ``available`` is ``False`` only
        when the import raises ``ImportError``; any other exception raised
        while importing propagates to the caller.
    """
    try:
        module = importlib.import_module(module_name)
    except ImportError:
        return False, f"❌ {label} not available"
    if show_version:
        version = getattr(module, "__version__", "unknown")
        return True, f"✅ {label}: {version}"
    return True, f"✅ {label} available"


# (module name, display label, whether the status line shows the version)
DEPENDENCIES = (
    ("torch", "PyTorch", True),
    ("transformers", "Transformers", True),
    ("sentence_transformers", "Sentence Transformers", False),
    ("spacy", "spaCy", False),
    ("sympy", "SymPy", True),
    ("scipy", "SciPy", True),
    ("sklearn", "scikit-learn", True),
)


def report_dependencies(dependencies=DEPENDENCIES):
    """Print one status line per dependency, in the order given.

    Args:
        dependencies: Iterable of ``(module_name, label, show_version)``
            triples, as accepted by ``describe_dependency``.

    Returns:
        A list of the labels whose import failed (empty if none did).
    """
    missing = []
    for module_name, label, show_version in dependencies:
        available, message = describe_dependency(module_name, label, show_version)
        print(message)
        if not available:
            missing.append(label)
    return missing


print("Testing imports...")
missing_dependencies = report_dependencies()
print("\nTesting basic functionality...")

# Test basic tokenization.
# str.split() with no arguments splits on runs of whitespace, so punctuation
# stays attached to the neighbouring word ("world!", "test.").
text = "Hello world! This is a test."
tokens = text.split()
token_report = f"✅ Basic tokenization: {len(tokens)} tokens"
print(token_report)
# Test mathematical expression detection
import re

# Matches a "$$ ... $$" span whose interior is one or more non-"$" characters.
math_pattern = r'\$\$[^$]+\$\$'
math_text = "The equation $$x^2 + y^2 = z^2$$ is fundamental."
math_matches = re.findall(math_pattern, math_text)
math_report = f"✅ Math detection: {len(math_matches)} expressions found"
print(math_report)
# Test entity detection (simple pattern-based)
# Matches two consecutive capitalised words separated by a single space:
# each word is one uppercase ASCII letter followed by lowercase ASCII letters.
entity_pattern = r'\b[A-Z][a-z]+ [A-Z][a-z]+\b'
entity_text = "John Smith works at Google Inc."
entity_matches = re.findall(entity_pattern, entity_text)
entity_report = f"✅ Entity detection: {len(entity_matches)} entities found"
print(entity_report)
# Closing banner. NOTE: both lines are printed unconditionally; they are not
# derived from the outcome of the import checks earlier in the script.
closing_lines = (
    "\n🎉 Enhanced tokenizer dependencies test complete!",
    "✅ All basic functionality working!",
)
print(*closing_lines, sep="\n")