"""Simple working test for enhanced tokenizer.

Smoke-test script with two stages, both executed at module top level:

1. Report which optional third-party packages can be imported (and their
   version where the original script printed one).
2. Run three tiny standard-library checks -- whitespace tokenization,
   ``$$...$$`` math-span detection, and a two-capitalized-words "entity"
   regex -- and print how many items each one found.

The script only prints results; it does not assert on them.
"""

import importlib
import re

# Status marks are written as named escapes so the source file stays pure
# ASCII and cannot be corrupted by a wrong-codec round trip.
OK_MARK = "\N{WHITE HEAVY CHECK MARK}"
FAIL_MARK = "\N{CROSS MARK}"
DONE_MARK = "\N{PARTY POPPER}"

# (importable module name, label shown to the user, print ``__version__``?)
DEPENDENCIES = (
    ("torch", "PyTorch", True),
    ("transformers", "Transformers", True),
    ("sentence_transformers", "Sentence Transformers", False),
    ("spacy", "spaCy", False),
    ("sympy", "SymPy", True),
    ("scipy", "SciPy", True),
    ("sklearn", "scikit-learn", True),
)


def check_dependency(module_name, label, show_version=True):
    """Try to import *module_name* and print a one-line status for it.

    Args:
        module_name: Dotted name passed to ``importlib.import_module``.
        label: Human-readable package name used in the printed message.
        show_version: When true, print ``"<label>: <version>"``; otherwise
            print ``"<label> available"``.

    Returns:
        ``True`` if the import succeeded, ``False`` if it raised
        ``ImportError``.  Any other exception raised while importing
        propagates to the caller.
    """
    try:
        module = importlib.import_module(module_name)
    except ImportError:
        print(f"{FAIL_MARK} {label} not available")
        return False

    if show_version:
        # Fall back to "unknown" rather than crashing the whole report when a
        # package does not expose ``__version__``.
        print(f"{OK_MARK} {label}:", getattr(module, "__version__", "unknown"))
    else:
        print(f"{OK_MARK} {label} available")
    return True


print("Testing imports...")

for _module_name, _label, _show_version in DEPENDENCIES:
    check_dependency(_module_name, _label, _show_version)

print("\nTesting basic functionality...")

# Whitespace tokenization.
text = "Hello world! This is a test."
tokens = text.split()
print(f"{OK_MARK} Basic tokenization: {len(tokens)} tokens")

# Display-math detection: a ``$$`` pair enclosing one or more non-``$`` chars.
math_pattern = r'\$\$[^$]+\$\$'
math_text = "The equation $$x^2 + y^2 = z^2$$ is fundamental."
math_matches = re.findall(math_pattern, math_text)
print(f"{OK_MARK} Math detection: {len(math_matches)} expressions found")

# Naive entity detection: two adjacent Capitalized words separated by a space.
entity_pattern = r'\b[A-Z][a-z]+ [A-Z][a-z]+\b'
entity_text = "John Smith works at Google Inc."
entity_matches = re.findall(entity_pattern, entity_text)
print(f"{OK_MARK} Entity detection: {len(entity_matches)} entities found")

print(f"\n{DONE_MARK} Enhanced tokenizer dependencies test complete!")
print(f"{OK_MARK} All basic functionality working!")