#!/usr/bin/env python3
"""
Test script to verify the fixes for the ASL gloss processing.
"""
import asyncio
import re

from vectorizer import Vectorizer


def clean_gloss_token(token):
    """
    Clean a gloss token by removing brackets, newlines, and extra whitespace.
    """
    # Remove brackets and newline/carriage-return characters
    cleaned = re.sub(r'[\[\]\n\r]', '', token)
    # Collapse runs of whitespace and trim the ends
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
    return cleaned.lower()
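

# Quick sanity check of the cleaner; the expected value follows directly from
# the two regexes above: brackets and the trailing newline are stripped, and
# the token is lowercased.
assert clean_gloss_token("[GET-STUCK]\n") == "get-stuck"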


def test_gloss_parsing():
    """Test the gloss parsing functionality."""
    # Sample gloss output from the notebook
    sample_gloss = ("ASL [BEAR] [NAME] [OSKI] [LOVE] [HONEY] [BUT] [ALWAYS] "
                    "[GET-STUCK] [TREE]\n\n[ONE_DAY] [HE] [DISCOVER] [LADDER]\n\n"
                    "[PROBLEM] [SOLVE] [FINISH]")

    print("Original gloss:")
    print(sample_gloss)
    print("\n" + "=" * 50 + "\n")

    # Split on whitespace (str.split() also consumes the embedded "\n\n"
    # separators) and clean each token
    gloss_tokens = sample_gloss.split()
    cleaned_tokens = []
    for token in gloss_tokens:
        cleaned = clean_gloss_token(token)
        if cleaned:  # Only keep non-empty tokens
            cleaned_tokens.append(cleaned)

    print("Cleaned tokens:")
    for i, token in enumerate(cleaned_tokens):
        print(f"{i + 1:2d}. {token}")
    return cleaned_tokens
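
# Note: with the sample gloss above this yields 17 tokens. The leading "ASL"
# marker survives cleaning as the token "asl", and compound glosses keep their
# separators (e.g. "get-stuck", "one_day").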


async def test_vectorizer():
    """Test the vectorizer functionality."""
    try:
        vectorizer = Vectorizer()
        # Test with simple words that should be in the vocabulary
        test_words = ["BEAR", "LOVE", "TREE", "HE", "FINISH"]
        for word in test_words:
            print(f"\nTesting word: {word}")
            result = await vectorizer.vector_query_from_supabase(word)
            print(f"Result: {result}")
    except Exception as e:
        print(f"Error testing vectorizer: {e}")


async def main():
    """Main test function."""
    print("Testing ASL Gloss Processing Fixes")
    print("=" * 50)

    # Test 1: Gloss parsing
    print("\n1. Testing gloss parsing...")
    cleaned_tokens = test_gloss_parsing()
    print(f"Total cleaned tokens: {len(cleaned_tokens)}")

    # Test 2: Vectorizer (if the environment is set up)
    print("\n2. Testing vectorizer...")
    await test_vectorizer()

    print("\n" + "=" * 50)
    print("Test completed!")


if __name__ == "__main__":
    asyncio.run(main())
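
# Running this script prints the original gloss, the numbered cleaned tokens
# (17 for the sample above), and then the per-word vectorizer results, or an
# error message if the Supabase-backed vectorizer is not reachable.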