#!/usr/bin/env python3
"""
Simple inference example for the Turnlet BERT Multilingual EOU model.

Demonstrates both PyTorch and ONNX usage.
"""

import argparse
import time

import numpy as np


def test_pytorch(text, threshold=0.86):
    """Test using the PyTorch model."""
    from transformers import AutoTokenizer, AutoModelForSequenceClassification
    import torch

    print("🔥 Loading PyTorch model...")
    model = AutoModelForSequenceClassification.from_pretrained(".")
    tokenizer = AutoTokenizer.from_pretrained(".")
    model.eval()

    print(f"\n📝 Input: {text}")

    # Tokenize and predict
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=-1)
        prob_eou = probs[0][1].item()

    is_eou = prob_eou > threshold

    print(f"✅ EOU Probability: {prob_eou:.4f}")
    print(f"🎯 Prediction: {'EOU (End of Utterance)' if is_eou else 'Non-EOU (Incomplete)'}")
    print(f"📊 Threshold: {threshold}")

    return is_eou, prob_eou


def test_onnx(text, model_path="bert_model_optimized_dynamic_int8.onnx", threshold=0.86):
    """Test using the ONNX quantized model (faster)."""
    import onnxruntime as ort
    from transformers import AutoTokenizer

    print("⚡ Loading ONNX Quantized INT8 model...")

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(".")
    session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])

    print(f"\n📝 Input: {text}")

    # Tokenize
    inputs = tokenizer(text, padding="max_length", max_length=128, truncation=True,
                       return_tensors="np")

    # Prepare ONNX inputs
    ort_inputs = {
        'input_ids': inputs['input_ids'].astype(np.int64),
        'attention_mask': inputs['attention_mask'].astype(np.int64)
    }

    # Run inference
    start = time.time()
    outputs = session.run(None, ort_inputs)
    inference_time = (time.time() - start) * 1000

    # Softmax over the two logits (subtract the max for numerical stability)
    logits = outputs[0][0]
    exp_logits = np.exp(logits - np.max(logits))
    probs = exp_logits / np.sum(exp_logits)
    prob_eou = probs[1]
    is_eou = prob_eou > threshold

    print(f"✅ EOU Probability: {prob_eou:.4f}")
    print(f"🎯 Prediction: {'EOU (End of Utterance)' if is_eou else 'Non-EOU (Incomplete)'}")
    print(f"📊 Threshold: {threshold}")
    print(f"⚡ Inference Time: {inference_time:.2f}ms")

    return is_eou, prob_eou
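# A minimal sketch of calling the helpers above directly from Python (rather than
# through the CLI in main()), assuming the tokenizer/model files and the INT8 ONNX
# file sit in the current directory as the loaders expect. Both helpers print their
# own diagnostics and return (is_eou, prob_eou); the sample strings are illustrative.
#
#   is_eou, prob = test_onnx("Thanks for your help!", threshold=0.86)
#   is_eou, prob = test_pytorch("I need a train to", threshold=0.86)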
("Necesito un tren a", "es", False), # Spanish: "I need a train to" (incomplete) ] print("\n" + "="*70) print("🌐 MULTILINGUAL EOU DETECTION TEST") print("="*70) correct = 0 total = len(examples) for text, lang, expected_eou in examples: print(f"\n{'─'*70}") print(f"🌍 Language: {lang.upper()}") if use_onnx: is_eou, prob = test_onnx(text, threshold=0.86) else: is_eou, prob = test_pytorch(text, threshold=0.86) expected_str = "EOU" if expected_eou else "Non-EOU" predicted_str = "EOU" if is_eou else "Non-EOU" is_correct = is_eou == expected_eou correct += is_correct status = "✅ CORRECT" if is_correct else "❌ INCORRECT" print(f"💡 Expected: {expected_str} | Got: {predicted_str} | {status}") print(f"\n{'='*70}") print(f"📊 ACCURACY: {correct}/{total} ({correct/total*100:.1f}%)") print(f"{'='*70}\n") def interactive_mode(use_onnx=True, threshold=0.86): """Interactive mode - continuously ask for input and predict""" import onnxruntime as ort from transformers import AutoTokenizer import time print("\n" + "="*70) print("🎮 INTERACTIVE MODE - Multilingual EOU Detection") print("="*70) print("🌐 Supported languages: English, Hindi, Spanish") print("📊 Threshold: {:.2f}".format(threshold)) if use_onnx: print("⚡ Using: ONNX Quantized INT8 model (fast)") tokenizer = AutoTokenizer.from_pretrained(".") session = ort.InferenceSession("bert_model_optimized_dynamic_int8.onnx", providers=['CPUExecutionProvider']) else: print("🔥 Using: PyTorch model") from transformers import AutoModelForSequenceClassification import torch tokenizer = AutoTokenizer.from_pretrained(".") model = AutoModelForSequenceClassification.from_pretrained(".") model.eval() print("\n💡 Type your text and press Enter to get EOU prediction") print("💡 Type 'quit' or 'exit' to stop") print("💡 Type 'examples' to see sample inputs") print("="*70 + "\n") sample_count = 0 while True: try: # Get user input user_input = input("📝 Enter text: ").strip() if not user_input: continue # Check for exit commands if user_input.lower() in ['quit', 'exit', 'q']: print("\n👋 Goodbye! Tested {} samples.".format(sample_count)) break # Show examples if user_input.lower() == 'examples': print("\n📚 Example inputs to try:") print(" English:") print(" - 'Thanks for your help!' (EOU)") print(" - 'I need to book a' (Non-EOU)") print(" Hindi:") print(" - 'धन्यवाद!' (Thank you! - EOU)") print(" - 'मुझे मदद चाहिए' (I need help - could be EOU)") print(" Spanish:") print(" - '¡Muchas gracias!' (Thank you! 
- EOU)") print(" - 'Necesito un tren a' (I need a train to - Non-EOU)") print() continue sample_count += 1 print() # Tokenize inputs = tokenizer(user_input, padding="max_length", max_length=128, truncation=True, return_tensors="np" if use_onnx else "pt") # Predict start = time.time() if use_onnx: # ONNX inference ort_inputs = { 'input_ids': inputs['input_ids'].astype(np.int64), 'attention_mask': inputs['attention_mask'].astype(np.int64) } outputs = session.run(None, ort_inputs) logits = outputs[0][0] probs = np.exp(logits) / np.sum(np.exp(logits)) prob_eou = probs[1] else: # PyTorch inference import torch with torch.no_grad(): outputs = model(**inputs) probs = torch.softmax(outputs.logits, dim=-1) prob_eou = probs[0][1].item() inference_time = (time.time() - start) * 1000 # Determine prediction is_eou = prob_eou > threshold # Display results with color coding print("─" * 70) if is_eou: print("✅ Prediction: EOU (End of Utterance)") print(" └─ The user has likely finished their thought") else: print("⏳ Prediction: Non-EOU (Incomplete)") print(" └─ The user may still be speaking") print(f"📊 Confidence: {prob_eou:.4f} (threshold: {threshold})") print(f"⚡ Inference time: {inference_time:.2f}ms") # Confidence bar bar_length = 40 filled = int(bar_length * prob_eou) bar = "█" * filled + "░" * (bar_length - filled) print(f"📈 [{bar}] {prob_eou*100:.1f}%") print("─" * 70 + "\n") except KeyboardInterrupt: print("\n\n👋 Interrupted! Tested {} samples. Goodbye!".format(sample_count)) break except Exception as e: print(f"❌ Error: {e}\n") continue def main(): parser = argparse.ArgumentParser(description="Test Turnlet BERT Multilingual EOU model") parser.add_argument("--text", type=str, help="Text to classify") parser.add_argument("--threshold", type=float, default=0.86, help="EOU threshold (default: 0.86)") parser.add_argument("--pytorch", action="store_true", help="Use PyTorch instead of ONNX") parser.add_argument("--test-suite", action="store_true", help="Run full test suite") parser.add_argument("--interactive", "-i", action="store_true", help="Run in interactive mode") args = parser.parse_args() if args.interactive: interactive_mode(use_onnx=not args.pytorch, threshold=args.threshold) elif args.test_suite: test_multiple_examples(use_onnx=not args.pytorch) elif args.text: if args.pytorch: test_pytorch(args.text, args.threshold) else: test_onnx(args.text, threshold=args.threshold) else: # Default to interactive mode if no arguments provided print("No arguments provided. Starting interactive mode...") print("(Use --help to see all options)\n") interactive_mode(use_onnx=True, threshold=args.threshold) if __name__ == "__main__": main()