#!/usr/bin/env python3
"""Test script for the custom VerbatimOutputParser.

Runs three checks against ``VerbatimOutputParser``: verbatim detection,
extraction of a tool response from ReAct-style output, and the full
``parse`` call. It can be executed directly as a script; the process exit
status is non-zero when a check raises ``AssertionError``.
"""

import sys
import os

# Make the directory containing this script importable so that the ``app``
# package import below is attempted relative to it.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from app.core.custom_parser import VerbatimOutputParser, VERBATIM_RESPONSE_CONFIG
from llama_index.core.agent.react.types import ResponseReasoningStep


def test_verbatim_detection() -> None:
    """Test that the parser correctly identifies responses that should be verbatim.

    Prints one PASS/FAIL line per case so every misclassification is visible,
    then fails as a whole if any case was misclassified.

    Raises:
        AssertionError: if ``should_return_verbatim`` gives an unexpected
            answer for at least one case.
    """
    parser = VerbatimOutputParser()

    # Test cases - responses that SHOULD be returned verbatim
    verbatim_test_cases = [
        # Booking confirmation
        (
            "Perfect! ✅ **Meeting confirmed: licensing** "
            "📅 **Tuesday, January 14, 2025 at 2:30 PM** (15 minutes)\n"
            "\n"
            "🎥 Google Meet: Join here\n"
            "\n"
            "📋 Meeting IDs:\n"
            "Meeting ID: 0114-1430-15m\n"
            "Calendar ID: abc123def456\n"
            "\n"
            "📧 Email invitations are being sent in the background...\n"
            "\n"
        ),
        # Authentication error
        "I need to reconnect to your calendar. Please visit http://localhost:8000/auth/google to re-authenticate, then try again.",
        # Cancellation success
        "✅ Meeting cancelled successfully. The appointment has been removed from both calendars.",
    ]

    # Test cases that should NOT be returned verbatim (normal ReAct responses)
    normal_test_cases = [
        "I need your email address to send the Google Meet invitation.",
        "Let me check your availability for that time.",
        "What topic would you like to discuss in the meeting?",
        "Thought: I need to use the check_availability tool to see what times are free.",
    ]

    print("Testing verbatim detection...")

    failures = 0

    # Test verbatim cases
    for number, test_case in enumerate(verbatim_test_cases, start=1):
        should_be_verbatim = parser.should_return_verbatim(test_case)
        status = "✅ PASS" if should_be_verbatim else "❌ FAIL"
        print(f" Verbatim test {number}: {status}")
        if not should_be_verbatim:
            failures += 1
            print(f" Expected verbatim but got normal for: {test_case[:50]}...")

    # Test normal cases
    for number, test_case in enumerate(normal_test_cases, start=1):
        should_be_verbatim = parser.should_return_verbatim(test_case)
        status = "✅ PASS" if not should_be_verbatim else "❌ FAIL"
        print(f" Normal test {number}: {status}")
        if should_be_verbatim:
            failures += 1
            print(f" Expected normal but got verbatim for: {test_case}")

    # Raise only after every case has been reported. An explicit raise (not
    # an ``assert`` statement) keeps the check active under ``python -O``.
    if failures:
        raise AssertionError(
            f"{failures} verbatim-detection case(s) were misclassified"
        )


def test_response_extraction() -> None:
    """Test the extraction of clean responses from tool outputs.

    Passes a ReAct-style transcript to ``extract_verbatim_response`` and
    prints diagnostics about the extracted text. This check is informational
    only: it reports values and does not raise on unexpected results.
    """
    parser = VerbatimOutputParser()

    # Test extracting from Observation format
    # NOTE(review): the Thought/Action/Observation sections are separated by
    # single spaces here; they may originally have been on separate lines -
    # confirm against the format the parser expects.
    observation_output = (
        "Thought: I need to create the appointment now. "
        "Action: create_appointment "
        'Action Input: {"title": "licensing", "date_string": "today", '
        '"time_string": "2:30 PM", "duration_minutes": 15} '
        "Observation: Perfect! ✅ **Meeting confirmed: licensing** "
        "📅 **Tuesday, January 14, 2025 at 2:30 PM** (15 minutes)\n"
        "\n"
        "🎥 Google Meet: Join here\n"
        "\n"
        "Thought: Perfect! The meeting has been booked successfully."
    )

    extracted = parser.extract_verbatim_response(observation_output)
    print(f"Extracted response length: {len(extracted)}")
    # NOTE(review): the label says "Starts with" but the check is a substring
    # test - confirm which of the two is intended.
    print(f"Starts with 'Perfect! ✅': {'Perfect! ✅' in extracted}")
    print(f"Contains Google Meet: {'🎥 Google Meet' in extracted}")


def test_full_parsing() -> None:
    """Test the complete parse method functionality.

    Calls ``parse`` on a booking-confirmation observation and prints the type
    of the result; when the result is a ``ResponseReasoningStep`` it also
    prints details of its ``response`` and ``thought`` attributes. This check
    is informational only and does not raise on unexpected results.
    """
    parser = VerbatimOutputParser()

    # Test parsing a booking confirmation
    booking_output = (
        "Observation: Perfect! ✅ **Meeting confirmed: licensing** "
        "📅 **Tuesday, January 14, 2025 at 2:30 PM** (15 minutes) "
    )

    result = parser.parse(booking_output)
    print(f"Parse result type: {type(result)}")
    print(f"Is ResponseReasoningStep: {isinstance(result, ResponseReasoningStep)}")

    if isinstance(result, ResponseReasoningStep):
        print(f"Response contains booking confirmation: {'✅' in result.response}")
        print(f"Response length: {len(result.response)}")
        print(f"Thought: {result.thought}")


def main() -> int:
    """Run all tests.

    Each test runs even if an earlier one failed, so a single run reports
    every problem.

    Returns:
        0 when no test raised ``AssertionError``, 1 otherwise.
    """
    print("🧪 Testing Custom VerbatimOutputParser\n")
    print("=" * 60)

    tests = (test_verbatim_detection, test_response_extraction, test_full_parsing)
    failed = []

    for position, test in enumerate(tests):
        # A separator goes between consecutive tests, not before the first.
        if position:
            print("\n" + "=" * 60)
        try:
            test()
        except AssertionError as exc:
            failed.append(test.__name__)
            print(f"❌ {test.__name__} failed: {exc}")

    print("\n🎉 Testing complete!")
    return 1 if failed else 0


if __name__ == "__main__":
    sys.exit(main())