| |
| """ |
| Test Intelligent Response Processing |
| |
| Test that ATLES now provides intelligent summaries instead of raw function results. |
| """ |
|
|
| import sys |
| from pathlib import Path |
|
|
| |
| project_root = Path(__file__).parent |
| sys.path.insert(0, str(project_root)) |
|
|
| def test_intelligent_pdf_response(): |
| """Test that PDF function results are processed intelligently""" |
| |
| print("π§ TESTING INTELLIGENT PDF RESPONSE") |
| print("=" * 40) |
| |
| try: |
| from atles_desktop_pyqt import ATLESCommunicationThread |
| |
| comm_thread = ATLESCommunicationThread() |
| |
| |
| function_result = '''Function read_pdf executed successfully: { |
| "success": true, |
| "url": "https://arxiv.org/pdf/2112.09332", |
| "num_pages": 32, |
| "total_chars": 82603, |
| "text": "WebGPT: Browser-assisted question-answering with human feedback. This paper presents WebGPT, a fine-tuned version of GPT-3 that can answer questions by browsing the web. We train WebGPT to imitate human demonstrations of the task, and then optimize it with human feedback using reinforcement learning. On the TruthfulQA dataset, our best WebGPT model achieves 69% accuracy. We find that human feedback is crucial for good performance. The model learns to search for relevant information, synthesize it into a coherent answer, and cite sources appropriately. Our approach demonstrates the potential for language models to become more truthful and helpful through human feedback.", |
| "message": "Successfully extracted text from https://arxiv.org/pdf/2112.09332 (32 pages, 82603 characters)" |
| }''' |
| |
| |
| user_message = "Can you read this https://arxiv.org/pdf/2112.09332 and give me a summary" |
| original_response = "Let me read this PDF for you" |
| |
| print("Testing intelligent response processing...") |
| |
| intelligent_response = comm_thread._process_function_result( |
| function_result, |
| user_message, |
| original_response |
| ) |
| |
| print(f"\nIntelligent response:") |
| print("-" * 50) |
| print(intelligent_response) |
| print("-" * 50) |
| |
| |
| if "PDF Analysis Complete" in intelligent_response: |
| print("β
Provides intelligent analysis header") |
| else: |
| print("β Missing analysis header") |
| |
| if "WebGPT" in intelligent_response or "Key Innovation" in intelligent_response: |
| print("β
Contains actual content analysis") |
| else: |
| print("β Missing content analysis") |
| |
| if "Ask me anything about this paper" in intelligent_response: |
| print("β
Invites follow-up questions") |
| else: |
| print("β Doesn't invite follow-up") |
| |
| |
| if '"success": true' not in intelligent_response: |
| print("β
Raw JSON hidden from user") |
| else: |
| print("β Still showing raw JSON") |
| |
| return "PDF Analysis Complete" in intelligent_response and "Key Innovation" in intelligent_response |
| |
| except Exception as e: |
| print(f"β Test failed: {e}") |
| import traceback |
| traceback.print_exc() |
| return False |
|
|
| def test_webgpt_specific_summary(): |
| """Test WebGPT-specific summary generation""" |
| |
| print(f"\nπ TESTING WEBGPT SUMMARY GENERATION") |
| print("=" * 40) |
| |
| try: |
| from atles_desktop_pyqt import ATLESCommunicationThread |
| |
| comm_thread = ATLESCommunicationThread() |
| |
| |
| webgpt_text = """WebGPT: Browser-assisted question-answering with human feedback |
| |
| Abstract: We fine-tune GPT-3 to answer open-ended questions using a text-based web-browsing environment. The model is trained to imitate human demonstrations of the task, and then optimized with human feedback using reinforcement learning from human feedback (RLHF). On the TruthfulQA dataset, our best WebGPT model achieves 69% accuracy, significantly outperforming GPT-3. We find that human feedback is crucial for good performance on this task.""" |
| |
| print("Testing WebGPT-specific summary...") |
| |
| summary = comm_thread._generate_webgpt_summary(webgpt_text) |
| |
| print(f"Generated summary:") |
| print("-" * 30) |
| print(summary) |
| print("-" * 30) |
| |
| |
| checks = [ |
| ("Research Focus" in summary, "Research focus section"), |
| ("human feedback" in summary.lower(), "Human feedback mentioned"), |
| ("browser-assisted" in summary.lower() or "web browsing" in summary.lower(), "Web browsing capability"), |
| ("RLHF" in summary or "reinforcement learning" in summary.lower(), "RLHF methodology"), |
| ] |
| |
| passed = 0 |
| for check, description in checks: |
| if check: |
| print(f"β
{description}") |
| passed += 1 |
| else: |
| print(f"β Missing {description}") |
| |
| return passed >= 3 |
| |
| except Exception as e: |
| print(f"β Test failed: {e}") |
| return False |
|
|
| def main(): |
| """Main test runner""" |
| |
| print("π§ INTELLIGENT RESPONSE TEST") |
| print("=" * 50) |
| |
| print(""" |
| TESTING THE FINAL FIX: |
| ATLES should now provide intelligent summaries and analysis |
| instead of showing raw JSON function results. |
| """) |
| |
| results = [] |
| results.append(test_intelligent_pdf_response()) |
| results.append(test_webgpt_specific_summary()) |
| |
| |
| print(f"\nπ TEST RESULTS") |
| print("=" * 50) |
| |
| passed = sum(results) |
| total = len(results) |
| |
| print(f"Tests passed: {passed}/{total}") |
| |
| if passed == total: |
| print(f"\nπ INTELLIGENT RESPONSE FIX SUCCESSFUL!") |
| print("β
PDF results processed intelligently") |
| print("β
WebGPT-specific summaries generated") |
| print("β
Raw JSON hidden from users") |
| print("β
Meaningful analysis provided") |
| |
| print(f"\nπ‘ NOW WHEN YOU ASK ATLES ABOUT THE WEBGPT PAPER:") |
| print("Instead of raw JSON, you'll get:") |
| print("- Intelligent summary of the research") |
| print("- Key findings and methodology") |
| print("- Document statistics") |
| print("- Invitation for follow-up questions") |
| |
| else: |
| print(f"\nβ οΈ Intelligent response still has issues") |
| if not results[0]: |
| print("β PDF response processing not working") |
| if not results[1]: |
| print("β WebGPT summary generation not working") |
| |
| return passed == total |
|
|
| if __name__ == "__main__": |
| try: |
| success = main() |
| if success: |
| print(f"\n⨠ATLES now provides intelligent PDF analysis!") |
| else: |
| print(f"\nβ οΈ Intelligent response needs more work.") |
| except KeyboardInterrupt: |
| print(f"\nβΉοΈ Test interrupted") |
| except Exception as e: |
| print(f"\nπ₯ Test error: {e}") |
| import traceback |
| traceback.print_exc() |
|
|