| import os
|
| import tempfile
|
|
|
| from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
|
| from evaluation.utils.shared import assert_and_raise
|
| from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
|
| from openhands.events.event import Event
|
| from openhands.events.observation import AgentDelegateObservation
|
| from openhands.runtime.base import Runtime
|
|
|
| HTML_FILE = """
|
| <!DOCTYPE html>
|
| <html lang="en">
|
| <head>
|
| <meta charset="UTF-8">
|
| <meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| <title>The Ultimate Answer</title>
|
| <style>
|
| body {
|
| display: flex;
|
| justify-content: center;
|
| align-items: center;
|
| height: 100vh;
|
| margin: 0;
|
| background: linear-gradient(to right, #1e3c72, #2a5298);
|
| color: #fff;
|
| font-family: 'Arial', sans-serif;
|
| text-align: center;
|
| }
|
| .container {
|
| text-align: center;
|
| padding: 20px;
|
| background: rgba(255, 255, 255, 0.1);
|
| border-radius: 10px;
|
| box-shadow: 0 0 10px rgba(0, 0, 0, 0.2);
|
| }
|
| h1 {
|
| font-size: 36px;
|
| margin-bottom: 20px;
|
| }
|
| p {
|
| font-size: 18px;
|
| margin-bottom: 30px;
|
| }
|
| #showButton {
|
| padding: 10px 20px;
|
| font-size: 16px;
|
| color: #1e3c72;
|
| background: #fff;
|
| border: none;
|
| border-radius: 5px;
|
| cursor: pointer;
|
| transition: background 0.3s ease;
|
| }
|
| #showButton:hover {
|
| background: #f0f0f0;
|
| }
|
| #result {
|
| margin-top: 20px;
|
| font-size: 24px;
|
| }
|
| </style>
|
| </head>
|
| <body>
|
| <div class="container">
|
| <h1>The Ultimate Answer</h1>
|
| <p>Click the button to reveal the answer to life, the universe, and everything.</p>
|
| <button id="showButton">Click me</button>
|
| <div id="result"></div>
|
| </div>
|
| <script>
|
| document.getElementById('showButton').addEventListener('click', function() {
|
| document.getElementById('result').innerText = 'The answer is OpenHands is all you need!';
|
| });
|
| </script>
|
| </body>
|
| </html>
|
| """
|
|
|
|
|
| class Test(BaseIntegrationTest):
|
| INSTRUCTION = 'Browse localhost:8000, and tell me the ultimate answer to life.'
|
|
|
| @classmethod
|
| def initialize_runtime(cls, runtime: Runtime) -> None:
|
| action = CmdRunAction(command='mkdir -p /workspace')
|
| obs = runtime.run_action(action)
|
| assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
|
|
|
| action = CmdRunAction(command='mkdir -p /tmp/server')
|
| obs = runtime.run_action(action)
|
| assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
|
|
|
|
|
| with tempfile.TemporaryDirectory() as temp_dir:
|
| temp_file_path = os.path.join(temp_dir, 'index.html')
|
| with open(temp_file_path, 'w') as f:
|
| f.write(HTML_FILE)
|
|
|
| runtime.copy_to(temp_file_path, '/tmp/server')
|
|
|
|
|
| action = CmdRunAction(
|
| command='cd /tmp/server && nohup python3 -m http.server 8000 &'
|
| )
|
| obs = runtime.run_action(action)
|
|
|
| @classmethod
|
| def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
|
| from openhands.core.logger import openhands_logger as logger
|
|
|
|
|
| message_actions = [
|
| event
|
| for event in histories
|
| if isinstance(
|
| event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
|
| )
|
| ]
|
| logger.debug(f'Total message-like events: {len(message_actions)}')
|
|
|
| for event in message_actions:
|
| try:
|
| if isinstance(event, AgentDelegateObservation):
|
| content = event.content
|
| elif isinstance(event, AgentFinishAction):
|
| content = event.outputs.get('content', '')
|
| elif isinstance(event, MessageAction):
|
| content = event.content
|
| else:
|
| logger.warning(f'Unexpected event type: {type(event)}')
|
| continue
|
|
|
| if 'OpenHands is all you need!' in content:
|
| return TestResult(success=True)
|
| except Exception as e:
|
| logger.error(f'Error processing event: {e}')
|
|
|
| logger.debug(
|
| f'Total messages: {len(message_actions)}. Messages: {message_actions}'
|
| )
|
| return TestResult(
|
| success=False,
|
| reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
|
| )
|
|
|