| | import hashlib
|
| |
|
| | from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
|
| | from openhands.events.action import (
|
| | AgentFinishAction,
|
| | FileWriteAction,
|
| | MessageAction,
|
| | )
|
| | from openhands.events.event import Event
|
| | from openhands.events.observation import AgentDelegateObservation
|
| | from openhands.runtime.base import Runtime
|
| |
|
| |
|
class Test(BaseIntegrationTest):
    """Integration test: run an interactive script and report its secret number.

    ``initialize_runtime`` writes ``/workspace/python_script.py`` into the
    runtime. That script reads a name and an age from stdin and prints a
    number derived from the SHA-256 digest of the age. ``verify_result``
    passes when any message-like event in the history contains the number
    expected for the age given in ``INSTRUCTION``.
    """

    INSTRUCTION = 'Execute the python script /workspace/python_script.py with input "John" and "25" and tell me the secret number.'
    # Same formula as the script written by initialize_runtime, applied to the
    # age ("25") that INSTRUCTION tells the agent to enter.
    SECRET_NUMBER = int(hashlib.sha256(str(25).encode()).hexdigest()[:8], 16) % 1000

    @classmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
        """Write the interactive script into the runtime and log the result.

        Args:
            runtime: Runtime in which the ``FileWriteAction`` is executed.
        """
        from openhands.core.logger import openhands_logger as logger

        action = FileWriteAction(
            path='/workspace/python_script.py',
            content=(
                'name = input("Enter your name: "); age = input("Enter your age: "); '
                'import hashlib; secret = int(hashlib.sha256(str(age).encode()).hexdigest()[:8], 16) % 1000; '
                'print(f"Hello {name}, you are {age} years old. Tell you a secret number: {secret}")'
            ),
        )
        logger.info(action, extra={'msg_type': 'ACTION'})
        observation = runtime.run_action(action)
        logger.info(observation, extra={'msg_type': 'OBSERVATION'})

    @staticmethod
    def _event_text(event: Event) -> str:
        """Return the text of a message-like event as a ``str``.

        For ``AgentFinishAction`` the text is ``outputs['content']`` followed
        by the thought (when non-empty); for every other event it is
        ``event.content``. ``None`` becomes ``''`` and any other non-string
        value is passed through ``str`` so callers can always search it.
        """
        if isinstance(event, AgentFinishAction):
            raw = event.outputs.get('content', '')
            text = '' if raw is None else str(raw)
            if event.thought:
                text += f'\n\n{event.thought}'
            return text

        raw = event.content
        return '' if raw is None else str(raw)

    @classmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        """Check whether any message-like event reports the secret number.

        Args:
            runtime: Unused here; part of the ``verify_result`` signature.
            histories: Events recorded during the run.

        Returns:
            ``TestResult(success=True)`` as soon as one ``MessageAction``,
            ``AgentFinishAction`` or ``AgentDelegateObservation`` contains
            ``SECRET_NUMBER`` as a standalone number; otherwise a failed
            ``TestResult`` whose reason states how many events were checked.
        """
        import re

        from openhands.core.logger import openhands_logger as logger

        # Require non-digit neighbours so the secret is not "found" inside a
        # longer number (e.g. a secret of 7 inside 17, or inside the echoed
        # age). A plain substring test would accept those.
        secret_pattern = re.compile(
            rf'(?<![0-9]){re.escape(str(cls.SECRET_NUMBER))}(?![0-9])'
        )

        message_actions = [
            event
            for event in histories
            if isinstance(
                event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
            )
        ]
        logger.info(f'Total message-like events: {len(message_actions)}')

        for event in message_actions:
            # Best effort: a malformed event must not abort the whole check,
            # so log the problem and move on to the next event.
            try:
                content = cls._event_text(event)
            except Exception as e:
                logger.error(f'Error processing event: {e}')
                continue

            if secret_pattern.search(content):
                return TestResult(success=True)

        logger.debug(
            f'Total messages: {len(message_actions)}. Messages: {message_actions}'
        )
        return TestResult(
            success=False,
            reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
        )
|
| |
|