#!/bin/bash
# Comprehensive Docker test for Atari environment
# Tests: Build, Start, Health, Reset, Step, State, Cleanup
#
# Provenance notes (these were bare text above the shebang, where the shell
# would have tried to execute them as commands):
#   Path:     breakout/src/envs/atari_env/test_atari_docker.sh
#   Author:   Zach Wentz
#   Message:  🤖 Deploy atari_env environment - 2025-10-19 22:32:33
#   Revision: d0ae716
#
# Usage: run from the OpenEnv root directory (the script checks for
# src/envs/atari_env/server/Dockerfile relative to the current directory).

set -e # Exit on error

# Colors for output (ANSI escape sequences, expanded by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
IMAGE_NAME="atari-env"
IMAGE_TAG="test"
CONTAINER_NAME="atari-env-test"
PORT="8765"        # Host port published for the container; non-standard to avoid conflicts
HEALTH_RETRIES=30  # Maximum number of /health polls before giving up
HEALTH_DELAY=2     # Seconds to sleep between /health polls
#######################################
# Cleanup function: stop and remove the test container.
# Installed as the EXIT trap below, so it runs on every exit path of the
# script (normal completion, explicit `exit 1`, or a `set -e` abort).
# Globals:   CONTAINER_NAME, BLUE, GREEN, NC (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 unless the final echo fails; docker failures are ignored
#######################################
cleanup() {
  echo -e "\n${BLUE}Cleaning up...${NC}"
  # Best effort on purpose: the container may never have been created, or may
  # already be stopped/removed, and that must not turn cleanup into a failure.
  docker stop "${CONTAINER_NAME}" 2>/dev/null || true
  docker rm "${CONTAINER_NAME}" 2>/dev/null || true
  echo -e "${GREEN}✓${NC} Cleanup complete"
}

# Set trap to cleanup on exit
trap cleanup EXIT
# Header
# Horizontal rule for the banner (box-drawing glyphs restored from mojibake).
RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "${RULE}"
echo " ATARI ENVIRONMENT DOCKER TEST"
echo "${RULE}"
echo ""

# Check prerequisites: the rest of the script shells out to docker and curl.
echo -e "${BLUE}Checking prerequisites...${NC}"

if ! command -v docker &> /dev/null; then
  echo -e "${RED}✗${NC} Docker is not installed"
  exit 1
fi
echo -e "${GREEN}✓${NC} Docker is installed"

if ! command -v curl &> /dev/null; then
  echo -e "${RED}✗${NC} curl is not installed"
  exit 1
fi
echo -e "${GREEN}✓${NC} curl is installed"

# Check if we're in the right directory: the build step uses this Dockerfile
# path and "." as the build context, both relative to the current directory.
if [ ! -f "src/envs/atari_env/server/Dockerfile" ]; then
  echo -e "${RED}✗${NC} Must run from OpenEnv root directory"
  exit 1
fi
echo -e "${GREEN}✓${NC} In correct directory"
# Step 1: Build Docker image
STEP_RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo -e "${BLUE}${STEP_RULE}${NC}"
echo -e "${BLUE}STEP 1: Building Docker Image${NC}"
echo -e "${BLUE}${STEP_RULE}${NC}"

BUILD_LOG="/tmp/atari_build.log"
echo "Building ${IMAGE_NAME}:${IMAGE_TAG}..."

# Record docker build's own exit status. Piping the build through tee/tail
# inside the `if` would test the status of the last pipeline stage (tail), so
# a failed build would be reported as a success. `tail -n 20` only prints once
# its input ends, so writing the log first and tailing it afterwards shows the
# user the same output.
BUILD_STATUS=0
docker build -f src/envs/atari_env/server/Dockerfile -t "${IMAGE_NAME}:${IMAGE_TAG}" . \
  > "${BUILD_LOG}" 2>&1 || BUILD_STATUS=$?
tail -n 20 "${BUILD_LOG}" || true

if [ "${BUILD_STATUS}" -eq 0 ]; then
  echo -e "${GREEN}✓${NC} Docker image built successfully"
else
  echo -e "${RED}✗${NC} Docker build failed"
  echo "See ${BUILD_LOG} for full output"
  exit 1
fi

# Check image exists
if docker image inspect "${IMAGE_NAME}:${IMAGE_TAG}" &> /dev/null; then
  # .Size is reported in bytes; convert to MB with one decimal place.
  IMAGE_SIZE=$(docker image inspect "${IMAGE_NAME}:${IMAGE_TAG}" --format='{{.Size}}' \
    | awk '{ printf "%.1f", $1 / 1024 / 1024 }')
  echo -e "${GREEN}✓${NC} Image size: ${IMAGE_SIZE} MB"
else
  echo -e "${RED}✗${NC} Image not found after build"
  exit 1
fi
# Step 2: Start container
STEP_RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo -e "${BLUE}${STEP_RULE}${NC}"
echo -e "${BLUE}STEP 2: Starting Container${NC}"
echo -e "${BLUE}${STEP_RULE}${NC}"

# Clean up any existing container (left over from an earlier run)
docker rm -f "${CONTAINER_NAME}" 2>/dev/null || true

echo "Starting container on port ${PORT}..."
# Test the command directly: under `set -e` a separate `$?` check after a
# failing `docker run` is never reached, so the failure message was dead code.
# Host port ${PORT} is published to container port 8000.
if docker run -d \
  --name "${CONTAINER_NAME}" \
  -p "${PORT}:8000" \
  -e ATARI_GAME=pong \
  -e ATARI_OBS_TYPE=ram \
  -e ATARI_FRAMESKIP=4 \
  "${IMAGE_NAME}:${IMAGE_TAG}"; then
  echo -e "${GREEN}✓${NC} Container started: ${CONTAINER_NAME}"
else
  echo -e "${RED}✗${NC} Failed to start container"
  exit 1
fi

# Wait for container to be running
sleep 2
# Ask docker for this exact container's state instead of grepping `docker ps`,
# which would also match any other container or image containing the name.
CONTAINER_RUNNING=$(docker inspect --format '{{.State.Running}}' "${CONTAINER_NAME}" 2>/dev/null || true)
if [ "${CONTAINER_RUNNING}" = "true" ]; then
  echo -e "${GREEN}✓${NC} Container is running"
else
  echo -e "${RED}✗${NC} Container is not running"
  docker logs "${CONTAINER_NAME}" || true
  exit 1
fi
# Step 3: Wait for health check
STEP_RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo -e "${BLUE}${STEP_RULE}${NC}"
echo -e "${BLUE}STEP 3: Waiting for Server${NC}"
echo -e "${BLUE}${STEP_RULE}${NC}"

# The real time budget is retries x delay, not the retry count on its own.
HEALTH_TIMEOUT=$((HEALTH_RETRIES * HEALTH_DELAY))
echo "Waiting for server to be ready (timeout: ${HEALTH_TIMEOUT}s)..."

SERVER_READY=false
for ((attempt = 1; attempt <= HEALTH_RETRIES; attempt++)); do
  # -f: treat HTTP error statuses as failure so an erroring server is not
  #     mistaken for a ready one.
  # --max-time: bound each poll so a hung connection cannot stall the loop.
  if curl -sf --max-time 5 "http://localhost:${PORT}/health" > /dev/null 2>&1; then
    # Terminate the line of progress dots, if any were printed.
    if [ "${attempt}" -gt 1 ]; then
      echo ""
    fi
    WAITED=$(((attempt - 1) * HEALTH_DELAY))
    echo -e "${GREEN}✓${NC} Server is ready (attempt ${attempt}/${HEALTH_RETRIES}, ~${WAITED}s)"
    SERVER_READY=true
    break
  fi
  # No point sleeping after the final failed attempt.
  if [ "${attempt}" -lt "${HEALTH_RETRIES}" ]; then
    echo -n "."
    sleep "${HEALTH_DELAY}"
  fi
done

# Checked after the loop so that HEALTH_RETRIES=0 also counts as a failure
# instead of silently falling through.
if [ "${SERVER_READY}" != "true" ]; then
  echo ""
  echo -e "${RED}✗${NC} Server did not become ready in time"
  echo "Container logs:"
  docker logs "${CONTAINER_NAME}" || true
  exit 1
fi
# Step 4: Test health endpoint
STEP_RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo -e "${BLUE}${STEP_RULE}${NC}"
echo -e "${BLUE}STEP 4: Testing Health Endpoint${NC}"
echo -e "${BLUE}${STEP_RULE}${NC}"

# `|| true`: under `set -e` a failing curl inside the assignment would abort
# the script before any message is printed; let the check below report it.
HEALTH_RESPONSE=$(curl -s "http://localhost:${PORT}/health") || true
echo "Response: ${HEALTH_RESPONSE}"

# -w matches "healthy" as a whole word, so a body saying "unhealthy" does not
# pass the check.
if grep -qw "healthy" <<<"${HEALTH_RESPONSE}"; then
  echo -e "${GREEN}✓${NC} Health endpoint working"
else
  echo -e "${RED}✗${NC} Health endpoint failed"
  exit 1
fi
# Step 5: Test reset endpoint
STEP_RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo -e "${BLUE}${STEP_RULE}${NC}"
echo -e "${BLUE}STEP 5: Testing Reset Endpoint${NC}"
echo -e "${BLUE}${STEP_RULE}${NC}"

# `|| true`: a curl failure must reach the empty-response diagnostics below
# rather than aborting silently under `set -e`.
RESET_RESPONSE=$(curl -s -X POST "http://localhost:${PORT}/reset" \
  -H "Content-Type: application/json" -d '{}') || true

if [ -z "${RESET_RESPONSE}" ]; then
  echo -e "${RED}✗${NC} Reset endpoint returned empty response"
  # 2>&1 so the 20-line limit also applies to what the container wrote to stderr.
  docker logs "${CONTAINER_NAME}" 2>&1 | tail -20
  exit 1
fi
echo "Response (first 200 chars): ${RESET_RESPONSE:0:200}..."

# Check if response contains expected fields (plain substring checks on the body)
if grep -q "observation" <<<"${RESET_RESPONSE}" \
  && grep -q "screen" <<<"${RESET_RESPONSE}" \
  && grep -q "legal_actions" <<<"${RESET_RESPONSE}"; then
  echo -e "${GREEN}✓${NC} Reset endpoint working"
  # Extract some info: character count of the `"screen":[...]` fragment
  SCREEN_LEN=$(grep -o '"screen":\[[^]]*\]' <<<"${RESET_RESPONSE}" | wc -c)
  echo " Screen data length: ${SCREEN_LEN} chars"
else
  echo -e "${RED}✗${NC} Reset response missing required fields"
  echo "Full response: ${RESET_RESPONSE}"
  exit 1
fi
# Step 6: Test step endpoint
STEP_RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo -e "${BLUE}${STEP_RULE}${NC}"
echo -e "${BLUE}STEP 6: Testing Step Endpoint${NC}"
echo -e "${BLUE}${STEP_RULE}${NC}"

STEP_PAYLOAD='{"action": {"action_id": 0, "game_name": "pong"}}'
# `|| true`: a curl failure must reach the empty-response diagnostics below
# rather than aborting silently under `set -e`.
STEP_RESPONSE=$(curl -s -X POST "http://localhost:${PORT}/step" \
  -H "Content-Type: application/json" -d "${STEP_PAYLOAD}") || true

if [ -z "${STEP_RESPONSE}" ]; then
  echo -e "${RED}✗${NC} Step endpoint returned empty response"
  # 2>&1 so the 20-line limit also applies to what the container wrote to stderr.
  docker logs "${CONTAINER_NAME}" 2>&1 | tail -20
  exit 1
fi
echo "Response (first 200 chars): ${STEP_RESPONSE:0:200}..."

# Check if response contains expected fields (plain substring checks on the body)
if grep -q "observation" <<<"${STEP_RESPONSE}" \
  && grep -q "reward" <<<"${STEP_RESPONSE}" \
  && grep -q "done" <<<"${STEP_RESPONSE}"; then
  echo -e "${GREEN}✓${NC} Step endpoint working"
  # Extract reward and done: text between the key's colon and the next , or }
  REWARD=$(grep -o '"reward":[^,}]*' <<<"${STEP_RESPONSE}" | cut -d: -f2)
  DONE=$(grep -o '"done":[^,}]*' <<<"${STEP_RESPONSE}" | cut -d: -f2)
  echo " Reward: ${REWARD}"
  echo " Done: ${DONE}"
else
  echo -e "${RED}✗${NC} Step response missing required fields"
  echo "Full response: ${STEP_RESPONSE}"
  exit 1
fi
# Step 7: Test state endpoint
STEP_RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo -e "${BLUE}${STEP_RULE}${NC}"
echo -e "${BLUE}STEP 7: Testing State Endpoint${NC}"
echo -e "${BLUE}${STEP_RULE}${NC}"

# `|| true`: a curl failure must reach the empty-response diagnostics below
# rather than aborting silently under `set -e`.
STATE_RESPONSE=$(curl -s "http://localhost:${PORT}/state") || true

if [ -z "${STATE_RESPONSE}" ]; then
  echo -e "${RED}✗${NC} State endpoint returned empty response"
  # 2>&1 so the 20-line limit also applies to what the container wrote to stderr.
  docker logs "${CONTAINER_NAME}" 2>&1 | tail -20
  exit 1
fi
echo "Response: ${STATE_RESPONSE}"

# Check if response contains expected fields (plain substring checks on the body)
if grep -q "episode_id" <<<"${STATE_RESPONSE}" \
  && grep -q "step_count" <<<"${STATE_RESPONSE}" \
  && grep -q "game_name" <<<"${STATE_RESPONSE}"; then
  echo -e "${GREEN}✓${NC} State endpoint working"
  # Extract info. In `"game_name":"value"` the value is the 4th field when
  # splitting on double quotes.
  GAME_NAME=$(grep -o '"game_name":"[^"]*"' <<<"${STATE_RESPONSE}" | cut -d'"' -f4)
  STEP_COUNT=$(grep -o '"step_count":[^,}]*' <<<"${STATE_RESPONSE}" | cut -d: -f2)
  echo " Game: ${GAME_NAME}"
  echo " Steps: ${STEP_COUNT}"
else
  echo -e "${RED}✗${NC} State response missing required fields"
  echo "Full response: ${STATE_RESPONSE}"
  exit 1
fi
# Step 8: Test multiple steps
STEP_RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo -e "${BLUE}${STEP_RULE}${NC}"
echo -e "${BLUE}STEP 8: Testing Multiple Steps${NC}"
echo -e "${BLUE}${STEP_RULE}${NC}"

echo "Taking 10 steps..."
TOTAL_REWARD=0
for i in {1..10}; do
  ACTION_ID=$((RANDOM % 3)) # Random action 0-2
  STEP_PAYLOAD="{\"action\": {\"action_id\": ${ACTION_ID}, \"game_name\": \"pong\"}}"
  # `|| true`: let the check below report a failed request instead of a
  # silent `set -e` abort.
  STEP_RESPONSE=$(curl -s -X POST "http://localhost:${PORT}/step" \
    -H "Content-Type: application/json" -d "${STEP_PAYLOAD}") || true

  if ! grep -q "observation" <<<"${STEP_RESPONSE}"; then
    echo -e "${RED}✗${NC} Step ${i} failed"
    echo "Response (first 200 chars): ${STEP_RESPONSE:0:200}"
    exit 1
  fi

  # A null reward is reported (and summed) as 0.
  REWARD=$(grep -o '"reward":[^,}]*' <<<"${STEP_RESPONSE}" | cut -d: -f2 | sed 's/null/0/')
  # Strip blanks so a value such as " true" still compares equal to "true".
  DONE=$(grep -o '"done":[^,}]*' <<<"${STEP_RESPONSE}" | cut -d: -f2 | tr -d ' ')
  echo " Step ${i}: action=${ACTION_ID}, reward=${REWARD}, done=${DONE}"

  # Rewards may be fractional, which shell arithmetic cannot add; use awk.
  # An empty or non-numeric reward contributes 0.
  TOTAL_REWARD=$(awk -v total="${TOTAL_REWARD}" -v reward="${REWARD:-0}" \
    'BEGIN { print total + reward }')

  if [ "${DONE}" = "true" ]; then
    echo " Episode completed early at step ${i}"
    break
  fi
done
echo " Total reward: ${TOTAL_REWARD}"
echo -e "${GREEN}✓${NC} Multiple steps completed successfully"
# Step 9: Check container logs for errors
STEP_RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo -e "${BLUE}${STEP_RULE}${NC}"
echo -e "${BLUE}STEP 9: Checking Container Logs${NC}"
echo -e "${BLUE}${STEP_RULE}${NC}"

LOGS=$(docker logs "${CONTAINER_NAME}" 2>&1)

# Filter once and reuse the result, so the lines shown under the warning are
# exactly the ones that triggered it (lines mentioning LoggerMode.Error are
# excluded in both places). `|| true` keeps a no-match grep from being fatal.
ERROR_LINES=$(grep -i "error" <<<"${LOGS}" | grep -v "LoggerMode.Error" || true)
if [ -n "${ERROR_LINES}" ]; then
  # Errors are reported as a warning only; they do not fail the test run.
  echo -e "${YELLOW}⚠${NC} Found errors in logs:"
  head -5 <<<"${ERROR_LINES}"
else
  echo -e "${GREEN}✓${NC} No errors in container logs"
fi

# Any line mentioning "exception" (case-insensitive) fails the test run.
EXCEPTION_LINES=$(grep -i "exception" <<<"${LOGS}" || true)
if [ -n "${EXCEPTION_LINES}" ]; then
  echo -e "${RED}✗${NC} Found exceptions in logs:"
  head -5 <<<"${EXCEPTION_LINES}"
  exit 1
else
  echo -e "${GREEN}✓${NC} No exceptions in container logs"
fi
# Final Summary (reached only when every earlier step passed)
RULE="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo "${RULE}"
echo -e "${GREEN}✅ ALL DOCKER TESTS PASSED${NC}"
echo "${RULE}"
echo ""
echo "Summary:"
echo " ✓ Docker image built successfully"
echo " ✓ Container started and ran"
echo " ✓ Health endpoint working"
echo " ✓ Reset endpoint working"
echo " ✓ Step endpoint working"
echo " ✓ State endpoint working"
echo " ✓ Multiple steps working"
# Errors in the logs only produce a warning in step 9; exceptions are what fail.
echo " ✓ No exceptions in container logs"
echo ""
echo "Image: ${IMAGE_NAME}:${IMAGE_TAG}"
echo "Container: ${CONTAINER_NAME}"
echo "Port: ${PORT}"
echo ""
# The EXIT trap stops and removes the container, so `docker start` and
# `docker logs` on it would fail after this script ends. Point the user at a
# fresh `docker run` of the image, which is kept.
echo "Note: the test container is stopped and removed when this script exits."
echo "To run it again: docker run -d --name ${CONTAINER_NAME} -p ${PORT}:8000 ${IMAGE_NAME}:${IMAGE_TAG}"
echo "To view logs of that new container: docker logs ${CONTAINER_NAME}"
echo ""