Spaces:

beatccjiang
/

OdysseyArena

Runtime error

File size: 20,866 Bytes

# ==================== Light 任务模块 ====================
"""
Light 任务相关的所有函数和界面组件
支持多用户并发：使用 gr.State 管理每个用户会话的状态
使用统一进度管理模块存储数据
"""
import json
import os
from typing import List, Tuple, Optional, Dict, Any
import gradio as gr

# 导入统一进度管理模块
import progress_manager

# Import the Light environment.
# If a "LightEnv" directory exists next to this file, it is put at the front
# of sys.path before importing TextEnv_v2 (presumably TextEnv_v2 lives in that
# directory -- verify against the repository layout). When the directory is
# absent the import relies on the existing sys.path.
import sys
current_dir = os.path.dirname(os.path.abspath(__file__))
lightenv_path = os.path.join(current_dir, "LightEnv")
if os.path.exists(lightenv_path):
    sys.path.insert(0, lightenv_path)
from TextEnv_v2 import LightBulbEnv

# ------------------- Constants -------------------
# Step budget per environment: the step handler ends the task once env.steps
# reaches this value, and the progress summary counts such environments as
# completed.
LIGHT_MAX_STEPS = 200

# ------------------- Example Text -------------------
# Markdown walkthrough of the Light task (example rules, four example steps,
# tips and the goal). In Step 2 the action is `0` and the resulting state shows
# B0 lit, so the feedback line must name B0.
LIGHT_EXAMPLE_TEXT = """
## 📖 Light Bulb Environment Usage Example

### Example Scenario
Assume there are 3 light bulbs (indices 0, 1, 2), all initially turned off (○).

### Example Logic (Only shown in examples. In actual tasks, these rules are hidden and need to be inferred by users)
- B0: True  # B0 can be turned on under any circumstances
- B1: B0  # B1 can only be turned on when B0 is on
- B2: not B1 and B0  # B2 can only be turned on when B1 is off and B0 is on

### Example Steps
1. **Step 1**: Input action `1`, click "Execute Action"
   - Environment state after execution: ○ ○ ○
   - Environment feedback: B1 remains inactive... remaining bulbs should be in specific mode.
   - Reason: B1 can only be turned on when B0 is on, but B0 is off, so B1 cannot be turned on.

2. **Step 2**: Input action `0`, click "Execute Action"
   - Environment state after execution: 💡 ○ ○
   - Environment feedback: Toggled B0 to True
   - Reason: B0 can be turned on at any time.

3. **Step 3**: Input action `2`, click "Execute Action"
   - Environment state after execution: 💡 ○ 💡
   - Environment feedback: Toggled B2 to True
   - Reason: B2 can only be turned on when B1 is off and B0 is on, so B2 is turned on.

4. **Step 4**: Input action `1`, click "Execute Action"
   - Environment state after execution: 💡 💡 💡 (Task completed)
   - Environment feedback: Toggled B1 to True
   - Reason: B1 can only be turned on when B0 is on, so B1 is turned on.

### Tips
- 💡 indicates the bulb is lit
- ○ indicates the bulb is not lit
- The availability of each bulb may depend on the state of other bulbs
- You need to discover hidden rules through experimentation
- Maximum 200 steps allowed

### Goal
Turn on all bulbs (all bulbs display as 💡)
"""


# ------------------- 状态管理 -------------------

def create_light_state() -> Dict[str, Any]:
    """Build the initial Light-task state for one user session.

    A brand-new dict (with brand-new lists) is produced on every call, so
    separate sessions never share mutable state.

    Returns:
        A dict with the keys 'env', 'test_data', 'current_env_idx' and
        'history_records'.
    """
    fresh_state: Dict[str, Any] = dict(
        env=None,            # LightBulbEnv instance, set once an environment is loaded
        test_data=[],        # loaded test environments
        current_env_idx=0,   # index of the environment currently in use
        history_records=[],  # action history entries
    )
    return fresh_state


# ------------------- 工具函数 -------------------

def format_bulb_state(obs: List[bool]) -> str:
    """Render bulb states as a space-separated row of symbols (no indices).

    Args:
        obs: One truthy/falsy entry per bulb; truthy means lit.

    Returns:
        "💡" for each lit bulb and "○" for each unlit bulb, joined by single
        spaces. An empty input yields an empty string.
    """
    return " ".join("💡" if is_lit else "○" for is_lit in obs)


def load_light_test_data(state: Dict[str, Any], current_dir: str) -> Tuple[Dict[str, Any], str]:
    """Load the Light test environments from JSON into ``state['test_data']``.

    The file is looked up under *current_dir* first; when it does not exist
    there, the same relative path is tried against the working directory.

    Args:
        state: Session state dict; its 'test_data' entry is replaced on success.
        current_dir: Base directory expected to contain the test_data folder.

    Returns:
        (state, message) where message reports success or the failure reason.
    """
    relative_path = "test_data/turnonlights/test_turnonlights_lite_251030.json"
    candidate = os.path.join(current_dir, relative_path)
    test_file = candidate if os.path.exists(candidate) else relative_path

    try:
        with open(test_file, 'r', encoding='utf-8') as handle:
            state['test_data'] = json.load(handle)
        message = f"✅ Successfully loaded {len(state['test_data'])} test environments"
    except FileNotFoundError:
        message = f"❌ File not found: {test_file}"
    except Exception as e:
        message = f"❌ Load failed: {str(e)}"
    return state, message


def light_save_progress_internal(state: Dict[str, Any], current_user_id: str, save_dir: str) -> str:
    """Persist the current Light environment progress via progress_manager.

    Args:
        state: Session state holding 'env', 'current_env_idx',
            'history_records' and 'test_data'.
        current_user_id: User the progress is stored for; must be non-empty.
        save_dir: Directory handed to the progress manager.

    Returns:
        A status message: a warning when there is no user ID or no
        environment, a success line, or a failure line carrying the error text.
    """
    if not current_user_id:
        return "⚠️ Please enter user ID first"

    active_env = state.get('env')
    if active_env is None:
        return "⚠️ No progress to save"

    try:
        bulb_snapshot = active_env._get_obs()
        env_index = state.get('current_env_idx', 0)
        records = state.get('history_records', [])
        all_test_data = state.get('test_data', [])

        payload = {
            "user_id": current_user_id,
            "env_idx": env_index,
            "env_idx_display": env_index + 1,
            "bulb_states": bulb_snapshot,
            "history": records,
            "num_steps": active_env.steps,
            "level": active_env.num_bulbs,
        }
        # The hidden rule set is copied from the test data when the index is known.
        if env_index < len(all_test_data):
            payload["custom_logic"] = all_test_data[env_index].get("custom_logic", {})
        else:
            payload["custom_logic"] = {}

        progress_manager.save_task_environment_progress(
            current_user_id, save_dir, "light", env_index, payload
        )
    except Exception as e:
        return f"❌ Save failed: {str(e)}"
    return f"✅ Progress saved (Environment {env_index + 1}, Steps {len(records)})"


def light_load_environment(state: Dict[str, Any], env_idx_display: int, current_user_id: str, save_dir: str) -> Tuple[Dict[str, Any], str, str, str, str, str, str]:
    """Load a Light environment, restoring saved progress when it exists.

    Args:
        state: Session state dict; 'env', 'current_env_idx' and
            'history_records' are updated in place.
        env_idx_display: 1-based environment number chosen by the user. Values
            that cannot be converted to an integer (None, text, NaN) are
            reported as out of range instead of raising.
        current_user_id: User whose progress is looked up; a random ID is
            generated when it is empty.
        save_dir: Directory handed to the progress manager.

    Returns:
        (state, info, state_display, logic, history_display, progress, steps_info).
        The logic slot is always an empty string.
    """
    progress_hint = "Click 'View Unfinished Problems' button to view progress"

    # Auto-generate user ID if not provided
    if not current_user_id:
        import uuid
        current_user_id = f"user_{uuid.uuid4().hex[:8]}"

    test_data = state.get('test_data', [])
    if not test_data:
        return state, "❌ Please load test data first", "", "", "", "Click 'View Uncompleted Problems' button to view progress", "0 / 200"

    out_of_range = (state, f"❌ Environment index out of range (1-{len(test_data)})", "", "", "", progress_hint, "0 / 200")
    # The index may arrive as a float or None from the UI; a float would
    # otherwise fail later when used as a list index.
    try:
        env_idx_display = int(env_idx_display)
    except (TypeError, ValueError, OverflowError):
        return out_of_range

    env_idx = env_idx_display - 1
    if env_idx < 0 or env_idx >= len(test_data):
        return out_of_range

    # Ask the shared progress manager whether this environment was saved before.
    saved_progress_data = progress_manager.get_task_environment_progress(
        current_user_id, save_dir, "light", env_idx
    )

    if saved_progress_data:
        state['current_env_idx'] = env_idx
        bulb_states = saved_progress_data.get("bulb_states", [])
        state['history_records'] = saved_progress_data.get("history", [])
        num_steps = saved_progress_data.get("num_steps", len(state['history_records']))

        # A record without a usable level previously left state['env'] unset
        # (crash) or pointing at the previously loaded environment (wrong
        # display). Fall back to the level from the test data instead.
        level = saved_progress_data.get("level", 0)
        if not (level and level > 0):
            level = test_data[env_idx]["level"]

        custom_logic = saved_progress_data.get("custom_logic", {})
        if not custom_logic:
            custom_logic = test_data[env_idx].get("custom_logic", {})

        restored_env = LightBulbEnv(custom_logic=custom_logic, num_bulbs=level)
        restored_env.steps = num_steps
        # Re-apply the saved on/off flags to the bulbs the new env actually has.
        for i, bulb_state in enumerate(bulb_states):
            if i < restored_env.num_bulbs:
                bulb_name = f"B{i}"
                if bulb_name in restored_env.bulbs:
                    restored_env.bulbs[bulb_name] = bulb_state
        state['env'] = restored_env

        state_display = format_bulb_state(restored_env._get_obs())
        history_display = "\n\n".join(state['history_records']) if state['history_records'] else "No history records"

        info = f"✅ Environment {env_idx_display}/{len(test_data)} loaded\n"
        info += f"Number of bulbs: {level}\n"
        info += f"Steps: {len(state['history_records'])}"

        steps_info = f"{restored_env.steps} / {LIGHT_MAX_STEPS}"
        return state, info, state_display, "", history_display, progress_hint, steps_info

    # No saved progress: start a fresh environment and persist its initial state.
    state['current_env_idx'] = env_idx
    d = test_data[env_idx]
    state['env'] = LightBulbEnv(custom_logic=d["custom_logic"], num_bulbs=d["level"])
    state['history_records'] = []
    light_save_progress_internal(state, current_user_id, save_dir)

    state_display = format_bulb_state(state['env']._get_obs())
    history_display = "Environment initialized (new environment)\n"

    info = f"✅ Environment {env_idx_display}/{len(test_data)} initialized (new environment)\n"
    info += f"Number of bulbs: {d['level']}\n"
    info += f"Initial state: {state_display.split(chr(10))[0]}"

    steps_info = f"{state['env'].steps} / {LIGHT_MAX_STEPS}"
    return state, info, state_display, "", history_display, progress_hint, steps_info


def light_step_environment(state: Dict[str, Any], action_str: str, current_user_id: str, save_dir: str) -> Tuple[Dict[str, Any], str, str, str, bool, str]:
    """Execute one action in the current Light environment.

    An unparsable or out-of-range action is recorded in the history and still
    consumes one step. Once env.steps has reached LIGHT_MAX_STEPS no further
    action is executed. Progress is saved on every path that has an
    environment.

    Args:
        state: Session state dict holding 'env' and 'history_records'.
        action_str: Bulb index as typed by the user. None or non-integer text
            is treated as an invalid action instead of raising.
        current_user_id: User the progress is saved for; a random ID is
            generated when it is empty.
        save_dir: Directory handed to the progress manager.

    Returns:
        (state, feedback, state_display, history_display, done, steps_info)
    """
    env = state.get('env')
    if env is None:
        return state, "❌ Please initialize environment first", "Please initialize environment first", "", False, "0 / 200"

    history_records = state.get('history_records', [])
    # Display of the bulbs before the action; every path that leaves the bulbs
    # untouched returns this value.
    current_state_display = format_bulb_state(env._get_obs())

    # Auto-generate user ID if not provided
    if not current_user_id:
        import uuid
        current_user_id = f"user_{uuid.uuid4().hex[:8]}"

    # Parse the action. None is mapped to an empty string so it is reported as
    # an invalid format rather than failing on .strip().
    action = None
    action_error = None
    raw_action = "" if action_str is None else str(action_str)
    try:
        action = int(raw_action.strip())
    except ValueError:
        action_error = f"Invalid action format: {action_str}"
    else:
        if action < 0 or action >= env.num_bulbs:
            action_error = f"Action out of range (0-{env.num_bulbs-1})"

    # Step budget already used up: refuse to execute anything.
    if env.steps >= LIGHT_MAX_STEPS:
        history_display = "\n\n".join(history_records) if history_records else ""
        light_save_progress_internal(state, current_user_id, save_dir)
        feedback_info = f"⚠️ Reached step limit ({LIGHT_MAX_STEPS} steps)\n"
        feedback_info += "Task ended (failed to complete within the specified number of steps)\n"
        feedback_info += "Cannot continue executing actions\n"
        return state, feedback_info, current_state_display, history_display, True, f"{env.steps} / {LIGHT_MAX_STEPS}"

    # Invalid action: the bulbs do not change, but the attempt is logged and
    # counted as a step.
    if action_error:
        history_record = f"Step {len(history_records) + 1}:\n"
        history_record += f"State: {current_state_display}\n"
        history_record += f"Action: {action_str} (invalid)\n"
        history_record += f"Feedback: ❌ {action_error}"
        history_records.append(history_record)
        env.steps += 1

        feedback_info = f"Action: {action_str}\nFeedback: ❌ {action_error}\n"
        limit_reached = env.steps >= LIGHT_MAX_STEPS
        if limit_reached:
            history_records.append(
                f"Step {len(history_records) + 1}: Reached step limit ({LIGHT_MAX_STEPS} steps), task ended")
            feedback_info += f"⚠️ Reached step limit ({LIGHT_MAX_STEPS} steps)\n"
            feedback_info += "Task ended (failed to complete within the specified number of steps)\n"

        state['history_records'] = history_records
        history_display = "\n\n".join(history_records)
        light_save_progress_internal(state, current_user_id, save_dir)
        return state, feedback_info, current_state_display, history_display, limit_reached, f"{env.steps} / {LIGHT_MAX_STEPS}"

    # Valid action: execute it and log the state after execution.
    obs, feedback, done, _ = env.step(action)
    state_display = format_bulb_state(obs)

    history_record = f"Step {len(history_records) + 1}:\n"
    history_record += f"State after execution:\n{state_display}\n"
    history_record += f"Action: {action}\n"
    history_record += f"Feedback: {feedback}"
    history_records.append(history_record)
    state['history_records'] = history_records
    history_display = "\n\n".join(history_records)

    all_lit = all(obs)
    # The step-limit note is added to the returned feedback only; the history
    # entry above keeps the environment's original feedback.
    if env.steps >= LIGHT_MAX_STEPS:
        done = True
        if not all_lit:
            feedback = f"{feedback}\n⚠️ Reached step limit ({LIGHT_MAX_STEPS} steps), task ended (failed to complete within the specified number of steps)"

    light_save_progress_internal(state, current_user_id, save_dir)

    feedback_info = f"Action: {action}\nFeedback: {feedback}\n"
    if done:
        if all_lit:
            feedback_info += "🎉 Task completed! All bulbs are lit!\n"
        else:
            feedback_info += f"⚠️ Task ended (reached step limit {LIGHT_MAX_STEPS} steps)\n"

    return state, feedback_info, state_display, history_display, done, f"{env.steps} / {LIGHT_MAX_STEPS}"


def light_reset_environment(state: Dict[str, Any], current_user_id: str, save_dir: str) -> Tuple[Dict[str, Any], str, str, str, str, str]:
    """Reset the current Light environment and clear its action history.

    The reset state is saved through light_save_progress_internal; the save
    status message is not surfaced to the caller.

    Returns:
        (state, info, state_display, history_display, progress, steps_info)
    """
    active_env = state.get('env')
    if active_env is None:
        return (
            state,
            "❌ Please initialize environment first",
            "",
            "",
            "Click 'View Uncompleted Problems' button to view progress",
            "0 / 200",
        )

    active_env.reset()
    state['history_records'] = []
    light_save_progress_internal(state, current_user_id, save_dir)

    bulbs_text = format_bulb_state(active_env._get_obs())
    steps_text = f"{active_env.steps} / {LIGHT_MAX_STEPS}"
    return (
        state,
        "✅ Environment reset",
        bulbs_text,
        "Environment reset\n",
        "Click 'View Unfinished Problems' button to view progress",
        steps_text,
    )


def get_light_current_env_idx(state: Dict[str, Any]) -> int:
    """Return the current Light environment index stored in *state*.

    Falls back to 0 when the state has no 'current_env_idx' entry.
    """
    current_idx = state.get('current_env_idx', 0)
    return current_idx


def get_light_test_data(state: Dict[str, Any]) -> List[dict]:
    """Return the Light test environments stored in *state*.

    Falls back to a new empty list when the state has no 'test_data' entry.
    """
    loaded_envs = state.get('test_data', [])
    return loaded_envs


def get_light_history_records(state: Dict[str, Any]) -> List[str]:
    """Return the Light action history stored in *state*.

    Falls back to a new empty list when the state has no 'history_records'
    entry.
    """
    records = state.get('history_records', [])
    return records


def get_light_progress_summary(state: Dict[str, Any], user_id: str, save_dir: str) -> str:
    """Summarise a user's Light-task progress via the shared progress manager.

    An environment counts as completed when all of its saved bulbs are lit or
    when its saved step count has reached LIGHT_MAX_STEPS. Saved records whose
    'env_idx' is missing or outside the loaded test data are ignored, so the
    completed and incomplete counts always add up to the total.

    Args:
        state: Session state; only 'test_data' is read.
        user_id: User whose progress is summarised; a random ID is generated
            when it is empty or blank.
        save_dir: Directory handed to the progress manager.

    Returns:
        The formatted summary, or a warning when no test data is loaded.
    """
    # Auto-generate user ID if not provided
    if not user_id or not user_id.strip():
        import uuid
        user_id = f"user_{uuid.uuid4().hex[:8]}"
    user_id = user_id.strip()

    # Without test data there is nothing to summarise, so return before
    # touching the progress store.
    test_data = state.get('test_data', [])
    total_envs = len(test_data) if test_data else 0
    if total_envs == 0:
        return "⚠️ Please load test data first"

    task_data = progress_manager.load_task_progress(user_id, save_dir, "light") or {}
    environments = task_data.get("environments", {})

    completed_envs = set()
    for progress_data in environments.values():
        env_idx = progress_data.get("env_idx", -1)
        # Skip records that do not map onto a loaded environment.
        if not isinstance(env_idx, int) or not 0 <= env_idx < total_envs:
            continue

        bulb_states = progress_data.get("bulb_states", [])
        num_steps = progress_data.get("num_steps", 0)
        solved = bool(bulb_states) and all(bulb_states)
        if solved or num_steps >= LIGHT_MAX_STEPS:
            completed_envs.add(env_idx)

    incomplete_envs = sorted(set(range(total_envs)) - completed_envs)

    summary_lines = [
        f"📊 Light Task - Progress Summary for User {user_id}",
        f"Total environments: {total_envs}",
        f"Completed: {len(completed_envs)}/{total_envs}",
        f"Incomplete: {len(incomplete_envs)}/{total_envs}",
    ]

    if incomplete_envs:
        summary_lines.append("\n❌ Incomplete environments:")
        # Five 1-based environment numbers per line.
        for start in range(0, len(incomplete_envs), 5):
            chunk = incomplete_envs[start:start + 5]
            summary_lines.append("  " + ", ".join(str(idx + 1) for idx in chunk))
    else:
        summary_lines.append("\n🎉 Congratulations! All environments are completed!")

    return "\n".join(summary_lines)


def create_light_interface(current_dir: str, save_dir: str, user_id_input: gr.Textbox) -> Tuple[gr.Row, None, None, None, None, gr.Textbox, gr.Textbox, gr.Textbox, gr.Button, gr.Textbox, gr.Textbox]:
    """Create the Light task interface components.

    The three parameters are accepted but not used inside this function.

    Returns:
        An 11-tuple:
        (light_interface, light_env_idx_input, light_init_btn, light_reset_btn,
         light_env_info, light_state_display, light_steps_info_text,
         light_action_input, light_step_btn, light_feedback_display,
         light_history_display)

    Note: the environment-control components (light_env_idx_input,
    light_init_btn, light_reset_btn, light_env_info) are not part of
    light_interface; they have to be added by hand in the main interface,
    below the progress summary. To keep the function signature consistent,
    None is returned in their four slots as a placeholder, and the main
    interface ignores those values.
    """
    # Main Row of the task UI (environment controls are not included here).
    with gr.Row(visible=True) as light_interface:
        # Left column: step counter and action history.
        with gr.Column(scale=1):
            light_steps_info_text = gr.Textbox(
                label="Steps Info",
                value="0 / 200",
                interactive=False,
                visible=True,
                lines=2
            )
            gr.Markdown("### 📜 Action History")
            light_history_display = gr.Textbox(
                label="Action History",
                interactive=False,
                lines=10
            )
        
        # Right column: current bulb state, action input and feedback.
        with gr.Column(scale=2):
            gr.Markdown("### 💡 Current State")
            light_state_display = gr.Textbox(
                label="Light Bulb State",
                interactive=False,
                lines=3,
                value="Please load environment first"
            )
            
            gr.Markdown("### 🎯 Action Input")
            light_action_input = gr.Textbox(
                label="Input Action (Bulb Index)",
                placeholder="e.g., 0",
                info="Input the bulb index to toggle (starting from 0)"
            )
            light_step_btn = gr.Button("Execute Action", variant="primary")
            
            gr.Markdown("### 💬 Environment Feedback")
            light_feedback_display = gr.Textbox(
                label="Feedback Info",
                interactive=False,
                lines=5
            )
    
    # Placeholders for the environment controls (the main interface uses the
    # control components it creates itself).
    return (light_interface, None, None, None,
            None, light_state_display, light_steps_info_text,
            light_action_input, light_step_btn, light_feedback_display, light_history_display)