Spaces:
Running
Running
| """ | |
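inference.py — Baseline inference script for ExecAssist.

Runs a baseline AI model against all 3 tasks (easy, medium, hard) and reports
each episode on stdout using structured [START] / [STEP] / [END] lines.
Talks to an OpenAI-compatible endpoint, defaulting to the OpenRouter API with a
free-tier model; the endpoint, API key and model name can be overridden through
environment variables.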
| """ | |
| import os | |
| import json | |
| import statistics | |
| from typing import List, Optional | |
| from openai import OpenAI | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| # ============================================================ | |
| # CONFIGURATION | |
| # ============================================================ | |
# Endpoint, credential and model are each resolved from the environment. For
# every setting the un-underscored variable name is consulted first, then the
# underscored one; an empty value counts as unset because of the `or` chain.
API_BASE_URL = (
    os.getenv("APIBASEURL")
    or os.getenv("API_BASE_URL")
    or "https://openrouter.ai/api/v1"
)
# The key has no built-in default, so it is None when no variable is set.
API_KEY = (
    os.getenv("HFTOKEN")
    or os.getenv("HF_TOKEN")
    or os.getenv("API_KEY")
)
MODEL_NAME = (
    os.getenv("MODELNAME")
    or os.getenv("MODEL_NAME")
    or "nvidia/nemotron-3-super-120b-a12b:free"
)

BENCHMARK = "exec-assist"  # benchmark identifier
TEMPERATURE = 0.3  # sampling temperature for completion requests
MAX_TOKENS = 500  # upper bound on completion length
| # ============================================================ | |
| # STRUCTURED STDOUT LOGGING | |
| # ============================================================ | |
def log_start(task: str, env: str, model: str) -> None:
    """Write the ``[START]`` record that opens a task's log output.

    The record is a single line on stdout, flushed immediately.

    Args:
        task: Task identifier, printed as ``task=``.
        env: Environment/benchmark name, printed as ``env=``.
        model: Model name, printed as ``model=``.
    """
    line = "[START] task={} env={} model={}".format(task, env, model)
    print(line, flush=True)
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Write one ``[STEP]`` record to stdout and flush it.

    Args:
        step: Step number within the episode.
        action: Text describing the action taken.
        reward: Reward for the step, printed with two decimals.
        done: Episode-finished flag, printed in lowercase.
        error: Error text; a falsy value (``None`` or ``""``) prints as ``null``.
    """
    fields = [
        f"step={step}",
        f"action={action}",
        f"reward={reward:.2f}",
        f"done={str(done).lower()}",
        f"error={error or 'null'}",
    ]
    print("[STEP] " + " ".join(fields), flush=True)
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Write the ``[END]`` record that closes a task's log output.

    Args:
        success: Outcome flag, printed in lowercase.
        steps: Number of steps taken.
        score: Final score, printed with three decimals.
        rewards: Per-step rewards, printed comma-separated with two decimals
            each; an empty list prints as an empty value.
    """
    formatted_rewards = [format(r, ".2f") for r in rewards]
    line = "[END] success={} steps={} score={:.3f} rewards={}".format(
        str(success).lower(),
        steps,
        score,
        ",".join(formatted_rewards),
    )
    print(line, flush=True)
| # ============================================================ | |
| # PROMPT BUILDING | |
| # ============================================================ | |
def build_assistant_prompt(observation: dict) -> str:
    """Render an environment observation into the executive-assistant prompt.

    The prompt lists the emails, the existing meetings and the working hours
    found in ``observation``, followed by the task description, the required
    action and the JSON schema the model must answer with.

    The observation is treated defensively: a container that is missing or
    ``None`` is handled as empty, and a field missing from an individual
    email or meeting is rendered as ``unknown`` instead of raising
    ``KeyError``.

    Args:
        observation: Observation dict. Keys read: ``emails``, ``calendar``
            (with ``existing_meetings``, ``working_hours``,
            ``executive_name``), ``description`` and ``action_required``.

    Returns:
        The complete prompt text.
    """
    missing = "unknown"

    # `or` (rather than a .get default) also covers keys that are present but
    # null, which would otherwise break the loops below.
    emails = observation.get("emails") or []
    calendar = observation.get("calendar") or {}

    # Email section: one block per email, assembled with a single join.
    email_str = "".join(
        f"\n--- Email from {email.get('sender', missing)} ---\n"
        f"Subject: {email.get('subject', missing)}\n"
        f"Priority: {email.get('priority', missing)}\n"
        f"Body:\n{email.get('body', missing)}\n"
        for email in emails
    )

    # Calendar section: one line per meeting, or a placeholder when empty.
    meetings = calendar.get("existing_meetings") or []
    if meetings:
        meeting_lines = "".join(
            f" - {mtg.get('subject', missing)}: "
            f"{mtg.get('start_time', missing)} to {mtg.get('end_time', missing)} "
            f"(Priority: {mtg.get('priority', missing)})\n"
            for mtg in meetings
        )
    else:
        meeting_lines = " (No existing meetings)\n"
    calendar_str = "\nExisting Meetings:\n" + meeting_lines

    working_hours = calendar.get("working_hours") or {}
    hours_str = "\nWorking Hours:\n" + "".join(
        f" {day.capitalize()}: {hours}\n" for day, hours in working_hours.items()
    )

    executive_name = calendar.get("executive_name", "Alex Chen")
    task_desc = observation.get("description", "")
    action_required = observation.get("action_required", "")

    # Doubled braces are literal braces in the f-string (the JSON template).
    prompt = f"""You are an executive assistant for {executive_name}.
TASK: {task_desc}
{email_str}
{calendar_str}
{hours_str}
ACTION REQUIRED: {action_required}
Respond with ONLY a JSON object in this exact format:
{{
"email_reply": "Your professional email response here",
"calendar_action": "book or propose_alternatives or reschedule or decline",
"meeting_details": {{
"participants": ["email1@company.com", "email2@company.com"],
"start_time": "2026-04-28T14:00:00",
"end_time": "2026-04-28T15:00:00",
"subject": "Meeting subject",
"location": "Conference Room A",
"proposed_alternatives": [
{{"start_time": "2026-04-29T10:00:00", "end_time": "2026-04-29T11:00:00", "note": "Alternative option"}}
]
}}
}}
Important:
- Be professional and polite in email
- Check for calendar conflicts
- If conflict exists, propose 2-3 alternative times
- Include all email participants in meeting_details.participants
- Use ISO format for all times (YYYY-MM-DDTHH:MM:SS)
Respond with ONLY the JSON object, no explanation."""
    return prompt
| # ============================================================ | |
| # MODEL INTERACTION | |
| # ============================================================ | |
def call_model(client: OpenAI, prompt: str) -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    The request uses the module-level MODEL_NAME, TEMPERATURE and MAX_TOKENS.

    Args:
        client: OpenAI client used to issue the chat completion.
        prompt: Full prompt text, sent as the only message.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or ``""`` when the content is empty or when any exception
        occurs (the exception is reported on stdout as ``API error: ...``).
    """
    try:
        request = {
            "model": MODEL_NAME,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": TEMPERATURE,
            "max_tokens": MAX_TOKENS,
        }
        completion = client.chat.completions.create(**request)
        content = completion.choices[0].message.content
        # content may be None; treat that like an empty reply.
        return (content or "").strip()
    except Exception as exc:
        print(f"API error: {exc}")
        return ""
| # ============================================================ | |
| # RESPONSE PARSING | |
| # ============================================================ | |
def parse_assistant_response(response: str) -> Optional[dict]:
    """Extract the action object from a model reply.

    Every ``{`` in the reply is tried in turn as the start of a JSON value;
    the first one that decodes to a dict containing both ``email_reply`` and
    ``calendar_action`` is returned. Decoding from a given start position
    stops at the end of that JSON value, so markdown fences, explanations or
    stray braces before or after the object do not prevent it from being
    found.

    Args:
        response: Raw text returned by the model.

    Returns:
        The parsed action dict, or ``None`` when the reply is empty or holds
        no JSON object with both required keys. If no object was accepted and
        a decode attempt failed, the last decode error is reported on stdout
        as ``Parse error: ...``.
    """
    if not response:
        return None

    decoder = json.JSONDecoder()
    last_error = None
    start = response.find("{")
    while start != -1:
        try:
            # raw_decode parses one JSON value beginning at `start` and
            # ignores whatever text follows it.
            candidate, _ = decoder.raw_decode(response, start)
        except json.JSONDecodeError as exc:
            last_error = exc
        else:
            if (
                isinstance(candidate, dict)
                and "email_reply" in candidate
                and "calendar_action" in candidate
            ):
                return candidate
        start = response.find("{", start + 1)

    if last_error is not None:
        print(f"Parse error: {last_error}")
    return None
| # ============================================================ | |
| # ENVIRONMENT INTERACTION | |
| # ============================================================ | |
def run_episode(
    client: OpenAI,
    task: str,
    env_url: str = "http://localhost:8000",
    timeout: float = 60.0,
) -> dict:
    """Run one single-step episode against the environment server.

    Resets the environment for ``task``, builds a prompt from the returned
    observation, asks the model for an action and submits it to ``/step``.
    When the model reply cannot be parsed into an action, a generic
    "propose_alternatives" fallback action is submitted instead.

    Args:
        client: OpenAI client passed through to ``call_model``.
        task: Task name sent to ``/reset`` as the ``task`` query parameter.
        env_url: Base URL of the environment server.
        timeout: Seconds to wait on each HTTP request. Without a timeout an
            unresponsive server would block the run indefinitely.

    Returns:
        A dict with ``reward`` and ``done`` from the step response, and
        ``info`` (``{}`` when the server omits it).

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status from either endpoint.
        KeyError: If ``observation``, ``reward`` or ``done`` is missing from
            a server response.
    """
    import requests

    # Reset the environment and fetch the initial observation.
    reset_response = requests.post(
        f"{env_url}/reset", params={"task": task}, timeout=timeout
    )
    # Surface HTTP errors directly rather than as a confusing JSON/KeyError.
    reset_response.raise_for_status()
    observation = reset_response.json()["observation"]

    # Ask the model for an action.
    prompt = build_assistant_prompt(observation)
    ai_response = call_model(client, prompt)
    action = parse_assistant_response(ai_response)
    if not action:
        # Fallback action if the model reply was empty or unparseable.
        action = {
            "email_reply": "Thank you for your message. I'll check the calendar and get back to you shortly.",
            "calendar_action": "propose_alternatives",
            "meeting_details": None,
        }

    # Submit the action and collect the outcome.
    step_response = requests.post(f"{env_url}/step", json=action, timeout=timeout)
    step_response.raise_for_status()
    step_data = step_response.json()
    return {
        "reward": step_data["reward"],
        "done": step_data["done"],
        "info": step_data.get("info", {}),
    }
| # ============================================================ | |
| # MAIN — Run baseline inference | |
| # ============================================================ | |
def main() -> None:
    """Run baseline inference on the easy, medium and hard tasks.

    For each task a ``[START]`` line is logged, one episode is run, and the
    result is reported through a ``[STEP]`` line followed by an ``[END]``
    line. A task whose episode raises is reported with score 0 and
    ``success=false``, and the loop continues with the next task.

    When no API key is configured, a single failing ``[END]`` line is printed
    and nothing is run.
    """
    if not API_KEY:
        print("[END] success=false steps=0 score=0.000 rewards=", flush=True)
        return

    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    # Environment URL (local or HF Space)
    env_url = os.getenv("ENV_URL", "http://localhost:8000")

    for task in ["easy", "medium", "hard"]:
        rewards: List[float] = []
        step_count = 0
        final_score = 0.0
        success = False
        log_start(task=task, env=BENCHMARK, model=MODEL_NAME)
        try:
            result = run_episode(client, task, env_url)
            # Coerce before recording. A non-numeric reward (e.g. null) then
            # fails here, inside the try, instead of being stored in
            # `rewards` and crashing log_end() below, which runs outside the
            # try and would abort the remaining tasks.
            reward = float(result["reward"])
            done = result["done"]
            rewards.append(reward)
            step_count += 1
            log_step(
                step=step_count,
                action=f"assistant({task})",
                reward=reward,
                done=done,
                error=None,
            )
            final_score = round(reward, 4)
            success = final_score > 0.5
        except Exception as exc:
            print(f"Error in {task}: {exc}")
            final_score = 0.0
            success = False
        # Always close the task with an [END] line, even after a failure.
        log_end(
            success=success,
            steps=step_count,
            score=final_score,
            rewards=rewards,
        )
# Script entry point: run the baseline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()