import json
import os
import re
import sys

import requests
from dotenv import load_dotenv
from openai import OpenAI
|
|
|
# Pull variables from a local .env file (if one exists) into the process
# environment at import time, before any os.getenv() lookup runs.
load_dotenv()

# Root URL of the environment HTTP server; run_inference posts to its
# /reset and /step endpoints.
BASE_URL = "http://localhost:7860"
|
|
|
# Environment name reported on the [START] log line.
_ENV_NAME = "CrisisSim"

# Every action the simulator accepts; also rendered into the user prompt so
# the prompt and the validation can never drift apart.
_VALID_ACTIONS = (
    "cut_expenses",
    "stock_essentials",
    "invest_gold",
    "hold_cash",
    "convert_currency",
    "take_loan",
    "pay_debt",
    "reduce_luxury",
    "build_emergency_fund",
)

# Action used when the model reply is unusable or the model call fails.
_FALLBACK_ACTION = "hold_cash"

# Upper bound (seconds) for each simulator HTTP call; without it a stalled
# server would block the run forever.
_REQUEST_TIMEOUT_SECONDS = 30

_SYSTEM_PROMPT = """You are an expert financial crisis manager operating in a simulated economy.

Your goals:
- Survive as long as possible (avoid bankruptcy)
- Maintain and grow savings
- Reduce debt strategically
- Adapt to inflation and economic shocks

Rules:
- Do NOT repeat the same action more than 2 times in a row
- Avoid passive strategies like always holding cash
- Balance short-term survival with long-term stability
- Consider consequences of each decision

Always return ONLY one valid action."""


def _format_rewards(rewards):
    """Render rewards as the comma-separated, two-decimal list used by [END]."""
    return ",".join(f"{value:.2f}" for value in rewards)


def _emit_failure(step, steps, rewards, error):
    """Print the terminal [STEP]/[END] pair that marks a failed run."""
    print(
        f"[STEP] step={step} action=null reward=0.00 done=true error={error}",
        flush=True,
    )
    print(
        f"[END] success=false steps={steps} rewards={_format_rewards(rewards)}",
        flush=True,
    )


def _post_json(path, payload):
    """POST *payload* to the simulator at *path* and return the decoded JSON body."""
    response = requests.post(
        f"{BASE_URL}{path}", json=payload, timeout=_REQUEST_TIMEOUT_SECONDS
    )
    response.raise_for_status()
    return response.json()


def _build_user_prompt(state, recent_actions):
    """Build the per-step user message from the state and the recent actions."""
    action_list = ", ".join(_VALID_ACTIONS)
    return (
        f"Current State:\n{state}\n\n"
        f"Previous Actions (last 3):\n{recent_actions}\n\n"
        f"Choose ONE action from:\n[{action_list}]\n\n"
        "Avoid repeating same action too often.\n\n"
        "Return ONLY the action name."
    )


def _parse_action(reply):
    """Return the first valid action named in *reply*, else the fallback action.

    Tolerates a missing reply (None), different letter case, and decoration
    such as quotes, backticks, markdown emphasis or trailing punctuation.
    """
    text = (reply or "").strip().lower()
    if text in _VALID_ACTIONS:
        return text
    for token in re.findall(r"[a-z_]+", text):
        # Markdown emphasis such as _invest_gold_ leaves stray underscores.
        candidate = token.strip("_")
        if candidate in _VALID_ACTIONS:
            return candidate
    return _FALLBACK_ACTION


def _avoid_third_repeat(action, history):
    """Swap *action* for another one if it would be the third identical move in a row."""
    if len(history) >= 2 and history[-1] == history[-2] == action:
        return next(name for name in _VALID_ACTIONS if name != action)
    return action


def _choose_action(client, model_name, state, history):
    """Ask the model for the next action.

    Returns an ``(action, error)`` pair where *error* is ``"null"`` on success
    and ``"api_error"`` when the model call failed and the fallback was used.
    The repeat guard is applied in both cases.
    """
    error_val = "null"
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": _build_user_prompt(state, history[-3:])},
            ],
            temperature=0.5,
            max_tokens=50,
        )
        action = _parse_action(completion.choices[0].message.content)
    except Exception as exc:
        # Deliberate best effort: a failed model call must not end the episode.
        # Diagnostics go to stderr so the stdout log protocol stays unchanged.
        print(
            f"run_inference: model call failed ({type(exc).__name__}: {exc})",
            file=sys.stderr,
            flush=True,
        )
        action = _FALLBACK_ACTION
        error_val = "api_error"
    return _avoid_third_repeat(action, history), error_val


def run_inference(task_name: str) -> None:
    """Run one episode of the simulator, letting a chat model pick each action.

    Reads ``API_BASE_URL``, ``MODEL_NAME`` and ``HF_TOKEN``/``OPENAI_API_KEY``
    from the environment, resets the simulator for *task_name*, then loops:
    ask the model for an action, POST it to ``/step``, and log the outcome.
    Progress is reported on stdout as ``[START]``, ``[STEP]`` and ``[END]``
    lines.

    Args:
        task_name: Task identifier sent to the simulator's ``/reset`` endpoint.

    Raises:
        SystemExit: With code 1 when no API key is configured or the model
            client cannot be constructed. Failures during the episode are
            logged as a failed run and do not raise.
    """
    api_base_url = os.getenv("API_BASE_URL") or "https://api.openai.com/v1"
    model_name = os.getenv("MODEL_NAME") or "gpt-4o-mini"
    api_key = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY")

    print(f"[START] task={task_name} env={_ENV_NAME} model={model_name}", flush=True)

    if not api_key:
        # Name the variables that are actually read above.
        _emit_failure(
            1, 0, [], "HF_TOKEN or OPENAI_API_KEY environment variable is not set"
        )
        sys.exit(1)

    try:
        client = OpenAI(base_url=api_base_url, api_key=api_key)
    except Exception as exc:
        print(
            f"run_inference: could not create client ({type(exc).__name__}: {exc})",
            file=sys.stderr,
            flush=True,
        )
        _emit_failure(1, 0, [], "api_error")
        sys.exit(1)

    steps = 0
    rewards = []
    last_actions = []

    try:
        state = _post_json("/reset", {"task_name": task_name})

        while True:
            steps += 1
            action, error_val = _choose_action(client, model_name, state, last_actions)
            last_actions.append(action)

            data = _post_json("/step", {"action": action})
            state = data["observation"]
            # Convert before storing so the rewards list only ever holds
            # numbers and the failure path can always format it.
            reward = float(data["reward"])
            done = data["done"]
            rewards.append(reward)

            print(
                f"[STEP] step={steps} action={action} reward={reward:.2f} "
                f"done={str(done).lower()} error={error_val}",
                flush=True,
            )

            if done:
                # A missing "bankrupt" flag is treated as a failed run.
                success = not state.get("bankrupt", True)
                break

        print(
            f"[END] success={str(success).lower()} steps={steps} "
            f"rewards={_format_rewards(rewards)}",
            flush=True,
        )

    except Exception as exc:
        # Top-level boundary: any simulator/protocol failure ends the episode
        # with a failed-run log instead of a traceback.
        print(
            f"run_inference: episode aborted ({type(exc).__name__}: {exc})",
            file=sys.stderr,
            flush=True,
        )
        _emit_failure(steps, steps, rewards, "api_error")
|
|
|
if __name__ == "__main__":
    # The optional first command-line argument selects the task; with no
    # argument the "easy" task is run.
    cli_args = sys.argv[1:]
    run_inference(cli_args[0] if cli_args else "easy")
|
|
|