import gradio as gr
from fastapi import FastAPI
from env import EmailEnv
from agent import llm_agent
from rl import update_q

# FastAPI application that the HTTP routes below are registered on.
app = FastAPI()

# Single module-level environment instance; every route and UI callback in
# this file steps/resets this same object.
env = EmailEnv()
# Latest observation of the running episode. None means "no active episode":
# step_env/auto_ai check for None and call env.reset() before doing anything.
obs = None
# Sum of the rewards returned by env.step() since the last reset.
total_score = 0

@app.post("/reset")
def reset_env():
    """Begin a new episode and clear the accumulated score.

    Replaces the module-level observation with the one returned by
    ``env.reset()`` and sets ``total_score`` back to zero.

    Returns:
        A small status dictionary, ``{"status": "success"}``.
    """
    global obs, total_score
    # The right-hand side is evaluated first, so a failing reset leaves both
    # globals untouched.
    obs, total_score = env.reset(), 0
    return {"status": "success"}

@app.get("/")
def home():
    """Answer GET "/" with a fixed payload indicating the service is up."""
    payload = {"message": "running"}
    return payload

def step_env(user_action):
    """Advance the shared episode by one step using the user's chosen action.

    If no episode is active (``obs`` is None) the environment is reset, the
    score is zeroed, and the first observation is returned; ``user_action``
    is ignored on that call.

    Args:
        user_action: Action forwarded unchanged to ``env.step``.

    Returns:
        A ``(text, reward, total_score)`` tuple. ``text`` is the new
        observation, or a restart prompt once the episode is done.
    """
    global obs, total_score
    if obs is None:
        obs = env.reset()
        total_score = 0
        return obs, 0, total_score

    obs_new, reward, done, _info = env.step(user_action)
    total_score += reward
    if done:
        # Mark the episode as finished so the next call starts a new one.
        obs = None
        return "Finished Click again to restart", reward, total_score

    # Keep the shared observation current; without this the module state
    # stays on the first observation and auto_ai would act on stale data.
    obs = obs_new
    return obs_new, reward, total_score

def auto_ai():
    """Advance the shared episode by one step using ``llm_agent``'s action.

    If no episode is active (``obs`` is None) the environment is reset, the
    score is zeroed, and the first observation is returned without
    consulting the agent.

    Returns:
        A ``(text, reward, total_score)`` tuple. ``text`` names the chosen
        action together with the new observation, or is a restart prompt
        once the episode is done.
    """
    global obs, total_score
    if obs is None:
        obs = env.reset()
        total_score = 0
        return obs, 0, total_score

    action = llm_agent(obs)
    obs_new, reward, done, _info = env.step(action)
    total_score += reward
    if done:
        # Mark the episode as finished so the next call starts a new one.
        obs = None
        return "AI Finished Click again to restart", reward, total_score

    # Store the new observation so the next call feeds the agent the current
    # state rather than the first observation of the episode.
    obs = obs_new
    return f"AI chose: {action} | {obs_new}", reward, total_score

# Gradio front end: one radio input feeds step_env, whose three return values
# fill a text box and two number boxes, in that order.
iface = gr.Interface(
    title="Smart Email AI Trainer",
    fn=step_env,
    inputs=gr.Radio(choices=["important", "spam", "normal"]),
    outputs=["text", "number", "number"],
)

# Attach the Gradio UI to the FastAPI app and rebind `app` to the result.
# NOTE(review): a GET "/" route is registered earlier in this file. Routes are
# presumably matched in registration order, so that route likely answers
# GET "/" before this mount does - confirm the UI page is actually reachable.
app = gr.mount_gradio_app(app, iface, path="/")