Spaces:
Sleeping
Sleeping
Upload 14 files
Browse files- Dockerfile +3 -0
- README.md +9 -10
- __init__.py +0 -0
- agent.py +13 -0
- app.py +58 -0
- client.py +114 -0
- env.py +27 -0
- gitattributes +35 -0
- inference.py +48 -0
- models.py +0 -0
- openenv.yaml +3 -0
- pyproject.toml +3 -0
- requirements.txt +4 -0
- rl.py +12 -0
Dockerfile
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Minimal Dockerfile for OpenEnv
|
| 2 |
+
|
| 3 |
+
ENV ENABLE_WEB_INTERFACE=true
|
README.md
CHANGED
|
@@ -1,10 +1,9 @@
|
|
| 1 |
-
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
+
---
|
| 2 |
+
sdk: gradio
|
| 3 |
+
app_file: client.py
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
base_path: /web
|
| 7 |
+
---
|
| 8 |
+
# Email Env Project
|
| 9 |
+
Minimal OpenEnv/HF testing environment
|
|
|
__init__.py
ADDED
|
File without changes
|
agent.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import pipeline
|
| 2 |
+
|
| 3 |
+
classifier = pipeline("text-classification")
|
| 4 |
+
|
| 5 |
+
def llm_agent(observation):
    """Map the classifier's top label for *observation* to an email category.

    Runs the module-level ``classifier`` pipeline on the observation,
    lowercases the label of the first result, and returns ``"spam"`` or
    ``"important"`` when that word occurs in the label. Any other label
    yields ``"normal"``.
    """
    top_label = classifier(observation)[0]['label'].lower()

    # "spam" is tested first, so it wins if a label mentions both keywords.
    for category in ("spam", "important"):
        if category in top_label:
            return category
    return "normal"
|
app.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from fastapi import FastAPI
|
| 3 |
+
from env import EmailEnv
|
| 4 |
+
from agent import llm_agent
|
| 5 |
+
from rl import update_q
|
| 6 |
+
|
| 7 |
+
app = FastAPI()
|
| 8 |
+
|
| 9 |
+
env = EmailEnv()
|
| 10 |
+
obs = None
|
| 11 |
+
total_score = 0
|
| 12 |
+
|
| 13 |
+
@app.post("/reset")
def reset_env():
    """Handle POST /reset: restart the shared episode and zero the score."""
    global obs, total_score
    obs, total_score = env.reset(), 0
    return {"status": "success"}
|
| 19 |
+
|
| 20 |
+
@app.get("/")
def home():
    """Handle GET /: return a fixed JSON payload saying the app is running."""
    payload = {"message": "running"}
    return payload
|
| 23 |
+
|
| 24 |
+
def step_env(user_action):
    """Advance the shared episode by one user-chosen label.

    Args:
        user_action: The label the user picked for the current email; it is
            forwarded unchanged to ``env.step``.

    Returns:
        A 3-tuple ``(text, reward, total_score)`` where ``text`` is the next
        observation (or a restart prompt once the episode is finished),
        ``reward`` is the reward of this step and ``total_score`` is the
        running sum for the episode.
    """
    global obs, total_score

    # No episode in progress: start one and show its first observation. The
    # label submitted with this call is not scored.
    if obs is None:
        obs = env.reset()
        total_score = 0
        return obs, 0, total_score

    obs_new, reward, done, _info = env.step(user_action)
    total_score += reward

    if done:
        obs = None  # the next call starts a new episode
        return "Finished Click again to restart", reward, total_score

    # Keep the shared observation in sync with the environment so that any
    # other reader of ``obs`` sees the email currently being labelled rather
    # than the first one of the episode.
    obs = obs_new
    return obs_new, reward, total_score
|
| 36 |
+
|
| 37 |
+
def auto_ai():
    """Let ``llm_agent`` label the current email and advance the episode.

    Returns:
        A 3-tuple ``(text, reward, total_score)``. ``text`` is the first
        observation when a new episode is started, a restart prompt when the
        episode has just finished, and otherwise a summary of the agent's
        choice followed by the next observation.
    """
    global obs, total_score

    # No episode in progress: start one and show its first observation.
    if obs is None:
        obs = env.reset()
        total_score = 0
        return obs, 0, total_score

    action = llm_agent(obs)
    obs_new, reward, done, _info = env.step(action)
    total_score += reward

    if done:
        obs = None  # the next call starts a new episode
        return "AI Finished Click again to restart", reward, total_score

    # Advance the shared observation; without this the agent would be asked
    # to classify the first email again on every call.
    obs = obs_new
    return f"AI chose: {action} | {obs_new}", reward, total_score
|
| 50 |
+
|
| 51 |
+
iface = gr.Interface(
|
| 52 |
+
fn=step_env,
|
| 53 |
+
inputs=gr.Radio(["important", "spam", "normal"]),
|
| 54 |
+
outputs=["text", "number", "number"],
|
| 55 |
+
title="Smart Email AI Trainer"
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
app = gr.mount_gradio_app(app, iface, path="/")
|
client.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import List, Literal
|
| 3 |
+
import gradio as gr
|
| 4 |
+
|
| 5 |
+
from typing import Optional


class EmailAction(BaseModel):
    """One action requested against the mailbox."""

    # Which operation to perform.
    action_type: Literal['send', 'reply', 'archive', 'delete']
    # Body text for the email created by the action; defaults to empty.
    message: str = ""
    # Id of the email the action targets. Declared Optional so that an
    # explicit ``email_id=None`` validates: with a plain ``int`` annotation
    # pydantic v2 accepts the *default* None but rejects None passed by the
    # caller.
    email_id: Optional[int] = None
|
| 9 |
+
|
| 10 |
+
class EmailObservation(BaseModel):
    """Result handed back to the caller after one environment step."""

    # The mailbox contents, one plain dict per email.
    emails: List[dict]
    # Human-readable outcome of the action that was just applied.
    ai_feedback: str
|
| 13 |
+
|
| 14 |
+
class EmailEnv:
    """In-memory mailbox that applies one ``EmailAction`` per ``step`` call."""

    def __init__(self):
        # Seed mailbox; every record carries id/from/subject/label/status.
        self.emails = [
            {"id": 1, "from": "alice@example.com", "subject": "Meeting Tomorrow", "label": "Work", "status": "Unread"},
            {"id": 2, "from": "bob@example.com", "subject": "Lunch Plans", "label": "Personal", "status": "Archived"},
            {"id": 3, "from": "carol@example.com", "subject": "Project Update", "label": "Work", "status": "Sent"},
            {"id": 4, "from": "dave@spam.com", "subject": "Win a Prize", "label": "Spam", "status": "Deleted"},
        ]

    def _append_sent(self, subject, label, body):
        """Append a "Sent" email from me@example.com using the next free id."""
        next_id = max([mail["id"] for mail in self.emails] + [0]) + 1
        self.emails.append({
            "id": next_id,
            "from": "me@example.com",
            "subject": subject,
            "label": label,
            "status": "Sent",
            "body": body
        })

    def step(self, action: EmailAction) -> EmailObservation:
        """Apply *action* to the mailbox and return the updated observation.

        "send" always appends a new email. "reply", "archive" and "delete"
        need ``action.email_id`` to match an existing email; otherwise the
        feedback reports an invalid action or id and nothing is changed.
        """
        kind = action.action_type

        # Look the target up only when an id was supplied.
        target = None
        if action.email_id:
            target = next(
                (mail for mail in self.emails if mail["id"] == action.email_id),
                None,
            )

        if kind == "send":
            self._append_sent("Project Update", "Work", action.message)
            feedback = "Action Executed: Send Email ✅"
        elif not target:
            # Every remaining action requires an existing target email.
            feedback = "Invalid action or email ID."
        elif kind == "reply":
            self._append_sent(f"Re: {target['subject']}", target["label"], action.message)
            feedback = f"Action Executed: Reply ✅ to email ID {action.email_id}"
        elif kind == "archive":
            target["status"] = "Archived"
            feedback = f"Action Executed: Archive ✅ email ID {action.email_id}"
        elif kind == "delete":
            target["status"] = "Deleted"
            feedback = f"Action Executed: Delete ✅ email ID {action.email_id}"
        else:
            feedback = "Invalid action or email ID."

        return EmailObservation(emails=self.emails, ai_feedback=feedback)
|
| 57 |
+
|
| 58 |
+
env = EmailEnv()
|
| 59 |
+
|
| 60 |
+
# Background colour of the status cell, keyed by email status.
status_colors = {
    "Unread": "orange",
    "Sent": "green",
    "Archived": "blue",
    "Deleted": "red",
}


def render_table_html(emails):
    """Render *emails* as an HTML table string.

    Args:
        emails: Iterable of dicts that each provide the keys ``id``,
            ``from``, ``subject``, ``label`` and ``status``.

    Returns:
        The table markup: one header row followed by one row per email. The
        status cell is coloured via ``status_colors`` (``"white"`` for a
        status that is not listed there).
    """
    # Local import keeps this block self-contained.
    from html import escape

    parts = [
        "<table style='border-collapse: collapse; width: 100%;'>",
        "<tr><th>ID</th><th>From</th><th>Subject</th><th>Label</th><th>Status</th></tr>",
    ]
    for e in emails:
        color = status_colors.get(e["status"], "white")
        parts.append("<tr style='border:1px solid #ddd;'>")
        # Escape every value so characters such as "<" or "&" inside a record
        # are displayed literally instead of being parsed as markup.
        parts.extend(
            f"<td>{escape(str(e[key]))}</td>"
            for key in ("id", "from", "subject", "label")
        )
        parts.append(
            f"<td style='background-color:{color}; color:white;'>"
            f"{escape(str(e['status']))}</td>"
        )
        parts.append("</tr>")
    parts.append("</table>")
    # A single join instead of repeated "+=" on a growing string.
    return "".join(parts)
|
| 81 |
+
|
| 82 |
+
def gradio_step(action_type, message, email_id):
    """Gradio callback: run one mailbox action and render the result.

    Args:
        action_type: The selected action name; it is lowercased before use.
        message: Body text for the email created by the action.
        email_id: Id of the targeted email as entered by the user. It is
            ignored for "send" and may be left empty.

    Returns:
        A pair ``(feedback, table_html)``. When the input is rejected the
        feedback holds the error message and the table is an empty string.
    """
    if not action_type:
        return "Invalid action selected.", ""

    action_type_safe = str(action_type).lower()
    # A missing message is treated as empty text.
    fields = {"action_type": action_type_safe, "message": message or ""}

    # ``email_id`` is only set when the user supplied one, so the model's
    # default applies otherwise. Passing an explicit ``email_id=None`` fails
    # pydantic v2 validation while the field is annotated as a plain ``int``.
    if action_type_safe != "send" and email_id:
        email_id_str = str(email_id).strip()
        if email_id_str:
            try:
                fields["email_id"] = int(email_id_str)
            except ValueError:
                return "Invalid Email ID. Must be a number.", ""

    obs = env.step(EmailAction(**fields))
    return obs.ai_feedback, render_table_html(obs.emails)
|
| 97 |
+
|
| 98 |
+
iface = gr.Interface(
|
| 99 |
+
fn=gradio_step,
|
| 100 |
+
inputs=[
|
| 101 |
+
gr.Dropdown(['Send', 'Reply', 'Archive', 'Delete'], label="Choose Action"),
|
| 102 |
+
gr.Textbox(label="Message"),
|
| 103 |
+
gr.Textbox(label="Email ID (for reply/archive/delete)")
|
| 104 |
+
],
|
| 105 |
+
outputs=[
|
| 106 |
+
gr.Textbox(label="AI Feedback"),
|
| 107 |
+
gr.HTML(label="Emails Dashboard")
|
| 108 |
+
],
|
| 109 |
+
live=False,
|
| 110 |
+
title="Smart Email Management AI Environment (OpenEnv)",
|
| 111 |
+
description="Simulated email environment with AI-powered actions: send, reply, archive, delete."
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
iface.launch(server_name="0.0.0.0", server_port=7860)
|
env.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class EmailEnv:
    """Sequential email-labelling environment.

    Each episode walks through a list of ``{"text", "label"}`` records. The
    observation is the text of the current email; ``step`` scores the
    submitted label against the record's label and moves to the next email.
    """

    # Built-in dataset used when no custom data is supplied.
    DEFAULT_DATA = (
        {"text": "Meeting at 5pm", "label": "important"},
        {"text": "Win a free iPhone", "label": "spam"},
        {"text": "Weekly report attached", "label": "normal"},
    )

    def __init__(self, data=None):
        """Create the environment.

        Args:
            data: Optional iterable of dicts with ``"text"`` and ``"label"``
                keys. When omitted, ``DEFAULT_DATA`` is used.

        Raises:
            ValueError: If *data* is given but contains no emails.
        """
        source = self.DEFAULT_DATA if data is None else data
        # Private copy so later changes to the caller's list do not leak in.
        self._dataset = [dict(item) for item in source]
        if not self._dataset:
            raise ValueError("EmailEnv needs at least one email")
        # No episode is loaded until reset() is called.
        self.data = []
        self.index = 0

    def reset(self):
        """Start a new episode and return the text of its first email."""
        self.data = [dict(item) for item in self._dataset]
        self.index = 0
        return self.data[self.index]["text"]

    def step(self, action):
        """Score *action* against the current email and advance.

        Returns:
            A 4-tuple ``(next_obs, reward, done, info)``. ``reward`` is 10
            when *action* equals the email's label and -5 otherwise;
            ``next_obs`` is ``None`` once the last email has been scored;
            ``info`` holds the expected label under ``"correct"``. When no
            email is left (before ``reset`` or after the episode ended) the
            result is ``(None, 0, True, {})``.
        """
        if self.index >= len(self.data):
            return None, 0, True, {}

        correct = self.data[self.index]["label"]
        reward = 10 if action == correct else -5

        self.index += 1
        done = self.index >= len(self.data)
        next_obs = None if done else self.data[self.index]["text"]

        return next_obs, reward, done, {"correct": correct}
|
gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
inference.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Literal
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
|
| 4 |
+
from typing import Optional


class EmailAction(BaseModel):
    """Schema of one action requested against the mailbox."""

    # Which operation to perform.
    action_type: Literal['send', 'reply', 'archive', 'delete']
    # Body text for the email created by the action; defaults to empty.
    message: str = ""
    # Id of the targeted email. Declared Optional so that an explicit
    # ``email_id=None`` validates: with a plain ``int`` annotation pydantic
    # v2 accepts the *default* None but rejects None passed by the caller.
    email_id: Optional[int] = None
|
| 8 |
+
|
| 9 |
+
class EmailEnv:
    """Minimal in-memory mailbox driven by plain-dict actions."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Restore the seed mailbox and return it as ``{"emails": [...]}``."""
        self.emails = [
            {"id": 1, "from": "alice@example.com", "subject": "Meeting Tomorrow", "label": "Work", "status": "Unread"},
            {"id": 2, "from": "bob@example.com", "subject": "Lunch Plans", "label": "Personal", "status": "Archived"},
        ]
        return {"emails": self.emails}

    def step(self, action: dict):
        """Apply *action* and return ``{"emails", "reward", "done"}``.

        Only ``{"action_type": "send"}`` changes the mailbox: it appends a
        new "Sent" email whose ``body`` is ``action["message"]`` (empty when
        absent) and yields reward 1. Any other action, including one without
        an ``action_type``, leaves the mailbox untouched and yields reward 0.
        ``done`` is always ``False``.
        """
        # .get() treats a missing action type like an unsupported one
        # instead of raising KeyError.
        if action.get("action_type") != "send":
            return {"emails": self.emails, "reward": 0, "done": False}

        # max()+1 keeps ids unique even if the list no longer holds the
        # contiguous ids 1..n, which len()+1 silently assumes.
        new_id = max((mail["id"] for mail in self.emails), default=0) + 1
        self.emails.append({
            "id": new_id,
            "from": "me@example.com",
            "subject": "New Mail",
            "label": "Work",
            "status": "Sent",
            # Keep the message instead of dropping it.
            "body": action.get("message", ""),
        })
        return {"emails": self.emails, "reward": 1, "done": False}
|
| 32 |
+
|
| 33 |
+
env = EmailEnv()
|
| 34 |
+
obs = None
|
| 35 |
+
|
| 36 |
+
def reset():
    """Reset the shared environment, cache its observation and return it."""
    global obs
    initial = env.reset()
    obs = initial
    return initial
|
| 40 |
+
|
| 41 |
+
def step(action):
    """Apply *action* to the shared environment and return its result dict.

    The module-level ``obs`` is refreshed as a side effect.
    """
    global obs
    result = env.step(action)
    # Cache the observation in the {"emails": [...]} shape that env.reset()
    # returns, so ``obs`` has one consistent type no matter which call set
    # it last (previously it became a bare list here).
    obs = {"emails": result["emails"]}
    return result
|
| 46 |
+
|
| 47 |
+
def act():
    """Return the fixed action: send an email with the message "Hello"."""
    return dict(action_type="send", message="Hello")
|
models.py
ADDED
|
File without changes
|
openenv.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: email_env
|
| 2 |
+
description: Minimal OpenEnv environment
|
| 3 |
+
version: 0.1.0
|
pyproject.toml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["setuptools", "wheel"]
|
| 3 |
+
build-backend = "setuptools.build_meta"
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
pydantic
|
| 3 |
+
transformers
|
| 4 |
+
torch
|
rl.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Table of accumulated rewards: state -> {action: value}.
Q = {}
# Every label an agent may choose, in tie-breaking order.
actions = ["important", "spam", "normal"]


def get_action(state):
    """Return the highest-valued action recorded for *state*.

    A state that has not been seen before is first added to ``Q`` with all
    values at 0; ties go to the action listed first in ``actions``.
    """
    values = Q.setdefault(state, dict.fromkeys(actions, 0))
    return max(values, key=values.get)
|
| 8 |
+
|
| 9 |
+
def update_q(state, action, reward):
    """Add *reward* to the value stored for (*state*, *action*) in ``Q``.

    A state that has not been seen before is first added with all values
    at 0.
    """
    values = Q.setdefault(state, {a: 0 for a in actions})
    values[action] = values[action] + reward
|