Spaces:
Paused
Paused
Upload 8 files
Browse files- Dockerfile +9 -0
- app.py +55 -0
- baseline.py +26 -0
- env.py +48 -0
- grader.py +4 -0
- models.py +17 -0
- openenv.yaml +0 -0
- tasks.py +8 -0
Dockerfile
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Container image for the e-commerce recommendation simulator (serves app.py).
FROM python:3.10

WORKDIR /app

# Copy only the dependency manifest first so the pip layer is cached
# across source-code changes (COPY . . before pip install busts the cache
# on every edit).
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Now bring in the rest of the application source.
COPY . .

CMD ["python", "app.py"]
|
app.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from env import EcommerceEnv
|
| 3 |
+
from models import Action
|
| 4 |
+
import random
|
| 5 |
+
|
| 6 |
+
def simulate():
    """Run one full session with a random recommender and return a text log.

    Drives EcommerceEnv to termination with a uniformly random policy,
    tallying clicks (partial reward) and purchases (full reward), then
    appends CTR / conversion metrics to the transcript.

    Returns:
        str: per-step transcript followed by a session summary block.
    """
    env = EcommerceEnv()
    env.reset()

    step_lines = []
    total_reward = 0
    steps = 0
    clicks = 0
    purchases = 0
    done = False

    while not done:
        # Naive agent: recommend a uniformly random product id from 1..3.
        action = Action(recommended_product=random.randint(1, 3))
        _, reward, done, _ = env.step(action)

        steps += 1
        total_reward += reward.score

        # A reward of exactly 1.0 marks a purchase; any smaller positive
        # reward counts as a click.
        if reward.score == 1.0:
            purchases += 1
        elif reward.score > 0:
            clicks += 1

        step_lines.append(
            f"Step {steps} → Recommended: {action.recommended_product} | Reward: {reward.score}"
        )

    # Session-level engagement metrics (guard against a zero-step session).
    ctr = clicks / steps if steps else 0
    conversion = purchases / steps if steps else 0

    summary = [
        "",
        "--- SESSION SUMMARY ---",
        f"Total Steps: {steps}",
        f"Total Reward: {round(total_reward, 2)}",
        f"Clicks: {clicks}",
        f"Purchases: {purchases}",
        f"CTR: {round(ctr, 2)}",
        f"Conversion Rate: {round(conversion, 2)}",
    ]
    return "\n".join(step_lines + summary) + "\n"
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Wire the simulator into a one-click Gradio UI and serve it with a
# public share link.
demo = gr.Interface(
    fn=simulate,
    inputs=[],
    outputs="text",
    title="🛒 AI E-commerce Recommendation Simulator",
    description="Simulates how an AI agent recommends products and optimizes user engagement & conversions.",
)
demo.launch(share=True)
|
baseline.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from env import EcommerceEnv
|
| 2 |
+
from models import Action
|
| 3 |
+
import random
|
| 4 |
+
|
| 5 |
+
# Baseline: run a few random-recommendation episodes and report the
# cumulative reward across all of them.
env = EcommerceEnv()

total_reward = 0
NUM_EPISODES = 3

for episode in range(1, NUM_EPISODES + 1):
    obs = env.reset()
    print(f"\nEpisode {episode}")

    done = False
    while not done:
        # Baseline policy: pick a product id uniformly at random.
        action = Action(recommended_product=random.randint(1, 3))
        obs, reward, done, _ = env.step(action)
        print(f"Recommended: {action.recommended_product}, Reward: {reward.score}")
        total_reward += reward.score

print("\nFinal Total Reward:", total_reward)
|
env.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
from models import Observation, Action, Reward
|
| 3 |
+
|
| 4 |
+
class EcommerceEnv:
    """Toy recommendation environment.

    The agent recommends product ids; rewards encode purchase (1.0),
    click on a previously viewed product (0.3), or an irrelevant
    suggestion (-0.2). An episode ends on a purchase or after
    ``max_steps`` recommendations.
    """

    def __init__(self):
        # Each session pairs a browsing history with the product that
        # converts (the "target").
        self.sessions = [
            {"history": [1, 2], "target": 3},
            {"history": [2, 3], "target": 1},
            {"history": [1, 3], "target": 2},
        ]
        self.max_steps = 5

    def reset(self):
        """Start a new episode on a randomly chosen session."""
        self.current_session = random.choice(self.sessions)
        self.steps = 0
        self.done = False
        return self._get_obs()

    def _get_obs(self):
        # The observation is static within a session: a single user plus
        # their viewing history.
        return Observation(user_id=1, history=self.current_session["history"])

    def step(self, action: Action):
        """Apply one recommendation.

        Returns:
            tuple: (Observation, Reward, done flag, empty info dict).
        """
        self.steps += 1
        target = self.current_session["target"]

        # Reward shaping: purchase > click > irrelevant recommendation.
        if action.recommended_product == target:
            score = 1.0  # purchase — ends the episode immediately
            self.done = True
        elif action.recommended_product in self.current_session["history"]:
            score = 0.3  # click on a familiar product
        else:
            score = -0.2  # irrelevant suggestion

        # Hard cap on episode length regardless of outcome.
        if self.steps >= self.max_steps:
            self.done = True

        return self._get_obs(), Reward(score=score), self.done, {}

    def state(self):
        """Debug snapshot of the current episode's progress."""
        return {
            "steps": self.steps,
            "target": self.current_session["target"],
        }
|
grader.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def grade(total_reward, steps):
    """Normalize an episode's total reward to a score in [0.0, 1.0].

    Parameters:
        total_reward: cumulative reward collected over the episode.
        steps: number of steps the reward was accumulated over.

    Returns:
        float: per-step average reward clamped to [0.0, 1.0]. A run with
        zero (or negative) steps grades as 0.0 instead of raising
        ZeroDivisionError.
    """
    if steps <= 0:
        # An empty episode carries no signal — grade it zero rather than
        # dividing by zero.
        return 0.0
    # Float bounds keep the return type consistently float (the original
    # int bounds could return int 0 or 1).
    return max(0.0, min(1.0, total_reward / steps))
|
models.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import List
|
| 3 |
+
|
| 4 |
+
# What the agent sees each step.
class Observation(BaseModel):
    """Per-step view of the environment: the active user and their history."""

    user_id: int
    history: List[int]  # previously viewed products
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# What the agent does each step.
class Action(BaseModel):
    """A single recommendation issued by the agent."""

    recommended_product: int  # product ID
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Reward returned by the environment after each action.
class Reward(BaseModel):
    """Scalar feedback for the last action."""

    score: float  # between -1 and 1
|
openenv.yaml
ADDED
|
File without changes
|
tasks.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def easy():
    """Easy task prompt: optimize for click-through."""
    return "maximize clicks"
|
| 3 |
+
|
| 4 |
+
def medium():
    """Medium task prompt: anticipate the converting product."""
    return "predict purchase"
|
| 6 |
+
|
| 7 |
+
def hard():
    """Hard task prompt: optimize total reward over the whole session."""
    return "maximize session reward"
|