itzfrontman committed on
Commit
74aaccc
·
verified ·
1 Parent(s): 277a492

Upload 8 files

Browse files
Files changed (8) hide show
  1. Dockerfile +9 -0
  2. app.py +55 -0
  3. baseline.py +26 -0
  4. env.py +48 -0
  5. grader.py +4 -0
  6. models.py +17 -0
  7. openenv.yaml +0 -0
  8. tasks.py +8 -0
Dockerfile ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10

WORKDIR /app

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Gradio binds to 127.0.0.1 unless told otherwise, which is unreachable from
# outside the container; 7860 is Gradio's default port.
ENV GRADIO_SERVER_NAME=0.0.0.0
EXPOSE 7860

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from env import EcommerceEnv
3
+ from models import Action
4
+ import random
5
+
6
def simulate():
    """Run one episode with a random agent and return a plain-text report.

    A fresh ``EcommerceEnv`` is created and reset, then a product ID in
    ``1..3`` is recommended at random on every step until the environment
    reports the episode as done.

    Returns:
        str: One line per step, followed by a session summary with the step
        count, total reward, clicks, purchases, CTR (clicks / steps) and
        conversion rate (purchases / steps). The text ends with a newline.
    """
    env = EcommerceEnv()
    env.reset()  # the random agent never looks at the observation

    # Collect report lines and join once instead of growing a string in the loop.
    lines = []
    total_reward = 0
    steps = 0
    clicks = 0
    purchases = 0

    done = False
    while not done:
        # Simple agent: uniform random recommendation.
        action = Action(recommended_product=random.randint(1, 3))
        _, reward, done, _ = env.step(action)

        steps += 1
        total_reward += reward.score

        # A score of exactly 1.0 is counted as a purchase; any other positive
        # score is counted as a click. Non-positive scores count as neither.
        if reward.score == 1.0:
            purchases += 1
        elif reward.score > 0:
            clicks += 1

        lines.append(
            f"Step {steps} → Recommended: {action.recommended_product} | Reward: {reward.score}"
        )

    # Metrics (guarded even though the loop always runs at least once).
    ctr = clicks / steps if steps else 0
    conversion = purchases / steps if steps else 0

    lines.extend(
        [
            "",
            "--- SESSION SUMMARY ---",
            f"Total Steps: {steps}",
            f"Total Reward: {round(total_reward,2)}",
            f"Clicks: {clicks}",
            f"Purchases: {purchases}",
            f"CTR: {round(ctr,2)}",
            f"Conversion Rate: {round(conversion,2)}",
        ]
    )

    return "\n".join(lines) + "\n"
47
+
48
+
49
# The interface is kept as a named module-level object so the module can be
# imported without starting a web server.
demo = gr.Interface(
    fn=simulate,
    inputs=[],
    outputs="text",
    title="🛒 AI E-commerce Recommendation Simulator",
    description="Simulates how an AI agent recommends products and optimizes user engagement & conversions.",
)

if __name__ == "__main__":
    # Listen on all interfaces: the default loopback bind cannot be reached
    # from outside a container, which is how this app is started
    # (`python app.py` as the image's command).
    demo.launch(server_name="0.0.0.0", share=True)
baseline.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from env import EcommerceEnv
2
+ from models import Action
3
+ import random
4
+
5
# Baseline run: a random-recommendation agent played for a fixed number of
# episodes, printing every step and the reward summed over all episodes.
env = EcommerceEnv()

episodes = 3
total_reward = 0

for ep in range(episodes):
    obs = env.reset()
    print(f"\nEpisode {ep+1}")

    done = False
    while not done:
        # Simple agent (random recommendation)
        product_id = random.randint(1, 3)
        action = Action(recommended_product=product_id)

        step_result = env.step(action)
        obs, reward, done, _ = step_result

        print(f"Recommended: {action.recommended_product}, Reward: {reward.score}")
        total_reward += reward.score

print("\nFinal Total Reward:", total_reward)
env.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from models import Observation, Action, Reward
3
+
4
class EcommerceEnv:
    """Toy recommendation environment over a fixed set of user sessions.

    Each session has a viewing ``history`` and a ``target`` product.
    ``reset()`` picks one session at random. ``step()`` scores a
    recommendation against it and ends the episode on a hit or once
    ``max_steps`` steps have been taken.

    Args:
        max_steps: Maximum number of steps per episode (default 5).
    """

    def __init__(self, max_steps=5):
        self.sessions = [
            {"history": [1, 2], "target": 3},
            {"history": [2, 3], "target": 1},
            {"history": [1, 3], "target": 2},
        ]
        self.max_steps = max_steps
        # Episode state is defined up front so step()/state() called before
        # reset() fail clearly instead of with an AttributeError.
        self.current_session = None
        self.steps = 0
        self.done = False

    def reset(self):
        """Start a new episode on a randomly chosen session.

        Returns:
            Observation: The initial observation for the chosen session.
        """
        self.current_session = random.choice(self.sessions)
        self.steps = 0
        self.done = False
        return self._get_obs()

    def _get_obs(self):
        """Build the observation for the current session."""
        return Observation(
            user_id=1,
            # Pass a copy so the session template list itself is never handed out.
            history=list(self.current_session["history"]),
        )

    def step(self, action: "Action"):
        """Score one recommendation and advance the episode.

        Args:
            action: The agent's action; its ``recommended_product`` is scored.

        Returns:
            tuple: ``(observation, reward, done, info)`` where ``info`` is an
            empty dict.

        Raises:
            RuntimeError: If called before ``reset()``.

        Note:
            Calling ``step()`` after the episode is done is not rejected; the
            step counter simply keeps increasing.
        """
        if self.current_session is None:
            raise RuntimeError("reset() must be called before step()")

        self.steps += 1

        target = self.current_session["target"]
        recommended = action.recommended_product

        # Reward shaping (IMPORTANT)
        if recommended == target:
            reward = 1.0  # purchase
            self.done = True
        elif recommended in self.current_session["history"]:
            reward = 0.3  # click
        else:
            reward = -0.2  # irrelevant

        if self.steps >= self.max_steps:
            self.done = True

        return self._get_obs(), Reward(score=reward), self.done, {}

    def state(self):
        """Return a snapshot of the episode: step count and target product.

        ``target`` is ``None`` when no episode has been started yet.
        """
        session = self.current_session
        return {
            "steps": self.steps,
            "target": session["target"] if session is not None else None,
        }
grader.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
def grade(total_reward, steps):
    """Return the average reward per step, clamped to the range [0, 1].

    Args:
        total_reward: Sum of rewards collected over the run.
        steps: Number of steps the reward was collected over.

    Returns:
        float: ``total_reward / steps`` limited to ``0.0..1.0``. A run with
        no steps (``steps <= 0``) is graded ``0.0`` instead of dividing by zero.
    """
    if steps <= 0:
        return 0.0
    # normalize score between 0 and 1
    return max(0.0, min(1.0, total_reward / steps))
models.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List
3
+
4
+ # What agent sees
5
class Observation(BaseModel):
    """What the agent sees."""

    user_id: int
    # Previously viewed products.
    history: List[int]
8
+
9
+
10
+ # What agent does
11
class Action(BaseModel):
    """What the agent does: recommend a single product."""

    # Product ID being recommended.
    recommended_product: int
13
+
14
+
15
+ # Reward returned by environment
16
class Reward(BaseModel):
    """Reward returned by the environment."""

    # Intended to lie between -1 and 1; the range is not validated here.
    score: float
openenv.yaml ADDED
File without changes
tasks.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
def easy():
    """Return the objective text of the easy task."""
    objective = "maximize clicks"
    return objective
3
+
4
def medium():
    """Return the objective text of the medium task."""
    objective = "predict purchase"
    return objective
6
+
7
def hard():
    """Return the objective text of the hard task."""
    objective = "maximize session reward"
    return objective