Fahad-sha committed on
Commit
4e80910
·
verified ·
1 Parent(s): 018767e

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +12 -12
  2. app.py +119 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,13 +1,13 @@
1
- ---
2
- title: RetailProductRecommendationExplainer
3
- emoji: 💻
4
- colorFrom: gray
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 6.3.0
8
- app_file: app.py
9
- pinned: false
10
- short_description: (Before vs After RL)
11
- ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
1
+ # Retail Recommendation Explainer — Before vs After RL (DPO)
 
 
 
 
 
 
 
 
 
 
2
 
3
+ This Space compares a base instruct model against the same model with a DPO-trained
4
+ LoRA adapter applied, for retail product recommendations with concise, constraint-aware explanations.
5
+
6
+ ## Environment Variables (optional)
7
+ - `BASE_MODEL` (default: `Qwen/Qwen2.5-0.5B-Instruct`)
8
+ - `ADAPTER_RL_PATH` (default: `./adapter_dpo`)
9
+
10
+ ## How to use
11
+ 1. Train and produce `adapter_dpo/` using the scripts in `trainer/`.
12
+ 2. Copy the `adapter_dpo/` folder into the Space repo (same folder level as `app.py`).
13
+ 3. Run the Space and click **Generate (Before vs After)**.
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM
5
+ from peft import PeftModel
6
+
7
# Model configuration — both values can be overridden via Space env vars
# (documented in the README).
BASE_MODEL = os.environ.get("BASE_MODEL", "Qwen/Qwen2.5-0.5B-Instruct")
ADAPTER_RL_PATH = os.environ.get("ADAPTER_RL_PATH", "./adapter_dpo")  # commit folder into Space repo

# System prompt shared verbatim by the base model and the DPO-adapter model,
# so the only variable in the before/after comparison is the weights.
SYSTEM_PROMPT = """You are a retail recommendation assistant.
You recommend at most 3 items that complement the user's cart and intent.
You must be:
- Relevant to the cart + intent
- Constraint-aware (budget, urgency, compatibility, brand preferences)
- Non-pushy and honest (no made-up specs or guarantees)
- Concise and structured

Output format:
Recommendations:
1) <item> — <one-line reason>
2) ...
Why these:
- ...
Compatibility / checks:
- ...
Optional next step:
- (only if helpful)
"""
29
+
30
def build_prompt(user_intent, cart, budget, urgency, brand_avoid):
    """Assemble the single prompt string sent to both models.

    `cart` and `brand_avoid` are comma-separated strings from the UI; empty
    fragments are dropped. Constraints are embedded as a Python-dict repr.

    NOTE(review): the <|system|>/<|user|>/<|assistant|> markers are a generic
    convention, not Qwen's native chat template — confirm they match how the
    DPO adapter was trained before changing them.
    """
    items = [piece.strip() for piece in cart.split(",") if piece.strip()]
    avoid = [piece.strip() for piece in brand_avoid.split(",") if piece.strip()]

    constraints = {
        "budget_usd": budget,
        "shipping_urgency": urgency,
        "brand_avoid": avoid,
    }

    user_block = "\n".join([
        f"User intent: {user_intent}",
        f"Cart: {', '.join(items)}",
        f"Constraints: {constraints}",
        "Generate recommendations following the required format.",
    ])
    return f"<|system|>\n{SYSTEM_PROMPT}\n<|user|>\n{user_block}\n<|assistant|>\n"
44
+
45
@torch.inference_mode()
def generate(model, tok, prompt, max_new_tokens=220, temperature=0.7):
    """Generate a completion for `prompt` and return only the new text.

    Args:
        model: causal LM (base or PEFT-wrapped) exposing `.generate()`.
        tok: the shared tokenizer.
        prompt: full prompt string produced by `build_prompt`.
        max_new_tokens: generation budget.
        temperature: sampling temperature; <= 0 selects greedy decoding.

    Returns:
        The stripped assistant completion (prompt text excluded).
    """
    inputs = tok(prompt, return_tensors="pt").to(model.device)

    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "pad_token_id": tok.eos_token_id,
    }
    if temperature > 0:
        gen_kwargs.update(do_sample=True, temperature=temperature)
    else:
        # Do NOT pass temperature=0.0: transformers warns (and some versions
        # raise "temperature must be strictly positive") even with greedy
        # decoding. Omitting it is the supported way to request greedy search.
        gen_kwargs["do_sample"] = False

    out = model.generate(**inputs, **gen_kwargs)

    # Decode only the newly generated tokens so the prompt (including the
    # "<|assistant|>" marker) can never leak into the returned text — more
    # robust than splitting the full decoded string on the marker.
    prompt_len = inputs["input_ids"].shape[1]
    completion = tok.decode(out[0][prompt_len:], skip_special_tokens=True)
    return completion.strip()
61
+
62
def load_models():
    """Load the tokenizer, the base model, and (optionally) the DPO adapter.

    Returns:
        (tokenizer, base_model, rl_model) — `rl_model` is None when the
        adapter folder is absent; `run_both` surfaces that to the user.
    """
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
    # Some checkpoints ship without a pad token; reuse EOS so generation can pad.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        device_map="auto",
        torch_dtype="auto",
    )

    # The adapter is optional — the Space still runs (base-only) without it.
    adapter = None
    if os.path.exists(ADAPTER_RL_PATH):
        adapter = PeftModel.from_pretrained(base, ADAPTER_RL_PATH)

    return tokenizer, base, adapter

# Load once at import time so every Gradio request reuses the same weights.
tok, base_model, rl_model = load_models()
79
+
80
def run_both(user_intent, cart, budget, urgency, brand_avoid, max_new_tokens, temperature):
    """Run the same prompt through the base model and the DPO-adapter model.

    Returns:
        (before, after) completion strings. When the adapter was not loaded,
        `after` carries a human-readable hint instead of a completion.
    """
    prompt = build_prompt(user_intent, cart, budget, urgency, brand_avoid)
    before = generate(base_model, tok, prompt, max_new_tokens=max_new_tokens, temperature=temperature)

    if rl_model is None:
        return before, (
            "RL adapter not found. Ensure adapter_dpo/ is included in the "
            "Space repo or ADAPTER_RL_PATH is correct."
        )

    after = generate(rl_model, tok, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
    return before, after
90
+
91
# --- Gradio UI -------------------------------------------------------------
# Single page: shopper inputs on top, decoding controls, then side-by-side
# "before" (base) and "after" (DPO LoRA) outputs. Construction order inside
# the Blocks context defines the layout.
with gr.Blocks() as demo:
    gr.Markdown("# Retail Recommendation Explainer — Before vs After RL (DPO)")

    # Free-text shopper intent; default mirrors the README example.
    user_intent = gr.Textbox(
        label="User intent",
        value="I’m starting to run regularly and want to avoid blisters."
    )
    cart = gr.Textbox(label="Cart items (comma-separated)", value="running shoes, socks")
    with gr.Row():
        budget = gr.Slider(10, 150, value=40, step=5, label="Budget (USD)")
        urgency = gr.Dropdown(["fast", "normal"], value="fast", label="Shipping urgency")
        brand_avoid = gr.Textbox(label="Brands/materials to avoid (comma-separated)", value="")

    # Decoding controls shared by both generations.
    with gr.Row():
        max_new_tokens = gr.Slider(80, 400, value=220, step=10, label="Max new tokens")
        temperature = gr.Slider(0.0, 1.2, value=0.7, step=0.05, label="Temperature")

    btn = gr.Button("Generate (Before vs After)")
    with gr.Row():
        out_before = gr.Textbox(label="Before (Base)", lines=18)
        out_after = gr.Textbox(label="After (RL / DPO LoRA)", lines=18)

    # One click drives both generations; outputs map positionally to run_both's
    # (before, after) return tuple.
    btn.click(
        fn=run_both,
        inputs=[user_intent, cart, budget, urgency, brand_avoid, max_new_tokens, temperature],
        outputs=[out_before, out_after]
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ transformers>=4.42.0
4
+ peft
5
+ accelerate