Spaces:

lspcloud
/

amazon_mturker

Sleeping

App Files Files Community

ehejin committed on Apr 1

Commit

cc5590d

1 Parent(s): 6d6e203

added user study

Browse files

Files changed (2) hide show

requirements.txt +7 -3
src/streamlit_app.py +965 -34

requirements.txt CHANGED Viewed

@@ -1,3 +1,7 @@
-altair
-pandas
-streamlit

+streamlit>=1.32.0
+openai>=1.0.0
+huggingface_hub>=0.20.0
+datasets>=2.18.0
+filelock>=3.13.0
+python-dotenv>=1.0.0
+pandas>=2.0.0

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,971 @@
-import altair as alt
-import numpy as np
-import pandas as pd
-import streamlit as st
 """
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
 """
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

+"""
+Streamlit App: AI Product Willingness User Study
+=================================================
+Run locally:
+    streamlit run src/streamlit_app.py -- --category groceries
+    streamlit run src/streamlit_app.py -- --category groceries --debug
+On HuggingFace Spaces, set these environment variables in Space Settings → Variables:
+    HF_TOKEN           - HuggingFace token
+    TOGETHER_API_KEY   - Together AI API key
+    DATASET_REPO_ID    - HuggingFace dataset repo to upload results
+    CATEGORY           - groceries | books | movies | health  (default: groceries)
+    DEBUG_MODE         - "true" to skip validation (optional)
 """
+import asyncio
+import concurrent.futures
+import csv
+import json
+import os
+import random
+import re
+import sys
+import tempfile
+import time
+import uuid
+from datetime import datetime
+from pathlib import Path
+import streamlit as st
+from dotenv import load_dotenv
+from filelock import FileLock
+from huggingface_hub import HfApi
+from openai import AsyncOpenAI
+load_dotenv()
+# ---------------------------------------------------------------------------
+# CLI args (supported locally; ignored on HF Spaces — use env vars instead)
+# ---------------------------------------------------------------------------
+import argparse
+# Single source of truth for the supported categories: used for the CLI
+# choices and to validate the CATEGORY environment variable below.
+_VALID_CATEGORIES = ("books", "groceries", "movies", "health")
+parser = argparse.ArgumentParser(add_help=False)
+parser.add_argument("--category", choices=list(_VALID_CATEGORIES), default=None)
+parser.add_argument("--debug", action="store_true", default=False)
+# parse_known_args returns unrecognised arguments separately; they are ignored here.
+cli_args, _ = parser.parse_known_args()
+# ---------------------------------------------------------------------------
+# Config  (env vars take precedence, then CLI args, then defaults)
+# ---------------------------------------------------------------------------
+CATEGORY = (os.getenv("CATEGORY") or cli_args.category or "groceries").strip().lower()
+if CATEGORY not in _VALID_CATEGORIES:
+    # Fail fast with a readable message: CATEGORY is used as a lookup key into
+    # the category tables and is interpolated into the data file names, so an
+    # unexpected value would otherwise surface later as a bare KeyError.
+    raise ValueError(
+        f"Unsupported CATEGORY {CATEGORY!r}; expected one of: {', '.join(_VALID_CATEGORIES)}"
+    )
+DEBUG_MODE = os.getenv("DEBUG_MODE", "").lower() == "true" or cli_args.debug
+DATASET_REPO_ID = os.getenv("DATASET_REPO_ID", "your-username/product-study")
+HF_TOKEN = os.getenv("HF_TOKEN")
+TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
+MODEL_NAME = "openai/gpt-oss-20b"
+# Data and annotation folders live next to this source file and are created on import.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+DATA_DIR = os.path.join(BASE_DIR, "data")
+ANNOTATIONS_DIR = os.path.join(BASE_DIR, "annotations")
+os.makedirs(DATA_DIR, exist_ok=True)
+os.makedirs(ANNOTATIONS_DIR, exist_ok=True)
+# Category key -> HuggingFace dataset repo id passed to `load_dataset`.
+CATEGORY_TO_HF = {
+    "books": "ehejin/amazon_books",
+    "groceries": "ehejin/amazon_Grocery_and_Gourmet_Food",
+    "movies": "ehejin/amazon_Movies_and_TV",
+    "health": "ehejin/amazon_Health_and_Household",
+}
+# Category key -> human-readable name shown in the welcome text.
+CATEGORY_DISPLAY = {
+    "books": "Books",
+    "groceries": "Grocery Products",
+    "movies": "Movies & TV",
+    "health": "Health & Household Products",
+}
+# Category key -> wording of the "used it" option in the familiarity question.
+FAMILIARITY_USED_LABEL = {
+    "books": "Read it before",
+    "movies": "Watched it before",
+    "groceries": "Used it before",
+    "health": "Used it before",
+}
+# Number of products assigned to each session.
+PRODUCTS_PER_USER = 5
+# Minimum number of exchanges announced to the participant on the chat screen.
+MIN_TURNS = 3
+# Once a conversation reaches this many turns the chat input is no longer shown.
+MAX_TURNS = 10
+# Demographics copied into the session state when DEBUG_MODE skips the form.
+DEBUG_DEMOGRAPHICS = {
+    "age": "30", "gender": "Female", "geographic_region": "West",
+    "education_level": "College graduate/some postgrad", "race": "White",
+    "us_citizen": "Yes", "marital_status": "Single",
+    "religion": "Agnostic", "religious_attendance": "Never",
+    "political_affiliation": "Independent", "income": "$50,000-$75,000",
+    "political_views": "Moderate", "household_size": "2",
+    "employment_status": "Full-time employment",
+}
+# 7-point purchase-willingness scale: numeric score -> label.
+WILLINGNESS_LABELS = {
+    1: "Definitely would not buy",
+    2: "Probably would not buy",
+    3: "Slightly unlikely to buy",
+    4: "Neutral",
+    5: "Slightly likely to buy",
+    6: "Probably would buy",
+    7: "Definitely would buy",
+}
+# Radio options in the form "<label> (<score>)"; `parse_willingness` reads the score back out.
+WILLINGNESS_CHOICES = [f"{v} ({k})" for k, v in WILLINGNESS_LABELS.items()]
+# ---------------------------------------------------------------------------
+# Dataset loading
+# ---------------------------------------------------------------------------
+# Per-category files inside DATA_DIR:
+#   LOCAL_DATA_PATH   - cached product list (JSON)
+#   ORDER_PATH        - persisted shuffled list of item indices (JSON)
+#   COUNTER_PATH      - offset into the shuffled order for the next assignment
+#   COUNTER_LOCK_PATH - lock file guarding reads/writes of the counter
+LOCAL_DATA_PATH = os.path.join(DATA_DIR, f"{CATEGORY}.json")
+ORDER_PATH = os.path.join(DATA_DIR, f"{CATEGORY}_order.json")
+COUNTER_PATH = os.path.join(DATA_DIR, f"{CATEGORY}_counter.txt")
+COUNTER_LOCK_PATH = os.path.join(DATA_DIR, f"{CATEGORY}_counter.lock")
+@st.cache_resource
+def download_and_cache_dataset():
+    """Download the category's HuggingFace dataset once and cache it as JSON.
+    Does nothing when LOCAL_DATA_PATH already exists. Otherwise loads the
+    "train" split of CATEGORY_TO_HF[CATEGORY], flattens each row's "metadata"
+    into an item dict (id, title, description, features, price, category) and
+    writes the list to LOCAL_DATA_PATH.
+    Raises:
+        Exception: any download or write error is logged and re-raised.
+    """
+    if os.path.exists(LOCAL_DATA_PATH):
+        print(f"[DATA] Found cached dataset at {LOCAL_DATA_PATH}")
+        return
+    print(f"[DATA] Downloading {CATEGORY_TO_HF[CATEGORY]} from HuggingFace...")
+    def to_list(val):
+        # Normalise a field to a list: lists pass through, a non-empty string
+        # becomes a one-element list, anything else becomes an empty list.
+        if isinstance(val, list):
+            return val
+        if isinstance(val, str):
+            return [val] if val else []
+        return []
+    try:
+        from datasets import load_dataset
+        import huggingface_hub
+        if HF_TOKEN:
+            huggingface_hub.login(token=HF_TOKEN)
+        ds = load_dataset(CATEGORY_TO_HF[CATEGORY], split="train")
+        items = []
+        for row in ds:
+            meta = row.get("metadata", {})
+            if not isinstance(meta, dict):
+                # Rows without a usable metadata dict fall back to the defaults below.
+                meta = {}
+            items.append({
+                "id": str(uuid.uuid4()),
+                "title": meta.get("title", ""),
+                "description": to_list(meta.get("description", [])),
+                "features": to_list(meta.get("features", [])),
+                "price": meta.get("price", "N/A"),
+                "category": CATEGORY,
+            })
+        # Write to a sibling temp file and rename it into place, so that an
+        # interrupted write never leaves a truncated file that the
+        # os.path.exists() check above would later accept as a valid cache.
+        tmp_path = f"{LOCAL_DATA_PATH}.tmp"
+        with open(tmp_path, "w", encoding="utf-8") as f:
+            json.dump(items, f, indent=2)
+        os.replace(tmp_path, LOCAL_DATA_PATH)
+        print(f"[DATA] Cached {len(items)} items to {LOCAL_DATA_PATH}")
+    except Exception as e:
+        print(f"[DATA] ERROR downloading dataset: {e}")
+        raise
+@st.cache_resource
+def load_local_dataset():
+    """Return the product list parsed from the JSON cache at LOCAL_DATA_PATH."""
+    with open(LOCAL_DATA_PATH, "r") as cache_file:
+        cached_items = json.load(cache_file)
+    return cached_items
+@st.cache_resource
+def ensure_shuffled_order(n_items):
+    """Return a persisted random permutation of ``range(n_items)``.
+    The order stored at ORDER_PATH is reused only when it is a valid
+    permutation of the current item indices. A missing, unreadable or stale
+    file (for example one written for a dataset of a different size) is
+    replaced by a freshly shuffled order, so callers never receive indices
+    that fall outside the item list.
+    Args:
+        n_items: number of items in the dataset.
+    Returns:
+        A list containing each index in ``range(n_items)`` exactly once.
+    """
+    expected = list(range(n_items))
+    if os.path.exists(ORDER_PATH):
+        try:
+            with open(ORDER_PATH, "r") as f:
+                stored = json.load(f)
+        except (OSError, ValueError) as e:
+            # json.JSONDecodeError is a ValueError subclass.
+            print(f"[DATA] Could not read {ORDER_PATH} ({e}); regenerating order.")
+            stored = None
+        is_valid = (
+            isinstance(stored, list)
+            and len(stored) == n_items
+            and all(isinstance(i, int) for i in stored)
+            and sorted(stored) == expected
+        )
+        if is_valid:
+            return stored
+        print(f"[DATA] Order file {ORDER_PATH} does not match {n_items} items; regenerating.")
+    indices = expected
+    random.shuffle(indices)
+    with open(ORDER_PATH, "w") as f:
+        json.dump(indices, f)
+    return indices
+def assign_products(items, order, n=PRODUCTS_PER_USER):
+    """Assign the next ``n`` products to a session, round-robin over ``order``.
+    The offset into ``order`` is kept in COUNTER_PATH and is read and advanced
+    while holding the file lock at COUNTER_LOCK_PATH. The offset wraps around
+    when the end of ``order`` is reached.
+    Args:
+        items: full product list.
+        order: list of indices into ``items``.
+        n: number of products to assign.
+    Returns:
+        A list of ``n`` entries from ``items``.
+    Raises:
+        ValueError: if ``order`` is empty.
+    """
+    total = len(order)
+    if total == 0:
+        # Without this guard the modulo arithmetic below fails with an opaque ZeroDivisionError.
+        raise ValueError("Cannot assign products: the product order is empty.")
+    lock = FileLock(COUNTER_LOCK_PATH)
+    with lock:
+        counter = 0
+        if os.path.exists(COUNTER_PATH):
+            with open(COUNTER_PATH, "r") as f:
+                raw = f.read().strip()
+            try:
+                counter = int(raw or "0")
+            except ValueError:
+                # A corrupted counter file should not block every new session; restart at 0.
+                print(f"[DATA] Invalid counter value {raw!r} in {COUNTER_PATH}; resetting to 0.")
+                counter = 0
+        assigned_indices = [order[(counter + i) % total] for i in range(n)]
+        new_counter = (counter + n) % total
+        with open(COUNTER_PATH, "w") as f:
+            f.write(str(new_counter))
+    return [items[i] for i in assigned_indices]
+# ---------------------------------------------------------------------------
+# AI client
+# ---------------------------------------------------------------------------
+@st.cache_resource
+def get_model_client():
+    """Build the AsyncOpenAI client pointed at the Together API endpoint."""
+    client_options = {
+        "base_url": "https://api.together.xyz/v1",
+        "api_key": TOGETHER_API_KEY,
+        "timeout": 60.0,
+    }
+    return AsyncOpenAI(**client_options)
+def call_model(messages: list) -> str:
+    """Send ``messages`` to the chat model and return the reply text.
+    The request coroutine is executed with ``asyncio.run`` on a single-worker
+    thread pool and this function blocks until it finishes. Any
+    ``<think>...</think>`` span is removed from the reply. Errors are not
+    raised: they are logged and returned as a "[Model error: ...]" string.
+    """
+    async def _request_reply():
+        try:
+            completion = await get_model_client().chat.completions.create(
+                model=MODEL_NAME,
+                messages=messages,
+                max_tokens=1000,
+                temperature=0.7,
+                top_p=0.9,
+            )
+            raw_text = completion.choices[0].message.content.strip()
+            return re.sub(r"<think>.*?</think>", "", raw_text, flags=re.DOTALL).strip()
+        except Exception as err:
+            print(f"[MODEL] Error: {err}")
+            return f"[Model error: {err}]"
+    # NOTE(review): presumably run on a worker thread so asyncio.run never
+    # collides with an event loop on the calling thread; confirm.
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+        pending = executor.submit(asyncio.run, _request_reply())
+        return pending.result()
+# ---------------------------------------------------------------------------
+# HuggingFace upload
+# ---------------------------------------------------------------------------
+@st.cache_resource
+def get_hf_api():
+    """Return an HfApi client, creating the results dataset repo when it is missing.
+    Without HF_TOKEN an unauthenticated client is returned and no repo check
+    is made. With a token, DATASET_REPO_ID is looked up; an error whose text
+    contains "404" or "not found" triggers creation of a private dataset repo,
+    and any other error is only logged as a warning.
+    """
+    if not HF_TOKEN:
+        return HfApi()
+    api = HfApi(token=HF_TOKEN)
+    try:
+        api.repo_info(repo_id=DATASET_REPO_ID, repo_type="dataset")
+        print(f"[HF] Repo {DATASET_REPO_ID} exists.")
+    except Exception as err:
+        message = str(err)
+        repo_missing = "404" in message or "not found" in message.lower()
+        if not repo_missing:
+            print(f"[HF] WARNING: {err}")
+        else:
+            api.create_repo(repo_id=DATASET_REPO_ID, repo_type="dataset", private=True)
+            print(f"[HF] Created repo {DATASET_REPO_ID}.")
+    return api
+def save_and_upload(state: dict):
+    """Write the session state to a local JSON file and upload it.
+    The file goes to ANNOTATIONS_DIR/<sanitised worker id>/<submission id>_<CATEGORY>.json.
+    When HF_TOKEN is set the same file is uploaded to the dataset repo; upload
+    errors are logged, not raised. The per-product CSV upload runs afterwards.
+    """
+    hf_api = get_hf_api()
+    # Fall back to the generated user id when no MTurk worker id was captured.
+    worker_id = state.get("worker_id") or state.get("user_id", "anonymous")
+    submission_id = state.get("submission_id", str(uuid.uuid4()))
+    # Replace every non-alphanumeric character so the id is safe as a path component.
+    safe_chars = [ch if ch.isalnum() else "_" for ch in str(worker_id)]
+    safe_worker = "".join(safe_chars)
+    filename = f"{submission_id}_{CATEGORY}.json"
+    worker_dir = os.path.join(ANNOTATIONS_DIR, safe_worker)
+    os.makedirs(worker_dir, exist_ok=True)
+    file_path = os.path.join(worker_dir, filename)
+    with open(file_path, "w") as out_file:
+        json.dump(state, out_file, indent=2)
+    print(f"[SAVE] Wrote {file_path}")
+    if HF_TOKEN:
+        upload_kwargs = {
+            "path_or_fileobj": file_path,
+            "path_in_repo": f"{safe_worker}/{filename}",
+            "repo_id": DATASET_REPO_ID,
+            "repo_type": "dataset",
+        }
+        try:
+            hf_api.upload_file(**upload_kwargs)
+            print("[HF] Uploaded JSON.")
+        except Exception as err:
+            print(f"[HF] JSON upload error: {err}")
+    upload_csv_rows(state, hf_api, safe_worker, submission_id)
+def upload_csv_rows(state: dict, hf_api, safe_worker: str, submission_id: str):
+    """Flatten the session into one CSV row per product and upload the CSV.
+    Each row holds the submission metadata, the demographics, and the
+    product's ratings, conversation (as a JSON string) and reflection answers.
+    The CSV is written to a temporary file which is always removed afterwards,
+    even if writing or uploading fails. The upload only happens when HF_TOKEN
+    is set; upload errors are logged, not raised.
+    Args:
+        state: session state dict.
+        hf_api: client exposing ``upload_file``.
+        safe_worker: sanitised worker id used in the uploaded file name.
+        submission_id: id written to the first column of every row.
+    """
+    demographics = state.get("demographics", {})
+    products = state.get("products", [])
+    meta = state.get("meta", {})
+    # One list drives both the header and the row values so the demographic
+    # columns cannot drift out of alignment.
+    demographic_fields = [
+        "age", "gender", "geographic_region", "education_level", "race",
+        "us_citizen", "marital_status", "religion", "religious_attendance",
+        "political_affiliation", "income", "political_views", "household_size", "employment_status",
+    ]
+    header = [
+        "submission_id", "worker_id", "submission_time", "duration_seconds", "category",
+        *demographic_fields,
+        "product_index", "product_id", "title", "price", "familiarity",
+        "pre_willingness", "pre_willingness_label", "post_willingness", "post_willingness_label",
+        "willingness_delta", "num_turns", "conversation_json", "standout_moment", "thinking_change",
+    ]
+    rows = []
+    for i, prod in enumerate(products):
+        conv = prod.get("conversation", {})
+        refl = prod.get("reflection", {})
+        pre = prod.get("pre_willingness", "")
+        post = prod.get("post_willingness", "")
+        # The delta is only defined when both ratings were recorded as integers.
+        delta = (post - pre) if isinstance(pre, int) and isinstance(post, int) else ""
+        rows.append([
+            submission_id, state.get("worker_id", ""),
+            meta.get("submission_time", ""),
+            meta.get("duration_seconds", ""),
+            CATEGORY,
+            *(demographics.get(field, "") for field in demographic_fields),
+            i + 1, prod.get("id", ""), prod.get("title", ""), prod.get("price", ""),
+            prod.get("familiarity", ""),
+            pre, WILLINGNESS_LABELS.get(pre, "") if isinstance(pre, int) else "",
+            post, WILLINGNESS_LABELS.get(post, "") if isinstance(post, int) else "",
+            delta, conv.get("num_turns", 0), json.dumps(conv.get("turns", [])),
+            refl.get("standout_moment", ""), refl.get("thinking_change", ""),
+        ])
+    timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
+    unique_id = uuid.uuid4().hex[:8]
+    csv_filename = f"csv_submissions/{timestamp_str}_{safe_worker}_{unique_id}.csv"
+    tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False, newline="", encoding="utf-8")
+    tmp_path = tmp.name
+    try:
+        # Close the file before uploading so all rows are flushed to disk.
+        with tmp:
+            writer = csv.writer(tmp)
+            writer.writerow(header)
+            writer.writerows(rows)
+        if HF_TOKEN:
+            try:
+                hf_api.upload_file(
+                    path_or_fileobj=tmp_path,
+                    path_in_repo=csv_filename,
+                    repo_id=DATASET_REPO_ID,
+                    repo_type="dataset",
+                )
+                print("[HF] Uploaded CSV rows.")
+            except Exception as e:
+                print(f"[HF] CSV upload error: {e}")
+    finally:
+        # delete=False means the temp file is ours to remove, on every path.
+        os.unlink(tmp_path)
+# ---------------------------------------------------------------------------
+# Prompt builders
+# ---------------------------------------------------------------------------
+def build_sales_system_prompt(product: dict) -> str:
+    # Builds the system prompt that casts the model as a sales agent for
+    # `product`, embedding its title, joined description/features and price.
+    title = product.get("title", "this product")
+    price = product.get("price", "N/A")
+    description = product.get("description", [])
+    if description:
+        desc_text = " ".join(description)
+    else:
+        desc_text = "No description available."
+    feat_text = " ".join(product.get("features", []) or [])
+    # Prefix a dollar sign only for real prices that do not already carry one.
+    needs_dollar = bool(price) and price != "N/A" and not str(price).startswith("$")
+    price_str = f"${price}" if needs_dollar else price
+    return f"""You are a warm, knowledgeable sales agent for an Amazon product. Your goal is to convince the user to buy this product.
+Product name: {title}
+Product description and features: {desc_text} {feat_text}
+Price: {price_str}
+Rules:
+- Be warm, confident, and conversational — like a helpful friend who knows the product well, not a pushy salesperson
+- Be persuasive and proactive: do not just answer questions; actively make the case for buying.
+- Tailor your argument to what the user cares about or seems hesitant about.
+- Use only the information provided in the title, description, features, and price.
+- Do not fabricate reviews, ratings, statistics, comparisons, discounts, or product details.
+- If the user raises a concern, respond directly and turn the conversation back to why the product is worth buying.
+- Keep the reply concise and natural.
+First message:
+- Open with the product's strongest selling point.
+- Explain why it is worth buying.
+- End with a question that helps uncover the user's preferences, objections, or hesitation.
+Follow-up rules:
+- Acknowledge what the user said.
+- Address their main objection directly with a concrete benefit grounded in the product info.
+- Continue steering the conversation toward purchase.
+- Usually end with a natural question that keeps the user engaged.
 """
+def build_opening_user_message(product: dict) -> str:
+    """Return the fixed first user message that asks the model to pitch the product."""
+    product_title = product.get("title", "this product")
+    return f'Tell me about this product and why I should buy it: "{product_title}"'
+def parse_willingness(choice_str: str) -> int:
+    """Extract the numeric score from a choice such as ``"Neutral (4)"``.
+    The score is read from the last parenthesised group, so labels that
+    themselves contain parentheses or trailing whitespace still parse.
+    Args:
+        choice_str: an option in the form "<label> (<score>)".
+    Returns:
+        The integer score, or 4 (the neutral midpoint) when none can be parsed.
+    """
+    try:
+        _, opener, tail = choice_str.strip().rpartition("(")
+        if not opener:
+            return 4
+        return int(tail.rstrip(")"))
+    except (AttributeError, TypeError, ValueError):
+        # Non-string input or a non-numeric score falls back to neutral.
+        return 4
+def get_familiarity_choices():
+    """Return the four familiarity options, with the third worded for the current category."""
+    choices = ["Never heard of it", "Heard of it, but not used/purchased"]
+    choices.append(FAMILIARITY_USED_LABEL.get(CATEGORY, "Used it before"))
+    choices.append("Purchased it before")
+    return choices
+# ---------------------------------------------------------------------------
+# State initialisation
+# ---------------------------------------------------------------------------
+def init_state():
+    """Create the initial state dict for a new participant session.
+    Makes sure the dataset is cached, assigns PRODUCTS_PER_USER products and
+    returns a dict holding fresh ids, the MTurk query parameters (empty
+    strings when absent), one blank record per assigned product, and the
+    starting screen ("welcome").
+    """
+    download_and_cache_dataset()
+    items = load_local_dataset()
+    order = ensure_shuffled_order(len(items))
+    assigned = assign_products(items, order, PRODUCTS_PER_USER)
+    # Read MTurk query params if available
+    try:
+        params = st.query_params
+    except Exception:
+        params = {}
+    def _blank_record(p):
+        # Product fields copied from the dataset plus empty slots for the answers.
+        return {
+            "id": p.get("id", str(uuid.uuid4())),
+            "title": p.get("title", ""),
+            "description": p.get("description", []),
+            "features": p.get("features", []),
+            "price": p.get("price", "N/A"),
+            "familiarity": None,
+            "pre_willingness": None,
+            "post_willingness": None,
+            "willingness_delta": None,
+            "conversation": {
+                "system_prompt": "",
+                "opening_user_message": "",
+                "turns": [],
+                "num_turns": 0,
+            },
+            "reflection": {},
+        }
+    state = {
+        "submission_id": str(uuid.uuid4()),
+        "user_id": str(uuid.uuid4()),
+        "worker_id": params.get("workerId", ""),
+        "assignment_id": params.get("assignmentId", ""),
+        "hit_id": params.get("hitId", ""),
+        "turk_submit_to": params.get("turkSubmitTo", ""),
+        "start_time": time.time(),
+        "category": CATEGORY,
+        "demographics": {},
+        "products": [_blank_record(p) for p in assigned],
+        "current_product_index": 0,
+        "screen": "welcome",   # screens: welcome | demographics | product_intro | chat | post_will | reflection | done
+        "meta": {},
+    }
+    return state
+# ---------------------------------------------------------------------------
+# CSS
+# ---------------------------------------------------------------------------
+def inject_css():
+    # Emits the app's stylesheet (product card, progress bar, chat bubbles,
+    # chat banner) as raw HTML and hides the default Streamlit chrome.
+    stylesheet = """
+    <style>
+    /* Hide Streamlit chrome */
+    #MainMenu, footer, header { visibility: hidden; }
+    .block-container { max-width: 820px; padding-top: 2rem; }
+    /* Product card */
+    .product-card {
+        border: 2px solid #2563eb;
+        border-radius: 10px;
+        padding: 1rem 1.25rem;
+        background: #f0f6ff;
+        margin-bottom: 0.75rem;
+    }
+    .pc-header {
+        display: flex;
+        justify-content: space-between;
+        align-items: flex-start;
+        margin-bottom: 0.6rem;
+        gap: 1rem;
+    }
+    .pc-title {
+        font-size: 1.05rem;
+        font-weight: 700;
+        color: #1a1a2e;
+        line-height: 1.35;
+        flex: 1;
+    }
+    .pc-price {
+        font-size: 1.2rem;
+        font-weight: 800;
+        color: #16a34a;
+        white-space: nowrap;
+    }
+    .pc-section { margin-top: 0.5rem; }
+    .pc-section-title {
+        font-weight: 600;
+        font-size: 0.85rem;
+        color: #475569;
+        text-transform: uppercase;
+        letter-spacing: 0.04em;
+        margin-bottom: 0.3rem;
+    }
+    .pc-list {
+        margin: 0;
+        padding-left: 1.2rem;
+        font-size: 0.92rem;
+        color: #334155;
+        line-height: 1.5;
+    }
+    .pc-list li { margin-bottom: 0.25rem; }
+    /* Progress bar */
+    .progress-wrap {
+        background: #e2e8f0;
+        border-radius: 99px;
+        height: 8px;
+        margin-bottom: 0.25rem;
+        overflow: hidden;
+    }
+    .progress-fill {
+        background: #2563eb;
+        height: 100%;
+        border-radius: 99px;
+    }
+    .progress-label {
+        font-size: 0.82rem;
+        color: #64748b;
+        text-align: right;
+        margin-bottom: 1rem;
+    }
+    /* Chat bubbles */
+    .chat-wrap { max-height: 420px; overflow-y: auto; margin-bottom: 1rem; }
+    .bubble { padding: 0.65rem 0.9rem; border-radius: 12px; margin-bottom: 0.5rem; font-size: 0.93rem; line-height: 1.5; }
+    .bubble-ai { background: #eff6ff; border: 1px solid #93c5fd; margin-right: 10%; }
+    .bubble-user { background: #f0fdf4; border: 1px solid #86efac; margin-left: 10%; text-align: right; }
+    .bubble-label { font-size: 0.75rem; color: #94a3b8; margin-bottom: 0.2rem; }
+    /* Compact product banner above chat */
+    .chat-product-banner {
+        border: 1.5px solid #93c5fd;
+        border-radius: 8px;
+        padding: 0.6rem 1rem;
+        background: #eff6ff;
+        margin-bottom: 0.75rem;
+        font-size: 0.88rem;
+        color: #1d4ed8;
+        font-weight: 600;
+        cursor: pointer;
+    }
+    </style>
+    """
+    st.markdown(stylesheet, unsafe_allow_html=True)
+# ---------------------------------------------------------------------------
+# UI helpers
+# ---------------------------------------------------------------------------
+def render_product_card_html(product: dict, compact: bool = False) -> str:
+    """Return the HTML for a product card.
+    All product text (title, price, description and feature entries) comes
+    from the dataset and is HTML-escaped before interpolation, because the
+    result is rendered with ``unsafe_allow_html=True``: a stray ``<`` or ``&``
+    would otherwise break the card or inject markup.
+    Args:
+        product: dict with optional "title", "price", "description" and "features".
+        compact: when True the card is height-limited and scrolls.
+    Returns:
+        The card markup as a string.
+    """
+    from html import escape  # local import keeps this block self-contained
+    def _safe(value) -> str:
+        # Only &, < and > need escaping in element text; quotes are left as-is.
+        return escape(str(value), quote=False)
+    title = _safe(product.get("title") or "Unknown Product")
+    price = product.get("price", "N/A")
+    if price is None or price == "":
+        # A missing price previously rendered as the literal text "None".
+        price_str = "N/A"
+    elif price != "N/A" and not str(price).startswith("$"):
+        price_str = f"${price}"
+    else:
+        price_str = str(price)
+    price_str = _safe(price_str)
+    description = product.get("description") or []
+    features = product.get("features") or []
+    desc_html = ""
+    if description:
+        items_html = "".join(f"<li>{_safe(d)}</li>" for d in description if d)
+        desc_html = f'<div class="pc-section"><div class="pc-section-title">📋 Description</div><ul class="pc-list">{items_html}</ul></div>'
+    feat_html = ""
+    if features:
+        items_html = "".join(f"<li>{_safe(feat)}</li>" for feat in features if feat)
+        feat_html = f'<div class="pc-section"><div class="pc-section-title">✨ Features</div><ul class="pc-list">{items_html}</ul></div>'
+    max_h = "max-height:240px;overflow-y:auto;" if compact else ""
+    return f"""
+    <div class="product-card" style="{max_h}">
+        <div class="pc-header">
+            <div class="pc-title">{title}</div>
+            <div class="pc-price">{price_str}</div>
+        </div>
+        {desc_html}
+        {feat_html}
+    </div>"""
+def render_progress(current: int, total: int = PRODUCTS_PER_USER):
+    """Draw the progress bar and the "Product X of Y" label."""
+    percent_done = int((current / total) * 100)
+    bar_markup = f"""
+    <div class="progress-wrap"><div class="progress-fill" style="width:{percent_done}%"></div></div>
+    <div class="progress-label">Product {current} of {total}</div>
+    """
+    st.markdown(bar_markup, unsafe_allow_html=True)
+def render_chat_history(turns: list):
+    """Render the conversation as chat bubbles.
+    Message text is HTML-escaped before it is placed into the markup: user
+    turns are typed by the participant and assistant turns come from the
+    model, and the result is rendered with ``unsafe_allow_html=True``, so
+    unescaped text could inject markup or break the layout.
+    Args:
+        turns: list of dicts with "role" ("assistant" or "user") and "content".
+            Turns with any other role are skipped.
+    """
+    from html import escape  # local import keeps this block self-contained
+    parts = ['<div class="chat-wrap">']
+    for turn in turns:
+        role = turn.get("role", "")
+        # Only &, < and > are escaped; quotes are harmless in element text.
+        content = escape(str(turn.get("content", "")), quote=False)
+        if role == "assistant":
+            parts.append(f'<div class="bubble-label">🤖 AI Sales Agent</div><div class="bubble bubble-ai">{content}</div>')
+        elif role == "user":
+            parts.append(f'<div class="bubble-label" style="text-align:right">You</div><div class="bubble bubble-user">{content}</div>')
+    parts.append("</div>")
+    st.markdown("".join(parts), unsafe_allow_html=True)
+# ---------------------------------------------------------------------------
+# Screen renderers
+# ---------------------------------------------------------------------------
+def screen_welcome(s):
+    """Show the study introduction and advance when "Begin" is clicked.
+    The product count and minimum number of exchanges in the text are taken
+    from PRODUCTS_PER_USER and MIN_TURNS so the instructions cannot drift
+    from the configured values. In DEBUG_MODE the demographics form is
+    skipped and DEBUG_DEMOGRAPHICS is used instead.
+    Args:
+        s: session state dict; "screen" (and "demographics" in debug mode) is updated.
+    """
+    st.markdown("# 🛒 Product Evaluation Study")
+    st.markdown(
+        f"Welcome! In this study you will evaluate **{PRODUCTS_PER_USER} {CATEGORY_DISPLAY[CATEGORY]}** products.\n\n"
+        "For each product you will:\n"
+        "1. Rate how familiar you are with the product\n"
+        "2. Rate how willing you are to buy it\n"
+        f"3. Chat with an AI about the product (**at least {MIN_TURNS} exchanges**)\n"
+        "4. Rate your willingness to buy it again\n"
+        "5. Answer two brief reflection questions\n\n"
+        f"After all {PRODUCTS_PER_USER} products, you're done! The study takes about **20–30 minutes**. "
+        "Thank you for participating!"
+    )
+    if st.button("Begin →", type="primary", use_container_width=True):
+        if DEBUG_MODE:
+            s["demographics"] = DEBUG_DEMOGRAPHICS.copy()
+            s["screen"] = "product_intro"
+        else:
+            s["screen"] = "demographics"
+        st.rerun()
+def screen_demographics(s):
+    """Render the demographics form and store the answers once they are valid.
+    Every field is required. Age must consist of ASCII digits and lie between
+    1 and 120. On success the answers are saved under ``s["demographics"]``
+    and the app moves on to the "product_intro" screen.
+    Args:
+        s: session state dict, updated in place.
+    """
+    st.markdown("## Demographics — About You")
+    st.markdown("All fields are required before you can proceed.")
+    age = st.text_input("Age (years)", placeholder="e.g. 34")
+    gender = st.selectbox("Gender", ["", "Female", "Male"])
+    geographic_region = st.selectbox("Geographic region", ["", "West", "South", "Midwest", "Northeast", "Pacific"])
+    education_level = st.selectbox("Highest education level", [
+        "", "Less than high school", "High school graduate",
+        "Some college, no degree", "Associate's degree",
+        "College graduate/some postgrad", "Postgraduate",
+    ])
+    race = st.selectbox("Race / ethnicity", ["", "Asian", "Hispanic", "White", "Black", "Other"])
+    us_citizen = st.selectbox("Are you a U.S. citizen?", ["", "Yes", "No"])
+    marital_status = st.selectbox("Marital status", [
+        "", "Never been married", "Married", "Living with a partner",
+        "Divorced", "Separated", "Widowed",
+    ])
+    religion = st.selectbox("Religion", [
+        "", "Protestant", "Roman Catholic", "Mormon", "Orthodox", "Jewish",
+        "Muslim", "Buddhist", "Atheist", "Agnostic", "Nothing in particular", "Other",
+    ])
+    religious_attendance = st.selectbox("How often do you attend religious services?", [
+        "", "Never", "Seldom", "A few times a year", "Once or twice a month",
+        "Once a week", "More than once a week",
+    ])
+    political_affiliation = st.selectbox("Political affiliation", [
+        "", "Democrat", "Republican", "Independent", "Something else",
+    ])
+    income = st.selectbox("Household income", [
+        "", "Less than $30,000", "$30,000-$50,000", "$50,000-$75,000",
+        "$75,000-$100,000", "$100,000 or more",
+    ])
+    political_views = st.selectbox("Political views", [
+        "", "Very liberal", "Liberal", "Moderate", "Conservative", "Very conservative",
+    ])
+    household_size = st.selectbox("Household size", ["", "1", "2", "3", "4", "More than 4"])
+    employment_status = st.selectbox("Employment status", [
+        "", "Full-time employment", "Part-time employment", "Self-employed",
+        "Unemployed", "Retired", "Home-maker", "Student",
+    ])
+    if st.button("Next →", type="primary", use_container_width=True):
+        answers = {
+            "age": age, "gender": gender, "geographic_region": geographic_region,
+            "education_level": education_level, "race": race, "us_citizen": us_citizen,
+            "marital_status": marital_status, "religion": religion,
+            "religious_attendance": religious_attendance, "political_affiliation": political_affiliation,
+            "income": income, "political_views": political_views,
+            "household_size": household_size, "employment_status": employment_status,
+        }
+        if not all(f and (f.strip() if isinstance(f, str) else f) for f in answers.values()):
+            st.error("⚠️ Please complete all fields.")
+            return
+        age_text = age.strip()
+        # str.isdigit() alone also accepts characters such as "²" that int()
+        # rejects with ValueError, so require plain ASCII digits first.
+        if not (age_text.isascii() and age_text.isdigit()) or not (1 <= int(age_text) <= 120):
+            st.error("⚠️ Please enter a valid age.")
+            return
+        answers["age"] = age_text
+        s["demographics"] = answers
+        s["screen"] = "product_intro"
+        st.rerun()
+def screen_product_intro(s):
+    """Show the current product, collect the pre-chat ratings and open the chat.
+    When "Start Chat" is clicked with both ratings given (DEBUG_MODE fills in
+    defaults instead), the ratings are stored on the product record, the
+    model's opening message is requested, the conversation is initialised
+    with that message and the app moves on to the "chat" screen.
+    Args:
+        s: session state dict, updated in place.
+    """
+    idx = s["current_product_index"]
+    product = s["products"][idx]
+    render_progress(idx + 1)
+    st.markdown("## Product Evaluation")
+    st.markdown("Please read the product information carefully, then answer the two questions below.")
+    st.markdown(render_product_card_html(product), unsafe_allow_html=True)
+    familiarity_val = st.radio(
+        "How familiar are you with this product?",
+        get_familiarity_choices(),
+        index=None,
+        key=f"familiarity_{idx}",
+    )
+    pre_will_val = st.radio(
+        "How willing would you be to buy this product?",
+        WILLINGNESS_CHOICES,
+        index=None,
+        key=f"pre_will_{idx}",
+    )
+    if not st.button("Start Chat →", type="primary", use_container_width=True):
+        return
+    # Both answers are mandatory outside debug mode.
+    if not DEBUG_MODE and not familiarity_val:
+        st.error("⚠️ Please rate your familiarity.")
+        return
+    if not DEBUG_MODE and not pre_will_val:
+        st.error("⚠️ Please rate your willingness to buy.")
+        return
+    # Debug mode may reach this point without answers; fall back to defaults.
+    if not familiarity_val:
+        familiarity_val = get_familiarity_choices()[0]
+    if not pre_will_val:
+        pre_will_val = WILLINGNESS_CHOICES[3]
+    pre_val = parse_willingness(pre_will_val)
+    record = s["products"][idx]
+    record["familiarity"] = familiarity_val
+    record["pre_willingness"] = pre_val
+    record["pre_willingness_label"] = WILLINGNESS_LABELS[pre_val]
+    # Get opening AI message
+    system_prompt = build_sales_system_prompt(product)
+    opening_user_msg = build_opening_user_message(product)
+    with st.spinner("Starting conversation…"):
+        ai_reply = call_model([
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": opening_user_msg},
+        ])
+    conversation = s["products"][idx]["conversation"]
+    conversation["system_prompt"] = system_prompt
+    conversation["opening_user_message"] = opening_user_msg
+    first_turn = {"turn_index": 0, "role": "assistant", "content": ai_reply,
+                  "timestamp": time.time(), "model": MODEL_NAME}
+    conversation["turns"] = [first_turn]
+    conversation["num_turns"] = 0
+    s["screen"] = "chat"
+    st.rerun()
+def screen_chat(s):
+    idx = s["current_product_index"]
+    product = s["products"][idx]
+    conv = s["products"][idx]["conversation"]
+    render_progress(idx + 1)
+    st.markdown("## Chat with the AI")
+    # Compact product banner
+    title = product.get("title", "Product")
+    price = product.get("price", "N/A")
+    price_str = f"${price}" if price and price != "N/A" and not str(price).startswith("$") else price
+    with st.expander(f"📦 {title} — {price_str} (click to expand product details)"):
+        st.markdown(render_product_card_html(product, compact=True), unsafe_allow_html=True)
+    num_turns = conv["num_turns"]
+    st.markdown(
+        f"The AI is trying to convince you to buy this product. "
+        f"Ask questions, push back, or explore your interest. "
+        f"You need at least **{MIN_TURNS} exchanges** before you can move on."
+    )
+    # Chat history (only user/assistant turns, not the opening system exchange)
+    display_turns = [t for t in conv["turns"] if t["role"] in ("user", "assistant")]
+    render_chat_history(display_turns)
+    # Turn counter
+    if num_turns >= MAX_TURNS:
+        st.info(f"Maximum turns ({MAX_TURNS}) reached. Please proceed.")
+    else:
+        st.caption(f"Turns: {num_turns} / minimum {MIN_TURNS}")
+    # Input
+    if num_turns < MAX_TURNS:
+        user_msg = st.text_area("Your response:", placeholder="Type your response here…", height=100, key=f"chat_input_{idx}_{num_turns}")
+        col1, col2 = st.columns([3, 1])
+        with col2:
+            send_clicked = st.button("Send", type="primary", use_container_width=True)
+        if send_clicked:
+            if not user_msg or not user_msg.strip():
+                st.error("⚠️ Please type a message.")
+                return
+            if len(user_msg.strip().split()) < 5 and not DEBUG_MODE:
+                st.error(f"⚠️ Please write at least 5 words ({len(user_msg.strip().split())} so far).")
+                return
+            user_msg = user_msg.strip()
+            messages = [{"role": "system", "content": conv["system_prompt"]},
+                        {"role": "user", "content": conv["opening_user_message"]}]
+            for turn in conv["turns"]:
+                messages.append({"role": turn["role"], "content": turn["content"]})
+            messages.append({"role": "user", "content": user_msg})
+            with st.spinner("AI is responding…"):
+                ai_reply = call_model(messages)
+            conv["turns"].append({"turn_index": len(conv["turns"]), "role": "user",
+                                   "content": user_msg, "timestamp": time.time()})
+            conv["turns"].append({"turn_index": len(conv["turns"]), "role": "assistant",
+                                   "content": ai_reply, "timestamp": time.time(), "model": MODEL_NAME})
+            conv["num_turns"] = num_turns + 1
+            s["products"][idx]["conversation"] = conv
+            st.rerun()
+    # Done button
+    can_finish = num_turns >= MIN_TURNS or num_turns >= MAX_TURNS or DEBUG_MODE
+    if can_finish:
+        if st.button("I'm done chatting →", use_container_width=True):
+            s["screen"] = "post_will"
+            st.rerun()
+    else:
+        st.button("I'm done chatting →", disabled=True, use_container_width=True,
+                  help=f"Complete at least {MIN_TURNS} exchanges first.")
+def screen_post_willingness(s):
+    idx = s["current_product_index"]
+    product = s["products"][idx]
+    render_progress(idx + 1)
+    st.markdown("## Your View Now")
+    st.markdown("Now that you've chatted with the AI, rate your willingness to buy again.")
+    st.markdown(render_product_card_html(product), unsafe_allow_html=True)
+    post_will_val = st.radio(
+        "How willing would you be to buy this product now?",
+        WILLINGNESS_CHOICES,
+        index=None,
+        key=f"post_will_{idx}",
+    )
+    if st.button("Next →", type="primary", use_container_width=True):
+        if not post_will_val and not DEBUG_MODE:
+            st.error("⚠️ Please rate your willingness to buy.")
+            return
+        post_will_val = post_will_val or WILLINGNESS_CHOICES[3]
+        post_val = parse_willingness(post_will_val)
+        pre_val = s["products"][idx].get("pre_willingness", 4)
+        delta = post_val - pre_val
+        s["products"][idx]["post_willingness"] = post_val
+        s["products"][idx]["post_willingness_label"] = WILLINGNESS_LABELS[post_val]
+        s["products"][idx]["willingness_delta"] = delta
+        s["screen"] = "reflection"
+        st.rerun()
+def screen_reflection(s):
+    idx = s["current_product_index"]
+    render_progress(idx + 1)
+    st.markdown("## Reflection")
+    standout = st.text_area(
+        "What did the AI say that stood out to you most?",
+        placeholder="Describe a specific argument, question, or moment from the conversation…",
+        height=120,
+        key=f"standout_{idx}",
+    )
+    thinking_change = st.text_area(
+        "How did your thinking about this product change (or not change) during the chat? Why?",
+        placeholder="Be as specific as you can…",
+        height=120,
+        key=f"thinking_{idx}",
+    )
+    next_label = "Next Product →" if idx + 1 < PRODUCTS_PER_USER else "Submit Study →"
+    if st.button(next_label, type="primary", use_container_width=True):
+        if not DEBUG_MODE:
+            if not standout or not standout.strip():
+                st.error("⚠️ Please answer the first reflection question.")
+                return
+            if len(standout.strip().split()) < 10:
+                st.error(f"⚠️ Please write at least 10 words for the first question ({len(standout.strip().split())} so far).")
+                return
+            if not thinking_change or not thinking_change.strip():
+                st.error("⚠️ Please answer the second reflection question.")
+                return
+            if len(thinking_change.strip().split()) < 10:
+                st.error(f"⚠️ Please write at least 10 words for the second question ({len(thinking_change.strip().split())} so far).")
+                return
+        standout = (standout or "").strip() or "[debug placeholder]"
+        thinking_change = (thinking_change or "").strip() or "[debug placeholder]"
+        s["products"][idx]["reflection"] = {
+            "standout_moment": standout,
+            "thinking_change": thinking_change,
+        }
+        next_idx = idx + 1
+        s["current_product_index"] = next_idx
+        if next_idx >= PRODUCTS_PER_USER:
+            end_time = time.time()
+            s["meta"] = {
+                "submission_time": end_time,
+                "duration_seconds": round(end_time - s.get("start_time", end_time), 1),
+                "model": MODEL_NAME,
+                "category": CATEGORY,
+            }
+            with st.spinner("Saving your responses…"):
+                save_and_upload(s)
+            s["screen"] = "done"
+        else:
+            s["screen"] = "product_intro"
+        st.rerun()
+def screen_done(s):
+    st.markdown("## ✅ Study Complete!")
+    st.markdown("**Thank you for completing the study!**")
+    st.markdown(f"Here's a summary of how your willingness changed across the {PRODUCTS_PER_USER} products:")
+    rows = []
+    for i, p in enumerate(s["products"]):
+        pre = p.get("pre_willingness", "?")
+        post = p.get("post_willingness", "?")
+        delta = p.get("willingness_delta", 0)
+        arrow = "➡️" if delta == 0 else ("⬆️" if delta > 0 else "⬇️")
+        rows.append({
+            "#": i + 1,
+            "Product": p.get("title", "")[:60] + ("…" if len(p.get("title", "")) > 60 else ""),
+            "Before": WILLINGNESS_LABELS.get(pre, str(pre)),
+            "After": WILLINGNESS_LABELS.get(post, str(post)),
+            "Change": f"{arrow} {delta:+d}" if isinstance(delta, int) else "–",
+        })
+    import pandas as pd
+    st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True)
+    # MTurk submit button
+    assignment_id = s.get("assignment_id", "")
+    turk_submit_to = s.get("turk_submit_to", "")
+    if assignment_id and turk_submit_to:
+        submit_url = f"{turk_submit_to}/mturk/externalSubmit"
+        submission_id = s.get("submission_id", "")
+        st.markdown(f"""
+        <form id="mturk-submit-form" method="POST" action="{submit_url}">
+            <input type="hidden" name="assignmentId" value="{assignment_id}" />
+            <input type="hidden" name="submission_id" value="{submission_id}" />
+            <button type="submit" style="
+                background:#2563eb; color:white; border:none; padding:12px 28px;
+                font-size:1rem; border-radius:6px; cursor:pointer; margin-top:12px;">
+                ✅ Submit to MTurk
+            </button>
+        </form>
+        """, unsafe_allow_html=True)
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+def main():
+    st.set_page_config(page_title="Product Study", page_icon="🛒", layout="centered")
+    inject_css()
+    if "study_state" not in st.session_state:
+        st.session_state.study_state = init_state()
+    s = st.session_state.study_state
+    screen = s.get("screen", "welcome")
+    if screen == "welcome":
+        screen_welcome(s)
+    elif screen == "demographics":
+        screen_demographics(s)
+    elif screen == "product_intro":
+        screen_product_intro(s)
+    elif screen == "chat":
+        screen_chat(s)
+    elif screen == "post_will":
+        screen_post_willingness(s)
+    elif screen == "reflection":
+        screen_reflection(s)
+    elif screen == "done":
+        screen_done(s)
+# Entry point: call main() only when this module is run directly, not when imported.
+if __name__ == "__main__":
+    main()