"""User Modeling Agent — the demo.
DSN × BCT LLM Agent Challenge · Task A.
Takes a user persona and product details as input, and generates a star
rating and a written review as that user would write it — then critiques
and revises its own draft (self-reflection). Optionally renders the review
in Nigerian English.
Three ways to use it:
1. Compose a persona — type a persona + product (the brief's input contract)
2. Dataset reader — pick a real user, compare against ground truth
3. Build from past reviews — paste a person's past reviews; the agent builds the persona
Run:
streamlit run app.py
"""
from __future__ import annotations
import html
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parent
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
import pandas as pd
import streamlit as st
from core.config import settings
from core.persona import PersonaEngine, UserPersona
from task_a_user_modeling.agent import ImpersonationAgent, ItemInput
# Page-level Streamlit settings: title, icon, wide layout, sidebar expanded.
st.set_page_config(page_title="User Modeling Agent", page_icon="✶",
layout="wide", initial_sidebar_state="expanded")
# Short alias for html.escape, used wherever text is interpolated into markup.
esc = html.escape
# ══════════════════════════════════════════════════════════════════════════════
# Design system
# ══════════════════════════════════════════════════════════════════════════════
# Stylesheet payload rendered as raw HTML; the literal holds only a newline here.
CSS = """
"""
st.markdown(CSS, unsafe_allow_html=True)
# ══════════════════════════════════════════════════════════════════════════════
# HTML builders
# ══════════════════════════════════════════════════════════════════════════════
def stars(r: float) -> str:
    """Render a rating as a five-glyph bar of filled and hollow stars.

    The rating is clamped to the 0–5 range and rounded half-up to the nearest
    whole star, so the result is always exactly five characters long.

    Args:
        r: Rating to display; values outside 0–5 are clamped.

    Returns:
        "★" repeated for the filled stars followed by "☆" for the remainder.
    """
    # Clamp first: an out-of-range rating would otherwise yield more than five
    # filled stars, or a bar of six or more hollow ones for a negative rating.
    bounded = min(5.0, max(0.0, r))
    # int(x + 0.5) rounds half-up for non-negative x; round() uses
    # round-half-to-even and would send 2.5 and 4.5 down instead of up.
    filled = int(bounded + 0.5)
    return "★" * filled + "☆" * (5 - filled)
def persona_card(p: UserPersona) -> str:
    """Build the persona summary markup for a ``UserPersona``.

    The card shows the voice one-liner (or a placeholder when it is empty),
    the average rating to one decimal, and the escaped preferred themes and
    common complaints. An em dash stands in for an empty list.
    """
    liked = [f'{esc(theme)}' for theme in p.preferred_themes]
    themes = "".join(liked) or '—'
    disliked = [f'{esc(complaint)}' for complaint in p.common_complaints]
    comps = "".join(disliked) or '—'
    # Review-count label; a persona with no reviews is labelled "composed".
    nrev = 'composed' if not p.n_reviews else f'{p.n_reviews}'
    return f"""
The Person · persona
“{esc(p.voice_one_liner or 'No voice captured.')}”
{p.avg_rating:.1f}★
avg rating
drawn to{themes}
put off by{comps}
"""
# Build the self-reflection timeline markup shown with a generated review.
#   iters   — number of critique cycles, interpolated into the heading.
#   refined — True selects the "critique → revised → re-checked" sequence,
#             False selects the "critique passed → accepted" sequence.
#   notes   — optional critic notes; the first entry that is non-empty and
#             not "passed" (case-insensitive, stripped) is shown, escaped.
# Returns the assembled markup as one string.
def reflection_stepper(iters: int, refined: bool, notes: list[str] | None) -> str:
# Both sequences open with the first-draft step.
steps = [''
'
First draft
'
'
generated in-voice
']
# The critic objected: add the critique, revision and re-check steps.
if refined:
steps += [''
'
Self-critique
'
'
found issues
',
''
'
Revised draft
'
'
rewritten with feedback
',
''
'
Re-checked
'
'
critique cleared
']
# The critic raised nothing: add the critique and acceptance steps.
else:
steps += [''
'
Self-critique
'
'
passed first pass
',
''
'
Accepted
'
'
no revision needed
']
# Optional footnote carrying the first substantive critic note.
note = ""
if notes:
# Ignore blank notes and the literal "passed" marker.
real = [n for n in notes if n and n.strip().lower() != "passed"]
if real:
note = f'The critic flagged: {esc(real[0])}
'
# Heading, then the joined steps, then the footnote (possibly empty).
return f"""
Self-reflection · {iters} critique cycle(s)
{''.join(steps)}
{note}
"""
# ══════════════════════════════════════════════════════════════════════════════
# Cached resources
# ══════════════════════════════════════════════════════════════════════════════
@st.cache_data(show_spinner=False)
def load_data():
    """Read the processed reviews and items tables from parquet.

    Both files live under ``settings.processed_dir``. Returns a
    ``(reviews, items)`` tuple; the call is wrapped in ``st.cache_data``.
    """
    reviews_path = settings.processed_dir / "reviews.parquet"
    items_path = settings.processed_dir / "items.parquet"
    return pd.read_parquet(reviews_path), pd.read_parquet(items_path)
@st.cache_resource(show_spinner=False)
def get_engines():
    """Create the ``(PersonaEngine, ImpersonationAgent)`` pair.

    Wrapped in ``st.cache_resource``, so the pair is built through the
    resource cache rather than directly on each call.
    """
    engine = PersonaEngine()
    impersonator = ImpersonationAgent()
    return engine, impersonator
def composed_persona(desc: str, themes: list[str], dislikes: list[str],
                     tone: str, avg_rating: float) -> UserPersona:
    """Build a UserPersona from typed input — the brief's persona-as-input contract.

    Args:
        desc: Free-text description of the reviewing voice; stored as the
            persona's ``voice_one_liner``.
        themes: Things the person is drawn to (``preferred_themes``).
        dislikes: Things the person is put off by (``common_complaints``).
        tone: Tone label passed through unchanged.
        avg_rating: The person's typical rating, used both as ``avg_rating``
            and to shape the synthetic rating distribution.

    Returns:
        A ``UserPersona`` with ``user_id="composed"``, zero reviews, no
        history samples, and fixed placeholder statistics for the fields
        that cannot be derived from typed input.
    """
    # rating distribution skewed around the stated average
    lo = int(avg_rating)
    hi = min(5, lo + 1)
    dist = {lo: 0.55, hi: 0.35} if lo != hi else {lo: 0.9}
    # Always reserve 0.1 for a neutral 3★. When 3 is already one of the two
    # buckets, add to it rather than skipping it, so the weights still sum
    # to 1.0 (a plain setdefault dropped that 0.1 for averages in [2, 4)).
    dist[3] = round(dist.get(3, 0.0) + 0.1, 2)
    return UserPersona(
        user_id="composed", n_reviews=0, avg_rating=avg_rating,
        std_rating=0.6, avg_review_length=90.0, std_review_length=30.0,
        verified_rate=1.0, domains=[], n_domains=0,
        rating_distribution=dist, top_terms=[],
        tone=tone, preferred_themes=themes, common_complaints=dislikes,
        voice_one_liner=desc, history_samples=[],
    )
def persona_from_reviews(rows: list[dict]) -> UserPersona:
    """Build a UserPersona from pasted past reviews.

    Each row: {rating, title, domain, text, date(optional)}. Assembles them
    into the column shape PersonaEngine.from_dataframe expects, then lets the
    engine do the real modelling. This is the agent building a persona itself
    from raw user history.

    Only ``rating``, ``text``, ``domain`` and ``date`` are forwarded; the
    ``title`` key is not copied into the assembled records. Every record is
    given the user id "pasted", a synthetic ``parent_asin`` and
    ``verified_purchase=True``.

    Args:
        rows: Pasted reviews in entry order.

    Returns:
        The persona built by ``PersonaEngine.from_dataframe`` and then
        passed through ``PersonaEngine.enrich``.
    """
    import pandas as _pd

    base_day = _pd.Timestamp("2020-01-01")
    records = []
    for i, r in enumerate(rows):
        # PersonaEngine sorts history by timestamp; fall back to entry order
        ts_val = base_day + _pd.Timedelta(days=i)
        raw_date = r.get("date")
        if raw_date:
            try:
                parsed = _pd.Timestamp(raw_date)
            except Exception:
                # Deliberately best-effort: any unparsable date uses the fallback.
                parsed = _pd.NaT
            if not _pd.isna(parsed):
                # Drop tz info so a tz-aware date never shares the column with
                # the naive fallback timestamps, which would not sort together.
                if parsed.tzinfo is not None:
                    parsed = parsed.tz_localize(None)
                ts_val = parsed
        records.append({
            "user_id": "pasted",
            "parent_asin": f"pasted_{i}",
            "rating": float(r["rating"]),
            "text": r["text"],
            "verified_purchase": True,
            "domain": r["domain"],
            "timestamp": ts_val,
        })
    df = _pd.DataFrame(records)
    engine = PersonaEngine()
    persona = engine.from_dataframe("pasted", df)
    return engine.enrich(persona)
# Raw-HTML block emitted at the top of the page (the literal is a bare newline here).
st.markdown("""
""", unsafe_allow_html=True)
# Load both tables; on any failure show the error in-page and stop this run.
try:
reviews, items = load_data()
except Exception as e:
st.error(f"Could not load data — ensure data/processed/*.parquet exist.\n\n{e}")
st.stop()
# Partition the reviews on their "split" column.
train = reviews[reviews["split"] == "train"]
test = reviews[reviews["split"] == "test"]
# Persona engine and agent used by the tabs below.
persona_engine, agent = get_engines()
# Sidebar: the Naija Mode toggle, a short explainer, and the active LLM provider.
with st.sidebar:
st.markdown("## ✶ Controls")
st.markdown(
''
'
🇳🇬 Naija Mode
'
'
'
'NIGERIAN-ENGLISH LOCALIZATION
',
unsafe_allow_html=True)
# Toggle state is passed to agent.run(..., naija_mode=naija) by each tab.
naija = st.toggle("Render output in Nigerian English", value=False)
# Status line reflecting the toggle.
if naija:
st.markdown(
''
'● NAIJA MODE ACTIVE
', unsafe_allow_html=True)
else:
st.markdown(
''
'○ OFF · STANDARD ENGLISH
',
unsafe_allow_html=True)
st.divider()
st.markdown("### How it works")
st.caption("The agent builds a persona, drafts a review in that voice, then "
"runs a self-reflection loop — a critic LLM checks rating-text "
"consistency, voice match and on-topic fit, and the agent revises "
"if the critic objects.")
st.divider()
# Display label for the configured provider; names not in the map are capitalized.
_prov = {"openai": "OpenAI", "gemini": "Gemini"}.get(
settings.llm_provider.lower(), settings.llm_provider.capitalize())
st.caption(f"LLM · {_prov}")
# Session slots for the latest generation result and the context it was made from.
st.session_state.setdefault("result", None)
st.session_state.setdefault("ctx", None)
# Banner shown while Naija Mode is on.
if naija:
st.markdown(
''
'🇳🇬'
'Naija Mode is active'
'output localized to Nigerian English'
'
', unsafe_allow_html=True)
# ══════════════════════════════════════════════════════════════════════════════
# Tabs — Compose (primary) · Dataset reader · Build from past reviews
# ══════════════════════════════════════════════════════════════════════════════
# Three input modes, one tab each; all of them write to the same session-state slots.
tab_compose, tab_dataset, tab_history = st.tabs([
"✎ Compose a Persona",
"⊞ Dataset Reader",
"❏ Build From Past Reviews"])
# ── COMPOSE ───────────────────────────────────────────────────────────────────
# Compose tab: persona and product are typed in directly.
with tab_compose:
st.markdown('Input · Persona and Product
',
unsafe_allow_html=True)
# Persona inputs: voice description, comma-separated likes and dislikes, tone, typical rating.
with st.expander("The Person", expanded=True):
p_desc = st.text_area(
"Describe the Person's Reviewing Voice",
value="Someone who loves character-driven stories and "
"rich world-building, but is impatient with slow pacing.",
height=90, key="p_desc")
p_themes = st.text_input("Drawn To (Comma-Separated)",
value="character development, immersive worlds, "
"original plots", key="p_themes")
p_dislikes = st.text_input("Put Off By (Comma-Separated)",
value="slow pacing, thin characters", key="p_dis")
c1, c2 = st.columns(2)
with c1:
p_tone = st.selectbox("Tone", ["enthusiastic", "analytical", "casual",
"critical", "earnest", "terse"], key="p_tone")
with c2:
p_rating = st.slider("Typical Rating", 1.0, 5.0, 4.0, 0.5, key="p_rate")
# Product inputs: title, domain and synopsis.
with st.expander("The Product", expanded=True):
i_title = st.text_input("Title", value="The Midnight Library", key="i_title")
i_domain = st.selectbox("Domain", ["Books", "Movies_and_TV", "Kindle_Store",
"Other"], key="i_domain")
i_desc = st.text_area(
"Description / Synopsis",
value="A novel about a library between life and death, where each "
"book lets a woman try a different version of her life.",
height=110, key="i_desc")
go = st.button("Generate review ✶", key="go_compose", use_container_width=True)
# On click: build persona and item, run the agent, store the outcome in session state.
if go:
try:
with st.status("The agent is working…", expanded=True) as status:
# Split the comma-separated fields, dropping blank entries.
themes = [t.strip() for t in p_themes.split(",") if t.strip()]
dislikes = [t.strip() for t in p_dislikes.split(",") if t.strip()]
st.write("Assembling the persona…")
persona = composed_persona(p_desc, themes, dislikes, p_tone, p_rating)
item = ItemInput(parent_asin="composed", title=i_title,
description=i_desc, categories="",
domain=i_domain)
st.write("Drafting in the person's voice, then self-critiquing…")
result = agent.run(persona, item, naija_mode=naija)
st.write("Self-reflection complete")
status.update(label="Review generated", state="complete")
st.session_state.result = result
# A composed persona has no ground-truth review to compare against.
st.session_state.ctx = {"persona": persona, "item": item, "truth": None}
# Any failure clears the stored result; the notice carries only the exception class name.
except Exception as e:
st.session_state.result = None
st.markdown(f''
f'
Generation interrupted
'
f'The model call did not complete — it may be rate-limited. '
f'Try again shortly.
'
f'{esc(type(e).__name__)} ', unsafe_allow_html=True)
# ── DATASET READER ────────────────────────────────────────────────────────────
# Dataset tab: pick a real user and compare the output with a held-out review.
with tab_dataset:
st.markdown('Input · A Real Person From the Data
',
unsafe_allow_html=True)
# Eligible users have at least 5 train reviews and also appear in the test split.
elig = train.groupby("user_id").size().reset_index(name="n")
elig = elig[(elig["n"] >= 5) & (elig["user_id"].isin(set(test["user_id"])))]
# Offer at most 40 of them; the fixed random_state keeps the sample stable across reruns.
users = elig.sample(min(40, len(elig)), random_state=7)["user_id"].tolist()
with st.expander("The Person", expanded=True):
st.caption("Pick a real person. The agent builds their persona from "
"actual history and is scored against a held-out review.")
user = st.selectbox("Person", users, key="sel_user")
go_ds = st.button("Generate review ✶", key="go_ds", use_container_width=True)
# On click with a user selected: assemble the target item, build the persona, run the agent.
if go_ds and user:
try:
with st.status("The agent is working…", expanded=True) as status:
ut = test[test["user_id"] == user]
# Without a test row there is nothing to score against, so stop the run.
if ut.empty:
status.update(label="No held-out item for this person",
state="error")
st.stop()
# The first test-split row for this user is the held-out target.
tr = ut.iloc[0]
tid = tr["parent_asin"]
meta = items[items["parent_asin"] == tid]
# No item metadata: fall back to the title on the review row, with empty description.
if meta.empty:
item = ItemInput(parent_asin=tid, title=str(tr.get("title", "")),
description="", categories="", domain=tr["domain"])
# Metadata found: description is capped at 1500 characters and
# average_rating is passed only when it is not NaN.
else:
m = meta.iloc[0]
item = ItemInput(parent_asin=tid, title=str(m.get("title", "")),
description=str(m.get("description", ""))[:1500],
categories=str(m.get("categories", "")),
domain=tr["domain"],
average_rating=(float(m["average_rating"])
if pd.notna(m.get("average_rating"))
else None))
st.write("Reading the person's history…")
# The persona is built from the train split only, then enriched.
persona = persona_engine.from_dataframe(user, train)
persona = persona_engine.enrich(persona)
st.write(f"Persona built from {persona.n_reviews} reviews")
st.write("Drafting in their voice, then self-critiquing…")
result = agent.run(persona, item, naija_mode=naija)
st.write("Self-reflection complete")
status.update(label="Review generated", state="complete")
st.session_state.result = result
# Keep the held-out rating and text for the side-by-side comparison.
st.session_state.ctx = {"persona": persona, "item": item,
"truth": {"rating": float(tr["rating"]),
"text": str(tr["text"])}}
# Any failure clears the stored result; the notice carries only the exception class name.
except Exception as e:
st.session_state.result = None
st.markdown(f''
f'
Generation interrupted
'
f'The model call did not complete — it may be rate-limited. '
f'Try again shortly.
'
f'{esc(type(e).__name__)} ', unsafe_allow_html=True)
# ── BUILD FROM PAST REVIEWS ────────────────────────────────────────────────────
# Past-reviews tab: the persona is built from reviews pasted by the user.
with tab_history:
st.markdown('Input · Raw Past Reviews
',
unsafe_allow_html=True)
st.markdown("Paste a person's past reviews — the agent builds their persona "
"from this history, then writes a review of a new product. "
"Three to four reviews give the strongest persona.")
DOMAINS = ["Books", "Movies_and_TV", "Kindle_Store", "Other"]
# Five review slots; only slots with non-blank review text are collected.
hist_rows = []
for i in range(5):
with st.expander(f"Past Review {i + 1}", expanded=(i == 0)):
hc1, hc2, hc3 = st.columns([1, 2, 1])
with hc1:
h_rating = st.selectbox("Rating", [1.0, 2.0, 3.0, 4.0, 5.0],
index=3, key=f"h_rate_{i}")
with hc2:
h_title = st.text_input("Product Title", key=f"h_title_{i}",
placeholder="e.g. The Silent Patient")
with hc3:
h_domain = st.selectbox("Domain", DOMAINS, key=f"h_dom_{i}")
h_text = st.text_area("Review Text", key=f"h_text_{i}", height=80,
placeholder="Paste what this person wrote\u2026")
h_date = st.text_input("Date (Optional, e.g. 2024-03)", key=f"h_date_{i}",
placeholder="optional")
# A blank date is stored as None.
if h_text.strip():
hist_rows.append({"rating": h_rating, "title": h_title.strip(),
"domain": h_domain, "text": h_text.strip(),
"date": h_date.strip() or None})
st.markdown(''
'The New Product to Review
', unsafe_allow_html=True)
# Target product inputs.
th1, th2 = st.columns([2, 1])
with th1:
ht_title = st.text_input("Title", value="The Midnight Library",
key="ht_title")
with th2:
# The label carries a trailing space, so it differs from the "Domain" labels above.
ht_domain = st.selectbox("Domain ", DOMAINS, key="ht_domain")
ht_desc = st.text_area("Description / Synopsis", height=90, key="ht_desc",
value="A novel about a library between life and death, "
"where each book lets a woman try a different "
"version of her life.")
go_hist = st.button("Build persona & generate review ✶", key="go_hist",
use_container_width=True)
# On click: require at least one pasted review, then build the persona and run the agent.
if go_hist:
if not hist_rows:
st.warning("Add at least one past review with text so the agent "
"has history to model.")
else:
try:
with st.status("The agent is working…", expanded=True) as status:
st.write(f"Reading {len(hist_rows)} pasted review(s)…")
persona = persona_from_reviews(hist_rows)
st.write(f"Persona built by the agent from "
f"{persona.n_reviews} reviews")
item = ItemInput(parent_asin="pasted_target", title=ht_title,
description=ht_desc, categories="",
domain=ht_domain)
st.write("Drafting in the inferred voice, then self-critiquing…")
result = agent.run(persona, item, naija_mode=naija)
st.write("Self-reflection complete")
status.update(label="Review generated", state="complete")
st.session_state.result = result
# Pasted history has no held-out review to compare against.
st.session_state.ctx = {"persona": persona, "item": item,
"truth": None}
# Any failure clears the stored result; the notice carries only the exception class name.
except Exception as e:
st.session_state.result = None
st.markdown(f''
f'
Generation interrupted
'
f'The model call did not complete \u2014 it may be '
f'rate-limited. Try again shortly.
'
f'
'
f'{esc(type(e).__name__)} ',
unsafe_allow_html=True)
# ══════════════════════════════════════════════════════════════════════════════
# Result — shown below all three tabs
# ══════════════════════════════════════════════════════════════════════════════
# Latest result and its context, as stored by whichever tab ran last.
res = st.session_state.result
ctx = st.session_state.ctx
st.markdown("---")
# Render the result only when both the result and its context are present.
if res and ctx:
st.markdown(persona_card(ctx["persona"]), unsafe_allow_html=True)
it = ctx["item"]
st.markdown(f"""
The Item
{esc(it.domain)}
{esc(it.title)}
""", unsafe_allow_html=True)
# Badge text is set only when the result was produced in Naija Mode.
badge = 'NAIJA VOICE' if res.naija_mode else ""
st.markdown(f"""
The Generated Review · written as the person
{res.rating:.1f}
{stars(res.rating)}{badge}
{esc(res.review)}
""", unsafe_allow_html=True)
st.markdown(reflection_stepper(res.reflection_iterations,
res.reflection_refined,
res.reflection_notes), unsafe_allow_html=True)
st.markdown('Why This Rating
', unsafe_allow_html=True)
# With ground truth (dataset tab) show reasoning and the real review in two columns.
truth = ctx.get("truth")
if truth:
col1, col2 = st.columns(2)
with col1:
st.markdown(f"""
The agent rated it {res.rating:.1f}★
{esc(res.reasoning)}
""", unsafe_allow_html=True)
with col2:
# Absolute rating gap, bucketed: <= 0.5 "good", <= 1.0 "mid", otherwise "far".
d = abs(res.rating - truth["rating"])
dc = "good" if d <= 0.5 else ("mid" if d <= 1.0 else "far")
t = truth["text"].replace("
", "\n").replace("
", "\n")
# Cap the real review at 520 characters, adding an ellipsis when it was cut.
t = t[:520] + ("…" if len(t) > 520 else "")
st.markdown(f"""
The person actually wrote
Δ {d:.1f}★
{stars(truth['rating'])}
{truth['rating']:.1f}★
{esc(t)}
""", unsafe_allow_html=True)
# Without ground truth show the reasoning alone.
else:
st.markdown(f"""
The agent rated it {res.rating:.1f}★
{esc(res.reasoning)}
""", unsafe_allow_html=True)
st.caption(f"Grounded on {res.used_history_count} similar past reviews")
# Nothing generated yet: show the empty-state hint.
else:
st.markdown('Compose a persona and a product, or pick a '
'dataset person — then press Generate. The agent writes '
'the review in that person\'s voice and shows its reasoning.
',
unsafe_allow_html=True)
# Closing raw-HTML block (the literal is a bare newline here).
st.markdown("""
""", unsafe_allow_html=True)