Spaces:

Israelbliz
/

User-Modeling-Agent

Running

App Files Files Community

Israelbliz committed 9 days ago

Commit

72a7c36

verified ·

1 Parent(s): ad45209

Delete core/persona.py

Browse files

Files changed (1) hide show

core/persona.py +0 -291

core/persona.py DELETED Viewed

@@ -1,291 +0,0 @@
-"""Persona engine — turn a user's review history into a behavioral fingerprint.
-The persona is the spine of the whole system. Both tasks ask it different
-questions:
-    Task A: "Given this persona and this item, how would the user rate and review it?"
-    Task B: "Given this persona, what items would the user want next?"
-A persona has two layers:
-    1. Quantitative signals (computed deterministically from history)
-       - rating cadence: mean, std, distribution shape
-       - review length: mean, std
-       - vocabulary fingerprint: top distinctive terms
-       - domain mix: which categories the user engages with
-       - verified-purchase rate, helpful-vote signal
-    2. Qualitative summary (LLM-generated, cached)
-       - tone descriptor (snarky / earnest / analytical / casual / ...)
-       - common preferences (themes, styles)
-       - common complaints (deal-breakers)
-       - recommended audience for THIS user (one-liner persona pitch)
-The qualitative layer is what makes generated reviews feel like the actual
-user wrote them. Without it, you get generic LLM prose. With it, you get
-behavioral fidelity — which is one of Task A's three scored axes.
-"""
-from __future__ import annotations
-import logging
-from collections import Counter
-from dataclasses import dataclass, field, asdict
-from typing import Any
-import pandas as pd
-from pydantic import BaseModel, Field
-from core.llm import LLMClient
-log = logging.getLogger(__name__)
-# ──────────────────────────────────────────────────────────────────────────────
-# Schemas
-# ──────────────────────────────────────────────────────────────────────────────
class QualitativeSummary(BaseModel):
    """Schema for the LLM-written (qualitative) half of a persona.

    Every field carries a ``description`` stating what the value should
    contain; the four fields mirror the qualitative attributes stored on
    ``UserPersona``.
    """

    # Single adjective summarising how the user writes.
    tone: str = Field(
        description=(
            "One-word tone descriptor: snarky, earnest, analytical, casual, "
            "enthusiastic, terse, verbose, etc."
        ),
    )
    # What the user tends to like.
    preferred_themes: list[str] = Field(
        description="3-5 themes/styles/qualities this user gravitates toward",
    )
    # What the user tends to criticise.
    common_complaints: list[str] = Field(
        description="2-4 recurring deal-breakers or critique patterns",
    )
    # One-sentence pitch of the user's reviewing voice.
    voice_one_liner: str = Field(
        description=(
            "A single sentence describing this user's reviewing voice "
            "as if pitching them to a casting director"
        ),
    )
@dataclass
class UserPersona:
    """Complete persona — quantitative signals + qualitative summary + history.

    The quantitative fields are required at construction time. The
    qualitative fields default to empty values and are rendered as
    ``unspecified`` by :meth:`to_prompt_block` until something fills them in.
    """

    user_id: str

    # Quantitative
    n_reviews: int
    avg_rating: float
    std_rating: float
    avg_review_length: float               # in whitespace-separated words
    std_review_length: float
    verified_rate: float
    domains: list[str]
    n_domains: int
    rating_distribution: dict[int, float]   # {1: 0.05, 2: 0.1, ..., 5: 0.4}
    top_terms: list[str]                    # vocabulary fingerprint

    # Qualitative (lazily filled by PersonaEngine.enrich)
    tone: str = ""
    preferred_themes: list[str] = field(default_factory=list)
    common_complaints: list[str] = field(default_factory=list)
    voice_one_liner: str = ""

    # Sample history for retrieval/grounding (subset of training reviews)
    history_samples: list[dict[str, Any]] = field(default_factory=list)

    @staticmethod
    def _listed(items: list[str]) -> str:
        """Join *items* with ``", "``, or return ``"unspecified"`` when empty."""
        return ", ".join(items) or "unspecified"

    def to_prompt_block(self) -> str:
        """Render the persona as a structured prompt section.

        The layout is one ``USER PERSONA`` header followed by one indented
        ``Label: value`` line per signal, each terminated by a newline.
        Every value that may be empty (rating distribution, domains,
        vocabulary, tone, themes, complaints, voice) falls back to the
        literal ``unspecified`` so the block never contains a dangling
        label with nothing after it.

        Returns:
            The multi-line prompt text, ending with a trailing newline.
        """
        # Sort by star value so the distribution always reads 1★ → 5★.
        dist = " ".join(
            f"{stars}★:{share:.0%}"
            for stars, share in sorted(self.rating_distribution.items())
        ) or "unspecified"

        return (
            f"USER PERSONA\n"
            f"  Reviews written: {self.n_reviews}\n"
            f"  Avg rating: {self.avg_rating:.2f} (±{self.std_rating:.2f})\n"
            f"  Rating distribution: {dist}\n"
            f"  Avg review length: {self.avg_review_length:.0f} words (±{self.std_review_length:.0f})\n"
            f"  Verified-purchase rate: {self.verified_rate:.0%}\n"
            f"  Active domains: {self._listed(self.domains)}\n"
            # Only the 15 most frequent terms are shown to keep the prompt short.
            f"  Vocabulary fingerprint: {self._listed(self.top_terms[:15])}\n"
            f"  Tone: {self.tone or 'unspecified'}\n"
            f"  Preferred themes: {self._listed(self.preferred_themes)}\n"
            f"  Common complaints: {self._listed(self.common_complaints)}\n"
            f"  Voice: {self.voice_one_liner or 'unspecified'}\n"
        )

    def as_dict(self) -> dict:
        """Return the persona as a plain ``dict`` (via ``dataclasses.asdict``)."""
        return asdict(self)
-# ──────────────────────────────────────────────────────────────────────────────
-# Engine
-# ──────────────────────────────────────────────────────────────────────────────
-# A small set of generic English stopwords + Amazon-review noise. Keeping
-# this in-module avoids pulling in nltk's download flow.
-_STOPWORDS = set("""
-a an the and or but if then else when while of in on at by to for with from
-into onto over under is are was were be been being have has had do does did
-i you he she it we they me him her us them my your his its our their this
-that these those there here what which who whom whose how why so as too very
-just also more most some any all each every other another such no not nor only
-own same can will would could should might may must one two three really get
-got gets just like dont didnt isnt arent wasnt werent havent hadnt hasnt cant
-couldnt wouldnt shouldnt wont thats whats theres heres ive ill ive youve im
-""".split())
class PersonaEngine:
    """Build personas from review history.

    Two entry points:
        from_dataframe(user_id, training_reviews_df) -> UserPersona
        enrich(persona) -> UserPersona   # adds qualitative summary via LLM
    """

    def __init__(self, llm: LLMClient | None = None,
                 top_terms_k: int = 20,
                 history_samples_k: int = 8):
        """Configure the engine.

        Args:
            llm: Client used by :meth:`enrich`. A default ``LLMClient()`` is
                created only when ``None`` is passed.
            top_terms_k: Number of terms kept in the vocabulary fingerprint.
            history_samples_k: Number of most recent reviews kept as samples.
        """
        # Explicit None check: a caller-supplied client must be honoured even
        # if the object happens to evaluate as falsy.
        self.llm = llm if llm is not None else LLMClient()
        self.top_terms_k = top_terms_k
        self.history_samples_k = history_samples_k

    # ─────────────────────────── Quantitative ────────────────────────────

    def from_dataframe(self, user_id: str,
                       reviews: pd.DataFrame) -> UserPersona:
        """Build a UserPersona from a DataFrame of training reviews.

        Rows whose ``user_id`` differs from *user_id* are ignored.

        Expected columns: user_id, parent_asin, rating, text, verified_purchase,
                          domain, timestamp.

        Args:
            user_id: The user to profile.
            reviews: Review rows; may contain other users as well.

        Returns:
            A persona with all quantitative fields and ``history_samples``
            populated; qualitative fields are left at their defaults.

        Raises:
            ValueError: If *reviews* holds no row for *user_id*.
        """
        user_reviews = reviews[reviews["user_id"] == user_id]
        if user_reviews.empty:
            raise ValueError(f"No reviews found for user_id={user_id!r}")

        ratings = user_reviews["rating"].astype(float)
        # Word counts; missing text counts as an empty (0-word) review.
        lengths = user_reviews["text"].fillna("").str.split().str.len()

        # Rating distribution as proportions, keyed by the rounded star value.
        dist = ratings.round().astype(int).value_counts(normalize=True).to_dict()
        rating_dist = {int(k): float(v) for k, v in dist.items()}

        return UserPersona(
            user_id=user_id,
            n_reviews=len(user_reviews),
            avg_rating=float(ratings.mean()),
            # Series.std() is NaN for a single observation; report 0 instead.
            std_rating=float(ratings.std()) if len(ratings) > 1 else 0.0,
            avg_review_length=float(lengths.mean()),
            std_review_length=float(lengths.std()) if len(lengths) > 1 else 0.0,
            verified_rate=float(user_reviews["verified_purchase"].mean()),
            domains=sorted(user_reviews["domain"].unique().tolist()),
            n_domains=int(user_reviews["domain"].nunique()),
            rating_distribution=rating_dist,
            # Vocabulary fingerprint: most common non-stopword tokens.
            top_terms=self._top_terms(user_reviews["text"].tolist()),
            history_samples=self._history_samples(user_reviews),
        )

    def _history_samples(self, user_reviews: pd.DataFrame) -> list[dict]:
        """Return the ``history_samples_k`` most recent reviews as plain dicts.

        Each dict has the keys ``parent_asin``, ``rating``, ``text`` (at most
        500 characters) and ``domain``. Missing or non-string text becomes
        ``""`` instead of raising, matching how the length and vocabulary
        statistics treat missing text.
        """
        recent = (
            user_reviews.sort_values("timestamp", ascending=False)
                        .head(self.history_samples_k)
        )
        samples = []
        for _, row in recent.iterrows():
            text = row["text"]
            samples.append({
                "parent_asin": row["parent_asin"],
                "rating": float(row["rating"]),
                # NaN/None text cannot be sliced — substitute an empty string.
                "text": text[:500] if isinstance(text, str) else "",
                "domain": row["domain"],
            })
        return samples

    def _top_terms(self, texts: list[str]) -> list[str]:
        """Most frequent content tokens, stopwords removed.

        Tokens are lower-cased, stripped of surrounding punctuation, and kept
        only if they are purely alphabetic, longer than two characters and not
        in ``_STOPWORDS``. Non-string entries in *texts* are skipped.
        """
        counter: Counter[str] = Counter()
        for txt in texts:
            if not isinstance(txt, str):
                continue
            tokens = (t.lower().strip(".,!?\"'()[]{}:;") for t in txt.split())
            counter.update(
                t for t in tokens
                if len(t) > 2 and t not in _STOPWORDS and t.isalpha()
            )
        return [word for word, _ in counter.most_common(self.top_terms_k)]

    # ─────────────────────────── Qualitative ─────────────────────────────

    def enrich(self, persona: UserPersona) -> UserPersona:
        """Add LLM-generated qualitative summary to an existing persona.

        Requests a ``QualitativeSummary`` from the client's ``"reasoning"``
        model. If the persona has no history samples, or the LLM call raises,
        a deterministic summary derived from the quantitative signals is used
        instead so Voice/Tone never end up empty.

        Args:
            persona: The persona to enrich. It is modified in place.

        Returns:
            The same *persona* object, with qualitative fields filled in.
        """
        if not persona.history_samples:
            log.warning("User %s has no history samples; skipping enrichment",
                        persona.user_id)
            return self._apply_deterministic_fallback(persona)

        # Each sample is capped at 400 characters to bound prompt size.
        sample_block = "\n\n".join(
            f"[{i}] Rating: {s['rating']}★  Domain: {s['domain']}\n{s['text'][:400]}"
            for i, s in enumerate(persona.history_samples, start=1)
        )
        prompt = (
            f"Below are review samples from a single user. Read them carefully "
            f"and infer their reviewing voice.\n\n"
            f"{sample_block}\n\n"
            f"Quantitative signals about this user:\n"
            f"- Average rating: {persona.avg_rating:.2f} of 5\n"
            f"- Average review length: {persona.avg_review_length:.0f} words\n"
            f"- Vocabulary they use often: {', '.join(persona.top_terms[:15])}\n\n"
            f"Produce a qualitative summary of their reviewer voice. "
            f"Be concise and concrete. If the samples are too sparse or generic, "
            f"infer the most plausible voice rather than refusing."
        )
        try:
            summary = self.llm.structured(
                prompt, QualitativeSummary, model="reasoning",
                system="You are a behavioral analyst specializing in online review patterns. Always produce valid output.",
            )
            # An empty value from the LLM never overwrites an existing one.
            persona.tone = summary.tone or persona.tone
            persona.preferred_themes = summary.preferred_themes or persona.preferred_themes
            persona.common_complaints = summary.common_complaints or persona.common_complaints
            persona.voice_one_liner = summary.voice_one_liner or persona.voice_one_liner
        except Exception as e:
            # Deliberately broad: enrichment is best-effort and any failure
            # of the LLM call should degrade to the fallback, not crash.
            log.warning("LLM enrichment failed for %s (%s); using deterministic fallback",
                        persona.user_id, type(e).__name__)
            persona = self._apply_deterministic_fallback(persona)
        return persona

    @staticmethod
    def _apply_deterministic_fallback(persona: UserPersona) -> UserPersona:
        """Fill in tone/themes/voice from quantitative signals when LLM fails.

        Only fields that are still empty are written; ``common_complaints``
        is left untouched. The persona is modified in place and returned.

        This isn't as rich as an LLM summary, but it guarantees downstream
        query construction has SOMETHING to work with — much better than
        an empty string.
        """
        # Tone bucket from avg rating
        if persona.avg_rating >= 4.5:
            tone = "enthusiastic"
        elif persona.avg_rating >= 3.8:
            tone = "earnest"
        elif persona.avg_rating >= 3.0:
            tone = "measured"
        else:
            tone = "critical"

        # Use top distinctive terms as proxy themes (filter out true generics);
        # if everything is generic, fall back to the raw top three terms.
        generic_terms = {"book", "read", "story", "movie", "film", "great", "good",
                         "really", "much", "first", "next", "through", "about"}
        candidate_themes = [t for t in persona.top_terms if t not in generic_terms][:5]
        themes = candidate_themes or persona.top_terms[:3]

        # Domain-grounded voice
        domain_str = "/".join(persona.domains) if persona.domains else "general"
        if persona.avg_review_length < 30:
            length_descriptor = "writes brief reviews"
        elif persona.avg_review_length > 150:
            length_descriptor = "writes detailed reviews"
        else:
            length_descriptor = "writes moderate-length reviews"

        # "An enthusiastic ..." / "An earnest ..." vs. "A measured ..." / "A critical ...".
        article = "An" if tone[0] in "aeiou" else "A"
        voice = (
            f"{article} {tone} {domain_str} reviewer who {length_descriptor} "
            f"(avg {persona.avg_rating:.1f}★ over {persona.n_reviews} reviews)."
        )

        if not persona.tone:
            persona.tone = tone
        if not persona.preferred_themes:
            persona.preferred_themes = themes
        if not persona.voice_one_liner:
            persona.voice_one_liner = voice
        return persona