# Page residue from the hosting site (not part of the module):
# ashish-sarvam's picture
# Upload folder using huggingface_hub
# fc1a684 verified
import csv
import random
from pathlib import Path
from typing import Dict, List, Optional, Sequence
from pydantic import BaseModel
# ---------- Models ----------
class BigFive(BaseModel):
    """Five float personality scores, one per single-letter trait field.

    NOTE(review): the letters presumably follow the OCEAN acronym (openness,
    conscientiousness, extraversion, agreeableness, neuroticism) -- confirm.
    No value range is enforced here; the derivation rules in this file
    compare the scores against thresholds between 0.3 and 0.7.
    """

    # Field order is kept as declared; it determines the model's field order.
    O: float
    C: float
    E: float
    A: float
    N: float
class DerivedStyles(BaseModel):
    """Categorical style labels, one string per behavioral dimension.

    Each field holds a free-form string; this model does not restrict the
    set of allowed values.
    """

    communication_style: str
    decision_making_style: str
    stress_response: str
    interaction_preference: str
    expertise_attitude: str
class User(BaseModel):
    """A complete user profile: identity, raw scores, styles and labels."""

    # Identity fields.
    first_name: str
    gender: str
    # Raw personality scores and the styles associated with them.
    big_five: BigFive
    styles: DerivedStyles
    # Short descriptive tags for the user.
    labels: List[str]
# ---------- Derivations ----------
class _Derive:
    """Stateless rules that turn Big Five scores into categorical styles.

    Within every rule chain the checks are applied in a fixed priority order
    and the first matching rule wins, so the order of the conditions is part
    of the behavior. All comparisons are strict (``>`` / ``<``).
    """

    @staticmethod
    def _derive_communication_style(bf: BigFive) -> str:
        """Return the communication style implied by the E, A and N scores."""
        e_score, a_score, n_score = bf.E, bf.A, bf.N
        if e_score > 0.7 and a_score > 0.6:
            style = "friendly_talkative"
        elif e_score < 0.3 and n_score > 0.6:
            style = "reserved_cautious"
        elif a_score < 0.4 and n_score < 0.4:
            style = "direct_assertive"
        elif e_score > 0.6 and n_score > 0.6:
            style = "expressive_emotional"
        else:
            style = "balanced_moderate"
        return style

    @staticmethod
    def _derive_decision_making_style(bf: BigFive) -> str:
        """Return the decision-making style implied by the C, O and N scores."""
        c_score, o_score, n_score = bf.C, bf.O, bf.N
        # (condition, label) pairs in priority order; the first true one wins.
        ordered_rules = (
            (c_score > 0.7 and n_score < 0.4, "methodical_thorough"),
            (o_score > 0.7 and c_score < 0.4, "intuitive_creative"),
            (n_score > 0.7, "anxious_overthinking"),
            (c_score > 0.6 and o_score > 0.6, "analytical_open"),
        )
        for matched, style in ordered_rules:
            if matched:
                return style
        return "pragmatic_balanced"

    @staticmethod
    def _derive_stress_response(bf: BigFive) -> str:
        """Return the stress response implied by the N, E and A scores."""
        n_score, e_score, a_score = bf.N, bf.E, bf.A
        if n_score > 0.6:
            # Every non-calm outcome requires N above 0.6, so they are
            # grouped here and checked in priority order.
            if n_score > 0.7 and e_score < 0.4:
                return "withdraws_worries"
            if a_score < 0.4:
                return "becomes_irritable"
            if e_score > 0.6:
                return "seeks_support"
            return "moderate_coping"
        return "stays_calm" if n_score < 0.3 else "moderate_coping"

    @staticmethod
    def _derive_interaction_preference(bf: BigFive) -> str:
        """Return the interaction preference implied by the E and O scores."""
        e_score, o_score = bf.E, bf.O
        if e_score > 0.6:
            # Both social outcomes need E above 0.6; the brainstorming one
            # additionally needs E above 0.7 together with O above 0.6.
            if e_score > 0.7 and o_score > 0.6:
                return "collaborative_brainstorming"
            return "social_interactive"
        if e_score < 0.3 and o_score < 0.4:
            return "structured_individual"
        return "task_focused"

    @staticmethod
    def _derive_expertise_attitude(bf: BigFive) -> str:
        """Return the attitude toward expertise implied by O, A and N."""
        o_score, a_score, n_score = bf.O, bf.A, bf.N
        return (
            "questions_challenges"
            if o_score > 0.7 and a_score < 0.4
            else "defers_seeks_guidance"
            if a_score > 0.7 and n_score > 0.5
            else "collaborative_peer"
            if o_score > 0.6 and n_score < 0.4
            else "respectful_practical"
        )

    @classmethod
    def derive_styles(cls, bf: BigFive) -> DerivedStyles:
        """Run all five style derivations and bundle them in one model."""
        communication = cls._derive_communication_style(bf)
        decision_making = cls._derive_decision_making_style(bf)
        stress = cls._derive_stress_response(bf)
        interaction = cls._derive_interaction_preference(bf)
        expertise = cls._derive_expertise_attitude(bf)
        return DerivedStyles(
            communication_style=communication,
            decision_making_style=decision_making,
            stress_response=stress,
            interaction_preference=interaction,
            expertise_attitude=expertise,
        )

    @staticmethod
    def _derive_labels(bf: BigFive, st: DerivedStyles) -> List[str]:
        """Build an ordered, duplicate-free list of descriptive labels.

        Trait tags come first (one for each score strictly above 0.7),
        followed by the communication, decision-making and stress-response
        styles. The other two styles are not included.
        """
        trait_tags = (
            (bf.O, "curious"),
            (bf.C, "disciplined"),
            (bf.E, "outgoing"),
            (bf.A, "cooperative"),
            (bf.N, "sensitive"),
        )
        candidates = [tag for score, tag in trait_tags if score > 0.7]
        candidates.extend(
            (st.communication_style, st.decision_making_style, st.stress_response)
        )

        # Drop repeats while keeping the position of the first occurrence.
        seen = set()
        unique: List[str] = []
        for label in candidates:
            if label not in seen:
                seen.add(label)
                unique.append(label)
        return unique
# ---------- UserEngine ----------
class UserEngine:
    """Generate textual user descriptions from a name roster and trait scores.

    The roster CSV is read once at construction time. Each generated user
    combines a randomly chosen roster row (first name and gender) with the
    styles and labels derived from one set of Big Five scores.
    """

    # Roster file used when no path (or an empty one) is supplied.
    _DEFAULT_ROSTER = "user_base_data.csv"

    def __init__(
        self,
        roster_csv: Optional[str] = _DEFAULT_ROSTER,
        seed: Optional[int] = None,
    ):
        """Load the roster and set up a private random generator.

        Args:
            roster_csv: Path of the roster CSV; ``None`` or ``""`` falls back
                to ``user_base_data.csv`` in the working directory.
            seed: Seed for the private ``random.Random`` instance; ``None``
                lets ``random`` pick its own seed.

        Raises:
            OSError: If the roster file cannot be opened.
        """
        self.rng = random.Random(seed)
        self.rows = self._load_csv(Path(roster_csv or self._DEFAULT_ROSTER))

    @staticmethod
    def _load_csv(csv_path: Path) -> List[Dict[str, str]]:
        """Read the roster CSV into ``{"first_name", "gender"}`` dicts.

        The name is taken from the ``first_name`` column, falling back to
        ``name``. Rows whose name is empty after stripping are skipped. A
        missing or blank gender is stored as ``"unspecified"``.
        """
        rows: List[Dict[str, str]] = []
        # "utf-8-sig" transparently drops a leading BOM, which would
        # otherwise become part of the first header name and make every row
        # look nameless. newline="" is what the csv module asks for so that
        # quoted fields containing line breaks are parsed correctly.
        with csv_path.open("r", encoding="utf-8-sig", newline="") as f:
            for row in csv.DictReader(f):
                name = (row.get("first_name") or row.get("name") or "").strip()
                gender = (row.get("gender") or "").strip() or "unspecified"
                if name:
                    rows.append({"first_name": name, "gender": gender})
        return rows

    @staticmethod
    def _gender_label(raw: Optional[str]) -> str:
        """Translate a roster gender value into the text used in the output.

        Integer codes keep their established meaning: ``0`` is ``"male"`` and
        any other integer is ``"female"``. Values that are not integers (for
        example the ``"unspecified"`` default written by ``_load_csv``) are
        passed through unchanged instead of raising ``ValueError``; a missing
        or blank value becomes ``"unspecified"``.
        """
        text = "" if raw is None else str(raw).strip()
        try:
            code = int(text)
        except ValueError:
            return text or "unspecified"
        return "male" if code == 0 else "female"

    def _pick_row(self) -> Dict[str, str]:
        """Return one random roster row, failing clearly on an empty roster."""
        if not self.rows:
            # Same exception type random.choice raises, with a usable message.
            raise IndexError(
                "cannot pick a user: the roster is empty "
                "(no rows with a name were loaded)"
            )
        return self.rng.choice(self.rows)

    def _concatenate_user_attributes(self, user: "User") -> str:
        """Render a user as one English paragraph describing every attribute."""
        return (
            f"First name of the user is {user.first_name}. "
            f"Their gender is {user.gender}. "
            f"Their communication style is generally {user.styles.communication_style}. "  # noqa
            f"In decision making, they are {user.styles.decision_making_style}. "  # noqa
            f"Under stress, they tend to {user.styles.stress_response}. "  # noqa
            f"They prefer {user.styles.interaction_preference} when interacting. "  # noqa
            f"Their attitude toward expertise is {user.styles.expertise_attitude}. "  # noqa
            f"Some descriptive labels for them are: {', '.join(user.labels)}."  # noqa
        )

    def generate_users(
        self,
        personalities: Sequence[Dict[str, float]],
        *,
        extra_labels: Optional[Sequence[str]] = None,
    ) -> List[str]:
        """Create one textual description per set of personality scores.

        Args:
            personalities: One mapping per user, passed as keyword arguments
                to ``BigFive``.
            extra_labels: Labels appended to every user's derived labels;
                duplicates are dropped, keeping first occurrences.

        Returns:
            The descriptions, in the same order as ``personalities``.

        Raises:
            IndexError: If a user is requested while the roster is empty.
        """
        # Materialise once: the extras are the same for every user.
        extras = list(extra_labels) if extra_labels else []
        users: List[str] = []
        for p in personalities:
            bf = BigFive(**p)
            # Exactly one random draw per user keeps seeded runs reproducible.
            row = self._pick_row()
            styles = _Derive.derive_styles(bf)
            labels = _Derive._derive_labels(bf, styles)
            if extras:
                labels = list(dict.fromkeys(labels + extras))
            user = User(
                first_name=row["first_name"],
                gender=self._gender_label(row.get("gender")),
                big_five=bf,
                styles=styles,
                labels=labels,
            )
            users.append(self._concatenate_user_attributes(user))
        return users

    def get_random_user(self) -> Dict[str, str]:
        """Return a copy of a random roster row.

        A copy is returned so that callers mutating the result cannot alter
        the roster held by this engine.

        Raises:
            IndexError: If the roster is empty.
        """
        return dict(self._pick_row())