Spaces:

ashish-sarvam
/

data-gen

Runtime error

App Files Files Community

data-gen / conv_data_gen /generators /user_structured /User.py

ashish-sarvam

Upload folder using huggingface_hub

fc1a684 verified 4 months ago

raw

history blame contribute delete

6.29 kB

	import csv
	import random
	from pathlib import Path
	from typing import Dict, List, Optional, Sequence
	from pydantic import BaseModel


	# ---------- Models ----------


	class BigFive(BaseModel):
	    """Five float-valued personality trait scores, one per field.

	    The single-letter names presumably follow the usual OCEAN convention
	    (openness, conscientiousness, extraversion, agreeableness,
	    neuroticism) -- TODO confirm with the data source. This model enforces
	    no value range; the derivation rules in this file compare scores with
	    thresholds between 0.3 and 0.7, which suggests a 0-1 scale.
	    """

	    # A score above 0.7 adds the "curious" label in _Derive._derive_labels.
	    O: float
	    # A score above 0.7 adds the "disciplined" label.
	    C: float
	    # A score above 0.7 adds the "outgoing" label.
	    E: float
	    # A score above 0.7 adds the "cooperative" label.
	    A: float
	    # A score above 0.7 adds the "sensitive" label.
	    N: float


	class DerivedStyles(BaseModel):
	    """Categorical style labels derived from a BigFive score set.

	    Instances are built by ``_Derive.derive_styles``; each field holds the
	    string returned by the ``_Derive._derive_<field name>`` method.
	    """

	    # e.g. "friendly_talkative"; "balanced_moderate" when no rule matches.
	    communication_style: str
	    # e.g. "methodical_thorough"; "pragmatic_balanced" when no rule matches.
	    decision_making_style: str
	    # e.g. "withdraws_worries"; "moderate_coping" when no rule matches.
	    stress_response: str
	    # e.g. "collaborative_brainstorming"; "task_focused" when no rule matches.
	    interaction_preference: str
	    # e.g. "questions_challenges"; "respectful_practical" when no rule matches.
	    expertise_attitude: str


	class User(BaseModel):
	    """A generated persona: roster identity plus personality-derived traits.

	    ``UserEngine.generate_users`` builds one instance per requested
	    personality and renders it to text with
	    ``UserEngine._concatenate_user_attributes``.
	    """

	    # First name taken from a randomly chosen roster row.
	    first_name: str
	    # Gender text; filled by UserEngine.generate_users from the roster's
	    # gender column.
	    gender: str
	    # The raw trait scores the styles and labels were derived from.
	    big_five: BigFive
	    # Style labels computed by _Derive.derive_styles.
	    styles: DerivedStyles
	    # De-duplicated descriptive tags (trait tags, some style labels, and
	    # any caller-supplied extras).
	    labels: List[str]


	# ---------- Derivations ----------


	class _Derive:
	    """Rule-based mappings from Big Five scores to style labels and tags.

	    Each ``_derive_*`` method lists its rules in priority order: the first
	    rule whose condition holds supplies the label, and a fixed fallback
	    label is returned when no rule matches.
	    """

	    @staticmethod
	    def _derive_communication_style(bf: BigFive) -> str:
	        """Return the communication-style label chosen from E, A and N."""
	        extraversion, agreeableness, neuroticism = bf.E, bf.A, bf.N
	        rules = (
	            (extraversion > 0.7 and agreeableness > 0.6, "friendly_talkative"),
	            (extraversion < 0.3 and neuroticism > 0.6, "reserved_cautious"),
	            (agreeableness < 0.4 and neuroticism < 0.4, "direct_assertive"),
	            (extraversion > 0.6 and neuroticism > 0.6, "expressive_emotional"),
	        )
	        return next((label for hit, label in rules if hit), "balanced_moderate")

	    @staticmethod
	    def _derive_decision_making_style(bf: BigFive) -> str:
	        """Return the decision-making label chosen from C, O and N."""
	        conscientiousness, openness, neuroticism = bf.C, bf.O, bf.N
	        rules = (
	            (conscientiousness > 0.7 and neuroticism < 0.4, "methodical_thorough"),
	            (openness > 0.7 and conscientiousness < 0.4, "intuitive_creative"),
	            (neuroticism > 0.7, "anxious_overthinking"),
	            (conscientiousness > 0.6 and openness > 0.6, "analytical_open"),
	        )
	        return next((label for hit, label in rules if hit), "pragmatic_balanced")

	    @staticmethod
	    def _derive_stress_response(bf: BigFive) -> str:
	        """Return the stress-response label chosen from N, E and A."""
	        neuroticism, extraversion, agreeableness = bf.N, bf.E, bf.A
	        rules = (
	            (neuroticism > 0.7 and extraversion < 0.4, "withdraws_worries"),
	            (neuroticism > 0.6 and agreeableness < 0.4, "becomes_irritable"),
	            (neuroticism > 0.6 and extraversion > 0.6, "seeks_support"),
	            (neuroticism < 0.3, "stays_calm"),
	        )
	        return next((label for hit, label in rules if hit), "moderate_coping")

	    @staticmethod
	    def _derive_interaction_preference(bf: BigFive) -> str:
	        """Return the interaction-preference label chosen from E and O."""
	        extraversion, openness = bf.E, bf.O
	        rules = (
	            (extraversion > 0.7 and openness > 0.6, "collaborative_brainstorming"),
	            (extraversion < 0.3 and openness < 0.4, "structured_individual"),
	            (extraversion > 0.6, "social_interactive"),
	        )
	        return next((label for hit, label in rules if hit), "task_focused")

	    @staticmethod
	    def _derive_expertise_attitude(bf: BigFive) -> str:
	        """Return the attitude-toward-expertise label chosen from O, A and N."""
	        openness, agreeableness, neuroticism = bf.O, bf.A, bf.N
	        rules = (
	            (openness > 0.7 and agreeableness < 0.4, "questions_challenges"),
	            (agreeableness > 0.7 and neuroticism > 0.5, "defers_seeks_guidance"),
	            (openness > 0.6 and neuroticism < 0.4, "collaborative_peer"),
	        )
	        return next((label for hit, label in rules if hit), "respectful_practical")

	    @classmethod
	    def derive_styles(cls, bf: BigFive) -> DerivedStyles:
	        """Run every style derivation on *bf* and bundle the five labels."""
	        derived = {
	            "communication_style": cls._derive_communication_style(bf),
	            "decision_making_style": cls._derive_decision_making_style(bf),
	            "stress_response": cls._derive_stress_response(bf),
	            "interaction_preference": cls._derive_interaction_preference(bf),
	            "expertise_attitude": cls._derive_expertise_attitude(bf),
	        }
	        return DerivedStyles(**derived)

	    @staticmethod
	    def _derive_labels(bf: BigFive, st: DerivedStyles) -> List[str]:
	        """Return ordered, de-duplicated tags for a persona.

	        A trait contributes its tag when its score is above 0.7; the
	        communication, decision-making and stress-response labels from
	        *st* are always appended after the trait tags.
	        """
	        trait_tags = (
	            (bf.O, "curious"),
	            (bf.C, "disciplined"),
	            (bf.E, "outgoing"),
	            (bf.A, "cooperative"),
	            (bf.N, "sensitive"),
	        )
	        collected = [tag for score, tag in trait_tags if score > 0.7]
	        collected.extend(
	            (st.communication_style, st.decision_making_style, st.stress_response)
	        )
	        # dict keys keep first-seen order, so this drops repeats stably.
	        return list(dict.fromkeys(collected))


	# ---------- UserEngine ----------


	class UserEngine:
	    """Generate textual persona descriptions from a CSV roster of names.

	    The roster supplies ``first_name`` (or ``name``) and ``gender`` columns.
	    Personas pair a randomly chosen roster row with style labels derived
	    from caller-supplied Big Five scores.

	    Attributes:
	        rng: Private ``random.Random`` instance, seeded for reproducibility.
	        rows: Roster rows as ``{"first_name": ..., "gender": ...}`` dicts.
	    """

	    # Roster file used when ``roster_csv`` is ``None`` or empty.
	    _DEFAULT_ROSTER = "user_base_data.csv"

	    def __init__(
	        self,
	        roster_csv: Optional[str] = "user_base_data.csv",
	        seed: Optional[int] = None,
	    ):
	        """Load the roster and set up the random generator.

	        Args:
	            roster_csv: Path of the roster CSV; a falsy value selects
	                ``user_base_data.csv`` in the working directory.
	            seed: Seed for the private random generator; ``None`` seeds
	                from system entropy.

	        Raises:
	            OSError: If the roster file cannot be opened.
	        """
	        self.rng = random.Random(seed)
	        self.rows = self._load_csv(Path(roster_csv or self._DEFAULT_ROSTER))

	    @staticmethod
	    def _load_csv(csv_path: Path) -> List[Dict[str, str]]:
	        """Read roster rows from *csv_path*, skipping rows without a name.

	        Args:
	            csv_path: Location of a CSV file with a header row.

	        Returns:
	            One dict per usable row with stripped ``first_name`` and
	            ``gender`` values; a missing or blank gender becomes
	            ``"unspecified"``.
	        """
	        rows = []
	        # newline="" is what the csv module asks for so that it can handle
	        # line endings itself; utf-8-sig drops an optional BOM that would
	        # otherwise be glued onto the first header name and hide the column.
	        with csv_path.open("r", encoding="utf-8-sig", newline="") as f:
	            for row in csv.DictReader(f):
	                name = (row.get("first_name") or row.get("name") or "").strip()
	                gender = (row.get("gender") or "").strip() or "unspecified"
	                if name:
	                    rows.append({"first_name": name, "gender": gender})
	        return rows

	    @staticmethod
	    def _gender_label(raw: str) -> str:
	        """Translate a roster gender value into the text stored on a user.

	        Integer-like values keep the existing encoding (``0`` is
	        ``"male"``, any other integer is ``"female"``). Anything that is
	        not an integer -- including the ``"unspecified"`` default written
	        by ``_load_csv`` -- is returned unchanged instead of raising.
	        """
	        try:
	            code = int(raw)
	        except ValueError:
	            return raw
	        return "male" if code == 0 else "female"

	    def _concatenate_user_attributes(self, user: "User") -> str:
	        """Render *user* as a single English paragraph of attribute sentences."""
	        return (
	            f"First name of the user is {user.first_name}. "
	            f"Their gender is {user.gender}. "
	            f"Their communication style is generally {user.styles.communication_style}. "  # noqa
	            f"In decision making, they are {user.styles.decision_making_style}. "  # noqa
	            f"Under stress, they tend to {user.styles.stress_response}. "  # noqa
	            f"They prefer {user.styles.interaction_preference} when interacting. "  # noqa
	            f"Their attitude toward expertise is {user.styles.expertise_attitude}. "  # noqa
	            f"Some descriptive labels for them are: {', '.join(user.labels)}."  # noqa
	        )

	    def generate_users(
	        self,
	        personalities: Sequence[Dict[str, float]],
	        *,
	        extra_labels: Optional[Sequence[str]] = None,
	    ) -> List[str]:
	        """Build one persona description per entry of *personalities*.

	        Args:
	            personalities: Mappings of ``BigFive`` field names to scores.
	            extra_labels: Optional labels appended (de-duplicated) to every
	                generated user's derived labels.

	        Returns:
	            The rendered description strings, in input order.

	        Raises:
	            IndexError: If the roster is empty and *personalities* is not.
	        """
	        # Materialise once so every user receives the same extras.
	        extras = list(extra_labels) if extra_labels else []
	        users = []
	        for p in personalities:
	            bf = BigFive(**p)
	            row = self.rng.choice(self.rows)
	            styles = _Derive.derive_styles(bf)
	            labels = _Derive._derive_labels(bf, styles)
	            if extras:
	                labels = list(dict.fromkeys(labels + extras))
	            user = User(
	                first_name=row["first_name"],
	                gender=self._gender_label(row["gender"]),
	                big_five=bf,
	                styles=styles,
	                labels=labels,
	            )
	            users.append(self._concatenate_user_attributes(user))
	        return users

	    def get_random_user(self) -> Dict[str, str]:
	        """Return a copy of a randomly chosen roster row.

	        The ``gender`` value is the raw roster text, not the label that
	        ``generate_users`` produces. A copy is returned so that callers
	        cannot modify the stored roster.

	        Raises:
	            IndexError: If the roster is empty.
	        """
	        return dict(self.rng.choice(self.rows))