"""Helpers for composing the user-proxy prompt and handling persona rows.

The module builds the text message handed to the user proxy, persists
persona text/CSV output, reads JSON-lines rows, and extracts commonly used
fields from a persona spec dictionary.
"""

from __future__ import annotations

from collections import defaultdict
import csv
import json
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple

from conv_data_gen.logger import setup_logger

logger = setup_logger(__name__)


def append_kv(
    parts: List[str], key: str, value: Any, desc: str = ""
) -> None:
    """Append a ``- key: value`` bullet to *parts*.

    Args:
        parts: List of message lines, mutated in place.
        key: Label of the bullet.
        value: Value rendered with ``str()`` formatting.
        desc: Optional description appended after an em dash.
    """
    line = f"- {key}: {value}"
    if desc:
        line = f"{line} — {desc}"
    parts.append(line)


def _mapping_section(source: Any, key: str) -> Any:
    """Return ``source[key]``, or an empty dict if it is missing or falsy."""
    # ``.get(key, {})`` alone still yields None when the key is present with
    # a None value, which would break the ``.items()`` / ``.get()`` calls.
    return source.get(key) or {}


def _append_catalog(
    parts: List[str],
    header: str,
    entries: List[Dict[str, Any]],
    fallback_name: str,
) -> None:
    """Append *header* followed by one ``- name: description`` per entry."""
    parts.append(header)
    for entry in entries:
        name = entry.get("name", fallback_name)
        description = entry.get("description", "No description available")
        parts.append(f"- {name}: {description}")


def compose_user_message_for_proxy(
    user_goal: str,
    user_personality: str,
    meta_data: Any,
    user_knobs_dict: Any,
    available_tools: Optional[List[Dict[str, Any]]] = None,
    available_knowledge_bases: Optional[List[Dict[str, str]]] = None,
    agent_variables: Optional[Dict[str, Any]] = None,
) -> str:
    """Build the newline-joined prompt message describing the user and task.

    Args:
        user_goal: Goal text placed under ``USER GOAL``.
        user_personality: Description placed under ``USER DESCRIPTION``.
        meta_data: Mapping read with ``.get`` for ``user_type``, ``company``,
            ``use_case``, ``agent_type``, ``conversation_direction`` and
            ``bot_prompt``.
        user_knobs_dict: Mapping read with ``.get`` for ``knobs``,
            ``language_style``, ``demographics``, ``interaction`` and their
            ``*_descriptions`` companions.
        available_tools: Optional tool dicts (``name`` / ``description``).
        available_knowledge_bases: Optional knowledge-base dicts
            (``name`` / ``description``).
        agent_variables: Value rendered verbatim into the agent section.

    Returns:
        The assembled message as a single string.
    """
    parts: List[str] = []

    # DEFINING USER #
    parts.append("## ABOUT THE USER ##")
    parts.append(f"USER ROLE: {meta_data.get('user_type', '')}")
    parts.append(f"USER DESCRIPTION: {user_personality}")
    parts.append(f"USER GOAL: {user_goal}")

    # DEFINING TASK #
    parts.append("## ABOUT THE TASK ##")
    parts.append(f"COMPANY: {meta_data.get('company', '')}")
    parts.append(f"USE CASE: {meta_data.get('use_case', '')}")
    parts.append(
        f"TYPE OF PERSON YOU WILL BE TALKING TO: {meta_data.get('agent_type', '')}"  # noqa
    )
    parts.append(
        f"DIRECTION OF THE CONVERSATION: {meta_data.get('conversation_direction', '')}"  # noqa
    )

    # DEFINING USER KNOBS #
    parts.append("\nKNOBS:")
    knobs = _mapping_section(user_knobs_dict, "knobs")
    knob_descriptions = _mapping_section(user_knobs_dict, "knob_descriptions")
    for name, value in knobs.items():
        append_kv(parts, name, value, knob_descriptions.get(name, ""))

    parts.append("\nLANGUAGE:")
    lang = _mapping_section(user_knobs_dict, "language_style")
    ldesc = _mapping_section(user_knobs_dict, "language_descriptions")
    # (bullet label / language_style key, language_descriptions key)
    language_fields = (
        ("language", "language_desc"),
        ("formality", "formality_desc"),
        ("code_switch_ratio", "code_switch_desc"),
    )
    for field, desc_key in language_fields:
        append_kv(parts, field, lang.get(field, ""), ldesc.get(desc_key, ""))
    # Formatted rather than concatenated so a non-string value (e.g. a list
    # of regionalisms) is rendered instead of raising TypeError.
    parts.append(f" - regionalisms: {lang.get('regionalisms', '')}")

    parts.append("\nDEMOGRAPHICS:")
    demographics = _mapping_section(user_knobs_dict, "demographics")
    demographic_descriptions = _mapping_section(
        user_knobs_dict, "demographic_descriptions"
    )
    for name, value in demographics.items():
        append_kv(parts, name, value, demographic_descriptions.get(name, ""))

    # Provide interaction complexity tier information
    parts.append("\nINTERACTION (complexity tier constraints):")
    interaction = _mapping_section(user_knobs_dict, "interaction")
    if interaction.get("tier_name"):
        parts.append(f"- tier_name: {interaction.get('tier_name', '')}")
        parts.append(
            f"- turn_range: ["
            f"{interaction.get('turn_min', '')}, "
            f"{interaction.get('turn_max', '')}]"
        )
        parts.append(
            f"- tool_calls_budget: ["
            f"{interaction.get('tool_calls_min', '')}, "
            f"{interaction.get('tool_calls_max', '')}]"
        )
        parts.append(
            f"- kb_queries_budget: ["
            f"{interaction.get('kb_queries_min', '')}, "
            f"{interaction.get('kb_queries_max', '')}]"
        )

    # DEFINING AGENT DETAILS #
    parts.append(f"YOUR VARIABLES AVAILABLE TO THE AGENT: {agent_variables}")
    parts.append(
        f"PROMPT OF THE AGENT YOU WILL BE TALKING TO: {meta_data.get('bot_prompt', '')}"  # noqa
    )

    if available_tools:
        _append_catalog(
            parts, "\nAVAILABLE_TOOLS:", available_tools, "unknown_tool"
        )

    # Add available knowledge bases information
    if available_knowledge_bases:
        _append_catalog(
            parts,
            "\nAVAILABLE_KNOWLEDGE_BASES:",
            available_knowledge_bases,
            "unknown_kb",
        )

    return "\n".join(parts)


def save_persona_text(base_dir: Path, text: str, index: int) -> str:
    """Write *text* to ``base_dir / persona_<index>.txt`` and return the path.

    The directory is created if it does not exist yet.
    """
    base_dir.mkdir(parents=True, exist_ok=True)
    out_path = base_dir / f"persona_{index}.txt"
    out_path.write_text(text, encoding="utf-8")
    logger.info("Saved persona to %s", out_path)
    return str(out_path)


def _read_csv_header(csv_path: Path) -> List[str]:
    """Return the first CSV record of *csv_path* (empty list if none)."""
    with open(csv_path, "r", encoding="utf-8", newline="") as f:
        return next(csv.reader(f), [])


def append_persona_csv(csv_path: Path, rows: List[Dict[str, str]]) -> str:
    """Append *rows* to the CSV at *csv_path* and return the path as a string.

    A header is written when the file is new or empty. When the file already
    has content and its header covers every key in *rows*, that header's
    column order is reused so appended rows stay aligned; otherwise the
    sorted union of the row keys is used and a warning is logged.

    With empty *rows* the file is still created but nothing is written, so a
    later call can still emit the header.
    """
    fieldnames = sorted({key for row in rows for key in row})
    has_content = csv_path.exists() and csv_path.stat().st_size > 0

    if rows and has_content:
        existing_header = _read_csv_header(csv_path)
        if set(fieldnames) <= set(existing_header):
            fieldnames = existing_header
        else:
            logger.warning(
                "Columns of appended rows do not match header of %s",
                csv_path,
            )

    with open(csv_path, "a", encoding="utf-8", newline="") as f:
        if rows:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            if not has_content:
                writer.writeheader()
            writer.writerows(rows)

    logger.info("Appended personas CSV at %s", csv_path)
    return str(csv_path)


def read_proxy_rows(path: str) -> List[Dict[str, Any]]:
    """Read a JSON-lines file and return every line that decodes to a dict.

    Blank lines are ignored. Lines that fail to parse are skipped (and
    logged) rather than aborting the read; values that decode to something
    other than a dict are dropped.
    """
    items: List[Dict[str, Any]] = []
    with open(Path(path), "r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                obj = json.loads(line)
            except (ValueError, RecursionError) as exc:
                # json.JSONDecodeError is a ValueError; RecursionError covers
                # pathologically nested input. Reading stays best-effort.
                logger.warning(
                    "Skipping unparsable line %d in %s: %s",
                    line_no,
                    path,
                    exc,
                )
                continue
            if isinstance(obj, dict):
                items.append({str(k): v for k, v in obj.items()})
    return items


def group_by_key(
    rows: List[Dict[str, Any]],
) -> Dict[Tuple[str, str, str], List[Dict[str, Any]]]:
    """Group *rows* by their ``(company, agent_type, use_case)`` strings.

    Missing fields are treated as empty strings. Row order within each group
    follows the input order.
    """
    groups: Dict[Tuple[str, str, str], List[Dict[str, Any]]] = defaultdict(
        list
    )
    for row in rows:
        key = (
            str(row.get("company", "")),
            str(row.get("agent_type", "")),
            str(row.get("use_case", "")),
        )
        groups[key].append(row)
    return groups


def select_rows_for_personas(
    groups: Dict[Tuple[str, str, str], List[Dict[str, Any]]],
    per_group: int,
) -> List[Dict[str, Any]]:
    """Return the first *per_group* rows of every group, concatenated.

    *per_group* is coerced to ``int`` and clamped to at least 1.
    """
    limit = max(1, int(per_group))
    selected: List[Dict[str, Any]] = []
    for items in groups.values():
        selected.extend(items[:limit])
    return selected


def extract_flat_fields(
    spec: Dict[str, Any],
) -> Tuple[str, str, Dict[str, Any]]:
    """Return ``(user_description, goal_in_conversation, big_five_traits)``.

    The two text fields are stringified; ``big_five_traits`` falls back to an
    empty dict when the stored value is not a dict.
    """
    traits = spec.get("big_five_traits", {})
    big_five_traits: Dict[str, Any] = (
        traits if isinstance(traits, dict) else {}
    )
    return (
        extract_user_personality_description(spec),
        extract_user_goal(spec),
        big_five_traits,
    )


def extract_user_goal(spec: Dict[str, Any]) -> str:
    """Return ``spec["goal_in_conversation"]`` as a string ("" if absent)."""
    return str(spec.get("goal_in_conversation", ""))


def extract_user_personality_description(spec: Dict[str, Any]) -> str:
    """Return ``spec["user_description"]`` as a string ("" if absent)."""
    return str(spec.get("user_description", ""))


def extract_user_knobs(spec: Dict[str, Any]) -> str:
    """Return ``spec["knobs"]`` rendered with ``str()`` ("" if absent)."""
    return str(spec.get("knobs", ""))


def extract_meta_data(spec: Dict[str, Any]) -> Dict[str, Any]:
    """Return the metadata subset of *spec* as a new dict.

    Each of the six metadata keys defaults to an empty string when missing.
    """
    meta_keys = (
        "company",
        "use_case",
        "conversation_direction",
        "agent_type",
        "user_type",
        "bot_prompt",
    )
    return {key: spec.get(key, "") for key in meta_keys}


def user_knobs_to_dict(user_knobs: Any) -> Dict[str, Any]:
    """Copy the knob-related attributes of *user_knobs* into a plain dict.

    Raises:
        AttributeError: If *user_knobs* lacks any of the read attributes.
    """
    return {
        "knobs": user_knobs.knobs,
        "knob_descriptions": user_knobs.knob_descriptions,
        "language_style": user_knobs.language_style,
        "language_descriptions": user_knobs.language_descriptions,
        "demographics": user_knobs.demographics,
        "demographic_descriptions": user_knobs.demographic_descriptions,
        "interaction": user_knobs.interaction,
    }