Spaces:
Runtime error
Runtime error
| from __future__ import annotations | |
| from collections import defaultdict | |
| import csv | |
| import json | |
| from pathlib import Path | |
| from typing import Dict, List, Optional, Any, Tuple | |
| from conv_data_gen.logger import setup_logger | |
| logger = setup_logger(__name__) | |
def append_kv(parts: List[str], key: str, value: str, desc: str = "") -> None:
    """Append one ``- key: value`` bullet line to *parts* in place.

    Args:
        parts: List of message lines being accumulated; mutated by this call.
        key: Label shown before the colon.
        value: Value shown after the colon (interpolated into the line).
        desc: Optional description; when non-empty it is appended after
            an em dash.
    """
    bullet = f"- {key}: {value}"
    parts.append(f"{bullet} — {desc}" if desc else bullet)
def compose_user_message_for_proxy(
    user_goal: str,
    user_personality: str,
    meta_data: Any,
    user_knobs_dict: Any,
    available_tools: Optional[List[Dict[str, Any]]] = None,
    available_knowledge_bases: Optional[List[Dict[str, str]]] = None,
    agent_variables: Optional[Dict[str, Any]] = None,
) -> str:
    """Assemble the newline-joined text message describing user, task and agent.

    Args:
        user_goal: Text placed on the ``USER GOAL`` line.
        user_personality: Text placed on the ``USER DESCRIPTION`` line.
        meta_data: Mapping read with ``.get`` for the keys ``user_type``,
            ``company``, ``use_case``, ``agent_type``,
            ``conversation_direction`` and ``bot_prompt``.
        user_knobs_dict: Mapping read with ``.get`` for the sections
            ``knobs``, ``knob_descriptions``, ``language_style``,
            ``language_descriptions``, ``demographics``,
            ``demographic_descriptions`` and ``interaction``. A missing or
            ``None`` section is treated as empty.
        available_tools: Optional tool dicts; ``name`` and ``description``
            are listed when the list is non-empty.
        available_knowledge_bases: Optional knowledge-base dicts, listed the
            same way as tools.
        agent_variables: Interpolated as-is into the agent-variables line.

    Returns:
        All generated lines joined with ``"\\n"``.
    """
    parts: List[str] = []

    # DEFINING USER #
    parts.append("## ABOUT THE USER ##")
    parts.append(f"USER ROLE: {meta_data.get('user_type', '')}")
    parts.append(f"USER DESCRIPTION: {user_personality}")
    parts.append(f"USER GOAL: {user_goal}")

    # DEFINING TASK #
    parts.append("## ABOUT THE TASK ##")
    parts.append(f"COMPANY: {meta_data.get('company', '')}")
    parts.append(f"USE CASE: {meta_data.get('use_case', '')}")
    parts.append(
        f"TYPE OF PERSON YOU WILL BE TALKING TO: {meta_data.get('agent_type', '')}"  # noqa
    )
    parts.append(
        f"DIRECTION OF THE CONVERSATION: {meta_data.get('conversation_direction', '')}"  # noqa
    )

    # DEFINING USER KNOBS #
    # ``or {}`` guards against a section that is present but set to None,
    # which would otherwise fail on ``.items()`` / ``.get()``.
    parts.append("\nKNOBS:")
    knobs = user_knobs_dict.get("knobs") or {}
    knob_descriptions = user_knobs_dict.get("knob_descriptions") or {}
    for k, v in knobs.items():
        append_kv(parts, k, v, knob_descriptions.get(k, ""))

    parts.append("\nLANGUAGE:")
    lang = user_knobs_dict.get("language_style") or {}
    ldesc = user_knobs_dict.get("language_descriptions") or {}
    for field, desc_field in (
        ("language", "language_desc"),
        ("formality", "formality_desc"),
        ("code_switch_ratio", "code_switch_desc"),
    ):
        append_kv(parts, field, lang.get(field, ""), ldesc.get(desc_field, ""))

    # Plain ``str + value`` raised TypeError for a list or None value, so
    # normalise to text first; a string value renders exactly as before.
    regionalisms = lang.get("regionalisms", "")
    if isinstance(regionalisms, (list, tuple)):
        regionalisms_text = ", ".join(str(item) for item in regionalisms)
    elif regionalisms is None:
        regionalisms_text = ""
    else:
        regionalisms_text = str(regionalisms)
    parts.append(" - regionalisms: " + regionalisms_text)

    parts.append("\nDEMOGRAPHICS:")
    demographics = user_knobs_dict.get("demographics") or {}
    demographic_descriptions = (
        user_knobs_dict.get("demographic_descriptions") or {}
    )
    for k, v in demographics.items():
        append_kv(parts, k, v, demographic_descriptions.get(k, ""))

    # Provide interaction complexity tier information
    parts.append("\nINTERACTION (complexity tier constraints):")
    interaction = user_knobs_dict.get("interaction") or {}
    # NOTE(review): all tier details are emitted only when tier_name is set;
    # confirm this nesting matches the intended behaviour.
    if interaction.get("tier_name"):
        parts.append(f"- tier_name: {interaction.get('tier_name', '')}")
        parts.append(
            f"- turn_range: ["
            f"{interaction.get('turn_min', '')}, "
            f"{interaction.get('turn_max', '')}]"
        )
        parts.append(
            f"- tool_calls_budget: ["
            f"{interaction.get('tool_calls_min', '')}, "
            f"{interaction.get('tool_calls_max', '')}]"
        )
        parts.append(
            f"- kb_queries_budget: ["
            f"{interaction.get('kb_queries_min', '')}, "
            f"{interaction.get('kb_queries_max', '')}]"
        )

    # DEFINING AGENT DETAILS #
    parts.append(f"YOUR VARIABLES AVAILABLE TO THE AGENT: {agent_variables}")
    parts.append(
        f"PROMPT OF THE AGENT YOU WILL BE TALKING TO: {meta_data.get('bot_prompt', '')}"  # noqa
    )

    if available_tools:
        parts.append("\nAVAILABLE_TOOLS:")
        for tool in available_tools:
            tool_name = tool.get("name", "unknown_tool")
            tool_desc = tool.get("description", "No description available")
            parts.append(f"- {tool_name}: {tool_desc}")

    # Add available knowledge bases information
    if available_knowledge_bases:
        parts.append("\nAVAILABLE_KNOWLEDGE_BASES:")
        for kb in available_knowledge_bases:
            kb_name = kb.get("name", "unknown_kb")
            kb_desc = kb.get("description", "No description available")
            parts.append(f"- {kb_name}: {kb_desc}")

    return "\n".join(parts)
def save_persona_text(base_dir: Path, text: str, index: int) -> str:
    """Write *text* to ``persona_<index>.txt`` inside *base_dir*.

    The file is written as UTF-8 and the write is logged.

    Returns:
        The path of the written file, as a string.
    """
    target = base_dir / f"persona_{index}.txt"
    with open(target, "w", encoding="utf-8") as handle:
        handle.write(text)
    logger.info("Saved persona to %s", target)
    return str(target)
def append_persona_csv(csv_path: Path, rows: List[Dict[str, str]]) -> str:
    """Append *rows* to the CSV at *csv_path*, keeping columns aligned.

    Behaviour:
        * An empty *rows* list is a no-op; no file is created and no blank
          header line is written.
        * When the file is missing or empty, a header made of the sorted
          union of all row keys is written first.
        * When the file already has a header, that column order is reused so
          appended values land under the right columns. Keys not present in
          the existing header are appended as trailing columns and a warning
          is logged, because the header on disk is not rewritten.

    Args:
        csv_path: Destination CSV file.
        rows: Row dicts to append; missing keys are written as empty cells.

    Returns:
        ``str(csv_path)``.
    """
    if not rows:
        return str(csv_path)

    batch_keys = sorted({key for row in rows for key in row})

    # Read the header already on disk (if any) instead of assuming that the
    # current batch has the same key set as earlier batches.
    existing_header: List[str] = []
    if csv_path.exists() and csv_path.stat().st_size > 0:
        with open(csv_path, "r", encoding="utf-8", newline="") as existing:
            existing_header = next(csv.reader(existing), [])

    if existing_header:
        extra_keys = [key for key in batch_keys if key not in existing_header]
        if extra_keys:
            logger.warning(
                "Rows for %s contain columns missing from its header: %s",
                csv_path,
                extra_keys,
            )
        fieldnames = existing_header + extra_keys
    else:
        fieldnames = batch_keys

    with open(csv_path, "a", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if not existing_header:
            writer.writeheader()
        writer.writerows(rows)

    logger.info("Appended personas CSV at %s", csv_path)
    return str(csv_path)
def read_proxy_rows(path: str) -> List[Dict[str, Any]]:
    """Read the JSON-object rows from a one-JSON-value-per-line file.

    Blank lines are skipped. Lines that parse to something other than a JSON
    object (lists, numbers, strings, ...) are ignored. Lines that are not
    valid JSON are skipped as well, and their count is logged once so the
    data loss is visible.

    Args:
        path: Path of the file to read (opened as UTF-8).

    Returns:
        The parsed dict rows, in file order.
    """
    items: List[Dict[str, Any]] = []
    malformed = 0
    with open(Path(path), "r", encoding="utf-8") as f:
        for line in f:
            text = line.strip()
            if not text:
                continue
            try:
                obj = json.loads(text)
            except (json.JSONDecodeError, RecursionError):
                # Best-effort read: keep going, but remember the bad line.
                malformed += 1
                continue
            if isinstance(obj, dict):
                items.append(obj)
    if malformed:
        logger.warning(
            "Skipped %d malformed JSON line(s) in %s", malformed, path
        )
    return items
def group_by_key(
    rows: List[Dict[str, Any]],
) -> Dict[Tuple[str, str, str], List[Dict[str, Any]]]:
    """Bucket *rows* by their ``(company, agent_type, use_case)`` values.

    Each key component is the ``str()`` of the row's value, with a missing
    field contributing an empty string. Rows keep their input order inside
    each bucket.

    Returns:
        A ``defaultdict(list)`` mapping the key tuple to its rows.
    """
    buckets: Dict[Tuple[str, str, str], List[Dict[str, Any]]]
    buckets = defaultdict(list)
    for row in rows:
        bucket_key = (
            str(row.get("company", "")),
            str(row.get("agent_type", "")),
            str(row.get("use_case", "")),
        )
        buckets[bucket_key].append(row)
    return buckets
def select_rows_for_personas(
    groups: Dict[Tuple[str, str, str], List[Dict[str, Any]]],
    per_group: int,
) -> List[Dict[str, Any]]:
    """Take the first rows of every group and return them as one flat list.

    Args:
        groups: Mapping of group key to its rows.
        per_group: Number of leading rows to take from each group; it is
            converted with ``int()`` and values below 1 are raised to 1.

    Returns:
        The selected rows, in group iteration order.
    """
    limit = int(per_group)
    if limit < 1:
        limit = 1
    return [row for bucket in groups.values() for row in bucket[:limit]]
def extract_flat_fields(
    spec: Dict[str, Any],
) -> Tuple[str, str, Dict[str, Any]]:
    """Pull the description, goal and Big Five traits out of *spec*.

    Returns:
        A tuple ``(user_description, goal_in_conversation, big_five_traits)``.
        The first two are passed through ``str()`` and default to ``""``;
        the traits fall back to an empty dict when the stored value is not
        a dict.
    """
    description = str(spec.get("user_description", ""))
    goal = str(spec.get("goal_in_conversation", ""))
    traits = spec.get("big_five_traits", {})
    if not isinstance(traits, dict):
        traits = {}
    return description, goal, traits
def extract_user_goal(spec: Dict[str, Any]) -> str:
    """Return ``spec["goal_in_conversation"]`` as a string (``""`` if absent)."""
    goal = spec.get("goal_in_conversation", "")
    return str(goal)
def extract_user_personality_description(spec: Dict[str, Any]) -> str:
    """Return ``spec["user_description"]`` as a string (``""`` if absent)."""
    description = spec.get("user_description", "")
    return str(description)
def extract_user_knobs(spec: Dict[str, Any]) -> str:
    """Return the ``str()`` form of ``spec["knobs"]`` (``""`` if absent)."""
    knobs = spec.get("knobs", "")
    return str(knobs)
def extract_meta_data(spec: Dict[str, Any]) -> Dict[str, Any]:
    """Copy the conversation metadata fields out of *spec* into a new dict.

    The returned dict always has the keys ``company``, ``use_case``,
    ``conversation_direction``, ``agent_type``, ``user_type`` and
    ``bot_prompt``; a field missing from *spec* maps to ``""``.

    The return annotation was previously ``str`` although a dict has always
    been returned; only the annotation is corrected here.
    """
    fields = (
        "company",
        "use_case",
        "conversation_direction",
        "agent_type",
        "user_type",
        "bot_prompt",
    )
    return {field: spec.get(field, "") for field in fields}
def user_knobs_to_dict(user_knobs: Any) -> Dict[str, Any]:
    """Collect the knob-related attributes of *user_knobs* into a dict.

    Each listed attribute is read from the object and stored under a key of
    the same name; the attribute values themselves are not copied.
    """
    attribute_names = (
        "knobs",
        "knob_descriptions",
        "language_style",
        "language_descriptions",
        "demographics",
        "demographic_descriptions",
        "interaction",
    )
    return {name: getattr(user_knobs, name) for name in attribute_names}