"""Synthetic-persona research toolkit.

Generates LLM-backed user personas for a target audience, interviews them
with a question list (a "fleet"), and turns the transcripts into market
research reports and follow-up chats.
"""

import json
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List

from pydantic import BaseModel

from utils import call_llm

LLM_MODEL = "gpt-4o-mini"


def generate_user_parameters(audience: str, scope: str) -> List[str]:
    """Return persona parameters: 4 standard demographics plus 4 LLM-suggested ones.

    Args:
        audience: Description of the target audience.
        scope: Description of the research scope.

    Returns:
        The 4 standard parameters followed by the LLM's additional parameters.
    """
    standard_parameters = ["Name", "Age", "Location", "Profession"]

    prompt = f"""
    You are an expert customer researcher.
    Your task is to help define user personas for a specific audience and a specific research scope.
    This is the desired audience: {audience}
    This is the research scope: {scope}
    Start from the following 4 standard demographic parameters: {standard_parameters}
    Your goal is to suggest 4 additional parameters that are especially relevant for capturing behaviors, attitudes, or characteristics important for this audience and scope.
    Only suggest parameters that will meaningfully help differentiate users in this specific context.
    The parameters should be tailored for the audience and scope but also not too specific.
    """

    class Response(BaseModel):
        additional_parameters: list[str]

    # call_llm returns a JSON string matching the Response schema.
    response = call_llm(prompt=prompt, response_format=Response)
    additional_parameters = json.loads(response)["additional_parameters"]
    return standard_parameters + additional_parameters


def generate_synthetic_personas(parameters: List[str], num_personas: int, audience: str, batch_size: int = 10) -> Dict:
    """Generate synthetic personas in batches, ensuring variability by considering previously generated personas.

    Args:
        parameters: List of parameters to include in each persona.
        num_personas: Total number of personas to generate.
        audience: Target audience for the personas.
        batch_size: Number of personas to generate in each batch.

    Returns:
        Dictionary containing the list of generated personas under "users_personas".
    """
    all_personas = []
    remaining_personas = num_personas

    while remaining_personas > 0:
        current_batch_size = min(batch_size, remaining_personas)

        # Feed previously generated personas back into the prompt so each
        # new batch can diverge from what already exists.
        previous_personas_context = ""
        if all_personas:
            previous_personas_context = "\nAs a context, here are the previously generated personas:\n"
            for i, persona in enumerate(all_personas, 1):
                previous_personas_context += f"\nPersona {i}:\n"
                for param, value in persona.items():
                    previous_personas_context += f"{param}: {value}\n"

        prompt = f"""
        You are an expert in user persona creation.
        Generate {current_batch_size} diversified user personas based on the following parameters: {parameters}
        Each persona should be unique and realistic, ensure variability.
        Take into account that the desired audience is the following: {audience}.
        {previous_personas_context}
        Important requirements:
        1. Each new persona should be significantly different from the previously generated ones
        2. Ensure diversity in all parameters, especially in key demographic factors
        3. Avoid creating personas that are too similar to existing ones
        4. Make sure the personas are realistic and representative of the target audience
        5. Ensure the list contains exactly {current_batch_size} personas
        """

        class Parameter(BaseModel):
            parameter_name: str
            value: str

        class User(BaseModel):
            user_persona: list[Parameter]

        class Response(BaseModel):
            users_personas: list[User]

        response = call_llm(prompt=prompt, response_format=Response)
        response = json.loads(response)

        # Flatten each persona's parameter list into a {name: value} dict.
        batch_personas = [
            {item["parameter_name"]: item["value"] for item in user["user_persona"]}
            for user in response["users_personas"]
        ]
        all_personas.extend(batch_personas)
        remaining_personas -= current_batch_size

    return {"users_personas": all_personas}


def ask_questions_to_persona(persona: dict, questions: List[str]) -> List[str]:
    """Ask every question to one persona, in parallel, and return the answers in order.

    Args:
        persona: Persona attributes ({parameter: value}).
        questions: Questions to ask.

    Returns:
        One answer per question, in the same order as ``questions``.
    """
    # Guard: ThreadPoolExecutor rejects max_workers=0, so short-circuit
    # an empty question list instead of crashing.
    if not questions:
        return []

    def ask_single_question(question: str) -> str:
        prompt = f"""
        Act as if you were this user persona: {persona}
        You need to impersonate this user persona and answer the following question as if you were that user: {question}
        Never start the sentences in the following way: As "name of the person"...
        Try to sound natural and authentic, as if you were the user persona.
        Make sure to answer the question in a way that is relevant to the user persona.
        """
        return call_llm(prompt, False)

    # Cap concurrency; executor.map preserves input order.
    max_workers = min(8, len(questions))
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        answers = list(executor.map(ask_single_question, questions))
    return answers


def generate_fleet(n: int, parameters: List[str], questions: List[str], audience: str) -> List[Dict]:
    """Generate ``n`` personas for ``audience`` and interview each with ``questions``.

    Returns:
        List of persona dicts, each augmented with an "answers" list.
    """
    users_personas = generate_synthetic_personas(parameters=parameters, num_personas=n, audience=audience)["users_personas"]
    return generate_fleet_from_users(users_personas=users_personas, questions=questions)


def generate_fleet_from_users(users_personas: List[Dict], questions: List[str]) -> List[Dict]:
    """Interview each existing persona with ``questions``.

    Note: mutates each persona dict in place by adding an "answers" key.

    Returns:
        The persona dicts, each augmented with an "answers" list.
    """
    fleet = []
    for persona_dict in users_personas:
        answers = ask_questions_to_persona(persona_dict, questions)
        persona_dict["answers"] = answers
        fleet.append(persona_dict)
    return fleet


def generate_content(fleet, questions=None, scope=None) -> str:
    """Serialize a fleet (and optional scope/questions) into a plain-text transcript.

    Args:
        fleet: Persona dicts, each optionally holding an "answers" list.
        questions: Questions that were asked, if available.
        scope: Research scope, if available.

    Returns:
        A human-readable transcript string used as LLM context.
    """
    content = ""
    if scope:
        content += f"Scope of Research:\n{scope}\n\n"
    if questions:
        content += "Questions:\n"
        for i, question in enumerate(questions, 1):
            content += f"Q{i}: {question}\n"
        content += "\n"
    for i, user in enumerate(fleet, 1):
        content += f"### User {i} ###\n"
        # Persona attributes first, answers afterwards.
        for key, value in user.items():
            if key != "answers":
                content += f"{key}: {value}\n"
        content += "\n"
        for j, answer in enumerate(user.get("answers", []), 1):
            content += f"Q{j}: {answer}\n\n"
        content += "\n---\n\n"
    return content


def generate_report(questions, fleet, scope) -> str:
    """Write a structured research report from the fleet's interview transcripts.

    Args:
        questions: Questions that were asked to the personas.
        fleet: Interviewed personas (with "answers").
        scope: Research scope the report must follow.

    Returns:
        The report text produced by the LLM.
    """
    content = generate_content(questions=questions, fleet=fleet, scope=scope)

    prompt = f"""
    You are an expert in customer and market research.
    Your task is to write a research report based on the following interviews: {content}
    You have to follow the following scope of research: {scope}
    The report should be structured and include the following sections:
    - Introduction: describe the purpose of the research and the audience.
    - Methodology: describe how the interviews were conducted.
    - Demography: summarize the personas that were interviewed. Add also some examples of the users that were interviewed.
    - Findings: summarize the key insights and themes that emerged from the interviews focusing on the scope of research. If possible and relevant, include quotes from the interviews to support your findings.
    - Insights: summarize the key insights and themes that emerged from the interviews going beyond the scope of research. Be creative and think outside the box analysing trends and patterns. If possible and relevant, include quotes from the interviews to support your insights.
    - Conclusion: summarize the main findings and their implications.
    - Recommendations: provide actionable recommendations based on the findings and insights.
    - Improvements: suggest improvements for the research process and the interview questions.
    """
    report_text = call_llm(prompt, False)
    return report_text


def chat_with_persona(persona: dict, question: str, conversation_history: List[dict] = None) -> str:
    """Chat with a specific persona, taking into account conversation history if provided.

    Args:
        persona: The user persona to chat with.
        question: The current question to ask.
        conversation_history: List of previous Q&A pairs ({"question", "answer"}), if any.

    Returns:
        The persona's answer to the question.
    """
    history_context = ""
    if conversation_history:
        history_context = "\nPrevious conversation:\n"
        for chat in conversation_history:
            history_context += f"Q: {chat['question']}\n"
            history_context += f"A: {chat['answer']}\n"

    prompt = f"""
    Act as if you were this user persona: {persona}
    You need to impersonate this user persona and answer the following question as if you were that user: {question}
    Never start the sentences in the following way: As "name of the person"...
    Try to sound natural and authentic, as if you were the user persona.
    Make sure to answer the question in a way that is relevant to the user persona and consistent with any previous conversation.
    """
    if conversation_history:
        # history_context already carries the "Previous conversation:" header;
        # appending it directly avoids duplicating the header.
        prompt += history_context
    return call_llm(prompt)


def chat_with_report(users: List[dict], question: str, questions: List[str]) -> str:
    """Answer a question about the interview data behind a report.

    Args:
        users: List of user personas with their answers (fleet).
        question: The question to ask about the report content.
        questions: List of questions that were asked to the users.

    Returns:
        The answer based on the report content.
    """
    # Rebuild the same transcript the report was generated from.
    content = generate_content(fleet=users, questions=questions)

    prompt = f"""
    You are an expert in customer and market research.
    You have access to the following interview data: {content}
    You need to answer the following question based on the interview data: {question}
    Provide a detailed answer that synthesizes the information from the interviews.
    If relevant, include specific examples or quotes from the interviews to support your answer.
    Make sure your response is well-structured and professional.
    If the question is not relevant to the interview data, please say that you cannot answer it based on the provided information.
    """
    return call_llm(prompt)


def generate_audience_name(audience: str, scope: str) -> str:
    """Generate a concise, descriptive name for the audience based on the research scope.

    Args:
        audience: The target audience description.
        scope: The research scope.

    Returns:
        A concise, descriptive name for the audience.
    """
    prompt = f"""
    You are an expert in creating concise, descriptive names for research audiences.
    Given the following audience and research scope:
    Audience: {audience}
    Scope: {scope}
    Create a very concise name (max one sentence) that captures the essence of this audience for the given scope.
    Respond with ONLY the name, nothing else.
    """
    return call_llm(prompt, False).strip()