# Commit 70f896f — "now using 4.1mini with temperature 1" (igiuseppe).
# NOTE(review): these lines were hosting-page metadata pasted above the code;
# kept here as a comment so the file remains valid Python.
import requests
import math
import os
import json
import time
import logging
from compare import main as run_comparison
# --- Configuration ---
# Base URL of the API under test; the commented-out line points at the hosted
# Hugging Face Space deployment instead of a local server.
BASE_URL = "http://0.0.0.0:8000"
#BASE_URL="https://alter-ego-app-alter-ego-api-test.hf.space"
# Endpoint URLs derived from BASE_URL, one per pipeline stage.
PARAMETERS_API_URL = f"{BASE_URL}/generate-parameters"
USERS_API_URL = f"{BASE_URL}/generate-users"
ANSWERS_API_URL = f"{BASE_URL}/generate-users-answers"
REPORT_API_URL = f"{BASE_URL}/generate-report"
# IMPORTANT: Set this environment variable or replace the value directly
API_KEY = os.environ.get("MY_API_KEY")  # None when unset; main() aborts early in that case
# Simulation Parameters
# Free-text audience description forwarded verbatim to the API when
# generating synthetic users.
AUDIENCE = """
This audience consists of experienced professionals working in product development, UX/UI design, market research, and user research across startups, agencies, and established digital platforms. Common roles include product managers, design directors, researchers, and UX designers. They are united by a shared commitment to user-centered design and evidence-based decision-making. Most of them are based in Italy, some of them in the rest of Europe.
They typically operate in fast-paced, resource-constrained environments and balance practical execution with strategic vision. Most have hands-on experience with qualitative research methods, especially user interviews, and frequently use tools like Miro, Notion, Excel, SQL, or Microsoft Teams to collect, cluster, and communicate insights. Their projects span fintech, healthcare, SaaS, home renovation, education, and wealth management.
Key goals include improving product quality through deeper user understanding, formalizing internal research processes, validating ideas quickly, and gaining stakeholder buy-in for user research. While many are exploring AI and synthetic personas, they are cautious about bias and the limitations of non-human insight. They value authenticity, human empathy, and contextual nuance in their work.
They face common pain points such as difficulty in recruiting the right participants, budget/time constraints, skepticism from stakeholders, and limitations in current research tools. Nonetheless, they remain motivated by a desire to create meaningful, intuitive products and by the belief that better research leads to better outcomes.
They are analytical, collaborative, and open-minded—interested in innovation but pragmatic about its application. This audience blends creative problem-solving with structured thinking and seeks tools and methods that enhance insight without compromising rigor or user connection.
"""
# Free-text research-scope description, used by the (currently disabled)
# report-generation step and by get_parameters().
SCOPE = """
This research explores how professionals in product development, UX/UI design, and user research conduct and reflect on user research activities, particularly qualitative interviews. It aims to understand their workflows, tools, challenges, and perspectives on emerging technologies, with the goal of generating realistic synthetic personas. The scope covers participants' roles and experience, typical research processes from planning to analysis, and the tools used for interviews, data organization, and insight synthesis. It investigates common challenges such as participant recruitment, budget constraints, and tool limitations, as well as how insights are validated and shared. A key focus is on attitudes toward AI-powered tools like synthetic users and automated interviews, exploring both perceived benefits and concerns around bias, empathy, and credibility. The research also gathers best practices, advice for improving research effectiveness, insights into how budgets are allocated, and participants' visions for the future of user research. Overall, it aims to capture the mindsets, behaviors, and motivations of user-centered professionals to inform the development of synthetic personas that accurately reflect real-world needs and practices.
"""
# Interview questions posed to every synthetic user; answer order in the
# exported JSON follows this list's order.
QUESTIONS = [
"Can you describe your role and your experience with conducting user interviews and market research?",
"Could you walk us through your typical process for conducting user research, from planning to analysis?",
"What are the most significant challenges you typically encounter during the user research process?",
"What tools or platforms do you primarily use to support your user research activities (e.g., for interviews, analysis, data organization)?",
"What are your thoughts on using AI-powered tools, such as synthetic users or automated interviews, in the user research process, and what potential benefits or drawbacks do you foresee?",
"What key advice would you offer to teams or individuals looking to improve the effectiveness of their user research practices?",
"How do you see user research evolving in the future, particularly with advancements in technology?",
"Generally, how is the budget for user research activities determined or allocated in your experience?",
"How do you typically approach the validation of insights gathered from user research to ensure their reliability and accuracy?",
"Is there anything else you'd like to share about your experiences with user research or any related topics we haven't covered?"
]
TOTAL_FLEET_SIZE = 10 # Total number of users to generate
BATCH_SIZE = 10 # Number of users to generate per batch
DELAY_BETWEEN_BATCHES_SECONDS = 1  # pause between user-generation batches
# --- Logging Setup ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
def get_parameters(api_url: str, api_key: str, audience: str, scope: str) -> list[str] | None:
    """Call the /generate-parameters endpoint and return the parameter list.

    Args:
        api_url: Full URL of the parameters endpoint.
        api_key: Value sent in the ``x-api-key`` header.
        audience: Audience description forwarded to the API.
        scope: Research scope forwarded to the API.

    Returns:
        The list of parameters from the response, or None on any failure
        (network/HTTP error, invalid JSON, or missing/malformed field).
    """
    logger.info("--- Fetching Parameters --- ")
    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
    }
    request_body = {
        "audience": audience,
        "scope": scope
    }
    try:
        start_time = time.time()
        logger.info("Calling Parameters API: %s", api_url)
        response = requests.post(
            api_url,
            headers=headers,
            json=request_body,
            timeout=60
        )
        response.raise_for_status()
        duration = time.time() - start_time
        logger.info("Parameters API call successful. Status: %s. Duration: %.2fs",
                    response.status_code, duration)
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Fix: narrowed from a bare `except Exception`, which also hid
        # programming errors. ValueError covers JSON decoding failures;
        # logger.exception preserves the traceback.
        logger.exception("Error during parameter fetching: %s", e)
        return None
    parameters = data.get("parameters")
    if isinstance(parameters, list):
        logger.info("Received %d parameters.", len(parameters))
        return parameters
    logger.error("'parameters' field not found or not a list in response: %s", data)
    return None
def generate_users_batch(
    api_url: str,
    api_key: str,
    audience: str,
    batch_size: int,
    previous_users: list[dict] | None = None
) -> list[dict] | None:
    """Generate a batch of users using the /generate-users endpoint.

    Args:
        api_url: Full URL of the users endpoint.
        api_key: Value sent in the ``x-api-key`` header.
        audience: Audience description forwarded to the API.
        batch_size: Number of users requested in this batch ("n" in the body).
        previous_users: Users from earlier batches, forwarded so the API can
            avoid duplicating personas; omitted from the request when falsy.

    Returns:
        The list of generated users, or None on failure (network/HTTP error,
        invalid JSON, or an empty "users" field).
    """
    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
    }
    request_body = {
        "audience": audience,
        "n": batch_size
    }
    if previous_users:
        request_body["previous_users"] = previous_users
    try:
        start_time = time.time()
        logger.info("Generating batch of %d users", batch_size)
        response = requests.post(
            api_url,
            headers=headers,
            json=request_body,
            timeout=600  # user generation is slow; allow up to 10 minutes
        )
        response.raise_for_status()
        duration = time.time() - start_time
        logger.info("Users API call successful. Duration: %.2fs", duration)
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Fix: narrowed from a bare `except Exception` (which hid programming
        # errors); also fixed the annotation `list[dict] = None` above to
        # `list[dict] | None`. logger.exception keeps the traceback.
        logger.exception("Error generating users: %s", e)
        return None
    users = data.get("users", [])
    if users:
        logger.info("Generated %d users successfully", len(users))
        return users
    logger.error("No users generated in response")
    return None
def add_answers_to_users(
    api_url: str,
    api_key: str,
    users: list[dict],
    questions: list[str]
) -> list[dict] | None:
    """Add answers to generated users via the /generate-users-answers endpoint.

    Args:
        api_url: Full URL of the answers endpoint.
        api_key: Value sent in the ``x-api-key`` header.
        users: Previously generated user personas.
        questions: Interview questions each user should answer.

    Returns:
        Users enriched with their answers, or None on failure (network/HTTP
        error, invalid JSON, or an empty "users" field).
    """
    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
    }
    request_body = {
        "users": users,
        "questions": questions
    }
    try:
        start_time = time.time()
        logger.info("Adding answers to %d users", len(users))
        response = requests.post(
            api_url,
            headers=headers,
            json=request_body,
            timeout=180
        )
        response.raise_for_status()
        duration = time.time() - start_time
        logger.info("Answers API call successful. Duration: %.2fs", duration)
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Fix: narrowed from a bare `except Exception` so programming errors
        # are not silently swallowed; logger.exception keeps the traceback.
        logger.exception("Error adding answers: %s", e)
        return None
    users_with_answers = data.get("users", [])
    if users_with_answers:
        logger.info("Added answers to %d users successfully", len(users_with_answers))
        return users_with_answers
    logger.error("No users with answers in response")
    return None
def generate_report(
    api_url: str,
    api_key: str,
    audience: str,
    scope: str,
    questions: list[str],
    users: list[dict]
) -> str | None:
    """Generate a summary report using the /generate-report endpoint.

    Args:
        api_url: Full URL of the report endpoint.
        api_key: Value sent in the ``x-api-key`` header.
        audience: Audience description forwarded to the API.
        scope: Research scope forwarded to the API.
        questions: Interview questions that were asked.
        users: Users with their answers.

    Returns:
        The report text, or None on failure (network/HTTP error, invalid
        JSON, or a missing/empty "report" field).
    """
    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
    }
    request_body = {
        "audience": audience,
        "scope": scope,
        "questions": questions,
        "users": users
    }
    try:
        start_time = time.time()
        logger.info("Generating report")
        response = requests.post(
            api_url,
            headers=headers,
            json=request_body,
            timeout=300
        )
        response.raise_for_status()
        duration = time.time() - start_time
        logger.info("Report API call successful. Duration: %.2fs", duration)
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Fix: narrowed from a bare `except Exception` so programming errors
        # are not silently swallowed; logger.exception keeps the traceback.
        logger.exception("Error generating report: %s", e)
        return None
    report = data.get("report")
    if report:
        logger.info("Report generated successfully")
        return report
    logger.error("No report in response")
    return None
def export_answers_to_json(users: list[dict], questions: list[str], filename: str) -> bool:
    """
    Export answers to a JSON file with the format:
    [
        {"question": text of the question, "answers": [complete answer from user 1, complete answer from user 2, ...]},
        ...
    ]
    Args:
        users: List of user personas with their answers (each user's
            "answers" list is aligned with ``questions`` by index)
        questions: List of questions that were asked
        filename: Name of the output JSON file
    Returns:
        True if the export was successful, False otherwise
    """
    try:
        result = []
        for i, question in enumerate(questions):
            # Collect the i-th answer from every user that has one; users
            # with fewer answers than questions are skipped for that slot.
            answers = [
                user["answers"][i]
                for user in users
                if "answers" in user and len(user["answers"]) > i
            ]
            result.append({"question": question, "answers": answers})
        # Write the data to the JSON file
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
        # Fix: the message was an f-string with no placeholder and logged the
        # literal text "(unknown)" instead of the actual filename.
        logger.info("Answers exported to %s", filename)
        return True
    except (OSError, TypeError, ValueError) as e:
        # OSError: file I/O problems; TypeError/ValueError: non-serializable data.
        logger.exception("Error exporting answers to JSON: %s", e)
        return False
def export_user_profiles_to_json(users: list[dict], filename: str) -> bool:
    """
    Export user profiles to a separate JSON file, excluding the answers.
    Args:
        users: List of user personas with their answers
        filename: Name of the output JSON file
    Returns:
        True if the export was successful, False otherwise
    """
    try:
        # Fix: the original aliased the list (`user_profiles = users`), so the
        # "answers" field WAS exported despite the docstring. Build shallow
        # copies with the "answers" key removed; input is left unmodified.
        user_profiles = [
            {key: value for key, value in user.items() if key != "answers"}
            for user in users
        ]
        # Write the data to the JSON file
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(user_profiles, f, ensure_ascii=False, indent=2)
        # Fix: the message was an f-string with no placeholder and logged the
        # literal text "(unknown)" instead of the actual filename.
        logger.info("User profiles exported to %s", filename)
        return True
    except (OSError, TypeError, ValueError) as e:
        # OSError: file I/O problems; TypeError/ValueError: non-serializable data.
        logger.exception("Error exporting user profiles to JSON: %s", e)
        return False
def main():
    """Run the complete simulation: generate users, add answers, export JSON.

    Returns:
        True on full success; None (falsy) when any stage fails, so the
        ``if main():`` guard at module bottom skips the comparison step.
    """
    if not API_KEY:
        logger.error("API Key not configured. Please set the MY_API_KEY environment variable.")
        return None
    # 1 e 2. Generate users in batches, feeding previously generated users
    # back so the API can avoid duplicating personas.
    all_users: list[dict] = []
    num_batches = math.ceil(TOTAL_FLEET_SIZE / BATCH_SIZE)
    for batch_num in range(num_batches):
        current_batch_size = min(BATCH_SIZE, TOTAL_FLEET_SIZE - len(all_users))
        if current_batch_size <= 0:
            break
        logger.info("\nProcessing batch %d/%d", batch_num + 1, num_batches)
        batch_users = generate_users_batch(
            USERS_API_URL,
            API_KEY,
            AUDIENCE,
            current_batch_size,
            all_users if all_users else None
        )
        if not batch_users:
            logger.error("Failed to generate batch %d. Stopping.", batch_num + 1)
            break
        all_users.extend(batch_users)
        logger.info("Total users generated so far: %d", len(all_users))
        # Throttle between batches, but not after the last one.
        if batch_num < num_batches - 1:
            time.sleep(DELAY_BETWEEN_BATCHES_SECONDS)
    if not all_users:
        logger.error("No users were generated. Exiting.")
        return None
    # 3. Add answers to users
    users_with_answers = add_answers_to_users(ANSWERS_API_URL, API_KEY, all_users, QUESTIONS)
    if not users_with_answers:
        logger.error("Failed to add answers to users. Exiting.")
        return None
    # 4. Report generation via generate_report() is intentionally disabled.
    # Fix: ensure the output directory exists before writing; open() would
    # otherwise raise FileNotFoundError on a fresh checkout.
    output_dir = "eval/synthetic"
    os.makedirs(output_dir, exist_ok=True)
    # Generate timestamp for filenames
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    # 5. Export answers to JSON (fixed: was an f-string with no placeholder)
    answers_filename = f"{output_dir}/answers.json"
    export_answers_to_json(users_with_answers, QUESTIONS, answers_filename)
    # 6. Export user profiles to JSON, timestamped so runs don't overwrite each other
    profiles_filename = f"{output_dir}/user_profiles_{timestamp}.json"
    export_user_profiles_to_json(users_with_answers, profiles_filename)
    return True
if __name__ == "__main__":
    # Run the simulation; run_comparison() only executes when main() returns
    # a truthy value (i.e. the full pipeline succeeded).
    if main():
        run_comparison()