AUXteam commited on
Commit
f5e9574
·
verified ·
1 Parent(s): da232a0

Upload folder using huggingface_hub

Browse files
Dockerfile CHANGED
@@ -1,26 +1,28 @@
1
- FROM python:3.11-slim
2
 
3
- # Configure a non-root user specifically for HF Spaces
4
- RUN useradd -m -u 1000 user
 
 
 
 
5
 
 
 
6
  USER user
 
7
 
8
- # Set home to the user's home directory
9
- ENV HOME=/home/user \
10
- PATH=/home/user/.local/bin:$PATH
11
-
12
- # Change working directory
13
- WORKDIR $HOME/app
14
 
15
- # Install dependencies using the user
16
  COPY --chown=user requirements.txt .
17
  RUN pip install --no-cache-dir --user -r requirements.txt
18
 
19
- # Copy the app files into the home directory, setting proper ownership
20
- COPY --chown=user . $HOME/app
21
 
22
- # Expose the standard port
23
  EXPOSE 7860
24
 
25
- # Run the FastAPI app using uvicorn
26
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ FROM python:3.12-slim
2
 
3
+ # Install system dependencies
4
+ RUN apt-get update && apt-get install -y \
5
+ build-essential \
6
+ curl \
7
+ git \
8
+ && rm -rf /var/lib/apt/lists/*
9
 
10
+ # Create a non-root user
11
+ RUN useradd -m -u 1000 user
12
  USER user
13
+ ENV PATH="/home/user/.local/bin:${PATH}"
14
 
15
+ WORKDIR /app
 
 
 
 
 
16
 
17
+ # Copy requirements and install
18
  COPY --chown=user requirements.txt .
19
  RUN pip install --no-cache-dir --user -r requirements.txt
20
 
21
+ # Copy the rest of the application
22
+ COPY --chown=user . .
23
 
24
+ # Expose the HF port
25
  EXPOSE 7860
26
 
27
+ # Run the application
28
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: Tiny Factory
3
- emoji: 💻
4
  colorFrom: yellow
5
  colorTo: gray
6
  sdk: docker
@@ -8,4 +8,22 @@ app_port: 7860
8
  pinned: false
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Deep Persona Factory
3
+ emoji: 🎭
4
  colorFrom: yellow
5
  colorTo: gray
6
  sdk: docker
 
8
  pinned: false
9
  ---
10
 
11
+ # Deep Persona Factory
12
+
13
+ Deep Persona Factory is a specialized simulation engine for persona generation and social content testing.
14
+
15
+ ## Features
16
+ - **Social Network Engine:** Graph-based modeling and influence propagation.
17
+ - **Prediction Engine:** ML and LLM-based engagement scoring.
18
+ - **Deep Persona Generation:** Sequential enrichment for high-fidelity character profiles.
19
+ - **API Documentation:** Accessible via \`/api-docs\`.
20
+ - **Health Check:** Accessible via \`/health\`.
21
+
22
+ ## API Documentation
23
+ The application exposes a mandatory \`/api-docs\` endpoint providing Swagger UI for all available endpoints.
24
+
25
+ ## Local Setup
26
+ \`\`\`bash
27
+ pip install -r requirements.txt
28
+ uvicorn app:app --host 0.0.0.0 --port 7860
29
+ \`\`\`
app.py CHANGED
@@ -2,199 +2,536 @@ import sys
2
  import os
3
  import gradio as gr
4
  import json
5
- from fastapi import FastAPI
6
- import uvicorn
7
- from pydantic import BaseModel
 
 
 
8
 
9
- app = FastAPI()
 
 
10
 
11
- @app.get("/health")
12
- def health():
13
- return {"status": "ok"}
14
 
15
- from fastapi.responses import RedirectResponse
16
-
17
- @app.get("/api-docs")
18
- def api_docs():
19
- return RedirectResponse(url="/docs")
20
-
21
- class PersonaRequest(BaseModel):
22
- business_description: str
23
- customer_profile: str
24
- num_personas: int = 1
25
-
26
- @app.post("/api/v1/generate_personas")
27
- def generate_personas_api(req: PersonaRequest):
28
- return generate_personas(req.business_description, req.customer_profile, req.num_personas)
29
 
30
- def extract_persona_parameters(business_description: str, customer_profile: str) -> dict:
31
- from tinytroupe.openai_utils import client
32
-
33
- system_prompt = """
34
- You are an expert persona parameter extractor.
35
- Based on the provided business description and customer profile, you must deduce and generate 10 specific parameters needed for a deep persona generator.
36
- The parameters are:
37
- - `age` (float): The age of the persona.
38
- - `gender` (str): The gender of the persona.
39
- - `occupation` (str): The occupation of the persona.
40
- - `city` (str): The city of the persona.
41
- - `country` (str): The country of the persona.
42
- - `custom_values` (str): The personal values of the persona.
43
- - `custom_life_attitude` (str): The life attitude of the persona.
44
- - `life_story` (str): A brief life story of the persona.
45
- - `interests_hobbies` (str): Interests and hobbies of the persona.
46
- - `attribute_count` (float): Attribute richness, default to 350.
47
-
48
- You must return a valid JSON object containing exactly these keys.
49
- """
50
-
51
- user_prompt = f"Business Description: {business_description}\nCustomer Profile: {customer_profile}\n\nReturn the 10 parameters as JSON."
52
-
53
- messages = [
54
- {"role": "system", "content": system_prompt},
55
- {"role": "user", "content": user_prompt}
56
- ]
57
-
58
- api_client = client()
59
- response = api_client.send_message(messages, response_format={"type": "json_object"})
60
-
61
- if response and "content" in response:
62
- try:
63
- # Attempt to parse it if the model returned string json
64
- import json
65
- import tinytroupe.utils as utils
66
- extracted_json = utils.extract_json(response["content"])
67
-
68
- # Ensure all keys are present
69
- required_keys = ['age', 'gender', 'occupation', 'city', 'country', 'custom_values', 'custom_life_attitude', 'life_story', 'interests_hobbies', 'attribute_count']
70
-
71
- # If extracting JSON list vs dict
72
- if isinstance(extracted_json, list) and len(extracted_json) > 0:
73
- extracted_json = extracted_json[0]
74
-
75
- for key in required_keys:
76
- if key not in extracted_json:
77
- # provide defaults for missing ones
78
- if key in ['age', 'attribute_count']:
79
- extracted_json[key] = 350 if key == 'attribute_count' else 30
80
- else:
81
- extracted_json[key] = "Unknown"
82
-
83
- return extracted_json
84
- except Exception as e:
85
- print(f"Error parsing JSON from LLM: {e}")
86
- pass
87
-
88
- # Fallback
89
- return {
90
- "age": 30,
91
- "gender": "Non-binary",
92
- "occupation": "Professional",
93
- "city": "Metropolis",
94
- "country": "Country",
95
- "custom_values": "Innovation, Community",
96
- "custom_life_attitude": "Optimistic",
97
- "life_story": "A standard professional background with a passion for their field.",
98
- "interests_hobbies": "Technology, Reading",
99
- "attribute_count": 350
100
- }
101
 
 
102
  def generate_personas(business_description, customer_profile, num_personas, blablador_api_key=None):
103
  """
104
- Generates a list of personas based on the provided inputs, utilizing a double
105
- sequential generation pipeline:
106
- 1. Extract parameters from context via LLM.
107
- 2. Generate persona using deeppersona-experience via gradio client.
108
  """
 
 
109
  api_key_to_use = blablador_api_key or os.getenv("BLABLADOR_API_KEY")
110
 
111
  if not api_key_to_use:
112
  return {"error": "BLABLADOR_API_KEY not found. Please provide it in your API call or set it as a secret in the Space settings."}
113
 
 
114
  original_key = os.getenv("BLABLADOR_API_KEY")
115
- os.environ["BLABLADOR_API_KEY"] = api_key_to_use
116
 
117
  try:
118
- from gradio_client import Client
119
-
 
 
120
  num_personas = int(num_personas)
121
- personas_data = []
122
 
123
- # Step 1: Extract 10 parameters based on the high-level inputs
124
- # For multiple personas, we could call this in a loop or once.
125
- # The prompt implies we want to do it in a pipeline. We'll do it per persona or once based on the prompt.
126
- # Let's do it per persona to generate distinct ones, passing an index or just relying on LLM variance.
127
-
128
- # Connect to gradio client
129
- # In a real scenario, the Hugging Face Token might be needed if the Space is private.
130
- # But deeppersona-experience is public or assumed accessible.
131
- client = Client("THzva/deeppersona-experience")
132
 
133
- for i in range(num_personas):
134
- # To get variety, we can append a note about variety to the profile
135
- profile_with_variance = customer_profile + f"\n\nMake this persona distinct. Persona {i+1} of {num_personas}."
136
-
137
- # Extract parameters using the LLM
138
- params = extract_persona_parameters(business_description, profile_with_variance)
139
-
140
- # Step 2: Call the Gradio API with the extracted parameters
141
- result = client.predict(
142
- age=float(params.get("age", 30)),
143
- gender=str(params.get("gender", "Non-binary")),
144
- occupation=str(params.get("occupation", "Professional")),
145
- city=str(params.get("city", "Metropolis")),
146
- country=str(params.get("country", "Country")),
147
- custom_values=str(params.get("custom_values", "Innovation, Community")),
148
- custom_life_attitude=str(params.get("custom_life_attitude", "Optimistic")),
149
- life_story=str(params.get("life_story", "A standard professional background with a passion for their field.")),
150
- interests_hobbies=str(params.get("interests_hobbies", "Technology, Reading")),
151
- attribute_count=float(params.get("attribute_count", 350)),
152
- api_name="/generate_persona"
153
- )
154
-
155
- # Note: The result from this API is a string (persona profile text)
156
- personas_data.append({
157
- "parameters_used": params,
158
- "persona_profile": result
159
- })
160
-
161
  return personas_data
162
 
163
  except Exception as e:
164
  return {"error": str(e)}
165
-
166
  finally:
 
 
 
167
  if original_key is None:
 
168
  if "BLABLADOR_API_KEY" in os.environ:
169
  del os.environ["BLABLADOR_API_KEY"]
170
  else:
 
171
  os.environ["BLABLADOR_API_KEY"] = original_key
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  with gr.Blocks() as demo:
174
- gr.Markdown("<h1>Tiny Persona Generator</h1>")
175
  with gr.Row():
176
  with gr.Column():
177
  business_description_input = gr.Textbox(label="What is your business about?", lines=5)
178
  customer_profile_input = gr.Textbox(label="Information about your customer profile", lines=5)
179
- num_personas_input = gr.Number(label="Number of personas to generate", value=1, minimum=1, step=1)
180
 
 
 
181
  blablador_api_key_input = gr.Textbox(
182
  label="Blablador API Key (for API client use)",
183
  visible=False
184
  )
185
 
186
  generate_button = gr.Button("Generate Personas")
 
 
 
 
 
 
187
  with gr.Column():
188
- output_json = gr.JSON(label="Generated Personas")
189
 
190
  generate_button.click(
191
  fn=generate_personas,
 
192
  inputs=[business_description_input, customer_profile_input, num_personas_input, blablador_api_key_input],
193
  outputs=output_json,
194
  api_name="generate_personas"
195
  )
196
 
197
- app = gr.mount_gradio_app(app, demo, path="/")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
  if __name__ == "__main__":
200
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
2
  import os
3
  import gradio as gr
4
  import json
5
+ import glob
6
+ from deeppersona.factory import DeepPersonaFactory
7
+ from deeppersona.utils.semantics import select_best_persona, select_relevant_personas_utility
8
+ from deeppersona.simulation_manager import SimulationManager, SimulationConfig
9
+ from deeppersona.agent.social_types import Content
10
+ from huggingface_hub import hf_hub_download, upload_file
11
 
12
+ HF_TOKEN = os.getenv("HF_TOKEN") # Ensure this is set in Space secrets
13
+ REPO_ID = "AUXteam/tiny_factory"
14
+ PERSONA_BASE_FILE = "persona_base.json"
15
 
16
+ simulation_manager = SimulationManager()
 
 
17
 
18
+ def load_persona_base():
19
+ if not HF_TOKEN:
20
+ print("HF_TOKEN not found, persistence disabled.")
21
+ return []
22
+ try:
23
+ path = hf_hub_download(repo_id=REPO_ID, filename=PERSONA_BASE_FILE, repo_type="space", token=HF_TOKEN)
24
+ with open(path, 'r', encoding='utf-8') as f:
25
+ return json.load(f)
26
+ except Exception as e:
27
+ print(f"Error loading persona base: {e}")
28
+ return []
 
 
 
29
 
30
+ def save_persona_base(personas):
31
+ if not HF_TOKEN:
32
+ print("HF_TOKEN not found, skipping upload.")
33
+ return
34
+ with open(PERSONA_BASE_FILE, 'w', encoding='utf-8') as f:
35
+ json.dump(personas, f, indent=4)
36
+ try:
37
+ upload_file(
38
+ path_or_fileobj=PERSONA_BASE_FILE,
39
+ path_in_repo=PERSONA_BASE_FILE,
40
+ repo_id=REPO_ID,
41
+ repo_type="space",
42
+ token=HF_TOKEN
43
+ )
44
+ except Exception as e:
45
+ print(f"Error saving persona base to Hub: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
+ # --- CHANGE 1: The function now accepts an optional API key. ---
48
  def generate_personas(business_description, customer_profile, num_personas, blablador_api_key=None):
49
  """
50
+ Generates a list of DeepPersona instances based on the provided inputs.
51
+ It prioritizes the API key passed as an argument, but falls back to the
52
+ environment variable if none is provided (for UI use).
 
53
  """
54
+ # --- CHANGE 2: Logic to determine which key to use. ---
55
+ # Use the key from the API call if provided, otherwise get it from the Space secrets.
56
  api_key_to_use = blablador_api_key or os.getenv("BLABLADOR_API_KEY")
57
 
58
  if not api_key_to_use:
59
  return {"error": "BLABLADOR_API_KEY not found. Please provide it in your API call or set it as a secret in the Space settings."}
60
 
61
+ # Store the original state of the environment variable, if it exists
62
  original_key = os.getenv("BLABLADOR_API_KEY")
 
63
 
64
  try:
65
+ # --- CHANGE 3: Securely set the correct environment variable for this request. ---
66
+ # The underlying deeppersona library will look for this variable.
67
+ os.environ["BLABLADOR_API_KEY"] = api_key_to_use
68
+
69
  num_personas = int(num_personas)
 
70
 
71
+ factory = DeepPersonaFactory(
72
+ context=business_description,
73
+ sampling_space_description=customer_profile,
74
+ total_population_size=num_personas
75
+ )
76
+
77
+ # Restricted to deep persona generation with double sequential API call
78
+ people = factory.generate_people(number_of_people=num_personas, parallelize=False, deep_persona=True)
79
+ personas_data = [person._persona for person in people]
80
 
81
+ # --- NEW: Update the Tresor ---
82
+ current_base = load_persona_base()
83
+ current_base.extend(personas_data)
84
+ save_persona_base(current_base)
85
+ # ------------------------------
86
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  return personas_data
88
 
89
  except Exception as e:
90
  return {"error": str(e)}
91
+
92
  finally:
93
+ # --- CHANGE 4: A robust cleanup using a 'finally' block. ---
94
+ # This ensures the environment is always restored to its original state,
95
+ # whether the function succeeds or fails.
96
  if original_key is None:
97
+ # If the variable didn't exist originally, remove it.
98
  if "BLABLADOR_API_KEY" in os.environ:
99
  del os.environ["BLABLADOR_API_KEY"]
100
  else:
101
+ # If it existed, restore its original value.
102
  os.environ["BLABLADOR_API_KEY"] = original_key
103
 
104
+
105
+ def find_best_persona(criteria):
106
+ """
107
+ Loads the persona base and finds the best matching persona based on criteria.
108
+ """
109
+ personas = load_persona_base()
110
+ if not personas:
111
+ return {"error": "Persona base is empty. Generate some personas first!"}
112
+
113
+ try:
114
+ # select_best_persona uses LLM to find the best index
115
+ idx = select_best_persona(criteria=criteria, personas=personas)
116
+
117
+ try:
118
+ idx = int(idx)
119
+ except (ValueError, TypeError):
120
+ return {"error": f"LLM returned an invalid index: {idx}"}
121
+
122
+ if idx >= 0 and idx < len(personas):
123
+ return personas[idx]
124
+ else:
125
+ return {"error": f"No matching persona found for criteria: {criteria}"}
126
+ except Exception as e:
127
+ return {"error": f"Error during persona matching: {str(e)}"}
128
+
129
+
130
+ def load_example_personas():
131
+ """
132
+ Loads example personas from the deeppersona library.
133
+ """
134
+ example_personas = []
135
+ # Path to the agents folder in deeppersona/examples
136
+ agents_path = os.path.join("deeppersona", "examples", "agents", "*.agent.json")
137
+ for file_path in glob.glob(agents_path):
138
+ try:
139
+ with open(file_path, 'r', encoding='utf-8') as f:
140
+ data = json.load(f)
141
+ if "persona" in data:
142
+ example_personas.append(data["persona"])
143
+ except Exception as e:
144
+ print(f"Error loading example persona from {file_path}: {e}")
145
+ return example_personas
146
+
147
+
148
+ def identify_personas(context):
149
+ """
150
+ Identifies appropriate personas from the Tresor and example agents based on context.
151
+ """
152
+ try:
153
+ # 1. Load Tresor personas (persisted JSON)
154
+ tresor_personas = load_persona_base()
155
+
156
+ # 2. Load Example personas from deeppersona library
157
+ example_personas = load_example_personas()
158
+
159
+ all_available = tresor_personas + example_personas
160
+
161
+ if not all_available:
162
+ return {"error": "No personas available in Tresor or examples."}
163
+
164
+ # 3. Use LLM to filter/select which ones match the 'context'
165
+ # Returns a list of indices
166
+ indices = select_relevant_personas_utility(context, all_available)
167
+
168
+ selected = []
169
+ if isinstance(indices, list):
170
+ for i in indices:
171
+ try:
172
+ idx = int(i)
173
+ if 0 <= idx < len(all_available):
174
+ selected.append(all_available[idx])
175
+ except (ValueError, TypeError):
176
+ continue
177
+
178
+ return selected
179
+ except Exception as e:
180
+ return {"error": str(e)}
181
+
182
+
183
+ def generate_social_network_api(name, persona_count, network_type, focus_group_name=None):
184
+ """
185
+ Gradio API endpoint for generating a social network.
186
+ """
187
+ try:
188
+ config = SimulationConfig(name=name, persona_count=int(persona_count), network_type=network_type)
189
+ simulation = simulation_manager.create_simulation(config, focus_group_name=focus_group_name)
190
+ return {
191
+ "simulation_id": simulation.id,
192
+ "name": simulation.config.name,
193
+ "persona_count": len(simulation.personas),
194
+ "network_metrics": simulation.network.get_metrics()
195
+ }
196
+ except Exception as e:
197
+ return {"error": str(e)}
198
+
199
+
200
+ def predict_engagement_api(simulation_id, content_text, format="text"):
201
+ """
202
+ Gradio API endpoint for predicting engagement.
203
+ """
204
+ try:
205
+ content = Content(text=content_text, format=format)
206
+ result = simulation_manager.run_simulation(simulation_id, content)
207
+ return {
208
+ "total_reach": result.total_reach,
209
+ "expected_likes": result.expected_likes,
210
+ "expected_comments": result.expected_comments,
211
+ "expected_shares": result.expected_shares,
212
+ "execution_time": result.execution_time,
213
+ "avg_sentiment": result.avg_sentiment,
214
+ "feedback_summary": result.feedback_summary
215
+ }
216
+ except Exception as e:
217
+ return {"error": str(e)}
218
+
219
+
220
+ def start_simulation_async_api(simulation_id, content_text, format="text"):
221
+ """
222
+ Starts a simulation in the background.
223
+ """
224
+ try:
225
+ content = Content(text=content_text, format=format)
226
+ simulation_manager.run_simulation(simulation_id, content, background=True)
227
+ return {"status": "started", "simulation_id": simulation_id}
228
+ except Exception as e:
229
+ return {"error": str(e)}
230
+
231
+
232
+ def get_simulation_status_api(simulation_id):
233
+ """
234
+ Checks the status and progress of a simulation.
235
+ """
236
+ try:
237
+ sim = simulation_manager.get_simulation(simulation_id)
238
+ if not sim: return {"error": "Simulation not found"}
239
+
240
+ status_data = {
241
+ "status": sim.status,
242
+ "progress": sim.progress
243
+ }
244
+
245
+ if sim.status == "completed" and sim.last_result:
246
+ status_data["result"] = {
247
+ "total_reach": sim.last_result.total_reach,
248
+ "expected_likes": sim.last_result.expected_likes,
249
+ "avg_sentiment": sim.last_result.avg_sentiment
250
+ }
251
+
252
+ return status_data
253
+ except Exception as e:
254
+ return {"error": str(e)}
255
+
256
+
257
+ def send_chat_message_api(simulation_id, sender, message):
258
+ """
259
+ Sends a message to the simulation chat.
260
+ """
261
+ try:
262
+ return simulation_manager.send_chat_message(simulation_id, sender, message)
263
+ except Exception as e:
264
+ return {"error": str(e)}
265
+
266
+
267
+ def get_chat_history_api(simulation_id):
268
+ """
269
+ Gets the chat history for a simulation.
270
+ """
271
+ try:
272
+ return simulation_manager.get_chat_history(simulation_id)
273
+ except Exception as e:
274
+ return {"error": str(e)}
275
+
276
+
277
+ def generate_variants_api(content_text, num_variants):
278
+ """
279
+ Gradio API endpoint for generating content variants.
280
+ """
281
+ try:
282
+ variants = simulation_manager.variant_generator.generate_variants(content_text, num_variants=int(num_variants))
283
+ return [{"text": v.text, "strategy": v.strategy} for v in variants]
284
+ except Exception as e:
285
+ return {"error": str(e)}
286
+
287
+
288
+ def list_simulations_api():
289
+ """
290
+ Gradio API endpoint for listing simulations.
291
+ """
292
+ try:
293
+ return simulation_manager.list_simulations()
294
+ except Exception as e:
295
+ return {"error": str(e)}
296
+
297
+
298
+ def list_personas_api(simulation_id):
299
+ """
300
+ Gradio API endpoint for listing personas in a simulation.
301
+ """
302
+ try:
303
+ return simulation_manager.list_personas(simulation_id)
304
+ except Exception as e:
305
+ return {"error": str(e)}
306
+
307
+
308
+ def get_persona_api(simulation_id, persona_name):
309
+ """
310
+ Gradio API endpoint for getting persona details.
311
+ """
312
+ try:
313
+ return simulation_manager.get_persona(simulation_id, persona_name)
314
+ except Exception as e:
315
+ return {"error": str(e)}
316
+
317
+
318
+ def delete_simulation_api(simulation_id):
319
+ """
320
+ Gradio API endpoint for deleting a simulation.
321
+ """
322
+ try:
323
+ success = simulation_manager.delete_simulation(simulation_id)
324
+ return {"success": success}
325
+ except Exception as e:
326
+ return {"error": str(e)}
327
+
328
+
329
+ def export_simulation_api(simulation_id):
330
+ """
331
+ Gradio API endpoint for exporting a simulation.
332
+ """
333
+ try:
334
+ return simulation_manager.export_simulation(simulation_id)
335
+ except Exception as e:
336
+ return {"error": str(e)}
337
+
338
+
339
+ def get_network_graph_api(simulation_id):
340
+ """
341
+ Gradio API endpoint for getting network graph data.
342
+ """
343
+ try:
344
+ sim = simulation_manager.get_simulation(simulation_id)
345
+ if not sim: return {"error": "Simulation not found"}
346
+
347
+ nodes = []
348
+ for p in sim.personas:
349
+ nodes.append({
350
+ "id": p.name,
351
+ "label": p.name,
352
+ "role": p._persona.get("occupation"),
353
+ "location": p._persona.get("residence")
354
+ })
355
+
356
+ edges = []
357
+ for edge in sim.network.edges:
358
+ edges.append({
359
+ "source": edge.connection_id.split('_')[0],
360
+ "target": edge.connection_id.split('_')[1],
361
+ "strength": edge.strength
362
+ })
363
+
364
+ return {"nodes": nodes, "edges": edges}
365
+ except Exception as e:
366
+ return {"error": str(e)}
367
+
368
+
369
+ def list_focus_groups_api():
370
+ """
371
+ Gradio API endpoint for listing focus groups.
372
+ """
373
+ try:
374
+ return simulation_manager.list_focus_groups()
375
+ except Exception as e:
376
+ return {"error": str(e)}
377
+
378
+
379
+ def save_focus_group_api(name, simulation_id):
380
+ """
381
+ Gradio API endpoint for saving a focus group from a simulation.
382
+ """
383
+ try:
384
+ sim = simulation_manager.get_simulation(simulation_id)
385
+ if not sim: return {"error": "Simulation not found"}
386
+ simulation_manager.save_focus_group(name, sim.personas)
387
+ return {"status": "success", "name": name}
388
+ except Exception as e:
389
+ return {"error": str(e)}
390
+
391
+
392
  with gr.Blocks() as demo:
393
+ gr.Markdown("<h1>Deep Persona Generator</h1>")
394
  with gr.Row():
395
  with gr.Column():
396
  business_description_input = gr.Textbox(label="What is your business about?", lines=5)
397
  customer_profile_input = gr.Textbox(label="Information about your customer profile", lines=5)
398
+ num_personas_input = gr.Number(label="Number of Deep Personas to generate", value=1, minimum=1, step=1)
399
 
400
+ # --- CHANGE 5: The API key input is now INVISIBLE. ---
401
+ # It still exists, so the API endpoint is created, but it's hidden from UI users.
402
  blablador_api_key_input = gr.Textbox(
403
  label="Blablador API Key (for API client use)",
404
  visible=False
405
  )
406
 
407
  generate_button = gr.Button("Generate Personas")
408
+
409
+ gr.Markdown("---")
410
+ gr.Markdown("<h3>Search Tresor</h3>")
411
+ criteria_input = gr.Textbox(label="Criteria to find best matching persona", lines=2)
412
+ find_button = gr.Button("Find Best Persona in Tresor")
413
+
414
  with gr.Column():
415
+ output_json = gr.JSON(label="Output (Generated or Matched Persona)")
416
 
417
  generate_button.click(
418
  fn=generate_personas,
419
+ # --- CHANGE 6: Pass the invisible textbox to the function. ---
420
  inputs=[business_description_input, customer_profile_input, num_personas_input, blablador_api_key_input],
421
  outputs=output_json,
422
  api_name="generate_personas"
423
  )
424
 
425
+ find_button.click(
426
+ fn=find_best_persona,
427
+ inputs=[criteria_input],
428
+ outputs=output_json,
429
+ api_name="find_best_persona"
430
+ )
431
+
432
+ with gr.Tab("Identify Personas API", visible=False):
433
+ api_id_context = gr.Textbox(label="Context")
434
+ api_id_btn = gr.Button("Identify Personas")
435
+ api_id_out = gr.JSON()
436
+ api_id_btn.click(identify_personas, inputs=[api_id_context], outputs=api_id_out, api_name="identify_personas")
437
+
438
+ # Invisible components to expose API endpoints
439
+ # These won't be seen by regular UI users but will be available via /api
440
+ with gr.Tab("Social Network API", visible=False):
441
+ api_net_name = gr.Textbox(label="Network Name")
442
+ api_net_count = gr.Number(label="Deep Persona Count", value=10)
443
+ api_net_type = gr.Dropdown(choices=["scale_free", "small_world"], label="Network Type")
444
+ api_net_focus = gr.Textbox(label="Focus Group Name (optional)")
445
+ api_net_btn = gr.Button("Generate Network")
446
+ api_net_out = gr.JSON()
447
+ api_net_btn.click(generate_social_network_api, inputs=[api_net_name, api_net_count, api_net_type, api_net_focus], outputs=api_net_out, api_name="generate_social_network")
448
+
449
+ with gr.Tab("Engagement Prediction API", visible=False):
450
+ api_pred_sim_id = gr.Textbox(label="Simulation ID")
451
+ api_pred_content = gr.Textbox(label="Content Text")
452
+ api_pred_format = gr.Textbox(label="Format", value="text")
453
+ api_pred_btn = gr.Button("Predict Engagement")
454
+ api_pred_out = gr.JSON()
455
+ api_pred_btn.click(predict_engagement_api, inputs=[api_pred_sim_id, api_pred_content, api_pred_format], outputs=api_pred_out, api_name="predict_engagement")
456
+
457
+ with gr.Tab("Async Simulation API", visible=False):
458
+ api_async_sim_id = gr.Textbox(label="Simulation ID")
459
+ api_async_content = gr.Textbox(label="Content Text")
460
+ api_async_format = gr.Textbox(label="Format", value="text")
461
+ api_async_btn = gr.Button("Start Simulation")
462
+ api_async_out = gr.JSON()
463
+ api_async_btn.click(start_simulation_async_api, inputs=[api_async_sim_id, api_async_content, api_async_format], outputs=api_async_out, api_name="start_simulation_async")
464
+
465
+ api_status_id = gr.Textbox(label="Simulation ID")
466
+ api_status_btn = gr.Button("Check Status")
467
+ api_status_out = gr.JSON()
468
+ api_status_btn.click(get_simulation_status_api, inputs=[api_status_id], outputs=api_status_out, api_name="get_simulation_status")
469
+
470
+ with gr.Tab("Chat API", visible=False):
471
+ api_chat_sim_id = gr.Textbox(label="Simulation ID")
472
+ api_chat_sender = gr.Textbox(label="Sender", value="User")
473
+ api_chat_msg = gr.Textbox(label="Message")
474
+ api_chat_send_btn = gr.Button("Send Message")
475
+ api_chat_send_out = gr.JSON()
476
+ api_chat_send_btn.click(send_chat_message_api, inputs=[api_chat_sim_id, api_chat_sender, api_chat_msg], outputs=api_chat_send_out, api_name="send_chat_message")
477
+
478
+ api_chat_hist_btn = gr.Button("Get History")
479
+ api_chat_hist_out = gr.JSON()
480
+ api_chat_hist_btn.click(get_chat_history_api, inputs=[api_chat_sim_id], outputs=api_chat_hist_out, api_name="get_chat_history")
481
+
482
+ with gr.Tab("Content Variants API", visible=False):
483
+ api_var_content = gr.Textbox(label="Original Content")
484
+ api_var_count = gr.Number(label="Number of Variants", value=5)
485
+ api_var_btn = gr.Button("Generate Variants")
486
+ api_var_out = gr.JSON()
487
+ api_var_btn.click(generate_variants_api, inputs=[api_var_content, api_var_count], outputs=api_var_out, api_name="generate_variants")
488
+
489
+ with gr.Tab("List Simulations API", visible=False):
490
+ api_list_sim_btn = gr.Button("List Simulations")
491
+ api_list_sim_out = gr.JSON()
492
+ api_list_sim_btn.click(list_simulations_api, outputs=api_list_sim_out, api_name="list_simulations")
493
+
494
+ with gr.Tab("List Personas API", visible=False):
495
+ api_list_per_sim_id = gr.Textbox(label="Simulation ID")
496
+ api_list_per_btn = gr.Button("List Personas")
497
+ api_list_per_out = gr.JSON()
498
+ api_list_per_btn.click(list_personas_api, inputs=[api_list_per_sim_id], outputs=api_list_per_out, api_name="list_personas")
499
+
500
+ with gr.Tab("Get Persona API", visible=False):
501
+ api_get_per_sim_id = gr.Textbox(label="Simulation ID")
502
+ api_get_per_name = gr.Textbox(label="Deep Persona Name")
503
+ api_get_per_btn = gr.Button("Get Persona")
504
+ api_get_per_out = gr.JSON()
505
+ api_get_per_btn.click(get_persona_api, inputs=[api_get_per_sim_id, api_get_per_name], outputs=api_get_per_out, api_name="get_persona")
506
+
507
+ with gr.Tab("Delete Simulation API", visible=False):
508
+ api_del_sim_id = gr.Textbox(label="Simulation ID")
509
+ api_del_btn = gr.Button("Delete Simulation")
510
+ api_del_out = gr.JSON()
511
+ api_del_btn.click(delete_simulation_api, inputs=[api_del_sim_id], outputs=api_del_out, api_name="delete_simulation")
512
+
513
+ with gr.Tab("Export Simulation API", visible=False):
514
+ api_exp_sim_id = gr.Textbox(label="Simulation ID")
515
+ api_exp_btn = gr.Button("Export Simulation")
516
+ api_exp_out = gr.JSON()
517
+ api_exp_btn.click(export_simulation_api, inputs=[api_exp_sim_id], outputs=api_exp_out, api_name="export_simulation")
518
+
519
+ with gr.Tab("Network Graph API", visible=False):
520
+ api_graph_sim_id = gr.Textbox(label="Simulation ID")
521
+ api_graph_btn = gr.Button("Get Graph Data")
522
+ api_graph_out = gr.JSON()
523
+ api_graph_btn.click(get_network_graph_api, inputs=[api_graph_sim_id], outputs=api_graph_out, api_name="get_network_graph")
524
+
525
+ with gr.Tab("Focus Group API", visible=False):
526
+ api_list_fg_btn = gr.Button("List Focus Groups")
527
+ api_list_fg_out = gr.JSON()
528
+ api_list_fg_btn.click(list_focus_groups_api, outputs=api_list_fg_out, api_name="list_focus_groups")
529
+
530
+ api_save_fg_name = gr.Textbox(label="Focus Group Name")
531
+ api_save_fg_sim_id = gr.Textbox(label="Simulation ID")
532
+ api_save_fg_btn = gr.Button("Save Focus Group")
533
+ api_save_fg_out = gr.JSON()
534
+ api_save_fg_btn.click(save_focus_group_api, inputs=[api_save_fg_name, api_save_fg_sim_id], outputs=api_save_fg_out, api_name="save_focus_group")
535
 
536
  if __name__ == "__main__":
537
+ demo.queue().launch()
config.ini CHANGED
@@ -1,7 +1,12 @@
1
  [OpenAI]
2
  API_TYPE=helmholtz-blablador
3
- MODEL=alias-large
4
- REASONING_MODEL=alias-large
 
 
5
  TOP_P=1.0
6
- MAX_ATTEMPTS=5
7
- WAITING_TIME=20
 
 
 
 
1
  [OpenAI]
2
  API_TYPE=helmholtz-blablador
3
+ MODEL=alias-fast
4
+ REASONING_MODEL=alias-fast
5
+ FALLBACK_MODEL_LARGE=alias-large
6
+ FALLBACK_MODEL_HUGE=alias-huge
7
  TOP_P=1.0
8
+ MAX_ATTEMPTS=999
9
+ WAITING_TIME=35
10
+
11
+ [Logging]
12
+ LOGLEVEL=DEBUG
deeppersona/__init__.py CHANGED
@@ -193,7 +193,7 @@ class ConfigManager:
193
 
194
  # Create global instance of the configuration manager
195
  config = utils.read_config_file()
196
- utils.pretty_print_deeppersona_version()
197
  utils.pretty_print_datetime()
198
  utils.pretty_print_config(config)
199
  utils.start_logger(config)
 
193
 
194
  # Create global instance of the configuration manager
195
  config = utils.read_config_file()
196
+ utils.pretty_print_tinytroupe_version()
197
  utils.pretty_print_datetime()
198
  utils.pretty_print_config(config)
199
  utils.start_logger(config)
deeppersona/control.py CHANGED
@@ -673,20 +673,20 @@ class Transaction:
673
  if output is None:
674
  return None
675
  elif isinstance(output, DeepPersona):
676
- return {"type": "DeepPersonaRef", "name": output.name}
677
  elif isinstance(output, DeepWorld):
678
- return {"type": "DeepWorldRef", "name": output.name}
679
  elif isinstance(output, DeepPersonaFactory):
680
- return {"type": "DeepPersonaFactoryRef", "name": output.name}
681
  elif isinstance(output, list):
682
  encoded_list = []
683
  for item in output:
684
  if isinstance(item, DeepPersona):
685
- encoded_list.append({"type": "DeepPersonaRef", "name": item.name})
686
  elif isinstance(item, DeepWorld):
687
- encoded_list.append({"type": "DeepWorldRef", "name": item.name})
688
  elif isinstance(item, DeepPersonaFactory):
689
- encoded_list.append({"type": "DeepPersonaFactoryRef", "name": item.name})
690
  else:
691
  encoded_list.append({"type": "JSON", "value": item})
692
  return {"type": "List", "value": encoded_list}
@@ -706,20 +706,20 @@ class Transaction:
706
 
707
  if encoded_output is None:
708
  return None
709
- elif encoded_output["type"] == "DeepPersonaRef":
710
  return DeepPersona.get_agent_by_name(encoded_output["name"])
711
- elif encoded_output["type"] == "DeepWorldRef":
712
  return DeepWorld.get_environment_by_name(encoded_output["name"])
713
- elif encoded_output["type"] == "DeepPersonaFactoryRef":
714
  return DeepPersonaFactory.get_factory_by_name(encoded_output["name"])
715
  elif encoded_output["type"] == "List":
716
  decoded_list = []
717
  for item in encoded_output["value"]:
718
- if item["type"] == "DeepPersonaRef":
719
  decoded_list.append(DeepPersona.get_agent_by_name(item["name"]))
720
- elif item["type"] == "DeepWorldRef":
721
  decoded_list.append(DeepWorld.get_environment_by_name(item["name"]))
722
- elif item["type"] == "DeepPersonaFactoryRef":
723
  decoded_list.append(DeepPersonaFactory.get_factory_by_name(item["name"]))
724
  else:
725
  decoded_list.append(item["value"])
 
673
  if output is None:
674
  return None
675
  elif isinstance(output, DeepPersona):
676
+ return {"type": "TinyPersonRef", "name": output.name}
677
  elif isinstance(output, DeepWorld):
678
+ return {"type": "TinyWorldRef", "name": output.name}
679
  elif isinstance(output, DeepPersonaFactory):
680
+ return {"type": "TinyFactoryRef", "name": output.name}
681
  elif isinstance(output, list):
682
  encoded_list = []
683
  for item in output:
684
  if isinstance(item, DeepPersona):
685
+ encoded_list.append({"type": "TinyPersonRef", "name": item.name})
686
  elif isinstance(item, DeepWorld):
687
+ encoded_list.append({"type": "TinyWorldRef", "name": item.name})
688
  elif isinstance(item, DeepPersonaFactory):
689
+ encoded_list.append({"type": "TinyFactoryRef", "name": item.name})
690
  else:
691
  encoded_list.append({"type": "JSON", "value": item})
692
  return {"type": "List", "value": encoded_list}
 
706
 
707
  if encoded_output is None:
708
  return None
709
+ elif encoded_output["type"] == "TinyPersonRef":
710
  return DeepPersona.get_agent_by_name(encoded_output["name"])
711
+ elif encoded_output["type"] == "TinyWorldRef":
712
  return DeepWorld.get_environment_by_name(encoded_output["name"])
713
+ elif encoded_output["type"] == "TinyFactoryRef":
714
  return DeepPersonaFactory.get_factory_by_name(encoded_output["name"])
715
  elif encoded_output["type"] == "List":
716
  decoded_list = []
717
  for item in encoded_output["value"]:
718
+ if item["type"] == "TinyPersonRef":
719
  decoded_list.append(DeepPersona.get_agent_by_name(item["name"]))
720
+ elif item["type"] == "TinyWorldRef":
721
  decoded_list.append(DeepWorld.get_environment_by_name(item["name"]))
722
+ elif item["type"] == "TinyFactoryRef":
723
  decoded_list.append(DeepPersonaFactory.get_factory_by_name(item["name"]))
724
  else:
725
  decoded_list.append(item["value"])
deeppersona/environment/social_deep_world.py CHANGED
@@ -52,7 +52,7 @@ class SimulationResult:
52
  self.total_reach = len(set(e["persona_id"] for e in self.engagements)) # Simplified
53
  # ... more metrics
54
 
55
- class SocialDeepWorld(DeepWorld):
56
  """Extended DeepWorld with social network capabilities"""
57
 
58
  def __init__(self, name: str, network: NetworkTopology = None, **kwargs):
 
52
  self.total_reach = len(set(e["persona_id"] for e in self.engagements)) # Simplified
53
  # ... more metrics
54
 
55
+ class SocialTinyWorld(DeepWorld):
56
  """Extended DeepWorld with social network capabilities"""
57
 
58
  def __init__(self, name: str, network: NetworkTopology = None, **kwargs):
deeppersona/extraction/results_extractor.py CHANGED
@@ -152,7 +152,7 @@ performed.
152
 
153
 
154
  def extract_results_from_world(self,
155
- deepworld:DeepWorld,
156
  extraction_objective:str="The main points that can be derived from the agents conversations and actions.",
157
  situation:str="",
158
  fields:list=None,
@@ -162,7 +162,7 @@ performed.
162
  Extracts results from a DeepWorld instance.
163
 
164
  Args:
165
- deepworld (DeepWorld): The DeepWorld instance to extract results from.
166
  extraction_objective (str): The extraction objective.
167
  situation (str): The situation to consider.
168
  fields (list, optional): The fields to extract. If None, the extractor will decide what names to use.
@@ -189,7 +189,7 @@ performed.
189
  rendering_configs)})
190
 
191
  # TODO: either summarize first or break up into multiple tasks
192
- interaction_history = deepworld.pretty_current_interactions(max_content_length=None)
193
 
194
  extraction_request_prompt = \
195
  f"""
@@ -203,7 +203,7 @@ You are considering various agents.
203
 
204
  ## Agents Interactions History
205
 
206
- You will consider the history of interactions from various agents that exist in an environment called {deepworld.name}.
207
  Each interaction history includes stimuli the corresponding agent received as well as actions it performed.
208
 
209
  {interaction_history}
@@ -223,7 +223,7 @@ Each interaction history includes stimuli the corresponding agent received as we
223
  result = None
224
 
225
  # cache the result
226
- self.world_extraction[deepworld.name] = result
227
 
228
  return result
229
 
 
152
 
153
 
154
  def extract_results_from_world(self,
155
+ tinyworld:DeepWorld,
156
  extraction_objective:str="The main points that can be derived from the agents conversations and actions.",
157
  situation:str="",
158
  fields:list=None,
 
162
  Extracts results from a DeepWorld instance.
163
 
164
  Args:
165
+ tinyworld (DeepWorld): The DeepWorld instance to extract results from.
166
  extraction_objective (str): The extraction objective.
167
  situation (str): The situation to consider.
168
  fields (list, optional): The fields to extract. If None, the extractor will decide what names to use.
 
189
  rendering_configs)})
190
 
191
  # TODO: either summarize first or break up into multiple tasks
192
+ interaction_history = tinyworld.pretty_current_interactions(max_content_length=None)
193
 
194
  extraction_request_prompt = \
195
  f"""
 
203
 
204
  ## Agents Interactions History
205
 
206
+ You will consider the history of interactions from various agents that exist in an environment called {tinyworld.name}.
207
  Each interaction history includes stimuli the corresponding agent received as well as actions it performed.
208
 
209
  {interaction_history}
 
223
  result = None
224
 
225
  # cache the result
226
+ self.world_extraction[tinyworld.name] = result
227
 
228
  return result
229
 
deeppersona/factory/deep_persona_factory.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import json
3
  import chevron
@@ -30,16 +31,9 @@ class DeepPersonaFactory(DeepPersonaFactoryBase):
30
  def __init__(self, sampling_space_description:str=None, total_population_size:int=None, context:str=None, simulation_id:str=None):
31
  """
32
  Initialize a DeepPersonaFactory instance.
33
-
34
- Args:
35
- sampling_space_description (str, optional): The description of the sampling space. Defaults to None. If this is
36
- specified, then population_size must also be specified.
37
- population_size (int, optional): The size of the population to sample from. Defaults to None.
38
- context (str): The context text used to generate the DeepPersona instances.
39
- simulation_id (str, optional): The ID of the simulation. Defaults to None.
40
  """
41
  super().__init__(simulation_id)
42
- self.person_prompt_template_path = os.path.join(os.path.dirname(__file__), 'prompts/generate_person.mustache')
43
  self.context_text = context
44
  self.sampling_space_description = sampling_space_description
45
  self.population_size = total_population_size
@@ -48,131 +42,109 @@ class DeepPersonaFactory(DeepPersonaFactoryBase):
48
  self.sampling_plan = None
49
  self.remaining_characteristics_sample = None
50
 
51
- self.generated_minibios = [] # keep track of the generated persons. We keep the minibio to avoid generating the same person twice.
52
  self.generated_names = []
53
 
54
- # TODO obsolete?
55
- @staticmethod
56
- def generate_person_factories(number_of_factories, generic_context_text):
57
  """
58
- Generate a list of DeepPersonaFactory instances using OpenAI's LLM.
59
-
60
- Args:
61
- number_of_factories (int): The number of DeepPersonaFactory instances to generate.
62
- generic_context_text (str): The generic context text used to generate the DeepPersonaFactory instances.
63
-
64
- Returns:
65
- list: A list of DeepPersonaFactory instances.
66
  """
 
 
 
67
 
68
- logger.info(f"Starting the generation of the {number_of_factories} person factories based on that context: {generic_context_text}")
69
-
70
- system_prompt = open(os.path.join(os.path.dirname(__file__), 'prompts/generate_person_factory.md'), 'r', encoding='utf-8', errors='replace').read()
71
-
72
- messages = []
73
- messages.append({"role": "system", "content": system_prompt})
74
-
75
- user_prompt = chevron.render("Please, create {{number_of_factories}} person descriptions based on the following broad context: {{context}}", {
76
- "number_of_factories": number_of_factories,
77
- "context": generic_context_text
78
- })
79
-
80
- messages.append({"role": "user", "content": user_prompt})
81
-
82
- response = openai_utils.client().send_message(messages)
83
-
84
- if response is not None:
85
- result = utils.extract_json(response["content"])
86
-
87
- factories = []
88
- for i in range(number_of_factories):
89
- logger.debug(f"Generating person factory with description: {result[i]}")
90
- factories.append(DeepPersonaFactory(result[i]))
91
-
92
- return factories
93
-
94
- return None
95
-
96
- @staticmethod
97
- def create_factory_from_demography(demography_description_or_file_path:Union[str, dict], population_size:int, additional_demographic_specification:str=None, context:str=None):
98
- """
99
- Create a DeepPersonaFactory instance from a demography description, which can be wither given as a file path or a dictionary
100
- (but not both).
101
-
102
- Args:
103
- demography_description_or_file_path (Union[str, dict]): The demography description or the file path to the demography description.
104
- population_size (int): The size of the population to sample from.
105
- context (str, optional): Additional context text used to generate the DeepPersona instances. Defaults to None.
106
-
107
- Returns:
108
- DeepPersonaFactory: A DeepPersonaFactory instance.
109
- """
110
- # read the demography description from a file or use the given dictionary
111
- if isinstance(demography_description_or_file_path, str):
112
- demography_description = json.loads(open(demography_description_or_file_path, 'r', encoding='utf-8', errors='replace').read())
113
- elif isinstance(demography_description_or_file_path, dict):
114
- demography_description = demography_description_or_file_path
115
- else:
116
- raise ValueError("demography_description_or_file_path must be either a string or a dictionary.")
117
-
118
- if population_size is None:
119
- raise ValueError("population_size must be specified.")
120
-
121
-
122
- full_demography_description = \
123
- f"""
124
- # Sampling space specification
125
-
126
- The population described by the demographic data below. Make sure you consider very detailed, fine-grained,
127
- characteristics of the individuals in the population.
128
-
129
- ## Directives
130
- Please follow these rules:
131
- - produce a uniformly distributed sample of the requested population, so that all characteristics are represented in the sample
132
- in the right proportions, as specified in the demographic data below.
133
- - consider as many different population segments as possible, while **always** keeping **proportions** correct.For example,
134
- instead of sampling 10 people from segment A and 5 from segment B, you can instead sample 2 from A, 1 from B,
135
- and 7 others from other segments, provided the proportions are maintained correct and there are enough people to sample.
136
- - also use any built-in knowledge you might have of the populations in question to improve the sampling space,
137
- provided this built-in knowledge does not conflict with the demographic data below.
138
-
139
- The sample must include representative people from the broad population, so for instance ensure that you include values covering
140
- people from all walks of life possible from the specified demographic data and your built-in knowledge of the target population, such as:
141
- - from the simplest professions to those of the highest ranks;
142
- - from the youngest to the oldest;
143
- - from the kind to the evil;
144
- - from the positive and enthusiastic to the negative and pessimistic;
145
- - from the happy and joyful to the sad and depressed;
146
- - from the most conservative, to the most liberal;
147
- - from the educated, to the ignorant;
148
- - from the healthy to the sick;
149
- - from those who enjoy bland food, to those who enjoy spicy food;
150
- - from rich to poor.
151
 
152
- Make sure there's sufficient variety to represent even extreme cases, so that fringe opinions or far fetched characteristics are also represented.
153
- Because these are by definition rare, here you can add a larger proportion than what is truly present in the population, so that there's some
154
- information from these rare cases.
 
 
155
 
156
- In particular, the population MUST cover both POSITIVE and NEGATIVE possibilities of the various characteristics
157
- (e.g., rich vs poor, likes sugar vs don't like sugar, enthusiastic vs apathetic).
158
 
159
- ## Additional demographic specification (if any)
160
- {additional_demographic_specification if additional_demographic_specification is not None else "(none)"}
161
-
162
- ## Demographic data
163
- {json.dumps(demography_description, indent=4)}
164
  """
165
-
166
- return DeepPersonaFactory(context=context,
167
- sampling_space_description=full_demography_description,
168
- total_population_size=population_size)
169
-
170
- @classmethod
171
- def _clear_factories(cls):
172
  """
173
- Additional class-level cleanup for this subclass.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  """
175
- DeepPersonaFactory.all_unique_names = [] # clear the list of all unique names, so that the next factories can start fresh.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  def generate_person(self,
178
  agent_particularities:str=None,
@@ -182,151 +154,57 @@ class DeepPersonaFactory(DeepPersonaFactoryBase):
182
  attempts:int=10,
183
  post_processing_func=None,
184
  deep_persona:bool=True) -> DeepPersona:
185
- """
186
- Generate a DeepPersona instance using OpenAI's LLM.
187
-
188
- Args:
189
- agent_particularities (str): The particularities of the agent.
190
- temperature (float): The temperature to use when sampling from the LLM.
191
- frequency_penalty (float): The frequency penalty to use when sampling from the LLM.
192
- presence_penalty (float): The presence penalty to use when sampling from the LLM.
193
- attempts (int): The number of attempts to generate a DeepPersona instance.
194
- post_processing_func (function): A function to apply to the generated agent after it is created.
195
-
196
- Returns:
197
- DeepPersona: A DeepPersona instance generated using the LLM.
198
- """
199
-
200
  logger.debug(f"Starting the person generation based these particularities: {agent_particularities}")
201
  fresh_agent_name = None
202
 
203
- # are we going to use a pre-computed sample of characteristics too?
204
  if self.population_size is not None:
205
-
206
  with concurrent_agent_generataion_lock:
207
  if self.remaining_characteristics_sample is None:
208
- # if the sample does not exist, we generate it here once.
209
  self.initialize_sampling_plan()
210
 
211
- logger.debug(f"Sampling plan initialized. Remaining characteristics sample: {self.remaining_characteristics_sample}")
212
-
213
- # CONCURRENT PROTECTION
214
  with concurrent_agent_generataion_lock:
215
  if len(self.remaining_characteristics_sample) == 0:
216
- logger.warning("No more characteristics samples left to sample from. This can happen if the sampling plan did not sum up correctly.")
217
  return None
218
-
219
  else:
220
  sampled_characteristics = self.remaining_characteristics_sample.pop()
221
- logger.debug(f"Sampled agent: {sampled_characteristics['name']}.")
222
 
223
  if agent_particularities is not None:
224
- agent_particularities =\
225
- f"""
226
- - Primary characteristics: {agent_particularities}
227
-
228
- - Also use all the following additional characteristics that **do not** conflict with the primary ones:
229
- * Name, demographics and other characteristics: {json.dumps(sampled_characteristics, indent=4)}
230
-
231
- In case one of the additional characteristics conflicts with a primary one, please use the primary one
232
- and ignore the additional one.
233
-
234
- If the agent's name is specified, you MUST ALWAYS use it, even if it conflicts with the primary characteristics.
235
-
236
- """
237
  else:
238
- agent_particularities = \
239
- f"""
240
- - Name, demographics and other characteristics:
241
- {json.dumps(sampled_characteristics, indent=4)}
242
- """
243
- else: # no predefined population size, so we generate one-off agents.
244
- # CONCURRENT PROTECTION
245
  with concurrent_agent_generataion_lock:
246
  fresh_agent_name = self._unique_full_name(already_generated_names=DeepPersonaFactory._all_used_and_precomputed_names(),
247
  context=self.context_text)
248
 
249
  if agent_particularities is not None:
250
- agent_particularities = \
251
- f"""
252
-
253
- - Primary characteristics: {agent_particularities}
254
-
255
- - Also use the following additional characteristics:
256
- * Full name: {fresh_agent_name}
257
-
258
- In case the primary characteristics already specify a name, please use the primary name and ignore the additional one.
259
- """
260
  else:
261
  agent_particularities = f"Full name: {fresh_agent_name}"
262
 
263
 
264
-
265
- logger.info(f"Generating person with the following particularities: {agent_particularities}")
266
-
267
- # read example specs from files.
268
- example_1 = json.load(open(os.path.join(os.path.dirname(__file__), '../examples/agents/Friedrich_Wolf.agent.json'), 'r', encoding='utf-8', errors='replace'))
269
- example_2 = json.load(open(os.path.join(os.path.dirname(__file__), '../examples/agents/Sophie_Lefevre.agent.json'), 'r', encoding='utf-8', errors='replace'))
270
-
271
- # We must include all agent names generated in the whole of the simulation, not only the ones generated by this factory,
272
- # since they all share the same name space.
273
- #
274
- # For the minibios, we only need to keep track of the ones generated by this factory, since they are unique to each factory
275
- # and are used to guide the sampling process.
276
- user_prompt = chevron.render(open(self.person_prompt_template_path, 'r', encoding='utf-8', errors='replace').read(), {
277
- "context": self.context_text,
278
- "agent_particularities": agent_particularities,
279
-
280
- #Note that we need to dump them to JSON strings, to ensure we get double quotes,
281
- # and other formatting issues are avoided.
282
- "example_1": json.dumps(example_1["persona"], indent=4),
283
- "example_2": json.dumps(example_2["persona"], indent=4)
284
- })
285
-
286
  def aux_generate(attempt):
287
- messages = []
288
- messages += [{"role": "system", "content": "You are a system that generates specifications for realistic simulations of people. You follow the generation rules and constraints carefully."},
289
- {"role": "user", "content": user_prompt}]
290
-
291
-
292
- # due to a technicality, we need to call an auxiliary method to be able to use the transactional decorator.
293
- message = self._aux_model_call(messages=messages,
294
- temperature=temperature,
295
- frequency_penalty=frequency_penalty,
296
- presence_penalty=presence_penalty)
297
-
298
- if message is not None:
299
- result = utils.extract_json(message["content"])
300
-
301
- logger.debug(f"At attempt {attempt}, generated person parameters:\n{json.dumps(result, indent=4, sort_keys=True)}")
302
-
303
- # only accept the generated spec if the name is not already in use
304
- if not self._is_name_already_assigned(result["name"]):
305
  return result
306
- else:
307
- logger.info(f"Person with name {result['name']} was already generated, cannot be reused.")
 
308
 
309
- return None # no suitable agent was generated
310
-
311
  agent_spec = None
312
  attempt = 0
313
  while agent_spec is None and attempt < attempts:
314
- try:
315
- attempt += 1
316
- agent_spec = aux_generate(attempt=attempt)
317
- except Exception as e:
318
- logger.error(f"Error while generating agent specification: {e}")
319
 
320
- # create the fresh agent
321
  if agent_spec is not None:
322
- # If deep_persona is requested, perform the second API call to enrich the persona
323
- if deep_persona:
324
- agent_spec = self._generate_deep_persona_internal(agent_spec)
325
-
326
- # the agent is created here. This is why the present method cannot be cached. Instead, an auxiliary method is used
327
- # for the actual model call, so that it gets cached properly without skipping the agent creation.
328
-
329
- # protect parallel agent generation
330
  with concurrent_agent_generataion_lock:
331
  person = DeepPersona(agent_spec["name"])
332
  self._setup_agent(person, agent_spec)
@@ -337,55 +215,27 @@ class DeepPersonaFactory(DeepPersonaFactoryBase):
337
  self.generated_names.append(person.get("name"))
338
 
339
  return person
340
- else:
341
- logger.error(f"Could not generate an agent after {attempts} attempts.")
342
- if sampled_characteristics is not None:
343
- self.remaining_characteristics_sample.append(sampled_characteristics)
344
- logger.error(f"Name {fresh_agent_name} was not used, it will be added back to the pool of names.")
345
-
346
- return None
347
-
348
-
349
- @config_manager.config_defaults(parallelize="parallel_agent_generation")
350
- def generate_from_linkedin_profile(self, profile_data: Dict) -> DeepPersona:
351
- """
352
- Generate a DeepPersona from a LinkedIn profile with enriched traits.
353
- """
354
- description = f"Professional with headline: {profile_data.get('headline', '')}. " \
355
- f"Industry: {profile_data.get('industry', '')}. " \
356
- f"Location: {profile_data.get('location', 'Global')}. " \
357
- f"Career level: {profile_data.get('career_level', 'Mid Level')}. " \
358
- f"Summary: {profile_data.get('summary', '')}"
359
 
360
- return self.generate_person(agent_particularities=description)
361
 
362
- def generate_persona_cluster(self, archetype: str, count: int) -> List[DeepPersona]:
363
- """
364
- Generate a cluster of personas following a specific archetype.
365
- """
366
- return self.generate_people(number_of_people=count, agent_particularities=f"Archetype: {archetype}")
367
 
368
- def generate_diverse_population(self, size: int, distribution: Dict) -> List[DeepPersona]:
369
- """
370
- Generate a diverse population based on a distribution.
371
- """
372
- # distribution could specify proportions of various characteristics
373
- # This is a simplified implementation
374
- return self.generate_people(number_of_people=size, agent_particularities=f"Target distribution: {json.dumps(distribution)}")
375
 
376
- def ensure_consistency(self, persona: DeepPersona) -> bool:
377
- """
378
- Ensure the generated persona is consistent.
379
- """
380
- # Implementation would involve checking traits, demographics, etc.
381
- return True # Placeholder
382
 
383
- def calculate_diversity_score(self, personas: List[DeepPersona]) -> float:
384
- """
385
- Calculate a diversity score for a list of personas.
386
- """
387
- # Placeholder for diversity metric calculation
388
- return 0.5
389
 
390
  def generate_people(self, number_of_people:int=None,
391
  agent_particularities:str=None,
@@ -397,1066 +247,19 @@ class DeepPersonaFactory(DeepPersonaFactoryBase):
397
  parallelize=None,
398
  verbose:bool=False,
399
  deep_persona:bool=True) -> list:
400
- """
401
- Generate a list of DeepPersona instances using OpenAI's LLM.
402
-
403
- Args:
404
- number_of_people (int): The number of DeepPersona instances to generate.
405
- agent_particularities (str): The particularities of the agent.
406
- temperature (float): The temperature to use when sampling from the LLM.
407
- frequency_penalty (float): The frequency penalty to use when sampling from the LLM.
408
- presence_penalty (float): The presence penalty to use when sampling from the LLM.
409
- attempts (int): The number of attempts to generate a DeepPersona instance.
410
- post_processing_func (function): A function to apply to the generated agent after it is created.
411
- parallel_workers (int): The number of parallel workers to use when generating the people. Too many workers may cause the LLM to fail
412
- due to throttling by the API.
413
- verbose (bool): Whether to print information about the generated people.
414
-
415
- Returns:
416
- list: A list of DeepPersona instances generated using the LLM.
417
- """
418
-
419
  if number_of_people is None:
420
- if self.population_size is None:
421
- raise ValueError("Either the number of people to generate or the population size must be specified.")
422
  number_of_people = self.population_size
423
-
424
- elif self.population_size is None:
425
- self.population_size = number_of_people
426
-
427
- elif number_of_people is not None and self.population_size is not None and number_of_people > self.population_size:
428
- raise ValueError(f"Cannot generate more people than the population size. Requested {number_of_people}, but the population size is {self.population_size}.")
429
-
430
- people = []
431
- if parallelize:
432
- people = self._generate_people_in_parallel(number_of_people=number_of_people,
433
- agent_particularities=agent_particularities,
434
- temperature=temperature,
435
- frequency_penalty=frequency_penalty,
436
- presence_penalty=presence_penalty,
437
- attempts=attempts,
438
- post_processing_func=post_processing_func,
439
- verbose=verbose,
440
- deep_persona=deep_persona)
441
- else:
442
- people = self._generate_people_sequentially(number_of_people=number_of_people,
443
- agent_particularities=agent_particularities,
444
- temperature=temperature,
445
- frequency_penalty=frequency_penalty,
446
- presence_penalty=presence_penalty,
447
- attempts=attempts,
448
- post_processing_func=post_processing_func,
449
- verbose=verbose,
450
- deep_persona=deep_persona)
451
-
452
- return people
453
 
454
-
455
- @transactional(parallel=True)
456
- def _generate_people_in_parallel(self, number_of_people:int=None,
457
- agent_particularities:str=None,
458
- temperature:float=1.5,
459
- frequency_penalty:float=0.0,
460
- presence_penalty:float=0.0,
461
- attempts:int=10,
462
- post_processing_func=None,
463
- verbose:bool=False,
464
- deep_persona:bool=True) -> list:
465
  people = []
466
-
467
- #
468
- # Concurrently generate the people.
469
- #
470
- # This vastly speeds up the process, but be careful with the number of workers, as too
471
- # many may cause the LLM to fail due to throttling by the API.
472
- #
473
-
474
- # this is the function that will be executed in parallel
475
- def generate_person_wrapper(args):
476
- self, i, agent_particularities, temperature, frequency_penalty, presence_penalty, attempts, post_processing_func, deep_persona = args
477
  person = self.generate_person(agent_particularities=agent_particularities,
478
- temperature=temperature,
479
- frequency_penalty=frequency_penalty,
480
- presence_penalty=presence_penalty,
481
  attempts=attempts,
482
  post_processing_func=post_processing_func,
483
  deep_persona=deep_persona)
484
- return i, person
485
-
486
- with concurrent.futures.ThreadPoolExecutor() as executor:
487
- # we use a list of futures to keep track of the results
488
- futures = [
489
- executor.submit(generate_person_wrapper, (self, i, agent_particularities, temperature, frequency_penalty, presence_penalty, attempts, post_processing_func, deep_persona))
490
- for i in range(number_of_people)
491
- ]
492
-
493
- # we iterate over the futures as they are completed, and collect the results
494
- for future in concurrent.futures.as_completed(futures):
495
- i, person = future.result()
496
- if person is not None:
497
- people.append(person)
498
- info_msg = f"Generated person {i+1}/{number_of_people}: {person.minibio()}"
499
-
500
- if verbose:
501
- logger.info(info_msg)
502
-
503
- else:
504
- logger.error(f"Could not generate person {i+1}/{number_of_people}. Continuing with the remaining ones.")
505
-
506
- return people
507
-
508
- # TODO still make this one available?
509
- def _generate_people_sequentially(self, number_of_people:int=None,
510
- agent_particularities:str=None,
511
- temperature:float=1.5,
512
- frequency_penalty:float=0.0,
513
- presence_penalty:float=0.0,
514
- attempts:int=10,
515
- post_processing_func=None,
516
- verbose:bool=False,
517
- deep_persona:bool=True) -> list:
518
- """
519
- Generate the people sequentially, not in parallel. This is a simpler alternative.
520
- """
521
- people = []
522
- for i in range(number_of_people):
523
- person = self.generate_person(agent_particularities=agent_particularities,
524
- temperature=temperature,
525
- frequency_penalty=frequency_penalty,
526
- presence_penalty=presence_penalty,
527
- attempts=attempts,
528
- post_processing_func=post_processing_func,
529
- deep_persona=deep_persona)
530
- if person is not None:
531
  people.append(person)
532
- info_msg = f"Generated person {i+1}/{number_of_people}: {person.minibio()}"
533
- logger.info(info_msg)
534
- if verbose:
535
- print(info_msg)
536
- else:
537
- logger.error(f"Could not generate person {i+1}/{number_of_people}.")
538
-
539
  return people
540
 
541
-
542
-
543
-
544
  def initialize_sampling_plan(self):
545
- """
546
- Computes a list of characteristics samples from a sampling space.
547
- The sampling space is built from the given description through intermediary steps
548
- that actually build a sampling space and then randomly (and not via LLM) sample from it, thereby
549
- ensuring that the sampling is not biased by the LLM (though the sampling space itself may be biased).
550
-
551
- All intermediary results are stored for later inspection.
552
-
553
- For example, given some n > 3 and a description like
554
- "Young Western people of different liberal professions."
555
-
556
- The final samples could be something like:
557
- [{"age": 25, "profession": "Architect", "country": "USA"},
558
- {"age": 27, "profession": "Lawyer", "country": "Canada"},
559
- ...
560
- {"age": 25, "profession": "Architect", "country": "USA"}]
561
-
562
- Args:
563
- n (int): The number of samples to generate.
564
- sampling_space_description (str): A description of the sampling space.
565
-
566
- """
567
-
568
- # a technicality - we need to use an auxiliary method to be able to use the transactional decorator effectively.
569
- return self._initialize_sampling_plan_transaction(n=self.population_size, description=self.sampling_space_description,context=self.context_text)
570
-
571
- def _initialize_sampling_plan_transaction(self, n, description, context):
572
- """
573
- Auxiliary method to initialize the sampling plan. This is needed in order to be able to use the transactional decorator,
574
- due to a technicality - the method parameters must be such that when they change the transaction is nullified.
575
- """
576
- if self.remaining_characteristics_sample is None:
577
- # sampling dimensions
578
- self.sampling_dimensions = utils.try_function(lambda: self._compute_sampling_dimensions(sampling_space_description=description),
579
-
580
- # check that the result is a dict
581
- postcond_func=lambda result: isinstance(result, dict),
582
- retries=15)
583
- logger.info("Sampling dimensions computed successfully.")
584
- logger.debug(f"Sampling dimensions: {json.dumps(self.sampling_dimensions, indent=4)}")
585
-
586
- # sampling plan
587
- self.sampling_plan = utils.try_function(lambda: self._compute_sample_plan(N=n,
588
- sampling_dimensions=self.sampling_dimensions),
589
-
590
- # checks that the plan is a list, not an empty dictionary, a number or a string
591
- postcond_func = lambda result: isinstance(result, list) and len(result) > 0,
592
- retries=15
593
- )
594
- # if the sampling plan is a dict, let's enclose it in a list
595
- if isinstance(self.sampling_plan, dict):
596
- self.sampling_plan = [self.sampling_plan]
597
- logger.warning("The sampling plan was a dictionary, enclosing it in a list to ensure it is processed correctly.")
598
-
599
- logger.info("Sampling plan computed successfully.")
600
- logger.debug(f"Sampling plan: {json.dumps(self.sampling_plan, indent=4)}")
601
-
602
- # Flatten the sampling plan in concrete individual samples.
603
- # Use deepcopy because we'll be modifying the samples later, and we want to keep the original sampling plan intact
604
- # for correct caching
605
- self.remaining_characteristics_sample = copy.deepcopy(utils.try_function(lambda: self._flatten_sampling_plan(sampling_plan=self.sampling_plan),
606
- retries=15))
607
-
608
- # instead of failing, we warn if the number of samples is not equal to n, as LLMs can be bad at summing up the quantities in the sampling plan.
609
- # This is not a problem, as the sampling space is still valid and can be used, though it may not be as rich as expected.
610
- if len(self.remaining_characteristics_sample) != n:
611
- logger.warning(f"Expected {n} samples, but got {len(self.remaining_characteristics_sample)} samples. The LLM may have failed to sum up the quantities in the sampling plan correctly.")
612
-
613
- # If we got more samples than requested, we truncate them to avoid generating too many names or personas.
614
- if len(self.remaining_characteristics_sample) > n:
615
- logger.info(f"Truncating {len(self.remaining_characteristics_sample)} samples to the requested {n} samples.")
616
- self.remaining_characteristics_sample = self.remaining_characteristics_sample[:n]
617
-
618
- logger.info(f"Sample plan has been flattened, contains {len(self.remaining_characteristics_sample)} total samples.")
619
- logger.debug(f"Remaining characteristics sample: {json.dumps(self.remaining_characteristics_sample, indent=4)}")
620
-
621
- # generate names for each sample individually, considering all their characteristics
622
- all_used_names = DeepPersonaFactory._all_used_and_precomputed_names()
623
-
624
- for i, sample in enumerate(self.remaining_characteristics_sample):
625
- logger.debug(f"Generating name for sample {i+1}/{len(self.remaining_characteristics_sample)}")
626
-
627
- # randomize the all_used_names to make the context less predictable for the LLM, thereby introducing some additional randomness.
628
- # Note that we use a fixed random seed to ensure that the sampling plan is reproducible and cache can be kept.
629
- DeepPersonaFactory.randomizer.shuffle(all_used_names)
630
-
631
- # generate a name that's appropriate for this specific sample's characteristics
632
- try:
633
-
634
- # A dummy name to start with, in case the name generation fails.
635
- sample["name"] = f"Agent_{utils.fresh_id('agents_names')}"
636
-
637
- name = utils.try_function(
638
- lambda: self._generate_name_for_sample(
639
- sample_characteristics=sample,
640
- already_generated_names=all_used_names
641
- ),
642
- # ensure the name is not in already used names
643
- postcond_func=lambda result: result not in all_used_names,
644
- retries=15
645
- )
646
-
647
- sample["name"] = name
648
- all_used_names.append(name)
649
-
650
- except Exception as e:
651
- logger.error(f"Error generating name for sample {i}: {e}")
652
- # fallback: use a simple default name with index
653
- fallback_name = f"Person_{i}_{sample.get('gender', 'unknown')}"
654
- sample["name"] = fallback_name
655
- all_used_names.append(fallback_name)
656
-
657
- logger.info("Names generated for all samples in the sampling plan.")
658
-
659
- # update the global list of unique names
660
- new_names = [sample["name"] for sample in self.remaining_characteristics_sample]
661
- DeepPersonaFactory.all_unique_names = list(set(DeepPersonaFactory.all_unique_names + new_names))
662
-
663
- else:
664
- raise ValueError("Sampling plan already initialized. Cannot reinitialize it.")
665
-
666
- @classmethod
667
- def _all_used_and_precomputed_names(cls) -> list:
668
- """
669
- Returns all the names currently in use by agents and those pre-generated by all factories.
670
- """
671
- return DeepPersona.all_agents_names() + cls.all_unique_names
672
-
673
- def _is_name_globally_unique(self, name:str) -> bool:
674
- """
675
- Checks if a name is globally unique.
676
- """
677
- return name not in DeepPersonaFactory.all_unique_names
678
-
679
- def _is_name_already_assigned(self, name:str) -> bool:
680
- """
681
- Checks if a name has already been assigned to a person.
682
- """
683
- return name in DeepPersona.all_agents_names()
684
-
685
-
686
- @transactional()
687
- @utils.llm(temperature=0.5, frequency_penalty=0.0, presence_penalty=0.0)
688
- def _compute_sampling_dimensions(self, sampling_space_description:str) -> dict:
689
- """
690
- Given a sampling description, computes the dimensions of the sampling space. The sampling space offers a way to sample from a population of people,
691
- so each dimension contains values that could be an attribute of a **specific** person. The resulting sampling space must:
692
- - contemplate all critical characteristics mentioned in the sampling description, even if this means having a large number of dimensions and
693
- complex values for each.
694
- * whenever necessary to properly capture the possibilities, you can replace a single dimension by a collection of sub-dimensions
695
- (e.g., instead of "beliefs", you might have "political_beliefs", "economic_beliefs", "consumer_beliefs", etc.)
696
- - values for each dimension can range from numbers or single words to large sentences or even paragraphs. For attributes that are not clearly single values,
697
- always try to add as much detail as possible. For instance, age is just a single value, but lifestyle or cultural background **must** be a long sentence or even a paragraph.
698
- This is to ensure that, later, the generated people can be very nuanced and realistic, with rich and detailed attributes. See the example below to get inspired.
699
- - you can be very creative with the dimensions and values provided that they are consistent with the sampling space description.
700
- - whenever you have the information about PROPORTIONS of the values, you **must** include them in the output, so that the sampling space can be used to generate people
701
- in a representative way.
702
- - values are **not** distributions, probabilities or other statistics, but rather concrete, specific, people attributes. For example, there can
703
- be no "average_age" dimension, but only "age", although the complete set of values that define a dimension is itself a distribution.
704
- - each dimension should be as rich as possible, having as many values as possible, so that the sampling space can be used to generate
705
- many nuanced variations of the target population.
706
- - each dimension should consider a wide range of values, making sure to cover both POSITIVE and NEGATIVE possibilities (e.g., rich vs poor, likes sugar vs don't like sugar).
707
- - each dimension should always include extreme values, so that the sampling space can be used to generate people with extreme characteristics, such as very young or very old,
708
- very rich or very poor, very positive or very negative, etc.
709
- - include as many dimensions as possible to capture the richness of the population, even if this means having a large number of dimensions.
710
- - in principle, the original sampling description could be approximately rephrased in terms of the dimensions and values generated (i.e., the dimensions are rich enough
711
- to capture all relevant information). However, this should not limit the range of values and dimensions used, but rather be a byproduct of the process. For instance,
712
- if the original description say "young people", the dimension "age" could be defined as a range of values from 18 to 30, but **not** as a small list with only, say, [18, 25, 30].
713
- Always try to be as rich as possible in the values and dimensions, even if this means having a large number of them.
714
-
715
- Additionally, make sure you include special dimensions that capture these aspects, in such a way that they relate to the sampling space description:
716
- - personality traits (with proportions)
717
- - political beliefs (with proportions)
718
- - economic beliefs (with proportions)
719
- - financial situation (with proportions)
720
- - preferences and tastes (with proportions)
721
- - cultural background (with proportions and diverse ethnicities and cultural heritages; provide detailed, realistic, and varied examples that reflect a wide spectrum of ethnic, national, and cultural identities relevant to the sampling space description)
722
-
723
- ## On your input
724
-
725
- Here's what to do depending on what the input sampling space description looks like:
726
- - Plain text: Abstract all the potential dimensions from the text. For example, if the text is "Young Western people of different liberal professions.", the dimensions could be "age", "profession", "country".
727
- - JSON: Do not use the JSON directly, but rather abstract the dimensions from it. Input JSONs can be obtained from various sources, and you should do your best to interpret them and produce a clean list of dimensions and their values, regardless of how complex the input JSON is. In particular, never use the JSON formatting itself as dimension names or values, but rather abstract the actual dimensions and values from it.
728
- - Tables or other structured data: Abstract the dimensions from the structured data. For example, if the data is in a table, you should extract the rows and columns and abstract the dimensions from them.
729
-
730
-
731
- ## On your output:
732
- You output a JSON containing a list of dimensions. Each output dimension **must** consist of:
733
- - a name;
734
- - EITHER a list of values OR a range of values (specified as a pair).
735
- * in lists of values, whenever possible, you **must** use long values, such as sentences or paragraphs, instead of short words or numbers.
736
- * in lists of values you can, optionally, use a dictionary to specify proportions of the values, e.g., {"value1": 0.5, "value2": 0.3, "value3": 0.2} to indicate that 50% of the population has value1, 30% has value2, and 20% has value3.
737
- Adjust the proportions as appropriate for the context and ensure they sum to 1.0.
738
-
739
- The output is formatted as a JSON object with the following structure:
740
- ```json
741
- {
742
- "sampling_space_description": "A description of the sampling space.",
743
- "dimensions": [
744
- {
745
- "name": "dimension_name_1",
746
- "values": ["value1", "value2", ...]
747
- },
748
-
749
- {
750
- "name": "dimension_name_2",
751
- "range": [min, max]
752
- },
753
-
754
- {
755
- "name": "dimension_name_3",
756
- "values": {"value1": proportion1, "value2": proportion2, "value3": proportion3, ...}
757
- },
758
-
759
- ...
760
- ]
761
- }
762
- ```
763
-
764
- Unless values are necessarily numbers (e.g., age), they should be descriptive strings so that it is easy to understand what they mean.
765
- These strings can be simple values or long detailed texts, whatever is best to capture the desired characteristic.
766
-
767
- ## Example:
768
- Given the following INPUT sampling space description: "Young Western people of different liberal professions and social classes."
769
-
770
- The OUTPUT dimensions could be a dictionary with the following structure:
771
- ```json
772
- {
773
- "sampling_space_description": "Young Western people of different liberal professions and social classes.",
774
- "dimensions": [
775
- {
776
- "name": "age",
777
- "range": [18, 30]
778
- },
779
- {
780
- "name": "socioeconomic status",
781
- "values": ["miserable", "poor", "middle class", "rich", "very rich"]
782
- },
783
- {
784
- "name": "profession",
785
- "values": ["Architect", "Lawyer", "Physician", "Accountant", ...]
786
- },
787
- {
788
- "name": "country",
789
- "values": {
790
- "USA": 0.35,
791
- "Germany": 0.10,
792
- "UK": 0.09,
793
- "France": 0.09,
794
- "Italy": 0.08,
795
- "Spain": 0.06,
796
- "Canada": 0.06,
797
- "Australia": 0.05,
798
- "Netherlands": 0.03,
799
- "Sweden": 0.03,
800
- "Belgium": 0.02,
801
- "Switzerland": 0.02,
802
- "Austria": 0.01
803
- }
804
- },
805
- {
806
- "name": "cultural_background",
807
- "values": {
808
- "Born in a large city of a developed nation, parents were from a lineage of physicians and lawyers": 0.12,
809
- "Descendant of Ashkenazi Jewish immigrants who settled in New York City in the early 20th century, maintaining strong ties to Jewish traditions and community life.": 0.08,
810
- "Second-generation Chinese-Canadian whose family values blend Confucian principles with Canadian multiculturalism, celebrating both Lunar New Year and Canada Day.": 0.06,
811
- "Of Irish and Italian descent, growing up in Boston with a household that combines Catholic traditions, Irish folk music, and Italian culinary heritage.": 0.10,
812
- "Of Turkish-German background, raised in Berlin with exposure to both Turkish family traditions and contemporary German urban culture.": 0.05,
813
- <... many more ...>
814
- }
815
- },
816
- {
817
- "name": "economic_beliefs",
818
- "values": {
819
- "Firmly believes that diligent effort and perseverance in one's career are the primary drivers of financial prosperity and upward mobility.": 0.28,
820
- "Holds the view that wealth accumulation is largely a matter of being in the right place at the right time, with luck playing a significant role in economic outcomes.": 0.18,
821
- "Thinks that government intervention and social programs are essential to ensure fair economic opportunities for all members of society.": 0.22,
822
- "Believes that personal connections and networking are more important than formal education or hard work in achieving economic success.": 0.15,
823
- <... many more ...>
824
- }
825
- },
826
- {
827
- "name": "professional_attitudes",
828
- "values": {
829
- "Aspires to establish and grow their own business, valuing independence and the ability to innovate without corporate constraints.": 0.18,
830
- "Prefers the stability and structure of working for a well-established company, appreciating clear career paths and organizational support.": 0.32,
831
- "Enjoys collaborating in multidisciplinary teams and seeks out workplaces that foster creativity and open communication.": 0.22,
832
- "Is highly risk-averse and prioritizes job security and predictable routines over rapid advancement or entrepreneurial ventures.": 0.15,
833
- <... many more ...>
834
- }
835
- },
836
- {
837
- "name": "political_beliefs",
838
- "values": {
839
- "Strongly supports progressive policies aimed at reducing income inequality and expanding access to healthcare and education.": 0.24,
840
- "Advocates for conservative values, emphasizing the importance of tradition, personal responsibility, and limited government intervention.": 0.20,
841
- "Identifies as a centrist, believing that balanced compromise between opposing political ideologies leads to the best societal outcomes.": 0.26,
842
- "Is passionate about environmental issues and supports policies that prioritize sustainability and climate change mitigation above economic growth.": 0.16,
843
- <... many more ...>
844
- }
845
- },
846
- {
847
- "name": "personality_traits",
848
- "values": {
849
- "Maintains an unwavering optimism, always expecting positive outcomes even in the face of adversity and encouraging others to do the same.": 0.12,
850
- "Tends to be introspective and reserved, preferring solitary activities and deep reflection over social gatherings or group events.": 0.18,
851
- "Is highly ambitious, constantly setting challenging goals and pushing themselves to achieve more in both personal and professional spheres.": 0.15,
852
- "Approaches new experiences with caution, carefully weighing risks and benefits before making decisions or embracing change.": 0.20,
853
- "Often expects the worst in any situation, focusing on potential problems and rarely feeling hopeful about the future.": 0.08,
854
- "Frequently experiences a sense of sadness and melancholy, finding it difficult to enjoy activities that once brought happiness.": 0.06,
855
- "Is quick to notice flaws and shortcomings in themselves and others, tending toward a negative outlook on life.": 0.07,
856
- "Feels overwhelmed by setbacks, easily discouraged, and tends to dwell on failures rather than successes.": 0.05,
857
- <... many more ...>
858
- }
859
- },
860
- {
861
- "name": "preferences_and_tastes",
862
- "values": {
863
- "Has a deep appreciation for classical music, frequently attending orchestral concerts and collecting rare vinyl recordings.": 0.08,
864
- "Finds joy in spending weekends hiking in remote natural parks, seeking tranquility and inspiration from the outdoors.": 0.16,
865
- "Rarely leaves home, preferring the comfort of familiar surroundings and engaging in hobbies such as reading and painting indoors.": 0.11,
866
- "Enjoys experimenting with international cuisines, often hosting elaborate dinner parties to share culinary discoveries with friends.": 0.14,
867
- "Is sensitive to loud environments and actively avoids crowded or noisy places, seeking peace and quiet whenever possible.": 0.13,
868
- "Prefers to spend time alone in dimly lit rooms, listening to somber music and reflecting on the more difficult aspects of life.": 0.04,
869
- "Has little interest in social gatherings or celebrations, often declining invitations and feeling out of place in festive environments.": 0.07,
870
- "Frequently chooses entertainment or art that explores themes of loss, struggle, or existential despair, finding comfort in shared sadness.": 0.03,
871
- <... many more ...>
872
- }
873
- }
874
- ]
875
- }
876
- ```
877
-
878
- Note in the example:
879
- - Age is given as a numeric range.
880
- - All other values are descriptive strings, human-friendly, no strange symbols or codes.
881
- - The "country" dimension uses a dictionary with suitable proportions for Western countries.
882
- - No value contains internal structure - just a name or short description.
883
- - All values are concrete properties, not distributions, probabilities or other statistics.
884
- - Whenever possible, the values in the dimensions are long and detailed **sentences** each.
885
- - It has few dimensions because the sampling space description is very short. If the description were longer, the number of dimensions would be larger,
886
- and their values more detailed.
887
- - It contains the additional dimensions that capture the personality traits, political beliefs, economic beliefs, financial situation, preferences and tastes,
888
- and now cultural background with varied ethnicities and heritages, which are important for the sampling space to be rich enough to generate nuanced variations of the target population.
889
- - Beyond positive aspects, it also includes values that emphasize pessimism, negativeness, and sadness, ensuring these characteristics are balanced and represented in the sampling space.
890
-
891
- Args:
892
- sampling_space_description (str): A description of the sampling space.
893
-
894
- Returns:
895
- dict: A dictionary with the dimensions of the sampling space, as shown in the example above.
896
- """
897
- # the body of this method is handled by the @llm decorator.
898
-
899
- @transactional()
900
- @utils.llm(temperature=0.5, frequency_penalty=0.0, presence_penalty=0.0)
901
- def _compute_sample_plan(self, N:int, sampling_dimensions:dict, max_quantity_per_sample_directive:int=5, min_sampling_directives:int=10, max_sampling_directives:int=50) -> List[Dict[str, any]]:
902
- """
903
- This function defines which and how many people to sample from the sampling space defined by the given dimensions.
904
- Given a number N of people to sample, and the dimensions of the sampling space, computes a *sample plan* of N people from that space.
905
-
906
- The input sampling dimensions have the following structure:
907
-
908
- ```json
909
- {
910
- "sampling_space_description": "A description of the sampling space.",
911
- "dimensions": [
912
- {
913
- "name": "dimension_name_1",
914
- "values": ["value1", "value2", ...]
915
- },
916
- {
917
- "name": "dimension_name_2",
918
- "range": [min, max]
919
- },
920
- ...
921
- ]
922
- }
923
- ```
924
-
925
- The *sample plan* to be generated is a list of M *sampling directives*. Each *sampling directive* **always** consists of:
926
- - "id": a unique identifier for the *sampling directive*, just an incrementing integer starting from 1.
927
- - "subpopulation_description": a short description of the sub-population that this *sampling directive* represents, based on the sampling space description and the sampled values.
928
- If possible, make it a recognizable and meaningful description of the sub-population,
929
- such as "Young rebellious people from upper classes", "Old conservative boomers from rural areas", "Intellectual urban professionals with diverse and cosmopolitan cultural backgrounds", etc.
930
- "sampled_values": a map from dimensions of the sampling space to concrete values, value ranges or value options.
931
- - "quantity": to how many elements with those values should be sampled in total (from 1 to max_quantity_per_sample_directive if specified).
932
- The sum of all of these quantities must be equal to N.
933
-
934
- So your final output **MUST** follow this JSON structure:
935
-
936
- ```json
937
- [
938
- { "id": 1,
939
- "subpopulation_description": "Some description here...",
940
- "sampled_values": {
941
- "dimension_name_1": [n_1_min, n_1_max],
942
- "dimension_name_2": ["value2_1", "value2_2", ...],
943
- "dimension_name_3": ["value3_1", "value3_2", ...],
944
- ...
945
- },
946
- "quantity": quantity_1
947
- },
948
-
949
- {
950
- "id": 2,
951
- "subpopulation_description": "Some other description here...",
952
- "sampled_values": {
953
- "dimension_name_1": [n_1_min, n_1_max],
954
- "dimension_name_2": "value2",
955
- "dimension_name_3": ["value3_1", "value3_2", ...],
956
- ...
957
- },
958
- "quantity": quantity_2
959
- },
960
- ...
961
- {
962
- "id": M,
963
- "subpopulation_description": "Again some description here...",
964
- "sampled_values": {
965
- "dimension_name_1": [n_1_min, n_1_max],
966
- "dimension_name_2": ["value2_1", "value2_2", ...],
967
- "dimension_name_3": ["value3_1", "value3_2", ...],
968
- ...
969
- },
970
- "quantity": quantity_M
971
- },
972
- ]
973
- ```
974
-
975
- where N = quantity_1 + quantity_2 + ... + quantity_M,
976
- quantity_i <= max_quantity_per_sample_directive (if specified),
977
- and M is the number of *sampling directives*, which can be as large as necessary to ensure
978
- that the total number of sampled people is equal to N.
979
-
980
- Note:
981
- - Concrete values are NOT in brackets, but rather just a single value or a range of values.
982
- - Options are given in lists of strings separated by commas, e.g., ["value1", "value2", ...].
983
- Ranges are numeric and specified as a pair of numbers, e.g., [min, max].
984
-
985
- Rules and principles:
986
- - The sampling plan is a collection of sub-populations captured by each *sampling directive*. Therefore, the various *sampling directives* must complement each other in order
987
- to approximate the target population.
988
- - Each *sampling directive* is a **combination** of values from the sampling dimensions that represent a specific segment of the target population. Its richness and variety must reflect the desired sub-population.
989
- - The dimension sampled in each *sampling directive* can be a single value, a range of values, or a list of values. You can use ranges and lists to cover a wider range of possibilities
990
- in a compact way, but you can also use single values if necessary. The items in a list can be long or short; it does not matter, both can appear in lists. Some examples of good formatting:
991
- * CORRECT example: ["Very rich", "Rich", "Middle class", "Poor"]
992
- * CORRECT example: "Rich"
993
- * WRONG example: ["Very rich or Rich or Middle class or Poor"]
994
- * WRONG example: ["Rich"]
995
- - **Always** try very hard to use a list of values (two or more values) or range of values (min - max), to make the sampling plan at once concise and rich. In doing so, make sure that each *sampling directive* is truly representative
996
- of some segment of the target population, and not just a random collection of values.
997
- - You MUST make M as large as necessary to contemplate the target population, ideally M >= min_sampling_directives (but M <= max_sampling_directives, if specified), to ensure a rich and varied sampling of the population.
998
- * Note that this means the maximum *sampling directive* "id" (call it max_id) used in the *sampling plan* is such that: max_id >= min_sampling_directives; max_id <= max_sampling_directives (if specified).
999
- - The sampled population MUST be representative of the target population.
1000
- - The sampled population MUST be realistic.
1001
- - You can set the quantity of each *sampling directive* to 1 if necessary to ensure a varied and representative sampling.
1002
- - All values chosen from the sampling dimensions must be copied IN FULL in the "sampled_values" map, so that the sampled values are concrete and specific.
1003
- The sample plan is supposed to be self-contained, therefore it MUST have all details necessary to sample the people later, without needing to refer back to the sampling dimensions.
1004
- - You should include as many *sampling directives* as necessary to cover the sampling of N total people (the sum of all quantities). When in doubt,
1005
- **always** add more *sampling directives* (i.e., make M larger) up to max_sampling_directives (if specified), as this will ensure you cover the requested N people.
1006
- - In particular, make sure both POSITIVE and NEGATIVE possibilities of the various characteristics are covered (e.g., rich vs poor, likes sugar vs doesn't like sugar, enthusiastic vs apathetic).
1007
- This is to ensure any bias (towards positive or negative characteristics) is minimized, and the sampling space is rich enough to generate people with a wide range of characteristics.
1008
- - The sampling space description should be used to guide the sampling, so that the sampled population is consistent with it.
1009
- - You should ensure that the quantity of requested samples in each *sampling directive* is proportional to their presumed size in the target population.
1010
- That is to say, combinations of dimensions that are more common in the target population should be sampled more often. If you don't know, make a guess.
1011
- - If max_quantity_per_sample_directive is specified, you must ensure that no single *sampling directive* exceeds this quantity. This is to ensure we get more variation and not just a few large groups.
1012
- - You can rely on your built-in knowledge or make educated guesses about such quantities and proportions to ensure that the sample is representative of the population.
1013
- * Note that this means for any quantity_i: quantity_i >= 1; quantity_i <= max_quantity_per_sample_directive (if specified).
1014
- - The sum of all quantities in the output **must** be equal to N, the number of people to sample in total.
1015
- - You can always add extra *sampling directives* (up to max_sampling_directives if specified) to ensure the total of N people is reached.
1016
- - It is acceptable for the sampling plan to generate more than N people, but NEVER less than N. So if unsure generate MORE people, never less.
1017
-
1018
- ## Example
1019
- Given the following INPUT sampling dimensions:
1020
-
1021
- ```json
1022
- {
1023
- "sampling_space_description": "Young Western people of different liberal or intellectual professions."
1024
- "dimensions": [
1025
- {
1026
- "name": "age",
1027
- "range": [18, 30]
1028
- },
1029
- {
1030
- "name": "profession",
1031
- "values": ["Architect", "Financial Analyst", "Writer", "Art critic", "Lawyer", "Physician", "Accountant", ...]
1032
- },
1033
- {
1034
- "name": "country",
1035
- "values": ["USA", "Canada", "UK", "France", "Germany", "Italy", "Spain", "Portugal", "Netherlands", "Belgium", ...]
1036
- },
1037
-
1038
- {
1039
- "name": "personality_traits",
1040
- "values": {
1041
- "Maintains an unwavering optimism, always expecting positive outcomes even in the face of adversity and encouraging others to do the same.": 0.12,
1042
- "Tends to be introspective and reserved, preferring solitary activities and deep reflection over social gatherings or group events.": 0.18,
1043
- "Is highly ambitious, constantly setting challenging goals and pushing themselves to achieve more in both personal and professional spheres.": 0.15,
1044
- "Approaches new experiences with caution, carefully weighing risks and benefits before making decisions or embracing change.": 0.20,
1045
- "Often expects the worst in any situation, focusing on potential problems and rarely feeling hopeful about the future.": 0.08,
1046
- "Frequently experiences a sense of sadness and melancholy, finding it difficult to enjoy activities that once brought happiness.": 0.06,
1047
- "Is quick to notice flaws and shortcomings in themselves and others, tending toward a negative outlook on life.": 0.07,
1048
- "Feels overwhelmed by setbacks, easily discouraged, and tends to dwell on failures rather than successes.": 0.05,
1049
- <... many more ...>
1050
- }
1051
- }
1052
-
1053
- (... more dimensions ...)
1054
-
1055
- ]
1056
- }
1057
-
1058
- An OUTPUT *sample plan* therefore is a LIST with the *sample plan*, where each element is a dictionary with a *sampling directive*. For example, an output based on the above dimensions could look like this:
1059
-
1060
- ```json
1061
- [
1062
- {
1063
- "id": 1,
1064
- "subpopulation_description": "Young Anglo-Saxon professionals with their stereotypical ambition and drive.",
1065
- "sampled_values": {
1066
- "age": [22, 30],
1067
- "profession": ["Financial Analyst", "Lawyer", "Physician", "Accountant", ...],
1068
- "country": ["USA", "UK", "Canada"],
1069
- "personality_traits": ["Maintains an unwavering optimism, always expecting positive outcomes even in the face of adversity and encouraging others to do the same.",
1070
- "Approaches new experiences with caution, carefully weighing risks and benefits before making decisions or embracing change",
1071
- "Tends to be introspective and reserved, preferring solitary activities and deep reflection over social gatherings or group events.",
1072
- "Is quick to notice flaws and shortcomings in themselves and others, tending toward a negative outlook on life."]
1073
- },
1074
- "quantity": 10
1075
- },
1076
- {
1077
- "id": 2,
1078
- "subpopulation_description": "Young European professionals with a focus on creativity and innovation and their occasional existential crises.",
1079
- "sampled_values": {
1080
- "age": [21, 30],
1081
- "profession": ["Architect", "Lawyer", "Writer", "Physician", "Art critic", ...],
1082
- "country": ["France", "Germany", "Italy", "Spain"],
1083
- "personality_traits": ["Often expects the worst in any situation, focusing on potential problems and rarely feeling hopeful about the future.",
1084
- "Frequently experiences a sense of sadness and melancholy, finding it difficult to enjoy activities that once brought happiness.",
1085
- "Is quick to notice flaws and shortcomings in themselves and others, tending toward a negative outlook on life.",
1086
- "Feels overwhelmed by setbacks, easily discouraged, and tends to dwell on failures rather than successes.]"
1087
- },
1088
- "quantity": 5
1089
- },
1090
- ...
1091
- ]
1092
- ```
1093
-
1094
-
1095
- Args:
1096
- n (int): The number of elements to sample in total. This number will be distributed across the dimensions proportionally
1097
- to the presumed size the target population.
1098
- sampling_dimensions (dict): The dimensions of the sampling space.
1099
- max_quantity_per_sample_directive (int, optional): The maximum quantity of samples that can be specified in a single sampling directive. This is to ensure that the sampling plan is diverse and not biased towards a few large groups.
1100
- min_sampling_directives (int, optional): The minimum number of sampling directives to generate. This is to ensure that the sampling plan is rich and varied.
1101
- max_sampling_directives (int, optional): The maximum number of sampling directives to generate. This is to ensure that the sampling plan is not overly complex and remains manageable.
1102
-
1103
- Returns:
1104
- list: A LIST with the *sample plan*, where each element is a dictionary with a *sampling directive*, as described above.
1105
- """
1106
- # the body of this method is handled by the @llm decorator.
1107
-
1108
@transactional()
def _flatten_sampling_plan(self, sampling_plan: list) -> list:
    """
    Given a sample plan, flattens it into a list of samples in such a way that the number of times
    each sample appears corresponds to what was specified in the plan. The order is randomized
    (in place, via random.shuffle) to avoid bias.

    For example, an input sample plan could look like this:

    ```json
    [
        {
            "sampled_values": {
                "age": 25,
                "profession": "Architect",
                "country": "USA"
            },
            "quantity": 8
        },
        {
            "sampled_values": {
                "age": 27,
                "profession": "Lawyer",
                "country": "Canada"
            },
            "quantity": 1
        },
        ...
    ]
    ```

    And the output would be something like:

    ```python
    [{"age": 25, "profession": "Architect", "country": "USA"},
     {"age": 27, "profession": "Lawyer", "country": "Canada"},
     ...
     {"age": 25, "profession": "Architect", "country": "USA"}]
    ```

    Args:
        sampling_plan (list): The sample plan to flatten: a list of *sampling directives*, each a
            dict with a "sampled_values" dict and an optional integer "quantity". A missing
            "quantity" defaults to 1 (with a warning). NOTE: the previous docstring declared this
            parameter as a dict, but the code iterates it as a list of directives.

    Returns:
        list: A list of samples, where each sample is a dictionary with the sampled values.
    """
    samples = []
    for directive in sampling_plan:
        if "quantity" not in directive:
            logger.warning(f"Sample in sampling plan does not have a 'quantity' field: {directive}. Assuming 1.")
            qty = 1
        else:
            qty = int(directive["quantity"])

        # Each flattened sample is deep-copied so that later per-sample modifications
        # do not leak into sibling copies originating from the same directive.
        samples.extend(copy.deepcopy(directive["sampled_values"]) for _ in range(qty))

    # randomize to avoid ordering bias
    random.shuffle(samples)  # in place
    return samples
1171
-
1172
@transactional()
def _unique_full_name(self, already_generated_names: list, context: str = None) -> str:
    # Thin pass-through wrapper: the actual generation happens in the LLM-backed helper.
    # Splitting it this way is required so the @transactional decorator can be applied
    # effectively around the LLM call.
    # TODO update this somehow to avoid this cumbersome workaround.
    return self._aux_unique_full_name(
        already_generated_names=already_generated_names,
        context=context,
    )
1178
-
1179
-
1180
@utils.llm(temperature=1.5, presence_penalty=0.5, frequency_penalty=0.5)
def _aux_unique_full_name(self, already_generated_names: list, context: str = None) -> str:
    """
    Generates a unique full name for a person. The full name must not be in the list of already generated names.
    If necessary, you can generate a longer name to ensure it is new. You can also try tweaking the spelling or
    adding more surnames, so that the name is unique. However, the name **must** sound realistic and not be too far-fetched,
    not sound as if it was made up.

    The final result is only the name, nothing else:

    "Some name here" ---> correct as it is just a name, nothing else
    "Some name here, because ..." ---> incorrect as it contains a reason
    "Some name here." ---> incorrect as it contains punctuation
    "Name: Some name here" ---> incorrect as it contains a label
    "Some name here, some other name here" ---> incorrect as it contains more than one name

    An optional context can be provided to guide the name generation, so that it is a realistic name for the context. For example, we know that different socio-economic classes have different naming conventions, so the context can be used to guide the name generation.

    Regarding the `already_generated_names`, you must:
    - NEVER generate a name that is already in the list of already generated names.
    - The names in `already_generated_names` ARE NOT examples of names to generate. They are just names that have already been generated and should not be repeated. You should generate new names regardless of the names in `already_generated_names`, the only constraint is that the new names should not be in the list of already generated names.
    - In particular, you are not to generate a similar name to that of those in `already_generated_names`, you are **not** building some kind of
    logical sequence. Each name must be independent of the others.

    ## Example

    **Input:**
    already_generated_names: ["John Doe", "Jane Smith", "Alice Brown"]
    context: { 'age': 25, 'profession': 'Architect', 'country': 'USA' }

    **Output:**
    "Michael Johnson"

    Note that:
    - The name "Michael Johnson" is not in the list of already generated names.
    - The output consists only of a name, nothing else.

    Args:
        already_generated_names (list): The list of already generated names.
        context (str): The context in which the name is being generated. This can be used to guide the name generation, so that it is a realistic name for the context.

    Returns:
        str: A unique full name for a person.
    """
    # the body of this method is handled by the @llm decorator; the docstring above is the
    # prompt the decorator sends to the model. FIX: corrected the "ouput" typo in the prompt.
1225
-
1226
@transactional()
def _unique_full_names(self, n: int, already_generated_names: list, context: str = None) -> list:
    """
    Generates a list of n unique full names for people. The full names must not be in the list
    of already generated names.

    Generation is performed in chunks (at most 10 names per LLM call), retrying until n names
    are collected or a bounded number of iterations is exhausted.

    Args:
        n (int): The number of names to generate.
        already_generated_names (list): The list of already generated names.
        context (str): The context in which the names are being generated. This can be used to
            guide the name generation, so that it is a realistic name for the context.

    Returns:
        list: the generated unique names. May contain fewer than n entries if generation
            repeatedly fails within the iteration budget.
    """

    logger.debug(f"Will generate {n} unique full names for people. Already generated names: {already_generated_names}")

    names = []

    if n > 0:
        # split n into smaller chunks to make the model's job easier
        chunk_size = min(10, n)  # at most 10 names at a time, to avoid overwhelming the model
        chunks = math.ceil(n / chunk_size)

        forbidden_names = copy.deepcopy(already_generated_names)

        max_iterations = chunks * 10
        cur_iterations = 0

        while len(names) < n and cur_iterations < max_iterations:
            logger.debug(f"Currently already generated names: {forbidden_names}")
            logger.debug(f"Iteration {cur_iterations} - Generating {chunk_size} names. Currently have {len(names)} names. Max iterations to be allowed: {max_iterations}")
            try:
                temp_names = utils.try_function(
                    lambda: self._aux_unique_full_names(n=chunk_size,
                                                        already_generated_names=forbidden_names,
                                                        context=context),
                    # checks that some new name was produced
                    postcond_func=lambda result: len(set(forbidden_names).intersection(result)) < len(result),
                    retries=3)

                # add the new names to the names list, removing any duplicates from their combination
                names = list(set(names + temp_names))

                # BUGFIX: previously `forbidden_names += names` re-appended the entire
                # accumulated `names` list on every iteration, so forbidden_names grew
                # quadratically with duplicates and bloated the prompt. Merge as sets instead.
                forbidden_names = list(set(forbidden_names) | set(names))
            except Exception as e:
                logger.error(f"Error generating names: {e}")
                # if we have an error, we just skip this iteration and try again,
                # but we still count the iteration against the budget

            cur_iterations += 1

        if cur_iterations >= max_iterations and len(names) < n:
            logger.error(f"Could not generate the requested number of names after {max_iterations} iterations. Moving on with the {len(names)} names generated.")

    # record the new names in the class-wide registry of all generated names
    DeepPersonaFactory.all_unique_names = list(set(DeepPersonaFactory.all_unique_names + names))

    return names
1282
-
1283
@utils.llm(temperature=1.9, presence_penalty=0.5, frequency_penalty=0.5)
def _aux_unique_full_names(self, n: int, already_generated_names: list, context: str = None) -> list:
    """
    Generates a list of n unique full names for people. The full names must not be in the list of already generated names. You **must** consider **all** reasonable options for names,
    not only the common or popular. To ensure that fresh names are really new and do not appear in the list of already generated ones, if necessary you can:
    - generate longer names to ensure they are new.
    - try tweaking the spelling or adding more surnames, so that the names are unique.
    - add unusual names or surnames, so that the names are unique.
    - as a very last resort, you can append a number to the name, so that it is unique, despite being a bit less realistic.

    Except for the latter option, the names **must** sound realistic and not be too far-fetched, not sound as if they were made up.

    You **must** generate at least n names, and they **must** all be unique. If necessary, to ensure you get at least n names, you can try to generate more than n,
    but **never** less, unless you need to avoid a repeated name. If forced to choose, you always prefer to generate unique names, even if that means generating less than n names.

    The final result is only the list of names, nothing else:

    ["Some name here"] ---> correct as it is just a list with a single name, nothing else
    ["Some name here, some other name here"] ---> correct as it is a list of names
    ["Some name here, because ..."] ---> incorrect as it contains a reason
    ["Some name here."] ---> incorrect as it contains punctuation
    ["Name: Some name here"] ---> incorrect as it contains a label

    An optional context can be provided to guide the name generation, so that it is a realistic name for the context. For example, we know that different socio-economic classes have different naming conventions,
    so the context can be used to guide the name generation. In particular, follow these rules regarding the context:
    - If a country is specified, the names should be typical for that country.

    Regarding the `already_generated_names`, you must:
    - NEVER generate a name that is already in the list of already generated names.
    - The names in `already_generated_names` ARE NOT examples of names to generate. They are just names that have already been generated and should not be repeated. You should generate new names regardless of the names in `already_generated_names`, the only constraint is that the new names should not be in the list of already_generated_names.
    - In particular, you are not to generate a similar name to that of those in `already_generated_names`, you are **not** building some kind of logical sequence. Each name must be independent of the others.

    ## Example

    **Input:**
    n: 6
    already_generated_names: ["John Doe", "Jane Smith", "Alice Brown"]
    context: "Young Americans of different liberal professions"
    **Output:**
    ["Michael Johnson", "Sarah Williams", "David Gates", "Jennifer Davis", "Robert J. Wilson", "Anna Kerr"]

    Note that:
    - The names are not in the list of already generated names.
    - The outputs consist only of a list of names, nothing else.
    - The output length is exactly 6, which is the requested number of names. There could be a bit more names generated, but never less.

    Args:
        n (int): The number of names to generate.
        already_generated_names (list): The list of already generated names.
        context (str): The context in which the names are being generated. This can be used to guide the name generation, so that it is a realistic name for the context.

    Returns:
        list: A list of n unique full names for people. These names NEVER repeat names in the list of already generated names.
    """
    # the body of this method is handled by the @llm decorator. Below we provide a post-processing
    # function that is applied to the LLM output, to ensure that the names are unique.
    # FIX: corrected prompt typos ("reasononable", "despote", "ouputs").

    return lambda names: list(set(names))
1341
-
1342
@transactional()
def _aux_model_call(self, messages, temperature, frequency_penalty, presence_penalty):
    """
    Auxiliary method that performs the actual model call.

    Kept as a separate method so the @transactional decorator can wrap it; otherwise the
    agent creation would be skipped during cache reutilization, which we don't want.
    """
    client = openai_utils.client()
    return client.send_message(
        messages,
        temperature=temperature,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty,
        response_format={"type": "json_object"},
    )
1354
-
1355
def _generate_deep_persona_internal(self, initial_spec: dict) -> dict:
    """
    Performs a second API call to enrich the persona with a depth of 350 attributes.

    Args:
        initial_spec (dict): the initial persona specification to enrich.

    Returns:
        dict: the enriched specification; falls back to `initial_spec` unchanged when the
            model call fails or its output cannot be parsed into JSON.
    """
    logger.info(f"Enriching persona {initial_spec.get('name')} to deep persona (depth 350)...")

    prompt = f"""
    You are an expert persona generator. You have been provided with an initial persona profile:
    {json.dumps(initial_spec, indent=4)}

    TASK:
    Take all the attributes from this initial profile and expand them significantly to reach a depth of 350 attributes/nuances.
    The final profile must be incredibly detailed, authentic, and realistic.
    Expand on every field: education, occupation, style, personality, preferences, beliefs, skills, behaviors, health, relationships, and other_facts.
    Provide at least 50 detailed entries for each complex field (preferences, beliefs, other_facts).

    Rules:
    - Maintain consistency with the initial profile.
    - Output ONLY a valid JSON object.
    - Use the same field structure as the input.
    """

    messages = [
        {"role": "system", "content": "You are a specialized system for creating ultra-deep, 350-attribute persona specifications."},
        {"role": "user", "content": prompt}
    ]

    # Use the Helmholtz client via send_message
    message = self._aux_model_call(messages=messages, temperature=1.2, frequency_penalty=0.0, presence_penalty=0.0)

    if message is not None:
        enriched_spec = utils.extract_json(message["content"])
        # BUGFIX: previously an unparsable model response returned None/empty here;
        # now we only use the enriched spec when extraction actually produced something,
        # otherwise we fall back to the initial spec.
        if enriched_spec:
            return enriched_spec

    return initial_spec
1390
-
1391
@transactional()
def _setup_agent(self, agent, configuration):
    """
    Configures the given agent with the necessary persona definitions.
    """
    agent.include_persona_definitions(configuration)
    # Deliberately returns nothing: we don't want the agent object itself to be cached.
1399
-
1400
@transactional()
@utils.llm(temperature=0.3, frequency_penalty=-0.1, presence_penalty=-0.1, enable_json_output_format=False)
def _generate_name_for_sample(self, sample_characteristics: dict, already_generated_names: list) -> str:
    """
    Generates a single full name for a person based on their complete sample characteristics, such that
    it is as appropriate as possible to all characteristics, not just gender.
    This name MUST BE UNIQUE and not appear in the already_generated_names list, though variations of the
    same name are allowed.

    You must generate a realistic full name that is appropriate for the given sample characteristics.
    Consider ALL the characteristics provided, including but not limited to:
    - Gender
    - Age or age range
    - Country/nationality/ethnicity
    - Socioeconomic status
    - Profession
    - Educational background
    - Cultural background
    - Any other relevant demographic or personal characteristics

    The name should:
    - BE UNIQUE and not appear in the already_generated_names list
    - Be realistic and culturally appropriate for the characteristics
    - Sound natural and not made-up
    - Reflect the person's likely background (e.g., names common in their generation, culture, social class)

    If you need additional methods to ensure uniqueness, you can:
    - Use longer or more uncommon names
    - Include middle names or multiple surnames
    - Use culturally appropriate name variations
    - As a last resort, you can append a number, but this should be avoided.


    In ANY CASE, you **must never**, NEVER, generate a name that already appears in the already_generated_names list.

    Return only the full name as a string, nothing else.

    ## Example

    **Input:**
    sample_characteristics: {
        "gender": "female",
        "age": 28,
        "country": "Brazil",
        "profession": "Software Engineer",
        "socioeconomic_status": "middle class",
        "education": "Computer Science degree"
    }
    already_generated_names: ["João Silva", "Maria Santos", "Ana Costa"]

    **Output:**
    "Camila Rodrigues"

    Args:
        sample_characteristics (dict): The complete characteristics of the sample, including demographics, profession, etc.
        already_generated_names (list): The list of already generated names to avoid duplicates. The new name MUST NOT be in this list.

    Returns:
        str: A single full name appropriate for the sample characteristics.
    """
    # the body of this method is handled by the @llm decorator; the docstring above is the prompt.
    # FIX: removed a verbatim-duplicated bullet ("Be unique and not appear in the
    # already_generated_names list"), which was stated twice in the "The name should" list.
1462
 
 
1
+ from gradio_client import Client
2
  import os
3
  import json
4
  import chevron
 
31
  def __init__(self, sampling_space_description:str=None, total_population_size:int=None, context:str=None, simulation_id:str=None):
32
  """
33
  Initialize a DeepPersonaFactory instance.
 
 
 
 
 
 
 
34
  """
35
  super().__init__(simulation_id)
36
+ self.person_prompt_template_path = os.path.join(os.path.dirname(__file__), 'prompts/deep_persona.mustache')
37
  self.context_text = context
38
  self.sampling_space_description = sampling_space_description
39
  self.population_size = total_population_size
 
42
  self.sampling_plan = None
43
  self.remaining_characteristics_sample = None
44
 
45
+ self.generated_minibios = []
46
  self.generated_names = []
47
 
48
def _parse_particularities_for_deeppersona(self, agent_particularities: str) -> dict:
    """
    Extracts the 10 DeepPersona input fields from a free-form agent description.

    Delegates the extraction to the model via _aux_model_call and returns the parsed
    JSON; returns an empty dict when no message comes back.
    """
    extraction_prompt = f"""
    Parse the following agent description into a JSON object with these fields:
    "name", "age", "gender", "occupation", "city", "country", "values", "attitude", "life_story", "interests".

    Description:
    {agent_particularities}

    If a field is missing, provide a realistic default or leave it as "Unknown".
    Return ONLY the JSON.
    """

    system_msg = {"role": "system", "content": "You are a precise data extractor."}
    user_msg = {"role": "user", "content": extraction_prompt}

    message = self._aux_model_call([system_msg, user_msg],
                                   temperature=0.1,
                                   frequency_penalty=0.0,
                                   presence_penalty=0.0)
    if not message:
        return {}
    return utils.extract_json(message["content"])
67
 
68
def _generate_via_deeppersona(self, info: dict, attribute_count: int, context_from_step1: str = None) -> str:
    """
    Calls the DeepPersona API on Hugging Face.

    Args:
        info (dict): parsed persona fields ("age", "gender", "occupation", "city", "country",
            "values", "attitude", "life_story", "interests").
        attribute_count (int): target number of attributes for the generated profile.
        context_from_step1 (str, optional): a previously generated profile, injected into the
            "values" field to enrich the second, deeper generation pass.

    Returns:
        str: the raw text profile returned by the remote Space.

    Raises:
        Exception: re-raises any error from the remote API call, after logging it.
    """
    try:
        client = Client("THzva/deeppersona-experience")

        # BUGFIX: the old check (`isinstance(age, str) and not age.isdigit()`) only reset
        # non-digit *strings* to the default, so None or a sampled [min, max] range crashed
        # in float(), and valid fractional strings like "29.5" were discarded. Coerce
        # defensively instead.
        try:
            age = float(info.get("age", 30))
        except (TypeError, ValueError):
            age = 30.0

        custom_values = info.get("values", "")
        if context_from_step1:
            custom_values = f"ENRICHMENT CONTEXT (Level 100):\n{context_from_step1}\n\nORIGINAL VALUES:\n{custom_values}"

        result = client.predict(
            age=age,
            gender=str(info.get("gender", "Unknown")),
            occupation=str(info.get("occupation", "Professional")),
            city=str(info.get("city", "Unknown")),
            country=str(info.get("country", "Unknown")),
            custom_values=str(custom_values),
            custom_life_attitude=str(info.get("attitude", "Unknown")),
            life_story=str(info.get("life_story", "Unknown")),
            interests_hobbies=str(info.get("interests", "Unknown")),
            attribute_count=float(attribute_count),
            api_name="/generate_persona"
        )
        return result
    except Exception as e:
        logger.error(f"Error calling DeepPersona API: {e}")
        raise
100
+
101
def _structure_deeppersona_result(self, text_profile: str, base_info: dict) -> dict:
    """
    Converts the unstructured DeepPersona text profile into structured JSON format.

    Returns None when the model call produces no message.
    """
    structuring_prompt = f"""
    Convert the following Markdown/Text persona profile into a structured JSON specification.

    Text Profile:
    {text_profile}

    Base Info (use if missing in profile):
    {json.dumps(base_info, indent=4)}

    The JSON must include exactly these fields:
    - name
    - age
    - gender
    - nationality
    - residence
    - education
    - long_term_goals
    - occupation
    - style
    - personality (include at least 10 traits and Big-5)
    - preferences (include at least 20 details)
    - beliefs (include at least 30 details)
    - skills
    - behaviors
    - health
    - relationships
    - other_facts (at least 30 entries)

    Ensure it is valid JSON. Return ONLY the JSON.
    """

    conversation = [
        {"role": "system", "content": "You are a JSON structuring assistant."},
        {"role": "user", "content": structuring_prompt},
    ]

    message = self._aux_model_call(conversation,
                                   temperature=0.3,
                                   frequency_penalty=0.0,
                                   presence_penalty=0.0)
    if not message:
        return None
    return utils.extract_json(message["content"])
140
+
141
@transactional()
def _aux_model_call(self, messages, temperature, frequency_penalty, presence_penalty):
    # Thin transactional wrapper around the model client's send_message, so the call can be
    # cached/replayed by the @transactional machinery. Forces a JSON-object response format;
    # callers parse message["content"] as JSON via utils.extract_json.
    return openai_utils.client().send_message(messages,
                                              temperature=temperature,
                                              frequency_penalty=frequency_penalty,
                                              presence_penalty=presence_penalty,
                                              response_format={"type": "json_object"})
148
 
149
  def generate_person(self,
150
  agent_particularities:str=None,
 
154
  attempts:int=10,
155
  post_processing_func=None,
156
  deep_persona:bool=True) -> DeepPersona:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  logger.debug(f"Starting the person generation based these particularities: {agent_particularities}")
158
  fresh_agent_name = None
159
 
 
160
  if self.population_size is not None:
 
161
  with concurrent_agent_generataion_lock:
162
  if self.remaining_characteristics_sample is None:
 
163
  self.initialize_sampling_plan()
164
 
 
 
 
165
  with concurrent_agent_generataion_lock:
166
  if len(self.remaining_characteristics_sample) == 0:
 
167
  return None
 
168
  else:
169
  sampled_characteristics = self.remaining_characteristics_sample.pop()
 
170
 
171
  if agent_particularities is not None:
172
+ agent_particularities = f"Primary: {agent_particularities}. Sampled: {json.dumps(sampled_characteristics)}"
 
 
 
 
 
 
 
 
 
 
 
 
173
  else:
174
+ agent_particularities = json.dumps(sampled_characteristics)
175
+ else:
 
 
 
 
 
176
  with concurrent_agent_generataion_lock:
177
  fresh_agent_name = self._unique_full_name(already_generated_names=DeepPersonaFactory._all_used_and_precomputed_names(),
178
  context=self.context_text)
179
 
180
  if agent_particularities is not None:
181
+ agent_particularities = f"Primary: {agent_particularities}. Name: {fresh_agent_name}"
 
 
 
 
 
 
 
 
 
182
  else:
183
  agent_particularities = f"Full name: {fresh_agent_name}"
184
 
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  def aux_generate(attempt):
187
+ try:
188
+ base_info = self._parse_particularities_for_deeppersona(agent_particularities)
189
+ logger.info(f"DeepPersona Step 1: Generating profile at depth 100...")
190
+ profile_step1 = self._generate_via_deeppersona(base_info, attribute_count=100)
191
+ logger.info(f"DeepPersona Step 2: Enriching profile to depth 350...")
192
+ profile_step2 = self._generate_via_deeppersona(base_info, attribute_count=350, context_from_step1=profile_step1)
193
+ logger.info(f"Structuring DeepPersona output into JSON...")
194
+ result = self._structure_deeppersona_result(profile_step2, base_info)
195
+ if result and not self._is_name_already_assigned(result.get("name", "")):
 
 
 
 
 
 
 
 
 
196
  return result
197
+ except Exception as e:
198
+ logger.error(f"DeepPersona generation failed: {e}")
199
+ return None
200
 
 
 
201
  agent_spec = None
202
  attempt = 0
203
  while agent_spec is None and attempt < attempts:
204
+ attempt += 1
205
+ agent_spec = aux_generate(attempt=attempt)
 
 
 
206
 
 
207
  if agent_spec is not None:
 
 
 
 
 
 
 
 
208
  with concurrent_agent_generataion_lock:
209
  person = DeepPersona(agent_spec["name"])
210
  self._setup_agent(person, agent_spec)
 
215
  self.generated_names.append(person.get("name"))
216
 
217
  return person
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
+ return None
220
 
221
+ @transactional()
222
+ def _setup_agent(self, agent, configuration):
223
+ agent.include_persona_definitions(configuration)
 
 
224
 
225
+ @transactional()
226
+ def _unique_full_name(self, already_generated_names: list, context:str=None) -> str:
227
+ return self._aux_unique_full_name(already_generated_names=already_generated_names, context=context)
 
 
 
 
228
 
229
+ @utils.llm(temperature=1.5, presence_penalty=0.5, frequency_penalty=0.5)
230
+ def _aux_unique_full_name(self, already_generated_names: list, context:str=None) -> str:
231
+ pass
 
 
 
232
 
233
+ @classmethod
234
+ def _all_used_and_precomputed_names(cls) -> list:
235
+ return DeepPersona.all_agents_names() + cls.all_unique_names
236
+
237
+ def _is_name_already_assigned(self, name:str) -> bool:
238
+ return name in DeepPersona.all_agents_names()
239
 
240
  def generate_people(self, number_of_people:int=None,
241
  agent_particularities:str=None,
 
247
  parallelize=None,
248
  verbose:bool=False,
249
  deep_persona:bool=True) -> list:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  if number_of_people is None:
 
 
251
  number_of_people = self.population_size
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
 
 
 
 
 
 
 
 
 
 
 
 
253
  people = []
254
+ for i in range(number_of_people):
 
 
 
 
 
 
 
 
 
 
255
  person = self.generate_person(agent_particularities=agent_particularities,
 
 
 
256
  attempts=attempts,
257
  post_processing_func=post_processing_func,
258
  deep_persona=deep_persona)
259
+ if person:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  people.append(person)
 
 
 
 
 
 
 
261
  return people
262
 
 
 
 
263
  def initialize_sampling_plan(self):
264
+ self.remaining_characteristics_sample = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
deeppersona/factory/deep_persona_factory_base.py CHANGED
@@ -6,12 +6,10 @@ import deeppersona.utils as utils
6
 
7
  class DeepPersonaFactoryBase:
8
  """
9
- A base class for various types of factories. This is important because it makes it easier to extend the system, particularly
10
- regarding transaction caching.
11
  """
12
 
13
  # common randomizer used for samplings, with a default initial seed to allow for reproducibility.
14
- # subclases can use this directly as well.
15
  randomizer = random.Random(42)
16
 
17
  # A dict of all factories created so far.
@@ -19,12 +17,9 @@ class DeepPersonaFactoryBase:
19
 
20
  def __init__(self, simulation_id:str=None) -> None:
21
  """
22
- Initialize a DeepPersonaFactory instance.
23
-
24
- Args:
25
- simulation_id (str, optional): The ID of the simulation. Defaults to None.
26
  """
27
- self.name = f"Factory {utils.fresh_id(self.__class__.__name__)}" # we need a name, but no point in making it customizable
28
  self.simulation_id = simulation_id
29
 
30
  DeepPersonaFactoryBase.add_factory(self)
@@ -34,20 +29,12 @@ class DeepPersonaFactoryBase:
34
 
35
  @staticmethod
36
  def set_simulation_for_free_factories(simulation):
37
- """
38
- Sets the simulation if it is None. This allows free environments to be captured by specific simulation scopes
39
- if desired.
40
- """
41
  for factory in DeepPersonaFactoryBase.all_factories.values():
42
  if factory.simulation_id is None:
43
  simulation.add_factory(factory)
44
 
45
  @staticmethod
46
  def add_factory(factory):
47
- """
48
- Adds a factory to the list of all factories. Factory names must be unique,
49
- so if an factory with the same name already exists, an error is raised.
50
- """
51
  if factory.name in DeepPersonaFactoryBase.all_factories:
52
  raise ValueError(f"Factory names must be unique, but '{factory.name}' is already defined.")
53
  else:
@@ -55,42 +42,19 @@ class DeepPersonaFactoryBase:
55
 
56
  @classmethod
57
  def clear_factories(cls):
58
- """
59
- Clears the global list of all factories.
60
- """
61
  cls.all_factories = {}
62
  cls._clear_factories()
63
 
64
  @classmethod
65
  def _clear_factories(cls):
66
- """
67
- Additional cleanup actions can be performed here by subclasses if needed.
68
- """
69
  pass
70
 
71
- ################################################################################################
72
- # Caching mechanisms
73
- #
74
- # Factories can also be cached in a transactional way. This is necessary because the agents they
75
- # generate can be cached, and we need to ensure that the factory itself is also cached in a
76
- # consistent way.
77
- ################################################################################################
78
-
79
  def encode_complete_state(self) -> dict:
80
- """
81
- Encodes the complete state of the factory. If subclasses have elmements that are not serializable, they should override this method.
82
- """
83
-
84
  state = copy.deepcopy(self.__dict__)
85
  return state
86
 
87
  def decode_complete_state(self, state:dict):
88
- """
89
- Decodes the complete state of the factory. If subclasses have elmements that are not serializable, they should override this method.
90
- """
91
  state = copy.deepcopy(state)
92
-
93
  self.__dict__.update(state)
94
  return self
95
-
96
 
 
6
 
7
  class DeepPersonaFactoryBase:
8
  """
9
+ A base class for various types of factories.
 
10
  """
11
 
12
  # common randomizer used for samplings, with a default initial seed to allow for reproducibility.
 
13
  randomizer = random.Random(42)
14
 
15
  # A dict of all factories created so far.
 
17
 
18
  def __init__(self, simulation_id:str=None) -> None:
19
  """
20
+ Initialize a DeepPersonaFactoryBase instance.
 
 
 
21
  """
22
+ self.name = f"Factory {utils.fresh_id(self.__class__.__name__)}"
23
  self.simulation_id = simulation_id
24
 
25
  DeepPersonaFactoryBase.add_factory(self)
 
29
 
30
  @staticmethod
31
  def set_simulation_for_free_factories(simulation):
 
 
 
 
32
  for factory in DeepPersonaFactoryBase.all_factories.values():
33
  if factory.simulation_id is None:
34
  simulation.add_factory(factory)
35
 
36
  @staticmethod
37
  def add_factory(factory):
 
 
 
 
38
  if factory.name in DeepPersonaFactoryBase.all_factories:
39
  raise ValueError(f"Factory names must be unique, but '{factory.name}' is already defined.")
40
  else:
 
42
 
43
  @classmethod
44
  def clear_factories(cls):
 
 
 
45
  cls.all_factories = {}
46
  cls._clear_factories()
47
 
48
  @classmethod
49
  def _clear_factories(cls):
 
 
 
50
  pass
51
 
 
 
 
 
 
 
 
 
52
  def encode_complete_state(self) -> dict:
 
 
 
 
53
  state = copy.deepcopy(self.__dict__)
54
  return state
55
 
56
  def decode_complete_state(self, state:dict):
 
 
 
57
  state = copy.deepcopy(state)
 
58
  self.__dict__.update(state)
59
  return self
 
60
 
deeppersona/profiling.py CHANGED
@@ -75,7 +75,7 @@ class Profiler:
75
  for agent in agents:
76
  if isinstance(agent, DeepPersona):
77
  # Extract data from DeepPersona object
78
- agent_data = self._extract_deeppersona_data(agent)
79
  else:
80
  agent_data = agent.copy()
81
 
@@ -83,7 +83,7 @@ class Profiler:
83
 
84
  return processed_agents
85
 
86
- def _extract_deeppersona_data(self, agent: DeepPersona) -> Dict[str, Any]:
87
  """Extract comprehensive data from a DeepPersona object."""
88
  data = {}
89
 
 
75
  for agent in agents:
76
  if isinstance(agent, DeepPersona):
77
  # Extract data from DeepPersona object
78
+ agent_data = self._extract_tinyperson_data(agent)
79
  else:
80
  agent_data = agent.copy()
81
 
 
83
 
84
  return processed_agents
85
 
86
+ def _extract_tinyperson_data(self, agent: DeepPersona) -> Dict[str, Any]:
87
  """Extract comprehensive data from a DeepPersona object."""
88
  data = {}
89
 
deeppersona/simulation_manager.py CHANGED
@@ -4,7 +4,7 @@ import threading
4
  from datetime import datetime
5
  from deeppersona.agent import DeepPersona
6
  from deeppersona.social_network import NetworkTopology
7
- from deeppersona.environment.social_deep_world import SocialDeepWorld, SimulationResult
8
  from deeppersona.agent.social_types import Content
9
  from deeppersona.ml_models import EngagementPredictor
10
  from deeppersona.content_generation import ContentVariantGenerator
@@ -18,7 +18,7 @@ class SimulationConfig:
18
  self.user_id = kwargs.get("user_id")
19
 
20
  class Simulation:
21
- def __init__(self, id: str, config: SimulationConfig, world: SocialDeepWorld, personas: List[DeepPersona], network: NetworkTopology):
22
  self.id = id
23
  self.config = config
24
  self.world = world
@@ -43,7 +43,7 @@ class SimulationManager:
43
  if focus_group_name and focus_group_name in self.focus_groups:
44
  personas = self.focus_groups[focus_group_name]
45
  else:
46
- from deeppersona.factory import DeepPersonaFactory
47
  factory = DeepPersonaFactory(
48
  context=config.name,
49
  total_population_size=config.persona_count
@@ -58,7 +58,7 @@ class SimulationManager:
58
  network = net_gen.generate_small_world_network(config.persona_count, 4, 0.1)
59
 
60
  # Create world
61
- world = SocialDeepWorld(config.name, network=network)
62
  for persona in personas:
63
  world.add_agent(persona)
64
 
 
4
  from datetime import datetime
5
  from deeppersona.agent import DeepPersona
6
  from deeppersona.social_network import NetworkTopology
7
+ from deeppersona.environment.social_deep_world import SocialTinyWorld, SimulationResult
8
  from deeppersona.agent.social_types import Content
9
  from deeppersona.ml_models import EngagementPredictor
10
  from deeppersona.content_generation import ContentVariantGenerator
 
18
  self.user_id = kwargs.get("user_id")
19
 
20
  class Simulation:
21
+ def __init__(self, id: str, config: SimulationConfig, world: SocialTinyWorld, personas: List[DeepPersona], network: NetworkTopology):
22
  self.id = id
23
  self.config = config
24
  self.world = world
 
43
  if focus_group_name and focus_group_name in self.focus_groups:
44
  personas = self.focus_groups[focus_group_name]
45
  else:
46
+ from deeppersona.factory.deep_persona_factory import DeepPersonaFactory
47
  factory = DeepPersonaFactory(
48
  context=config.name,
49
  total_population_size=config.persona_count
 
58
  network = net_gen.generate_small_world_network(config.persona_count, 4, 0.1)
59
 
60
  # Create world
61
+ world = SocialTinyWorld(config.name, network=network)
62
  for persona in personas:
63
  world.add_agent(persona)
64
 
deeppersona/utils/config.py CHANGED
@@ -59,7 +59,7 @@ def pretty_print_datetime():
59
  print(f"Current date and time (local): {now.strftime('%Y-%m-%d %H:%M:%S')}")
60
  print(f"Current date and time (UTC): {now_utc.strftime('%Y-%m-%d %H:%M:%S')}")
61
 
62
- def pretty_print_deeppersona_version():
63
  try:
64
  import importlib.metadata
65
  version = importlib.metadata.version("deeppersona")
 
59
  print(f"Current date and time (local): {now.strftime('%Y-%m-%d %H:%M:%S')}")
60
  print(f"Current date and time (UTC): {now_utc.strftime('%Y-%m-%d %H:%M:%S')}")
61
 
62
+ def pretty_print_tinytroupe_version():
63
  try:
64
  import importlib.metadata
65
  version = importlib.metadata.version("deeppersona")
pyproject.toml CHANGED
@@ -3,11 +3,11 @@ requires = ["setuptools>=61.0"]
3
  build-backend = "setuptools.build_meta"
4
 
5
  [tool.setuptools]
6
- packages = ["tinytroupe"]
7
  include-package-data = true
8
 
9
  [project]
10
- name = "tinytroupe"
11
  version = "0.5.2"
12
  authors = [
13
  { name="Paulo Salem", email="paulo.salem@microsoft.com" }
@@ -41,7 +41,7 @@ dependencies = [
41
  ]
42
 
43
  [project.urls]
44
- "Homepage" = "https://github.com/microsoft/tinytroupe"
45
 
46
  [tool.pytest.ini_options]
47
  pythonpath = [
@@ -56,4 +56,4 @@ markers = [
56
  "examples: mark a test as the execution of examples",
57
  "notebooks: mark a test as a more specific Jupyter notebook execution example",
58
  ]
59
- addopts = "--cov=tinytroupe --cov-report=html --cov-report=xml"
 
3
  build-backend = "setuptools.build_meta"
4
 
5
  [tool.setuptools]
6
+ packages = ["deeppersona"]
7
  include-package-data = true
8
 
9
  [project]
10
+ name = "deeppersona"
11
  version = "0.5.2"
12
  authors = [
13
  { name="Paulo Salem", email="paulo.salem@microsoft.com" }
 
41
  ]
42
 
43
  [project.urls]
44
+ "Homepage" = "https://github.com/microsoft/deeppersona"
45
 
46
  [tool.pytest.ini_options]
47
  pythonpath = [
 
56
  "examples: mark a test as the execution of examples",
57
  "notebooks: mark a test as a more specific Jupyter notebook execution example",
58
  ]
59
+ addopts = "--cov=deeppersona --cov-report=html --cov-report=xml"
requirements.txt CHANGED
@@ -20,8 +20,6 @@ matplotlib
20
  pydantic
21
  textdistance
22
  scipy
23
- transformers
24
  huggingface-hub>=0.33.5
25
  gradio_client
26
- fastapi
27
- uvicorn
 
20
  pydantic
21
  textdistance
22
  scipy
23
+ transformers==4.38.2
24
  huggingface-hub>=0.33.5
25
  gradio_client