AUXteam commited on
Commit
f5e9574
·
verified ·
1 Parent(s): da232a0

Upload folder using huggingface_hub

Browse files
Dockerfile CHANGED
@@ -1,26 +1,28 @@
1
- FROM python:3.11-slim
2
 
3
- # Configure a non-root user specifically for HF Spaces
4
- RUN useradd -m -u 1000 user
 
 
 
 
5
 
 
 
6
  USER user
 
7
 
8
- # Set home to the user's home directory
9
- ENV HOME=/home/user \
10
- PATH=/home/user/.local/bin:$PATH
11
-
12
- # Change working directory
13
- WORKDIR $HOME/app
14
 
15
- # Install dependencies using the user
16
  COPY --chown=user requirements.txt .
17
  RUN pip install --no-cache-dir --user -r requirements.txt
18
 
19
- # Copy the app files into the home directory, setting proper ownership
20
- COPY --chown=user . $HOME/app
21
 
22
- # Expose the standard port
23
  EXPOSE 7860
24
 
25
- # Run the FastAPI app using uvicorn
26
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ FROM python:3.12-slim
2
 
3
+ # Install system dependencies
4
+ RUN apt-get update && apt-get install -y \
5
+ build-essential \
6
+ curl \
7
+ git \
8
+ && rm -rf /var/lib/apt/lists/*
9
 
10
+ # Create a non-root user
11
+ RUN useradd -m -u 1000 user
12
  USER user
13
+ ENV PATH="/home/user/.local/bin:${PATH}"
14
 
15
+ WORKDIR /app
 
 
 
 
 
16
 
17
+ # Copy requirements and install
18
  COPY --chown=user requirements.txt .
19
  RUN pip install --no-cache-dir --user -r requirements.txt
20
 
21
+ # Copy the rest of the application
22
+ COPY --chown=user . .
23
 
24
+ # Expose the HF port
25
  EXPOSE 7860
26
 
27
+ # Run the application
28
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: Tiny Factory
3
- emoji: 💻
4
  colorFrom: yellow
5
  colorTo: gray
6
  sdk: docker
@@ -8,4 +8,22 @@ app_port: 7860
8
  pinned: false
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Deep Persona Factory
3
+ emoji: 🎭
4
  colorFrom: yellow
5
  colorTo: gray
6
  sdk: docker
 
8
  pinned: false
9
  ---
10
 
11
+ # Deep Persona Factory
12
+
13
+ Deep Persona Factory is a specialized simulation engine for persona generation and social content testing.
14
+
15
+ ## Features
16
+ - **Social Network Engine:** Graph-based modeling and influence propagation.
17
+ - **Prediction Engine:** ML and LLM-based engagement scoring.
18
+ - **Deep Persona Generation:** Sequential enrichment for high-fidelity character profiles.
19
+ - **API Documentation:** Accessible via \`/api-docs\`.
20
+ - **Health Check:** Accessible via \`/health\`.
21
+
22
+ ## API Documentation
23
+ The application exposes a mandatory \`/api-docs\` endpoint providing Swagger UI for all available endpoints.
24
+
25
+ ## Local Setup
26
+ \`\`\`bash
27
+ pip install -r requirements.txt
28
+ uvicorn app:app --host 0.0.0.0 --port 7860
29
+ \`\`\`
app.py CHANGED
@@ -2,199 +2,536 @@ import sys
2
  import os
3
  import gradio as gr
4
  import json
5
- from fastapi import FastAPI
6
- import uvicorn
7
- from pydantic import BaseModel
 
 
 
8
 
9
- app = FastAPI()
 
 
10
 
11
- @app.get("/health")
12
- def health():
13
- return {"status": "ok"}
14
 
15
- from fastapi.responses import RedirectResponse
16
-
17
- @app.get("/api-docs")
18
- def api_docs():
19
- return RedirectResponse(url="/docs")
20
-
21
- class PersonaRequest(BaseModel):
22
- business_description: str
23
- customer_profile: str
24
- num_personas: int = 1
25
-
26
- @app.post("/api/v1/generate_personas")
27
- def generate_personas_api(req: PersonaRequest):
28
- return generate_personas(req.business_description, req.customer_profile, req.num_personas)
29
 
30
- def extract_persona_parameters(business_description: str, customer_profile: str) -> dict:
31
- from tinytroupe.openai_utils import client
32
-
33
- system_prompt = """
34
- You are an expert persona parameter extractor.
35
- Based on the provided business description and customer profile, you must deduce and generate 10 specific parameters needed for a deep persona generator.
36
- The parameters are:
37
- - `age` (float): The age of the persona.
38
- - `gender` (str): The gender of the persona.
39
- - `occupation` (str): The occupation of the persona.
40
- - `city` (str): The city of the persona.
41
- - `country` (str): The country of the persona.
42
- - `custom_values` (str): The personal values of the persona.
43
- - `custom_life_attitude` (str): The life attitude of the persona.
44
- - `life_story` (str): A brief life story of the persona.
45
- - `interests_hobbies` (str): Interests and hobbies of the persona.
46
- - `attribute_count` (float): Attribute richness, default to 350.
47
-
48
- You must return a valid JSON object containing exactly these keys.
49
- """
50
-
51
- user_prompt = f"Business Description: {business_description}\nCustomer Profile: {customer_profile}\n\nReturn the 10 parameters as JSON."
52
-
53
- messages = [
54
- {"role": "system", "content": system_prompt},
55
- {"role": "user", "content": user_prompt}
56
- ]
57
-
58
- api_client = client()
59
- response = api_client.send_message(messages, response_format={"type": "json_object"})
60
-
61
- if response and "content" in response:
62
- try:
63
- # Attempt to parse it if the model returned string json
64
- import json
65
- import tinytroupe.utils as utils
66
- extracted_json = utils.extract_json(response["content"])
67
-
68
- # Ensure all keys are present
69
- required_keys = ['age', 'gender', 'occupation', 'city', 'country', 'custom_values', 'custom_life_attitude', 'life_story', 'interests_hobbies', 'attribute_count']
70
-
71
- # If extracting JSON list vs dict
72
- if isinstance(extracted_json, list) and len(extracted_json) > 0:
73
- extracted_json = extracted_json[0]
74
-
75
- for key in required_keys:
76
- if key not in extracted_json:
77
- # provide defaults for missing ones
78
- if key in ['age', 'attribute_count']:
79
- extracted_json[key] = 350 if key == 'attribute_count' else 30
80
- else:
81
- extracted_json[key] = "Unknown"
82
-
83
- return extracted_json
84
- except Exception as e:
85
- print(f"Error parsing JSON from LLM: {e}")
86
- pass
87
-
88
- # Fallback
89
- return {
90
- "age": 30,
91
- "gender": "Non-binary",
92
- "occupation": "Professional",
93
- "city": "Metropolis",
94
- "country": "Country",
95
- "custom_values": "Innovation, Community",
96
- "custom_life_attitude": "Optimistic",
97
- "life_story": "A standard professional background with a passion for their field.",
98
- "interests_hobbies": "Technology, Reading",
99
- "attribute_count": 350
100
- }
101
 
 
102
  def generate_personas(business_description, customer_profile, num_personas, blablador_api_key=None):
103
  """
104
- Generates a list of personas based on the provided inputs, utilizing a double
105
- sequential generation pipeline:
106
- 1. Extract parameters from context via LLM.
107
- 2. Generate persona using deeppersona-experience via gradio client.
108
  """
 
 
109
  api_key_to_use = blablador_api_key or os.getenv("BLABLADOR_API_KEY")
110
 
111
  if not api_key_to_use:
112
  return {"error": "BLABLADOR_API_KEY not found. Please provide it in your API call or set it as a secret in the Space settings."}
113
 
 
114
  original_key = os.getenv("BLABLADOR_API_KEY")
115
- os.environ["BLABLADOR_API_KEY"] = api_key_to_use
116
 
117
  try:
118
- from gradio_client import Client
119
-
 
 
120
  num_personas = int(num_personas)
121
- personas_data = []
122
 
123
- # Step 1: Extract 10 parameters based on the high-level inputs
124
- # For multiple personas, we could call this in a loop or once.
125
- # The prompt implies we want to do it in a pipeline. We'll do it per persona or once based on the prompt.
126
- # Let's do it per persona to generate distinct ones, passing an index or just relying on LLM variance.
127
-
128
- # Connect to gradio client
129
- # In a real scenario, the Hugging Face Token might be needed if the Space is private.
130
- # But deeppersona-experience is public or assumed accessible.
131
- client = Client("THzva/deeppersona-experience")
132
 
133
- for i in range(num_personas):
134
- # To get variety, we can append a note about variety to the profile
135
- profile_with_variance = customer_profile + f"\n\nMake this persona distinct. Persona {i+1} of {num_personas}."
136
-
137
- # Extract parameters using the LLM
138
- params = extract_persona_parameters(business_description, profile_with_variance)
139
-
140
- # Step 2: Call the Gradio API with the extracted parameters
141
- result = client.predict(
142
- age=float(params.get("age", 30)),
143
- gender=str(params.get("gender", "Non-binary")),
144
- occupation=str(params.get("occupation", "Professional")),
145
- city=str(params.get("city", "Metropolis")),
146
- country=str(params.get("country", "Country")),
147
- custom_values=str(params.get("custom_values", "Innovation, Community")),
148
- custom_life_attitude=str(params.get("custom_life_attitude", "Optimistic")),
149
- life_story=str(params.get("life_story", "A standard professional background with a passion for their field.")),
150
- interests_hobbies=str(params.get("interests_hobbies", "Technology, Reading")),
151
- attribute_count=float(params.get("attribute_count", 350)),
152
- api_name="/generate_persona"
153
- )
154
-
155
- # Note: The result from this API is a string (persona profile text)
156
- personas_data.append({
157
- "parameters_used": params,
158
- "persona_profile": result
159
- })
160
-
161
  return personas_data
162
 
163
  except Exception as e:
164
  return {"error": str(e)}
165
-
166
  finally:
 
 
 
167
  if original_key is None:
 
168
  if "BLABLADOR_API_KEY" in os.environ:
169
  del os.environ["BLABLADOR_API_KEY"]
170
  else:
 
171
  os.environ["BLABLADOR_API_KEY"] = original_key
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  with gr.Blocks() as demo:
174
- gr.Markdown("<h1>Tiny Persona Generator</h1>")
175
  with gr.Row():
176
  with gr.Column():
177
  business_description_input = gr.Textbox(label="What is your business about?", lines=5)
178
  customer_profile_input = gr.Textbox(label="Information about your customer profile", lines=5)
179
- num_personas_input = gr.Number(label="Number of personas to generate", value=1, minimum=1, step=1)
180
 
 
 
181
  blablador_api_key_input = gr.Textbox(
182
  label="Blablador API Key (for API client use)",
183
  visible=False
184
  )
185
 
186
  generate_button = gr.Button("Generate Personas")
 
 
 
 
 
 
187
  with gr.Column():
188
- output_json = gr.JSON(label="Generated Personas")
189
 
190
  generate_button.click(
191
  fn=generate_personas,
 
192
  inputs=[business_description_input, customer_profile_input, num_personas_input, blablador_api_key_input],
193
  outputs=output_json,
194
  api_name="generate_personas"
195
  )
196
 
197
- app = gr.mount_gradio_app(app, demo, path="/")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
  if __name__ == "__main__":
200
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
2
  import os
3
  import gradio as gr
4
  import json
5
+ import glob
6
+ from deeppersona.factory import DeepPersonaFactory
7
+ from deeppersona.utils.semantics import select_best_persona, select_relevant_personas_utility
8
+ from deeppersona.simulation_manager import SimulationManager, SimulationConfig
9
+ from deeppersona.agent.social_types import Content
10
+ from huggingface_hub import hf_hub_download, upload_file
11
 
12
+ HF_TOKEN = os.getenv("HF_TOKEN") # Ensure this is set in Space secrets
13
+ REPO_ID = "AUXteam/tiny_factory"
14
+ PERSONA_BASE_FILE = "persona_base.json"
15
 
16
+ simulation_manager = SimulationManager()
 
 
17
 
18
+ def load_persona_base():
19
+ if not HF_TOKEN:
20
+ print("HF_TOKEN not found, persistence disabled.")
21
+ return []
22
+ try:
23
+ path = hf_hub_download(repo_id=REPO_ID, filename=PERSONA_BASE_FILE, repo_type="space", token=HF_TOKEN)
24
+ with open(path, 'r', encoding='utf-8') as f:
25
+ return json.load(f)
26
+ except Exception as e:
27
+ print(f"Error loading persona base: {e}")
28
+ return []
 
 
 
29
 
30
+ def save_persona_base(personas):
31
+ if not HF_TOKEN:
32
+ print("HF_TOKEN not found, skipping upload.")
33
+ return
34
+ with open(PERSONA_BASE_FILE, 'w', encoding='utf-8') as f:
35
+ json.dump(personas, f, indent=4)
36
+ try:
37
+ upload_file(
38
+ path_or_fileobj=PERSONA_BASE_FILE,
39
+ path_in_repo=PERSONA_BASE_FILE,
40
+ repo_id=REPO_ID,
41
+ repo_type="space",
42
+ token=HF_TOKEN
43
+ )
44
+ except Exception as e:
45
+ print(f"Error saving persona base to Hub: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
+ # --- CHANGE 1: The function now accepts an optional API key. ---
48
  def generate_personas(business_description, customer_profile, num_personas, blablador_api_key=None):
49
  """
50
+ Generates a list of DeepPersona instances based on the provided inputs.
51
+ It prioritizes the API key passed as an argument, but falls back to the
52
+ environment variable if none is provided (for UI use).
 
53
  """
54
+ # --- CHANGE 2: Logic to determine which key to use. ---
55
+ # Use the key from the API call if provided, otherwise get it from the Space secrets.
56
  api_key_to_use = blablador_api_key or os.getenv("BLABLADOR_API_KEY")
57
 
58
  if not api_key_to_use:
59
  return {"error": "BLABLADOR_API_KEY not found. Please provide it in your API call or set it as a secret in the Space settings."}
60
 
61
+ # Store the original state of the environment variable, if it exists
62
  original_key = os.getenv("BLABLADOR_API_KEY")
 
63
 
64
  try:
65
+ # --- CHANGE 3: Securely set the correct environment variable for this request. ---
66
+ # The underlying deeppersona library will look for this variable.
67
+ os.environ["BLABLADOR_API_KEY"] = api_key_to_use
68
+
69
  num_personas = int(num_personas)
 
70
 
71
+ factory = DeepPersonaFactory(
72
+ context=business_description,
73
+ sampling_space_description=customer_profile,
74
+ total_population_size=num_personas
75
+ )
76
+
77
+ # Restricted to deep persona generation with double sequential API call
78
+ people = factory.generate_people(number_of_people=num_personas, parallelize=False, deep_persona=True)
79
+ personas_data = [person._persona for person in people]
80
 
81
+ # --- NEW: Update the Tresor ---
82
+ current_base = load_persona_base()
83
+ current_base.extend(personas_data)
84
+ save_persona_base(current_base)
85
+ # ------------------------------
86
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  return personas_data
88
 
89
  except Exception as e:
90
  return {"error": str(e)}
91
+
92
  finally:
93
+ # --- CHANGE 4: A robust cleanup using a 'finally' block. ---
94
+ # This ensures the environment is always restored to its original state,
95
+ # whether the function succeeds or fails.
96
  if original_key is None:
97
+ # If the variable didn't exist originally, remove it.
98
  if "BLABLADOR_API_KEY" in os.environ:
99
  del os.environ["BLABLADOR_API_KEY"]
100
  else:
101
+ # If it existed, restore its original value.
102
  os.environ["BLABLADOR_API_KEY"] = original_key
103
 
104
+
105
+ def find_best_persona(criteria):
106
+ """
107
+ Loads the persona base and finds the best matching persona based on criteria.
108
+ """
109
+ personas = load_persona_base()
110
+ if not personas:
111
+ return {"error": "Persona base is empty. Generate some personas first!"}
112
+
113
+ try:
114
+ # select_best_persona uses LLM to find the best index
115
+ idx = select_best_persona(criteria=criteria, personas=personas)
116
+
117
+ try:
118
+ idx = int(idx)
119
+ except (ValueError, TypeError):
120
+ return {"error": f"LLM returned an invalid index: {idx}"}
121
+
122
+ if idx >= 0 and idx < len(personas):
123
+ return personas[idx]
124
+ else:
125
+ return {"error": f"No matching persona found for criteria: {criteria}"}
126
+ except Exception as e:
127
+ return {"error": f"Error during persona matching: {str(e)}"}
128
+
129
+
130
+ def load_example_personas():
131
+ """
132
+ Loads example personas from the deeppersona library.
133
+ """
134
+ example_personas = []
135
+ # Path to the agents folder in deeppersona/examples
136
+ agents_path = os.path.join("deeppersona", "examples", "agents", "*.agent.json")
137
+ for file_path in glob.glob(agents_path):
138
+ try:
139
+ with open(file_path, 'r', encoding='utf-8') as f:
140
+ data = json.load(f)
141
+ if "persona" in data:
142
+ example_personas.append(data["persona"])
143
+ except Exception as e:
144
+ print(f"Error loading example persona from {file_path}: {e}")
145
+ return example_personas
146
+
147
+
148
+ def identify_personas(context):
149
+ """
150
+ Identifies appropriate personas from the Tresor and example agents based on context.
151
+ """
152
+ try:
153
+ # 1. Load Tresor personas (persisted JSON)
154
+ tresor_personas = load_persona_base()
155
+
156
+ # 2. Load Example personas from deeppersona library
157
+ example_personas = load_example_personas()
158
+
159
+ all_available = tresor_personas + example_personas
160
+
161
+ if not all_available:
162
+ return {"error": "No personas available in Tresor or examples."}
163
+
164
+ # 3. Use LLM to filter/select which ones match the 'context'
165
+ # Returns a list of indices
166
+ indices = select_relevant_personas_utility(context, all_available)
167
+
168
+ selected = []
169
+ if isinstance(indices, list):
170
+ for i in indices:
171
+ try:
172
+ idx = int(i)
173
+ if 0 <= idx < len(all_available):
174
+ selected.append(all_available[idx])
175
+ except (ValueError, TypeError):
176
+ continue
177
+
178
+ return selected
179
+ except Exception as e:
180
+ return {"error": str(e)}
181
+
182
+
183
+ def generate_social_network_api(name, persona_count, network_type, focus_group_name=None):
184
+ """
185
+ Gradio API endpoint for generating a social network.
186
+ """
187
+ try:
188
+ config = SimulationConfig(name=name, persona_count=int(persona_count), network_type=network_type)
189
+ simulation = simulation_manager.create_simulation(config, focus_group_name=focus_group_name)
190
+ return {
191
+ "simulation_id": simulation.id,
192
+ "name": simulation.config.name,
193
+ "persona_count": len(simulation.personas),
194
+ "network_metrics": simulation.network.get_metrics()
195
+ }
196
+ except Exception as e:
197
+ return {"error": str(e)}
198
+
199
+
200
+ def predict_engagement_api(simulation_id, content_text, format="text"):
201
+ """
202
+ Gradio API endpoint for predicting engagement.
203
+ """
204
+ try:
205
+ content = Content(text=content_text, format=format)
206
+ result = simulation_manager.run_simulation(simulation_id, content)
207
+ return {
208
+ "total_reach": result.total_reach,
209
+ "expected_likes": result.expected_likes,
210
+ "expected_comments": result.expected_comments,
211
+ "expected_shares": result.expected_shares,
212
+ "execution_time": result.execution_time,
213
+ "avg_sentiment": result.avg_sentiment,
214
+ "feedback_summary": result.feedback_summary
215
+ }
216
+ except Exception as e:
217
+ return {"error": str(e)}
218
+
219
+
220
+ def start_simulation_async_api(simulation_id, content_text, format="text"):
221
+ """
222
+ Starts a simulation in the background.
223
+ """
224
+ try:
225
+ content = Content(text=content_text, format=format)
226
+ simulation_manager.run_simulation(simulation_id, content, background=True)
227
+ return {"status": "started", "simulation_id": simulation_id}
228
+ except Exception as e:
229
+ return {"error": str(e)}
230
+
231
+
232
+ def get_simulation_status_api(simulation_id):
233
+ """
234
+ Checks the status and progress of a simulation.
235
+ """
236
+ try:
237
+ sim = simulation_manager.get_simulation(simulation_id)
238
+ if not sim: return {"error": "Simulation not found"}
239
+
240
+ status_data = {
241
+ "status": sim.status,
242
+ "progress": sim.progress
243
+ }
244
+
245
+ if sim.status == "completed" and sim.last_result:
246
+ status_data["result"] = {
247
+ "total_reach": sim.last_result.total_reach,
248
+ "expected_likes": sim.last_result.expected_likes,
249
+ "avg_sentiment": sim.last_result.avg_sentiment
250
+ }
251
+
252
+ return status_data
253
+ except Exception as e:
254
+ return {"error": str(e)}
255
+
256
+
257
+ def send_chat_message_api(simulation_id, sender, message):
258
+ """
259
+ Sends a message to the simulation chat.
260
+ """
261
+ try:
262
+ return simulation_manager.send_chat_message(simulation_id, sender, message)
263
+ except Exception as e:
264
+ return {"error": str(e)}
265
+
266
+
267
+ def get_chat_history_api(simulation_id):
268
+ """
269
+ Gets the chat history for a simulation.
270
+ """
271
+ try:
272
+ return simulation_manager.get_chat_history(simulation_id)
273
+ except Exception as e:
274
+ return {"error": str(e)}
275
+
276
+
277
+ def generate_variants_api(content_text, num_variants):
278
+ """
279
+ Gradio API endpoint for generating content variants.
280
+ """
281
+ try:
282
+ variants = simulation_manager.variant_generator.generate_variants(content_text, num_variants=int(num_variants))
283
+ return [{"text": v.text, "strategy": v.strategy} for v in variants]
284
+ except Exception as e:
285
+ return {"error": str(e)}
286
+
287
+
288
+ def list_simulations_api():
289
+ """
290
+ Gradio API endpoint for listing simulations.
291
+ """
292
+ try:
293
+ return simulation_manager.list_simulations()
294
+ except Exception as e:
295
+ return {"error": str(e)}
296
+
297
+
298
+ def list_personas_api(simulation_id):
299
+ """
300
+ Gradio API endpoint for listing personas in a simulation.
301
+ """
302
+ try:
303
+ return simulation_manager.list_personas(simulation_id)
304
+ except Exception as e:
305
+ return {"error": str(e)}
306
+
307
+
308
+ def get_persona_api(simulation_id, persona_name):
309
+ """
310
+ Gradio API endpoint for getting persona details.
311
+ """
312
+ try:
313
+ return simulation_manager.get_persona(simulation_id, persona_name)
314
+ except Exception as e:
315
+ return {"error": str(e)}
316
+
317
+
318
+ def delete_simulation_api(simulation_id):
319
+ """
320
+ Gradio API endpoint for deleting a simulation.
321
+ """
322
+ try:
323
+ success = simulation_manager.delete_simulation(simulation_id)
324
+ return {"success": success}
325
+ except Exception as e:
326
+ return {"error": str(e)}
327
+
328
+
329
+ def export_simulation_api(simulation_id):
330
+ """
331
+ Gradio API endpoint for exporting a simulation.
332
+ """
333
+ try:
334
+ return simulation_manager.export_simulation(simulation_id)
335
+ except Exception as e:
336
+ return {"error": str(e)}
337
+
338
+
339
+ def get_network_graph_api(simulation_id):
340
+ """
341
+ Gradio API endpoint for getting network graph data.
342
+ """
343
+ try:
344
+ sim = simulation_manager.get_simulation(simulation_id)
345
+ if not sim: return {"error": "Simulation not found"}
346
+
347
+ nodes = []
348
+ for p in sim.personas:
349
+ nodes.append({
350
+ "id": p.name,
351
+ "label": p.name,
352
+ "role": p._persona.get("occupation"),
353
+ "location": p._persona.get("residence")
354
+ })
355
+
356
+ edges = []
357
+ for edge in sim.network.edges:
358
+ edges.append({
359
+ "source": edge.connection_id.split('_')[0],
360
+ "target": edge.connection_id.split('_')[1],
361
+ "strength": edge.strength
362
+ })
363
+
364
+ return {"nodes": nodes, "edges": edges}
365
+ except Exception as e:
366
+ return {"error": str(e)}
367
+
368
+
369
+ def list_focus_groups_api():
370
+ """
371
+ Gradio API endpoint for listing focus groups.
372
+ """
373
+ try:
374
+ return simulation_manager.list_focus_groups()
375
+ except Exception as e:
376
+ return {"error": str(e)}
377
+
378
+
379
+ def save_focus_group_api(name, simulation_id):
380
+ """
381
+ Gradio API endpoint for saving a focus group from a simulation.
382
+ """
383
+ try:
384
+ sim = simulation_manager.get_simulation(simulation_id)
385
+ if not sim: return {"error": "Simulation not found"}
386
+ simulation_manager.save_focus_group(name, sim.personas)
387
+ return {"status": "success", "name": name}
388
+ except Exception as e:
389
+ return {"error": str(e)}
390
+
391
+
392
  with gr.Blocks() as demo:
393
+ gr.Markdown("<h1>Deep Persona Generator</h1>")
394
  with gr.Row():
395
  with gr.Column():
396
  business_description_input = gr.Textbox(label="What is your business about?", lines=5)
397
  customer_profile_input = gr.Textbox(label="Information about your customer profile", lines=5)
398
+ num_personas_input = gr.Number(label="Number of Deep Personas to generate", value=1, minimum=1, step=1)
399
 
400
+ # --- CHANGE 5: The API key input is now INVISIBLE. ---
401
+ # It still exists, so the API endpoint is created, but it's hidden from UI users.
402
  blablador_api_key_input = gr.Textbox(
403
  label="Blablador API Key (for API client use)",
404
  visible=False
405
  )
406
 
407
  generate_button = gr.Button("Generate Personas")
408
+
409
+ gr.Markdown("---")
410
+ gr.Markdown("<h3>Search Tresor</h3>")
411
+ criteria_input = gr.Textbox(label="Criteria to find best matching persona", lines=2)
412
+ find_button = gr.Button("Find Best Persona in Tresor")
413
+
414
  with gr.Column():
415
+ output_json = gr.JSON(label="Output (Generated or Matched Persona)")
416
 
417
  generate_button.click(
418
  fn=generate_personas,
419
+ # --- CHANGE 6: Pass the invisible textbox to the function. ---
420
  inputs=[business_description_input, customer_profile_input, num_personas_input, blablador_api_key_input],
421
  outputs=output_json,
422
  api_name="generate_personas"
423
  )
424
 
425
+ find_button.click(
426
+ fn=find_best_persona,
427
+ inputs=[criteria_input],
428
+ outputs=output_json,
429
+ api_name="find_best_persona"
430
+ )
431
+
432
+ with gr.Tab("Identify Personas API", visible=False):
433
+ api_id_context = gr.Textbox(label="Context")
434
+ api_id_btn = gr.Button("Identify Personas")
435
+ api_id_out = gr.JSON()
436
+ api_id_btn.click(identify_personas, inputs=[api_id_context], outputs=api_id_out, api_name="identify_personas")
437
+
438
+ # Invisible components to expose API endpoints
439
+ # These won't be seen by regular UI users but will be available via /api
440
+ with gr.Tab("Social Network API", visible=False):
441
+ api_net_name = gr.Textbox(label="Network Name")
442
+ api_net_count = gr.Number(label="Deep Persona Count", value=10)
443
+ api_net_type = gr.Dropdown(choices=["scale_free", "small_world"], label="Network Type")
444
+ api_net_focus = gr.Textbox(label="Focus Group Name (optional)")
445
+ api_net_btn = gr.Button("Generate Network")
446
+ api_net_out = gr.JSON()
447
+ api_net_btn.click(generate_social_network_api, inputs=[api_net_name, api_net_count, api_net_type, api_net_focus], outputs=api_net_out, api_name="generate_social_network")
448
+
449
+ with gr.Tab("Engagement Prediction API", visible=False):
450
+ api_pred_sim_id = gr.Textbox(label="Simulation ID")
451
+ api_pred_content = gr.Textbox(label="Content Text")
452
+ api_pred_format = gr.Textbox(label="Format", value="text")
453
+ api_pred_btn = gr.Button("Predict Engagement")
454
+ api_pred_out = gr.JSON()
455
+ api_pred_btn.click(predict_engagement_api, inputs=[api_pred_sim_id, api_pred_content, api_pred_format], outputs=api_pred_out, api_name="predict_engagement")
456
+
457
+ with gr.Tab("Async Simulation API", visible=False):
458
+ api_async_sim_id = gr.Textbox(label="Simulation ID")
459
+ api_async_content = gr.Textbox(label="Content Text")
460
+ api_async_format = gr.Textbox(label="Format", value="text")
461
+ api_async_btn = gr.Button("Start Simulation")
462
+ api_async_out = gr.JSON()
463
+ api_async_btn.click(start_simulation_async_api, inputs=[api_async_sim_id, api_async_content, api_async_format], outputs=api_async_out, api_name="start_simulation_async")
464
+
465
+ api_status_id = gr.Textbox(label="Simulation ID")
466
+ api_status_btn = gr.Button("Check Status")
467
+ api_status_out = gr.JSON()
468
+ api_status_btn.click(get_simulation_status_api, inputs=[api_status_id], outputs=api_status_out, api_name="get_simulation_status")
469
+
470
+ with gr.Tab("Chat API", visible=False):
471
+ api_chat_sim_id = gr.Textbox(label="Simulation ID")
472
+ api_chat_sender = gr.Textbox(label="Sender", value="User")
473
+ api_chat_msg = gr.Textbox(label="Message")
474
+ api_chat_send_btn = gr.Button("Send Message")
475
+ api_chat_send_out = gr.JSON()
476
+ api_chat_send_btn.click(send_chat_message_api, inputs=[api_chat_sim_id, api_chat_sender, api_chat_msg], outputs=api_chat_send_out, api_name="send_chat_message")
477
+
478
+ api_chat_hist_btn = gr.Button("Get History")
479
+ api_chat_hist_out = gr.JSON()
480
+ api_chat_hist_btn.click(get_chat_history_api, inputs=[api_chat_sim_id], outputs=api_chat_hist_out, api_name="get_chat_history")
481
+
482
+ with gr.Tab("Content Variants API", visible=False):
483
+ api_var_content = gr.Textbox(label="Original Content")
484
+ api_var_count = gr.Number(label="Number of Variants", value=5)
485
+ api_var_btn = gr.Button("Generate Variants")
486
+ api_var_out = gr.JSON()
487
+ api_var_btn.click(generate_variants_api, inputs=[api_var_content, api_var_count], outputs=api_var_out, api_name="generate_variants")
488
+
489
+ with gr.Tab("List Simulations API", visible=False):
490
+ api_list_sim_btn = gr.Button("List Simulations")
491
+ api_list_sim_out = gr.JSON()
492
+ api_list_sim_btn.click(list_simulations_api, outputs=api_list_sim_out, api_name="list_simulations")
493
+
494
+ with gr.Tab("List Personas API", visible=False):
495
+ api_list_per_sim_id = gr.Textbox(label="Simulation ID")
496
+ api_list_per_btn = gr.Button("List Personas")
497
+ api_list_per_out = gr.JSON()
498
+ api_list_per_btn.click(list_personas_api, inputs=[api_list_per_sim_id], outputs=api_list_per_out, api_name="list_personas")
499
+
500
+ with gr.Tab("Get Persona API", visible=False):
501
+ api_get_per_sim_id = gr.Textbox(label="Simulation ID")
502
+ api_get_per_name = gr.Textbox(label="Deep Persona Name")
503
+ api_get_per_btn = gr.Button("Get Persona")
504
+ api_get_per_out = gr.JSON()
505
+ api_get_per_btn.click(get_persona_api, inputs=[api_get_per_sim_id, api_get_per_name], outputs=api_get_per_out, api_name="get_persona")
506
+
507
+ with gr.Tab("Delete Simulation API", visible=False):
508
+ api_del_sim_id = gr.Textbox(label="Simulation ID")
509
+ api_del_btn = gr.Button("Delete Simulation")
510
+ api_del_out = gr.JSON()
511
+ api_del_btn.click(delete_simulation_api, inputs=[api_del_sim_id], outputs=api_del_out, api_name="delete_simulation")
512
+
513
+ with gr.Tab("Export Simulation API", visible=False):
514
+ api_exp_sim_id = gr.Textbox(label="Simulation ID")
515
+ api_exp_btn = gr.Button("Export Simulation")
516
+ api_exp_out = gr.JSON()
517
+ api_exp_btn.click(export_simulation_api, inputs=[api_exp_sim_id], outputs=api_exp_out, api_name="export_simulation")
518
+
519
+ with gr.Tab("Network Graph API", visible=False):
520
+ api_graph_sim_id = gr.Textbox(label="Simulation ID")
521
+ api_graph_btn = gr.Button("Get Graph Data")
522
+ api_graph_out = gr.JSON()
523
+ api_graph_btn.click(get_network_graph_api, inputs=[api_graph_sim_id], outputs=api_graph_out, api_name="get_network_graph")
524
+
525
+ with gr.Tab("Focus Group API", visible=False):
526
+ api_list_fg_btn = gr.Button("List Focus Groups")
527
+ api_list_fg_out = gr.JSON()
528
+ api_list_fg_btn.click(list_focus_groups_api, outputs=api_list_fg_out, api_name="list_focus_groups")
529
+
530
+ api_save_fg_name = gr.Textbox(label="Focus Group Name")
531
+ api_save_fg_sim_id = gr.Textbox(label="Simulation ID")
532
+ api_save_fg_btn = gr.Button("Save Focus Group")
533
+ api_save_fg_out = gr.JSON()
534
+ api_save_fg_btn.click(save_focus_group_api, inputs=[api_save_fg_name, api_save_fg_sim_id], outputs=api_save_fg_out, api_name="save_focus_group")
535
 
536
  if __name__ == "__main__":
537
+ demo.queue().launch()
config.ini CHANGED
@@ -1,7 +1,12 @@
1
  [OpenAI]
2
  API_TYPE=helmholtz-blablador
3
- MODEL=alias-large
4
- REASONING_MODEL=alias-large
 
 
5
  TOP_P=1.0
6
- MAX_ATTEMPTS=5
7
- WAITING_TIME=20
 
 
 
 
1
  [OpenAI]
2
  API_TYPE=helmholtz-blablador
3
+ MODEL=alias-fast
4
+ REASONING_MODEL=alias-fast
5
+ FALLBACK_MODEL_LARGE=alias-large
6
+ FALLBACK_MODEL_HUGE=alias-huge
7
  TOP_P=1.0
8
+ MAX_ATTEMPTS=999
9
+ WAITING_TIME=35
10
+
11
+ [Logging]
12
+ LOGLEVEL=DEBUG
deeppersona/__init__.py CHANGED
@@ -193,7 +193,7 @@ class ConfigManager:
193
 
194
  # Create global instance of the configuration manager
195
  config = utils.read_config_file()
196
- utils.pretty_print_deeppersona_version()
197
  utils.pretty_print_datetime()
198
  utils.pretty_print_config(config)
199
  utils.start_logger(config)
 
193
 
194
  # Create global instance of the configuration manager
195
  config = utils.read_config_file()
196
+ utils.pretty_print_tinytroupe_version()
197
  utils.pretty_print_datetime()
198
  utils.pretty_print_config(config)
199
  utils.start_logger(config)
deeppersona/control.py CHANGED
@@ -673,20 +673,20 @@ class Transaction:
673
  if output is None:
674
  return None
675
  elif isinstance(output, DeepPersona):
676
- return {"type": "DeepPersonaRef", "name": output.name}
677
  elif isinstance(output, DeepWorld):
678
- return {"type": "DeepWorldRef", "name": output.name}
679
  elif isinstance(output, DeepPersonaFactory):
680
- return {"type": "DeepPersonaFactoryRef", "name": output.name}
681
  elif isinstance(output, list):
682
  encoded_list = []
683
  for item in output:
684
  if isinstance(item, DeepPersona):
685
- encoded_list.append({"type": "DeepPersonaRef", "name": item.name})
686
  elif isinstance(item, DeepWorld):
687
- encoded_list.append({"type": "DeepWorldRef", "name": item.name})
688
  elif isinstance(item, DeepPersonaFactory):
689
- encoded_list.append({"type": "DeepPersonaFactoryRef", "name": item.name})
690
  else:
691
  encoded_list.append({"type": "JSON", "value": item})
692
  return {"type": "List", "value": encoded_list}
@@ -706,20 +706,20 @@ class Transaction:
706
 
707
  if encoded_output is None:
708
  return None
709
- elif encoded_output["type"] == "DeepPersonaRef":
710
  return DeepPersona.get_agent_by_name(encoded_output["name"])
711
- elif encoded_output["type"] == "DeepWorldRef":
712
  return DeepWorld.get_environment_by_name(encoded_output["name"])
713
- elif encoded_output["type"] == "DeepPersonaFactoryRef":
714
  return DeepPersonaFactory.get_factory_by_name(encoded_output["name"])
715
  elif encoded_output["type"] == "List":
716
  decoded_list = []
717
  for item in encoded_output["value"]:
718
- if item["type"] == "DeepPersonaRef":
719
  decoded_list.append(DeepPersona.get_agent_by_name(item["name"]))
720
- elif item["type"] == "DeepWorldRef":
721
  decoded_list.append(DeepWorld.get_environment_by_name(item["name"]))
722
- elif item["type"] == "DeepPersonaFactoryRef":
723
  decoded_list.append(DeepPersonaFactory.get_factory_by_name(item["name"]))
724
  else:
725
  decoded_list.append(item["value"])
 
673
  if output is None:
674
  return None
675
  elif isinstance(output, DeepPersona):
676
+ return {"type": "TinyPersonRef", "name": output.name}
677
  elif isinstance(output, DeepWorld):
678
+ return {"type": "TinyWorldRef", "name": output.name}
679
  elif isinstance(output, DeepPersonaFactory):
680
+ return {"type": "TinyFactoryRef", "name": output.name}
681
  elif isinstance(output, list):
682
  encoded_list = []
683
  for item in output:
684
  if isinstance(item, DeepPersona):
685
+ encoded_list.append({"type": "TinyPersonRef", "name": item.name})
686
  elif isinstance(item, DeepWorld):
687
+ encoded_list.append({"type": "TinyWorldRef", "name": item.name})
688
  elif isinstance(item, DeepPersonaFactory):
689
+ encoded_list.append({"type": "TinyFactoryRef", "name": item.name})
690
  else:
691
  encoded_list.append({"type": "JSON", "value": item})
692
  return {"type": "List", "value": encoded_list}
 
706
 
707
  if encoded_output is None:
708
  return None
709
+ elif encoded_output["type"] == "TinyPersonRef":
710
  return DeepPersona.get_agent_by_name(encoded_output["name"])
711
+ elif encoded_output["type"] == "TinyWorldRef":
712
  return DeepWorld.get_environment_by_name(encoded_output["name"])
713
+ elif encoded_output["type"] == "TinyFactoryRef":
714
  return DeepPersonaFactory.get_factory_by_name(encoded_output["name"])
715
  elif encoded_output["type"] == "List":
716
  decoded_list = []
717
  for item in encoded_output["value"]:
718
+ if item["type"] == "TinyPersonRef":
719
  decoded_list.append(DeepPersona.get_agent_by_name(item["name"]))
720
+ elif item["type"] == "TinyWorldRef":
721
  decoded_list.append(DeepWorld.get_environment_by_name(item["name"]))
722
+ elif item["type"] == "TinyFactoryRef":
723
  decoded_list.append(DeepPersonaFactory.get_factory_by_name(item["name"]))
724
  else:
725
  decoded_list.append(item["value"])
deeppersona/environment/social_deep_world.py CHANGED
@@ -52,7 +52,7 @@ class SimulationResult:
52
  self.total_reach = len(set(e["persona_id"] for e in self.engagements)) # Simplified
53
  # ... more metrics
54
 
55
- class SocialDeepWorld(DeepWorld):
56
  """Extended DeepWorld with social network capabilities"""
57
 
58
  def __init__(self, name: str, network: NetworkTopology = None, **kwargs):
 
52
  self.total_reach = len(set(e["persona_id"] for e in self.engagements)) # Simplified
53
  # ... more metrics
54
 
55
+ class SocialTinyWorld(DeepWorld):
56
  """Extended DeepWorld with social network capabilities"""
57
 
58
  def __init__(self, name: str, network: NetworkTopology = None, **kwargs):
deeppersona/extraction/results_extractor.py CHANGED
@@ -152,7 +152,7 @@ performed.
152
 
153
 
154
  def extract_results_from_world(self,
155
- deepworld:DeepWorld,
156
  extraction_objective:str="The main points that can be derived from the agents conversations and actions.",
157
  situation:str="",
158
  fields:list=None,
@@ -162,7 +162,7 @@ performed.
162
  Extracts results from a DeepWorld instance.
163
 
164
  Args:
165
- deepworld (DeepWorld): The DeepWorld instance to extract results from.
166
  extraction_objective (str): The extraction objective.
167
  situation (str): The situation to consider.
168
  fields (list, optional): The fields to extract. If None, the extractor will decide what names to use.
@@ -189,7 +189,7 @@ performed.
189
  rendering_configs)})
190
 
191
  # TODO: either summarize first or break up into multiple tasks
192
- interaction_history = deepworld.pretty_current_interactions(max_content_length=None)
193
 
194
  extraction_request_prompt = \
195
  f"""
@@ -203,7 +203,7 @@ You are considering various agents.
203
 
204
  ## Agents Interactions History
205
 
206
- You will consider the history of interactions from various agents that exist in an environment called {deepworld.name}.
207
  Each interaction history includes stimuli the corresponding agent received as well as actions it performed.
208
 
209
  {interaction_history}
@@ -223,7 +223,7 @@ Each interaction history includes stimuli the corresponding agent received as we
223
  result = None
224
 
225
  # cache the result
226
- self.world_extraction[deepworld.name] = result
227
 
228
  return result
229
 
 
152
 
153
 
154
  def extract_results_from_world(self,
155
+ tinyworld:DeepWorld,
156
  extraction_objective:str="The main points that can be derived from the agents conversations and actions.",
157
  situation:str="",
158
  fields:list=None,
 
162
  Extracts results from a DeepWorld instance.
163
 
164
  Args:
165
+ tinyworld (DeepWorld): The DeepWorld instance to extract results from.
166
  extraction_objective (str): The extraction objective.
167
  situation (str): The situation to consider.
168
  fields (list, optional): The fields to extract. If None, the extractor will decide what names to use.
 
189
  rendering_configs)})
190
 
191
  # TODO: either summarize first or break up into multiple tasks
192
+ interaction_history = tinyworld.pretty_current_interactions(max_content_length=None)
193
 
194
  extraction_request_prompt = \
195
  f"""
 
203
 
204
  ## Agents Interactions History
205
 
206
+ You will consider the history of interactions from various agents that exist in an environment called {tinyworld.name}.
207
  Each interaction history includes stimuli the corresponding agent received as well as actions it performed.
208
 
209
  {interaction_history}
 
223
  result = None
224
 
225
  # cache the result
226
+ self.world_extraction[tinyworld.name] = result
227
 
228
  return result
229
 
deeppersona/factory/deep_persona_factory.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import json
3
  import chevron
@@ -30,16 +31,9 @@ class DeepPersonaFactory(DeepPersonaFactoryBase):
30
  def __init__(self, sampling_space_description:str=None, total_population_size:int=None, context:str=None, simulation_id:str=None):
31
  """
32
  Initialize a DeepPersonaFactory instance.
33
-
34
- Args:
35
- sampling_space_description (str, optional): The description of the sampling space. Defaults to None. If this is
36
- specified, then population_size must also be specified.
37
- population_size (int, optional): The size of the population to sample from. Defaults to None.
38
- context (str): The context text used to generate the DeepPersona instances.
39
- simulation_id (str, optional): The ID of the simulation. Defaults to None.
40
  """
41
  super().__init__(simulation_id)
42
- self.person_prompt_template_path = os.path.join(os.path.dirname(__file__), 'prompts/generate_person.mustache')
43
  self.context_text = context
44
  self.sampling_space_description = sampling_space_description
45
  self.population_size = total_population_size
@@ -48,131 +42,109 @@ class DeepPersonaFactory(DeepPersonaFactoryBase):
48
  self.sampling_plan = None
49
  self.remaining_characteristics_sample = None
50
 
51
- self.generated_minibios = [] # keep track of the generated persons. We keep the minibio to avoid generating the same person twice.
52
  self.generated_names = []
53
 
54
- # TODO obsolete?
55
- @staticmethod
56
- def generate_person_factories(number_of_factories, generic_context_text):
57
  """
58
- Generate a list of DeepPersonaFactory instances using OpenAI's LLM.
59
-
60
- Args:
61
- number_of_factories (int): The number of DeepPersonaFactory instances to generate.
62
- generic_context_text (str): The generic context text used to generate the DeepPersonaFactory instances.
63
-
64
- Returns:
65
- list: A list of DeepPersonaFactory instances.
66
  """
 
 
 
67
 
68
- logger.info(f"Starting the generation of the {number_of_factories} person factories based on that context: {generic_context_text}")
69
-
70
- system_prompt = open(os.path.join(os.path.dirname(__file__), 'prompts/generate_person_factory.md'), 'r', encoding='utf-8', errors='replace').read()
71
-
72
- messages = []
73
- messages.append({"role": "system", "content": system_prompt})
74
-
75
- user_prompt = chevron.render("Please, create {{number_of_factories}} person descriptions based on the following broad context: {{context}}", {
76
- "number_of_factories": number_of_factories,
77
- "context": generic_context_text
78
- })
79
-
80
- messages.append({"role": "user", "content": user_prompt})
81
-
82
- response = openai_utils.client().send_message(messages)
83
-
84
- if response is not None:
85
- result = utils.extract_json(response["content"])
86
-
87
- factories = []
88
- for i in range(number_of_factories):
89
- logger.debug(f"Generating person factory with description: {result[i]}")
90
- factories.append(DeepPersonaFactory(result[i]))
91
-
92
- return factories
93
-
94
- return None
95
-
96
- @staticmethod
97
- def create_factory_from_demography(demography_description_or_file_path:Union[str, dict], population_size:int, additional_demographic_specification:str=None, context:str=None):
98
- """
99
- Create a DeepPersonaFactory instance from a demography description, which can be wither given as a file path or a dictionary
100
- (but not both).
101
-
102
- Args:
103
- demography_description_or_file_path (Union[str, dict]): The demography description or the file path to the demography description.
104
- population_size (int): The size of the population to sample from.
105
- context (str, optional): Additional context text used to generate the DeepPersona instances. Defaults to None.
106
-
107
- Returns:
108
- DeepPersonaFactory: A DeepPersonaFactory instance.
109
- """
110
- # read the demography description from a file or use the given dictionary
111
- if isinstance(demography_description_or_file_path, str):
112
- demography_description = json.loads(open(demography_description_or_file_path, 'r', encoding='utf-8', errors='replace').read())
113
- elif isinstance(demography_description_or_file_path, dict):
114
- demography_description = demography_description_or_file_path
115
- else:
116
- raise ValueError("demography_description_or_file_path must be either a string or a dictionary.")
117
-
118
- if population_size is None:
119
- raise ValueError("population_size must be specified.")
120
-
121
-
122
- full_demography_description = \
123
- f"""
124
- # Sampling space specification
125
-
126
- The population described by the demographic data below. Make sure you consider very detailed, fine-grained,
127
- characteristics of the individuals in the population.
128
-
129
- ## Directives
130
- Please follow these rules:
131
- - produce a uniformly distributed sample of the requested population, so that all characteristics are represented in the sample
132
- in the right proportions, as specified in the demographic data below.
133
- - consider as many different population segments as possible, while **always** keeping **proportions** correct.For example,
134
- instead of sampling 10 people from segment A and 5 from segment B, you can instead sample 2 from A, 1 from B,
135
- and 7 others from other segments, provided the proportions are maintained correct and there are enough people to sample.
136
- - also use any built-in knowledge you might have of the populations in question to improve the sampling space,
137
- provided this built-in knowledge does not conflict with the demographic data below.
138
-
139
- The sample must include representative people from the broad population, so for instance ensure that you include values covering
140
- people from all walks of life possible from the specified demographic data and your built-in knowledge of the target population, such as:
141
- - from the simplest professions to those of the highest ranks;
142
- - from the youngest to the oldest;
143
- - from the kind to the evil;
144
- - from the positive and enthusiastic to the negative and pessimistic;
145
- - from the happy and joyful to the sad and depressed;
146
- - from the most conservative, to the most liberal;
147
- - from the educated, to the ignorant;
148
- - from the healthy to the sick;
149
- - from those who enjoy bland food, to those who enjoy spicy food;
150
- - from rich to poor.
151
 
152
- Make sure there's sufficient variety to represent even extreme cases, so that fringe opinions or far fetched characteristics are also represented.
153
- Because these are by definition rare, here you can add a larger proportion than what is truly present in the population, so that there's some
154
- information from these rare cases.
 
 
155
 
156
- In particular, the population MUST cover both POSITIVE and NEGATIVE possibilities of the various characteristics
157
- (e.g., rich vs poor, likes sugar vs don't like sugar, enthusiastic vs apathetic).
158
 
159
- ## Additional demographic specification (if any)
160
- {additional_demographic_specification if additional_demographic_specification is not None else "(none)"}
161
-
162
- ## Demographic data
163
- {json.dumps(demography_description, indent=4)}
164
  """
165
-
166
- return DeepPersonaFactory(context=context,
167
- sampling_space_description=full_demography_description,
168
- total_population_size=population_size)
169
-
170
- @classmethod
171
- def _clear_factories(cls):
172
  """
173
- Additional class-level cleanup for this subclass.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  """
175
- DeepPersonaFactory.all_unique_names = [] # clear the list of all unique names, so that the next factories can start fresh.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  def generate_person(self,
178
  agent_particularities:str=None,
@@ -182,151 +154,57 @@ class DeepPersonaFactory(DeepPersonaFactoryBase):
182
  attempts:int=10,
183
  post_processing_func=None,
184
  deep_persona:bool=True) -> DeepPersona:
185
- """
186
- Generate a DeepPersona instance using OpenAI's LLM.
187
-
188
- Args:
189
- agent_particularities (str): The particularities of the agent.
190
- temperature (float): The temperature to use when sampling from the LLM.
191
- frequency_penalty (float): The frequency penalty to use when sampling from the LLM.
192
- presence_penalty (float): The presence penalty to use when sampling from the LLM.
193
- attempts (int): The number of attempts to generate a DeepPersona instance.
194
- post_processing_func (function): A function to apply to the generated agent after it is created.
195
-
196
- Returns:
197
- DeepPersona: A DeepPersona instance generated using the LLM.
198
- """
199
-
200
  logger.debug(f"Starting the person generation based these particularities: {agent_particularities}")
201
  fresh_agent_name = None
202
 
203
- # are we going to use a pre-computed sample of characteristics too?
204
  if self.population_size is not None:
205
-
206
  with concurrent_agent_generataion_lock:
207
  if self.remaining_characteristics_sample is None:
208
- # if the sample does not exist, we generate it here once.
209
  self.initialize_sampling_plan()
210
 
211
- logger.debug(f"Sampling plan initialized. Remaining characteristics sample: {self.remaining_characteristics_sample}")
212
-
213
- # CONCURRENT PROTECTION
214
  with concurrent_agent_generataion_lock:
215
  if len(self.remaining_characteristics_sample) == 0:
216
- logger.warning("No more characteristics samples left to sample from. This can happen if the sampling plan did not sum up correctly.")
217
  return None
218
-
219
  else:
220
  sampled_characteristics = self.remaining_characteristics_sample.pop()
221
- logger.debug(f"Sampled agent: {sampled_characteristics['name']}.")
222
 
223
  if agent_particularities is not None:
224
- agent_particularities =\
225
- f"""
226
- - Primary characteristics: {agent_particularities}
227
-
228
- - Also use all the following additional characteristics that **do not** conflict with the primary ones:
229
- * Name, demographics and other characteristics: {json.dumps(sampled_characteristics, indent=4)}
230
-
231
- In case one of the additional characteristics conflicts with a primary one, please use the primary one
232
- and ignore the additional one.
233
-
234
- If the agent's name is specified, you MUST ALWAYS use it, even if it conflicts with the primary characteristics.
235
-
236
- """
237
  else:
238
- agent_particularities = \
239
- f"""
240
- - Name, demographics and other characteristics:
241
- {json.dumps(sampled_characteristics, indent=4)}
242
- """
243
- else: # no predefined population size, so we generate one-off agents.
244
- # CONCURRENT PROTECTION
245
  with concurrent_agent_generataion_lock:
246
  fresh_agent_name = self._unique_full_name(already_generated_names=DeepPersonaFactory._all_used_and_precomputed_names(),
247
  context=self.context_text)
248
 
249
  if agent_particularities is not None:
250
- agent_particularities = \
251
- f"""
252
-
253
- - Primary characteristics: {agent_particularities}
254
-
255
- - Also use the following additional characteristics:
256
- * Full name: {fresh_agent_name}
257
-
258
- In case the primary characteristics already specify a name, please use the primary name and ignore the additional one.
259
- """
260
  else:
261
  agent_particularities = f"Full name: {fresh_agent_name}"
262
 
263
 
264
-
265
- logger.info(f"Generating person with the following particularities: {agent_particularities}")
266
-
267
- # read example specs from files.
268
- example_1 = json.load(open(os.path.join(os.path.dirname(__file__), '../examples/agents/Friedrich_Wolf.agent.json'), 'r', encoding='utf-8', errors='replace'))
269
- example_2 = json.load(open(os.path.join(os.path.dirname(__file__), '../examples/agents/Sophie_Lefevre.agent.json'), 'r', encoding='utf-8', errors='replace'))
270
-
271
- # We must include all agent names generated in the whole of the simulation, not only the ones generated by this factory,
272
- # since they all share the same name space.
273
- #
274
- # For the minibios, we only need to keep track of the ones generated by this factory, since they are unique to each factory
275
- # and are used to guide the sampling process.
276
- user_prompt = chevron.render(open(self.person_prompt_template_path, 'r', encoding='utf-8', errors='replace').read(), {
277
- "context": self.context_text,
278
- "agent_particularities": agent_particularities,
279
-
280
- #Note that we need to dump them to JSON strings, to ensure we get double quotes,
281
- # and other formatting issues are avoided.
282
- "example_1": json.dumps(example_1["persona"], indent=4),
283
- "example_2": json.dumps(example_2["persona"], indent=4)
284
- })
285
-
286
  def aux_generate(attempt):
287
- messages = []
288
- messages += [{"role": "system", "content": "You are a system that generates specifications for realistic simulations of people. You follow the generation rules and constraints carefully."},
289
- {"role": "user", "content": user_prompt}]
290
-
291
-
292
- # due to a technicality, we need to call an auxiliary method to be able to use the transactional decorator.
293
- message = self._aux_model_call(messages=messages,
294
- temperature=temperature,
295
- frequency_penalty=frequency_penalty,
296
- presence_penalty=presence_penalty)
297
-
298
- if message is not None:
299
- result = utils.extract_json(message["content"])
300
-
301
- logger.debug(f"At attempt {attempt}, generated person parameters:\n{json.dumps(result, indent=4, sort_keys=True)}")
302
-
303
- # only accept the generated spec if the name is not already in use
304
- if not self._is_name_already_assigned(result["name"]):
305
  return result
306
- else:
307
- logger.info(f"Person with name {result['name']} was already generated, cannot be reused.")
 
308
 
309
- return None # no suitable agent was generated
310
-
311
  agent_spec = None
312
  attempt = 0
313
  while agent_spec is None and attempt < attempts:
314
- try:
315
- attempt += 1
316
- agent_spec = aux_generate(attempt=attempt)
317
- except Exception as e:
318
- logger.error(f"Error while generating agent specification: {e}")
319
 
320
- # create the fresh agent
321
  if agent_spec is not None:
322
- # If deep_persona is requested, perform the second API call to enrich the persona
323
- if deep_persona:
324
- agent_spec = self._generate_deep_persona_internal(agent_spec)
325
-
326
- # the agent is created here. This is why the present method cannot be cached. Instead, an auxiliary method is used
327
- # for the actual model call, so that it gets cached properly without skipping the agent creation.
328
-
329
- # protect parallel agent generation
330
  with concurrent_agent_generataion_lock:
331
  person = DeepPersona(agent_spec["name"])
332
  self._setup_agent(person, agent_spec)
@@ -337,55 +215,27 @@ class DeepPersonaFactory(DeepPersonaFactoryBase):
337
  self.generated_names.append(person.get("name"))
338
 
339
  return person
340
- else:
341
- logger.error(f"Could not generate an agent after {attempts} attempts.")
342
- if sampled_characteristics is not None:
343
- self.remaining_characteristics_sample.append(sampled_characteristics)
344
- logger.error(f"Name {fresh_agent_name} was not used, it will be added back to the pool of names.")
345
-
346
- return None
347
-
348
-
349
- @config_manager.config_defaults(parallelize="parallel_agent_generation")
350
- def generate_from_linkedin_profile(self, profile_data: Dict) -> DeepPersona:
351
- """
352
- Generate a DeepPersona from a LinkedIn profile with enriched traits.
353
- """
354
- description = f"Professional with headline: {profile_data.get('headline', '')}. " \
355
- f"Industry: {profile_data.get('industry', '')}. " \
356
- f"Location: {profile_data.get('location', 'Global')}. " \
357
- f"Career level: {profile_data.get('career_level', 'Mid Level')}. " \
358
- f"Summary: {profile_data.get('summary', '')}"
359
 
360
- return self.generate_person(agent_particularities=description)
361
 
362
- def generate_persona_cluster(self, archetype: str, count: int) -> List[DeepPersona]:
363
- """
364
- Generate a cluster of personas following a specific archetype.
365
- """
366
- return self.generate_people(number_of_people=count, agent_particularities=f"Archetype: {archetype}")
367
 
368
- def generate_diverse_population(self, size: int, distribution: Dict) -> List[DeepPersona]:
369
- """
370
- Generate a diverse population based on a distribution.
371
- """
372
- # distribution could specify proportions of various characteristics
373
- # This is a simplified implementation
374
- return self.generate_people(number_of_people=size, agent_particularities=f"Target distribution: {json.dumps(distribution)}")
375
 
376
- def ensure_consistency(self, persona: DeepPersona) -> bool:
377
- """
378
- Ensure the generated persona is consistent.
379
- """
380
- # Implementation would involve checking traits, demographics, etc.
381
- return True # Placeholder
382
 
383
- def calculate_diversity_score(self, personas: List[DeepPersona]) -> float:
384
- """
385
- Calculate a diversity score for a list of personas.
386
- """
387
- # Placeholder for diversity metric calculation
388
- return 0.5
389
 
390
  def generate_people(self, number_of_people:int=None,
391
  agent_particularities:str=None,
@@ -397,1066 +247,19 @@ class DeepPersonaFactory(DeepPersonaFactoryBase):
397
  parallelize=None,
398
  verbose:bool=False,
399
  deep_persona:bool=True) -> list:
400
- """
401
- Generate a list of DeepPersona instances using OpenAI's LLM.
402
-
403
- Args:
404
- number_of_people (int): The number of DeepPersona instances to generate.
405
- agent_particularities (str): The particularities of the agent.
406
- temperature (float): The temperature to use when sampling from the LLM.
407
- frequency_penalty (float): The frequency penalty to use when sampling from the LLM.
408
- presence_penalty (float): The presence penalty to use when sampling from the LLM.
409
- attempts (int): The number of attempts to generate a DeepPersona instance.
410
- post_processing_func (function): A function to apply to the generated agent after it is created.
411
- parallel_workers (int): The number of parallel workers to use when generating the people. Too many workers may cause the LLM to fail
412
- due to throttling by the API.
413
- verbose (bool): Whether to print information about the generated people.
414
-
415
- Returns:
416
- list: A list of DeepPersona instances generated using the LLM.
417
- """
418
-
419
  if number_of_people is None:
420
- if self.population_size is None:
421
- raise ValueError("Either the number of people to generate or the population size must be specified.")
422
  number_of_people = self.population_size
423
-
424
- elif self.population_size is None:
425
- self.population_size = number_of_people
426
-
427
- elif number_of_people is not None and self.population_size is not None and number_of_people > self.population_size:
428
- raise ValueError(f"Cannot generate more people than the population size. Requested {number_of_people}, but the population size is {self.population_size}.")
429
-
430
- people = []
431
- if parallelize:
432
- people = self._generate_people_in_parallel(number_of_people=number_of_people,
433
- agent_particularities=agent_particularities,
434
- temperature=temperature,
435
- frequency_penalty=frequency_penalty,
436
- presence_penalty=presence_penalty,
437
- attempts=attempts,
438
- post_processing_func=post_processing_func,
439
- verbose=verbose,
440
- deep_persona=deep_persona)
441
- else:
442
- people = self._generate_people_sequentially(number_of_people=number_of_people,
443
- agent_particularities=agent_particularities,
444
- temperature=temperature,
445
- frequency_penalty=frequency_penalty,
446
- presence_penalty=presence_penalty,
447
- attempts=attempts,
448
- post_processing_func=post_processing_func,
449
- verbose=verbose,
450
- deep_persona=deep_persona)
451
-
452
- return people
453
 
454
-
455
- @transactional(parallel=True)
456
- def _generate_people_in_parallel(self, number_of_people:int=None,
457
- agent_particularities:str=None,
458
- temperature:float=1.5,
459
- frequency_penalty:float=0.0,
460
- presence_penalty:float=0.0,
461
- attempts:int=10,
462
- post_processing_func=None,
463
- verbose:bool=False,
464
- deep_persona:bool=True) -> list:
465
  people = []
466
-
467
- #
468
- # Concurrently generate the people.
469
- #
470
- # This vastly speeds up the process, but be careful with the number of workers, as too
471
- # many may cause the LLM to fail due to throttling by the API.
472
- #
473
-
474
- # this is the function that will be executed in parallel
475
- def generate_person_wrapper(args):
476
- self, i, agent_particularities, temperature, frequency_penalty, presence_penalty, attempts, post_processing_func, deep_persona = args
477
  person = self.generate_person(agent_particularities=agent_particularities,
478
- temperature=temperature,
479
- frequency_penalty=frequency_penalty,
480
- presence_penalty=presence_penalty,
481
  attempts=attempts,
482
  post_processing_func=post_processing_func,
483
  deep_persona=deep_persona)
484
- return i, person
485
-
486
- with concurrent.futures.ThreadPoolExecutor() as executor:
487
- # we use a list of futures to keep track of the results
488
- futures = [
489
- executor.submit(generate_person_wrapper, (self, i, agent_particularities, temperature, frequency_penalty, presence_penalty, attempts, post_processing_func, deep_persona))
490
- for i in range(number_of_people)
491
- ]
492
-
493
- # we iterate over the futures as they are completed, and collect the results
494
- for future in concurrent.futures.as_completed(futures):
495
- i, person = future.result()
496
- if person is not None:
497
- people.append(person)
498
- info_msg = f"Generated person {i+1}/{number_of_people}: {person.minibio()}"
499
-
500
- if verbose:
501
- logger.info(info_msg)
502
-
503
- else:
504
- logger.error(f"Could not generate person {i+1}/{number_of_people}. Continuing with the remaining ones.")
505
-
506
- return people
507
-
508
- # TODO still make this one available?
509
- def _generate_people_sequentially(self, number_of_people:int=None,
510
- agent_particularities:str=None,
511
- temperature:float=1.5,
512
- frequency_penalty:float=0.0,
513
- presence_penalty:float=0.0,
514
- attempts:int=10,
515
- post_processing_func=None,
516
- verbose:bool=False,
517
- deep_persona:bool=True) -> list:
518
- """
519
- Generate the people sequentially, not in parallel. This is a simpler alternative.
520
- """
521
- people = []
522
- for i in range(number_of_people):
523
- person = self.generate_person(agent_particularities=agent_particularities,
524
- temperature=temperature,
525
- frequency_penalty=frequency_penalty,
526
- presence_penalty=presence_penalty,
527
- attempts=attempts,
528
- post_processing_func=post_processing_func,
529
- deep_persona=deep_persona)
530
- if person is not None:
531
  people.append(person)
532
- info_msg = f"Generated person {i+1}/{number_of_people}: {person.minibio()}"
533
- logger.info(info_msg)
534
- if verbose:
535
- print(info_msg)
536
- else:
537
- logger.error(f"Could not generate person {i+1}/{number_of_people}.")
538
-
539
  return people
540
 
541
-
542
-
543
-
544
  def initialize_sampling_plan(self):
545
- """
546
- Computes a list of characteristics samples from a sampling space.
547
- The sampling space is built from the given description through intermediary steps
548
- that actually build a sampling space and then randomly (and not via LLM) sample from it, thereby
549
- ensuring that the sampling is not biased by the LLM (though the sampling space itself may be biased).
550
-
551
- All intermediary results are stored for later inspection.
552
-
553
- For example, given some n > 3 and a description like
554
- "Young Western people of different liberal professions."
555
-
556
- The final samples could be something like:
557
- [{"age": 25, "profession": "Architect", "country": "USA"},
558
- {"age": 27, "profession": "Lawyer", "country": "Canada"},
559
- ...
560
- {"age": 25, "profession": "Architect", "country": "USA"}]
561
-
562
- Args:
563
- n (int): The number of samples to generate.
564
- sampling_space_description (str): A description of the sampling space.
565
-
566
- """
567
-
568
- # a technicality - we need to use an auxiliary method to be able to use the transactional decorator effectively.
569
- return self._initialize_sampling_plan_transaction(n=self.population_size, description=self.sampling_space_description,context=self.context_text)
570
-
571
- def _initialize_sampling_plan_transaction(self, n, description, context):
572
- """
573
- Auxiliary method to initialize the sampling plan. This is needed in order to be able to use the transactional decorator,
574
- due to a technicality - the method parameters must be such that when they change the transaction is nullified.
575
- """
576
- if self.remaining_characteristics_sample is None:
577
- # sampling dimensions
578
- self.sampling_dimensions = utils.try_function(lambda: self._compute_sampling_dimensions(sampling_space_description=description),
579
-
580
- # check that the result is a dict
581
- postcond_func=lambda result: isinstance(result, dict),
582
- retries=15)
583
- logger.info("Sampling dimensions computed successfully.")
584
- logger.debug(f"Sampling dimensions: {json.dumps(self.sampling_dimensions, indent=4)}")
585
-
586
- # sampling plan
587
- self.sampling_plan = utils.try_function(lambda: self._compute_sample_plan(N=n,
588
- sampling_dimensions=self.sampling_dimensions),
589
-
590
- # checks that the plan is a list, not an empty dictionary, a number or a string
591
- postcond_func = lambda result: isinstance(result, list) and len(result) > 0,
592
- retries=15
593
- )
594
- # if the sampling plan is a dict, let's enclose it in a list
595
- if isinstance(self.sampling_plan, dict):
596
- self.sampling_plan = [self.sampling_plan]
597
- logger.warning("The sampling plan was a dictionary, enclosing it in a list to ensure it is processed correctly.")
598
-
599
- logger.info("Sampling plan computed successfully.")
600
- logger.debug(f"Sampling plan: {json.dumps(self.sampling_plan, indent=4)}")
601
-
602
- # Flatten the sampling plan in concrete individual samples.
603
- # Use deepcopy because we'll be modifying the samples later, and we want to keep the original sampling plan intact
604
- # for correct caching
605
- self.remaining_characteristics_sample = copy.deepcopy(utils.try_function(lambda: self._flatten_sampling_plan(sampling_plan=self.sampling_plan),
606
- retries=15))
607
-
608
- # instead of failing, we warn if the number of samples is not equal to n, as LLMs can be bad at summing up the quantities in the sampling plan.
609
- # This is not a problem, as the sampling space is still valid and can be used, though it may not be as rich as expected.
610
- if len(self.remaining_characteristics_sample) != n:
611
- logger.warning(f"Expected {n} samples, but got {len(self.remaining_characteristics_sample)} samples. The LLM may have failed to sum up the quantities in the sampling plan correctly.")
612
-
613
- # If we got more samples than requested, we truncate them to avoid generating too many names or personas.
614
- if len(self.remaining_characteristics_sample) > n:
615
- logger.info(f"Truncating {len(self.remaining_characteristics_sample)} samples to the requested {n} samples.")
616
- self.remaining_characteristics_sample = self.remaining_characteristics_sample[:n]
617
-
618
- logger.info(f"Sample plan has been flattened, contains {len(self.remaining_characteristics_sample)} total samples.")
619
- logger.debug(f"Remaining characteristics sample: {json.dumps(self.remaining_characteristics_sample, indent=4)}")
620
-
621
- # generate names for each sample individually, considering all their characteristics
622
- all_used_names = DeepPersonaFactory._all_used_and_precomputed_names()
623
-
624
- for i, sample in enumerate(self.remaining_characteristics_sample):
625
- logger.debug(f"Generating name for sample {i+1}/{len(self.remaining_characteristics_sample)}")
626
-
627
- # randomize the all_used_names to make the context less predictable for the LLM, thereby introducing some additional randomness.
628
- # Note that we use a fixed random seed to ensure that the sampling plan is reproducible and cache can be kept.
629
- DeepPersonaFactory.randomizer.shuffle(all_used_names)
630
-
631
- # generate a name that's appropriate for this specific sample's characteristics
632
- try:
633
-
634
- # A dummy name to start with, in case the name generation fails.
635
- sample["name"] = f"Agent_{utils.fresh_id('agents_names')}"
636
-
637
- name = utils.try_function(
638
- lambda: self._generate_name_for_sample(
639
- sample_characteristics=sample,
640
- already_generated_names=all_used_names
641
- ),
642
- # ensure the name is not in already used names
643
- postcond_func=lambda result: result not in all_used_names,
644
- retries=15
645
- )
646
-
647
- sample["name"] = name
648
- all_used_names.append(name)
649
-
650
- except Exception as e:
651
- logger.error(f"Error generating name for sample {i}: {e}")
652
- # fallback: use a simple default name with index
653
- fallback_name = f"Person_{i}_{sample.get('gender', 'unknown')}"
654
- sample["name"] = fallback_name
655
- all_used_names.append(fallback_name)
656
-
657
- logger.info("Names generated for all samples in the sampling plan.")
658
-
659
- # update the global list of unique names
660
- new_names = [sample["name"] for sample in self.remaining_characteristics_sample]
661
- DeepPersonaFactory.all_unique_names = list(set(DeepPersonaFactory.all_unique_names + new_names))
662
-
663
- else:
664
- raise ValueError("Sampling plan already initialized. Cannot reinitialize it.")
665
-
666
- @classmethod
667
- def _all_used_and_precomputed_names(cls) -> list:
668
- """
669
- Returns all the names currently in use by agents and those pre-generated by all factories.
670
- """
671
- return DeepPersona.all_agents_names() + cls.all_unique_names
672
-
673
- def _is_name_globally_unique(self, name:str) -> bool:
674
- """
675
- Checks if a name is globally unique.
676
- """
677
- return name not in DeepPersonaFactory.all_unique_names
678
-
679
- def _is_name_already_assigned(self, name:str) -> bool:
680
- """
681
- Checks if a name has already been assigned to a person.
682
- """
683
- return name in DeepPersona.all_agents_names()
684
-
685
-
686
- @transactional()
687
- @utils.llm(temperature=0.5, frequency_penalty=0.0, presence_penalty=0.0)
688
- def _compute_sampling_dimensions(self, sampling_space_description:str) -> dict:
689
- """
690
- Given a sampling description, computes the dimensions of the sampling space. The sampling space offers a way to sample from a population of people,
691
- so each dimension contains values that could be an attribute of a **specific** person. The resulting sampling space must:
692
- - contemplate all critical characteristics mentioned in the sampling description, even if this means having a large number of dimensions and
693
- complex values for each.
694
- * whenever necessary to properly capture the possibilities, you can replace a single dimension by a collection of sub-dimensions
695
- (e.g., instead of "beliefs", you might have "political_beliefs", "economic_beliefs", "consumer_beliefs", etc.)
696
- - values for each dimension can range from numbers or single words to large sentences or even paragraphs. For attributes that are not clearly single values,
697
- always try to add as much detail as possible. For instance, age is just a single value, but lifestyle or cultural background **must** be a long sentence or even a paragraph.
698
- This is to ensure that, later, the generated people can be very nuanced and realistic, with rich and detailed attributes. See the example below to get inspired.
699
- - you can be very creative with the dimensions and values provided that they are consistent with the sampling space description.
700
- - whenever you have the information about PROPORTIONS of the values, you **must** include them in the output, so that the sampling space can be used to generate people
701
- in a representative way.
702
- - values are **not** distributions, probabilities or other statistics, but rather concrete, specific, people attributes. For example, there can
703
- be no "average_age" dimension, but only "age", although the complete set of values that define a dimension is itself a distribution.
704
- - each dimension should be as rich as possible, having as many values as possible, so that the sampling space can be used to generate
705
- many nuanced variations of the target population.
706
- - each dimension should consider a wide range of values, making sure to cover both POSITIVE and NEGATIVE possibilities (e.g., rich vs poor, likes sugar vs don't like sugar).
707
- - each dimension should always include extreme values, so that the sampling space can be used to generate people with extreme characteristics, such as very young or very old,
708
- very rich or very poor, very positive or very negative, etc.
709
- - include as many dimensions as possible to capture the richness of the population, even if this means having a large number of dimensions.
710
- - in principle, the original sampling description could be approximately rephrased in terms of the dimensions and values generated (i.e., the dimensions are rich enough
711
- to capture all relevant information). However, this should not limit the range of values and dimensions used, but rather be a byproduct of the process. For instance,
712
- if the original description say "young people", the dimension "age" could be defined as a range of values from 18 to 30, but **not** as a small list with only, say, [18, 25, 30].
713
- Always try to be as rich as possible in the values and dimensions, even if this means having a large number of them.
714
-
715
- Additionally, make sure you include special dimensions that capture these aspects, in such a way that they relate to the sampling space description:
716
- - personality traits (with proportions)
717
- - political beliefs (with proportions)
718
- - economic beliefs (with proportions)
719
- - financial situation (with proportions)
720
- - preferences and tastes (with proportions)
721
- - cultural background (with proportions and diverse ethnicities and cultural heritages; provide detailed, realistic, and varied examples that reflect a wide spectrum of ethnic, national, and cultural identities relevant to the sampling space description)
722
-
723
- ## On your input
724
-
725
- Here's what to do depending on what the input sampling space description looks like:
726
- - Plain text: Abstract all the potential dimensions from the text. For example, if the text is "Young Western people of different liberal professions.", the dimensions could be "age", "profession", "country".
727
- - JSON: Do not use the JSON directly, but rather abstract the dimensions from it. Input JSONs can be obtained from various sources, and you should do your best to interpret them and produce a clean list of dimensions and their values, regardless of how complex the input JSON is. In particular, never use the JSON formatting itself as dimension names or values, but rather abstract the actual dimensions and values from it.
728
- - Tables or other structured data: Abstract the dimensions from the structured data. For example, if the data is in a table, you should extract the rows and columns and abstract the dimensions from them.
729
-
730
-
731
- ## On your output:
732
- You output a JSON containing a list of dimensions. Each output dimension **must** consist of:
733
- - a name;
734
- - EITHER a list of values OR a range of values (specified as a pair).
735
- * in lists of values, whenever possible, you **must** use long values, such as sentences or paragraphs, instead of short words or numbers.
736
- * in lists of values you can, optionally, use a dictionary to specify proportions of the values, e.g., {"value1": 0.5, "value2": 0.3, "value3": 0.2} to indicate that 50% of the population has value1, 30% has value2, and 20% has value3.
737
- Adjust the proportions as appropriate for the context and ensure they sum to 1.0.
738
-
739
- The output is formatted as a JSON object with the following structure:
740
- ```json
741
- {
742
- "sampling_space_description": "A description of the sampling space.",
743
- "dimensions": [
744
- {
745
- "name": "dimension_name_1",
746
- "values": ["value1", "value2", ...]
747
- },
748
-
749
- {
750
- "name": "dimension_name_2",
751
- "range": [min, max]
752
- },
753
-
754
- {
755
- "name": "dimension_name_3",
756
- "values": {"value1": proportion1, "value2": proportion2, "value3": proportion3, ...}
757
- },
758
-
759
- ...
760
- ]
761
- }
762
- ```
763
-
764
- Unless values are necessarily numbers (e.g., age), they should be descriptive strings so that it is easy to understand what they mean.
765
- These strings can be simple values or long detailed texts, whatever is best to capture the desired characteristic.
766
-
767
- ## Example:
768
- Given the following INPUT sampling space description: "Young Western people of different liberal professions and social classes."
769
-
770
- The OUTPUT dimensions could be a dictionary with the following structure:
771
- ```json
772
- {
773
- "sampling_space_description": "Young Western people of different liberal professions and social classes.",
774
- "dimensions": [
775
- {
776
- "name": "age",
777
- "range": [18, 30]
778
- },
779
- {
780
- "name": "socioeconomic status",
781
- "values": ["miserable", "poor", "middle class", "rich", "very rich"]
782
- },
783
- {
784
- "name": "profession",
785
- "values": ["Architect", "Lawyer", "Physician", "Accountant", ...]
786
- },
787
- {
788
- "name": "country",
789
- "values": {
790
- "USA": 0.35,
791
- "Germany": 0.10,
792
- "UK": 0.09,
793
- "France": 0.09,
794
- "Italy": 0.08,
795
- "Spain": 0.06,
796
- "Canada": 0.06,
797
- "Australia": 0.05,
798
- "Netherlands": 0.03,
799
- "Sweden": 0.03,
800
- "Belgium": 0.02,
801
- "Switzerland": 0.02,
802
- "Austria": 0.01
803
- }
804
- },
805
- {
806
- "name": "cultural_background",
807
- "values": {
808
- "Born in a large city of a developed nation, parents were from a lineage of physicians and lawyers": 0.12,
809
- "Descendant of Ashkenazi Jewish immigrants who settled in New York City in the early 20th century, maintaining strong ties to Jewish traditions and community life.": 0.08,
810
- "Second-generation Chinese-Canadian whose family values blend Confucian principles with Canadian multiculturalism, celebrating both Lunar New Year and Canada Day.": 0.06,
811
- "Of Irish and Italian descent, growing up in Boston with a household that combines Catholic traditions, Irish folk music, and Italian culinary heritage.": 0.10,
812
- "Of Turkish-German background, raised in Berlin with exposure to both Turkish family traditions and contemporary German urban culture.": 0.05,
813
- <... many more ...>
814
- }
815
- },
816
- {
817
- "name": "economic_beliefs",
818
- "values": {
819
- "Firmly believes that diligent effort and perseverance in one's career are the primary drivers of financial prosperity and upward mobility.": 0.28,
820
- "Holds the view that wealth accumulation is largely a matter of being in the right place at the right time, with luck playing a significant role in economic outcomes.": 0.18,
821
- "Thinks that government intervention and social programs are essential to ensure fair economic opportunities for all members of society.": 0.22,
822
- "Believes that personal connections and networking are more important than formal education or hard work in achieving economic success.": 0.15,
823
- <... many more ...>
824
- }
825
- },
826
- {
827
- "name": "professional_attitudes",
828
- "values": {
829
- "Aspires to establish and grow their own business, valuing independence and the ability to innovate without corporate constraints.": 0.18,
830
- "Prefers the stability and structure of working for a well-established company, appreciating clear career paths and organizational support.": 0.32,
831
- "Enjoys collaborating in multidisciplinary teams and seeks out workplaces that foster creativity and open communication.": 0.22,
832
- "Is highly risk-averse and prioritizes job security and predictable routines over rapid advancement or entrepreneurial ventures.": 0.15,
833
- <... many more ...>
834
- }
835
- },
836
- {
837
- "name": "political_beliefs",
838
- "values": {
839
- "Strongly supports progressive policies aimed at reducing income inequality and expanding access to healthcare and education.": 0.24,
840
- "Advocates for conservative values, emphasizing the importance of tradition, personal responsibility, and limited government intervention.": 0.20,
841
- "Identifies as a centrist, believing that balanced compromise between opposing political ideologies leads to the best societal outcomes.": 0.26,
842
- "Is passionate about environmental issues and supports policies that prioritize sustainability and climate change mitigation above economic growth.": 0.16,
843
- <... many more ...>
844
- }
845
- },
846
- {
847
- "name": "personality_traits",
848
- "values": {
849
- "Maintains an unwavering optimism, always expecting positive outcomes even in the face of adversity and encouraging others to do the same.": 0.12,
850
- "Tends to be introspective and reserved, preferring solitary activities and deep reflection over social gatherings or group events.": 0.18,
851
- "Is highly ambitious, constantly setting challenging goals and pushing themselves to achieve more in both personal and professional spheres.": 0.15,
852
- "Approaches new experiences with caution, carefully weighing risks and benefits before making decisions or embracing change.": 0.20,
853
- "Often expects the worst in any situation, focusing on potential problems and rarely feeling hopeful about the future.": 0.08,
854
- "Frequently experiences a sense of sadness and melancholy, finding it difficult to enjoy activities that once brought happiness.": 0.06,
855
- "Is quick to notice flaws and shortcomings in themselves and others, tending toward a negative outlook on life.": 0.07,
856
- "Feels overwhelmed by setbacks, easily discouraged, and tends to dwell on failures rather than successes.": 0.05,
857
- <... many more ...>
858
- }
859
- },
860
- {
861
- "name": "preferences_and_tastes",
862
- "values": {
863
- "Has a deep appreciation for classical music, frequently attending orchestral concerts and collecting rare vinyl recordings.": 0.08,
864
- "Finds joy in spending weekends hiking in remote natural parks, seeking tranquility and inspiration from the outdoors.": 0.16,
865
- "Rarely leaves home, preferring the comfort of familiar surroundings and engaging in hobbies such as reading and painting indoors.": 0.11,
866
- "Enjoys experimenting with international cuisines, often hosting elaborate dinner parties to share culinary discoveries with friends.": 0.14,
867
- "Is sensitive to loud environments and actively avoids crowded or noisy places, seeking peace and quiet whenever possible.": 0.13,
868
- "Prefers to spend time alone in dimly lit rooms, listening to somber music and reflecting on the more difficult aspects of life.": 0.04,
869
- "Has little interest in social gatherings or celebrations, often declining invitations and feeling out of place in festive environments.": 0.07,
870
- "Frequently chooses entertainment or art that explores themes of loss, struggle, or existential despair, finding comfort in shared sadness.": 0.03,
871
- <... many more ...>
872
- }
873
- }
874
- ]
875
- }
876
- ```
877
-
878
- Note in the example:
879
- - Age is given as a numeric range.
880
- - All other values are descriptive strings, human-friendly, no strange symbols or codes.
881
- - The "country" dimension uses a dictionary with suitable proportions for Western countries.
882
- - No value contains internal structure - just a name or short description.
883
- - All values are concrete properties, not distributions, probabilities or other statistics.
884
- - Whenever possible, the values in the dimensions are long and detailed **sentences** each.
885
- - It has few dimensions because the sampling space description is very short. If the description were longer, the number of dimensions would be larger,
886
- and their values more detailed.
887
- - It contains the additional dimensions that capture the personality traits, political beliefs, economic beliefs, financial situation, preferences and tastes,
888
- and now cultural background with varied ethnicities and heritages, which are important for the sampling space to be rich enough to generate nuanced variations of the target population.
889
- - Beyond positive aspects, it also includes values that emphasize pessimism, negativeness, and sadness, ensuring these characteristics are balanced and represented in the sampling space.
890
-
891
- Args:
892
- sampling_space_description (str): A description of the sampling space.
893
-
894
- Returns:
895
- dict: A dictionary with the dimensions of the sampling space, as shown in the example above.
896
- """
897
- # the body of this method is handled by the @llm decorator.
898
-
899
- @transactional()
900
- @utils.llm(temperature=0.5, frequency_penalty=0.0, presence_penalty=0.0)
901
- def _compute_sample_plan(self, N:int, sampling_dimensions:dict, max_quantity_per_sample_directive:int=5, min_sampling_directives:int=10, max_sampling_directives:int=50) -> List[Dict[str, any]]:
902
- """
903
- This function defines which and how many people to sample from the sampling space defined by the given dimensions.
904
- Given a number N of people to sample, and the dimensions of the sampling space, computes a *sample plan* of N people from that space.
905
-
906
- The input sampling dimensions have the following structure:
907
-
908
- ```json
909
- {
910
- "sampling_space_description": "A description of the sampling space.",
911
- "dimensions": [
912
- {
913
- "name": "dimension_name_1",
914
- "values": ["value1", "value2", ...]
915
- },
916
- {
917
- "name": "dimension_name_2",
918
- "range": [min, max]
919
- },
920
- ...
921
- ]
922
- }
923
- ```
924
-
925
- The *sample plan* to be generated is a list of M *sampling directives*. Each *sampling directive* **always** consists of:
926
- - "id": a unique identifier for the *sampling directive*, just an incrementing integer starting from 1.
927
- - "subpopulation_description": a short description of the sub-population that this *sampling directive* represents, based on the sampling space description and the sampled values.
928
- If possible, make it a recognizable and meaningful description of the sub-population,
929
- such as "Young rebellious people from upper classes", "Old conservative boomers from rural areas", "Intellectual urban professionals with diverse and cosmopolitan cultural backgrounds", etc.
930
- "sampled_values": a map from dimensions of the sampling space to concrete values, value ranges or value options.
931
- - "quantity": to how many elements with those values should be sampled in total (from 1 to max_quantity_per_sample_directive if specified).
932
- The sum of all of these quantities must be equal to N.
933
-
934
- So your final output **MUST** follow this JSON structure:
935
-
936
- ```json
937
- [
938
- { "id": 1,
939
- "subpopulation_description": "Some description here...",
940
- "sampled_values": {
941
- "dimension_name_1": [n_1_min, n_1_max],
942
- "dimension_name_2": ["value2_1", "value2_2", ...],
943
- "dimension_name_3": ["value3_1", "value3_2", ...],
944
- ...
945
- },
946
- "quantity": quantity_1
947
- },
948
-
949
- {
950
- "id": 2,
951
- "subpopulation_description": "Some other description here...",
952
- "sampled_values": {
953
- "dimension_name_1": [n_1_min, n_1_max],
954
- "dimension_name_2": "value2",
955
- "dimension_name_3": ["value3_1", "value3_2", ...],
956
- ...
957
- },
958
- "quantity": quantity_2
959
- },
960
- ...
961
- {
962
- "id": M,
963
- "subpopulation_description": "Again some description here...",
964
- "sampled_values": {
965
- "dimension_name_1": [n_1_min, n_1_max],
966
- "dimension_name_2": ["value2_1", "value2_2", ...],
967
- "dimension_name_3": ["value3_1", "value3_2", ...],
968
- ...
969
- },
970
- "quantity": quantity_M
971
- },
972
- ]
973
- ```
974
-
975
- where N = quantity_1 + quantity_2 + ... + quantity_M,
976
- quantity_i <= max_quantity_per_sample_directive (if specified),
977
- and M is the number of *sampling directives*, which can be as large as necessary to ensure
978
- that the total number of sampled people is equal to N.
979
-
980
- Note:
981
- - Concrete values are NOT in brackets, but rather just a single value or a range of values.
982
- - Options are given in lists of strings separated by commas, e.g., ["value1", "value2", ...].
983
- Ranges are numeric and specified as a pair of numbers, e.g., [min, max].
984
-
985
- Rules and principles:
986
- - The sampling plan is a collection of sub-populations captured by each *sampling directive*. Therefore, the various *sampling directives* must complement each other in order
987
- to approximate the target population.
988
- - Each *sampling directive* is a **combination** of values from the sampling dimensions that represent a specific segment of the target population. Its richness and variety must reflect the desired sub-population.
989
- - The dimension sampled in each *sampling directive* can be a single value, a range of values, or a list of values. You can use ranges and lists to cover a wider range of possibilities
990
- in a compact way, but you can also use single values if necessary. The items in a list can be long or short; it does not matter, both can appear in lists. Some examples of good formatting:
991
- * CORRECT example: ["Very rich", "Rich", "Middle class", "Poor"]
992
- * CORRECT example: "Rich"
993
- * WRONG example: ["Very rich or Rich or Middle class or Poor"]
994
- * WRONG example: ["Rich"]
995
- - **Always** try very hard to use a list of values (two or more values) or range of values (min - max), to make the sampling plan at once concise and rich. In doing so, make sure that each *sampling directive* is truly representative
996
- of some segment of the target population, and not just a random collection of values.
997
- - You MUST make M as large as necessary to contemplate the target population, ideally M >= min_sampling_directives (but M <= max_sampling_directives, if specified), to ensure a rich and varied sampling of the population.
998
- * Note that this means the maximum *sampling directive* "id" (call it max_id) used in the *sampling plan* is such that: max_id >= min_sampling_directives; max_id <= max_sampling_directives (if specified).
999
- - The sampled population MUST be representative of the target population.
1000
- - The sampled population MUST be realistic.
1001
- - You can set the quantity of each *sampling directive* to 1 if necessary to ensure a varied and representative sampling.
1002
- - All values chosen from the sampling dimensions must be copied IN FULL in the "sampled_values" map, so that the sampled values are concrete and specific.
1003
- The sample plan is supposed to be self-contained, therefore it MUST have all details necessary to sample the people later, without needing to refer back to the sampling dimensions.
1004
- - You should include as many *sampling directives* as necessary to cover the sampling of N total people (the sum of all quantities). When in doubt,
1005
- **always** add more *sampling directives* (i.e., make M larger) up to max_sampling_directives (if specified), as this will ensure you cover the requested N people.
1006
- - In particular, make sure both POSITIVE and NEGATIVE possibilities of the various characteristics are covered (e.g., rich vs poor, likes sugar vs doesn't like sugar, enthusiastic vs apathetic).
1007
- This is to ensure any bias (towards positive or negative characteristics) is minimized, and the sampling space is rich enough to generate people with a wide range of characteristics.
1008
- - The sampling space description should be used to guide the sampling, so that the sampled population is consistent with it.
1009
- - You should ensure that the quantity of requested samples in each *sampling directive* is proportional to their presumed size in the target population.
1010
- That is to say, combinations of dimensions that are more common in the target population should be sampled more often. If you don't know, make a guess.
1011
- - If max_quantity_per_sample_directive is specified, you must ensure that no single *sampling directive* exceeds this quantity. This is to ensure we get more variation and not just a few large groups.
1012
- - You can rely on your built-in knowledge or make educated guesses about such quantities and proportions to ensure that the sample is representative of the population.
1013
- * Note that this means for any quantity_i: quantity_i >= 1; quantity_i <= max_quantity_per_sample_directive (if specified).
1014
- - The sum of all quantities in the output **must** be equal to N, the number of people to sample in total.
1015
- - You can always add extra *sampling directives* (up to max_sampling_directives if specified) to ensure the total of N people is reached.
1016
- - It is acceptable for the sampling plan to generate more than N people, but NEVER less than N. So if unsure generate MORE people, never less.
1017
-
1018
- ## Example
1019
- Given the following INPUT sampling dimensions:
1020
-
1021
- ```json
1022
- {
1023
- "sampling_space_description": "Young Western people of different liberal or intellectual professions."
1024
- "dimensions": [
1025
- {
1026
- "name": "age",
1027
- "range": [18, 30]
1028
- },
1029
- {
1030
- "name": "profession",
1031
- "values": ["Architect", "Financial Analyst", "Writer", "Art critic", "Lawyer", "Physician", "Accountant", ...]
1032
- },
1033
- {
1034
- "name": "country",
1035
- "values": ["USA", "Canada", "UK", "France", "Germany", "Italy", "Spain", "Portugal", "Netherlands", "Belgium", ...]
1036
- },
1037
-
1038
- {
1039
- "name": "personality_traits",
1040
- "values": {
1041
- "Maintains an unwavering optimism, always expecting positive outcomes even in the face of adversity and encouraging others to do the same.": 0.12,
1042
- "Tends to be introspective and reserved, preferring solitary activities and deep reflection over social gatherings or group events.": 0.18,
1043
- "Is highly ambitious, constantly setting challenging goals and pushing themselves to achieve more in both personal and professional spheres.": 0.15,
1044
- "Approaches new experiences with caution, carefully weighing risks and benefits before making decisions or embracing change.": 0.20,
1045
- "Often expects the worst in any situation, focusing on potential problems and rarely feeling hopeful about the future.": 0.08,
1046
- "Frequently experiences a sense of sadness and melancholy, finding it difficult to enjoy activities that once brought happiness.": 0.06,
1047
- "Is quick to notice flaws and shortcomings in themselves and others, tending toward a negative outlook on life.": 0.07,
1048
- "Feels overwhelmed by setbacks, easily discouraged, and tends to dwell on failures rather than successes.": 0.05,
1049
- <... many more ...>
1050
- }
1051
- }
1052
-
1053
- (... more dimensions ...)
1054
-
1055
- ]
1056
- }
1057
-
1058
- An OUTPUT *sample plan* therefore is a LIST with the *sample plan*, where each element is a dictionary with a *sampling directive*. For example, an output based on the above dimensions could look like this:
1059
-
1060
- ```json
1061
- [
1062
- {
1063
- "id": 1,
1064
- "subpopulation_description": "Young Anglo-Saxon professionals with their stereotypical ambition and drive.",
1065
- "sampled_values": {
1066
- "age": [22, 30],
1067
- "profession": ["Financial Analyst", "Lawyer", "Physician", "Accountant", ...],
1068
- "country": ["USA", "UK", "Canada"],
1069
- "personality_traits": ["Maintains an unwavering optimism, always expecting positive outcomes even in the face of adversity and encouraging others to do the same.",
1070
- "Approaches new experiences with caution, carefully weighing risks and benefits before making decisions or embracing change",
1071
- "Tends to be introspective and reserved, preferring solitary activities and deep reflection over social gatherings or group events.",
1072
- "Is quick to notice flaws and shortcomings in themselves and others, tending toward a negative outlook on life."]
1073
- },
1074
- "quantity": 10
1075
- },
1076
- {
1077
- "id": 2,
1078
- "subpopulation_description": "Young European professionals with a focus on creativity and innovation and their occasional existential crises.",
1079
- "sampled_values": {
1080
- "age": [21, 30],
1081
- "profession": ["Architect", "Lawyer", "Writer", "Physician", "Art critic", ...],
1082
- "country": ["France", "Germany", "Italy", "Spain"],
1083
- "personality_traits": ["Often expects the worst in any situation, focusing on potential problems and rarely feeling hopeful about the future.",
1084
- "Frequently experiences a sense of sadness and melancholy, finding it difficult to enjoy activities that once brought happiness.",
1085
- "Is quick to notice flaws and shortcomings in themselves and others, tending toward a negative outlook on life.",
1086
- "Feels overwhelmed by setbacks, easily discouraged, and tends to dwell on failures rather than successes.]"
1087
- },
1088
- "quantity": 5
1089
- },
1090
- ...
1091
- ]
1092
- ```
1093
-
1094
-
1095
- Args:
1096
- n (int): The number of elements to sample in total. This number will be distributed across the dimensions proportionally
1097
- to the presumed size the target population.
1098
- sampling_dimensions (dict): The dimensions of the sampling space.
1099
- max_quantity_per_sample_directive (int, optional): The maximum quantity of samples that can be specified in a single sampling directive. This is to ensure that the sampling plan is diverse and not biased towards a few large groups.
1100
- min_sampling_directives (int, optional): The minimum number of sampling directives to generate. This is to ensure that the sampling plan is rich and varied.
1101
- max_sampling_directives (int, optional): The maximum number of sampling directives to generate. This is to ensure that the sampling plan is not overly complex and remains manageable.
1102
-
1103
- Returns:
1104
- list: A LIST with the *sample plan*, where each element is a dictionary with a *sampling directive*, as described above.
1105
- """
1106
- # the body of this method is handled by the @llm decorator.
1107
-
1108
@transactional()
def _flatten_sampling_plan(self, sampling_plan: list) -> list:
    """
    Given a sample plan, flattens it into a list of samples in such a way that the number of times
    each sample appears corresponds to what was specified in the plan. The order is randomized
    (in place, via random.shuffle) to avoid bias.

    For example, an input sample plan could look like this:

    ```json
    [
        {
            "sampled_values": {
                "age": 25,
                "profession": "Architect",
                "country": "USA"
            },
            "quantity": 8
        },
        {
            "sampled_values": {
                "age": 27,
                "profession": "Lawyer",
                "country": "Canada"
            },
            "quantity": 1
        },
        ...
    ]
    ```

    And the output would be something like:

    ```python
    [{"age": 25, "profession": "Architect", "country": "USA"},
     {"age": 27, "profession": "Lawyer", "country": "Canada"},
     ...
     {"age": 25, "profession": "Architect", "country": "USA"}]
    ```

    Args:
        sampling_plan (list): The sample plan to flatten: a list of *sampling directives*, each a
            dict with a "sampled_values" dict and an optional integer "quantity". A missing
            "quantity" defaults to 1 (with a warning). NOTE: the previous docstring declared this
            parameter as a dict, but the code iterates it as a list of directives.

    Returns:
        list: A list of samples, where each sample is a dictionary with the sampled values.
    """
    samples = []
    for directive in sampling_plan:
        if "quantity" not in directive:
            logger.warning(f"Sample in sampling plan does not have a 'quantity' field: {directive}. Assuming 1.")
            qty = 1
        else:
            qty = int(directive["quantity"])

        # Each flattened sample is deep-copied so that later per-sample modifications
        # do not leak into sibling copies originating from the same directive.
        samples.extend(copy.deepcopy(directive["sampled_values"]) for _ in range(qty))

    # randomize to avoid ordering bias
    random.shuffle(samples)  # in place
    return samples
1171
-
1172
@transactional()
def _unique_full_name(self, already_generated_names: list, context: str = None) -> str:
    # Thin pass-through wrapper: the actual generation happens in the LLM-backed helper.
    # Splitting it this way is required so the @transactional decorator can be applied
    # effectively around the LLM call.
    # TODO update this somehow to avoid this cumbersome workaround.
    return self._aux_unique_full_name(
        already_generated_names=already_generated_names,
        context=context,
    )
1178
-
1179
-
1180
@utils.llm(temperature=1.5, presence_penalty=0.5, frequency_penalty=0.5)
def _aux_unique_full_name(self, already_generated_names: list, context: str = None) -> str:
    """
    Generates a unique full name for a person. The full name must not be in the list of already generated names.
    If necessary, you can generate a longer name to ensure it is new. You can also try tweaking the spelling or
    adding more surnames, so that the name is unique. However, the name **must** sound realistic and not be too far-fetched,
    not sound as if it was made up.

    The final result is only the name, nothing else:

    "Some name here" ---> correct as it is just a name, nothing else
    "Some name here, because ..." ---> incorrect as it contains a reason
    "Some name here." ---> incorrect as it contains punctuation
    "Name: Some name here" ---> incorrect as it contains a label
    "Some name here, some other name here" ---> incorrect as it contains more than one name

    An optional context can be provided to guide the name generation, so that it is a realistic name for the context. For example, we know that different socio-economic classes have different naming conventions, so the context can be used to guide the name generation.

    Regarding the `already_generated_names`, you must:
    - NEVER generate a name that is already in the list of already generated names.
    - The names in `already_generated_names` ARE NOT examples of names to generate. They are just names that have already been generated and should not be repeated. You should generate new names regardless of the names in `already_generated_names`, the only constraint is that the new names should not be in the list of already generated names.
    - In particular, you are not to generate a similar name to that of those in `already_generated_names`, you are **not** building some kind of
    logical sequence. Each name must be independent of the others.

    ## Example

    **Input:**
    already_generated_names: ["John Doe", "Jane Smith", "Alice Brown"]
    context: { 'age': 25, 'profession': 'Architect', 'country': 'USA' }

    **Output:**
    "Michael Johnson"

    Note that:
    - The name "Michael Johnson" is not in the list of already generated names.
    - The output consists only of a name, nothing else.

    Args:
        already_generated_names (list): The list of already generated names.
        context (str): The context in which the name is being generated. This can be used to guide the name generation, so that it is a realistic name for the context.

    Returns:
        str: A unique full name for a person.
    """
    # the body of this method is handled by the @llm decorator; the docstring above is the
    # prompt the decorator sends to the model. FIX: corrected the "ouput" typo in the prompt.
1225
-
1226
@transactional()
def _unique_full_names(self, n: int, already_generated_names: list, context: str = None) -> list:
    """
    Generates a list of n unique full names for people. The full names must not be in the list
    of already generated names.

    Generation is performed in chunks (at most 10 names per LLM call), retrying until n names
    are collected or a bounded number of iterations is exhausted.

    Args:
        n (int): The number of names to generate.
        already_generated_names (list): The list of already generated names.
        context (str): The context in which the names are being generated. This can be used to
            guide the name generation, so that it is a realistic name for the context.

    Returns:
        list: the generated unique names. May contain fewer than n entries if generation
            repeatedly fails within the iteration budget.
    """

    logger.debug(f"Will generate {n} unique full names for people. Already generated names: {already_generated_names}")

    names = []

    if n > 0:
        # split n into smaller chunks to make the model's job easier
        chunk_size = min(10, n)  # at most 10 names at a time, to avoid overwhelming the model
        chunks = math.ceil(n / chunk_size)

        forbidden_names = copy.deepcopy(already_generated_names)

        max_iterations = chunks * 10
        cur_iterations = 0

        while len(names) < n and cur_iterations < max_iterations:
            logger.debug(f"Currently already generated names: {forbidden_names}")
            logger.debug(f"Iteration {cur_iterations} - Generating {chunk_size} names. Currently have {len(names)} names. Max iterations to be allowed: {max_iterations}")
            try:
                temp_names = utils.try_function(
                    lambda: self._aux_unique_full_names(n=chunk_size,
                                                        already_generated_names=forbidden_names,
                                                        context=context),
                    # checks that some new name was produced
                    postcond_func=lambda result: len(set(forbidden_names).intersection(result)) < len(result),
                    retries=3)

                # add the new names to the names list, removing any duplicates from their combination
                names = list(set(names + temp_names))

                # BUGFIX: previously `forbidden_names += names` re-appended the entire
                # accumulated `names` list on every iteration, so forbidden_names grew
                # quadratically with duplicates and bloated the prompt. Merge as sets instead.
                forbidden_names = list(set(forbidden_names) | set(names))
            except Exception as e:
                logger.error(f"Error generating names: {e}")
                # if we have an error, we just skip this iteration and try again,
                # but we still count the iteration against the budget

            cur_iterations += 1

        if cur_iterations >= max_iterations and len(names) < n:
            logger.error(f"Could not generate the requested number of names after {max_iterations} iterations. Moving on with the {len(names)} names generated.")

    # record the new names in the class-wide registry of all generated names
    DeepPersonaFactory.all_unique_names = list(set(DeepPersonaFactory.all_unique_names + names))

    return names
1282
-
1283
@utils.llm(temperature=1.9, presence_penalty=0.5, frequency_penalty=0.5)
def _aux_unique_full_names(self, n: int, already_generated_names: list, context: str = None) -> list:
    """
    Generates a list of n unique full names for people. The full names must not be in the list of already generated names. You **must** consider **all** reasonable options for names,
    not only the common or popular. To ensure that fresh names are really new and do not appear in the list of already generated ones, if necessary you can:
    - generate longer names to ensure they are new.
    - try tweaking the spelling or adding more surnames, so that the names are unique.
    - add unusual names or surnames, so that the names are unique.
    - as a very last resort, you can append a number to the name, so that it is unique, despite being a bit less realistic.

    Except for the latter option, the names **must** sound realistic and not be too far-fetched, not sound as if they were made up.

    You **must** generate at least n names, and they **must** all be unique. If necessary, to ensure you get at least n names, you can try to generate more than n,
    but **never** less, unless you need to avoid a repeated name. If forced to choose, you always prefer to generate unique names, even if that means generating less than n names.

    The final result is only the list of names, nothing else:

    ["Some name here"] ---> correct as it is just a list with a single name, nothing else
    ["Some name here, some other name here"] ---> correct as it is a list of names
    ["Some name here, because ..."] ---> incorrect as it contains a reason
    ["Some name here."] ---> incorrect as it contains punctuation
    ["Name: Some name here"] ---> incorrect as it contains a label

    An optional context can be provided to guide the name generation, so that it is a realistic name for the context. For example, we know that different socio-economic classes have different naming conventions,
    so the context can be used to guide the name generation. In particular, follow these rules regarding the context:
    - If a country is specified, the names should be typical for that country.

    Regarding the `already_generated_names`, you must:
    - NEVER generate a name that is already in the list of already generated names.
    - The names in `already_generated_names` ARE NOT examples of names to generate. They are just names that have already been generated and should not be repeated. You should generate new names regardless of the names in `already_generated_names`, the only constraint is that the new names should not be in the list of already_generated_names.
    - In particular, you are not to generate a similar name to that of those in `already_generated_names`, you are **not** building some kind of logical sequence. Each name must be independent of the others.

    ## Example

    **Input:**
    n: 6
    already_generated_names: ["John Doe", "Jane Smith", "Alice Brown"]
    context: "Young Americans of different liberal professions"
    **Output:**
    ["Michael Johnson", "Sarah Williams", "David Gates", "Jennifer Davis", "Robert J. Wilson", "Anna Kerr"]

    Note that:
    - The names are not in the list of already generated names.
    - The outputs consist only of a list of names, nothing else.
    - The output length is exactly 6, which is the requested number of names. There could be a bit more names generated, but never less.

    Args:
        n (int): The number of names to generate.
        already_generated_names (list): The list of already generated names.
        context (str): The context in which the names are being generated. This can be used to guide the name generation, so that it is a realistic name for the context.

    Returns:
        list: A list of n unique full names for people. These names NEVER repeat names in the list of already generated names.
    """
    # the body of this method is handled by the @llm decorator. Below we provide a post-processing
    # function that is applied to the LLM output, to ensure that the names are unique.
    # FIX: corrected prompt typos ("reasononable", "despote", "ouputs").

    return lambda names: list(set(names))
1341
-
1342
@transactional()
def _aux_model_call(self, messages, temperature, frequency_penalty, presence_penalty):
    """
    Auxiliary method that performs the actual model call.

    Kept as a separate method so the @transactional decorator can wrap it; otherwise the
    agent creation would be skipped during cache reutilization, which we don't want.
    """
    client = openai_utils.client()
    return client.send_message(
        messages,
        temperature=temperature,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty,
        response_format={"type": "json_object"},
    )
1354
-
1355
def _generate_deep_persona_internal(self, initial_spec: dict) -> dict:
    """
    Performs a second API call to enrich the persona with a depth of 350 attributes.

    Args:
        initial_spec (dict): the initial persona specification to enrich.

    Returns:
        dict: the enriched specification; falls back to `initial_spec` unchanged when the
            model call fails or its output cannot be parsed into JSON.
    """
    logger.info(f"Enriching persona {initial_spec.get('name')} to deep persona (depth 350)...")

    prompt = f"""
    You are an expert persona generator. You have been provided with an initial persona profile:
    {json.dumps(initial_spec, indent=4)}

    TASK:
    Take all the attributes from this initial profile and expand them significantly to reach a depth of 350 attributes/nuances.
    The final profile must be incredibly detailed, authentic, and realistic.
    Expand on every field: education, occupation, style, personality, preferences, beliefs, skills, behaviors, health, relationships, and other_facts.
    Provide at least 50 detailed entries for each complex field (preferences, beliefs, other_facts).

    Rules:
    - Maintain consistency with the initial profile.
    - Output ONLY a valid JSON object.
    - Use the same field structure as the input.
    """

    messages = [
        {"role": "system", "content": "You are a specialized system for creating ultra-deep, 350-attribute persona specifications."},
        {"role": "user", "content": prompt}
    ]

    # Use the Helmholtz client via send_message
    message = self._aux_model_call(messages=messages, temperature=1.2, frequency_penalty=0.0, presence_penalty=0.0)

    if message is not None:
        enriched_spec = utils.extract_json(message["content"])
        # BUGFIX: previously an unparsable model response returned None/empty here;
        # now we only use the enriched spec when extraction actually produced something,
        # otherwise we fall back to the initial spec.
        if enriched_spec:
            return enriched_spec

    return initial_spec
1390
-
1391
@transactional()
def _setup_agent(self, agent, configuration):
    """
    Configures the given agent with the necessary persona definitions.
    """
    agent.include_persona_definitions(configuration)
    # Deliberately returns nothing: we don't want the agent object itself to be cached.
1399
-
1400
@transactional()
@utils.llm(temperature=0.3, frequency_penalty=-0.1, presence_penalty=-0.1, enable_json_output_format=False)
def _generate_name_for_sample(self, sample_characteristics: dict, already_generated_names: list) -> str:
    """
    Generates a single full name for a person based on their complete sample characteristics, such that
    it is as appropriate as possible to all characteristics, not just gender.
    This name MUST BE UNIQUE and not appear in the already_generated_names list, though variations of the
    same name are allowed.

    You must generate a realistic full name that is appropriate for the given sample characteristics.
    Consider ALL the characteristics provided, including but not limited to:
    - Gender
    - Age or age range
    - Country/nationality/ethnicity
    - Socioeconomic status
    - Profession
    - Educational background
    - Cultural background
    - Any other relevant demographic or personal characteristics

    The name should:
    - BE UNIQUE and not appear in the already_generated_names list
    - Be realistic and culturally appropriate for the characteristics
    - Sound natural and not made-up
    - Reflect the person's likely background (e.g., names common in their generation, culture, social class)

    If you need additional methods to ensure uniqueness, you can:
    - Use longer or more uncommon names
    - Include middle names or multiple surnames
    - Use culturally appropriate name variations
    - As a last resort, you can append a number, but this should be avoided.


    In ANY CASE, you **must never**, NEVER, generate a name that already appears in the already_generated_names list.

    Return only the full name as a string, nothing else.

    ## Example

    **Input:**
    sample_characteristics: {
        "gender": "female",
        "age": 28,
        "country": "Brazil",
        "profession": "Software Engineer",
        "socioeconomic_status": "middle class",
        "education": "Computer Science degree"
    }
    already_generated_names: ["João Silva", "Maria Santos", "Ana Costa"]

    **Output:**
    "Camila Rodrigues"

    Args:
        sample_characteristics (dict): The complete characteristics of the sample, including demographics, profession, etc.
        already_generated_names (list): The list of already generated names to avoid duplicates. The new name MUST NOT be in this list.

    Returns:
        str: A single full name appropriate for the sample characteristics.
    """
    # the body of this method is handled by the @llm decorator; the docstring above is the prompt.
    # FIX: removed a verbatim-duplicated bullet ("Be unique and not appear in the
    # already_generated_names list"), which was stated twice in the "The name should" list.
1462
 
 
1
+ from gradio_client import Client
2
  import os
3
  import json
4
  import chevron
 
31
  def __init__(self, sampling_space_description:str=None, total_population_size:int=None, context:str=None, simulation_id:str=None):
32
  """
33
  Initialize a DeepPersonaFactory instance.
 
 
 
 
 
 
 
34
  """
35
  super().__init__(simulation_id)
36
+ self.person_prompt_template_path = os.path.join(os.path.dirname(__file__), 'prompts/deep_persona.mustache')
37
  self.context_text = context
38
  self.sampling_space_description = sampling_space_description
39
  self.population_size = total_population_size
 
42
  self.sampling_plan = None
43
  self.remaining_characteristics_sample = None
44
 
45
+ self.generated_minibios = []
46
  self.generated_names = []
47
 
48
def _parse_particularities_for_deeppersona(self, agent_particularities: str) -> dict:
    """
    Extracts the 10 DeepPersona input fields from a free-form agent description.

    Delegates the extraction to the model via _aux_model_call and returns the parsed
    JSON; returns an empty dict when no message comes back.
    """
    extraction_prompt = f"""
    Parse the following agent description into a JSON object with these fields:
    "name", "age", "gender", "occupation", "city", "country", "values", "attitude", "life_story", "interests".

    Description:
    {agent_particularities}

    If a field is missing, provide a realistic default or leave it as "Unknown".
    Return ONLY the JSON.
    """

    system_msg = {"role": "system", "content": "You are a precise data extractor."}
    user_msg = {"role": "user", "content": extraction_prompt}

    message = self._aux_model_call([system_msg, user_msg],
                                   temperature=0.1,
                                   frequency_penalty=0.0,
                                   presence_penalty=0.0)
    if not message:
        return {}
    return utils.extract_json(message["content"])
67
 
68
def _generate_via_deeppersona(self, info: dict, attribute_count: int, context_from_step1: str = None) -> str:
    """
    Calls the DeepPersona API on Hugging Face.

    Args:
        info (dict): parsed persona fields ("age", "gender", "occupation", "city", "country",
            "values", "attitude", "life_story", "interests").
        attribute_count (int): target number of attributes for the generated profile.
        context_from_step1 (str, optional): a previously generated profile, injected into the
            "values" field to enrich the second, deeper generation pass.

    Returns:
        str: the raw text profile returned by the remote Space.

    Raises:
        Exception: re-raises any error from the remote API call, after logging it.
    """
    try:
        client = Client("THzva/deeppersona-experience")

        # BUGFIX: the old check (`isinstance(age, str) and not age.isdigit()`) only reset
        # non-digit *strings* to the default, so None or a sampled [min, max] range crashed
        # in float(), and valid fractional strings like "29.5" were discarded. Coerce
        # defensively instead.
        try:
            age = float(info.get("age", 30))
        except (TypeError, ValueError):
            age = 30.0

        custom_values = info.get("values", "")
        if context_from_step1:
            custom_values = f"ENRICHMENT CONTEXT (Level 100):\n{context_from_step1}\n\nORIGINAL VALUES:\n{custom_values}"

        result = client.predict(
            age=age,
            gender=str(info.get("gender", "Unknown")),
            occupation=str(info.get("occupation", "Professional")),
            city=str(info.get("city", "Unknown")),
            country=str(info.get("country", "Unknown")),
            custom_values=str(custom_values),
            custom_life_attitude=str(info.get("attitude", "Unknown")),
            life_story=str(info.get("life_story", "Unknown")),
            interests_hobbies=str(info.get("interests", "Unknown")),
            attribute_count=float(attribute_count),
            api_name="/generate_persona"
        )
        return result
    except Exception as e:
        logger.error(f"Error calling DeepPersona API: {e}")
        raise
100
+
101
def _structure_deeppersona_result(self, text_profile: str, base_info: dict) -> dict:
    """
    Converts the unstructured DeepPersona text profile into structured JSON format.

    Returns None when the model call produces no message.
    """
    structuring_prompt = f"""
    Convert the following Markdown/Text persona profile into a structured JSON specification.

    Text Profile:
    {text_profile}

    Base Info (use if missing in profile):
    {json.dumps(base_info, indent=4)}

    The JSON must include exactly these fields:
    - name
    - age
    - gender
    - nationality
    - residence
    - education
    - long_term_goals
    - occupation
    - style
    - personality (include at least 10 traits and Big-5)
    - preferences (include at least 20 details)
    - beliefs (include at least 30 details)
    - skills
    - behaviors
    - health
    - relationships
    - other_facts (at least 30 entries)

    Ensure it is valid JSON. Return ONLY the JSON.
    """

    conversation = [
        {"role": "system", "content": "You are a JSON structuring assistant."},
        {"role": "user", "content": structuring_prompt},
    ]

    message = self._aux_model_call(conversation,
                                   temperature=0.3,
                                   frequency_penalty=0.0,
                                   presence_penalty=0.0)
    if not message:
        return None
    return utils.extract_json(message["content"])
140
+
141
@transactional()
def _aux_model_call(self, messages, temperature, frequency_penalty, presence_penalty):
    # Thin transactional wrapper around the model client's send_message, so the call can be
    # cached/replayed by the @transactional machinery. Forces a JSON-object response format;
    # callers parse message["content"] as JSON via utils.extract_json.
    return openai_utils.client().send_message(messages,
                                              temperature=temperature,
                                              frequency_penalty=frequency_penalty,
                                              presence_penalty=presence_penalty,
                                              response_format={"type": "json_object"})
148
 
149
  def generate_person(self,
150
  agent_particularities:str=None,
 
154
  attempts:int=10,
155
  post_processing_func=None,
156
  deep_persona:bool=True) -> DeepPersona:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  logger.debug(f"Starting the person generation based these particularities: {agent_particularities}")
158
  fresh_agent_name = None
159
 
 
160
  if self.population_size is not None:
 
161
  with concurrent_agent_generataion_lock:
162
  if self.remaining_characteristics_sample is None:
 
163
  self.initialize_sampling_plan()
164
 
 
 
 
165
  with concurrent_agent_generataion_lock:
166
  if len(self.remaining_characteristics_sample) == 0:
 
167
  return None
 
168
  else:
169
  sampled_characteristics = self.remaining_characteristics_sample.pop()
 
170
 
171
  if agent_particularities is not None:
172
+ agent_particularities = f"Primary: {agent_particularities}. Sampled: {json.dumps(sampled_characteristics)}"
 
 
 
 
 
 
 
 
 
 
 
 
173
  else:
174
+ agent_particularities = json.dumps(sampled_characteristics)
175
+ else:
 
 
 
 
 
176
  with concurrent_agent_generataion_lock:
177
  fresh_agent_name = self._unique_full_name(already_generated_names=DeepPersonaFactory._all_used_and_precomputed_names(),
178
  context=self.context_text)
179
 
180
  if agent_particularities is not None:
181
+ agent_particularities = f"Primary: {agent_particularities}. Name: {fresh_agent_name}"
 
 
 
 
 
 
 
 
 
182
  else:
183
  agent_particularities = f"Full name: {fresh_agent_name}"
184
 
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  def aux_generate(attempt):
187
+ try:
188
+ base_info = self._parse_particularities_for_deeppersona(agent_particularities)
189
+ logger.info(f"DeepPersona Step 1: Generating profile at depth 100...")
190
+ profile_step1 = self._generate_via_deeppersona(base_info, attribute_count=100)
191
+ logger.info(f"DeepPersona Step 2: Enriching profile to depth 350...")
192
+ profile_step2 = self._generate_via_deeppersona(base_info, attribute_count=350, context_from_step1=profile_step1)
193
+ logger.info(f"Structuring DeepPersona output into JSON...")
194
+ result = self._structure_deeppersona_result(profile_step2, base_info)
195
+ if result and not self._is_name_already_assigned(result.get("name", "")):
 
 
 
 
 
 
 
 
 
196
  return result
197
+ except Exception as e:
198
+ logger.error(f"DeepPersona generation failed: {e}")
199
+ return None
200
 
 
 
201
  agent_spec = None
202
  attempt = 0
203
  while agent_spec is None and attempt < attempts:
204
+ attempt += 1
205
+ agent_spec = aux_generate(attempt=attempt)
 
 
 
206
 
 
207
  if agent_spec is not None:
 
 
 
 
 
 
 
 
208
  with concurrent_agent_generataion_lock:
209
  person = DeepPersona(agent_spec["name"])
210
  self._setup_agent(person, agent_spec)
 
215
  self.generated_names.append(person.get("name"))
216
 
217
  return person
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
+ return None
220
 
221
+ @transactional()
222
+ def _setup_agent(self, agent, configuration):
223
+ agent.include_persona_definitions(configuration)
 
 
224
 
225
+ @transactional()
226
+ def _unique_full_name(self, already_generated_names: list, context:str=None) -> str:
227
+ return self._aux_unique_full_name(already_generated_names=already_generated_names, context=context)
 
 
 
 
228
 
229
+ @utils.llm(temperature=1.5, presence_penalty=0.5, frequency_penalty=0.5)
230
+ def _aux_unique_full_name(self, already_generated_names: list, context:str=None) -> str:
231
+ pass
 
 
 
232
 
233
+ @classmethod
234
+ def _all_used_and_precomputed_names(cls) -> list:
235
+ return DeepPersona.all_agents_names() + cls.all_unique_names
236
+
237
+ def _is_name_already_assigned(self, name:str) -> bool:
238
+ return name in DeepPersona.all_agents_names()
239
 
240
  def generate_people(self, number_of_people:int=None,
241
  agent_particularities:str=None,
 
247
  parallelize=None,
248
  verbose:bool=False,
249
  deep_persona:bool=True) -> list:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  if number_of_people is None:
 
 
251
  number_of_people = self.population_size
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
 
 
 
 
 
 
 
 
 
 
 
 
253
  people = []
254
+ for i in range(number_of_people):
 
 
 
 
 
 
 
 
 
 
255
  person = self.generate_person(agent_particularities=agent_particularities,
 
 
 
256
  attempts=attempts,
257
  post_processing_func=post_processing_func,
258
  deep_persona=deep_persona)
259
+ if person:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  people.append(person)
 
 
 
 
 
 
 
261
  return people
262
 
 
 
 
263
  def initialize_sampling_plan(self):
264
+ self.remaining_characteristics_sample = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
deeppersona/factory/deep_persona_factory_base.py CHANGED
@@ -6,12 +6,10 @@ import deeppersona.utils as utils
6
 
7
  class DeepPersonaFactoryBase:
8
  """
9
- A base class for various types of factories. This is important because it makes it easier to extend the system, particularly
10
- regarding transaction caching.
11
  """
12
 
13
  # common randomizer used for samplings, with a default initial seed to allow for reproducibility.
14
- # subclases can use this directly as well.
15
  randomizer = random.Random(42)
16
 
17
  # A dict of all factories created so far.
@@ -19,12 +17,9 @@ class DeepPersonaFactoryBase:
19
 
20
  def __init__(self, simulation_id:str=None) -> None:
21
  """
22
- Initialize a DeepPersonaFactory instance.
23
-
24
- Args:
25
- simulation_id (str, optional): The ID of the simulation. Defaults to None.
26
  """
27
- self.name = f"Factory {utils.fresh_id(self.__class__.__name__)}" # we need a name, but no point in making it customizable
28
  self.simulation_id = simulation_id
29
 
30
  DeepPersonaFactoryBase.add_factory(self)
@@ -34,20 +29,12 @@ class DeepPersonaFactoryBase:
34
 
35
  @staticmethod
36
  def set_simulation_for_free_factories(simulation):
37
- """
38
- Sets the simulation if it is None. This allows free environments to be captured by specific simulation scopes
39
- if desired.
40
- """
41
  for factory in DeepPersonaFactoryBase.all_factories.values():
42
  if factory.simulation_id is None:
43
  simulation.add_factory(factory)
44
 
45
  @staticmethod
46
  def add_factory(factory):
47
- """
48
- Adds a factory to the list of all factories. Factory names must be unique,
49
- so if an factory with the same name already exists, an error is raised.
50
- """
51
  if factory.name in DeepPersonaFactoryBase.all_factories:
52
  raise ValueError(f"Factory names must be unique, but '{factory.name}' is already defined.")
53
  else:
@@ -55,42 +42,19 @@ class DeepPersonaFactoryBase:
55
 
56
  @classmethod
57
  def clear_factories(cls):
58
- """
59
- Clears the global list of all factories.
60
- """
61
  cls.all_factories = {}
62
  cls._clear_factories()
63
 
64
  @classmethod
65
  def _clear_factories(cls):
66
- """
67
- Additional cleanup actions can be performed here by subclasses if needed.
68
- """
69
  pass
70
 
71
- ################################################################################################
72
- # Caching mechanisms
73
- #
74
- # Factories can also be cached in a transactional way. This is necessary because the agents they
75
- # generate can be cached, and we need to ensure that the factory itself is also cached in a
76
- # consistent way.
77
- ################################################################################################
78
-
79
  def encode_complete_state(self) -> dict:
80
- """
81
- Encodes the complete state of the factory. If subclasses have elmements that are not serializable, they should override this method.
82
- """
83
-
84
  state = copy.deepcopy(self.__dict__)
85
  return state
86
 
87
  def decode_complete_state(self, state:dict):
88
- """
89
- Decodes the complete state of the factory. If subclasses have elmements that are not serializable, they should override this method.
90
- """
91
  state = copy.deepcopy(state)
92
-
93
  self.__dict__.update(state)
94
  return self
95
-
96
 
 
6
 
7
  class DeepPersonaFactoryBase:
8
  """
9
+ A base class for various types of factories.
 
10
  """
11
 
12
  # common randomizer used for samplings, with a default initial seed to allow for reproducibility.
 
13
  randomizer = random.Random(42)
14
 
15
  # A dict of all factories created so far.
 
17
 
18
  def __init__(self, simulation_id:str=None) -> None:
19
  """
20
+ Initialize a DeepPersonaFactoryBase instance.
 
 
 
21
  """
22
+ self.name = f"Factory {utils.fresh_id(self.__class__.__name__)}"
23
  self.simulation_id = simulation_id
24
 
25
  DeepPersonaFactoryBase.add_factory(self)
 
29
 
30
  @staticmethod
31
  def set_simulation_for_free_factories(simulation):
 
 
 
 
32
  for factory in DeepPersonaFactoryBase.all_factories.values():
33
  if factory.simulation_id is None:
34
  simulation.add_factory(factory)
35
 
36
  @staticmethod
37
  def add_factory(factory):
 
 
 
 
38
  if factory.name in DeepPersonaFactoryBase.all_factories:
39
  raise ValueError(f"Factory names must be unique, but '{factory.name}' is already defined.")
40
  else:
 
42
 
43
  @classmethod
44
  def clear_factories(cls):
 
 
 
45
  cls.all_factories = {}
46
  cls._clear_factories()
47
 
48
  @classmethod
49
  def _clear_factories(cls):
 
 
 
50
  pass
51
 
 
 
 
 
 
 
 
 
52
  def encode_complete_state(self) -> dict:
 
 
 
 
53
  state = copy.deepcopy(self.__dict__)
54
  return state
55
 
56
  def decode_complete_state(self, state:dict):
 
 
 
57
  state = copy.deepcopy(state)
 
58
  self.__dict__.update(state)
59
  return self
 
60
 
deeppersona/profiling.py CHANGED
@@ -75,7 +75,7 @@ class Profiler:
75
  for agent in agents:
76
  if isinstance(agent, DeepPersona):
77
  # Extract data from DeepPersona object
78
- agent_data = self._extract_deeppersona_data(agent)
79
  else:
80
  agent_data = agent.copy()
81
 
@@ -83,7 +83,7 @@ class Profiler:
83
 
84
  return processed_agents
85
 
86
- def _extract_deeppersona_data(self, agent: DeepPersona) -> Dict[str, Any]:
87
  """Extract comprehensive data from a DeepPersona object."""
88
  data = {}
89
 
 
75
  for agent in agents:
76
  if isinstance(agent, DeepPersona):
77
  # Extract data from DeepPersona object
78
+ agent_data = self._extract_tinyperson_data(agent)
79
  else:
80
  agent_data = agent.copy()
81
 
 
83
 
84
  return processed_agents
85
 
86
+ def _extract_tinyperson_data(self, agent: DeepPersona) -> Dict[str, Any]:
87
  """Extract comprehensive data from a DeepPersona object."""
88
  data = {}
89
 
deeppersona/simulation_manager.py CHANGED
@@ -4,7 +4,7 @@ import threading
4
  from datetime import datetime
5
  from deeppersona.agent import DeepPersona
6
  from deeppersona.social_network import NetworkTopology
7
- from deeppersona.environment.social_deep_world import SocialDeepWorld, SimulationResult
8
  from deeppersona.agent.social_types import Content
9
  from deeppersona.ml_models import EngagementPredictor
10
  from deeppersona.content_generation import ContentVariantGenerator
@@ -18,7 +18,7 @@ class SimulationConfig:
18
  self.user_id = kwargs.get("user_id")
19
 
20
  class Simulation:
21
- def __init__(self, id: str, config: SimulationConfig, world: SocialDeepWorld, personas: List[DeepPersona], network: NetworkTopology):
22
  self.id = id
23
  self.config = config
24
  self.world = world
@@ -43,7 +43,7 @@ class SimulationManager:
43
  if focus_group_name and focus_group_name in self.focus_groups:
44
  personas = self.focus_groups[focus_group_name]
45
  else:
46
- from deeppersona.factory import DeepPersonaFactory
47
  factory = DeepPersonaFactory(
48
  context=config.name,
49
  total_population_size=config.persona_count
@@ -58,7 +58,7 @@ class SimulationManager:
58
  network = net_gen.generate_small_world_network(config.persona_count, 4, 0.1)
59
 
60
  # Create world
61
- world = SocialDeepWorld(config.name, network=network)
62
  for persona in personas:
63
  world.add_agent(persona)
64
 
 
4
  from datetime import datetime
5
  from deeppersona.agent import DeepPersona
6
  from deeppersona.social_network import NetworkTopology
7
+ from deeppersona.environment.social_deep_world import SocialTinyWorld, SimulationResult
8
  from deeppersona.agent.social_types import Content
9
  from deeppersona.ml_models import EngagementPredictor
10
  from deeppersona.content_generation import ContentVariantGenerator
 
18
  self.user_id = kwargs.get("user_id")
19
 
20
  class Simulation:
21
+ def __init__(self, id: str, config: SimulationConfig, world: SocialTinyWorld, personas: List[DeepPersona], network: NetworkTopology):
22
  self.id = id
23
  self.config = config
24
  self.world = world
 
43
  if focus_group_name and focus_group_name in self.focus_groups:
44
  personas = self.focus_groups[focus_group_name]
45
  else:
46
+ from deeppersona.factory.deep_persona_factory import DeepPersonaFactory
47
  factory = DeepPersonaFactory(
48
  context=config.name,
49
  total_population_size=config.persona_count
 
58
  network = net_gen.generate_small_world_network(config.persona_count, 4, 0.1)
59
 
60
  # Create world
61
+ world = SocialTinyWorld(config.name, network=network)
62
  for persona in personas:
63
  world.add_agent(persona)
64
 
deeppersona/utils/config.py CHANGED
@@ -59,7 +59,7 @@ def pretty_print_datetime():
59
  print(f"Current date and time (local): {now.strftime('%Y-%m-%d %H:%M:%S')}")
60
  print(f"Current date and time (UTC): {now_utc.strftime('%Y-%m-%d %H:%M:%S')}")
61
 
62
- def pretty_print_deeppersona_version():
63
  try:
64
  import importlib.metadata
65
  version = importlib.metadata.version("deeppersona")
 
59
  print(f"Current date and time (local): {now.strftime('%Y-%m-%d %H:%M:%S')}")
60
  print(f"Current date and time (UTC): {now_utc.strftime('%Y-%m-%d %H:%M:%S')}")
61
 
62
+ def pretty_print_tinytroupe_version():
63
  try:
64
  import importlib.metadata
65
  version = importlib.metadata.version("deeppersona")
pyproject.toml CHANGED
@@ -3,11 +3,11 @@ requires = ["setuptools>=61.0"]
3
  build-backend = "setuptools.build_meta"
4
 
5
  [tool.setuptools]
6
- packages = ["tinytroupe"]
7
  include-package-data = true
8
 
9
  [project]
10
- name = "tinytroupe"
11
  version = "0.5.2"
12
  authors = [
13
  { name="Paulo Salem", email="paulo.salem@microsoft.com" }
@@ -41,7 +41,7 @@ dependencies = [
41
  ]
42
 
43
  [project.urls]
44
- "Homepage" = "https://github.com/microsoft/tinytroupe"
45
 
46
  [tool.pytest.ini_options]
47
  pythonpath = [
@@ -56,4 +56,4 @@ markers = [
56
  "examples: mark a test as the execution of examples",
57
  "notebooks: mark a test as a more specific Jupyter notebook execution example",
58
  ]
59
- addopts = "--cov=tinytroupe --cov-report=html --cov-report=xml"
 
3
  build-backend = "setuptools.build_meta"
4
 
5
  [tool.setuptools]
6
+ packages = ["deeppersona"]
7
  include-package-data = true
8
 
9
  [project]
10
+ name = "deeppersona"
11
  version = "0.5.2"
12
  authors = [
13
  { name="Paulo Salem", email="paulo.salem@microsoft.com" }
 
41
  ]
42
 
43
  [project.urls]
44
+ "Homepage" = "https://github.com/microsoft/deeppersona"
45
 
46
  [tool.pytest.ini_options]
47
  pythonpath = [
 
56
  "examples: mark a test as the execution of examples",
57
  "notebooks: mark a test as a more specific Jupyter notebook execution example",
58
  ]
59
+ addopts = "--cov=deeppersona --cov-report=html --cov-report=xml"
requirements.txt CHANGED
@@ -20,8 +20,6 @@ matplotlib
20
  pydantic
21
  textdistance
22
  scipy
23
- transformers
24
  huggingface-hub>=0.33.5
25
  gradio_client
26
- fastapi
27
- uvicorn
 
20
  pydantic
21
  textdistance
22
  scipy
23
+ transformers==4.38.2
24
  huggingface-hub>=0.33.5
25
  gradio_client