igiuseppe committed on
Commit
4bb354c
·
1 Parent(s): 2194a0e

using litellm - for now shuffle disabled

Browse files
Files changed (7) hide show
  1. .gitignore +2 -0
  2. core.py +12 -36
  3. core_dt.py +1 -1
  4. eval/compare.py +1 -1
  5. prompts.py +38 -5
  6. requirements.txt +2 -1
  7. utils.py +53 -25
.gitignore CHANGED
@@ -5,3 +5,5 @@ interview_example.txt
5
  eval/results/
6
  eval/__pycache__/
7
  eval/synthetic/
 
 
 
5
  eval/results/
6
  eval/__pycache__/
7
  eval/synthetic/
8
+ test_lite_llm.py
9
+ test_gemini.py
core.py CHANGED
@@ -12,7 +12,8 @@ from prompts import (
12
  GENERATE_REPORT_PROMPT,
13
  CHAT_WITH_REPORT_PROMPT,
14
  GENERATE_AUDIENCE_NAME_PROMPT,
15
- persona_schema
 
16
  )
17
 
18
  logger = logging.getLogger(__name__)
@@ -35,7 +36,7 @@ def generate_user_parameters(audience: str, scope: str,n:int=24) -> List[str]:
35
  class Response(BaseModel):
36
  additional_parameters: list[str]
37
 
38
- response = call_llm(prompt=prompt, response_format=Response,model="gpt-4o-mini",temperature=0)
39
  additional_parameters = json.loads(response)["additional_parameters"]
40
 
41
  return standard_parameters + additional_parameters
@@ -67,14 +68,12 @@ def generate_synthetic_personas(num_personas: int, audience: str, previous_perso
67
  all_new_personas = []
68
  max_iterations = 5 # Safety break to prevent infinite loops
69
  current_iteration = 0
70
-
71
-
72
- response_format = persona_schema
73
 
74
  while len(all_new_personas) < num_personas and current_iteration < max_iterations:
75
  current_iteration += 1
76
  needed_personas = num_personas - len(all_new_personas)
77
  logger.info(f"Iteration {current_iteration}/{max_iterations}: Requesting {needed_personas} more personas (Total needed: {num_personas}, Have: {len(all_new_personas)})...")
 
78
 
79
  # Combine original previous_personas with those generated in this function's previous iterations
80
  current_context_personas = (previous_personas or []) + all_new_personas
@@ -91,7 +90,7 @@ def generate_synthetic_personas(num_personas: int, audience: str, previous_perso
91
  prompt += build_previous_personas_context(current_context_personas) # Appends the formatted list
92
 
93
  try:
94
- response_str = call_llm(prompt=prompt, response_format=response_format,temperature=1, model="gpt-4.1-mini")
95
  response_data = json.loads(response_str)
96
  users_list = response_data.get("users_personas", [])
97
 
@@ -129,7 +128,7 @@ def ask_single_question_to_persona(persona: dict, question: str) -> str:
129
  persona=persona,
130
  question=question
131
  )
132
- answer = call_llm(prompt=prompt,temperature=0, model="gpt-4.1-nano")
133
  return answer
134
  except Exception as e:
135
  logger.error(f"Error asking question '{question}' to persona {persona.get('Name', 'Unknown')}: {e}")
@@ -138,30 +137,7 @@ def ask_single_question_to_persona(persona: dict, question: str) -> str:
138
  def ask_all_questions_to_persona(persona: dict, questions: List[str]) -> str:
139
  """Asks a single question to a single persona and returns the answer."""
140
 
141
- response_format = {
142
- "type": "json_schema",
143
- "json_schema": {
144
- "name": "answers_list",
145
- "schema": {
146
- "type": "object",
147
- "properties": {
148
- "answers": {
149
- "type": "array",
150
- "description": f"A list of answers to questions, with exactly {len(questions)} elements.",
151
- "items": {
152
- "type": "string",
153
- "description": "Each answer corresponding to a question."
154
- }
155
- }
156
- },
157
- "required": [
158
- "answers"
159
- ],
160
- "additionalProperties": False
161
- },
162
- "strict": True
163
- }
164
- }
165
 
166
  try:
167
  prompt = ASK_QUESTIONS_TO_PERSONA_PROMPT.format(
@@ -169,7 +145,7 @@ def ask_all_questions_to_persona(persona: dict, questions: List[str]) -> str:
169
  questions=questions,
170
  num_questions=len(questions)
171
  )
172
- response_str = call_llm(prompt=prompt,temperature=0.5, model="gpt-4.1-mini",response_format=response_format)
173
  response_data = json.loads(response_str)
174
  answers = response_data.get("answers", [])
175
  return answers
@@ -286,7 +262,7 @@ def generate_report(questions,fleet,scope) -> str:
286
  content=content,
287
  scope=scope
288
  )
289
- report_text = call_llm(prompt=prompt,model="gpt-4.1-mini",temperature=0)
290
 
291
  return report_text
292
 
@@ -315,7 +291,7 @@ def chat_with_persona(persona: dict, question: str, conversation_history: List[d
315
  )
316
  if conversation_history:
317
  prompt += f"\nHere you have the previous conversation, make sure to answer the question in a way that is consistent with it:\n{history_context}"
318
- return call_llm(prompt=prompt,temperature=0.5, model="gpt-4.1-mini")
319
 
320
  def chat_with_report(users: List[dict], question: str, questions: List[str]) -> str:
321
  """
@@ -336,7 +312,7 @@ def chat_with_report(users: List[dict], question: str, questions: List[str]) ->
336
  content=content,
337
  question=question
338
  )
339
- return call_llm(prompt=prompt,temperature=0, model="gpt-4.1-nano")
340
 
341
  def generate_audience_name(audience: str, scope: str) -> str:
342
  """
@@ -353,4 +329,4 @@ def generate_audience_name(audience: str, scope: str) -> str:
353
  audience=audience,
354
  scope=scope
355
  )
356
- return call_llm(prompt=prompt,model="gpt-4.1-nano",temperature=0)
 
12
  GENERATE_REPORT_PROMPT,
13
  CHAT_WITH_REPORT_PROMPT,
14
  GENERATE_AUDIENCE_NAME_PROMPT,
15
+ persona_schema,
16
+ answers_schema
17
  )
18
 
19
  logger = logging.getLogger(__name__)
 
36
  class Response(BaseModel):
37
  additional_parameters: list[str]
38
 
39
+ response = call_llm(prompt=prompt, response_format=Response,model_type="mid",temperature=0)
40
  additional_parameters = json.loads(response)["additional_parameters"]
41
 
42
  return standard_parameters + additional_parameters
 
68
  all_new_personas = []
69
  max_iterations = 5 # Safety break to prevent infinite loops
70
  current_iteration = 0
 
 
 
71
 
72
  while len(all_new_personas) < num_personas and current_iteration < max_iterations:
73
  current_iteration += 1
74
  needed_personas = num_personas - len(all_new_personas)
75
  logger.info(f"Iteration {current_iteration}/{max_iterations}: Requesting {needed_personas} more personas (Total needed: {num_personas}, Have: {len(all_new_personas)})...")
76
+ response_format = persona_schema(needed_personas)
77
 
78
  # Combine original previous_personas with those generated in this function's previous iterations
79
  current_context_personas = (previous_personas or []) + all_new_personas
 
90
  prompt += build_previous_personas_context(current_context_personas) # Appends the formatted list
91
 
92
  try:
93
+ response_str = call_llm(prompt=prompt, response_format=response_format,temperature=1, model_type="mid",shuffle=False)
94
  response_data = json.loads(response_str)
95
  users_list = response_data.get("users_personas", [])
96
 
 
128
  persona=persona,
129
  question=question
130
  )
131
+ answer = call_llm(prompt=prompt,temperature=0, model_type="low",shuffle=False)
132
  return answer
133
  except Exception as e:
134
  logger.error(f"Error asking question '{question}' to persona {persona.get('Name', 'Unknown')}: {e}")
 
137
  def ask_all_questions_to_persona(persona: dict, questions: List[str]) -> str:
138
  """Asks a single question to a single persona and returns the answer."""
139
 
140
+ response_format = answers_schema(len(questions))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  try:
143
  prompt = ASK_QUESTIONS_TO_PERSONA_PROMPT.format(
 
145
  questions=questions,
146
  num_questions=len(questions)
147
  )
148
+ response_str = call_llm(prompt=prompt,temperature=0.5, model_type="mid",response_format=response_format,shuffle=False)
149
  response_data = json.loads(response_str)
150
  answers = response_data.get("answers", [])
151
  return answers
 
262
  content=content,
263
  scope=scope
264
  )
265
+ report_text = call_llm(prompt=prompt,model_type="mid",temperature=0)
266
 
267
  return report_text
268
 
 
291
  )
292
  if conversation_history:
293
  prompt += f"\nHere you have the previous conversation, make sure to answer the question in a way that is consistent with it:\n{history_context}"
294
+ return call_llm(prompt=prompt,temperature=0.5, model_type="mid",shuffle=False)
295
 
296
  def chat_with_report(users: List[dict], question: str, questions: List[str]) -> str:
297
  """
 
312
  content=content,
313
  question=question
314
  )
315
+ return call_llm(prompt=prompt,temperature=0, model_type="low")
316
 
317
  def generate_audience_name(audience: str, scope: str) -> str:
318
  """
 
329
  audience=audience,
330
  scope=scope
331
  )
332
+ return call_llm(prompt=prompt,model_type="low",temperature=0)
core_dt.py CHANGED
@@ -84,7 +84,7 @@ The result should be in plain text.
84
  Here is the text:
85
  {agent_particularities}
86
  """
87
- return call_llm(prompt=prompt)
88
 
89
  def generate_new_memory(n,person):
90
  return person.retrieve_recent_memories(include_omission_info=False)[-n:]
 
84
  Here is the text:
85
  {agent_particularities}
86
  """
87
+ return call_llm(prompt=prompt,model_type="mid",temperature=0.5)
88
 
89
  def generate_new_memory(n,person):
90
  return person.retrieve_recent_memories(include_omission_info=False)[-n:]
eval/compare.py CHANGED
@@ -104,7 +104,7 @@ The structure of your output must be a simple list of insights.
104
  """
105
 
106
  logger.info(f"Extracting insights for {audience_type} audience...")
107
- insights = call_llm(prompt=prompt,temperature=0, model="gpt-4.1-mini")
108
  logger.info(f"Successfully extracted insights for {audience_type} audience")
109
 
110
  return insights
 
104
  """
105
 
106
  logger.info(f"Extracting insights for {audience_type} audience...")
107
+ insights = call_llm(prompt=prompt,temperature=0, model_type="mid")
108
  logger.info(f"Successfully extracted insights for {audience_type} audience")
109
 
110
  return insights
prompts.py CHANGED
@@ -276,7 +276,8 @@ Create a very concise name (max one sentence) that captures the essence of this
276
  Respond with ONLY the name, nothing else.
277
  """
278
 
279
- persona_schema={
 
280
  "type": "json_schema",
281
  "json_schema": {
282
  "name": "user_personas_response",
@@ -285,10 +286,10 @@ persona_schema={
285
  "properties": {
286
  "users_personas": {
287
  "type": "array",
288
- "description": "An array of synthetic user personas.",
289
  "items": {
290
  "type": "object",
291
- "description": "A single synthetic user persona defined by 20 key parameters.",
292
  "properties": {
293
  "Name": {
294
  "type": "string",
@@ -513,7 +514,9 @@ persona_schema={
513
  "general_interests_and_hobbies"
514
  ],
515
  "additionalProperties": False
516
- }
 
 
517
  }
518
  },
519
  "required": ["users_personas"],
@@ -521,4 +524,34 @@ persona_schema={
521
  },
522
  "strict": True
523
  }
524
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  Respond with ONLY the name, nothing else.
277
  """
278
 
279
+ def persona_schema(n):
280
+ persona_schema={
281
  "type": "json_schema",
282
  "json_schema": {
283
  "name": "user_personas_response",
 
286
  "properties": {
287
  "users_personas": {
288
  "type": "array",
289
+ "description": f"An array of exactly {n} synthetic user personas.",
290
  "items": {
291
  "type": "object",
292
+ "description": "A single synthetic user persona defined by the following key parameters.",
293
  "properties": {
294
  "Name": {
295
  "type": "string",
 
514
  "general_interests_and_hobbies"
515
  ],
516
  "additionalProperties": False
517
+ },
518
+ "minItems": n,
519
+ "maxItems": n
520
  }
521
  },
522
  "required": ["users_personas"],
 
524
  },
525
  "strict": True
526
  }
527
+ }
528
+ return persona_schema
529
+
530
def answers_schema(n):
    """Return an OpenAI-style JSON-schema response format describing an
    object with an "answers" array of exactly *n* string elements."""
    item_spec = {
        "type": "string",
        "description": "Each answer corresponding to a question.",
    }
    answers_spec = {
        "type": "array",
        "description": f"A list of answers to questions, with exactly {n} elements.",
        "items": item_spec,
        "minItems": n,
        "maxItems": n,
    }
    object_spec = {
        "type": "object",
        "properties": {"answers": answers_spec},
        "required": ["answers"],
        "additionalProperties": False,
    }
    return {
        "type": "json_schema",
        "json_schema": {
            "name": "answers_list",
            "schema": object_spec,
            "strict": True,
        },
    }
requirements.txt CHANGED
@@ -6,4 +6,5 @@ requests==2.32.3
6
  gradio==5.23.2
7
  fastapi==0.115.12
8
  uvicorn==0.34.0
9
- git+https://github.com/igiuseppe/TinyTroupeFork.git
 
 
6
  gradio==5.23.2
7
  fastapi==0.115.12
8
  uvicorn==0.34.0
9
+ git+https://github.com/igiuseppe/TinyTroupeFork.git
10
+ litellm==1.71.2
utils.py CHANGED
@@ -1,29 +1,57 @@
1
- import openai
 
 
 
 
 
 
2
 
3
- LLM_MODEL = "gpt-4.1-nano"
4
- temperature = 0.5
5
- frequency_penalty=0
6
- presence_penalty=0
7
- top_p=0
8
 
9
- def call_llm(prompt: str, response_format=None, model=LLM_MODEL,temperature=temperature,frequency_penalty=frequency_penalty,presence_penalty=presence_penalty,top_p=top_p) -> str:
10
- client = openai.OpenAI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  if response_format:
12
- response = client.beta.chat.completions.parse(
13
- model=model,
14
- messages=[{"role": "user", "content": prompt}],
15
- response_format=response_format,
16
- temperature=temperature,
17
- frequency_penalty=frequency_penalty,
18
- presence_penalty=presence_penalty,
19
- top_p=top_p
20
- )
 
 
21
  else:
22
- response = client.chat.completions.create(
23
- model=model,
24
- messages=[{"role": "user", "content": prompt}],
25
- temperature=temperature,
26
- frequency_penalty=0.0,
27
- presence_penalty=0.0
28
- )
29
- return response.choices[0].message.content
 
1
+ from litellm import completion, _turn_on_debug
2
+ from dotenv import load_dotenv
3
+ import random
4
+ import logging
5
+ load_dotenv()
6
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
7
+ logger = logging.getLogger(__name__)
8
 
9
+ models_low=["gemini/gemini-2.0-flash","openai/gpt-4.1-nano"]
10
+ models_mid=["gemini/gemini-2.5-flash-preview-05-20","openai/gpt-4.1-mini"]
11
+ models_high=["gemini/gemini-2.5-pro-preview-05-06","openai/gpt-4.1"]
 
 
12
 
13
+ model_low="openai/gpt-4.1-nano"
14
+ model_mid="openai/gpt-4.1-mini"
15
+ model_high="openai/gpt-4.1"
16
+
17
def call_llm(prompt: str, temperature: float,model_type: str,response_format=None,tools=None,shuffle=False,return_tokens=False) -> str:
    """Send a single user-message completion request through litellm.

    Args:
        prompt: The user prompt to send as the sole message.
        temperature: Sampling temperature forwarded to the model.
        model_type: Capability tier, one of "low", "mid" or "high".
        response_format: Optional JSON-schema response format, passed through.
        tools: Optional tool definitions, passed through.
        shuffle: When True, pick a random provider model from the tier's pool;
            otherwise use the fixed per-tier model.
        return_tokens: When True, also return (input_tokens, output_tokens).

    Returns:
        The response text, or a tuple (text, input_tokens, output_tokens)
        when return_tokens is True.

    Raises:
        ValueError: If model_type is not a recognised tier. (The original
            code left `model` unassigned here, producing an opaque
            UnboundLocalError at the completion() call.)
    """
    # One table instead of two parallel if/elif ladders: tier -> (pool, fixed).
    tiers = {
        "low": (models_low, model_low),
        "mid": (models_mid, model_mid),
        "high": (models_high, model_high),
    }
    if model_type not in tiers:
        raise ValueError(f"Unknown model_type: {model_type!r} (expected 'low', 'mid' or 'high')")
    pool, fixed = tiers[model_type]

    if shuffle:
        model = random.choice(pool)
        logger.info(f"SHUFFLE. Using model: {model}")
    else:
        model = fixed

    messages=[
        {"role": "user", "content": prompt},
    ]

    completion_args = {
        "model": model,
        "messages": messages,
        "temperature": temperature
    }

    # Only attach optional arguments when provided, so provider defaults apply.
    if response_format:
        completion_args["response_format"] = response_format

    if tools:
        completion_args["tools"] = tools

    response = completion(**completion_args)
    response_str = response.choices[0].message.content
    if return_tokens:
        # usage follows the OpenAI-style schema litellm normalises responses to
        output_tokens = response.usage.completion_tokens
        input_tokens = response.usage.prompt_tokens
        return response_str,input_tokens,output_tokens
    else:
        return response_str