File size: 4,992 Bytes
433f3f1
 
 
 
 
 
dc3dc12
433f3f1
 
10940f4
433f3f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc3dc12
433f3f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc3dc12
 
 
 
d42c36e
 
 
 
 
 
 
 
 
 
 
dc3dc12
 
433f3f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc3dc12
433f3f1
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
import json
from config import OPENAI_CHAT_MODEL
from openai import OpenAI
import json
from supabase_ie import load_user_info
from Classification_parameters import TOPIC_DESCRIPTIONS, RESPONSE_MODE_DESCRIPTIONS, TRACKED_FIELDS, FIELD_DESCRIPTIONS, ALLOWED_STORY_TOPICS
# Shared OpenAI client; reads OPENAI_API_KEY from the environment by default.
client = OpenAI()

# Comma-separated closed set of story-topic labels, interpolated into the
# classification prompt so the model only picks from known labels.
allowed_str = ", ".join(ALLOWED_STORY_TOPICS)

# ==============================
# LLM Classification
# ==============================
def _strip_json_fences(text: str) -> str:
    """Remove Markdown code fences (``` or ```json) that chat models often wrap around JSON."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (which may carry a language tag like "json").
        newline_idx = cleaned.find("\n")
        cleaned = cleaned[newline_idx + 1:] if newline_idx != -1 else ""
        # Drop a trailing closing fence if present.
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return cleaned.strip()


def call_classification_llm(query: str, recent_history: list, user_info: dict, missing_fields: list):
    """
    Make a single OpenAI chat call that analyzes the latest user query.

    The model is instructed to:
    - Extract user info ONLY from the latest query (one entry per TRACKED_FIELDS;
      list-valued fields get list schemas, everything else a string).
    - Classify topic, response_mode, emotion using query + recent history.
    - Pick topic_for_story from the closed ALLOWED_STORY_TOPICS set (or "none").
    - Detect if an external news fetch is required
      (needs_news_fetch, news_topic, news_question).

    Args:
        query: Latest user message.
        recent_history: Recent conversation turns, provided to the model as context.
        user_info: Already-known user profile fields (context only).
        missing_fields: Tracked fields not yet filled in (context only).

    Returns:
        dict parsed from the model's JSON output. If the output is missing or
        not valid JSON (even after stripping Markdown code fences), returns a
        safe fallback dict with empty/neutral values.
    """

    # Build schema but treat lists/JSON specially so the model emits the
    # correct container type for each tracked field.
    schema_fields = {}
    for field in TRACKED_FIELDS:
        if field == "kids":
            schema_fields[field] = [{"name": "", "age": ""}]
        elif field in ["hobbies", "interests", "sports_played", "sports_watched"]:
            schema_fields[field] = []
        else:
            schema_fields[field] = ""

    schema_json = json.dumps({
        "extracted_user_info": schema_fields,
        "topic": "",
        "response_mode": "",
        "emotion": "",
        "topic_for_story": "",
        "needs_news_fetch": False,
        "news_topic": [],
        "news_question": ""
    }, indent=2)

    system_prompt = f"""
    You are Socrates, analyzing a new user query.

    You must return your response strictly in JSON, with no extra text.
    The JSON must follow this schema exactly:

    {schema_json}

    Rules:
    - extracted_user_info:
      * Derive ONLY from the latest user query (ignore history & assistant replies).
      * kids: must be an array of objects with "name" and "age".
        Example: [{{"name":"Demian","age":12}}, {{"name":"Selene","age":5}}]
      * hobbies, interests, sports_played, sports_watched: must be an array of strings.
        Example: ["surfing","football"]
      * all other fields: strings ("" if unknown).
      * Leave fields empty if not present in the latest query.

    - topic, response_mode, emotion:
      * Use both the latest query AND recent history for classification.
      * topic: one of {list(TOPIC_DESCRIPTIONS.keys())}
      * response_mode: one of {list(RESPONSE_MODE_DESCRIPTIONS.keys())}
      * emotion: one of ["curious","happy","sad","angry","neutral"]
    
    - topic_for_story:
      * Choose exactly one label from this list (closed set):
        [{allowed_str}]
      * Map common paraphrases and synonyms to the correct label:
      luck, lucky, bad luck, good luck, fate, chance -> fortune
      courage, bravery, grit, resilience, resolve -> mental_strength
      bullying, bullies, being mocked/harassed -> bullying
      fear, phobia, anxiety, panic -> phobias
      friend, friendship, companions -> friendship
      love, romance, relationship, breakup -> love
      sex, intimacy, desire -> sex
      meaning, purpose, "what is the point" -> meaning_of_life
      confidence, self-confidence, self-belief -> confidence
      * If a synonym clearly matches, DO NOT return "none"
      * Return "none" if no fitting label is clearly supported.
      * Never invent new labels.

    - needs_news_fetch:
      * true if the user is clearly asking for news, current events, or external information lookup.
      * false otherwise.

    - news_topic:
      * If needs_news_fetch=true → return a JSON array of 2–3 concise topic labels (e.g. ["US Open", "men's final", "tennis"]).
      * If false → return [].

    - news_question:
      * If needs_news_fetch=true → restate the user’s request as a clear research question.
        Example: "Who is going to play the US Open final?"
      * If false → return "".

    - Do not add any explanation or text outside the JSON.
    """

    # Context beyond the query is passed as assistant-role messages so the
    # model can use it for classification without treating it as user input.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
        {"role": "assistant", "content": f"Recent history: {recent_history}"},
        {"role": "assistant", "content": f"Known user info: {user_info}"},
        {"role": "assistant", "content": f"Missing fields to track: {missing_fields}"}
    ]

    # Low temperature: this is a classification/extraction task, not generation.
    resp = client.chat.completions.create(
        model=OPENAI_CHAT_MODEL,
        messages=messages,
        temperature=0.2
    )

    # content may be None (e.g. refusals / filtered responses); normalize to ""
    # so the failure path below handles it instead of raising TypeError.
    raw_output = resp.choices[0].message.content or ""

    try:
        # Strip Markdown code fences first — models often wrap JSON in ```json ... ```.
        parsed = json.loads(_strip_json_fences(raw_output))
        return parsed
    except json.JSONDecodeError:
        print("⚠️ Could not parse LLM output as JSON:", raw_output)
        return {
            "extracted_user_info": schema_fields,
            "topic": "",
            "response_mode": "",
            "emotion": "",
            "topic_for_story": "none",
            "needs_news_fetch": False,
            "news_topic": [],
            "news_question": ""
        }