import os
import json
from config import OPENAI_CHAT_MODEL
from openai import OpenAI
import json
from supabase_ie import load_user_info
from Classification_parameters import TOPIC_DESCRIPTIONS, RESPONSE_MODE_DESCRIPTIONS, TRACKED_FIELDS, FIELD_DESCRIPTIONS, ALLOWED_STORY_TOPICS
client = OpenAI()
allowed_str = ", ".join(ALLOWED_STORY_TOPICS)
# ==============================
# LLM Classification
# ==============================
def _schema_fields(with_examples: bool) -> dict:
    """Build the tracked-field schema dict.

    with_examples=True embeds the example kids shape shown to the model in
    the prompt; False yields genuinely empty values, suitable as a fallback
    payload (no placeholder kid entry leaking downstream).
    """
    fields = {}
    for field in TRACKED_FIELDS:
        if field == "kids":
            fields[field] = [{"name": "", "age": ""}] if with_examples else []
        elif field in ("hobbies", "interests", "sports_played", "sports_watched"):
            fields[field] = []
        else:
            fields[field] = ""
    return fields


def _strip_code_fences(raw: str) -> str:
    """Remove a surrounding Markdown code fence (``` or ```json), if present.

    Chat models frequently wrap JSON output in fences even when instructed
    not to; without this, json.loads would fail on otherwise-valid output.
    """
    text = raw.strip()
    if text.startswith("```"):
        # Drop the opening fence line (it may carry a language tag, e.g. ```json).
        text = text.split("\n", 1)[1] if "\n" in text else ""
        text = text.rstrip()
        if text.endswith("```"):
            text = text[:-3]
    return text.strip()


def _build_system_prompt(schema_json: str) -> str:
    """Render the full classification instructions around the JSON schema."""
    return f"""
You are Socrates, analyzing a new user query.
You must return your response strictly in JSON, with no extra text.
The JSON must follow this schema exactly:

{schema_json}

Rules:
- extracted_user_info:
  * Derive ONLY from the latest user query (ignore history & assistant replies).
  * kids: must be an array of objects with "name" and "age".
    Example: [{{"name":"Demian","age":12}}, {{"name":"Selene","age":5}}]
  * hobbies, interests, sports_played, sports_watched: must be an array of strings.
    Example: ["surfing","football"]
  * all other fields: strings ("" if unknown).
  * Leave fields empty if not present in the latest query.
- topic, response_mode, emotion:
  * Use both the latest query AND recent history for classification.
  * topic: one of {list(TOPIC_DESCRIPTIONS.keys())}
  * response_mode: one of {list(RESPONSE_MODE_DESCRIPTIONS.keys())}
  * emotion: one of ["curious","happy","sad","angry","neutral"]
- topic_for_story:
  * Choose exactly one label from this list (closed set):
    [{allowed_str}]
  * Map common paraphrases and synonyms to the correct label:
    luck, lucky, bad luck, good luck, fate, chance -> fortune
    courage, bravery, grit, resilience, resolve -> mental_strength
    bullying, bullies, being mocked/harassed -> bullying
    fear, phobia, anxiety, panic -> phobias
    friend, friendship, companions -> friendship
    love, romance, relationship, breakup -> love
    sex, intimacy, desire -> sex
    meaning, purpose, "what is the point" -> meaning_of_life
    confidence, self-confidence, self-belief -> confidence
  * If a synonym clearly matches, DO NOT return "none"
  * Return "none" if no fitting label is clearly supported.
  * Never invent new labels.
- needs_news_fetch:
  * true if the user is clearly asking for news, current events, or external information lookup.
  * false otherwise.
- news_topic:
  * If needs_news_fetch=true → return a JSON array of 2–3 concise topic labels (e.g. ["US Open", "men's final", "tennis"]).
  * If false → return [].
- news_question:
  * If needs_news_fetch=true → restate the user’s request as a clear research question.
    Example: "Who is going to play the US Open final?"
  * If false → return "".
- Do not add any explanation or text outside the JSON.
"""


def call_classification_llm(query: str, recent_history: list, user_info: dict, missing_fields: list):
    """Run one LLM round-trip to analyze the latest user query.

    The model is asked, in a single call, to:
      - extract tracked user info from the latest query ONLY,
      - classify topic / response_mode / emotion using query + recent history,
      - pick a story topic from a closed label set,
      - decide whether an external news fetch is required
        (needs_news_fetch, news_topic, news_question).

    Args:
        query: The latest user message.
        recent_history: Prior conversation turns, classification context only.
        user_info: Already-known user profile fields (context for the model).
        missing_fields: Tracked fields still unknown (context for the model).

    Returns:
        dict matching the JSON schema sent to the model. On any JSON parse
        failure a safe fallback dict with empty values is returned instead
        of raising, so callers never see a JSONDecodeError.
    """
    schema_json = json.dumps({
        "extracted_user_info": _schema_fields(with_examples=True),
        "topic": "",
        "response_mode": "",
        "emotion": "",
        "topic_for_story": "",
        "needs_news_fetch": False,
        "news_topic": [],
        "news_question": ""
    }, indent=2)

    messages = [
        {"role": "system", "content": _build_system_prompt(schema_json)},
        {"role": "user", "content": query},
        {"role": "assistant", "content": f"Recent history: {recent_history}"},
        {"role": "assistant", "content": f"Known user info: {user_info}"},
        {"role": "assistant", "content": f"Missing fields to track: {missing_fields}"},
    ]

    resp = client.chat.completions.create(
        model=OPENAI_CHAT_MODEL,
        messages=messages,
        temperature=0.2,  # low temperature: classification should be near-deterministic
    )
    # content may be None (e.g. refusals/filtered output); normalize to ""
    # so parsing fails cleanly into the fallback instead of raising TypeError.
    raw_output = resp.choices[0].message.content or ""

    try:
        # Strip Markdown fences first — models often emit ```json ... ```
        # despite the "no extra text" instruction.
        return json.loads(_strip_code_fences(raw_output))
    except json.JSONDecodeError:
        print("⚠️ Could not parse LLM output as JSON:", raw_output)
        # Fallback mirrors the schema but with truly empty values: the old
        # code returned the prompt schema, which injected a phantom kid
        # record ({"name": "", "age": ""}) into downstream merges.
        return {
            "extracted_user_info": _schema_fields(with_examples=False),
            "topic": "",
            "response_mode": "",
            "emotion": "",
            "topic_for_story": "none",
            "needs_news_fetch": False,
            "news_topic": [],
            "news_question": ""
        }