Spaces:
Sleeping
Sleeping
File size: 2,961 Bytes
7742949 a216aa1 7742949 b72b771 a39d90e 7742949 a216aa1 b72b771 a216aa1 b8f73a4 a216aa1 a39d90e 7742949 77e2d2d a39d90e 7742949 44d7acc 38c90b6 a39d90e 44d7acc 7742949 a216aa1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import json
from openai_client import ask_gpt
from schema_map import schema # 👈 Import the full schema
def determine_data_requirements(
    user_prompt: str,
    company_code: str,
    user_id: str,
    participant_email: str | None,
    participant_id: str | None,
) -> dict:
    """Ask the LLM to plan which collections and filters answer a question.

    Builds a system message (planning rules, the current user's identifiers
    and the JSON-serialised ``schema``) and a user message (the question),
    sends both through ``ask_gpt``, strips an optional Markdown code fence
    from the reply and parses the remainder as JSON.

    Args:
        user_prompt: The end user's natural-language question.
        company_code: Company code embedded in the prompt as the current
            company.
        user_id: Auth UID of the current user, embedded in the prompt.
        participant_email: Email of the current user; rendered as an empty
            string in the prompt when ``None`` or empty.
        participant_id: Participant document ID of the current user;
            rendered as an empty string in the prompt when ``None`` or empty.

    Returns:
        The parsed plan (the prompt requests the keys ``collections``,
        ``filters`` and ``instruction``), or ``{"error": "Planning failed:
        ..."}`` when the call fails, the reply is not valid JSON, or the
        reply is valid JSON but not an object.
    """
    # This is a plain string, not an f-string, so braces are NOT un-escaped
    # here, and a value substituted into an f-string is inserted verbatim.
    # The literal must therefore already be the exact token the model should
    # emit; the previous four-brace form leaked into the prompt as
    # {{{{participantId}}}}.
    token_participant = "{{participantId}}"

    system_msg = {
        "role": "system",
        "content": (
            "You are a planning assistant for a Firestore chatbot.\n"
            "Return ONLY JSON with keys: collections, filters, instruction.\n"
            "Rules:\n"
            "- Always include filters.companyCode set to the current company.\n"
            "- The 'userId' is an Auth UID (users/{uid}).\n"
            "- 'applications.participantId' is the doc ID of /participants/{participantId}.\n"
            "- To reference the current user's participant-linked data, use this participantId.\n"
            f"- If you need the current participant, use token {token_participant} in filters.\n"
            "- If participantId is unavailable, prefer filtering by email where applicable.\n\n"
            f"Current companyCode: '{company_code}'\n"
            f"Current uid: '{user_id}'\n"
            f"Current user email: '{participant_email or ''}'\n"
            f"Current participantId: '{participant_id or ''}'\n"
            "Use this schema for all reasoning. It includes field names, types, and aliases:\n"
            f"{json.dumps(schema)}\n\n"
            "PLANNING RULES:\n"
            "- Choose collections ONLY if they contain the fields needed to answer.\n"
            "- If the user asks about headcount/revenue history, choose 'participants' (fields: headcountHistory, revenueHistory).\n"
            "- Apply companyCode ONLY on collections that have companyCode in the schema (do NOT add companyCode to 'participants').\n"
            "- If an alias is used in the question, map it to the canonical field using $aliases.\n"
            "- Return 'collections' as a list of objects [{'name': <collection>, 'fields': [<field1>, ...]}].\n"
            "- 'filters' must only include fields that exist in the chosen collections.\n"
            "Strictly return only a valid JSON object with keys: collections, filters, instruction. No explanation."
        ),
    }
    user_msg = {
        "role": "user",
        "content": (
            f"User question: {user_prompt}\n"
            f"CompanyCode: {company_code}\n"
            f"UserID: {user_id}"
        ),
    }

    # Broad catch is deliberate: this function reports every failure (client
    # error, non-string reply, malformed JSON) as an error dict rather than
    # raising.
    try:
        result = ask_gpt([system_msg, user_msg])
        print("🧠 Raw planning response from Gemini:\n", result)
        # Models often wrap JSON in a ```json ... ``` fence; peel it off
        # before parsing. Each call is a no-op when the fence is absent.
        cleaned = (
            result.strip()
            .removeprefix("```json")
            .removeprefix("```")
            .removesuffix("```")
            .strip()
        )
        plan = json.loads(cleaned)
    except Exception as e:
        return {"error": f"Planning failed: {str(e)}"}

    # json.loads also accepts arrays and scalars; only an object is a usable
    # plan, so report anything else through the same error shape.
    if not isinstance(plan, dict):
        return {
            "error": (
                "Planning failed: expected a JSON object, "
                f"got {type(plan).__name__}"
            )
        }
    return plan
|