Spaces:
Sleeping
Sleeping
Monish BV committed on
Commit ·
6dfbf93
1
Parent(s): eb4dde5
added output length and name fix
Browse files- .env.example +4 -1
- backend/main.py +88 -2
- backend/mcp/llm_planner.py +7 -1
- backend/providers/base.py +1 -0
- backend/providers/claude.py +3 -2
- backend/providers/gemini.py +2 -0
- backend/providers/gpt.py +2 -0
- backend/responders.py +3 -4
.env.example
CHANGED
|
@@ -30,11 +30,14 @@ KIOSK_PORT=8000
|
|
| 30 |
# LLM timeout in seconds
|
| 31 |
KIOSK_LLM_TIMEOUT=60
|
| 32 |
|
|
|
|
|
|
|
|
|
|
| 33 |
# System prompt (required - defines the kiosk persona)
|
| 34 |
KIOSK_LLM_SYSTEM_PROMPT="You are a conversational receptionist for the Northwestern CS Kiosk whose responses are spoken aloud. Speak naturally and never include stage directions or annotations."
|
| 35 |
|
| 36 |
# Style guidelines (required - ensures speech-friendly responses)
|
| 37 |
-
KIOSK_LLM_STYLE="
|
| 38 |
|
| 39 |
# =============================================================================
|
| 40 |
# Hugging Face Dataset Persistence (optional - for metrics/history across restarts)
|
|
|
|
| 30 |
# LLM timeout in seconds
|
| 31 |
KIOSK_LLM_TIMEOUT=60
|
| 32 |
|
| 33 |
+
# Max tokens per response (reinforces brevity; default 384 if unset)
|
| 34 |
+
KIOSK_LLM_MAX_TOKENS=384
|
| 35 |
+
|
| 36 |
# System prompt (required - defines the kiosk persona)
|
| 37 |
KIOSK_LLM_SYSTEM_PROMPT="You are a conversational receptionist for the Northwestern CS Kiosk whose responses are spoken aloud. Speak naturally and never include stage directions or annotations."
|
| 38 |
|
| 39 |
# Style guidelines (required - ensures speech-friendly responses)
|
| 40 |
+
KIOSK_LLM_STYLE="Be very brief. One or two sentences max. No long lists—summarize top 2-3 items only."
|
| 41 |
|
| 42 |
# =============================================================================
|
| 43 |
# Hugging Face Dataset Persistence (optional - for metrics/history across restarts)
|
backend/main.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
"""FastAPI backend for Northwestern CS Kiosk - API only (no frontend)."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
|
|
|
| 4 |
import json
|
| 5 |
import logging
|
| 6 |
import os
|
|
@@ -84,6 +85,88 @@ app.add_middleware(
|
|
| 84 |
_orchestrator_lock = threading.Lock()
|
| 85 |
logger = logging.getLogger(__name__)
|
| 86 |
_hf_scheduler = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
class QueryPayload(BaseModel):
|
|
@@ -268,6 +351,8 @@ def _build_client_from_env(provider: str, model_override: Optional[str]) -> Opti
|
|
| 268 |
return None
|
| 269 |
|
| 270 |
timeout = int(os.getenv("KIOSK_LLM_TIMEOUT", "60"))
|
|
|
|
|
|
|
| 271 |
api_env = settings.get("api_key")
|
| 272 |
model_env = settings.get("model")
|
| 273 |
base_url_env = settings.get("base_url")
|
|
@@ -288,6 +373,7 @@ def _build_client_from_env(provider: str, model_override: Optional[str]) -> Opti
|
|
| 288 |
model=model,
|
| 289 |
timeout_sec=timeout,
|
| 290 |
base_url=base_url or None,
|
|
|
|
| 291 |
)
|
| 292 |
|
| 293 |
try:
|
|
@@ -306,7 +392,7 @@ def _build_responder(
|
|
| 306 |
"KIOSK_LLM_SYSTEM_PROMPT",
|
| 307 |
"You are a conversational receptionist for the Northwestern CS Kiosk whose responses are spoken aloud. Speak naturally and never include stage directions or annotations.",
|
| 308 |
)
|
| 309 |
-
style = os.getenv("KIOSK_LLM_STYLE", "
|
| 310 |
|
| 311 |
provider_name = provider or os.getenv("KIOSK_LLM_PROVIDER", "anthropic")
|
| 312 |
model_override = model_override if provider else (model_override or os.getenv("KIOSK_LLM_MODEL"))
|
|
@@ -344,7 +430,7 @@ def _create_planner() -> LLMActionPlanner:
|
|
| 344 |
if not client:
|
| 345 |
raise RuntimeError("LLM planner requires a configured provider (set KIOSK_LLM_PROVIDER/KEY).")
|
| 346 |
schemas = get_all_tool_schemas()
|
| 347 |
-
return LLMActionPlanner(client, schemas=schemas)
|
| 348 |
|
| 349 |
|
| 350 |
class ConversationOrchestrator:
|
|
|
|
| 1 |
"""FastAPI backend for Northwestern CS Kiosk - API only (no frontend)."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
+
import csv
|
| 5 |
import json
|
| 6 |
import logging
|
| 7 |
import os
|
|
|
|
| 85 |
_orchestrator_lock = threading.Lock()
|
| 86 |
logger = logging.getLogger(__name__)
|
| 87 |
_hf_scheduler = None
|
| 88 |
+
_entity_names: List[str] = []
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _load_entity_names() -> None:
    """Scrape entity names from the Archive folder and cache them in memory.

    Scans every ``.csv`` and ``.txt`` file directly inside ``ARCHIVE_DIR``
    (no recursion), collects the names found in them, and stores the
    de-duplicated, sorted result in the module-level ``_entity_names`` list.

    Nothing is raised to the caller: a file that fails while being read
    contributes only the names gathered before the failure (a warning is
    logged), and an unexpected error during the directory scan is logged and
    leaves ``_entity_names`` empty.
    """
    global _entity_names

    def _cell(row: dict, column: str) -> str:
        """Return the stripped text of ``row[column]``, or ``""`` if absent.

        ``csv.DictReader`` fills cells missing from a short row with ``None``,
        so ``row.get(column, "")`` is not enough to guarantee a string.
        """
        value = row.get(column)
        return value.strip() if isinstance(value, str) else ""

    def _extract_names_from_csv(filepath: Path) -> List[str]:
        """Return the names listed in one CSV file.

        A ``Name`` / ``Assignee Name`` header wins outright; otherwise a
        ``First Name`` + ``Last Name`` pair is combined as "First Last".
        Values equal to ``NA`` (any case) are treated as "no name".
        """
        names: List[str] = []
        try:
            # utf-8-sig transparently drops a leading BOM (common in
            # spreadsheet exports) that would otherwise be glued to the first
            # header; newline="" is what the csv module expects.
            with open(filepath, "r", encoding="utf-8-sig", newline="") as f:
                reader = csv.DictReader(f)
                if reader.fieldnames is None:
                    return names

                # Ordered as [last, first] when both halves are present.
                name_columns: List[str] = []
                for field in reader.fieldnames:
                    field_lower = field.strip().lower()
                    if field_lower in ("name", "assignee name"):
                        name_columns = [field]
                        break
                    if field_lower == "first name":
                        name_columns.append(field)
                    elif field_lower == "last name":
                        name_columns.insert(0, field)

                if not name_columns:
                    # No recognisable name header: nothing to extract.
                    return names

                for row in reader:
                    if len(name_columns) == 1:
                        name = _cell(row, name_columns[0])
                        if name and name.upper() != "NA":
                            names.append(name)
                    elif len(name_columns) == 2:
                        last_name = _cell(row, name_columns[0])
                        first_name = _cell(row, name_columns[1])
                        # A row is skipped entirely if either half is "NA".
                        if "NA" in (last_name.upper(), first_name.upper()):
                            continue
                        full_name = " ".join(
                            part for part in (first_name, last_name) if part
                        )
                        if full_name:
                            names.append(full_name)
        except Exception as e:
            logger.warning("Error reading CSV %s: %s", filepath, e)
        return names

    def _extract_names_from_text(filepath: Path) -> List[str]:
        """Return the values of every ``Name: ...`` line in a text file."""
        prefix = "Name:"
        names: List[str] = []
        try:
            with open(filepath, "r", encoding="utf-8-sig") as f:
                for line in f:
                    line = line.strip()
                    if not line.startswith(prefix):
                        continue
                    # Drop only the leading marker; a later "Name:" inside
                    # the value is part of the value.
                    name = line[len(prefix):].strip()
                    if name:
                        names.append(name)
        except Exception as e:
            logger.warning("Error reading text file %s: %s", filepath, e)
        return names

    # File suffix (lower-cased) -> extractor for that format.
    extractors = {
        ".csv": _extract_names_from_csv,
        ".txt": _extract_names_from_text,
    }

    try:
        archive_dir = ARCHIVE_DIR
        if not archive_dir.exists():
            logger.warning("Archive directory not found at %s", archive_dir)
            _entity_names = []
            return

        all_names: set = set()
        file_count = 0
        for filepath in sorted(archive_dir.iterdir()):
            if not filepath.is_file():
                continue
            extractor = extractors.get(filepath.suffix.lower())
            if extractor is None:
                continue
            all_names.update(extractor(filepath))
            file_count += 1

        _entity_names = sorted(all_names)
        logger.info("Scraped %d unique entity names from %d files in Archive", len(_entity_names), file_count)
    except Exception as e:
        logger.error("Failed to scrape entity names from Archive: %s", e)
        _entity_names = []
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
_load_entity_names()
|
| 170 |
|
| 171 |
|
| 172 |
class QueryPayload(BaseModel):
|
|
|
|
| 351 |
return None
|
| 352 |
|
| 353 |
timeout = int(os.getenv("KIOSK_LLM_TIMEOUT", "60"))
|
| 354 |
+
max_tokens_raw = os.getenv("KIOSK_LLM_MAX_TOKENS", "").strip()
|
| 355 |
+
max_tokens = int(max_tokens_raw) if max_tokens_raw.isdigit() else None
|
| 356 |
api_env = settings.get("api_key")
|
| 357 |
model_env = settings.get("model")
|
| 358 |
base_url_env = settings.get("base_url")
|
|
|
|
| 373 |
model=model,
|
| 374 |
timeout_sec=timeout,
|
| 375 |
base_url=base_url or None,
|
| 376 |
+
max_tokens=max_tokens,
|
| 377 |
)
|
| 378 |
|
| 379 |
try:
|
|
|
|
| 392 |
"KIOSK_LLM_SYSTEM_PROMPT",
|
| 393 |
"You are a conversational receptionist for the Northwestern CS Kiosk whose responses are spoken aloud. Speak naturally and never include stage directions or annotations.",
|
| 394 |
)
|
| 395 |
+
style = os.getenv("KIOSK_LLM_STYLE", "Be very brief. One or two sentences max. No long lists—summarize top 2-3 items only.")
|
| 396 |
|
| 397 |
provider_name = provider or os.getenv("KIOSK_LLM_PROVIDER", "anthropic")
|
| 398 |
model_override = model_override if provider else (model_override or os.getenv("KIOSK_LLM_MODEL"))
|
|
|
|
| 430 |
if not client:
|
| 431 |
raise RuntimeError("LLM planner requires a configured provider (set KIOSK_LLM_PROVIDER/KEY).")
|
| 432 |
schemas = get_all_tool_schemas()
|
| 433 |
+
return LLMActionPlanner(client, schemas=schemas, entity_names=_entity_names)
|
| 434 |
|
| 435 |
|
| 436 |
class ConversationOrchestrator:
|
backend/mcp/llm_planner.py
CHANGED
|
@@ -12,6 +12,10 @@ from .actions import Action, PlannerContext, ALLOWED_ACTIONS
|
|
| 12 |
BASE_PROMPT = (
|
| 13 |
"You are the routing planner for the Northwestern CS Kiosk. "
|
| 14 |
"Select the best tool schema(s) for each user question. "
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
"Topic switching: when the user asks a new, standalone question that introduces a different subject or intent (e.g. 'office hours for CS 336 today' when we were just discussing a professor), ignore the previous context and route based on the new question alone. Do NOT carry over the old subject or topic. "
|
| 16 |
"You receive context.full_history (full session), context.short_history (last 3 turns), context.topic, context.subject, and context.last_class. Use this context only for follow-ups that explicitly reference or continue the current subject (pronouns, 'his work', 'any other slots', etc.). "
|
| 17 |
"When context.topic is 'professor' and context.subject is set and the question is a follow-up about that person: follow-ups like 'his work', 'her office', 'where can I find him', 'more about his research', 'yes please' (after offering directions) → return lookup_person or lookup_location with use_last_subject=true or name=context.subject. "
|
|
@@ -30,9 +34,10 @@ BASE_PROMPT = (
|
|
| 30 |
class LLMActionPlanner:
|
| 31 |
"""Structured planner that relies on an LLM for intent classification."""
|
| 32 |
|
| 33 |
-
def __init__(self, client: BaseLLM, *, schemas: List[Dict[str, Any]]) -> None:
|
| 34 |
self.client = client
|
| 35 |
self.schemas = schemas
|
|
|
|
| 36 |
|
| 37 |
def plan(self, question: str, context: PlannerContext) -> Optional[List[Action]]:
|
| 38 |
try:
|
|
@@ -50,6 +55,7 @@ class LLMActionPlanner:
|
|
| 50 |
"Each item should be an object with 'action' and 'arguments'. "
|
| 51 |
"If you return a single action, returning {'action':..., 'arguments':{...}} is acceptable."
|
| 52 |
),
|
|
|
|
| 53 |
"tool_schemas": self.schemas,
|
| 54 |
"response_format": {
|
| 55 |
"type": "object",
|
|
|
|
| 12 |
BASE_PROMPT = (
|
| 13 |
"You are the routing planner for the Northwestern CS Kiosk. "
|
| 14 |
"Select the best tool schema(s) for each user question. "
|
| 15 |
+
"NAME MATCHING: When the user mentions a person's name, try to match it against the provided list of 'available_names'. "
|
| 16 |
+
"Consider phonetic similarity, common nicknames (e.g., 'Chris' for 'Kristian', 'Jim' for 'James'), and name variations. "
|
| 17 |
+
"If you find a match in 'available_names', use the matched name exactly as it appears in the list in your arguments. "
|
| 18 |
+
"If no good match is found, use the name as the user provided it. "
|
| 19 |
"Topic switching: when the user asks a new, standalone question that introduces a different subject or intent (e.g. 'office hours for CS 336 today' when we were just discussing a professor), ignore the previous context and route based on the new question alone. Do NOT carry over the old subject or topic. "
|
| 20 |
"You receive context.full_history (full session), context.short_history (last 3 turns), context.topic, context.subject, and context.last_class. Use this context only for follow-ups that explicitly reference or continue the current subject (pronouns, 'his work', 'any other slots', etc.). "
|
| 21 |
"When context.topic is 'professor' and context.subject is set and the question is a follow-up about that person: follow-ups like 'his work', 'her office', 'where can I find him', 'more about his research', 'yes please' (after offering directions) → return lookup_person or lookup_location with use_last_subject=true or name=context.subject. "
|
|
|
|
| 34 |
class LLMActionPlanner:
|
| 35 |
"""Structured planner that relies on an LLM for intent classification."""
|
| 36 |
|
| 37 |
+
def __init__(self, client: BaseLLM, *, schemas: List[Dict[str, Any]], entity_names: Optional[List[str]] = None) -> None:
|
| 38 |
self.client = client
|
| 39 |
self.schemas = schemas
|
| 40 |
+
self.entity_names = entity_names or []
|
| 41 |
|
| 42 |
def plan(self, question: str, context: PlannerContext) -> Optional[List[Action]]:
|
| 43 |
try:
|
|
|
|
| 55 |
"Each item should be an object with 'action' and 'arguments'. "
|
| 56 |
"If you return a single action, returning {'action':..., 'arguments':{...}} is acceptable."
|
| 57 |
),
|
| 58 |
+
"available_names": self.entity_names,
|
| 59 |
"tool_schemas": self.schemas,
|
| 60 |
"response_format": {
|
| 61 |
"type": "object",
|
backend/providers/base.py
CHANGED
|
@@ -25,6 +25,7 @@ class ProviderConfig:
|
|
| 25 |
model: str
|
| 26 |
timeout_sec: int = 60
|
| 27 |
base_url: Optional[str] = None
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
@dataclass
|
|
|
|
| 25 |
model: str
|
| 26 |
timeout_sec: int = 60
|
| 27 |
base_url: Optional[str] = None
|
| 28 |
+
max_tokens: Optional[int] = None
|
| 29 |
|
| 30 |
|
| 31 |
@dataclass
|
backend/providers/claude.py
CHANGED
|
@@ -10,7 +10,7 @@ import httpx
|
|
| 10 |
from .base import BaseLLM, ChatMessage, LLMResponse, ProviderConfig
|
| 11 |
|
| 12 |
DEFAULT_ENDPOINT = "https://api.anthropic.com/v1/messages"
|
| 13 |
-
|
| 14 |
|
| 15 |
|
| 16 |
def _anthropic_headers(api_key: str) -> Dict[str, str]:
|
|
@@ -67,10 +67,11 @@ class ClaudeProvider(BaseLLM):
|
|
| 67 |
raise ValueError("Claude requires at least one user message.")
|
| 68 |
|
| 69 |
endpoint = self.config.base_url or DEFAULT_ENDPOINT
|
|
|
|
| 70 |
payload: Dict[str, object] = {
|
| 71 |
"model": self.config.model or self.describe()["model"],
|
| 72 |
"messages": anthropic_messages,
|
| 73 |
-
"max_tokens":
|
| 74 |
}
|
| 75 |
if system_prompts:
|
| 76 |
payload["system"] = "\n\n".join(system_prompts)
|
|
|
|
| 10 |
from .base import BaseLLM, ChatMessage, LLMResponse, ProviderConfig
|
| 11 |
|
| 12 |
DEFAULT_ENDPOINT = "https://api.anthropic.com/v1/messages"
|
| 13 |
+
DEFAULT_MAX_TOKENS = 384
|
| 14 |
|
| 15 |
|
| 16 |
def _anthropic_headers(api_key: str) -> Dict[str, str]:
|
|
|
|
| 67 |
raise ValueError("Claude requires at least one user message.")
|
| 68 |
|
| 69 |
endpoint = self.config.base_url or DEFAULT_ENDPOINT
|
| 70 |
+
max_tokens = self.config.max_tokens if self.config.max_tokens is not None else DEFAULT_MAX_TOKENS
|
| 71 |
payload: Dict[str, object] = {
|
| 72 |
"model": self.config.model or self.describe()["model"],
|
| 73 |
"messages": anthropic_messages,
|
| 74 |
+
"max_tokens": max_tokens,
|
| 75 |
}
|
| 76 |
if system_prompts:
|
| 77 |
payload["system"] = "\n\n".join(system_prompts)
|
backend/providers/gemini.py
CHANGED
|
@@ -34,6 +34,8 @@ class GeminiGenerative(BaseLLM):
|
|
| 34 |
}
|
| 35 |
]
|
| 36 |
}
|
|
|
|
|
|
|
| 37 |
url = f"{self.base_url}/models/{self.config.model}:generateContent?key={self.config.api_key}"
|
| 38 |
response = self._session.post(url, json=payload, timeout=self.config.timeout_sec)
|
| 39 |
response.raise_for_status()
|
|
|
|
| 34 |
}
|
| 35 |
]
|
| 36 |
}
|
| 37 |
+
if self.config.max_tokens is not None:
|
| 38 |
+
payload.setdefault("generationConfig", {})["maxOutputTokens"] = self.config.max_tokens
|
| 39 |
url = f"{self.base_url}/models/{self.config.model}:generateContent?key={self.config.api_key}"
|
| 40 |
response = self._session.post(url, json=payload, timeout=self.config.timeout_sec)
|
| 41 |
response.raise_for_status()
|
backend/providers/gpt.py
CHANGED
|
@@ -32,6 +32,8 @@ class OpenAIChat(BaseLLM):
|
|
| 32 |
"model": self.config.model,
|
| 33 |
"messages": formatted,
|
| 34 |
}
|
|
|
|
|
|
|
| 35 |
response = self._session.post(
|
| 36 |
f"{self.base_url}/chat/completions",
|
| 37 |
json=payload,
|
|
|
|
| 32 |
"model": self.config.model,
|
| 33 |
"messages": formatted,
|
| 34 |
}
|
| 35 |
+
if self.config.max_tokens is not None:
|
| 36 |
+
payload["max_tokens"] = self.config.max_tokens
|
| 37 |
response = self._session.post(
|
| 38 |
f"{self.base_url}/chat/completions",
|
| 39 |
json=payload,
|
backend/responders.py
CHANGED
|
@@ -95,10 +95,9 @@ class LLMResponder(Responder):
|
|
| 95 |
if self.style_guidelines:
|
| 96 |
prompt_sections.append(f"STYLE: {self.style_guidelines}")
|
| 97 |
prompt_sections.append(
|
| 98 |
-
"TASK: Compose a
|
| 99 |
-
"Write as if speaking aloud via text-to-speech: use natural conversational sentences, avoid bullet lists or markup, "
|
| 100 |
-
"
|
| 101 |
-
"When multiple office hour slots are provided (different times, instructors, or rooms), list all of them—do not condense to a single slot. "
|
| 102 |
"For date or time questions (e.g. 'what time is it?', 'what day is tomorrow?'), use the CONTEXT section when facts are empty. "
|
| 103 |
"If the facts are empty and the question is not about date/time, explain what information is missing instead of inventing details."
|
| 104 |
)
|
|
|
|
| 95 |
if self.style_guidelines:
|
| 96 |
prompt_sections.append(f"STYLE: {self.style_guidelines}")
|
| 97 |
prompt_sections.append(
|
| 98 |
+
"TASK: Compose a very brief, friendly reply grounded in the provided facts and notes. "
|
| 99 |
+
"Keep responses to 1-2 sentences when possible. Write as if speaking aloud via text-to-speech: use natural conversational sentences, avoid bullet lists or markup, and do not include stage directions like *in a warm voice*. "
|
| 100 |
+
"When many items exist (office hours, faculty list, etc.), mention only the first 2-3 and say how many more there are—do not list everything. "
|
|
|
|
| 101 |
"For date or time questions (e.g. 'what time is it?', 'what day is tomorrow?'), use the CONTEXT section when facts are empty. "
|
| 102 |
"If the facts are empty and the question is not about date/time, explain what information is missing instead of inventing details."
|
| 103 |
)
|