Monish BV committed on
Commit
6dfbf93
·
1 Parent(s): eb4dde5

added output length and name fix

Browse files
.env.example CHANGED
@@ -30,11 +30,14 @@ KIOSK_PORT=8000
30
  # LLM timeout in seconds
31
  KIOSK_LLM_TIMEOUT=60
32
 
 
 
 
33
  # System prompt (required - defines the kiosk persona)
34
  KIOSK_LLM_SYSTEM_PROMPT="You are a conversational receptionist for the Northwestern CS Kiosk whose responses are spoken aloud. Speak naturally and never include stage directions or annotations."
35
 
36
  # Style guidelines (required - ensures speech-friendly responses)
37
- KIOSK_LLM_STYLE="Warm, welcoming tone for a spoken receptionist. Prefer short, plain sentences suitable for text-to-speech and avoid stage directions or annotations like *in a warm voice*."
38
 
39
  # =============================================================================
40
  # Hugging Face Dataset Persistence (optional - for metrics/history across restarts)
 
30
  # LLM timeout in seconds
31
  KIOSK_LLM_TIMEOUT=60
32
 
33
+ # Max tokens per response (reinforces brevity; if unset, the Claude provider defaults to 384 and the OpenAI/Gemini providers send no explicit limit)
34
+ KIOSK_LLM_MAX_TOKENS=384
35
+
36
  # System prompt (required - defines the kiosk persona)
37
  KIOSK_LLM_SYSTEM_PROMPT="You are a conversational receptionist for the Northwestern CS Kiosk whose responses are spoken aloud. Speak naturally and never include stage directions or annotations."
38
 
39
  # Style guidelines (required - ensures speech-friendly responses)
40
+ KIOSK_LLM_STYLE="Be very brief. One or two sentences max. No long lists—summarize top 2-3 items only."
41
 
42
  # =============================================================================
43
  # Hugging Face Dataset Persistence (optional - for metrics/history across restarts)
backend/main.py CHANGED
@@ -1,6 +1,7 @@
1
  """FastAPI backend for Northwestern CS Kiosk - API only (no frontend)."""
2
 
3
  from __future__ import annotations
 
4
  import json
5
  import logging
6
  import os
@@ -84,6 +85,88 @@ app.add_middleware(
84
  _orchestrator_lock = threading.Lock()
85
  logger = logging.getLogger(__name__)
86
  _hf_scheduler = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
 
89
  class QueryPayload(BaseModel):
@@ -268,6 +351,8 @@ def _build_client_from_env(provider: str, model_override: Optional[str]) -> Opti
268
  return None
269
 
270
  timeout = int(os.getenv("KIOSK_LLM_TIMEOUT", "60"))
 
 
271
  api_env = settings.get("api_key")
272
  model_env = settings.get("model")
273
  base_url_env = settings.get("base_url")
@@ -288,6 +373,7 @@ def _build_client_from_env(provider: str, model_override: Optional[str]) -> Opti
288
  model=model,
289
  timeout_sec=timeout,
290
  base_url=base_url or None,
 
291
  )
292
 
293
  try:
@@ -306,7 +392,7 @@ def _build_responder(
306
  "KIOSK_LLM_SYSTEM_PROMPT",
307
  "You are a conversational receptionist for the Northwestern CS Kiosk whose responses are spoken aloud. Speak naturally and never include stage directions or annotations.",
308
  )
309
- style = os.getenv("KIOSK_LLM_STYLE", "Warm, welcoming tone for a spoken receptionist. Prefer short, plain sentences suitable for text-to-speech and avoid stage directions or annotations like *in a warm voice*.")
310
 
311
  provider_name = provider or os.getenv("KIOSK_LLM_PROVIDER", "anthropic")
312
  model_override = model_override if provider else (model_override or os.getenv("KIOSK_LLM_MODEL"))
@@ -344,7 +430,7 @@ def _create_planner() -> LLMActionPlanner:
344
  if not client:
345
  raise RuntimeError("LLM planner requires a configured provider (set KIOSK_LLM_PROVIDER/KEY).")
346
  schemas = get_all_tool_schemas()
347
- return LLMActionPlanner(client, schemas=schemas)
348
 
349
 
350
  class ConversationOrchestrator:
 
1
  """FastAPI backend for Northwestern CS Kiosk - API only (no frontend)."""
2
 
3
  from __future__ import annotations
4
+ import csv
5
  import json
6
  import logging
7
  import os
 
85
  _orchestrator_lock = threading.Lock()
86
  logger = logging.getLogger(__name__)
87
  _hf_scheduler = None
88
+ _entity_names: List[str] = []
89
+
90
+
91
def _load_entity_names() -> None:
    """Scrape entity names from the Archive folder and cache them in memory.

    Scans the top level of ``ARCHIVE_DIR`` (non-recursive; the constant is
    defined elsewhere in this module) for ``.csv`` and ``.txt`` files,
    collects person/entity names from them, and rebinds the module-level
    ``_entity_names`` to the sorted, de-duplicated result.

    * CSV files: names come from a ``Name`` / ``Assignee Name`` column
      (matched case-insensitively), or from ``First Name`` + ``Last Name``
      columns joined as ``"<first> <last>"``. Cells equal to ``NA`` are
      skipped.
    * Text files: every line of the form ``Name: <value>`` contributes
      ``<value>``.

    The function never raises. Per-file problems are logged as warnings and
    the remaining files are still processed; any other failure is logged as
    an error and leaves ``_entity_names`` empty.
    """
    global _entity_names

    def _clean(value: object) -> str:
        # csv.DictReader fills the missing cells of a short row with None,
        # so a plain ``.strip()`` would raise and abort the rest of the file.
        return value.strip() if isinstance(value, str) else ""

    def _extract_names_from_csv(filepath: Path) -> List[str]:
        """Return the names found in the name column(s) of one CSV file."""
        names: List[str] = []
        try:
            # utf-8-sig transparently drops a BOM (common in spreadsheet
            # exports) that would otherwise hide the first header;
            # newline="" is what the csv module expects for its input files.
            with open(filepath, "r", encoding="utf-8-sig", newline="") as f:
                reader = csv.DictReader(f)
                if reader.fieldnames is None:
                    return names

                full_col = first_col = last_col = None
                for field in reader.fieldnames:
                    key = (field or "").strip().lower()
                    if key in ("name", "assignee name"):
                        # A dedicated full-name column wins over first/last.
                        full_col = field
                        break
                    if key == "first name" and first_col is None:
                        first_col = field
                    elif key == "last name" and last_col is None:
                        last_col = field

                if full_col is None and first_col is not None and last_col is not None:
                    for row in reader:
                        last_name = _clean(row.get(last_col))
                        first_name = _clean(row.get(first_col))
                        if not (last_name or first_name):
                            continue
                        # A literal "NA" in either part skips the whole row.
                        if last_name.upper() == "NA" or first_name.upper() == "NA":
                            continue
                        names.append(" ".join(part for part in (first_name, last_name) if part))
                else:
                    # Either a full-name column, or only one of first/last
                    # exists and is used on its own.
                    single_col = full_col or first_col or last_col
                    if single_col is not None:
                        for row in reader:
                            name = _clean(row.get(single_col))
                            if name and name.upper() != "NA":
                                names.append(name)
        except Exception as e:
            # Deliberately best-effort: one unreadable file must not prevent
            # names from being loaded from the others.
            logger.warning("Error reading CSV %s: %s", filepath, e)
        return names

    def _extract_names_from_text(filepath: Path) -> List[str]:
        """Return the values of all ``Name: ...`` lines in one text file."""
        prefix = "Name:"
        names: List[str] = []
        try:
            with open(filepath, "r", encoding="utf-8-sig") as f:
                for raw_line in f:
                    line = raw_line.strip()
                    if line.startswith(prefix):
                        # Drop only the leading marker; a later "Name:" inside
                        # the value is part of the name text.
                        name = line[len(prefix):].strip()
                        if name:
                            names.append(name)
        except Exception as e:
            logger.warning("Error reading text file %s: %s", filepath, e)
        return names

    extractors = {
        ".csv": _extract_names_from_csv,
        ".txt": _extract_names_from_text,
    }

    try:
        archive_dir = ARCHIVE_DIR
        if not archive_dir.is_dir():
            logger.warning("Archive directory not found at %s", archive_dir)
            _entity_names = []
            return

        all_names: set = set()
        file_count = 0
        # Sorted iteration keeps the processing (and log) order deterministic.
        for filepath in sorted(archive_dir.iterdir()):
            if not filepath.is_file():
                continue
            extract = extractors.get(filepath.suffix.lower())
            if extract is None:
                continue
            all_names.update(extract(filepath))
            file_count += 1

        _entity_names = sorted(all_names)
        logger.info("Scraped %d unique entity names from %d files in Archive", len(_entity_names), file_count)
    except Exception as e:
        logger.error("Failed to scrape entity names from Archive: %s", e)
        _entity_names = []
167
+
168
+
169
+ _load_entity_names()
170
 
171
 
172
  class QueryPayload(BaseModel):
 
351
  return None
352
 
353
  timeout = int(os.getenv("KIOSK_LLM_TIMEOUT", "60"))
354
+ max_tokens_raw = os.getenv("KIOSK_LLM_MAX_TOKENS", "").strip()
355
+ max_tokens = int(max_tokens_raw) if max_tokens_raw.isdigit() else None
356
  api_env = settings.get("api_key")
357
  model_env = settings.get("model")
358
  base_url_env = settings.get("base_url")
 
373
  model=model,
374
  timeout_sec=timeout,
375
  base_url=base_url or None,
376
+ max_tokens=max_tokens,
377
  )
378
 
379
  try:
 
392
  "KIOSK_LLM_SYSTEM_PROMPT",
393
  "You are a conversational receptionist for the Northwestern CS Kiosk whose responses are spoken aloud. Speak naturally and never include stage directions or annotations.",
394
  )
395
+ style = os.getenv("KIOSK_LLM_STYLE", "Be very brief. One or two sentences max. No long lists—summarize top 2-3 items only.")
396
 
397
  provider_name = provider or os.getenv("KIOSK_LLM_PROVIDER", "anthropic")
398
  model_override = model_override if provider else (model_override or os.getenv("KIOSK_LLM_MODEL"))
 
430
  if not client:
431
  raise RuntimeError("LLM planner requires a configured provider (set KIOSK_LLM_PROVIDER/KEY).")
432
  schemas = get_all_tool_schemas()
433
+ return LLMActionPlanner(client, schemas=schemas, entity_names=_entity_names)
434
 
435
 
436
  class ConversationOrchestrator:
backend/mcp/llm_planner.py CHANGED
@@ -12,6 +12,10 @@ from .actions import Action, PlannerContext, ALLOWED_ACTIONS
12
  BASE_PROMPT = (
13
  "You are the routing planner for the Northwestern CS Kiosk. "
14
  "Select the best tool schema(s) for each user question. "
 
 
 
 
15
  "Topic switching: when the user asks a new, standalone question that introduces a different subject or intent (e.g. 'office hours for CS 336 today' when we were just discussing a professor), ignore the previous context and route based on the new question alone. Do NOT carry over the old subject or topic. "
16
  "You receive context.full_history (full session), context.short_history (last 3 turns), context.topic, context.subject, and context.last_class. Use this context only for follow-ups that explicitly reference or continue the current subject (pronouns, 'his work', 'any other slots', etc.). "
17
  "When context.topic is 'professor' and context.subject is set and the question is a follow-up about that person: follow-ups like 'his work', 'her office', 'where can I find him', 'more about his research', 'yes please' (after offering directions) → return lookup_person or lookup_location with use_last_subject=true or name=context.subject. "
@@ -30,9 +34,10 @@ BASE_PROMPT = (
30
  class LLMActionPlanner:
31
  """Structured planner that relies on an LLM for intent classification."""
32
 
33
- def __init__(self, client: BaseLLM, *, schemas: List[Dict[str, Any]]) -> None:
34
  self.client = client
35
  self.schemas = schemas
 
36
 
37
  def plan(self, question: str, context: PlannerContext) -> Optional[List[Action]]:
38
  try:
@@ -50,6 +55,7 @@ class LLMActionPlanner:
50
  "Each item should be an object with 'action' and 'arguments'. "
51
  "If you return a single action, returning {'action':..., 'arguments':{...}} is acceptable."
52
  ),
 
53
  "tool_schemas": self.schemas,
54
  "response_format": {
55
  "type": "object",
 
12
  BASE_PROMPT = (
13
  "You are the routing planner for the Northwestern CS Kiosk. "
14
  "Select the best tool schema(s) for each user question. "
15
+ "NAME MATCHING: When the user mentions a person's name, try to match it against the provided list of 'available_names'. "
16
+ "Consider phonetic similarity, common nicknames (e.g., 'Chris' for 'Kristian', 'Jim' for 'James'), and name variations. "
17
+ "If you find a match in 'available_names', use the matched name exactly as it appears in the list in your arguments. "
18
+ "If no good match is found, use the name as the user provided it. "
19
  "Topic switching: when the user asks a new, standalone question that introduces a different subject or intent (e.g. 'office hours for CS 336 today' when we were just discussing a professor), ignore the previous context and route based on the new question alone. Do NOT carry over the old subject or topic. "
20
  "You receive context.full_history (full session), context.short_history (last 3 turns), context.topic, context.subject, and context.last_class. Use this context only for follow-ups that explicitly reference or continue the current subject (pronouns, 'his work', 'any other slots', etc.). "
21
  "When context.topic is 'professor' and context.subject is set and the question is a follow-up about that person: follow-ups like 'his work', 'her office', 'where can I find him', 'more about his research', 'yes please' (after offering directions) → return lookup_person or lookup_location with use_last_subject=true or name=context.subject. "
 
34
  class LLMActionPlanner:
35
  """Structured planner that relies on an LLM for intent classification."""
36
 
37
+ def __init__(self, client: BaseLLM, *, schemas: List[Dict[str, Any]], entity_names: Optional[List[str]] = None) -> None:
38
  self.client = client
39
  self.schemas = schemas
40
+ self.entity_names = entity_names or []
41
 
42
  def plan(self, question: str, context: PlannerContext) -> Optional[List[Action]]:
43
  try:
 
55
  "Each item should be an object with 'action' and 'arguments'. "
56
  "If you return a single action, returning {'action':..., 'arguments':{...}} is acceptable."
57
  ),
58
+ "available_names": self.entity_names,
59
  "tool_schemas": self.schemas,
60
  "response_format": {
61
  "type": "object",
backend/providers/base.py CHANGED
@@ -25,6 +25,7 @@ class ProviderConfig:
25
  model: str
26
  timeout_sec: int = 60
27
  base_url: Optional[str] = None
 
28
 
29
 
30
  @dataclass
 
25
  model: str
26
  timeout_sec: int = 60
27
  base_url: Optional[str] = None
28
+ max_tokens: Optional[int] = None
29
 
30
 
31
  @dataclass
backend/providers/claude.py CHANGED
@@ -10,7 +10,7 @@ import httpx
10
  from .base import BaseLLM, ChatMessage, LLMResponse, ProviderConfig
11
 
12
  DEFAULT_ENDPOINT = "https://api.anthropic.com/v1/messages"
13
- MAX_OUTPUT_TOKENS = 1024
14
 
15
 
16
  def _anthropic_headers(api_key: str) -> Dict[str, str]:
@@ -67,10 +67,11 @@ class ClaudeProvider(BaseLLM):
67
  raise ValueError("Claude requires at least one user message.")
68
 
69
  endpoint = self.config.base_url or DEFAULT_ENDPOINT
 
70
  payload: Dict[str, object] = {
71
  "model": self.config.model or self.describe()["model"],
72
  "messages": anthropic_messages,
73
- "max_tokens": MAX_OUTPUT_TOKENS,
74
  }
75
  if system_prompts:
76
  payload["system"] = "\n\n".join(system_prompts)
 
10
  from .base import BaseLLM, ChatMessage, LLMResponse, ProviderConfig
11
 
12
  DEFAULT_ENDPOINT = "https://api.anthropic.com/v1/messages"
13
+ DEFAULT_MAX_TOKENS = 384
14
 
15
 
16
  def _anthropic_headers(api_key: str) -> Dict[str, str]:
 
67
  raise ValueError("Claude requires at least one user message.")
68
 
69
  endpoint = self.config.base_url or DEFAULT_ENDPOINT
70
+ max_tokens = self.config.max_tokens if self.config.max_tokens is not None else DEFAULT_MAX_TOKENS
71
  payload: Dict[str, object] = {
72
  "model": self.config.model or self.describe()["model"],
73
  "messages": anthropic_messages,
74
+ "max_tokens": max_tokens,
75
  }
76
  if system_prompts:
77
  payload["system"] = "\n\n".join(system_prompts)
backend/providers/gemini.py CHANGED
@@ -34,6 +34,8 @@ class GeminiGenerative(BaseLLM):
34
  }
35
  ]
36
  }
 
 
37
  url = f"{self.base_url}/models/{self.config.model}:generateContent?key={self.config.api_key}"
38
  response = self._session.post(url, json=payload, timeout=self.config.timeout_sec)
39
  response.raise_for_status()
 
34
  }
35
  ]
36
  }
37
+ if self.config.max_tokens is not None:
38
+ payload.setdefault("generationConfig", {})["maxOutputTokens"] = self.config.max_tokens
39
  url = f"{self.base_url}/models/{self.config.model}:generateContent?key={self.config.api_key}"
40
  response = self._session.post(url, json=payload, timeout=self.config.timeout_sec)
41
  response.raise_for_status()
backend/providers/gpt.py CHANGED
@@ -32,6 +32,8 @@ class OpenAIChat(BaseLLM):
32
  "model": self.config.model,
33
  "messages": formatted,
34
  }
 
 
35
  response = self._session.post(
36
  f"{self.base_url}/chat/completions",
37
  json=payload,
 
32
  "model": self.config.model,
33
  "messages": formatted,
34
  }
35
+ if self.config.max_tokens is not None:
36
+ payload["max_tokens"] = self.config.max_tokens
37
  response = self._session.post(
38
  f"{self.base_url}/chat/completions",
39
  json=payload,
backend/responders.py CHANGED
@@ -95,10 +95,9 @@ class LLMResponder(Responder):
95
  if self.style_guidelines:
96
  prompt_sections.append(f"STYLE: {self.style_guidelines}")
97
  prompt_sections.append(
98
- "TASK: Compose a concise, friendly reply grounded in the provided facts and notes. "
99
- "Write as if speaking aloud via text-to-speech: use natural conversational sentences, avoid bullet lists or markup, "
100
- "keep responses to about two or three sentences when possible, and do not include stage directions or textual cues like *in a warm voice*. "
101
- "When multiple office hour slots are provided (different times, instructors, or rooms), list all of them—do not condense to a single slot. "
102
  "For date or time questions (e.g. 'what time is it?', 'what day is tomorrow?'), use the CONTEXT section when facts are empty. "
103
  "If the facts are empty and the question is not about date/time, explain what information is missing instead of inventing details."
104
  )
 
95
  if self.style_guidelines:
96
  prompt_sections.append(f"STYLE: {self.style_guidelines}")
97
  prompt_sections.append(
98
+ "TASK: Compose a very brief, friendly reply grounded in the provided facts and notes. "
99
+ "Keep responses to 1-2 sentences when possible. Write as if speaking aloud via text-to-speech: use natural conversational sentences, avoid bullet lists or markup, and do not include stage directions like *in a warm voice*. "
100
+ "When many items exist (office hours, faculty list, etc.), mention only the first 2-3 and say how many more there are—do not list everything. "
 
101
  "For date or time questions (e.g. 'what time is it?', 'what day is tomorrow?'), use the CONTEXT section when facts are empty. "
102
  "If the facts are empty and the question is not about date/time, explain what information is missing instead of inventing details."
103
  )