Spaces:

monish563
/

NU-KIOSK-API

Sleeping

App Files Files Community

Monish BV committed on Mar 6

Commit

6dfbf93

1 Parent(s): eb4dde5

added output length and name fix

Browse files

Files changed (8) hide show

.env.example +4 -1
backend/main.py +88 -2
backend/mcp/llm_planner.py +7 -1
backend/providers/base.py +1 -0
backend/providers/claude.py +3 -2
backend/providers/gemini.py +2 -0
backend/providers/gpt.py +2 -0
backend/responders.py +3 -4

.env.example CHANGED Viewed

@@ -30,11 +30,14 @@ KIOSK_PORT=8000
 # LLM timeout in seconds
 KIOSK_LLM_TIMEOUT=60
 # System prompt (required - defines the kiosk persona)
 KIOSK_LLM_SYSTEM_PROMPT="You are a conversational receptionist for the Northwestern CS Kiosk whose responses are spoken aloud. Speak naturally and never include stage directions or annotations."
 # Style guidelines (required - ensures speech-friendly responses)
-KIOSK_LLM_STYLE="Warm, welcoming tone for a spoken receptionist. Prefer short, plain sentences suitable for text-to-speech and avoid stage directions or annotations like *in a warm voice*."
 # =============================================================================
 # Hugging Face Dataset Persistence (optional - for metrics/history across restarts)

 # LLM timeout in seconds
 KIOSK_LLM_TIMEOUT=60
+# Max tokens per response (reinforces brevity; if unset, Claude falls back to 384 and Gemini/OpenAI use the provider's own default)
+KIOSK_LLM_MAX_TOKENS=384
 # System prompt (required - defines the kiosk persona)
 KIOSK_LLM_SYSTEM_PROMPT="You are a conversational receptionist for the Northwestern CS Kiosk whose responses are spoken aloud. Speak naturally and never include stage directions or annotations."
 # Style guidelines (required - ensures speech-friendly responses)
+KIOSK_LLM_STYLE="Be very brief. One or two sentences max. No long lists—summarize top 2-3 items only."
 # =============================================================================
 # Hugging Face Dataset Persistence (optional - for metrics/history across restarts)

backend/main.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """FastAPI backend for Northwestern CS Kiosk - API only (no frontend)."""
 from __future__ import annotations
 import json
 import logging
 import os
@@ -84,6 +85,88 @@ app.add_middleware(
 _orchestrator_lock = threading.Lock()
 logger = logging.getLogger(__name__)
 _hf_scheduler = None
 class QueryPayload(BaseModel):
@@ -268,6 +351,8 @@ def _build_client_from_env(provider: str, model_override: Optional[str]) -> Opti
         return None
     timeout = int(os.getenv("KIOSK_LLM_TIMEOUT", "60"))
     api_env = settings.get("api_key")
     model_env = settings.get("model")
     base_url_env = settings.get("base_url")
@@ -288,6 +373,7 @@ def _build_client_from_env(provider: str, model_override: Optional[str]) -> Opti
         model=model,
         timeout_sec=timeout,
         base_url=base_url or None,
     )
     try:
@@ -306,7 +392,7 @@ def _build_responder(
         "KIOSK_LLM_SYSTEM_PROMPT",
         "You are a conversational receptionist for the Northwestern CS Kiosk whose responses are spoken aloud. Speak naturally and never include stage directions or annotations.",
     )
-    style = os.getenv("KIOSK_LLM_STYLE", "Warm, welcoming tone for a spoken receptionist. Prefer short, plain sentences suitable for text-to-speech and avoid stage directions or annotations like *in a warm voice*.")
     provider_name = provider or os.getenv("KIOSK_LLM_PROVIDER", "anthropic")
     model_override = model_override if provider else (model_override or os.getenv("KIOSK_LLM_MODEL"))
@@ -344,7 +430,7 @@ def _create_planner() -> LLMActionPlanner:
     if not client:
         raise RuntimeError("LLM planner requires a configured provider (set KIOSK_LLM_PROVIDER/KEY).")
     schemas = get_all_tool_schemas()
-    return LLMActionPlanner(client, schemas=schemas)
 class ConversationOrchestrator:

 """FastAPI backend for Northwestern CS Kiosk - API only (no frontend)."""
 from __future__ import annotations
+import csv
 import json
 import logging
 import os
 _orchestrator_lock = threading.Lock()
 logger = logging.getLogger(__name__)
 _hf_scheduler = None
+_entity_names: List[str] = []
+def _load_entity_names() -> None:
+    """Scrape entity names from Archive folder at startup and store in memory."""
+    global _entity_names
+    def _extract_names_from_csv(filepath: Path) -> List[str]:
+        names = []
+        try:
+            with open(filepath, "r", encoding="utf-8") as f:
+                reader = csv.DictReader(f)
+                if reader.fieldnames is None:
+                    return names
+                fieldnames = reader.fieldnames
+                name_columns = []
+                for field in fieldnames:
+                    field_lower = field.lower()
+                    if field_lower == "name" or field_lower == "assignee name":
+                        name_columns = [field]
+                        break
+                    elif field_lower == "first name":
+                        name_columns.append(field)
+                    elif field_lower == "last name":
+                        name_columns.insert(0, field)
+                for row in reader:
+                    if name_columns:
+                        if len(name_columns) == 1 and row.get(name_columns[0]):
+                            name = row[name_columns[0]].strip()
+                            if name and name.upper() != "NA":
+                                names.append(name)
+                        elif len(name_columns) == 2:
+                            last_name = row.get(name_columns[0], "").strip()
+                            first_name = row.get(name_columns[1], "").strip()
+                            if (last_name or first_name) and last_name.upper() != "NA" and first_name.upper() != "NA":
+                                full_name = f"{first_name} {last_name}".strip() if (last_name and first_name) else (first_name or last_name)
+                                if full_name:
+                                    names.append(full_name)
+        except Exception as e:
+            logger.warning("Error reading CSV %s: %s", filepath, e)
+        return names
+    def _extract_names_from_text(filepath: Path) -> List[str]:
+        names = []
+        try:
+            with open(filepath, "r", encoding="utf-8") as f:
+                for line in f:
+                    line = line.strip()
+                    if line.startswith("Name:"):
+                        name = line.replace("Name:", "").strip()
+                        if name:
+                            names.append(name)
+        except Exception as e:
+            logger.warning("Error reading text file %s: %s", filepath, e)
+        return names
+    try:
+        archive_dir = ARCHIVE_DIR
+        if not archive_dir.exists():
+            logger.warning("Archive directory not found at %s", archive_dir)
+            _entity_names = []
+            return
+        all_names: set = set()
+        file_count = 0
+        for filepath in sorted(archive_dir.iterdir()):
+            if filepath.is_file():
+                if filepath.suffix.lower() == ".csv":
+                    names = _extract_names_from_csv(filepath)
+                    all_names.update(names)
+                    file_count += 1
+                elif filepath.suffix.lower() == ".txt":
+                    names = _extract_names_from_text(filepath)
+                    all_names.update(names)
+                    file_count += 1
+        _entity_names = sorted(all_names)
+        logger.info("Scraped %d unique entity names from %d files in Archive", len(_entity_names), file_count)
+    except Exception as e:
+        logger.error("Failed to scrape entity names from Archive: %s", e)
+        _entity_names = []
+_load_entity_names()
 class QueryPayload(BaseModel):
         return None
     timeout = int(os.getenv("KIOSK_LLM_TIMEOUT", "60"))
+    max_tokens_raw = os.getenv("KIOSK_LLM_MAX_TOKENS", "").strip()
+    max_tokens = int(max_tokens_raw) if max_tokens_raw.isdigit() else None
     api_env = settings.get("api_key")
     model_env = settings.get("model")
     base_url_env = settings.get("base_url")
         model=model,
         timeout_sec=timeout,
         base_url=base_url or None,
+        max_tokens=max_tokens,
     )
     try:
         "KIOSK_LLM_SYSTEM_PROMPT",
         "You are a conversational receptionist for the Northwestern CS Kiosk whose responses are spoken aloud. Speak naturally and never include stage directions or annotations.",
     )
+    style = os.getenv("KIOSK_LLM_STYLE", "Be very brief. One or two sentences max. No long lists—summarize top 2-3 items only.")
     provider_name = provider or os.getenv("KIOSK_LLM_PROVIDER", "anthropic")
     model_override = model_override if provider else (model_override or os.getenv("KIOSK_LLM_MODEL"))
     if not client:
         raise RuntimeError("LLM planner requires a configured provider (set KIOSK_LLM_PROVIDER/KEY).")
     schemas = get_all_tool_schemas()
+    return LLMActionPlanner(client, schemas=schemas, entity_names=_entity_names)
 class ConversationOrchestrator:

backend/mcp/llm_planner.py CHANGED Viewed

@@ -12,6 +12,10 @@ from .actions import Action, PlannerContext, ALLOWED_ACTIONS
 BASE_PROMPT = (
     "You are the routing planner for the Northwestern CS Kiosk. "
     "Select the best tool schema(s) for each user question. "
     "Topic switching: when the user asks a new, standalone question that introduces a different subject or intent (e.g. 'office hours for CS 336 today' when we were just discussing a professor), ignore the previous context and route based on the new question alone. Do NOT carry over the old subject or topic. "
     "You receive context.full_history (full session), context.short_history (last 3 turns), context.topic, context.subject, and context.last_class. Use this context only for follow-ups that explicitly reference or continue the current subject (pronouns, 'his work', 'any other slots', etc.). "
     "When context.topic is 'professor' and context.subject is set and the question is a follow-up about that person: follow-ups like 'his work', 'her office', 'where can I find him', 'more about his research', 'yes please' (after offering directions) → return lookup_person or lookup_location with use_last_subject=true or name=context.subject. "
@@ -30,9 +34,10 @@ BASE_PROMPT = (
 class LLMActionPlanner:
     """Structured planner that relies on an LLM for intent classification."""
-    def __init__(self, client: BaseLLM, *, schemas: List[Dict[str, Any]]) -> None:
         self.client = client
         self.schemas = schemas
     def plan(self, question: str, context: PlannerContext) -> Optional[List[Action]]:
         try:
@@ -50,6 +55,7 @@ class LLMActionPlanner:
                 "Each item should be an object with 'action' and 'arguments'. "
                 "If you return a single action, returning {'action':..., 'arguments':{...}} is acceptable."
             ),
             "tool_schemas": self.schemas,
             "response_format": {
                 "type": "object",

 BASE_PROMPT = (
     "You are the routing planner for the Northwestern CS Kiosk. "
     "Select the best tool schema(s) for each user question. "
+    "NAME MATCHING: When the user mentions a person's name, try to match it against the provided list of 'available_names'. "
+    "Consider phonetic similarity, common nicknames (e.g., 'Chris' for 'Kristian', 'Jim' for 'James'), and name variations. "
+    "If you find a match in 'available_names', use the matched name exactly as it appears in the list in your arguments. "
+    "If no good match is found, use the name as the user provided it. "
     "Topic switching: when the user asks a new, standalone question that introduces a different subject or intent (e.g. 'office hours for CS 336 today' when we were just discussing a professor), ignore the previous context and route based on the new question alone. Do NOT carry over the old subject or topic. "
     "You receive context.full_history (full session), context.short_history (last 3 turns), context.topic, context.subject, and context.last_class. Use this context only for follow-ups that explicitly reference or continue the current subject (pronouns, 'his work', 'any other slots', etc.). "
     "When context.topic is 'professor' and context.subject is set and the question is a follow-up about that person: follow-ups like 'his work', 'her office', 'where can I find him', 'more about his research', 'yes please' (after offering directions) → return lookup_person or lookup_location with use_last_subject=true or name=context.subject. "
 class LLMActionPlanner:
     """Structured planner that relies on an LLM for intent classification."""
+    def __init__(self, client: BaseLLM, *, schemas: List[Dict[str, Any]], entity_names: Optional[List[str]] = None) -> None:
         self.client = client
         self.schemas = schemas
+        self.entity_names = entity_names or []
     def plan(self, question: str, context: PlannerContext) -> Optional[List[Action]]:
         try:
                 "Each item should be an object with 'action' and 'arguments'. "
                 "If you return a single action, returning {'action':..., 'arguments':{...}} is acceptable."
             ),
+            "available_names": self.entity_names,
             "tool_schemas": self.schemas,
             "response_format": {
                 "type": "object",

backend/providers/base.py CHANGED Viewed

@@ -25,6 +25,7 @@ class ProviderConfig:
     model: str
     timeout_sec: int = 60
     base_url: Optional[str] = None
 @dataclass

     model: str
     timeout_sec: int = 60
     base_url: Optional[str] = None
+    max_tokens: Optional[int] = None
 @dataclass

backend/providers/claude.py CHANGED Viewed

@@ -10,7 +10,7 @@ import httpx
 from .base import BaseLLM, ChatMessage, LLMResponse, ProviderConfig
 DEFAULT_ENDPOINT = "https://api.anthropic.com/v1/messages"
-MAX_OUTPUT_TOKENS = 1024
 def _anthropic_headers(api_key: str) -> Dict[str, str]:
@@ -67,10 +67,11 @@ class ClaudeProvider(BaseLLM):
             raise ValueError("Claude requires at least one user message.")
         endpoint = self.config.base_url or DEFAULT_ENDPOINT
         payload: Dict[str, object] = {
             "model": self.config.model or self.describe()["model"],
             "messages": anthropic_messages,
-            "max_tokens": MAX_OUTPUT_TOKENS,
         }
         if system_prompts:
             payload["system"] = "\n\n".join(system_prompts)

 from .base import BaseLLM, ChatMessage, LLMResponse, ProviderConfig
 DEFAULT_ENDPOINT = "https://api.anthropic.com/v1/messages"
+DEFAULT_MAX_TOKENS = 384
 def _anthropic_headers(api_key: str) -> Dict[str, str]:
             raise ValueError("Claude requires at least one user message.")
         endpoint = self.config.base_url or DEFAULT_ENDPOINT
+        max_tokens = self.config.max_tokens if self.config.max_tokens is not None else DEFAULT_MAX_TOKENS
         payload: Dict[str, object] = {
             "model": self.config.model or self.describe()["model"],
             "messages": anthropic_messages,
+            "max_tokens": max_tokens,
         }
         if system_prompts:
             payload["system"] = "\n\n".join(system_prompts)

backend/providers/gemini.py CHANGED Viewed

@@ -34,6 +34,8 @@ class GeminiGenerative(BaseLLM):
                 }
             ]
         }
         url = f"{self.base_url}/models/{self.config.model}:generateContent?key={self.config.api_key}"
         response = self._session.post(url, json=payload, timeout=self.config.timeout_sec)
         response.raise_for_status()

                 }
             ]
         }
+        if self.config.max_tokens is not None:
+            payload.setdefault("generationConfig", {})["maxOutputTokens"] = self.config.max_tokens
         url = f"{self.base_url}/models/{self.config.model}:generateContent?key={self.config.api_key}"
         response = self._session.post(url, json=payload, timeout=self.config.timeout_sec)
         response.raise_for_status()

backend/providers/gpt.py CHANGED Viewed

@@ -32,6 +32,8 @@ class OpenAIChat(BaseLLM):
             "model": self.config.model,
             "messages": formatted,
         }
         response = self._session.post(
             f"{self.base_url}/chat/completions",
             json=payload,

             "model": self.config.model,
             "messages": formatted,
         }
+        if self.config.max_tokens is not None:
+            payload["max_tokens"] = self.config.max_tokens
         response = self._session.post(
             f"{self.base_url}/chat/completions",
             json=payload,

backend/responders.py CHANGED Viewed

@@ -95,10 +95,9 @@ class LLMResponder(Responder):
         if self.style_guidelines:
             prompt_sections.append(f"STYLE: {self.style_guidelines}")
         prompt_sections.append(
-            "TASK: Compose a concise, friendly reply grounded in the provided facts and notes. "
-            "Write as if speaking aloud via text-to-speech: use natural conversational sentences, avoid bullet lists or markup, "
-            "keep responses to about two or three sentences when possible, and do not include stage directions or textual cues like *in a warm voice*. "
-            "When multiple office hour slots are provided (different times, instructors, or rooms), list all of them—do not condense to a single slot. "
             "For date or time questions (e.g. 'what time is it?', 'what day is tomorrow?'), use the CONTEXT section when facts are empty. "
             "If the facts are empty and the question is not about date/time, explain what information is missing instead of inventing details."
         )

         if self.style_guidelines:
             prompt_sections.append(f"STYLE: {self.style_guidelines}")
         prompt_sections.append(
+            "TASK: Compose a very brief, friendly reply grounded in the provided facts and notes. "
+            "Keep responses to 1-2 sentences when possible. Write as if speaking aloud via text-to-speech: use natural conversational sentences, avoid bullet lists or markup, and do not include stage directions like *in a warm voice*. "
+            "When many items exist (office hours, faculty list, etc.), mention only the first 2-3 and say how many more there are—do not list everything. "
             "For date or time questions (e.g. 'what time is it?', 'what day is tomorrow?'), use the CONTEXT section when facts are empty. "
             "If the facts are empty and the question is not about date/time, explain what information is missing instead of inventing details."
         )