Spaces:

Pygmales
/

hsg_rag_eea

Running

App Files Files

Pygmales committed on Apr 21

Commit

5ff514f

1 Parent(s): fd5438b

updated project version

Browse files

Files changed (10) hide show

app.py +1 -1
src/apps/chat/app.py +8 -56
src/cache/cache.py +6 -4
src/cache/cache_base.py +2 -2
src/cache/cache_strategies.py +23 -25
src/cache/utils.py +5 -0
src/config/configs.py +1 -0
src/rag/agent_chain.py +39 -13
src/rag/prompts.py +21 -11
src/rag/utilclasses.py +10 -0

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ from dotenv import load_dotenv
 if __name__ == "__main__":
 	load_dotenv()
-	Cache.configure(mode='cloud', no_cache=False)
 	init_logging()
 	ChatbotApplication("de").run()

 if __name__ == "__main__":
 	load_dotenv()
+	Cache.configure(mode='cloud', cache=True)
 	init_logging()
 	ChatbotApplication("de").run()

src/apps/chat/app.py CHANGED Viewed

@@ -1,13 +1,10 @@
-import os
 import uuid
 import gradio as gr
 from src.const.agent_response_constants import *
 from src.const.data_consent_constants import *
 from src.rag.agent_chain import ExecutiveAgentChain
-from src.rag.utilclasses import LeadAgentQueryResponse
 from src.utils.logging import get_logger, ConsentLogger
-from src.cache.cache import Cache
 logger = get_logger("chatbot_app")
@@ -15,7 +12,6 @@ class ChatbotApplication:
     def __init__(self, language: str = "de") -> None:
         self._app = gr.Blocks()
         self._language = language
-        self._cache = Cache.get_cache()
         self._consentLogger = ConsentLogger()
         with self._app:
@@ -257,59 +253,14 @@ class ChatbotApplication:
         answers = []
         try:
             logger.info(f"Processing user query: {message[:100]}...")
-            preprocess_resp = agent.preprocess_query(message)
-            final_response: LeadAgentQueryResponse = None
-            current_lang = preprocess_resp.language
-            processed_q = preprocess_resp.processed_query
-            if preprocess_resp.response:
-                # Response comes from preprocessing step
-                final_response = preprocess_resp
-            elif Cache._settings["enabled"]:
-                cached_data = self._cache.get(processed_q, language=current_lang)
-                if cached_data:
-                    # Cache Hit — restore response with metadata
-                    if isinstance(cached_data, dict):
-                        final_response = LeadAgentQueryResponse(
-                            response=cached_data["response"],
-                            language=current_lang,
-                            appointment_requested=cached_data.get("appointment_requested", False),
-                            relevant_programs=cached_data.get("relevant_programs", []),
-                        )
-                    else:
-                        # Legacy: plain string cache entry
-                        final_response = LeadAgentQueryResponse(
-                            response=cached_data,
-                            language=current_lang,
-                        )
-            if not final_response:
-                # Response needs to be generated by the agent
-                final_response = agent.agent_query(processed_q)
-            answers.append(final_response.response)
-            self._language = final_response.language
-            if final_response.confidence_fallback or final_response.max_turns_reached or final_response.appointment_requested:
-                html_code = get_booking_widget(language=self._language, programs=final_response.relevant_programs)
                 answers.append(gr.HTML(value=html_code))
-            if final_response.should_cache and Cache._settings["enabled"]:
-                # Caching response with metadata
-                self._cache.set(
-                    key=processed_q,
-                    value={
-                        "response": final_response.response,
-                        "appointment_requested": final_response.appointment_requested,
-                        "relevant_programs": final_response.relevant_programs,
-                    },
-                    language=current_lang
-                )
         except Exception as e:
             logger.error(f"Error processing query: {e}", exc_info=True)
             error_message = (
@@ -320,6 +271,7 @@ class ChatbotApplication:
         return answers
     def run(self):
         self._app.launch(
             share=False,

 import uuid
 import gradio as gr
 from src.const.agent_response_constants import *
 from src.const.data_consent_constants import *
 from src.rag.agent_chain import ExecutiveAgentChain
 from src.utils.logging import get_logger, ConsentLogger
 logger = get_logger("chatbot_app")
     def __init__(self, language: str = "de") -> None:
         self._app = gr.Blocks()
         self._language = language
         self._consentLogger = ConsentLogger()
         with self._app:
         answers = []
         try:
             logger.info(f"Processing user query: {message[:100]}...")
+            response = agent.query(message)
+            answers.append(response.response)
+            self._language = response.language
+            if any([response.confidence_fallback, response.max_turns_reached, response.appointment_requested]):
+                html_code = get_booking_widget(language=self._language, programs=response.relevant_programs)
                 answers.append(gr.HTML(value=html_code))
         except Exception as e:
             logger.error(f"Error processing query: {e}", exc_info=True)
             error_message = (
         return answers
     def run(self):
         self._app.launch(
             share=False,

src/cache/cache.py CHANGED Viewed

@@ -5,7 +5,7 @@ from src.cache.cache_strategies import RedisCache, LocalCache
 from src.utils.logging import get_logger
 from src.config import config
-logger = get_logger("cache    ")
 class Cache:
     _instance = None
@@ -14,10 +14,12 @@ class Cache:
     _cache_metrics = None
     @staticmethod
-    def configure(mode: str, no_cache: bool):
         Cache._settings = {
-            "mode": mode,
-            "enabled": not no_cache
         }
     @staticmethod

 from src.utils.logging import get_logger
 from src.config import config
+logger = get_logger("cache       ")
 class Cache:
     _instance = None
     _cache_metrics = None
     @staticmethod
+    def configure(mode: str, cache: bool):
+        logger.info(f"Cache configured with parameters: mode={mode}, cache={cache}")
+        config.cache.ENABLED = cache
         Cache._settings = {
+            "mode":    mode,
+            "enabled": cache
         }
     @staticmethod

src/cache/cache_base.py CHANGED Viewed

@@ -7,11 +7,11 @@ class CacheStrategy(ABC):
     """
     @abstractmethod
-    def set(self, key: str, value: Any, language: str):
         pass
     @abstractmethod
-    def get(self, key: str, language: str):
         pass
     @abstractmethod

     """
     @abstractmethod
+    def set(self, key: str, value: Any, language: str, session_id: str):
         pass
     @abstractmethod
+    def get(self, key: str, language: str, session_id: str):
         pass
     @abstractmethod

src/cache/cache_strategies.py CHANGED Viewed

@@ -2,6 +2,7 @@ import json
 from typing import Any
 from cachetools import TTLCache
 from src.cache.cache_base import CacheStrategy
 from src.database.redisservice import RedisService
 from src.utils.logging import get_logger
@@ -15,39 +16,39 @@ class RedisCache(CacheStrategy):
         self.client = service.get_client()
         self.metrics = metrics
-    def set(self, key: str, value: Any, language: str):
         if not self.client: return
         try:
             json_str = json.dumps(value)
-            self.client.set(self._generate_normalized_key(key, language), json_str, ex=config.cache.TTL_CACHE)
-            logger.info("Response cached")
         except Exception as e:
             logger.error(f"Could not write to Redis: {e}")
-    def get(self, key: str, language: str):
         if not self.client: return None
         try:
-            val = self.client.get(self._generate_normalized_key(key, language))
             if val is not None:
                 self.metrics.increment_hit()
-                logger.info(f"Cache HIT {self.metrics.cache_stats.hits} {self.metrics.cache_stats.hits_ratio}")
                 return json.loads(val)
             self.metrics.increment_miss()
-            logger.info(f"Cache MISS {self.metrics.cache_stats.misses} {self.metrics.cache_stats.hits_ratio}")
             return None
         except Exception as e:
             logger.error(f"Could not read from Redis: {e}")
             return None
-    def _generate_normalized_key(self, key: str, language: str) -> str:
-        import re
-        normalized_key = re.sub(r'[^a-z0-9]', '', key.lower())
-        return f"cache:{language}:{normalized_key}"
     def clear_cache(self):
         if not self.client: return
@@ -63,28 +64,25 @@ class LocalCache(CacheStrategy):
         self.cache = TTLCache(maxsize=config.cache.MAX_SIZE_CACHE, ttl=config.cache.TTL_CACHE)
         self.metrics = metrics
-    def _generate_normalized_key(self, key: str, language: str) -> str:
-        import re
-        normalized_key = re.sub(r'[^a-z0-9]', '', key.lower())
-        return f"cache:{language}:{normalized_key}"
-    def set(self, key: str, value: Any, language: str):
-        normalized_key = self._generate_normalized_key(key, language)
         self.cache[normalized_key] = value
         logger.info("Response cached")
-    def get(self, key: str, language: str):
-        normalized_key = self._generate_normalized_key(key, language)
         res = self.cache.get(normalized_key, None)
         if res is not None:
             self.metrics.increment_hit()
-            logger.info(f"Cache HIT {self.metrics.cache_stats.hits} {self.metrics.cache_stats.hits_ratio}")
         else:
             self.metrics.increment_miss()
-            logger.info(f"Cache MISS {self.metrics.cache_stats.misses}")
         return res
     def clear_cache(self):
         self.cache.clear()
         logger.info("Local Cache cleared.")

 from typing import Any
 from cachetools import TTLCache
+from .utils import get_cache_key
 from src.cache.cache_base import CacheStrategy
 from src.database.redisservice import RedisService
 from src.utils.logging import get_logger
         self.client = service.get_client()
         self.metrics = metrics
+    def set(self, key: str, value: Any, language: str, session_id: str):
         if not self.client: return
         try:
             json_str = json.dumps(value)
+            cache_key = get_cache_key(key, language, session_id)
+            self.client.set(cache_key, json_str, ex=config.cache.TTL_CACHE)
+            logger.info(f"Cached response with key {cache_key[:20]}... to Redis")
         except Exception as e:
             logger.error(f"Could not write to Redis: {e}")
+    def get(self, key: str, language: str, session_id: str):
         if not self.client: return None
         try:
+            cache_key = get_cache_key(key, language, session_id)
+            val = self.client.get(cache_key)
             if val is not None:
                 self.metrics.increment_hit()
+                logger.info(f"Found cached data with key {cache_key}")
+                logger.debug(f"Cache statistics: Hit cache {self.metrics.cache_stats.hits} times, ratio[{self.metrics.cache_stats.hits_ratio}]")
                 return json.loads(val)
             self.metrics.increment_miss()
+            logger.debug(f"Cache statistics: Missed cache {self.metrics.cache_stats.misses} times, ratio[{self.metrics.cache_stats.hits_ratio}]")
             return None
         except Exception as e:
             logger.error(f"Could not read from Redis: {e}")
             return None
     def clear_cache(self):
         if not self.client: return
         self.cache = TTLCache(maxsize=config.cache.MAX_SIZE_CACHE, ttl=config.cache.TTL_CACHE)
         self.metrics = metrics
+    def set(self, key: str, value: Any, language: str, session_id: str):
+        normalized_key = get_cache_key(key, language, session_id)
         self.cache[normalized_key] = value
         logger.info("Response cached")
+    def get(self, key: str, language: str, session_id: str):
+        normalized_key = get_cache_key(key, language, session_id)
         res = self.cache.get(normalized_key, None)
         if res is not None:
             self.metrics.increment_hit()
+            logger.debug(f"Cache statistics: Hit cache {self.metrics.cache_stats.hits} times, ratio[{self.metrics.cache_stats.hits_ratio}]")
         else:
             self.metrics.increment_miss()
+            logger.debug(f"Cache statistics: Missed cache {self.metrics.cache_stats.misses} times, ratio[{self.metrics.cache_stats.hits_ratio}]")
         return res
     def clear_cache(self):
         self.cache.clear()
         logger.info("Local Cache cleared.")

src/cache/utils.py ADDED Viewed

	@@ -0,0 +1,5 @@

import re
import unicodedata


def get_cache_key(key: str, language: str, session_id: str) -> str:
    """Build the cache key for a query, scoped by session and language.

    The query text is normalized so that trivially different spellings of
    the same question (case, whitespace, punctuation) share one entry.

    Normalization is Unicode-aware: an ASCII-only filter would drop umlauts
    and ß (making e.g. "Größe" and "Grüße" collide) and would reduce any
    query written in a non-Latin script to an empty string, so that all
    such queries in a session would share a single cache entry. For
    pure-ASCII input the resulting key is identical to the ASCII-only form.

    Args:
        key: Raw query text.
        language: Language code the entry is stored under.
        session_id: Identifier of the session the entry belongs to.

    Returns:
        A key of the form ``cache:<session_id>:<language>:<normalized query>``.
    """
    # NFKC first, so composed and decomposed forms of a character (e.g. "ü"
    # vs "u" + combining diaeresis) normalize to the same text.
    folded = unicodedata.normalize("NFKC", key).casefold()
    # On str patterns \W is Unicode-aware; "_" is stripped explicitly because
    # \w would otherwise keep it.
    normalized_key = re.sub(r'[\W_]+', '', folded)
    return f"cache:{session_id}:{language}:{normalized_key}"

src/config/configs.py CHANGED Viewed

@@ -85,6 +85,7 @@ class ChainConfig(ConfigBase):
 class CacheConfig(ConfigBase):
     CACHE_MODE: Literal['local', 'cloud', 'dict'] = _get('CACHE_MODE')
     LOCAL_HOST: str = _get('CACHE_LOCAL_HOST', 'localhost')

 class CacheConfig(ConfigBase):
+    ENABLED: bool = _get('CACHE_ENABLED', False)
     CACHE_MODE: Literal['local', 'cloud', 'dict'] = _get('CACHE_MODE')
     LOCAL_HOST: str = _get('CACHE_LOCAL_HOST', 'localhost')

src/rag/agent_chain.py CHANGED Viewed

@@ -35,6 +35,8 @@ from src.utils.logging import get_logger
 from src.utils.lang import get_language_name
 from src.config import config
 chain_logger = get_logger('agent_chain')
@@ -44,8 +46,9 @@ class ExecutiveAgentChain:
         self._stored_language = language
         self._dbservice = WeaviateService()
         self._agents, self._config = self._init_agents()
-        self._conversation_history = []
         # AI-middlewares
         if config.chain.EVALUATE_RESPONSE_QUALITY:
             self._quality_handler = QualityScoreHandler()
@@ -477,7 +480,7 @@ class ExecutiveAgentChain:
         return greeting_message
     @traceable
-    def preprocess_query(self, query: str) -> LeadAgentQueryResponse:
         """
         Phase 1: Validation, Scope-Check and language detection.
         Does not call the agent directly.
@@ -579,16 +582,38 @@ class ExecutiveAgentChain:
                 processed_query=processed_query,
                 appointment_requested=(should_escalate and escalation_type == "escalate_aggressive"),
             )
-        # Response = None indicates that agent needs to answer the processed query
-        return LeadAgentQueryResponse(
-            response=None,
-            processed_query=processed_query,
-            language=current_language
-        )
-    @traceable
-    def agent_query(self, preprocessed_query: str) -> LeadAgentQueryResponse:
         """
         Phase 2: Execute agent.
         Takes the ALREADY validated query from the preprocessing phase.
@@ -610,6 +635,7 @@ class ExecutiveAgentChain:
             messages=self._conversation_history + [language_instruction],
         )
         agent_response = structured_response.response
         chain_logger.info(f"Appointment Requested: {structured_response.appointment_requested}")
         chain_logger.info(f"Relevant Programs: {structured_response.relevant_programs}")
@@ -653,7 +679,7 @@ class ExecutiveAgentChain:
             response = formatted_response,
             language = response_language,
             confidence_fallback = confidence_fallback,
-            should_cache = False if (confidence_fallback or structured_response.appointment_requested) else True,
             processed_query = preprocessed_query,
             appointment_requested = structured_response.appointment_requested,
             relevant_programs = structured_response.relevant_programs

 from src.utils.lang import get_language_name
 from src.config import config
+from ..cache.cache import Cache
 chain_logger = get_logger('agent_chain')
         self._stored_language = language
         self._dbservice = WeaviateService()
         self._agents, self._config = self._init_agents()
+        self._conversation_history = []
+        self._cache = Cache.get_cache()
         # AI-middlewares
         if config.chain.EVALUATE_RESPONSE_QUALITY:
             self._quality_handler = QualityScoreHandler()
         return greeting_message
     @traceable
+    def query(self, query: str) -> LeadAgentQueryResponse:
         """
         Phase 1: Validation, Scope-Check and language detection.
         Does not call the agent directly.
                 processed_query=processed_query,
                 appointment_requested=(should_escalate and escalation_type == "escalate_aggressive"),
             )
+        # 5. Check if cached data already exists for this session
+        if config.cache.ENABLED:
+            cached_data = self._cache.get(query, current_language, self._user_id)
+            if cached_data and isinstance(cached_data, dict):
+                return LeadAgentQueryResponse(
+                    response=cached_data["response"],
+                    language=current_language,
+                    appointment_requested=cached_data.get("appointment_requested", False),
+                    relevant_programs=cached_data.get("relevant_programs", []),
+                )
+        # 6. Preprocessing is finished - the agent has to answer the query
+        response = self._query_lead(query)
+        if config.cache.ENABLED and response.should_cache:
+            self._cache.set(
+                key=query,
+                value={
+                    "response":              response.response,
+                    "appointment_requested": response.appointment_requested,
+                    "relevant_programs":     response.relevant_programs,
+                },
+                language   = current_language,
+                session_id = self._user_id,
+            )
+        return response
+    def _query_lead(self, preprocessed_query: str) -> LeadAgentQueryResponse:
         """
         Phase 2: Execute agent.
         Takes the ALREADY validated query from the preprocessing phase.
             messages=self._conversation_history + [language_instruction],
         )
         agent_response = structured_response.response
+        chain_logger.info(f"Is answer context dependent: {structured_response.is_context_dependent}")
         chain_logger.info(f"Appointment Requested: {structured_response.appointment_requested}")
         chain_logger.info(f"Relevant Programs: {structured_response.relevant_programs}")
             response = formatted_response,
             language = response_language,
             confidence_fallback = confidence_fallback,
+            should_cache = not any([confidence_fallback, structured_response.appointment_requested, structured_response.is_context_dependent]),
             processed_query = preprocessed_query,
             appointment_requested = structured_response.appointment_requested,
             relevant_programs = structured_response.relevant_programs

src/rag/prompts.py CHANGED Viewed

@@ -227,16 +227,16 @@ RULES:
     - If the user asks a specific question (duration, price, format) but refers only to "the EMBA" or "the program" WITHOUT specifying which one, you MUST ask for clarification.
     - **Example:** User "How long is the EMBA?" → **You:** "Are you interested in the **German-speaking EMBA HSG**, the **International EMBA (IEMBA)**, or the **emba X**?"
-CRITICAL - CROSS-SELLING RULES (PRIORITY 2):
-- Do NOT recommend generic online programs or programs not affiliated with University of St.Gallen.
-- If the user has constraints (e.g., "can't travel", "location restrictions"):
-  1. FIRST ask: "Is your constraint absolute, or is there some flexibility?"
-  2. If FLEXIBLE -> Offer to connect with admissions team (set appointment_requested=True).
-  3. If INFLEXIBLE -> Only then mention alternative HSG programs from https://op.unisg.ch/en/
-- Allowed cross-sell programs: MBA programs, Open Programs, Custom Programs from HSG Executive Education.
-- Always provide the link: https://op.unisg.ch/en/ when mentioning alternative programs.
-ESCALATION & HANDOVER RULES:
     - For eligibility assessments: "I can't confirm admission, but the admissions team can assess your profile."
     - For visa/permit questions: Redirect to admissions team.
     - For tuition/fee questions: ALWAYS provide the specific programme tuition figures first. Only escalate to admissions for payment plans, loan options, or employer sponsorship details beyond listed tuition.
@@ -271,7 +271,17 @@ ESCALATION & HANDOVER RULES:
     - Bold key facts: **program names**, **dates**, **costs**
     - Maximum 100 words per response
     - If uncertain, offer to connect user with the Admissions Team (and set appointment_requested=True).
     RULES:
     - Answer in the user's language. NEVER leave English terms untranslated in a German response. Key German translations:
       "tuition fee reduction" → "Studiengebührenreduktion", "tuition" → "Studiengebühr(en)", "included in tuition" → "in den Studiengebühren enthalten", "not included" → "nicht enthalten", "application deadline" → "Bewerbungsfrist".

     - If the user asks a specific question (duration, price, format) but refers only to "the EMBA" or "the program" WITHOUT specifying which one, you MUST ask for clarification.
     - **Example:** User "How long is the EMBA?" → **You:** "Are you interested in the **German-speaking EMBA HSG**, the **International EMBA (IEMBA)**, or the **emba X**?"
+    CRITICAL - CROSS-SELLING RULES (PRIORITY 2):
+    - Do NOT recommend generic online programs or programs not affiliated with University of St.Gallen.
+    - If the user has constraints (e.g., "can't travel", "location restrictions"):
+      1. FIRST ask: "Is your constraint absolute, or is there some flexibility?"
+      2. If FLEXIBLE -> Offer to connect with admissions team (set appointment_requested=True).
+      3. If INFLEXIBLE -> Only then mention alternative HSG programs from https://op.unisg.ch/en/
+    - Allowed cross-sell programs: MBA programs, Open Programs, Custom Programs from HSG Executive Education.
+    - Always provide the link: https://op.unisg.ch/en/ when mentioning alternative programs.
+    ESCALATION & HANDOVER RULES:
     - For eligibility assessments: "I can't confirm admission, but the admissions team can assess your profile."
     - For visa/permit questions: Redirect to admissions team.
     - For tuition/fee questions: ALWAYS provide the specific programme tuition figures first. Only escalate to admissions for payment plans, loan options, or employer sponsorship details beyond listed tuition.
     - Bold key facts: **program names**, **dates**, **costs**
     - Maximum 100 words per response
     - If uncertain, offer to connect user with the Admissions Team (and set appointment_requested=True).
+    - Set is_context_dependent=True for responses involving:
+      - eligibility
+      - recommendations
+      - comparisons after prior turns
+      - any answer using extracted profile data
+      - any answer influenced by conversation history
+    - Set is_context_dependent=False if the question can be answered without using user-specific information and without relying on prior conversation turns. This includes:
+      - factual, static information (e.g. prices, durations, deadlines, program structure)
+      - general definitions or explanations
+      - publicly available information that does not vary by user
     RULES:
     - Answer in the user's language. NEVER leave English terms untranslated in a German response. Key German translations:
       "tuition fee reduction" → "Studiengebührenreduktion", "tuition" → "Studiengebühr(en)", "included in tuition" → "in den Studiengebühren enthalten", "not included" → "nicht enthalten", "application deadline" → "Bewerbungsfrist".

src/rag/utilclasses.py CHANGED Viewed

@@ -26,6 +26,16 @@ class LeadAgentQueryResponse:
 class StructuredAgentResponse(BaseModel):
     response: str = Field(description="Main response to the query.")
     appointment_requested: bool = Field(
         default=False,
         description="Set to True ONLY if the user explicitly wants to book, asks for help booking, or if a proactive trigger (pricing/eligibility/handover) occurred in THIS specific turn. Otherwise, set to False."

 class StructuredAgentResponse(BaseModel):
     response: str = Field(description="Main response to the query.")
+    is_context_dependent: bool = Field(
+        default=True,
+        description=(
+            "Set to False only if the question can be answered without using any user-specific "
+            "information (e.g. name, age, preferences, extracted profile data) and without relying "
+            "on prior conversation turns or conversation history. "
+            "Must be True for responses involving eligibility, recommendations, comparisons after prior turns, "
+            "or any answer influenced by user profile data or conversation context."
+        )
+    )
     appointment_requested: bool = Field(
         default=False,
         description="Set to True ONLY if the user explicitly wants to book, asks for help booking, or if a proactive trigger (pricing/eligibility/handover) occurred in THIS specific turn. Otherwise, set to False."