Spaces:
Sleeping
Sleeping
Update README and enhance query processing with logging and error handling
Browse files- Updated README to reflect changes in model names and configuration options.
- Added detailed logging for query processing, document retrieval, and email drafting in `app_simple.py`.
- Improved error handling in email composition and retrieval processes.
- Introduced a new `QueryRewriter` class for enhanced query rewriting capabilities.
- Added test scripts for debugging query processing and retrieval.
- README.md +2 -2
- app_simple.py +85 -14
- src/agents/composer_agent.py +45 -5
- src/config.py +2 -2
- src/pipeline/orchestrator.py +4 -2
- src/retrieval/__init__.py +7 -0
- src/retrieval/hybrid_retriever.py +99 -7
- src/retrieval/query_rewriter.py +237 -0
- test_query_debug.py +73 -0
- test_retrieval_simple.py +93 -0
README.md
CHANGED
|
@@ -23,7 +23,7 @@ This system helps administrative staff compose accurate, professional email resp
|
|
| 23 |
- **In-Memory Document Store**: No Docker required
|
| 24 |
- **PydanticAI**: Multi-agent orchestration with structured outputs (optional)
|
| 25 |
- **Gradio**: Clean, light web interface
|
| 26 |
-
- **OpenAI GPT-5-
|
| 27 |
|
| 28 |
## Two Versions Available
|
| 29 |
|
|
@@ -176,7 +176,7 @@ The app is configured for automatic deployment to Hugging Face Spaces via `app.p
|
|
| 176 |
Key configuration options in `.env`:
|
| 177 |
|
| 178 |
### LLM Configuration
|
| 179 |
-
- `LLM_MODEL`: OpenAI model (default: gpt-4o)
|
| 180 |
- `EMBEDDING_MODEL`: Embedding model (default: text-embedding-3-small)
|
| 181 |
- `LLM_TEMPERATURE`: Temperature for generation (0-1)
|
| 182 |
- `LLM_MAX_TOKENS`: Maximum tokens per response
|
|
|
|
| 23 |
- **In-Memory Document Store**: No Docker required
|
| 24 |
- **PydanticAI**: Multi-agent orchestration with structured outputs (optional)
|
| 25 |
- **Gradio**: Clean, light web interface
|
| 26 |
+
- **OpenAI GPT-5-mini**: Reliable language model (upgraded from gpt-5-nano)
|
| 27 |
|
| 28 |
## Two Versions Available
|
| 29 |
|
|
|
|
| 176 |
Key configuration options in `.env`:
|
| 177 |
|
| 178 |
### LLM Configuration
|
| 179 |
+
- `LLM_MODEL`: OpenAI model (default: gpt-4o, recommended: gpt-5-mini)
|
| 180 |
- `EMBEDDING_MODEL`: Embedding model (default: text-embedding-3-small)
|
| 181 |
- `LLM_TEMPERATURE`: Temperature for generation (0-1)
|
| 182 |
- `LLM_MAX_TOKENS`: Maximum tokens per response
|
app_simple.py
CHANGED
|
@@ -143,14 +143,32 @@ class SimpleFastAssistant:
|
|
| 143 |
import time
|
| 144 |
start = time.time()
|
| 145 |
|
|
|
|
|
|
|
| 146 |
# Retrieve documents
|
| 147 |
docs = self.retriever.retrieve(query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
for i, doc in enumerate(docs[:3])
|
| 153 |
-
])
|
| 154 |
|
| 155 |
# Single LLM call
|
| 156 |
system_prompt = """You are an email assistant for BFH (Bern University of Applied Sciences) administrative staff.
|
|
@@ -176,6 +194,16 @@ Context from knowledge base:
|
|
| 176 |
|
| 177 |
Compose a professional email response."""
|
| 178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
# GPT-5 uses max_completion_tokens instead of max_tokens
|
| 180 |
completion_params = {
|
| 181 |
"model": self.config.llm.model_name,
|
|
@@ -189,19 +217,45 @@ Compose a professional email response."""
|
|
| 189 |
if "gpt-5" in self.config.llm.model_name:
|
| 190 |
completion_params["max_completion_tokens"] = self.config.llm.max_tokens
|
| 191 |
# GPT-5-nano only supports temperature=1 (default), so don't set it
|
|
|
|
| 192 |
else:
|
| 193 |
completion_params["max_tokens"] = self.config.llm.max_tokens
|
| 194 |
completion_params["temperature"] = self.config.llm.temperature
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
-
email = response.choices[0].message.content
|
| 199 |
elapsed = time.time() - start
|
| 200 |
|
| 201 |
# Parse subject and body
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
stats = f"**Time:** {elapsed:.1f}s | **Docs:** {len(docs)} | **Model:** {self.config.llm.model_name}"
|
| 207 |
|
|
@@ -218,6 +272,17 @@ Compose a professional email response."""
|
|
| 218 |
'explanation': self._explain_relevance(query, doc, i + 1)
|
| 219 |
})
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
return subject, body, stats, chunks_info
|
| 222 |
|
| 223 |
|
|
@@ -237,10 +302,16 @@ class QueryProcessor:
|
|
| 237 |
if not query or not query.strip():
|
| 238 |
return "", "", "", ""
|
| 239 |
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
|
| 246 |
# ============================================================================
|
|
|
|
| 143 |
import time
|
| 144 |
start = time.time()
|
| 145 |
|
| 146 |
+
logger.info(f"[DEBUG] Processing query: '{query}'")
|
| 147 |
+
|
| 148 |
# Retrieve documents
|
| 149 |
docs = self.retriever.retrieve(query)
|
| 150 |
+
logger.info(f"[DEBUG] Retrieved {len(docs)} documents")
|
| 151 |
+
|
| 152 |
+
if len(docs) == 0:
|
| 153 |
+
logger.warning(f"[DEBUG] No documents retrieved for query: '{query}'")
|
| 154 |
+
|
| 155 |
+
# Build context - limit for GPT-5-nano's smaller context window
|
| 156 |
+
# GPT-5-nano has limited capacity, so we need to be more conservative
|
| 157 |
+
max_docs = 2 if "gpt-5" in self.config.llm.model_name else 3
|
| 158 |
+
max_chars_per_doc = 800 if "gpt-5" in self.config.llm.model_name else 1500
|
| 159 |
+
|
| 160 |
+
context_parts = []
|
| 161 |
+
for i, doc in enumerate(docs[:max_docs]):
|
| 162 |
+
doc_content = doc.content
|
| 163 |
+
if len(doc_content) > max_chars_per_doc:
|
| 164 |
+
doc_content = doc_content[:max_chars_per_doc] + "..."
|
| 165 |
+
logger.debug(f"[DEBUG] Truncated document {i+1} from {len(doc.content)} to {max_chars_per_doc} chars")
|
| 166 |
+
|
| 167 |
+
context_parts.append(f"Document {i+1} (from {doc.meta.get('source_file', 'unknown')}):\n{doc_content}")
|
| 168 |
|
| 169 |
+
context = "\n\n".join(context_parts)
|
| 170 |
+
|
| 171 |
+
logger.info(f"[DEBUG] Context length: {len(context)} chars (using {len(context_parts)} of {len(docs)} docs)")
|
|
|
|
|
|
|
| 172 |
|
| 173 |
# Single LLM call
|
| 174 |
system_prompt = """You are an email assistant for BFH (Bern University of Applied Sciences) administrative staff.
|
|
|
|
| 194 |
|
| 195 |
Compose a professional email response."""
|
| 196 |
|
| 197 |
+
logger.info(f"[DEBUG] User prompt length: {len(user_prompt)} chars")
|
| 198 |
+
logger.info(f"[DEBUG] System prompt length: {len(system_prompt)} chars")
|
| 199 |
+
logger.info(f"[DEBUG] Total prompt length: {len(system_prompt) + len(user_prompt)} chars")
|
| 200 |
+
logger.debug(f"[DEBUG] User prompt preview: {user_prompt[:500]}...")
|
| 201 |
+
|
| 202 |
+
# Check if prompt might be too long for gpt-5-nano
|
| 203 |
+
total_chars = len(system_prompt) + len(user_prompt)
|
| 204 |
+
if "gpt-5" in self.config.llm.model_name and total_chars > 8000:
|
| 205 |
+
logger.warning(f"[DEBUG] Prompt may be too long for gpt-5-nano ({total_chars} chars). Consider using fewer documents or a different model.")
|
| 206 |
+
|
| 207 |
# GPT-5 uses max_completion_tokens instead of max_tokens
|
| 208 |
completion_params = {
|
| 209 |
"model": self.config.llm.model_name,
|
|
|
|
| 217 |
if "gpt-5" in self.config.llm.model_name:
|
| 218 |
completion_params["max_completion_tokens"] = self.config.llm.max_tokens
|
| 219 |
# GPT-5-nano only supports temperature=1 (default), so don't set it
|
| 220 |
+
logger.info(f"[DEBUG] Using GPT-5 parameters: max_completion_tokens={self.config.llm.max_tokens}")
|
| 221 |
else:
|
| 222 |
completion_params["max_tokens"] = self.config.llm.max_tokens
|
| 223 |
completion_params["temperature"] = self.config.llm.temperature
|
| 224 |
+
logger.info(f"[DEBUG] Using standard parameters: max_tokens={self.config.llm.max_tokens}, temp={self.config.llm.temperature}")
|
| 225 |
+
|
| 226 |
+
logger.info(f"[DEBUG] Calling LLM with model: {self.config.llm.model_name}")
|
| 227 |
|
| 228 |
+
try:
|
| 229 |
+
response = self.client.chat.completions.create(**completion_params)
|
| 230 |
+
email = response.choices[0].message.content
|
| 231 |
+
|
| 232 |
+
# Check for null/empty response
|
| 233 |
+
if email is None or email.strip() == "":
|
| 234 |
+
logger.error(f"[DEBUG] LLM returned null or empty response!")
|
| 235 |
+
logger.error(f"[DEBUG] Full response: {response}")
|
| 236 |
+
email = "Error: The model returned an empty response. Please try again."
|
| 237 |
+
else:
|
| 238 |
+
logger.info(f"[DEBUG] LLM response received: {len(email)} chars")
|
| 239 |
+
logger.debug(f"[DEBUG] LLM response preview: {email[:300]}...")
|
| 240 |
+
|
| 241 |
+
except Exception as e:
|
| 242 |
+
logger.error(f"[DEBUG] LLM call failed: {e}")
|
| 243 |
+
import traceback
|
| 244 |
+
traceback.print_exc()
|
| 245 |
+
raise
|
| 246 |
|
|
|
|
| 247 |
elapsed = time.time() - start
|
| 248 |
|
| 249 |
# Parse subject and body
|
| 250 |
+
if email and email.strip():
|
| 251 |
+
lines = email.split('\n')
|
| 252 |
+
subject = lines[0].replace('Subject:', '').strip() if lines else "Response"
|
| 253 |
+
body = '\n'.join(lines[1:]).strip() if len(lines) > 1 else email
|
| 254 |
+
else:
|
| 255 |
+
subject = "Error"
|
| 256 |
+
body = "No response generated from the model."
|
| 257 |
+
|
| 258 |
+
logger.info(f"[DEBUG] Parsed email - Subject: '{subject[:50] if len(subject) > 50 else subject}', Body: {len(body)} chars")
|
| 259 |
|
| 260 |
stats = f"**Time:** {elapsed:.1f}s | **Docs:** {len(docs)} | **Model:** {self.config.llm.model_name}"
|
| 261 |
|
|
|
|
| 272 |
'explanation': self._explain_relevance(query, doc, i + 1)
|
| 273 |
})
|
| 274 |
|
| 275 |
+
# Ensure we return non-empty strings
|
| 276 |
+
if not subject or subject.strip() == "":
|
| 277 |
+
subject = "(No subject generated)"
|
| 278 |
+
logger.warning("[DEBUG] Subject was empty, using fallback")
|
| 279 |
+
|
| 280 |
+
if not body or body.strip() == "":
|
| 281 |
+
body = "(No email body generated - please check logs for errors)"
|
| 282 |
+
logger.warning("[DEBUG] Body was empty, using fallback")
|
| 283 |
+
|
| 284 |
+
logger.info(f"[DEBUG] Returning - Subject: '{subject[:30]}...', Body length: {len(body)}, Chunks: {len(chunks_info)}")
|
| 285 |
+
|
| 286 |
return subject, body, stats, chunks_info
|
| 287 |
|
| 288 |
|
|
|
|
| 302 |
if not query or not query.strip():
|
| 303 |
return "", "", "", ""
|
| 304 |
|
| 305 |
+
try:
|
| 306 |
+
subject, body, stats, chunks_info = self.assistant.process_query(query)
|
| 307 |
+
chunks_html = self.formatter.format_chunks_html(chunks_info)
|
| 308 |
+
return subject, body, stats, chunks_html
|
| 309 |
+
except Exception as e:
|
| 310 |
+
logger.error(f"[ERROR] Failed to process query: {e}")
|
| 311 |
+
import traceback
|
| 312 |
+
traceback.print_exc()
|
| 313 |
+
error_msg = f"Error processing query: {str(e)}"
|
| 314 |
+
return "Error", error_msg, f"**Status:** Failed", f"<p style='color:red'>{error_msg}</p>"
|
| 315 |
|
| 316 |
|
| 317 |
# ============================================================================
|
src/agents/composer_agent.py
CHANGED
|
@@ -121,12 +121,45 @@ Based on this information, compose a complete email response that addresses the
|
|
| 121 |
|
| 122 |
except Exception as e:
|
| 123 |
logger.error(f"Error composing email: {e}")
|
| 124 |
-
# Return minimal draft on error
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
return EmailDraft(
|
| 126 |
-
subject=
|
| 127 |
-
body=
|
| 128 |
tone="professional",
|
| 129 |
-
confidence=0.
|
| 130 |
)
|
| 131 |
|
| 132 |
def _build_context(self, documents: List[Document]) -> str:
|
|
@@ -140,7 +173,14 @@ Based on this information, compose a complete email response that addresses the
|
|
| 140 |
Formatted context text
|
| 141 |
"""
|
| 142 |
if not documents:
|
| 143 |
-
return "No relevant documents found in the knowledge base.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
context_parts = []
|
| 146 |
for i, doc in enumerate(documents, 1):
|
|
|
|
| 121 |
|
| 122 |
except Exception as e:
|
| 123 |
logger.error(f"Error composing email: {e}")
|
| 124 |
+
# Return minimal draft on error based on detected language
|
| 125 |
+
language = intent.language if intent and hasattr(intent, 'language') else "de"
|
| 126 |
+
|
| 127 |
+
if language == "de":
|
| 128 |
+
subject = "Ihre Anfrage zur Studienadministration"
|
| 129 |
+
body = """Guten Tag,
|
| 130 |
+
|
| 131 |
+
Vielen Dank für Ihre Anfrage.
|
| 132 |
+
|
| 133 |
+
Leider konnte ich zu Ihrer spezifischen Frage keine detaillierten Informationen in unserer Wissensdatenbank finden. Ich empfehle Ihnen, sich direkt an die Studienadministration der BFH zu wenden:
|
| 134 |
+
|
| 135 |
+
- Website: https://www.bfh.ch
|
| 136 |
+
- Studienadministration: Kontaktinformationen finden Sie auf der BFH-Website
|
| 137 |
+
|
| 138 |
+
Die Mitarbeitenden können Ihnen bei Ihrer Anfrage persönlich weiterhelfen.
|
| 139 |
+
|
| 140 |
+
Freundliche Grüsse
|
| 141 |
+
BFH Studienadministration"""
|
| 142 |
+
else:
|
| 143 |
+
subject = "Your Inquiry to Student Administration"
|
| 144 |
+
body = """Hello,
|
| 145 |
+
|
| 146 |
+
Thank you for your inquiry.
|
| 147 |
+
|
| 148 |
+
Unfortunately, I could not find detailed information regarding your specific question in our knowledge base. I recommend contacting BFH Student Administration directly:
|
| 149 |
+
|
| 150 |
+
- Website: https://www.bfh.ch
|
| 151 |
+
- Student Administration: Contact information available on the BFH website
|
| 152 |
+
|
| 153 |
+
The staff will be able to assist you personally with your inquiry.
|
| 154 |
+
|
| 155 |
+
Best regards
|
| 156 |
+
BFH Student Administration"""
|
| 157 |
+
|
| 158 |
return EmailDraft(
|
| 159 |
+
subject=subject,
|
| 160 |
+
body=body,
|
| 161 |
tone="professional",
|
| 162 |
+
confidence=0.1,
|
| 163 |
)
|
| 164 |
|
| 165 |
def _build_context(self, documents: List[Document]) -> str:
|
|
|
|
| 173 |
Formatted context text
|
| 174 |
"""
|
| 175 |
if not documents:
|
| 176 |
+
return """No relevant documents found in the knowledge base.
|
| 177 |
+
|
| 178 |
+
IMPORTANT: Since no specific documentation was found, you should:
|
| 179 |
+
1. Acknowledge the user's query professionally
|
| 180 |
+
2. Provide general guidance if you can infer the topic
|
| 181 |
+
3. Direct them to contact the appropriate administrative office
|
| 182 |
+
4. Suggest checking the official BFH website for more information
|
| 183 |
+
5. Do NOT make up specific procedures, deadlines, or requirements"""
|
| 184 |
|
| 185 |
context_parts = []
|
| 186 |
for i, doc in enumerate(documents, 1):
|
src/config.py
CHANGED
|
@@ -88,7 +88,7 @@ class RetrievalConfig:
|
|
| 88 |
top_k: int = 5 # Number of documents to retrieve
|
| 89 |
bm25_weight: float = 0.5 # Weight for BM25 score
|
| 90 |
vector_weight: float = 0.5 # Weight for vector similarity score
|
| 91 |
-
min_score: float = 0.
|
| 92 |
|
| 93 |
@classmethod
|
| 94 |
def from_env(cls) -> "RetrievalConfig":
|
|
@@ -97,7 +97,7 @@ class RetrievalConfig:
|
|
| 97 |
top_k=int(os.getenv("RETRIEVAL_TOP_K", "5")),
|
| 98 |
bm25_weight=float(os.getenv("BM25_WEIGHT", "0.5")),
|
| 99 |
vector_weight=float(os.getenv("VECTOR_WEIGHT", "0.5")),
|
| 100 |
-
min_score=float(os.getenv("MIN_RELEVANCE_SCORE", "0.
|
| 101 |
)
|
| 102 |
|
| 103 |
|
|
|
|
| 88 |
top_k: int = 5 # Number of documents to retrieve
|
| 89 |
bm25_weight: float = 0.5 # Weight for BM25 score
|
| 90 |
vector_weight: float = 0.5 # Weight for vector similarity score
|
| 91 |
+
min_score: float = 0.1 # Minimum relevance score threshold (lowered to be more permissive)
|
| 92 |
|
| 93 |
@classmethod
|
| 94 |
def from_env(cls) -> "RetrievalConfig":
|
|
|
|
| 97 |
top_k=int(os.getenv("RETRIEVAL_TOP_K", "5")),
|
| 98 |
bm25_weight=float(os.getenv("BM25_WEIGHT", "0.5")),
|
| 99 |
vector_weight=float(os.getenv("VECTOR_WEIGHT", "0.5")),
|
| 100 |
+
min_score=float(os.getenv("MIN_RELEVANCE_SCORE", "0.1")),
|
| 101 |
)
|
| 102 |
|
| 103 |
|
src/pipeline/orchestrator.py
CHANGED
|
@@ -83,11 +83,13 @@ class RAGOrchestrator:
|
|
| 83 |
logger.info("Step 1: Extracting intent...")
|
| 84 |
intent = await self.intent_agent.extract_intent(query)
|
| 85 |
|
| 86 |
-
# Step 2: Retrieve relevant documents
|
| 87 |
logger.info("Step 2: Retrieving relevant documents...")
|
| 88 |
-
retrieved_docs = self.retriever.
|
| 89 |
|
| 90 |
logger.info(f"Retrieved {len(retrieved_docs)} documents")
|
|
|
|
|
|
|
| 91 |
|
| 92 |
# Step 3: Compose email draft
|
| 93 |
logger.info("Step 3: Composing email draft...")
|
|
|
|
| 83 |
logger.info("Step 1: Extracting intent...")
|
| 84 |
intent = await self.intent_agent.extract_intent(query)
|
| 85 |
|
| 86 |
+
# Step 2: Retrieve relevant documents (with query rewriting)
|
| 87 |
logger.info("Step 2: Retrieving relevant documents...")
|
| 88 |
+
retrieved_docs = await self.retriever.retrieve_with_rewriting(query)
|
| 89 |
|
| 90 |
logger.info(f"Retrieved {len(retrieved_docs)} documents")
|
| 91 |
+
if len(retrieved_docs) == 0:
|
| 92 |
+
logger.warning("No documents retrieved - email generation may be limited")
|
| 93 |
|
| 94 |
# Step 3: Compose email draft
|
| 95 |
logger.info("Step 3: Composing email draft...")
|
src/retrieval/__init__.py
CHANGED
|
@@ -11,3 +11,10 @@ try:
|
|
| 11 |
__all__.append("HybridRetriever")
|
| 12 |
except ImportError:
|
| 13 |
pass # OpenSearch dependencies not installed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
__all__.append("HybridRetriever")
|
| 12 |
except ImportError:
|
| 13 |
pass # OpenSearch dependencies not installed
|
| 14 |
+
|
| 15 |
+
# Optional query rewriter
|
| 16 |
+
try:
|
| 17 |
+
from .query_rewriter import QueryRewriter, RewrittenQueries
|
| 18 |
+
__all__.extend(["QueryRewriter", "RewrittenQueries"])
|
| 19 |
+
except ImportError:
|
| 20 |
+
pass # PydanticAI not installed
|
src/retrieval/hybrid_retriever.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""Hybrid retriever combining BM25 and vector search."""
|
| 2 |
|
| 3 |
-
from typing import List, Dict, Any
|
| 4 |
from haystack import Document
|
| 5 |
from haystack.components.embedders import OpenAITextEmbedder
|
| 6 |
from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore
|
|
@@ -12,6 +12,18 @@ import logging
|
|
| 12 |
|
| 13 |
from ..config import RetrievalConfig, LLMConfig
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
| 17 |
|
|
@@ -23,6 +35,7 @@ class HybridRetriever:
|
|
| 23 |
document_store: OpenSearchDocumentStore,
|
| 24 |
llm_config: LLMConfig,
|
| 25 |
retrieval_config: RetrievalConfig,
|
|
|
|
| 26 |
):
|
| 27 |
"""
|
| 28 |
Initialize the hybrid retriever.
|
|
@@ -31,10 +44,12 @@ class HybridRetriever:
|
|
| 31 |
document_store: OpenSearch document store
|
| 32 |
llm_config: LLM configuration for embeddings
|
| 33 |
retrieval_config: Retrieval configuration
|
|
|
|
| 34 |
"""
|
| 35 |
self.document_store = document_store
|
| 36 |
self.llm_config = llm_config
|
| 37 |
self.retrieval_config = retrieval_config
|
|
|
|
| 38 |
|
| 39 |
# Initialize BM25 retriever
|
| 40 |
self.bm25_retriever = OpenSearchBM25Retriever(
|
|
@@ -52,12 +67,21 @@ class HybridRetriever:
|
|
| 52 |
model=llm_config.embedding_model,
|
| 53 |
)
|
| 54 |
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
"""
|
| 57 |
-
Retrieve documents using hybrid search.
|
| 58 |
|
| 59 |
Args:
|
| 60 |
-
query: Search query
|
|
|
|
| 61 |
|
| 62 |
Returns:
|
| 63 |
List of relevant documents with scores
|
|
@@ -65,16 +89,23 @@ class HybridRetriever:
|
|
| 65 |
logger.info(f"Retrieving documents for query: {query[:100]}...")
|
| 66 |
|
| 67 |
try:
|
|
|
|
|
|
|
|
|
|
| 68 |
# Get BM25 results
|
| 69 |
-
logger.
|
|
|
|
| 70 |
bm25_results = self.bm25_retriever.run(
|
| 71 |
-
query=
|
| 72 |
top_k=self.retrieval_config.top_k * 2, # Get more to merge
|
| 73 |
)
|
| 74 |
bm25_docs = bm25_results.get("documents", [])
|
|
|
|
|
|
|
|
|
|
| 75 |
logger.debug(f"BM25 retrieved {len(bm25_docs)} documents")
|
| 76 |
|
| 77 |
-
# Generate query embedding
|
| 78 |
logger.debug("Generating query embedding...")
|
| 79 |
embedding_result = self.text_embedder.run(text=query)
|
| 80 |
query_embedding = embedding_result.get("embedding")
|
|
@@ -101,12 +132,73 @@ class HybridRetriever:
|
|
| 101 |
|
| 102 |
logger.info(f"Retrieved {len(final_docs)} documents after hybrid ranking")
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
return final_docs
|
| 105 |
|
| 106 |
except Exception as e:
|
| 107 |
logger.error(f"Error during retrieval: {e}")
|
| 108 |
return []
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
def _merge_results(
|
| 111 |
self, bm25_docs: List[Document], vector_docs: List[Document]
|
| 112 |
) -> List[Document]:
|
|
|
|
| 1 |
"""Hybrid retriever combining BM25 and vector search."""
|
| 2 |
|
| 3 |
+
from typing import List, Dict, Any, Optional, TYPE_CHECKING
|
| 4 |
from haystack import Document
|
| 5 |
from haystack.components.embedders import OpenAITextEmbedder
|
| 6 |
from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore
|
|
|
|
| 12 |
|
| 13 |
from ..config import RetrievalConfig, LLMConfig
|
| 14 |
|
| 15 |
+
if TYPE_CHECKING:
|
| 16 |
+
from .query_rewriter import QueryRewriter, RewrittenQueries
|
| 17 |
+
else:
|
| 18 |
+
# Optional import for query rewriting at runtime
|
| 19 |
+
try:
|
| 20 |
+
from .query_rewriter import QueryRewriter, RewrittenQueries
|
| 21 |
+
QUERY_REWRITER_AVAILABLE = True
|
| 22 |
+
except ImportError:
|
| 23 |
+
QUERY_REWRITER_AVAILABLE = False
|
| 24 |
+
QueryRewriter = None # type: ignore
|
| 25 |
+
RewrittenQueries = None # type: ignore
|
| 26 |
+
|
| 27 |
logger = logging.getLogger(__name__)
|
| 28 |
|
| 29 |
|
|
|
|
| 35 |
document_store: OpenSearchDocumentStore,
|
| 36 |
llm_config: LLMConfig,
|
| 37 |
retrieval_config: RetrievalConfig,
|
| 38 |
+
use_query_rewriting: bool = True,
|
| 39 |
):
|
| 40 |
"""
|
| 41 |
Initialize the hybrid retriever.
|
|
|
|
| 44 |
document_store: OpenSearch document store
|
| 45 |
llm_config: LLM configuration for embeddings
|
| 46 |
retrieval_config: Retrieval configuration
|
| 47 |
+
use_query_rewriting: Enable LLM-based query rewriting (default: True)
|
| 48 |
"""
|
| 49 |
self.document_store = document_store
|
| 50 |
self.llm_config = llm_config
|
| 51 |
self.retrieval_config = retrieval_config
|
| 52 |
+
self.use_query_rewriting = use_query_rewriting
|
| 53 |
|
| 54 |
# Initialize BM25 retriever
|
| 55 |
self.bm25_retriever = OpenSearchBM25Retriever(
|
|
|
|
| 67 |
model=llm_config.embedding_model,
|
| 68 |
)
|
| 69 |
|
| 70 |
+
# Initialize query rewriter (uses faster model for speed)
|
| 71 |
+
self.query_rewriter: Optional["QueryRewriter"] = None
|
| 72 |
+
if use_query_rewriting and QUERY_REWRITER_AVAILABLE and QueryRewriter:
|
| 73 |
+
self.query_rewriter = QueryRewriter(
|
| 74 |
+
api_key=llm_config.api_key,
|
| 75 |
+
model="openai:gpt-4o-mini", # Faster model for query rewriting
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
def retrieve(self, query: str, rewritten_query: Optional["RewrittenQueries"] = None) -> List[Document]:
|
| 79 |
"""
|
| 80 |
+
Retrieve documents using hybrid search with optional query rewriting.
|
| 81 |
|
| 82 |
Args:
|
| 83 |
+
query: Search query (original or already rewritten)
|
| 84 |
+
rewritten_query: Pre-computed rewritten queries (optional)
|
| 85 |
|
| 86 |
Returns:
|
| 87 |
List of relevant documents with scores
|
|
|
|
| 89 |
logger.info(f"Retrieving documents for query: {query[:100]}...")
|
| 90 |
|
| 91 |
try:
|
| 92 |
+
# Use provided query for BM25 (might be rewritten)
|
| 93 |
+
bm25_query = query
|
| 94 |
+
|
| 95 |
# Get BM25 results
|
| 96 |
+
logger.info(f"[DEBUG] BM25 query string: '{bm25_query}'")
|
| 97 |
+
logger.debug(f"Running BM25 retrieval with query: {bm25_query[:100]}...")
|
| 98 |
bm25_results = self.bm25_retriever.run(
|
| 99 |
+
query=bm25_query,
|
| 100 |
top_k=self.retrieval_config.top_k * 2, # Get more to merge
|
| 101 |
)
|
| 102 |
bm25_docs = bm25_results.get("documents", [])
|
| 103 |
+
logger.info(f"[DEBUG] BM25 retrieved {len(bm25_docs)} documents")
|
| 104 |
+
if bm25_docs:
|
| 105 |
+
logger.debug(f"[DEBUG] Top BM25 result score: {bm25_docs[0].score if bm25_docs[0].score else 'None'}")
|
| 106 |
logger.debug(f"BM25 retrieved {len(bm25_docs)} documents")
|
| 107 |
|
| 108 |
+
# Generate query embedding (use original query for semantic similarity)
|
| 109 |
logger.debug("Generating query embedding...")
|
| 110 |
embedding_result = self.text_embedder.run(text=query)
|
| 111 |
query_embedding = embedding_result.get("embedding")
|
|
|
|
| 132 |
|
| 133 |
logger.info(f"Retrieved {len(final_docs)} documents after hybrid ranking")
|
| 134 |
|
| 135 |
+
# If no docs and we have rewritten queries, try fallback with synonyms
|
| 136 |
+
if len(final_docs) == 0 and rewritten_query and rewritten_query.synonyms:
|
| 137 |
+
logger.info("No documents found, trying with synonym queries...")
|
| 138 |
+
for synonym in rewritten_query.synonyms[:2]: # Try top 2 synonyms
|
| 139 |
+
fallback_docs = self._retrieve_with_query(synonym)
|
| 140 |
+
if fallback_docs:
|
| 141 |
+
logger.info(f"Found {len(fallback_docs)} documents with synonym: {synonym}")
|
| 142 |
+
return fallback_docs
|
| 143 |
+
|
| 144 |
return final_docs
|
| 145 |
|
| 146 |
except Exception as e:
|
| 147 |
logger.error(f"Error during retrieval: {e}")
|
| 148 |
return []
|
| 149 |
|
| 150 |
+
async def retrieve_with_rewriting(self, query: str) -> List[Document]:
|
| 151 |
+
"""
|
| 152 |
+
Retrieve documents with query rewriting.
|
| 153 |
+
|
| 154 |
+
Args:
|
| 155 |
+
query: Original user query
|
| 156 |
+
|
| 157 |
+
Returns:
|
| 158 |
+
List of relevant documents with scores
|
| 159 |
+
"""
|
| 160 |
+
if not self.use_query_rewriting or not self.query_rewriter:
|
| 161 |
+
logger.info("[DEBUG] Query rewriting disabled, using original query")
|
| 162 |
+
return self.retrieve(query)
|
| 163 |
+
|
| 164 |
+
# Rewrite query
|
| 165 |
+
logger.info("Rewriting query for better retrieval...")
|
| 166 |
+
rewritten = await self.query_rewriter.rewrite_query(query)
|
| 167 |
+
|
| 168 |
+
# Build optimized query for BM25
|
| 169 |
+
optimized_query = self.query_rewriter.build_expanded_query(rewritten)
|
| 170 |
+
|
| 171 |
+
logger.info(f"[DEBUG] Original query: '{query}'")
|
| 172 |
+
logger.info(f"[DEBUG] Primary rewritten: '{rewritten.primary_query}'")
|
| 173 |
+
logger.info(f"[DEBUG] Key terms: {rewritten.key_terms}")
|
| 174 |
+
logger.info(f"[DEBUG] Optimized for BM25: '{optimized_query}'")
|
| 175 |
+
logger.info(f"Original: {query[:80]}")
|
| 176 |
+
logger.info(f"Optimized: {optimized_query[:80]}")
|
| 177 |
+
|
| 178 |
+
# Retrieve with optimized query
|
| 179 |
+
return self.retrieve(optimized_query, rewritten_query=rewritten)
|
| 180 |
+
|
| 181 |
+
def _retrieve_with_query(self, query: str) -> List[Document]:
|
| 182 |
+
"""
|
| 183 |
+
Helper method to retrieve with a specific query string.
|
| 184 |
+
|
| 185 |
+
Args:
|
| 186 |
+
query: Query string
|
| 187 |
+
|
| 188 |
+
Returns:
|
| 189 |
+
List of documents
|
| 190 |
+
"""
|
| 191 |
+
try:
|
| 192 |
+
bm25_results = self.bm25_retriever.run(
|
| 193 |
+
query=query,
|
| 194 |
+
top_k=self.retrieval_config.top_k,
|
| 195 |
+
)
|
| 196 |
+
docs = bm25_results.get("documents", [])
|
| 197 |
+
return self._apply_score_threshold(docs)
|
| 198 |
+
except Exception as e:
|
| 199 |
+
logger.error(f"Error in fallback retrieval: {e}")
|
| 200 |
+
return []
|
| 201 |
+
|
| 202 |
def _merge_results(
|
| 203 |
self, bm25_docs: List[Document], vector_docs: List[Document]
|
| 204 |
) -> List[Document]:
|
src/retrieval/query_rewriter.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Query rewriting for improved retrieval using LLM."""
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
import logging
|
| 5 |
+
from typing import List, Set
|
| 6 |
+
from pydantic import BaseModel, Field
|
| 7 |
+
from pydantic_ai import Agent
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class RewrittenQueries(BaseModel):
|
| 13 |
+
"""Structured output for rewritten queries."""
|
| 14 |
+
|
| 15 |
+
primary_query: str = Field(
|
| 16 |
+
description="Main rewritten query optimized for keyword search"
|
| 17 |
+
)
|
| 18 |
+
synonyms: List[str] = Field(
|
| 19 |
+
default_factory=list,
|
| 20 |
+
description="Alternative phrasings and synonyms"
|
| 21 |
+
)
|
| 22 |
+
key_terms: List[str] = Field(
|
| 23 |
+
default_factory=list,
|
| 24 |
+
description="Important domain-specific terms to boost"
|
| 25 |
+
)
|
| 26 |
+
explanation: str = Field(
|
| 27 |
+
default="",
|
| 28 |
+
description="Brief explanation of the rewriting strategy"
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class QueryRewriter:
    """Rewrites queries for better retrieval using LLM.

    Uses a pydantic_ai Agent to turn colloquial user questions into
    keyword-friendly search queries, with a pure stopword-removal
    fallback when the LLM call fails.
    """

    # Stopwords for basic filtering (used as fallback)
    STOPWORDS: Set[str] = {
        # English
        "i", "me", "my", "we", "you", "he", "she", "it", "they", "am", "is", "are",
        "was", "were", "be", "been", "have", "has", "had", "do", "does", "did",
        "a", "an", "the", "and", "but", "if", "or", "as", "of", "at", "by", "for",
        "with", "about", "to", "from", "in", "on", "can", "will", "would", "should",
        # German
        "ich", "mich", "mir", "du", "er", "sie", "es", "wir", "ihr", "der", "die",
        "das", "den", "dem", "des", "ein", "eine", "und", "oder", "aber", "wenn",
        "als", "von", "zu", "mit", "bei", "für", "auf", "an", "in", "ist", "sind",
        "war", "haben", "hat", "kann", "wie", "was", "wo",
        # French
        "je", "tu", "il", "elle", "nous", "vous", "ils", "elles", "le", "la", "les",
        "un", "une", "des", "et", "ou", "mais", "si", "de", "à", "pour", "avec",
        "dans", "est", "sont", "avoir", "peut", "comment", "que", "qui",
    }

    def __init__(self, api_key: str, model: str = "openai:gpt-4o-mini"):
        """
        Initialize query rewriter.

        Args:
            api_key: OpenAI API key
            model: Model to use (default: gpt-4o-mini for speed)
        """
        # NOTE(review): api_key is accepted but never used in this class;
        # presumably pydantic_ai resolves credentials from the environment —
        # confirm, or wire the key through explicitly.
        self.agent = Agent[None, RewrittenQueries](
            model,
            system_prompt="""You are an expert at rewriting user queries for optimal document retrieval in a university administrative system (BFH - Bern University of Applied Sciences).

Your task is to transform natural language queries into optimized search queries that will find relevant administrative documents.

Key strategies:
1. **Translate colloquial to formal**: Convert informal phrasing to official administrative terms
- Example: "drop out" → "Exmatrikulation" (German) or "withdrawal" (English)
- Example: "change my major" → "Studiengangwechsel"

2. **Add domain-specific keywords**: Include relevant administrative terms
- Common terms: Antrag (application), Formular (form), Frist (deadline), Anmeldung (registration)

3. **Language consistency**: Keep search terms in the same language as documents (primarily German)
- German query → German search terms
- English query → can include German terms if they're official names

4. **Extract key entities**: Identify important terms like:
- Administrative processes: Exmatrikulation, Immatrikulation, Beurlaubung
- Documents: Formular, Bestätigung, Bescheinigung
- Deadlines: Frist, Semester, Anmeldung

5. **Expand with synonyms**: Provide alternative terms that might appear in documents
- Example: "Abmeldung" and "Exmatrikulation"

6. **Remove filler words**: Focus on content words only

Output:
- primary_query: The best optimized query for keyword search (BM25)
- synonyms: 2-3 alternative phrasings
- key_terms: 3-5 critical domain-specific terms to boost in search
- explanation: Brief note on your strategy

Keep queries concise (5-10 words max for primary_query)."""
        )

    async def rewrite_query(self, original_query: str) -> RewrittenQueries:
        """
        Rewrite a query for better retrieval.

        Args:
            original_query: Original user query

        Returns:
            Rewritten queries with metadata (falls back to plain
            stopword removal if the LLM call raises).
        """
        # Lazy %-style args: the message is only formatted if the level is enabled.
        logger.info("Rewriting query: %s...", original_query[:100])

        try:
            result = await self.agent.run(f"Rewrite this query for optimal document retrieval:\n\n{original_query}")
            rewritten = result.output

            # Post-process to ensure stopwords are removed
            rewritten = self._clean_stopwords(rewritten)

            logger.info("Rewritten primary query: %s", rewritten.primary_query)
            logger.debug("Key terms: %s", ", ".join(rewritten.key_terms))
            logger.debug("Explanation: %s", rewritten.explanation)

            return rewritten

        except Exception as e:
            logger.error("Error rewriting query: %s", e)
            # Fallback to basic stopword removal
            return self._fallback_rewrite(original_query)

    def _clean_stopwords(self, rewritten: RewrittenQueries) -> RewrittenQueries:
        """
        Clean stopwords from rewritten query (post-processing safety net).

        Args:
            rewritten: Rewritten query from LLM

        Returns:
            Cleaned rewritten query
        """
        # NOTE(review): tokens are lowercased here, which lowercases German
        # nouns (e.g. "Exmatrikulation" → "exmatrikulation"). This is fine if
        # the BM25 analyzer also lowercases — confirm against the retriever.
        tokens = re.findall(r'\w+', rewritten.primary_query.lower())
        removed_stopwords = [t for t in tokens if t in self.STOPWORDS]
        cleaned_tokens = [t for t in tokens if t not in self.STOPWORDS]

        # FIX: these diagnostics were logged at INFO with a literal "[DEBUG]"
        # prefix, flooding production logs; they belong at DEBUG level.
        logger.debug("Stopwords removed from primary query: %s", removed_stopwords)
        logger.debug("Before stopword cleaning: '%s'", rewritten.primary_query)

        if cleaned_tokens:
            cleaned_primary = " ".join(cleaned_tokens)
        else:
            # If everything was removed, keep original
            cleaned_primary = rewritten.primary_query

        logger.debug("After stopword cleaning: '%s'", cleaned_primary)

        # Clean key terms (single tokens: drop any that are themselves stopwords)
        cleaned_key_terms = [term for term in rewritten.key_terms if term.lower() not in self.STOPWORDS]

        # Clean synonyms (multi-word: filter stopword tokens, drop empty results)
        cleaned_synonyms = []
        for syn in rewritten.synonyms:
            syn_tokens = re.findall(r'\w+', syn.lower())
            syn_cleaned = [t for t in syn_tokens if t not in self.STOPWORDS]
            if syn_cleaned:
                cleaned_synonyms.append(" ".join(syn_cleaned))

        return RewrittenQueries(
            primary_query=cleaned_primary,
            synonyms=cleaned_synonyms,
            key_terms=cleaned_key_terms,
            explanation=rewritten.explanation + " (stopwords cleaned)",
        )

    def _fallback_rewrite(self, query: str) -> RewrittenQueries:
        """
        Fallback query rewriting using simple stopword removal.

        Args:
            query: Original query

        Returns:
            Basic rewritten query (no synonyms; key_terms are the first
            five surviving tokens).
        """
        logger.info("Using fallback query rewriting (stopword removal)")

        # Tokenize and filter stopwords; very short tokens (< 3 chars) are
        # dropped too, since they rarely carry retrieval signal.
        tokens = re.findall(r'\w+', query.lower())
        filtered = [t for t in tokens if t not in self.STOPWORDS and len(t) >= 3]

        primary_query = " ".join(filtered) if filtered else query

        return RewrittenQueries(
            primary_query=primary_query,
            synonyms=[],
            key_terms=filtered[:5],
            explanation="Fallback: removed stopwords only"
        )

    def build_expanded_query(self, rewritten: RewrittenQueries) -> str:
        """
        Build an expanded query combining primary query and key terms.

        Args:
            rewritten: Rewritten query data

        Returns:
            Expanded query string for BM25 search
        """
        # Combine primary query with key terms (weighted more)
        parts = [rewritten.primary_query]

        # Add key terms only if not already present (case-insensitive substring check)
        for term in rewritten.key_terms:
            if term.lower() not in rewritten.primary_query.lower():
                parts.append(term)

        return " ".join(parts)

    def get_all_variants(self, rewritten: RewrittenQueries) -> List[str]:
        """
        Get all query variants for multi-query retrieval.

        Args:
            rewritten: Rewritten query data

        Returns:
            List of query variants: primary query, up to two synonyms,
            and the expanded query (if distinct).
        """
        variants = [rewritten.primary_query]

        # Add synonym variants
        variants.extend(rewritten.synonyms[:2])  # Limit to top 2

        # Add expanded query
        expanded = self.build_expanded_query(rewritten)
        if expanded not in variants:
            variants.append(expanded)

        return variants
|
test_query_debug.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Test script to debug query processing for 'Was kostet eine Namensänderung?'"""
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
import logging
|
| 5 |
+
import sys
|
| 6 |
+
from src.config import get_config
|
| 7 |
+
from src.indexing.indexer import DocumentIndexer
|
| 8 |
+
from src.pipeline.orchestrator import RAGOrchestrator
|
| 9 |
+
|
| 10 |
+
# Configure logging to see all DEBUG messages
|
| 11 |
+
logging.basicConfig(
|
| 12 |
+
level=logging.INFO,
|
| 13 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
| 14 |
+
handlers=[logging.StreamHandler(sys.stdout)]
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
# Set specific loggers to INFO to see debug messages
|
| 18 |
+
logging.getLogger('src.retrieval.hybrid_retriever').setLevel(logging.INFO)
|
| 19 |
+
logging.getLogger('src.retrieval.query_rewriter').setLevel(logging.INFO)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
async def test_query():
|
| 23 |
+
"""Test the problematic query."""
|
| 24 |
+
print("\n" + "="*80)
|
| 25 |
+
print("Testing query: 'Was kostet eine Namensänderung?'")
|
| 26 |
+
print("="*80 + "\n")
|
| 27 |
+
|
| 28 |
+
# Load config
|
| 29 |
+
config = get_config()
|
| 30 |
+
|
| 31 |
+
# Initialize indexer (this connects to document store)
|
| 32 |
+
indexer = DocumentIndexer(config)
|
| 33 |
+
|
| 34 |
+
# Initialize orchestrator
|
| 35 |
+
orchestrator = RAGOrchestrator(config, indexer)
|
| 36 |
+
|
| 37 |
+
# Test query
|
| 38 |
+
query = "Was kostet eine Namensänderung?"
|
| 39 |
+
|
| 40 |
+
print(f"\n>>> Running query: '{query}'\n")
|
| 41 |
+
|
| 42 |
+
try:
|
| 43 |
+
result = await orchestrator.process_query(query)
|
| 44 |
+
|
| 45 |
+
print("\n" + "="*80)
|
| 46 |
+
print("RESULTS SUMMARY")
|
| 47 |
+
print("="*80)
|
| 48 |
+
print(f"Documents retrieved: {len(result.retrieved_docs)}")
|
| 49 |
+
print(f"Processing time: {result.processing_time:.2f}s")
|
| 50 |
+
print(f"\nIntent detected:")
|
| 51 |
+
print(f" - Action: {result.intent.action_type}")
|
| 52 |
+
print(f" - Topic: {result.intent.topic}")
|
| 53 |
+
print(f" - Language: {result.intent.language}")
|
| 54 |
+
|
| 55 |
+
if result.retrieved_docs:
|
| 56 |
+
print(f"\nTop 3 retrieved documents:")
|
| 57 |
+
for i, doc in enumerate(result.retrieved_docs[:3], 1):
|
| 58 |
+
print(f"\n [{i}] Score: {doc.get('score', 'N/A'):.4f}")
|
| 59 |
+
print(f" Source: {doc.get('meta', {}).get('source_file', 'Unknown')}")
|
| 60 |
+
print(f" Preview: {doc.get('content', '')[:150]}...")
|
| 61 |
+
|
| 62 |
+
print(f"\nEmail draft preview:")
|
| 63 |
+
print(f" Subject: {result.email_draft.subject}")
|
| 64 |
+
print(f" Body (first 200 chars): {result.email_draft.body[:200]}...")
|
| 65 |
+
|
| 66 |
+
except Exception as e:
|
| 67 |
+
print(f"\n❌ Error: {e}")
|
| 68 |
+
import traceback
|
| 69 |
+
traceback.print_exc()
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
if __name__ == "__main__":
|
| 73 |
+
asyncio.run(test_query())
|
test_retrieval_simple.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Simple test script to check retrieval for the problematic query."""
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
import sys
|
| 5 |
+
|
| 6 |
+
# Set up logging
|
| 7 |
+
logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(message)s')
|
| 8 |
+
|
| 9 |
+
# Import after logging is configured
|
| 10 |
+
from src.config import get_config
|
| 11 |
+
from src.indexing.memory_indexer import MemoryDocumentIndexer
|
| 12 |
+
from src.retrieval.memory_retriever import MemoryRetriever
|
| 13 |
+
|
| 14 |
+
def test_retrieval():
|
| 15 |
+
"""Test retrieval for the problematic query."""
|
| 16 |
+
|
| 17 |
+
query = "Wie kann ich einen Rückzug machen? Wie kann ich mich vom Studium abmelden?"
|
| 18 |
+
|
| 19 |
+
print("="*80)
|
| 20 |
+
print(f"Testing retrieval for query:")
|
| 21 |
+
print(f" '{query}'")
|
| 22 |
+
print("="*80)
|
| 23 |
+
|
| 24 |
+
# Load config
|
| 25 |
+
config = get_config()
|
| 26 |
+
|
| 27 |
+
# Initialize indexer and retriever
|
| 28 |
+
print("\nInitializing document store...")
|
| 29 |
+
indexer = MemoryDocumentIndexer(llm_config=config.llm)
|
| 30 |
+
|
| 31 |
+
# Load documents from JSON
|
| 32 |
+
import json
|
| 33 |
+
from haystack import Document as HaystackDoc
|
| 34 |
+
from pathlib import Path
|
| 35 |
+
|
| 36 |
+
json_path = Path("documents_indexed.json")
|
| 37 |
+
if json_path.exists():
|
| 38 |
+
print(f"Loading documents from {json_path}...")
|
| 39 |
+
with open(json_path) as f:
|
| 40 |
+
docs_data = json.load(f)
|
| 41 |
+
|
| 42 |
+
documents = []
|
| 43 |
+
for doc_data in docs_data:
|
| 44 |
+
doc = HaystackDoc(
|
| 45 |
+
id=doc_data["id"],
|
| 46 |
+
content=doc_data["content"],
|
| 47 |
+
embedding=doc_data.get("embedding"),
|
| 48 |
+
meta=doc_data.get("meta", {})
|
| 49 |
+
)
|
| 50 |
+
documents.append(doc)
|
| 51 |
+
|
| 52 |
+
indexer.document_store.write_documents(documents)
|
| 53 |
+
print(f" Loaded {len(documents)} documents")
|
| 54 |
+
else:
|
| 55 |
+
print(f"ERROR: {json_path} not found!")
|
| 56 |
+
return
|
| 57 |
+
|
| 58 |
+
print(f" Total documents in store: {indexer.document_store.count_documents()}")
|
| 59 |
+
|
| 60 |
+
# Initialize retriever
|
| 61 |
+
print("\nInitializing retriever...")
|
| 62 |
+
retriever = MemoryRetriever(
|
| 63 |
+
document_store=indexer.document_store,
|
| 64 |
+
llm_config=config.llm,
|
| 65 |
+
retrieval_config=config.retrieval,
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
# Perform retrieval
|
| 69 |
+
print(f"\nPerforming retrieval...")
|
| 70 |
+
docs = retriever.retrieve(query)
|
| 71 |
+
|
| 72 |
+
print(f"\nResults: {len(docs)} documents retrieved")
|
| 73 |
+
print("="*80)
|
| 74 |
+
|
| 75 |
+
if len(docs) == 0:
|
| 76 |
+
print("❌ NO DOCUMENTS RETRIEVED!")
|
| 77 |
+
print("\nThis is the problem - retrieval is returning 0 results.")
|
| 78 |
+
else:
|
| 79 |
+
print("✅ Documents were retrieved successfully\n")
|
| 80 |
+
for i, doc in enumerate(docs, 1):
|
| 81 |
+
score = doc.score if doc.score else 0.0
|
| 82 |
+
source = doc.meta.get('source_file', 'Unknown')
|
| 83 |
+
section = doc.meta.get('section', 'N/A')
|
| 84 |
+
|
| 85 |
+
print(f"\n[{i}] Score: {score:.4f}")
|
| 86 |
+
print(f" Source: {source}")
|
| 87 |
+
print(f" Section: {section}")
|
| 88 |
+
print(f" Content preview: {doc.content[:150]}...")
|
| 89 |
+
|
| 90 |
+
print("\n" + "="*80)
|
| 91 |
+
|
| 92 |
+
if __name__ == "__main__":
|
| 93 |
+
test_retrieval()
|