Spaces:

Subhadip007
/

researchpilot-api

Running

App Files Files Community

Subhadip007 committed on Apr 14

Commit

294d426

1 Parent(s): f7e2e5e

fix: overhaul follow-up detection for conversation memory + add diagnostic logs

Browse files

Files changed (2) hide show

Dockerfile +1 -1
src/rag/pipeline.py +24 -7

Dockerfile CHANGED Viewed

@@ -15,7 +15,7 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Cache-bust: forces Docker to re-copy source code on every build
 # This ensures HuggingFace always gets the latest code from git
-ARG CACHEBUST=20260414_4
 # Copy source code
 COPY src/ ./src/

 # Cache-bust: forces Docker to re-copy source code on every build
 # This ensures HuggingFace always gets the latest code from git
+ARG CACHEBUST=20260414_5
 # Copy source code
 COPY src/ ./src/

src/rag/pipeline.py CHANGED Viewed

@@ -75,25 +75,40 @@ class RAGPipeline:
         history: list[ConversationTurn]
     ) -> str:
         followup_signals = [
-            "it", "that", "this", "they", "them",
-            "more", "example", "explain", "clarify",
-            "simpler", "detail", "elaborate", "again"
         ]
         question_lower = question.lower()
         is_followup = (
-            len(question.split()) < 12 and
-            any(word in question_lower for word in followup_signals)
         )
         if is_followup and history:
             last_substantial = ""
             for turn in reversed(history):
-                if turn.role == "user" and len(turn.content.split()) > 5:
                     last_substantial = turn.content
                     break
             if last_substantial:
-                return f"{last_substantial} {question}"
         return question
     def query(
@@ -179,6 +194,8 @@ class RAGPipeline:
         if not question:
             raise ValueError("Question cannot be empty")
         total_start = time.time()
         retrieval_start = time.time()

         history: list[ConversationTurn]
     ) -> str:
         followup_signals = [
+            # pronouns referring to prior context
+            "it", "that", "this", "they", "them", "those", "these",
+            # conversational follow-ups
+            "more", "example", "explain", "clarify", "elaborate",
+            "simpler", "simple", "detail", "again", "further",
+            # comprehension requests
+            "easy", "understand", "meaning", "mean", "summarize",
+            "summary", "break down", "eli5", "what about",
         ]
         question_lower = question.lower()
+        question_words = set(question_lower.split())
+        # Use word-boundary matching for single words, substring for phrases
         is_followup = (
+            len(question.split()) < 25 and
+            any(
+                signal in question_words if " " not in signal
+                else signal in question_lower
+                for signal in followup_signals
+            )
         )
         if is_followup and history:
             last_substantial = ""
             for turn in reversed(history):
+                if turn.role == "user" and len(turn.content.split()) > 3:
                     last_substantial = turn.content
                     break
             if last_substantial:
+                combined = f"{last_substantial} {question}"
+                logger.info(f"Follow-up detected. Retrieval query: '{combined[:80]}...'")
+                return combined
+        logger.info(f"Standalone query. Retrieval query: '{question[:80]}'")
         return question
     def query(
         if not question:
             raise ValueError("Question cannot be empty")
+        logger.info(f"stream_query: question='{question[:60]}', history_turns={len(history)}")
         total_start = time.time()
         retrieval_start = time.time()