Spaces:

jashdoshi77
/

notebooklm-fast

Running

App Files Files Community

jashdoshi77 committed 3 days ago

Commit

60ff586

1 Parent(s): 9099b59

ai context

Browse files

Files changed (2) hide show

services/chroma_service.py +98 -10
services/rag_service.py +259 -47

services/chroma_service.py CHANGED Viewed

@@ -482,21 +482,49 @@ class ChromaService:
     # ==================== Conversation Memory Operations ====================
     def store_conversation(self, user_id: str, role: str, content: str,
-                           bucket_id: str = "", chat_id: str = "") -> dict:
-        """Store a conversation message for persistent memory"""
         import time
         msg_id = f"{user_id}_{int(time.time() * 1000)}"
         self.conversations_collection.add(
             ids=[msg_id],
             documents=[content],
-            metadatas=[{
-                "user_id": user_id,
-                "role": role,  # 'user' or 'assistant'
-                "bucket_id": bucket_id,
-                "chat_id": chat_id,
-                "timestamp": time.time()
-            }]
         )
         return {"msg_id": msg_id}
@@ -525,13 +553,73 @@ class ChromaService:
                 "content": results['documents'][i],
                 "timestamp": results['metadatas'][i]['timestamp'],
                 "bucket_id": results['metadatas'][i].get('bucket_id', ''),
-                "chat_id": results['metadatas'][i].get('chat_id', '')
             })
         # Sort by timestamp (newest last) and limit
         messages.sort(key=lambda x: x['timestamp'])
         return messages[-limit:]
     def clear_conversation(self, user_id: str, bucket_id: str = None) -> bool:
         """Clear conversation history for a user"""
         if bucket_id:

     # ==================== Conversation Memory Operations ====================
     def store_conversation(self, user_id: str, role: str, content: str,
+                           bucket_id: str = "", chat_id: str = "",
+                           query_context: dict = None, format_preference: str = None) -> dict:
+        """Store a conversation message for persistent memory.
+
+        The message text is stored as the document; everything else goes into
+        the record's metadata.
+
+        Args:
+            user_id: User ID
+            role: 'user' or 'assistant'
+            content: Message content
+            bucket_id: Optional bucket ID
+            chat_id: Optional chat session ID
+            query_context: Optional dict with query data for format reuse.
+                Only stored for assistant messages, as a JSON string, and only
+                when the serialized form is at most 5000 characters.
+            format_preference: Optional format preference used
+
+        Returns:
+            dict with the generated message id under "msg_id".
+        """
         import time
+        import json
+        # NOTE(review): msg_id only has millisecond resolution, so two messages
+        # stored for the same user within one millisecond get the same id.
+        # Confirm how the collection treats duplicate ids.
         msg_id = f"{user_id}_{int(time.time() * 1000)}"
+        metadata = {
+            "user_id": user_id,
+            "role": role,  # 'user' or 'assistant'
+            "bucket_id": bucket_id,
+            "chat_id": chat_id,
+            "timestamp": time.time()
+        }
+        # Store format preference if provided
+        if format_preference:
+            metadata["format_preference"] = format_preference
+        # Store query context as JSON string (for format reuse).
+        # Limited to 5000 chars to avoid storage issues; larger contexts are
+        # skipped, which means a later format-only request cannot reuse them.
+        max_context_chars = 5000
+        if query_context and role == 'assistant':
+            try:
+                context_str = json.dumps(query_context)
+            except (TypeError, ValueError, RecursionError) as e:
+                # Best effort: an unserializable context must not block storing the message.
+                print(f"[QUERY CONTEXT] Could not serialize query context: {e}")
+            else:
+                if len(context_str) <= max_context_chars:
+                    metadata["query_context"] = context_str
+                else:
+                    print(f"[QUERY CONTEXT] Skipping query context: {len(context_str)} chars exceeds {max_context_chars}")
         self.conversations_collection.add(
             ids=[msg_id],
             documents=[content],
+            metadatas=[metadata]
         )
         return {"msg_id": msg_id}
                 "content": results['documents'][i],
                 "timestamp": results['metadatas'][i]['timestamp'],
                 "bucket_id": results['metadatas'][i].get('bucket_id', ''),
+                "chat_id": results['metadatas'][i].get('chat_id', ''),
+                "format_preference": results['metadatas'][i].get('format_preference', ''),
+                "query_context": results['metadatas'][i].get('query_context', '')
             })
         # Sort by timestamp (newest last) and limit
         messages.sort(key=lambda x: x['timestamp'])
         return messages[-limit:]
+    def get_last_query_context(self, user_id: str, chat_id: str) -> dict:
+        """
+        Get the most recent query's data context for format reuse.
+
+        Fetches this user's assistant messages for the given chat and decodes
+        the ``query_context`` JSON string of the newest message that has one.
+        Entries that fail to decode, or that do not decode to a dict, are
+        skipped in favour of the next-newest message.
+
+        Args:
+            user_id: User ID the messages were stored under
+            chat_id: Chat session ID to search
+
+        Returns dict with:
+        - context: The decoded query context dict (None when not found)
+        - format_preference: The format stored with that message
+          ('' if none was stored, None when not found)
+        - found: True if context was found
+
+        Never raises: any lookup error is logged and reported as not found.
+        """
+        import json
+
+        def not_found() -> dict:
+            # Fresh dict per call so callers can mutate the result safely.
+            return {"found": False, "context": None, "format_preference": None}
+
+        try:
+            # Get assistant messages for this chat
+            where_clause = {
+                "$and": [
+                    {"user_id": user_id},
+                    {"chat_id": chat_id},
+                    {"role": "assistant"}
+                ]
+            }
+            results = self.conversations_collection.get(where=where_clause)
+            if not results['ids']:
+                return not_found()
+            # Keep only messages that carry a query_context, newest first
+            candidates = [meta for meta in results['metadatas'] if meta.get('query_context')]
+            candidates.sort(key=lambda meta: meta.get('timestamp', 0), reverse=True)
+            for meta in candidates:
+                try:
+                    context = json.loads(meta['query_context'])
+                except (TypeError, ValueError):
+                    # Corrupt or non-string payload: fall back to an older message
+                    continue
+                if not isinstance(context, dict):
+                    # The caller reads the context with .get(), so only dicts are usable
+                    continue
+                return {
+                    "found": True,
+                    "context": context,
+                    "format_preference": meta.get('format_preference', '')
+                }
+            return not_found()
+        except Exception as e:
+            print(f"[QUERY CONTEXT] Error retrieving last context: {e}")
+            return not_found()
     def clear_conversation(self, user_id: str, bucket_id: str = None) -> bool:
         """Clear conversation history for a user"""
         if bucket_id:

services/rag_service.py CHANGED Viewed

@@ -221,6 +221,8 @@ class RAGService:
         - limit: number of results (or None for all)
         - calculation: sum|average|max|min (or None)
         - calculation_field: field for calculation
         """
         import json
@@ -233,6 +235,16 @@ CRITICAL RULES:
 3. When user asks for "top N" of something, set both limit AND sort_by appropriately
 4. Keywords like "manufacturing", "healthcare", "retail", "IT", "construction" are INDUSTRIES - put them in filters
 Available fields for filtering:
 - is_manufacturing (boolean): True ONLY if asking specifically about manufacturing flag
 - policy_type (string): fire, marine, motor, health, liability, property, engineering, etc.
@@ -262,36 +274,23 @@ Return ONLY valid JSON (no markdown, no explanation):
   "sort_order": "desc" or "asc",
   "limit": number or null,
   "calculation": "sum|average|max|min|count" or null,
-  "calculation_field": "premium_amount|sum_insured" or null
 }
 Examples:
 Query: "top 5 manufacturing policies by premium"
-{"intent":"rank","needs_metadata":true,"filters":{"industry":"manufacturing"},"sort_by":"premium_amount","sort_order":"desc","limit":5,"calculation":null,"calculation_field":null}
-Query: "top 5 manufacturing and top 5 healthcare policies"
-{"intent":"compare","needs_metadata":true,"filters":{"industry":"manufacturing, healthcare"},"sort_by":"premium_amount","sort_order":"desc","limit":5,"calculation":null,"calculation_field":null}
-Query: "compare manufacturing and healthcare industries"
-{"intent":"compare","needs_metadata":true,"filters":{"industry":"manufacturing, healthcare"},"sort_by":"sum_insured","sort_order":"desc","limit":10,"calculation":null,"calculation_field":null}
-Query: "list policies from IT and retail sectors"
-{"intent":"list","needs_metadata":true,"filters":{"industry":"it, retail"},"sort_by":null,"sort_order":"desc","limit":null,"calculation":null,"calculation_field":null}
-Query: "total sum insured for all fire policies"
-{"intent":"calculate","needs_metadata":true,"filters":{"policy_type":"fire"},"sort_by":null,"sort_order":"desc","limit":null,"calculation":"sum","calculation_field":"sum_insured"}
-Query: "what is covered in the ABC policy document?"
-{"intent":"specific","needs_metadata":false,"filters":{},"sort_by":null,"sort_order":"desc","limit":null,"calculation":null,"calculation_field":null}
-Query: "list all policies renewing in 2026"
-{"intent":"list","needs_metadata":true,"filters":{"renewal_year":2026},"sort_by":"renewal_date","sort_order":"asc","limit":null,"calculation":null,"calculation_field":null}
-Query: "how many manufacturing companies do we have?"
-{"intent":"count","needs_metadata":true,"filters":{"industry":"manufacturing"},"sort_by":null,"sort_order":"desc","limit":null,"calculation":"count","calculation_field":null}
-Query: "top 5 health policies by sum insured"
-{"intent":"rank","needs_metadata":true,"filters":{"policy_type":"health"},"sort_by":"sum_insured","sort_order":"desc","limit":5,"calculation":null,"calculation_field":null}"""
         messages = [
             {"role": "system", "content": system_prompt},
@@ -304,12 +303,19 @@ Query: "top 5 health policies by sum insured"
             # Parse JSON response
             parsed = json.loads(response.strip())
             print(f"[AI QUERY PARSER] Parsed: {json.dumps(parsed, indent=2)}")
             return parsed
         except Exception as e:
             print(f"[AI QUERY PARSER] Error: {e}, falling back to pattern matching")
-            # Fallback to basic detection
             return {
                 "intent": "specific",
                 "needs_metadata": False,
@@ -318,7 +324,9 @@ Query: "top 5 health policies by sum insured"
                 "sort_order": "desc",
                 "limit": None,
                 "calculation": None,
-                "calculation_field": None
             }
     def _call_deepseek_sync(self, messages: list, max_tokens: int = 500) -> str:
@@ -348,6 +356,130 @@ Query: "top 5 health policies by sum insured"
         else:
             raise Exception(f"DeepSeek API error: {response.status_code}")
     def _detect_query_type(self, query: str, history: list[dict] = None) -> str:
         """
         Detect the type of query to optimize retrieval and response.
@@ -944,14 +1076,45 @@ Summary: {summary[:300] if summary else 'No summary available'}
         """
         print(f"[METADATA STREAM] Handling AI-parsed query: intent={parsed.get('intent')}")
-        # Step 1: Get filtered, sorted, and calculated metadata using AI-parsed parameters
-        result = self._handle_metadata_query(user_id, bucket_id, query, parsed)
-        context = result.get('context', '')
-        sources = result.get('sources', {})
-        total_docs = result.get('total_documents', 0)
-        total_before = result.get('total_before_filter', 0)
-        calculation = result.get('calculation')
         # Check if we have any data
         if not context or total_docs == 0:
@@ -971,6 +1134,10 @@ Summary: {summary[:300] if summary else 'No summary available'}
         # Step 2: Build AI prompt based on parsed intent
         intent = parsed.get('intent', 'list')
         if intent == 'count':
             system_prompt = f"""You are Iribl AI, a document analysis assistant answering a COUNT query.
@@ -978,7 +1145,9 @@ CRITICAL INSTRUCTIONS:
 1. The count has been computed: {total_docs} documents match the criteria.
 2. State the count clearly and directly.
 3. If filters were applied, mention what was filtered.
-4. Brief context about what was counted is helpful."""
         elif intent == 'calculate':
             calc_info = ""
@@ -990,7 +1159,9 @@ CRITICAL INSTRUCTIONS:
 1. The calculation results have been computed from {total_docs} documents.{calc_info}
 2. Present the numbers clearly with proper formatting (₹ for currency, commas for thousands).
 3. Explain what the numbers mean in business context.
-4. Include document counts to show the calculation scope.
 Present the data accurately - these are pre-computed from actual document metadata."""
@@ -1004,8 +1175,9 @@ CRITICAL INSTRUCTIONS:
 1. You have been given the top {limit} documents sorted by {sort_by} ({sort_order}).
 2. Present them as a clear ranked list with the ranking number.
 3. Highlight the key metric ({sort_by}) for each item.
-4. Format nicely with headers, bold for values, and bullet points.
-5. Include all {limit} items - do not truncate."""
         elif intent == 'compare':
             system_prompt = f"""You are Iribl AI, a document analysis assistant answering a COMPARISON query.
@@ -1013,9 +1185,10 @@ CRITICAL INSTRUCTIONS:
 CRITICAL INSTRUCTIONS:
 1. You have metadata for {total_docs} relevant documents.
 2. Create a clear comparison highlighting differences and similarities.
-3. Use tables or side-by-side format where helpful.
-4. Focus on the key metrics mentioned in the query.
-5. Be thorough but organized."""
         else:  # list, summarize, or other
             system_prompt = f"""You are Iribl AI, a document analysis assistant. You are answering a query that requires information from {total_docs} documents.
@@ -1023,16 +1196,44 @@ CRITICAL INSTRUCTIONS:
 CRITICAL INSTRUCTIONS:
 1. You have been given metadata for {total_docs} documents (from {total_before} total).
 2. Your answer must be COMPREHENSIVE - include ALL relevant items from the data provided.
-3. Format your response clearly with headers, bullet points, and bold text.
-4. For "list" queries, actually list ALL matching items with key details.
-5. Organize information logically (by type, by company, by date, etc.).
-6. For "summarize" queries, provide a concise overview with key statistics.
 Do NOT say information is missing - you have the filtered list. Do NOT ask for more documents."""
-        # Step 3: Build messages
         messages = [{"role": "system", "content": system_prompt}]
         user_message = f"""Based on the following document metadata and any calculations, answer my question.
 DOCUMENT DATA:
@@ -1040,7 +1241,7 @@ DOCUMENT DATA:
 QUESTION: {query}
-Instructions: Provide a complete, well-formatted answer based on ALL the data above."""
         messages.append({"role": "user", "content": user_message})
@@ -1081,7 +1282,7 @@ Instructions: Provide a complete, well-formatted answer based on ALL the data ab
                     print(f"[METADATA STREAM] Model {model_key} failed: {e}")
                     continue
-        # Step 5: Store conversation
         if full_response and chat_id:
             try:
                 chroma_service.store_conversation(
@@ -1091,13 +1292,24 @@ Instructions: Provide a complete, well-formatted answer based on ALL the data ab
                     bucket_id=bucket_id or "",
                     chat_id=chat_id
                 )
                 chroma_service.store_conversation(
                     user_id=user_id,
                     role="assistant",
                     content=full_response,
                     bucket_id=bucket_id or "",
-                    chat_id=chat_id
                 )
             except Exception as e:
                 print(f"[METADATA STREAM] Failed to store conversation: {e}")

         - limit: number of results (or None for all)
         - calculation: sum|average|max|min (or None)
         - calculation_field: field for calculation
+        - format_preference: table|list|bullets|paragraph (or None for default)
+        - is_format_change: True if query is asking to reformat previous answer
         """
         import json
 3. When user asks for "top N" of something, set both limit AND sort_by appropriately
 4. Keywords like "manufacturing", "healthcare", "retail", "IT", "construction" are INDUSTRIES - put them in filters
+FORMAT DETECTION (NEW):
+1. Detect if user explicitly asks for a specific format:
+   - "as a table", "in table format", "show table" -> format_preference: "table"
+   - "as a list", "list format", "numbered list" -> format_preference: "list"
+   - "bullet points", "bullets" -> format_preference: "bullets"
+   - "in paragraph", "prose", "narrative" -> format_preference: "paragraph"
+2. Detect if query is ONLY asking to reformat (no new data request):
+   - "show that as a table", "convert to list", "in bullet points" -> is_format_change: true
+   - These typically use pronouns like "that", "this", "it" or "the above"
 Available fields for filtering:
 - is_manufacturing (boolean): True ONLY if asking specifically about manufacturing flag
 - policy_type (string): fire, marine, motor, health, liability, property, engineering, etc.
   "sort_order": "desc" or "asc",
   "limit": number or null,
   "calculation": "sum|average|max|min|count" or null,
+  "calculation_field": "premium_amount|sum_insured" or null,
+  "format_preference": "table|list|bullets|paragraph" or null,
+  "is_format_change": true or false
 }
 Examples:
 Query: "top 5 manufacturing policies by premium"
+{"intent":"rank","needs_metadata":true,"filters":{"industry":"manufacturing"},"sort_by":"premium_amount","sort_order":"desc","limit":5,"calculation":null,"calculation_field":null,"format_preference":null,"is_format_change":false}
+Query: "show that as a table"
+{"intent":"list","needs_metadata":false,"filters":{},"sort_by":null,"sort_order":"desc","limit":null,"calculation":null,"calculation_field":null,"format_preference":"table","is_format_change":true}
+Query: "list all fire policies in bullet points"
+{"intent":"list","needs_metadata":true,"filters":{"policy_type":"fire"},"sort_by":null,"sort_order":"desc","limit":null,"calculation":null,"calculation_field":null,"format_preference":"bullets","is_format_change":false}
+Query: "top 5 health policies by sum insured as a table"
+{"intent":"rank","needs_metadata":true,"filters":{"policy_type":"health"},"sort_by":"sum_insured","sort_order":"desc","limit":5,"calculation":null,"calculation_field":null,"format_preference":"table","is_format_change":false}"""
         messages = [
             {"role": "system", "content": system_prompt},
             # Parse JSON response
             parsed = json.loads(response.strip())
+            # Ensure new fields have defaults if AI doesn't include them
+            if 'format_preference' not in parsed:
+                parsed['format_preference'] = None
+            if 'is_format_change' not in parsed:
+                parsed['is_format_change'] = False
             print(f"[AI QUERY PARSER] Parsed: {json.dumps(parsed, indent=2)}")
             return parsed
         except Exception as e:
             print(f"[AI QUERY PARSER] Error: {e}, falling back to pattern matching")
+            # Fallback to basic detection with new fields
             return {
                 "intent": "specific",
                 "needs_metadata": False,
                 "sort_order": "desc",
                 "limit": None,
                 "calculation": None,
+                "calculation_field": None,
+                "format_preference": None,
+                "is_format_change": False
             }
     def _call_deepseek_sync(self, messages: list, max_tokens: int = 500) -> str:
         else:
             raise Exception(f"DeepSeek API error: {response.status_code}")
+    def _is_format_only_request(self, query: str, parsed: dict) -> bool:
+        """
+        Detect if query is only asking to reformat the previous answer.
+
+        Uses the AI parsing result first, then falls back to pattern matching:
+        the query must contain one of the known reformat phrases AND a word
+        that refers back to the previous answer ("that", "this", "it", ...).
+
+        Args:
+            query: Raw user query
+            parsed: AI-parsed query dict; its 'is_format_change' flag is honoured
+
+        Returns True if this is a format-change-only request.
+        """
+        import re
+        # First check AI parsing result
+        if parsed and parsed.get('is_format_change', False):
+            return True
+        # Fallback: pattern matching for common reformat requests
+        query_lower = query.lower().strip()
+        # Words indicating a reference to the previous answer. These are matched
+        # as whole words: a substring test would find "it" inside "with" or
+        # "liability" and misclassify new data requests as format-only.
+        reference_words = {'that', 'this', 'it', 'them', 'above', 'previous'}
+        query_words = set(re.findall(r"[a-z]+", query_lower))
+        if query_words.isdisjoint(reference_words):
+            return False
+        # Patterns that indicate format-only requests (with pronouns or references)
+        format_only_patterns = [
+            'show that as', 'show this as', 'show it as',
+            'convert to', 'change to', 'format as',
+            'in table format', 'as a table', 'as table',
+            'in list format', 'as a list', 'as list',
+            'in bullet', 'as bullet', 'with bullets',
+            'reformat', 'reformatted',
+            'same thing but', 'same data but', 'same info but'
+        ]
+        for pattern in format_only_patterns:
+            if pattern in query_lower:
+                print(f"[FORMAT DETECT] Detected format-only request via pattern: '{pattern}'")
+                return True
+        return False
+    def _validate_metadata(self, metadata: dict) -> dict:
+        """
+        Sanity check metadata values and flag anomalies.
+
+        Returns a shallow copy of ``metadata``; the input dict is not modified.
+        Warnings are printed for suspicious values. Only these values are
+        corrected in the copy: a negative sum_insured or premium_amount is
+        reset to 0, and a renewal_year above 2100 is reset to 0.
+
+        Checks:
+        - Negative monetary amounts
+        - Dates too far in future (> 2100) or past (< 1900)
+        - Extremely large numerical values
+        """
+        import re
+        validated = metadata.copy()
+        warnings = []
+        # Monetary fields share one rule; only the "extremely large" ceiling differs
+        # (1 quadrillion for sum_insured, 1 trillion for premium_amount).
+        for field, ceiling in (('sum_insured', 1e15), ('premium_amount', 1e12)):
+            amount = metadata.get(field, 0)
+            if not isinstance(amount, (int, float)):
+                continue
+            if amount < 0:
+                warnings.append(f"Negative {field}: {amount}")
+                validated[field] = 0
+            elif amount > ceiling:
+                warnings.append(f"Extremely large {field}: {amount}")
+        # Check renewal_year (0 or missing means "not set" and is skipped)
+        renewal_year = metadata.get('renewal_year', 0)
+        if isinstance(renewal_year, int) and renewal_year > 0:
+            if renewal_year < 1900:
+                # NOTE(review): too-old years are only warned about, while
+                # too-far-future years are also reset to 0 - confirm this is intended.
+                warnings.append(f"Renewal year too old: {renewal_year}")
+            elif renewal_year > 2100:
+                warnings.append(f"Renewal year too far in future: {renewal_year}")
+                validated['renewal_year'] = 0
+        # Check dates: take the first standalone 4-digit number as the year.
+        # Matching any 4-digit run (not just 19xx/20xx/21xx) is what lets the
+        # "< 1900" bound below actually fire.
+        year_pattern = re.compile(r'(?<!\d)\d{4}(?!\d)')
+        for date_field in ['policy_start_date', 'policy_end_date', 'renewal_date']:
+            date_value = metadata.get(date_field, '')
+            if not (date_value and isinstance(date_value, str)):
+                continue
+            year_match = year_pattern.search(date_value)
+            if year_match and not 1900 <= int(year_match.group()) <= 2100:
+                warnings.append(f"Invalid year in {date_field}: {date_value}")
+        # Log warnings
+        if warnings:
+            doc_title = metadata.get('document_title', 'Unknown')
+            print(f"[METADATA VALIDATION] Warnings for '{doc_title}':")
+            for w in warnings:
+                print(f"  - {w}")
+        return validated
+    def _get_format_instructions(self, format_preference: str) -> str:
+        """
+        Look up the prompt snippet that tells the model how to lay out its answer.
+
+        Known preferences are "table", "list", "bullets" and "paragraph"; each maps
+        to a short block of markdown-oriented formatting guidance. Any other value
+        yields an empty string.
+        """
+        table_guidance = """FORMAT: Present data in a markdown table.
+- Use | column | headers | with |---| separator line
+- Keep columns aligned and consistent
+- Include all requested data in table rows"""
+        list_guidance = """FORMAT: Present as a numbered list.
+1. Each item on its own line with number prefix
+2. Include key details after the number
+3. Use consistent formatting for all items"""
+        bullets_guidance = """FORMAT: Use bullet points.
+- Each item as a bullet point
+- Sub-details can be indented bullets
+- Keep bullets concise and scannable"""
+        paragraph_guidance = """FORMAT: Write in flowing prose paragraphs.
+- Use complete sentences and natural language
+- Group related information into paragraphs
+- Avoid lists or tables unless absolutely necessary"""
+        guidance_by_format = {
+            "table": table_guidance,
+            "list": list_guidance,
+            "bullets": bullets_guidance,
+            "paragraph": paragraph_guidance,
+        }
+        try:
+            return guidance_by_format[format_preference]
+        except KeyError:
+            # Unknown or missing preference: no extra formatting guidance
+            return ""
     def _detect_query_type(self, query: str, history: list[dict] = None) -> str:
         """
         Detect the type of query to optimize retrieval and response.
         """
         print(f"[METADATA STREAM] Handling AI-parsed query: intent={parsed.get('intent')}")
+        # Get format preference from parsed query
+        format_preference = parsed.get('format_preference')
+        is_format_change = self._is_format_only_request(query, parsed)
+        print(f"[METADATA STREAM] Format preference: {format_preference}, is_format_change: {is_format_change}")
+        # Step 1: Check if this is a format-change-only request (reuse previous data)
+        context = None
+        sources = {}
+        total_docs = 0
+        total_before = 0
+        calculation = None
+        if is_format_change and chat_id:
+            # Try to get previous query's context data
+            print("[METADATA STREAM] Format-only request detected, attempting to reuse previous data...")
+            try:
+                prev_context = chroma_service.get_last_query_context(user_id, chat_id)
+                if prev_context.get('found') and prev_context.get('context'):
+                    cached_data = prev_context['context']
+                    context = cached_data.get('context', '')
+                    sources = cached_data.get('sources', {})
+                    total_docs = cached_data.get('total_documents', 0)
+                    total_before = cached_data.get('total_before_filter', 0)
+                    calculation = cached_data.get('calculation')
+                    print(f"[METADATA STREAM] Reusing cached data: {total_docs} documents")
+            except Exception as e:
+                print(f"[METADATA STREAM] Failed to get cached context: {e}")
+        # If no cached data available (or not a format change), get fresh data
+        if not context:
+            print("[METADATA STREAM] Getting fresh data from metadata query...")
+            result = self._handle_metadata_query(user_id, bucket_id, query, parsed)
+            context = result.get('context', '')
+            sources = result.get('sources', {})
+            total_docs = result.get('total_documents', 0)
+            total_before = result.get('total_before_filter', 0)
+            calculation = result.get('calculation')
         # Check if we have any data
         if not context or total_docs == 0:
         # Step 2: Build AI prompt based on parsed intent
         intent = parsed.get('intent', 'list')
+        # Get format-specific instructions if user specified a preference
+        format_instructions = self._get_format_instructions(format_preference) if format_preference else ""
+        conciseness_directive = "\n\nIMPORTANT: Be concise and direct. No preambles or verbose explanations. Get straight to the formatted answer." if format_preference else ""
         if intent == 'count':
             system_prompt = f"""You are Iribl AI, a document analysis assistant answering a COUNT query.
 1. The count has been computed: {total_docs} documents match the criteria.
 2. State the count clearly and directly.
 3. If filters were applied, mention what was filtered.
+4. Brief context about what was counted is helpful.{conciseness_directive}
+{format_instructions}"""
         elif intent == 'calculate':
             calc_info = ""
 1. The calculation results have been computed from {total_docs} documents.{calc_info}
 2. Present the numbers clearly with proper formatting (₹ for currency, commas for thousands).
 3. Explain what the numbers mean in business context.
+4. Include document counts to show the calculation scope.{conciseness_directive}
+{format_instructions}
 Present the data accurately - these are pre-computed from actual document metadata."""
 1. You have been given the top {limit} documents sorted by {sort_by} ({sort_order}).
 2. Present them as a clear ranked list with the ranking number.
 3. Highlight the key metric ({sort_by}) for each item.
+4. Include all {limit} items - do not truncate.{conciseness_directive}
+{format_instructions if format_instructions else "FORMAT: Use numbered list format with bold for values."}"""
         elif intent == 'compare':
             system_prompt = f"""You are Iribl AI, a document analysis assistant answering a COMPARISON query.
 CRITICAL INSTRUCTIONS:
 1. You have metadata for {total_docs} relevant documents.
 2. Create a clear comparison highlighting differences and similarities.
+3. Focus on the key metrics mentioned in the query.
+4. Be thorough but organized.{conciseness_directive}
+{format_instructions if format_instructions else "FORMAT: Use tables or side-by-side format where helpful."}"""
         else:  # list, summarize, or other
             system_prompt = f"""You are Iribl AI, a document analysis assistant. You are answering a query that requires information from {total_docs} documents.
 CRITICAL INSTRUCTIONS:
 1. You have been given metadata for {total_docs} documents (from {total_before} total).
 2. Your answer must be COMPREHENSIVE - include ALL relevant items from the data provided.
+3. For "list" queries, actually list ALL matching items with key details.
+4. Organize information logically (by type, by company, by date, etc.).
+5. For "summarize" queries, provide a concise overview with key statistics.{conciseness_directive}
+{format_instructions if format_instructions else "FORMAT: Use headers, bullet points, and bold text for clarity."}
 Do NOT say information is missing - you have the filtered list. Do NOT ask for more documents."""
+        # Step 3: Load conversation history for memory (CRITICAL FOR CONTEXT)
+        stored_history = []
+        if chat_id:
+            try:
+                all_history = chroma_service.get_conversation_history(
+                    user_id=user_id,
+                    bucket_id=bucket_id,
+                    limit=50
+                )
+                # Filter to only this chat's messages
+                stored_history = [msg for msg in all_history
+                                  if msg.get('chat_id', '') == chat_id]
+                stored_history = stored_history[-self.max_history:]
+                print(f"[METADATA STREAM] Loaded {len(stored_history)} history messages")
+            except Exception as e:
+                print(f"[METADATA STREAM] Failed to load history: {e}")
+        # Step 4: Build messages with conversation history
         messages = [{"role": "system", "content": system_prompt}]
+        # Add conversation history for context (CRITICAL for follow-ups)
+        for msg in stored_history:
+            messages.append({
+                "role": msg['role'],
+                "content": msg['content']
+            })
+        # Build user message with format emphasis if specified
+        format_reminder = f"\n\nREMINDER: Present the response in {format_preference} format." if format_preference else ""
         user_message = f"""Based on the following document metadata and any calculations, answer my question.
 DOCUMENT DATA:
 QUESTION: {query}
+Instructions: Provide a complete, well-formatted answer based on ALL the data above.{format_reminder}"""
         messages.append({"role": "user", "content": user_message})
                     print(f"[METADATA STREAM] Model {model_key} failed: {e}")
                     continue
+        # Step 5: Store conversation WITH query context for format reuse
         if full_response and chat_id:
             try:
                 chroma_service.store_conversation(
                     bucket_id=bucket_id or "",
                     chat_id=chat_id
                 )
+                # Store context data for potential format-change reuse
+                query_context_data = {
+                    'context': context,
+                    'sources': sources,
+                    'total_documents': total_docs,
+                    'total_before_filter': total_before,
+                    'calculation': calculation
+                }
                 chroma_service.store_conversation(
                     user_id=user_id,
                     role="assistant",
                     content=full_response,
                     bucket_id=bucket_id or "",
+                    chat_id=chat_id,
+                    query_context=query_context_data,
+                    format_preference=format_preference
                 )
+                print(f"[METADATA STREAM] Stored conversation with query context for reuse")
             except Exception as e:
                 print(f"[METADATA STREAM] Failed to store conversation: {e}")