Spaces:

uumerrr684
/

RAG_Chat_Flow

Sleeping

App Files Community

uumerrr684 committed on Aug 20, 2025

Commit

91099df

verified ·

1 Parent(s): aaeb58e

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -72

app.py CHANGED Viewed

@@ -312,8 +312,8 @@ class ProductionRAGSystem:
         return content[:200] + "..."
-    def generate_answer(self, query, search_results):
-        """Generate both AI and extracted answers with minimal token usage"""
         if not search_results:
             return {
                 'ai_answer': "No information found in documents.",
@@ -330,16 +330,32 @@ class ProductionRAGSystem:
         # Always generate extracted answer
         extracted_answer = self.extract_direct_answer(query, best_result['content'])
-        # Try AI answer with minimal tokens - send only extracted answer, not full chunks
         ai_answer = None
         openrouter_key = os.environ.get("OPENROUTER_API_KEY")
-        if openrouter_key:
-            # Use the extracted answer as context instead of raw chunks
-            extracted_answer = self.extract_direct_answer(query, best_result['content'])
-            # Super minimal prompt with just the extracted info
-            prompt = f"Improve: {extracted_answer}"  # Very short prompt
             try:
                 response = requests.post(
@@ -353,51 +369,52 @@ class ProductionRAGSystem:
                     json={
                         "model": "openai/gpt-3.5-turbo",
                         "messages": [{"role": "user", "content": prompt}],
-                        "max_tokens": 25,  # Very small
-                        "temperature": 0.1
                     },
-                    timeout=10
                 )
                 if response.status_code == 200:
                     ai_response = response.json()['choices'][0]['message']['content'].strip()
-                    # Use AI response if it's actually better, otherwise stick with extracted
                     ai_answer = ai_response if len(ai_response) > 10 else extracted_answer
                 elif response.status_code == 402:
-                    st.error("💳 OpenRouter credits exhausted. Using extracted answers only.")
                 else:
-                    st.error(f"API Error {response.status_code}")
             except Exception as e:
-                st.error(f"API Exception: {str(e)}")
-            return {
-                'ai_answer': ai_answer,
-                'extracted_answer': extracted_answer,
-                'sources': sources,
-                'confidence': avg_confidence,
-                'has_both': ai_answer is not None
-            }
-        else:
-            # No API key - just return extracted answer
-            return {
-                'ai_answer': None,
-                'extracted_answer': extracted_answer,
-                'sources': sources,
-                'confidence': avg_confidence,
-                'has_both': False
-            }
-def get_general_ai_response(query):
-    """Get AI response for general questions with minimal token usage"""
     openrouter_key = os.environ.get("OPENROUTER_API_KEY")
     if not openrouter_key:
         return "I can only answer questions about your uploaded documents. Please add an OpenRouter API key for general conversations."
     try:
-        # Much shorter prompt to save tokens
-        short_query = query[:50]  # Limit input length
         response = requests.post(
             "https://openrouter.ai/api/v1/chat/completions",
@@ -409,9 +426,9 @@ def get_general_ai_response(query):
             },
             json={
                 "model": "openai/gpt-3.5-turbo",
-                "messages": [{"role": "user", "content": short_query}],
-                "max_tokens": 25,  # Reduced from 150
-                "temperature": 0.7
             },
             timeout=15
         )
@@ -420,9 +437,13 @@ def get_general_ai_response(query):
             return response.json()['choices'][0]['message']['content'].strip()
         elif response.status_code == 402:
             return "Sorry, OpenRouter credits exhausted. Please add more credits or ask document-specific questions."
         else:
             return f"API error (Status: {response.status_code}). Try asking about documents instead."
     except Exception as e:
         return f"Error: {str(e)}"
@@ -606,6 +627,10 @@ with st.sidebar:
                 )
                 if test_response.status_code == 200:
                     st.success("✅ API working correctly!")
                 else:
                     st.error(f"❌ API Error: {test_response.status_code}")
             except Exception as e:
@@ -614,14 +639,20 @@ with st.sidebar:
         st.error("❌ No OpenRouter API Key")
         st.info("Add OPENROUTER_API_KEY in Hugging Face Space settings → Variables and secrets")
-    # RAG Settings
-    use_ai_enhancement = st.checkbox("Use AI Enhancement", value=bool(openrouter_key))
-    low_token_mode = st.checkbox("Low Token Mode (Save Credits)", value=True)
-    show_sources = st.checkbox("Show Sources", value=True)
-    show_confidence = st.checkbox("Show Confidence Scores", value=True)
-    if low_token_mode:
-        st.info("Using minimal tokens to conserve credits")
     st.divider()
@@ -656,10 +687,11 @@ for message in st.session_state.messages:
             rag_info = message["rag_info"]
             if show_sources and rag_info.get("sources"):
                 st.markdown(f"""
                 <div class="rag-attribution">
                     <strong>📁 Sources:</strong> {', '.join(rag_info['sources'])}<br>
-                    <strong>🎯 Confidence:</strong> {rag_info['confidence']*100:.1f}%
                 </div>
                 """, unsafe_allow_html=True)
@@ -692,22 +724,28 @@ if prompt := st.chat_input("Ask questions about your documents..."):
             # Check if we found relevant documents (much lower threshold)
             if search_results and search_results[0]['similarity'] > 0.01:  # Very low threshold
                 # Generate document-based answer
-                result = rag_system.generate_answer(prompt, search_results)
                 # Display AI answer or extracted answer
                 if use_ai_enhancement and result['has_both']:
                     answer_text = result['ai_answer']
-                    st.markdown(f"🤖 **AI Answer:** {answer_text}")
                 else:
                     answer_text = result['extracted_answer']
                     st.markdown(f"📄 **Document Answer:** {answer_text}")
                 # Show RAG info
                 if show_sources and result['sources']:
                     st.markdown(f"""
                     <div class="rag-attribution">
                         <strong>📁 Sources:</strong> {', '.join(result['sources'])}<br>
-                        <strong>🎯 Confidence:</strong> {result['confidence']*100:.1f}%<br>
                         <strong>📊 Found:</strong> {len(search_results)} relevant sections
                     </div>
                     """, unsafe_allow_html=True)
@@ -725,26 +763,16 @@ if prompt := st.chat_input("Ask questions about your documents..."):
                 }
             else:
-                # No relevant documents found - handle based on mode
-                if low_token_mode:
-                    no_docs_msg = "No relevant information found in your documents. (General AI disabled in low-token mode to save credits)"
-                    st.warning(no_docs_msg)
-                    assistant_message = {
-                        "role": "assistant",
-                        "content": no_docs_msg,
-                        "rag_info": {"sources": [], "confidence": 0, "mode": "no_docs"}
-                    }
-                else:
-                    # Use general AI only if not in low-token mode
-                    st.info("No relevant documents found. Switching to general AI mode...")
-                    general_response = get_general_ai_response(prompt)
-                    st.markdown(f"💬 **General AI:** {general_response}")
-                    assistant_message = {
-                        "role": "assistant",
-                        "content": general_response,
-                        "rag_info": {"sources": [], "confidence": 0, "mode": "general"}
-                    }
         else:
             # RAG system not ready - use general AI
@@ -753,7 +781,7 @@ if prompt := st.chat_input("Ask questions about your documents..."):
             else:
                 st.error("RAG system not ready. Using general AI mode...")
-            general_response = get_general_ai_response(prompt)
             st.markdown(f"💬 **General AI:** {general_response}")
             assistant_message = {
@@ -771,4 +799,5 @@ if prompt := st.chat_input("Ask questions about your documents..."):
 # Footer info
 if rag_system and rag_system.model:
     doc_count = rag_system.get_collection_count()
-    st.caption(f"📚 Knowledge Base: {doc_count} indexed chunks | 🔍 RAG System Active")

         return content[:200] + "..."
+    def generate_answer(self, query, search_results, use_ai_enhancement=True, unlimited_tokens=False):
+        """Generate both AI and extracted answers with proper token handling"""
         if not search_results:
             return {
                 'ai_answer': "No information found in documents.",
         # Always generate extracted answer
         extracted_answer = self.extract_direct_answer(query, best_result['content'])
+        # Try AI answer if requested and API key available
         ai_answer = None
         openrouter_key = os.environ.get("OPENROUTER_API_KEY")
+        if use_ai_enhancement and openrouter_key:
+            # Build context from search results
+            context = "\n\n".join([f"Source: {r['metadata']['source_file']}\nContent: {r['content']}"
+                                 for r in search_results[:3]])
+            # Create comprehensive prompt for unlimited tokens
+            if unlimited_tokens:
+                prompt = f"""Based on the following document context, provide a comprehensive and detailed answer to the user's question.
+Context from documents:
+{context}
+User Question: {query}
+Please provide a thorough, well-structured answer that directly addresses the question using the information from the documents. If the documents contain specific details, include them in your response."""
+                max_tokens = 500  # Higher token limit for detailed responses
+                temperature = 0.3
+            else:
+                # Fallback to shorter prompt
+                prompt = f"Context: {extracted_answer}\n\nQuestion: {query}\n\nImprove the answer:"
+                max_tokens = 150
+                temperature = 0.1
             try:
                 response = requests.post(
                     json={
                         "model": "openai/gpt-3.5-turbo",
                         "messages": [{"role": "user", "content": prompt}],
+                        "max_tokens": max_tokens,
+                        "temperature": temperature
                     },
+                    timeout=15
                 )
                 if response.status_code == 200:
                     ai_response = response.json()['choices'][0]['message']['content'].strip()
                     ai_answer = ai_response if len(ai_response) > 10 else extracted_answer
                 elif response.status_code == 402:
+                    st.warning("💳 OpenRouter credits exhausted. Using extracted answers only.")
+                elif response.status_code == 429:
+                    st.warning("⏱️ Rate limit reached. Using extracted answers only.")
                 else:
+                    st.warning(f"API Error {response.status_code}. Using extracted answers only.")
+            except requests.exceptions.Timeout:
+                st.warning("⏱️ API timeout. Using extracted answers only.")
             except Exception as e:
+                st.warning(f"API Exception: {str(e)}. Using extracted answers only.")
+        return {
+            'ai_answer': ai_answer,
+            'extracted_answer': extracted_answer,
+            'sources': sources,
+            'confidence': avg_confidence,
+            'has_both': ai_answer is not None
+        }
+def get_general_ai_response(query, unlimited_tokens=False):
+    """Get AI response for general questions with proper token handling"""
     openrouter_key = os.environ.get("OPENROUTER_API_KEY")
     if not openrouter_key:
         return "I can only answer questions about your uploaded documents. Please add an OpenRouter API key for general conversations."
     try:
+        # Adjust parameters based on token availability
+        if unlimited_tokens:
+            max_tokens = 500
+            temperature = 0.7
+            prompt = f"Please provide a helpful and detailed answer to this question: {query}"
+        else:
+            max_tokens = 150
+            temperature = 0.7
+            prompt = query[:200]  # Limit input length for token conservation
         response = requests.post(
             "https://openrouter.ai/api/v1/chat/completions",
             },
             json={
                 "model": "openai/gpt-3.5-turbo",
+                "messages": [{"role": "user", "content": prompt}],
+                "max_tokens": max_tokens,
+                "temperature": temperature
             },
             timeout=15
         )
             return response.json()['choices'][0]['message']['content'].strip()
         elif response.status_code == 402:
             return "Sorry, OpenRouter credits exhausted. Please add more credits or ask document-specific questions."
+        elif response.status_code == 429:
+            return "Rate limit reached. Please try again in a moment."
         else:
             return f"API error (Status: {response.status_code}). Try asking about documents instead."
+    except requests.exceptions.Timeout:
+        return "Request timeout. Please try again."
     except Exception as e:
         return f"Error: {str(e)}"
                 )
                 if test_response.status_code == 200:
                     st.success("✅ API working correctly!")
+                elif test_response.status_code == 402:
+                    st.error("❌ Credits exhausted")
+                elif test_response.status_code == 429:
+                    st.warning("⏱️ Rate limited")
                 else:
                     st.error(f"❌ API Error: {test_response.status_code}")
             except Exception as e:
         st.error("❌ No OpenRouter API Key")
         st.info("Add OPENROUTER_API_KEY in Hugging Face Space settings → Variables and secrets")
+    # Enhanced Settings
+    st.subheader("🚀 Token Settings")
+    unlimited_tokens = st.checkbox("🔥 Unlimited Tokens Mode", value=True, help="Use higher token limits for detailed responses")
+    use_ai_enhancement = st.checkbox("🤖 AI Enhancement", value=bool(openrouter_key), help="Enhance answers with AI when documents are found")
+    st.subheader("🎛️ Display Settings")
+    show_sources = st.checkbox("📁 Show Sources", value=True)
+    show_confidence = st.checkbox("🎯 Show Confidence Scores", value=True)
+    # Token mode indicator
+    if unlimited_tokens:
+        st.success("🔥 Unlimited mode: Detailed responses enabled")
+    else:
+        st.info("💰 Conservative mode: Limited tokens to save credits")
     st.divider()
             rag_info = message["rag_info"]
             if show_sources and rag_info.get("sources"):
+                confidence_text = f"{rag_info['confidence']*100:.1f}%" if show_confidence else ""
                 st.markdown(f"""
                 <div class="rag-attribution">
                     <strong>📁 Sources:</strong> {', '.join(rag_info['sources'])}<br>
+                    <strong>🎯 Confidence:</strong> {confidence_text}
                 </div>
                 """, unsafe_allow_html=True)
             # Check if we found relevant documents (much lower threshold)
             if search_results and search_results[0]['similarity'] > 0.01:  # Very low threshold
                 # Generate document-based answer
+                result = rag_system.generate_answer(
+                    prompt,
+                    search_results,
+                    use_ai_enhancement=use_ai_enhancement,
+                    unlimited_tokens=unlimited_tokens
+                )
                 # Display AI answer or extracted answer
                 if use_ai_enhancement and result['has_both']:
                     answer_text = result['ai_answer']
+                    st.markdown(f"🤖 **AI Enhanced Answer:** {answer_text}")
                 else:
                     answer_text = result['extracted_answer']
                     st.markdown(f"📄 **Document Answer:** {answer_text}")
                 # Show RAG info
                 if show_sources and result['sources']:
+                    confidence_text = f"{result['confidence']*100:.1f}%" if show_confidence else ""
                     st.markdown(f"""
                     <div class="rag-attribution">
                         <strong>📁 Sources:</strong> {', '.join(result['sources'])}<br>
+                        <strong>🎯 Confidence:</strong> {confidence_text}<br>
                         <strong>📊 Found:</strong> {len(search_results)} relevant sections
                     </div>
                     """, unsafe_allow_html=True)
                 }
             else:
+                # No relevant documents found - use general AI
+                st.info("No relevant documents found. Using general AI mode...")
+                general_response = get_general_ai_response(prompt, unlimited_tokens=unlimited_tokens)
+                st.markdown(f"💬 **General AI:** {general_response}")
+                assistant_message = {
+                    "role": "assistant",
+                    "content": general_response,
+                    "rag_info": {"sources": [], "confidence": 0, "mode": "general"}
+                }
         else:
             # RAG system not ready - use general AI
             else:
                 st.error("RAG system not ready. Using general AI mode...")
+            general_response = get_general_ai_response(prompt, unlimited_tokens=unlimited_tokens)
             st.markdown(f"💬 **General AI:** {general_response}")
             assistant_message = {
 # Footer info
 if rag_system and rag_system.model:
     doc_count = rag_system.get_collection_count()
+    token_mode = "🔥 Unlimited" if unlimited_tokens else "💰 Conservative"
+    st.caption(f"📚 Knowledge Base: {doc_count} indexed chunks | 🔍 RAG System Active | {token_mode} Token Mode")