uumerrr684 committed on
Commit
e351eb9
·
verified ·
1 Parent(s): 16f9a56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +539 -56
app.py CHANGED
@@ -146,17 +146,162 @@ USERS_FILE = "online_users.json"
146
 
147
  # ================= PERSONALITY QUESTIONS =================
148
 
149
- PERSONALITY_QUESTIONS = [
150
- "What do you know about [name]?",
151
- "How would you describe [name]'s personality?",
152
- "Is [name] introvert or extrovert?",
153
- "What do you think are [name]'s strengths?",
154
- "Does [name] have any weaknesses?",
155
- "What advice would you give [name]?",
156
- "What is something you admire about [name]?",
157
- "Do you remember anything about [name]'s friends or ambitions?",
158
- "What words describe [name] best?"
159
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  # ================= GITHUB INTEGRATION =================
162
 
@@ -758,59 +903,397 @@ def start_new_chat():
758
  st.session_state.messages = []
759
  st.session_state.session_id = str(uuid.uuid4())
760
 
761
- def process_chat_message(prompt, rag_system, use_ai_enhancement, unlimited_tokens, show_sources, show_confidence):
762
- """Process a chat message and return the assistant message"""
763
- # Update user tracking
764
- update_online_users()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
765
 
766
- # Get RAG response
767
- if rag_system and rag_system.model and rag_system.get_collection_count() > 0:
768
- # Search documents first
769
- search_results = rag_system.search(prompt, n_results=5)
 
 
 
 
 
 
 
 
 
 
 
 
770
 
771
- # Debug output for troubleshooting
772
- if search_results:
773
- st.info(f"πŸ” Found {len(search_results)} potential matches. Best similarity: {search_results[0]['similarity']:.3f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
774
  else:
775
- st.warning("πŸ” No search results returned from vector database")
776
-
777
- # Check if we found relevant documents (very low threshold)
778
- if search_results and search_results[0]['similarity'] > 0.001: # Ultra-low threshold
779
- # Generate document-based answer
780
- result = rag_system.generate_answer(
781
- prompt,
782
- search_results,
783
- use_ai_enhancement=use_ai_enhancement,
784
- unlimited_tokens=unlimited_tokens
785
- )
786
-
787
- # Display AI answer or extracted answer
788
- if use_ai_enhancement and result['has_both']:
789
- answer_text = result['ai_answer']
790
- st.markdown(f"πŸ€– **AI Enhanced Answer:** {answer_text}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
791
 
792
- # Also show extracted answer for comparison if different
793
- if result['extracted_answer'] != answer_text:
794
- with st.expander("πŸ“„ View Extracted Answer"):
795
- st.markdown(result['extracted_answer'])
796
- else:
797
- answer_text = result['extracted_answer']
798
- st.markdown(f"πŸ“„ **Document Answer:** {answer_text}")
799
 
800
- # Show why AI enhancement wasn't used
801
- if use_ai_enhancement and not result['has_both']:
802
- st.info("πŸ’‘ AI enhancement failed - showing extracted answer from documents")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
803
 
804
- # Show RAG info with more details
805
- if show_sources and result['sources']:
806
- confidence_text = f"{result['confidence']*100:.1f}%" if show_confidence else ""
 
 
807
  st.markdown(f"""
808
  <div class="rag-attribution">
809
- <strong>πŸ“ Sources:</strong> {', '.join(result['sources'])}<br>
810
- <strong>🎯 Confidence:</strong> {confidence_text}<br>
811
- <strong>πŸ“Š Found:</strong> {len(search_results)} relevant sections<br>
812
- <strong>πŸ” Best Match:</strong> {search_results[0]['similarity']:.3f} similarity
813
  </div>
814
  """, unsafe_allow_html=True)
815
 
816
- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  # ================= PERSONALITY QUESTIONS =================
148
 
149
+ # Replace the personality questions section (around line 760-780) with this fixed version:
150
+
151
+ # Personality Questions Section
152
+ st.header("🎭 Personality Questions")
153
+
154
+ # Name input for personalizing questions
155
+ name_input = st.text_input("Enter name for personalized questions:", placeholder="e.g., Sarah, Ahmed", help="Replace [name] in questions with this name")
156
+
157
+ if name_input.strip():
158
+ name = name_input.strip()
159
+ st.markdown(f"""
160
+ <div class="personality-section">
161
+ <strong>πŸ’« Quick Questions for {name}:</strong><br>
162
+ <small>Click any question to ask about {name}</small>
163
+ </div>
164
+ """, unsafe_allow_html=True)
165
+
166
+ # Display personality questions as clickable buttons
167
+ for i, question in enumerate(PERSONALITY_QUESTIONS):
168
+ formatted_question = question.replace("[name]", name)
169
+ if st.button(formatted_question, key=f"pq_{i}", use_container_width=True):
170
+ # Add the question to chat and set flag to process it
171
+ user_message = {"role": "user", "content": formatted_question}
172
+ st.session_state.messages.append(user_message)
173
+ st.session_state.process_personality_question = formatted_question
174
+ st.rerun()
175
+ else:
176
+ st.markdown("""
177
+ <div class="personality-section">
178
+ <strong>πŸ’« Sample Questions:</strong><br>
179
+ <small>Enter a name above to personalize these questions</small>
180
+ </div>
181
+ """, unsafe_allow_html=True)
182
+
183
+ # Show sample questions without names
184
+ for question in PERSONALITY_QUESTIONS[:5]: # Show first 5 as examples
185
+ st.markdown(f"β€’ {question}")
186
+
187
+ # Then, modify the main chat processing section to handle personality questions
188
+ # Add this right after the chat input section and before the existing chat processing:
189
+
190
+ # Check if we need to process a personality question
191
+ if hasattr(st.session_state, 'process_personality_question'):
192
+ prompt = st.session_state.process_personality_question
193
+ del st.session_state.process_personality_question # Clear the flag
194
+
195
+ # Display user message
196
+ with st.chat_message("user"):
197
+ st.markdown(prompt)
198
+
199
+ # Process the question using the same logic as chat input
200
+ # Update user tracking
201
+ update_online_users()
202
+
203
+ # Get RAG response
204
+ with st.chat_message("assistant"):
205
+ if rag_system and rag_system.model and rag_system.get_collection_count() > 0:
206
+ # Search documents first
207
+ search_results = rag_system.search(prompt, n_results=5)
208
+
209
+ # Debug output for troubleshooting
210
+ if search_results:
211
+ st.info(f"πŸ” Found {len(search_results)} potential matches. Best similarity: {search_results[0]['similarity']:.3f}")
212
+ else:
213
+ st.warning("πŸ” No search results returned from vector database")
214
+
215
+ # Check if we found relevant documents (very low threshold)
216
+ if search_results and search_results[0]['similarity'] > 0.001: # Ultra-low threshold
217
+ # Generate document-based answer
218
+ result = rag_system.generate_answer(
219
+ prompt,
220
+ search_results,
221
+ use_ai_enhancement=use_ai_enhancement,
222
+ unlimited_tokens=unlimited_tokens
223
+ )
224
+
225
+ # Display AI answer or extracted answer
226
+ if use_ai_enhancement and result['has_both']:
227
+ answer_text = result['ai_answer']
228
+ st.markdown(f"πŸ€– **AI Enhanced Answer:** {answer_text}")
229
+
230
+ # Also show extracted answer for comparison if different
231
+ if result['extracted_answer'] != answer_text:
232
+ with st.expander("πŸ“„ View Extracted Answer"):
233
+ st.markdown(result['extracted_answer'])
234
+ else:
235
+ answer_text = result['extracted_answer']
236
+ st.markdown(f"πŸ“„ **Document Answer:** {answer_text}")
237
+
238
+ # Show why AI enhancement wasn't used
239
+ if use_ai_enhancement and not result['has_both']:
240
+ st.info("πŸ’‘ AI enhancement failed - showing extracted answer from documents")
241
+
242
+ # Show RAG info with more details
243
+ if show_sources and result['sources']:
244
+ confidence_text = f"{result['confidence']*100:.1f}%" if show_confidence else ""
245
+ st.markdown(f"""
246
+ <div class="rag-attribution">
247
+ <strong>πŸ“ Sources:</strong> {', '.join(result['sources'])}<br>
248
+ <strong>🎯 Confidence:</strong> {confidence_text}<br>
249
+ <strong>πŸ“Š Found:</strong> {len(search_results)} relevant sections<br>
250
+ <strong>πŸ” Best Match:</strong> {search_results[0]['similarity']:.3f} similarity
251
+ </div>
252
+ """, unsafe_allow_html=True)
253
+
254
+ # Add to messages with RAG info
255
+ assistant_message = {
256
+ "role": "assistant",
257
+ "content": answer_text,
258
+ "rag_info": {
259
+ "sources": result['sources'],
260
+ "confidence": result['confidence'],
261
+ "extracted_answer": result['extracted_answer'],
262
+ "has_ai": result['has_both']
263
+ }
264
+ }
265
+
266
+ else:
267
+ # No relevant documents found - show debug info
268
+ if search_results:
269
+ st.warning(f"πŸ“„ Found documents but similarity too low (best: {search_results[0]['similarity']:.3f}). Using general AI...")
270
+ else:
271
+ st.warning("πŸ“„ No documents found in search. Using general AI...")
272
+
273
+ general_response = get_general_ai_response(prompt, unlimited_tokens=unlimited_tokens)
274
+ st.markdown(f"πŸ’¬ **General AI:** {general_response}")
275
+
276
+ assistant_message = {
277
+ "role": "assistant",
278
+ "content": general_response,
279
+ "rag_info": {"sources": [], "confidence": 0, "mode": "general"}
280
+ }
281
+
282
+ else:
283
+ # RAG system not ready - use general AI
284
+ if rag_system and rag_system.get_collection_count() == 0:
285
+ st.warning("No documents indexed. Sync from GitHub or upload documents first...")
286
+ else:
287
+ st.error("RAG system not ready. Using general AI mode...")
288
+
289
+ general_response = get_general_ai_response(prompt, unlimited_tokens=unlimited_tokens)
290
+ st.markdown(f"πŸ’¬ **General AI:** {general_response}")
291
+
292
+ assistant_message = {
293
+ "role": "assistant",
294
+ "content": general_response,
295
+ "rag_info": {"sources": [], "confidence": 0, "mode": "general"}
296
+ }
297
+
298
+ # Add assistant message to history
299
+ st.session_state.messages.append(assistant_message)
300
+
301
+ # Auto-save
302
+ save_chat_history(st.session_state.messages)
303
+
304
+ # Continue with the existing chat input processing...
305
 
306
  # ================= GITHUB INTEGRATION =================
307
 
 
903
  st.session_state.messages = []
904
  st.session_state.session_id = str(uuid.uuid4())
905
 
906
+ # ================= MAIN APP =================
907
+
908
+ # Initialize session state
909
+ if "messages" not in st.session_state:
910
+ st.session_state.messages = load_chat_history()
911
+
912
+ if "session_id" not in st.session_state:
913
+ st.session_state.session_id = str(uuid.uuid4())
914
+
915
+ # Initialize RAG system
916
+ rag_system = initialize_rag_system()
917
+
918
+ # Header
919
+ st.title("RAG Chat Flow βœ©β‚ŠΛš.β‹†πŸ•ΈοΈβ‹†βΊβ‚Šβœ§")
920
+ st.caption("Ask questions about your documents with AI-powered retrieval")
921
+
922
+ # Sidebar
923
+ with st.sidebar:
924
+ # New Chat Button
925
+ if st.button("βž• New Chat", use_container_width=True, type="primary"):
926
+ start_new_chat()
927
+ st.rerun()
928
 
929
+ st.divider()
930
+
931
+ # Personality Questions Section
932
+ st.header("🎭 Personality Questions")
933
+
934
+ # Name input for personalizing questions
935
+ name_input = st.text_input("Enter name for personalized questions:", placeholder="e.g., Sarah, Ahmed", help="Replace [name] in questions with this name")
936
+
937
+ if name_input.strip():
938
+ name = name_input.strip()
939
+ st.markdown(f"""
940
+ <div class="personality-section">
941
+ <strong>πŸ’« Quick Questions for {name}:</strong><br>
942
+ <small>Click any question to ask about {name}</small>
943
+ </div>
944
+ """, unsafe_allow_html=True)
945
 
946
+ # Display personality questions as clickable buttons
947
+ for i, question in enumerate(PERSONALITY_QUESTIONS):
948
+ formatted_question = question.replace("[name]", name)
949
+ if st.button(formatted_question, key=f"pq_{i}", use_container_width=True):
950
+ # Add the question to chat
951
+ user_message = {"role": "user", "content": formatted_question}
952
+ st.session_state.messages.append(user_message)
953
+ st.rerun()
954
+ else:
955
+ st.markdown("""
956
+ <div class="personality-section">
957
+ <strong>πŸ’« Sample Questions:</strong><br>
958
+ <small>Enter a name above to personalize these questions</small>
959
+ </div>
960
+ """, unsafe_allow_html=True)
961
+
962
+ # Show sample questions without names
963
+ for question in PERSONALITY_QUESTIONS[:5]: # Show first 5 as examples
964
+ st.markdown(f"β€’ {question}")
965
+
966
+ st.divider()
967
+
968
+ # GitHub Integration
969
+ st.header("πŸ™ GitHub Integration")
970
+
971
+ github_status = check_github_status()
972
+
973
+ if github_status["status"] == "connected":
974
+ st.markdown(f"""
975
+ <div class="github-status">
976
+ <strong>🟒 GitHub:</strong> {github_status['message']}<br>
977
+ <strong>πŸ“‚ Repo:</strong> family-profiles (private)
978
+ </div>
979
+ """, unsafe_allow_html=True)
980
+
981
+ # Sync from GitHub button
982
+ if st.button("πŸ”„ Sync from GitHub", use_container_width=True):
983
+ if clone_github_repo():
984
+ # Auto-index after successful sync
985
+ if rag_system and rag_system.model:
986
+ with st.spinner("Auto-indexing synced documents..."):
987
+ if rag_system.index_documents("documents"):
988
+ st.success("βœ… Documents synced and indexed!")
989
+ st.rerun()
990
+ else:
991
+ st.warning("⚠️ Sync successful but indexing failed")
992
+ else:
993
+ color_map = {"red": "πŸ”΄", "orange": "🟠", "green": "🟒"}
994
+ color_icon = color_map.get(github_status["color"], "πŸ”΄")
995
+
996
+ st.markdown(f"""
997
+ <div class="github-status">
998
+ <strong>{color_icon} GitHub:</strong> {github_status['message']}<br>
999
+ <strong>πŸ“‹ Setup:</strong> Add GITHUB_TOKEN to Hugging Face secrets
1000
+ </div>
1001
+ """, unsafe_allow_html=True)
1002
+
1003
+ st.divider()
1004
+
1005
+ # Document Management
1006
+ st.header("πŸ“‚ Document Management")
1007
+
1008
+ if rag_system and rag_system.model:
1009
+ doc_count = rag_system.get_collection_count()
1010
+
1011
+ if doc_count > 0:
1012
+ st.markdown(f"""
1013
+ <div class="document-status">
1014
+ <strong>πŸ“Š Documents Indexed:</strong> {doc_count} chunks<br>
1015
+ <strong>πŸ” Status:</strong> Ready for queries
1016
+ </div>
1017
+ """, unsafe_allow_html=True)
1018
  else:
1019
+ st.warning("No documents indexed. Sync from GitHub or upload documents to get started.")
1020
+
1021
+ # Document indexing
1022
+ if st.button("πŸ”„ Re-index Documents", use_container_width=True):
1023
+ with st.spinner("Indexing documents..."):
1024
+ if rag_system.index_documents("documents"):
1025
+ st.success("Documents indexed successfully!")
1026
+ st.rerun()
1027
+ else:
1028
+ st.error("Failed to index documents. Check your documents folder.")
1029
+
1030
+ # Show document count only (hidden)
1031
+ if os.path.exists("documents"):
1032
+ txt_files = [f for f in os.listdir("documents") if f.endswith('.txt')]
1033
+ if txt_files:
1034
+ st.info(f"πŸ“„ {len(txt_files)} documents loaded (hidden)")
1035
+
1036
+ # Manual upload interface (fallback)
1037
+ st.subheader("πŸ“€ Manual Upload")
1038
+ uploaded_files = st.file_uploader(
1039
+ "Upload text files (fallback)",
1040
+ type=['txt'],
1041
+ accept_multiple_files=True,
1042
+ help="Upload .txt files if GitHub sync is not available"
1043
+ )
1044
+
1045
+ if uploaded_files:
1046
+ if st.button("πŸ’Ύ Save & Index Files"):
1047
+ os.makedirs("documents", exist_ok=True)
1048
+ saved_files = []
1049
 
1050
+ for uploaded_file in uploaded_files:
1051
+ file_path = os.path.join("documents", uploaded_file.name)
1052
+ with open(file_path, "wb") as f:
1053
+ f.write(uploaded_file.getbuffer())
1054
+ saved_files.append(uploaded_file.name)
 
 
1055
 
1056
+ st.success(f"Saved {len(saved_files)} files!")
1057
+
1058
+ # Auto-index
1059
+ with st.spinner("Auto-indexing new documents..."):
1060
+ if rag_system.index_documents("documents"):
1061
+ st.success("Documents indexed successfully!")
1062
+ st.rerun()
1063
+ else:
1064
+ st.error("RAG system initialization failed. Check your setup.")
1065
+
1066
+ st.divider()
1067
+
1068
+ # Online Users
1069
+ st.header("πŸ‘₯ Online Users")
1070
+ online_count = update_online_users()
1071
+
1072
+ if online_count == 1:
1073
+ st.success("🟒 Just you online")
1074
+ else:
1075
+ st.success(f"🟒 {online_count} people online")
1076
+
1077
+ st.divider()
1078
+
1079
+ # Settings
1080
+ st.header("βš™οΈ Settings")
1081
+
1082
+ # API Status with better checking
1083
+ openrouter_key = os.environ.get("OPENROUTER_API_KEY")
1084
+ if openrouter_key:
1085
+ st.success(" βœ… API Connected")
1086
+ # Quick API test
1087
+ if st.button("Test API Connection", use_container_width=True):
1088
+ try:
1089
+ test_response = requests.post(
1090
+ "https://openrouter.ai/api/v1/chat/completions",
1091
+ headers={
1092
+ "Authorization": f"Bearer {openrouter_key}",
1093
+ "Content-Type": "application/json"
1094
+ },
1095
+ json={
1096
+ "model": "openai/gpt-3.5-turbo",
1097
+ "messages": [{"role": "user", "content": "test"}],
1098
+ "max_tokens": 5
1099
+ },
1100
+ timeout=5
1101
+ )
1102
+ if test_response.status_code == 200:
1103
+ st.success("βœ… API working correctly!")
1104
+ elif test_response.status_code == 402:
1105
+ st.error("❌ Credits exhausted")
1106
+ elif test_response.status_code == 429:
1107
+ st.warning("⏱️ Rate limited")
1108
+ else:
1109
+ st.error(f"❌ API Error: {test_response.status_code}")
1110
+ except Exception as e:
1111
+ st.error(f"❌ API Test Failed: {str(e)}")
1112
+ else:
1113
+ st.error("❌ No OpenRouter API Key")
1114
+ st.info("Add OPENROUTER_API_KEY in Hugging Face Space settings β†’ Variables and secrets")
1115
+
1116
+ # Enhanced Settings
1117
+ st.subheader("πŸš€ Token Settings")
1118
+ unlimited_tokens = st.checkbox("πŸ”₯ Unlimited Tokens Mode", value=True, help="Use higher token limits for detailed responses")
1119
+ use_ai_enhancement = st.checkbox("πŸ€– AI Enhancement", value=bool(openrouter_key), help="Enhance answers with AI when documents are found")
1120
+
1121
+ st.subheader("πŸŽ›οΈ Display Settings")
1122
+ show_sources = st.checkbox("πŸ“ Show Sources", value=True)
1123
+ show_confidence = st.checkbox("🎯 Show Confidence Scores", value=True)
1124
+
1125
+ # Token mode indicator
1126
+ if unlimited_tokens:
1127
+ st.success("πŸ”₯ Unlimited mode: Detailed responses enabled")
1128
+ else:
1129
+ st.info("πŸ’° Conservative mode: Limited tokens to save credits")
1130
+
1131
+ st.divider()
1132
+
1133
+ # Chat History Controls
1134
+ st.header("πŸ’Ύ Chat History")
1135
+
1136
+ if st.session_state.messages:
1137
+ st.info(f"Messages: {len(st.session_state.messages)}")
1138
+
1139
+ col1, col2 = st.columns(2)
1140
+ with col1:
1141
+ if st.button("πŸ’Ύ Save", use_container_width=True):
1142
+ save_chat_history(st.session_state.messages)
1143
+ st.success("Saved!")
1144
+
1145
+ with col2:
1146
+ if st.button("πŸ—‘οΈ Clear", use_container_width=True):
1147
+ start_new_chat()
1148
+ st.success("Cleared!")
1149
+ st.rerun()
1150
+
1151
+ # ================= MAIN CHAT AREA =================
1152
+
1153
+ # Display chat messages
1154
+ for message in st.session_state.messages:
1155
+ with st.chat_message(message["role"]):
1156
+ if message["role"] == "assistant" and "rag_info" in message:
1157
+ # Display AI answer
1158
+ st.markdown(message["content"])
1159
 
1160
+ # Display RAG information
1161
+ rag_info = message["rag_info"]
1162
+
1163
+ if show_sources and rag_info.get("sources"):
1164
+ confidence_text = f"{rag_info['confidence']*100:.1f}%" if show_confidence else ""
1165
  st.markdown(f"""
1166
  <div class="rag-attribution">
1167
+ <strong>πŸ“ Sources:</strong> {', '.join(rag_info['sources'])}<br>
1168
+ <strong>🎯 Confidence:</strong> {confidence_text}
 
 
1169
  </div>
1170
  """, unsafe_allow_html=True)
1171
 
1172
+ # Show extracted answer if different
1173
+ if rag_info.get("extracted_answer") and rag_info["extracted_answer"] != message["content"]:
1174
+ st.markdown("**πŸ“„ Extracted Answer:**")
1175
+ st.markdown(f"_{rag_info['extracted_answer']}_")
1176
+ else:
1177
+ st.markdown(message["content"])
1178
+
1179
+ # Chat input
1180
+ if prompt := st.chat_input("Ask questions about your documents..."):
1181
+ # Update user tracking
1182
+ update_online_users()
1183
+
1184
+ # Add user message
1185
+ user_message = {"role": "user", "content": prompt}
1186
+ st.session_state.messages.append(user_message)
1187
+
1188
+ # Display user message
1189
+ with st.chat_message("user"):
1190
+ st.markdown(prompt)
1191
+
1192
+ # Get RAG response
1193
+ with st.chat_message("assistant"):
1194
+ if rag_system and rag_system.model and rag_system.get_collection_count() > 0:
1195
+ # Search documents first
1196
+ search_results = rag_system.search(prompt, n_results=5)
1197
+
1198
+ # Debug output for troubleshooting
1199
+ if search_results:
1200
+ st.info(f"πŸ” Found {len(search_results)} potential matches. Best similarity: {search_results[0]['similarity']:.3f}")
1201
+ else:
1202
+ st.warning("πŸ” No search results returned from vector database")
1203
+
1204
+ # Check if we found relevant documents (very low threshold)
1205
+ if search_results and search_results[0]['similarity'] > 0.001: # Ultra-low threshold
1206
+ # Generate document-based answer
1207
+ result = rag_system.generate_answer(
1208
+ prompt,
1209
+ search_results,
1210
+ use_ai_enhancement=use_ai_enhancement,
1211
+ unlimited_tokens=unlimited_tokens
1212
+ )
1213
+
1214
+ # Display AI answer or extracted answer
1215
+ if use_ai_enhancement and result['has_both']:
1216
+ answer_text = result['ai_answer']
1217
+ st.markdown(f"πŸ€– **AI Enhanced Answer:** {answer_text}")
1218
+
1219
+ # Also show extracted answer for comparison if different
1220
+ if result['extracted_answer'] != answer_text:
1221
+ with st.expander("πŸ“„ View Extracted Answer"):
1222
+ st.markdown(result['extracted_answer'])
1223
+ else:
1224
+ answer_text = result['extracted_answer']
1225
+ st.markdown(f"πŸ“„ **Document Answer:** {answer_text}")
1226
+
1227
+ # Show why AI enhancement wasn't used
1228
+ if use_ai_enhancement and not result['has_both']:
1229
+ st.info("πŸ’‘ AI enhancement failed - showing extracted answer from documents")
1230
+
1231
+ # Show RAG info with more details
1232
+ if show_sources and result['sources']:
1233
+ confidence_text = f"{result['confidence']*100:.1f}%" if show_confidence else ""
1234
+ st.markdown(f"""
1235
+ <div class="rag-attribution">
1236
+ <strong>πŸ“ Sources:</strong> {', '.join(result['sources'])}<br>
1237
+ <strong>🎯 Confidence:</strong> {confidence_text}<br>
1238
+ <strong>πŸ“Š Found:</strong> {len(search_results)} relevant sections<br>
1239
+ <strong>πŸ” Best Match:</strong> {search_results[0]['similarity']:.3f} similarity
1240
+ </div>
1241
+ """, unsafe_allow_html=True)
1242
+
1243
+ # Add to messages with RAG info
1244
+ assistant_message = {
1245
+ "role": "assistant",
1246
+ "content": answer_text,
1247
+ "rag_info": {
1248
+ "sources": result['sources'],
1249
+ "confidence": result['confidence'],
1250
+ "extracted_answer": result['extracted_answer'],
1251
+ "has_ai": result['has_both']
1252
+ }
1253
+ }
1254
+
1255
+ else:
1256
+ # No relevant documents found - show debug info
1257
+ if search_results:
1258
+ st.warning(f"πŸ“„ Found documents but similarity too low (best: {search_results[0]['similarity']:.3f}). Using general AI...")
1259
+ else:
1260
+ st.warning("πŸ“„ No documents found in search. Using general AI...")
1261
+
1262
+ general_response = get_general_ai_response(prompt, unlimited_tokens=unlimited_tokens)
1263
+ st.markdown(f"πŸ’¬ **General AI:** {general_response}")
1264
+
1265
+ assistant_message = {
1266
+ "role": "assistant",
1267
+ "content": general_response,
1268
+ "rag_info": {"sources": [], "confidence": 0, "mode": "general"}
1269
+ }
1270
+
1271
+ else:
1272
+ # RAG system not ready - use general AI
1273
+ if rag_system and rag_system.get_collection_count() == 0:
1274
+ st.warning("No documents indexed. Sync from GitHub or upload documents first...")
1275
+ else:
1276
+ st.error("RAG system not ready. Using general AI mode...")
1277
+
1278
+ general_response = get_general_ai_response(prompt, unlimited_tokens=unlimited_tokens)
1279
+ st.markdown(f"πŸ’¬ **General AI:** {general_response}")
1280
+
1281
+ assistant_message = {
1282
+ "role": "assistant",
1283
+ "content": general_response,
1284
+ "rag_info": {"sources": [], "confidence": 0, "mode": "general"}
1285
+ }
1286
+
1287
+ # Add assistant message to history
1288
+ st.session_state.messages.append(assistant_message)
1289
+
1290
+ # Auto-save
1291
+ save_chat_history(st.session_state.messages)
1292
+
1293
+ # Footer info
1294
+ if rag_system and rag_system.model:
1295
+ doc_count = rag_system.get_collection_count()
1296
+ token_mode = "πŸ”₯ Unlimited" if unlimited_tokens else "πŸ’° Conservative"
1297
+ github_status = check_github_status()
1298
+ github_icon = "🟒" if github_status["status"] == "connected" else "πŸ”΄"
1299
+ st.caption(f"πŸ“š Knowledge Base: {doc_count} indexed chunks | πŸ” RAG System Active | {token_mode} Token Mode | {github_icon} GitHub {github_status['status'].title()}")