Spaces:

uumerrr684
/

RAG_Chat_Flow

Sleeping

App Files Files Community

uumerrr684 committed on Aug 20, 2025

Commit

8b2b880

verified ·

1 Parent(s): fe0ef09

Delete src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +0 -673

src/streamlit_app.py DELETED Viewed

@@ -1,673 +0,0 @@
-import html
-import json
-import os
-import re
-import uuid
-from datetime import datetime, timedelta
-import chromadb
-import requests
-import streamlit as st
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from sentence_transformers import SentenceTransformer
-# Page configuration: browser-tab title and icon; the sidebar starts expanded.
-st.set_page_config(
-    page_title="RAG Chat Flow 📚",
-    page_icon="📚",
-    initial_sidebar_state="expanded"
-)
-# Custom CSS, injected as raw HTML (hence unsafe_allow_html=True). It hides
-# Streamlit's default menu, footer, header and deploy button, restyles primary
-# buttons in black, and defines the helper classes used by the HTML snippets
-# rendered later in this script (.document-status, .rag-attribution).
-st.markdown("""
-<style>
-    .stApp {
-        background: white;
-    }
-    .main .block-container {
-        max-width: 900px;
-    }
-    #MainMenu {visibility: hidden;}
-    footer {visibility: hidden;}
-    header {visibility: hidden;}
-    .stDeployButton {display: none;}
-    .model-id {
-        color: #28a745;
-        font-family: monospace;
-    }
-    .model-attribution {
-        color: #28a745;
-        font-size: 0.8em;
-        font-style: italic;
-    }
-    .rag-attribution {
-        color: #6f42c1;
-        font-size: 0.8em;
-        font-style: italic;
-        background: #f8f9fa;
-        padding: 8px;
-        border-radius: 4px;
-        border-left: 3px solid #6f42c1;
-        margin-top: 8px;
-    }
-    /* NEW CHAT BUTTON - Black background */
-    .stButton > button[kind="primary"] {
-        background-color: #000000 !important;
-        border-color: #000000 !important;
-        color: #ffffff !important;
-    }
-    .stButton > button[kind="primary"]:hover {
-        background-color: #333333 !important;
-        border-color: #333333 !important;
-        color: #ffffff !important;
-    }
-    /* Chat history styling */
-    .chat-history-item {
-        padding: 8px 12px;
-        margin: 4px 0;
-        border-radius: 8px;
-        border: 1px solid #e0e0e0;
-        background: #f8f9fa;
-        cursor: pointer;
-        transition: all 0.2s;
-    }
-    .chat-history-item:hover {
-        background: #e9ecef;
-        border-color: #28a745;
-    }
-    .document-status {
-        background: #e3f2fd;
-        padding: 10px;
-        border-radius: 8px;
-        border-left: 4px solid #2196f3;
-        margin: 10px 0;
-    }
-    .rag-stats {
-        background: #f3e5f5;
-        padding: 8px;
-        border-radius: 6px;
-        font-size: 0.85em;
-        color: #4a148c;
-    }
-</style>
-""", unsafe_allow_html=True)
-# File paths (relative names, so they resolve against the working directory).
-# HISTORY_FILE: JSON list of chat messages read/written by the history helpers.
-HISTORY_FILE = "rag_chat_history.json"
-# SESSIONS_FILE: not referenced anywhere in the visible part of this file.
-SESSIONS_FILE = "rag_chat_sessions.json"
-# USERS_FILE: JSON object of recently seen users, maintained by update_online_users().
-USERS_FILE = "online_users.json"
-# ================= RAG SYSTEM CLASS =================
-@st.cache_resource
-def initialize_rag_system():
-    """Create the ProductionRAGSystem instance (wrapped in st.cache_resource)."""
-    rag = ProductionRAGSystem()
-    return rag
-class ProductionRAGSystem:
-    """Document retrieval helper: SentenceTransformer embeddings stored in a ChromaDB collection.
-    Construction never raises. A failure is reported with ``st.error`` and the
-    affected attributes stay ``None``, which every method checks before use.
-    """
-    # Number of chunks written to ChromaDB per ``add`` call while indexing.
-    _INDEX_BATCH_SIZE = 64
-    def __init__(self, collection_name="streamlit_rag_docs"):
-        """Load the embedding model, open the collection and build the text splitter.
-        Args:
-            collection_name: ChromaDB collection to open, or create when missing.
-        """
-        self.collection_name = collection_name
-        # Declare every attribute up front so the early returns below cannot
-        # surface later as AttributeError in search() or index_documents().
-        self.model = None
-        self.client = None
-        self.collection = None
-        self.text_splitter = None
-        # Initialize embedding model
-        try:
-            self.model = SentenceTransformer('all-mpnet-base-v2')
-        except Exception as e:
-            st.error(f"Error loading embedding model: {e}")
-            return
-        # Initialize ChromaDB
-        try:
-            self.client = chromadb.PersistentClient(path="./chroma_db")
-            self.collection = self.client.get_or_create_collection(collection_name)
-        except Exception as e:
-            st.error(f"Error initializing ChromaDB: {e}")
-            self.client = None
-            self.collection = None
-            return
-        # Initialize text splitter
-        self.text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=800,
-            chunk_overlap=100,
-            length_function=len,
-            separators=["\n\n", "\n", ". ", " ", ""]
-        )
-    def get_collection_count(self):
-        """Return the number of chunks in the collection, or 0 when it is unavailable."""
-        if self.collection is None:
-            return 0
-        try:
-            return self.collection.count()
-        except Exception:
-            # Best-effort: callers only use the count for status display.
-            return 0
-    def load_documents_from_folder(self, folder_path="documents"):
-        """Read every ``.txt`` file in *folder_path* and split it into chunks.
-        Args:
-            folder_path: Directory to scan (not recursive).
-        Returns:
-            A list of dicts with ``content``, ``source_file``, ``chunk_index``
-            and ``char_count`` keys; empty when the folder is missing, holds no
-            usable text files, or the splitter was never initialised.
-        """
-        if self.text_splitter is None or not os.path.isdir(folder_path):
-            return []
-        all_chunks = []
-        # Sorted so chunk order (and therefore indexing order) is deterministic.
-        for filename in sorted(os.listdir(folder_path)):
-            if not filename.endswith('.txt'):
-                continue
-            filepath = os.path.join(folder_path, filename)
-            try:
-                with open(filepath, 'r', encoding='utf-8') as f:
-                    content = f.read().strip()
-            except (OSError, UnicodeError) as e:
-                # One unreadable file must not abort the whole scan.
-                st.error(f"Error reading {filename}: {e}")
-                continue
-            if not content:
-                continue
-            for i, chunk in enumerate(self.text_splitter.split_text(content)):
-                all_chunks.append({
-                    'content': chunk,
-                    'source_file': filename,
-                    'chunk_index': i,
-                    'char_count': len(chunk)
-                })
-        return all_chunks
-    def index_documents(self, document_folder="documents"):
-        """Rebuild the collection from the ``.txt`` files in *document_folder*.
-        Progress is shown with a Streamlit progress bar and status line.
-        Returns:
-            True when every chunk was embedded and stored; False when the
-            system is not initialised, no chunks were found, or an error
-            occurred (the error is shown via ``st.error``).
-        """
-        if not self.model or not self.client:
-            return False
-        chunks = self.load_documents_from_folder(document_folder)
-        if not chunks:
-            return False
-        # Start from an empty collection. Deleting is best-effort because it
-        # can fail simply when the collection does not exist yet; failing to
-        # (re)open the collection afterwards is a real error.
-        try:
-            self.client.delete_collection(self.collection_name)
-        except Exception:
-            pass
-        try:
-            self.collection = self.client.get_or_create_collection(self.collection_name)
-        except Exception as e:
-            st.error(f"Error during indexing: {e}")
-            return False
-        # Create embeddings with progress bar
-        progress_bar = st.progress(0)
-        status_text = st.empty()
-        try:
-            status_text.text("Creating embeddings...")
-            embeddings = self.model.encode(
-                [chunk['content'] for chunk in chunks], show_progress_bar=False
-            )
-            status_text.text("Storing in database...")
-            total = len(chunks)
-            # Write in batches: far fewer round trips than one add() per chunk.
-            for start in range(0, total, self._INDEX_BATCH_SIZE):
-                stop = min(start + self._INDEX_BATCH_SIZE, total)
-                batch = chunks[start:stop]
-                self.collection.add(
-                    documents=[chunk['content'] for chunk in batch],
-                    ids=[f"{chunk['source_file']}_{chunk['chunk_index']}" for chunk in batch],
-                    embeddings=[vector.tolist() for vector in embeddings[start:stop]],
-                    metadatas=[
-                        {
-                            "source_file": chunk['source_file'],
-                            "chunk_index": chunk['chunk_index'],
-                            "char_count": chunk['char_count']
-                        }
-                        for chunk in batch
-                    ]
-                )
-                progress_bar.progress(stop / total)
-            return True
-        except Exception as e:
-            st.error(f"Error during indexing: {e}")
-            return False
-        finally:
-            # Remove the temporary widgets on success and on failure alike.
-            progress_bar.empty()
-            status_text.empty()
-    def search(self, query, n_results=3):
-        """Return the chunks most similar to *query*.
-        Args:
-            query: Natural-language question.
-            n_results: Maximum number of chunks to return.
-        Returns:
-            A list of dicts with ``content``, ``metadata`` and ``similarity``
-            keys, or None when the system is not ready, nothing matched, or
-            the lookup failed (the error is shown via ``st.error``).
-        """
-        if not self.model or self.collection is None:
-            return None
-        try:
-            query_embedding = self.model.encode([query])[0].tolist()
-            results = self.collection.query(
-                query_embeddings=[query_embedding],
-                n_results=n_results
-            )
-            documents = results['documents'][0]
-            if not documents:
-                return None
-            # NOTE(review): similarity is 1 - distance clamped at 0, so its
-            # meaning depends on the collection's distance metric - confirm.
-            return [
-                {
-                    'content': chunk,
-                    'metadata': metadata,
-                    'similarity': max(0.0, 1.0 - distance)
-                }
-                for chunk, distance, metadata in zip(
-                    documents,
-                    results['distances'][0],
-                    results['metadatas'][0]
-                )
-            ]
-        except Exception as e:
-            st.error(f"Search error: {e}")
-            return None
-    def extract_direct_answer(self, query, content):
-        """Pick the sentence of *content* that best matches *query*.
-        Each sentence scores 2 per word shared with the query plus 3 for each
-        matching key term (401k/retirement, sick, vacation). Words are compared
-        without punctuation, so "holidays?" in a query matches "holidays".
-        Returns:
-            The best-scoring sentence, else the first sentence longer than 30
-            characters, else the first 200 characters of *content* (with "..."
-            appended only when something was actually cut off).
-        """
-        query_lower = query.lower()
-        query_words = set(re.findall(r"\w+", query_lower))
-        sentences = [s.strip() for s in re.split(r'[.!?]+', content)]
-        sentences = [s for s in sentences if len(s) > 20]
-        best_sentence = None
-        best_score = 0
-        for sentence in sentences:
-            sentence_lower = sentence.lower()
-            sentence_words = set(re.findall(r"\w+", sentence_lower))
-            exact_matches = len(query_words & sentence_words)
-            # Bonus scoring for key terms
-            bonus_score = 0
-            if '401k' in query_lower and ('401' in sentence_lower or 'retirement' in sentence_lower):
-                bonus_score += 3
-            for term in ('sick', 'vacation'):
-                if term in query_lower and term in sentence_lower:
-                    bonus_score += 3
-            total_score = exact_matches * 2 + bonus_score
-            # Strict ">" keeps the earliest sentence among equal scores.
-            if total_score > best_score:
-                best_sentence, best_score = sentence, total_score
-        if best_sentence is not None:
-            return best_sentence if best_sentence.endswith('.') else best_sentence + '.'
-        # Fallback
-        for sentence in sentences:
-            if len(sentence) > 30:
-                return sentence if sentence.endswith('.') else sentence + '.'
-        return content[:200] + ("..." if len(content) > 200 else "")
-    def _request_ai_answer(self, query, context, api_key):
-        """Ask OpenRouter for a one-sentence answer; return None when none is available."""
-        prompt = f"Answer briefly: {query}\n\nContext: {context}\n\nAnswer (1 sentence):"
-        try:
-            response = requests.post(
-                "https://openrouter.ai/api/v1/chat/completions",
-                headers={
-                    "Authorization": f"Bearer {api_key}",
-                    "Content-Type": "application/json"
-                },
-                json={
-                    "model": "openai/gpt-3.5-turbo",
-                    "messages": [{"role": "user", "content": prompt}],
-                    "max_tokens": 100,
-                    "temperature": 0.1
-                },
-                timeout=10
-            )
-            if response.status_code != 200:
-                # Surface rejected requests instead of silently falling back.
-                st.warning(f"AI API error: HTTP {response.status_code}")
-                return None
-            answer = response.json()['choices'][0]['message']['content']
-        except Exception as e:
-            # Network failures and unexpected payload shapes both end up here.
-            st.warning(f"AI API error: {e}")
-            return None
-        # An empty or missing completion counts as "no AI answer".
-        return (answer or "").strip() or None
-    def generate_answer(self, query, search_results):
-        """Build the answer payload for *query* from ranked *search_results*.
-        Args:
-            query: The user's question.
-            search_results: Output of ``search()``; may be None or empty.
-        Returns:
-            A dict with ``ai_answer`` (None unless OPENROUTER_API_KEY is set and
-            the request succeeded), ``extracted_answer``, ``sources`` (file
-            names of the top two results, in ranking order), ``confidence``
-            (mean similarity of the top two results) and ``has_both``.
-        """
-        if not search_results:
-            return {
-                'ai_answer': "No information found in documents.",
-                'extracted_answer': "No information found in documents.",
-                'sources': [],
-                'confidence': 0,
-                'has_both': False
-            }
-        best_result = search_results[0]
-        top_results = search_results[:2]
-        # dict.fromkeys de-duplicates while keeping ranking order; a set would
-        # make the displayed order vary between runs.
-        sources = list(dict.fromkeys(r['metadata']['source_file'] for r in top_results))
-        avg_confidence = sum(r['similarity'] for r in top_results) / len(top_results)
-        # Always generate extracted answer
-        extracted_answer = self.extract_direct_answer(query, best_result['content'])
-        # Try AI answer if API key available
-        ai_answer = None
-        openrouter_key = os.environ.get("OPENROUTER_API_KEY")
-        if openrouter_key:
-            ai_answer = self._request_ai_answer(query, best_result['content'][:500], openrouter_key)
-        return {
-            'ai_answer': ai_answer,
-            'extracted_answer': extracted_answer,
-            'sources': sources,
-            'confidence': avg_confidence,
-            'has_both': ai_answer is not None
-        }
-# ================= UTILITY FUNCTIONS =================
-def get_user_id():
-    """Return this session's 8-character user id, creating it on first use."""
-    state = st.session_state
-    if 'user_id' in state:
-        return state.user_id
-    # First call in this session: keep the first 8 characters of a fresh UUID.
-    state.user_id = str(uuid.uuid4())[:8]
-    return state.user_id
-def update_online_users():
-    """Record the current user as active and prune users idle for 5+ minutes.
-    The registry is a JSON object in USERS_FILE keyed by user id. A missing,
-    unreadable or corrupt file is treated as empty and rewritten, so one bad
-    write cannot disable the counter permanently.
-    Returns:
-        The number of users seen within the last five minutes (at least 1,
-        because the current user is always included).
-    """
-    users = {}
-    try:
-        with open(USERS_FILE, 'r', encoding='utf-8') as f:
-            loaded = json.load(f)
-        if isinstance(loaded, dict):
-            users = loaded
-    except (OSError, ValueError):
-        # Missing file, I/O error or invalid JSON: start from an empty registry.
-        users = {}
-    user_id = get_user_id()
-    current_time = datetime.now()
-    previous = users.get(user_id)
-    session_start = current_time.isoformat()
-    if isinstance(previous, dict):
-        # Keep the original start time across refreshes of the same session.
-        session_start = previous.get('session_start', session_start)
-    users[user_id] = {
-        'last_seen': current_time.isoformat(),
-        'name': f'User-{user_id}',
-        'session_start': session_start
-    }
-    # Clean up old users
-    idle_limit = timedelta(minutes=5)
-    active_users = {}
-    for uid, data in users.items():
-        try:
-            last_seen = datetime.fromisoformat(data['last_seen'])
-            is_active = current_time - last_seen < idle_limit
-        except (KeyError, TypeError, ValueError):
-            # Malformed entry (wrong shape, bad timestamp, tz mismatch): drop it.
-            continue
-        if is_active:
-            active_users[uid] = data
-    try:
-        with open(USERS_FILE, 'w', encoding='utf-8') as f:
-            json.dump(active_users, f, indent=2)
-    except (OSError, TypeError, ValueError):
-        # Persisting is best-effort; the count computed above is still valid.
-        pass
-    return len(active_users)
-def load_chat_history():
-    """Load the saved chat messages from HISTORY_FILE.
-    Returns:
-        The stored list of message dicts, or an empty list when the file is
-        missing, unreadable, not valid JSON, or does not contain a JSON list
-        (callers append to the result, so anything else would break them).
-    """
-    try:
-        with open(HISTORY_FILE, 'r', encoding='utf-8') as f:
-            history = json.load(f)
-    except (OSError, ValueError):
-        # FileNotFoundError is an OSError; json.JSONDecodeError is a ValueError.
-        return []
-    return history if isinstance(history, list) else []
-def save_chat_history(messages):
-    """Write *messages* to HISTORY_FILE as pretty-printed UTF-8 JSON.
-    The data is serialised first and written to a temporary file that then
-    replaces HISTORY_FILE, so a serialisation or write failure leaves the
-    previously saved history intact. Errors are shown via ``st.error``.
-    Args:
-        messages: JSON-serialisable list of chat message dicts.
-    """
-    temp_path = f"{HISTORY_FILE}.tmp"
-    try:
-        payload = json.dumps(messages, ensure_ascii=False, indent=2)
-        with open(temp_path, 'w', encoding='utf-8') as f:
-            f.write(payload)
-        os.replace(temp_path, HISTORY_FILE)
-    except (OSError, TypeError, ValueError) as e:
-        st.error(f"Error saving history: {e}")
-def start_new_chat():
-    """Reset the conversation: empty message list and a fresh session id."""
-    fresh_session_id = str(uuid.uuid4())
-    st.session_state.messages = []
-    st.session_state.session_id = fresh_session_id
-# ================= MAIN APP =================
-# Initialize session state
-# A session without a "messages" entry starts from whatever load_chat_history() returns.
-if "messages" not in st.session_state:
-    st.session_state.messages = load_chat_history()
-# A session without a "session_id" entry gets a new UUID string.
-if "session_id" not in st.session_state:
-    st.session_state.session_id = str(uuid.uuid4())
-# Initialize RAG system (initialize_rag_system is decorated with st.cache_resource)
-rag_system = initialize_rag_system()
-# Header: page title and one-line caption
-st.title("RAG Chat Flow 📚")
-st.caption("Ask questions about your documents with AI-powered retrieval")
-# Sidebar: new-chat button, document management, online users, settings and
-# chat-history controls. It also defines the use_ai_enhancement, show_sources
-# and show_confidence flags read by the chat area below.
-with st.sidebar:
-    # New Chat Button
-    if st.button("➕ New Chat", use_container_width=True, type="primary"):
-        start_new_chat()
-        st.rerun()
-    st.divider()
-    # Document Management
-    st.header("📂 Document Management")
-    if rag_system and rag_system.model:
-        doc_count = rag_system.get_collection_count()
-        if doc_count > 0:
-            st.markdown(f"""
-            <div class="document-status">
-                <strong>📊 Documents Indexed:</strong> {doc_count} chunks<br>
-                <strong>🔍 Status:</strong> Ready for queries
-            </div>
-            """, unsafe_allow_html=True)
-        else:
-            st.warning("No documents indexed. Upload documents to get started.")
-        # Document indexing
-        if st.button("🔄 Re-index Documents", use_container_width=True):
-            with st.spinner("Indexing documents..."):
-                if rag_system.index_documents("documents"):
-                    st.success("Documents indexed successfully!")
-                    st.rerun()
-                else:
-                    st.error("Failed to index documents. Check your documents folder.")
-        # Upload interface
-        st.subheader("📤 Upload Documents")
-        uploaded_files = st.file_uploader(
-            "Upload text files",
-            type=['txt'],
-            accept_multiple_files=True,
-            help="Upload .txt files to add to your knowledge base"
-        )
-        if uploaded_files:
-            if st.button("💾 Save & Index Files"):
-                os.makedirs("documents", exist_ok=True)
-                saved_files = []
-                for uploaded_file in uploaded_files:
-                    # The file name is supplied by the client. Keep only its
-                    # final path component so a name such as "../../app.txt"
-                    # cannot be written outside the documents folder.
-                    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
-                    if not safe_name:
-                        continue
-                    file_path = os.path.join("documents", safe_name)
-                    with open(file_path, "wb") as f:
-                        f.write(uploaded_file.getbuffer())
-                    saved_files.append(safe_name)
-                st.success(f"Saved {len(saved_files)} files!")
-                # Auto-index
-                with st.spinner("Auto-indexing new documents..."):
-                    if rag_system.index_documents("documents"):
-                        st.success("Documents indexed successfully!")
-                        st.rerun()
-                    else:
-                        # Same feedback as the manual re-index button above.
-                        st.error("Failed to index documents. Check your documents folder.")
-    else:
-        st.error("RAG system initialization failed. Check your setup.")
-    st.divider()
-    # Online Users
-    st.header("👥 Online Users")
-    online_count = update_online_users()
-    if online_count == 1:
-        st.success("🟢 Just you online")
-    else:
-        st.success(f"🟢 {online_count} people online")
-    st.divider()
-    # Settings
-    st.header("⚙️ Settings")
-    # API Status
-    openrouter_key = os.environ.get("OPENROUTER_API_KEY")
-    if openrouter_key:
-        st.success("🟢 AI API Connected")
-    else:
-        st.warning("⚠️ No AI API Key (using extracted answers only)")
-    # RAG Settings
-    use_ai_enhancement = st.checkbox("Use AI Enhancement", value=bool(openrouter_key))
-    show_sources = st.checkbox("Show Sources", value=True)
-    show_confidence = st.checkbox("Show Confidence Scores", value=True)
-    st.divider()
-    # Chat History Controls
-    st.header("💾 Chat History")
-    if st.session_state.messages:
-        st.info(f"Messages: {len(st.session_state.messages)}")
-    col1, col2 = st.columns(2)
-    with col1:
-        if st.button("💾 Save", use_container_width=True):
-            save_chat_history(st.session_state.messages)
-            st.success("Saved!")
-    with col2:
-        if st.button("🗑️ Clear", use_container_width=True):
-            start_new_chat()
-            st.success("Cleared!")
-            st.rerun()
-# ================= MAIN CHAT AREA =================
-# Display chat messages: replay the stored conversation. Assistant messages
-# that carry "rag_info" also get an attribution box and, when it differs from
-# the displayed text, the extracted answer.
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        if message["role"] == "assistant" and "rag_info" in message:
-            # Display AI answer
-            st.markdown(message["content"])
-            # Display RAG information
-            rag_info = message["rag_info"]
-            if rag_info.get("sources"):
-                details = []
-                if show_sources:
-                    # Source names are file names from uploaded documents and
-                    # this markup is rendered as raw HTML, so escape them.
-                    safe_sources = ', '.join(html.escape(str(name)) for name in rag_info['sources'])
-                    details.append(f"<strong>📁 Sources:</strong> {safe_sources}")
-                if show_confidence:
-                    # History comes from a JSON file; tolerate a missing score.
-                    confidence = float(rag_info.get('confidence') or 0)
-                    details.append(f"<strong>🎯 Confidence:</strong> {confidence*100:.1f}%")
-                if details:
-                    box_html = "<br>".join(details)
-                    st.markdown(f'<div class="rag-attribution">{box_html}</div>', unsafe_allow_html=True)
-            # Show extracted answer if different
-            if rag_info.get("extracted_answer") and rag_info["extracted_answer"] != message["content"]:
-                st.markdown("**📄 Extracted Answer:**")
-                st.markdown(f"_{rag_info['extracted_answer']}_")
-        else:
-            st.markdown(message["content"])
-# Chat input: on submit, record the question, answer it from the indexed
-# documents, render the answer, and persist the updated conversation.
-if prompt := st.chat_input("Ask questions about your documents..."):
-    # Update user tracking
-    update_online_users()
-    # Add user message
-    user_message = {"role": "user", "content": prompt}
-    st.session_state.messages.append(user_message)
-    # Display user message
-    with st.chat_message("user"):
-        st.markdown(prompt)
-    # Get RAG response
-    with st.chat_message("assistant"):
-        if rag_system and rag_system.model and rag_system.get_collection_count() > 0:
-            # Search documents
-            search_results = rag_system.search(prompt, n_results=3)
-            if search_results:
-                # Generate answer
-                result = rag_system.generate_answer(prompt, search_results)
-                # Display AI answer or extracted answer
-                if use_ai_enhancement and result['has_both']:
-                    answer_text = result['ai_answer']
-                    st.markdown(f"🤖 **AI Answer:** {answer_text}")
-                else:
-                    answer_text = result['extracted_answer']
-                    st.markdown(f"📄 **Answer:** {answer_text}")
-                # Show RAG info, honouring the two sidebar checkboxes separately.
-                if result['sources']:
-                    details = []
-                    if show_sources:
-                        # Source names are file names from uploaded documents and
-                        # this markup is rendered as raw HTML, so escape them.
-                        safe_sources = ', '.join(html.escape(str(name)) for name in result['sources'])
-                        details.append(f"<strong>📁 Sources:</strong> {safe_sources}")
-                    if show_confidence:
-                        details.append(f"<strong>🎯 Confidence:</strong> {result['confidence']*100:.1f}%")
-                    if show_sources:
-                        details.append(f"<strong>📊 Found:</strong> {len(search_results)} relevant sections")
-                    if details:
-                        box_html = "<br>".join(details)
-                        st.markdown(f'<div class="rag-attribution">{box_html}</div>', unsafe_allow_html=True)
-                # Add to messages with RAG info
-                assistant_message = {
-                    "role": "assistant",
-                    "content": answer_text,
-                    "rag_info": {
-                        "sources": result['sources'],
-                        "confidence": result['confidence'],
-                        "extracted_answer": result['extracted_answer'],
-                        "has_ai": result['has_both']
-                    }
-                }
-            else:
-                # No relevant documents found
-                no_info_msg = "I couldn't find relevant information in your documents. Try rephrasing your question or check if the information exists in your uploaded documents."
-                st.markdown(no_info_msg)
-                assistant_message = {
-                    "role": "assistant",
-                    "content": no_info_msg,
-                    "rag_info": {"sources": [], "confidence": 0}
-                }
-        else:
-            # RAG system not ready
-            error_msg = "Document system not ready. Please upload and index documents first."
-            st.error(error_msg)
-            assistant_message = {
-                "role": "assistant",
-                "content": error_msg,
-                "rag_info": {"sources": [], "confidence": 0}
-            }
-    # Add assistant message to history
-    st.session_state.messages.append(assistant_message)
-    # Auto-save
-    save_chat_history(st.session_state.messages)
-# Footer: knowledge-base summary, shown only when the embedding model is loaded
-rag_ready = rag_system and rag_system.model
-if rag_ready:
-    doc_count = rag_system.get_collection_count()
-    footer_text = f"📚 Knowledge Base: {doc_count} indexed chunks | 🔍 RAG System Active"
-    st.caption(footer_text)