"""Streamlit front-end for an AI RAG (retrieval-augmented generation) assistant.

Users upload documents, which are written to a temp directory and indexed via
``initialize_rag_system``; they then chat with the indexed content.  Assistant
answers carry their retrieved source chunks, shown in an expander.
"""

import streamlit as st
import os
from datetime import datetime
import tempfile
from pathlib import Path

# Import our modules
from utils import initialize_rag_system, clear_session_state, format_sources
from config import MODEL_NAME, EMBEDDING_MODEL

# Page config
st.set_page_config(
    page_title="AI RAG Assistant",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS (placeholder — no rules defined yet)
st.markdown("""
""", unsafe_allow_html=True)

# Header
st.markdown(
    """
<div class="main-header">
    <h1>🤖 AI RAG Assistant</h1>
</div>
""",
    unsafe_allow_html=True,
)
st.markdown(
    "Upload your documents and chat with your data using advanced RAG powered by Llama-4-Scout"
)

# Footer with attribution
st.markdown(
    "**Built with** [![anycoder](https://img.shields.io/badge/Built%20with-anycoder-3b82f6?style=for-the-badge&logo=huggingface)](https://huggingface.co/spaces/akhaliq/anycoder)"
)

# Sidebar
with st.sidebar:
    st.header("⚙️ Settings")

    # Model selection — each list has a single entry today; selectboxes are
    # kept so more models can be offered later without UI changes.
    model_name = st.selectbox(
        "Response Model",
        [MODEL_NAME],
        help="Llama-4-Scout for powerful reasoning",
    )
    embedding_model = st.selectbox(
        "Embedding Model",
        [EMBEDDING_MODEL],
        help="bge-m3: State-of-the-art multilingual embeddings",
    )

    # Minimum similarity a retrieved chunk must score to be used as context.
    similarity_threshold = st.slider(
        "Similarity Threshold",
        0.5,
        0.95,
        0.8,
        help="Minimum similarity score for relevant chunks",
    )

    # NOTE(review): max_new_tokens is collected but never passed to the RAG
    # system or query call below — confirm whether it should be wired through.
    max_new_tokens = st.slider("Max Tokens", 200, 2000, 1000)

    st.divider()

    if st.button("🗑️ Clear Chat & Memory", type="secondary"):
        clear_session_state()
        st.rerun()

# Initialize session state on first run
if "messages" not in st.session_state:
    st.session_state.messages = []
if "rag_system" not in st.session_state:
    st.session_state.rag_system = None
if "documents_processed" not in st.session_state:
    st.session_state.documents_processed = 0

# File upload section
uploaded_files = st.file_uploader(
    "📁 Upload Documents",
    type=['pdf', 'txt', 'md', 'docx', 'doc', 'pptx', 'ppt'],
    accept_multiple_files=True,
    help="Supports PDF, TXT, MD, DOCX, PPTX and more",
)

# Process uploaded files
if uploaded_files:
    with st.spinner("Processing documents... This may take a moment."):
        try:
            # Persist uploads to disk so the indexer can read them by path.
            temp_dir = tempfile.mkdtemp()
            for file in uploaded_files:
                file_path = Path(temp_dir) / file.name
                with open(file_path, "wb") as f:
                    f.write(file.getbuffer())

            # Initialize or update RAG system
            st.session_state.rag_system = initialize_rag_system(
                temp_dir, model_name, embedding_model, similarity_threshold
            )
            st.session_state.documents_processed = len(uploaded_files)
            st.success(f"✅ Processed {len(uploaded_files)} documents successfully!")
            st.info(
                f"📊 {st.session_state.documents_processed} documents indexed and ready for querying"
            )
        except Exception as e:
            st.error(f"❌ Error processing documents: {str(e)}")

# Status indicator
if st.session_state.rag_system is not None:
    col1, col2 = st.columns([3, 1])
    with col1:
        st.success(
            f"✅ Ready! {st.session_state.documents_processed} documents loaded"
        )
    with col2:
        st.caption(f"Model: {model_name}")

# Chat interface
st.markdown("---")

# Display chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
        # Display sources for assistant messages
        if message["role"] == "assistant" and "sources" in message:
            with st.expander("📚 Sources", expanded=False):
                st.markdown(format_sources(message["sources"]))

# Chat input
if prompt := st.chat_input("Ask a question about your documents..."):
    # Add user message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate response
    if st.session_state.rag_system is not None:
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                try:
                    # Query RAG system
                    response = st.session_state.rag_system.query(prompt)

                    # Display response
                    st.markdown(response.response)

                    # Store full response with sources
                    full_message = {
                        "role": "assistant",
                        "content": response.response,
                        "sources": response.source_nodes,
                    }
                    st.session_state.messages.append(full_message)
                except Exception as e:
                    st.error(f"Error generating response: {str(e)}")
    else:
        with st.chat_message("assistant"):
            st.warning("👆 Please upload and process documents first!")

# Instructions
with st.expander("ℹ️ How to use", expanded=False):
    st.markdown("""
1. **Upload documents** (PDF, TXT, MD, DOCX, PPTX supported)
2. **Wait for processing** (indexing happens automatically)
3. **Ask questions** about your documents
4. **Click sources** to see exact references

**Features:**
- Multi-document support
- Advanced semantic search
- Source citations
- Adjustable similarity threshold
- Streaming responses
""")

# Performance metrics
if st.session_state.rag_system is not None:
    with st.expander("📈 System Info", expanded=False):
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Documents", st.session_state.documents_processed)
        with col2:
            st.metric("Model", MODEL_NAME.split('/')[-1])
        with col3:
            st.metric("Embedding", EMBEDDING_MODEL.split('/')[-1])