import streamlit as st
import os
from datetime import datetime
import tempfile
from pathlib import Path
# Import our modules
from utils import initialize_rag_system, clear_session_state, format_sources
from config import MODEL_NAME, EMBEDDING_MODEL
# ---------------------------------------------------------------------------
# Page configuration, global CSS, header and footer.
# NOTE(review): emoji in this file arrived as cp1252 mojibake (e.g. "đ¤");
# restored to the intended UTF-8 characters.
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="AI RAG Assistant",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS — the style rules were lost in transit; the call is kept as a
# placeholder so project-specific CSS can be re-added inside the string.
st.markdown("""
""", unsafe_allow_html=True)

# Header (original HTML wrapper was stripped; a triple-quoted string fixes the
# SyntaxError caused by a single-quoted literal spanning multiple lines).
st.markdown('''
🤖 AI RAG Assistant
''', unsafe_allow_html=True)

st.markdown("Upload your documents and chat with your data using advanced RAG powered by Llama-4-Scout")

# Footer with attribution (link text was empty, rendering an invisible link)
st.markdown(
    "**Built with** [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)"
)
# ---------------------------------------------------------------------------
# Sidebar: model/embedding selection, retrieval tuning, and a reset button.
# Indentation restored — the pasted source had lost the `with` block nesting.
# ---------------------------------------------------------------------------
with st.sidebar:
    st.header("⚙️ Settings")

    # Model selection — single fixed option today; selectbox keeps the UI
    # stable if more models are added to config later.
    model_name = st.selectbox(
        "Response Model",
        [MODEL_NAME],
        help="Llama-4-Scout for powerful reasoning",
    )
    embedding_model = st.selectbox(
        "Embedding Model",
        [EMBEDDING_MODEL],
        help="bge-m3: State-of-the-art multilingual embeddings",
    )

    # Minimum cosine-similarity score a chunk must reach to be retrieved.
    similarity_threshold = st.slider(
        "Similarity Threshold", 0.5, 0.95, 0.8,
        help="Minimum similarity score for relevant chunks",
    )

    # Cap on generated tokens per answer.
    max_new_tokens = st.slider("Max Tokens", 200, 2000, 1000)

    st.divider()

    # Wipe chat history and the indexed documents, then rerun the script so
    # the UI reflects the cleared state immediately.
    if st.button("🗑️ Clear Chat & Memory", type="secondary"):
        clear_session_state()
        st.rerun()
# ---------------------------------------------------------------------------
# Session-state defaults (survive Streamlit reruns) and the document uploader.
# ---------------------------------------------------------------------------
if "messages" not in st.session_state:
    st.session_state.messages = []        # chat history: list of role/content dicts
if "rag_system" not in st.session_state:
    st.session_state.rag_system = None    # built lazily once documents arrive
if "documents_processed" not in st.session_state:
    st.session_state.documents_processed = 0

# File upload section
uploaded_files = st.file_uploader(
    "📁 Upload Documents",
    type=['pdf', 'txt', 'md', 'docx', 'doc', 'pptx', 'ppt'],
    accept_multiple_files=True,
    help="Supports PDF, TXT, MD, DOCX, PPTX and more",
)
# ---------------------------------------------------------------------------
# Persist uploads to disk and (re)build the RAG index.
# Fixes a SyntaxError in the original: the success f-string was split across
# two physical lines mid-literal.
# ---------------------------------------------------------------------------
if uploaded_files:
    with st.spinner("Processing documents... This may take a moment."):
        try:
            # The RAG loader reads from a directory, so copy each in-memory
            # upload into a fresh temp dir first.
            temp_dir = tempfile.mkdtemp()
            for file in uploaded_files:
                file_path = Path(temp_dir) / file.name
                with open(file_path, "wb") as f:
                    f.write(file.getbuffer())

            # Initialize or update RAG system with the current sidebar settings.
            st.session_state.rag_system = initialize_rag_system(
                temp_dir,
                model_name,
                embedding_model,
                similarity_threshold,
            )
            st.session_state.documents_processed = len(uploaded_files)
            st.success(f"✅ Processed {len(uploaded_files)} documents successfully!")
            st.info(f"📚 {st.session_state.documents_processed} documents indexed and ready for querying")
        except Exception as e:
            # Surface any ingestion/indexing failure to the user instead of crashing.
            st.error(f"❌ Error processing documents: {str(e)}")
# ---------------------------------------------------------------------------
# Readiness banner shown once an index exists.
# Fixes a SyntaxError in the original: the f-string was split across lines.
# ---------------------------------------------------------------------------
if st.session_state.rag_system is not None:
    col1, col2 = st.columns([3, 1])
    with col1:
        st.success(f"✅ Ready! {st.session_state.documents_processed} documents loaded")
    with col2:
        st.caption(f"Model: {model_name}")

# Chat interface
st.markdown("---")
# ---------------------------------------------------------------------------
# Replay the stored conversation on every rerun.
# ---------------------------------------------------------------------------
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
        # Assistant turns may carry retrieval sources — show them collapsed.
        if message["role"] == "assistant" and "sources" in message:
            with st.expander("📄 Sources", expanded=False):
                st.markdown(format_sources(message["sources"]))
# ---------------------------------------------------------------------------
# Chat input: record the user turn, query the RAG system, store the answer
# together with its source nodes so the history renderer can show citations.
# ---------------------------------------------------------------------------
if prompt := st.chat_input("Ask a question about your documents..."):
    # Add user message to history and echo it immediately.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Generate response only when an index has been built.
    if st.session_state.rag_system is not None:
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                try:
                    # Query RAG system
                    response = st.session_state.rag_system.query(prompt)
                    st.markdown(response.response)
                    # Store the full turn, including retrieval sources, for replay.
                    full_message = {
                        "role": "assistant",
                        "content": response.response,
                        "sources": response.source_nodes,
                    }
                    st.session_state.messages.append(full_message)
                except Exception as e:
                    st.error(f"Error generating response: {str(e)}")
    else:
        with st.chat_message("assistant"):
            st.warning("📄 Please upload and process documents first!")
# ---------------------------------------------------------------------------
# Collapsible usage guide. A blank line is inserted before "**Features:**" so
# markdown renders it as its own paragraph instead of gluing it to item 4.
# ---------------------------------------------------------------------------
with st.expander("ℹ️ How to use", expanded=False):
    st.markdown("""
    1. **Upload documents** (PDF, TXT, MD, DOCX, PPTX supported)
    2. **Wait for processing** (indexing happens automatically)
    3. **Ask questions** about your documents
    4. **Click sources** to see exact references

    **Features:**
    - Multi-document support
    - Advanced semantic search
    - Source citations
    - Adjustable similarity threshold
    - Streaming responses
    """)
# ---------------------------------------------------------------------------
# System-info panel: document count plus the short names of the configured
# models (trailing path segment of the HF-style "org/model" identifiers).
# ---------------------------------------------------------------------------
if st.session_state.rag_system is not None:
    with st.expander("📊 System Info", expanded=False):
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Documents", st.session_state.documents_processed)
        with col2:
            st.metric("Model", MODEL_NAME.split('/')[-1])
        with col3:
            st.metric("Embedding", EMBEDDING_MODEL.split('/')[-1])