Spaces:

dev-models
/

MultiModel-Rag

Running

App Files Files Community

MultiModel-Rag / app.py

dev-models

app file updated

0fe8565 about 24 hours ago

raw

history blame contribute delete

11 kB

	import streamlit as st
	import os
	import base64
	from io import BytesIO
	from PIL import Image
	import time

	# Import Modular components
	from backend.rag import RAGEngine
	from backend.parser import EnrichedRagParser
	import tempfile

	# ==========================================
	# 1. Page Configuration & Professional CSS
	# ==========================================
	# Page-level settings: browser-tab title and icon, wide layout, and the
	# sidebar expanded on first load. This is the first st.* call in the script.
	st.set_page_config(
	page_title="Multimodal RAG Assistant",
	page_icon="🤖",
	layout="wide",
	initial_sidebar_state="expanded"
	)

	# Custom CSS injected as raw HTML (hence unsafe_allow_html=True).
	# - .stChatMessage: card look (border, radius, padding, shadow) for chat
	#   messages — presumably the class Streamlit emits for st.chat_message;
	#   confirm against the installed Streamlit version.
	# - .stats-container / .stats-header / .stats-item: used by the
	#   "Database Status" card that main() renders in the sidebar.
	# Colours come from CSS variables (--secondary-background-color,
	# --text-color) rather than hard-coded values.
	st.markdown("""
	<style>
	.stChatMessage {
	background-color: var(--secondary-background-color);
	border: 1px solid rgba(128, 128, 128, 0.1);
	border-radius: 12px;
	padding: 1.5rem;
	margin-bottom: 1rem;
	box-shadow: 0 2px 4px rgba(0,0,0,0.05);
	}
	.stats-container {
	background-color: var(--secondary-background-color);
	border: 1px solid rgba(128, 128, 128, 0.2);
	border-radius: 10px;
	padding: 15px;
	margin-top: 10px;
	}
	.stats-header {
	font-weight: 600;
	color: var(--text-color);
	margin-bottom: 8px;
	display: block;
	}
	.stats-item {
	font-size: 0.9em;
	color: var(--text-color);
	opacity: 0.8;
	margin-bottom: 4px;
	display: flex;
	justify-content: space-between;
	}
	</style>
	""", unsafe_allow_html=True)

	# ==========================================
	# 2. Initialization & Helper Functions
	# ==========================================

	@st.cache_resource
	def initialize_rag_system(force_clean: bool = True):
	    """Construct the RAG engine behind Streamlit's resource cache.

	    Args:
	        force_clean: Forwarded unchanged to ``RAGEngine``. Its exact effect
	            is defined in ``backend.rag`` and is not visible in this file.

	    Returns:
	        A ``RAGEngine`` built with ``use_hybrid=True``.
	    """
	    engine_options = {"use_hybrid": True, "force_clean": force_clean}
	    return RAGEngine(**engine_options)

	def display_image_from_base64(base64_str: str, caption: str = "", width: int = 300):
	    """Decode a base64-encoded image and render it with ``st.image``.

	    Any failure while decoding, opening or rendering is reported in the UI
	    through ``st.error`` instead of being raised.

	    Args:
	        base64_str: Base64 text of the encoded image bytes.
	        caption: Caption passed to ``st.image``.
	        width: Display width passed to ``st.image``.
	    """
	    try:
	        image_buffer = BytesIO(base64.b64decode(base64_str))
	        st.image(Image.open(image_buffer), caption=caption, width=width)
	    except Exception as e:
	        st.error(f"Failed to display image: {e}")

	# ==========================================
	# 3. Main Application
	# ==========================================

	def _ingest_uploaded_pdf(rag, uploaded_file):
	    """Parse an uploaded PDF, ingest it, and return suggested questions.

	    The upload is spooled to a temporary file because the parser is given a
	    filesystem path. The temporary file is always removed afterwards,
	    whether parsing/ingestion succeeded or not.

	    Args:
	        rag: The ``RAGEngine`` instance used for ingestion and suggestions.
	        uploaded_file: The object returned by ``st.file_uploader``.

	    Returns:
	        Whatever ``rag.generate_suggested_questions(num_questions=6)`` returns.

	    Raises:
	        Exception: Anything raised by the parser or the engine propagates to
	            the caller, which is responsible for showing it to the user.
	    """
	    # The uploader only accepts PDFs, so give the temp file a matching
	    # extension. NOTE(review): presumably helps parsers that sniff the
	    # format from the file name — confirm against EnrichedRagParser.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
	        # getvalue() returns the whole upload regardless of the current
	        # stream position, unlike read().
	        tmp.write(uploaded_file.getvalue())
	        file_path = tmp.name

	    try:
	        with st.spinner("Analyzing PDF with Docling..."):
	            parser = EnrichedRagParser()
	            parsed_data = parser.process_document(file_path)

	        with st.spinner("Ingesting into MongoDB..."):
	            rag.ingest_data(parsed_data)

	        return rag.generate_suggested_questions(num_questions=6)
	    finally:
	        # Always clean up; a failed delete must not mask the real outcome.
	        try:
	            os.remove(file_path)
	        except OSError as cleanup_err:
	            print(f"⚠️ Could not delete temp file {file_path}: {cleanup_err}")


	def _render_image_grid(images, columns: int = 3, width: int = 220):
	    """Render base64 images in a fixed-column grid below a divider.

	    Does nothing when ``images`` is empty or ``None``. Each item is expected
	    to be a mapping with an ``"image_base64"`` key.
	    """
	    if not images:
	        return
	    st.markdown("---")
	    cols = st.columns(columns)
	    for idx, img in enumerate(images):
	        with cols[idx % columns]:
	            display_image_from_base64(img["image_base64"], width=width)


	def main():
	    """Build the Streamlit UI: sidebar controls, chat history, and answers.

	    Flow on each script run:
	      1. Ensure session state (messages, suggested questions, RAG engine).
	      2. Render the sidebar (upload/ingest, quick questions, search
	         settings, database stats, reset buttons).
	      3. Render the chat history and the chat input.
	      4. If the last message is from the user, produce the assistant reply
	         (image search or streamed text answer) and store it in history.
	    """
	    # --- State Management ---
	    if "messages" not in st.session_state:
	        st.session_state.messages = []
	    if "suggested_questions" not in st.session_state:
	        st.session_state.suggested_questions = []

	    # Initialize Backend
	    if "rag" not in st.session_state:
	        with st.spinner("🚀 Booting up AI System..."):
	            st.session_state.rag = initialize_rag_system()
	    rag: RAGEngine = st.session_state.rag

	    # ==========================================
	    # SIDEBAR: Control Panel
	    # ==========================================
	    with st.sidebar:
	        st.header("🧠 RAG Control Panel")

	        # --- PDF Document Upload ---
	        with st.expander("📂 Knowledge Base", expanded=True):
	            uploaded_file = st.file_uploader(
	                "Upload Document (PDF)",
	                type=["pdf"],
	                label_visibility="collapsed",
	            )

	            if uploaded_file:
	                # The temp file is only written once the user clicks the
	                # button, so ordinary reruns no longer leave files behind.
	                if st.button("🚀 Process PDF", type="primary", use_container_width=True):
	                    try:
	                        suggestions = _ingest_uploaded_pdf(rag, uploaded_file)
	                    except Exception as e:
	                        # No rerun here: a rerun would wipe the error message
	                        # before the user could read it.
	                        st.error(f"❌ Error: {str(e)}")
	                    else:
	                        st.session_state.suggested_questions = suggestions
	                        st.success(f"Processed: {uploaded_file.name}")
	                        # Rerun only on success.
	                        st.rerun()

	        st.markdown("---")

	        # --- Suggested Questions ---
	        if st.session_state.suggested_questions:
	            st.subheader("💡 Quick Questions")
	            for idx, q in enumerate(st.session_state.suggested_questions):
	                if st.button(q, key=f"sugg_{idx}", use_container_width=True):
	                    # Queue the question as a user message; the assistant
	                    # section below answers it on the next run.
	                    st.session_state.messages.append({"role": "user", "content": q})
	                    st.rerun()
	            st.markdown("---")

	        # --- Settings ---
	        with st.expander("⚙️ Search Settings"):
	            top_k = st.slider("Max Results", 1, 10, 5)
	            min_score = st.slider("Confidence Threshold", 0.0, 1.0, 0.6)
	            use_images = st.toggle("Enable Image Search", value=True)

	        # --- System Stats ---
	        count = rag.collection.count_documents({})
	        st.markdown(
	            f"""
	            <div class="stats-container">
	            <span class="stats-header">📊 Database Status</span>
	            <div class="stats-item"><span>Total Chunks:</span> <strong>{count}</strong></div>
	            <div class="stats-item"><span>Embedding:</span> <strong>CLIP ViT-L/14</strong></div>
	            </div>
	            """,
	            unsafe_allow_html=True,
	        )

	        # Reset
	        if st.button("🗑️ Clear Chat", type="secondary", use_container_width=True):
	            st.session_state.messages = []
	            st.rerun()

	        if st.button("⚠️ Delete Vector Collection", type="primary", use_container_width=True):
	            with st.spinner("Deleting collection..."):
	                rag.collection.delete_many({})
	                # Reset in-memory indices to match empty DB
	                rag.bm25_index = None
	                rag.bm25_doc_map = {}
	            st.success("Vector Collection Deleted!")
	            time.sleep(1)  # Give user a moment to see the success message
	            st.rerun()

	    # ==========================================
	    # MAIN: Chat Interface
	    # ==========================================
	    st.title("🤖 Multimodal AI Assistant")

	    if not st.session_state.messages:
	        st.markdown(
	            """
	            <div style="text-align: center; margin-top: 50px; opacity: 0.7;">
	            <h3>👋 Ready to help!</h3>
	            <p>Upload a PDF in the sidebar to start.</p>
	            </div>
	            """,
	            unsafe_allow_html=True,
	        )

	    # Render History
	    for msg in st.session_state.messages:
	        with st.chat_message(msg["role"]):
	            st.markdown(msg["content"])
	            # Error messages are stored without an "images" key.
	            _render_image_grid(msg.get("images"))

	    # ==========================================
	    # LOGIC: Input Handling
	    # ==========================================
	    user_input = st.chat_input("Type your question here...")

	    if user_input:
	        st.session_state.messages.append({"role": "user", "content": user_input})
	        st.rerun()

	    # ==========================================
	    # ASSISTANT: Streaming Response Logic
	    # ==========================================
	    if st.session_state.messages and st.session_state.messages[-1]["role"] == "user":
	        last_query = st.session_state.messages[-1]["content"]

	        with st.chat_message("assistant"):
	            with st.spinner("🤔 Searching context..."):
	                try:
	                    img_keywords = ["show", "image", "diagram", "figure", "picture"]
	                    lowered_query = last_query.lower()
	                    # Plain substring match on the query; only considered
	                    # when image search is enabled in the sidebar.
	                    is_visual_request = use_images and any(
	                        k in lowered_query for k in img_keywords
	                    )

	                    found_imgs = []
	                    answer_text = ""

	                    if is_visual_request:
	                        # 🔍 Image search branch (non-streaming)
	                        found_imgs = rag.search_images(
	                            last_query,
	                            top_k=3,
	                            min_score=min_score,
	                        )
	                        if found_imgs:
	                            answer_text = f"I found {len(found_imgs)} relevant visuals:"
	                        else:
	                            answer_text = "I couldn't find any relevant images."

	                        # Render once
	                        st.markdown(answer_text)

	                    else:
	                        # 🧠 Text answer branch (STREAMING)
	                        # Assumes rag.answer_question returns a generator /
	                        # stream; st.write_stream displays the chunks and its
	                        # return value is stored as the final answer text.
	                        stream = rag.answer_question(
	                            last_query,
	                            top_k=top_k,
	                        )
	                        answer_text = st.write_stream(stream)

	                    # Render images if any
	                    _render_image_grid(found_imgs)

	                    # Persist assistant message in history
	                    st.session_state.messages.append(
	                        {
	                            "role": "assistant",
	                            "content": answer_text,
	                            "images": found_imgs,
	                        }
	                    )

	                except Exception as e:
	                    st.error(f"Error: {e}")
	                    # Store the failure as the assistant's reply so the last
	                    # message is no longer from the user and the query is not
	                    # retried on the next run.
	                    st.session_state.messages.append(
	                        {"role": "assistant", "content": f"❌ Error: {e}"}
	                    )

	# Script entry point: build the UI only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	main()