Spaces:

TilanB
/

SmartDocAI

Sleeping

App Files Files Community

SmartDocAI / main.py

TilanB

fix

070e2e1 verified 4 months ago

raw

history blame

33.7 kB

	import configuration.logger_setup
	import logging

	logger = logging.getLogger(__name__)

	import hashlib
	import socket
	from typing import List, Dict
	import os
	import shutil
	from pathlib import Path
	from datetime import datetime
	import time
	import random

	from content_analyzer.document_parser import DocumentProcessor
	from search_engine.indexer import RetrieverBuilder
	from intelligence.orchestrator import AgentWorkflow
	from configuration import definitions, parameters


	# Built-in demo scenarios. Each key is the title listed in the
	# "Quick Start" dropdown; each value pairs the question to pre-fill with
	# the sample document path(s) to load for it.
	EXAMPLES = {
	    "Generative AI and Jobs": dict(
	        question="Which occupations are most likely to be automated by AI?",
	        file_paths=["samples/OIT-NASK-IAGen_WP140_web.pdf"],
	    ),
	    "Energy and AI": dict(
	        question="What is the accuracy of AI models in coding?",
	        file_paths=["samples/EnergyandAI.pdf"],
	    ),
	    "Digital Progress and Trends Report 2025": dict(
	        question="which country has most Gen Ai patents and which country has most total funding raised by AI start-ups?",
	        file_paths=["samples/Digital Progress and Trends Report 2025, Strengthening AI Foundations.pdf"],
	    ),
	}


	def format_chat_history(history: List[Dict]) -> str:
	    """Render past question/answer exchanges as a single markdown string.

	    Args:
	        history: Sequence of dict entries. The keys ``timestamp``,
	            ``question``, ``answer`` and ``confidence`` are read with
	            ``dict.get``; missing ones fall back to ``""`` (``"N/A"`` for
	            ``confidence``).

	    Returns:
	        A placeholder sentence when ``history`` is empty, otherwise one
	        section per entry (numbered from 1) joined by newlines.
	    """
	    if not history:
	        return "No conversation history yet. Ask a question to get started!"

	    def render_entry(number, record):
	        # Leading and trailing empty items reproduce the blank line that
	        # opens and closes every section.
	        section_lines = [
	            "",
	            "---",
	            f"### 💬 Q{number} ({record.get('timestamp', '')})",
	            f"Question: {record.get('question', '')}",
	            "",
	            f"Answer: {record.get('answer', '')}",
	            "",
	            f"Confidence: {record.get('confidence', 'N/A')}",
	            "",
	        ]
	        return "\n".join(section_lines)

	    return "\n".join(
	        render_entry(number, record) for number, record in enumerate(history, 1)
	    )


	def format_document_context(documents: List, question: str = "") -> str:
	    """Format retrieved document chunks as collapsible markdown sections.

	    At most the first five chunks are rendered; each is truncated to 500
	    characters and wrapped in a ``<details>`` element titled with the base
	    name of its source. Up to five key terms taken from ``question`` are
	    emphasised in the chunk text with markdown bold, keeping the casing
	    found in the document.

	    Args:
	        documents: Retrieved chunks. Objects exposing ``page_content`` and
	            ``metadata`` are read through those attributes; anything else
	            is rendered via ``str()`` with source ``"Unknown"``.
	        question: The user's question; used only to pick terms to highlight.

	    Returns:
	        Markdown text, or a placeholder sentence when ``documents`` is empty.
	    """
	    import re

	    if not documents:
	        return "No documents retrieved yet."

	    max_chunks = 5
	    max_chars = 500
	    max_terms = 5
	    stopwords = {
	        'the', 'a', 'an', 'is', 'are', 'was', 'were', 'in', 'on', 'at', 'to',
	        'for', 'of', 'and', 'or', 'what', 'how', 'why', 'when', 'where', 'which',
	    }

	    # Collect distinct key terms in question order. Surrounding punctuation
	    # is stripped so that e.g. "AI?" can still match "AI" in the text.
	    key_terms = []
	    for word in (question or "").split():
	        term = re.sub(r"^\W+|\W+$", "", word).lower()
	        if len(term) > 2 and term not in stopwords and term not in key_terms:
	            key_terms.append(term)
	    key_terms = key_terms[:max_terms]

	    # One combined pattern, longest alternative first, so overlapping terms
	    # ("automat" / "automation") produce a single highlight, not nested ones.
	    highlighter = None
	    if key_terms:
	        alternatives = sorted(key_terms, key=len, reverse=True)
	        highlighter = re.compile(
	            "|".join(re.escape(term) for term in alternatives), re.IGNORECASE
	        )

	    formatted = [f"### 📚 Retrieved Context ({len(documents)} chunks)\n"]

	    for index, doc in enumerate(documents[:max_chunks], 1):
	        content = doc.page_content if hasattr(doc, 'page_content') else str(doc)
	        source = doc.metadata.get('source', 'Unknown') if hasattr(doc, 'metadata') else 'Unknown'
	        # A present-but-empty source must not crash os.path.basename.
	        source = str(source or 'Unknown')

	        # Truncate long content
	        if len(content) > max_chars:
	            content = content[:max_chars] + "..."

	        # Wrap each match in bold while preserving the matched text itself.
	        if highlighter is not None:
	            content = highlighter.sub(lambda match: f"**{match.group(0)}**", content)

	        formatted.append("\n".join([
	            "",
	            "<details>",
	            f"<summary>📄 Chunk {index} - {os.path.basename(source)}</summary>",
	            "",
	            content,
	            "",
	            "</details>",
	            "",
	        ]))

	    if len(documents) > max_chunks:
	        formatted.append(f"\n... and {len(documents) - max_chunks} more chunks")

	    return "\n".join(formatted)


	def _get_file_hashes(uploaded_files: List) -> frozenset:
	    """Return the set of SHA-256 hex digests of the uploaded files' contents.

	    Args:
	        uploaded_files: Iterable of uploads. Each item is either an object
	            whose ``name`` attribute is a filesystem path, or a path itself.
	            ``None`` or an empty list yields an empty result.

	    Returns:
	        A ``frozenset`` of hex digest strings; files with identical bytes
	        collapse to a single entry.

	    Raises:
	        OSError: If a file cannot be opened or read.
	    """
	    read_size = 1024 * 1024  # hash in 1 MiB pieces instead of loading whole files
	    digests = set()
	    for file in uploaded_files or ():
	        path = getattr(file, "name", file)
	        hasher = hashlib.sha256()
	        with open(path, "rb") as handle:
	            for piece in iter(lambda: handle.read(read_size), b""):
	                hasher.update(piece)
	        digests.add(hasher.hexdigest())
	    return frozenset(digests)


	def _find_open_port(start_port: int, max_attempts: int = 20) -> int:
	    """Find a TCP port on 127.0.0.1 that can currently be bound.

	    Candidates ``start_port``, ``start_port + 1``, ... are probed in order,
	    up to ``max_attempts`` of them.

	    Args:
	        start_port: First port number to try.
	        max_attempts: How many consecutive ports to probe.

	    Returns:
	        The first candidate for which ``bind`` succeeded. The probe socket
	        is closed before returning, so the port is not reserved.

	    Raises:
	        RuntimeError: If no candidate could be bound, including when the
	            candidates fall outside the valid 0-65535 range.
	    """
	    for port in range(start_port, start_port + max_attempts):
	        # bind() raises OverflowError (not OSError) for out-of-range ports;
	        # skip them so callers always get the documented RuntimeError.
	        if not 0 <= port <= 65535:
	            continue
	        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
	            # On Windows SO_REUSEADDR allows binding to a port that another
	            # socket is already using, which would make the probe report a
	            # busy port as free. Only set it elsewhere.
	            if os.name != "nt":
	                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
	            try:
	                sock.bind(("127.0.0.1", port))
	            except OSError:
	                continue
	            return port
	    raise RuntimeError(f"Could not find an open port starting at {start_port}")


	def _ensure_hfhub_hffolder_compat():
	    """Restore ``huggingface_hub.HfFolder`` when the installed hub lacks it.

	    Shim for Gradio <5.7.1 with huggingface_hub >=1.0. If the attribute is
	    missing, a minimal stand-in class is attached whose static
	    ``get_token()`` delegates to ``huggingface_hub.utils.get_token``.
	    Nothing happens when ``HfFolder`` already exists or when ``get_token``
	    cannot be imported.
	    """
	    import huggingface_hub as hub

	    if not hasattr(hub, "HfFolder"):
	        try:
	            from huggingface_hub.utils import get_token as _read_token
	        except Exception:
	            # No token helper to delegate to; leave the module untouched.
	            return

	        class HfFolder:
	            @staticmethod
	            def get_token():
	                return _read_token()

	        hub.HfFolder = HfFolder


	def _setup_gradio_shim():
	    """Make gradio_client's JSON-schema conversion accept boolean schemas.

	    Replaces ``gradio_client.utils._json_schema_to_python_type`` with a
	    wrapper: a ``True`` schema maps to ``"Any"``, a ``False`` schema maps to
	    ``"Never"``, and every other schema is forwarded to the original
	    function unchanged.
	    """
	    from gradio_client import utils as client_utils

	    wrapped = client_utils._json_schema_to_python_type

	    def _tolerant_converter(schema, defs=None):
	        if not isinstance(schema, bool):
	            return wrapped(schema, defs)
	        if schema:
	            return "Any"
	        return "Never"

	    client_utils._json_schema_to_python_type = _tolerant_converter


	# CSS styling - Clean, accessible light theme with professional colors
	_APP_CSS = """
	/* Global styling - Light, clean background */
	.gradio-container {
	    background: linear-gradient(180deg, #f8fafc 0%, #e2e8f0 100%) !important;
	    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
	}

	/* Title styles - Dark text for readability */
	.app-title {
	    font-size: 2.2em !important;
	    text-align: center !important;
	    color: #1e293b !important;
	    font-weight: 700 !important;
	    margin-bottom: 8px !important;
	}
	.app-subtitle {
	    font-size: 1.1em !important;
	    text-align: center !important;
	    color: #0369a1 !important;
	    font-weight: 500 !important;
	}
	.app-description {
	    text-align: center;
	    color: #475569 !important;
	    font-size: 0.95em !important;
	    line-height: 1.6 !important;
	}

	/* Section headers */
	.section-header {
	    color: #1e293b !important;
	    font-weight: 600 !important;
	    border-bottom: 2px solid #0ea5e9 !important;
	    padding-bottom: 8px !important;
	    margin-bottom: 16px !important;
	}

	/* Chat history panel - Clean white card with more height */
	.chat-history {
	    min-height: 500px;
	    max-height: 600px;
	    overflow-y: auto;
	    border: 1px solid #cbd5e1;
	    border-radius: 12px;
	    padding: 20px;
	    background: #ffffff;
	    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
	    color: #334155 !important;
	}
	#chat-history {
	    min-height: 120px !important;
	    max-height: none !important;
	    height: auto !important;
	}
	.chat-history h3 {
	    color: #0f172a !important;
	}
	.chat-history strong {
	    color: #1e293b !important;
	}

	/* Document context panel */
	.doc-context {
	    max-height: 380px;
	    overflow-y: auto;
	    border: 1px solid #cbd5e1;
	    border-radius: 12px;
	    padding: 20px;
	    background: #ffffff;
	    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
	    color: #334155 !important;
	}
	.doc-context details {
	    margin-bottom: 12px;
	    padding: 14px;
	    background: #f1f5f9;
	    border-radius: 8px;
	    border-left: 4px solid #0ea5e9;
	}
	.doc-context summary {
	    cursor: pointer;
	    font-weight: 600;
	    color: #0369a1 !important;
	}
	.doc-context p, .doc-context span {
	    color: #475569 !important;
	}

	/* Answer box - Success green accent, auto-height */
	.answer-box > div:nth-child(2) {
	    border-left: 4px solid #10b981 !important;
	    padding: 16px 16px 16px 20px !important;
	    background: #f0fdf4 !important;
	    border-radius: 8px !important;
	    min-height: 100px;
	    color: #166534 !important;
	}
	.answer-box p, .answer-box li, .answer-box span {
	    color: #166534 !important;
	}
	.answer-box strong {
	    color: #14532d !important;
	}
	.answer-box h1, .answer-box h2, .answer-box h3, .answer-box h4 {
	    color: #15803d !important;
	}
	.answer-box code {
	    background: #dcfce7 !important;
	    color: #166534 !important;
	    padding: 2px 6px !important;
	    border-radius: 4px !important;
	}
	.answer-box pre {
	    background: #dcfce7 !important;
	    padding: 12px !important;
	    border-radius: 6px !important;
	    overflow-x: auto !important;
	}

	/* Verification box - Blue accent */
	.verification-box > div:nth-child(2) {
	    border-left: 4px solid #0ea5e9 !important;
	    padding: 16px 16px 16px 20px !important;
	    background: #f0f9ff !important;
	    border-radius: 8px !important;
	    min-height: 80px;
	    color: #0369a1 !important;
	}
	.verification-box p, .verification-box li, .verification-box span {
	    color: #0c4a6e !important;
	}
	.verification-box strong {
	    color: #075985 !important;
	}

	/* Stats panel - Professional blue gradient */
	.stats-panel {
	    background: linear-gradient(135deg, #0369a1 0%, #0284c7 50%, #0ea5e9 100%) !important;
	    color: #ffffff !important;
	    padding: 20px !important;
	    border-radius: 12px !important;
	    text-align: center;
	    box-shadow: 0 4px 14px rgba(3, 105, 161, 0.3);
	}
	.stats-panel strong {
	    color: #ffffff !important;
	}

	/* Info panel */
	.info-panel {
	    background: #eff6ff !important;
	    border: 1px solid #bfdbfe !important;
	    border-radius: 8px !important;
	    padding: 12px !important;
	    color: #1e40af !important;
	}

	/* Form elements */
	.gr-input, .gr-textbox textarea {
	    background: #ffffff !important;
	    border: 1px solid #cbd5e1 !important;
	    border-radius: 8px !important;
	    color: #1e293b !important;
	}
	.gr-input:focus, .gr-textbox textarea:focus {
	    border-color: #0ea5e9 !important;
	    box-shadow: 0 0 0 3px rgba(14, 165, 233, 0.1) !important;
	}

	/* Labels */
	label {
	    color: #374151 !important;
	    font-weight: 500 !important;
	}

	/* Dropdown - High contrast with darker background for visibility */
	.gr-dropdown,
	[data-testid="dropdown"],
	.svelte-dropdown,
	div[class*="dropdown"] {
	    background: #e0e7ff !important;
	    color: #1e293b !important;
	    border: 2px solid #1e40af !important;
	    border-radius: 8px !important;
	    box-shadow: 0 2px 8px rgba(30, 64, 175, 0.2) !important;
	}
	.gr-dropdown:hover,
	[data-testid="dropdown"]:hover {
	    background: #c7d2fe !important;
	    border-color: #1d4ed8 !important;
	    box-shadow: 0 4px 12px rgba(30, 64, 175, 0.3) !important;
	}
	.gr-dropdown select,
	.gr-dropdown input,
	[data-testid="dropdown"] input {
	    color: #1e293b !important;
	    background: transparent !important;
	    font-weight: 500 !important;
	}

	/* Dropdown container and options */
	[data-testid="dropdown"] span,
	.dropdown-container span,
	div[class*="dropdown"] span {
	    color: #1e293b !important;
	    font-weight: 500 !important;
	}

	/* Dropdown list options */
	.gr-dropdown ul,
	.dropdown-options,
	ul[class*="dropdown"] {
	    background: #ffffff !important;
	    border: 2px solid #1e40af !important;
	    border-radius: 8px !important;
	    box-shadow: 0 4px 16px rgba(0, 0, 0, 0.15) !important;
	}
	.gr-dropdown li,
	.dropdown-options li,
	ul[class*="dropdown"] li {
	    color: #1e293b !important;
	    padding: 10px 14px !important;
	}
	.gr-dropdown li:hover,
	ul[class*="dropdown"] li:hover {
	    background: #c7d2fe !important;
	    color: #1e40af !important;
	}

	/* Dropdown label */
	.gr-dropdown label,
	[data-testid="dropdown"] label {
	    color: #1e40af !important;
	    font-weight: 600 !important;
	}

	/* Tabs - Clean styling */
	.tab-nav {
	    border-bottom: 2px solid #e2e8f0 !important;
	}
	.tab-nav button {
	    color: #64748b !important;
	    font-weight: 500 !important;
	    padding: 12px 20px !important;
	    border: none !important;
	    background: transparent !important;
	}
	.tab-nav button.selected {
	    color: #0369a1 !important;
	    border-bottom: 3px solid #0369a1 !important;
	    font-weight: 600 !important;
	}

	/* Markdown text */
	.prose, .markdown-text {
	    color: #334155 !important;
	}
	.prose h1, .prose h2, .prose h3,
	.markdown-text h1, .markdown-text h2, .markdown-text h3 {
	    color: #1e293b !important;
	}
	.prose strong, .markdown-text strong {
	    color: #0f172a !important;
	}

	/* Scrollbar styling */
	::-webkit-scrollbar {
	    width: 8px;
	    height: 8px;
	}
	::-webkit-scrollbar-track {
	    background: #f1f5f9;
	    border-radius: 4px;
	}
	::-webkit-scrollbar-thumb {
	    background: #94a3b8;
	    border-radius: 4px;
	}
	::-webkit-scrollbar-thumb:hover {
	    background: #64748b;
	}

	button.secondary {
	    background: #1e40af !important;
	    color: #ffffff !important;
	    border: none !important;
	    border-radius: 8px !important;
	    font-weight: 600 !important;
	    box-shadow: 0 2px 6px rgba(30, 64, 175, 0.3) !important;
	    padding: 12px 20px !important;
	    min-height: 44px !important;
	}
	button.secondary:hover {
	    background: #1d4ed8 !important;
	    box-shadow: 0 4px 10px rgba(30, 64, 175, 0.4) !important;
	}

	/* Left side input boxes with borders */
	.left-panel-box {
	    background: #fafafa !important;
	    border: 2px solid #94a3b8 !important;
	    border-radius: 10px !important;
	    padding: 14px !important;
	    margin-bottom: 8px !important;
	}
	.left-panel-box:hover {
	    border-color: #64748b !important;
	    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1) !important;
	}

	/* File upload box with border */
	.file-upload-box {
	    background: #f8fafc !important;
	    border: 2px dashed #64748b !important;
	    border-radius: 10px !important;
	    padding: 14px !important;
	}
	.file-upload-box:hover {
	    border-color: #0369a1 !important;
	    border-style: solid !important;
	    background: #f0f9ff !important;
	}

	/* Question input box with border */
	.question-box {
	    background: #fffbeb !important;
	    border: 2px solid #f59e0b !important;
	    border-radius: 10px !important;
	    padding: 14px !important;
	}
	.question-box:hover {
	    border-color: #d97706 !important;
	    box-shadow: 0 2px 8px rgba(245, 158, 11, 0.2) !important;
	}

	/* Dropdown Example - Beige background on 3rd parent container */
	.dropdownExample {
	    background: #f5f5dc !important;
	    padding: 16px !important;
	    border-radius: 8px !important;
	    border: 2px solid #d1d5db !important;
	    margin-bottom: 16px !important;
	}
	"""

	# Front-end script driving the rotating status message and elapsed timer
	# inside the #processing-message banner. It is written as a single function
	# expression (not a self-invoked one) because that is the form the Blocks
	# ``js`` argument is documented to accept.
	_APP_JS = r"""
	() => {
	    const uploadMessages = [
	        "Crunching your documents...",
	        "Warming up the AI...",
	        "Extracting knowledge...",
	        "Scanning for insights...",
	        "Preparing your data...",
	        "Looking for answers...",
	        "Analyzing file structure...",
	        "Reading your files...",
	        "Indexing content...",
	        "Almost ready..."
	    ];

	    let msgInterval = null;
	    let timerInterval = null;
	    let startMs = 0;
	    let lastMsg = null;

	    const root = () => document.getElementById("processing-message");
	    const isVisible = (el) => !!(el && (el.offsetWidth || el.offsetHeight || el.getClientRects().length));

	    const pickMsg = () => {
	        if (uploadMessages.length === 0) return "";
	        if (uploadMessages.length === 1) return uploadMessages[0];
	        let m;
	        do { m = uploadMessages[Math.floor(Math.random() * uploadMessages.length)]; }
	        while (m === lastMsg);
	        lastMsg = m;
	        return m;
	    };

	    // Spans are looked up on every update: the server replaces the banner
	    // HTML between stages, so a cached element reference would go stale.
	    const getMsgSpan = () => root()?.querySelector("#processing-msg");
	    const getTimerSpan = () => root()?.querySelector("#processing-timer");

	    const setMsg = (t) => { const s = getMsgSpan(); if (s) s.textContent = t; };
	    const setTimer = (t) => { const s = getTimerSpan(); if (s) s.textContent = t; };
	    const fmtElapsed = () => `${((Date.now() - startMs) / 1000).toFixed(1)}s elapsed`;

	    // Refill a freshly re-rendered (empty) message span without waiting
	    // for the next rotation.
	    const ensureMsg = () => {
	        const s = getMsgSpan();
	        if (s && !s.textContent) s.textContent = lastMsg ?? pickMsg();
	    };

	    const start = () => {
	        if (msgInterval || timerInterval) return;
	        startMs = Date.now();
	        setMsg(pickMsg());
	        setTimer(fmtElapsed());
	        msgInterval = setInterval(() => setMsg(pickMsg()), 2000);
	        timerInterval = setInterval(() => setTimer(fmtElapsed()), 200);
	    };

	    const stop = () => {
	        if (msgInterval) { clearInterval(msgInterval); msgInterval = null; }
	        if (timerInterval) { clearInterval(timerInterval); timerInterval = null; }
	        setTimer("");
	    };

	    const tick = () => {
	        if (isVisible(root())) { start(); ensureMsg(); }
	        else stop();
	    };

	    const obs = new MutationObserver(tick);
	    obs.observe(document.body, { subtree: true, childList: true, attributes: true });

	    window.addEventListener("load", tick);
	    setInterval(tick, 500); // fallback
	    tick();
	}
	"""

	# Status banner shown while a question is being processed; the two spans are
	# filled in by the script in _APP_JS.
	_PROCESSING_HTML = (
	    '<div style="background:#fff; border-radius:8px; padding:18px 24px; margin-top:32px; color:#1e293b; font-size:1.2em; font-weight:600; box-shadow:0 2px 8px rgba(0,0,0,0.04);">'
	    '<span id="processing-msg"></span>'
	    '<span id="processing-timer" style="opacity:0.8; margin-left:8px;"></span>'
	    '</div>'
	)

	# Banner variant with an animated image, shown while the retriever is built.
	_INDEXING_HTML = (
	    '<div style="background:#fff; border-radius:8px; padding:18px 24px; margin-top:32px; color:#1e293b; font-size:1.2em; font-weight:600; box-shadow:0 2px 8px rgba(0,0,0,0.04); display:flex; align-items:center;">'
	    '<img src="https://media.giphy.com/media/26ufnwz3wDUli7GU0/giphy.gif" alt="AI working" style="height:40px; margin-right:16px;">'
	    '<span id="processing-msg"></span>'
	    '</div>'
	)


	def _load_or_process_file(processor, file):
	    """Return the chunks for one uploaded file, reusing a valid cache entry.

	    The file's bytes are hashed with the processor's own hash helper and the
	    digest names the cache entry (``<hash>.pkl`` under ``processor.cache_dir``).
	    On a cache miss, or when the cached value is empty, the file is processed
	    and the result is written back to the cache.
	    """
	    # NOTE(review): the ".pkl" suffix suggests the cache is pickle-based;
	    # confirm the cache directory is never writable by untrusted parties.
	    with open(file.name, "rb") as f:
	        file_hash = processor._generate_hash(f.read())
	    cache_path = processor.cache_dir / f"{file_hash}.pkl"
	    if processor._is_cache_valid(cache_path):
	        chunks = processor._load_from_cache(cache_path)
	        if chunks:
	            logger.info(f"Using cached chunks for {file.name}")
	            return chunks
	    chunks = processor._process_file(file)
	    processor._save_to_cache(chunks, cache_path)
	    return chunks


	def _collect_unique_chunks(processor, uploaded_files) -> List:
	    """Gather chunks from all uploads, dropping chunks with identical text."""
	    unique_chunks = []
	    seen_hashes = set()
	    for file in uploaded_files:
	        for chunk in _load_or_process_file(processor, file):
	            chunk_hash = processor._generate_hash(chunk.page_content.encode())
	            if chunk_hash not in seen_hashes:
	                seen_hashes.add(chunk_hash)
	                unique_chunks.append(chunk)
	    return unique_chunks


	def _stage_example_files(file_paths):
	    """Copy example documents into a fresh temporary directory.

	    Returns a pair ``(copied_paths, listing)`` where ``listing`` has one
	    line per requested file: its name and size, or a "not found" note.
	    """
	    import tempfile

	    temp_dir = tempfile.mkdtemp()
	    copied_files = []
	    listing = []
	    for source_file_path in file_paths:
	        abs_source = os.path.abspath(source_file_path)
	        if not os.path.exists(abs_source):
	            listing.append(f"{source_file_path} not found\n")
	            continue
	        filename = os.path.basename(abs_source)
	        temp_file_path = os.path.join(temp_dir, filename)
	        shutil.copy2(abs_source, temp_file_path)
	        copied_files.append(temp_file_path)
	        file_size_mb = os.path.getsize(temp_file_path) / (1024 * 1024)
	        listing.append(f"{filename} ({file_size_mb:.2f} MB)\n")
	    if not copied_files:
	        # Nothing was staged, so do not leave an empty directory behind.
	        shutil.rmtree(temp_dir, ignore_errors=True)
	    return copied_files, "".join(listing)


	def main():
	    """Build the SmartDoc AI Gradio interface and start the web server.

	    Applies the compatibility shims, creates the document processor,
	    retriever builder and agent workflow, wires the UI events, and launches
	    Gradio. When the ``SPACE_ID`` environment variable is set the server
	    binds to 0.0.0.0:7860; otherwise it binds to 127.0.0.1 on the first
	    bindable port at or after ``GRADIO_SERVER_PORT`` (default 7860).
	    """
	    _ensure_hfhub_hffolder_compat()  # must run before importing gradio
	    import gradio as gr
	    _setup_gradio_shim()

	    logger.info("=" * 60)
	    logger.info("Starting SmartDoc AI application...")
	    logger.info("=" * 60)

	    # Initialize components
	    processor = DocumentProcessor()
	    retriever_indexer = RetrieverBuilder()
	    orchestrator = AgentWorkflow()

	    logger.info("All components initialized successfully")

	    with gr.Blocks(theme=gr.themes.Soft(), title="SmartDoc AI", css=_APP_CSS, js=_APP_JS) as demo:
	        gr.Markdown("### SmartDoc AI - Document Q&A", elem_classes="app-title")
	        gr.Markdown("Upload your documents and ask questions. Answers will appear below, just like a chat.", elem_classes="app-description")
	        gr.Markdown("---")

	        # Examples dropdown
	        example_dropdown = gr.Dropdown(
	            label="Quick Start - Choose an Example",
	            choices=list(EXAMPLES.keys()),
	            value=None,
	            info="Select a pre-loaded example to try",
	        )
	        loaded_file_info = gr.Markdown("", elem_classes="info-panel", visible=False)

	        files = gr.Files(label="Upload your files", file_types=definitions.ALLOWED_TYPES)
	        question = gr.Textbox(label="Ask a question", lines=2, placeholder="Type your question here...")
	        chat = gr.Chatbot(label="Answers", elem_id="chat-history")
	        submit_btn = gr.Button("Get Answer", variant="primary")
	        processing_message = gr.HTML("", elem_id="processing-message", visible=False)
	        doc_context_display = gr.Markdown("Submit a question to see which document sections were referenced", elem_classes="doc-context", visible=False)
	        refresh_context_btn = gr.Button("Refresh Sources", variant="secondary", visible=False)
	        with gr.Tab("Context"):
	            pass  # No .render() calls here; components are already defined and used in outputs

	        # Per-session data. Handlers receive it as an input and return the
	        # updated copy as an output; reading or writing ``.value`` on the
	        # component would touch the initial value shared by every visitor.
	        session_state = gr.State({
	            "file_hashes": frozenset(),
	            "retriever": None,
	            "chat_history": [],
	            "last_documents": [],
	            "last_question": "",
	            "total_questions": 0,
	            "session_start": datetime.now().strftime("%Y-%m-%d %H:%M"),
	        })

	        def render(history, state, *, busy, banner_html="", context_md=None):
	            """Build one output tuple in the order of ``submit_btn``'s outputs.

	            ``busy`` locks the four input controls; ``banner_html`` (when
	            non-empty) shows the status banner; ``context_md`` (when given)
	            reveals the context panel and its refresh button.
	            """
	            show_context = context_md is not None
	            if show_context:
	                context_update = gr.update(value=context_md, visible=True)
	            else:
	                context_update = gr.update(visible=False)
	            return (
	                history,
	                context_update,                                   # doc_context_display
	                gr.update(visible=show_context),                  # refresh_context_btn
	                gr.update(interactive=not busy),                  # submit_btn
	                gr.update(interactive=not busy),                  # question
	                gr.update(interactive=not busy),                  # files
	                gr.update(interactive=not busy),                  # example_dropdown
	                gr.update(value=banner_html, visible=bool(banner_html)),
	                state,
	            )

	        def process_question(question_text, uploaded_files, chat_history, state):
	            """Answer a question about the uploaded files, streaming UI updates.

	            Yields output tuples (see ``render``) as the work moves through
	            chunking, retriever construction and answer generation. Any
	            exception is logged and reported to the user as a chat message.
	            """
	            history = list(chat_history or [])
	            state = dict(state or {})
	            question_text = question_text or ""  # the textbox may deliver None

	            def reply(message):
	                history.append({"role": "user", "content": question_text})
	                history.append({"role": "assistant", "content": message})

	            yield render(history, state, busy=True, banner_html=_PROCESSING_HTML)
	            try:
	                if not question_text.strip():
	                    reply("Please enter a question.")
	                    yield render(history, state, busy=False)
	                    return
	                if not uploaded_files:
	                    reply("Please upload at least one document.")
	                    yield render(history, state, busy=False)
	                    return

	                # Chunk every upload (cached where possible) and de-duplicate.
	                all_chunks = _collect_unique_chunks(processor, uploaded_files)

	                # Building Retriever
	                yield render(history, state, busy=True, banner_html=_INDEXING_HTML)
	                retriever = retriever_indexer.build_hybrid_retriever(all_chunks)

	                # Generating Answer
	                yield render(history, state, busy=True, banner_html=_PROCESSING_HTML)
	                result = orchestrator.run_workflow(question=question_text, retriever=retriever)
	                answer = result["draft_answer"]

	                # Do not display verification to user, only use internally
	                verification = result.get("verification_report", "No verification details available.")
	                logger.info(f"Verification (internal):\n{verification}")

	                documents = retriever.invoke(question_text)
	                context_md = format_document_context(documents, question_text)
	                state["last_documents"] = documents
	                state["last_question"] = question_text

	                reply(f"Answer:\n{answer}")
	                yield render(history, state, busy=False, context_md=context_md)
	            except Exception as e:
	                logger.error(f"Processing error: {e}", exc_info=True)
	                reply(f"Error: {str(e)}")
	                yield render(history, state, busy=False)

	        submit_btn.click(
	            fn=process_question,
	            inputs=[question, files, chat, session_state],
	            outputs=[chat, doc_context_display, refresh_context_btn, submit_btn, question, files, example_dropdown, processing_message, session_state],
	            queue=True,
	            show_progress=True,
	        )

	        def refresh_context(state):
	            """Re-render the context panel from this session's last retrieval."""
	            state = state or {}
	            return format_document_context(
	                state.get("last_documents", []),
	                state.get("last_question", ""),
	            )

	        refresh_context_btn.click(
	            fn=refresh_context,
	            inputs=[session_state],
	            outputs=[doc_context_display],
	        )

	        def load_example(example_key):
	            """Fill the file list and question box from the chosen example.

	            The info panel is made visible together with its text; it is
	            created hidden, so a bare string would never be displayed.
	            """
	            def info(text):
	                return gr.update(value=text, visible=True)

	            if not example_key or example_key not in EXAMPLES:
	                return [], "", info("Select a valid example from the dropdown above")
	            ex_data = EXAMPLES[example_key]
	            copied_files, listing = _stage_example_files(ex_data["file_paths"])
	            if not copied_files:
	                return [], "", info("Could not load example files")
	            return copied_files, ex_data["question"], info(f"Loaded: {example_key}\n\n" + listing)

	        example_dropdown.change(
	            fn=load_example,
	            inputs=[example_dropdown],
	            outputs=[files, question, loaded_file_info],
	        )

	    # Launch server - Compatible with both local and Hugging Face Spaces
	    # HF Spaces sets SPACE_ID environment variable
	    is_hf_space = os.environ.get("SPACE_ID") is not None
	    demo.queue()
	    if is_hf_space:
	        # Hugging Face Spaces configuration
	        logger.info("Running on Hugging Face Spaces")
	        demo.launch(server_name="0.0.0.0", server_port=7860)
	    else:
	        # Local development configuration
	        configured_port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
	        server_port = _find_open_port(configured_port)
	        logger.info(f"Launching Gradio on port {server_port}")
	        logger.info(f"Access the app at: http://127.0.0.1:{server_port}")
	        demo.launch(server_name="127.0.0.1", server_port=server_port, share=False)


	if __name__ == "__main__":
	    main()