Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from typing import List, Tuple | |
| from src.config import Settings | |
| from src.retrieval.search import retrieve, format_citations | |
| from src.llm.answer import compose_answer | |
| from src.retrieval.query_handler import ( | |
| classify_query, | |
| QueryType, | |
| keyword_search_documents, | |
| generate_greeting_response, | |
| generate_semantic_analysis, | |
| detect_result_count | |
| ) | |
| from src.utils import load_pickle | |
def _split_source(doc: str) -> Tuple[str, str]:
    """Split a retrieved document string into (body, source tag).

    Documents carry a trailing ``[Source: ...]`` marker; if present, the text
    before the last marker is the body and the marker itself is the source.
    Returns ("<doc>", "unknown") when no marker is found.
    """
    if "[Source:" in doc:
        head, _, tail = doc.rpartition("[Source:")
        return head.strip(), "[Source:" + tail
    return doc, "unknown"


def _format_keyword_results(hits: List[Tuple[str, float]]) -> str:
    """Render keyword-search hits as a markdown answer string."""
    parts = [f"**🔍 Keyword Search Results** (Showing {len(hits)} results)\n\n"]
    for i, (doc, score) in enumerate(hits, 1):
        body, source = _split_source(doc)
        parts.append(
            f"**{i}.** {body}\n\n*{source}*\n*keyword match score: {score:.3f}*\n\n---\n\n"
        )
    # join once instead of repeated += concatenation
    return "".join(parts)


def _ask(query: str, history: List, settings, embedder, vstore) -> Tuple[List, List[dict]]:
    """Process a chat message and return updated history + citations.

    Routes the query by type: greeting, keyword search, semantic analysis,
    or (default) retrieval + answer composition.

    Args:
        query: Raw user message.
        history: Chat history as a list of (user, assistant) tuples; mutated
            in place and also returned.
        settings: App settings (provides ``docs_path`` among others).
        embedder: Embedding model passed through to retrieval.
        vstore: Vector store passed through to retrieval.

    Returns:
        Tuple of (updated history, citation dicts for the last query).
    """
    # Guard: empty or whitespace-only input gets a prompt, no retrieval.
    if not query or not query.strip():
        history.append((query, "Please enter a question about the documents."))
        return history, []

    # Detect how many results the user wants
    result_count = detect_result_count(query)
    # Classify the query type
    query_type = classify_query(query)

    # Handle greetings — no retrieval, no citations.
    if query_type == QueryType.GREETING:
        history.append((query, generate_greeting_response()))
        return history, []

    # Handle keyword search over the full document set.
    if query_type == QueryType.KEYWORD_SEARCH:
        all_docs = load_pickle(settings.docs_path)
        hits = keyword_search_documents(query, all_docs)
        if not hits:
            history.append((query, "No documents found containing those keywords."))
            return history, []
        # Limit results to the detected count.
        hits = hits[:result_count]
        history.append((query, _format_keyword_results(hits)))
        return history, format_citations(hits, max_items=result_count)

    # Handle semantic analysis — capped at 3 results for readability.
    if query_type == QueryType.SEMANTIC_ANALYSIS:
        analysis_count = min(result_count, 3)
        hits = retrieve(query, vstore=vstore, embedder=embedder, k=analysis_count)
        history.append((query, generate_semantic_analysis(query, hits)))
        return history, format_citations(hits, max_items=analysis_count)

    # Default: regular document question — retrieve then compose an answer.
    hits = retrieve(query, vstore=vstore, embedder=embedder, k=result_count)
    if not hits:
        history.append((query, "No relevant passages found. Try adjusting your query."))
        return history, []
    history.append((query, compose_answer(query, hits, settings)))
    return history, format_citations(hits, max_items=result_count)
def build_app(settings: Settings, embedder, vstore):
    """Build and return the Gradio Blocks chat UI.

    Wires a chatbot, a text input with Send button, a citations accordion,
    and a Clear button; all message handling is delegated to ``_ask``.
    """
    with gr.Blocks(title=settings.title) as demo:
        gr.Markdown(f"# {settings.title}\n{settings.description}")

        # Banner explaining whether an LLM summarizer is active in this mode.
        mode_note = (
            "— no LLM used, showing excerpts only. LLM will be added later for summarization"
            if settings.mode == "retrieval"
            else "— retrieval + summarizer enabled."
        )
        gr.Markdown(f"**Mode:** `{settings.mode}` " + mode_note)

        # Chat interface
        chat_window = gr.Chatbot(label="Conversation", height=400)
        with gr.Row():
            user_box = gr.Textbox(
                label="",
                placeholder="Ask a question about the documents...",
                scale=4,
                container=False,
            )
            send_button = gr.Button("Send", variant="primary", scale=1)

        # Citations accordion below the chat area.
        with gr.Accordion("Citations (top matches from last query)", open=False):
            citation_panel = gr.JSON(label="Source & similarity")

        reset_button = gr.Button("Clear Chat", size="sm")

        def handle_send(message, history):
            """Route one turn through _ask and clear the input box."""
            updated, cites = _ask(message, history or [], settings, embedder, vstore)
            # Empty string as third output resets the textbox.
            return updated, cites, ""

        def reset_chat():
            """Drop all history and citations."""
            return [], []

        # Both the button click and pressing Enter submit the message.
        send_button.click(
            handle_send,
            inputs=[user_box, chat_window],
            outputs=[chat_window, citation_panel, user_box],
        )
        user_box.submit(
            handle_send,
            inputs=[user_box, chat_window],
            outputs=[chat_window, citation_panel, user_box],
        )
        reset_button.click(reset_chat, outputs=[chat_window, citation_panel])

    return demo