Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from typing import List, Tuple | |
| from src.config import Settings | |
| from src.retrieval.search import retrieve, format_citations | |
| from src.llm.answer import compose_answer | |
| from src.retrieval.query_handler import ( | |
| classify_query, | |
| QueryType, | |
| keyword_search_documents, | |
| generate_greeting_response, | |
| generate_semantic_analysis, | |
| detect_result_count | |
| ) | |
| from src.utils import load_pickle | |
def _split_source(doc: str) -> Tuple[str, str]:
    """Split a retrieved document string into (body, source tag).

    Documents carry a trailing ``[Source: ...]`` marker; if present, the text
    before the last marker is the body and the marker itself is the source.
    Returns ("<doc>", "unknown") when no marker is found.
    """
    if "[Source:" in doc:
        head, _, tail = doc.rpartition("[Source:")
        return head.strip(), "[Source:" + tail
    return doc, "unknown"


def _format_keyword_results(hits: List[Tuple[str, float]]) -> str:
    """Render keyword-search hits as a markdown answer string."""
    parts = [f"**🔍 Keyword Search Results** (Showing {len(hits)} results)\n\n"]
    for i, (doc, score) in enumerate(hits, 1):
        body, source = _split_source(doc)
        parts.append(
            f"**{i}.** {body}\n\n*{source}*\n*keyword match score: {score:.3f}*\n\n---\n\n"
        )
    # join once instead of repeated += concatenation
    return "".join(parts)


def _ask(query: str, history: List, settings, embedder, vstore) -> Tuple[List, List[dict]]:
    """Process a chat message and return updated history + citations.

    Routes the query by type: greeting, keyword search, semantic analysis,
    or (default) retrieval + answer composition.

    Args:
        query: Raw user message.
        history: Chat history as a list of (user, assistant) tuples; mutated
            in place and also returned.
        settings: App settings (provides ``docs_path`` among others).
        embedder: Embedding model passed through to retrieval.
        vstore: Vector store passed through to retrieval.

    Returns:
        Tuple of (updated history, citation dicts for the last query).
    """
    # Guard: empty or whitespace-only input gets a prompt, no retrieval.
    if not query or not query.strip():
        history.append((query, "Please enter a question about the documents."))
        return history, []

    # Detect how many results the user wants
    result_count = detect_result_count(query)
    # Classify the query type
    query_type = classify_query(query)

    # Handle greetings — no retrieval, no citations.
    if query_type == QueryType.GREETING:
        history.append((query, generate_greeting_response()))
        return history, []

    # Handle keyword search over the full document set.
    if query_type == QueryType.KEYWORD_SEARCH:
        all_docs = load_pickle(settings.docs_path)
        hits = keyword_search_documents(query, all_docs)
        if not hits:
            history.append((query, "No documents found containing those keywords."))
            return history, []
        # Limit results to the detected count.
        hits = hits[:result_count]
        history.append((query, _format_keyword_results(hits)))
        return history, format_citations(hits, max_items=result_count)

    # Handle semantic analysis — capped at 3 results for readability.
    if query_type == QueryType.SEMANTIC_ANALYSIS:
        analysis_count = min(result_count, 3)
        hits = retrieve(query, vstore=vstore, embedder=embedder, k=analysis_count)
        history.append((query, generate_semantic_analysis(query, hits)))
        return history, format_citations(hits, max_items=analysis_count)

    # Default: regular document question — retrieve then compose an answer.
    hits = retrieve(query, vstore=vstore, embedder=embedder, k=result_count)
    if not hits:
        history.append((query, "No relevant passages found. Try adjusting your query."))
        return history, []
    history.append((query, compose_answer(query, hits, settings)))
    return history, format_citations(hits, max_items=result_count)
def build_app(settings: Settings, embedder, vstore):
    """Build and return the Gradio Blocks chat UI.

    Wires a chatbot, a text input with Send button, a citations accordion,
    and a Clear button; all message handling is delegated to ``_ask``.
    """
    with gr.Blocks(title=settings.title) as demo:
        gr.Markdown(f"# {settings.title}\n{settings.description}")

        # Banner explaining whether an LLM summarizer is active in this mode.
        mode_note = (
            "— no LLM used, showing excerpts only. LLM will be added later for summarization"
            if settings.mode == "retrieval"
            else "— retrieval + summarizer enabled."
        )
        gr.Markdown(f"**Mode:** `{settings.mode}` " + mode_note)

        # Chat interface
        chat_window = gr.Chatbot(label="Conversation", height=400)
        with gr.Row():
            user_box = gr.Textbox(
                label="",
                placeholder="Ask a question about the documents...",
                scale=4,
                container=False,
            )
            send_button = gr.Button("Send", variant="primary", scale=1)

        # Citations accordion below the chat area.
        with gr.Accordion("Citations (top matches from last query)", open=False):
            citation_panel = gr.JSON(label="Source & similarity")

        reset_button = gr.Button("Clear Chat", size="sm")

        def handle_send(message, history):
            """Route one turn through _ask and clear the input box."""
            updated, cites = _ask(message, history or [], settings, embedder, vstore)
            # Empty string as third output resets the textbox.
            return updated, cites, ""

        def reset_chat():
            """Drop all history and citations."""
            return [], []

        # Both the button click and pressing Enter submit the message.
        send_button.click(
            handle_send,
            inputs=[user_box, chat_window],
            outputs=[chat_window, citation_panel, user_box],
        )
        user_box.submit(
            handle_send,
            inputs=[user_box, chat_window],
            outputs=[chat_window, citation_panel, user_box],
        )
        reset_button.click(reset_chat, outputs=[chat_window, citation_panel])

    return demo