Spaces:
Runtime error
Runtime error
| """Query ranker UI component and logic.""" | |
| import gradio as gr | |
| import logging | |
| from src.core.logging_config import get_logger | |
| from src.rag.vector_store import vector_store_manager | |
| from src.rag import document_ingestion_service | |
| logger = get_logger(__name__) | |
def handle_query_search(query, method, k_value):
    """Run a retrieval query and return the ranked results as an HTML fragment.

    Args:
        query: Search text entered by the user. Empty or whitespace-only input
            short-circuits to a placeholder panel before any retrieval call.
        method: Retrieval strategy key: "similarity", "mmr", "bm25" or
            "hybrid". Any other value retrieves nothing, so the "no results"
            panel is rendered.
        k_value: Number of chunks to retrieve. Coerced to ``int`` so that a
            float such as ``5.0`` does not leak into ``k`` / ``fetch_k``.

    Returns:
        An HTML string: the placeholder panel, the rendered result cards, or
        an error panel if anything raised while searching.
    """
    from html import escape

    if not query or not query.strip():
        return """
        <div class="ranker-container">
            <div class="ranker-placeholder">
                <h3>π Query Ranker</h3>
                <p>Enter a search query to find relevant document chunks with similarity scores.</p>
            </div>
        </div>
        """

    try:
        # Normalise k once: k * 2 below must stay an integer for fetch_k.
        top_k = int(k_value)
        logger.info(f"Query search: '{query[:50]}...' using method: {method}")

        results = []
        if method == "similarity":
            scored = None
            try:
                vector_store = vector_store_manager.get_vector_store()
                if hasattr(vector_store, "similarity_search_with_score"):
                    pairs = vector_store.similarity_search_with_score(query, k=top_k)
                    # Build into a separate list: if formatting fails part-way
                    # through, nothing half-finished reaches the output and the
                    # fallback below cannot produce duplicate cards.
                    # NOTE(review): "1 - score" presumes the store returns a
                    # distance (lower is better) - confirm for the backend in use.
                    scored = [
                        _format_ranker_result(
                            doc,
                            max(0, 1 - score) if score is not None else 0.8,
                            rank,
                        )
                        for rank, (doc, score) in enumerate(pairs, start=1)
                    ]
            except Exception as e:
                logger.warning(f"Could not get similarity scores: {e}")

            if scored is None:
                # Fallback without real scores: only now query the plain
                # retriever, and assign rank-based placeholder scores.
                retriever = vector_store_manager.get_retriever("similarity", {"k": top_k})
                scored = [
                    _format_ranker_result(doc, 0.85 - (i * 0.05), i + 1)
                    for i, doc in enumerate(retriever.invoke(query))
                ]
            results = scored
        else:
            if method == "mmr":
                retriever = vector_store_manager.get_retriever(
                    "mmr",
                    {"k": top_k, "fetch_k": top_k * 2, "lambda_mult": 0.5},
                )
            elif method == "bm25":
                retriever = vector_store_manager.get_bm25_retriever(k=top_k)
            elif method == "hybrid":
                retriever = vector_store_manager.get_hybrid_retriever(
                    k=top_k, semantic_weight=0.7, keyword_weight=0.3
                )
            else:
                retriever = None

            if retriever is not None:
                # These retrievers expose no per-document score, so pass None.
                results = [
                    _format_ranker_result(doc, None, rank)
                    for rank, doc in enumerate(retriever.invoke(query), start=1)
                ]

        logger.info(f"Retrieved {len(results)} results for query using {method}")
        return _format_ranker_results_html(results, query, method)
    except Exception as e:
        error_msg = f"Error during search: {str(e)}"
        logger.error(error_msg)
        # Exception text can echo user input or document text; escape it
        # before embedding it in markup.
        safe_error = escape(error_msg)
        return f"""
        <div class="ranker-container">
            <div class="ranker-error">
                <h3>β Search Error</h3>
                <p>{safe_error}</p>
                <p class="error-hint">Make sure documents are uploaded and the system is ready.</p>
            </div>
        </div>
        """
| def _format_ranker_result(doc, score, rank): | |
| """Format a single search result.""" | |
| # Extract metadata | |
| metadata = doc.metadata | |
| source = metadata.get("source", "Unknown") | |
| page = metadata.get("page", "N/A") | |
| chunk_id = metadata.get("chunk_id", "Unknown") | |
| # Calculate content length and create indicator | |
| content_length = len(doc.page_content) | |
| if content_length < 200: | |
| length_indicator = f"π {content_length} chars" | |
| elif content_length < 500: | |
| length_indicator = f"π {content_length} chars" | |
| else: | |
| length_indicator = f"π {content_length} chars" | |
| # Calculate confidence based on rank (high confidence for top results) | |
| if rank <= 2: | |
| confidence = "High" | |
| confidence_color = "#28a745" | |
| confidence_icon = "π₯" | |
| elif rank <= 4: | |
| confidence = "Medium" | |
| confidence_color = "#ffc107" | |
| confidence_icon = "β" | |
| else: | |
| confidence = "Low" | |
| confidence_color = "#6c757d" | |
| confidence_icon = "π‘" | |
| result = { | |
| "rank": rank, | |
| "content": doc.page_content, | |
| "source": source, | |
| "page": page, | |
| "chunk_id": chunk_id, | |
| "length_indicator": length_indicator, | |
| "has_score": score is not None, | |
| "confidence": confidence, | |
| "confidence_color": confidence_color, | |
| "confidence_icon": confidence_icon | |
| } | |
| # Only add score if we have a real score (similarity search only) | |
| if score is not None: | |
| result["score"] = round(score, 3) | |
| return result | |
| def _format_ranker_results_html(results, query, method): | |
| """Format search results as HTML.""" | |
| if not results: | |
| return """ | |
| <div class="ranker-container"> | |
| <div class="ranker-no-results"> | |
| <h3>π No Results Found</h3> | |
| <p>No relevant documents found for your query.</p> | |
| <p class="no-results-hint">Try different keywords or check if documents are uploaded.</p> | |
| </div> | |
| </div> | |
| """ | |
| # Method display names | |
| method_labels = { | |
| "similarity": "π― Similarity Search", | |
| "mmr": "π MMR (Diverse)", | |
| "bm25": "π BM25 (Keywords)", | |
| "hybrid": "π Hybrid (Recommended)" | |
| } | |
| method_display = method_labels.get(method, method) | |
| # Start building HTML | |
| html_parts = [f""" | |
| <div class="ranker-container"> | |
| <div class="ranker-header"> | |
| <div class="ranker-title"> | |
| <h3>π Search Results</h3> | |
| <div class="query-display">"{query}"</div> | |
| </div> | |
| <div class="ranker-meta"> | |
| <span class="method-badge">{method_display}</span> | |
| <span class="result-count">{len(results)} results</span> | |
| </div> | |
| </div> | |
| """] | |
| # Add results | |
| for result in results: | |
| rank_emoji = ["π₯", "π₯", "π₯"][result["rank"] - 1] if result["rank"] <= 3 else f"#{result['rank']}" | |
| # Escape content for safe HTML inclusion and JavaScript | |
| escaped_content = result['content'].replace('"', '"').replace("'", "'").replace('\n', '\\n') | |
| # Build score info - always show confidence, only show score for similarity search | |
| score_info_parts = [f""" | |
| <span class="confidence-badge" style="color: {result['confidence_color']}"> | |
| {result['confidence_icon']} {result['confidence']} | |
| </span>"""] | |
| # Only add score value if we have real scores (similarity search) | |
| if result.get('has_score', False): | |
| score_info_parts.append(f'<span class="score-value">π― {result["score"]}</span>') | |
| score_info_html = f""" | |
| <div class="score-info"> | |
| {''.join(score_info_parts)} | |
| </div>""" | |
| html_parts.append(f""" | |
| <div class="result-card"> | |
| <div class="result-header"> | |
| <div class="rank-info"> | |
| <span class="rank-badge">{rank_emoji} Rank {result['rank']}</span> | |
| <span class="source-info">π {result['source']}</span> | |
| {f"<span class='page-info'>Page {result['page']}</span>" if result['page'] != 'N/A' else ""} | |
| <span class="length-info">{result['length_indicator']}</span> | |
| </div> | |
| {score_info_html} | |
| </div> | |
| <div class="result-content"> | |
| <div class="content-text">{result['content']}</div> | |
| </div> | |
| </div> | |
| """) | |
| html_parts.append("</div>") | |
| return "".join(html_parts) | |
def get_ranker_status():
    """Return an HTML status card for the query ranker.

    Reads ``document_count`` from ``vector_store_manager.get_collection_info()``
    and ``system_ready`` from
    ``document_ingestion_service.get_ingestion_status()``; missing keys default
    to ``0`` and ``False``. Any exception is logged and turned into an error
    card instead of propagating.

    Returns:
        HTML string for either the status card or the error card.
    """
    try:
        # Collection statistics come first, then the ingestion readiness flag.
        info = vector_store_manager.get_collection_info()
        document_count = info.get("document_count", 0)

        available_methods = ["similarity", "mmr", "bm25", "hybrid"]

        status = document_ingestion_service.get_ingestion_status()
        system_ready = status.get('system_ready', False)

        # Resolve everything that depends on readiness before templating.
        if system_ready:
            indicator_class = 'status-ready'
            indicator_text = 'π’ READY'
            store_state = 'Ready'
        else:
            indicator_class = 'status-not-ready'
            indicator_text = 'π΄ NOT READY'
            store_state = 'Not Ready'

        return f"""
        <div class="status-card">
            <div class="status-header">
                <h3>π Query Ranker Status</h3>
                <div class="status-indicator {indicator_class}">
                    {indicator_text}
                </div>
            </div>
            <div class="status-grid">
                <div class="status-item">
                    <div class="status-label">Available Documents</div>
                    <div class="status-value">{document_count}</div>
                </div>
                <div class="status-item">
                    <div class="status-label">Retrieval Methods</div>
                    <div class="status-value">{len(available_methods)}</div>
                </div>
                <div class="status-item">
                    <div class="status-label">Vector Store</div>
                    <div class="status-value">{store_state}</div>
                </div>
            </div>
            <div class="ranker-methods">
                <div class="methods-label">Available Methods:</div>
                <div class="methods-list">
                    <span class="method-tag">π― Similarity</span>
                    <span class="method-tag">π MMR</span>
                    <span class="method-tag">π BM25</span>
                    <span class="method-tag">π Hybrid</span>
                </div>
            </div>
        </div>
        """
    except Exception as e:
        error_msg = f"Error getting ranker status: {str(e)}"
        logger.error(error_msg)
        return f"""
        <div class="status-card status-error">
            <div class="status-header">
                <h3>β System Error</h3>
            </div>
            <p class="error-message">{error_msg}</p>
        </div>
        """
def create_query_ranker_tab():
    """Build the "Query Ranker" tab and wire up its events.

    Creates, in order: a header, the status card, the refresh/clear buttons,
    the query box with its search button, the method dropdown and result-count
    slider, and the results panel. Submitting the query, clicking search, or
    changing the method or slider all re-run ``handle_query_search``.

    NOTE(review): the container nesting below is reconstructed from statement
    order - confirm it matches the intended layout.
    """
    with gr.TabItem("π Query Ranker"):
        with gr.Column(elem_classes=["ranker-container"]):
            # Header
            gr.HTML("""
            <div class="chat-header">
                <h2>π Query Ranker</h2>
                <p>Search and rank document chunks with transparency into retrieval methods</p>
            </div>
            """)

            # Status display
            status_display = gr.HTML(value=get_ranker_status())

            # Control buttons
            with gr.Row(elem_classes=["control-buttons"]):
                refresh_ranker_status_btn = gr.Button(
                    "π Refresh Status",
                    elem_classes=["control-btn", "btn-refresh"],
                )
                clear_results_btn = gr.Button(
                    "ποΈ Clear Results",
                    elem_classes=["control-btn", "btn-clear-data"],
                )

            # Search controls
            with gr.Column(elem_classes=["ranker-controls"]):
                with gr.Row(elem_classes=["ranker-input-row"]):
                    query_input = gr.Textbox(
                        placeholder="Enter your search query...",
                        show_label=False,
                        elem_classes=["ranker-query-input"],
                        scale=4,
                    )
                    search_btn = gr.Button(
                        "π Search",
                        elem_classes=["ranker-search-btn"],
                        scale=0,
                    )

                with gr.Row(elem_classes=["ranker-options-row"]):
                    method_choices = [
                        ("π― Similarity Search", "similarity"),
                        ("π MMR (Diverse)", "mmr"),
                        ("π BM25 (Keywords)", "bm25"),
                        ("π Hybrid (Recommended)", "hybrid"),
                    ]
                    method_dropdown = gr.Dropdown(
                        choices=method_choices,
                        value="hybrid",
                        label="Retrieval Method",
                        scale=2,
                    )
                    k_slider = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=5,
                        step=1,
                        label="Number of Results",
                        scale=1,
                    )

            # Results display; an empty query yields the placeholder panel.
            results_display = gr.HTML(
                value=handle_query_search("", "hybrid", 5),
                elem_classes=["ranker-results-container"],
            )

        # Every search trigger shares the same inputs and output.
        search_inputs = [query_input, method_dropdown, k_slider]
        search_outputs = [results_display]

        query_input.submit(
            handle_query_search, inputs=search_inputs, outputs=search_outputs
        )
        search_btn.click(
            handle_query_search, inputs=search_inputs, outputs=search_outputs
        )

        # Control button handlers
        def clear_ranker_results():
            """Reset the results panel to the placeholder and empty the query box."""
            placeholder = handle_query_search("", "hybrid", 5)
            return placeholder, ""

        def refresh_ranker_status():
            """Recompute the status card."""
            return get_ranker_status()

        refresh_ranker_status_btn.click(
            fn=refresh_ranker_status,
            inputs=[],
            outputs=[status_display],
        )
        clear_results_btn.click(
            fn=clear_ranker_results,
            inputs=[],
            outputs=[results_display, query_input],
        )

        # Re-run the search whenever the method or the result count changes.
        method_dropdown.change(
            fn=handle_query_search, inputs=search_inputs, outputs=search_outputs
        )
        k_slider.change(
            fn=handle_query_search, inputs=search_inputs, outputs=search_outputs
        )