Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from typing import Tuple, List | |
| from openai import OpenAI | |
| from utils.db_utils import DatabaseUtils | |
| from utils.embedding_utils import get_embedding | |
def create_search_tab(openai_client: OpenAI, db_utils: DatabaseUtils, databases: List[str]) -> Tuple[gr.Tab, dict]:
    """Create the vector search tab UI.

    Builds a "Search" tab with database/collection dropdowns, text boxes for
    the embedding field name, the vector index name and the query, plus a
    search button and a results box, and wires up their event handlers.

    Args:
        openai_client: OpenAI client instance, passed to ``get_embedding``
        db_utils: DatabaseUtils instance used to list collections and to
            reach the database client
        databases: List of available databases

    Returns:
        Tuple[gr.Tab, dict]: The tab component and a dict of its interface
        elements keyed by ``db_input``, ``collection_input``,
        ``embedding_field_input``, ``index_input``, ``query_input``,
        ``search_btn`` and ``search_output``.
    """
    # Tuning values for the $vectorSearch stage: how many candidates are
    # considered and how many results are returned.
    num_candidates = 50
    result_limit = 5

    def _is_blank(value) -> bool:
        """Return True when a UI value is missing or only whitespace."""
        return value is None or not str(value).strip()

    def update_collections(db_name: str) -> gr.Dropdown:
        """Update collections dropdown when database changes"""
        # A cleared dropdown yields no database name; reset the collection
        # choices instead of querying with an empty name.
        if _is_blank(db_name):
            return gr.Dropdown(choices=[], value=None)
        collections = db_utils.get_collections(db_name)
        # If there's only one collection, select it by default
        value = collections[0] if len(collections) == 1 else None
        return gr.Dropdown(choices=collections, value=value)

    def vector_search(
        query_text: str,
        db_name: str,
        collection_name: str,
        embedding_field: str,
        index_name: str
    ) -> str:
        """Perform vector search using embeddings.

        Returns the formatted matches (score plus every field except ``_id``
        and the embedding field), ``"No results found"`` when the search
        returns nothing, or a string starting with ``"Error: "`` when the
        inputs are incomplete or any step raises.
        """
        # Validate before doing any remote work so the user gets a clear
        # message instead of an exception raised further down.
        if _is_blank(query_text):
            return "Error: Please enter a search query"
        if _is_blank(db_name) or _is_blank(collection_name):
            return "Error: Please select a database and a collection"
        if _is_blank(embedding_field) or _is_blank(index_name):
            return "Error: Embedding field name and vector search index name are required"

        try:
            print(f"\nProcessing query: {query_text}")
            db = db_utils.client[db_name]
            collection = db[collection_name]
            # Get embeddings for query
            embedding = get_embedding(query_text, openai_client)
            print("Generated embeddings successfully")
            results = collection.aggregate([
                {
                    '$vectorSearch': {
                        "index": index_name,
                        "path": embedding_field,
                        "queryVector": embedding,
                        "numCandidates": num_candidates,
                        "limit": result_limit
                    }
                },
                {
                    "$project": {
                        "search_score": {"$meta": "vectorSearchScore"},
                        "document": "$$ROOT"
                    }
                }
            ])
            # Format results
            hidden_fields = ('_id', embedding_field)
            formatted_results = []
            for idx, result in enumerate(results, 1):
                doc = result['document']
                parts = [f"{idx}. Score: {result['search_score']:.4f}\n"]
                # Add all fields except _id and embeddings
                parts.extend(
                    f"{key}: {value}\n"
                    for key, value in doc.items()
                    if key not in hidden_fields
                )
                formatted_results.append("".join(parts))
            return "\n".join(formatted_results) if formatted_results else "No results found"
        except Exception as e:
            # UI boundary: report the failure in the results box rather than
            # letting the handler raise.
            return f"Error: {e}"

    # Create the tab UI
    with gr.Tab("Search") as tab:
        with gr.Row():
            db_input = gr.Dropdown(
                choices=databases,
                label="Select Database",
                info="Database containing the vectors"
            )
            collection_input = gr.Dropdown(
                choices=[],
                label="Select Collection",
                info="Collection containing the vectors"
            )
        with gr.Row():
            embedding_field_input = gr.Textbox(
                label="Embedding Field Name",
                value="embedding",
                info="Field containing the vectors"
            )
            index_input = gr.Textbox(
                label="Vector Search Index Name",
                value="vector_index",
                info="Index created in Atlas UI"
            )
        query_input = gr.Textbox(
            label="Search Query",
            lines=2,
            placeholder="What would you like to search for?"
        )
        search_btn = gr.Button("Search")
        search_output = gr.Textbox(label="Results", lines=10)

        # Set up event handlers
        db_input.change(
            fn=update_collections,
            inputs=[db_input],
            outputs=[collection_input]
        )
        search_btn.click(
            fn=vector_search,
            inputs=[
                query_input,
                db_input,
                collection_input,
                embedding_field_input,
                index_input
            ],
            outputs=search_output
        )

    # Return the tab and its interface elements
    interface = {
        'db_input': db_input,
        'collection_input': collection_input,
        'embedding_field_input': embedding_field_input,
        'index_input': index_input,
        'query_input': query_input,
        'search_btn': search_btn,
        'search_output': search_output
    }
    return tab, interface