Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| # 1. Import your ingestion or retrieval code | |
| import chromadb | |
| from sentence_transformers import SentenceTransformer | |
# Names of the Chroma collection and the sentence-transformers model used
# for query embeddings.
COLLECTION_NAME = "ml_basics_collection"
EMBED_MODEL_NAME = 'all-MiniLM-L6-v2'

# Vector store handle, created once at import time.
# NOTE(review): chromadb.Client() is presumably an in-memory client; confirm
# that ingestion populates this collection in the same process, otherwise
# every query will come back empty.
client = chromadb.Client()
collection = client.get_or_create_collection(COLLECTION_NAME)

# Query-side embedding model; it should match the model used when the
# documents were ingested so the vectors are comparable.
embed_model = SentenceTransformer(EMBED_MODEL_NAME)
def query_db(user_query: str) -> str:
    """Search the Chroma collection and render the top matches as HTML.

    The query text is embedded with ``embed_model``, the three nearest
    chunks are requested from ``collection``, and each hit is rendered as a
    heading, an excerpt of at most 300 characters and -- once per distinct
    PDF path -- an ``<iframe>`` pointing at the file (or a red notice when
    the file does not exist on disk).

    All text taken from the collection (title, chunk text, file path) is
    HTML-escaped before being inserted into the markup, so stored content
    cannot inject tags or break attribute quoting.

    Args:
        user_query: Free-text query typed by the user.

    Returns:
        An HTML fragment. A short prompt is returned for a blank query and
        ``"<h4>No results found!</h4>"`` when nothing matched.
    """
    # Local import keeps this function self-contained without touching the
    # file's top-level import block.
    import html

    # A blank query would otherwise be embedded and return arbitrary
    # nearest neighbours.
    if not user_query or not user_query.strip():
        return "<h4>Please enter a query.</h4>"

    # A) Embed the query.
    q_vec = embed_model.encode(user_query).tolist()

    # B) Fetch the top results for this single query.
    results = collection.query(query_embeddings=[q_vec], n_results=3)

    # Each field is a list with one entry per query. Tolerate a missing key,
    # a None value, or an empty outer list instead of raising.
    metadatas_list = (results.get("metadatas") or [[]])[0] or []
    documents_list = (results.get("documents") or [[]])[0] or []
    if not metadatas_list:
        return "<h4>No results found!</h4>"

    # C) Build the HTML. Fragments are collected and joined once at the end.
    parts = ["<h2>Search Results</h2>"]
    displayed_pdfs = set()  # each PDF is embedded at most once
    for meta, chunk_text in zip(metadatas_list, documents_list):
        meta = meta or {}  # an entry without metadata may come back as None
        chunk_text = chunk_text or ""

        pdf_path = meta.get("file_path")
        title = html.escape(str(meta.get("title", "No Title")))
        chunk_index = html.escape(str(meta.get("chunk_index")))
        excerpt = html.escape(str(chunk_text)[:300])

        parts.append(f"<h3>{title} - chunk {chunk_index}</h3>")
        parts.append(f"<p><b>Excerpt:</b> {excerpt} ...</p>")

        if pdf_path and pdf_path not in displayed_pdfs:
            displayed_pdfs.add(pdf_path)
            if os.path.exists(pdf_path):
                # Relative URL for the iframe, e.g. "./pdfs/1234.5678.pdf".
                # The file must be served by the hosting app to display.
                iframe_src = html.escape(f"./{pdf_path}", quote=True)
                parts.append(
                    f'<iframe src="{iframe_src}" width="600" height="400">'
                    "</iframe>"
                )
            else:
                safe_path = html.escape(str(pdf_path))
                parts.append(
                    f"<p style='color:red'>PDF not found at {safe_path}</p>"
                )

        # Visual separator between results.
        parts.append("<hr>")

    return "".join(parts)
| ######################## | |
| # Gradio UI | |
| ######################## | |
def build_app():
    """Assemble the Gradio Blocks interface for the search demo.

    The layout is a Markdown heading, a single-line query textbox and an
    HTML pane. Submitting the textbox (pressing Enter) calls ``query_db``
    with the typed text and writes the returned HTML into the pane.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    with gr.Blocks() as app:
        gr.Markdown("## Ask Me About Machine Learning Basics")

        query_box = gr.Textbox(
            placeholder="e.g. What is supervised learning?",
            label="Enter your query",
        )
        results_pane = gr.HTML(label="Results")

        # Enter in the query box triggers the search.
        query_box.submit(
            fn=query_db,
            inputs=query_box,
            outputs=results_pane,
        )

    return app
# Module-level app object: the interface is built as soon as this module is
# imported, so `demo` is available to anything that imports the file.
demo = build_app()

if __name__ == "__main__":
    # Start the Gradio server only when the file is run as a script.
    demo.launch()