import html
import os
from urllib.parse import quote

import gradio as gr

# 1. Import your ingestion or retrieval code
import chromadb
from sentence_transformers import SentenceTransformer

# We'll assume you have a function like 'init_chroma()' or we do it inline.
# NOTE(review): a bare chromadb.Client() appears to be an in-memory client, so
# the collection would only contain data ingested in this same process --
# confirm whether a persistent client is intended here.
client = chromadb.Client()
collection = client.get_or_create_collection("ml_basics_collection")

# Same embedding model as in your ingestion
embed_model = SentenceTransformer('all-MiniLM-L6-v2')

# Maximum number of characters of each chunk shown in the preview.
# NOTE(review): the original preview length was lost; 300 is a placeholder.
_PREVIEW_CHARS = 300


def query_db(user_query):
    """
    Run a similarity search for ``user_query`` and render the hits as HTML.

    1) Convert user query to embedding
    2) Query the Chroma collection
    3) Build HTML that shows chunk previews & embedded PDFs

    Parameters:
        user_query: the text typed by the user into the query box.

    Returns:
        An HTML string. If the collection returns no hits this is a short
        "No results found!" message; otherwise it contains, for every hit,
        a preview of the chunk text followed by either an embedded PDF viewer
        or a "PDF not found" notice, with a horizontal rule between hits.
    """
    # A) Embed the query
    q_vec = embed_model.encode(user_query).tolist()

    # B) Query top results
    results = collection.query(
        query_embeddings=[q_vec],
        n_results=3  # or however many you want
    )

    # results is typically a dict with: "documents", "metadatas", "ids", ...
    # Each value holds one inner list per query embedding; we sent exactly one.
    metadatas_list = results.get("metadatas", [[]])[0]  # first query's top docs
    documents_list = results.get("documents", [[]])[0]

    # If empty, return a message
    if not metadatas_list:
        return "<p>No results found!</p>"

    # C) Build an HTML string that displays each chunk & an <iframe> for its PDF
    html_output = ""
    for meta, doc in zip(metadatas_list, documents_list):
        meta = meta or {}  # a hit stored without metadata may come back as None

        # NOTE(review): the metadata key holding the PDF location was lost in
        # the damaged source; "pdf_path" is assumed -- verify against ingestion.
        pdf_path = str(meta.get("pdf_path") or "")

        # Escape everything that originates from stored documents/metadata so
        # that chunk text containing markup cannot inject HTML into the page.
        preview = html.escape((doc or "")[:_PREVIEW_CHARS])
        html_output += f"<p>{preview}</p>"

        if pdf_path and os.path.exists(pdf_path):
            # NOTE(review): the "/file=" route is assumed from Gradio's local
            # file serving; the exact route depends on the Gradio version and
            # the file must be inside an allowed path -- confirm.
            src = html.escape(f"/file={quote(pdf_path)}", quote=True)
            html_output += f"""
            <iframe src="{src}" width="100%" height="600px"></iframe>
            """
        else:
            html_output += f"<p>PDF not found at {html.escape(pdf_path)}</p>"

        # Add a horizontal line
        html_output += "<hr>"

    return html_output


########################
# Gradio UI
########################
def build_app():
    """
    Assemble the Gradio Blocks UI: a heading, a query textbox and an HTML
    results pane, with the textbox's submit event wired to ``query_db``.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("## Ask Me About Machine Learning Basics")

        user_query = gr.Textbox(
            label="Enter your query",
            placeholder="e.g. What is supervised learning?"
        )

        results_html = gr.HTML(label="Results")

        # When user presses Enter on the query box, call `query_db`
        user_query.submit(
            fn=query_db,
            inputs=user_query,
            outputs=results_html
        )

    return demo


demo = build_app()

if __name__ == "__main__":
    demo.launch()