import gradio as gr import os # 1. Import your ingestion or retrieval code import chromadb from sentence_transformers import SentenceTransformer # We'll assume you have a function like 'init_chroma()' or we do it inline: client = chromadb.Client() collection = client.get_or_create_collection("ml_basics_collection") # Same embedding model as in your ingestion embed_model = SentenceTransformer('all-MiniLM-L6-v2') def query_db(user_query): """ 1) Convert user query to embedding 2) Query the Chroma collection 3) Build HTML that shows chunk previews & embedded PDFs """ # A) Embed the query q_vec = embed_model.encode(user_query).tolist() # B) Query top results results = collection.query( query_embeddings=[q_vec], n_results=3 # or however many you want ) # results is typically a dict with: "documents", "metadatas", "ids", ... metadatas_list = results.get("metadatas", [[]])[0] # first query's top docs documents_list = results.get("documents", [[]])[0] # If empty, return a message if not metadatas_list: return "