import gradio as gr
import os

# Vector store (Chroma) and embedding model used for retrieval
import chromadb
from sentence_transformers import SentenceTransformer

# Module-level retrieval state, created once at import time and shared by
# every call to `query_db`.
# NOTE(review): `chromadb.Client()` with no arguments is presumably an
# in-memory (non-persistent) client; if so, this collection is empty unless
# ingestion runs in this same process. Confirm, and point a persistent client
# at the ingestion directory if needed.
client = chromadb.Client()  
# Fetch the collection by name, creating it if it does not exist yet.
collection = client.get_or_create_collection("ml_basics_collection")

# Must be the same embedding model that was used at ingestion time, so query
# vectors and stored vectors live in the same embedding space.
embed_model = SentenceTransformer('all-MiniLM-L6-v2')

def query_db(user_query):
    """
    Search the Chroma collection for `user_query` and render the hits as HTML.

    1) Convert user query to embedding
    2) Query the Chroma collection
    3) Build HTML that shows chunk previews & embedded PDFs

    Every value that comes out of the collection (title, chunk index, chunk
    text, file path) is HTML-escaped before interpolation, so characters such
    as ``<``, ``&`` or quotes in extracted PDF text cannot break or inject
    markup into the rendered page.

    Args:
        user_query: Free-text query typed by the user. ``None``, empty, or
            whitespace-only input returns a prompt message without calling
            the embedding model or the collection.

    Returns:
        An HTML string: a prompt for blank input, a "no results" message
        when the collection returns nothing, otherwise one section per hit
        plus one ``<iframe>`` per distinct PDF path.
    """
    # Function-scope import keeps this block self-contained.
    import html

    # Embedding a blank string would only surface arbitrary chunks.
    if not user_query or not user_query.strip():
        return "<h4>Please enter a query.</h4>"

    # A) Embed the query
    q_vec = embed_model.encode(user_query).tolist()

    # B) Query top results
    results = collection.query(
        query_embeddings=[q_vec],
        n_results=3,  # or however many you want
    )

    # results is typically a dict with: "documents", "metadatas", "ids", ...
    # Each value holds one list per query; we sent a single query, so take
    # index 0. `or` guards against a key that is present but None/empty.
    metadatas_list = (results.get("metadatas") or [[]])[0] or []
    documents_list = (results.get("documents") or [[]])[0] or []

    if not metadatas_list:
        return "<h4>No results found!</h4>"

    # C) Collect HTML fragments and join once at the end.
    parts = ["<h2>Search Results</h2>"]

    # Each PDF is embedded only the first time one of its chunks appears.
    displayed_pdfs = set()

    for meta, chunk_text in zip(metadatas_list, documents_list):
        meta = meta or {}  # tolerate entries stored without metadata
        pdf_path = meta.get("file_path")
        title = html.escape(str(meta.get("title", "No Title")))
        chunk_index = html.escape(str(meta.get("chunk_index")))
        # Truncate first, then escape, so an entity is never cut in half.
        excerpt = html.escape((chunk_text or "")[:300])

        parts.append(f"<h3>{title} - chunk {chunk_index}</h3>")
        parts.append(f"<p><b>Excerpt:</b> {excerpt} ...</p>")

        if pdf_path and pdf_path not in displayed_pdfs:
            displayed_pdfs.add(pdf_path)
            # quote=True also escapes quotes, since this lands in an attribute.
            safe_path = html.escape(str(pdf_path), quote=True)

            # The 'pdf_path' must be accessible in your HF Space
            # For example: pdfs/1234.5678.pdf
            if os.path.exists(pdf_path):
                # NOTE(review): a relative URL is used as in the original;
                # confirm the host actually serves this path (alternative:
                # https://<space-id>.hf.space/file/<pdf_path>).
                parts.append(
                    f'<iframe src="./{safe_path}" width="600" height="400">'
                    "</iframe>"
                )
            else:
                parts.append(
                    f"<p style='color:red'>PDF not found at {safe_path}</p>"
                )

        # Horizontal rule between hits
        parts.append("<hr>")

    return "".join(parts)


########################
# Gradio UI
########################
def build_app():
    """
    Assemble the Gradio Blocks interface and return it without launching.

    Layout, top to bottom: a Markdown heading, a textbox for the query, and
    an HTML component that receives whatever `query_db` returns.
    """
    with gr.Blocks() as app:
        gr.Markdown("## Ask Me About Machine Learning Basics")

        query_box = gr.Textbox(
            placeholder="e.g. What is supervised learning?",
            label="Enter your query",
        )
        results_panel = gr.HTML(label="Results")

        # Pressing Enter in the textbox runs the search and fills the panel.
        query_box.submit(fn=query_db, inputs=query_box, outputs=results_panel)

    return app

# Built at import time so `demo` exists as a module-level attribute even when
# this file is imported rather than executed.
demo = build_app()

# Start the Gradio server only when the file is run directly as a script.
if __name__ == "__main__":
    demo.launch()