"""
Gradio demo for the SQL Helper RAG project.

The Qdrant collection 'sql_kb' is assumed to already be populated
(this Space is read-only; ingestion was done locally with ingest.py).

Required Secrets in Space Settings:
  JINA_API_KEY, GROQ_API_KEY, QDRANT_URL, QDRANT_API_KEY
"""
import os

import gradio as gr
import httpx
from qdrant_client import QdrantClient

JINA_API_KEY = os.environ["JINA_API_KEY"]
GROQ_API_KEY = os.environ["GROQ_API_KEY"]
QDRANT_URL = os.environ["QDRANT_URL"]
QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]

COLLECTION = "sql_kb"
EMBED_MODEL = "jina-embeddings-v3"
LLM_MODEL = "openai/gpt-oss-20b"
TOP_K = 5

qdrant = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY, timeout=30)


def embed_query(text: str) -> list[float]:
    r = httpx.post(
        "https://api.jina.ai/v1/embeddings",
        headers={"Authorization": f"Bearer {JINA_API_KEY}", "Content-Type": "application/json"},
        json={"model": EMBED_MODEL, "task": "retrieval.query", "input": [text]},
        timeout=30.0,
    )
    r.raise_for_status()
    return r.json()["data"][0]["embedding"]


def call_llm(question: str, context_chunks: list[str]) -> str:
    context_block = "\n\n---\n\n".join(
        f"[Source {i+1}]\n{c}" for i, c in enumerate(context_chunks)
    )
    system_prompt = (
        "You are a helpful SQL assistant. Answer the user's question using ONLY "
        "the provided context blocks below. If the answer isn't in the context, "
        "say so honestly — do not invent SQL syntax or features.\n\n"
        "Use clear, concise language. Include a small SQL example when helpful.\n"
        "Cite sources inline as [Source N] when you use them.\n\n"
        f"Context:\n{context_block}"
    )
    r = httpx.post(
        "https://api.groq.com/openai/v1/chat/completions",
        headers={"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"},
        json={
            "model": LLM_MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": question},
            ],
            "temperature": 0.2,
            "max_tokens": 800,
        },
        timeout=60.0,
    )
    r.raise_for_status()
    return r.json()["choices"][0]["message"]["content"]


def ask(question: str) -> tuple[str, str]:
    if not question or len(question.strip()) < 3:
        return "Please enter a longer question.", ""
    try:
        vec = embed_query(question)
    except Exception as e:
        return f"Embedding error: {e}", ""
    hits = qdrant.query_points(
        collection_name=COLLECTION, query=vec, limit=TOP_K, with_payload=True
    ).points
    if not hits:
        return "Knowledge base is empty.", ""
    chunks = [h.payload["text"] for h in hits]
    try:
        answer = call_llm(question, chunks)
    except Exception as e:
        return f"LLM error: {e}", ""
    sources_md = "\n\n".join(
        f"**[Source {i+1}]** `{h.payload['source']}` (chunk {h.payload['chunk_idx']}, score {h.score:.3f})\n\n"
        f"> {h.payload['text'][:400]}{'…' if len(h.payload['text']) > 400 else ''}"
        for i, h in enumerate(hits)
    )
    return answer, sources_md


with gr.Blocks(title="SQL Helper RAG", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # SQL Helper — RAG Q&A

        Ask any SQL question. The system retrieves relevant chunks from a curated SQL knowledge base
        (SELECT basics, JOINs, aggregations, window functions, subqueries/CTEs, indexes/performance, common gotchas)
        and answers using only that context.

        **Stack:** Jina `jina-embeddings-v3` (embeddings) → Qdrant Cloud (vector DB) → Groq `gpt-oss-20b` (LLM)
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            question = gr.Textbox(
                label="Your SQL question",
                placeholder="e.g. What is the difference between WHERE and HAVING?",
                lines=2,
            )
            submit = gr.Button("Ask", variant="primary")
            gr.Examples(
                examples=[
                    "What is the difference between WHERE and HAVING in SQL?",
                    "How do I get top 3 employees by salary in each department?",
                    "Why does NOT IN return no rows when the subquery contains NULL?",
                    "What's the difference between RANK and DENSE_RANK?",
                    "Що таке INNER JOIN в SQL?",
                ],
                inputs=question,
            )
        with gr.Column(scale=2):
            answer = gr.Markdown(label="Answer", value="*Answer will appear here.*")
            sources = gr.Markdown(label="Retrieved sources", value="")
    submit.click(ask, inputs=question, outputs=[answer, sources])
    question.submit(ask, inputs=question, outputs=[answer, sources])

if __name__ == "__main__":
    demo.launch()