""" Gradio demo for the SQL Helper RAG project. The Qdrant collection 'sql_kb' is assumed to already be populated (this Space is read-only; ingestion was done locally with ingest.py). Required Secrets in Space Settings: JINA_API_KEY, GROQ_API_KEY, QDRANT_URL, QDRANT_API_KEY """ import os import gradio as gr import httpx from qdrant_client import QdrantClient JINA_API_KEY = os.environ["JINA_API_KEY"] GROQ_API_KEY = os.environ["GROQ_API_KEY"] QDRANT_URL = os.environ["QDRANT_URL"] QDRANT_API_KEY = os.environ["QDRANT_API_KEY"] COLLECTION = "sql_kb" EMBED_MODEL = "jina-embeddings-v3" LLM_MODEL = "openai/gpt-oss-20b" TOP_K = 5 qdrant = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY, timeout=30) def embed_query(text: str) -> list[float]: r = httpx.post( "https://api.jina.ai/v1/embeddings", headers={"Authorization": f"Bearer {JINA_API_KEY}", "Content-Type": "application/json"}, json={"model": EMBED_MODEL, "task": "retrieval.query", "input": [text]}, timeout=30.0, ) r.raise_for_status() return r.json()["data"][0]["embedding"] def call_llm(question: str, context_chunks: list[str]) -> str: context_block = "\n\n---\n\n".join( f"[Source {i+1}]\n{c}" for i, c in enumerate(context_chunks) ) system_prompt = ( "You are a helpful SQL assistant. Answer the user's question using ONLY " "the provided context blocks below. If the answer isn't in the context, " "say so honestly — do not invent SQL syntax or features.\n\n" "Use clear, concise language. Include a small SQL example when helpful.\n" "Cite sources inline as [Source N] when you use them.\n\n" f"Context:\n{context_block}" ) r = httpx.post( "https://api.groq.com/openai/v1/chat/completions", headers={"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"}, json={ "model": LLM_MODEL, "messages": [ {"role": "system", "content": system_prompt}, {"role": "user", "content": question}, ], "temperature": 0.2, "max_tokens": 800, }, timeout=60.0, ) r.raise_for_status() return r.json()["choices"][0]["message"]["content"] def ask(question: str) -> tuple[str, str]: if not question or len(question.strip()) < 3: return "Please enter a longer question.", "" try: vec = embed_query(question) except Exception as e: return f"Embedding error: {e}", "" hits = qdrant.query_points( collection_name=COLLECTION, query=vec, limit=TOP_K, with_payload=True ).points if not hits: return "Knowledge base is empty.", "" chunks = [h.payload["text"] for h in hits] try: answer = call_llm(question, chunks) except Exception as e: return f"LLM error: {e}", "" sources_md = "\n\n".join( f"**[Source {i+1}]** `{h.payload['source']}` (chunk {h.payload['chunk_idx']}, score {h.score:.3f})\n\n" f"> {h.payload['text'][:400]}{'…' if len(h.payload['text']) > 400 else ''}" for i, h in enumerate(hits) ) return answer, sources_md with gr.Blocks(title="SQL Helper RAG", theme=gr.themes.Soft()) as demo: gr.Markdown( """ # SQL Helper — RAG Q&A Ask any SQL question. The system retrieves relevant chunks from a curated SQL knowledge base (SELECT basics, JOINs, aggregations, window functions, subqueries/CTEs, indexes/performance, common gotchas) and answers using only that context. **Stack:** Jina `jina-embeddings-v3` (embeddings) → Qdrant Cloud (vector DB) → Groq `gpt-oss-20b` (LLM) """ ) with gr.Row(): with gr.Column(scale=1): question = gr.Textbox( label="Your SQL question", placeholder="e.g. What is the difference between WHERE and HAVING?", lines=2, ) submit = gr.Button("Ask", variant="primary") gr.Examples( examples=[ "What is the difference between WHERE and HAVING in SQL?", "How do I get top 3 employees by salary in each department?", "Why does NOT IN return no rows when the subquery contains NULL?", "What's the difference between RANK and DENSE_RANK?", "Що таке INNER JOIN в SQL?", ], inputs=question, ) with gr.Column(scale=2): answer = gr.Markdown(label="Answer", value="*Answer will appear here.*") sources = gr.Markdown(label="Retrieved sources", value="") submit.click(ask, inputs=question, outputs=[answer, sources]) question.submit(ask, inputs=question, outputs=[answer, sources]) if __name__ == "__main__": demo.launch()