rag-sql-helper / app.py
notingemiu's picture
Initial commit: RAG SQL helper Gradio Space
53ff64f verified
Raw
History Blame Contribute Delete
4.92 kB
"""
Gradio demo for the SQL Helper RAG project.
The Qdrant collection 'sql_kb' is assumed to already be populated
(this Space is read-only; ingestion was done locally with ingest.py).
Required Secrets in Space Settings:
JINA_API_KEY, GROQ_API_KEY, QDRANT_URL, QDRANT_API_KEY
"""
import os
import gradio as gr
import httpx
from qdrant_client import QdrantClient
JINA_API_KEY = os.environ["JINA_API_KEY"]
GROQ_API_KEY = os.environ["GROQ_API_KEY"]
QDRANT_URL = os.environ["QDRANT_URL"]
QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]
COLLECTION = "sql_kb"
EMBED_MODEL = "jina-embeddings-v3"
LLM_MODEL = "openai/gpt-oss-20b"
TOP_K = 5
qdrant = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY, timeout=30)
def embed_query(text: str) -> list[float]:
r = httpx.post(
"https://api.jina.ai/v1/embeddings",
headers={"Authorization": f"Bearer {JINA_API_KEY}", "Content-Type": "application/json"},
json={"model": EMBED_MODEL, "task": "retrieval.query", "input": [text]},
timeout=30.0,
)
r.raise_for_status()
return r.json()["data"][0]["embedding"]
def call_llm(question: str, context_chunks: list[str]) -> str:
context_block = "\n\n---\n\n".join(
f"[Source {i+1}]\n{c}" for i, c in enumerate(context_chunks)
)
system_prompt = (
"You are a helpful SQL assistant. Answer the user's question using ONLY "
"the provided context blocks below. If the answer isn't in the context, "
"say so honestly — do not invent SQL syntax or features.\n\n"
"Use clear, concise language. Include a small SQL example when helpful.\n"
"Cite sources inline as [Source N] when you use them.\n\n"
f"Context:\n{context_block}"
)
r = httpx.post(
"https://api.groq.com/openai/v1/chat/completions",
headers={"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"},
json={
"model": LLM_MODEL,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": question},
],
"temperature": 0.2,
"max_tokens": 800,
},
timeout=60.0,
)
r.raise_for_status()
return r.json()["choices"][0]["message"]["content"]
def ask(question: str) -> tuple[str, str]:
if not question or len(question.strip()) < 3:
return "Please enter a longer question.", ""
try:
vec = embed_query(question)
except Exception as e:
return f"Embedding error: {e}", ""
hits = qdrant.query_points(
collection_name=COLLECTION, query=vec, limit=TOP_K, with_payload=True
).points
if not hits:
return "Knowledge base is empty.", ""
chunks = [h.payload["text"] for h in hits]
try:
answer = call_llm(question, chunks)
except Exception as e:
return f"LLM error: {e}", ""
sources_md = "\n\n".join(
f"**[Source {i+1}]** `{h.payload['source']}` (chunk {h.payload['chunk_idx']}, score {h.score:.3f})\n\n"
f"> {h.payload['text'][:400]}{'…' if len(h.payload['text']) > 400 else ''}"
for i, h in enumerate(hits)
)
return answer, sources_md
with gr.Blocks(title="SQL Helper RAG", theme=gr.themes.Soft()) as demo:
gr.Markdown(
"""
# SQL Helper — RAG Q&A
Ask any SQL question. The system retrieves relevant chunks from a curated SQL knowledge base
(SELECT basics, JOINs, aggregations, window functions, subqueries/CTEs, indexes/performance, common gotchas)
and answers using only that context.
**Stack:** Jina `jina-embeddings-v3` (embeddings) → Qdrant Cloud (vector DB) → Groq `gpt-oss-20b` (LLM)
"""
)
with gr.Row():
with gr.Column(scale=1):
question = gr.Textbox(
label="Your SQL question",
placeholder="e.g. What is the difference between WHERE and HAVING?",
lines=2,
)
submit = gr.Button("Ask", variant="primary")
gr.Examples(
examples=[
"What is the difference between WHERE and HAVING in SQL?",
"How do I get top 3 employees by salary in each department?",
"Why does NOT IN return no rows when the subquery contains NULL?",
"What's the difference between RANK and DENSE_RANK?",
"Що таке INNER JOIN в SQL?",
],
inputs=question,
)
with gr.Column(scale=2):
answer = gr.Markdown(label="Answer", value="*Answer will appear here.*")
sources = gr.Markdown(label="Retrieved sources", value="")
submit.click(ask, inputs=question, outputs=[answer, sources])
question.submit(ask, inputs=question, outputs=[answer, sources])
if __name__ == "__main__":
demo.launch()