Spaces:

notingemiu
/

rag-sql-helper

Sleeping

App Files Files Community

rag-sql-helper / app.py

notingemiu

Initial commit: RAG SQL helper Gradio Space

53ff64f verified 11 days ago

Raw

History Blame Contribute Delete

4.92 kB

	"""
	Gradio demo for the SQL Helper RAG project.

	The Qdrant collection 'sql_kb' is assumed to already be populated
	(this Space is read-only; ingestion was done locally with ingest.py).

	Required Secrets in Space Settings:
	JINA_API_KEY, GROQ_API_KEY, QDRANT_URL, QDRANT_API_KEY
	"""
	import os

	import gradio as gr
	import httpx
	from qdrant_client import QdrantClient

	JINA_API_KEY = os.environ["JINA_API_KEY"]
	GROQ_API_KEY = os.environ["GROQ_API_KEY"]
	QDRANT_URL = os.environ["QDRANT_URL"]
	QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]

	COLLECTION = "sql_kb"
	EMBED_MODEL = "jina-embeddings-v3"
	LLM_MODEL = "openai/gpt-oss-20b"
	TOP_K = 5

	qdrant = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY, timeout=30)


	def embed_query(text: str) -> list[float]:
	r = httpx.post(
	"https://api.jina.ai/v1/embeddings",
	headers={"Authorization": f"Bearer {JINA_API_KEY}", "Content-Type": "application/json"},
	json={"model": EMBED_MODEL, "task": "retrieval.query", "input": [text]},
	timeout=30.0,
	)
	r.raise_for_status()
	return r.json()["data"][0]["embedding"]


	def call_llm(question: str, context_chunks: list[str]) -> str:
	context_block = "\n\n---\n\n".join(
	f"[Source {i+1}]\n{c}" for i, c in enumerate(context_chunks)
	)
	system_prompt = (
	"You are a helpful SQL assistant. Answer the user's question using ONLY "
	"the provided context blocks below. If the answer isn't in the context, "
	"say so honestly — do not invent SQL syntax or features.\n\n"
	"Use clear, concise language. Include a small SQL example when helpful.\n"
	"Cite sources inline as [Source N] when you use them.\n\n"
	f"Context:\n{context_block}"
	)
	r = httpx.post(
	"https://api.groq.com/openai/v1/chat/completions",
	headers={"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"},
	json={
	"model": LLM_MODEL,
	"messages": [
	{"role": "system", "content": system_prompt},
	{"role": "user", "content": question},
	],
	"temperature": 0.2,
	"max_tokens": 800,
	},
	timeout=60.0,
	)
	r.raise_for_status()
	return r.json()["choices"][0]["message"]["content"]


	def ask(question: str) -> tuple[str, str]:
	if not question or len(question.strip()) < 3:
	return "Please enter a longer question.", ""
	try:
	vec = embed_query(question)
	except Exception as e:
	return f"Embedding error: {e}", ""
	hits = qdrant.query_points(
	collection_name=COLLECTION, query=vec, limit=TOP_K, with_payload=True
	).points
	if not hits:
	return "Knowledge base is empty.", ""
	chunks = [h.payload["text"] for h in hits]
	try:
	answer = call_llm(question, chunks)
	except Exception as e:
	return f"LLM error: {e}", ""
	sources_md = "\n\n".join(
	f"[Source {i+1}] `{h.payload['source']}` (chunk {h.payload['chunk_idx']}, score {h.score:.3f})\n\n"
	f"> {h.payload['text'][:400]}{'…' if len(h.payload['text']) > 400 else ''}"
	for i, h in enumerate(hits)
	)
	return answer, sources_md


	with gr.Blocks(title="SQL Helper RAG", theme=gr.themes.Soft()) as demo:
	gr.Markdown(
	"""
	# SQL Helper — RAG Q&A

	Ask any SQL question. The system retrieves relevant chunks from a curated SQL knowledge base
	(SELECT basics, JOINs, aggregations, window functions, subqueries/CTEs, indexes/performance, common gotchas)
	and answers using only that context.

	Stack: Jina `jina-embeddings-v3` (embeddings) → Qdrant Cloud (vector DB) → Groq `gpt-oss-20b` (LLM)
	"""
	)
	with gr.Row():
	with gr.Column(scale=1):
	question = gr.Textbox(
	label="Your SQL question",
	placeholder="e.g. What is the difference between WHERE and HAVING?",
	lines=2,
	)
	submit = gr.Button("Ask", variant="primary")
	gr.Examples(
	examples=[
	"What is the difference between WHERE and HAVING in SQL?",
	"How do I get top 3 employees by salary in each department?",
	"Why does NOT IN return no rows when the subquery contains NULL?",
	"What's the difference between RANK and DENSE_RANK?",
	"Що таке INNER JOIN в SQL?",
	],
	inputs=question,
	)
	with gr.Column(scale=2):
	answer = gr.Markdown(label="Answer", value="Answer will appear here.")
	sources = gr.Markdown(label="Retrieved sources", value="")
	submit.click(ask, inputs=question, outputs=[answer, sources])
	question.submit(ask, inputs=question, outputs=[answer, sources])

	if __name__ == "__main__":
	demo.launch()