Spaces:
Sleeping
Sleeping
| """ | |
| Gradio demo for the SQL Helper RAG project. | |
| The Qdrant collection 'sql_kb' is assumed to already be populated | |
| (this Space is read-only; ingestion was done locally with ingest.py). | |
| Required Secrets in Space Settings: | |
| JINA_API_KEY, GROQ_API_KEY, QDRANT_URL, QDRANT_API_KEY | |
| """ | |
| import os | |
| import gradio as gr | |
| import httpx | |
| from qdrant_client import QdrantClient | |
| JINA_API_KEY = os.environ["JINA_API_KEY"] | |
| GROQ_API_KEY = os.environ["GROQ_API_KEY"] | |
| QDRANT_URL = os.environ["QDRANT_URL"] | |
| QDRANT_API_KEY = os.environ["QDRANT_API_KEY"] | |
| COLLECTION = "sql_kb" | |
| EMBED_MODEL = "jina-embeddings-v3" | |
| LLM_MODEL = "openai/gpt-oss-20b" | |
| TOP_K = 5 | |
| qdrant = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY, timeout=30) | |
| def embed_query(text: str) -> list[float]: | |
| r = httpx.post( | |
| "https://api.jina.ai/v1/embeddings", | |
| headers={"Authorization": f"Bearer {JINA_API_KEY}", "Content-Type": "application/json"}, | |
| json={"model": EMBED_MODEL, "task": "retrieval.query", "input": [text]}, | |
| timeout=30.0, | |
| ) | |
| r.raise_for_status() | |
| return r.json()["data"][0]["embedding"] | |
| def call_llm(question: str, context_chunks: list[str]) -> str: | |
| context_block = "\n\n---\n\n".join( | |
| f"[Source {i+1}]\n{c}" for i, c in enumerate(context_chunks) | |
| ) | |
| system_prompt = ( | |
| "You are a helpful SQL assistant. Answer the user's question using ONLY " | |
| "the provided context blocks below. If the answer isn't in the context, " | |
| "say so honestly — do not invent SQL syntax or features.\n\n" | |
| "Use clear, concise language. Include a small SQL example when helpful.\n" | |
| "Cite sources inline as [Source N] when you use them.\n\n" | |
| f"Context:\n{context_block}" | |
| ) | |
| r = httpx.post( | |
| "https://api.groq.com/openai/v1/chat/completions", | |
| headers={"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"}, | |
| json={ | |
| "model": LLM_MODEL, | |
| "messages": [ | |
| {"role": "system", "content": system_prompt}, | |
| {"role": "user", "content": question}, | |
| ], | |
| "temperature": 0.2, | |
| "max_tokens": 800, | |
| }, | |
| timeout=60.0, | |
| ) | |
| r.raise_for_status() | |
| return r.json()["choices"][0]["message"]["content"] | |
| def ask(question: str) -> tuple[str, str]: | |
| if not question or len(question.strip()) < 3: | |
| return "Please enter a longer question.", "" | |
| try: | |
| vec = embed_query(question) | |
| except Exception as e: | |
| return f"Embedding error: {e}", "" | |
| hits = qdrant.query_points( | |
| collection_name=COLLECTION, query=vec, limit=TOP_K, with_payload=True | |
| ).points | |
| if not hits: | |
| return "Knowledge base is empty.", "" | |
| chunks = [h.payload["text"] for h in hits] | |
| try: | |
| answer = call_llm(question, chunks) | |
| except Exception as e: | |
| return f"LLM error: {e}", "" | |
| sources_md = "\n\n".join( | |
| f"**[Source {i+1}]** `{h.payload['source']}` (chunk {h.payload['chunk_idx']}, score {h.score:.3f})\n\n" | |
| f"> {h.payload['text'][:400]}{'…' if len(h.payload['text']) > 400 else ''}" | |
| for i, h in enumerate(hits) | |
| ) | |
| return answer, sources_md | |
| with gr.Blocks(title="SQL Helper RAG", theme=gr.themes.Soft()) as demo: | |
| gr.Markdown( | |
| """ | |
| # SQL Helper — RAG Q&A | |
| Ask any SQL question. The system retrieves relevant chunks from a curated SQL knowledge base | |
| (SELECT basics, JOINs, aggregations, window functions, subqueries/CTEs, indexes/performance, common gotchas) | |
| and answers using only that context. | |
| **Stack:** Jina `jina-embeddings-v3` (embeddings) → Qdrant Cloud (vector DB) → Groq `gpt-oss-20b` (LLM) | |
| """ | |
| ) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| question = gr.Textbox( | |
| label="Your SQL question", | |
| placeholder="e.g. What is the difference between WHERE and HAVING?", | |
| lines=2, | |
| ) | |
| submit = gr.Button("Ask", variant="primary") | |
| gr.Examples( | |
| examples=[ | |
| "What is the difference between WHERE and HAVING in SQL?", | |
| "How do I get top 3 employees by salary in each department?", | |
| "Why does NOT IN return no rows when the subquery contains NULL?", | |
| "What's the difference between RANK and DENSE_RANK?", | |
| "Що таке INNER JOIN в SQL?", | |
| ], | |
| inputs=question, | |
| ) | |
| with gr.Column(scale=2): | |
| answer = gr.Markdown(label="Answer", value="*Answer will appear here.*") | |
| sources = gr.Markdown(label="Retrieved sources", value="") | |
| submit.click(ask, inputs=question, outputs=[answer, sources]) | |
| question.submit(ask, inputs=question, outputs=[answer, sources]) | |
| if __name__ == "__main__": | |
| demo.launch() | |