mertyazan commited on
Commit
426f5ad
·
verified ·
1 Parent(s): 05cc77c

Upload folder using huggingface_hub

Browse files
Files changed (11) hide show
  1. .DS_Store +0 -0
  2. .gitignore +3 -0
  3. DB_SYS_PROMPT.txt +244 -0
  4. README.md +3 -9
  5. app.py +219 -0
  6. chat_helpers.py +44 -0
  7. locustfile.py +81 -0
  8. logger.py +43 -0
  9. output.log +5 -0
  10. server.py +126 -0
  11. sql_tab.py +112 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .gradio/
2
+ user_data
3
+ __pycache__
DB_SYS_PROMPT.txt ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DB_SYSTEM_PROMPT = (
2
+ "You are a helpful assistant that answers questions about a PostgreSQL database. "
3
+ "When you use search, include brief citations as links. "
4
+ "Use markdowns to separate the code and the text in your output. "
5
+ "Do not use the WITH clause in your SQL code. "
6
+ "When asked about 'Find the top 10 directors who use the word 'the' the most in their movie titles', assume that the user is looking for a partial match. Remind the user that this is partial match. "
7
+ "Here is the database:"
8
+ """CREATE TABLE public.sales (
9
+ year integer,
10
+ release_date text,
11
+ title text,
12
+ genre text,
13
+ international_box_office text,
14
+ domestic_box_office text,
15
+ worldwide_box_office text,
16
+ production_budget text,
17
+ opening_weekend text,
18
+ theatre_count integer,
19
+ avg_run_per_theatre text,
20
+ runtime integer,
21
+ keywords text,
22
+ creative_type text,
23
+ url text
24
+ );
25
+
26
+ CREATE TABLE public.metadata (
27
+
28
+ url text,
29
+ title text,
30
+ studio text,
31
+ rating text,
32
+ runtime integer,
33
+ casting text,
34
+ director text,
35
+ genre text,
36
+ summary text,
37
+ awards text,
38
+ metascore integer,
39
+ userscore text,
40
+ RelDate text
41
+ );
42
+
43
+ CREATE TABLE public.user_reviews (
44
+ url text,
45
+ idvscore smallint,
46
+ reviewer text,
47
+ datep date,
48
+ thumbs_up integer,
49
+ thumbs_tot integer,
50
+ wc integer,
51
+ analytic double precision,
52
+ clout double precision,
53
+ authentic double precision,
54
+ tone double precision,
55
+ wps double precision,
56
+ sixltr double precision,
57
+ dic double precision,
58
+ function_ double precision,
59
+ pronoun double precision,
60
+ ppron double precision,
61
+ i double precision,
62
+ we double precision,
63
+ you double precision,
64
+ shehe double precision,
65
+ they double precision,
66
+ ipron double precision,
67
+ article double precision,
68
+ prep double precision,
69
+ auxverb double precision,
70
+ adverb double precision,
71
+ conj double precision,
72
+ negate double precision,
73
+ verb double precision,
74
+ adj double precision,
75
+ compare double precision,
76
+ interrog double precision,
77
+ number double precision,
78
+ quant double precision,
79
+ affect double precision,
80
+ posemo double precision,
81
+ negemo double precision,
82
+ anx double precision,
83
+ anger double precision,
84
+ sad double precision,
85
+ social double precision,
86
+ family double precision,
87
+ friend double precision,
88
+ female double precision,
89
+ male double precision,
90
+ cogproc double precision,
91
+ insight double precision,
92
+ cause double precision,
93
+ discrep double precision,
94
+ tentat double precision,
95
+ certain double precision,
96
+ differ double precision,
97
+ percept double precision,
98
+ see double precision,
99
+ hear double precision,
100
+ feel double precision,
101
+ bio double precision,
102
+ body double precision,
103
+ health double precision,
104
+ sexual double precision,
105
+ ingest double precision,
106
+ drives double precision,
107
+ affiliation double precision,
108
+ achieve double precision,
109
+ power double precision,
110
+ reward double precision,
111
+ risk double precision,
112
+ focuspast double precision,
113
+ focuspresent double precision,
114
+ focusfuture double precision,
115
+ relativ double precision,
116
+ motion double precision,
117
+ space double precision,
118
+ time double precision,
119
+ work double precision,
120
+ leisure double precision,
121
+ home double precision,
122
+ money double precision,
123
+ relig double precision,
124
+ death double precision,
125
+ informal double precision,
126
+ swear double precision,
127
+ netspeak double precision,
128
+ assent double precision,
129
+ nonflu double precision,
130
+ filler double precision,
131
+ allpunc double precision,
132
+ period double precision,
133
+ comma double precision,
134
+ colon double precision,
135
+ semic double precision,
136
+ qmark double precision,
137
+ exclam double precision,
138
+ dash double precision,
139
+ quote double precision,
140
+ apostro double precision,
141
+ parenth double precision,
142
+ otherp double precision
143
+ );
144
+
145
+ CREATE TABLE public.expert_reviews (
146
+ url text,
147
+ idvscore smallint,
148
+ reviewer text,
149
+ datep date,
150
+ wc integer,
151
+ analytic double precision,
152
+ clout double precision,
153
+ authentic double precision,
154
+ tone double precision,
155
+ wps double precision,
156
+ sixltr double precision,
157
+ dic double precision,
158
+ function_ double precision,
159
+ pronoun double precision,
160
+ ppron double precision,
161
+ i double precision,
162
+ we double precision,
163
+ you double precision,
164
+ shehe double precision,
165
+ they double precision,
166
+ ipron double precision,
167
+ article double precision,
168
+ prep double precision,
169
+ auxverb double precision,
170
+ adverb double precision,
171
+ conj double precision,
172
+ negate double precision,
173
+ verb double precision,
174
+ adj double precision,
175
+ compare double precision,
176
+ interrog double precision,
177
+ number double precision,
178
+ quant double precision,
179
+ affect double precision,
180
+ posemo double precision,
181
+ negemo double precision,
182
+ anx double precision,
183
+ anger double precision,
184
+ sad double precision,
185
+ social double precision,
186
+ family double precision,
187
+ friend double precision,
188
+ female double precision,
189
+ male double precision,
190
+ cogproc double precision,
191
+ insight double precision,
192
+ cause double precision,
193
+ discrep double precision,
194
+ tentat double precision,
195
+ certain double precision,
196
+ differ double precision,
197
+ percept double precision,
198
+ see double precision,
199
+ hear double precision,
200
+ feel double precision,
201
+ bio double precision,
202
+ body double precision,
203
+ health double precision,
204
+ sexual double precision,
205
+ ingest double precision,
206
+ drives double precision,
207
+ affiliation double precision,
208
+ achieve double precision,
209
+ power double precision,
210
+ reward double precision,
211
+ risk double precision,
212
+ focuspast double precision,
213
+ focuspresent double precision,
214
+ focusfuture double precision,
215
+ relativ double precision,
216
+ motion double precision,
217
+ space double precision,
218
+ time double precision,
219
+ work double precision,
220
+ leisure double precision,
221
+ home double precision,
222
+ money double precision,
223
+ relig double precision,
224
+ death double precision,
225
+ informal double precision,
226
+ swear double precision,
227
+ netspeak double precision,
228
+ assent double precision,
229
+ nonflu double precision,
230
+ filler double precision,
231
+ allpunc double precision,
232
+ period double precision,
233
+ comma double precision,
234
+ colon double precision,
235
+ semic double precision,
236
+ qmark double precision,
237
+ exclam double precision,
238
+ dash double precision,
239
+ quote double precision,
240
+ apostro double precision,
241
+ parenth double precision,
242
+ otherp double precision
243
+ );"""
244
+ )
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: SQL Assignment
3
- emoji: 🐠
4
- colorFrom: indigo
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.45.0
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: SQL-Assignment
 
 
 
 
 
3
  app_file: app.py
4
+ sdk: gradio
5
+ sdk_version: 5.44.1
6
  ---
 
 
app.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from openai import AsyncOpenAI
4
+ import pandas as pd
5
+ import uuid, asyncio
6
+
7
+ from sql_tab import run_sql
8
+ from logger import log_event
9
+ from chat_helpers import build_input_from_history, get_db_sys_prompt
10
+
11
+ oclient = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
12
+ max_rows = 100
13
+
14
async def respond_once(message, history):
    """Produce one complete (non-streaming) model reply for *message*.

    Used by the /e2e endpoints and load tests.  Returns the model's output
    text, or a canned string when MOCK_OPENAI is enabled.
    """
    # MOCK mode to isolate app/DB without burning tokens.  Checked first so
    # no request payload is built (and no prompt file read) in mock runs.
    if os.getenv("MOCK_OPENAI", "").lower() in {"1", "true", "yes"}:
        import random  # asyncio is already imported at module level
        await asyncio.sleep(random.uniform(0.05, 0.25))
        return "MOCK: Here’s a fabricated answer for load testing."

    text_input = build_input_from_history(message, history)
    kwargs = dict(
        model="gpt-4.1",
        input=text_input,
        temperature=0,
        instructions=get_db_sys_prompt(),
        tools=[{"type": "web_search"}],
        tool_choice="auto",
        parallel_tool_calls=True,
    )
    resp = await oclient.responses.create(**kwargs)
    # output_text may be absent on some response shapes; default to "".
    return getattr(resp, "output_text", "")
34
+
35
async def respond(message, history):
    """Stream a model reply, yielding the accumulated text after each delta.

    Async generator: every yield is the full text so far, so the UI can
    re-render the message in place.  A final yield covers the case where
    the SDK's final response differs from the concatenated deltas.
    """
    text_input = build_input_from_history(message, history)
    kwargs = dict(
        model="gpt-4.1",
        input=text_input,
        temperature=0,
        instructions=get_db_sys_prompt(),
        tools=[{"type": "web_search"}],
        tool_choice="auto",
        parallel_tool_calls=True,
    )

    buffer = []
    async with oclient.responses.stream(**kwargs) as stream:
        async for event in stream:
            # Only text deltas are forwarded; tool-call events are ignored here.
            if event.type == "response.output_text.delta":
                buffer.append(event.delta)
                yield "".join(buffer)

        # NOTE(review): get_final_response() is called inside the stream
        # context (required by the SDK once the stream is closed) — confirm
        # this matches the originally intended placement.
        final = await stream.get_final_response()
        final_text = getattr(final, "output_text", None)
        if final_text and (not buffer or final_text != "".join(buffer)):
            yield final_text
58
+
59
async def chat_driver(user_message, messages_history, _user_name, _session_id):
    """Run one chat turn: log the user message, stream the reply, log the result.

    Async generator yielding (updated chat history, cleared input box value).
    """
    history = messages_history or []
    transcript = history + [{"role": "user", "content": user_message}]
    assistant_text = ""

    # Fire-and-forget logging so the UI is never blocked on disk I/O.
    asyncio.create_task(log_event(_user_name, _session_id, "chat_user", {"text": user_message}))

    # Each chunk is the accumulated assistant text; re-render it in place.
    async for partial in respond(user_message, history):
        assistant_text = partial
        yield transcript + [{"role": "assistant", "content": assistant_text}], ""

    # After the stream finished, record the final assistant text.
    asyncio.create_task(log_event(_user_name, _session_id, "chat_assistant", {"text": assistant_text}))
73
+
74
async def post_completion_code(_user_name, _session_id):
    """Log the completion-code event and return it as a single chat message."""
    code = "9C1F4B2E"
    await log_event(_user_name, _session_id, "completion_code", {"code": code})
    return [{"role": "assistant", "content": f"the completion code is {code}"}]
81
+
82
# Top-level Gradio UI: a login gate, then three tabs (Assignment / Chatbot / SQL).
with gr.Blocks(title="Movie Database", theme="soft") as demo:
    # gr.Markdown("## Movie Database Bot and SQL Console")
    # Per-session state, filled in at login and threaded into every log call.
    user_name = gr.State("")
    session_id = gr.State("")

    # Login view: visible first; swapped for app_view after a valid login.
    with gr.Column(visible=True) as identify_view:
        gr.Markdown("### Login")
        name_tb = gr.Textbox(label="Student ID (required)", placeholder="Please enter your student ID", autofocus=True)
        enter_btn = gr.Button("Enter", variant="primary")
        id_msg = gr.Markdown("")

    async def do_login(name):
        # Validate the student ID, mint a session id, log the login, flip views.
        name = (name or "").strip()
        if not name:
            return (gr.update(visible=True), gr.update(visible=False), "⚠️ Please enter your student ID to continue.", "", "")
        sid = uuid.uuid4().hex
        await log_event(name, sid, "login", {"meta": {"agent": "gradio_app", "version": 1}})
        return (gr.update(visible=False), gr.update(visible=True), "", name, sid)

    with gr.Column(visible=False) as app_view:

        welcome_md = gr.Markdown("")
        with gr.Tabs():
            with gr.Tab("Assignment"):
                gr.Markdown("""
                <h2> Platform Usage and the Assignment </h2>
                <br>
                <ul>
                <li> You can use the "SQL" tab to run your queries and see if you have the correct results.</li>
                <li> The "Chatbot" tab provides you a chatbot (that is connected to ChatGPT) to ask questions about PostgreSQL and the database.</li>
                <li> The chatbot knows the tables and their columns, and would help with questions.</li>
                <li> Even with its knowledge, the chatbot can still make mistakes.</li>
                <li> When you are finished with all questions, the survey platform will ask for a completion code. You can find it in the "Chatbot" tab. </li>
                <li> <b> Reminder: </b> This assignment is optional and ungraded. It is designed for you to practice. You can be relaxed, it is okay to have errors. Good luck! </li>
                </ul>
                <h3> Database </h3>
                The database has 4 tables, each corresponding to the 4 excel files you have for the project:
                <ul>
                <li>sales</li>
                <li>metadata</li>
                <li>user_reviews</li>
                <li>expert_reviews</li>
                </ul>
                <br>
                <b> Important Notes: </b>
                <br>
                <br>
                <ul>
                <li> A proper ERD or foreign key relationships are not defined for the tables. You can still join them based on the column names, but be careful. </li>
                <li> Some movies have the same title but they are different movies. </li>
                <li> A column that stores numerical information might have the datatype "text". </li>
                <li> Datatypes might not be exactly the same as the excel files. </li>
                <li> Some columns might store null values as text, like "n/a" or "null". </li>
                <li> Columns with the same names might store different values in different tables. Example: "url" column in metadata and sales.</li>
                </ul>
                """)

            with gr.Tab("Chatbot"):
                chatbot = gr.Chatbot(type="messages", label="Conversation", height=450)

                with gr.Row():
                    chat_input = gr.Textbox(
                        placeholder="How can I help you with PostgreSQL today?",
                        scale=8,
                        autofocus=True,
                        container=False,
                    )
                    send_btn = gr.Button("Send", variant="primary", scale=1)
                    code_btn = gr.Button("Completion code", variant="secondary", scale=1)

                def _clear_input():
                    # Reset the textbox after a message is sent.
                    return ""

                # Button click and Enter key both drive the same chat pipeline.
                ev = send_btn.click(chat_driver, [chat_input, chatbot, user_name, session_id], [chatbot, chat_input])
                ev.then(_clear_input, None, [chat_input])

                ev2 = chat_input.submit(chat_driver, [chat_input, chatbot, user_name, session_id], [chatbot, chat_input])
                ev2.then(_clear_input, None, [chat_input])

                code_btn.click(
                    post_completion_code,
                    inputs=[user_name, session_id],
                    outputs=[chatbot],
                )

            with gr.Tab("SQL"):
                with gr.Column():
                    sql_input = gr.Code(
                        label="SQL",
                        language="sql",
                        value="SELECT * FROM sales;",
                        lines=10,
                    )
                    with gr.Row():
                        run_btn = gr.Button("Run", variant="primary")
                        clear_btn = gr.Button("Clear")

                    results = gr.Dataframe(
                        label="Results",
                        wrap=True,
                        interactive=True,
                    )
                    meta = gr.Markdown("")
                    plan = gr.Markdown("", label="Explain/Plan")

                async def on_run(q, _user_name, _session_id):
                    # run_sql is blocking (psycopg2); push it off the event loop.
                    df, meta_msg, _ = await asyncio.to_thread(run_sql, q, max_rows, False)

                    await log_event(
                        _user_name, _session_id, "sql",
                        {
                            "query": q,
                            "row_limit": max_rows,
                            "row_count": int(getattr(df, "shape", [0])[0]),
                            "meta": meta_msg,
                        },
                    )
                    return df, meta_msg, ""

                def on_clear():
                    # Blank query, empty results grid, status note, empty plan.
                    return "", pd.DataFrame(), "Cleared.", ""

                run_btn.click(on_run, [sql_input, user_name, session_id], [results, meta, plan])
                clear_btn.click(on_clear, inputs=None, outputs=[sql_input, results, meta, plan])

    # Login wiring: both the button and Enter in the textbox submit.
    outputs = [identify_view, app_view, id_msg, user_name, session_id]
    enter_btn.click(do_login, inputs=[name_tb], outputs=outputs)
    name_tb.submit(do_login, inputs=[name_tb], outputs=outputs)

    def greet(name):
        return f"**Hello, {name}!**"
    user_name.change(greet, inputs=[user_name], outputs=[welcome_md])


if __name__ == "__main__":
    demo.launch(share=True)
chat_helpers.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tiktoken
2
+
3
# Token budget for a single model request (history is trimmed to fit).
MAX_TOKENS = 16000

# Generic fallback prompt (not database-specific).
SQL_SYSTEM_PROMPT = (
    "You are a helpful assistant that answers questions about PostgreSQL databases. "
    "When you use search, include brief citations as links. "
    "Use markdowns to separate the code and the text in your output. "
)

def get_db_sys_prompt():
    """Return the database-specific system prompt read from DB_SYS_PROMPT.txt.

    NOTE(review): the shipped DB_SYS_PROMPT.txt appears to contain Python
    source (a ``DB_SYSTEM_PROMPT = (...)`` assignment) rather than plain
    prompt text, so the assignment syntax is sent to the model verbatim —
    confirm this is intended.
    """
    # Explicit encoding so the prompt reads identically on every platform.
    with open("DB_SYS_PROMPT.txt", "r", encoding="utf-8") as f:
        return f.read()
14
+
15
def build_input_from_history(message, history):
    """Assemble the model input: system prompt, prior turns, then *message*.

    Returns a token-budgeted list of {"role", "content"} dicts.
    """
    # The system prompt always leads the transcript.
    messages = [{"role": "system", "content": get_db_sys_prompt()}]
    # Replay earlier user/assistant turns in order; any other role is dropped.
    for turn in history:
        if turn["role"] in ("user", "assistant"):
            messages.append({"role": turn["role"], "content": turn["content"]})
    # The new user message closes the transcript.
    messages.append({"role": "user", "content": message})
    # Trim oldest non-system turns until the whole thing fits the budget.
    return truncate_history(messages, MAX_TOKENS)
29
+
30
def count_tokens(messages, model="gpt-4.1"):
    """Count tokens over all message contents using tiktoken."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Unknown model name: fall back to the common GPT-4-era encoding.
        encoding = tiktoken.get_encoding("cl100k_base")
    return sum(len(encoding.encode(m["content"])) for m in messages)
40
+
41
def truncate_history(messages, max_tokens=MAX_TOKENS, model="gpt-4.1"):
    """Drop the oldest non-system turn (index 1) until under the token budget.

    Mutates *messages* in place and returns it; always keeps at least the
    system prompt and the newest message (len > 2 guard).
    """
    while len(messages) > 2 and count_tokens(messages, model=model) > max_tokens:
        del messages[1]
    return messages
locustfile.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # locustfile.py
2
+ import json, random, string, time
3
+ from locust import HttpUser, task, between
4
+
5
+ CHAT_PATH = "/e2e/chat"
6
+ SQL_PATH = "/e2e/sql"
7
+
8
+ SIMPLE_SQL = [
9
+ "SELECT * FROM sales",
10
+ "SELECT genre, COUNT(*) AS n FROM sales GROUP BY genre ORDER BY n DESC",
11
+ "SELECT title, runtime FROM sales WHERE runtime > 150 ORDER BY runtime DESC",
12
+ "SELECT AVG(metascore) FROM metadata",
13
+ "SELECT s.title, m.studio FROM sales s JOIN metadata m ON s.title=m.title LIMIT 200",
14
+ ]
15
+
16
def rnd_name():
    """Return a random user name like ``U-7K2QX9`` (6 uppercase/digit chars).

    Uses the module-level ``random``/``string`` imports; the original
    re-imported both inside the function redundantly.
    """
    return "U-" + "".join(random.choices(string.ascii_uppercase + string.digits, k=6))
19
+
20
class GradioDBUser(HttpUser):
    """Locust user mixing chat requests (weight 3) and SQL requests (weight 2)."""

    # Short think-time so the load test stresses concurrency, not realism.
    wait_time = between(0.05, 0.25)

    @task(3)
    def chat(self):
        # Rotate through a few representative database questions.
        msg = random.choice([
            "Which table has the highest number of rows and why?",
            "Write a SQL to list top-5 genres by revenue.",
            "Explain how to compute ROI = worldwide_box_office / production_budget.",
            "What's the average runtime by studio?",
        ])
        payload = {
            "message": msg,
            "history": [
                {"role": "user", "content": "Hi"},
                {"role": "assistant", "content": "Hello!"},
            ],
        }

        t0 = time.perf_counter()
        with self.client.post(
            CHAT_PATH,
            data=json.dumps(payload),
            headers={"Content-Type": "application/json"},
            name="chat",
            catch_response=True,
        ) as r:
            dt_ms = (time.perf_counter() - t0) * 1000
            if r.status_code != 200:
                r.failure(f"HTTP {r.status_code}: {r.text[:200]}")
            else:
                # Optionally assert JSON shape
                try:
                    _ = r.json().get("output", "")
                    r.success()
                except Exception as e:
                    r.failure(f"Bad JSON after {dt_ms:.1f}ms: {e}")

    @task(2)
    def sql(self):
        # Read-only queries only; allow_writes stays False.
        payload = {"query": random.choice(SIMPLE_SQL), "limit": 200, "allow_writes": False}
        t0 = time.perf_counter()
        with self.client.post(
            SQL_PATH,
            data=json.dumps(payload),
            headers={"Content-Type": "application/json"},
            name="sql",
            catch_response=True,
        ) as r:
            dt_ms = (time.perf_counter() - t0) * 1000
            if r.status_code != 200:
                r.failure(f"HTTP {r.status_code}: {r.text[:200]}")
            else:
                try:
                    j = r.json()
                    # Optional sanity checks so bad responses are marked failures
                    if "rows" in j and isinstance(j["rows"], list):
                        r.success()
                    else:
                        r.failure(f"Unexpected JSON shape after {dt_ms:.1f}ms: {j}")
                except Exception as e:
                    r.failure(f"Bad JSON after {dt_ms:.1f}ms: {e}")
logger.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json, uuid, pathlib, asyncio, datetime, re
3
+
4
+ DATA_DIR = pathlib.Path(os.getenv("APP_DATA_DIR", "./user_data"))
5
+ DATA_DIR.mkdir(parents=True, exist_ok=True)
6
+
7
+ _name_re = re.compile(r"[^A-Za-z0-9._-]+")
8
+
9
+ def _slugify(name: str) -> str:
10
+ name = (name or "").strip().lower()
11
+ name = _name_re.sub("_", name)
12
+ return name or f"anon_{uuid.uuid4().hex[:8]}"
13
+
14
def _user_log_path(name: str) -> pathlib.Path:
    """Per-user JSONL log file under DATA_DIR."""
    filename = f"{_slugify(name)}.jsonl"
    return DATA_DIR / filename
16
+
17
+ def _utc_now():
18
+ # ISO 8601 with 'Z'
19
+ return datetime.datetime.utcnow().replace(microsecond=0).isoformat() + "Z"
20
+
21
+ async def _append_jsonl(path: pathlib.Path, obj: dict):
22
+ """
23
+ Append 1 line of JSON to the user's file without blocking the event loop.
24
+ """
25
+ line = json.dumps(obj, ensure_ascii=False)
26
+ def _write():
27
+ with path.open("a", encoding="utf-8") as f:
28
+ f.write(line + "\n")
29
+ await asyncio.to_thread(_write)
30
+
31
async def log_event(user_name: str, session_id: str, kind: str, payload: dict):
    """
    Record one event in the user's JSONL log.

    kind: "login" | "chat_user" | "chat_assistant" | "sql"
    payload: arbitrary extra fields, merged after the standard ones (so a
    payload key can override ts/user/session_id/kind, as in the original).
    """
    record = dict(
        ts=_utc_now(),
        user=user_name,
        session_id=session_id,
        kind=kind,
    )
    record.update(payload)
    await _append_jsonl(_user_log_path(user_name), record)
output.log ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ nohup: ignoring input
2
+ * Running on local URL: http://127.0.0.1:7860
3
+ * Running on public URL: https://e026dc1db28b1a0014.gradio.live
4
+
5
+ This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
server.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ import gradio as gr
4
+ import uvicorn
5
+
6
+ from gradio_app import demo, respond_once
7
+ from sql_tab import run_sql
8
+
9
+ import math, uuid, decimal, datetime as dt
10
+ import numpy as np
11
+ import pandas as pd
12
+ from fastapi.responses import ORJSONResponse
13
+
14
+ import traceback, sys, logging
15
+ log = logging.getLogger("uvicorn.error")
16
+
17
+ app = FastAPI(default_response_class=ORJSONResponse)
18
+
19
def df_json_safe(df: pd.DataFrame) -> list[dict]:
    """
    Convert a DataFrame into JSON-serializable row records.

    Infinities and NaNs become None; Decimals and numpy scalars become plain
    Python numbers; datetimes become ISO 8601 strings; bytes are decoded as
    UTF-8 (with replacement); UUIDs are stringified.
    """
    # Normalize: +/-inf -> NaN, object dtype so None fits in numeric columns,
    # then NaN -> None.
    cleaned = df.replace([np.inf, -np.inf], np.nan).astype(object)
    cleaned = cleaned.where(pd.notnull(cleaned), None)

    def _coerce(value):
        # --- numbers ---
        if isinstance(value, decimal.Decimal):
            # Convert to float; non-finite or unconvertible values become None.
            try:
                as_float = float(value)
            except Exception:
                return None
            return None if (math.isnan(as_float) or math.isinf(as_float)) else as_float
        if isinstance(value, np.floating):
            as_float = float(value)
            return None if (math.isnan(as_float) or math.isinf(as_float)) else as_float
        if isinstance(value, np.integer):
            return int(value)
        if isinstance(value, (np.bool_,)):
            return bool(value)

        # --- datetimes / timedeltas ---
        if isinstance(value, (pd.Timestamp, np.datetime64, dt.datetime, dt.date, dt.time)):
            try:
                # ensure ISO8601
                return pd.to_datetime(value).isoformat()
            except Exception:
                return str(value)
        if isinstance(value, (pd.Timedelta, dt.timedelta)):
            return str(value)

        # --- misc types you can get from Postgres ---
        if isinstance(value, (bytes, bytearray, memoryview)):
            try:
                return bytes(value).decode("utf-8", "replace")
            except Exception:
                return str(value)
        if isinstance(value, uuid.UUID):
            return str(value)

        # str, dict, list, None pass through unchanged.
        return value

    return [
        {key: _coerce(val) for key, val in record.items()}
        for record in cleaned.to_dict(orient="records")
    ]
72
+
73
class ChatReq(BaseModel):
    # Request body for POST /e2e/chat.
    message: str
    # Prior turns as [{"role": ..., "content": ...}]; defaults to empty.
    history: list[dict] = []
76
+
77
class SqlReq(BaseModel):
    # Request body for POST /e2e/sql.
    query: str
    # Row cap passed to run_sql; the endpoint also previews at most 200 rows.
    limit: int = 200
    # Write statements are rejected unless this is explicitly True.
    allow_writes: bool = False
81
+
82
+ @app.get("/healthz")
83
+ def healthz():
84
+ return {"ok": True}
85
+
86
+ @app.post("/e2e/chat")
87
+ async def e2e_chat(req: ChatReq):
88
+ text = await respond_once(req.message, req.history)
89
+ return {"output": text}
90
+
91
+ @app.post("/e2e/sql")
92
+ def e2e_sql(req: SqlReq):
93
+ try:
94
+ df, meta, elapsed = run_sql(req.query, req.limit, req.allow_writes)
95
+
96
+ # Take only head for safety
97
+ head = df.head(min(len(df), 200))
98
+
99
+ # Log raw DF preview (before cleaning)
100
+ log.error("DEBUG DF (raw):\n%s", head.to_string())
101
+
102
+ rows = df_json_safe(head)
103
+ payload = {
104
+ "meta": str(meta),
105
+ "elapsed": float(elapsed) if elapsed == elapsed and not math.isinf(elapsed) else None,
106
+ "n": int(len(df)),
107
+ "rows": rows,
108
+ }
109
+
110
+ return ORJSONResponse(payload, headers={"X-Serializer": "orjson"})
111
+ except Exception as e:
112
+ # Log script name + stack + dataframe if available
113
+ log.error("Exception in %s", __file__)
114
+ traceback.print_exc(file=sys.stderr)
115
+ try:
116
+ log.error("Last DF snapshot:\n%s", head.to_string())
117
+ except Exception:
118
+ pass
119
+ raise
120
+
121
+ # Mount Gradio UI on "/"
122
+ mounted = gr.mount_gradio_app(app, demo, path="/")
123
+
124
+ if __name__ == "__main__":
125
+ # Run with multiple workers for concurrency in real tests (see section D)
126
+ uvicorn.run(mounted, host="0.0.0.0", port=7860)
sql_tab.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import time
4
+ import pandas as pd
5
+ import numpy as np
6
+ import psycopg2.extras as extras
7
+ from psycopg2.pool import SimpleConnectionPool
8
+
9
+
10
+ DB_NAME = os.getenv("PGDATABASE", "mert")
11
+ DB_USER = os.getenv("PGUSER", "mert")
12
+ DB_PASS = os.getenv("POSTGRES_PASSWORD")
13
+ DB_HOST = os.getenv("PGHOST", "127.0.0.1")
14
+ DB_PORT = int(os.getenv("PGPORT", "5432"))
15
+ POOL_MAX = int(os.getenv("PG_POOL_MAX", "10"))
16
+
17
+ _pool: SimpleConnectionPool | None = None
18
+
19
def _get_pool():
    """Lazily create and return the process-wide psycopg2 connection pool."""
    global _pool
    if _pool is not None:
        return _pool
    _pool = SimpleConnectionPool(
        minconn=1,
        maxconn=POOL_MAX,
        database=DB_NAME,
        user=DB_USER,
        password=DB_PASS,
        host=DB_HOST,
        port=DB_PORT,
    )
    return _pool
28
+
29
def _borrow_conn():
    """Check out a pooled connection with autocommit and a 10s statement timeout."""
    conn = _get_pool().getconn()
    conn.autocommit = True
    try:
        with conn.cursor() as cur:
            # Best effort: cap each statement at 10 seconds.
            cur.execute("SET statement_timeout = 10000;")
    except Exception:
        # Timeout setup is advisory; hand the connection out regardless.
        pass
    return conn
39
+
40
def _return_conn(conn):
    """Give the connection back to the pool; close it outright if that fails."""
    try:
        _get_pool().putconn(conn)
    except Exception:
        try:
            conn.close()
        except Exception:
            pass
46
+
47
+
48
# Leading keywords that mark a statement as a write/DDL operation.
WRITE_FIRST_KEYWORDS = {
    "INSERT","UPDATE","DELETE","DROP","ALTER","CREATE","TRUNCATE",
    "VACUUM","REINDEX","GRANT","REVOKE","MERGE","CALL","DO",
    "ATTACH","DETACH"
}

def is_write_query(sql: str) -> bool:
    """
    Return True when the first *statement* is a write.

    Only the leading keyword is inspected, so function names such as
    REPLACE() are ignored.  WITH-prefixed queries count as writes only when
    an INSERT/UPDATE/DELETE/MERGE follows the CTE block.
    """
    first_stmt = re.split(r";\s*", sql.strip(), maxsplit=1)[0]

    # CTE form: classify by the main statement after the closing paren.
    if re.match(r"^\s*WITH\b", first_stmt, flags=re.IGNORECASE):
        main_stmt = re.search(r"\)\s*(INSERT|UPDATE|DELETE|MERGE)\b", first_stmt, flags=re.IGNORECASE)
        return main_stmt is not None

    # Plain form: classify by the very first keyword alone.
    leading = re.match(r"^\s*([A-Za-z]+)", first_stmt)
    if not leading:
        return False
    return leading.group(1).upper() in WRITE_FIRST_KEYWORDS
70
+
71
def enforce_limit(sql: str, limit: int) -> str:
    """
    Append ``LIMIT`` when the query starts with SELECT/WITH and has none.

    Detection is naive (any LIMIT token anywhere suppresses the append) —
    a deliberate, practical trade-off.  Trailing semicolons are stripped.
    """
    stmt = sql.strip().strip(";")
    is_read = re.match(r"^(SELECT|WITH)\b", stmt, flags=re.IGNORECASE)
    has_limit = re.search(r"\bLIMIT\b", stmt, flags=re.IGNORECASE)
    if is_read and not has_limit:
        return f"{stmt} LIMIT {int(limit)}"
    return stmt
81
+
82
def run_sql(query: str, max_rows: int, allow_writes: bool):
    """
    Execute one SQL statement; return (DataFrame, meta message, elapsed seconds).

    Empty input, multi-statement input, and (unless *allow_writes*) write
    statements are rejected before touching the database.  SELECT/WITH
    statements are capped at *max_rows* via enforce_limit.
    """
    if not query or not query.strip():
        return pd.DataFrame(), "Provide a SQL query.", 0.0

    # Any ';' left after trimming the trailing terminator means >1 statement.
    if ";" in query.strip().rstrip(";"):
        return pd.DataFrame(), "Multiple statements detected; please run one at a time.", 0.0

    if not allow_writes and is_write_query(query):
        return pd.DataFrame(), "Write operations are disabled. Enable the toggle to allow writes.", 0.0

    bounded_sql = enforce_limit(query, max_rows)
    t_start = time.perf_counter()
    connection = None
    try:
        connection = _borrow_conn()
        with connection.cursor(cursor_factory=extras.RealDictCursor) as cursor:
            # Belt-and-braces timeout on top of the one set at checkout.
            cursor.execute("SET LOCAL statement_timeout = 10000;")
            cursor.execute(bounded_sql)
            fetched = cursor.fetchall() if cursor.description else []
            frame = pd.DataFrame(fetched)
    except Exception as exc:
        return pd.DataFrame(), f"Error: {exc}", 0.0
    finally:
        if connection:
            _return_conn(connection)

    duration = time.perf_counter() - t_start
    summary = f"Rows: {len(frame)} | Time: {duration:.3f}s"
    # Replace infinities and missing values so the UI/JSON layers stay safe.
    frame.replace([np.inf, -np.inf], pd.NA, inplace=True)
    frame = frame.where(pd.notnull(frame), None)
    return frame, summary, duration