Spaces:

johneze
/

chichewa-text2sql

Sleeping

App Files Files Community

johneze committed 16 days ago

Commit

c1bc8ec

verified ·

1 Parent(s): 8cf79dd

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +77 -53

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import json
 import re
 import sqlite3
 import difflib
 from pathlib import Path
 import spaces
@@ -95,7 +96,7 @@ def run_query(sql: str):
         conn.close()
-# ── Model (pre-download weights at startup, load into GPU on first call) ───
 print("Downloading model weights to cache ...")
 _model_cache = snapshot_download(repo_id=MODEL_ID)
 print(f"Model cached at: {_model_cache}")
@@ -107,59 +108,82 @@ _pipe = None
 # ── Main function ──────────────────────────────────────────────────────────
 @spaces.GPU(duration=300)
 def generate_sql(question: str, language: str = "ny"):
-    """Returns (sql, match_info_markdown, results_dataframe)."""
-    global _pipe
-    if _pipe is None:
-        model = AutoModelForCausalLM.from_pretrained(
-            _model_cache,
-            dtype=torch.bfloat16,
-            device_map="auto",
         )
-        _pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-    lang_name = "Chichewa" if language == "ny" else "English"
-    messages = [
-        {
-            "role": "system",
-            "content": (
-                "You are an expert Text-to-SQL model for a SQLite database "
-                "with tables: production, population, food_insecurity, "
-                "commodity_prices, mse_daily. "
-                "Generate ONE valid SQL SELECT query. Return ONLY the SQL, no explanation."
-            ),
-        },
-        {"role": "user", "content": f"Language: {lang_name}\nQuestion: {question}"},
-    ]
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    out = _pipe(prompt, max_new_tokens=128, do_sample=False,
-                pad_token_id=tokenizer.eos_token_id)[0]["generated_text"]
-    generated = out[len(prompt):] if out.startswith(prompt) else out
-    sql = extract_sql(generated)
-    # Dataset match
-    example, score, mode = find_match(question, language)
-    if example:
-        match_info = (
-            f"**Match:** {mode} (score: {score})\n\n"
-            f"**ny:** {example.get('question_ny', '')}\n\n"
-            f"**en:** {example.get('question_en', '')}\n\n"
-            f"**Dataset SQL:** `{example.get('sql_statement', '')}`\n\n"
-            f"**Table:** {example.get('table', '')} | "
-            f"**Difficulty:** {example.get('difficulty_level', '')}"
         )
-    else:
-        match_info = "_No close match found in the dataset._"
-    # Execute SQL
-    df, err = run_query(sql)
-    if err:
-        results = pd.DataFrame([{"error": err}])
-    elif df is not None and not df.empty:
-        results = df
-    else:
-        results = pd.DataFrame([{"info": "Query returned no rows."}])
-    return sql, match_info, results
 # ── Gradio UI ──────────────────────────────────────────────────────────────
@@ -180,7 +204,7 @@ with gr.Blocks(title="Chichewa Text-to-SQL") as demo:
     submit_btn = gr.Button("Generate SQL & Run", variant="primary")
-    sql_output    = gr.Code(label="Generated SQL", language="sql")
     match_output  = gr.Markdown()
     result_output = gr.Dataframe(label="Query Results", wrap=True)

 import re
 import sqlite3
 import difflib
+import traceback
 from pathlib import Path
 import spaces
         conn.close()
+# ── Model (pre-download at startup, load into GPU lazily on first call) ────
 print("Downloading model weights to cache ...")
 _model_cache = snapshot_download(repo_id=MODEL_ID)
 print(f"Model cached at: {_model_cache}")
 # ── Main function ──────────────────────────────────────────────────────────
 @spaces.GPU(duration=300)
 def generate_sql(question: str, language: str = "ny"):
+    """Returns (sql_str, match_info_markdown, results_dataframe)."""
+    # Always return 3 values even on error so Gradio never shows generic "Error"
+    empty_df = pd.DataFrame()
+    try:
+        global _pipe
+        if _pipe is None:
+            model = AutoModelForCausalLM.from_pretrained(
+                _model_cache,
+                torch_dtype=torch.bfloat16,
+                device_map="cuda",
+            )
+            _pipe = pipeline(
+                "text-generation",
+                model=model,
+                tokenizer=tokenizer,
+                device_map="cuda",
+            )
+        lang_name = "Chichewa" if language == "ny" else "English"
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    "You are an expert Text-to-SQL model for a SQLite database "
+                    "with tables: production, population, food_insecurity, "
+                    "commodity_prices, mse_daily. "
+                    "Generate ONE valid SQL SELECT query. Return ONLY the SQL, no explanation."
+                ),
+            },
+            {"role": "user", "content": f"Language: {lang_name}\nQuestion: {question}"},
+        ]
+        prompt = tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
         )
+        out = _pipe(
+            prompt,
+            max_new_tokens=128,
+            do_sample=False,
+            pad_token_id=tokenizer.eos_token_id,
+        )[0]["generated_text"]
+        generated = out[len(prompt):] if out.startswith(prompt) else out
+        sql = extract_sql(generated)
+        # Dataset match
+        example, score, mode = find_match(question, language)
+        if example:
+            match_info = (
+                f"**Match:** {mode} (score: {score})\n\n"
+                f"**ny:** {example.get('question_ny', '')}\n\n"
+                f"**en:** {example.get('question_en', '')}\n\n"
+                f"**Dataset SQL:** `{example.get('sql_statement', '')}`\n\n"
+                f"**Table:** {example.get('table', '')} | "
+                f"**Difficulty:** {example.get('difficulty_level', '')}"
+            )
+        else:
+            match_info = "_No close match found in the dataset._"
+        # Execute SQL
+        df, err = run_query(sql)
+        if err:
+            results = pd.DataFrame([{"error": err}])
+        elif df is not None and not df.empty:
+            results = df
+        else:
+            results = pd.DataFrame([{"info": "Query returned no rows."}])
+        return sql, match_info, results
+    except Exception:
+        err_msg = traceback.format_exc()
+        print(err_msg)
+        return (
+            f"-- ERROR --\n{err_msg}",
+            f"**Error during generation:**\n```\n{err_msg}\n```",
+            pd.DataFrame([{"error": err_msg}]),
         )
 # ── Gradio UI ──────────────────────────────────────────────────────────────
     submit_btn = gr.Button("Generate SQL & Run", variant="primary")
+    sql_output    = gr.Textbox(label="Generated SQL", lines=3)
     match_output  = gr.Markdown()
     result_output = gr.Dataframe(label="Query Results", wrap=True)