Spaces:

johneze
/

chichewa-text2sql

Sleeping

App Files Files Community

johneze committed on Feb 19

Commit

107f585

verified ·

1 Parent(s): 09b455a

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

app.py +107 -6
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -14,13 +14,114 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 MODEL_ID = "johneze/Llama-3.1-8B-Instruct-chichewa-text2sql"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-)
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 def extract_sql(text: str) -> str:

 MODEL_ID = "johneze/Llama-3.1-8B-Instruct-chichewa-text2sql"
+# Tokenizer is tiny — safe to load at startup without a GPU
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+# Model is loaded lazily on the FIRST call to generate_sql, where the GPU
+# context (@spaces.GPU) is already active and CUDA is available.
+# _pipe holds the text-generation pipeline once generate_sql has built it;
+# it stays None until that first call.
+_pipe = None
+def extract_sql(text: str) -> str:
+    """Extract the first SQL SELECT statement from raw model output.
+
+    The statement starts at the first whole-word ``select`` (case-insensitive)
+    and ends at the first semicolon, so multi-line statements are kept intact.
+    When the output has no semicolon, only the first line is kept so that
+    trailing commentary is dropped.
+
+    Args:
+        text: Raw text produced by the model.
+
+    Returns:
+        The extracted statement terminated by a single ``;``, or the stripped
+        input unchanged when it contains no SELECT.
+    """
+    # \b keeps words such as "preselect" from being mistaken for the keyword.
+    match = re.search(r"(?is)\bselect\s.+", text)
+    if not match:
+        return text.strip()
+    statement, terminator, _ = match.group(0).partition(";")
+    if not terminator:
+        # No explicit terminator: fall back to the first line only.
+        statement = statement.split("\n", 1)[0]
+    return statement.strip() + ";"
+@spaces.GPU(duration=120)
+def generate_sql(question: str, language: str = "ny") -> str:
+    """Generate a SQL SELECT statement from a Chichewa or English question.
+
+    Args:
+        question: Natural-language question to translate into SQL.
+        language: 'ny' for Chichewa; any other value is treated as English.
+
+    Returns:
+        The statement extracted from the model output by extract_sql, or an
+        empty string when the question is blank.
+    """
+    global _pipe
+    question = (question or "").strip()
+    if not question:
+        # Nothing to translate: skip model loading and generation entirely.
+        return ""
+    if _pipe is None:
+        # First call: build the pipeline once and cache it in the module-level
+        # _pipe. Weights are requested in bfloat16; no quantization config is
+        # passed here, so any quantization would have to come from the
+        # checkpoint's own configuration (NOTE(review): confirm).
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            dtype=torch.bfloat16,
+            device_map="auto",
+        )
+        _pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+    lang_name = "Chichewa" if language == "ny" else "English"
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                "You are an expert Text-to-SQL model for a SQLite database "
+                "with the following tables: production, population, food_insecurity, "
+                "commodity_prices, mse_daily. "
+                "Given a natural language question, generate ONE valid SQL SELECT query. "
+                "Return ONLY the SQL query, no explanation."
+            ),
+        },
+        {
+            "role": "user",
+            "content": f"Language: {lang_name}\nQuestion: {question}",
+        },
+    ]
+    prompt = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    # Ask for the completion only. The system message itself contains the word
+    # "SELECT", so an echoed prompt reaching extract_sql would be matched as
+    # if it were the query.
+    generated = _pipe(
+        prompt,
+        max_new_tokens=128,
+        do_sample=False,
+        pad_token_id=tokenizer.eos_token_id,
+        return_full_text=False,
+    )[0]["generated_text"]
+    return extract_sql(generated)
+# ── Gradio UI ──────────────────────────────────────────────────────────────
+# Declares the Blocks app bound to `demo`, which is launched at the bottom.
+with gr.Blocks(title="Chichewa Text-to-SQL") as demo:
+    gr.Markdown("# Chichewa Text-to-SQL\nEnter a question in Chichewa or English to generate SQL.")
+    # Inputs: the question text and its language code, grouped in one row.
+    with gr.Row():
+        question_box = gr.Textbox(
+            label="Question",
+            placeholder="Ndi boma liti komwe anakolola chimanga chambiri?",
+            lines=3,
+        )
+        # Choices are the codes generate_sql accepts for `language`.
+        language_box = gr.Radio(
+            ["ny", "en"],
+            value="ny",
+            label="Language",
+        )
+    submit_btn = gr.Button("Generate SQL", variant="primary")
+    sql_output = gr.Code(label="Generated SQL", language="sql")
+    # Clicking the button calls generate_sql(question, language) and routes
+    # its return value to the SQL code component.
+    submit_btn.click(
+        fn=generate_sql,
+        inputs=[question_box, language_box],
+        outputs=sql_output,
+    )
+    # Sample question/language pairs bound to the same two input components.
+    gr.Examples(
+        examples=[
+            ["Ndi boma liti komwe anakolola chimanga chambiri?", "ny"],
+            ["Which district produced the most Maize?", "en"],
+            ["Ndi anthu angati ku Lilongwe?", "ny"],
+            ["What is the food insecurity level in Nsanje?", "en"],
+        ],
+        inputs=[question_box, language_box],
+    )
+# Start the app only when this file is run as a script.
+if __name__ == "__main__":
+    demo.launch()
 def extract_sql(text: str) -> str:

requirements.txt CHANGED Viewed

@@ -4,3 +4,4 @@ torch>=2.4.0
 accelerate>=0.34.0
 safetensors>=0.4.0
 spaces>=0.30.0

 accelerate>=0.34.0
 safetensors>=0.4.0
 spaces>=0.30.0
+bitsandbytes>=0.46.1