johneze committed on
Commit
721fdff
·
verified ·
1 Parent(s): 6475c3b

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +34 -12
  2. app.py +115 -0
  3. requirements.txt +6 -0
README.md CHANGED
@@ -1,12 +1,34 @@
1
- ---
2
- title: Chichewa Text2sql
3
- emoji: 🔥
4
- colorFrom: blue
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 6.6.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Chichewa Text2SQL
3
+ emoji: 🌍
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: "4.44.0"
8
+ app_file: app.py
9
+ pinned: false
10
+ hardware: zero-a10g
11
+ license: mit
12
+ ---
13
+
14
+ # Chichewa Text-to-SQL
15
+
16
+ Query databases using natural language in **Chichewa** or English.
17
+
18
+ Model: [johneze/Llama-3.1-8B-Instruct-chichewa-text2sql](https://huggingface.co/johneze/Llama-3.1-8B-Instruct-chichewa-text2sql)
19
+
20
+ ## API Usage
21
+
22
+ You can call this Space programmatically via `gradio_client`:
23
+
24
+ ```python
25
+ from gradio_client import Client
26
+
27
+ client = Client("johneze/chichewa-text2sql")
28
+ result = client.predict(
29
+ question="Ndi boma liti komwe anakolola chimanga chambiri?",
30
+ language="ny",
31
+ api_name="/generate_sql"
32
+ )
33
+ print(result)
34
+ ```
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Chichewa Text-to-SQL — HuggingFace Space
3
+ Loads johneze/Llama-3.1-8B-Instruct-chichewa-text2sql and exposes a
4
+ Gradio API endpoint that the Streamlit app (or anyone) can call.
5
+ Uses ZeroGPU for free GPU access on HF Spaces.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ import spaces
11
+ import gradio as gr
12
+ import torch
13
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
14
+
15
MODEL_ID = "johneze/Llama-3.1-8B-Instruct-chichewa-text2sql"

# Everything below runs once at import time, so each request reuses the same
# tokenizer, model and pipeline objects instead of reloading the weights.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto"
)
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
24
+
25
+
26
def extract_sql(text: str) -> str:
    """Pull the first SQL SELECT statement out of raw model output.

    The statement starts at the first standalone ``SELECT`` keyword
    (case-insensitive). When a semicolon follows, everything up to it is
    kept, so a query the model wrapped over several lines survives intact;
    its line breaks are replaced by single spaces. Without a semicolon only
    the first line is kept, which drops trailing commentary or a closing
    code fence.

    Args:
        text: Decoded text produced by the model.

    Returns:
        The statement terminated by exactly one ``;``, or ``text`` stripped
        of surrounding whitespace when it contains no ``SELECT`` keyword.
    """
    # \b keeps identifiers such as "preselect" from being read as the keyword.
    match = re.search(r"(?is)\bselect\s.+", text)
    if not match:
        return text.strip()

    statement, terminator, _ = match.group(0).partition(";")
    if not terminator:
        # No explicit end marker: fall back to the first line only.
        statement = statement.split("\n", 1)[0]

    # Join wrapped lines without touching spacing elsewhere in the query.
    # NOTE: a ';' inside a quoted string literal still ends the statement.
    statement = re.sub(r"\s*\n\s*", " ", statement)
    return statement.strip() + ";"
35
+
36
+
37
@spaces.GPU
def generate_sql(question: str, language: str = "ny") -> str:
    """Generate one SQL SELECT statement for a natural-language question.

    Args:
        question: The question, written in Chichewa or English.
        language: ``"ny"`` for Chichewa; any other value is labelled
            English in the prompt.

    Returns:
        The SQL that ``extract_sql`` pulls from the model's reply, or an
        empty string when ``question`` is empty or only whitespace.
    """
    if not question or not question.strip():
        # Nothing to translate, so skip the model call entirely.
        return ""

    lang_name = "Chichewa" if language == "ny" else "English"

    system_prompt = (
        "You are an expert Text-to-SQL model for a SQLite database "
        "with the following tables: production, population, food_insecurity, "
        "commodity_prices, mse_daily. "
        "Given a natural language question, generate ONE valid SQL SELECT query. "
        "Return ONLY the SQL query, no explanation."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": f"Language: {lang_name}\nQuestion: {question}",
        },
    ]

    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Ask the pipeline for the continuation only. The system prompt itself
    # contains the words "SELECT query", so if any prompt text reached
    # extract_sql it would be returned as if it were the generated SQL.
    outputs = pipe(
        prompt,
        max_new_tokens=128,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
        return_full_text=False,
    )
    return extract_sql(outputs[0]["generated_text"])
76
+
77
+
78
+ # ── Gradio UI ──────────────────────────────────────────────────────────────
79
with gr.Blocks(title="Chichewa Text-to-SQL") as demo:
    gr.Markdown(
        "# 🌍 Chichewa Text-to-SQL\n"
        "Enter a question in Chichewa or English to generate SQL."
    )

    with gr.Row():
        question_box = gr.Textbox(
            label="Question",
            placeholder="Ndi boma liti komwe anakolola chimanga chambiri?",
            lines=3,
        )
        language_box = gr.Radio(
            ["ny", "en"],
            value="ny",
            label="Language",
        )

    submit_btn = gr.Button("Generate SQL", variant="primary")
    sql_output = gr.Code(label="Generated SQL", language="sql")

    # Name the endpoint explicitly so the "/generate_sql" route used in the
    # README's gradio_client example does not depend on the name Gradio
    # derives from the decorated function.
    submit_btn.click(
        fn=generate_sql,
        inputs=[question_box, language_box],
        outputs=sql_output,
        api_name="generate_sql",
    )

    # Clickable sample questions; each row fills both input widgets.
    gr.Examples(
        examples=[
            ["Ndi boma liti komwe anakolola chimanga chambiri?", "ny"],
            ["Which district produced the most Maize?", "en"],
            ["Ndi anthu angati ku Lilongwe?", "ny"],
            ["What is the food insecurity level in Nsanje?", "en"],
        ],
        inputs=[question_box, language_box],
    )


if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=4.44.0
2
+ transformers>=4.44.0
3
+ torch>=2.1.0
4
+ accelerate>=0.34.0
5
+ safetensors>=0.4.0
6
+ spaces>=0.19.0