Spaces:

manthilaffs
/

Gamunu-Inference

Sleeping

App Files Community

manthilaffs committed on Nov 2, 2025

Commit

165e807

verified ·

1 Parent(s): 9b9ba13

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -62

app.py CHANGED Viewed

@@ -3,9 +3,6 @@ import torch
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer
-# ----------------------------------------------------
-# Globals
-# ----------------------------------------------------
 model = None
 tokenizer = None
@@ -20,20 +17,14 @@ alpaca_prompt = """පහත දැක්වෙන්නේ යම් කාර
 ### ප්‍රතිචාරය:
 {}"""
-# ----------------------------------------------------
-# GPU inference — official ZeroGPU style
-# ----------------------------------------------------
 @spaces.GPU
 def infer(instruction, input_text=""):
     global model, tokenizer
     if model is None:
-        tokenizer = AutoTokenizer.from_pretrained(
-            "manthilaffs/Gamunu-4B-Instruct-Alpha"
-        )
         model = AutoModelForCausalLM.from_pretrained(
             "manthilaffs/Gamunu-4B-Instruct-Alpha",
-            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
             device_map="auto",
         )
         model.eval()
@@ -46,72 +37,86 @@ def infer(instruction, input_text=""):
     )
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.inference_mode():
         outputs = model.generate(
             **inputs,
             max_new_tokens=256,
-            temperature=0.4,
-            top_k=64,
-            top_p=0.95,
             repetition_penalty=1.05,
         )
     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
     if "### ප්‍රතිචාරය:" in text:
         text = text.split("### ප්‍රතිචාරය:")[-1].strip()
     return text
-# ----------------------------------------------------
-# Gradio UI
-# ----------------------------------------------------
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown(
-        """
-        # 🧠 Gamunu 4B Instruct Alpha
-        *Sinhala Instruct LLM — ZeroGPU Demo*
-        ⚙️ Pure Transformers Inference | 💠 ZeroGPU GPU Burst
-        """
-    )
-    with gr.Row():
-        instruction = gr.Textbox(
-            label="🧾 Instruction / Question",
-            placeholder="උදා: ඊයේ රු. 30 ක ඇපල් ගෙඩියක් අද රු. 60 නම් මිල වෙනස කීයද?",
-            lines=2,
-        )
-    with gr.Row():
-        input_text = gr.Textbox(
-            label="📥 Additional Context (Optional)",
-            placeholder="අමතර තොරතුරු (ඇත්නම්) එහි සටහන් කරන්න",
-            lines=2,
-        )
-    output = gr.Markdown(label="🧩 Gamunu Response")
-    run_btn = gr.Button("🔮 Generate Response")
     run_btn.click(infer, inputs=[instruction, input_text], outputs=output)
-    gr.Examples(
-        examples=[
-            ["ඉන්දියානු මහා සමුද්‍රය යනු කොහෙද?", ""],
-            ["කාලගුණය අවතැන් වන්නේ මොනවක් නිසාද?", ""],
-            ["මුල්ම අගමැති කවුද?", ""],
-        ],
-        inputs=[instruction],
-        label="🪄 Example Questions (Click to try)",
-    )
-    gr.Markdown(
-        """
-        ---
-        🪶 **Model:** `manthilaffs/Gamunu-4B-Instruct-Alpha`
-        🧰 **Stack:** Transformers + Torch + Gradio + ZeroGPU
-        © 2025 Gamunu Project | Experimental Release
-        """
-    )
 if __name__ == "__main__":
     demo.launch()

 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer
 model = None
 tokenizer = None
 ### ප්‍රතිචාරය:
 {}"""
 @spaces.GPU
 def infer(instruction, input_text=""):
     global model, tokenizer
     if model is None:
+        tokenizer = AutoTokenizer.from_pretrained("manthilaffs/Gamunu-4B-Instruct-Alpha")
         model = AutoModelForCausalLM.from_pretrained(
             "manthilaffs/Gamunu-4B-Instruct-Alpha",
+            dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
             device_map="auto",
         )
         model.eval()
     )
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.inference_mode():
         outputs = model.generate(
             **inputs,
             max_new_tokens=256,
             repetition_penalty=1.05,
         )
     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
     if "### ප්‍රතිචාරය:" in text:
         text = text.split("### ප්‍රතිචාරය:")[-1].strip()
     return text
+with gr.Blocks(css="""
+.gradio-container {max-width: 1100px !important; margin:auto;}
+h1, h2, h3, h4, h5 {text-align:center;}
+#title-bar {background:linear-gradient(90deg,#804dee,#ee82ee);color:white;padding:0.6rem;border-radius:1rem;margin-bottom:0.6rem;}
+textarea, input {font-family:'Noto Sans Sinhala',sans-serif;}
+""", theme=gr.themes.Soft()) as demo:
+    gr.HTML("<div id='title-bar'><h1>🧠 Gamunu 4B Instruct Alpha</h1><h4>Sinhala Instruct LLM — ZeroGPU Demo</h4></div>")
+    gr.Markdown("⚙️ **Pure Transformers Inference** | 💠 ZeroGPU GPU Burst")
+    with gr.Row(equal_height=True):
+        with gr.Column(scale=1, min_width=350):
+            instruction = gr.Textbox(
+                label="🧾 Instruction / Question",
+                placeholder="උදා: ඊයේ රු. 30 ක ඇපල් ගෙඩියක් අද රු. 60 නම් මිල වෙනස කීයද?",
+                lines=4,
+            )
+            input_text = gr.Textbox(
+                label="📥 Additional Context (Optional)",
+                placeholder="අමතර තොරතුරු ඇතුළත් කරන්න (ඇත්නම්)",
+                lines=3,
+            )
+            run_btn = gr.Button("🔮 Generate Response", variant="primary", scale=1)
+        with gr.Column(scale=1, min_width=350):
+            output = gr.Markdown(label="🧩 Gamunu Response", elem_id="output-box")
+    # Example categories
+    with gr.Accordion("🧮 Example Prompts by Category", open=False):
+        with gr.Tab("Maths"):
+            gr.Examples(
+                examples=[
+                    ["රු. 30 කින් මිලදී ගත් දේ රු. 60 නම් මිල වෙනස ප්‍රතිශතයකින් කීයද?", ""],
+                    ["සියයට 10 ක වර්ධනයක් තිබේ නම් අලුත් අගය කීයද?", ""],
+                ],
+                inputs=[instruction],
+            )
+        with gr.Tab("Roleplay"):
+            gr.Examples(
+                examples=[
+                    ["ඔබ ගුරුවරයෙකු ලෙස ක්‍රියාකරන්න. ශිෂ්‍යයාට ගණිතය උගන්වන්න.", ""],
+                    ["ඔබ පරිසර විද්‍යාඥයෙකු ලෙස වායු මණ්ඩලය පැහැදිලි කරන්න.", ""],
+                ],
+                inputs=[instruction],
+            )
+        with gr.Tab("QA"):
+            gr.Examples(
+                examples=[
+                    ["ශ්‍රී ලංකාවේ මුල්ම අගමැති කවුද?", ""],
+                    ["ඉන්දියානු මහා සමුද්‍රය යනු කොහෙද?", ""],
+                ],
+                inputs=[instruction],
+            )
+        with gr.Tab("NLP"):
+            gr.Examples(
+                examples=[
+                    ["මෙම වාක්‍යය සිංහලයට පරිවර්තනය කරන්න: 'The sun rises in the east.'", ""],
+                    ["වචන 'ආදරය' සඳහා පරිවර්තන 3ක් දෙන්න.", ""],
+                ],
+                inputs=[instruction],
+            )
     run_btn.click(infer, inputs=[instruction, input_text], outputs=output)
+    gr.Markdown("""
+---
+🪶 **Model:** `manthilaffs/Gamunu-4B-Instruct-Alpha`
+🧰 **Stack:** Transformers + Torch + Gradio + ZeroGPU
+© 2025 Gamunu Project | Experimental Release
+""")
 if __name__ == "__main__":
     demo.launch()