manthilaffs committed on
Commit
4a2293a
·
verified ·
1 Parent(s): 8ae799f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -20
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
3
  import spaces
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
 
 
6
  model = None
7
  tokenizer = None
8
 
@@ -18,19 +19,20 @@ alpaca_prompt = """පහත දැක්වෙන්නේ යම් කාර
18
  {}"""
19
 
20
  @spaces.GPU
21
- def infer(instruction, input_text=""):
 
22
  global model, tokenizer
23
  if model is None:
24
  tokenizer = AutoTokenizer.from_pretrained("manthilaffs/Gamunu-4B-Instruct-Alpha")
25
  model = AutoModelForCausalLM.from_pretrained(
26
  "manthilaffs/Gamunu-4B-Instruct-Alpha",
27
- dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
28
  device_map="auto",
29
  )
30
  model.eval()
31
 
32
  prompt = alpaca_prompt.format(
33
- "ඔබ ගැමුණු (Gamunu) නම් AI සහායකයායි. ඔබව නිර්මාණය කර ඇත්තේ මන්තිල විසිනි."
34
  "ඔබේ කාර්යය වන්නේ පරිශීලකයන්ගේ උපදෙස් නිවැරදිව පිලිපැදීම හා අසා ඇති ප්‍රශ්නවලට නිවැරදිව පිළිතුරු සපයමින් ඔවුන්ට සහය වීමයි.",
35
  instruction.strip(),
36
  input_text.strip(),
@@ -40,23 +42,40 @@ def infer(instruction, input_text=""):
40
  with torch.inference_mode():
41
  outputs = model.generate(
42
  **inputs,
43
- max_new_tokens=256,
44
- repetition_penalty=1.05,
 
 
45
  )
 
46
  text = tokenizer.decode(outputs[0], skip_special_tokens=True)
47
  if "### ප්‍රතිචාරය:" in text:
48
  text = text.split("### ප්‍රතිචාරය:")[-1].strip()
49
  return text
50
 
51
 
52
- with gr.Blocks(css="""
53
- .gradio-container {max-width: 1100px !important; margin:auto;}
54
- h1, h2, h3, h4, h5 {text-align:center;}
55
- #title-bar {background:linear-gradient(90deg,#804dee,#ee82ee);color:white;padding:0.6rem;border-radius:1rem;margin-bottom:0.6rem;}
56
- textarea, input {font-family:'Noto Sans Sinhala',sans-serif;}
57
- """, theme=gr.themes.Soft()) as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  gr.HTML("<div id='title-bar'><h1>🧠 Gamunu 4B Instruct Alpha</h1><h4>සිංහල Instruct LLM</h4></div>")
59
- # gr.Markdown("⚙️ **Pure Transformers Inference** | 💠 ZeroGPU GPU Burst")
60
 
61
  with gr.Row(equal_height=True):
62
  with gr.Column(scale=1, min_width=350):
@@ -70,12 +89,21 @@ textarea, input {font-family:'Noto Sans Sinhala',sans-serif;}
70
  placeholder="අමතර තොරතුරු ඇතුළත් කරන්න (ඇත්නම්)",
71
  lines=3,
72
  )
 
 
 
 
 
 
 
73
  run_btn = gr.Button("🔮 Generate Response", variant="primary", scale=1)
74
- with gr.Column(scale=1, min_width=350):
 
 
75
  output = gr.Markdown(label="🧩 Gamunu Response", elem_id="output-box")
76
 
77
- # Example categories
78
- with gr.Accordion("🧮 Example Prompts by Category", open=True):
79
  with gr.Tab("Maths"):
80
  gr.Examples(
81
  examples=[
@@ -97,8 +125,7 @@ textarea, input {font-family:'Noto Sans Sinhala',sans-serif;}
97
  examples=[
98
  ["ෆොටෝසින්තසිස් ක්‍රියාවලිය පැහැදිලි කරන්න.", ""],
99
  ["ජලයේ රසායනික සූත්‍රය කුමක්ද?", ""],
100
- ["""ජලය සහ සනීපාරක්ෂාව පිළිබඳ සංකල්පය,
101
- i. SMART ii. PRICES iii. WASH""", ""]
102
  ],
103
  inputs=[instruction],
104
  )
@@ -111,12 +138,23 @@ i. SMART ii. PRICES iii. WASH""", ""]
111
  inputs=[instruction],
112
  )
113
 
114
- run_btn.click(infer, inputs=[instruction, input_text], outputs=output)
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  gr.Markdown("""
117
  ---
118
- 🪶 **Model:** `manthilaffs/Gamunu-4B-Instruct-Alpha`
119
- © 2025 Gamunu Project | Experimental Release
120
  """)
121
 
122
  if __name__ == "__main__":
 
3
  import spaces
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
 
6
+ # Global model/tokenizer cache
7
  model = None
8
  tokenizer = None
9
 
 
19
  {}"""
20
 
21
  @spaces.GPU
22
+ def infer(instruction, input_text="", temperature=0.5, top_p=0.95, repetition_penalty=1.05, max_new_tokens=256):
23
+ """Main inference function with adjustable generation parameters."""
24
  global model, tokenizer
25
  if model is None:
26
  tokenizer = AutoTokenizer.from_pretrained("manthilaffs/Gamunu-4B-Instruct-Alpha")
27
  model = AutoModelForCausalLM.from_pretrained(
28
  "manthilaffs/Gamunu-4B-Instruct-Alpha",
29
+ torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
30
  device_map="auto",
31
  )
32
  model.eval()
33
 
34
  prompt = alpaca_prompt.format(
35
+ "ඔබ ගැමුණු (Gamunu) නම් AI සහායකයායි. ඔබව නිර්මාණය කර ඇත්තේ මන්තිල විසිනි. "
36
  "ඔබේ කාර්යය වන්නේ පරිශීලකයන්ගේ උපදෙස් නිවැරදිව පිලිපැදීම හා අසා ඇති ප්‍රශ්නවලට නිවැරදිව පිළිතුරු සපයමින් ඔවුන්ට සහය වීමයි.",
37
  instruction.strip(),
38
  input_text.strip(),
 
42
  with torch.inference_mode():
43
  outputs = model.generate(
44
  **inputs,
45
+ max_new_tokens=max_new_tokens,
46
+ temperature=temperature,
47
+ top_p=top_p,
48
+ repetition_penalty=repetition_penalty,
49
  )
50
+
51
  text = tokenizer.decode(outputs[0], skip_special_tokens=True)
52
  if "### ප්‍රතිචාරය:" in text:
53
  text = text.split("### ප්‍රතිචාරය:")[-1].strip()
54
  return text
55
 
56
 
57
+ # ----------------------- UI -----------------------
58
+ with gr.Blocks(
59
+ theme=gr.themes.Soft(),
60
+ css="""
61
+ .gradio-container {max-width: 1080px !important; margin:auto;}
62
+ h1, h2, h3, h4, h5 {text-align:center;}
63
+ #title-bar {
64
+ background:linear-gradient(90deg,#764de6,#e36cee);
65
+ color:white;
66
+ padding:0.7rem;
67
+ border-radius:1rem;
68
+ margin-bottom:0.8rem;
69
+ box-shadow:0 2px 8px rgba(0,0,0,0.15);
70
+ }
71
+ textarea, input, .gr-text-input, .gr-textbox {
72
+ font-family:'Noto Sans Sinhala',sans-serif !important;
73
+ }
74
+ #status-text {text-align:center; color:#555;}
75
+ """,
76
+ ) as demo:
77
+
78
  gr.HTML("<div id='title-bar'><h1>🧠 Gamunu 4B Instruct Alpha</h1><h4>සිංහල Instruct LLM</h4></div>")
 
79
 
80
  with gr.Row(equal_height=True):
81
  with gr.Column(scale=1, min_width=350):
 
89
  placeholder="අමතර තොරතුරු ඇතුළත් කරන්න (ඇත්නම්)",
90
  lines=3,
91
  )
92
+
93
+ with gr.Accordion("⚙️ Advanced Options", open=False):
94
+ temperature = gr.Slider(0.1, 1.5, value=0.5, step=0.05, label="🌡 Temperature (0 = more focused)")
95
+ top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.01, label="🎯 Top-p (Nucleus Sampling)")
96
+ repetition_penalty = gr.Slider(0.8, 2.0, value=1.05, step=0.05, label="♻️ Repetition Penalty")
97
+ max_new_tokens = gr.Slider(32, 1024, value=256, step=32, label="🔢 Max New Tokens")
98
+
99
  run_btn = gr.Button("🔮 Generate Response", variant="primary", scale=1)
100
+ status = gr.Markdown("", elem_id="status-text")
101
+
102
+ with gr.Column(scale=1, min_width=400):
103
  output = gr.Markdown(label="🧩 Gamunu Response", elem_id="output-box")
104
 
105
+ # --- Example prompts ---
106
+ with gr.Accordion("🧮 Example Prompts by Category", open=False):
107
  with gr.Tab("Maths"):
108
  gr.Examples(
109
  examples=[
 
125
  examples=[
126
  ["ෆොටෝසින්තසිස් ක්‍රියාවලිය පැහැදිලි කරන්න.", ""],
127
  ["ජලයේ රසායනික සූත්‍රය කුමක්ද?", ""],
128
+ ["ජලය සහ සනීපාරක්ෂාව පිළිබඳ සංකල්පය SMART PRICES WASH", ""],
 
129
  ],
130
  inputs=[instruction],
131
  )
 
138
  inputs=[instruction],
139
  )
140
 
141
+ # --- Loading feedback ---
142
+ def process_with_status(instruction, input_text, temperature, top_p, repetition_penalty, max_new_tokens):
143
+ yield "⏳ Generating response...", gr.update(interactive=False, value="⏳ Generating..."), ""
144
+ result = infer(instruction, input_text, temperature, top_p, repetition_penalty, max_new_tokens)
145
+ yield "", gr.update(interactive=True, value="🔮 Generate Response"), result
146
+
147
+ run_btn.click(
148
+ process_with_status,
149
+ inputs=[instruction, input_text, temperature, top_p, repetition_penalty, max_new_tokens],
150
+ outputs=[status, run_btn, output],
151
+ show_progress=True,
152
+ )
153
 
154
  gr.Markdown("""
155
  ---
156
+ 🪶 **Model:** [`manthilaffs/Gamunu-4B-Instruct-Alpha`](https://huggingface.co/manthilaffs/Gamunu-4B-Instruct-Alpha)
157
+ © 2025 Gamunu Project | Experimental Release
158
  """)
159
 
160
  if __name__ == "__main__":