manthilaffs committed on
Commit
3ab3391
·
verified ·
1 Parent(s): 6420ef6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -32
app.py CHANGED
@@ -3,7 +3,6 @@ import torch
3
  import spaces
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
 
6
- # Global model/tokenizer cache
7
  model = None
8
  tokenizer = None
9
 
@@ -19,8 +18,8 @@ alpaca_prompt = """පහත දැක්වෙන්නේ යම් කාර
19
  {}"""
20
 
21
  @spaces.GPU
22
- def infer(instruction, input_text="", temperature=0.5, top_p=0.95, repetition_penalty=1.05, max_new_tokens=256):
23
- """Main inference function with adjustable generation parameters."""
24
  global model, tokenizer
25
  if model is None:
26
  tokenizer = AutoTokenizer.from_pretrained("manthilaffs/Gamunu-4B-Instruct-Alpha")
@@ -40,13 +39,7 @@ def infer(instruction, input_text="", temperature=0.5, top_p=0.95, repetition_pe
40
 
41
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
42
  with torch.inference_mode():
43
- outputs = model.generate(
44
- **inputs,
45
- max_new_tokens=max_new_tokens,
46
- temperature=temperature,
47
- top_p=top_p,
48
- repetition_penalty=repetition_penalty,
49
- )
50
 
51
  text = tokenizer.decode(outputs[0], skip_special_tokens=True)
52
  if "### ප්‍රතිචාරය:" in text:
@@ -58,8 +51,8 @@ def infer(instruction, input_text="", temperature=0.5, top_p=0.95, repetition_pe
58
  with gr.Blocks(
59
  theme=gr.themes.Soft(),
60
  css="""
61
- .gradio-container {max-width: 1080px !important; margin:auto;}
62
- h1, h2, h3, h4, h5 {text-align:center;}
63
  #title-bar {
64
  background:linear-gradient(90deg,#764de6,#e36cee);
65
  color:white;
@@ -68,17 +61,17 @@ with gr.Blocks(
68
  margin-bottom:0.8rem;
69
  box-shadow:0 2px 8px rgba(0,0,0,0.15);
70
  }
71
- textarea, input, .gr-text-input, .gr-textbox {
72
  font-family:'Noto Sans Sinhala',sans-serif !important;
73
  }
74
- #status-text {text-align:center; color:#555;}
75
  """,
76
  ) as demo:
77
 
78
  gr.HTML("<div id='title-bar'><h1>🧠 Gamunu 4B Instruct Alpha</h1><h4>සිංහල Instruct LLM</h4></div>")
79
 
80
  with gr.Row(equal_height=True):
81
- with gr.Column(scale=1, min_width=350):
82
  instruction = gr.Textbox(
83
  label="🧾 Instruction / Question",
84
  placeholder="උදා: හායි! මම සමන්. ඔයාට කොහොමද?",
@@ -90,16 +83,15 @@ with gr.Blocks(
90
  lines=3,
91
  )
92
 
93
- with gr.Accordion("⚙️ Advanced Options", open=False):
94
- # temperature = gr.Slider(0.1, 1.5, value=0.5, step=0.05, label="🌡 Temperature (0 = more focused)")
95
- # top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.01, label="🎯 Top-p (Nucleus Sampling)")
96
- # repetition_penalty = gr.Slider(0.8, 2.0, value=1.05, step=0.05, label="♻️ Repetition Penalty")
97
- max_new_tokens = gr.Slider(32, 1024, value=256, step=32, label="Max New Tokens")
98
-
99
- run_btn = gr.Button("Generate Response", variant="primary", scale=1)
100
 
 
 
101
 
102
- with gr.Column(scale=1, min_width=400):
103
  output = gr.Markdown(label="🧩 Gamunu Response", elem_id="output-box")
104
 
105
  # --- Example prompts ---
@@ -115,7 +107,8 @@ with gr.Blocks(
115
  with gr.Tab("Roleplay"):
116
  gr.Examples(
117
  examples=[
118
- ["ඔබ ගුරුවරයෙකු ලෙස ක්‍රියාකරන්න. ශිෂ්‍යයාට ගණිතය උගන්වන්න.", "රු. 30 කින් මිලදී ගත් දේ රු. 60 නම් මිල වෙනස ප්‍රතිශතයකින් කීයද?"],
 
119
  ["ඔබ පරිසර විද්‍යාඥයෙකු ලෙස වායු මණ්ඩලය පැහැදිලි කරන්න.", ""],
120
  ],
121
  inputs=[instruction],
@@ -125,7 +118,6 @@ with gr.Blocks(
125
  examples=[
126
  ["ෆොටෝසින්තසිස් ක්‍රියාවලිය පැහැදිලි කරන්න.", ""],
127
  ["ජලයේ රසායනික සූත්‍රය කුමක්ද?", ""],
128
- ["ජලය සහ සනීපාරක්ෂාව පිළිබඳ සංකල්පය SMART PRICES WASH", ""],
129
  ],
130
  inputs=[instruction],
131
  )
@@ -138,16 +130,16 @@ with gr.Blocks(
138
  inputs=[instruction],
139
  )
140
 
141
- # --- Loading feedback ---
142
- def process_with_status(instruction, input_text, temperature, top_p, repetition_penalty, max_new_tokens):
143
- yield "⏳ Generating response...", gr.update(interactive=False, value="⏳ Generating..."), ""
144
- result = infer(instruction, input_text, temperature, top_p, repetition_penalty, max_new_tokens)
145
- yield "", gr.update(interactive=True, value="🔮 Generate Response"), result
146
 
147
  run_btn.click(
148
  process_with_status,
149
- inputs=[instruction, input_text, max_new_tokens],
150
- outputs=[status, run_btn, output],
151
  show_progress=True,
152
  )
153
 
 
3
  import spaces
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
 
 
6
  model = None
7
  tokenizer = None
8
 
 
18
  {}"""
19
 
20
  @spaces.GPU
21
+ def infer(instruction, input_text="", max_new_tokens=512):
22
+ """Pure Transformers generation lets model defaults decide behavior."""
23
  global model, tokenizer
24
  if model is None:
25
  tokenizer = AutoTokenizer.from_pretrained("manthilaffs/Gamunu-4B-Instruct-Alpha")
 
39
 
40
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
41
  with torch.inference_mode():
42
+ outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
 
 
 
 
 
 
43
 
44
  text = tokenizer.decode(outputs[0], skip_special_tokens=True)
45
  if "### ප්‍රතිචාරය:" in text:
 
51
  with gr.Blocks(
52
  theme=gr.themes.Soft(),
53
  css="""
54
+ .gradio-container {max-width:1080px !important; margin:auto;}
55
+ h1,h2,h3,h4,h5 {text-align:center;}
56
  #title-bar {
57
  background:linear-gradient(90deg,#764de6,#e36cee);
58
  color:white;
 
61
  margin-bottom:0.8rem;
62
  box-shadow:0 2px 8px rgba(0,0,0,0.15);
63
  }
64
+ textarea,input,.gr-text-input,.gr-textbox {
65
  font-family:'Noto Sans Sinhala',sans-serif !important;
66
  }
67
+ #status-text {text-align:center;color:#555;}
68
  """,
69
  ) as demo:
70
 
71
  gr.HTML("<div id='title-bar'><h1>🧠 Gamunu 4B Instruct Alpha</h1><h4>සිංහල Instruct LLM</h4></div>")
72
 
73
  with gr.Row(equal_height=True):
74
+ with gr.Column(scale=1,min_width=350):
75
  instruction = gr.Textbox(
76
  label="🧾 Instruction / Question",
77
  placeholder="උදා: හායි! මම සමන්. ඔයාට කොහොමද?",
 
83
  lines=3,
84
  )
85
 
86
+ with gr.Accordion("⚙️ Advanced Option", open=False):
87
+ max_new_tokens = gr.Slider(
88
+ 64, 1024, value=512, step=32, label="🔢 Max New Tokens"
89
+ )
 
 
 
90
 
91
+ run_btn = gr.Button("🔮 Generate Response", variant="primary")
92
+ status = gr.Markdown("", elem_id="status-text")
93
 
94
+ with gr.Column(scale=1,min_width=400):
95
  output = gr.Markdown(label="🧩 Gamunu Response", elem_id="output-box")
96
 
97
  # --- Example prompts ---
 
107
  with gr.Tab("Roleplay"):
108
  gr.Examples(
109
  examples=[
110
+ ["ඔබ ගුරුවරයෙකු ලෙස ක්‍රියාකරන්න. ශිෂ්‍යයාට ගණිතය උගන්වන්න.",
111
+ "රු. 30 කින් මිලදී ගත් දේ රු. 60 නම් මිල වෙනස ප්‍රතිශතයකින් කීයද?"],
112
  ["ඔබ පරිසර විද්‍යාඥයෙකු ලෙස වායු මණ්ඩලය පැහැදිලි කරන්න.", ""],
113
  ],
114
  inputs=[instruction],
 
118
  examples=[
119
  ["ෆොටෝසින්තසිස් ක්‍රියාවලිය පැහැදිලි කරන්න.", ""],
120
  ["ජලයේ රසායනික සූත්‍රය කුමක්ද?", ""],
 
121
  ],
122
  inputs=[instruction],
123
  )
 
130
  inputs=[instruction],
131
  )
132
 
133
+ # --- Button feedback ---
134
+ def process_with_status(instruction,input_text,max_new_tokens):
135
+ yield "⏳ Generating response...", gr.update(interactive=False,value="⏳ Generating..."), ""
136
+ result = infer(instruction,input_text,max_new_tokens)
137
+ yield "", gr.update(interactive=True,value="🔮 Generate Response"), result
138
 
139
  run_btn.click(
140
  process_with_status,
141
+ inputs=[instruction,input_text,max_new_tokens],
142
+ outputs=[status,run_btn,output],
143
  show_progress=True,
144
  )
145