Spaces:

manthilaffs
/

Gamunu-Inference

Sleeping

App Files Community

manthilaffs committed on Nov 2, 2025

Commit

726fa01

verified ·

1 Parent(s): bd0068f

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -11

app.py CHANGED Viewed

@@ -4,12 +4,12 @@ import spaces
 from transformers import AutoTokenizer
 # ----------------------------------------------------
-# Global placeholders (lazy-load later)
 # ----------------------------------------------------
 model = None
 tokenizer = None
-# Sinhala Alpaca-style prompt
 alpaca_prompt = """පහත දැක්වෙන්නේ යම් කාර්යයක් පිළිබඳ විස්තර කරන උපදෙසක් සහ එයට අදාළ තොරතුරු ඇතුළත් ආදානයකි. ඉල්ලූ කාර්යය නිවැරදිව සම්පූර්ණ කළ හැකි ප්‍රතිචාරයක් සපයන්න.
 ### උපදෙස:
@@ -22,13 +22,14 @@ alpaca_prompt = """පහත දැක්වෙන්නේ යම් කාර
 {}"""
 # ----------------------------------------------------
-#  GPU inference function — executed on ZeroGPU
 # ----------------------------------------------------
 @spaces.GPU
 def infer(instruction, input_text=""):
     global model, tokenizer
-    # ✅ Import Unsloth *only after GPU is available*
     from unsloth import FastLanguageModel
     if model is None:
@@ -37,6 +38,7 @@ def infer(instruction, input_text=""):
         )
         FastLanguageModel.for_inference(model)
     prompt = alpaca_prompt.format(
         "ඔබ ගැමුණු නම් AI සහායකයායි. ඔබව නිර්මාණය කර ඇත්තේ මන්තිල විසිනි. "
         "ඔබේ කාර්යය වන්නේ පරිශීලකයන්ගේ ප්‍රශ්නවලට නිවැරදිව පිළිතුරු සපයමින් ඔවුන්ට සහය වීමයි.",
@@ -46,6 +48,11 @@ def infer(instruction, input_text=""):
     inputs = tokenizer(text=prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
@@ -64,7 +71,7 @@ def infer(instruction, input_text=""):
 # ----------------------------------------------------
-#  Gradio Interface
 # ----------------------------------------------------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(
@@ -72,21 +79,22 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         # 🧠 Gamunu 4B Instruct Alpha
         *Sinhala Instruct LLM — ZeroGPU Demo*
-        ⚙️ Runs via **Unsloth FastLanguageModel**
-        💠 Accelerated temporarily with `@spaces.GPU`
         """
     )
     with gr.Row():
         instruction = gr.Textbox(
             label="🧾 Instruction / Question",
-            placeholder="උදා: ඊයේ ඇපල් මිල 30ක් නම් අද 60ක් නම් ප්‍රතිශත වෙනස කීයද?",
             lines=2,
         )
     with gr.Row():
         input_text = gr.Textbox(
             label="📥 Additional Context (Optional)",
-            placeholder="අමතර තොරතුරු ඇතුළත් කරන්න",
             lines=2,
         )
@@ -95,12 +103,23 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     run_btn = gr.Button("🔮 Generate Response")
     run_btn.click(infer, inputs=[instruction, input_text], outputs=output)
     gr.Markdown(
         """
         ---
         🪶 **Model:** `manthilaffs/Gamunu-4B-Instruct-Alpha`
-        🧰 **Built with:** Unsloth + Gradio + ZeroGPU
-        © 2025 Gamunu Project | Experimental Release
         """
     )

 from transformers import AutoTokenizer
 # ----------------------------------------------------
+# Globals (lazy-loaded later)
 # ----------------------------------------------------
 model = None
 tokenizer = None
+# Sinhala Alpaca-style prompt template
 alpaca_prompt = """පහත දැක්වෙන්නේ යම් කාර්යයක් පිළිබඳ විස්තර කරන උපදෙසක් සහ එයට අදාළ තොරතුරු ඇතුළත් ආදානයකි. ඉල්ලූ කාර්යය නිවැරදිව සම්පූර්ණ කළ හැකි ප්‍රතිචාරයක් සපයන්න.
 ### උපදෙස:
 {}"""
 # ----------------------------------------------------
+# GPU inference — executed only when ZeroGPU allocates GPU
 # ----------------------------------------------------
 @spaces.GPU
 def infer(instruction, input_text=""):
+    """Run Gamunu inference on GPU (ZeroGPU burst)."""
     global model, tokenizer
+    # ✅ Lazy import Unsloth *after* GPU is available
     from unsloth import FastLanguageModel
     if model is None:
         )
         FastLanguageModel.for_inference(model)
+    # Build Alpaca-style prompt
     prompt = alpaca_prompt.format(
         "ඔබ ගැමුණු නම් AI සහායකයායි. ඔබව නිර්මාණය කර ඇත්තේ මන්තිල විසිනි. "
         "ඔබේ කාර්යය වන්නේ පරිශීලකයන්ගේ ප්‍රශ්නවලට නිවැරදිව පිළිතුරු සපයමින් ඔවුන්ට සහය වීමයි.",
     inputs = tokenizer(text=prompt, return_tensors="pt").to(model.device)
+    # ✅ Disable TorchDynamo (fix Gemma3 compile bug on Torch 2.8)
+    import torch._dynamo
+    torch._dynamo.config.suppress_errors = True
+    torch._dynamo.disable()
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
 # ----------------------------------------------------
+# Gradio Interface — ZeroGPU ready
 # ----------------------------------------------------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         # 🧠 Gamunu 4B Instruct Alpha
         *Sinhala Instruct LLM — ZeroGPU Demo*
+        ⚙️ Built with Unsloth FastLanguageModel
+        💠 Temporary GPU acceleration via `@spaces.GPU`
         """
     )
     with gr.Row():
         instruction = gr.Textbox(
             label="🧾 Instruction / Question",
+            placeholder="උදා: ඊයේ රු. 30 ක ඇපල් ගෙඩියක් අද රු. 60 නම් මිල වෙනස කීයද?",
             lines=2,
         )
     with gr.Row():
         input_text = gr.Textbox(
             label="📥 Additional Context (Optional)",
+            placeholder="අමතර තොරතුරු ඇතුළත් කරන්න (උදා: කාල පරිච්ඡේදය හෝ තත්ත්වය)",
             lines=2,
         )
     run_btn = gr.Button("🔮 Generate Response")
     run_btn.click(infer, inputs=[instruction, input_text], outputs=output)
+    # 🪄 Example questions for visitors
+    gr.Examples(
+        examples=[
+            ["ඉන්දියානු මහා සමුද්‍රය යනු කොහෙද?", ""],
+            ["කාලගුණය අවතැන් වන්නේ මොනවක් නිසාද?", ""],
+            ["මුල්ම අගමැති කවුද?", ""],
+        ],
+        inputs=[instruction],
+        label="🪄 Example Questions (Click to try)",
+    )
     gr.Markdown(
         """
         ---
         🪶 **Model:** `manthilaffs/Gamunu-4B-Instruct-Alpha`
+        🧰 **Stack:** Unsloth + Transformers + Gradio + ZeroGPU
+        © 2025 Gamunu Project | Experimental Research Release
         """
     )