Spaces:

manthilaffs
/

Gamunu-Inference

Running on Zero

App Files Files Community

manthilaffs committed on Nov 2, 2025

Commit

6d3b3e1

verified ·

1 Parent(s): 9a3408a

Create app.py

Browse files

Files changed (1) hide show

app.py +105 -0

app.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import gradio as gr
+import torch
+import spaces
+from unsloth import FastLanguageModel
+from transformers import TextStreamer
+# ----------------------------------------------------
+#  Lazy model loading (GPU-accelerated on demand)
+# ----------------------------------------------------
+# Module-level cache for the loaded model and its tokenizer. Both start as
+# None so that importing this module does not load any weights; infer()
+# assigns them on its first call and reuses them afterwards.
+model = None
+tokenizer = None
+# Sinhala Alpaca-style prompt.
+# The template has three positional `{}` slots that str.format() fills in
+# order: the text under the first `###` header, the text under the second
+# `###` header, and the text that follows the third `###` header.
+# NOTE(review): the three headers presumably correspond to Alpaca's
+# "Instruction" / "Input" / "Response" sections — confirm against the
+# format the model was fine-tuned on before changing any of this text.
+alpaca_prompt = """පහත දැක්වෙන්නේ යම් කාර්යයක් පිළිබඳ විස්තර කරන උපදෙසක් සහ එයට අදාළ තොරතුරු ඇතුළත් ආදානයකි. ඉල්ලූ කාර්යය නිවැරදිව සම්පූර්ණ කළ හැකි ප්‍රතිචාරයක් සපයන්න.
+### උපදෙස:
+{}
+### ආදානය:
+{}
+### ප්‍රතිචාරය:
+{}"""
+# ----------------------------------------------------
+#  GPU inference function — executed on ZeroGPU
+# ----------------------------------------------------
+@spaces.GPU
+def infer(instruction, input_text=""):
+    global model, tokenizer
+    if model is None:
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            "manthilaffs/Gamunu-4B-Instruct-Alpha"
+        )
+        FastLanguageModel.for_inference(model)
+    prompt = alpaca_prompt.format(
+        "ඔබ ගැමුණු නම් AI සහායකයායි. ඔබව නිර්මාණය කර ඇත්තේ මන්තිල විසිනි. "
+        "ඔබේ කාර්යය වන්නේ පරිශීලකයන්ගේ ප්‍රශ්නවලට නිවැරදිව පිළිතුරු සපයමින් ඔවුන්ට සහය වීමයි.",
+        instruction.strip(),
+        input_text.strip(),
+    )
+    inputs = tokenizer(text=prompt, return_tensors="pt").to(model.device)
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=256,
+            temperature=0.4,
+            top_k=64,
+            top_p=0.95,
+            min_p=0.75,
+        )
+    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    if "### ප්‍රතිචාරය:" in text:
+        text = text.split("### ප්‍රතිචාරය:")[-1].strip()
+    return text
+# ----------------------------------------------------
+#  Gradio Interface
+# ----------------------------------------------------
+# Build the UI. Components are laid out in the order they are created inside
+# the Blocks context, so statement order here is the page order.
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    # Page header.
+    gr.Markdown(
+        """
+        # 🧠 Gamunu 4B Instruct Alpha
+        *Sinhala Instruct LLM — ZeroGPU Demo*
+        ⚙️ Runs via **Unsloth FastLanguageModel**
+        💠 Accelerated temporarily with `@spaces.GPU`
+        """
+    )
+    # First input: the question, passed to infer() as `instruction`.
+    with gr.Row():
+        instruction = gr.Textbox(
+            label="🧾 Instruction / Question",
+            placeholder="උදා: ඊයේ ඇපල් මිල 30ක් නම් අද 60ක් නම් ප්‍රතිශත වෙනස කීයද?",
+            lines=2,
+        )
+    # Second input: optional context, passed to infer() as `input_text`.
+    with gr.Row():
+        input_text = gr.Textbox(
+            label="📥 Additional Context (Optional)",
+            placeholder="අමතර තොරතුරු ඇතුළත් කරන්න",
+            lines=2,
+        )
+    # The string returned by infer() is rendered here as Markdown.
+    output = gr.Markdown(label="🧩 Gamunu Response")
+    run_btn = gr.Button("🔮 Generate Response")
+    # Clicking the button calls infer(instruction, input_text) with the two
+    # textbox values and writes the return value into `output`.
+    run_btn.click(infer, inputs=[instruction, input_text], outputs=output)
+    # Page footer.
+    gr.Markdown(
+        """
+        ---
+        🪶 **Model:** `manthilaffs/Gamunu-4B-Instruct-Alpha`
+        🧰 **Built with:** Unsloth + Gradio + ZeroGPU
+        © 2025 Gamunu Project | Experimental Release
+        """
+    )
+# Start the Gradio server only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()