Spaces:

manthilaffs
/

Gamunu-Inference

Sleeping

App Files Community

manthilaffs committed on Nov 4, 2025

Commit

d8ac1c6

verified ·

1 Parent(s): 8a9554d

Update app.py

Browse files

Files changed (1) hide show

app.py +140 -20

app.py CHANGED Viewed

@@ -1,25 +1,24 @@
 import gradio as gr
 import torch
 import spaces
-from transformers import AutoModelForCausalLM, AutoTokenizer
 model = None
 tokenizer = None
 alpaca_prompt = """පහත දැක්වෙන්නේ යම් කාර්යයක් පිළිබඳ විස්තර කරන උපදෙසක් සහ එයට අදාළ තොරතුරු ඇතුළත් ආදානයකි. ඉල්ලූ කාර්යය නිවැරදිව සම්පූර්ණ කළ හැකි ප්‍රතිචාරයක් සපයන්න.
 ### උපදෙස:
 {}
 ### ආදානය:
 {}
 ### ප්‍රතිචාරය:
 {}"""
 @spaces.GPU
 def infer(message, history, enable_history=False, max_new_tokens=512):
     global model, tokenizer
     if model is None:
         tokenizer = AutoTokenizer.from_pretrained("manthilaffs/Gamunu-4B-Instruct-Alpha")
         model = AutoModelForCausalLM.from_pretrained(
@@ -28,7 +27,7 @@ def infer(message, history, enable_history=False, max_new_tokens=512):
             device_map="auto",
         )
         model.eval()
     # Add history only if enabled
     if enable_history and history:
         prev = "\n".join(
@@ -37,29 +36,149 @@ def infer(message, history, enable_history=False, max_new_tokens=512):
         context = f"{prev}\n\n{message}"
     else:
         context = message
     prompt = alpaca_prompt.format(
         "ඔබ ගැමුණු (Gamunu) නම් AI සහායකයායි. ඔබව නිර්මාණය කර ඇත්තේ මන්තිල විසිනි. "
         "ඔබේ කාර්යය වන්නේ පරිශීලකයන්ගේ උපදෙස් නිවැරදිව පිලිපැදීම හා අසා ඇති ප්‍රශ්නවලට නිවැරදිව පිළිතුරු සපයමින් ඔවුන්ට සහය වීමයි.",
         context.strip(),
         "",
     )
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    with torch.inference_mode():
-        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
-    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    if "### ප්‍රතිචාරය:" in text:
-        text = text.split("### ප්‍රතිචාරය:")[-1].strip()
-    return text
-# ---------------- UI ----------------
-with gr.Blocks() as demo:
-    # gr.Markdown("## 🧠 Gamunu 4B Instruct Alpha — Sinhala Instruct LLM")
-    # Use only stable arguments (compatible everywhere)
     chat = gr.ChatInterface(
         fn=lambda message, history: infer(message, history, enable_history.value, max_new_tokens.value),
         title="🧠 Gamunu 4B Instruct — සිංහල LLM",
@@ -71,15 +190,16 @@ with gr.Blocks() as demo:
             ["ඔබ කවියෙකු ලෙස 'ගඟක්' ගැන කෙටි කවියක් ලියන්න."],
         ]
     )
     with gr.Accordion("⚙️ Advanced Settings", open=False):
         enable_history = gr.Checkbox(label="Enable chat history", value=False)
         max_new_tokens = gr.Slider(64, 1024, value=512, step=32, label="🔢 Max New Tokens")
     gr.Markdown("""
 ---
-🪶 **Model:** [`manthilaffs/Gamunu-4B-Instruct-Alpha`](https://huggingface.co/manthilaffs/Gamunu-4B-Instruct-Alpha)
 © 2025 Gamunu Project | Experimental Release
 """)
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import torch
 import spaces
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from threading import Thread
 model = None
 tokenizer = None
 alpaca_prompt = """පහත දැක්වෙන්නේ යම් කාර්යයක් පිළිබඳ විස්තර කරන උපදෙසක් සහ එයට අදාළ තොරතුරු ඇතුළත් ආදානයකි. ඉල්ලූ කාර්යය නිවැරදිව සම්පූර්ණ කළ හැකි ප්‍රතිචාරයක් සපයන්න.
 ### උපදෙස:
 {}
 ### ආදානය:
 {}
 ### ප්‍රතිචාරය:
 {}"""
 @spaces.GPU
 def infer(message, history, enable_history=False, max_new_tokens=512):
     global model, tokenizer
     if model is None:
         tokenizer = AutoTokenizer.from_pretrained("manthilaffs/Gamunu-4B-Instruct-Alpha")
         model = AutoModelForCausalLM.from_pretrained(
             device_map="auto",
         )
         model.eval()
     # Add history only if enabled
     if enable_history and history:
         prev = "\n".join(
         context = f"{prev}\n\n{message}"
     else:
         context = message
     prompt = alpaca_prompt.format(
         "ඔබ ගැමුණු (Gamunu) නම් AI සහායකයායි. ඔබව නිර්මාණය කර ඇත්තේ මන්තිල විසිනි. "
         "ඔබේ කාර්යය වන්නේ පරිශීලකයන්ගේ උපදෙස් නිවැරදිව පිලිපැදීම හා අසා ඇති ප්‍රශ්නවලට නිවැරදිව පිළිතුරු සපයමින් ඔවුන්ට සහය වීමයි.",
         context.strip(),
         "",
     )
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    # Setup streaming
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+    generation_kwargs = dict(
+        **inputs,
+        max_new_tokens=max_new_tokens,
+        streamer=streamer,
+    )
+    # Start generation in a separate thread
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    # Stream the output
+    partial_text = ""
+    response_started = False
+    for new_text in streamer:
+        partial_text += new_text
+        # Check if we've reached the response section
+        if not response_started and "### ප්‍රතිචාරය:" in partial_text:
+            partial_text = partial_text.split("### ප්‍රතිචාරය:")[-1].strip()
+            response_started = True
+        if response_started:
+            yield partial_text
+        elif "### ප්‍රතිචාරය:" not in prompt:
+            # If prompt doesn't contain the marker, stream everything
+            yield partial_text
+    thread.join()
+# Custom CSS for styling
+custom_css = """
+#splash-screen {
+    position: fixed;
+    top: 0;
+    left: 0;
+    width: 100vw;
+    height: 100vh;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    z-index: 9999;
+    animation: fadeOut 1s ease-in-out 3s forwards;
+}
+@keyframes fadeOut {
+    to {
+        opacity: 0;
+        visibility: hidden;
+    }
+}
+.sinhala-animation {
+    font-size: 120px;
+    font-weight: bold;
+    color: white;
+    animation: letterRotate 3s ease-in-out infinite;
+    text-shadow: 0 0 20px rgba(255,255,255,0.5);
+}
+@keyframes letterRotate {
+    0%, 100% { transform: rotateY(0deg) scale(1); opacity: 1; }
+    25% { transform: rotateY(180deg) scale(1.2); opacity: 0.8; }
+    50% { transform: rotateY(360deg) scale(1); opacity: 1; }
+    75% { transform: rotateY(540deg) scale(1.2); opacity: 0.8; }
+}
+/* Smaller font sizes for chat */
+.message-wrap .message {
+    font-size: 0.9rem !important;
+}
+.message-wrap p {
+    font-size: 0.9rem !important;
+}
+/* Avatar styling */
+.message-wrap.user .avatar-container::before {
+    content: "👤";
+    font-size: 24px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    width: 40px;
+    height: 40px;
+    border-radius: 50%;
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+}
+.message-wrap.bot .avatar-container::before {
+    content: "🧠";
+    font-size: 24px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    width: 40px;
+    height: 40px;
+    border-radius: 50%;
+    background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
+}
+/* Hide default avatars if they exist */
+.avatar-container img {
+    display: none;
+}
+.avatar-container {
+    width: 40px !important;
+    height: 40px !important;
+    min-width: 40px !important;
+}
+"""
+# Splash screen HTML
+splash_html = """
+<div id="splash-screen">
+    <div class="sinhala-animation">ගැමුණු</div>
+</div>
+<script>
+    setTimeout(() => {
+        document.getElementById('splash-screen').style.display = 'none';
+    }, 4000);
+</script>
+"""
+# ---------------- UI ----------------
+with gr.Blocks(css=custom_css) as demo:
+    gr.HTML(splash_html)
     chat = gr.ChatInterface(
         fn=lambda message, history: infer(message, history, enable_history.value, max_new_tokens.value),
         title="🧠 Gamunu 4B Instruct — සිංහල LLM",
             ["ඔබ කවියෙකු ලෙස 'ගඟක්' ගැන කෙටි කවියක් ලියන්න."],
         ]
     )
     with gr.Accordion("⚙️ Advanced Settings", open=False):
         enable_history = gr.Checkbox(label="Enable chat history", value=False)
         max_new_tokens = gr.Slider(64, 1024, value=512, step=32, label="🔢 Max New Tokens")
     gr.Markdown("""
 ---
+🪶 **Model:** [manthilaffs/Gamunu-4B-Instruct-Alpha](https://huggingface.co/manthilaffs/Gamunu-4B-Instruct-Alpha)
 © 2025 Gamunu Project | Experimental Release
 """)
 if __name__ == "__main__":
+    demo.launch()