Spaces:

manthilaffs
/

Gamunu-Inference

Running on Zero

App Files Files Community

manthilaffs committed on Nov 5, 2025

Commit

15d1bb2

verified ·

1 Parent(s): 9c5c378

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -197

app.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import gradio as gr
 import torch
 import spaces
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-from threading import Thread
 model = None
 tokenizer = None
@@ -15,8 +14,10 @@ alpaca_prompt = """පහත දැක්වෙන්නේ යම් කාර
 ### ප්‍රතිචාරය:
 {}"""
-def load_model():
     global model, tokenizer
     if model is None:
         tokenizer = AutoTokenizer.from_pretrained("manthilaffs/Gamunu-4B-Instruct-Alpha")
         model = AutoModelForCausalLM.from_pretrained(
@@ -25,18 +26,11 @@ def load_model():
             device_map="auto",
         )
         model.eval()
-@spaces.GPU
-def generate_response(message, history, enable_history=False, max_new_tokens=1024):
-    global model, tokenizer
-    load_model()
     # Add history only if enabled
     if enable_history and history:
         prev = "\n".join(
-            [f"User: {h['content']}\nGamunu: {h.get('content', '')}"
-             for h in history if h.get('role') == 'assistant']
         )
         context = f"{prev}\n\n{message}"
     else:
@@ -51,62 +45,18 @@ def generate_response(message, history, enable_history=False, max_new_tokens=102
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    # Initialize the streamer
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-    # Generation parameters
-    generation_kwargs = dict(
-        **inputs,
-        max_new_tokens=max_new_tokens,
-        streamer=streamer,
-        do_sample=True,
-    )
-    # Start generation in a separate thread
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-    # Stream the response
-    full_text = ""
-    for new_text in streamer:
-        full_text += new_text
-        # Check if we've reached the response section and extract it
-        if "### ප්‍රතිචාරය:" in full_text:
-            response_text = full_text.split("### ප්‍රතිචාරය:")[-1].strip()
-            yield response_text
-        else:
-            # Still building up to the response marker, yield what we have
-            yield full_text.strip()
-    # Make sure thread completes
-    thread.join()
-    # Final yield with cleaned response
-    if "### ප්‍රතිචාරය:" in full_text:
-        final_response = full_text.split("### ප්‍රතිචාරය:")[-1].strip()
-    else:
-        final_response = full_text.strip()
-    # Ensure we yield at least once
-    if final_response:
-        yield final_response
-# Custom CSS for styling with copy button
 custom_css = """
-/* Container width constraints for PC screens */
-.gradio-container {
-    max-width: 1200px !important;
-    margin: 0 auto !important;
-}
-/* Chat interface max width */
-.contain {
-    max-width: 900px !important;
-    margin: 0 auto !important;
-}
 #splash-screen {
     position: fixed;
     top: 0;
@@ -143,42 +93,6 @@ custom_css = """
     75% { transform: rotateY(540deg) scale(1.2); opacity: 0.8; }
 }
-/* Copy button styling */
-.message-wrap.bot {
-    position: relative;
-}
-.message-wrap.bot:hover .copy-button {
-    opacity: 1;
-}
-.copy-button {
-    position: absolute;
-    top: 8px;
-    right: 8px;
-    opacity: 0;
-    transition: opacity 0.2s;
-    background: rgba(102, 126, 234, 0.1);
-    border: 1px solid rgba(102, 126, 234, 0.3);
-    border-radius: 6px;
-    padding: 6px 10px;
-    cursor: pointer;
-    font-size: 12px;
-    color: #667eea;
-    z-index: 10;
-}
-.copy-button:hover {
-    background: rgba(102, 126, 234, 0.2);
-    border-color: rgba(102, 126, 234, 0.5);
-}
-.copy-button.copied {
-    background: rgba(34, 197, 94, 0.2);
-    border-color: rgba(34, 197, 94, 0.5);
-    color: #22c55e;
-}
 /* Smaller font sizes for chat */
 .message-wrap .message {
     font-size: 0.9rem !important;
@@ -188,15 +102,6 @@ custom_css = """
     font-size: 0.9rem !important;
 }
-/* Compact examples grid */
-.examples {
-    max-width: 100% !important;
-}
-.examples .wrap {
-    gap: 0.5rem !important;
-}
 /* Avatar styling */
 .message-wrap.user .avatar-container::before {
     content: "👤";
@@ -232,67 +137,6 @@ custom_css = """
     height: 40px !important;
     min-width: 40px !important;
 }
-/* Compact padding */
-.main {
-    padding: 1rem !important;
-}
-/* Title styling - more compact */
-h1 {
-    font-size: 1.5rem !important;
-    margin-bottom: 0.5rem !important;
-}
-/* Streaming cursor effect */
-.message-wrap.bot.streaming .message::after {
-    content: '▊';
-    animation: blink 1s infinite;
-    margin-left: 2px;
-}
-@keyframes blink {
-    0%, 50% { opacity: 1; }
-    51%, 100% { opacity: 0; }
-}
-"""
-# JavaScript for copy functionality
-copy_js = """
-<script>
-function addCopyButtons() {
-    // Remove existing copy buttons first
-    document.querySelectorAll('.copy-button').forEach(btn => btn.remove());
-    // Add copy buttons to bot messages
-    const botMessages = document.querySelectorAll('.message-wrap.bot .message');
-    botMessages.forEach((message, index) => {
-        if (!message.querySelector('.copy-button')) {
-            const copyBtn = document.createElement('button');
-            copyBtn.className = 'copy-button';
-            copyBtn.innerHTML = '📋 Copy';
-            copyBtn.onclick = function(e) {
-                e.stopPropagation();
-                const text = message.innerText;
-                navigator.clipboard.writeText(text).then(() => {
-                    copyBtn.innerHTML = '✅ Copied!';
-                    copyBtn.classList.add('copied');
-                    setTimeout(() => {
-                        copyBtn.innerHTML = '📋 Copy';
-                        copyBtn.classList.remove('copied');
-                    }, 2000);
-                });
-            };
-            message.parentElement.style.position = 'relative';
-            message.parentElement.appendChild(copyBtn);
-        }
-    });
-}
-// Run on load and periodically to catch new messages
-setInterval(addCopyButtons, 1000);
-window.addEventListener('load', addCopyButtons);
-</script>
 """
 # Splash screen HTML
@@ -308,39 +152,28 @@ splash_html = """
 """
 # ---------------- UI ----------------
-with gr.Blocks(css=custom_css, head=copy_js) as demo:
     gr.HTML(splash_html)
-    enable_history = gr.State(value=False)
-    max_new_tokens = gr.State(value=512)
-    with gr.Row():
-        with gr.Column():
-            chat = gr.ChatInterface(
-                fn=generate_response,
-                title="🧠 Gamunu 4B Instruct - Demo",
-                theme=gr.themes.Default(text_size="sm"),
-                type="messages",  # Use new messages format
-                examples=[
-                    ["හෙලෝ ගැමුණු! මම සමන්, ඔයාට කොහොමද?"],
-                    ["ෆොටෝසින්තසිස් ක්‍රියාවලිය පැහැදිලි කරන්න."],
-                    ["මෙම වාක්‍යය සිංහලයට පරිවර්තනය කරන්න: 'The sun rises in the east.'"],
-                    ["'completed' තත්ත්වයේ ඇති වාර්තා ගණන ගණනය කිරීමට දත්ත සමුදා විමසුමක් (database query) ගොඩනඟන්න."],
-                    ["ඔබ ගුරුවරයෙකු ලෙස ක්‍රියාකරන්න. ශිෂ්‍යයාට ඉතිහාසය උගන්වන්න."],
-                    ["පහත ප්‍රකාශය ප්‍රංශ භාෂාවට පරිවර්තනය කරන්න. Laughter is the best medicine."],
-                    ["ඝන වස්තුවක හා ද්‍රවයක පරිමාවන්හි වෙනස පැහැදිලි කරන්න."],
-                    ["වෙස් මුහුණු කලාවේ ප්‍රධාන අංග මොනවාද? වර්තමානයේ මෙම කලාව ප්‍රචලිතව පවතින ප්‍රදේශ මොනවාද?"]
-                ],
-                additional_inputs=[enable_history, max_new_tokens]
-            )
     with gr.Accordion("⚙️ Advanced Settings", open=False):
-        history_checkbox = gr.Checkbox(label="Enable chat history", value=False)
-        tokens_slider = gr.Slider(64, 1024, value=512, step=32, label="🔢 Max New Tokens")
-        # Update state when controls change
-        history_checkbox.change(fn=lambda x: x, inputs=history_checkbox, outputs=enable_history)
-        tokens_slider.change(fn=lambda x: x, inputs=tokens_slider, outputs=max_new_tokens)
     gr.Markdown("""
 ---

 import gradio as gr
 import torch
 import spaces
+from transformers import AutoModelForCausalLM, AutoTokenizer
 model = None
 tokenizer = None
 ### ප්‍රතිචාරය:
 {}"""
+@spaces.GPU
+def infer(message, history, enable_history=False, max_new_tokens=1024):
     global model, tokenizer
     if model is None:
         tokenizer = AutoTokenizer.from_pretrained("manthilaffs/Gamunu-4B-Instruct-Alpha")
         model = AutoModelForCausalLM.from_pretrained(
             device_map="auto",
         )
         model.eval()
     # Add history only if enabled
     if enable_history and history:
         prev = "\n".join(
+            [f"User: {h[0]}\nGamunu: {h[1]}" for h in history if h[1] is not None]
         )
         context = f"{prev}\n\n{message}"
     else:
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    with torch.inference_mode():
+        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
+    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    if "### ප්‍රතිචාරය:" in text:
+        text = text.split("### ප්‍රතිචාරය:")[-1].strip()
+    return text
+# Custom CSS for styling
 custom_css = """
 #splash-screen {
     position: fixed;
     top: 0;
     75% { transform: rotateY(540deg) scale(1.2); opacity: 0.8; }
 }
 /* Smaller font sizes for chat */
 .message-wrap .message {
     font-size: 0.9rem !important;
     font-size: 0.9rem !important;
 }
 /* Avatar styling */
 .message-wrap.user .avatar-container::before {
     content: "👤";
     height: 40px !important;
     min-width: 40px !important;
 }
 """
 # Splash screen HTML
 """
 # ---------------- UI ----------------
+with gr.Blocks(css=custom_css) as demo:
     gr.HTML(splash_html)
+    chat = gr.ChatInterface(
+        fn=lambda message, history: infer(message, history, enable_history.value, max_new_tokens.value),
+        title="🧠 Gamunu 4B Instruct - Demo",
+        theme=gr.themes.Default(text_size="sm"),
+        examples=[
+            ["හෙලෝ ගැමුණු! මම සමන්, ඔයාට කොහොමද?"],
+            ["ෆොටෝසින්තසිස් ක්‍රියාවලිය පැහැදිලි කරන්න."],
+            ["මෙම වාක්‍යය සිංහලයට පරිවර්තනය කරන්න: 'The sun rises in the east.'"],
+            ["'completed' තත්ත්වයේ ඇති වාර්තා ගණන ගණනය කිරීමට දත්ත සමුදා විමසුමක් (database query) ගොඩනඟන්න."],
+            ["ඔබ ගුරුවරයෙකු ලෙස ක්‍රියාකරන්න. ශිෂ්‍යයාට ඉතිහාසය උගන්වන්න."],
+            ["පහත ප්‍රකාශය ප්‍රංශ භාෂාවට පරිවර්තනය කරන්න. Laughter is the best medicine."],
+            ["ඝන වස්තුවක හා ද්‍රවයක පරිමාවන්හි වෙනස පැහැදිලි කරන්න."],
+            ["වෙස් මුහුණු කලාවේ ප්‍රධාන අංග මොනවාද? වර්තමානයේ මෙම කලාව ප්‍රචලිතව පවතින ප්‍රදේශ මොනවාද?"]
+        ]
+    )
     with gr.Accordion("⚙️ Advanced Settings", open=False):
+        enable_history = gr.Checkbox(label="Enable chat history", value=False)
+        max_new_tokens = gr.Slider(64, 1024, value=512, step=32, label="🔢 Max New Tokens")
     gr.Markdown("""
 ---