Spaces:

amkyawdev
/

mm-llm-tiny-space

Sleeping

File size: 2,148 Bytes

20b1dea
835f437
20b1dea
 
 
 
835f437
 
20b1dea
 
 
835f437
20b1dea
835f437
 
 
20b1dea
835f437
 
20b1dea
835f437
 
 
 
 
 
 
 
 
 
 
 
20b1dea
835f437
 
20b1dea
835f437
20b1dea
835f437
 
 
 
 
 
 
20b1dea
835f437
 
20b1dea
835f437
20b1dea
835f437
20b1dea
 
 
835f437
 
20b1dea
835f437
 
 
20b1dea
835f437
20b1dea
835f437
 
 
 
20b1dea
 
835f437
 
 
 
 
 
 
20b1dea

"""
Myanmar LLM Gradio App - Lite Version
Model: amkyawdev/mm-llm-tiny
"""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub identifier shared by the tokenizer and the lazily loaded model.
MODEL_NAME = "amkyawdev/mm-llm-tiny"

print(f"Loading {MODEL_NAME}...")

# Load only the tokenizer at import time (saves memory); the model weights
# are deferred until get_model() is first called.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Fall back to EOS for padding only when the tokenizer defines no pad token,
# so a pad token shipped with the checkpoint is not overwritten.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Populated by get_model() on first request (lazy load).
model = None

def get_model():
    """Return the shared causal-LM instance, loading it on the first call.

    The loaded model is stored in the module-level ``model`` variable, so
    later calls return the same object without loading again. The model is
    switched to evaluation mode right after loading.
    """
    global model

    # Fast path: already loaded by an earlier call.
    if model is not None:
        return model

    print("Loading model...")
    load_options = {
        "torch_dtype": torch.float32,
        "low_cpu_mem_usage": True,
    }
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_options)
    model.eval()
    print("Model loaded!")
    return model

def generate(prompt, max_tokens=128, temp=0.7):
    """Generate a continuation of ``prompt`` with the lazily loaded model.

    Args:
        prompt: Text to continue. ``None``, empty, or whitespace-only input
            returns an empty string without loading the model.
        max_tokens: Upper bound on newly generated tokens; coerced to ``int``
            before being passed to the model.
        temp: Sampling temperature. Values ``<= 0`` select greedy decoding.

    Returns:
        Only the newly generated text (the prompt is excluded), stripped of
        surrounding whitespace.
    """
    # Nothing to continue: avoid feeding an empty sequence to the model.
    if not prompt or not prompt.strip():
        return ""

    m = get_model()

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256)
    # Number of prompt tokens actually fed to the model (after truncation).
    prompt_len = inputs["input_ids"].shape[1]

    do_sample = temp > 0
    gen_kwargs = {
        "max_new_tokens": int(max_tokens),
        "do_sample": do_sample,
        "pad_token_id": tokenizer.pad_token_id,
    }
    # Temperature only applies when sampling; omit it for greedy decoding.
    if do_sample:
        gen_kwargs["temperature"] = temp

    with torch.no_grad():
        outputs = m.generate(**inputs, **gen_kwargs)

    # Strip the prompt at token level. Slicing the decoded string by
    # len(prompt) is wrong when the prompt was truncated or when decoding
    # does not reproduce the prompt text character-for-character.
    new_tokens = outputs[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# UI
# UI: a single-page Blocks layout with a prompt box, a response box, two
# generation sliders, a trigger button, and clickable example prompts.
with gr.Blocks(title="Myanmar LLM") as app:
    gr.Markdown("# 🇲🇲 Myanmar LLM")
    gr.Markdown("Model: **amkyawdev/mm-llm-tiny**")
    
    # Prompt input and model output side by side.
    with gr.Row():
        msg = gr.Textbox(label="Message", placeholder="မေးခွန်းရေးသားပါ။...")
        output = gr.Textbox(label="Response")
    
    # Generation controls; the defaults mirror generate()'s defaults (128, 0.7).
    with gr.Row():
        max_tokens = gr.Slider(32, 256, value=128, step=16, label="Max Tokens")
        # Minimum is 0.1, so the UI always calls generate() with temp > 0.
        temp = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
    
    btn = gr.Button("Generate")
    
    # Clicking the button calls generate(msg, max_tokens, temp) and writes the
    # returned string into the response box.
    btn.click(
        generate,
        inputs=[msg, max_tokens, temp],
        outputs=output
    )
    
    # Example rows are ordered (message, max tokens, temperature) to match
    # the `inputs` list below.
    gr.Examples(
        examples=[
            ["Hello မြန်မာလို ပြန်ပါ။", 64, 0.7],
            ["Python နဲ့ list ရေးပါ။", 128, 0.7],
        ],
        inputs=[msg, max_tokens, temp]
    )

# Starts the server at import time (no __main__ guard).
# NOTE(review): share=True requests a public share link; this is presumably
# unnecessary when hosted on Hugging Face Spaces — confirm and drop if so.
app.launch(share=True)