"""
Myanmar LLM Gradio App - Lite Version
Model: amkyawdev/mm-llm-tiny
"""
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hugging Face Hub id of the checkpoint used for both tokenizer and model.
MODEL_NAME = "amkyawdev/mm-llm-tiny"
print(f"Loading {MODEL_NAME}...")

# Only the tokenizer is loaded at import time; the model weights are deferred
# until the first request (see get_model) to keep start-up memory low.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Fall back to EOS as the padding token only when the tokenizer does not
# define one, so a pad token shipped with the checkpoint is not overwritten.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Lazily populated model cache; stays None until get_model() is first called.
model = None
def get_model():
    """Return the shared causal-LM instance, loading it on first use.

    The loaded model is stored in the module-level ``model`` variable so that
    later calls return the same object without reloading. On first load the
    model is switched to evaluation mode.
    """
    global model

    # Fast path: a previous call already populated the cache.
    if model is not None:
        return model

    print("Loading model...")
    load_options = {
        "torch_dtype": torch.float32,
        "low_cpu_mem_usage": True,
    }
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_options)
    model.eval()
    print("Model loaded!")
    return model
def generate(prompt, max_tokens=128, temp=0.7):
    """Generate a continuation of ``prompt`` and return only the new text.

    Args:
        prompt: Text to continue. It is tokenized with truncation to at most
            256 tokens.
        max_tokens: Maximum number of newly generated tokens; converted with
            ``int()`` before use.
        temp: Sampling temperature. A value greater than 0 enables sampling;
            otherwise decoding is greedy and no temperature is passed.

    Returns:
        The decoded continuation with special tokens removed and surrounding
        whitespace stripped. An empty or whitespace-only prompt returns ``""``
        without loading or calling the model.
    """
    # Nothing to continue: avoid feeding an empty sequence to the model.
    if not prompt or not prompt.strip():
        return ""

    m = get_model()
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256)

    sampling = temp > 0
    gen_kwargs = {
        "max_new_tokens": int(max_tokens),
        "do_sample": sampling,
        "pad_token_id": tokenizer.pad_token_id,
    }
    # Temperature only applies when sampling; omit it for greedy decoding.
    if sampling:
        gen_kwargs["temperature"] = float(temp)

    with torch.no_grad():
        outputs = m.generate(**inputs, **gen_kwargs)

    # The returned sequence starts with the prompt tokens. Drop them by token
    # count rather than by character count: the decoded text is not guaranteed
    # to reproduce the prompt verbatim (truncation, tokenizer normalization).
    prompt_len = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# UI
# NOTE(review): the non-ASCII string literals below look mis-decoded
# (mojibake). They are kept exactly as found; restore them from the original
# file if it is available.
# NOTE(review): the nesting of components inside the rows was reconstructed
# from a copy with no indentation -- confirm against the intended layout.
with gr.Blocks(title="Myanmar LLM") as app:
    gr.Markdown("# π²π² Myanmar LLM")
    gr.Markdown("Model: **amkyawdev/mm-llm-tiny**")

    # Prompt on the left, generated text on the right.
    with gr.Row():
        msg = gr.Textbox(label="Message", placeholder="αα±αΈαα½ααΊαΈαα±αΈαα¬αΈαα«α...")
        output = gr.Textbox(label="Response")

    # Generation controls; both are passed to generate() with the prompt.
    with gr.Row():
        max_tokens = gr.Slider(32, 256, value=128, step=16, label="Max Tokens")
        temp = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")

    btn = gr.Button("Generate")
    btn.click(
        generate,
        inputs=[msg, max_tokens, temp],
        outputs=output,
    )

    # Clickable sample inputs: [message, max tokens, temperature].
    gr.Examples(
        examples=[
            ["Hello ααΌααΊαα¬ααα― ααΌααΊαα«α", 64, 0.7],
            ["Python αα²α· list αα±αΈαα«α", 128, 0.7],
        ],
        inputs=[msg, max_tokens, temp],
    )

# Start the web server; share=True asks Gradio for a public share link.
app.launch(share=True)