""" Myanmar LLM Gradio App - Lite Version Model: amkyawdev/mm-llm-tiny """ import gradio as gr import torch from transformers import AutoModelForCausalLM, AutoTokenizer MODEL_NAME = "amkyawdev/mm-llm-tiny" print(f"Loading {MODEL_NAME}...") # Load tokenizer only first (saves memory) tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) tokenizer.pad_token = tokenizer.eos_token # Model loads on first request (lazy load) model = None def get_model(): global model if model is None: print("Loading model...") model = AutoModelForCausalLM.from_pretrained( MODEL_NAME, torch_dtype=torch.float32, low_cpu_mem_usage=True ) model.eval() print("Model loaded!") return model def generate(prompt, max_tokens=128, temp=0.7): m = get_model() inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256) with torch.no_grad(): outputs = m.generate( **inputs, max_new_tokens=int(max_tokens), temperature=temp, do_sample=temp > 0, ) response = tokenizer.decode(outputs[0], skip_special_tokens=True) return response[len(prompt):].strip() # UI with gr.Blocks(title="Myanmar LLM") as app: gr.Markdown("# πŸ‡²πŸ‡² Myanmar LLM") gr.Markdown("Model: **amkyawdev/mm-llm-tiny**") with gr.Row(): msg = gr.Textbox(label="Message", placeholder="α€™α€±α€Έα€α€½α€”α€Ία€Έα€›α€±α€Έα€žα€¬α€Έα€•α€«α‹...") output = gr.Textbox(label="Response") with gr.Row(): max_tokens = gr.Slider(32, 256, value=128, step=16, label="Max Tokens") temp = gr.Slider(0.1, 1.0, value=0.7, label="Temperature") btn = gr.Button("Generate") btn.click( generate, inputs=[msg, max_tokens, temp], outputs=output ) gr.Examples( examples=[ ["Hello α€™α€Όα€”α€Ία€™α€¬α€œα€­α€― ပြန်ပါ။", 64, 0.7], ["Python α€”α€²α€· list ရေးပါ။", 128, 0.7], ], inputs=[msg, max_tokens, temp] ) app.launch(share=True)