# Source: app.py from amkyawdev's Hugging Face Space — commit 5a527c9 ("Update app.py", verified)
"""
Production-grade Gradio Space for Myanmar LLM Code Assistant
Model: amkyawdev/mm-llm-coder-lite-v1
"""
import gradio as gr
from gradio import themes
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
from functools import lru_cache
import warnings
import time
warnings.filterwarnings("ignore")
import os
# ==================== CONFIGURATION ====================
# Hub repository id of the model served by this app.
MODEL_NAME = "amkyawdev/mm-llm-coder-lite-v1"

# Hub access token; an empty string means no token was configured.
HF_TOKEN = os.getenv("HF_TOKEN", "")  # Set in Space secrets

# Device label: "cuda" when torch reports a usable GPU, otherwise "cpu".
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# System Prompts
# Maps the label shown in the UI dropdown to the system prompt text.
SYSTEM_PROMPTS = {
    "General Assistant": (
        "သင်သည် မြန်မာစာ ကျွမ်းကျင်သော AI အကူအညီပေးသူဖြစ်သည်။ သင့်အား မြန်မာဘာသာဖြင့် ဖြေကြားပါ။"
    ),
    "Code Expert": (
        "သင်သည် Senior Python Developer ဖြစ်သည်။ အဆင့်မြင့် Code များကို ရှင်းလင်းစွာ ရေးသားပါ။ မြန်မာဘာသာဖြင့် ဖြေကြားပါ။"
    ),
    "Translator": (
        "သင်သည် မြန်မာ-အင်္ဂလိပ် ဘာသာပြန်ကျွမ်းကျင်သူဖြစ်သည်။ ဘာသာပြန်လုပ်ပါ။"
    ),
}
# ==================== MODEL LOADING ====================
@lru_cache(maxsize=1)
def load_model_and_tokenizer():
    """Load the causal language model and its tokenizer, once per process.

    The result is memoized by ``lru_cache``, so later calls return the same
    objects instead of loading again. The fast tokenizer is tried first and
    the slow one is used as a fallback. The model is placed on ``DEVICE``
    (half precision on CUDA, full precision on CPU) so that the device
    reported at startup is the device generation actually runs on.

    Returns:
        tuple: ``(model, tokenizer)``.

    Raises:
        Exception: whatever ``from_pretrained`` raised when both tokenizer
            attempts fail, or when the model itself cannot be loaded.
    """
    print(f"Loading model from {MODEL_NAME}...")

    # An unset/empty HF_TOKEN means anonymous access, which is passed as None.
    auth_token = HF_TOKEN or None

    # NOTE(security): trust_remote_code=True runs Python code shipped in the
    # model repository; keep it only for repositories you control or trust.
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            use_fast=True,
            token=auth_token,
        )
    except Exception as fast_error:
        print(f"Fast tokenizer failed: {fast_error}, trying slow...")
        try:
            tokenizer = AutoTokenizer.from_pretrained(
                MODEL_NAME,
                trust_remote_code=True,
                use_fast=False,
                token=auth_token,
            )
        except Exception as slow_error:
            print(f"Slow tokenizer also failed: {slow_error}")
            raise

    # Handle missing pad token: generation needs one, so reuse EOS.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # float16 halves memory on GPU; CPU stays in float32.
    on_gpu = DEVICE == "cuda"
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16 if on_gpu else torch.float32,
        device_map=DEVICE,
        low_cpu_mem_usage=True,
        trust_remote_code=True,
        token=auth_token,
    )
    print("Model loaded successfully")
    return model, tokenizer
# Initialize model at startup
# Begin in the "not loaded" state; only a successful load flips the flag, so a
# failure leaves the app importable with the model and tokenizer set to None.
MODEL_LOADED = False
model = None
tokenizer = None
try:
    model, tokenizer = load_model_and_tokenizer()
except Exception as load_error:
    print(f"Error loading model: {load_error}")
else:
    MODEL_LOADED = True
# ==================== GENERATION FUNCTIONS ====================
def format_prompt(user_message: str, system_prompt: str, history: list) -> str:
    """Build the plain-text prompt sent to the model.

    Layout: a ``System:`` line, then one ``User:``/``Assistant:`` pair per
    past turn (each reply followed by the tokenizer's EOS token), then the
    new user message followed by an open ``Assistant:`` cue.

    Args:
        user_message: The message being answered now.
        system_prompt: Instruction text placed at the top of the prompt.
        history: Past turns as ``(user_text, assistant_text)`` pairs; may be
            empty or ``None``.

    Returns:
        The assembled prompt string.
    """
    parts = [f"System: {system_prompt}\n\n"]
    if history:
        # A tokenizer without an EOS token must not inject the text "None".
        eos = tokenizer.eos_token or ""
        for msg, response in history:
            # A turn with no reply is rendered empty rather than as "None".
            reply = "" if response is None else response
            parts.append(f"User: {msg}\n\nAssistant: {reply}{eos}\n\n")
    parts.append(f"User: {user_message}\n\nAssistant:")
    return "".join(parts)
def generate_response(
    user_message: str,
    system_prompt: str,
    history: list,
    max_new_tokens: int,
    temperature: float,
    top_p: float
) -> tuple:
    """Generate the assistant's reply for one chat turn.

    Args:
        user_message: The new user message.
        system_prompt: System prompt text placed at the top of the prompt.
        history: Past turns as ``(user_text, assistant_text)`` pairs. On
            success the new turn is appended to this list.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature; sampling is used only when > 0.
        top_p: Nucleus-sampling threshold.

    Returns:
        tuple: ``(response, history)``. When the model is not loaded or
        generation fails, ``response`` is an error message and ``history``
        is returned without a new turn.
    """
    if history is None:
        history = []
    if not MODEL_LOADED:
        return "❌ မော်ဒယ် မပါ။ ပြန်လည်ကြိုးစားပါ။", history
    try:
        # Format prompt
        prompt = format_prompt(user_message, system_prompt, history)

        # Tokenize, then keep only the LAST tokens when the prompt is too
        # long. Truncating on the right would cut off the newest user message
        # and the trailing "Assistant:" cue, which matter most.
        max_prompt_tokens = 512
        encoded = tokenizer(prompt, return_tensors="pt")
        inputs = {
            name: tensor[:, -max_prompt_tokens:].to(model.device)
            for name, tensor in encoded.items()
        }

        # Generate
        generation_config = GenerationConfig(
            max_new_tokens=int(max_new_tokens),
            temperature=temperature,
            top_p=top_p,
            do_sample=temperature > 0,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                generation_config=generation_config
            )

        # Decode only the newly generated tokens. The output sequence starts
        # with the prompt, so slicing by prompt length is more reliable than
        # splitting decoded text on "Assistant:", which breaks whenever the
        # reply itself contains that string.
        prompt_length = inputs["input_ids"].shape[-1]
        new_tokens = outputs[0][prompt_length:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        # Update history
        history.append((user_message, response))
        return response, history
    except Exception as e:
        # Boundary handler: report the failure to the caller, don't crash the UI.
        print(f"Generation error: {e}")
        return f"❌ အမှားဖြစ်ပါ။: {str(e)}", history
def clear_history():
    """Reset the conversation.

    Returns:
        tuple: a new empty history list and an empty string, in that order.
    """
    fresh_history = []
    blank_text = ""
    return fresh_history, blank_text
# ==================== CUSTOM CSS ====================
# Stylesheet text for the UI. It declares the theme colour variables (with
# light-theme overrides) and then rules for the main container, header, chat
# bubbles, code blocks, input area, buttons, sliders, dropdown, loading
# spinner, footer, theme toggle, fade-in animation and scrollbars.
# NOTE(review): nothing in the visible file passes CUSTOM_CSS to Gradio, so
# these rules appear to be unused at the moment — confirm whether that is
# intentional before relying on them.
CUSTOM_CSS = """
/* Premium Dark Theme */
:root {
--primary: #10a37f;
--secondary: #1a1a1a;
--accent: #2d2d2d;
--text-primary: #ffffff;
--text-secondary: #a0a0a0;
--user-bubble: #10a37f;
--bot-bubble: #2d2d2d;
--border-color: #404040;
}
/* Light Theme Overrides */
.light {
--secondary: #ffffff;
--accent: #f5f5f5;
--text-primary: #1a1a1a;
--text-secondary: #666666;
--bot-bubble: #f0f0f0;
--border-color: #e0e0e0;
}
/* Main Container */
.gradio-container {
max-width: 1200px !important;
margin: auto !important;
}
/* Header */
.header-section {
text-align: center;
padding: 20px;
background: linear-gradient(135deg, #1a1a1a 0%, #2d2d2d 100%);
border-radius: 16px;
margin-bottom: 20px;
}
.header-title {
font-size: 28px;
font-weight: 700;
background: linear-gradient(90deg, #10a37f, #00d4aa);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin-bottom: 8px;
}
.header-subtitle {
color: #a0a0a0;
font-size: 14px;
}
/* Chat Interface */
.chat-container {
border-radius: 16px;
overflow: hidden;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
}
.chat-message {
padding: 12px 16px;
margin: 8px 0;
border-radius: 12px;
max-width: 85%;
line-height: 1.6;
}
.chat-message.user {
background: linear-gradient(135deg, #10a37f, #0d8a66);
color: white;
margin-left: auto;
border-bottom-right-radius: 4px;
}
.chat-message.bot {
background: var(--bot-bubble);
color: var(--text-primary);
border-bottom-left-radius: 4px;
}
/* Code Blocks */
pre {
background: #1e1e1e !important;
border-radius: 8px;
padding: 12px !important;
margin: 12px 0 !important;
overflow-x: auto;
}
code {
font-family: 'Fira Code', 'Consolas', monospace;
font-size: 13px;
}
/* Input Area */
.input-container {
background: var(--accent);
border-radius: 12px;
padding: 12px;
border: 1px solid var(--border-color);
}
.input-container:focus-within {
border-color: #10a37f;
box-shadow: 0 0 0 2px rgba(16, 163, 127, 0.2);
}
/* Buttons */
.btn-primary {
background: linear-gradient(135deg, #10a37f, #0d8a66) !important;
border: none !important;
border-radius: 8px !important;
padding: 10px 20px !important;
font-weight: 600 !important;
transition: all 0.3s ease !important;
}
.btn-primary:hover {
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(16, 163, 127, 0.4);
}
/* Example Buttons */
.example-btn {
background: var(--accent) !important;
border: 1px solid var(--border-color) !important;
border-radius: 8px !important;
padding: 8px 16px !important;
transition: all 0.3s ease !important;
}
.example-btn:hover {
background: #10a37f !important;
color: white !important;
border-color: #10a37f !important;
}
/* Sliders */
.slider-container label {
color: var(--text-primary);
font-weight: 500;
}
.slider-container .slider-value {
color: #10a37f;
font-weight: 600;
}
/* Dropdown */
.dropdown-container select {
background: var(--accent);
border: 1px solid var(--border-color);
border-radius: 8px;
padding: 8px 12px;
color: var(--text-primary);
}
/* Loading Animation */
.loading-spinner {
display: flex;
justify-content: center;
align-items: center;
padding: 20px;
}
.loading-spinner::after {
content: '';
width: 40px;
height: 40px;
border: 3px solid var(--border-color);
border-top-color: #10a37f;
border-radius: 50%;
animation: spin 1s linear infinite;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
/* Footer */
.footer {
text-align: center;
padding: 20px;
color: #a0a0a0;
font-size: 12px;
border-top: 1px solid var(--border-color);
margin-top: 20px;
}
.footer a {
color: #10a37f;
text-decoration: none;
}
.footer a:hover {
text-decoration: underline;
}
/* Dark/Light Toggle */
.theme-toggle {
display: flex;
align-items: center;
gap: 8px;
padding: 8px 12px;
background: var(--accent);
border-radius: 20px;
cursor: pointer;
}
/* Animations */
@keyframes fadeIn {
from { opacity: 0; transform: translateY(10px); }
to { opacity: 1; transform: translateY(0); }
}
.fade-in {
animation: fadeIn 0.3s ease-out;
}
/* Scrollbar */
::-webkit-scrollbar {
width: 8px;
height: 8px;
}
::-webkit-scrollbar-track {
background: var(--secondary);
}
::-webkit-scrollbar-thumb {
background: var(--border-color);
border-radius: 4px;
}
::-webkit-scrollbar-thumb:hover {
background: #10a37f;
}
"""
# ==================== GRADIO APP ====================
# Simple version for testing
# NOTE(review): the CUSTOM_CSS string defined in this file is not passed to
# gr.Blocks here, so the CSS classes used in the HTML snippets below are not
# styled by it — confirm whether that is intentional.
with gr.Blocks(title="Myanmar LLM") as app:
    # State for theme (not read by any handler yet)
    theme_state = gr.State(value="dark")

    # Header
    gr.HTML("""
<div class="header-section">
<div class="header-title"> Myanmar LLM Code Assistant</div>
<div class="header-subtitle">Powered by amkyawdev/mm-llm-coder-lite-v1</div>
</div>
""")

    # Main Layout
    with gr.Row():
        with gr.Column(scale=3):
            # Chat Interface
            chatbot = gr.Chatbot(
                label="💬 စကားပြောပါ။",
                height=500,
            )

            # Example Prompts
            gr.HTML("<div style='text-align: center; margin: 10px 0; color: #a0a0a0;'>Example Prompts:</div>")
            with gr.Row():
                btn1 = gr.Button("🔢 Fibonacci", size="sm", variant="secondary")
                btn2 = gr.Button("🔤 Unicode → Zawgyi", size="sm", variant="secondary")
                btn3 = gr.Button("Data Cleaning", size="sm", variant="secondary")

            # Input Area
            with gr.Row():
                msg_input = gr.Textbox(
                    label="သင့်မေးခွန်း",
                    placeholder="မေးခွန်းရေးသားပါ။...",
                    lines=3,
                    scale=4
                )
                submit_btn = gr.Button(" စာပို့ပါ ", variant="primary", scale=1)

            # Clear Button
            clear_btn = gr.Button("🗑️ သန့်ရှင်းပါ။", variant="stop")

        with gr.Column(scale=1):
            # Settings Panel
            gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><b>⚙️ အပြင်အဆင့်များ</b></div>")

            # System Prompt
            system_prompt = gr.Dropdown(
                choices=list(SYSTEM_PROMPTS.keys()),
                value="General Assistant",
                label="System Prompt",
                info="AI ရဲ့ အပြုအမူ"
            )

            # Max Tokens
            max_tokens = gr.Slider(
                minimum=50,
                maximum=512,
                value=256,
                step=10,
                label="Max New Tokens",
                info="အများဆုံး စကားပါးပါး"
            )

            # Temperature
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="ပိုမိုးတော်တော် (0.1 = တိကျ၊ 1.5 = ဖန်းဆန်း)"
            )

            # Top-p
            top_p = gr.Slider(
                minimum=0.5,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p",
                info="Nucleus sampling"
            )

            # Theme info
            gr.HTML("<div style='text-align: center; color: #a0a0a0;'>🌙 Dark Mode</div>")

    # Footer
    gr.HTML("""
<div class="footer">
<p>🤖 ဤအက်ပလီမော်ဒယ်သည် <a href="https://huggingface.co/amkyawdev/mm-llm-coder-lite-v1" target="_blank">amkyawdev/mm-llm-coder-lite-v1</a> ဖြစ်ပါ။</p>
<p>📊 Hardware: T4 Small (Recommended) | ဖန်တီးသူ: Amkyaw Dev </p>
</div>
""")

    # ==================== EVENT HANDLERS ====================
    def respond(
        message: str,
        history: list,
        system_prompt_key: str,
        max_tokens: int,
        temperature: float,
        top_p: float
    ):
        """Handle a submitted message and return the new UI state.

        Args:
            message: Text from the input box.
            history: Current chat history from the chatbot component.
            system_prompt_key: Label selected in the system-prompt dropdown;
                unknown labels fall back to "General Assistant".
            max_tokens: Value of the "Max New Tokens" slider.
            temperature: Value of the "Temperature" slider.
            top_p: Value of the "Top-p" slider.

        Returns:
            tuple: ``("", history)`` — an empty string to clear the input box
            and the chat history to display.

        Raises:
            gr.Error: when no reply was produced (model not loaded or
                generation failed), carrying the message to show the user.
        """
        history = history or []
        if not message or not message.strip():
            return "", history

        system_prompt = SYSTEM_PROMPTS.get(system_prompt_key, SYSTEM_PROMPTS["General Assistant"])

        # Measure before the call: generate_response appends to the list.
        turns_before = len(history)
        response, history = generate_response(
            user_message=message,
            system_prompt=system_prompt,
            history=history,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p
        )

        # generate_response adds a turn only on success; otherwise `response`
        # holds an error message. Surface it instead of silently dropping it,
        # without polluting the chat history used for later prompts.
        if len(history) == turns_before:
            raise gr.Error(response)

        return "", history

    # The send button and the Enter key share one handler and one wiring.
    chat_inputs = [
        msg_input,
        chatbot,
        system_prompt,
        max_tokens,
        temperature,
        top_p
    ]
    chat_outputs = [msg_input, chatbot]

    # Submit button click
    submit_btn.click(fn=respond, inputs=chat_inputs, outputs=chat_outputs)

    # Enter key submit
    msg_input.submit(fn=respond, inputs=chat_inputs, outputs=chat_outputs)

    # Clear button
    clear_btn.click(
        fn=clear_history,
        inputs=[],
        outputs=[chatbot, msg_input]
    )

    # Example buttons: fill the input box with a sample prompt and reset the
    # chat to an empty history.
    btn1.click(
        fn=lambda: ("Python နဲ့ Fibonacci စီးရီးထုတ်တဲ့ function ရေးပေးပါ", []),
        inputs=[],
        outputs=[msg_input, chatbot]
    )
    btn2.click(
        fn=lambda: ("မြန်မာ Unicode ကို Zawgyi ပြောင်းတဲ့ code ရေးပါ", []),
        inputs=[],
        outputs=[msg_input, chatbot]
    )
    btn3.click(
        fn=lambda: ("ဒေတာ (Data) သန့်ရှင်းရေးလုပ်နည်း အဆင့်ဆင့်ရှင်းပြပါ", []),
        inputs=[],
        outputs=[msg_input, chatbot]
    )

    # Theme toggle - simplified (remove for now)
# ==================== LAUNCH ====================
# Runs only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    print("🚀 Starting Myanmar LLM Code Assistant...")
    print(f"📱 Device: {DEVICE}")
    print(f"📦 Model: {MODEL_NAME}")
    app.launch(
        # Listen on all interfaces so the hosting container can route traffic.
        server_name="0.0.0.0",
        server_port=7860,
        # A share link is not supported on Hugging Face Spaces, and when run
        # locally it would expose the app through a public tunnel.
        share=False,
    )