"""
Production-grade Gradio Space for Myanmar LLM Code Assistant
Model: amkyawdev/mm-llm-coder-lite-v1
"""
import os
import time
import warnings
from functools import lru_cache

import gradio as gr
import torch
from gradio import themes
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

warnings.filterwarnings("ignore")

# ==================== CONFIGURATION ====================
MODEL_NAME = "amkyawdev/mm-llm-coder-lite-v1"
HF_TOKEN = os.environ.get("HF_TOKEN", "")  # Set in Space secrets
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Upper bound on prompt tokens fed to the model for a single turn.
MAX_PROMPT_TOKENS = 512

# System Prompts
SYSTEM_PROMPTS = {
    "General Assistant": "သင်သည် မြန်မာစာ ကျွမ်းကျင်သော AI အကူအညီပေးသူဖြစ်သည်။ သင့်အား မြန်မာဘာသာဖြင့် ဖြေကြားပါ။",
    "Code Expert": "သင်သည် Senior Python Developer ဖြစ်သည်။ အဆင့်မြင့် Code များကို ရှင်းလင်းစွာ ရေးသားပါ။ မြန်မာဘာသာဖြင့် ဖြေကြားပါ။",
    "Translator": "သင်သည် မြန်မာ-အင်္ဂလိပ် ဘာသာပြန်ကျွမ်းကျင်သူဖြစ်သည်။ ဘာသာပြန်လုပ်ပါ။",
}


# ==================== MODEL LOADING ====================
@lru_cache(maxsize=1)
def load_model_and_tokenizer():
    """Load the model and tokenizer once and cache the pair.

    The fast tokenizer is tried first; if that raises, the slow tokenizer is
    tried, and if that also raises the exception propagates to the caller.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    print(f"Loading model from {MODEL_NAME}...")
    auth_token = HF_TOKEN or None

    # NOTE(security): trust_remote_code=True executes Python code shipped in
    # the model repository. Only keep it for repositories you trust.
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            use_fast=True,
            token=auth_token,
        )
    except Exception as fast_err:
        print(f"Fast tokenizer failed: {fast_err}, trying slow...")
        try:
            tokenizer = AutoTokenizer.from_pretrained(
                MODEL_NAME,
                trust_remote_code=True,
                use_fast=False,
                token=auth_token,
            )
        except Exception as slow_err:
            print(f"Slow tokenizer also failed: {slow_err}")
            raise

    # Handle missing pad token
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # If a prompt still has to be truncated, cut the oldest text rather than
    # the current question and the trailing "Assistant:" cue.
    tokenizer.truncation_side = "left"

    # Load model with lower memory settings
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,
        device_map="cpu",
        low_cpu_mem_usage=True,
        trust_remote_code=True,
        token=auth_token,
    )

    print("Model loaded successfully")
    return model, tokenizer


# Initialize model at startup
try:
    model, tokenizer = load_model_and_tokenizer()
    MODEL_LOADED = True
except Exception as e:
    print(f"Error loading model: {e}")
    MODEL_LOADED = False
    model = None
    tokenizer = None


# ==================== GENERATION FUNCTIONS ====================
def format_prompt(user_message: str, system_prompt: str, history: list) -> str:
    """Build the plain-text prompt for the model.

    Args:
        user_message: The new user message.
        system_prompt: Instruction text placed on the leading ``System:`` line.
        history: Past ``(user, assistant)`` pairs; ``None`` is treated as empty.

    Returns:
        The system line, every past turn (each assistant reply followed by the
        tokenizer's EOS token), and the new message ending in ``Assistant:``.
    """
    eos = tokenizer.eos_token or ""
    parts = [f"System: {system_prompt}\n\n"]
    for past_user, past_reply in history or []:
        parts.append(f"User: {past_user}\n\nAssistant: {past_reply}{eos}\n\n")
    parts.append(f"User: {user_message}\n\nAssistant:")
    return "".join(parts)


def _encode_prompt(user_message: str, system_prompt: str, history: list):
    """Tokenize the prompt, dropping the oldest turns until it fits the budget."""
    turns = list(history)
    while True:
        prompt = format_prompt(user_message, system_prompt, turns)
        token_count = len(tokenizer(prompt)["input_ids"])
        if token_count <= MAX_PROMPT_TOKENS or not turns:
            break
        turns.pop(0)

    # Truncation remains as a last resort when the system prompt plus the
    # current message alone exceed the budget.
    return tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_PROMPT_TOKENS,
    )


def generate_response(
    user_message: str,
    system_prompt: str,
    history: list,
    max_new_tokens: int,
    temperature: float,
    top_p: float
) -> tuple:
    """Generate a reply from the model.

    Args:
        user_message: The new user message.
        system_prompt: Instruction text for the model.
        history: Past ``(user, assistant)`` pairs; ``None`` is treated as empty.
        max_new_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature; sampling is enabled when it is > 0.
        top_p: Nucleus-sampling probability mass.

    Returns:
        ``(response, history)``. On success the returned history has the new
        ``(user_message, response)`` pair appended. When the model is not
        loaded or generation raises, ``response`` is an error message and the
        history is returned without a new entry.
    """
    history = list(history or [])

    if not MODEL_LOADED:
        return "❌ မော်ဒယ် မပါ။ ပြန်လည်ကြိုးစားပါ။", history

    try:
        encoded = _encode_prompt(user_message, system_prompt, history)
        inputs = {name: tensor.to(model.device) for name, tensor in encoded.items()}

        generation_config = GenerationConfig(
            max_new_tokens=int(max_new_tokens),
            temperature=temperature,
            top_p=top_p,
            do_sample=temperature > 0,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                generation_config=generation_config
            )

        # The output sequence starts with the prompt tokens; decode only what
        # was generated so a reply containing "Assistant:" is not mis-split.
        prompt_length = inputs["input_ids"].shape[-1]
        response = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True,
        ).strip()

        history.append((user_message, response))
        return response, history

    except Exception as e:
        print(f"Generation error: {e}")
        return f"❌ အမှားဖြစ်ပါ။: {e}", history


def clear_history():
    """Return an empty chat history and an empty input string."""
    return [], ""


def respond(
    message: str,
    history: list,
    system_prompt_key: str,
    max_tokens: int,
    temperature: float,
    top_p: float
):
    """Handle a message submission from the UI.

    Returns:
        ``("", history)``: an empty string to clear the input box, and the
        chat history to display. If generation fails, the error message is
        shown as the assistant reply instead of being dropped.
    """
    history = list(history or [])

    if not message or not message.strip():
        return "", history

    system_prompt = SYSTEM_PROMPTS.get(
        system_prompt_key, SYSTEM_PROMPTS["General Assistant"]
    )

    response, updated_history = generate_response(
        user_message=message,
        system_prompt=system_prompt,
        history=history,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p
    )

    # generate_response appends a turn only on success; on failure surface
    # the returned error text in the chat so the user sees what happened.
    if len(updated_history) == len(history):
        updated_history = updated_history + [(message, response)]

    return "", updated_history


def _example_filler(prompt_text: str):
    """Return a zero-argument callback that loads *prompt_text* and resets the chat."""
    def fill():
        return prompt_text, []
    return fill


# ==================== CUSTOM CSS ====================
CUSTOM_CSS = """
/* Premium Dark Theme */
:root {
    --primary: #10a37f;
    --secondary: #1a1a1a;
    --accent: #2d2d2d;
    --text-primary: #ffffff;
    --text-secondary: #a0a0a0;
    --user-bubble: #10a37f;
    --bot-bubble: #2d2d2d;
    --border-color: #404040;
}

/* Light Theme Overrides */
.light {
    --secondary: #ffffff;
    --accent: #f5f5f5;
    --text-primary: #1a1a1a;
    --text-secondary: #666666;
    --bot-bubble: #f0f0f0;
    --border-color: #e0e0e0;
}

/* Main Container */
.gradio-container {
    max-width: 1200px !important;
    margin: auto !important;
}

/* Header */
.header-section {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #1a1a1a 0%, #2d2d2d 100%);
    border-radius: 16px;
    margin-bottom: 20px;
}

.header-title {
    font-size: 28px;
    font-weight: 700;
    background: linear-gradient(90deg, #10a37f, #00d4aa);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 8px;
}

.header-subtitle {
    color: #a0a0a0;
    font-size: 14px;
}

/* Chat Interface */
.chat-container {
    border-radius: 16px;
    overflow: hidden;
    box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
}

.chat-message {
    padding: 12px 16px;
    margin: 8px 0;
    border-radius: 12px;
    max-width: 85%;
    line-height: 1.6;
}

.chat-message.user {
    background: linear-gradient(135deg, #10a37f, #0d8a66);
    color: white;
    margin-left: auto;
    border-bottom-right-radius: 4px;
}

.chat-message.bot {
    background: var(--bot-bubble);
    color: var(--text-primary);
    border-bottom-left-radius: 4px;
}

/* Code Blocks */
pre {
    background: #1e1e1e !important;
    border-radius: 8px;
    padding: 12px !important;
    margin: 12px 0 !important;
    overflow-x: auto;
}

code {
    font-family: 'Fira Code', 'Consolas', monospace;
    font-size: 13px;
}

/* Input Area */
.input-container {
    background: var(--accent);
    border-radius: 12px;
    padding: 12px;
    border: 1px solid var(--border-color);
}

.input-container:focus-within {
    border-color: #10a37f;
    box-shadow: 0 0 0 2px rgba(16, 163, 127, 0.2);
}

/* Buttons */
.btn-primary {
    background: linear-gradient(135deg, #10a37f, #0d8a66) !important;
    border: none !important;
    border-radius: 8px !important;
    padding: 10px 20px !important;
    font-weight: 600 !important;
    transition: all 0.3s ease !important;
}

.btn-primary:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(16, 163, 127, 0.4);
}

/* Example Buttons */
.example-btn {
    background: var(--accent) !important;
    border: 1px solid var(--border-color) !important;
    border-radius: 8px !important;
    padding: 8px 16px !important;
    transition: all 0.3s ease !important;
}

.example-btn:hover {
    background: #10a37f !important;
    color: white !important;
    border-color: #10a37f !important;
}

/* Sliders */
.slider-container label {
    color: var(--text-primary);
    font-weight: 500;
}

.slider-container .slider-value {
    color: #10a37f;
    font-weight: 600;
}

/* Dropdown */
.dropdown-container select {
    background: var(--accent);
    border: 1px solid var(--border-color);
    border-radius: 8px;
    padding: 8px 12px;
    color: var(--text-primary);
}

/* Loading Animation */
.loading-spinner {
    display: flex;
    justify-content: center;
    align-items: center;
    padding: 20px;
}

.loading-spinner::after {
    content: '';
    width: 40px;
    height: 40px;
    border: 3px solid var(--border-color);
    border-top-color: #10a37f;
    border-radius: 50%;
    animation: spin 1s linear infinite;
}

@keyframes spin {
    to { transform: rotate(360deg); }
}

/* Footer */
.footer {
    text-align: center;
    padding: 20px;
    color: #a0a0a0;
    font-size: 12px;
    border-top: 1px solid var(--border-color);
    margin-top: 20px;
}

.footer a {
    color: #10a37f;
    text-decoration: none;
}

.footer a:hover {
    text-decoration: underline;
}

/* Dark/Light Toggle */
.theme-toggle {
    display: flex;
    align-items: center;
    gap: 8px;
    padding: 8px 12px;
    background: var(--accent);
    border-radius: 20px;
    cursor: pointer;
}

/* Animations */
@keyframes fadeIn {
    from { opacity: 0; transform: translateY(10px); }
    to { opacity: 1; transform: translateY(0); }
}

.fade-in {
    animation: fadeIn 0.3s ease-out;
}

/* Scrollbar */
::-webkit-scrollbar {
    width: 8px;
    height: 8px;
}

::-webkit-scrollbar-track {
    background: var(--secondary);
}

::-webkit-scrollbar-thumb {
    background: var(--border-color);
    border-radius: 4px;
}

::-webkit-scrollbar-thumb:hover {
    background: #10a37f;
}
"""

# ==================== GRADIO APP ====================
# Simple version for testing (CUSTOM_CSS is defined above but not applied here)
with gr.Blocks(title="Myanmar LLM") as app:
    # State for theme
    theme_state = gr.State(value="dark")

    # Header
    # NOTE(review): the element tags in the HTML snippets below are minimal
    # reconstructions (class names taken from CUSTOM_CSS); confirm against the
    # intended markup.
    gr.HTML("""
    <div class="header-section">
        <div class="header-title">Myanmar LLM Code Assistant</div>
        <div class="header-subtitle">Powered by amkyawdev/mm-llm-coder-lite-v1</div>
    </div>
    """)

    # Main Layout
    with gr.Row():
        with gr.Column(scale=3):
            # Chat Interface
            chatbot = gr.Chatbot(
                label="💬 စကားပြောပါ။",
                height=500,
            )

            # Example Prompts
            gr.HTML("<p>Example Prompts:</p>")
            with gr.Row():
                btn1 = gr.Button("🔢 Fibonacci", size="sm", variant="secondary")
                btn2 = gr.Button("🔤 Unicode → Zawgyi", size="sm", variant="secondary")
                btn3 = gr.Button("Data Cleaning", size="sm", variant="secondary")

            # Input Area
            with gr.Row():
                msg_input = gr.Textbox(
                    label="သင့်မေးခွန်း",
                    placeholder="မေးခွန်းရေးသားပါ။...",
                    lines=3,
                    scale=4
                )
                submit_btn = gr.Button(" စာပို့ပါ ", variant="primary", scale=1)

            # Clear Button
            clear_btn = gr.Button("🗑️ သန့်ရှင်းပါ။", variant="stop")

        with gr.Column(scale=1):
            # Settings Panel
            gr.HTML("<h3>⚙️ အပြင်အဆင့်များ</h3>")

            # System Prompt
            system_prompt = gr.Dropdown(
                choices=list(SYSTEM_PROMPTS),
                value="General Assistant",
                label="System Prompt",
                info="AI ရဲ့ အပြုအမူ"
            )

            # Max Tokens
            max_tokens = gr.Slider(
                minimum=50,
                maximum=512,
                value=256,
                step=10,
                label="Max New Tokens",
                info="အများဆုံး စကားပါးပါး"
            )

            # Temperature
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=0.7,
                step=0.1,
                label="Temperature",
                info="ပိုမိုးတော်တော် (0.1 = တိကျ၊ 1.5 = ဖန်းဆန်း)"
            )

            # Top-p
            top_p = gr.Slider(
                minimum=0.5,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p",
                info="Nucleus sampling"
            )

            # Theme info
            gr.HTML("<p>🌙 Dark Mode</p>")

    # Footer (no content)
    gr.HTML("")

    # ==================== EVENT HANDLERS ====================
    chat_inputs = [
        msg_input,
        chatbot,
        system_prompt,
        max_tokens,
        temperature,
        top_p,
    ]
    chat_outputs = [msg_input, chatbot]

    # Submit button click
    submit_btn.click(fn=respond, inputs=chat_inputs, outputs=chat_outputs)

    # Enter key submit
    msg_input.submit(fn=respond, inputs=chat_inputs, outputs=chat_outputs)

    # Clear button
    clear_btn.click(
        fn=clear_history,
        inputs=[],
        outputs=[chatbot, msg_input]
    )

    # Example buttons: each one fills the input box and resets the chat
    example_prompts = (
        (btn1, "Python နဲ့ Fibonacci စီးရီးထုတ်တဲ့ function ရေးပေးပါ"),
        (btn2, "မြန်မာ Unicode ကို Zawgyi ပြောင်းတဲ့ code ရေးပါ"),
        (btn3, "ဒေတာ (Data) သန့်ရှင်းရေးလုပ်နည်း အဆင့်ဆင့်ရှင်းပြပါ"),
    )
    for example_button, example_text in example_prompts:
        example_button.click(
            fn=_example_filler(example_text),
            inputs=[],
            outputs=[msg_input, chatbot]
        )

    # Theme toggle - simplified (remove for now)

# ==================== LAUNCH ====================
if __name__ == "__main__":
    # The model is loaded with device_map="cpu", so report where it actually
    # lives; fall back to the detected device when loading failed.
    active_device = model.device if MODEL_LOADED else DEVICE

    print("🚀 Starting Myanmar LLM Code Assistant...")
    print(f"📱 Device: {active_device}")
    print(f"📦 Model: {MODEL_NAME}")

    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )