""" Production-grade Gradio Space for Myanmar LLM Code Assistant Model: amkyawdev/mm-llm-coder-lite-v1 """ import gradio as gr from gradio import themes import torch from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig from functools import lru_cache import warnings import time warnings.filterwarnings("ignore") import os # ==================== CONFIGURATION ==================== MODEL_NAME = "amkyawdev/mm-llm-coder-lite-v1" HF_TOKEN = os.environ.get("HF_TOKEN", "") # Set in Space secrets DEVICE = "cuda" if torch.cuda.is_available() else "cpu" # System Prompts SYSTEM_PROMPTS = { "General Assistant": "သင်သည် မြန်မာစာ ကျွမ်းကျင်သော AI အကူအညီပေးသူဖြစ်သည်။ သင့်အား မြန်မာဘာသာဖြင့် ဖြေကြားပါ။", "Code Expert": "သင်သည် Senior Python Developer ဖြစ်သည်။ အဆင့်မြင့် Code များကို ရှင်းလင်းစွာ ရေးသားပါ။ မြန်မာဘာသာဖြင့် ဖြေကြားပါ။", "Translator": "သင်သည် မြန်မာ-အင်္ဂလိပ် ဘာသာပြန်ကျွမ်းကျင်သူဖြစ်သည်။ ဘာသာပြန်လုပ်ပါ။" } # ==================== MODEL LOADING ==================== @lru_cache(maxsize=1) def load_model_and_tokenizer(): """Load model and tokenizer with caching""" print(f"Loading model from {MODEL_NAME}...") try: # Try with trust_remote_code and different settings tokenizer = AutoTokenizer.from_pretrained( MODEL_NAME, trust_remote_code=True, use_fast=True, token=HF_TOKEN if HF_TOKEN else None ) except Exception as e1: print(f"Fast tokenizer failed: {e1}, trying slow...") try: tokenizer = AutoTokenizer.from_pretrained( MODEL_NAME, trust_remote_code=True, use_fast=False, token=HF_TOKEN if HF_TOKEN else None ) except Exception as e2: print(f"Slow tokenizer also failed: {e2}") raise # Handle missing pad token if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token # Load model with lower memory settings model = AutoModelForCausalLM.from_pretrained( MODEL_NAME, torch_dtype=torch.float32, device_map="cpu", low_cpu_mem_usage=True, trust_remote_code=True, token=HF_TOKEN if HF_TOKEN else None ) print(f"Model loaded successfully") return model, tokenizer # Initialize model at startup try: model, tokenizer = load_model_and_tokenizer() MODEL_LOADED = True except Exception as e: print(f"Error loading model: {e}") MODEL_LOADED = False model = None tokenizer = None # ==================== GENERATION FUNCTIONS ==================== def format_prompt(user_message: str, system_prompt: str, history: list) -> str: """Format the prompt for the model""" prompt = f"System: {system_prompt}\n\n" for msg, response in history: prompt += f"User: {msg}\n\nAssistant: {response}{tokenizer.eos_token}\n\n" prompt += f"User: {user_message}\n\nAssistant:" return prompt def generate_response( user_message: str, system_prompt: str, history: list, max_new_tokens: int, temperature: float, top_p: float ) -> tuple: """Generate response from the model""" if not MODEL_LOADED: return "❌ မော်ဒယ် မပါ။ ပြန်လည်ကြိုးစားပါ။", history try: # Format prompt prompt = format_prompt(user_message, system_prompt, history) # Tokenize inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512) # Use CPU if no CUDA if torch.cuda.is_available(): inputs = {k: v.to(model.device) for k, v in inputs.items()} # Generate generation_config = GenerationConfig( max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, do_sample=temperature > 0, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.pad_token_id, ) with torch.no_grad(): outputs = model.generate( **inputs, generation_config=generation_config ) # Decode response response = tokenizer.decode(outputs[0], skip_special_tokens=True) # Extract only the assistant's response response = response.split("Assistant:")[-1].strip() # Update history history.append((user_message, response)) return response, history except Exception as e: print(f"Generation error: {e}") return f"❌ အမှားဖြစ်ပါ။: {str(e)}", history def clear_history(): """Clear chat history""" return [], "" # ==================== CUSTOM CSS ==================== CUSTOM_CSS = """ /* Premium Dark Theme */ :root { --primary: #10a37f; --secondary: #1a1a1a; --accent: #2d2d2d; --text-primary: #ffffff; --text-secondary: #a0a0a0; --user-bubble: #10a37f; --bot-bubble: #2d2d2d; --border-color: #404040; } /* Light Theme Overrides */ .light { --secondary: #ffffff; --accent: #f5f5f5; --text-primary: #1a1a1a; --text-secondary: #666666; --bot-bubble: #f0f0f0; --border-color: #e0e0e0; } /* Main Container */ .gradio-container { max-width: 1200px !important; margin: auto !important; } /* Header */ .header-section { text-align: center; padding: 20px; background: linear-gradient(135deg, #1a1a1a 0%, #2d2d2d 100%); border-radius: 16px; margin-bottom: 20px; } .header-title { font-size: 28px; font-weight: 700; background: linear-gradient(90deg, #10a37f, #00d4aa); -webkit-background-clip: text; -webkit-text-fill-color: transparent; margin-bottom: 8px; } .header-subtitle { color: #a0a0a0; font-size: 14px; } /* Chat Interface */ .chat-container { border-radius: 16px; overflow: hidden; box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3); } .chat-message { padding: 12px 16px; margin: 8px 0; border-radius: 12px; max-width: 85%; line-height: 1.6; } .chat-message.user { background: linear-gradient(135deg, #10a37f, #0d8a66); color: white; margin-left: auto; border-bottom-right-radius: 4px; } .chat-message.bot { background: var(--bot-bubble); color: var(--text-primary); border-bottom-left-radius: 4px; } /* Code Blocks */ pre { background: #1e1e1e !important; border-radius: 8px; padding: 12px !important; margin: 12px 0 !important; overflow-x: auto; } code { font-family: 'Fira Code', 'Consolas', monospace; font-size: 13px; } /* Input Area */ .input-container { background: var(--accent); border-radius: 12px; padding: 12px; border: 1px solid var(--border-color); } .input-container:focus-within { border-color: #10a37f; box-shadow: 0 0 0 2px rgba(16, 163, 127, 0.2); } /* Buttons */ .btn-primary { background: linear-gradient(135deg, #10a37f, #0d8a66) !important; border: none !important; border-radius: 8px !important; padding: 10px 20px !important; font-weight: 600 !important; transition: all 0.3s ease !important; } .btn-primary:hover { transform: translateY(-2px); box-shadow: 0 4px 12px rgba(16, 163, 127, 0.4); } /* Example Buttons */ .example-btn { background: var(--accent) !important; border: 1px solid var(--border-color) !important; border-radius: 8px !important; padding: 8px 16px !important; transition: all 0.3s ease !important; } .example-btn:hover { background: #10a37f !important; color: white !important; border-color: #10a37f !important; } /* Sliders */ .slider-container label { color: var(--text-primary); font-weight: 500; } .slider-container .slider-value { color: #10a37f; font-weight: 600; } /* Dropdown */ .dropdown-container select { background: var(--accent); border: 1px solid var(--border-color); border-radius: 8px; padding: 8px 12px; color: var(--text-primary); } /* Loading Animation */ .loading-spinner { display: flex; justify-content: center; align-items: center; padding: 20px; } .loading-spinner::after { content: ''; width: 40px; height: 40px; border: 3px solid var(--border-color); border-top-color: #10a37f; border-radius: 50%; animation: spin 1s linear infinite; } @keyframes spin { to { transform: rotate(360deg); } } /* Footer */ .footer { text-align: center; padding: 20px; color: #a0a0a0; font-size: 12px; border-top: 1px solid var(--border-color); margin-top: 20px; } .footer a { color: #10a37f; text-decoration: none; } .footer a:hover { text-decoration: underline; } /* Dark/Light Toggle */ .theme-toggle { display: flex; align-items: center; gap: 8px; padding: 8px 12px; background: var(--accent); border-radius: 20px; cursor: pointer; } /* Animations */ @keyframes fadeIn { from { opacity: 0; transform: translateY(10px); } to { opacity: 1; transform: translateY(0); } } .fade-in { animation: fadeIn 0.3s ease-out; } /* Scrollbar */ ::-webkit-scrollbar { width: 8px; height: 8px; } ::-webkit-scrollbar-track { background: var(--secondary); } ::-webkit-scrollbar-thumb { background: var(--border-color); border-radius: 4px; } ::-webkit-scrollbar-thumb:hover { background: #10a37f; } """ # ==================== GRADIO APP ==================== # Simple version for testing with gr.Blocks(title="Myanmar LLM") as app: # State for theme theme_state = gr.State(value="dark") # Header gr.HTML("""