| |
| """ |
| AmkyawDev-LLM-V3 Gradio Web UI |
| Burmese Language Model Chat Interface |
| """ |
|
|
| import os |
| import torch |
| from transformers import AutoTokenizer, AutoModelForCausalLM, AutoPeftModel |
| from peft import PeftModel, PeftConfig |
| import gradio as gr |
| from threading import Thread |
|
|
|
|
| |
# Hugging Face model id of the base checkpoint. Override it without editing
# the file by setting the AMKYAW_BASE_MODEL environment variable.
BASE_MODEL = os.environ.get("AMKYAW_BASE_MODEL", "Qwen/Qwen2.5-1.5B-Instruct")
# Local directory that holds the LoRA adapter. Override it with the
# AMKYAW_ADAPTER_PATH environment variable.
ADAPTER_PATH = os.environ.get("AMKYAW_ADAPTER_PATH", "./model/adapter")
|
|
| |
def load_model():
    """Load the tokenizer and causal LM, attaching the LoRA adapter if present.

    The tokenizer and the base weights are loaded from ``BASE_MODEL``. A PEFT
    adapter is applied on top only when ``ADAPTER_PATH`` contains an
    ``adapter_config.json`` file; otherwise the plain base model is used.

    Returns:
        A ``(model, tokenizer)`` tuple; the model has been put in eval mode.
    """
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(
        BASE_MODEL,
        trust_remote_code=True,
    )
    tokenizer.pad_token = tokenizer.eos_token

    print("Loading base model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    # Probe for the adapter's config file rather than "the directory is not
    # empty": a folder that only holds placeholder files (or a path that is
    # not a directory at all) would otherwise raise here, and the caller's
    # fallback would drop the whole app into demo mode instead of simply
    # serving the base model.
    adapter_config = os.path.join(ADAPTER_PATH, "adapter_config.json")
    if os.path.isfile(adapter_config):
        print("Loading LoRA adapter...")
        model = PeftModel.from_pretrained(
            base_model,
            ADAPTER_PATH,
            torch_dtype=torch.float16,
        )
    else:
        print("No adapter found, using base model.")
        model = base_model

    model.eval()

    return model, tokenizer
|
|
|
|
| |
# Load the model once, at import time, so every request reuses it. If loading
# fails, the error is printed and both globals are left as None, which is the
# signal generate_response() uses to answer in demo mode.
# NOTE(review): the imports at the top of the file run before this guard, so
# an ImportError there is not caught here -- confirm every imported name
# (e.g. ``AutoPeftModel`` from ``transformers``) actually resolves.
model = tokenizer = None
print("Initializing model... This may take a few minutes.")
try:
    loaded = load_model()
    model, tokenizer = loaded
    print("Model loaded successfully!")
except Exception as exc:
    print(f"Error loading model: {exc}")
    print("Running in demo mode with mock responses.")
    model = tokenizer = None
|
|
|
|
def generate_response(prompt, system_prompt=None, temperature=0.7, max_tokens=512):
    """Generate one assistant reply for ``prompt``.

    Args:
        prompt: The user's message.
        system_prompt: Optional instruction text placed before the user turn;
            skipped when empty or ``None``.
        temperature: Sampling temperature passed to ``model.generate``.
        max_tokens: Upper bound on newly generated tokens (coerced to int).

    Returns:
        The reply text, or a fixed placeholder message when the module-level
        ``model`` is ``None`` (demo mode).
    """
    if model is None:
        # NOTE(review): this message looks like UTF-8 text that was decoded
        # with the wrong codec; it is kept as found -- restore it from the
        # original file if available.
        return (
            "π α€αααΊααΎα¬ demo mode ααΌα"
            "αΊαα«αααΊα αα±α¬αΊαααΊαα«ααΊααΊααα«αα²α·α‘αα½ααΊα"
            "ααΊαΈαααΊααΌα±ααα―αα«αααΊα"
        )

    if system_prompt:
        full_prompt = f"System: {system_prompt}\n\nUser: {prompt}\nAssistant:"
    else:
        full_prompt = f"User: {prompt}\nAssistant:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            temperature=temperature,
            # UI sliders may hand over a float; generate() needs an int.
            max_new_tokens=int(max_tokens),
            do_sample=True,
            top_p=0.9,
            repetition_penalty=1.1,
        )

    # Decode only the tokens produced after the prompt. Splitting the full
    # decoded text on "Assistant:" returns the wrong span whenever the user's
    # message or the generated text itself contains that marker.
    new_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # No stop sequence is passed to generate(), so sampling can run on into
    # an invented next turn; keep only the text before it.
    response = response.split("\nUser:", 1)[0]

    return response.strip()
|
|
|
|
def chat(message, history, system_prompt, temperature, max_tokens):
    """Answer ``message`` for the Gradio UI using the supplied settings.

    ``history`` is part of the signature but is not read; only the current
    message and the generation settings are forwarded.
    """
    generation_settings = {
        "system_prompt": system_prompt,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    return generate_response(message, **generation_settings)
|
|
|
|
| |
def create_ui():
    """Build the Gradio Blocks chat interface and return it without launching.

    Layout: a wide column with the chat transcript, the message box and the
    send/clear buttons, next to a narrow column with the system prompt,
    temperature and max-token controls. The send button and pressing Enter in
    the message box both run the same handler.

    Returns:
        The constructed ``gr.Blocks`` app.
    """
    # NOTE(review): the non-ASCII UI strings below look like UTF-8 text that
    # was decoded with the wrong codec; they are kept as found -- restore them
    # from the original file if available.
    with gr.Blocks(
        title="AmkyawDev-LLM-V3",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {max-width: 1200px !important;}
        .main {background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);}
        """,
    ) as demo:

        gr.Markdown("""
        # π²π² AmkyawDev-LLM-V3
        ### Burmese Language Model Chat Interface

        α€αααΊααΎα¬ ααΌααΊαα¬αα¬αα¬ααα¬αΈ Large Language Model ααΌααΊαα«αααΊα
        """)

        with gr.Row():
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    height=500,
                    show_copy_button=True,
                    bubble_full_width=False,
                )

                with gr.Row():
                    msg = gr.Textbox(
                        label="Message",
                        placeholder="αα±αΈαα½ααΊαΈααα―ααΊαα«αααΊ...",
                        lines=3,
                        container=True,
                    )

                with gr.Row():
                    submit_btn = gr.Button("π€ ααα―α·αααΊ", variant="primary")
                    clear_btn = gr.Button("ποΈ ααΎααΊαΈαααΊ", variant="secondary")

            with gr.Column(scale=1):
                gr.Markdown("### βοΈ Settings")

                system_prompt = gr.Textbox(
                    label="System Prompt",
                    value="You are a helpful Burmese language assistant.",
                    lines=3,
                )

                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.1,
                    maximum=1.5,
                    value=0.7,
                    step=0.1,
                )

                max_tokens = gr.Slider(
                    label="Max Tokens",
                    minimum=64,
                    maximum=2048,
                    value=512,
                    step=64,
                )

        def respond(message, history, system_prompt, temperature, max_tokens):
            """Append one (user, assistant) turn and clear the message box."""
            # Work on a copy so the list passed in is never mutated; this
            # also tolerates a ``None`` history.
            history = list(history or [])
            if not message or not message.strip():
                # Nothing to send: leave the conversation as it is.
                return "", history
            response = generate_response(
                message,
                system_prompt=system_prompt,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            history.append((message, response))
            return "", history

        # The button and the Enter key share one handler and one wiring.
        respond_inputs = [msg, chatbot, system_prompt, temperature, max_tokens]
        respond_outputs = [msg, chatbot]

        submit_btn.click(
            respond,
            inputs=respond_inputs,
            outputs=respond_outputs,
        )

        msg.submit(
            respond,
            inputs=respond_inputs,
            outputs=respond_outputs,
        )

        # Reset both the message box and the transcript. The original call
        # had an unbalanced closing parenthesis that made the file
        # unparseable.
        clear_btn.click(lambda: (None, []), outputs=[msg, chatbot])

        gr.Markdown("""
        ---
        ### π Notes
        - αα±α¬αΊαααΊααα«αα«α demo mode ααΌααΊααα«αααΊα
        - LoRA weights αα«αα»ααΊαΈααα―ααΊαΈαα«αααΊα
        """)

    return demo
|
|
|
|
| |
if __name__ == "__main__":
    print("Starting AmkyawDev-LLM-V3 Web UI...")
    # Server bind address and port; share=False leaves Gradio's public share
    # link disabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo = create_ui()
    demo.launch(**launch_options)