| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import torch | |
# Checkpoint identifier handed to both ``from_pretrained`` calls below, so the
# tokenizer and the model weights always come from the same source.
model_id = "seonglae/yokhal-md"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the weights first, then move them to the GPU when CUDA is available and
# to the CPU otherwise.
model = AutoModelForCausalLM.from_pretrained(model_id)
if torch.cuda.is_available():
    model = model.to("cuda")
else:
    model = model.to("cpu")
def chatbot_response(input_text):
    """Generate the model's reply to a single user message.

    The message is prefixed with a Korean instruction asking the model to
    answer in Korean, rendered through the tokenizer's chat template, and
    passed to ``model.generate``.

    Args:
        input_text: The user's message as plain text.

    Returns:
        Only the newly generated reply text. The prompt (instruction, user
        message and template markers) is not echoed back.
    """
    chat_input = [{'role': 'user', 'content': f'한국어로 대답해\n{input_text}'}]
    prompt = tokenizer.apply_chat_template(
        chat_input, tokenize=False, add_generation_prompt=True
    )

    # The rendered template is expected to carry its own special tokens, so
    # they must not be added a second time when the text is tokenized here.
    # Inputs go to whichever device the model was actually placed on.
    encoded = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).to(model.device)

    outputs = model.generate(**encoded, max_new_tokens=100, repetition_penalty=1.05)

    # For a causal LM the returned sequence starts with the prompt tokens;
    # drop them so only the continuation is decoded.
    prompt_length = encoded["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)
# Build the two text components up front so the Interface call reads as a
# plain wiring of function -> input -> output.
question_box = gr.Textbox(lines=2, placeholder="Enter your text here...")
answer_box = gr.Textbox()

iface = gr.Interface(
    fn=chatbot_response,
    inputs=question_box,
    outputs=answer_box,
    title="Korean Chatbot",
    description="Ask anything!",
)

# Start the Gradio app as soon as the script is executed.
iface.launch()