| from transformers import AutoModelForCausalLM, AutoTokenizer |
| import gradio as gr |
| import torch |
|
|
| |
# Run inference entirely on CPU; cap intra-op threads so the model does not
# saturate the host while serving the Gradio UI.
device = "cpu"
torch.set_num_threads(4)
|
|
| |
# Load the 0.5B instruct model for CPU inference.
# float32 is used because CPU kernels for half precision are slow/unsupported;
# low_cpu_mem_usage streams weights to reduce peak RSS during load.
model = AutoModelForCausalLM.from_pretrained(
    "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
).to(device)

# Tokenizer must come from the same checkpoint so the chat template and
# special tokens match the model.
tokenizer = AutoTokenizer.from_pretrained(
    "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B"
)
|
|
def predict(message, history):
    """Generate a single-turn reply for the Gradio ChatInterface.

    Args:
        message: The user's latest message (str).
        history: Prior chat turns supplied by gr.ChatInterface; currently
            unused — each call is answered single-turn. TODO: fold history
            into `chat` for multi-turn context.

    Returns:
        The model's reply text, with the prompt and special tokens stripped.
    """
    chat = [
        {"role": "system", "content": "๊ฐ๊ฒฐํ๊ฒ ๋ต๋ณํด์ฃผ์ธ์."},
        {"role": "user", "content": message},
    ]

    inputs = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,  # append the assistant header so the model answers
        return_tensors="pt",
        max_length=512,
        truncation=True,
    ).to(device)

    # Inference only — no autograd bookkeeping needed.
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=200,
            # Greedy decoding: `temperature` removed — it is ignored (and
            # warned about) when do_sample=False.
            do_sample=False,
        )

    # generate() returns prompt + completion; decode only the new tokens,
    # otherwise the reply echoes the full prompt back to the user.
    reply_ids = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)
|
|
| |
# Gradio chat UI wired to predict().
# NOTE(review): the Korean strings below appear to be UTF-8 mojibake from an
# earlier encoding mishap; they are preserved as-is except that the first
# example string, which was split mid-literal by a stray line break (a syntax
# error), has been rejoined — confirm the intended text against the original.
demo = gr.ChatInterface(
    predict,
    title="CLOVA X (CPU ๋ชจ๋)",
    description="CPU ์ ์ฉ ๊ฒฝ๋ํ ๋ฒ์ ",
    theme="soft",
    examples=["์๋ ํ์ธ์", "๋ ์จ ์๋ ค์ค"],
)
|
|
if __name__ == "__main__":
    # Blocking launch. `prevent_thread_lock=True` was removed: it makes
    # launch() return immediately, so a standalone script would exit and
    # kill the server right after starting it. `favicon_path=None` dropped
    # (it is the default).
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces (container/LAN use)
        server_port=7860,
    )