# -*- coding: utf-8 -*-
"""llama3-chatbot.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/135s6JfFHtKhcOcp7xB3b6v6FOYLenQEM
"""
import transformers
import torch

# Pre-quantized 4-bit build of Llama 3 8B Instruct, published by unsloth.
model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"

# Load the model behind a text-generation pipeline, keeping activations in
# fp16 and weights in 4-bit to minimize GPU memory use.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={
        "torch_dtype": torch.float16,
        "quantization_config": {"load_in_4bit": True},
        "low_cpu_mem_usage": True,
    },
)
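# Note: this checkpoint is already stored in bitsandbytes 4-bit format; the
# explicit form of the dict config above is
# transformers.BitsAndBytesConfig(load_in_4bit=True). Quantized this way,
# the 8B model needs roughly 6 GB of GPU memory, so it fits on a free Colab T4.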
# Single-shot example: format the conversation with the model's chat template.
messages = [
    {"role": "system", "content": "You are a helpful assistant!"},
    {"role": "user", "content": "Hey how are you doing today?"},
]

prompt = pipeline.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
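# For reference, the rendered prompt should look roughly like this (the exact
# special tokens come from the tokenizer's bundled Llama 3 chat template):
#
#   <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#
#   You are a helpful assistant!<|eot_id|><|start_header_id|>user<|end_header_id|>
#
#   Hey how are you doing today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
print(prompt)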
# Llama 3 Instruct ends each turn with <|eot_id|> rather than the plain EOS
# token, so generation should stop on either one.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)

# The pipeline returns the prompt plus the completion; slice the prompt off.
print(outputs[0]["generated_text"][len(prompt):])
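# A small reusable wrapper (not part of the original notebook, just a sketch)
# that runs the same single-shot flow above for an arbitrary question:
def chat(user_msg, system_msg="You are a helpful assistant!"):
    msgs = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_msg},
    ]
    p = pipeline.tokenizer.apply_chat_template(
        msgs, tokenize=False, add_generation_prompt=True
    )
    out = pipeline(
        p,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    return out[0]["generated_text"][len(p):]

# Example: print(chat("Summarize the plot of Hamlet in two sentences."))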
import gradio as gr

# Running history in the chat-template message format; the Gradio Chatbot
# widget tracks its own list of (user, assistant) pairs separately.
messages = []

def add_text(history, text):
    global messages
    # Append as a list, not a tuple, so generate() can mutate the assistant
    # slot in place while streaming.
    history = history + [[text, ""]]
    messages = messages + [{"role": "user", "content": text}]
    return history, ""  # an empty string clears the textbox after submit

def generate(history):
    global messages
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    outputs = pipeline(
        prompt,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    response_msg = outputs[0]["generated_text"][len(prompt):]
    # Record the assistant turn so later turns see the full conversation
    # (the generated notebook never appended it, silently losing context).
    messages = messages + [{"role": "assistant", "content": response_msg}]
    # Replay the finished reply character by character to animate the chat.
    for char in response_msg:
        history[-1][1] += char
        yield history
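# The loop above only replays an already-finished string. For true token-level
# streaming, transformers ships TextIteratorStreamer; the sketch below is an
# alternative generate() under that assumption, reusing the same globals:
from threading import Thread
from transformers import TextIteratorStreamer

def generate_streamed(history):
    global messages
    prompt = pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # The chat template already inserted <|begin_of_text|>, so don't add
    # special tokens a second time when re-tokenizing.
    inputs = pipeline.tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    ).to(pipeline.model.device)
    streamer = TextIteratorStreamer(
        pipeline.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # generate() blocks, so run it on a worker thread and consume the streamer.
    Thread(
        target=pipeline.model.generate,
        kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=256,
            eos_token_id=terminators,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
        ),
    ).start()
    response_msg = ""
    for chunk in streamer:  # yields decoded text as tokens are produced
        response_msg += chunk
        history[-1][1] = response_msg
        yield history
    messages = messages + [{"role": "assistant", "content": response_msg}]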
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(value=[], elem_id="chatbot")
    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Enter text and press enter",
        )
    # On submit: record the user turn first, then stream the model's reply
    # into the last chat row.
    txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        generate, inputs=[chatbot], outputs=chatbot,
    )

demo.queue()  # queuing is required for generator (streaming) callbacks
demo.launch(debug=True)
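# Note: inside Colab, demo.launch(debug=True, share=True) is the usual variant;
# share=True exposes the UI through a temporary public gradio.live link.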