Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import torch | |
| import os | |
from huggingface_hub import login

# Read the Hugging Face access token from the "HF_Token" environment variable.
# The lookup is done once and the result is reused for the login call below.
HF_TOKEN = os.getenv("HF_Token")

# Log in only when a token was actually provided. Per the huggingface_hub
# documentation, login() without a token falls back to prompting the user,
# which cannot be answered when the app runs unattended.
if HF_TOKEN:
    login(token=HF_TOKEN)
# Hub identifier of the checkpoint served by this app
# (LLaMA 3.2 1B Instruct).
model_name = "meta-llama/Llama-3.2-1B-Instruct"

# The tokenizer and the model are loaded once at import time and shared by
# every request handled by the app.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Gradio app: a chat window, a text box for the user's message and a button
# that clears both.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Append the user's message and the model's reply to the chat.

        Args:
            message: Text submitted through the textbox.
            chat_history: List of ``{"role": ..., "content": ...}`` dicts
                holding the conversation so far (the chatbot's "messages"
                format).

        Returns:
            A ``(textbox_value, chat_history)`` tuple: an empty string that
            clears the textbox, and the updated history. The history is
            returned unchanged when ``message`` is empty or whitespace.
        """
        # Work on a copy so the caller's list is never mutated in place.
        chat_history = list(chat_history or [])

        # Nothing to answer: do not spend a generation on a blank message.
        if not message or not message.strip():
            return "", chat_history

        # Add user message to chat history
        chat_history.append({"role": "user", "content": message})

        # Pass only the role/content pair of each turn to the chat template;
        # any other keys a history entry may carry are not part of the prompt.
        prompt_messages = [
            {"role": turn["role"], "content": turn["content"]}
            for turn in chat_history
        ]

        # Format the conversation with the model's own chat template rather
        # than an ad-hoc "User: ..." transcript, and request a dict so the
        # attention mask is passed to generate() along with the input ids.
        inputs = tokenizer.apply_chat_template(
            prompt_messages,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        # max_new_tokens bounds the reply only; max_length would also count
        # the prompt, leaving no room to answer once the chat grows.
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )

        # The generated sequence starts with the prompt tokens; decode only
        # what follows them so the reply does not echo the conversation.
        prompt_length = inputs["input_ids"].shape[-1]
        bot_message = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()

        # Add bot response to chat history
        chat_history.append({"role": "assistant", "content": bot_message})
        return "", chat_history

    # Submitting the textbox runs respond() and writes its two return values
    # back to the textbox and the chatbot.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
# Launch the Blocks app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()