import os from transformers import AutoTokenizer, AutoModelForCausalLM import gradio as gr # Load your environment variables hf_api_token = os.getenv("HF_API_TOKEN") # Ensure you have access to the model and are authenticated model_name = "meta-llama/Meta-Llama-3-8B-Instruct" tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_api_token) model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_api_token) def chatbot(input_text): inputs = tokenizer.encode(input_text, return_tensors="pt") outputs = model.generate(inputs, max_length=500) response = tokenizer.decode(outputs[0], skip_special_tokens=True) return response # Create Gradio interface iface = gr.Interface(fn=chatbot, inputs="text", outputs="text", title="LLaMA 3 Chatbot") # Use the Gradio queue to handle multiple requests iface.queue().launch()