"""Gradio demo for Meta-Llama-3-8B-Instruct via the transformers pipeline."""

import os

import gradio as gr
from transformers import pipeline

# Hugging Face access token (the Llama 3 repo is gated, so this is required).
token = os.getenv("HF_TOKEN")

# Use a pipeline as a high-level helper. device_map="auto" places the model
# on GPU when available; torch_dtype="auto" picks the checkpoint's dtype.
pipe = pipeline(
    "text-generation",
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    token=token,
    torch_dtype="auto",
    device_map="auto",
)


def generate_response(prompt):
    """Generate a single-turn chat completion for *prompt*.

    Args:
        prompt: The user's input text.

    Returns:
        The assistant's reply string, or a fallback message if no
        assistant turn is found in the pipeline output.
    """
    messages = [{"role": "user", "content": prompt}]
    # do_sample=True is required for temperature to take effect; without it,
    # recent transformers versions ignore the temperature argument.
    response = pipe(
        messages,
        max_new_tokens=160,
        temperature=0.7,
        do_sample=True,
    )
    # For chat-style input, generated_text is the full message list
    # (user turn + generated turns). The newly generated assistant reply
    # is the LAST assistant message, so scan in reverse.
    for msg in reversed(response[0]["generated_text"]):
        if isinstance(msg, dict) and msg.get("role") == "assistant":
            return msg.get("content")
    return "No assistant response found."


if __name__ == "__main__":
    # Guarded so importing this module does not start the web server.
    gr.Interface(
        fn=generate_response,
        inputs=gr.Textbox(lines=4, label="Prompt"),
        outputs=gr.Textbox(label="Generated Response"),
        title="Meta LLaMA 3 8B Instruct",
        description="Gradio demo for Meta-Llama-3-8B-Instruct using Hugging Face Transformers pipeline",
    ).launch()