Spaces:
Paused
Paused
File size: 1,042 Bytes
ffc6533 56634b9 ffc6533 56634b9 07d2eee 56634b9 07d2eee 56634b9 c7cfffe a81c7b2 07d2eee 56634b9 07d2eee 56634b9 07d2eee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import os
import gradio as gr
from transformers import pipeline
# Read the Hugging Face access token from the environment; the
# meta-llama repository is gated, so an authorized token is required.
hf_token = os.getenv("HF_TOKEN")

# Build a chat-capable text-generation pipeline for Llama-3-8B-Instruct.
# dtype and device placement are resolved automatically by transformers.
pipe = pipeline(
    task="text-generation",
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    token=hf_token,
    torch_dtype="auto",
    device_map="auto",
)
# Inference function
def generate_response(prompt):
    """Generate an assistant reply for *prompt* via the chat pipeline.

    Args:
        prompt: The user's input text.

    Returns:
        The assistant's reply string, the raw generated text if the
        pipeline returns plain text, or a fallback message when no
        assistant turn is present in the output.
    """
    messages = [{"role": "user", "content": prompt}]
    # do_sample=True is required for temperature to take effect; without it
    # the pipeline decodes greedily and the temperature setting is ignored.
    response = pipe(messages, max_new_tokens=160, temperature=0.7, do_sample=True)
    generated = response[0]["generated_text"]
    # Chat-style pipelines return the whole conversation as a list of
    # {"role": ..., "content": ...} dicts; extract the assistant turn.
    if isinstance(generated, list):
        for msg in generated:
            if isinstance(msg, dict) and msg.get("role") == "assistant":
                return msg.get("content")
        return "No assistant response found."
    # Fallback: some pipeline configurations return plain generated text.
    return generated
# Gradio interface
# Wire the inference function into a minimal Gradio UI and start serving.
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=4, label="Prompt"),
    outputs=gr.Textbox(label="Generated Response"),
    title="Meta LLaMA 3 8B Instruct",
    description="Gradio demo for Meta-Llama-3-8B-Instruct using Hugging Face Transformers pipeline",
)
demo.launch()
|