File size: 1,042 Bytes
ffc6533
56634b9
 
ffc6533
56634b9
 
07d2eee
56634b9
 
 
 
 
 
07d2eee
 
 
 
56634b9
 
c7cfffe
a81c7b2
 
 
 
 
 
07d2eee
 
 
 
56634b9
07d2eee
56634b9
 
07d2eee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
import gradio as gr
from transformers import pipeline

# Read the Hugging Face access token from the environment; gated models
# such as Meta-Llama-3 require authentication to download.
token = os.getenv("HF_TOKEN")

# Build the chat text-generation pipeline. torch_dtype="auto" and
# device_map="auto" let torch/accelerate choose precision and placement.
pipeline_kwargs = {
    "model": "meta-llama/Meta-Llama-3-8B-Instruct",
    "token": token,
    "torch_dtype": "auto",
    "device_map": "auto",
}
pipe = pipeline("text-generation", **pipeline_kwargs)

# Inference function
def generate_response(prompt):
    """Run the chat pipeline on *prompt* and return the assistant's reply.

    Args:
        prompt: The user's input text.

    Returns:
        The assistant's reply string, or a fallback message if no
        assistant turn is present in the pipeline output.
    """
    messages = [{"role": "user", "content": prompt}]
    # do_sample=True is required for temperature to have any effect;
    # without it the pipeline greedy-decodes and ignores temperature.
    response = pipe(
        messages,
        max_new_tokens=160,
        temperature=0.7,
        do_sample=True,
    )
    # Chat pipelines return the whole conversation in "generated_text";
    # scan from the end so we pick the newly generated assistant turn.
    for msg in reversed(response[0]["generated_text"]):
        if isinstance(msg, dict) and msg.get("role") == "assistant":
            return msg.get("content")
    return "No assistant response found."


# Gradio interface: a single prompt box in, a single response box out.
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=4, label="Prompt"),
    outputs=gr.Textbox(label="Generated Response"),
    title="Meta LLaMA 3 8B Instruct",
    description="Gradio demo for Meta-Llama-3-8B-Instruct using Hugging Face Transformers pipeline",
)
demo.launch()