# NOTE(review): the lines below ("Spaces: Paused") are scrape residue from the
# Hugging Face Spaces page header, not part of the program. Kept as a comment
# so the file remains valid Python.
import os

import gradio as gr
from transformers import pipeline

# Hugging Face access token for the gated Llama 3 checkpoint.
# Set HF_TOKEN in the environment (or Space secrets); os.getenv returns
# None when unset, in which case the gated download will fail.
token = os.getenv("HF_TOKEN")

# High-level chat/text-generation pipeline.
# torch_dtype="auto" uses the dtype stored in the checkpoint;
# device_map="auto" places the model on GPU when one is available.
pipe = pipeline(
    "text-generation",
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    token=token,
    torch_dtype="auto",
    device_map="auto",
)
| # Inference function | |
def generate_response(prompt):
    """Generate an assistant reply for a single-turn chat prompt.

    Args:
        prompt: The user's input text.

    Returns:
        The assistant's reply text, or a fallback message when the
        pipeline output contains no assistant turn.
    """
    messages = [{"role": "user", "content": prompt}]
    response = pipe(messages, max_new_tokens=160, temperature=0.7)
    # The chat pipeline returns the whole conversation (input messages plus
    # the newly generated turn) under "generated_text". Scan from the end so
    # the most recent assistant message is returned, even if earlier
    # assistant turns are ever present in the history.
    for msg in reversed(response[0]["generated_text"]):
        if isinstance(msg, dict) and msg.get("role") == "assistant":
            return msg.get("content")
    return "No assistant response found."
| # Gradio interface | |
# Gradio UI: a single prompt textbox in, the generated reply out.
# launch() blocks and serves the app (standard Spaces entry point).
gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=4, label="Prompt"),
    outputs=gr.Textbox(label="Generated Response"),
    title="Meta LLaMA 3 8B Instruct",
    description="Gradio demo for Meta-Llama-3-8B-Instruct using Hugging Face Transformers pipeline",
).launch()