import gradio as gr
from huggingface_hub import InferenceApi

# Replace 'YOUR_HUGGINGFACE_TOKEN' with your actual Hugging Face API token.
# You can generate one at https://huggingface.co/settings/tokens with "Inference" permission.
HF_TOKEN = "YOUR_HUGGINGFACE_TOKEN"
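
# In a Hugging Face Space, prefer storing the token as a repository secret
# (exposed to the app as an environment variable) instead of hard-coding it,
# for example:
#   import os
#   HF_TOKEN = os.environ.get("HF_TOKEN", "YOUR_HUGGINGFACE_TOKEN")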

# Initialize the Hugging Face Inference API client for Meta-Llama-3-8B.
# We assume the model is accessible via the standard inference API (no special endpoint needed).
inference = InferenceApi(repo_id="meta-llama/Meta-Llama-3-8B", token=HF_TOKEN)
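
# Note: meta-llama/Meta-Llama-3-8B is a gated model; your account must have
# accepted its license on the model page before API calls will succeed.
# InferenceApi is also deprecated in newer huggingface_hub releases in favor
# of InferenceClient; this script assumes a version where it is still available.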


def generate_response(message, history):
    """
    Generates a response to the latest user message using Meta-Llama-3-8B
    via the Hugging Face Inference API.

    Args:
        message (str): The latest user message.
        history (list of dict): The conversation history as a list of
            {"role": ..., "content": ...} dicts. Roles are 'user' or 'assistant'.
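
    Example history:
        [{"role": "user", "content": "Hi"},
         {"role": "assistant", "content": "Hello! How can I help?"}]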

    Returns:
        str: The assistant's response.
    """
    # Build the prompt by concatenating the conversation history
    # in a simple "User: ... Assistant: ..." format for context.
    prompt = ""
    for turn in history:
        role = turn.get("role", "").lower()
        content = turn.get("content", "")
        if role == "assistant":
            prompt += f"Assistant: {content}\n"
        else:
            # Treat any non-assistant role as user.
            prompt += f"User: {content}\n"
    # Append the latest user message.
    prompt += f"User: {message}\nAssistant:"

    # Call the inference API with the prompt. Note that InferenceApi takes the
    # generation parameters via `params` (not `parameters`); you can adjust
    # max_new_tokens, temperature, top_p, etc., if desired.
    result = inference(inputs=prompt, params={"max_new_tokens": 150})
    # The API may return a list of results or a single dict.
    if isinstance(result, list):
        generated = result[0].get("generated_text", "")
    else:
        generated = result.get("generated_text", "")
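
    # A successful text-generation call typically returns a list like
    #   [{"generated_text": "User: Hi\nAssistant: Hello! ..."}]
    # where the generated text includes the original prompt.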

    # The model may echo the prompt; strip the prompt prefix if present so
    # only the text generated after the final "Assistant:" remains, then
    # strip leading/trailing whitespace for cleanliness.
    if generated.startswith(prompt):
        generated = generated[len(prompt):]
    reply = generated.strip()
    return reply


# Create the Gradio Chat interface.
# `type="messages"` specifies using OpenAI-style message dicts for history.
chatbot = gr.ChatInterface(fn=generate_response, type="messages", title="Meta-Llama-3-8B Chatbot")
chatbot.launch()
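
# To run locally: `python app.py`, then open the printed URL
# (http://127.0.0.1:7860 by default). Pass share=True to launch() for a
# temporary public share link.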