import os

import gradio as gr
from huggingface_hub import InferenceApi

# Hugging Face API token: read from the HF_TOKEN environment variable when set,
# falling back to the original placeholder. Generate a token with "Inference"
# permission at https://huggingface.co/settings/tokens.
HF_TOKEN = os.environ.get("HF_TOKEN", "YOUR_HUGGINGFACE_TOKEN")

# Initialize the Hugging Face Inference API client for Meta-Llama-3-8B.
# We assume the model is accessible via the standard inference API
# (no special endpoint needed).
inference = InferenceApi(repo_id="meta-llama/Meta-Llama-3-8B", token=HF_TOKEN)


def generate_response(message, history):
    """
    Generate a response to the latest user message using Meta-Llama-3-8B
    via the Hugging Face Inference API.

    Args:
        message (str): The latest user message.
        history (list of dict): The conversation history as a list of
            {"role": ..., "content": ...} dicts. Roles are 'user' or
            'assistant'.

    Returns:
        str: The assistant's response.
    """
    # Build the prompt by concatenating the conversation history in a
    # simple "User: ... / Assistant: ..." format so the base model has
    # the full conversational context.
    lines = []
    for turn in history:
        role = turn.get("role", "").lower()
        content = turn.get("content", "")
        # Treat any non-assistant role as user.
        speaker = "Assistant" if role == "assistant" else "User"
        lines.append(f"{speaker}: {content}")
    # Append the latest user message and cue the model to answer.
    lines.append(f"User: {message}")
    prompt = "\n".join(lines) + "\nAssistant:"

    # Call the inference API with the prompt. Parameters such as
    # max_new_tokens, temperature, top_p, etc. can be adjusted here.
    result = inference(inputs=prompt, parameters={"max_new_tokens": 150})

    # The API may return a list of results or a single dict; guard
    # against an empty list rather than raising IndexError.
    if isinstance(result, list):
        generated = result[0].get("generated_text", "") if result else ""
    else:
        generated = result.get("generated_text", "")

    # The text-generation endpoint echoes the prompt by default, so the
    # raw output begins with the whole transcript. Strip the prompt
    # prefix so only the newly generated completion is returned (the
    # original code noted this but never actually removed the prefix).
    if generated.startswith(prompt):
        generated = generated[len(prompt):]

    return generated.strip()


# Create the Gradio Chat interface.
# Build and launch the Gradio chat UI.
# `type="messages"` specifies using OpenAI-style message dicts for history.
chatbot = gr.ChatInterface(
    fn=generate_response,
    type="messages",
    title="Meta-Llama-3-8B Chatbot",
)
chatbot.launch()