# Fans_Chat_Bot / bot.py
# Author: SakshamSna — commit "bot" (8f0ee7b)
import gradio as gr
from huggingface_hub import InferenceApi
# Replace 'YOUR_HUGGINGFACE_TOKEN' with your actual Hugging Face API token.
# You can generate one at https://huggingface.co/settings/tokens with "Inference" permission.
# NOTE(review): a placeholder token is hard-coded here; prefer reading the token
# from an environment variable so real credentials never land in version control.
HF_TOKEN = "YOUR_HUGGINGFACE_TOKEN"
# Initialize the Hugging Face Inference API client for Meta-Llama-3-8B.
# We assume the model is accessible via the standard inference API (no special endpoint needed).
# NOTE(review): `InferenceApi` is deprecated in recent huggingface_hub releases in
# favor of `InferenceClient` — confirm the pinned library version still provides it,
# and that the account has accepted the gated Meta-Llama-3-8B license.
inference = InferenceApi(repo_id="meta-llama/Meta-Llama-3-8B", token=HF_TOKEN)
def _build_prompt(message, history):
    """Serialize the conversation into a 'User: ... / Assistant: ...' prompt string."""
    parts = []
    for turn in history:
        role = turn.get("role", "").lower()
        content = turn.get("content", "")
        # Treat any non-assistant role as user.
        speaker = "Assistant" if role == "assistant" else "User"
        parts.append(f"{speaker}: {content}\n")
    # Append the latest user message and cue the model to answer.
    parts.append(f"User: {message}\nAssistant:")
    return "".join(parts)


def _extract_reply(result, prompt):
    """Pull the assistant's reply out of an Inference API result dict/list."""
    # The API may return a list of results or a single dict.
    if isinstance(result, list):
        generated = result[0].get("generated_text", "") if result else ""
    else:
        generated = result.get("generated_text", "")
    # text-generation returns the full text (prompt + completion) by default,
    # so strip the echoed prompt prefix when present.
    if generated.startswith(prompt):
        generated = generated[len(prompt):]
    # A base (non-instruct) model keeps writing further conversation turns;
    # keep only the text up to the next "User:" line.
    generated = generated.split("\nUser:", 1)[0]
    # Strip leading/trailing whitespace for cleanliness.
    return generated.strip()


def generate_response(message, history):
    """
    Generates a response to the latest user message using Meta-Llama-3-8B via
    the Hugging Face Inference API.

    Args:
        message (str): The latest user message.
        history (list of dict): The conversation history as a list of
            {"role": ..., "content": ...} dicts. Roles are 'user' or 'assistant'.

    Returns:
        str: The assistant's response.
    """
    # Build the prompt by concatenating the conversation history
    # in a simple "User: ... Assistant: ..." format for context.
    prompt = _build_prompt(message, history)
    # Call the inference API with the prompt.
    # You can adjust parameters like max_new_tokens, temperature, top_p, etc., if desired.
    result = inference(inputs=prompt, parameters={"max_new_tokens": 150})
    return _extract_reply(result, prompt)
# Create the Gradio Chat interface.
# `type="messages"` specifies using OpenAI-style message dicts for history,
# matching the {"role": ..., "content": ...} format generate_response consumes.
chatbot = gr.ChatInterface(fn=generate_response, type="messages", title="Meta-Llama-3-8B Chatbot")
# Start the local web server. This blocks, and runs on import since there is no
# `if __name__ == "__main__":` guard — NOTE(review): consider adding one.
chatbot.launch()