import gradio as gr
from huggingface_hub import InferenceApi

# Replace 'YOUR_HUGGINGFACE_TOKEN' with your actual Hugging Face API token.
# You can generate one at https://huggingface.co/settings/tokens with "Inference" permission.
HF_TOKEN = "YOUR_HUGGINGFACE_TOKEN"
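
# In a Hugging Face Space, prefer storing the token as a repository secret
# (exposed to the app as an environment variable) instead of hard-coding it,
# for example:
#   import os
#   HF_TOKEN = os.environ.get("HF_TOKEN", "YOUR_HUGGINGFACE_TOKEN")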

# Initialize the Hugging Face Inference API client for Meta-Llama-3-8B.
# We assume the model is accessible via the standard inference API (no special endpoint needed).
inference = InferenceApi(repo_id="meta-llama/Meta-Llama-3-8B", token=HF_TOKEN)
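
# Note: meta-llama/Meta-Llama-3-8B is a gated model; your account must have
# accepted its license on the model page before API calls will succeed.
# InferenceApi is also deprecated in newer huggingface_hub releases in favor
# of InferenceClient; this script assumes a version where it is still available.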


def generate_response(message, history):
    """
    Generates a response to the latest user message using Meta-Llama-3-8B
    via the Hugging Face Inference API.

    Args:
        message (str): The latest user message.
        history (list of dict): The conversation history as a list of
            {"role": ..., "content": ...} dicts. Roles are 'user' or 'assistant'.
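
    Example history:
        [{"role": "user", "content": "Hi"},
         {"role": "assistant", "content": "Hello! How can I help?"}]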

    Returns:
        str: The assistant's response.
    """
    # Build the prompt by concatenating the conversation history
    # in a simple "User: ... Assistant: ..." format for context.
    prompt = ""
    for turn in history:
        role = turn.get("role", "").lower()
        content = turn.get("content", "")
        if role == "assistant":
            prompt += f"Assistant: {content}\n"
        else:
            # Treat any non-assistant role as user.
            prompt += f"User: {content}\n"
    # Append the latest user message.
    prompt += f"User: {message}\nAssistant:"

    # Call the inference API with the prompt. Note that InferenceApi takes the
    # generation parameters via `params` (not `parameters`); you can adjust
    # max_new_tokens, temperature, top_p, etc., if desired.
    result = inference(inputs=prompt, params={"max_new_tokens": 150})
    # The API may return a list of results or a single dict.
    if isinstance(result, list):
        generated = result[0].get("generated_text", "")
    else:
        generated = result.get("generated_text", "")
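
    # A successful text-generation call typically returns a list like
    #   [{"generated_text": "User: Hi\nAssistant: Hello! ..."}]
    # where the generated text includes the original prompt.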

    # The model may echo the prompt; strip the prompt prefix if present so
    # only the text generated after the final "Assistant:" remains, then
    # strip leading/trailing whitespace for cleanliness.
    if generated.startswith(prompt):
        generated = generated[len(prompt):]
    reply = generated.strip()
    return reply


# Create the Gradio Chat interface.
# `type="messages"` specifies using OpenAI-style message dicts for history.
chatbot = gr.ChatInterface(fn=generate_response, type="messages", title="Meta-Llama-3-8B Chatbot")
chatbot.launch()
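
# To run locally: `python app.py`, then open the printed URL
# (http://127.0.0.1:7860 by default). Pass share=True to launch() for a
# temporary public share link.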