import gradio as gr
from ._model import LlamaCppGemmaModel
from ._prompts import PromptManager
class GradioChat:
"""
    Gradio chat interface for Gemma models served through llama.cpp.

    Features:
    - A Gradio-based chatbot UI.
    - Dynamically loads models based on user selection, with caching.
    - Dynamically updates tasks and example prompts via PromptManager.
    - Streams responses generated by Gemma (llama.cpp).

    A usage sketch lives in the ``__main__`` guard at the bottom of this file.
"""
def __init__(self, model_options: list[str], task_options: list[str]):
self.model_options = model_options
self.task_options = task_options
        self.current_model_name = "gemma-3b"
        self.current_task_name = "Question Answering"
        self.prompt_manager = self._load_task(self.current_task_name)
        # Cache of loaded models, keyed by name, so switching back to a
        # previous model does not reload it from disk.
        self.models_cache = {}
        # Load the default model eagerly so the configuration sliders can
        # read its sampling parameters when the UI is built.
        self.model = self._load_model(self.current_model_name)
    def _load_model(self, model_name: str):
        """Loads the model for the given name, reusing the cache when possible."""
        # Record the selection first, so that cache hits also keep
        # current_model_name in sync with the model actually in use.
        self.current_model_name = model_name
        if model_name in self.models_cache:
            return self.models_cache[model_name]
        model = LlamaCppGemmaModel(name=model_name).load_model(
            system_prompt=self.prompt_manager.get_system_prompt()
        )
        self.models_cache[model_name] = model
        return model
def _load_task(self, task_name: str):
"""Loads the task dynamically when switching tasks."""
self.current_task_name = task_name
return PromptManager(task=task_name)
def _chat(self):
        def chat_fn(message, history, selected_model, selected_task):
            # Safety net: load a model if none is ready (it is normally
            # loaded eagerly in __init__).
            if self.model is None:
                self.model = self._load_model(self.current_model_name)
            model_changed = selected_model != self.current_model_name
            task_changed = selected_task != self.current_task_name
            # Reload the model if the selection changed, using the cache
            # when possible.
            if model_changed:
                self.model = self._load_model(selected_model)
            # Reload the task if the selection changed.
            if task_changed:
                self.prompt_manager = self._load_task(selected_task)
            # A model or task switch invalidates the conversation, so reset
            # the history to just the current system prompt.
            if model_changed or task_changed:
                self.model.messages = [
                    {
                        "role": "system",
                        "content": self.prompt_manager.get_system_prompt(),
                    }
                ]
# Generate response using updated model & prompt manager
prompt = self.prompt_manager.get_prompt(user_input=message)
response_stream = self.model.generate_response(prompt)
yield from response_stream
        def _get_examples(task):
            """Returns example prompts for the selected task type."""
examples = {
"Question Answering": [
"What is quantum computing?",
"How do neural networks work?",
"Explain climate change in simple terms.",
],
"Text Generation": [
"Once upon a time in a distant galaxy...",
"The abandoned house at the end of the street had...",
"In the year 2150, humanity discovered...",
],
"Code Completion": [
"def fibonacci(n):",
"class BinarySearchInAList:",
"async def fetch_data(url):",
],
}
            return examples.get(task, [])  # Empty list for unknown tasks.
def _update_examples(task):
"""Updates the examples based on the selected task."""
examples = _get_examples(task)
return gr.Dataset(samples=[[example] for example in examples])
with gr.Blocks() as demo:
with gr.Row():
                with gr.Column(scale=3):  # Main column: settings and chat.
                    with gr.Accordion(
                        "Basic Settings ⚙️", open=False
                    ):  # Collapsible settings panel.
gr.Markdown(
"## Google Gemma Models: lightweight, state-of-the-art open models from Google"
)
task_dropdown = gr.Dropdown(
choices=self.task_options,
value=self.current_task_name,
label="Select Task",
)
model_dropdown = gr.Dropdown(
choices=self.model_options,
value=self.current_model_name,
label="Select Gemma Model",
)
chat_interface = gr.ChatInterface(
chat_fn,
additional_inputs=[model_dropdown, task_dropdown],
textbox=gr.Textbox(
placeholder="Ask me something...", container=False
),
)
gr.Markdown(
"Medium Blog Post: [Gemma Chat Interface Blog](https://medium.com/@aadyachinubhai/introducing-the-gemma-chat-interface-your-ai-powered-chat-companion-a77fc609e51a)"
)
                with gr.Column(scale=1):  # Side column: pointers and tuning.
with gr.Accordion("Important Pointers", open=False):
gr.Markdown(
"""
## Pointers
                            - The first response after a model change is slower, since the new model is loaded on demand.
                            - Switching models or tasks clears the chat history.
                            - Larger models need more memory but give better results.
"""
)
examples_list = gr.Examples(
examples=[
[example]
for example in _get_examples(self.current_task_name)
],
inputs=chat_interface.textbox,
)
task_dropdown.change(
_update_examples, task_dropdown, examples_list.dataset
)
with gr.Accordion("Model Configuration ⚙️", open=False):
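                        # Each slider below writes the chosen value straight
                        # onto the currently loaded model via setattr, so it
                        # takes effect on the next generated response.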
temperature_slider = gr.Slider(
minimum=0.1,
                            maximum=2.0,
value=self.model.temperature,
label="Temperature",
)
gr.Markdown(
"**Temperature:** Lower values make the output more deterministic."
)
temperature_slider.change(
fn=lambda temp: setattr(
self.model, "temperature", temp
),
inputs=temperature_slider,
)
top_p_slider = gr.Slider(
minimum=0.1,
maximum=1.0,
value=self.model.top_p,
label="Top P",
)
gr.Markdown(
"**Top P:** Lower values make the output more focused."
)
top_p_slider.change(
fn=lambda top_p: setattr(self.model, "top_p", top_p),
inputs=top_p_slider,
)
top_k_slider = gr.Slider(
minimum=1,
maximum=100,
value=self.model.top_k,
label="Top K",
)
gr.Markdown(
"**Top K:** Lower values make the output more focused."
)
top_k_slider.change(
fn=lambda top_k: setattr(self.model, "top_k", top_k),
inputs=top_k_slider,
)
repetition_penalty_slider = gr.Slider(
minimum=1.0,
maximum=2.0,
value=self.model.repeat_penalty,
label="Repetition Penalty",
)
gr.Markdown(
"**Repetition Penalty:** Penalizes repeated tokens to reduce repetition in the output."
)
repetition_penalty_slider.change(
fn=lambda penalty: setattr(
self.model, "repeat_penalty", penalty
),
inputs=repetition_penalty_slider,
)
max_tokens_slider = gr.Slider(
minimum=512,
maximum=2048,
value=self.model.max_tokens,
label="Max Tokens",
)
gr.Markdown(
"**Max Tokens:** Sets the maximum number of tokens the model can generate in one response."
)
max_tokens_slider.change(
fn=lambda max_tokens: setattr(
self.model, "max_tokens", max_tokens
),
inputs=max_tokens_slider,
)
demo.launch()
def run(self):
self._chat()
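

# A minimal launch sketch. "gemma-3b" and the three task names are taken from
# this file; any other model names are assumptions, so pass whichever variants
# your LlamaCppGemmaModel setup actually supports. Because the imports above
# are relative, run this as a module (python -m <package>.<module>) rather
# than as a script.
if __name__ == "__main__":
    chat = GradioChat(
        model_options=["gemma-3b"],  # extend with other available Gemma builds
        task_options=["Question Answering", "Text Generation", "Code Completion"],
    )
    chat.run()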