# quick-hub-957 / app.py
# Hugging Face Space by Rights4AI — "Deploy Gradio app with multiple files"
# (commit ca9b9a8, verified)
import gradio as gr
import os
from pathlib import Path
from typing import List, Dict, Optional, Generator
import json
import time
from models import ModelManager
from utils import get_available_models, format_chat_history, parse_model_info
class ChatbotApp:
    """Holds model-manager and chat state behind the Gradio UI.

    All user-facing methods return status strings (prefixed with ✅/❌)
    rather than raising, so they can be wired directly to Gradio outputs.
    """

    def __init__(self):
        self.model_manager = ModelManager()
        # File name (not path) of the currently loaded model, or None.
        self.current_model: Optional[str] = None
        # Messages-format history: [{"role": ..., "content": ...}, ...]
        self.chat_history: List[Dict[str, str]] = []
        self.system_prompt = "You are a helpful assistant."

    def load_model(self, model_path: str, context_size: int = 2048, gpu_layers: int = 0) -> str:
        """Load a GGUF model and return a status string.

        Args:
            model_path: Filesystem path to the .gguf file.
            context_size: Context window size forwarded to the backend.
            gpu_layers: Number of layers to offload to the GPU (0 = CPU only).

        Returns:
            A human-readable ✅/❌ status message; never raises.
        """
        try:
            if not model_path or not os.path.exists(model_path):
                return "❌ Please select a valid model file"
            success = self.model_manager.load_model(
                model_path=model_path,
                context_size=context_size,
                gpu_layers=gpu_layers,
            )
            if success:
                self.current_model = Path(model_path).name
                return f"✅ Successfully loaded: {self.current_model}"
            return "❌ Failed to load model"
        except Exception as e:
            return f"❌ Error loading model: {str(e)}"

    def unload_model(self) -> str:
        """Unload the current model, reset state, and return a status string."""
        self.model_manager.unload_model()
        self.current_model = None
        return "✅ Model unloaded"

    def chat_response(
        self,
        message: str,
        history: List[Dict[str, str]],
        temperature: float,
        max_tokens: int,
        top_p: float,
        repeat_penalty: float,
    ) -> Generator[str, None, None]:
        """Stream a model response, yielding the accumulated text so far.

        Args:
            message: The latest user message.
            history: Prior turns in messages format (role/content dicts).
            temperature, max_tokens, top_p, repeat_penalty: Sampling
                parameters forwarded to the backend's generate().

        Yields:
            The growing response text (each yield is the full text so far),
            or a single ❌ message on failure / when no model is loaded.
        """
        if not self.model_manager.is_loaded():
            yield "❌ No model loaded. Please load a model first."
            return
        try:
            # Flatten the prior turns plus system prompt into a single prompt.
            formatted_history = format_chat_history(history, self.system_prompt)
            response_text = ""
            for chunk in self.model_manager.generate(
                prompt=formatted_history + message,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
                repeat_penalty=repeat_penalty,
            ):
                response_text += chunk
                yield response_text
        except Exception as e:
            yield f"❌ Error generating response: {str(e)}"

    def clear_chat(self) -> List[Dict[str, str]]:
        """Clear the stored history and return an empty messages-format list."""
        self.chat_history = []
        return []

    def get_model_info(self) -> str:
        """Return loaded-model metadata as pretty-printed JSON, or a status string."""
        if not self.current_model:
            return "No model loaded"
        try:
            model_info = self.model_manager.get_model_info()
            if model_info:
                return json.dumps(model_info, indent=2)
            return "Model info not available"
        except Exception as e:
            return f"Error getting model info: {str(e)}"
def create_interface():
    """Build and return the Gradio Blocks UI wired to a fresh ChatbotApp.

    Returns:
        The gr.Blocks demo, ready for .launch().
    """
    app = ChatbotApp()

    with gr.Blocks(theme=gr.themes.Soft(), title="Local GGUF Chatbot") as demo:
        gr.Markdown("""
        # 🤖 Local GGUF Chatbot
        Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
        Chat with local GGUF models using llama.cpp. Load your models and start chatting!
        """)

        with gr.Tabs():
            # Chat Tab
            with gr.Tab("💬 Chat"):
                with gr.Row():
                    with gr.Column(scale=3):
                        chatbot = gr.Chatbot(
                            label="Chat",
                            height=500,
                            show_copy_button=True,
                            type="messages",
                        )
                        with gr.Row():
                            msg = gr.Textbox(
                                label="Message",
                                placeholder="Type your message here...",
                                scale=4,
                            )
                            send_btn = gr.Button("Send", scale=1)
                            clear_btn = gr.Button("Clear", scale=1)
                    with gr.Column(scale=1):
                        gr.Markdown("### ⚙️ Generation Parameters")
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=2.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature",
                        )
                        max_tokens = gr.Slider(
                            minimum=1,
                            maximum=4096,
                            value=512,
                            step=1,
                            label="Max Tokens",
                        )
                        top_p = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.9,
                            step=0.05,
                            label="Top P",
                        )
                        repeat_penalty = gr.Slider(
                            minimum=1.0,
                            maximum=2.0,
                            value=1.1,
                            step=0.05,
                            label="Repeat Penalty",
                        )
                        system_prompt = gr.Textbox(
                            label="System Prompt",
                            value="You are a helpful assistant.",
                            lines=3,
                        )
                        model_status = gr.Textbox(
                            label="Model Status",
                            value="No model loaded",
                            interactive=False,
                        )

            # Model Management Tab
            with gr.Tab("📁 Models"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Load Model")
                        model_file = gr.File(
                            label="Select GGUF Model",
                            file_types=[".gguf"],
                            file_count="single",
                        )
                        with gr.Row():
                            context_size = gr.Slider(
                                minimum=512,
                                maximum=8192,
                                value=2048,
                                step=512,
                                label="Context Size",
                            )
                            gpu_layers = gr.Slider(
                                minimum=0,
                                maximum=99,
                                value=0,
                                step=1,
                                label="GPU Layers",
                            )
                        load_btn = gr.Button("Load Model", variant="primary")
                        unload_btn = gr.Button("Unload Model")
                        load_status = gr.Textbox(
                            label="Load Status",
                            interactive=False,
                        )
                    with gr.Column():
                        gr.Markdown("### Available Models")
                        available_models = gr.JSON(
                            label="Models Directory",
                            value=get_available_models(),
                        )
                        model_info = gr.JSON(
                            label="Model Information",
                            visible=False,
                        )
                        refresh_btn = gr.Button("Refresh Models")

            # Settings Tab
            with gr.Tab("⚙️ Settings"):
                gr.Markdown("### Application Settings")
                with gr.Row():
                    with gr.Column():
                        models_dir = gr.Textbox(
                            label="Models Directory",
                            value="./models",
                            placeholder="Path to models directory",
                        )
                        save_chat = gr.Checkbox(
                            label="Save Chat History",
                            value=True,
                        )
                        chat_format = gr.Dropdown(
                            label="Chat Format",
                            choices=["chatml", "llama2", "alpaca", "vicuna"],
                            value="chatml",
                        )
                    with gr.Column():
                        gr.Markdown("### Model Directory Info")
                        dir_info = gr.JSON(label="Directory Info")
                        update_dir_btn = gr.Button("Update Directory")

        # Event handlers
        def update_system_prompt(prompt):
            # Keep the app-level system prompt in sync with the textbox.
            app.system_prompt = prompt
            return prompt

        system_prompt.change(update_system_prompt, system_prompt)

        # Chat functionality
        def user_message(user_input, history):
            """Append the user's message (messages format) and clear the textbox."""
            if not user_input.strip():
                return "", history
            history.append({"role": "user", "content": user_input})
            return "", history

        def bot_response(history, temp, max_tok, top_p_val, repeat_pen):
            """Stream the assistant's reply into a placeholder history entry."""
            if not history:
                return history
            last_message = history[-1]["content"] if history else ""
            # Placeholder entry that the streaming loop overwrites in place.
            history.append({"role": "assistant", "content": ""})
            for chunk in app.chat_response(
                last_message,
                # Prior turns only: exclude the new user msg and the placeholder.
                [{"role": h["role"], "content": h["content"]} for h in history[:-2]],
                temp,
                max_tok,
                top_p_val,
                repeat_pen,
            ):
                history[-1]["content"] = chunk
                yield history

        msg.submit(
            user_message,
            [msg, chatbot],
            [msg, chatbot],
        ).then(
            bot_response,
            [chatbot, temperature, max_tokens, top_p, repeat_penalty],
            chatbot,
        )
        send_btn.click(
            user_message,
            [msg, chatbot],
            [msg, chatbot],
        ).then(
            bot_response,
            [chatbot, temperature, max_tokens, top_p, repeat_penalty],
            chatbot,
        )
        clear_btn.click(app.clear_chat, outputs=chatbot)

        # Model loading
        def handle_model_load(file_obj, ctx_size, n_gpu_layers):
            """Load the selected file; returns (load status, model status).

            BUG FIX: the no-file branch previously returned a single string
            for two wired outputs, which errors at runtime in Gradio.
            """
            if file_obj is None:
                return "❌ Please select a model file", "No model loaded"
            status = app.load_model(file_obj.name, ctx_size, n_gpu_layers)
            if app.current_model:
                model_status_text = f"✅ Loaded: {app.current_model}"
            else:
                model_status_text = "No model loaded"
            return status, model_status_text

        load_btn.click(
            handle_model_load,
            [model_file, context_size, gpu_layers],
            [load_status, model_status],
        )

        def handle_model_unload():
            """Unload and update both status boxes.

            BUG FIX: app.unload_model returns one string, but this event is
            wired to two outputs — wrap it to supply both values.
            """
            status = app.unload_model()
            return status, "No model loaded"

        unload_btn.click(
            handle_model_unload,
            outputs=[load_status, model_status],
        )

        # Refresh models
        refresh_btn.click(
            lambda: get_available_models(),
            outputs=available_models,
        )

        # Update model info when the page loads (if a model is already loaded).
        def update_model_info():
            if app.current_model:
                try:
                    return app.model_manager.get_model_info()
                except Exception:
                    # Best-effort: missing info just leaves the panel empty.
                    return None
            return None

        demo.load(
            update_model_info,
            outputs=[model_info],
        )

        # Update directory info
        def update_directory_info(directory):
            """Summarize the *.gguf contents of *directory* for display."""
            try:
                path = Path(directory)
                if not path.exists():
                    return {"exists": False}
                gguf_files = list(path.glob("*.gguf"))
                return {
                    "exists": True,
                    "is_directory": path.is_dir(),
                    "file_count": len(gguf_files),
                    "size_mb": sum(f.stat().st_size for f in gguf_files) / (1024 * 1024),
                }
            except Exception as e:
                return {"error": str(e)}

        update_dir_btn.click(
            update_directory_info,
            models_dir,
            dir_info,
        )

    return demo
if __name__ == "__main__":
    # Make sure the default models directory is present before the UI starts.
    Path("./models").mkdir(exist_ok=True)

    # Build the interface and serve it on every interface at port 7860.
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=True,
    )