# quick-hub-957 / app.py
# Hugging Face Space by Rights4AI — "Deploy Gradio app with multiple files"
# (commit ca9b9a8, verified)
import gradio as gr
import os
from pathlib import Path
from typing import List, Dict, Optional, Generator
import json
import time
from models import ModelManager
from utils import get_available_models, format_chat_history, parse_model_info
class ChatbotApp:
    """Holds model-manager and chat state behind the Gradio UI.

    All user-facing methods return status strings (prefixed with ✅/❌)
    rather than raising, so they can be wired directly to Gradio outputs.
    """

    def __init__(self):
        self.model_manager = ModelManager()
        # File name (not path) of the currently loaded model, or None.
        self.current_model: Optional[str] = None
        # Messages-format history: [{"role": ..., "content": ...}, ...]
        self.chat_history: List[Dict[str, str]] = []
        self.system_prompt = "You are a helpful assistant."

    def load_model(self, model_path: str, context_size: int = 2048, gpu_layers: int = 0) -> str:
        """Load a GGUF model and return a status string.

        Args:
            model_path: Filesystem path to the .gguf file.
            context_size: Context window size forwarded to the backend.
            gpu_layers: Number of layers to offload to the GPU (0 = CPU only).

        Returns:
            A human-readable ✅/❌ status message; never raises.
        """
        try:
            if not model_path or not os.path.exists(model_path):
                return "❌ Please select a valid model file"
            success = self.model_manager.load_model(
                model_path=model_path,
                context_size=context_size,
                gpu_layers=gpu_layers,
            )
            if success:
                self.current_model = Path(model_path).name
                return f"✅ Successfully loaded: {self.current_model}"
            return "❌ Failed to load model"
        except Exception as e:
            return f"❌ Error loading model: {str(e)}"

    def unload_model(self) -> str:
        """Unload the current model, reset state, and return a status string."""
        self.model_manager.unload_model()
        self.current_model = None
        return "✅ Model unloaded"

    def chat_response(
        self,
        message: str,
        history: List[Dict[str, str]],
        temperature: float,
        max_tokens: int,
        top_p: float,
        repeat_penalty: float,
    ) -> Generator[str, None, None]:
        """Stream a model response, yielding the accumulated text so far.

        Args:
            message: The latest user message.
            history: Prior turns in messages format (role/content dicts).
            temperature, max_tokens, top_p, repeat_penalty: Sampling
                parameters forwarded to the backend's generate().

        Yields:
            The growing response text (each yield is the full text so far),
            or a single ❌ message on failure / when no model is loaded.
        """
        if not self.model_manager.is_loaded():
            yield "❌ No model loaded. Please load a model first."
            return
        try:
            # Flatten the prior turns plus system prompt into a single prompt.
            formatted_history = format_chat_history(history, self.system_prompt)
            response_text = ""
            for chunk in self.model_manager.generate(
                prompt=formatted_history + message,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
                repeat_penalty=repeat_penalty,
            ):
                response_text += chunk
                yield response_text
        except Exception as e:
            yield f"❌ Error generating response: {str(e)}"

    def clear_chat(self) -> List[Dict[str, str]]:
        """Clear the stored history and return an empty messages-format list."""
        self.chat_history = []
        return []

    def get_model_info(self) -> str:
        """Return loaded-model metadata as pretty-printed JSON, or a status string."""
        if not self.current_model:
            return "No model loaded"
        try:
            model_info = self.model_manager.get_model_info()
            if model_info:
                return json.dumps(model_info, indent=2)
            return "Model info not available"
        except Exception as e:
            return f"Error getting model info: {str(e)}"
def create_interface():
    """Build and return the Gradio Blocks UI wired to a fresh ChatbotApp.

    Returns:
        The gr.Blocks demo, ready for .launch().
    """
    app = ChatbotApp()

    with gr.Blocks(theme=gr.themes.Soft(), title="Local GGUF Chatbot") as demo:
        gr.Markdown("""
        # 🤖 Local GGUF Chatbot
        Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
        Chat with local GGUF models using llama.cpp. Load your models and start chatting!
        """)

        with gr.Tabs():
            # Chat Tab
            with gr.Tab("💬 Chat"):
                with gr.Row():
                    with gr.Column(scale=3):
                        chatbot = gr.Chatbot(
                            label="Chat",
                            height=500,
                            show_copy_button=True,
                            type="messages",
                        )
                        with gr.Row():
                            msg = gr.Textbox(
                                label="Message",
                                placeholder="Type your message here...",
                                scale=4,
                            )
                            send_btn = gr.Button("Send", scale=1)
                            clear_btn = gr.Button("Clear", scale=1)
                    with gr.Column(scale=1):
                        gr.Markdown("### ⚙️ Generation Parameters")
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=2.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature",
                        )
                        max_tokens = gr.Slider(
                            minimum=1,
                            maximum=4096,
                            value=512,
                            step=1,
                            label="Max Tokens",
                        )
                        top_p = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.9,
                            step=0.05,
                            label="Top P",
                        )
                        repeat_penalty = gr.Slider(
                            minimum=1.0,
                            maximum=2.0,
                            value=1.1,
                            step=0.05,
                            label="Repeat Penalty",
                        )
                        system_prompt = gr.Textbox(
                            label="System Prompt",
                            value="You are a helpful assistant.",
                            lines=3,
                        )
                        model_status = gr.Textbox(
                            label="Model Status",
                            value="No model loaded",
                            interactive=False,
                        )

            # Model Management Tab
            with gr.Tab("📁 Models"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Load Model")
                        model_file = gr.File(
                            label="Select GGUF Model",
                            file_types=[".gguf"],
                            file_count="single",
                        )
                        with gr.Row():
                            context_size = gr.Slider(
                                minimum=512,
                                maximum=8192,
                                value=2048,
                                step=512,
                                label="Context Size",
                            )
                            gpu_layers = gr.Slider(
                                minimum=0,
                                maximum=99,
                                value=0,
                                step=1,
                                label="GPU Layers",
                            )
                        load_btn = gr.Button("Load Model", variant="primary")
                        unload_btn = gr.Button("Unload Model")
                        load_status = gr.Textbox(
                            label="Load Status",
                            interactive=False,
                        )
                    with gr.Column():
                        gr.Markdown("### Available Models")
                        available_models = gr.JSON(
                            label="Models Directory",
                            value=get_available_models(),
                        )
                        model_info = gr.JSON(
                            label="Model Information",
                            visible=False,
                        )
                        refresh_btn = gr.Button("Refresh Models")

            # Settings Tab
            with gr.Tab("⚙️ Settings"):
                gr.Markdown("### Application Settings")
                with gr.Row():
                    with gr.Column():
                        models_dir = gr.Textbox(
                            label="Models Directory",
                            value="./models",
                            placeholder="Path to models directory",
                        )
                        save_chat = gr.Checkbox(
                            label="Save Chat History",
                            value=True,
                        )
                        chat_format = gr.Dropdown(
                            label="Chat Format",
                            choices=["chatml", "llama2", "alpaca", "vicuna"],
                            value="chatml",
                        )
                    with gr.Column():
                        gr.Markdown("### Model Directory Info")
                        dir_info = gr.JSON(label="Directory Info")
                        update_dir_btn = gr.Button("Update Directory")

        # Event handlers
        def update_system_prompt(prompt):
            # Keep the app-level system prompt in sync with the textbox.
            app.system_prompt = prompt
            return prompt

        system_prompt.change(update_system_prompt, system_prompt)

        # Chat functionality
        def user_message(user_input, history):
            """Append the user's message (messages format) and clear the textbox."""
            if not user_input.strip():
                return "", history
            history.append({"role": "user", "content": user_input})
            return "", history

        def bot_response(history, temp, max_tok, top_p_val, repeat_pen):
            """Stream the assistant's reply into a placeholder history entry."""
            if not history:
                return history
            last_message = history[-1]["content"] if history else ""
            # Placeholder entry that the streaming loop overwrites in place.
            history.append({"role": "assistant", "content": ""})
            for chunk in app.chat_response(
                last_message,
                # Prior turns only: exclude the new user msg and the placeholder.
                [{"role": h["role"], "content": h["content"]} for h in history[:-2]],
                temp,
                max_tok,
                top_p_val,
                repeat_pen,
            ):
                history[-1]["content"] = chunk
                yield history

        msg.submit(
            user_message,
            [msg, chatbot],
            [msg, chatbot],
        ).then(
            bot_response,
            [chatbot, temperature, max_tokens, top_p, repeat_penalty],
            chatbot,
        )
        send_btn.click(
            user_message,
            [msg, chatbot],
            [msg, chatbot],
        ).then(
            bot_response,
            [chatbot, temperature, max_tokens, top_p, repeat_penalty],
            chatbot,
        )
        clear_btn.click(app.clear_chat, outputs=chatbot)

        # Model loading
        def handle_model_load(file_obj, ctx_size, n_gpu_layers):
            """Load the selected file; returns (load status, model status).

            BUG FIX: the no-file branch previously returned a single string
            for two wired outputs, which errors at runtime in Gradio.
            """
            if file_obj is None:
                return "❌ Please select a model file", "No model loaded"
            status = app.load_model(file_obj.name, ctx_size, n_gpu_layers)
            if app.current_model:
                model_status_text = f"✅ Loaded: {app.current_model}"
            else:
                model_status_text = "No model loaded"
            return status, model_status_text

        load_btn.click(
            handle_model_load,
            [model_file, context_size, gpu_layers],
            [load_status, model_status],
        )

        def handle_model_unload():
            """Unload and update both status boxes.

            BUG FIX: app.unload_model returns one string, but this event is
            wired to two outputs — wrap it to supply both values.
            """
            status = app.unload_model()
            return status, "No model loaded"

        unload_btn.click(
            handle_model_unload,
            outputs=[load_status, model_status],
        )

        # Refresh models
        refresh_btn.click(
            lambda: get_available_models(),
            outputs=available_models,
        )

        # Update model info when the page loads (if a model is already loaded).
        def update_model_info():
            if app.current_model:
                try:
                    return app.model_manager.get_model_info()
                except Exception:
                    # Best-effort: missing info just leaves the panel empty.
                    return None
            return None

        demo.load(
            update_model_info,
            outputs=[model_info],
        )

        # Update directory info
        def update_directory_info(directory):
            """Summarize the *.gguf contents of *directory* for display."""
            try:
                path = Path(directory)
                if not path.exists():
                    return {"exists": False}
                gguf_files = list(path.glob("*.gguf"))
                return {
                    "exists": True,
                    "is_directory": path.is_dir(),
                    "file_count": len(gguf_files),
                    "size_mb": sum(f.stat().st_size for f in gguf_files) / (1024 * 1024),
                }
            except Exception as e:
                return {"error": str(e)}

        update_dir_btn.click(
            update_directory_info,
            models_dir,
            dir_info,
        )

    return demo
if __name__ == "__main__":
    # Make sure the default models directory is present before the UI starts.
    Path("./models").mkdir(exist_ok=True)

    # Build the interface and serve it on every interface at port 7860.
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=True,
    )