import gradio as gr
import os
from pathlib import Path
from typing import List, Dict, Optional, Generator
import json
import time

from models import ModelManager
from utils import get_available_models, format_chat_history, parse_model_info


class ChatbotApp:
    """Holds the llama.cpp model manager and chat state behind the Gradio UI."""

    def __init__(self):
        self.model_manager = ModelManager()
        self.current_model = None  # file name of the loaded GGUF model, or None
        self.chat_history = []
        self.system_prompt = "You are a helpful assistant."

    def load_model(self, model_path: str, context_size: int = 2048, gpu_layers: int = 0) -> str:
        """Load a GGUF model.

        Args:
            model_path: Path to the .gguf file on disk.
            context_size: Context window passed to the backend.
            gpu_layers: Number of layers to offload to GPU (0 = CPU only).

        Returns:
            A human-readable status string (never raises).
        """
        try:
            if not model_path or not os.path.exists(model_path):
                return "❌ Please select a valid model file"

            success = self.model_manager.load_model(
                model_path=model_path,
                context_size=context_size,
                gpu_layers=gpu_layers,
            )

            if success:
                self.current_model = Path(model_path).name
                return f"✅ Successfully loaded: {self.current_model}"
            return "❌ Failed to load model"
        except Exception as e:
            return f"❌ Error loading model: {str(e)}"

    def unload_model(self) -> str:
        """Unload the current model and reset tracked state."""
        self.model_manager.unload_model()
        self.current_model = None
        return "✅ Model unloaded"

    def chat_response(
        self,
        message: str,
        history: List[Dict[str, str]],
        temperature: float,
        max_tokens: int,
        top_p: float,
        repeat_penalty: float,
    ) -> Generator[str, None, None]:
        """Stream a response from the model, yielding the accumulated text.

        Args:
            history: Prior turns in Gradio "messages" format
                (dicts with "role"/"content" keys).
        """
        if not self.model_manager.is_loaded():
            yield "❌ No model loaded. Please load a model first."
            return

        try:
            # Flatten prior turns + system prompt into a single prompt prefix.
            formatted_history = format_chat_history(history, self.system_prompt)

            # Stream tokens, yielding the growing response for live UI updates.
            response_text = ""
            for chunk in self.model_manager.generate(
                prompt=formatted_history + message,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
                repeat_penalty=repeat_penalty,
            ):
                response_text += chunk
                yield response_text
        except Exception as e:
            yield f"❌ Error generating response: {str(e)}"

    def clear_chat(self) -> List:
        """Clear chat history and return an empty history for the UI."""
        self.chat_history = []
        return []

    def get_model_info(self) -> str:
        """Return pretty-printed JSON info about the loaded model, or a status string."""
        if not self.current_model:
            return "No model loaded"
        try:
            model_info = self.model_manager.get_model_info()
            if model_info:
                return json.dumps(model_info, indent=2)
            return "Model info not available"
        except Exception as e:
            return f"Error getting model info: {str(e)}"


def create_interface():
    """Create and return the Gradio Blocks interface."""
    app = ChatbotApp()

    with gr.Blocks(theme=gr.themes.Soft(), title="Local GGUF Chatbot") as demo:
        gr.Markdown("""
        # 🤖 Local GGUF Chatbot

        Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)

        Chat with local GGUF models using llama.cpp. Load your models and start chatting!
        """)

        with gr.Tabs():
            # Chat Tab
            with gr.Tab("💬 Chat"):
                with gr.Row():
                    with gr.Column(scale=3):
                        chatbot = gr.Chatbot(
                            label="Chat",
                            height=500,
                            show_copy_button=True,
                            type="messages",
                        )
                        with gr.Row():
                            msg = gr.Textbox(
                                label="Message",
                                placeholder="Type your message here...",
                                scale=4,
                            )
                            send_btn = gr.Button("Send", scale=1)
                            clear_btn = gr.Button("Clear", scale=1)

                    with gr.Column(scale=1):
                        gr.Markdown("### ⚙️ Generation Parameters")
                        temperature = gr.Slider(
                            minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                            label="Temperature",
                        )
                        max_tokens = gr.Slider(
                            minimum=1, maximum=4096, value=512, step=1,
                            label="Max Tokens",
                        )
                        top_p = gr.Slider(
                            minimum=0.1, maximum=1.0, value=0.9, step=0.05,
                            label="Top P",
                        )
                        repeat_penalty = gr.Slider(
                            minimum=1.0, maximum=2.0, value=1.1, step=0.05,
                            label="Repeat Penalty",
                        )
                        system_prompt = gr.Textbox(
                            label="System Prompt",
                            value="You are a helpful assistant.",
                            lines=3,
                        )
                        model_status = gr.Textbox(
                            label="Model Status",
                            value="No model loaded",
                            interactive=False,
                        )

            # Model Management Tab
            with gr.Tab("📁 Models"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### Load Model")
                        model_file = gr.File(
                            label="Select GGUF Model",
                            file_types=[".gguf"],
                            file_count="single",
                        )
                        with gr.Row():
                            context_size = gr.Slider(
                                minimum=512, maximum=8192, value=2048, step=512,
                                label="Context Size",
                            )
                            gpu_layers = gr.Slider(
                                minimum=0, maximum=99, value=0, step=1,
                                label="GPU Layers",
                            )
                        load_btn = gr.Button("Load Model", variant="primary")
                        unload_btn = gr.Button("Unload Model")
                        load_status = gr.Textbox(
                            label="Load Status",
                            interactive=False,
                        )

                    with gr.Column():
                        gr.Markdown("### Available Models")
                        available_models = gr.JSON(
                            label="Models Directory",
                            value=get_available_models(),
                        )
                        model_info = gr.JSON(
                            label="Model Information",
                            visible=False,
                        )
                        refresh_btn = gr.Button("Refresh Models")

            # Settings Tab
            with gr.Tab("⚙️ Settings"):
                gr.Markdown("### Application Settings")
                with gr.Row():
                    with gr.Column():
                        models_dir = gr.Textbox(
                            label="Models Directory",
                            value="./models",
                            placeholder="Path to models directory",
                        )
                        save_chat = gr.Checkbox(
                            label="Save Chat History",
                            value=True,
                        )
                        chat_format = gr.Dropdown(
                            label="Chat Format",
                            choices=["chatml", "llama2", "alpaca", "vicuna"],
                            value="chatml",
                        )
                    with gr.Column():
                        gr.Markdown("### Model Directory Info")
                        dir_info = gr.JSON(label="Directory Info")
                        update_dir_btn = gr.Button("Update Directory")

        # --- Event handlers ---

        def update_system_prompt(prompt):
            # Side-effect only: no outputs are wired to this .change() event,
            # so returning a value would trigger a Gradio warning.
            app.system_prompt = prompt

        system_prompt.change(update_system_prompt, system_prompt)

        # Chat functionality
        def user_message(user_input, history):
            """Append the user's turn (messages format) and clear the textbox."""
            if not user_input.strip():
                return "", history
            history.append({"role": "user", "content": user_input})
            return "", history

        def bot_response(history, temp, max_tok, top_p_val, repeat_pen):
            """Stream the assistant's reply into the last history entry."""
            if not history:
                return history

            last_message = history[-1]["content"] if history else ""

            # Add assistant message placeholder that streaming fills in.
            history.append({"role": "assistant", "content": ""})

            for chunk in app.chat_response(
                last_message,
                # Prior turns only: exclude the pending user turn + placeholder.
                [{"role": h["role"], "content": h["content"]} for h in history[:-2]],
                temp,
                max_tok,
                top_p_val,
                repeat_pen,
            ):
                history[-1]["content"] = chunk
                yield history

        msg.submit(
            user_message,
            [msg, chatbot],
            [msg, chatbot],
        ).then(
            bot_response,
            [chatbot, temperature, max_tokens, top_p, repeat_penalty],
            chatbot,
        )

        send_btn.click(
            user_message,
            [msg, chatbot],
            [msg, chatbot],
        ).then(
            bot_response,
            [chatbot, temperature, max_tokens, top_p, repeat_penalty],
            chatbot,
        )

        clear_btn.click(app.clear_chat, outputs=chatbot)

        # Model loading
        def handle_model_load(file_obj, ctx_size, n_gpu_layers):
            """Load the selected model; always return (load status, model status)."""
            if file_obj is None:
                # BUG FIX: two outputs are wired to this event, so the error
                # branch must also return two values.
                return "❌ Please select a model file", "No model loaded"

            status = app.load_model(file_obj.name, ctx_size, n_gpu_layers)

            if app.current_model:
                model_status_text = f"✅ Loaded: {app.current_model}"
            else:
                model_status_text = "No model loaded"

            return status, model_status_text

        load_btn.click(
            handle_model_load,
            [model_file, context_size, gpu_layers],
            [load_status, model_status],
        )

        def handle_model_unload():
            """Unload the model; return values for BOTH wired outputs.

            BUG FIX: the original wired app.unload_model (one return value)
            to two outputs, which raises at event time in Gradio.
            """
            status = app.unload_model()
            return status, "No model loaded"

        unload_btn.click(
            handle_model_unload,
            outputs=[load_status, model_status],
        )

        # Refresh models
        refresh_btn.click(
            get_available_models,
            outputs=available_models,
        )

        # Update model info when the app loads
        def update_model_info():
            if app.current_model:
                try:
                    return app.model_manager.get_model_info()
                except Exception:
                    # Best-effort: missing info is shown as empty, not an error.
                    return None
            return None

        demo.load(
            update_model_info,
            outputs=[model_info],
        )

        # Update directory info
        def update_directory_info(directory):
            """Summarize the .gguf files found in *directory*."""
            try:
                path = Path(directory)
                if path.exists():
                    return {
                        "exists": True,
                        "is_directory": path.is_dir(),
                        "file_count": len(list(path.glob("*.gguf"))),
                        "size_mb": sum(f.stat().st_size for f in path.glob("*.gguf")) / (1024 * 1024),
                    }
                return {"exists": False}
            except Exception as e:
                return {"error": str(e)}

        update_dir_btn.click(
            update_directory_info,
            models_dir,
            dir_info,
        )

    return demo


if __name__ == "__main__":
    # Create models directory if it doesn't exist
    os.makedirs("./models", exist_ok=True)

    # Create and launch the interface
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=True,
    )