"""Advanced AI chatbot with persistent conversation memory, a CSV knowledge
base, and a trainable scikit-learn fallback model, served via Gradio."""

import gradio as gr
from huggingface_hub import InferenceClient
import json
import os
import shutil
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
import joblib
import logging

# ---------------------------
# Logging Configuration
# ---------------------------
logging.basicConfig(
    filename='app.log',
    filemode='a',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO
)
logger = logging.getLogger(__name__)

# ---------------------------
# Initialize the HuggingFace API Client
# ---------------------------
# Replace the model ID below with a chat model hosted on the Hugging Face Hub
# that you have access to. Note that "gpt-3.5-turbo" is an OpenAI model and is
# not served by the Hugging Face Inference API, so it must be swapped out.
try:
    client = InferenceClient("gpt-3.5-turbo")
    logger.info("HuggingFace InferenceClient initialized successfully.")
except Exception as e:
    logger.error(f"Failed to initialize HuggingFace InferenceClient: {e}")
    raise

# ---------------------------
# Persistent Memory and Knowledge Base Setup
# ---------------------------
memory_file = "chat_memory.json"
knowledge_base_dir = "knowledge_base"
model_file = "chat_model.pkl"

# Ensure directories exist
os.makedirs(knowledge_base_dir, exist_ok=True)

# ---------------------------
# Memory Management Functions
# ---------------------------
def load_memory():
    """Load conversation memory from a JSON file."""
    try:
        if os.path.exists(memory_file):
            with open(memory_file, "r") as f:
                memory = json.load(f)
            logger.info("Conversation memory loaded successfully.")
            return memory
        logger.info("No existing conversation memory found. Starting fresh.")
        return []
    except Exception as e:
        logger.error(f"Error loading memory: {e}")
        return []

def save_memory(memory):
    """Save conversation memory to a JSON file."""
    try:
        with open(memory_file, "w") as f:
            json.dump(memory, f, indent=2)
        logger.info("Conversation memory saved successfully.")
    except Exception as e:
        logger.error(f"Error saving memory: {e}")

def update_memory(message, response):
    """Append user message and assistant response to memory."""
    try:
        memory = load_memory()
        memory.append({"role": "user", "content": message})
        memory.append({"role": "assistant", "content": response})
        # Keep only the most recent 1000 entries to bound the file size
        if len(memory) > 1000:
            memory = memory[-1000:]
        save_memory(memory)
    except Exception as e:
        logger.error(f"Error updating memory: {e}")
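# For reference, chat_memory.json ends up with the same role/content shape the
# functions above write; the values here are invented for illustration:
#
#   [
#     {"role": "user", "content": "Hello!"},
#     {"role": "assistant", "content": "Hi, how can I help?"}
#   ]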
# ---------------------------
# ML Model Management Functions
# ---------------------------
def load_or_initialize_model():
    """Load the ML model from a file or initialize a new one."""
    try:
        if os.path.exists(model_file):
            model = joblib.load(model_file)
            logger.info("ML model loaded successfully.")
            return model
        model = Pipeline([
            ("vectorizer", CountVectorizer()),
            ("classifier", RandomForestClassifier(n_estimators=100, random_state=42))
        ])
        logger.info("Initialized new ML model pipeline.")
        return model
    except Exception as e:
        logger.error(f"Error loading or initializing model: {e}")
        raise

def train_model_on_files():
    """Train the ML model on the CSV files in the knowledge base."""
    try:
        model = load_or_initialize_model()
        texts, labels = [], []
        csv_file_count = 0

        # Load data from the knowledge base
        for file_name in os.listdir(knowledge_base_dir):
            file_path = os.path.join(knowledge_base_dir, file_name)
            if file_path.endswith(".csv"):
                try:
                    df = pd.read_csv(file_path)
                    if "text" in df.columns and "label" in df.columns:
                        texts.extend(df["text"].astype(str).tolist())
                        labels.extend(df["label"].astype(str).tolist())
                        csv_file_count += 1
                        logger.info(f"Loaded data from '{file_name}'.")
                    else:
                        logger.warning(f"File '{file_name}' is missing 'text' or 'label' columns.")
                        return f"File '{file_name}' does not contain required 'text' and 'label' columns."
                except Exception as e:
                    logger.error(f"Error reading '{file_name}': {e}")
                    return f"Error reading '{file_name}': {str(e)}"

        if texts and labels:
            try:
                model.fit(texts, labels)
                joblib.dump(model, model_file)
                logger.info("ML model trained and saved successfully.")
                # Report only the CSV files actually used, not every file in the directory
                return f"Model trained on {len(texts)} samples from {csv_file_count} files."
            except Exception as e:
                logger.error(f"Error during model training: {e}")
                return f"Error during model training: {str(e)}"

        logger.warning("No valid training data found in the knowledge base.")
        return "No valid training data found in the knowledge base."
    except Exception as e:
        logger.error(f"Unexpected error in training model: {e}")
        return f"Unexpected error: {str(e)}"
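# An uploaded knowledge-base file must contain 'text' and 'label' columns, as
# checked above. The rows below are invented purely to illustrate the format:
#
#   text,label
#   "What are your opening hours?",hours
#   "How do I reset my password?",password_reset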
# ---------------------------
# Chat Response Function
# ---------------------------
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """
    Generate a response to the user's message using the ML model or GPT model.

    Parameters:
    - message (str): User's input message.
    - history (list): Conversation history as {"role", "content"} dicts.
    - system_message (str): System prompt.
    - max_tokens (int): Maximum number of tokens for the GPT response.
    - temperature (float): Sampling temperature for GPT.
    - top_p (float): Nucleus sampling parameter for GPT.

    Returns:
    - response (str): Generated response.
    """
    try:
        # Attempt to get a prediction from the ML model
        model = load_or_initialize_model()
        pred_label = model.predict([message])[0]
        response = f"Predicted response: {pred_label}"
        update_memory(message, response)
        logger.info("Response generated using ML model.")
        return response
    except Exception:
        logger.info("ML model could not generate a response. Falling back to GPT model.")

    # Generate response using GPT
    try:
        messages = [{"role": "system", "content": system_message}]
        for turn in history:
            messages.append({"role": turn["role"], "content": turn["content"]})
        messages.append({"role": "user", "content": message})

        response = ""
        for message_part in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Streamed chunks are objects, not dicts; the new text lives on
            # choices[0].delta.content and may be None on the final chunk.
            token = message_part.choices[0].delta.content or ""
            response += token

        update_memory(message, response)
        logger.info("Response generated using GPT model.")
        return response
    except Exception as e:
        logger.error(f"Error generating response with GPT: {e}")
        response = f"Error generating response: {str(e)}"
        update_memory(message, response)
        return response
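# Illustrative direct call matching the signature above (the history entries
# and parameter values are invented for the example):
#
#   respond(
#       "Hello!",
#       history=[{"role": "user", "content": "Hi"},
#                {"role": "assistant", "content": "Hello, how can I help?"}],
#       system_message="You are an advanced AI Chatbot.",
#       max_tokens=512,
#       temperature=0.7,
#       top_p=0.95,
#   )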
# ---------------------------
# Gradio Interface
# ---------------------------
def create_gradio_interface():
    """Create and configure the Gradio interface."""
    with gr.Blocks() as demo:
        gr.Markdown("# 🧠 Advanced AI Chatbot with Knowledge Base and Model Training")

        # Chat Tab
        with gr.Tab("💬 Chat"):
            chatbot = gr.Chatbot(label="AI Chatbot", type="messages")
            with gr.Row():
                with gr.Column(scale=5):
                    user_input = gr.Textbox(
                        label="Your Message",
                        placeholder="Type your message here...",
                        lines=1
                    )
                with gr.Column(scale=1, min_width=100):
                    send_button = gr.Button("Send", variant="primary")
            with gr.Row():
                system_message = gr.Textbox(
                    value="You are an advanced AI Chatbot.",
                    label="System Message",
                    visible=False
                )
                max_tokens = gr.Slider(
                    minimum=100, maximum=2048, value=512, step=100,
                    label="Max Tokens"
                )
                temperature = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.7, step=0.1,
                    label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                    label="Top-p (Nucleus Sampling)",
                )

            def handle_message(message, history, system_message, max_tokens, temperature, top_p):
                response = respond(message, history, system_message, max_tokens, temperature, top_p)
                history.append({"role": "user", "content": message})
                history.append({"role": "assistant", "content": response})
                # Return the updated history and clear the input textbox
                return history, ""

            send_button.click(
                handle_message,
                inputs=[user_input, chatbot, system_message, max_tokens, temperature, top_p],
                outputs=[chatbot, user_input],
            )
            user_input.submit(
                handle_message,
                inputs=[user_input, chatbot, system_message, max_tokens, temperature, top_p],
                outputs=[chatbot, user_input],
            )

        # Knowledge Base Tab
        with gr.Tab("📚 Knowledge Base"):
            gr.Markdown("### Manage Knowledge Base")
            file_upload = gr.File(
                label="Upload CSV File",
                file_types=[".csv"],
                file_count="single"  # Allows only a single file upload
            )
            upload_output = gr.Textbox(label="Upload Result", interactive=False)
            train_button = gr.Button("🔄 Train Model on Knowledge Base")
            train_output = gr.Textbox(label="Training Result", interactive=False)

            def upload_file(file):
                if not file:
                    return "No file uploaded."
                try:
                    # Determine file path and name (older Gradio versions pass a dict)
                    if isinstance(file, dict):
                        file_path = file.get('path', '')
                        file_name = file.get('name', '')
                    else:
                        file_path = file
                        file_name = os.path.basename(file_path)

                    # Validate file extension
                    if not file_name.endswith(".csv"):
                        logger.warning(f"Invalid file type attempted: {file_name}")
                        return "Invalid file type. Please upload a CSV file."

                    # Save file to knowledge base directory
                    destination_path = os.path.join(knowledge_base_dir, file_name)
                    shutil.copy(file_path, destination_path)
                    logger.info(f"File '{file_name}' uploaded successfully.")
                    return f"File '{file_name}' uploaded successfully."
                except Exception as e:
                    logger.error(f"Error uploading file: {e}")
                    return f"Error uploading file: {str(e)}"

            file_upload.change(upload_file, inputs=file_upload, outputs=upload_output)
            train_button.click(train_model_on_files, inputs=None, outputs=train_output)

        # Memory Tab
        with gr.Tab("🧠 Memory"):
            gr.Markdown("### View and Manage Conversation Memory")
            memory_display = gr.JSON(label="Conversation Memory")
            with gr.Row():
                refresh_memory = gr.Button("🔄 Refresh Memory")
                clear_memory = gr.Button("🗑️ Clear Memory")
                export_memory = gr.Button("📤 Export Memory")
            export_output = gr.File(label="Download Memory", visible=False)

            def display_memory():
                return load_memory()

            def clear_memory_func():
                try:
                    save_memory([])
                    logger.info("Conversation memory cleared.")
                    return []
                except Exception as e:
                    logger.error(f"Error clearing memory: {e}")
                    return f"Error clearing memory: {str(e)}"

            def export_memory_func():
                if os.path.exists(memory_file):
                    # Reveal the file component and hand Gradio the path to serve
                    return gr.update(value=memory_file, visible=True)
                logger.warning("No memory file found to export.")
                return gr.update(value=None, visible=False)

            refresh_memory.click(display_memory, inputs=None, outputs=memory_display)
            clear_memory.click(clear_memory_func, inputs=None, outputs=memory_display)
            export_memory.click(export_memory_func, inputs=None, outputs=export_output)

        # Download Model Tab
        with gr.Tab("💾 Download Model"):
            gr.Markdown("### Download the Trained Model")
            download_button = gr.Button("📥 Download Model")
            model_download_output = gr.File(label="Downloadable Model")

            def download_model():
                if os.path.exists(model_file):
                    return model_file  # Gradio will handle the file download
                # A file component expects a path or None, not a message string
                logger.warning("No trained model found to download.")
                return None

            download_button.click(download_model, inputs=None, outputs=model_download_output)

        # Settings Tab
        with gr.Tab("⚙️ Settings"):
            gr.Markdown("### Application Settings")
            gr.Textbox(
                value="",
                label="Settings Placeholder",
                placeholder="Add settings here..."
            )

    return demo

# ---------------------------
# Main Execution
# ---------------------------
if __name__ == "__main__":
    try:
        interface = create_gradio_interface()
        logger.info("Launching Gradio interface.")
        interface.launch()
    except Exception as e:
        logger.critical(f"Application failed to start: {e}")
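# To run locally: `python app.py`, then open the URL Gradio prints
# (http://127.0.0.1:7860 by default). The file name 'app.py' is illustrative;
# use whatever name this script is saved under.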