"""Website Concierge: a Gradio app that crawls a site, answers questions about
it via a RAG pipeline, and highlights the supporting passages in a proxied
view of the site."""

import logging
from urllib.parse import quote

import gradio as gr
import requests
import validators

from chat_demo import ChatDemo
from proxy_thread import ProxyThread
from rag_system import RAGSystem
from web_crawler import WebCrawler

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Instantiate your existing classes
rag_system = RAGSystem()
chat_demo = ChatDemo(rag_system)

# Global variables to track the URL and server state
url = ""
proxy_thread = None


def start_proxy_server():
    """(Re)start the local proxy server, stopping any running instance first."""
    global proxy_thread
    if proxy_thread and proxy_thread.is_alive():
        logger.info("Stopping the existing ProxyThread...")
        proxy_thread.stop()
    # Start a new proxy server
    proxy_thread = ProxyThread(host='localhost', port=5000)
    proxy_thread.start()
    logger.info("New ProxyThread started.")


def load_website(input_url):
    """Load *input_url* through the local proxy and return iframe HTML.

    Side effects: updates the global ``url`` and restarts the proxy server.
    """
    global url
    url = input_url  # Update the global url variable
    logger.info("Loading website: %s", url)

    # Restart the ProxyServer with the new URL
    start_proxy_server()

    # Proxy server expects a specific URL format with target_url.
    # Percent-encode the target so any query string in `url` survives intact.
    proxied_url = f"http://127.0.0.1:5000/?target_url={quote(url, safe='')}"

    # BUG FIX: the original f-string was empty, so nothing was ever rendered
    # in the website view. Emit an actual iframe pointing at the proxy.
    iframe_html = f"""
    <iframe src="{proxied_url}" width="100%" height="800px"
            style="border: none;"></iframe>
    """
    return iframe_html


def highlight_text(text):
    """Ask the proxy to highlight *text* on the currently loaded page.

    Returns a short status string for display; never raises.
    """
    if not url:
        return "No website loaded."
    logger.info("Highlighting text: %s", text)
    try:
        response = requests.post(
            'http://127.0.0.1:5000/set_highlight',
            json={"highlight": text},
        )
        if response.status_code == 200:
            return "Highlight applied."
        return "Failed to apply highlight."
    except Exception as e:
        logger.error("Error highlighting text: %s", e)
        return f"Error: {e}"


def clear_highlights():
    """Drop all highlights by restarting the proxy, then reload the site."""
    if not url:
        return "No website loaded."
    logger.info("Clearing highlights and reloading the website.")
    start_proxy_server()  # Restart the proxy server to clear highlights
    return load_website(url)  # Reload the current website without highlights


# Function to handle the initial URL submission
def process_url(input_url):
    """Validate, crawl, and index *input_url*.

    Returns a 3-tuple ``(status_message, chat_history, iframe_html)``
    matching the three Gradio outputs wired to the submit button.
    """
    logger.info("Processing URL: %s", input_url)
    if not validators.url(input_url):
        logger.error("Invalid URL submitted: %s", input_url)
        # BUG FIX: must return 3 values to match the 3 wired outputs;
        # the original returned only 2 here.
        return "Invalid URL. Please enter a valid URL.", [], None
    try:
        # Display loading message
        status_message = "Crawling website and processing data..."
        logger.info(status_message)

        # Instantiate WebCrawler with the provided URL
        web_crawler = WebCrawler()

        # Crawl the website (depth 0 = just the landing page)
        logger.info("Starting web crawl for %s", input_url)
        extracted_content = web_crawler.crawl(input_url, 0)
        logger.info("Web crawl completed for %s", input_url)

        # Process the data with the RAG system
        logger.info("Processing extracted content with RAG system")
        rag_system.process_content(extracted_content)

        # Load the website through the proxy
        iframe_html = load_website(input_url)

        logger.info("URL processing completed successfully")
        return "Website content successfully crawled and processed!", [], iframe_html
    except Exception as e:
        logger.error("Error processing URL %s: %s", input_url, e, exc_info=True)
        # BUG FIX: return 3 values on the error path as well.
        return f"Error: {e}", [], None


# Function to handle chatbot interactions
def chatbot_response(user_input, chat_history):
    """Generate a chatbot reply, highlight its supporting context passages.

    Returns ``(chatbot_pairs, chat_history_state, highlight_status)``.
    """
    logger.info("Received user input: %s", user_input)
    try:
        # Use the ChatDemo class to generate a response
        logger.info("Generating chatbot response")
        response = chat_demo.chatbot(user_input)
        chat_history.append(["User", user_input])
        chat_history.append(["Chatbot", response])
        logger.info("Chatbot response generated successfully")

        # Get the context strings used for the response
        context_strings = chat_demo.get_last_context_strings()
        logger.info("Retrieved %d context strings", len(context_strings))

        # Highlight each context string individually
        for i, context in enumerate(context_strings, 1):
            highlight_result = highlight_text(context)
            logger.info("Highlight result for context %d: %s", i, highlight_result)

        # Prepare status message
        highlight_status = f"Highlighted {len(context_strings)} context passages"
        logger.info(highlight_status)

        # Update the chat history and return
        return chat_history, chat_history, highlight_status
    except Exception as e:
        logger.error("Error in chatbot_response: %s", e, exc_info=True)
        # BUG FIX: the original returned a malformed chatbot payload
        # ([[msg], history]); surface the error as a proper [speaker, text]
        # pair so the Chatbot component can render it.
        error_history = chat_history + [["Chatbot", f"Error: {e}"]]
        return error_history, chat_history, f"Error: {e}"


# Function to reset the application
def reset_app():
    """Clear the session URL, restart the proxy, and blank all UI fields."""
    global url
    url = ""  # Clear the global URL
    logger.info("Resetting application and proxy server")
    start_proxy_server()  # Restart the proxy server for a fresh session
    return "", [], "", ""


# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Website Concierge")
    with gr.Row():
        with gr.Column(scale=1):
            url_input = gr.Textbox(
                placeholder="Enter a website URL",
                label="Website URL",
                interactive=True,
            )
            submit_button = gr.Button("Submit URL")
            status_message = gr.Textbox(label="Status", interactive=False)
            chat_history = gr.State(value=[])
            chatbot_output = gr.Chatbot(label="Chat History")
            user_input = gr.Textbox(
                placeholder="Ask the chatbot...",
                label="User Input",
                interactive=True,
            )
            highlight_status = gr.Textbox(
                label="Highlight Status", interactive=False
            )
            clear_button = gr.Button("Clear Highlights")
            reset_button = gr.Button("Change Website")
        with gr.Column(scale=1):
            proxied_view = gr.HTML(label="Website View")

    # Initial URL submission
    submit_button.click(
        process_url,
        inputs=url_input,
        outputs=[status_message, chat_history, proxied_view],
        queue=True,
    )

    # Handle chatbot responses
    user_input.submit(
        chatbot_response,
        inputs=[user_input, chat_history],
        outputs=[chatbot_output, chat_history, highlight_status],
    )

    # Handle clearing highlights
    clear_button.click(clear_highlights, outputs=[proxied_view])

    # Handle reset button click
    reset_button.click(
        reset_app,
        outputs=[url_input, chat_history, status_message, proxied_view],
    )

# Launch the app
if __name__ == "__main__":
    logger.info("Starting Gradio application")
    start_proxy_server()  # Start with an initial ProxyServer
    demo.launch()
    logger.info("Gradio application stopped")