File size: 6,911 Bytes
748113b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import gradio as gr
from web_crawler import WebCrawler
from rag_system import RAGSystem
from chat_demo import ChatDemo
import validators
import logging
from proxy_thread import ProxyThread
import requests

# Configure logging: DEBUG level, with timestamp / logger name / level in each record.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Single shared RAG index and the chat front-end that queries it.
# Both live for the whole process; process_url() re-feeds rag_system per site.
rag_system = RAGSystem()
chat_demo = ChatDemo(rag_system)

# Module-level state: the currently loaded site URL ("" = none loaded) and
# the running proxy server thread (None until start_proxy_server() is called).
url = ""
proxy_thread = None

def start_proxy_server():
    """Start a fresh ProxyThread on localhost:5000, stopping any running one first.

    Mutates the module-level ``proxy_thread``. Restarting gives the proxy a
    clean state (used by clear_highlights/reset_app to drop highlights).
    """
    global proxy_thread
    # If the proxy server is already running, stop it first.
    if proxy_thread and proxy_thread.is_alive():
        logger.info("Stopping the existing ProxyThread...")
        proxy_thread.stop()
        # Wait (bounded) for the old server to shut down and release port 5000;
        # rebinding immediately can otherwise fail with "address already in use".
        proxy_thread.join(timeout=5)

    # Start a new proxy server on the fixed local endpoint.
    proxy_thread = ProxyThread(host='localhost', port=5000)
    proxy_thread.start()
    logger.info("New ProxyThread started.")

def load_website(input_url):
    """Serve *input_url* through the local proxy and return iframe HTML for it.

    Side effects: updates the module-level ``url`` and restarts the proxy
    server so the new target is served from a clean (highlight-free) state.

    Returns an HTML snippet embedding the proxied page in an <iframe>.
    """
    from urllib.parse import quote  # stdlib; imported locally to keep the edit self-contained

    global url
    url = input_url  # Update the global url variable
    logger.info(f"Loading website: {url}")

    # Restart the ProxyServer with the new URL.
    start_proxy_server()

    # Percent-encode the target so characters like '?', '&' and '#' inside the
    # site URL cannot be misread as extra query parameters by the proxy.
    proxied_url = f"http://127.0.0.1:5000/?target_url={quote(url, safe='')}"

    iframe_html = f"""
    <iframe src="{proxied_url}" width="100%" height="600px"></iframe>
    """
    return iframe_html

def highlight_text(text):
    """Ask the proxy server to highlight *text* on the currently loaded page.

    Returns a human-readable status string in every case; never raises —
    errors are logged and reported as the return value so the UI can show them.
    """
    if not url:
        return "No website loaded."

    logger.info(f"Highlighting text: {text}")
    try:
        # A timeout keeps the Gradio callback from hanging indefinitely if the
        # proxy is down or wedged (the original call had none).
        response = requests.post(
            'http://127.0.0.1:5000/set_highlight',
            json={"highlight": text},
            timeout=10,
        )
        if response.status_code == 200:
            return "Highlight applied."
        else:
            return "Failed to apply highlight."
    except Exception as e:
        logger.error(f"Error highlighting text: {e}")
        return f"Error: {e}"

def clear_highlights():
    """Remove all highlights by cycling the proxy and re-serving the current site.

    Returns fresh iframe HTML, or a notice string when no site is loaded.
    """
    if url:
        logger.info("Clearing highlights and reloading the website.")
        start_proxy_server()   # a fresh proxy instance carries no highlight state
        return load_website(url)  # re-serve the same site, now highlight-free
    return "No website loaded."

# Function to handle the initial URL submission
def process_url(input_url):
    """Validate, crawl, and index *input_url*, then load it through the proxy.

    Returns a 3-tuple (status message, fresh chat history, iframe HTML) on
    every path — the ``submit_button.click`` wiring binds three outputs, so
    the error paths must not return fewer values (the originals returned 2,
    which breaks the Gradio callback).
    """
    logger.info(f"Processing URL: {input_url}")
    if not validators.url(input_url):
        logger.error(f"Invalid URL submitted: {input_url}")
        return "Invalid URL. Please enter a valid URL.", [], None

    try:
        status_message = "Crawling website and processing data..."
        logger.info(status_message)

        # Crawl only the submitted page itself (depth 0).
        web_crawler = WebCrawler()
        logger.info(f"Starting web crawl for {input_url}")
        extracted_content = web_crawler.crawl(input_url, 0)
        logger.info(f"Web crawl completed for {input_url}")

        # Index the crawled text so the chatbot can retrieve from it.
        logger.info("Processing extracted content with RAG system")
        rag_system.process_content(extracted_content)

        # Serve the site through the proxy for the in-app viewer.
        iframe_html = load_website(input_url)

        logger.info("URL processing completed successfully")
        return "Website content successfully crawled and processed!", [], iframe_html
    except Exception as e:
        logger.error(f"Error processing URL {input_url}: {str(e)}", exc_info=True)
        return f"Error: {str(e)}", [], None

# Function to handle chatbot interactions
def chatbot_response(user_input, chat_history):
    """Answer *user_input* with the RAG chatbot and highlight its source passages.

    Returns (chatbot display, updated state, highlight status) to match the
    three outputs bound in ``user_input.submit``. Each chat turn is stored as
    a [user_message, bot_message] pair — the structure ``gr.Chatbot`` renders;
    the previous ["User", msg] / ["Chatbot", resp] rows displayed the literal
    labels as messages.
    """
    logger.info(f"Received user input: {user_input}")
    try:
        logger.info("Generating chatbot response")
        response = chat_demo.chatbot(user_input)
        # One [user, bot] pair per turn.
        chat_history.append([user_input, response])

        logger.info("Chatbot response generated successfully")

        # Highlight, on the proxied page, each retrieved passage that
        # contributed to the answer.
        context_strings = chat_demo.get_last_context_strings()
        logger.info(f"Retrieved {len(context_strings)} context strings")

        for i, context in enumerate(context_strings, 1):
            highlight_result = highlight_text(context)
            logger.info(f"Highlight result for context {i}: {highlight_result}")

        highlight_status = f"Highlighted {len(context_strings)} context passages"
        logger.info(highlight_status)

        return chat_history, chat_history, highlight_status
    except Exception as e:
        logger.error(f"Error in chatbot_response: {str(e)}", exc_info=True)
        # Keep the history well-formed (the old error path returned a malformed
        # nested list); surface the error through the status textbox instead.
        return chat_history, chat_history, f"Error: {str(e)}"

# Function to reset the application
def reset_app():
    """Return the UI to its initial state and cycle the proxy server.

    Clears the tracked site URL, restarts the proxy, and returns blank values
    for (url_input, chat_history, status_message, proxied_view).
    """
    global url
    logger.info("Resetting application and proxy server")
    url = ""                  # forget the loaded site
    start_proxy_server()      # fresh proxy for the next session
    return "", [], "", ""

# Build the Gradio interface: left column holds the URL form, chat, and
# highlight controls; right column embeds the proxied website view.
with gr.Blocks() as demo:
    gr.Markdown("# Website Concierge")
    
    with gr.Row():
        with gr.Column(scale=1):
            # URL entry + crawl trigger, with a status line for crawl results.
            url_input = gr.Textbox(placeholder="Enter a website URL", label="Website URL", interactive=True)
            submit_button = gr.Button("Submit URL")
            status_message = gr.Textbox(label="Status", interactive=False)
            
            # Chat state (list of turns) plus its visible rendering and input box.
            chat_history = gr.State(value=[])
            chatbot_output = gr.Chatbot(label="Chat History")
            user_input = gr.Textbox(placeholder="Ask the chatbot...", label="User Input", interactive=True)
            
            # Feedback line for highlight_text / clear_highlights results.
            highlight_status = gr.Textbox(label="Highlight Status", interactive=False)
            
            clear_button = gr.Button("Clear Highlights")
            reset_button = gr.Button("Change Website")
        
        with gr.Column(scale=1):
            # The proxied site is injected here as raw <iframe> HTML.
            proxied_view = gr.HTML(label="Website View")

    # Initial URL submission: crawl + index + load through the proxy.
    submit_button.click(process_url, inputs=url_input, outputs=[status_message, chat_history, proxied_view], queue=True)

    # Enter in the chat box: generate an answer and highlight its sources.
    user_input.submit(chatbot_response, inputs=[user_input, chat_history], outputs=[chatbot_output, chat_history, highlight_status])

    # Remove highlights by restarting the proxy and reloading the page.
    clear_button.click(clear_highlights, outputs=[proxied_view])

    # Full reset: clears URL/chat/status/view for a new website.
    reset_button.click(reset_app, outputs=[url_input, chat_history, status_message, proxied_view])

# Launch the app (script entry point only; importing this module must not launch).
if __name__ == "__main__":
    logger.info("Starting Gradio application")
    start_proxy_server()  # Start with an initial ProxyServer
    # NOTE: demo.launch() blocks until the server shuts down, so the line
    # below only logs after the Gradio app has exited.
    demo.launch()
    logger.info("Gradio application stopped")