File size: 3,449 Bytes
9933e0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
import openai
import gradio as gr
import requests
from bs4 import BeautifulSoup
import urllib.parse

# python-dotenv is an optional, development-only convenience: when it is
# installed, variables from a local .env file are loaded into the environment;
# in production it may not be installed and the import is simply skipped.
try:
    from dotenv import load_dotenv
except ImportError:
    pass
else:
    load_dotenv()

# Prefer the project-specific OPEN_API_KEY variable, but fall back to the
# conventional OPENAI_API_KEY so that an unset OPEN_API_KEY does not replace
# an otherwise valid key with None.
openai.api_key = os.getenv("OPEN_API_KEY") or os.getenv("OPENAI_API_KEY")

class Conversation:
    """Chat session that answers questions about one scraped web page.

    ``get_data`` downloads a page and seeds ``self.messages`` with system
    prompts describing it; ``ask_chatbot`` then extends that list with the
    user/assistant turns. ``user`` and ``bot`` take and return a chat history
    made of ``[user_message, bot_message]`` pairs.
    """

    # Seconds to wait for the page download before giving up; without a
    # timeout a stalled server would block the caller indefinitely.
    REQUEST_TIMEOUT = 10
    # Joined paragraph text longer than MAX_PARAGRAPH_CHARS is cut down to its
    # first TRUNCATED_PARAGRAPH_CHARS characters before being sent to the model.
    MAX_PARAGRAPH_CHARS = 7500
    TRUNCATED_PARAGRAPH_CHARS = 5000
    # Reply shown in the chat when the model could not be queried.
    FALLBACK_REPLY = "Please try again"

    def __init__(self):
        # OpenAI chat messages ({"role": ..., "content": ...}) for this session.
        self.messages = []

    @staticmethod
    def _looks_like_host_port(parsed_url):
        """Return True when a parse result is really a scheme-less "host:port".

        ``urlparse("localhost:8000")`` can report ``scheme="localhost"`` and
        ``path="8000"`` with an empty netloc, so a digits-only first path
        segment with no netloc is treated as a port rather than a real scheme.
        """
        if parsed_url.netloc:
            return False
        port_candidate = parsed_url.path.split("/", 1)[0]
        return port_candidate.isascii() and port_candidate.isdigit()

    def to_valid_url(self, input_string):
        """Normalize user input into a URL with a scheme and a host.

        Surrounding whitespace is stripped and ``http://`` is prepended when
        the input has no scheme (including the bare ``host:port`` form).

        Args:
            input_string: Raw text typed by the user.

        Returns:
            The normalized URL string.

        Raises:
            ValueError: If the result still has no network location.
        """
        url = input_string.strip()
        parsed_url = urllib.parse.urlparse(url)

        if not parsed_url.scheme or self._looks_like_host_port(parsed_url):
            url = "http://" + url
            parsed_url = urllib.parse.urlparse(url)

        if not parsed_url.netloc:
            raise ValueError("Invalid input, cannot convert to valid URL")

        return parsed_url.geturl()

    def get_data(self, old_url):
        """Download a page and reset the conversation with prompts about it.

        The page's ``h1`` and ``h2`` headings and its paragraph text are
        placed into system messages, replacing any previous conversation.

        Args:
            old_url: URL (or scheme-less host) entered by the user.

        Returns:
            The freshly built ``self.messages`` list.

        Raises:
            ValueError: If ``old_url`` cannot be converted to a valid URL.
            requests.exceptions.RequestException: If the download fails or
                exceeds ``REQUEST_TIMEOUT``.
        """
        url = self.to_valid_url(old_url)
        html = requests.get(url, timeout=self.REQUEST_TIMEOUT).text
        doc = BeautifulSoup(html, 'html.parser')

        headings_1 = [e.text for e in doc.find_all('h1')]
        headings_2 = [e.text for e in doc.find_all('h2')]

        # Always send the joined text, so the prompt has the same shape
        # whether or not the page was long enough to be truncated.
        paragraphs = ' '.join(e.text for e in doc.find_all('p'))
        if len(paragraphs) > self.MAX_PARAGRAPH_CHARS:
            paragraphs = paragraphs[:self.TRUNCATED_PARAGRAPH_CHARS]

        self.messages = [
            {'role': 'system', 'content': "You are a helpful assistant that must answer questions about a website."},
            {'role': 'system', 'content': f"here are the h1s - {headings_1}"},
            {'role': 'system', 'content': f"here are the h2s - {headings_2}"},
            {'role': 'system', 'content': f"here are the paragraphs - {paragraphs}"},
        ]
        return self.messages

    def ask_chatbot(self, input):
        """Send a user message to the chat model and record the exchange.

        Args:
            input: The user's message. (The name shadows the builtin but is
                kept so existing keyword callers continue to work.)

        Returns:
            The assistant's reply text, or ``None`` when ``input`` is empty.
        """
        if not input:
            return None

        user_message = {"role": "user", "content": input}
        chat = openai.ChatCompletion.create(
            model="gpt-3.5-turbo", messages=self.messages + [user_message]
        )
        reply = chat.choices[0].message.content

        # Commit both turns only after the request succeeded, so a failed
        # call does not leave an unanswered user message in the history.
        self.messages.append(user_message)
        self.messages.append({"role": "assistant", "content": reply})
        return reply

    def user(self, user_message, history):
        """Add the user's message to the chat history as a pending turn.

        Args:
            user_message: Text the user just submitted.
            history: Existing list of ``[user_message, bot_message]`` pairs.

        Returns:
            A tuple of an empty string and a new history list with
            ``[user_message, None]`` appended.
        """
        return "", history + [[user_message, None]]

    def bot(self, history):
        """Fill in the bot reply for the most recent turn of ``history``.

        Args:
            history: List of ``[user_message, bot_message]`` pairs whose last
                entry is still awaiting a reply.

        Returns:
            The same ``history`` list with the last entry's reply set.
        """
        user_message = history[-1][0]
        try:
            bot_message = self.ask_chatbot(user_message)
        except NameError:
            # Kept from the original handler for backward compatibility.
            bot_message = self.FALLBACK_REPLY
        except openai.error.OpenAIError:
            # API failures (rate limits, auth, network) should surface as a
            # retry prompt in the chat instead of crashing the callback.
            bot_message = self.FALLBACK_REPLY
        history[-1][1] = bot_message
        return history