File size: 2,988 Bytes
fddef6f
902ba38
3b10495
902ba38
 
 
 
6349239
902ba38
a1c50c9
 
902ba38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd5aaae
 
902ba38
 
 
 
 
 
 
dd5aaae
902ba38
 
 
 
 
dd5aaae
902ba38
 
 
 
 
 
 
 
 
 
 
6349239
 
902ba38
 
cb12e63
 
dd5aaae
 
 
 
902ba38
 
 
dd5aaae
 
 
98343f4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import gradio as gr
import openai
from openai import OpenAI
import os
from bs4 import BeautifulSoup
import requests
import json
import ast
# Initialize the OpenAI client with the API key
# Read the key from the API_KEY environment variable.
# NOTE(review): os.getenv returns None when the variable is unset, so a missing
# key is not caught here — API calls will fail later at request time.
token = os.getenv('API_KEY')
client = openai.OpenAI(api_key=token)

def extract_text_from_webpage(html):
    """Return the human-visible text of an HTML document.

    Removes <script> and <style> elements, then joins the remaining text
    nodes with single spaces, trimming surrounding whitespace.
    """
    soup = BeautifulSoup(html, "html.parser")
    for hidden in soup(["script", "style"]):
        hidden.decompose()
    return soup.get_text(separator=" ", strip=True)

def search(query):
    """Scrape Google web search for *query* and fetch each result page.

    Returns a list of ``{"link": url, "text": visible_text}`` dicts. Pages
    that fail to download are still recorded, with ``"text": None``, so one
    bad link does not abort the whole search. Page text is truncated to
    ``max_chars_per_page`` characters to keep downstream LLM context bounded.
    """
    max_chars_per_page = 8000
    all_results = []

    with requests.Session() as session:
        try:
            resp = session.get(
                url="https://www.google.com/search",
                headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"},
                params={"q": query, "num": 7},
                timeout=5
            )
            resp.raise_for_status()

            soup = BeautifulSoup(resp.text, "html.parser")
            # NOTE(review): the "g" class is Google's result-container markup,
            # which changes without notice — verify the selector still matches.
            result_block = soup.find_all("div", attrs={"class": "g"})

            for result in result_block:
                anchor = result.find("a", href=True)
                if not anchor:  # guard clause: skip results with no link
                    continue
                link = anchor["href"]
                try:
                    webpage = session.get(link, headers={"User-Agent": "Mozilla/5.0"}, timeout=5)
                    webpage.raise_for_status()

                    visible_text = extract_text_from_webpage(webpage.text)
                    # Fix: honor max_chars_per_page instead of a duplicated
                    # hard-coded 8000 literal (the variable was previously unused).
                    all_results.append({"link": link, "text": visible_text[:max_chars_per_page]})

                except requests.exceptions.RequestException as e:
                    # Best-effort: record the failure and keep going.
                    print(f"Failed to retrieve {link}: {e}")
                    all_results.append({"link": link, "text": None})
        except requests.exceptions.RequestException as e:
            print(f"Google failed: {e}")

    return all_results

def chat(message, history):
    """Gradio chat handler: plan searches with one fine-tuned model, run them,
    then produce the final answer with a second fine-tuned model.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list
        Conversation history supplied by gr.ChatInterface (currently unused).

    Returns
    -------
    str
        The assistant's reply text.
    """
    messages = [
        {'role': 'system', 'content': 'RESPOND IN THE LIST OF SEARCHES DICTIONARY FORMAT. LIMIT SEARCHES TO 1'},
        {'role': 'user', 'content': message},
    ]

    # First model: decides which web searches to run for this message.
    response = client.chat.completions.create(
        model='ft:gpt-4o-mini-2024-07-18:personal:searchesbot:ASr1QLhe',
        messages=messages
    )
    reply = response.choices[0].message.content

    # Fix: removed the redundant double assignment
    # (`json_resply = dictionary = ast.literal_eval(reply)`; `dictionary` was unused).
    # literal_eval is safe on untrusted text (no code execution) but raises on
    # malformed output, and the 'searches' key may be absent — fall back to no
    # searches instead of crashing the whole chat handler.
    try:
        parsed = ast.literal_eval(reply)
        searches = parsed.get('searches', []) if isinstance(parsed, dict) else []
    except (ValueError, SyntaxError):
        searches = []

    data = [search(term) for term in searches]

    messages.append({'role': 'assistant', 'content': f"Search results: {data}"})

    # Second model: answers the user with the search results in context.
    response_for_chat = client.chat.completions.create(
        model='ft:gpt-4o-mini-2024-07-18:personal:imfin-01:ATDXwhzz',
        messages=messages
    )

    return response_for_chat.choices[0].message.content

gr.ChatInterface(fn=chat).launch()