File size: 8,747 Bytes
e2d2a4f
 
 
 
 
 
69c1d73
e2d2a4f
 
 
 
 
 
 
 
 
69c1d73
e2d2a4f
 
 
 
69c1d73
e2d2a4f
 
4657019
d7b09c6
e2d2a4f
 
 
 
 
 
 
0be648c
 
cdd50c9
0be648c
 
cdd50c9
 
0be648c
 
c7625f5
0be648c
c7625f5
 
e32e0f5
4f7d067
 
 
 
 
 
c7625f5
4f7d067
c7625f5
4f7d067
c7625f5
4f7d067
 
cdd50c9
4f7d067
0be648c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7625f5
0be648c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69c1d73
0be648c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69c1d73
0be648c
69c1d73
0be648c
69c1d73
0be648c
 
 
 
69c1d73
0be648c
 
 
 
69c1d73
0be648c
4263972
0be648c
 
 
 
 
c7625f5
0be648c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e2d2a4f
69c1d73
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import gradio as gr
import pandas as pd
import pandas as pd
import time
import spacy

from fuzzywuzzy import fuzz
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from collections import Counter
from heapq import nlargest

import nltk
import numpy as np
from tqdm import tqdm

import requests
import gzip
import os
import torch
import re
from bs4 import BeautifulSoup
import os

import openai
openai.api_key = os.environ['OPENAI_KEY']
# ---------------------------------------------------------------------------
# Prompt, column taxonomy, and flight-availability data setup.
# Runs once at import time; `booking` and `adjust_tags` are read/updated by
# the handler functions defined inside the Gradio Blocks below.
# ---------------------------------------------------------------------------

FARE_GPT = """FareGPT is designed to be able to assist with a wide range of flights recommendation and suggestions, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. FareGPT is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

FareGPT is able to process and understand large amounts of flight booking and availability seats. FareGPT will be provided with different flights, airlines, alliances and mileage points for different airflights, and will make sure to suggest the right flights to users with their precise urls.

FareGPT will start by recognizing the different places available and comprehend the user needs to give suggestions and optimal choices in terms of cost, mileage points, comfort and other factors.
FareGPT will always show the exact url to the flight without any edits when showing a flight. 
these are the existing flights:
 """

# Rename raw fare-class columns to reader-friendly names before showing
# results to the model.
# NOTE(review): the original dict literal had 'YMileageCost' twice, so the
# Economy mapping was silently overwritten; the second key was presumably
# meant to be 'JMileageCost'. The W->Business / J->Premium labelling is kept
# to match the seat-count entries — confirm against the cabin codes actually
# used upstream (commonly W = premium economy, J = business).
class_mp = {'YMileageCost': 'EconomyMileageCost', 'WMileageCost': 'BusinessMileageCost', 'JMileageCost': 'PremiumMileageCost', 'YRemainingSeats': 'EconomyRemainingSeats', 'WRemainingSeats': 'BusinessRemainingSeats', 'JRemainingSeats': 'PremiumRemainingSeats'}

# Slot filters accumulated across chat turns; mutated in place by search().
adjust_tags = {}

# Maps slot-tagger output fragments (e.g. 'fromloc', 'city_name') to booking
# column-name fragments (e.g. 'Origin' + 'City' -> 'OriginCity').
taxonomy = {'fare_amount': 'mileagecost', 'fromloc': 'Origin', 'toloc': 'Destination', 'city_name': 'City', 'country_name': 'Country'}

# Load award availability and split the nested Route record into its own frame.
df = pd.read_json('availability.json')
routes = pd.DataFrame(list(df['Route'].values))[["OriginAirport", 'DestinationAirport', 'Source']]
df = df[['Date', 'YMileageCost',
         'WMileageCost', 'JMileageCost', 'YRemainingSeats',
         'WRemainingSeats', 'JRemainingSeats', 'FRemainingSeats']]

booking = pd.concat((df, routes), axis=1)

airport_codes = pd.read_csv("airport_code.csv")

# Drop the last two columns of the airport table (unused for matching).
airport_codes = airport_codes[list(airport_codes.columns[:-2])]

# Attach airport metadata to both ends of every route, prefixing columns so
# origin and destination fields don't collide after the merges.
booking = pd.merge(booking, airport_codes.rename(columns={c: f"Origin{c}" for c in airport_codes.columns}), left_on="OriginAirport", right_on="OriginCode")
booking = pd.merge(booking, airport_codes.rename(columns={c: f"Destination{c}" for c in airport_codes.columns}), left_on="DestinationAirport", right_on="DestinationCode")

# Deep-link per row; .split(" ")[0] trims anything after a stray space
# (presumably a time component in Date — TODO confirm).
booking['url'] = booking.apply(lambda x: f"https://seats.aero/search?origin={x['OriginAirport']}&destination={x['DestinationAirport']}&date={x['Date']}".split(" ")[0], axis=1).values
# One plain dict per row: used for fuzzy matching and as prompt context.
booking['real_description'] = [{booking.columns[i]: x[i] for i in range(len(x))} for x in booking.values]
# Build the Gradio UI: a tall chat pane above a message box and a Clear
# button.  The CSS ids below are defined but not attached to any component
# in this view — presumably left over from styling experiments; confirm
# before removing.
with gr.Blocks(css = """#white-button {
  background-color: #FFFFFF;
  color: #000000;
}

#orange-button-1 {
  background-color: #FFDAB9;
  color: #000000;


}

#orange-button-2 {
  background-color: #FFA07A;
  color: #FFFFFF;

}

#orange-button-3 {
  background-color: #FF4500;
  color: #FFFFFF;
}""", theme=gr.themes.Soft()) as demo:
    # Conversation view; holds a list of [user_message, assistant_reply] pairs.
    chatbot = gr.Chatbot().style(height=750)

    with gr.Row():
      with gr.Column(scale = 0.75, min_width=0):
        # Free-text input; Enter-to-submit is wired to user()/new_ask() below.
        msg = gr.Textbox(placeholder = "Enter text and press enter",show_label=False).style(container = False)
      with gr.Column(scale = 0.25, min_width=0):
            clear = gr.Button("Clear")


    # index = gr.Textbox(value = "0", visible = False)
    def user(user_message, history):
        """Echo the submitted message into the chat history.

        Returns a pair: "" (clears the textbox) and the history with a new
        [message, None] entry appended — None is the placeholder that
        new_ask later fills with the assistant's reply.
        """
        updated_history = list(history) + [[user_message, None]]
        return "", updated_history

    def convert_columns(x):
        """Translate slot-tagger keys to booking column names.

        A key like 'fromloc.city_name' becomes taxonomy['fromloc'] +
        taxonomy['city_name'] -> 'OriginCity'.  Keys without a dot, or whose
        parts are missing from the taxonomy, are kept unchanged.  Values are
        copied through untouched.
        """
        full_map = {}
        for key, value in x.items():
            parts = key.split('.')
            try:
                # IndexError: key has no '.'; KeyError: part not in taxonomy.
                # Either way the raw key is used as-is.
                full_word = taxonomy[parts[0]] + taxonomy[parts[1]]
            except (IndexError, KeyError):
                full_word = key
            full_map[full_word] = value
        return full_map

    def compare_flights_dynamic(flight1, flight2):
        """Score how well a booking-row dict matches the extracted filters.

        flight1 is a row's real_description dict; flight2 is the accumulated
        filter dict (adjust_tags).  For every key both dicts share, a fuzzy
        token-set similarity (0-100) is computed; the mean, normalized to
        0.0-1.0, is returned.  0.0 when no keys overlap.

        TODO(review): class type and Date (year) are not yet compared, and
        mileage-cost range filtering ('mileagecost' vs the Y/W/J cost
        columns) was sketched but never enabled — flights outside the user's
        point budget are currently still ranked.
        """
        sims = [
            fuzz.token_set_ratio(flight1[key], flight2[key])
            for key in flight2
            if key in flight1
        ]
        if not sims:
            return 0.0
        # token_set_ratio is 0-100, so len(sims)*100 normalizes the mean
        # into [0.0, 1.0].
        return sum(sims) / (len(sims) * 100)

    def search(query):
        """Extract slot tags from the query via a remote SLU model, fold them
        into the global filter set, and return the 10 best-matching flights
        as a list of row dicts."""

        # Remote slot-filling/intent model hosted on HF Spaces.
        response = requests.post("https://rams901-openslu.hf.space/run/predict", json={
        "data": [
          query,
        ] } )

        # NOTE(review): the split target is the two-character literal "\n"
        # (backslash + n), not a newline — the remote response apparently
        # embeds escaped newlines in its HTML; confirm against the endpoint.
        # Only 'B-' (begin-tag) lines are kept.
        test_text = BeautifulSoup(response.content).get_text()
        test_list = [x.strip() for x in test_text.split("\\n") if ('B-' in x) ]
        # 'B-fromloc: JFK' style lines -> {token_after_dash: text_before_dash
        # minus trailing char}; presumably {value: slot_name} — verify format.
        tags = {x.split('-')[1]:x.split('-')[0][:-1] for x in test_list}
        global adjust_tags

        # UPDATE/ADD newer filters
        # Filters persist across turns: each query refines/overrides earlier ones.
        adjust_tags.update(convert_columns(tags))
        

        # Score every booking row against the accumulated filters (flight1 is
        # the row dict, flight2 the filter dict).
        booking['latest_local'] = booking['real_description'].apply(compare_flights_dynamic, flight2 = (adjust_tags)).values

        # Top-10 by similarity, with fare-class columns renamed for readability.
        results = (booking.sort_values("latest_local", ascending=False).head(10))
        results = results.rename(columns = class_mp)
      # })
      #   n = 10
      #   query_embedding = model.encode(query)
      #   df["similarity"] = df.embedding.apply(lambda x: cosine_similarity(x, query_embedding.reshape(768,-1)))

        # results = (df.sort_values("similarity", ascending=False).head(n))
        # r_groupby = pd.DataFrame(results)
        # #results = results[['title','url','keywords','summary_html']].drop_duplicates()  
        # results = r_groupby.reset_index()
        # results = results.sort_values("similarity", ascending=False)
        resultlist = results['real_description'].to_list()
        # for r in results.index:
        #     # chunk = results
        #     # print(chunk)
        #     local = json.loads(results[r].to_json())
            
        #     local['score'] = str(results.similarity[r][0])
        #     resultlist.append(
        #         local
        #     )
        # for i in range(len(results)):
        #   resultlist.append(results['description'].iloc[i])
        return resultlist

    def new_ask(chatlog):
        """Generate the assistant reply for the latest user message.

        chatlog is a list of [user_message, assistant_reply] pairs; the last
        entry's reply is None (set by user()).  The flights matched by
        search() are injected into the system prompt, up to two previous
        turns are replayed for context, and the completion is written into
        chatlog[-1][1] before the updated log is returned.
        """
        availability = search(chatlog[-1][0])
        # Ground the model strictly in the retrieved flights.
        messages = [{"role": "system", "content": FARE_GPT + str(availability) + "\nMake sure to only use from the flights provided only. "}]
        if len(chatlog) > 1:
            # Replay up to the two previous turns in chronological order:
            # the user message first, then the assistant reply it produced.
            # (The original emitted assistant-before-user, which reversed
            # the conversation seen by the model.)
            for turn in chatlog[-3:-1]:
                messages.append({'role': 'user', 'content': turn[0]})
                messages.append({'role': 'assistant', 'content': turn[1]})

        messages.append({'role': 'user', 'content': chatlog[-1][0]})
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0,  # deterministic output for reproducible suggestions
        )
        # NOTE(review): follow-up turns lose the availability list — consider
        # storing it alongside the history so later questions about the same
        # flights still have the data in context.
        chatlog[-1][1] = response['choices'][0]['message']['content']
        return chatlog

    # Submitting the textbox first echoes the message into the chat (user,
    # unqueued so the UI updates immediately), then generates the model's
    # reply (new_ask).
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        new_ask, chatbot, chatbot
    )

    # Returning None clears the Chatbot component.
    clear.click(lambda: None, None, outputs = chatbot, queue=False)

demo.launch()