flight-chat / app.py
Rams901's picture
Update app.py
cdd50c9
import gradio as gr
import pandas as pd
import pandas as pd
import time
import spacy
from fuzzywuzzy import fuzz
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from collections import Counter
from heapq import nlargest
import nltk
import numpy as np
from tqdm import tqdm
import requests
import gzip
import os
import torch
import re
from bs4 import BeautifulSoup
import os
import openai
# Configure the OpenAI client from the environment. A missing OPENAI_KEY
# variable raises KeyError here, i.e. as soon as this module is loaded.
openai.api_key = os.environ['OPENAI_KEY']
# Disabled preprocessing, kept for reference: a regex-based helper that strips
# HTML tags, and its application to the 'content' and 'summary_html' columns.
# def remove_html_tags(text):
# clean = re.compile('<.*?>')
# return re.sub(clean, '', text)
# df['content'] = df.content.apply(lambda x: remove_html_tags(x))
# df['summary_html'] = df.summary_html.apply(lambda x: remove_html_tags(x))
# System prompt for the chat model. new_ask() appends the stringified list of
# candidate flights and a closing instruction to this text, so the prompt must
# keep ending with the "these are the existing flights:" lead-in.
FARE_GPT = """FareGPT is designed to be able to assist with a wide range of flights recommendation and suggestions, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. FareGPT is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
FareGPT is able to process and understand large amounts of flight booking and availability seats. FareGPT will be provided with different flights, airlines, alliances and mileage points for different airflights, and will make sure to suggest the right flights to users with their precise urls.
FareGPT will start by recognizing the different places available and comprehend the user needs to give suggestions and optimal choices in terms of cost, mileage points, comfort and other factors.
FareGPT will always show the exact url to the flight without any edits when showing a flight.
these are the existing flights:
"""
# Friendly names for the cabin-coded availability columns; search() passes this
# mapping to DataFrame.rename. Each source column must appear exactly once: a
# repeated key in a dict literal silently keeps only the last value.
# NOTE(review): W is mapped to "Business" and J to "Premium" here; common fare
# class usage is the other way round (W = premium economy, J = business).
# Confirm the intended labels before relying on them.
class_mp = {
    'YMileageCost': 'EconomyMileageCost',
    'WMileageCost': 'BusinessMileageCost',
    'JMileageCost': 'PremiumMileageCost',
    'YRemainingSeats': 'EconomyRemainingSeats',
    'WRemainingSeats': 'BusinessRemainingSeats',
    'JRemainingSeats': 'PremiumRemainingSeats',
}

# Slot filters extracted from user queries. search() updates this dict in
# place on every call, so filters accumulate for the lifetime of the process.
adjust_tags = {}

# Fragments used by convert_columns() to turn a dotted slot label such as
# "fromloc.city_name" into a booking column name such as "OriginCity".
taxonomy = {
    'fare_amount': 'mileagecost',
    'fromloc': 'Origin',
    'toloc': 'Destination',
    'city_name': 'City',
    'country_name': 'Country',
}
def _seat_search_url(row):
    """Return the seats.aero search link for one booking row."""
    link = f"https://seats.aero/search?origin={row['OriginAirport']}&destination={row['DestinationAirport']}&date={row['Date']}"
    # Keep only the text before the first space (presumably this drops the
    # time-of-day part when the date renders as a full timestamp).
    return link.split(" ")[0]


# Raw availability feed; the "Route" column holds one nested record per row
# (presumably a dict with at least the three fields selected below).
df = pd.read_json('availability.json')
route_fields = ["OriginAirport", 'DestinationAirport', 'Source']
routes = pd.DataFrame(list(df['Route'].values))[route_fields]

# Keep the date plus the per-cabin mileage cost and remaining-seat columns.
fare_fields = [
    'Date',
    'YMileageCost',
    'WMileageCost',
    'JMileageCost',
    'YRemainingSeats',
    'WRemainingSeats',
    'JRemainingSeats',
    'FRemainingSeats',
]
df = df[fare_fields]

# Put fare data and route data side by side, row for row.
booking = pd.concat([df, routes], axis=1)

# Airport reference table, without its last two columns.
airport_codes = pd.read_csv("airport_code.csv")
airport_codes = airport_codes[list(airport_codes.columns[:-2])]

# Attach the airport details twice: once for the origin, once for the
# destination, each under its own column prefix.
booking = pd.merge(
    booking,
    airport_codes.add_prefix("Origin"),
    left_on="OriginAirport",
    right_on="OriginCode",
)
booking = pd.merge(
    booking,
    airport_codes.add_prefix("Destination"),
    left_on="DestinationAirport",
    right_on="DestinationCode",
)

booking['url'] = booking.apply(_seat_search_url, axis=1).values

# Snapshot every row as a {column: value} dict; search() scores and returns
# these dicts.
column_names = booking.columns
booking['real_description'] = [dict(zip(column_names, row)) for row in booking.values]
# Build the chat UI. The inline CSS defines styles for four element ids
# (#white-button, #orange-button-1/2/3); none of the components created in
# this section sets a matching element id.
# NOTE(review): `.style(...)` is deprecated in later Gradio releases - confirm
# the pinned Gradio version before upgrading.
with gr.Blocks(css = """#white-button {
background-color: #FFFFFF;
color: #000000;
}
#orange-button-1 {
background-color: #FFDAB9;
color: #000000;
}
#orange-button-2 {
background-color: #FFA07A;
color: #FFFFFF;
}
#orange-button-3 {
background-color: #FF4500;
color: #FFFFFF;
}""", theme=gr.themes.Soft()) as demo:
# Conversation pane; user() and new_ask() treat its value as a list of
# [user_message, bot_reply] pairs.
chatbot = gr.Chatbot().style(height=750)
with gr.Row():
with gr.Column(scale = 0.75, min_width=0):
# Free-text query box; submitting it triggers the chat pipeline.
msg = gr.Textbox(placeholder = "Enter text and press enter",show_label=False).style(container = False)
with gr.Column(scale = 0.25, min_width=0):
# Button whose click handler resets the conversation pane.
clear = gr.Button("Clear")
# index = gr.Textbox(value = "0", visible = False)
def user(user_message, history):
    """Queue the user's message as a new, still unanswered chat turn.

    Returns a pair: an empty string (the new textbox content) and a new
    history list with ``[user_message, None]`` appended. The incoming
    ``history`` list itself is not modified.
    """
    pending_turn = [user_message, None]
    extended_history = history + [pending_turn]
    return "", extended_history
def convert_columns(x, mapping=None):
    """Translate dotted slot labels into booking column names.

    A key of the form ``"<direction>.<tag>"`` (for example
    ``"fromloc.city_name"``) becomes the concatenation of the mapped direction
    and the mapped tag (``"OriginCity"``). A key that has no dot, or whose
    parts are missing from the mapping, is kept unchanged.

    Args:
        x: Dict of slot label -> extracted value.
        mapping: Fragment lookup table; defaults to the module-level
            ``taxonomy``.

    Returns:
        A new dict with translated keys and the original values. If two keys
        translate to the same name, the later one wins.
    """
    lookup = taxonomy if mapping is None else mapping
    full_map = {}
    for key, value in x.items():
        try:
            parts = key.split('.')
            full_word = lookup[parts[0]] + lookup[parts[1]]
        except (AttributeError, IndexError, KeyError):
            # Non-string key, no dot in the key, or an unknown fragment:
            # fall back to the label as given.
            full_word = key
        full_map[full_word] = value
    return full_map
def compare_flights_dynamic(flight1, flight2):
    """Score how well a flight record matches the requested filters.

    For every field of ``flight2`` (the filters) that also exists in
    ``flight1`` (the flight record), the two values are compared with
    ``fuzz.token_set_ratio``. The result is the mean of those ratios scaled
    to the range 0..1, or ``0.0`` when the two dicts share no field.
    """
    # TODO: take cabin class and date (year) into account; this needs tuning.
    # TODO: add a mileage-cost filter. Flights above the requested cost could
    # be dropped, but users may not know the realistic mileage range, so those
    # flights may be better offered as secondary suggestions.
    ratios = [
        fuzz.token_set_ratio(flight1[field], wanted)
        for field, wanted in flight2.items()
        if field in flight1
    ]
    if not ratios:
        return 0.0
    return sum(ratios) / (len(ratios) * 100)
def search(query, timeout=60):
    """Return the ten booking records that best match the slots found in *query*.

    The query is sent to the remote slot-tagging endpoint. The tagged slots
    are merged into the module-level ``adjust_tags`` filters, every booking
    row is scored against those filters with ``compare_flights_dynamic``, and
    the ``real_description`` dicts of the ten highest-scoring rows are
    returned.

    Args:
        query: The user's free-text request.
        timeout: Seconds to wait for the tagging endpoint. Without a timeout
            ``requests`` can block indefinitely on an unresponsive server.

    Returns:
        A list of up to ten ``real_description`` dicts, best match first.

    Raises:
        requests.RequestException: If the tagging request fails or times out.

    Side effects:
        Updates ``adjust_tags`` in place and writes the scores to the
        ``latest_local`` column of the module-level ``booking`` frame.
    """
    response = requests.post(
        "https://rams901-openslu.hf.space/run/predict",
        json={"data": [query]},
        timeout=timeout,
    )
    # Name the parser explicitly so the extracted text does not depend on
    # which optional parser libraries happen to be installed.
    test_text = BeautifulSoup(response.content, "html.parser").get_text()

    # The text is split on the two-character sequence backslash + "n", not on
    # real newlines (presumably because the payload is still JSON-escaped -
    # confirm against the endpoint). Only lines carrying a "B-" tag are used:
    # the segment after the first hyphen is the slot label, and the text
    # before it, minus its final character, is the slot value.
    tags = {}
    for line in test_text.split("\\n"):
        if 'B-' not in line:
            continue
        pieces = line.strip().split('-')
        tags[pieces[1]] = pieces[0][:-1]

    # Merge the new slots into the filters kept from earlier queries.
    adjust_tags.update(convert_columns(tags))

    booking['latest_local'] = booking['real_description'].apply(
        compare_flights_dynamic, flight2=adjust_tags
    ).values
    results = booking.sort_values("latest_local", ascending=False).head(10)
    # NOTE(review): this rename only affects the frame's column labels; the
    # dicts in 'real_description' returned below keep the original Y/W/J
    # names. Confirm whether the friendly names were meant to reach the prompt.
    results = results.rename(columns=class_mp)

    # An earlier embedding-based (cosine similarity) ranking was replaced by
    # the fuzzy matching above.
    return results['real_description'].to_list()
def new_ask(chatlog):
    """Answer the newest user message and store the reply in *chatlog*.

    Looks up candidate flights for the latest message with ``search``, builds
    the chat prompt (system prompt + candidate flights, up to two previous
    turns, then the latest message) and asks the OpenAI chat model for a
    reply.

    Args:
        chatlog: List of ``[user_message, bot_reply]`` pairs; the last pair
            is the turn waiting for a reply.

    Returns:
        The same ``chatlog`` list, with the last pair's reply filled in.
    """
    availability = search(chatlog[-1][0])
    print(availability, adjust_tags)

    messages = [{
        "role": "system",
        "content": FARE_GPT + str(availability) + "\nMake sure to only use from the flights provided only. ",
    }]

    # Replay up to two previous turns in the order they happened: the user's
    # message first, then the assistant's reply to it. The slice is empty when
    # there is no earlier turn.
    for turn in chatlog[-3:-1]:
        messages.append({'role': 'user', 'content': turn[0]})
        if turn[1] is not None:
            # A turn whose reply was never produced has no assistant message.
            messages.append({'role': 'assistant', 'content': turn[1]})

    messages.append({'role': 'user', 'content': chatlog[-1][0]})

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
    )
    # Known limitations: the model's handling of the supplied flight list and
    # of follow-up questions is weak. The availability list should be saved in
    # the history, so that a message yielding no new slots can be answered
    # from the previous list.
    chatlog[-1][1] = response['choices'][0]['message']['content']
    return chatlog
# Submitting the textbox first runs user() (clears the box and appends the
# pending turn to the chat), then new_ask() fills in the reply.
submit_event = msg.submit(
    fn=user,
    inputs=[msg, chatbot],
    outputs=[msg, chatbot],
    queue=False,
)
submit_event.then(fn=new_ask, inputs=chatbot, outputs=chatbot)
# The Clear button sets the chat pane's value to None.
clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)
demo.launch()