Spaces:

Rams901
/

flight-chat

Runtime error

App Files Files Community

flight-chat / app.py

Rams901

Update app.py

cdd50c9 almost 3 years ago

raw

history blame contribute delete

8.75 kB

	import gradio as gr
	import pandas as pd
	import pandas as pd
	import time
	import spacy

	from fuzzywuzzy import fuzz
	from spacy.lang.en.stop_words import STOP_WORDS
	from string import punctuation
	from collections import Counter
	from heapq import nlargest

	import nltk
	import numpy as np
	from tqdm import tqdm

	import requests
	import gzip
	import os
	import torch
	import re
	from bs4 import BeautifulSoup
	import os

	import openai
	# Read the OpenAI API key from the OPENAI_KEY environment variable. The
	# subscript lookup raises KeyError while the module loads if it is not set.
	openai.api_key = os.environ['OPENAI_KEY']
	# Disabled helper (kept for reference): stripped HTML tags with a regex from
	# the `content` and `summary_html` columns of a DataFrame named `df`.
	# def remove_html_tags(text):
	# clean = re.compile('<.*?>')
	# return re.sub(clean, '', text)

	# df['content'] = df.content.apply(lambda x: remove_html_tags(x))
	# df['summary_html'] = df.summary_html.apply(lambda x: remove_html_tags(x))

	# System-prompt preamble for the chat model. new_ask() appends the
	# stringified search results and a closing instruction to this text, so the
	# final line deliberately ends by introducing the list of flights.
	FARE_GPT = """FareGPT is designed to be able to assist with a wide range of flights recommendation and suggestions, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. FareGPT is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

	FareGPT is able to process and understand large amounts of flight booking and availability seats. FareGPT will be provided with different flights, airlines, alliances and mileage points for different airflights, and will make sure to suggest the right flights to users with their precise urls.

	FareGPT will start by recognizing the different places available and comprehend the user needs to give suggestions and optimal choices in terms of cost, mileage points, comfort and other factors.
	FareGPT will always show the exact url to the flight without any edits when showing a flight.
	these are the existing flights:
	"""
	# availability = []
	# Friendly names for the cabin-class columns of the availability data.
	# The original literal listed 'YMileageCost' twice, so the second entry
	# silently replaced the Economy label and 'JMileageCost' was never mapped;
	# the J entry below mirrors the existing 'JRemainingSeats' -> Premium mapping.
	# NOTE(review): 'FRemainingSeats' has no friendly name here - confirm intended.
	class_mp = {
	    'YMileageCost': 'EconomyMileageCost',
	    'WMileageCost': 'BusinessMileageCost',
	    'JMileageCost': 'PremiumMileageCost',
	    'YRemainingSeats': 'EconomyRemainingSeats',
	    'WRemainingSeats': 'BusinessRemainingSeats',
	    'JRemainingSeats': 'PremiumRemainingSeats',
	}
	# Search filters accumulated across queries; search() updates this in place.
	adjust_tags = {}
	# Translates slot-tag fragments into fragments of booking column names;
	# convert_columns() concatenates two of them (e.g. 'fromloc' + 'city_name'
	# becomes 'OriginCity').
	taxonomy = {
	    'fare_amount': 'mileagecost',
	    'fromloc': 'Origin',
	    'toloc': 'Destination',
	    'city_name': 'City',
	    'country_name': 'Country',
	}

	# Load the availability records and expand each entry of the 'Route' column
	# into its own columns, keeping only the airports and the source.
	df = pd.read_json('availability.json')
	route_records = list(df['Route'].values)
	routes = pd.DataFrame(route_records)[["OriginAirport", 'DestinationAirport', 'Source']]

	# Keep the date plus the per-class mileage costs and remaining seats.
	kept_columns = [
	    'Date',
	    'YMileageCost', 'WMileageCost', 'JMileageCost',
	    'YRemainingSeats', 'WRemainingSeats', 'JRemainingSeats', 'FRemainingSeats',
	]
	df = df[kept_columns]

	# Side-by-side concatenation: rows are paired up by index.
	booking = pd.concat((df, routes), axis=1)

	# Airport lookup table with its last two columns dropped.
	airport_codes = pd.read_csv("airport_code.csv")
	airport_codes = airport_codes[list(airport_codes.columns[:-2])]

	# Attach the airport details twice, once per end of the route, prefixing
	# every lookup column with the side it describes.
	for side in ("Origin", "Destination"):
	    prefixed_codes = airport_codes.rename(
	        columns={name: f"{side}{name}" for name in airport_codes.columns}
	    )
	    booking = pd.merge(
	        booking,
	        prefixed_codes,
	        left_on=f"{side}Airport",
	        right_on=f"{side}Code",
	    )


	def _search_url(row):
	    """Build the seats.aero search link for one booking row."""
	    link = f"https://seats.aero/search?origin={row['OriginAirport']}&destination={row['DestinationAirport']}&date={row['Date']}"
	    # Everything from the first space onwards is discarded.
	    return link.split(" ")[0]


	booking['url'] = booking.apply(_search_url, axis=1).values
	# One {column name: value} dict per row; this is what search() scores and returns.
	booking['real_description'] = [dict(zip(booking.columns, row)) for row in booking.values]
	# Build the chat UI. The inline CSS defines colour rules for the element ids
	# #white-button and #orange-button-1..3; no component visible in this file
	# sets a matching elem_id. The Soft theme is applied to the whole app.
	with gr.Blocks(css = """#white-button {
	background-color: #FFFFFF;
	color: #000000;
	}

	#orange-button-1 {
	background-color: #FFDAB9;
	color: #000000;


	}

	#orange-button-2 {
	background-color: #FFA07A;
	color: #FFFFFF;

	}

	#orange-button-3 {
	background-color: #FF4500;
	color: #FFFFFF;
	}""", theme=gr.themes.Soft()) as demo:
	# Conversation display, 750 high.
	# NOTE(review): `.style()` was deprecated in later Gradio 3.x releases and
	# is believed to be removed in 4.x - confirm against the pinned version.
	chatbot = gr.Chatbot().style(height=750)

	# Input row: a wide text box next to a narrow "Clear" button.
	with gr.Row():
	# NOTE(review): newer Gradio releases expect an integer `scale` - confirm.
	with gr.Column(scale = 0.75, min_width=0):
	msg = gr.Textbox(placeholder = "Enter text and press enter",show_label=False).style(container = False)
	with gr.Column(scale = 0.25, min_width=0):
	clear = gr.Button("Clear")


	# index = gr.Textbox(value = "0", visible = False)
	def user(user_message, history):
	    """Record the user's message as a new, not-yet-answered chat turn.

	    Returns a pair: an empty string (the new textbox content) and a new
	    history list with ``[user_message, None]`` appended; ``history`` itself
	    is not modified.
	    """
	    pending_turn = [user_message, None]
	    updated_history = history + [pending_turn]
	    return "", updated_history

	def convert_columns(x, mapping=None):
	    """Translate dotted slot-tag keys into booking column names.

	    A key such as ``'fromloc.city_name'`` is split on ``'.'`` and its first
	    two parts are looked up and concatenated (``'Origin' + 'City'``). A key
	    that has no second part, or whose parts are not in the lookup table, is
	    kept unchanged.

	    Args:
	        x: dict of tag name -> tagged value.
	        mapping: optional lookup table of tag fragment -> column-name
	            fragment; defaults to the module-level ``taxonomy``.

	    Returns:
	        A new dict with the translated keys and the original values.
	    """
	    lookup = taxonomy if mapping is None else mapping
	    full_map = {}
	    for key, value in x.items():
	        try:
	            parts = key.split('.')
	            full_word = lookup[parts[0]] + lookup[parts[1]]
	        except (AttributeError, IndexError, KeyError):
	            # Non-string key, no '.' separator, or an unknown fragment:
	            # pass the key through untouched instead of failing.
	            full_word = key
	        full_map[full_word] = value
	    return full_map

	def compare_flights_dynamic(flight1, flight2):
	    """Score how well a booking matches the requested filters.

	    Every key of ``flight2`` (the filters) that is also present in
	    ``flight1`` (the booking) is compared with
	    ``fuzz.token_set_ratio``; the result is the mean of those ratios divided
	    by 100. When the two dicts share no key the score is ``0.0``.

	    TODO: take the class type, the date and the mileage cost into account
	    (a mileage filter could discard bookings, although users who do not
	    know the mileage range may still want them as secondary results).
	    """
	    ratios = [
	        fuzz.token_set_ratio(flight1[field], wanted)
	        for field, wanted in flight2.items()
	        if field in flight1
	    ]
	    if not ratios:
	        return 0.0
	    return sum(ratios) / (len(ratios) * 100)

	def search(query):
	    """Return the ten bookings that best match the slots tagged in ``query``.

	    The query is posted to the remote tagging endpoint; every line of the
	    reply that contains ``'B-'`` is read as a tagged value followed by its
	    tag name. The tags are translated with ``convert_columns`` and merged
	    into the module-level ``adjust_tags``, so filters from earlier queries
	    stay active. Every booking is then scored with
	    ``compare_flights_dynamic`` and the score is stored in
	    ``booking['latest_local']``.

	    Args:
	        query: the user's message text.

	    Returns:
	        A list with the ``real_description`` dicts of the ten best-scoring
	        bookings, best first.

	    Raises:
	        requests.exceptions.RequestException: if the tagging service cannot
	            be reached or does not answer within the timeout.
	    """
	    # Bound the wait so a stalled tagging service cannot hang the chat
	    # handler forever (requests has no timeout by default).
	    response = requests.post(
	        "https://rams901-openslu.hf.space/run/predict",
	        json={"data": [query]},
	        timeout=30,
	    )

	    # Name the parser explicitly so the result does not depend on which
	    # optional parser libraries happen to be installed.
	    test_text = BeautifulSoup(response.content, "html.parser").get_text()

	    # The reply separates lines with a literal backslash followed by 'n'.
	    # NOTE(review): splitting on '-' assumes neither the tagged word nor the
	    # tag name contains a hyphen - confirm against the tagger's output.
	    tags = {}
	    for line in test_text.split("\\n"):
	        if 'B-' not in line:
	            continue
	        pieces = line.strip().split('-')
	        # pieces[0] ends with the 'B' of the 'B-' marker; drop that character.
	        tags[pieces[1]] = pieces[0][:-1]

	    # Add the new filters to (or overwrite) the ones already collected.
	    adjust_tags.update(convert_columns(tags))

	    booking['latest_local'] = booking['real_description'].apply(
	        compare_flights_dynamic, flight2=adjust_tags
	    ).values

	    results = booking.sort_values("latest_local", ascending=False).head(10)
	    # NOTE(review): this renames DataFrame columns only; the dicts stored in
	    # 'real_description' keep the raw column names.
	    results = results.rename(columns=class_mp)
	    return results['real_description'].to_list()

	def _recent_turns(chatlog, turns=2):
	    """Turn the last ``turns`` exchanges before the current one into chat messages."""
	    messages = []
	    for user_text, bot_text in chatlog[-(turns + 1):-1]:
	        # Chronological order: the user's question comes before the reply.
	        messages.append({'role': 'user', 'content': user_text})
	        if bot_text is not None:
	            # A turn whose reply was never filled in has nothing to replay.
	            messages.append({'role': 'assistant', 'content': bot_text})
	    return messages

	def new_ask(chatlog):
	    """Answer the newest user message and store the reply in ``chatlog``.

	    The matching flights for the newest message are looked up with
	    ``search`` and appended to the ``FARE_GPT`` system prompt. The two
	    exchanges before the current one are replayed as context, then the
	    newest message is sent to ``gpt-3.5-turbo`` at temperature 0.

	    Args:
	        chatlog: list of ``[user_message, bot_reply]`` pairs; the last pair
	            holds the message to answer and its reply slot is overwritten.

	    Returns:
	        The same ``chatlog`` list with the last reply filled in.
	    """
	    question = chatlog[-1][0]
	    availability = search(question)
	    print(availability, adjust_tags)

	    system_prompt = FARE_GPT + str(availability) + "\nMake sure to only use from the flights provided only. "
	    messages = [{"role": "system", "content": system_prompt}]
	    messages += _recent_turns(chatlog)
	    messages.append({'role': 'user', 'content': question})

	    response = openai.ChatCompletion.create(
	        model="gpt-3.5-turbo",
	        messages=messages,
	        temperature=0,
	    )
	    # Known limitation: availability is not kept in the history, so follow-up
	    # questions that yield no new tags rely on the filters accumulated in
	    # adjust_tags rather than on the previously shown flight list.
	    chatlog[-1][1] = response['choices'][0]['message']['content']
	    return chatlog

	# Submitting the textbox first records the user's turn (outside the queue),
	# then asks the model to fill in the reply for that turn.
	user_turn = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
	user_turn.then(new_ask, chatbot, chatbot)

	# The Clear button sends None to the chatbot component.
	clear.click(lambda: None, None, outputs=chatbot, queue=False)

	demo.launch()