"""FareGPT demo: a Gradio chat UI that suggests flights from a local availability file.

On import this script loads ``availability.json`` and ``airport_code.csv``,
joins them into the module-level ``booking`` DataFrame, builds the Gradio UI
and launches it.  Each user message is sent to a remote slot-tagging endpoint,
the extracted tags are fuzzy-matched against every flight, and the ten best
matches are passed to the OpenAI chat API as context.
"""
import gzip
import os
import re
import time
from collections import Counter
from heapq import nlargest
from string import punctuation

import gradio as gr
import nltk
import numpy as np
import openai
import pandas as pd
import requests
import spacy
import torch
from bs4 import BeautifulSoup
from fuzzywuzzy import fuzz
from spacy.lang.en.stop_words import STOP_WORDS
from tqdm import tqdm

# Raises KeyError at import time when OPENAI_KEY is not set.
openai.api_key = os.environ['OPENAI_KEY']

# System prompt sent to the chat model; the matched flights are appended to it
# on every request (see new_ask).  The text is runtime data: keep it verbatim.
FARE_GPT = (
    "FareGPT is designed to be able to assist with a wide range of flights "
    "recommendation and suggestions, from answering simple questions to "
    "providing in-depth explanations and discussions on a wide range of "
    "topics. FareGPT is able to generate human-like text based on the input "
    "it receives, allowing it to engage in natural-sounding conversations "
    "and provide responses that are coherent and relevant to the topic at "
    "hand. FareGPT is able to process and understand large amounts of flight "
    "booking and availability seats. FareGPT will be provided with different "
    "flights, airlines, alliances and mileage points for different "
    "airflights, and will make sure to suggest the right flights to users "
    "with their precise urls. FareGPT will start by recognizing the "
    "different places available and comprehend the user needs to give "
    "suggestions and optimal choices in terms of cost, mileage points, "
    "comfort and other factors. FareGPT will always show the exact url to "
    "the flight without any edits when showing a flight. \n"
    "these are the existing flights: "
)

# Human-readable names for the cabin-coded columns.
# NOTE: the third key used to be a second 'YMileageCost', which silently
# replaced the economy entry and left 'JMileageCost' unmapped.  It now follows
# the same J -> Premium convention as 'JRemainingSeats' below.
# NOTE(review): confirm the W/J cabin naming against the data provider.
class_mp = {
    'YMileageCost': 'EconomyMileageCost',
    'WMileageCost': 'BusinessMileageCost',
    'JMileageCost': 'PremiumMileageCost',
    'YRemainingSeats': 'EconomyRemainingSeats',
    'WRemainingSeats': 'BusinessRemainingSeats',
    'JRemainingSeats': 'PremiumRemainingSeats',
}

# Filters accumulated from the tagger across messages.  This is module-level
# state, so it is shared by every chat handled by this process.
adjust_tags = {}

# Maps the two dot-separated halves of a tagger label to the prefix/suffix of
# a booking column name, e.g. 'toloc.city_name' -> 'Destination' + 'City'.
taxonomy = {
    'fare_amount': 'mileagecost',
    'fromloc': 'Origin',
    'toloc': 'Destination',
    'city_name': 'City',
    'country_name': 'Country',
}

# ---------------------------------------------------------------------------
# Data loading (runs at import time)
# ---------------------------------------------------------------------------
df = pd.read_json('availability.json')
# Expand the per-row 'Route' records into their own columns.
routes = pd.DataFrame(list(df['Route'].values))[
    ["OriginAirport", 'DestinationAirport', 'Source']
]
df = df[[
    'Date',
    'YMileageCost', 'WMileageCost', 'JMileageCost',
    'YRemainingSeats', 'WRemainingSeats', 'JRemainingSeats', 'FRemainingSeats',
]]
booking = pd.concat((df, routes), axis=1)

airport_codes = pd.read_csv("airport_code.csv")
# Drop the last two columns of the airport table.
airport_codes = airport_codes[list(airport_codes.columns[:-2])]

# Attach airport details twice: once prefixed 'Origin', once 'Destination'.
booking = pd.merge(
    booking,
    airport_codes.rename(
        columns={column: f"Origin{column}" for column in airport_codes.columns}
    ),
    left_on="OriginAirport",
    right_on="OriginCode",
)
booking = pd.merge(
    booking,
    airport_codes.rename(
        columns={column: f"Destination{column}" for column in airport_codes.columns}
    ),
    left_on="DestinationAirport",
    right_on="DestinationCode",
)

# Keep only the text before the first space of the formatted URL
# (presumably this trims a time component from 'Date' -- confirm).
booking['url'] = booking.apply(
    lambda x: f"https://seats.aero/search?origin={x['OriginAirport']}&destination={x['DestinationAirport']}&date={x['Date']}".split(" ")[0],
    axis=1,
).values

# One plain dict per flight (column name -> value); this is what gets scored
# against the user's tags and what is shown to the model.
booking['real_description'] = [
    dict(zip(booking.columns, row)) for row in booking.values
]


def user(user_message, history):
    """Append the submitted message to the chat history with an empty reply.

    Returns a pair: an empty string (clears the textbox) and the history
    extended with ``[user_message, None]``.
    """
    return "", history + [[user_message, None]]


def convert_columns(x):
    """Translate tagger labels into booking column names.

    A key of the form ``'<a>.<b>'`` whose two parts are both in ``taxonomy``
    becomes ``taxonomy[a] + taxonomy[b]``.  Any other key (no dot, or a part
    missing from ``taxonomy``) is kept unchanged.

    Args:
        x: mapping of tagger label -> extracted value.

    Returns:
        A new dict with translated keys and the original values.
    """
    full_map = {}
    for key, value in x.items():
        parts = key.split('.')
        try:
            full_word = taxonomy[parts[0]] + taxonomy[parts[1]]
        except (IndexError, KeyError):
            # No dot in the label, or a part the taxonomy does not know.
            full_word = key
        full_map[full_word] = value
    return full_map


def compare_flights_dynamic(flight1, flight2):
    """Score how well a flight matches the requested filters.

    Args:
        flight1: flight description dict (column name -> value).
        flight2: filter dict (column name -> requested value).

    Returns:
        The mean ``fuzz.token_set_ratio`` over the keys present in both
        dicts, divided by 100; ``0.0`` when they share no key.
    """
    # TODO: integrate class type and Date (year; this needs fine-tuning).
    # TODO: add class / mileage-cost filtering, e.g. discard a flight when
    #   the requested "mileagecost" is below YMileageCost, WMileageCost and
    #   JMileageCost.  The user might not know the mileage range for the
    #   desired flight, so such flights may be better kept as secondary
    #   suggestions instead of being dropped.
    sims = [
        fuzz.token_set_ratio(flight1[key], value)
        for key, value in flight2.items()
        if key in flight1
    ]
    if not sims:
        return 0.0
    return sum(sims) / (len(sims) * 100)


def search(query):
    """Return the descriptions of the ten flights that best match ``query``.

    The query is posted to the remote tagging endpoint, the ``B-`` tagged
    lines of the reply are merged into the module-level ``adjust_tags``
    filters, and every row of ``booking`` is scored against those filters.

    Side effects: updates ``adjust_tags`` and overwrites
    ``booking['latest_local']``.

    Raises:
        requests.RequestException: if the tagging request fails or times out.
    """
    response = requests.post(
        "https://rams901-openslu.hf.space/run/predict",
        json={"data": [query]},
        # Without a timeout a stalled endpoint would block the chat forever.
        timeout=30,
    )
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    test_text = BeautifulSoup(response.content, "html.parser").get_text()
    # The payload is split on the two-character sequence backslash + 'n'.
    test_list = [x.strip() for x in test_text.split("\\n") if 'B-' in x]

    # Each kept line is '<text>B-<label>'.  Splitting at the last 'B-'
    # (instead of at every '-') keeps hyphenated text intact and gives the
    # same result as before for lines containing a single hyphen.
    tags = {}
    for line in test_list:
        text, _, label = line.rpartition('B-')
        tags[label] = text

    # UPDATE/ADD newer filters
    adjust_tags.update(convert_columns(tags))

    booking['latest_local'] = booking['real_description'].apply(
        compare_flights_dynamic, flight2=adjust_tags
    ).values
    results = booking.sort_values("latest_local", ascending=False).head(10)
    # Renames DataFrame columns only; the dicts returned below keep their
    # original keys.
    results = results.rename(columns=class_mp)
    return results['real_description'].to_list()


def new_ask(chatlog):
    """Answer the latest user message and store the reply in ``chatlog``.

    Args:
        chatlog: list of ``[user_message, bot_reply]`` pairs; the last pair
            holds the new message and a ``None`` reply.

    Returns:
        The same ``chatlog`` with the last reply filled in.
    """
    availability = search(chatlog[-1][0])
    print(availability, adjust_tags)

    messages = [{
        "role": "system",
        "content": FARE_GPT + str(availability)
        + "\nMake sure to only use from the flights provided only. ",
    }]
    # Replay up to two previous turns in chronological order: the user's
    # message first, then the assistant's reply to it.
    for user_text, bot_text in chatlog[-3:-1]:
        messages.append({'role': 'user', 'content': user_text})
        if bot_text is not None:
            messages.append({'role': 'assistant', 'content': bot_text})
    messages.append({'role': 'user', 'content': chatlog[-1][0]})

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0,
    )
    # Known issues (from the original author): the weak spot is the GPT
    # response when it is given the availability list, and follow-up
    # questions.  Plan: save availability in the history and, when a message
    # does not trigger the tag extraction, reuse the previous availability
    # list for that message.
    chatlog[-1][1] = response['choices'][0]['message']['content']
    return chatlog


def _reset_chat():
    """Clear the accumulated filters and return ``None`` to empty the chat."""
    # Without this, filters from the cleared conversation would still steer
    # the next search.
    adjust_tags.clear()
    return None


with gr.Blocks(
    css=(
        "#white-button { background-color: #FFFFFF; color: #000000; } "
        "#orange-button-1 { background-color: #FFDAB9; color: #000000; } "
        "#orange-button-2 { background-color: #FFA07A; color: #FFFFFF; } "
        "#orange-button-3 { background-color: #FF4500; color: #FFFFFF; }"
    ),
    theme=gr.themes.Soft(),
) as demo:
    chatbot = gr.Chatbot().style(height=750)
    with gr.Row():
        with gr.Column(scale=0.75, min_width=0):
            msg = gr.Textbox(
                placeholder="Enter text and press enter",
                show_label=False,
            ).style(container=False)
        with gr.Column(scale=0.25, min_width=0):
            clear = gr.Button("Clear")

    # First echo the message into the chat, then compute the reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        new_ask, chatbot, chatbot
    )
    clear.click(_reset_chat, None, outputs=chatbot, queue=False)

demo.launch()