Spaces:

morfriden
/

TripAdvisor

Sleeping

App Files Files Community

TripAdvisor / app.py

morfriden

Upload 4 files

124d1f9 verified 4 months ago

raw

history blame contribute delete

7.62 kB

	import gradio as gr
	from datasets import load_dataset
	from sentence_transformers import SentenceTransformer, util
	import faiss
	import numpy as np
	from transformers import pipeline
	import time
	import ast
	import re

	# --- 1. DATA LOADING AND INITIALIZATION ---
	print("===== Application Startup =====")
	start_time = time.time()

	# Load the TravelPlanner dataset. The second positional argument selects the
	# dataset configuration; no row limit is applied here.
	# NOTE(review): the FAISS index loaded below is presumably built over these
	# same rows in the same order -- confirm against build_index.py.
	print("Loading TravelPlanner dataset...")
	dataset = load_dataset("osunlp/TravelPlanner", "test")
	print("Dataset ready.")

	# --- 2. EMBEDDING AND RECOMMENDATION ENGINE ---
	print("Loading embedding model...")
	model_name = "all-mpnet-base-v2"
	embedding_model = SentenceTransformer(f"sentence-transformers/{model_name}")

	index_file = "trip_index.faiss"

	print(f"Loading FAISS index from {index_file}...")

	try:
	    # Keep the try body minimal: only the read can fail with RuntimeError.
	    index = faiss.read_index(index_file)
	except RuntimeError as err:
	    print(f"Error: FAISS index file '{index_file}' not found.")
	    print("Please run the `build_index.py` script first to create the index.")
	    print(f"Details: {err}")
	    # Exit with a non-zero status so the failure is visible to the host;
	    # the bare `exit()` helper reports success and is only provided by `site`.
	    raise SystemExit(1) from err
	else:
	    print(f"Index is ready. Total vectors in index: {index.ntotal}")


	# --- 3. SYNTHETIC GENERATION ---
	def format_plan_details(plan_string):
	    """
	    Parse the raw plan string from the dataset and format it as Markdown.

	    The input is expected to be the string form of a Python list of dicts,
	    each with optional 'Description' and 'Content' keys. Every dict becomes
	    a '####' heading followed by its content, then a blank separator line.

	    Args:
	        plan_string: Raw plan text (may be empty or None).

	    Returns:
	        The Markdown text. `plan_string` is returned unchanged when it is
	        falsy, does not start with '[', cannot be parsed as a Python
	        literal, or does not parse to a list of dicts.
	    """
	    # Anything that does not look like a list literal is passed through.
	    if not plan_string or not plan_string.strip().startswith('['):
	        return plan_string

	    try:
	        # literal_eval only evaluates literals, never arbitrary code.
	        plan_list = ast.literal_eval(plan_string)
	    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	        # Malformed input: fall back to the raw text instead of crashing.
	        return plan_string

	    # A string starting with '[' can still parse to something else
	    # (e.g. "[1], 2" is a tuple) or to a list of non-dict items.
	    if not isinstance(plan_list, list):
	        return plan_string
	    if not all(isinstance(section, dict) for section in plan_list):
	        return plan_string

	    tabular_keywords = ('Attractions', 'Restaurants', 'Accommodations', 'Flight')

	    formatted_sections = []
	    for section in plan_list:
	        description = section.get('Description', 'Details')
	        if description is None:
	            description = 'Details'
	        elif not isinstance(description, str):
	            description = str(description)

	        content = section.get('Content', '')
	        if content is None:
	            content = ''
	        elif not isinstance(content, str):
	            content = str(content)
	        content = content.strip()

	        # Each section starts with a level-4 Markdown heading.
	        formatted_sections.append(f"#### {description}")

	        if any(keyword in description for keyword in tabular_keywords):
	            # First line is kept as the header; the remaining non-empty
	            # lines become bullets with runs of whitespace collapsed.
	            header, *rows = content.split('\n')
	            formatted_sections.append(f"{header}")
	            for item in rows:
	                clean_item = ' '.join(item.split())
	                if clean_item:
	                    formatted_sections.append(f"- {clean_item}")

	        elif 'Self-driving' in description or 'Taxi' in description:
	            mode_emoji = "🚗" if 'Self-driving' in description else "🚕"
	            # NOTE(review): this replace looks like a no-op as written --
	            # confirm which separator was intended.
	            formatted_sections.append(f"- {mode_emoji} {content.replace(', ', ', ')}")

	        else:
	            # Default formatting for any other type of content.
	            formatted_sections.append(content)

	        # Blank entry yields a newline between sections.
	        formatted_sections.append("")

	    return "\n".join(formatted_sections)

	# Lazily created text-generation pipeline, reused by later requests.
	_text_generator = None


	def _get_text_generator():
	    """Return the GPT-2 text-generation pipeline, creating it on first use."""
	    global _text_generator
	    if _text_generator is None:
	        print("Loading generative model...")
	        _text_generator = pipeline('text-generation', model='gpt2')
	    return _text_generator


	def get_recommendations_and_generate(query_text, k=3):
	    """
	    Retrieve similar trips from the dataset and generate one new trip idea.

	    Args:
	        query_text: Free-text description of the desired trip.
	        k: Number of nearest neighbours requested from the FAISS index.

	    Returns:
	        A 4-tuple `(rec1, rec2, rec3, generated_text)`. Each `rec` is a dict
	        with 'dest', 'days' and 'reference_information' keys; when fewer
	        than three neighbours are available, placeholder dicts fill the
	        remaining slots. `generated_text` is the generated candidate whose
	        embedding is most similar to the query embedding.
	    """
	    # 1. Get recommendations from existing data.
	    query_vector = embedding_model.encode([query_text])
	    query_vector = np.array(query_vector, dtype=np.float32)
	    _distances, indices = index.search(query_vector, k)

	    trips = dataset['test']
	    results = []
	    for idx_numpy in indices[0]:
	        idx = int(idx_numpy)
	        # FAISS fills missing neighbours with -1; without this guard a
	        # negative label would silently select the last dataset row.
	        if not 0 <= idx < len(trips):
	            continue
	        # Fetch the row once instead of materializing a full column per field.
	        row = trips[idx]
	        results.append({
	            "dest": row['dest'],
	            "days": row['days'],
	            "reference_information": row['reference_information'],
	        })

	    # The caller always receives exactly three recommendation slots.
	    while len(results) < 3:
	        results.append({"dest": "No trip plan found", "days":"", "reference_information": ""})

	    # 2. Create a prompt for the generative model.
	    prompt = f"Write a complete travel plan that includes a title and a day-by-day itinerary. The trip must be about: {query_text}."
	    generator = _get_text_generator()

	    # 3. Generate 10 new, creative trip ideas.
	    print("Generating 10 synthetic trip ideas...")
	    generated_outputs = generator(
	        prompt,
	        max_new_tokens=250,  # Allow longer, more detailed plans
	        num_return_sequences=10,
	        # Returning several sequences requires sampling (or beam search),
	        # so request it explicitly rather than relying on model defaults.
	        do_sample=True,
	        pad_token_id=50256,
	    )

	    # 4. Find the best trip out of the 10 generated.
	    print("Finding the most relevant generated trip...")
	    # The pipeline output echoes the prompt; drop it to keep only new text.
	    generated_texts = [output['generated_text'].replace(prompt, "").strip() for output in generated_outputs]

	    # Embed all generated texts and compare each against the query.
	    generated_embeddings = embedding_model.encode(generated_texts)
	    similarities = util.cos_sim(query_vector, generated_embeddings)

	    # Single query row, so the flat argmax is the index of the best candidate.
	    best_trip_index = int(np.argmax(similarities))
	    best_generated_trip = generated_texts[best_trip_index]

	    return results[0], results[1], results[2], best_generated_trip

	# --- 4. GRADIO USER INTERFACE ---
	def format_trip_plan(trip):
	    """
	    Render one recommended trip as Markdown.

	    Args:
	        trip: Dict with 'dest', 'days' and 'reference_information' keys,
	            or a falsy value when there is no recommendation.

	    Returns:
	        A Markdown heading plus the formatted plan, or a fixed
	        "no similar trip" message when `trip` is falsy or carries no
	        reference information (as the padding placeholders do).
	    """
	    # An empty 'reference_information' means there is no plan to show;
	    # checking only for key presence let placeholders through and produced
	    # a heading such as "-days trip to NO TRIP PLAN FOUND".
	    if not trip or not trip.get('reference_information'):
	        return "### No similar trip plan found."
	    formatted_plan = format_plan_details(trip['reference_information'])
	    return f"### {trip['days']}-days trip to {trip['dest'].upper()}\nSuggested Plan:\n{formatted_plan}"

	def format_generated_trip(trip_text):
	    """Return the generated trip text unchanged (pass-through formatter)."""
	    formatted_text = trip_text
	    return formatted_text

	def trip_planner_wizard(destination, days):
	    """
	    Gradio callback: build a query from the form inputs and return results.

	    Args:
	        destination: Destination text from the form (may be empty or None).
	        days: Trip length from the number field (may be a float or None).

	    Returns:
	        A 4-tuple of strings: three Markdown recommendations and the
	        generated trip text. When an input is invalid, the first element
	        carries a message for the user and the other three are empty.
	    """
	    destination = (destination or "").strip()
	    if not destination:
	        return "### Please enter a destination.", "", "", ""

	    try:
	        # The number field may deliver a float, None (cleared), NaN or inf.
	        days = int(days)
	    except (TypeError, ValueError, OverflowError):
	        days = 0
	    if days < 1:
	        return "### Please enter a valid number of days (1 or more).", "", "", ""

	    # Combine user inputs into a single query for processing.
	    query_text = f"a {days}-day trip to {destination}"
	    rec1, rec2, rec3, gen_rec_text = get_recommendations_and_generate(query_text)
	    return format_trip_plan(rec1), format_trip_plan(rec2), format_trip_plan(rec3), format_generated_trip(gen_rec_text)

	# Report the elapsed time since `start_time` was taken at startup.
	# Note: the GPT-2 pipeline is created inside
	# get_recommendations_and_generate, so it is not part of this measurement.
	end_time = time.time()
	print(f"Models and data loaded in {end_time - start_time:.2f} seconds.")

	# Gradio Interface
	# Components are declared in display order inside nested layout contexts.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# ✈️ TripPlanner AI")
	    gr.Markdown("Enter your destination and desired trip length, and get plan recommendations plus a new AI-generated idea!")

	    # Input row: destination text and trip length.
	    with gr.Row():
	        destination_input = gr.Textbox(label="Destination", placeholder="e.g., Paris")
	        days_input = gr.Number(label="Number of Days", value=3)

	    with gr.Row():
	        submit_btn = gr.Button("Get Trip Plans", variant="primary")

	    # Output row: dataset recommendations (wider column) next to the
	    # generated idea (narrower column).
	    with gr.Row():
	        with gr.Column(scale=2):
	            gr.Markdown("### Recommended Trip Plans from Dataset")
	            output_rec1 = gr.Markdown()
	            output_rec2 = gr.Markdown()
	            output_rec3 = gr.Markdown()
	        with gr.Column(scale=1):
	            gr.Markdown("### ✨ New AI-Generated Idea")
	            output_gen = gr.Textbox(label="AI Generated Trip Plan", lines=20, interactive=False)

	    # The callback returns four values, mapped onto the outputs in order.
	    submit_btn.click(
	        fn=trip_planner_wizard,
	        inputs=[destination_input, days_input],
	        outputs=[output_rec1, output_rec2, output_rec3, output_gen]
	    )

	    # Example [destination, days] pairs tied to the two input components.
	    gr.Examples(
	        examples=[
	            ["Paris", 3],
	            ["Orlando", 7],
	            ["Tokyo", 5],
	            ["the Greek Islands", 10]
	        ],
	        inputs=[destination_input, days_input]
	    )

	# Start the app; runs at import time because there is no __main__ guard.
	demo.launch(ssr_mode=False)