# itinerary_with_TSP / app_no_flask.py - uploaded by Kevinyogap (commit 8953f1c)
# -*- coding: utf-8 -*-
"""
LAKE TOBA ITINERARY GENERATOR - MINIMAL VERSION
Core itinerary generation logic without extras
"""
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from geopy.distance import geodesic
from datetime import datetime
import ast
import warnings
warnings.filterwarnings('ignore')
class TobaItineraryGenerator:
    """Interactive itinerary generator for Lake Toba tourism.

    The workflow (driven by ``run_generator``) is: read trip preferences from
    stdin, load the tourism/accommodation CSV files, rank tourism spots by
    TF-IDF similarity to the chosen activities, pick the hotel nearest to the
    top-ranked spot, chain spots nearest-neighbour style starting at the
    hotel, trim the route until it fits a time limit, and finally cost the
    trip against the chosen budget.
    """

    def __init__(self):
        self.df_wisata = None       # tourism spots table (filled by load_data)
        self.df_akomodasi = None    # accommodation table (filled by load_data)
        self.data_processed = None  # df_wisata merged with the processed CSV
        self.user_data = None       # dict built by get_user_input
        # Activities offered directly in the menu; every other activity is
        # grouped under 'lain-lain' by process_user_activities.
        self.top_activities = [
            'pemandangan', 'fotografi', 'santai', 'camping',
            'trekking', 'berenang', 'piknik', 'aktivitas air'
        ]

    def get_activity_categories(self):
        """Return the numbered activity menu.

        Keys are the menu numbers as strings ('1', '2', ...); the entry after
        the last top activity is the catch-all 'lain-lain'.
        """
        menu = {
            str(number): name
            for number, name in enumerate(self.top_activities, 1)
        }
        menu[str(len(self.top_activities) + 1)] = 'lain-lain'
        return menu

    def process_user_activities(self, selected_activities):
        """Normalize activity names and collapse unknown ones.

        Each name is stripped and lower-cased. Names outside
        ``self.top_activities`` become 'lain-lain'. Duplicates are dropped
        (first occurrence wins) so a repeated selection does not weigh more
        in the similarity query. Returns ``['nature']`` when nothing is left.
        """
        processed = []
        for activity in selected_activities:
            activity = activity.strip().lower()
            label = activity if activity in self.top_activities else 'lain-lain'
            if label not in processed:
                processed.append(label)
        return processed if processed else ['nature']

    def _parse_activity_selection(self, activities_input):
        """Turn the raw comma-separated answer into a list of activity names.

        Numeric tokens that match a menu entry are translated to that entry
        (leading zeros are tolerated, so '01' means '1'). If no token matches
        the menu, every non-empty token is treated as a custom activity name
        and returned lower-cased.
        """
        menu = self.get_activity_categories()
        tokens = [token.strip() for token in activities_input.split(",")]

        selected = []
        for token in tokens:
            # isdecimal() guarantees int() will not raise on the token.
            if token.isdecimal():
                key = str(int(token))
                if key in menu:
                    selected.append(menu[key])

        if not selected:
            selected = [token.lower() for token in tokens if token]
        return selected

    def get_user_input(self):
        """Prompt on stdin for dates, group size, budget and activities.

        Every prompt is repeated until the answer is valid. The collected
        values are stored in ``self.user_data`` and also returned.
        """
        print("LAKE TOBA ITINERARY GENERATOR")
        print("=" * 40)

        # Date input
        while True:
            try:
                start_date_str = input("Start date (dd-mm-yy): ").strip()
                end_date_str = input("End date (dd-mm-yy): ").strip()
                start_date = datetime.strptime(start_date_str, "%d-%m-%y")
                end_date = datetime.strptime(end_date_str, "%d-%m-%y")
            except ValueError:
                print("Error: Invalid date format! Use dd-mm-yy")
                continue
            if end_date < start_date:
                print("Error: End date must be after start date!")
                continue
            # Both the start and the end day count as trip days.
            num_days = (end_date - start_date).days + 1
            break

        # Number of people
        while True:
            try:
                num_people = int(input("Number of people: "))
            except ValueError:
                print("Error: Please enter a valid number!")
                continue
            if num_people <= 0:
                print("Error: Number must be greater than 0!")
                continue
            break

        # Budget category: menu choice -> (label, amount used for the check)
        budget_options = {
            1: ("<1 million", 1000000),
            2: ("1-2 million", 2000000),
            3: (">2 million", 5000000),
        }
        print("\nBudget Category:")
        print("1. <1 million IDR")
        print("2. 1-2 million IDR")
        print("3. >2 million IDR")
        while True:
            try:
                choice = int(input("Choose budget (1-3): "))
            except ValueError:
                print("Error: Please enter 1, 2, or 3!")
                continue
            if choice not in budget_options:
                print("Error: Invalid choice!")
                continue
            budget_category, budget_estimate = budget_options[choice]
            break

        # Activities - show the menu, then accept numbers or free text
        print("\nSelect Activities (you can choose multiple):")
        for key, value in self.get_activity_categories().items():
            print(f"{key}. {value.title()}")
        print("\nEnter activity numbers separated by commas (e.g., 1,3,5)")
        print("Or enter custom activities separated by commas")
        activities_input = input("Your choice: ").strip()

        selected_activities = self._parse_activity_selection(activities_input)
        activities = self.process_user_activities(selected_activities)
        print(f"Selected activities: {', '.join(activities)}")

        self.user_data = {
            'start_date': start_date,
            'end_date': end_date,
            'num_days': num_days,
            'num_people': num_people,
            'budget_category': budget_category,
            'budget_estimate': budget_estimate,
            'activities': activities
        }
        return self.user_data

    def load_data(self):
        """Download the three CSV datasets from GitHub.

        Fills ``df_wisata``, ``df_akomodasi`` and ``data_processed`` (an
        inner merge of the tourism table with the processed table on
        'title'). Returns True on success; on any failure the error is
        printed and False is returned.
        """
        print("\nLoading data...")
        try:
            urls = {
                'processed': 'https://raw.githubusercontent.com/kevinyogap/data/refs/heads/main/data_ProcessedZ.csv',
                'wisata': 'https://raw.githubusercontent.com/kevinyogap/data/refs/heads/main/data_wisata_toba_score.csv',
                'akomodasi': 'https://raw.githubusercontent.com/kevinyogap/data/refs/heads/main/data_akomodasi_danau_toba_v2.csv'
            }
            df_P = pd.read_csv(urls['processed'])
            self.df_wisata = pd.read_csv(urls['wisata'])
            self.df_akomodasi = pd.read_csv(urls['akomodasi'])
            self.data_processed = pd.merge(self.df_wisata, df_P, on='title', how='inner')
            print(f"Data loaded: {len(self.df_wisata)} tourism spots, {len(self.df_akomodasi)} hotels")
            return True
        except Exception as e:
            # Network, parsing and schema problems are all reported the same way.
            print(f"Error loading data: {e}")
            return False

    @staticmethod
    def _first_category(raw):
        """Return the first element of a stringified list, else 'general'.

        ``raw`` is a 'kategori_token' cell. Missing values, empty lists and
        text that is not a Python list/tuple literal all yield 'general', so
        a single bad cell cannot abort the recommendation step.
        """
        if not isinstance(raw, str):
            return 'general'
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, SyntaxError, TypeError):
            return 'general'
        if isinstance(parsed, (list, tuple)) and parsed:
            return parsed[0]
        return 'general'

    def get_tourism_recommendations(self, top_n=10):
        """Rank tourism spots against the user's activities using TF-IDF.

        The query is the space-joined activity list; each spot is
        represented by its 'tags_joined' text. When no spot has a positive
        cosine similarity the ranking falls back to the 'score' column, or to
        a random sample if that column is absent.

        Returns a list of ``(title, category, score)`` tuples, or an empty
        list if anything goes wrong (the error is printed).
        """
        print(f"\nFinding {top_n} recommendations...")
        user_query = ' '.join(self.user_data['activities'])
        try:
            data_processed = self.data_processed.copy()

            # Prepare category data
            data_processed['kategori'] = data_processed['kategori_token'].apply(
                self._first_category
            )

            # TF-IDF vectorization; the query is included in the fit so its
            # terms are part of the vocabulary.
            spot_texts = data_processed['tags_joined'].fillna('')
            vectorizer = TfidfVectorizer(lowercase=True, stop_words=None)
            vectorizer.fit(list(spot_texts) + [user_query])
            tfidf_wisata = vectorizer.transform(spot_texts)
            tfidf_user = vectorizer.transform([user_query])

            # Calculate similarity
            similarities = cosine_similarity(tfidf_user, tfidf_wisata).flatten()
            if np.max(similarities) > 0:
                data_processed['similarity_score'] = similarities
                recommendations_df = data_processed.nlargest(top_n, 'similarity_score')
            elif 'score' in data_processed.columns:
                # Fallback to score-based recommendations
                recommendations_df = data_processed.nlargest(top_n, 'score')
            else:
                recommendations_df = data_processed.sample(min(top_n, len(data_processed)))

            # Format output
            recommendations = []
            for _, row in recommendations_df.iterrows():
                score = row.get('similarity_score', row.get('score', 0))
                recommendations.append((row['title'], row['kategori'], score))

            print("Top 5 recommendations:")
            for i, (title, category, _score) in enumerate(recommendations[:5], 1):
                print(f" {i}. {title} ({category})")
            return recommendations
        except Exception as e:
            print(f"Error in recommendations: {e}")
            return []

    def find_nearest_hotel(self, recommendations):
        """Find the hotel closest to the top-ranked tourism spot.

        Returns ``(hotel_row, spot_row)``, or ``(None, None)`` when there are
        no recommendations, the top spot is not in ``df_wisata``, or the
        hotel table is empty.
        """
        print("\nFinding nearest hotel...")
        if not recommendations:
            return None, None

        top_spot_name = recommendations[0][0]
        matches = self.df_wisata[self.df_wisata['title'] == top_spot_name]
        if matches.empty or self.df_akomodasi.empty:
            return None, None
        top_spot = matches.iloc[0]
        spot_location = (top_spot['latitude'], top_spot['longitude'])

        # Calculate distance to all hotels
        hotels = self.df_akomodasi.copy()
        hotels['distance'] = hotels.apply(
            lambda row: geodesic(
                (row['latitude'], row['longitude']), spot_location
            ).km,
            axis=1
        )
        nearest_hotel = hotels.sort_values('distance').iloc[0]
        print(f"Selected hotel: {nearest_hotel['name']} ({nearest_hotel['distance']:.1f} km)")
        return nearest_hotel, top_spot

    def select_tourism_spots(self, recommendations, starting_hotel, max_spots=5):
        """Greedily chain up to ``max_spots`` recommended spots.

        Starting at the hotel, the closest not-yet-selected recommended spot
        is picked repeatedly, each time measuring from the previously
        selected spot. Returns the spot titles in visiting order.
        """
        print(f"\nSelecting {max_spots} tourism spots...")
        recommendation_names = [item[0] for item in recommendations]
        top_df = self.df_wisata[self.df_wisata['title'].isin(recommendation_names)]

        selected_spots = []
        current_location = (starting_hotel['latitude'], starting_hotel['longitude'])
        for _ in range(min(max_spots, len(top_df))):
            candidates = top_df[~top_df['title'].isin(selected_spots)].copy()
            if candidates.empty:
                break
            # Calculate distance from current location
            candidates['distance'] = candidates.apply(
                lambda row: geodesic(
                    current_location, (row['latitude'], row['longitude'])
                ).km,
                axis=1
            )
            selected = candidates.sort_values('distance').iloc[0]
            selected_spots.append(selected['title'])
            current_location = (selected['latitude'], selected['longitude'])

        print(f"Selected spots: {', '.join(selected_spots)}")
        return selected_spots

    def validate_time_constraint(self, hotel, tourism_spots, max_hours=8,
                                 avg_speed_kmh=30, visit_hours=1.5):
        """Trim the route until the round trip fits within ``max_hours``.

        Total time is travel time (hotel -> spots in order -> hotel at
        ``avg_speed_kmh``) plus ``visit_hours`` per spot. While the total is
        too long the last spot is removed. Spots missing from ``df_wisata``
        add no distance but still count for visit time.

        Returns the (possibly shortened, possibly empty) list of spot
        titles. Raises ValueError if ``avg_speed_kmh`` is not positive.
        """
        if avg_speed_kmh <= 0:
            raise ValueError("avg_speed_kmh must be positive")
        print(f"\nValidating time constraint ({max_hours} hours max)...")

        hotel_location = (hotel['latitude'], hotel['longitude'])
        # Look each spot's coordinates up once instead of on every pass.
        spot_locations = {}
        for spot in tourism_spots:
            match = self.df_wisata[self.df_wisata['title'] == spot]
            if not match.empty:
                first = match.iloc[0]
                spot_locations[spot] = (first['latitude'], first['longitude'])

        valid_spots = list(tourism_spots)
        while valid_spots:
            # Calculate total distance
            total_distance = 0
            current_location = hotel_location
            for spot in valid_spots:
                location = spot_locations.get(spot)
                if location is None:
                    continue
                total_distance += geodesic(current_location, location).km
                current_location = location
            # Add return distance to hotel
            total_distance += geodesic(current_location, hotel_location).km

            # Calculate total time (travel + visit time)
            travel_time = total_distance / avg_speed_kmh
            visit_time = len(valid_spots) * visit_hours
            total_time = travel_time + visit_time
            if total_time <= max_hours:
                print(f"Valid itinerary: {len(valid_spots)} spots, {total_time:.1f} hours")
                break
            print(f"Too long ({total_time:.1f}h), removing: {valid_spots[-1]}")
            valid_spots.pop()
        return valid_spots

    def calculate_costs(self, hotel, tourism_spots):
        """Compute tourism, hotel and total cost and compare with the budget.

        Tourism cost is the sum of the 'biaya_masuk' and 'biaya_parkir_mobil'
        columns (missing values count as 0) over the selected spots. Hotel
        cost is the hotel's 'price' times the number of nights
        (``num_days - 1``, at least one).

        Returns a dict with keys 'tourism_cost', 'hotel_cost', 'total_cost',
        'budget' and 'within_budget'.
        """
        # NOTE(review): nothing here is scaled by user_data['num_people'] -
        # confirm whether fees and budget are meant per person or per group.
        tourism_df = self.df_wisata[self.df_wisata['title'].isin(tourism_spots)]

        def column_total(column):
            if column not in tourism_df.columns:
                return 0
            return tourism_df[column].fillna(0).sum()

        tourism_cost = column_total('biaya_masuk') + column_total('biaya_parkir_mobil')

        # Hotel cost
        nights = max(self.user_data['num_days'] - 1, 1)
        hotel_cost = hotel.get('price', 0) * nights

        total_cost = tourism_cost + hotel_cost
        budget = self.user_data['budget_estimate']
        return {
            'tourism_cost': tourism_cost,
            'hotel_cost': hotel_cost,
            'total_cost': total_cost,
            'budget': budget,
            'within_budget': bool(total_cost <= budget)
        }

    def create_itinerary(self, hotel, tourism_spots):
        """Print the itinerary summary and return it as a dict.

        The returned dict has keys 'hotel', 'tourism_spots' and 'costs'
        (the result of ``calculate_costs``).
        """
        print("\nCreating itinerary...")
        # Calculate costs
        costs = self.calculate_costs(hotel, tourism_spots)

        # Create schedule
        print("\n" + "=" * 50)
        print("LAKE TOBA ITINERARY")
        print("=" * 50)
        print(f"Period: {self.user_data['start_date'].strftime('%d-%m-%Y')} - {self.user_data['end_date'].strftime('%d-%m-%Y')}")
        print(f"Days: {self.user_data['num_days']}")
        print(f"People: {self.user_data['num_people']}")
        print(f"Budget: {self.user_data['budget_category']}")
        print(f"Activities: {', '.join(self.user_data['activities'])}")
        print(f"\nHOTEL: {hotel['name']}")
        print("\nTOURISM SPOTS:")
        for i, spot in enumerate(tourism_spots, 1):
            print(f" {i}. {spot}")
        print("\nCOST BREAKDOWN:")
        print(f" Tourism: IDR {costs['tourism_cost']:,}")
        print(f" Hotel: IDR {costs['hotel_cost']:,}")
        print(f" Total: IDR {costs['total_cost']:,}")
        print(f" Budget: IDR {costs['budget']:,}")
        if costs['within_budget']:
            print(" Status: Within budget ✓")
        else:
            print(" Status: Over budget ✗")

        return {
            'hotel': hotel,
            'tourism_spots': tourism_spots,
            'costs': costs
        }

    def find_cheaper_hotel_near_route(self, tourism_spots, max_distance_km=10):
        """Find the cheapest hotel within ``max_distance_km`` of any spot.

        If no hotel is that close, the cheapest hotel overall is used
        instead. Returns the hotel row, or None when there are no spots, no
        spot can be located in ``df_wisata``, or the hotel table is empty.
        """
        if not tourism_spots:
            return None

        # Get coordinates of all spots in the route
        spots_df = self.df_wisata[self.df_wisata['title'].isin(tourism_spots)]
        if spots_df.empty or self.df_akomodasi.empty:
            # Without coordinates or hotels there is nothing to compare.
            return None
        spot_coords = spots_df[['latitude', 'longitude']].values

        # For each hotel, distance to its closest spot
        hotels = self.df_akomodasi.copy()
        hotels['min_distance'] = hotels.apply(
            lambda row: min(
                geodesic((row['latitude'], row['longitude']), (lat, lon)).km
                for lat, lon in spot_coords
            ),
            axis=1
        )

        # Filter hotels within max_distance_km to any spot
        nearby_hotels = hotels[hotels['min_distance'] <= max_distance_km]
        if nearby_hotels.empty:
            # fallback: use all hotels
            nearby_hotels = hotels

        # Return the cheapest hotel (by price)
        return nearby_hotels.sort_values('price').iloc[0]

    def run_generator(self):
        """Run the whole interactive flow and return the itinerary dict.

        Returns None if any step fails, if the user interrupts the program,
        or if an unexpected error occurs (the error is printed).
        """
        print("Starting Lake Toba Itinerary Generator...\n")
        try:
            # 1. Get user input
            self.get_user_input()

            # 2. Load data
            if not self.load_data():
                return None

            # 3. Get recommendations
            recommendations = self.get_tourism_recommendations()
            if not recommendations:
                print("Error: Cannot create recommendations!")
                return None

            # 4. Find hotel
            hotel, _ = self.find_nearest_hotel(recommendations)
            if hotel is None:
                print("Error: Cannot find suitable hotel!")
                return None

            # 5. Select tourism spots
            tourism_spots = self.select_tourism_spots(recommendations, hotel)
            if not tourism_spots:
                print("Error: No tourism spots selected!")
                return None

            # 6. Validate time constraints
            valid_spots = self.validate_time_constraint(hotel, tourism_spots)
            if not valid_spots:
                print("Error: No spots meet time criteria!")
                return None

            # 7. Create itinerary and check budget
            result = self.create_itinerary(hotel, valid_spots)
            if not result['costs']['within_budget']:
                # Try to find a cheaper hotel near the route
                cheaper_hotel = self.find_cheaper_hotel_near_route(valid_spots)
                if cheaper_hotel is not None and cheaper_hotel['price'] < hotel['price']:
                    # Recalculate with cheaper hotel
                    new_result = self.create_itinerary(cheaper_hotel, valid_spots)
                    if new_result['costs']['within_budget']:
                        print("\nCheaper hotel found and used to fit the budget.")
                        result = new_result
                    else:
                        # The original hotel is kept in this case.
                        print("\nCheaper hotel found, but still over budget.")
                else:
                    print("\nNo cheaper hotel found nearby or all are more expensive.")

            print("\nITINERARY CREATED.")
            return result
        except KeyboardInterrupt:
            print("\nProgram interrupted by user.")
            return None
        except Exception as e:
            print(f"Error: {e}")
            return None
# Script entry point: build the generator and run the interactive flow.
# `result` holds the itinerary dict, or None if generation failed.
if __name__ == "__main__":
    generator = TobaItineraryGenerator()
    result = generator.run_generator()