streamlit-web-crawler / src /pages /02_Passport_Power_Explorer.py
mrfirdauss's picture
feat: add page doc gen
6228ce3
import streamlit as st
import pandas as pd
import numpy as np
import re
import os
# --- Page Configuration ---
# Browser-tab title and icon for this page; "wide" lets the content use the
# full width of the window.
st.set_page_config(page_title="Passport Power Explorer", page_icon="✈️", layout="wide")
# First "<number> day(s)" mention in a requirement text. Accepts the singular
# and hyphenated spellings too ("1 day", "30-day"), not only "30 days".
_DURATION_DAYS_RE = re.compile(r'(\d+)[\s-]*days?\b', re.IGNORECASE)

# "eta" as a whole word only (applied to lower-cased text). A plain substring
# test would also fire on unrelated words such as "details".
_ETA_WORD_RE = re.compile(r'\betas?\b')

# Raw CSV columns the page relies on.
_RAW_COLUMNS = ('text', 'pass', 'dest', 'source')

# Raw / derived column names -> names shown in the UI.
_DISPLAY_NAMES = {
    'text': 'Requirement',
    'pass': 'Passport',
    'dest': 'Destination',
    'source': 'Source Country',
    'duration_days': 'Duration (Days)',
    'visa_category': 'Visa Category',
}


def _extract_days(text):
    """Return the first day count mentioned in *text*, or NaN if there is none."""
    if not isinstance(text, str):
        return np.nan
    match = _DURATION_DAYS_RE.search(text)
    return int(match.group(1)) if match else np.nan


def _categorize_visa(text):
    """Map a free-text requirement to one of the page's visa category labels."""
    if not isinstance(text, str):
        return "Unknown"
    text_lower = text.lower()
    # NOTE(review): the emoji prefixes of these labels look mis-encoded. They
    # are reproduced unchanged because other code in this file may match on
    # the exact strings; change them only together with their consumers.
    if 'visa-free' in text_lower:
        return "βœ… Visa-Free"
    if 'visa on arrival' in text_lower:
        return "πŸ›¬ Visa on Arrival"
    if 'evisa' in text_lower or _ETA_WORD_RE.search(text_lower):
        return "πŸ“§ eVisa / eTA"
    if 'visa required' in text_lower:
        return "❌ Visa Required"
    return "πŸ“„ Other / Pre-enrollment"


@st.cache_data
def load_and_process_data(filepath):
    """Load the visa CSV and derive the columns the page displays.

    The result is cached by ``st.cache_data``.

    Args:
        filepath: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A DataFrame whose raw columns are renamed for display
        (``Requirement``, ``Passport``, ``Destination``, ``Source Country``)
        with two derived columns added: ``Duration (Days)`` (first day count
        found in the requirement text, NaN when absent) and ``Visa Category``.
        If the file does not exist or lacks a required column, an error is
        shown with ``st.error`` and an empty DataFrame is returned.
    """
    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError:
        st.error(f"Error: The file was not found at '{filepath}'. Please make sure 'asean_visa_avaibility_playwright.csv' is in the project's src/data directory.")
        return pd.DataFrame()

    # Fail the same soft way as a missing file instead of raising KeyError
    # here or later in the page.
    missing = [col for col in _RAW_COLUMNS if col not in df.columns]
    if missing:
        st.error(f"Error: The file '{filepath}' is missing required column(s): {', '.join(missing)}.")
        return pd.DataFrame()

    # 1. Extract Stay Duration
    df['duration_days'] = df['text'].apply(_extract_days)

    # 2. Categorize Visa Type
    df['visa_category'] = df['text'].apply(_categorize_visa)

    # Clean up column names for display
    return df.rename(columns=_DISPLAY_NAMES)
# --- Main Application UI ---
st.title("✈️ Passport Power & Visa Explorer")
st.markdown("Analyze inbound and outbound visa requirements for Southeast Asian countries based on the provided dataset.")

# Load the data.
# The path is relative, so it is resolved against the directory the app is
# started from, not against this script's own location.
file_path = "src/data/asean_visa_avaibility_playwright.csv"
visa_df = load_and_process_data(file_path)
if not visa_df.empty:
    # --- Sidebar Filters ---
    st.sidebar.header("🔍 Filters")

    # 1. Select Focus Country
    focus_countries = ['Indonesia', 'Vietnam', 'Philippines', 'Thailand']
    selected_country = st.sidebar.selectbox(
        "Select your focus country:",
        options=focus_countries,
    )

    # 2. Select Travel Direction (Inbound/Outbound)
    travel_direction = st.sidebar.radio(
        "Select travel direction:",
        options=['Outbound', 'Inbound'],
        horizontal=True,
    )

    # --- Filtering Logic ---
    if travel_direction == 'Outbound':
        filtered_df = visa_df[visa_df['Source Country'] == selected_country].copy()
        st.header(f"🛂 Outbound Travel for {selected_country} Passport Holders")
    else:  # Inbound
        filtered_df = visa_df[visa_df['Destination'] == selected_country].copy()
        st.header(f"🛬 Inbound Travel to {selected_country}")

    # --- Dynamic Filters Based on Data ---
    # 3. Filter by Visa Category
    visa_categories = sorted(filtered_df['Visa Category'].unique())
    selected_categories = st.sidebar.multiselect(
        "Filter by Visa Category:",
        options=visa_categories,
        default=visa_categories,  # Default to all selected
    )

    # 4. Filter by Duration
    # max() is NaN when the selection is empty or none of its rows has a
    # parsed duration; int(NaN) would raise, so fall back to 0 in that case.
    longest_stay = filtered_df['Duration (Days)'].max()
    min_duration = 0
    max_duration = int(longest_stay) if pd.notna(longest_stay) else 0

    if max_duration > min_duration:
        duration_range = st.sidebar.slider(
            "Filter by Stay Duration (Days):",
            min_value=min_duration,
            max_value=max_duration,
            value=(min_duration, max_duration),  # Default to full range
        )
    else:
        # A slider needs a non-empty range; with nothing to choose from,
        # skip the widget and leave the durations unfiltered.
        duration_range = None
        st.sidebar.caption("No stay-duration data available for this selection.")

    # Apply category and duration filters
    filtered_df = filtered_df[filtered_df['Visa Category'].isin(selected_categories)]

    if duration_range is not None:
        min_select, max_select = duration_range
        # Keep rows where duration is within range OR where duration is NaN
        # (not specified)
        filtered_df = filtered_df[
            filtered_df['Duration (Days)'].between(min_select, max_select)
            | filtered_df['Duration (Days)'].isnull()
        ]

    # --- Display Results ---
    st.markdown("---")

    # Display summary metrics.
    # Categories are matched on their text rather than the full label, so the
    # counts do not depend on the emoji prefix of the label.
    category_labels = filtered_df['Visa Category']
    visa_free_count = int(category_labels.str.endswith('Visa-Free').sum())
    visa_required_count = int(category_labels.str.endswith('Visa Required').sum())

    col1, col2, col3 = st.columns(3)
    col1.metric("Total Countries Found", f"{len(filtered_df)}")
    col2.metric("Visa-Free Destinations", f"{visa_free_count}")
    col3.metric("Visa Required", f"{visa_required_count}")

    st.markdown("### Visa Requirements Data")

    # Define columns to display
    display_cols_outbound = ['Destination', 'Visa Category', 'Requirement', 'Duration (Days)']
    display_cols_inbound = ['Source Country', 'Passport', 'Visa Category', 'Requirement', 'Duration (Days)']
    display_columns = display_cols_outbound if travel_direction == 'Outbound' else display_cols_inbound

    st.dataframe(
        filtered_df[display_columns].reset_index(drop=True),
        use_container_width=True,
        hide_index=True,
    )
else:
    st.warning("Could not load visa data to display.")