Aitherway4 / app.py
GwFirman's picture
Update app.py
4deefec verified
import gradio as gr
import re
import ast
import pandas as pd
import os
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
from geopy.exc import GeocoderTimedOut
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from dotenv import load_dotenv
from supabase import create_client
import google.generativeai as genai
# --- Setup awal dan download stopwords secara aman ---
def ensure_stopwords():
    """Make sure the NLTK stopword lists used by this app can be loaded.

    The Indonesian list is probed first, then the English one. Whenever a
    probe raises ``LookupError`` the ``stopwords`` corpus is downloaded.
    """
    for language in ('indonesian', 'english'):
        try:
            stopwords.words(language)
        except LookupError:
            nltk.download('stopwords')
ensure_stopwords()
# --- API configuration and database connection ---
# Credentials come from the environment; load_dotenv() runs first so the
# lookups below happen after any .env loading.
load_dotenv()
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SBASEKEY")
API_KEY = os.environ.get("GEMINI_API_KEY")
# Module-level Supabase client and Gemini configuration used by the
# functions defined further down in this file.
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
genai.configure(api_key=API_KEY)
# --- Preprocessing teks ---
# Lazily built cache of the merged stopword set (see _get_stop_words).
_STOP_WORDS = None


def _get_stop_words():
    """Return the combined Indonesian + English stopword set, built once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(
            stopwords.words('indonesian') + stopwords.words('english')
        )
    return _STOP_WORDS


def preprocess_text(text):
    """Normalise free text before it is fed to the TF-IDF vectoriser.

    Steps: lowercase, drop ASCII punctuation and digit characters, split on
    whitespace, remove Indonesian/English stopwords, then apply the Porter
    stemmer to each remaining word.

    Args:
        text: The string to normalise.

    Returns:
        str: The processed words joined by single spaces ('' if none remain).
    """
    text = text.lower()
    kept_chars = [
        char for char in text
        if char not in string.punctuation and not char.isdigit()
    ]
    # The stopword set is cached: this function is applied to every dataset
    # row, and rebuilding the set per call repeats identical work.
    stop_words = _get_stop_words()
    stemmer = PorterStemmer()
    words = [
        stemmer.stem(word)
        for word in ''.join(kept_chars).split()
        if word not in stop_words
    ]
    return ' '.join(words)
# --- Load dan preprocessing data dari Supabase ---
def fetch_data_from_supabase():
    """Load every row of the Supabase "Maps" table into a DataFrame.

    The ``deskripsi`` column is cast to ``str`` and passed through
    ``preprocess_text`` so it is ready for TF-IDF matching.

    Returns:
        pandas.DataFrame: One row per record. When the query returns no rows
        an empty DataFrame is returned unchanged.

    Raises:
        KeyError: If rows were returned but none has a ``deskripsi`` field.
    """
    response = supabase.table("Maps").select("*").execute()
    df = pd.DataFrame(response.data or [])
    # A DataFrame built from zero rows has no columns, so indexing
    # "deskripsi" would raise KeyError; return it as-is so callers can
    # check ``df.empty`` instead.
    if df.empty:
        return df
    df["deskripsi"] = df["deskripsi"].astype(str).apply(preprocess_text)
    return df
# Load the dataset once at import time; wisata_rekomendasi ranks against a
# copy of this frame on every request.
df = fetch_data_from_supabase()
# --- Ekstraksi kata kunci dengan Gemini ---
def extract_keywords(user_input):
    """Ask Gemini for the key terms of a travel description.

    The model is prompted to answer with a Python list literal; the first
    bracketed span in the reply is parsed with ``ast.literal_eval``.

    Args:
        user_input: Free-text description to extract keywords from.

    Returns:
        list[str]: The extracted keywords with blanks removed, or ``[]`` when
        the reply contains no bracketed list. On any failure (API error,
        unparsable reply) a single-item list ``["Error: <details>"]`` is
        returned instead of raising.
    """
    prompt = f"""
Ekstrak 3–7 kata kunci penting dari deskripsi wisata berikut:
"{user_input}"
Tulis langsung sebagai list Python tanpa variabel apapun.
"""
    try:
        response = genai.GenerativeModel("gemini-1.5-flash").generate_content(prompt)
        # DOTALL lets "." cross newlines, so a list that the reply spreads
        # over several lines is still matched.
        match = re.search(r'\[.*?\]', response.text, re.DOTALL)
        if match is None:
            return []
        parsed = ast.literal_eval(match.group(0))
        # Coerce every item to a stripped string and drop blanks so callers
        # can safely pass the result to str.join.
        cleaned = (str(item).strip() for item in parsed)
        return [item for item in cleaned if item]
    except Exception as e:
        # Failures are reported through the return value by design.
        return [f"Error: {e}"]
# --- Lokasi dan Geolokasi ---
def get_coordinates_from_location(location_name):
    """Geocode a place name to coordinates using Nominatim.

    Args:
        location_name: The place to look up.

    Returns:
        tuple: ``(latitude, longitude)`` of the first match, or
        ``(None, None)`` when the name is blank, nothing matches, or the
        geocoding service fails.
    """
    # Function-scope import keeps this block self-contained.
    from geopy.exc import GeocoderServiceError

    # Skip the network round-trip for input that cannot match anything.
    if location_name is None or (
        isinstance(location_name, str) and not location_name.strip()
    ):
        return (None, None)
    try:
        geolocator = Nominatim(user_agent="geoapi")
        location = geolocator.geocode(location_name, timeout=10)
    except (GeocoderTimedOut, GeocoderServiceError):
        # GeocoderServiceError is the base of geopy's service-side failures
        # (unavailable, rate limited, ...); treat them like a timeout rather
        # than letting them propagate to the caller.
        return (None, None)
    if location is None:
        return (None, None)
    return (location.latitude, location.longitude)
def get_location_name_from_coordinates(lat, lon):
    """Reverse-geocode coordinates to an address string using Nominatim.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.

    Returns:
        str: The address reported by Nominatim, or ``"Tidak ditemukan"`` when
        a coordinate is missing, nothing matches, or the geocoding service
        fails.
    """
    # Function-scope import keeps this block self-contained.
    from geopy.exc import GeocoderServiceError

    not_found = "Tidak ditemukan"
    # get_coordinates_from_location signals failure with (None, None); do not
    # send such a pair to the service.
    if lat is None or lon is None:
        return not_found
    try:
        geolocator = Nominatim(user_agent="geoapi")
        location = geolocator.reverse((lat, lon), timeout=10)
    except (GeocoderTimedOut, GeocoderServiceError):
        # Same policy as the forward lookup: any service-side failure is
        # reported as "not found" instead of raising.
        return not_found
    return location.address if location else not_found
# --- Perluasan deskripsi dengan Gemini jika input terlalu singkat ---
def enhance_description_with_gemini(short_desc):
    """Expand a very short travel wish into a fuller paragraph via Gemini.

    Args:
        short_desc: The user's original description.

    Returns:
        str: The model's reply with surrounding whitespace stripped, or
        ``short_desc`` unchanged if the request or reply handling raises.
    """
    prompt = f"""
Deskripsi berikut terlalu singkat: "{short_desc}"
Tolong kembangkan menjadi paragraf singkat (2–3 kalimat) yang menggambarkan keinginan wisata pengguna secara lebih lengkap.
Contohnya: sebutkan suasana, aktivitas, atau lokasi ideal.
"""
    try:
        model = genai.GenerativeModel("gemini-1.5-flash")
        reply = model.generate_content(prompt)
        expanded = reply.text.strip()
    except Exception:
        # Best effort: fall back to the caller's text on any failure.
        expanded = short_desc
    return expanded
# --- Rekomendasi wisata menggunakan TF-IDF + Cosine Similarity ---
def prepare_and_recommend(df, user_description):
    """Rank places by TF-IDF cosine similarity to the user's description.

    The place descriptions and the query are vectorised together so they
    share one vocabulary; the query is the last row of the matrix.

    Args:
        df: DataFrame with a ``deskripsi`` text column. A ``similarity``
            column is written onto it in place.
        user_description: Preprocessed query text.

    Returns:
        pandas.DataFrame: The ten rows with the highest similarity, in
        descending order.
    """
    corpus = df['deskripsi'].tolist()
    corpus.append(user_description)

    vectorizer = TfidfVectorizer()
    matrix = vectorizer.fit_transform(corpus)
    query_vector = matrix[-1]
    place_vectors = matrix[:-1]

    scores = cosine_similarity(query_vector, place_vectors)
    df['similarity'] = scores.ravel()

    ranked = df.sort_values(by='similarity', ascending=False)
    return ranked.head(10)
# --- Hitung jarak pengguna ke tempat wisata ---
def sort_by_nearest_location(df, user_lat, user_lon):
    """Add each place's distance from the user and sort nearest-first.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns. A
            ``distance_km`` column (rounded to 2 decimals) is written onto
            it in place.
        user_lat: Latitude of the user.
        user_lon: Longitude of the user.

    Returns:
        pandas.DataFrame: The rows sorted by ascending ``distance_km``. Rows
        whose coordinates are missing get ``NaN`` and sort last. An empty
        input yields an empty frame that still has the ``distance_km`` column.
    """
    origin = (user_lat, user_lon)
    # Built with a plain loop instead of DataFrame.apply(axis=1): on an empty
    # frame apply returns a DataFrame, which cannot be assigned to a single
    # column. Missing coordinates are mapped to NaN instead of being passed
    # to geodesic.
    distances = [
        float('nan') if pd.isna(lat) or pd.isna(lon)
        else geodesic(origin, (lat, lon)).km
        for lat, lon in zip(df['latitude'], df['longitude'])
    ]
    # Explicit float dtype keeps the column numeric even when it is empty.
    df['distance_km'] = pd.Series(
        distances, index=df.index, dtype='float64'
    ).round(2)
    return df.sort_values(by='distance_km')
# --- Fungsi utama Gradio ---
def wisata_rekomendasi(deskripsi, lokasi):
    """Gradio handler: recommend places for a description and a location.

    Pipeline: geocode ``lokasi``; expand ``deskripsi`` with Gemini when it is
    shorter than 3 words or 20 characters; extract keywords with Gemini; rank
    the dataset by TF-IDF similarity; keep places with more than 10 reviews
    and an ``https`` image URL; add the distance from the user.

    Args:
        deskripsi: Free-text description of the desired trip.
        lokasi: Name of the user's location.

    Returns:
        tuple[str, pandas.DataFrame]: A status/keyword message and the result
        table. On failure the table is a one-cell frame with a ``nama``
        column carrying the message; when no place passes the filters it is
        an empty frame with the result columns.
    """
    output_columns = [
        "id", "nama", "alamat", "distance_km", "deskripsi", "harga",
        "rating", "total_ulasan", "gambar", "similarity",
    ]
    # Treat a missing textbox value like an empty string.
    deskripsi = deskripsi or ""
    lokasi = lokasi or ""

    if df.empty:
        return "Data tidak tersedia.", pd.DataFrame([["Data tidak tersedia"]], columns=["nama"])

    # Resolve the location before anything else so that an unknown location
    # does not cost any Gemini request.
    lat, lon = get_coordinates_from_location(lokasi)
    if lat is None or lon is None:
        return "Lokasi tidak ditemukan.", pd.DataFrame([["Lokasi tidak ditemukan"]], columns=["nama"])

    if len(deskripsi.strip().split()) < 3 or len(deskripsi.strip()) < 20:
        deskripsi = enhance_description_with_gemini(deskripsi)

    deskripsi_lengkap = f"{deskripsi} di sekitar {lokasi}"
    keywords = extract_keywords(deskripsi_lengkap)
    # extract_keywords reports failure as a single "Error: ..." item; test
    # for exactly that shape instead of a substring of the whole list.
    if len(keywords) == 1 and str(keywords[0]).startswith("Error:"):
        return f"Kata kunci gagal diambil: {keywords[0]}", pd.DataFrame([[keywords[0]]], columns=["nama"])

    keywords = [str(keyword) for keyword in keywords]
    # With no keywords the query would be empty and every similarity zero;
    # fall back to the full description text in that case.
    query_source = " ".join(keywords) if keywords else deskripsi_lengkap
    user_description_joined = preprocess_text(query_source)

    top_place = prepare_and_recommend(df.copy(), user_description_joined)
    # NOTE(review): the filters run after the top-10 cut, so fewer than ten
    # rows (possibly none) can remain — confirm this is intended.
    has_reviews = top_place['total_ulasan'] > 10
    has_image = top_place["gambar"].apply(
        lambda x: isinstance(x, str) and x.startswith("https")
    ).astype(bool)
    # Copy so the distance column is added to an independent frame rather
    # than to a slice of top_place.
    candidates = top_place[has_reviews & has_image].copy()

    keyword_text = f"Kata kunci: {', '.join(keywords)}"
    if candidates.empty:
        return keyword_text, pd.DataFrame(columns=output_columns)

    candidates = sort_by_nearest_location(candidates, lat, lon)
    # Best match first; equal scores are ordered nearest-first.
    candidates = candidates.sort_values(
        by=['similarity', 'distance_km'], ascending=[False, True]
    )
    return keyword_text, candidates[output_columns]
# --- Gradio UI ---
# Column headers of the results table, matching the columns returned by
# wisata_rekomendasi.
_result_headers = [
    "id", "nama", "alamat", "distance_km", "deskripsi", "harga",
    "rating", "total_ulasan", "gambar", "similarity",
]
# Two free-text inputs: the trip description and the user's location.
_input_boxes = [
    gr.Textbox(label="Deskripsi Wisata yang Anda Inginkan"),
    gr.Textbox(label="Lokasi Anda (Contoh: Cilacap, Jawa Tengah, Indonesia)"),
]
# Two outputs: the extracted keywords and the recommendation table.
_output_views = [
    gr.Textbox(label="Kata Kunci yang Diekstrak"),
    gr.Dataframe(headers=_result_headers, label="Rekomendasi Tempat Wisata"),
]
demo = gr.Interface(
    fn=wisata_rekomendasi,
    inputs=_input_boxes,
    outputs=_output_views,
    title="Sistem Rekomendasi Wisata",
    description="Masukkan deskripsi dan lokasi, lalu dapatkan rekomendasi tempat wisata terdekat beserta skor kecocokannya.",
)
# Start the web UI as soon as this module is executed.
demo.launch()