"""Gradio app that recommends tourist attractions.

Pipeline: the user's free-text wish is (optionally) expanded and reduced to
keywords by Gemini, matched against attraction descriptions stored in the
Supabase table ``Maps`` with TF-IDF + cosine similarity, and annotated with
the geodesic distance from the user's geocoded location.
"""

import ast
import math
import os
import re
import string
from functools import lru_cache

import google.generativeai as genai
import gradio as gr
import nltk
import pandas as pd
from dotenv import load_dotenv
from geopy.distance import geodesic
from geopy.exc import GeocoderServiceError, GeocoderTimedOut
from geopy.geocoders import Nominatim
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from supabase import create_client

GEMINI_MODEL_NAME = "gemini-1.5-flash"
GEOCODER_USER_AGENT = "geoapi"
GEOCODER_TIMEOUT_SECONDS = 10
TOP_N_RESULTS = 10
MIN_REVIEW_COUNT = 10
OUTPUT_COLUMNS = [
    "id",
    "nama",
    "alamat",
    "distance_km",
    "deskripsi",
    "harga",
    "rating",
    "total_ulasan",
    "gambar",
    "similarity",
]

_PUNCTUATION = frozenset(string.punctuation)
# DOTALL so that a list the model spreads over several lines is still found.
_LIST_PATTERN = re.compile(r"\[.*?\]", re.DOTALL)
_STEMMER = PorterStemmer()


# --- Initial setup: make sure the NLTK stopword corpus is available ---
def ensure_stopwords():
    """Download the NLTK ``stopwords`` corpus if a needed language is missing."""
    for language in ("indonesian", "english"):
        try:
            stopwords.words(language)
        except LookupError:
            nltk.download("stopwords")


ensure_stopwords()

# --- API configuration and database connection ---
load_dotenv()
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SBASEKEY")
API_KEY = os.getenv("GEMINI_API_KEY")

supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
genai.configure(api_key=API_KEY)


# --- Text preprocessing ---
@lru_cache(maxsize=1)
def _get_stop_words():
    """Return the combined Indonesian + English stopword set (built once)."""
    return frozenset(stopwords.words("indonesian") + stopwords.words("english"))


def preprocess_text(text):
    """Normalize *text* for TF-IDF matching.

    Lower-cases the text, removes ASCII punctuation and digit characters,
    drops Indonesian and English stopwords, and stems the remaining words.

    Args:
        text: The string to normalize.

    Returns:
        The processed words joined by single spaces.
    """
    lowered = text.lower()
    cleaned = "".join(
        char for char in lowered
        if char not in _PUNCTUATION and not char.isdigit()
    )
    stop_words = _get_stop_words()
    # NOTE(review): PorterStemmer implements English stemming rules; it is
    # applied to Indonesian words too, exactly as the original code did.
    return " ".join(
        _STEMMER.stem(word) for word in cleaned.split() if word not in stop_words
    )


# --- Load and preprocess data from Supabase ---
def fetch_data_from_supabase():
    """Load every row of the ``Maps`` table into a DataFrame.

    The ``deskripsi`` column is replaced by its preprocessed form so that it
    can be fed to the TF-IDF vectorizer directly.

    Returns:
        A DataFrame of attractions; empty when the table returned no rows.
    """
    response = supabase.table("Maps").select("*").execute()
    data = pd.DataFrame(response.data or [])
    if data.empty:
        # No rows means no columns either; indexing "deskripsi" would raise
        # KeyError before the caller's `df.empty` check could ever run.
        return data
    # Fill missing descriptions first so they do not become the literal
    # words "None"/"nan" in the corpus.
    data["deskripsi"] = (
        data["deskripsi"].fillna("").astype(str).apply(preprocess_text)
    )
    return data


df = fetch_data_from_supabase()


# --- Keyword extraction with Gemini ---
def extract_keywords(user_input):
    """Ask Gemini for the key terms of *user_input*.

    Args:
        user_input: Free-text description of the desired trip.

    Returns:
        A list of keyword strings; an empty list when the reply contains no
        list literal; or a one-element list ``["Error: <message>"]`` when the
        request or the parsing failed.
    """
    prompt = (
        " Ekstrak 3–7 kata kunci penting dari deskripsi wisata berikut: "
        f'"{user_input}" '
        "Tulis langsung sebagai list Python tanpa variabel apapun. \n"
    )
    try:
        response = genai.GenerativeModel(GEMINI_MODEL_NAME).generate_content(prompt)
        match = _LIST_PATTERN.search(response.text)
        if match is None:
            return []
        # literal_eval only accepts Python literals, so model output is never
        # executed as code.
        parsed = ast.literal_eval(match.group(0))
    except Exception as e:  # API boundary: report the failure to the caller
        return [f"Error: {e}"]
    # The model may emit non-string items (e.g. numbers); callers join the
    # result with ", ".join(...), so coerce to str and drop blank entries.
    keywords = (str(item).strip() for item in parsed)
    return [keyword for keyword in keywords if keyword]


# --- Location and geocoding ---
def get_coordinates_from_location(location_name):
    """Geocode *location_name* with Nominatim.

    Args:
        location_name: Place name or address entered by the user.

    Returns:
        ``(latitude, longitude)``, or ``(None, None)`` when the input is
        blank, nothing was found, or the geocoding service failed.
    """
    if not location_name or not location_name.strip():
        return (None, None)
    try:
        geolocator = Nominatim(user_agent=GEOCODER_USER_AGENT)
        location = geolocator.geocode(
            location_name, timeout=GEOCODER_TIMEOUT_SECONDS
        )
    except (GeocoderTimedOut, GeocoderServiceError):
        # GeocoderServiceError also covers unavailable/rate-limited service,
        # which previously escaped and crashed the request.
        return (None, None)
    if location is None:
        return (None, None)
    return (location.latitude, location.longitude)


def get_location_name_from_coordinates(lat, lon):
    """Reverse-geocode a coordinate pair with Nominatim.

    Args:
        lat: Latitude.
        lon: Longitude.

    Returns:
        The address string, or ``"Tidak ditemukan"`` when nothing was found
        or the geocoding service failed.
    """
    try:
        geolocator = Nominatim(user_agent=GEOCODER_USER_AGENT)
        location = geolocator.reverse(
            (lat, lon), timeout=GEOCODER_TIMEOUT_SECONDS
        )
    except (GeocoderTimedOut, GeocoderServiceError):
        return "Tidak ditemukan"
    return location.address if location else "Tidak ditemukan"


# --- Expand the description with Gemini when the input is too short ---
def enhance_description_with_gemini(short_desc):
    """Expand a very short description into a short paragraph via Gemini.

    This is best-effort: on any failure, or an empty reply, the original
    text is returned unchanged.

    Args:
        short_desc: The user's short description.

    Returns:
        The expanded description, or *short_desc* as a fallback.
    """
    prompt = (
        " Deskripsi berikut terlalu singkat: "
        f'"{short_desc}" '
        "Tolong kembangkan menjadi paragraf singkat (2–3 kalimat) yang "
        "menggambarkan keinginan wisata pengguna secara lebih lengkap. "
        "Contohnya: sebutkan suasana, aktivitas, atau lokasi ideal. \n"
    )
    try:
        response = genai.GenerativeModel(GEMINI_MODEL_NAME).generate_content(prompt)
        enhanced = response.text.strip()
    except Exception:  # deliberate best-effort fallback
        return short_desc
    # An empty reply must not wipe out what the user actually typed.
    return enhanced or short_desc


# --- Recommendation using TF-IDF + cosine similarity ---
def prepare_and_recommend(df, user_description):
    """Rank attractions by textual similarity to *user_description*.

    Adds a ``similarity`` column to *df* in place (callers pass a copy).

    Args:
        df: Attractions with a preprocessed ``deskripsi`` column.
        user_description: Preprocessed query text.

    Returns:
        The 10 rows with the highest similarity, best first.
    """
    tfidf = TfidfVectorizer()
    # The query is appended as the last document so it shares the vocabulary.
    tfidf_matrix = tfidf.fit_transform(
        df["deskripsi"].tolist() + [user_description]
    )
    similarity = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1]).flatten()
    df["similarity"] = similarity
    return df.sort_values(by="similarity", ascending=False).head(TOP_N_RESULTS)


# --- Distance from the user to each attraction ---
def _distance_km(origin, latitude, longitude):
    """Return the geodesic distance in km, or NaN for unusable coordinates."""
    try:
        return geodesic(origin, (latitude, longitude)).km
    except (TypeError, ValueError):
        # Raised for coordinates geopy rejects (e.g. NaN from missing data).
        return math.nan


def sort_by_nearest_location(df, user_lat, user_lon):
    """Add a ``distance_km`` column and sort by it, nearest first.

    Args:
        df: Attractions with ``latitude`` and ``longitude`` columns.
        user_lat: User latitude.
        user_lon: User longitude.

    Returns:
        A new DataFrame sorted by distance (rounded to 2 decimals); rows
        whose distance could not be computed sort last.
    """
    # Work on a copy: the input is usually a filtered slice, and assigning a
    # column to a slice triggers pandas' SettingWithCopy behaviour.
    result = df.copy()
    origin = (user_lat, user_lon)
    distances = [
        _distance_km(origin, latitude, longitude)
        for latitude, longitude in zip(result["latitude"], result["longitude"])
    ]
    # Built explicitly instead of DataFrame.apply(axis=1), which returns a
    # DataFrame (not a Series) for an empty frame and breaks the assignment.
    result["distance_km"] = pd.Series(
        distances, index=result.index, dtype="float64"
    ).round(2)
    return result.sort_values(by="distance_km")


def _message_frame(message):
    """Return a one-cell DataFrame carrying a status message for the UI."""
    return pd.DataFrame([[message]], columns=["nama"])


# --- Main Gradio callback ---
def wisata_rekomendasi(deskripsi, lokasi):
    """Recommend attractions for a description and a user location.

    Args:
        deskripsi: Free-text description of the desired trip.
        lokasi: The user's location as a place name.

    Returns:
        A ``(message, DataFrame)`` tuple: the extracted keywords plus the
        recommendations ordered by similarity, or an error message plus a
        one-cell DataFrame repeating it.
    """
    if df.empty:
        return "Data tidak tersedia.", _message_frame("Data tidak tersedia")

    deskripsi = (deskripsi or "").strip()
    lokasi = (lokasi or "").strip()

    # Validate the location first so that an unknown location does not cost
    # a Gemini request.
    lat, lon = get_coordinates_from_location(lokasi)
    if lat is None or lon is None:
        return "Lokasi tidak ditemukan.", _message_frame("Lokasi tidak ditemukan")

    if len(deskripsi.split()) < 3 or len(deskripsi) < 20:
        deskripsi = enhance_description_with_gemini(deskripsi)

    deskripsi_lengkap = f"{deskripsi} di sekitar {lokasi}"
    keywords = extract_keywords(deskripsi_lengkap)
    # extract_keywords signals failure as a single "Error: ..." entry.
    if keywords and str(keywords[0]).startswith("Error:"):
        return (
            f"Kata kunci gagal diambil: {keywords[0]}",
            _message_frame(keywords[0]),
        )

    # Without keywords the query would be empty and every similarity zero;
    # fall back to the full description instead.
    query_text = " ".join(keywords) if keywords else deskripsi_lengkap
    user_description_joined = preprocess_text(query_text)

    top_place = prepare_and_recommend(df.copy(), user_description_joined)
    # Coerce so that missing or non-numeric review counts compare as False
    # instead of raising.
    review_counts = pd.to_numeric(top_place["total_ulasan"], errors="coerce")
    top_place = top_place[review_counts > MIN_REVIEW_COUNT]

    sorted_place = sort_by_nearest_location(top_place, lat, lon)
    # astype(bool) keeps this a boolean mask even when the frame is empty.
    has_image = sorted_place["gambar"].apply(
        lambda x: isinstance(x, str) and x.startswith("https")
    ).astype(bool)
    sorted_place = sorted_place[has_image]
    # A stable sort keeps the nearest-first order among equal similarities.
    sorted_place = sorted_place.sort_values(
        by="similarity", ascending=False, kind="mergesort"
    )
    return f"Kata kunci: {', '.join(keywords)}", sorted_place[OUTPUT_COLUMNS]


# --- Gradio UI ---
demo = gr.Interface(
    fn=wisata_rekomendasi,
    inputs=[
        gr.Textbox(label="Deskripsi Wisata yang Anda Inginkan"),
        gr.Textbox(label="Lokasi Anda (Contoh: Cilacap, Jawa Tengah, Indonesia)"),
    ],
    outputs=[
        gr.Textbox(label="Kata Kunci yang Diekstrak"),
        gr.Dataframe(
            headers=list(OUTPUT_COLUMNS),
            label="Rekomendasi Tempat Wisata",
        ),
    ],
    title="Sistem Rekomendasi Wisata",
    description="Masukkan deskripsi dan lokasi, lalu dapatkan rekomendasi tempat wisata terdekat beserta skor kecocokannya.",
)

demo.launch()