| |
| import json |
| import sqlite3 |
| import numpy as np |
| from sentence_transformers import SentenceTransformer |
|
|
| |
# Load the embedding model once, at import time, and keep it in the
# module-level name `model` so the functions below can reuse it.
# NOTE(review): the first run presumably fetches the weights — confirm.
print("Loading sentence-transformer model...")
model = SentenceTransformer('all-MiniLM-L6-v2')
print("Model loaded.")
|
|
| |
def setup_database(db_path='kost.db'):
    """Create a fresh, empty ``kost`` table in the SQLite database.

    Any existing ``kost`` table is dropped first, so every call discards
    the rows stored by a previous run.

    Args:
        db_path: Path of the SQLite database file. Defaults to
            ``'kost.db'``, the file the rest of this script uses.

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement
            fails. The connection is closed before the error propagates.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute('DROP TABLE IF EXISTS kost')
        cursor.execute('''
            CREATE TABLE kost (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                title TEXT,
                price TEXT,
                location TEXT,
                url TEXT,
                image_url TEXT,
                embedding BLOB
            )
        ''')
        conn.commit()
    finally:
        # Release the file handle even when a statement above raised.
        conn.close()
|
|
| |
def insert_data(data, db_path='kost.db'):
    """Embed each listing and insert it into the ``kost`` table.

    For every item a sentence of the form ``"<title>. Lokasi: <location>"``
    is encoded with the module-level ``model``; the resulting vector is
    stored next to the listing fields as raw bytes.

    Args:
        data: Sequence of dict-like listings (iterated twice, so not a
            one-shot generator). The keys read are ``title``, ``price``,
            ``location``, ``url`` and ``imageUrl``; missing keys are
            stored as NULL.
        db_path: Path of the SQLite database file. Defaults to
            ``'kost.db'``. The ``kost`` table must already exist.

    Raises:
        sqlite3.Error: If the insert fails. Nothing is committed in that
            case and the connection is still closed.
    """
    if not data:
        # Nothing to embed or store: skip the model call and the database.
        return

    # A JSON null arrives as None, which `.get(key, '')` does not replace;
    # `or ''` keeps the literal word "None" out of the embedded sentence.
    texts_to_embed = [
        f"{item.get('title') or ''}. Lokasi: {item.get('location') or ''}"
        for item in data
    ]

    print(f"Generating embeddings for {len(texts_to_embed)} items...")
    embeddings = model.encode(texts_to_embed, show_progress_bar=True)
    print("Embeddings generated.")

    # Only the raw buffer is stored; dtype and dimension are not recorded.
    # NOTE(review): readers presumably decode as float32 — confirm.
    rows = (
        (
            item.get('title'),
            item.get('price'),
            item.get('location'),
            item.get('url'),
            item.get('imageUrl'),
            embedding.tobytes(),
        )
        for item, embedding in zip(data, embeddings)
    )

    # Open the connection only after the (slow) encoding step succeeded.
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.executemany(
            'INSERT INTO kost (title, price, location, url, image_url, embedding) VALUES (?, ?, ?, ?, ?, ?)',
            rows,
        )
        conn.commit()
    finally:
        # Closing without a commit discards a partially applied batch.
        conn.close()
|
|
if __name__ == '__main__':
    # Start from an empty table so repeated runs do not pile up rows.
    setup_database()

    with open('data/fb_marketplace_data.json', mode='r', encoding='utf-8') as source:
        listings = json.load(source)

    # Entries with a missing or empty title are dropped before embedding.
    titled_listings = list(filter(lambda entry: entry.get('title'), listings))

    insert_data(titled_listings)
    print("Database 'kost.db' has been created and populated successfully.")