tueniuu's picture
Upload 4 files
8c15eb9 verified
import pandas as pd
import psycopg2
import json
import numpy as np
def format_operating_hours(time_str):
"""
Transforms a simple time string (e.g., '11:00-22:00') into a
structured JSON object for all days of the week.
"""
if not isinstance(time_str, str) or '-' not in time_str:
return None
schedule = {
"monday": time_str, "tuesday": time_str, "wednesday": time_str,
"thursday": time_str, "friday": time_str, "saturday": time_str,
"sunday": time_str,
}
return json.dumps(schedule)
def parse_period_dates(period_str):
"""
Parses a date range string like '2025.01.04 - 2025.12.27'
and returns a tuple of (start_date, end_date).
"""
if not isinstance(period_str, str) or '-' not in period_str:
return None, None
try:
parts = period_str.split('-')
start_date = parts[0].strip().replace('.', '-')
end_date = parts[1].strip().replace('.', '-')
return start_date, end_date
except Exception:
return None, None
def load_excel_to_postgres(excel_path, db_params):
"""
Connects to PostgreSQL, reads an Excel file, and inserts the data.
"""
conn = None
cur = None
try:
print(f"Reading data from '{excel_path}'...")
df = pd.read_excel(excel_path)
df = df.replace({np.nan: None})
print("Data read successfully. Preparing for database insertion...")
conn = psycopg2.connect(**db_params)
cur = conn.cursor()
print("Successfully connected to the PostgreSQL database.")
for index, row in df.iterrows():
operating_hours_json = format_operating_hours(row.get('time'))
start_date, end_date = parse_period_dates(row.get('period'))
sql = """
INSERT INTO locations (
name, address, naver_url, region, primary_category, tags,
price_level, indoor_outdoor, operating_hours,
period_start_date, period_end_date, website, meal_type, geom
) VALUES (
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
ST_SetSRID(ST_MakePoint(%s, %s), 4326)
) ON CONFLICT (name) DO NOTHING;
"""
data_tuple = (
row.get('name'), row.get('address'), row.get('naver_url'), row.get('region'),
row.get('primary_category'), row.get('tags'), row.get('price_level'),
row.get('indoor_outdoor'), operating_hours_json, start_date, end_date,
row.get('website'), row.get('type'),
row.get('longitude'), row.get('latitude')
)
cur.execute(sql, data_tuple)
conn.commit()
print(f"\nSuccessfully processed and inserted {len(df)} rows into the 'locations' table.")
except FileNotFoundError:
print(f"ERROR: The file '{excel_path}' was not found.")
except psycopg2.Error as e:
print(f"DATABASE ERROR: {e}")
except Exception as e:
print(f"AN UNEXPECTED ERROR OCCURRED: {e}")
finally:
if cur is not None:
cur.close()
if conn is not None:
conn.close()
print("Database connection closed.")
if __name__ == '__main__':
db_connection_params = {
"host": "localhost",
"database": "recommendation_locations",
"user": "postgres",
"password": "nafikova03",
"port": "5432"
}
excel_file_path = "loc_data.xlsx"
load_excel_to_postgres(excel_file_path, db_connection_params)