Spaces:
Sleeping
Sleeping
github-actions[bot]
Auto Deploy from Monorepo: Merge pull request #92 from human13th2team/dev
1e1f1bf | import os, uuid, json, time | |
| import redis as redis_lib | |
| import pandas as pd | |
| import anthropic | |
| from sqlalchemy import create_engine, text | |
| from dotenv import load_dotenv | |
| from pathlib import Path | |
| load_dotenv(Path(__file__).parent.parent / ".env") | |
| REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0") | |
| AUTOCOMPLETE_KEY = "equipment:autocomplete" | |
| BASE_DIR = Path(__file__).parent | |
| EXCEL_EQUIPMENT = BASE_DIR / "μμ_μ€λΉ.xlsx" | |
| EXCEL_TYPES = BASE_DIR / "μμμ μ’ _λΆλ₯.xlsx" | |
| CSV_SURVIVAL = BASE_DIR / "μμΈ_μμμ _μ μκΈ°μ μμ‘΄μ¨_20260511154402.csv" | |
| DB_URL = os.environ["DATABASE_URL"].replace("postgresql+asyncpg://", "postgresql://") | |
| API_KEY = os.environ["ANTHROPIC_API_KEY"] | |
| KOSIS_CATEGORIES = [ | |
| "νμ μΌλ° μμμ μ ", "νμ λ©΄ μ리 μ λ¬Έμ ", "νμ μ‘λ₯ μ리 μ λ¬Έμ ", | |
| "νμ ν΄μ°λ¬Ό μ리 μ λ¬Έμ ", "μ€μ μμμ μ ", "μΌμ μμμ μ ", | |
| "μμμ μμμ μ ", "κΈ°ν μΈκ΅μ μμμ μ ", "μ κ³Όμ μ ", | |
| "νΌμ νλ²κ±° μλμμΉ λ° μ μ¬ μμμ μ ", "μΉν¨ μ λ¬Έμ ", | |
| "κΉλ°₯ λ° κΈ°ν κ°μ΄ μμμ μ ", "κ°μ΄ μμ ν¬μ₯ νλ§€ μ λ¬Έμ ", | |
| "μλ§₯μ£Ό μ λ¬Έμ ", "κΈ°ν μ£Όμ μ ", "μ»€νΌ μ λ¬Έμ ", "κΈ°ν λΉμμ½μ¬ μλ£μ μ ", | |
| ] | |
| # λ¨λμ£Όμ μ μλΉμ€ μ 곡 μν¨ -> μλ½ λΆλ₯ μ κ±° | |
| BUILDING_USE = { | |
| "1μ’ κ·Όλ¦°": ["μ κ³Όμ ", "μΉ΄ν", "λ‘μ§", "λμ νΈ μ λ¬Έμ ", "λ² μ΄μ»€λ¦¬ μΉ΄ν", "λΈλ°μΉ μΉ΄ν", | |
| "ν μ€νΈκ°κ²", "ν¬μ₯λ§μ°¨", "μ£½μ§"], | |
| "2μ’ κ·Όλ¦°": ["κ³ κΈ°κ΅¬μ΄ μ λ¬Έμ ", "μ€λΈμ€λΈμ§", "νμ μμ§", "μ€κ΅μ§", "μΌμμ§", | |
| "λλ¨μμμ μλΉ", "μΈλ μλΉ", "νμ§", "λ·ν", "λΆμμ§", "ν¨μ€νΈνΈλ", | |
| "μΉν¨ μ λ¬Έμ ", "νΌμ μ λ¬Έμ ", "νλ²κ±° μ λ¬Έμ ", "κ΅λ°₯μ§ (λνμ₯, μ₯μκ° μ‘μλ₯)", | |
| "κ΅μμ§", "λ§λ μ°λΉ΅μ§", "κ΅λ¬Όμ리 μ λ¬Έμ ", "μ°λ μ λ¬Έμ ", "μκΌ¬μΉ μ λ¬Έμ ", | |
| "λ§λΌνκ°κ²", "ν¬μΌ μ λ¬Έμ ", "νμ€ν μ λ¬Έμ ", "λμλ½ μ λ¬Έμ ", | |
| "μ‘±λ°, 보μ μ λ¬Έμ ", "κ³±μ°½, λ§μ°½ μ λ¬Έμ ", "λλ° μ λ¬Έμ ", "무ν리ν κ³ κΈ°μ§", | |
| "κΌ¬μΉ μ λ¬Έμ ", "μ΄μμΉ΄μΌ", "νμ μ΄λ°₯ μ§", "μ€ν μ΄ν¬ μ λ¬Έμ ", | |
| "μλ¬λ μ λ¬Έμ ", "λΌλ© μ λ¬Έμ ", "μ€λ§μΉ΄μΈ", "κΉλ°₯ μ λ¬Έμ ", "μλμμΉ μ λ¬Έμ ","νΈνμ§", "κ³ κΈ°μ£Όμ ", "μμΈλ°", "μΉ΅ν μΌλ°"] | |
| } | |
| def load_equipment(): | |
| df = pd.read_excel(EXCEL_EQUIPMENT) | |
| rows = [] | |
| for category in df.columns: | |
| for name in df[category].dropna(): | |
| name = str(name).strip() | |
| if name: | |
| rows.append({"id": str(uuid.uuid4()), "name": name, "category": category}) | |
| return rows | |
| def load_restaurant_types(): | |
| df = pd.read_excel(EXCEL_TYPES, header=None) | |
| rows = [] | |
| for name in df[0].dropna(): | |
| name = str(name).strip() | |
| if name: | |
| building_use = next( | |
| (code for code, names in BUILDING_USE.items() if name in names), "2μ’ κ·Όλ¦°" | |
| ) | |
| rows.append({ | |
| "id": str(uuid.uuid4()), | |
| "name": name, | |
| "building_use_code": building_use, | |
| }) | |
| return rows | |
| def load_survival_rates(): | |
| df = pd.read_csv(CSV_SURVIVAL, encoding="CP949") | |
| df = df.iloc[1:].reset_index(drop=True) | |
| df.columns = ["μ μ’ ","1y_19","2y_19","3y_19","4y_19","5y_19", | |
| "1y_20","2y_20","3y_20","4y_20","5y_20", | |
| "1y_21","2y_21","3y_21","4y_21","5y_21"] | |
| num_cols = df.columns[1:] | |
| df[num_cols] = df[num_cols].apply(pd.to_numeric, errors="coerce") | |
| df["avg_1y"] = ((df["1y_19"]+df["1y_20"]+df["1y_21"])/3).round(2) | |
| df["avg_3y"] = ((df["3y_19"]+df["3y_20"]+df["3y_21"])/3).round(2) | |
| df["avg_5y"] = ((df["5y_19"]+df["5y_20"]+df["5y_21"])/3).round(2) | |
| return df[["μ μ’ ","avg_1y","avg_3y","avg_5y"]].set_index("μ μ’ ").to_dict("index") | |
| def llm_map_kosis(client, restaurant_name): | |
| res = client.messages.create( | |
| model="claude-opus-4-5", | |
| max_tokens=50, | |
| messages=[{"role": "user", "content": | |
| f"μμμ μ μ’ '{restaurant_name}'μ μλ KOSIS ν΅κ³ μΉ΄ν κ³ λ¦¬ μ€ μ΄λμ ν΄λΉν΄?\n" | |
| f"κ°μ₯ μ μ¬ν μΉ΄ν κ³ λ¦¬ νλλ§ κ³¨λΌμ μ νν κ·Έ μ΄λ¦λ§ μλ΅ν΄. λ€λ₯Έ λ§ νμ§ λ§.\n\n" | |
| f"{json.dumps(KOSIS_CATEGORIES, ensure_ascii=False)}" | |
| }] | |
| ) | |
| return res.content[0].text.strip() | |
| def llm_map_equipment(client, restaurant_name, equipment_list): | |
| res = client.messages.create( | |
| model="claude-opus-4-5", | |
| max_tokens=4096, | |
| messages=[{"role": "user", "content": | |
| f"μμμ μ μ’ : {restaurant_name}\n\n" | |
| f"μλ μ€λΉ λͺ©λ‘μμ μ΄ μ μ’ μ°½μ μ νμν μ€λΉλ₯Ό μ νν΄μ€.\n" | |
| f"- required: μμΌλ©΄ μμ λΆκ°λ₯ν νμ μ€λΉ\n" | |
| f"- optional: μμΌλ©΄ μ’μ§λ§ μμ΄λ λλ μ€λΉ\n" | |
| f"- weight: ν΄λΉ μ€λΉκ° μ μ’ νΉμ±μ μΌλ§λ λννλμ§ (0.1 ~ 1.0)\n\n" | |
| f"μ€λΉ λͺ©λ‘:\n{json.dumps(equipment_list, ensure_ascii=False)}\n\n" | |
| f"λ°λμ μλ JSON νμμΌλ‘λ§ μλ΅ν΄. λ€λ₯Έ λ§ νμ§ λ§.\n" | |
| f'[{{"name": "μ€λΉλͺ ", "is_required": true, "weight": 0.9}}]' | |
| }] | |
| ) | |
| raw = res.content[0].text.strip().replace("```json","").replace("```","").strip() | |
| print(f" RAW: {raw[:200]}") | |
| return json.loads(raw) | |
| def insert_equipment(engine, rows): | |
| with engine.begin() as conn: | |
| for r in rows: | |
| conn.execute(text( | |
| "INSERT INTO equipment (id, name, category) VALUES (:id, :name, :category) ON CONFLICT (name) DO NOTHING" | |
| ), r) | |
| print(f"[equipment] {len(rows)}κ° μλ£") | |
| def insert_restaurant_types(engine, rows): | |
| with engine.begin() as conn: | |
| for r in rows: | |
| conn.execute(text( | |
| "INSERT INTO restaurant_types (id, name, building_use_code) VALUES (:id, :name, :building_use_code) ON CONFLICT (name) DO NOTHING" | |
| ), r) | |
| print(f"[restaurant_types] {len(rows)}κ° μλ£") | |
| def update_kosis_and_survival(engine, rt_rows, survival_rates, client): | |
| with engine.begin() as conn: | |
| for r in rt_rows: | |
| kosis_cat = llm_map_kosis(client, r["name"]) | |
| sv = survival_rates.get(kosis_cat, {}) | |
| conn.execute(text(""" | |
| UPDATE restaurant_types | |
| SET kosis_category=:kosis, survival_rate_1y=:s1, survival_rate_3y=:s3, survival_rate_5y=:s5 | |
| WHERE id=:id | |
| """), {"id": r["id"], "kosis": kosis_cat, | |
| "s1": sv.get("avg_1y"), "s3": sv.get("avg_3y"), "s5": sv.get("avg_5y")}) | |
| print(f" {r['name']} β {kosis_cat}") | |
| time.sleep(0.3) | |
| print("[KOSIS λ§€ν + μμ‘΄μ¨ μλ£]") | |
| def insert_equipment_map(engine, rt_rows, eq_rows, client): | |
| eq_names = [e["name"] for e in eq_rows] | |
| eq_by_name = {e["name"]: e["id"] for e in eq_rows} | |
| with engine.begin() as conn: | |
| for r in rt_rows: | |
| print(f" λ§€ν μ€: {r['name']}") | |
| try: | |
| items = llm_map_equipment(client, r["name"], eq_names) | |
| except Exception as e: | |
| print(f" [ERROR] {r['name']}: {e}") | |
| import traceback; traceback.print_exc() | |
| continue | |
| for item in items: | |
| eq_id = eq_by_name.get(item["name"]) | |
| if not eq_id: | |
| continue | |
| conn.execute(text(""" | |
| INSERT INTO restaurant_equipment_map (restaurant_type_id, equipment_id, is_required, weight) | |
| VALUES (:rt_id, :eq_id, :req, :w) | |
| ON CONFLICT DO NOTHING | |
| """), {"rt_id": r["id"], "eq_id": eq_id, | |
| "req": item.get("is_required", True), "w": item.get("weight", 1.0)}) | |
| time.sleep(0.5) | |
| print("[restaurant_equipment_map μλ£]") | |
| def cache_equipment_to_redis(eq_rows): | |
| r = redis_lib.from_url(REDIS_URL, decode_responses=True) | |
| pipe = r.pipeline() | |
| pipe.delete("equipment:autocomplete") | |
| for eq in eq_rows: | |
| name = eq["name"] | |
| pipe.zadd("equipment:autocomplete", {name: 0}) | |
| pipe.hset(f"equipment:detail:{name}", mapping={ | |
| "id": eq["id"], | |
| "category": eq["category"], | |
| }) | |
| pipe.execute() | |
| r.close() | |
| print(f"[Redis] equipment:autocomplete {len(eq_rows)}κ° μΊμ± μλ£") | |
| def main(): | |
| print("=== seed_data.py μμ ===") | |
| client = anthropic.Anthropic(api_key=API_KEY) | |
| engine = create_engine(DB_URL) | |
| eq_rows = load_equipment() | |
| insert_equipment(engine, eq_rows) | |
| cache_equipment_to_redis(eq_rows) | |
| rt_rows = load_restaurant_types() | |
| insert_restaurant_types(engine, rt_rows) | |
| survival_rates = load_survival_rates() | |
| update_kosis_and_survival(engine, rt_rows, survival_rates, client) | |
| with engine.connect() as conn: | |
| result = conn.execute(text("SELECT id, name FROM equipment")) | |
| eq_rows = [{"id": str(r.id), "name": r.name} for r in result.fetchall()] | |
| insert_equipment_map(engine, rt_rows, eq_rows, client) | |
| print("=== μλ£ ===") | |
| if __name__ == "__main__": | |
| main() |