Spaces:
Running
Running
| """ | |
| Supabase ๋๊ธฐํ ์คํฌ๋ฆฝํธ (๋ฆฌํฉํ ๋ง ๋ฒ์ ) | |
| ======================================== | |
| YAML ๋ฐ์ดํฐ๋ฅผ ์ฒญํฌ๋ก ๋ณํํ์ฌ Supabase์ ์ ์ฅ. | |
| ํธ๋ค๋ฌ ๊ธฐ๋ฐ ์ํคํ ์ฒ๋ก ์ YAML ๊ตฌ์กฐ ์ง์์ด ์ฝ์ต๋๋ค. | |
| ์ฌ์ฉ๋ฒ: | |
| python scripts/sync_to_supabase.py | |
| python scripts/sync_to_supabase.py --chain MARRIOTT | |
| python scripts/sync_to_supabase.py --dry-run | |
| """ | |
import os
import sys
import yaml
import hashlib
import click
from pathlib import Path
from typing import List, Dict, Any, Optional

# Add the project root to the Python path so sibling packages resolve
# when this file is run directly as a script.
sys.path.insert(0, str(Path(__file__).parent.parent))

from dotenv import load_dotenv
load_dotenv()

# Chunk handler registry: maps extracted_knowledge keys to chunk builders.
from chunk_handlers import (
    CHUNK_HANDLERS,
    NESTED_HANDLERS,
    IGNORED_KEYS,
    get_handler,
    get_nested_handler,
    is_ignored,
    get_all_handler_keys,
)
# ===========================================================================
# Chunk ID generation
# ===========================================================================
def generate_chunk_id(doc_id: str, chunk_index: int) -> str:
    """Return the canonical chunk ID: ``<doc_id>_chunk_<index>``, with the
    index zero-padded to at least four digits."""
    padded_index = format(chunk_index, "04d")
    return "_chunk_".join((doc_id, padded_index))
# ===========================================================================
# Chunk generation (handler-based)
# ===========================================================================
def create_chunks_from_knowledge(
    doc_id: str,
    chain: str,
    extracted_knowledge: Dict[str, Any],
    verbose: bool = False
) -> List[Dict[str, Any]]:
    """Build searchable chunks from an ``extracted_knowledge`` mapping.

    Each top-level key is dispatched to a registered handler (see the
    ``chunk_handlers`` package); dotted keys such as
    ``facts.pricing_analysis`` are covered by ``NESTED_HANDLERS``.

    Args:
        doc_id: Stable document identifier the chunks belong to.
        chain: Chain/domain code detected from the file path; may be
            overridden by ``identity.chain`` in the payload.
        extracted_knowledge: Parsed YAML front matter payload.
        verbose: When True, print handler errors and unhandled keys.

    Returns:
        List of chunk dicts with ``chunk_id``, ``doc_id``, ``chain``,
        ``content`` (stripped, capped at 5000 chars) and merged ``metadata``.
    """
    chunks: List[Dict[str, Any]] = []
    chunk_index = 0
    unhandled_keys: List[str] = []

    # --- Pre-extract hotel info for the handler context -------------------
    hotel_name = "Unknown Hotel"
    hotel_name_ko = None
    hotel_id_map: Dict[str, Dict[str, Any]] = {}  # hotel_id -> hotel info

    hotel_properties = extracted_knowledge.get("hotel_properties", [])
    if hotel_properties and isinstance(hotel_properties, list):
        for hotel in hotel_properties:
            if isinstance(hotel, dict):
                h_id = hotel.get("hotel_id")
                h_name = hotel.get("name", "Unknown")
                h_name_localized = hotel.get("name_localized", {})
                h_name_ko = h_name_localized.get("ko") if isinstance(h_name_localized, dict) else None
                # Register the hotel_id -> hotel-info mapping
                if h_id:
                    hotel_id_map[h_id] = {
                        "name": h_name,
                        "name_ko": h_name_ko,
                        "country": hotel.get("location", {}).get("country", "") if isinstance(hotel.get("location"), dict) else "",
                        "city": hotel.get("location", {}).get("city", "") if isinstance(hotel.get("location"), dict) else ""
                    }
        # The first hotel acts as the document's default hotel.
        first_hotel = hotel_properties[0]
        if isinstance(first_hotel, dict):
            hotel_name = first_hotel.get("name", "Unknown Hotel")
            name_localized = first_hotel.get("name_localized", {})
            hotel_name_ko = name_localized.get("ko") if isinstance(name_localized, dict) else None

    # --- Supplement from the identity section (prompt-output compat) ------
    identity = extracted_knowledge.get("identity", {})
    if identity and isinstance(identity, dict):
        if identity.get("title") and hotel_name == "Unknown Hotel":
            hotel_name = identity.get("title")
        if identity.get("chain"):
            chain = identity.get("chain", chain)

    # --- Source section: provenance info ----------------------------------
    source = extracted_knowledge.get("source", {})
    source_type = source.get("source_type") if isinstance(source, dict) else None
    source_url = source.get("canonical_url") if isinstance(source, dict) else None
    retrieved_at = source.get("retrieved_at") if isinstance(source, dict) else None

    # --- Version section: validity-period info ----------------------------
    version = extracted_knowledge.get("version", {})
    effective_date = version.get("effective_date") if isinstance(version, dict) else None
    last_updated = version.get("last_updated") if isinstance(version, dict) else None

    # Handler context (extended with source/version info)
    context = {
        "chain": chain,
        "hotel_name": hotel_name,
        "hotel_name_ko": hotel_name_ko,
        "hotel_id_map": hotel_id_map,  # hotel_id -> hotel-info mapping
        "doc_id": doc_id,
        # identity extras.
        # FIX: previously guarded with a bare `if identity`, which raised
        # AttributeError when identity was a truthy non-dict (e.g. a string);
        # use isinstance, consistent with the source/version guards above.
        "document_category": identity.get("category") if isinstance(identity, dict) else None,
        "document_type": identity.get("doc_type", identity.get("document_type")) if isinstance(identity, dict) else None,
        # source info (useful for search ranking/filtering)
        "source_type": source_type,  # OFFICIAL, USER_GENERATED, NEWS, ...
        "source_url": source_url,
        "retrieved_at": retrieved_at,
        # version info (useful for time-based ranking)
        "effective_date": effective_date,
        "last_updated": last_updated,
    }

    # Common metadata automatically merged into every chunk
    common_metadata: Dict[str, Any] = {}
    if source_type:
        common_metadata["source_type"] = source_type
    if effective_date:
        common_metadata["effective_date"] = effective_date
    if last_updated:
        common_metadata["last_updated"] = last_updated

    def add_chunk(content: str, metadata: Dict[str, Any]):
        """Append one chunk, merging in the common metadata.

        Content shorter than ~50 chars (pre-strip length) is dropped as noise.
        """
        nonlocal chunk_index
        if content and content.strip() and len(content) > 50:
            # Handler-provided metadata wins over the shared defaults
            merged_metadata = {**common_metadata, **metadata}
            chunks.append({
                "chunk_id": generate_chunk_id(doc_id, chunk_index),
                "doc_id": doc_id,
                "chain": chain,
                "content": content.strip()[:5000],
                "metadata": merged_metadata
            })
            chunk_index += 1

    # 1. Top-level keys: dispatch each to its registered handler
    for key, value in extracted_knowledge.items():
        if is_ignored(key):
            continue

        handler = get_handler(key)
        if handler:
            try:
                result_chunks = handler(value, context)
                for rc in result_chunks:
                    add_chunk(rc["content"], rc["metadata"])
            except Exception as e:
                # A single failing handler must not abort the whole document
                if verbose:
                    print(f" โ ๏ธ ํธ๋ค๋ฌ ์ค๋ฅ ({key}): {e}")
        else:
            unhandled_keys.append(key)

    # 2. Nested keys (e.g. facts.pricing_analysis)
    for nested_key, handler in NESTED_HANDLERS.items():
        if handler is None:
            continue
        parts = nested_key.split(".")
        if len(parts) == 2:
            parent_key, child_key = parts
            parent_data = extracted_knowledge.get(parent_key)
            if isinstance(parent_data, dict) and child_key in parent_data:
                try:
                    result_chunks = handler(parent_data[child_key], context)
                    for rc in result_chunks:
                        add_chunk(rc["content"], rc["metadata"])
                except Exception as e:
                    if verbose:
                        print(f" โ ๏ธ ์ค์ฒฉ ํธ๋ค๋ฌ ์ค๋ฅ ({nested_key}): {e}")

    # 3. Warn about keys no handler claimed
    if unhandled_keys and verbose:
        print(f" โ ๏ธ ๋ฏธ์ฒ๋ฆฌ ํค: {', '.join(unhandled_keys)}")

    return chunks
# ===========================================================================
# Document ID generation
# ===========================================================================
def generate_doc_id(file_path: str) -> str:
    """Derive a short, deterministic document ID from a file path.

    MD5 is used purely as a fast, stable fingerprint here — not for security.
    """
    fingerprint = hashlib.md5(file_path.encode())
    return fingerprint.hexdigest()[:12]
# ===========================================================================
# YAML file loading
# ===========================================================================
def load_yaml_from_md(file_path: Path) -> Optional[Dict[str, Any]]:
    """Extract the YAML front matter block from a markdown file.

    Scans line by line for the first two ``---`` fence lines and parses
    everything between them. Returns the parsed mapping, or None when the
    file cannot be read, no complete fence pair exists, or the block parses
    to an empty/falsy value.
    """
    try:
        raw_lines = file_path.read_text(encoding='utf-8').split('\n')

        start = None
        end = None
        for idx, raw in enumerate(raw_lines):
            if raw.strip() != '---':
                continue
            if start is None:
                start = idx + 1   # first fence: YAML begins on the next line
            else:
                end = idx         # second fence: YAML ends just before it
                break

        if start is None or end is None:
            return None

        parsed = yaml.safe_load('\n'.join(raw_lines[start:end]))
        return parsed or None
    except Exception:
        # Best-effort loader: unreadable/broken files are simply skipped
        return None
def detect_chain(file_path: Path) -> str:
    """Detect the chain/domain code from a file path (hotels + airlines +
    cards + news).

    Matching is case-insensitive substring search on the path. The first
    matching rule wins, so ordering matters: specific brands are tested
    before the generic ``/AIRLINE/`` / ``/CREDITCARD/`` / news fallbacks.

    Args:
        file_path: Path of the markdown source file.

    Returns:
        An upper-case chain code, or "OTHER" when nothing matches.
    """
    # Uppercase once; every check below relies on this, so the repeated
    # .upper() calls the original sprinkled on later branches were redundant
    # and have been removed (no behavior change).
    path_str = str(file_path).upper()

    # --- Hotel chains ---
    if "MARRIOTT" in path_str:
        return "MARRIOTT"
    elif "HILTON" in path_str:
        return "HILTON"
    elif "IHG" in path_str:
        return "IHG"
    elif "ACCOR" in path_str or "ACCO" in path_str:
        return "ACCOR"
    elif "HYATT" in path_str:
        return "HYATT"
    # Lotte Hotel (only under a hotel path, to distinguish from Lotte Card)
    elif "LOTTE" in path_str and "/HOTEL/" in path_str:
        return "LOTTE"
    # Jumeirah Hotels & Resorts (Dubai luxury chain)
    elif "JUMEIRAH" in path_str and "/HOTEL/" in path_str:
        return "JUMEIRAH"
    # --- Airlines (Phase 1 expansion) ---
    elif "KOREAN_AIR" in path_str or "KOREANAIR" in path_str:
        return "KOREAN_AIR"
    elif "ASIANA" in path_str:
        return "ASIANA"
    elif "DELTA" in path_str:
        return "DELTA"
    elif "UNITED" in path_str:
        return "UNITED"
    elif "ALLIANCE" in path_str or "ONEWORLD" in path_str or "STAR_ALLIANCE" in path_str or "SKYTEAM" in path_str:
        return "ALLIANCE"
    elif "/AIRLINE/" in path_str:
        return "AIRLINE"
    # --- Card issuers (Phase 1 expansion) ---
    elif "AMEX" in path_str or "FHR" in path_str or "THC" in path_str:
        return "AMEX"
    elif "SHINHAN" in path_str:
        return "SHINHAN"
    elif "HYUNDAI" in path_str and "CARD" in path_str:
        return "HYUNDAI"
    elif "HANA" in path_str and ("CARD" in path_str or "/CREDITCARD/" in path_str):
        return "HANA"
    elif "LOTTE" in path_str and ("CARD" in path_str or "/CREDITCARD/" in path_str):
        return "LOTTE"
    elif "WOORI" in path_str and ("CARD" in path_str or "/CREDITCARD/" in path_str):
        return "WOORI"
    elif "KB" in path_str and "CARD" in path_str:
        return "KB"
    elif "SAMSUNG" in path_str and "CARD" in path_str:
        return "SAMSUNG"
    elif "/CREDITCARD/" in path_str:
        return "CARD"
    # --- News / deals (Phase 1 expansion) ---
    elif "/NEWS/" in path_str or "/DEAL" in path_str:
        return "NEWS"
    # --- Miscellaneous ---
    elif "BENEFIT_RATE" in path_str:
        return "BENEFIT"
    else:
        return "OTHER"
# ===========================================================================
# Main sync entry point
# ===========================================================================
def main(chain: Optional[str], domain: str, dry_run: bool, skip_embeddings: bool, verbose: bool, file: Optional[str]):
    """Sync YAML data to Supabase (extended: hotels + airlines + cards + news).

    NOTE(review): the module imports ``click`` and the ``__main__`` guard
    calls ``main()`` with no arguments, yet no ``@click.command`` /
    ``@click.option`` decorators are visible here. They appear to have been
    lost; without them the bare call raises TypeError — confirm against the
    original script.
    """
    print("๐ Supabase ๋๊ธฐํ (์ฌํ ํ๋ซํผ ํตํฉ ๋ฒ์ )")
    print("=" * 60)

    # Per-domain data directories
    domain_dirs = {
        'hotel': Path("data/raw/Hotel"),
        'airline': Path("data/raw/Airline"),
        'card': Path("data/raw/CreditCard"),
        'news': Path("data/raw/News"),
    }

    # Decide which directories to scan (only those that exist)
    if domain == 'all':
        data_dirs = [d for d in domain_dirs.values() if d.exists()]
    else:
        data_dirs = [domain_dirs[domain]] if domain_dirs[domain].exists() else []

    if not data_dirs:
        print(f"โ ๋ฐ์ดํฐ ๋๋ ํ ๋ฆฌ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค")
        return

    print(f"๐ ์ค์บ ๋๋ ํ ๋ฆฌ: {', '.join(str(d) for d in data_dirs)}")

    # File list: either the single --file target or a recursive *.md scan
    if file:
        md_files = [Path(file)]
    else:
        md_files = []
        for data_dir in data_dirs:
            md_files.extend(list(data_dir.rglob("*.md")))

    print(f"๐ ์ด {len(md_files)}๊ฐ ํ์ผ ๋ฐ๊ฒฌ")

    # Chain filter (normalized to upper case for comparison)
    if chain:
        chain = chain.upper()
        print(f"๐ ํํฐ: {chain}")

    # Counters for the final report
    stats = {
        "total": 0,
        "success": 0,
        "skipped": 0,
        "error": 0,
        "chunks": 0,
    }

    # Quarantine report: problem files grouped by failure category
    quarantine = {
        "no_yaml": [],         # no YAML front matter
        "no_knowledge": [],    # no extracted_knowledge section
        "no_chunks": [],       # chunk generation produced nothing
        "missing_fields": [],  # required-field warnings
    }

    all_chunks = []
    all_docs = []

    for md_file in md_files:
        stats["total"] += 1

        # Detect the chain and apply the optional filter
        file_chain = detect_chain(md_file)
        if chain and file_chain != chain:
            stats["skipped"] += 1
            continue

        # Load YAML front matter
        data = load_yaml_from_md(md_file)
        if not data or not isinstance(data, dict):
            if verbose:
                print(f" โ ๏ธ {md_file.name} (YAML ์์)")
            quarantine["no_yaml"].append(str(md_file.name))
            stats["skipped"] += 1
            continue

        # Pull out extracted_knowledge (fall back to the top-level mapping)
        extracted_knowledge = data.get("extracted_knowledge")
        if not extracted_knowledge or not isinstance(extracted_knowledge, dict):
            # Without extracted_knowledge, data itself may be the knowledge;
            # probe for core keys across the supported domains.
            core_keys = {
                # hotel
                "hotel_properties", "loyalty_programs", "loyalty_program",
                "membership_tiers", "tier_implementations", "hotel_brands",
                "best_rate_guarantee", "channel_benefit_packages",
                # airline
                "airline_programs", "airline_program", "airline_tiers",
                "award_charts", "airline_earning_rules",
                # card
                "credit_cards",
                # promotions / news
                "deal_alerts", "news_updates", "promotions",
                # misc
                "points_systems", "member_rates", "dining_programs",
            }
            if any(key in data for key in core_keys):
                extracted_knowledge = data
            else:
                if verbose:
                    print(f" โ ๏ธ {md_file.name} (extracted_knowledge ์์)")
                quarantine["no_knowledge"].append(str(md_file.name))
                stats["skipped"] += 1
                continue

        # Build the document ID.
        # With --file there may be no data_dir, so compute from the path.
        try:
            # Relative path anchored at data/raw
            data_raw = Path("data/raw")
            if md_file.is_relative_to(data_raw):
                # NOTE(review): this yields "raw/..." (relative to data/raw's
                # PARENT) while the branch below yields "data/raw/..." — so
                # the same file can hash to different doc_ids depending on
                # whether its path is relative or absolute. Verify which form
                # the existing kb_documents rows were created with.
                rel_path = str(md_file.relative_to(data_raw.parent))
            elif "data/raw" in str(md_file):
                # Absolute path: keep everything from "data/raw" onward
                path_str = str(md_file)
                idx = path_str.find("data/raw")
                rel_path = path_str[idx:] if idx >= 0 else str(md_file.name)
            else:
                rel_path = str(md_file)
        except Exception:
            rel_path = str(md_file)

        doc_id = generate_doc_id(rel_path)

        # Generate chunks
        chunks = create_chunks_from_knowledge(
            doc_id=doc_id,
            chain=file_chain,
            extracted_knowledge=extracted_knowledge,
            verbose=verbose
        )

        if chunks:
            print(f" โ {md_file.name} ({len(chunks)}๊ฐ ์ฒญํฌ)")
            stats["success"] += 1
            stats["chunks"] += len(chunks)

            # Per-document bookkeeping for the kb_documents upsert below
            all_docs.append({
                "doc_id": doc_id,
                "source_file": rel_path,
                "chain": file_chain,
                "chunk_count": len(chunks),
            })
            all_chunks.extend(chunks)
        else:
            if verbose:
                print(f" โ ๏ธ {md_file.name} (์ฒญํฌ ์์)")
            quarantine["no_chunks"].append(str(md_file.name))
            stats["skipped"] += 1

    print()
    print("=" * 60)
    print(f"๐ ๊ฒฐ๊ณผ: {stats['success']}๊ฐ ์ฑ๊ณต, {stats['skipped']}๊ฐ ๊ฑด๋๋")
    print(f" ์ด ์ฒญํฌ: {stats['chunks']}๊ฐ")

    # Quarantine report (only printed when there are problem files)
    total_quarantined = sum(len(v) for v in quarantine.values())
    if total_quarantined > 0:
        print(f"\nโ ๏ธ Quarantine ๋ฆฌํฌํธ ({total_quarantined}๊ฐ ํ์ผ):")
        if quarantine["no_yaml"]:
            print(f" ๐ YAML ์์ ({len(quarantine['no_yaml'])}๊ฐ): {', '.join(quarantine['no_yaml'][:5])}")
        if quarantine["no_knowledge"]:
            print(f" ๐ extracted_knowledge ์์ ({len(quarantine['no_knowledge'])}๊ฐ): {', '.join(quarantine['no_knowledge'][:5])}")
        if quarantine["no_chunks"]:
            print(f" ๐ ์ฒญํฌ ์์ฑ ์คํจ ({len(quarantine['no_chunks'])}๊ฐ): {', '.join(quarantine['no_chunks'][:5])}")
        print(" โ ์ ํ์ผ๋ค์ ์๋ ๊ฒํ ๊ฐ ํ์ํฉ๋๋ค.")

    if dry_run:
        print("\n๐ Dry-run ๋ชจ๋ - ์ ์ฅํ์ง ์์")
        return

    # Persist to Supabase
    print("\n๐พ Supabase์ ์ ์ฅ ์ค...")
    try:
        from src.db import SupabaseAdapter
        adapter = SupabaseAdapter()

        # Save documents (required fields only)
        for doc in all_docs:
            try:
                doc_data = {
                    "doc_id": doc["doc_id"],
                    "source_file": doc["source_file"],
                    "chain": doc["chain"],
                    "extracted_knowledge": {},  # empty dict (NOT NULL column)
                }
                adapter.client.table("kb_documents").upsert(doc_data, on_conflict='doc_id').execute()
            except Exception as e:
                # Best-effort: one failed document row must not stop the rest
                if verbose:
                    print(f" โ ๏ธ ๋ฌธ์ ์ ์ฅ ์ค๋ฅ: {e}")

        # Save chunks (embeddings generated unless --skip-embeddings)
        saved = adapter.upsert_chunks(
            chunks=all_chunks,
            generate_embeddings=not skip_embeddings
        )

        print(f"\nโ Supabase ๋๊ธฐํ ์๋ฃ!")
        print(f" ๋ฌธ์: {len(all_docs)}๊ฐ")
        print(f" ์ฒญํฌ: {saved}๊ฐ")

    except Exception as e:
        print(f"\nโ Supabase ์ ์ฅ ์ค๋ฅ: {e}")
        import traceback
        if verbose:
            traceback.print_exc()


if __name__ == "__main__":
    # NOTE(review): works only if click decorators supply the arguments —
    # see the note in main()'s docstring.
    main()