Spaces:
Running
Running
"""
Context Compression Utilities

Compression utilities for reducing AI prompt token usage.

@module context_compression

@description
- Converts spot data into a compressed format to save prompt tokens.
- Goal: prompt tokens 5,000 -> 500 (90% reduction).
- API cost reduced by 30%, response speed improved by 20-30%.

@changelog
- v1.0.0 (2026-01-25): Initial implementation
  - compress_spot: compress a single spot (JSON -> pipe-delimited string)
  - compress_spots: compress a list of spots
  - decompress_course_spots: restore original spots from the spot_ids in
    the AI response
  - create_compression_guide: compressed-format guide for the prompt

@example
Before (about 500 tokens; sample values shown in English):
{
    "id": "vj_123",
    "name": "<harbor name>",
    "category": "harbor",
    "location": {"lat": 33.456, "lng": 126.789},
    "tags": ["history", "sea", "photo"],
    "story_preview": "An invasion 400 years ago..."
}

After (about 50 tokens):
vj_123|<harbor name>|harbor|33.4560,126.7890|history,sea,photo|15
"""
| from typing import List, Dict, Any, Optional | |
| import logging | |
| logger = logging.getLogger(__name__) | |
def compress_spot(spot: Dict[str, Any]) -> str:
    """Convert a single spot into its pipe-delimited compressed form.

    Output layout: ``spot_id|name|category|lat,lng|tag1,tag2|stay_minutes``.

    ``location``, its ``lat``/``lng`` values, ``tags`` and ``meta`` may be
    missing or explicitly ``None``; both cases use the same defaults
    (coordinates ``0``, no tags, 15 minutes stay).

    Args:
        spot: Spot dictionary. Keys read: ``id``, ``name``, ``category``,
            ``location`` (``lat``/``lng``), ``tags`` and ``meta``
            (``stay_duration_min``).

    Returns:
        The compressed single-line representation of the spot.

    Example:
        >>> compress_spot({"id": "vj_123", "name": "Harbor",
        ...                "category": "port",
        ...                "location": {"lat": 33.456, "lng": 126.789},
        ...                "tags": ["history", "sea"]})
        'vj_123|Harbor|port|33.4560,126.7890|history,sea|15'
    """
    # Basic fields.
    spot_id = spot.get("id", "")
    name = spot.get("name", "")
    category = spot.get("category", "")

    # Location, limited to 4 decimal places. ``or`` covers both a missing
    # key and an explicit None (e.g. JSON null), which would otherwise crash.
    location = spot.get("location") or {}
    lat = location.get("lat") or 0
    lng = location.get("lng") or 0
    loc_str = f"{lat:.4f},{lng:.4f}"

    # Tags, capped at 5. str() keeps join() from failing on non-string tags.
    tags = (spot.get("tags") or [])[:5]
    tags_str = ",".join(str(tag) for tag in tags)

    # Stay duration in minutes comes from the optional ``meta`` dict.
    meta = spot.get("meta") or {}
    stay_duration = meta.get("stay_duration_min", 15)

    return f"{spot_id}|{name}|{category}|{loc_str}|{tags_str}|{stay_duration}"
def compress_spots(spots: List[Dict[str, Any]]) -> str:
    """Compress a list of spots into a newline-separated block.

    Each spot is passed through ``compress_spot`` and the resulting lines
    are joined with ``"\\n"`` in input order.

    Args:
        spots: List of spot dictionaries.

    Returns:
        One compressed line per spot, separated by newlines; an empty
        string when ``spots`` is empty.

    Example:
        vj_001|Harbor|port|33.4560,126.7890|history,sea|15
        vj_002|Beach|beach|33.4567,126.7891|nature,photo|20
    """
    return "\n".join(map(compress_spot, spots))
def create_compression_guide() -> str:
    """Return the compressed-format description that is embedded in the prompt.

    The text explains the pipe-delimited line layout so the AI can read the
    compressed spot data, and carries an instruction about the spot_id
    values allowed in the response.

    Returns:
        The format guide string to insert into the prompt.
    """
    # NOTE(review): the literal below is runtime prompt text and is kept
    # exactly as found; it looks mis-encoded in this view — confirm the
    # file's encoding before editing it.
    return """**์คํ ๋ฐ์ดํฐ ํ์ (์์ถ)**
๊ฐ ์ค ํ์: spot_id|์ด๋ฆ|์นดํ ๊ณ ๋ฆฌ|์๋,๊ฒฝ๋|ํ๊ทธ๋ค|์ฒด๋ฅ์๊ฐ(๋ถ)
์์: vj_123|ํ๊ทํฌ๊ตฌ|ํฌ๊ตฌ|33.4560,126.7890|์ญ์ฌ,๋ฐ๋ค|15
**์ค์**: ์๋ต์ spot_id๋ ๋ฐ๋์ ์ ๋ชฉ๋ก์ ์๋ ID๋ง ์ฌ์ฉํ์ธ์.
"""
def decompress_course_spots(
    compressed_spot_ids: List[str],
    original_spots: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Restore full spot dictionaries from the spot_ids returned by the AI.

    The result follows the order of ``compressed_spot_ids``. Ids that do not
    match any original spot are skipped with a warning. Original spots that
    have no ``"id"`` key are left out of the lookup instead of raising
    ``KeyError``, matching ``compress_spot``, which also tolerates a
    missing id.

    Args:
        compressed_spot_ids: spot_id values returned by the AI.
        original_spots: The original spot dictionaries.

    Returns:
        The matching original spot dictionaries (the same objects, not
        copies).

    Example:
        Input:  ["vj_001", "vj_003", "vj_005"]
        Output: [{"id": "vj_001", ...}, {"id": "vj_003", ...}, ...]
    """
    # spot_id -> original spot; on duplicate ids the later spot wins.
    spot_map = {spot["id"]: spot for spot in original_spots if "id" in spot}

    decompressed = []
    for spot_id in compressed_spot_ids:
        if spot_id not in spot_map:
            # An unknown id is dropped rather than treated as an error.
            logger.warning("[decompress] spot_id not found: %s", spot_id)
            continue
        decompressed.append(spot_map[spot_id])
    return decompressed
def calculate_compression_ratio(
    original_json: str,
    compressed_str: str
) -> Dict[str, Any]:
    """Compute size statistics comparing the original and compressed text.

    Args:
        original_json: The original JSON string.
        compressed_str: The compressed string.

    Returns:
        A dictionary with:
            original_chars / compressed_chars: character counts.
            original_tokens_est / compressed_tokens_est: rough token
                estimates (character count // 2).
            ratio_percent: compressed size as a percentage of the original,
                rounded to one decimal place.
            savings_percent: ``100 - ratio``, rounded to one decimal place.
        When ``original_json`` is empty there is nothing to compare against,
        so both percentages are reported as ``0.0``.
    """
    original_len = len(original_json)
    compressed_len = len(compressed_str)

    # Rough token estimate: about 2 characters per token for Korean text.
    original_tokens_est = original_len // 2
    compressed_tokens_est = compressed_len // 2

    if original_len > 0:
        ratio = compressed_len / original_len * 100
        savings = 100 - ratio
    else:
        # Avoid dividing by zero, and avoid claiming 100% savings on an
        # empty input.
        ratio = 0.0
        savings = 0.0

    return {
        "original_chars": original_len,
        "compressed_chars": compressed_len,
        "original_tokens_est": original_tokens_est,
        "compressed_tokens_est": compressed_tokens_est,
        "ratio_percent": round(ratio, 1),
        "savings_percent": round(savings, 1),
    }