Spaces:

JHyeok5
/

samchun-gemini

Running

App Files Files Community

samchun-gemini / utils /cache_utils.py

JHyeok5

Upload folder using huggingface_hub

0f3460d verified 21 days ago

raw

history blame contribute delete

6.66 kB

	"""
	Social Caching 유틸리티

	다른 사용자가 생성한 인기 코스를 재사용하여 AI 비용 절감.
	캐시 히트 시 0.5초 이내 응답 목표.

	@module cache_utils
	@description
	- 코스 생성 파라미터를 해시로 변환
	- 유사한 조건의 코스를 빠르게 매칭
	- 위치는 약 1km 반경으로 그룹화
	- 시간은 10분 단위로 반올림

	@changelog
	- v1.1.0 (2026-01-26): 해시 길이 증가 (L001)
	- 16자리 -> 32자리로 변경
	- 충돌 확률 감소: 2^64 -> 2^128 가능 조합
	- v1.0.0 (2026-01-25): 초기 구현
	"""

	import hashlib
	import json
	from typing import Dict, Any, Optional, List


	def create_params_hash(
	    theme: Optional[str],
	    duration_minutes: int,
	    location_lat: float,
	    location_lng: float,
	    activity_level: Optional[str] = None,
	    mood: Optional[List[str]] = None
	) -> str:
	    """
	    Convert course-generation parameters into a cache-lookup hash.

	    Parameters are normalized first so that similar requests share a hash:
	    - duration: rounded to the nearest 10 minutes, halves rounding up
	      (55 -> 60, 64 -> 60, 65 -> 70)
	    - location: rounded to 2 decimal places (0.01 degree buckets)
	    - mood: sorted, so the order of the list does not matter

	    Args:
	        theme: Theme name; a falsy value is replaced by "general".
	        duration_minutes: Desired duration in minutes.
	        location_lat: User latitude.
	        location_lng: User longitude.
	        activity_level: Activity level (optional; omitted from the hash
	            when falsy).
	        mood: List of mood tags (optional; omitted from the hash when
	            empty or None).

	    Returns:
	        The first 32 hexadecimal characters of the SHA-256 digest of the
	        normalized parameters serialized as JSON.

	    Examples:
	        >>> len(create_params_hash("history", 60, 33.45, 126.32))
	        32

	        >>> (create_params_hash("nature", 55, 33.456, 126.321)
	        ...  == create_params_hash("nature", 60, 33.46, 126.32))
	        True

	        >>> (create_params_hash("food", 90, 33.45, 126.32, mood=["romantic", "quiet"])
	        ...  == create_params_hash("food", 90, 33.45, 126.32, mood=["quiet", "romantic"]))
	        True
	    """
	    # Round half up to the nearest 10 minutes. The built-in round() uses
	    # banker's rounding (round(6.5) == 6), which would map 65 -> 60 while
	    # 55 -> 60, so floor division on the shifted value is used instead.
	    duration_rounded = int((duration_minutes + 5) // 10) * 10

	    # Bucket the location on a 0.01 degree grid. float() keeps int and float
	    # inputs on the same JSON representation ("33.0", never "33"), and
	    # "+ 0.0" turns a rounded -0.0 into 0.0 so both serialize identically.
	    lat_rounded = round(float(location_lat), 2) + 0.0
	    lng_rounded = round(float(location_lng), 2) + 0.0

	    hash_data: Dict[str, Any] = {
	        "theme": theme or "general",
	        "duration": duration_rounded,
	        "lat": lat_rounded,
	        "lng": lng_rounded,
	    }

	    # Optional fields are only included when they carry a value.
	    if activity_level:
	        hash_data["activity"] = activity_level

	    if mood:
	        # Sorted so the same moods in any order yield the same hash.
	        hash_data["mood"] = sorted(mood)

	    # sort_keys=True keeps the serialized key order stable.
	    json_str = json.dumps(hash_data, sort_keys=True, ensure_ascii=False)
	    hash_obj = hashlib.sha256(json_str.encode('utf-8'))

	    # 32 hex characters = 128 bits of the digest.
	    HASH_LENGTH = 32
	    return hash_obj.hexdigest()[:HASH_LENGTH]


	def normalize_params_for_cache(
	    theme: Optional[str],
	    duration_minutes: int,
	    location_lat: float,
	    location_lng: float,
	    activity_level: Optional[str] = None,
	    mood: Optional[List[str]] = None
	) -> Dict[str, Any]:
	    """
	    Normalize course-generation parameters for cache lookup.

	    Returns the normalized values as a dictionary, e.g. for debugging or
	    logging:
	    - duration: rounded to the nearest 10 minutes, halves rounding up
	      (55 -> 60, 64 -> 60, 65 -> 70)
	    - location: rounded to 2 decimal places (0.01 degree buckets)
	    - mood: sorted

	    Args:
	        theme: Theme name; a falsy value is replaced by "general".
	        duration_minutes: Desired duration in minutes.
	        location_lat: User latitude.
	        location_lng: User longitude.
	        activity_level: Activity level (optional).
	        mood: List of mood tags (optional).

	    Returns:
	        Dictionary with keys "theme", "duration", "lat", "lng", plus
	        "activity" and "mood" only when those arguments carry a value.
	    """
	    # Round half up to the nearest 10 minutes. The built-in round() uses
	    # banker's rounding (round(6.5) == 6), which would map 65 -> 60 while
	    # 55 -> 60, so floor division on the shifted value is used instead.
	    duration_rounded = int((duration_minutes + 5) // 10) * 10

	    # float() gives int and float inputs the same representation, and
	    # "+ 0.0" turns a rounded -0.0 into 0.0.
	    lat_rounded = round(float(location_lat), 2) + 0.0
	    lng_rounded = round(float(location_lng), 2) + 0.0

	    result: Dict[str, Any] = {
	        "theme": theme or "general",
	        "duration": duration_rounded,
	        "lat": lat_rounded,
	        "lng": lng_rounded,
	    }

	    # Optional fields are only included when they carry a value.
	    if activity_level:
	        result["activity"] = activity_level

	    if mood:
	        result["mood"] = sorted(mood)

	    return result


	def calculate_cache_similarity(
	    params1: Dict[str, Any],
	    params2: Dict[str, Any]
	) -> float:
	    """
	    Compute a similarity score between two normalized parameter sets.

	    Helper for finding similar courses even when their hashes differ.

	    Weights: theme 30%, duration 25%, location 25%, activity 10%, mood 10%.

	    Args:
	        params1: First normalized parameter dictionary.
	        params2: Second normalized parameter dictionary.

	    Returns:
	        Similarity rounded to 2 decimals, from 0.0 (nothing matches) to
	        1.0 (everything matches).
	    """
	    score = 0.0
	    max_score = 0.0

	    # Theme match (weight: 30%)
	    max_score += 0.3
	    if params1.get("theme") == params2.get("theme"):
	        score += 0.3

	    # Duration similarity (weight: 25%); a missing duration defaults to 60.
	    max_score += 0.25
	    time_diff = abs(params1.get("duration", 60) - params2.get("duration", 60))
	    if time_diff == 0:
	        score += 0.25
	    elif time_diff <= 10:
	        score += 0.2
	    elif time_diff <= 20:
	        score += 0.15
	    elif time_diff <= 30:
	        score += 0.1

	    # Location similarity (weight: 25%); missing coordinates default to 0.
	    max_score += 0.25
	    lat_diff = abs(params1.get("lat", 0) - params2.get("lat", 0))
	    lng_diff = abs(params1.get("lng", 0) - params2.get("lng", 0))
	    # Straight-line distance in degrees. It is rounded before comparison
	    # because inputs sit on a 0.01 grid, so distances land exactly on the
	    # thresholds below and raw float error (e.g. 0.010000000000005) would
	    # otherwise push them into the wrong tier unpredictably.
	    dist_approx = round((lat_diff ** 2 + lng_diff ** 2) ** 0.5, 6)
	    if dist_approx <= 0.01:  # ~1km
	        score += 0.25
	    elif dist_approx <= 0.02:  # ~2km
	        score += 0.2
	    elif dist_approx <= 0.05:  # ~5km
	        score += 0.1

	    # Activity level match (weight: 10%); both missing counts as a match.
	    max_score += 0.1
	    if params1.get("activity") == params2.get("activity"):
	        score += 0.1

	    # Mood overlap (weight: 10%), scored as |intersection| / |union|.
	    # "or ()" also treats an explicit None value as "no moods".
	    max_score += 0.1
	    mood1 = set(params1.get("mood") or ())
	    mood2 = set(params2.get("mood") or ())
	    if mood1 and mood2:
	        overlap = len(mood1 & mood2)
	        total = len(mood1 | mood2)
	        score += 0.1 * (overlap / total)
	    elif not mood1 and not mood2:
	        # Neither side specified a mood: treat as a full match.
	        score += 0.1

	    return round(score / max_score, 2) if max_score > 0 else 0.0