# voicebot/core/cag_system.py — "Create cag_system.py" (commit 47284c1, author: datbkpro)
# services/cag_service.py
import hashlib
import json
import time
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional, Tuple
import numpy as np
import faiss
import redis
import pickle
import os
from dataclasses import dataclass
from enum import Enum
@dataclass
class CAGConfig:
    """Configuration for the CAG (Cache-Augmented Generation) system.

    BUGFIX: the original declared every setting as a bare (unannotated)
    class attribute, so ``@dataclass`` generated an empty, field-less
    ``__init__`` and the decorator did nothing. Annotating the attributes
    turns them into real dataclass fields — same names, same defaults —
    so instances carry their own values and show up in ``repr``/``eq``,
    and individual settings can be overridden at construction time.
    """
    # --- Cache layer toggles ---
    USE_MEMORY_CACHE: bool = True
    USE_REDIS_CACHE: bool = False
    USE_DISK_CACHE: bool = True
    CACHE_DIR: str = ".cag_cache"
    # --- TTL settings (seconds) ---
    EMBEDDING_TTL: int = 86400        # 24 hours
    SEARCH_RESULT_TTL: int = 3600     # 1 hour
    SEMANTIC_CACHE_TTL: int = 7200    # 2 hours
    GENERATION_TTL: int = 1800        # 30 minutes
    # --- Cache thresholds ---
    SEMANTIC_SIMILARITY_THRESHOLD: float = 0.85  # cosine-similarity gate for semantic hits
    MIN_QUERY_LENGTH: int = 3                    # queries shorter than this skip the cache
    MAX_CACHE_SIZE: int = 10000                  # max entries per cache layer
    # --- Performance settings ---
    ENABLE_CACHE_STATS: bool = True
    LOG_CACHE_PERFORMANCE: bool = True
class CacheHitType(str, Enum):
    """Classification of how a cached lookup was satisfied.

    Inherits from ``str`` so members compare equal to their literal
    values and serialize cleanly (e.g. into JSON responses).
    """
    EXACT = "exact"        # the identical query was already cached
    SEMANTIC = "semantic"  # a sufficiently similar query was cached
    PARTIAL = "partial"    # partial-overlap hit
    NONE = "none"          # cache miss
class CAGService:
    """Cache-Augmented Generation (CAG) service.

    Sits in front of a RAG system and serves repeated or similar queries
    from a layered cache: an in-memory dict, an optional Redis layer, and
    on-disk pickle files. A FAISS inner-product index over query
    embeddings additionally lets paraphrased queries hit the cache
    ("semantic cache").
    """

    def __init__(self, rag_system, multilingual_manager):
        """Bind the service to its backends.

        Args:
            rag_system: backend exposing ``semantic_search(query, top_k=...)``
                (assumed interface — confirm against the RAG system).
            multilingual_manager: backend exposing ``detect_language(text)``
                and ``get_embedding_model(language)``.
        """
        self.rag_system = rag_system
        self.multilingual_manager = multilingual_manager

        # Cache configuration
        self.config = CAGConfig()

        # Cache storage. ``memory_cache`` is insertion-ordered (plain dict),
        # which ``_memory_store`` exploits for FIFO eviction.
        self.memory_cache = {}
        self.semantic_cache_index = None      # FAISS IndexFlatIP, built lazily
        self.semantic_cache_embeddings = []   # parallel list to semantic_cache_keys
        self.semantic_cache_keys = []

        # Redis client (optional)
        self.redis_client = None
        self._init_redis()

        # Disk cache
        self._init_cache_directory()

        # Performance tracking
        self.stats = {
            "total_queries": 0,
            "cache_hits": 0,
            "exact_hits": 0,
            "semantic_hits": 0,
            "response_times": [],
            "cost_savings": 0
        }
        print("✅ CAG Service initialized")

    def _init_redis(self):
        """Connect to a local Redis when USE_REDIS_CACHE is set.

        On any failure the Redis layer is disabled rather than raised,
        so the service degrades to memory/disk caching.
        """
        if self.config.USE_REDIS_CACHE:
            try:
                self.redis_client = redis.Redis(
                    host='localhost',
                    port=6379,
                    db=0,
                    decode_responses=False  # entries are pickled bytes
                )
                self.redis_client.ping()
                print("✅ Redis cache connected")
            except Exception as e:
                print(f"⚠️ Redis not available: {e}")
                self.config.USE_REDIS_CACHE = False

    def _init_cache_directory(self):
        """Create the on-disk cache layout (embeddings/ and results/)."""
        os.makedirs(self.config.CACHE_DIR, exist_ok=True)
        os.makedirs(f"{self.config.CACHE_DIR}/embeddings", exist_ok=True)
        os.makedirs(f"{self.config.CACHE_DIR}/results", exist_ok=True)

    def _generate_cache_key(self, data_type: str, content: str, params: Dict = None) -> str:
        """Derive a deterministic 32-hex-char key from type + content + params."""
        key_data = {
            "type": data_type,
            "content": content,
            "params": params or {}
        }
        # sort_keys makes the serialization (and hence the key) stable.
        key_str = json.dumps(key_data, sort_keys=True)
        return hashlib.sha256(key_str.encode()).hexdigest()[:32]

    def _memory_store(self, cache_key: str, entry: Dict):
        """Store an entry in the in-memory cache with FIFO eviction.

        BUGFIX: the memory cache previously grew without bound —
        MAX_CACHE_SIZE was only enforced on the semantic cache.
        """
        if cache_key not in self.memory_cache:
            while len(self.memory_cache) >= self.config.MAX_CACHE_SIZE:
                # dicts preserve insertion order, so this evicts the oldest.
                self.memory_cache.pop(next(iter(self.memory_cache)))
        self.memory_cache[cache_key] = entry

    def cache_embedding(self, text: str, embedding: np.ndarray, language: str):
        """Cache a text embedding in memory and/or on disk.

        BUGFIX: the original returned early when USE_MEMORY_CACHE was off,
        which also silently disabled the disk layer.
        """
        if not (self.config.USE_MEMORY_CACHE or self.config.USE_DISK_CACHE):
            return
        cache_key = self._generate_cache_key("embedding", text, {"language": language})
        cache_entry = {
            "embedding": embedding.tolist(),
            "language": language,
            "timestamp": datetime.now().isoformat(),
            "text_length": len(text)
        }
        if self.config.USE_MEMORY_CACHE:
            self._memory_store(cache_key, cache_entry)
        if self.config.USE_DISK_CACHE:
            cache_path = f"{self.config.CACHE_DIR}/embeddings/{cache_key}.pkl"
            try:
                with open(cache_path, 'wb') as f:
                    pickle.dump(cache_entry, f)
            except Exception as e:
                print(f"⚠️ Failed to save embedding cache: {e}")

    def get_cached_embedding(self, text: str, language: str) -> Optional[np.ndarray]:
        """Return a cached embedding for (text, language), or None.

        Checks memory first, then disk; a disk hit is promoted to memory.
        """
        cache_key = self._generate_cache_key("embedding", text, {"language": language})
        if cache_key in self.memory_cache:
            entry = self.memory_cache[cache_key]
            if self._is_cache_entry_valid(entry, self.config.EMBEDDING_TTL):
                return np.array(entry["embedding"])
        if self.config.USE_DISK_CACHE:
            cache_path = f"{self.config.CACHE_DIR}/embeddings/{cache_key}.pkl"
            if os.path.exists(cache_path):
                try:
                    # NOTE: pickle is only safe because this directory is
                    # written by this same process — never point CACHE_DIR
                    # at untrusted data.
                    with open(cache_path, 'rb') as f:
                        entry = pickle.load(f)
                    if self._is_cache_entry_valid(entry, self.config.EMBEDDING_TTL):
                        self._memory_store(cache_key, entry)  # promote to memory
                        return np.array(entry["embedding"])
                except Exception as e:
                    print(f"⚠️ Failed to load embedding cache: {e}")
        return None

    def cache_search_results(self, query: str, results: List, top_k: int, language: str):
        """Cache RAG search results under an exact key and register the
        query embedding in the semantic cache."""
        cache_key = self._generate_cache_key("search", query, {
            "top_k": top_k,
            "language": language
        })
        # Register the query embedding so future paraphrases can hit this entry.
        embedding_model = self.multilingual_manager.get_embedding_model(language)
        if embedding_model:
            query_embedding = embedding_model.encode([query])[0]
            self._update_semantic_cache(cache_key, query_embedding)

        cache_entry = {
            "query": query,
            # Result objects are flattened to dicts so the entry pickles cleanly.
            "results": [r.__dict__ if hasattr(r, '__dict__') else r for r in results],
            "timestamp": datetime.now().isoformat(),
            "language": language,
            "top_k": top_k
        }
        self._memory_store(cache_key, cache_entry)

        if self.config.USE_REDIS_CACHE and self.redis_client:
            try:
                self.redis_client.setex(
                    f"cag:search:{cache_key}",
                    self.config.SEARCH_RESULT_TTL,
                    pickle.dumps(cache_entry)
                )
            except Exception as e:
                print(f"⚠️ Redis cache failed: {e}")

        if self.config.USE_DISK_CACHE:
            cache_path = f"{self.config.CACHE_DIR}/results/{cache_key}.pkl"
            try:
                with open(cache_path, 'wb') as f:
                    pickle.dump(cache_entry, f)
            except Exception as e:
                print(f"⚠️ Failed to save search cache: {e}")

    def get_cached_search_results(self, query: str, top_k: int, language: str) -> Tuple[Optional[List], CacheHitType]:
        """Look up cached search results for a query.

        Returns:
            ``(results, hit_type)`` — ``results`` is None on a miss.
            Tries an exact key first, then a semantic (paraphrase) match
            against the FAISS index.
        """
        self.stats["total_queries"] += 1
        if len(query.strip()) < self.config.MIN_QUERY_LENGTH:
            return None, CacheHitType.NONE

        # 1. Exact-match cache
        exact_key = self._generate_cache_key("search", query, {
            "top_k": top_k,
            "language": language
        })
        cached_results = self._get_cache_entry(exact_key, self.config.SEARCH_RESULT_TTL)
        if cached_results:
            self.stats["cache_hits"] += 1
            self.stats["exact_hits"] += 1
            return cached_results.get("results"), CacheHitType.EXACT

        # 2. Semantic cache
        if self.semantic_cache_index is not None and len(self.semantic_cache_embeddings) > 0:
            embedding_model = self.multilingual_manager.get_embedding_model(language)
            if embedding_model:
                query_embedding = embedding_model.encode([query])[0]
                similar_key, similarity = self._semantic_cache_lookup(query_embedding)
                if similarity >= self.config.SEMANTIC_SIMILARITY_THRESHOLD:
                    cached_results = self._get_cache_entry(similar_key, self.config.SEMANTIC_CACHE_TTL)
                    if cached_results:
                        self.stats["cache_hits"] += 1
                        self.stats["semantic_hits"] += 1
                        # Discount the stored scores for a non-identical query.
                        adjusted_results = self._adjust_cached_results(
                            cached_results.get("results"),
                            query,
                            similarity
                        )
                        return adjusted_results, CacheHitType.SEMANTIC
        return None, CacheHitType.NONE

    def _update_semantic_cache(self, cache_key: str, embedding: np.ndarray):
        """Register a query embedding so future similar queries can hit."""
        evicted = False
        if len(self.semantic_cache_embeddings) >= self.config.MAX_CACHE_SIZE:
            # FIFO eviction of the oldest entry.
            self.semantic_cache_keys.pop(0)
            self.semantic_cache_embeddings.pop(0)
            evicted = True
        self.semantic_cache_keys.append(cache_key)
        self.semantic_cache_embeddings.append(embedding)

        new_vec = np.asarray([embedding], dtype=np.float32)
        faiss.normalize_L2(new_vec)
        if self.semantic_cache_index is None or evicted:
            # (Re)build from scratch — flat FAISS indexes cannot delete rows.
            embeddings_array = np.array(self.semantic_cache_embeddings).astype(np.float32)
            faiss.normalize_L2(embeddings_array)
            self.semantic_cache_index = faiss.IndexFlatIP(embeddings_array.shape[1])
            self.semantic_cache_index.add(embeddings_array)
        else:
            # PERF: the original rebuilt the whole index on every insert
            # (O(n) per add); appending the single normalized vector is
            # equivalent and O(1).
            self.semantic_cache_index.add(new_vec)

    def _semantic_cache_lookup(self, query_embedding: np.ndarray) -> Tuple[Optional[str], float]:
        """Return (cache_key, cosine_similarity) of the nearest cached
        query embedding, or (None, 0.0) when nothing can be matched."""
        if self.semantic_cache_index is None or len(self.semantic_cache_embeddings) == 0:
            return None, 0.0
        norm = np.linalg.norm(query_embedding)
        if norm == 0:  # BUGFIX: avoid division by zero on a degenerate embedding
            return None, 0.0
        q = (query_embedding / norm).reshape(1, -1).astype(np.float32)
        distances, indices = self.semantic_cache_index.search(q, k=1)
        if len(indices[0]) > 0 and indices[0][0] != -1:
            idx = int(indices[0][0])
            # BUGFIX: IndexFlatIP returns inner products, which for
            # L2-normalized vectors ARE cosine similarities. The original
            # computed ``1 - distance``, inverting the score so identical
            # queries scored ~0 and never cleared the 0.85 threshold.
            similarity = float(distances[0][0])
            return self.semantic_cache_keys[idx], similarity
        return None, 0.0

    def _get_cache_entry(self, cache_key: str, ttl: int) -> Optional[Dict]:
        """Fetch a cache entry, checking memory → Redis → disk.

        Hits from the slower layers are promoted into the memory cache.
        """
        if cache_key in self.memory_cache:
            entry = self.memory_cache[cache_key]
            if self._is_cache_entry_valid(entry, ttl):
                return entry
        if self.config.USE_REDIS_CACHE and self.redis_client:
            try:
                cached = self.redis_client.get(f"cag:search:{cache_key}")
                if cached:
                    entry = pickle.loads(cached)
                    if self._is_cache_entry_valid(entry, ttl):
                        self._memory_store(cache_key, entry)
                        return entry
            except Exception as e:
                print(f"⚠️ Redis get failed: {e}")
        if self.config.USE_DISK_CACHE:
            cache_path = f"{self.config.CACHE_DIR}/results/{cache_key}.pkl"
            if os.path.exists(cache_path):
                try:
                    with open(cache_path, 'rb') as f:
                        entry = pickle.load(f)
                    if self._is_cache_entry_valid(entry, ttl):
                        self._memory_store(cache_key, entry)
                        return entry
                except Exception as e:
                    print(f"⚠️ Disk cache read failed: {e}")
        return None

    def _is_cache_entry_valid(self, entry: Dict, ttl: int) -> bool:
        """True when the entry has a parseable ISO timestamp younger than
        ``ttl`` seconds."""
        if "timestamp" not in entry:
            return False
        try:
            timestamp = datetime.fromisoformat(entry["timestamp"])
        except (TypeError, ValueError):
            # BUGFIX: was a bare ``except:`` that swallowed everything,
            # including KeyboardInterrupt.
            return False
        age = datetime.now() - timestamp
        return age.total_seconds() < ttl

    def _adjust_cached_results(self, cached_results: List, new_query: str, similarity: float) -> List:
        """Rescale cached result scores for a semantically-matched query."""
        adjusted_results = []
        # BUGFIX: tolerate a malformed entry whose "results" is None.
        for result in cached_results or []:
            if isinstance(result, dict) and "similarity" in result:
                # Discount the stored score by how close the new query is.
                result["similarity"] *= similarity
                result["source"] = "semantic_cache"
                result["cache_similarity"] = similarity
            adjusted_results.append(result)
        return adjusted_results

    def search_with_cache(self, query: str, top_k: int = 5, use_cache: bool = True) -> Dict:
        """Search with cache augmentation.

        Serves from the cache when possible; otherwise runs the RAG search
        and caches the results. Returns a dict with the results plus
        cache/timing metadata.
        """
        start_time = time.time()
        language = self.multilingual_manager.detect_language(query)

        cached_results, hit_type = None, CacheHitType.NONE
        if use_cache:
            cached_results, hit_type = self.get_cached_search_results(query, top_k, language)

        if cached_results and hit_type != CacheHitType.NONE:
            # Cache hit — no RAG call needed.
            response_time = time.time() - start_time
            self.stats["response_times"].append(response_time)
            return {
                "query": query,
                "results": cached_results,
                "cache_hit": True,
                "hit_type": hit_type.value,
                "response_time_ms": round(response_time * 1000, 2),
                "language": language,
                "cached": True
            }

        # Cache miss — perform the actual RAG search.
        rag_start_time = time.time()
        rag_results = self.rag_system.semantic_search(query, top_k=top_k)
        rag_time = time.time() - rag_start_time

        # Cache the results for next time.
        if use_cache and rag_results:
            self.cache_search_results(query, rag_results, top_k, language)

        total_time = time.time() - start_time
        self.stats["response_times"].append(total_time)

        # Flatten RAG result objects (assumed to expose .text/.similarity/
        # .metadata — confirm against the RAG system) into plain dicts.
        results_list = []
        for result in rag_results:
            results_list.append({
                "text": result.text,
                "similarity": result.similarity,
                "metadata": result.metadata,
                "source": "rag_search"
            })
        return {
            "query": query,
            "results": results_list,
            "cache_hit": False,
            "hit_type": "none",
            "response_time_ms": round(total_time * 1000, 2),
            "rag_time_ms": round(rag_time * 1000, 2),
            "language": language,
            "cached": False
        }

    def batch_search_with_cache(self, queries: List[str], top_k: int = 3) -> List[Dict]:
        """Batch search: answer what the cache can first, then run the
        remaining queries through the RAG backend."""
        results = []
        # Pass 1: cache lookup for every query.
        for query in queries:
            language = self.multilingual_manager.detect_language(query)
            cached_results, hit_type = self.get_cached_search_results(query, top_k, language)
            if cached_results:
                results.append({
                    "query": query,
                    "results": cached_results,
                    "cache_hit": True,
                    "hit_type": hit_type.value
                })
            else:
                results.append({
                    "query": query,
                    "cache_hit": False,
                    "pending": True
                })
        # Pass 2: resolve the misses. use_cache=False avoids re-checking
        # the cache (pass 1 already did) — NOTE this also means these
        # results are not written back to the cache.
        for i, result in enumerate(results):
            if result.get("pending", False):
                results[i] = self.search_with_cache(result["query"], top_k, use_cache=False)
        return results

    def get_cache_stats(self) -> Dict:
        """Return hit-rate, latency, size, and estimated-savings metrics."""
        total_hits = self.stats["cache_hits"]
        total_queries = self.stats["total_queries"]
        hit_rate = total_hits / total_queries if total_queries > 0 else 0

        if self.stats["response_times"]:
            avg_response_time = sum(self.stats["response_times"]) / len(self.stats["response_times"])
            p95_response_time = np.percentile(self.stats["response_times"], 95)
        else:
            avg_response_time = p95_response_time = 0

        # Rough savings estimate: each hit avoids one LLM call at ~$0.01.
        cost_per_call = 0.01  # USD
        estimated_savings = total_hits * cost_per_call
        return {
            "total_queries": total_queries,
            "cache_hits": total_hits,
            "cache_misses": total_queries - total_hits,
            "hit_rate": round(hit_rate * 100, 2),
            "exact_hits": self.stats["exact_hits"],
            "semantic_hits": self.stats["semantic_hits"],
            "avg_response_time_ms": round(avg_response_time * 1000, 2),
            "p95_response_time_ms": round(p95_response_time * 1000, 2),
            "memory_cache_size": len(self.memory_cache),
            "semantic_cache_size": len(self.semantic_cache_embeddings),
            "estimated_cost_savings_usd": round(estimated_savings, 2)
        }

    def clear_cache(self, cache_type: str = "all"):
        """Clear one or all cache layers.

        Args:
            cache_type: "all", "memory", "semantic", "disk", or "redis".
        """
        if cache_type == "all" or cache_type == "memory":
            self.memory_cache.clear()
            print("✅ Memory cache cleared")
        if cache_type == "all" or cache_type == "semantic":
            self.semantic_cache_index = None
            self.semantic_cache_embeddings = []
            self.semantic_cache_keys = []
            print("✅ Semantic cache cleared")
        if cache_type == "all" or cache_type == "disk":
            import shutil
            shutil.rmtree(self.config.CACHE_DIR, ignore_errors=True)
            self._init_cache_directory()
            print("✅ Disk cache cleared")
        if cache_type == "all" or cache_type == "redis":
            if self.redis_client:
                try:
                    self.redis_client.flushdb()
                    print("✅ Redis cache cleared")
                except Exception as e:
                    print(f"⚠️ Failed to clear Redis: {e}")