Spaces:
Sleeping
Sleeping
import os
import time
import warnings

import numpy as np
import requests
from dotenv import load_dotenv

from src.cag.cache_manager import CacheManager
from src.cag.embedding_utils import EmbeddingUtils

# Suppress a noisy PyTorch warning raised during torch import/introspection.
warnings.filterwarnings("ignore", message="Tried to instantiate class '__path__._path'")

# Load environment variables (e.g. API keys) from a local .env file, if present.
load_dotenv()
class LLMIntegration:
    """Cache-augmented response lookup for an LLM.

    Responses are stored in a cache keyed by a normalized form of the query.
    Lookups try an exact key match first, then an approximate match based on
    embedding similarity, and only fall back to the caller-supplied response
    on a full miss.
    """

    def __init__(self, cache_size=100, similarity_threshold=0.8):
        """Initialize the LLM Integration with Cache and Embedding Utilities.

        Args:
            cache_size: Maximum number of entries the cache may hold.
            similarity_threshold: Minimum similarity score (0-1) required for
                an approximate cache match.
        """
        self.cache_manager = CacheManager(max_cache_size=cache_size)
        self.embedding_utils = EmbeddingUtils()
        self.similarity_threshold = similarity_threshold

    def generate_response(self, query, response):
        """Return a cached response for *query*, caching *response* on a miss.

        Args:
            query: The user query to look up.
            response: The freshly generated response to cache and return when
                no cached entry matches.

        Returns:
            A string prefixed with "Cache Hit!" (exact or approximate match),
            "Cache Miss!" (new response cached), or an error message when
            *response* is empty/falsy.
        """
        query_key = self.cache_manager.normalize_key(query)

        # 1) Exact-key lookup.
        cached_response = self.cache_manager.get_from_cache(query_key)
        if cached_response:
            return f"Cache Hit! {cached_response}"

        # 2) Approximate lookup via embedding similarity.
        query_embedding = self.embedding_utils.generate_embedding(query)
        best_match_key = self._find_best_match(query_embedding)
        if best_match_key:
            cached_response = self.cache_manager.get_from_cache(best_match_key)
            return f"Cache Hit! {cached_response}"

        # 3) Full miss: only cache non-empty responses so failures are retried.
        if response:
            self.cache_manager.add_to_cache(query_key, response, embedding=query_embedding)
            return f"Cache Miss! {response}"
        return "**Error: Could not generate a response.**"

    def _find_best_match(self, query_embedding):
        """Return the cached key most similar to *query_embedding*.

        Only keys whose similarity meets ``self.similarity_threshold`` are
        considered; returns None when no key qualifies.
        """
        best_match_key = None
        highest_similarity = 0.0
        for key in self.cache_manager.cache:
            cached_embedding = self.cache_manager.get_embedding(key)
            if cached_embedding is None:
                continue  # entry was cached without an embedding; skip it
            similarity = self.embedding_utils.calculate_similarity(query_embedding, cached_embedding)
            if similarity >= self.similarity_threshold and similarity > highest_similarity:
                best_match_key = key
                highest_similarity = similarity
        return best_match_key