# NOTE: removed page-scrape artifacts (Spaces listing header, file size,
# commit hash, and a row of gutter line numbers) that made this module unparsable.
import requests
import numpy as np
import os
import time
import warnings
from dotenv import load_dotenv
from src.cag.cache_manager import CacheManager
from src.cag.embedding_utils import EmbeddingUtils
# Suppress PyTorch Warnings
warnings.filterwarnings("ignore", message="Tried to instantiate class '__path__._path'")
load_dotenv()
class LLMIntegration:
def __init__(self,cache_size=100, similarity_threshold=0.8):
"""Initialize the LLM Integration with API Key, Cache, and Embedding Utilities."""
self.cache_manager = CacheManager(max_cache_size=cache_size)
self.embedding_utils = EmbeddingUtils()
self.similarity_threshold = similarity_threshold
def generate_response(self, query,response):
"""Generate a response with cache checking and similarity matching."""
query_key = self.cache_manager.normalize_key(query)
# Check for cache match
cached_response = self.cache_manager.get_from_cache(query_key)
if cached_response:
return f"Cache Hit! {cached_response}"
# Generate query embedding
query_embedding = self.embedding_utils.generate_embedding(query)
# Check for approximate match
best_match_key = self._find_best_match(query_embedding)
if best_match_key:
cached_response = self.cache_manager.get_from_cache(best_match_key)
return f"Cache Hit! {cached_response}"
# If no cache match, query the API
response = response
# β
Only cache successful responses
if response :
self.cache_manager.add_to_cache(query_key, response, embedding=query_embedding)
return f"Cache Miss! {response}"
else:
return "**Error: Could not generate a response.**"
def _find_best_match(self, query_embedding):
"""Find the best match in the cache using similarity checking."""
best_match_key = None
highest_similarity = 0
for key in self.cache_manager.cache:
cached_embedding = self.cache_manager.get_embedding(key)
if cached_embedding is not None:
similarity = self.embedding_utils.calculate_similarity(query_embedding, cached_embedding)
if similarity > highest_similarity and similarity >= self.similarity_threshold:
best_match_key = key
highest_similarity = similarity
return best_match_key
|