Commit
·
5134f75
1
Parent(s):
f36fcc8
Improve caching system: Enable document-level caching for better UX
Browse files- Added document-level caching that works across different prompts
- Users can now analyze the same document with multiple prompts efficiently
- Cached document content speeds up subsequent analyses with new prompts
- Enhanced user feedback to show when cached content is being used
- Maintains exact prompt+document caching for instant responses
- Better UX for users exploring the same document with different questions
- agents.py +14 -6
- app.py +13 -4
- utils/__init__.py +34 -1
agents.py
CHANGED
|
@@ -5,7 +5,7 @@ import logging
|
|
| 5 |
from typing import Optional, Dict, Any, List, AsyncGenerator
|
| 6 |
import time
|
| 7 |
|
| 8 |
-
from utils import call_openai_chat, load_pdf_text_cached, load_pdf_text_chunked, get_document_metadata, get_cached_analysis, cache_analysis
|
| 9 |
from utils.visual_output import VisualOutputGenerator
|
| 10 |
from config import Config
|
| 11 |
|
|
@@ -41,19 +41,27 @@ class AnalysisAgent(BaseAgent):
|
|
| 41 |
async def handle(self, user_id: str, prompt: str, file_path: Optional[str] = None, context: Optional[Dict[str, Any]] = None):
|
| 42 |
start_time = time.time()
|
| 43 |
|
| 44 |
-
# Check cache first
|
| 45 |
if file_path:
|
| 46 |
cached_result = get_cached_analysis(file_path, prompt)
|
| 47 |
if cached_result:
|
| 48 |
-
logger.info(f"Returning cached analysis for {file_path}")
|
| 49 |
return cached_result
|
| 50 |
|
| 51 |
if file_path:
|
| 52 |
# Get document metadata
|
| 53 |
metadata = get_document_metadata(file_path)
|
| 54 |
|
| 55 |
-
#
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
# Check if document needs chunking
|
| 59 |
if len(text) > Config.CHUNK_SIZE:
|
|
@@ -66,7 +74,7 @@ class AnalysisAgent(BaseAgent):
|
|
| 66 |
metadata = {}
|
| 67 |
result = await self._process_content(prompt, content, metadata, "")
|
| 68 |
|
| 69 |
-
# Cache the result
|
| 70 |
if file_path:
|
| 71 |
cache_analysis(file_path, prompt, result)
|
| 72 |
|
|
|
|
| 5 |
from typing import Optional, Dict, Any, List, AsyncGenerator
|
| 6 |
import time
|
| 7 |
|
| 8 |
+
from utils import call_openai_chat, load_pdf_text_cached, load_pdf_text_chunked, get_document_metadata, get_cached_analysis, cache_analysis, get_cached_document_content, cache_document_content
|
| 9 |
from utils.visual_output import VisualOutputGenerator
|
| 10 |
from config import Config
|
| 11 |
|
|
|
|
| 41 |
async def handle(self, user_id: str, prompt: str, file_path: Optional[str] = None, context: Optional[Dict[str, Any]] = None):
|
| 42 |
start_time = time.time()
|
| 43 |
|
| 44 |
+
# Check cache first - exact prompt match
|
| 45 |
if file_path:
|
| 46 |
cached_result = get_cached_analysis(file_path, prompt)
|
| 47 |
if cached_result:
|
| 48 |
+
logger.info(f"Returning cached analysis for {file_path} with exact prompt match")
|
| 49 |
return cached_result
|
| 50 |
|
| 51 |
if file_path:
|
| 52 |
# Get document metadata
|
| 53 |
metadata = get_document_metadata(file_path)
|
| 54 |
|
| 55 |
+
# Check for cached document content (any prompt)
|
| 56 |
+
cached_content = get_cached_document_content(file_path)
|
| 57 |
+
if cached_content:
|
| 58 |
+
logger.info(f"Using cached document content for {file_path}")
|
| 59 |
+
text = cached_content
|
| 60 |
+
else:
|
| 61 |
+
# Load and cache text
|
| 62 |
+
text = load_pdf_text_cached(file_path)
|
| 63 |
+
cache_document_content(file_path, text)
|
| 64 |
+
logger.info(f"Cached document content for {file_path}")
|
| 65 |
|
| 66 |
# Check if document needs chunking
|
| 67 |
if len(text) > Config.CHUNK_SIZE:
|
|
|
|
| 74 |
metadata = {}
|
| 75 |
result = await self._process_content(prompt, content, metadata, "")
|
| 76 |
|
| 77 |
+
# Cache the analysis result
|
| 78 |
if file_path:
|
| 79 |
cache_analysis(file_path, prompt, result)
|
| 80 |
|
app.py
CHANGED
|
@@ -90,15 +90,20 @@ def handle_analysis(file, prompt, username="anonymous", use_streaming=False):
|
|
| 90 |
path = save_uploaded_file(file, username)
|
| 91 |
|
| 92 |
# Check if this is a cached result
|
| 93 |
-
from utils import get_cached_analysis
|
| 94 |
cached_result = get_cached_analysis(path, prompt)
|
|
|
|
| 95 |
|
| 96 |
if cached_result:
|
| 97 |
-
status = "β‘ **Cached
|
| 98 |
result = cached_result.get("analysis", "No analysis result.")
|
| 99 |
metadata = cached_result.get("metadata", {})
|
| 100 |
else:
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
result = run_async(
|
| 103 |
ORCHESTRATOR.handle_user_prompt,
|
| 104 |
user_id=username,
|
|
@@ -108,7 +113,11 @@ def handle_analysis(file, prompt, username="anonymous", use_streaming=False):
|
|
| 108 |
)
|
| 109 |
result = result.get("analysis", "No analysis result.")
|
| 110 |
metadata = result.get("metadata", {}) if isinstance(result, dict) else {}
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
return result, status, metadata
|
| 114 |
except Exception as e:
|
|
|
|
| 90 |
path = save_uploaded_file(file, username)
|
| 91 |
|
| 92 |
# Check if this is a cached result
|
| 93 |
+
from utils import get_cached_analysis, get_cached_document_content
|
| 94 |
cached_result = get_cached_analysis(path, prompt)
|
| 95 |
+
cached_content = get_cached_document_content(path)
|
| 96 |
|
| 97 |
if cached_result:
|
| 98 |
+
status = "β‘ **Cached Analysis** - Instant response from previous analysis"
|
| 99 |
result = cached_result.get("analysis", "No analysis result.")
|
| 100 |
metadata = cached_result.get("metadata", {})
|
| 101 |
else:
|
| 102 |
+
if cached_content:
|
| 103 |
+
status = "π **Processing** - Using cached document, analyzing with new prompt..."
|
| 104 |
+
else:
|
| 105 |
+
status = "π **Processing** - Analyzing document with AI..."
|
| 106 |
+
|
| 107 |
result = run_async(
|
| 108 |
ORCHESTRATOR.handle_user_prompt,
|
| 109 |
user_id=username,
|
|
|
|
| 113 |
)
|
| 114 |
result = result.get("analysis", "No analysis result.")
|
| 115 |
metadata = result.get("metadata", {}) if isinstance(result, dict) else {}
|
| 116 |
+
|
| 117 |
+
if cached_content:
|
| 118 |
+
status = "β
**Analysis Complete** - Fresh analysis using cached document"
|
| 119 |
+
else:
|
| 120 |
+
status = "β
**Analysis Complete** - Fresh analysis generated and cached"
|
| 121 |
|
| 122 |
return result, status, metadata
|
| 123 |
except Exception as e:
|
utils/__init__.py
CHANGED
|
@@ -232,7 +232,7 @@ CACHE_DIR = Path(tempfile.gettempdir()) / "pdf_analysis_cache"
|
|
| 232 |
CACHE_DIR.mkdir(exist_ok=True)
|
| 233 |
|
| 234 |
def get_cached_analysis(file_path: str, prompt: str) -> Optional[Dict[str, Any]]:
|
| 235 |
-
"""Retrieve cached analysis if available"""
|
| 236 |
file_hash = get_file_hash(file_path)
|
| 237 |
prompt_hash = hashlib.md5(prompt.encode()).hexdigest()
|
| 238 |
cache_file = CACHE_DIR / f"{file_hash}_{prompt_hash}.json"
|
|
@@ -250,6 +250,23 @@ def get_cached_analysis(file_path: str, prompt: str) -> Optional[Dict[str, Any]]
|
|
| 250 |
pass
|
| 251 |
return None
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
def cache_analysis(file_path: str, prompt: str, analysis: Dict[str, Any]) -> None:
|
| 254 |
"""Cache analysis results for future use"""
|
| 255 |
file_hash = get_file_hash(file_path)
|
|
@@ -268,6 +285,22 @@ def cache_analysis(file_path: str, prompt: str, analysis: Dict[str, Any]) -> Non
|
|
| 268 |
except Exception:
|
| 269 |
pass # Fail silently if caching fails
|
| 270 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
def get_cached_text(file_path: str) -> Optional[str]:
|
| 272 |
"""Retrieve cached PDF text if available"""
|
| 273 |
file_hash = get_file_hash(file_path)
|
|
|
|
| 232 |
CACHE_DIR.mkdir(exist_ok=True)
|
| 233 |
|
| 234 |
def get_cached_analysis(file_path: str, prompt: str) -> Optional[Dict[str, Any]]:
|
| 235 |
+
"""Retrieve cached analysis if available - exact prompt match"""
|
| 236 |
file_hash = get_file_hash(file_path)
|
| 237 |
prompt_hash = hashlib.md5(prompt.encode()).hexdigest()
|
| 238 |
cache_file = CACHE_DIR / f"{file_hash}_{prompt_hash}.json"
|
|
|
|
| 250 |
pass
|
| 251 |
return None
|
| 252 |
|
| 253 |
+
def get_cached_document_content(file_path: str) -> Optional[str]:
    """Return cached extracted text for *file_path*, or ``None`` on a miss.

    Document-level cache: keyed on the file alone (no prompt), so text
    extracted once can be reused when the same document is analyzed with
    a new prompt.

    An entry is honored only if the stored hash still matches the file's
    current hash (``get_file_hash`` — presumably content-based; verify
    against its definition) and the entry is younger than 24 hours.

    Returns:
        The cached document text, or ``None`` when no valid entry exists.
    """
    file_hash = get_file_hash(file_path)
    cache_file = CACHE_DIR / f"{file_hash}_content.json"

    if cache_file.exists():
        try:
            with open(cache_file, 'r', encoding='utf-8') as f:
                cache_data = json.load(f)
            # Valid only while the file is unchanged and the entry is
            # younger than 24 hours (86400 seconds).
            if (cache_data.get('file_hash') == file_hash and
                    time.time() - cache_data.get('cached_at', 0) < 86400):
                return cache_data.get('content')
        except (OSError, ValueError, TypeError):
            # Best-effort cache: treat unreadable or corrupt entries as a
            # miss. Narrowed from a blanket ``except Exception`` so real
            # programming errors surface. ValueError covers bad JSON
            # (json.JSONDecodeError subclasses it); TypeError covers a
            # malformed 'cached_at' field in the arithmetic comparison.
            pass
    return None
|
| 269 |
+
|
| 270 |
def cache_analysis(file_path: str, prompt: str, analysis: Dict[str, Any]) -> None:
|
| 271 |
"""Cache analysis results for future use"""
|
| 272 |
file_hash = get_file_hash(file_path)
|
|
|
|
| 285 |
except Exception:
|
| 286 |
pass # Fail silently if caching fails
|
| 287 |
|
| 288 |
+
def cache_document_content(file_path: str, content: str) -> None:
    """Persist extracted document text for reuse with any future prompt.

    Writes ``{file_hash}_content.json`` into ``CACHE_DIR`` containing the
    file hash, the text, and a timestamp that
    ``get_cached_document_content`` uses for 24-hour expiry.

    Caching is an optimization, never fatal: failures are swallowed and
    the caller proceeds without a cache entry.

    Args:
        file_path: Path of the source document (hashed for the cache key).
        content: The extracted document text to store.
    """
    file_hash = get_file_hash(file_path)
    cache_file = CACHE_DIR / f"{file_hash}_content.json"

    try:
        cache_data = {
            'file_hash': file_hash,
            'content': content,
            'cached_at': time.time(),
        }
        with open(cache_file, 'w', encoding='utf-8') as f:
            json.dump(cache_data, f, ensure_ascii=False)
    except (OSError, TypeError, ValueError):
        # Narrowed from a blanket ``except Exception`` so genuine bugs
        # surface; disk/permission problems (OSError) and
        # non-serializable content (TypeError/ValueError from json.dump)
        # are still ignored — cache writes are best-effort.
        pass
|
| 303 |
+
|
| 304 |
def get_cached_text(file_path: str) -> Optional[str]:
|
| 305 |
"""Retrieve cached PDF text if available"""
|
| 306 |
file_hash = get_file_hash(file_path)
|