JatsTheAIGen committed on
Commit
5134f75
·
1 Parent(s): f36fcc8

Improve caching system: Enable document-level caching for better UX

Browse files

- Added document-level caching that works across different prompts
- Users can now analyze the same document with multiple prompts efficiently
- Cached document content speeds up subsequent analyses with new prompts
- Enhanced user feedback to show when cached content is being used
- Maintains exact prompt+document caching for instant responses
- Better UX for users exploring the same document with different questions

Files changed (3) hide show
  1. agents.py +14 -6
  2. app.py +13 -4
  3. utils/__init__.py +34 -1
agents.py CHANGED
@@ -5,7 +5,7 @@ import logging
5
  from typing import Optional, Dict, Any, List, AsyncGenerator
6
  import time
7
 
8
- from utils import call_openai_chat, load_pdf_text_cached, load_pdf_text_chunked, get_document_metadata, get_cached_analysis, cache_analysis
9
  from utils.visual_output import VisualOutputGenerator
10
  from config import Config
11
 
@@ -41,19 +41,27 @@ class AnalysisAgent(BaseAgent):
41
  async def handle(self, user_id: str, prompt: str, file_path: Optional[str] = None, context: Optional[Dict[str, Any]] = None):
42
  start_time = time.time()
43
 
44
- # Check cache first
45
  if file_path:
46
  cached_result = get_cached_analysis(file_path, prompt)
47
  if cached_result:
48
- logger.info(f"Returning cached analysis for {file_path}")
49
  return cached_result
50
 
51
  if file_path:
52
  # Get document metadata
53
  metadata = get_document_metadata(file_path)
54
 
55
- # Load text with caching
56
- text = load_pdf_text_cached(file_path)
 
 
 
 
 
 
 
 
57
 
58
  # Check if document needs chunking
59
  if len(text) > Config.CHUNK_SIZE:
@@ -66,7 +74,7 @@ class AnalysisAgent(BaseAgent):
66
  metadata = {}
67
  result = await self._process_content(prompt, content, metadata, "")
68
 
69
- # Cache the result
70
  if file_path:
71
  cache_analysis(file_path, prompt, result)
72
 
 
5
  from typing import Optional, Dict, Any, List, AsyncGenerator
6
  import time
7
 
8
+ from utils import call_openai_chat, load_pdf_text_cached, load_pdf_text_chunked, get_document_metadata, get_cached_analysis, cache_analysis, get_cached_document_content, cache_document_content
9
  from utils.visual_output import VisualOutputGenerator
10
  from config import Config
11
 
 
41
  async def handle(self, user_id: str, prompt: str, file_path: Optional[str] = None, context: Optional[Dict[str, Any]] = None):
42
  start_time = time.time()
43
 
44
+ # Check cache first - exact prompt match
45
  if file_path:
46
  cached_result = get_cached_analysis(file_path, prompt)
47
  if cached_result:
48
+ logger.info(f"Returning cached analysis for {file_path} with exact prompt match")
49
  return cached_result
50
 
51
  if file_path:
52
  # Get document metadata
53
  metadata = get_document_metadata(file_path)
54
 
55
+ # Check for cached document content (any prompt)
56
+ cached_content = get_cached_document_content(file_path)
57
+ if cached_content:
58
+ logger.info(f"Using cached document content for {file_path}")
59
+ text = cached_content
60
+ else:
61
+ # Load and cache text
62
+ text = load_pdf_text_cached(file_path)
63
+ cache_document_content(file_path, text)
64
+ logger.info(f"Cached document content for {file_path}")
65
 
66
  # Check if document needs chunking
67
  if len(text) > Config.CHUNK_SIZE:
 
74
  metadata = {}
75
  result = await self._process_content(prompt, content, metadata, "")
76
 
77
+ # Cache the analysis result
78
  if file_path:
79
  cache_analysis(file_path, prompt, result)
80
 
app.py CHANGED
@@ -90,15 +90,20 @@ def handle_analysis(file, prompt, username="anonymous", use_streaming=False):
90
  path = save_uploaded_file(file, username)
91
 
92
  # Check if this is a cached result
93
- from utils import get_cached_analysis
94
  cached_result = get_cached_analysis(path, prompt)
 
95
 
96
  if cached_result:
97
- status = "⚑ **Cached Result** - Instant response from previous analysis"
98
  result = cached_result.get("analysis", "No analysis result.")
99
  metadata = cached_result.get("metadata", {})
100
  else:
101
- status = "πŸ”„ **Processing** - Analyzing document with AI..."
 
 
 
 
102
  result = run_async(
103
  ORCHESTRATOR.handle_user_prompt,
104
  user_id=username,
@@ -108,7 +113,11 @@ def handle_analysis(file, prompt, username="anonymous", use_streaming=False):
108
  )
109
  result = result.get("analysis", "No analysis result.")
110
  metadata = result.get("metadata", {}) if isinstance(result, dict) else {}
111
- status = "βœ… **Analysis Complete** - Fresh analysis generated"
 
 
 
 
112
 
113
  return result, status, metadata
114
  except Exception as e:
 
90
  path = save_uploaded_file(file, username)
91
 
92
  # Check if this is a cached result
93
+ from utils import get_cached_analysis, get_cached_document_content
94
  cached_result = get_cached_analysis(path, prompt)
95
+ cached_content = get_cached_document_content(path)
96
 
97
  if cached_result:
98
+ status = "⚑ **Cached Analysis** - Instant response from previous analysis"
99
  result = cached_result.get("analysis", "No analysis result.")
100
  metadata = cached_result.get("metadata", {})
101
  else:
102
+ if cached_content:
103
+ status = "πŸ”„ **Processing** - Using cached document, analyzing with new prompt..."
104
+ else:
105
+ status = "πŸ”„ **Processing** - Analyzing document with AI..."
106
+
107
  result = run_async(
108
  ORCHESTRATOR.handle_user_prompt,
109
  user_id=username,
 
113
  )
114
  result = result.get("analysis", "No analysis result.")
115
  metadata = result.get("metadata", {}) if isinstance(result, dict) else {}
116
+
117
+ if cached_content:
118
+ status = "βœ… **Analysis Complete** - Fresh analysis using cached document"
119
+ else:
120
+ status = "βœ… **Analysis Complete** - Fresh analysis generated and cached"
121
 
122
  return result, status, metadata
123
  except Exception as e:
utils/__init__.py CHANGED
@@ -232,7 +232,7 @@ CACHE_DIR = Path(tempfile.gettempdir()) / "pdf_analysis_cache"
232
  CACHE_DIR.mkdir(exist_ok=True)
233
 
234
  def get_cached_analysis(file_path: str, prompt: str) -> Optional[Dict[str, Any]]:
235
- """Retrieve cached analysis if available"""
236
  file_hash = get_file_hash(file_path)
237
  prompt_hash = hashlib.md5(prompt.encode()).hexdigest()
238
  cache_file = CACHE_DIR / f"{file_hash}_{prompt_hash}.json"
@@ -250,6 +250,23 @@ def get_cached_analysis(file_path: str, prompt: str) -> Optional[Dict[str, Any]]
250
  pass
251
  return None
252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  def cache_analysis(file_path: str, prompt: str, analysis: Dict[str, Any]) -> None:
254
  """Cache analysis results for future use"""
255
  file_hash = get_file_hash(file_path)
@@ -268,6 +285,22 @@ def cache_analysis(file_path: str, prompt: str, analysis: Dict[str, Any]) -> Non
268
  except Exception:
269
  pass # Fail silently if caching fails
270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  def get_cached_text(file_path: str) -> Optional[str]:
272
  """Retrieve cached PDF text if available"""
273
  file_hash = get_file_hash(file_path)
 
232
  CACHE_DIR.mkdir(exist_ok=True)
233
 
234
  def get_cached_analysis(file_path: str, prompt: str) -> Optional[Dict[str, Any]]:
235
+ """Retrieve cached analysis if available - exact prompt match"""
236
  file_hash = get_file_hash(file_path)
237
  prompt_hash = hashlib.md5(prompt.encode()).hexdigest()
238
  cache_file = CACHE_DIR / f"{file_hash}_{prompt_hash}.json"
 
250
  pass
251
  return None
252
 
253
def get_cached_document_content(file_path: str, max_age_seconds: float = 86400) -> Optional[str]:
    """Return cached extracted document text for *file_path*, or None on a miss.

    Document-level cache: keyed only on the file's content hash, so the
    cached text can be reused with any prompt (unlike the analysis cache,
    which also keys on the prompt).

    Args:
        file_path: Path to the source document.
        max_age_seconds: Maximum age of a cache entry before it is treated
            as stale. Defaults to 24 hours (the original fixed TTL).

    Returns:
        The cached document text, or None if there is no entry, the entry
        is stale, its stored hash does not match, or it cannot be read.
    """
    file_hash = get_file_hash(file_path)
    cache_file = CACHE_DIR / f"{file_hash}_content.json"

    if not cache_file.exists():
        return None
    try:
        with open(cache_file, 'r', encoding='utf-8') as f:
            cache_data = json.load(f)
        # Validate the stored hash (guards against an entry written for
        # different file content) and enforce the TTL.
        if (cache_data.get('file_hash') == file_hash and
                time.time() - cache_data.get('cached_at', 0) < max_age_seconds):
            return cache_data.get('content')
    except (OSError, ValueError, TypeError):
        # Unreadable, corrupt, or malformed entry (JSONDecodeError is a
        # ValueError; TypeError covers a non-numeric 'cached_at'). Treat as
        # a cache miss rather than crashing — but no longer swallow
        # programming errors like NameError/AttributeError.
        pass
    return None
269
+
270
  def cache_analysis(file_path: str, prompt: str, analysis: Dict[str, Any]) -> None:
271
  """Cache analysis results for future use"""
272
  file_hash = get_file_hash(file_path)
 
285
  except Exception:
286
  pass # Fail silently if caching fails
287
 
288
def cache_document_content(file_path: str, content: str) -> None:
    """Persist extracted document text so later analyses with any prompt
    can skip re-extraction.

    Writes a JSON entry keyed on the file's content hash; read back by
    get_cached_document_content. Caching is best-effort: failures are
    silent so they never break the analysis flow.

    Args:
        file_path: Path to the source document.
        content: The extracted text to cache.
    """
    file_hash = get_file_hash(file_path)
    cache_file = CACHE_DIR / f"{file_hash}_content.json"

    # Built outside the try: dict construction cannot fail, and keeping the
    # try body minimal avoids masking unrelated bugs.
    cache_data = {
        'file_hash': file_hash,   # redundant with the filename, but lets the reader validate
        'content': content,
        'cached_at': time.time(), # consumed by the TTL check on read
    }
    try:
        with open(cache_file, 'w', encoding='utf-8') as f:
            json.dump(cache_data, f, ensure_ascii=False)
    except (OSError, TypeError, ValueError):
        pass  # Fail silently if caching fails (best-effort by design)
+
304
  def get_cached_text(file_path: str) -> Optional[str]:
305
  """Retrieve cached PDF text if available"""
306
  file_hash = get_file_hash(file_path)