MugdhaV committed on
Commit
45e3c28
·
1 Parent(s): 16d3d61

feat: Add AppSignal APM for performance monitoring and error tracking

Browse files

- Add appsignal and opentelemetry-instrumentation-starlette dependencies
- Create __appsignal__.py config (reads APPSIGNAL_PUSH_API_KEY from env)
- Instrument Gradio's Starlette app with StarletteInstrumentor for HTTP tracing
- Add custom OpenTelemetry spans to all 4 MCP tool functions in app.py
- Add custom spans to search, indexing, and embedding methods in ai_indexer.py
- Add send_error() reporting in all exception handlers across both files

Files changed (4) hide show
  1. __appsignal__.py +9 -0
  2. ai_indexer.py +162 -137
  3. app.py +134 -96
  4. requirements.txt +2 -0
__appsignal__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from appsignal import Appsignal
2
+
3
+ appsignal = Appsignal(
4
+ active=True,
5
+ name="MediaSearchMCP",
6
+ push_api_key="<from-env>", # APPSIGNAL_PUSH_API_KEY env var overrides this
7
+ environment="production", # APPSIGNAL_APP_ENV env var overrides this
8
+ enable_host_metrics=True,
9
+ )
ai_indexer.py CHANGED
@@ -16,9 +16,12 @@ import os
16
  from pathlib import Path
17
  from typing import List, Dict, Optional
18
  import logging
 
 
19
 
20
  logging.basicConfig(level=logging.INFO)
21
  logger = logging.getLogger(__name__)
 
22
 
23
  class LocalMediaIndexer:
24
  """
@@ -61,6 +64,7 @@ class LocalMediaIndexer:
61
  self.processor = AutoProcessor.from_pretrained(model_name)
62
  logger.info("[OK] Model loaded successfully")
63
  except Exception as e:
 
64
  logger.error(f"[ERROR] Error loading model: {e}")
65
  raise
66
 
@@ -105,6 +109,7 @@ class LocalMediaIndexer:
105
  return frames
106
 
107
  except Exception as e:
 
108
  logger.error(f"[ERROR] Error extracting frames from {video_path}: {e}")
109
  return []
110
 
@@ -118,16 +123,18 @@ class LocalMediaIndexer:
118
  Returns:
119
  Numpy array embedding or None if error
120
  """
121
- try:
122
- inputs = self.processor(images=image, return_tensors="pt").to(self.device)
123
- with torch.no_grad():
124
- image_features = self.model.get_image_features(**inputs)
125
- # Normalize embeddings for cosine similarity
126
- image_features = image_features / image_features.norm(dim=-1, keepdim=True)
127
- return image_features.cpu().numpy().flatten()
128
- except Exception as e:
129
- logger.error(f"[ERROR] Error getting embedding: {e}")
130
- return None
 
 
131
 
132
  def index_local_directory(self, force_reindex: bool = False):
133
  """
@@ -136,105 +143,115 @@ class LocalMediaIndexer:
136
  Args:
137
  force_reindex: If True, rebuild index even if it exists
138
  """
139
- index_path = os.path.join(self.index_dir, "media_index.faiss")
140
- paths_path = os.path.join(self.index_dir, "file_paths.json")
141
- metadata_path = os.path.join(self.index_dir, "file_metadata.json")
142
-
143
- # Try to load existing index
144
- if not force_reindex and os.path.exists(index_path):
145
- logger.info("📂 Loading existing index...")
146
- if self.load_index():
147
- logger.info(f"[OK] Loaded index with {len(self.file_paths)} files")
148
- return
149
 
150
- logger.info(f"🔨 Building new index from: {self.media_dir}")
151
- embeddings = []
152
- self.file_paths = []
153
- self.file_metadata = {}
154
 
155
- # Supported file extensions
156
- image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'}
157
- video_exts = {'.mp4', '.mov', '.avi', '.mkv', '.wmv', '.flv'}
158
-
159
- # Walk through local directory
160
- file_count = 0
161
- for root, dirs, files in os.walk(self.media_dir):
162
- for file in sorted(files):
163
- file_path = os.path.join(root, file)
164
- ext = Path(file).suffix.lower()
165
-
166
- # Get file stats
167
- try:
168
- stat = os.stat(file_path)
169
- file_size = stat.st_size
170
- except Exception as e:
171
- logger.warning(f"[WARNING] Cannot access {file}: {e}")
172
- continue
173
-
174
- try:
175
- if ext in image_exts:
176
- # Process image
177
- logger.info(f"📸 Processing image: {file}")
178
- image = Image.open(file_path).convert('RGB')
179
- embedding = self.get_image_embedding(image)
180
-
181
- if embedding is not None:
182
- embeddings.append(embedding)
183
- self.file_paths.append(file_path)
184
- self.file_metadata[file_path] = {
185
- 'type': 'image',
186
- 'size': file_size,
187
- 'name': file
188
- }
189
- file_count += 1
190
- logger.info(f" [OK] Indexed image: {file}")
191
-
192
- elif ext in video_exts:
193
- # Process video frames
194
- logger.info(f"🎬 Processing video: {file}")
195
- frames = self.extract_video_frames(file_path)
196
-
197
- if frames:
198
- # Use average of frame embeddings
199
- frame_embeddings = []
200
- for frame in frames:
201
- emb = self.get_image_embedding(frame)
202
- if emb is not None:
203
- frame_embeddings.append(emb)
204
-
205
- if frame_embeddings:
206
- avg_embedding = np.mean(frame_embeddings, axis=0)
207
- embeddings.append(avg_embedding)
208
  self.file_paths.append(file_path)
209
  self.file_metadata[file_path] = {
210
- 'type': 'video',
211
  'size': file_size,
212
- 'name': file,
213
- 'frames_indexed': len(frames)
214
  }
215
  file_count += 1
216
- logger.info(f" [OK] Indexed video: {file}")
217
-
218
- except Exception as e:
219
- logger.error(f"[ERROR] Error processing {file}: {e}")
220
-
221
- if not embeddings:
222
- logger.warning("[WARNING] No media files found to index!")
223
- logger.warning(f" Check that {self.media_dir} contains images or videos")
224
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
- # Create FAISS index
227
- logger.info("🔨 Building FAISS index...")
228
- embeddings_array = np.array(embeddings).astype('float32')
229
- dimension = embeddings_array.shape[1]
230
 
231
- # Use IndexFlatIP for cosine similarity (embeddings already normalized)
232
- self.index = faiss.IndexFlatIP(dimension)
233
- self.index.add(embeddings_array)
234
 
235
- # Save index locally
236
- self.save_index()
237
- logger.info(f"[OK] Successfully indexed {len(self.file_paths)} files")
 
 
238
 
239
  def search(self, query: str, top_k: int = 5) -> List[Dict]:
240
  """
@@ -247,46 +264,52 @@ class LocalMediaIndexer:
247
  Returns:
248
  List of results with file paths and similarity scores
249
  """
250
- if self.index is None:
251
- logger.error("[ERROR] Index not loaded! Run index_local_directory() first")
252
- return []
253
 
254
- try:
255
- logger.info(f"🔍 Searching for: '{query}'")
256
-
257
- # Get query embedding (computed locally)
258
- inputs = self.processor(text=[query], return_tensors="pt").to(self.device)
259
- with torch.no_grad():
260
- text_features = self.model.get_text_features(**inputs)
261
-
262
- # Normalize for cosine similarity
263
- text_features = text_features / text_features.norm(dim=-1, keepdim=True)
264
- query_embedding = text_features.cpu().numpy().astype('float32')
265
-
266
- # Search locally
267
- top_k = min(top_k, len(self.file_paths))
268
- scores, indices = self.index.search(query_embedding, top_k)
269
-
270
- results = []
271
- for score, idx in zip(scores[0], indices[0]):
272
- if idx < len(self.file_paths):
273
- file_path = self.file_paths[idx]
274
- metadata = self.file_metadata.get(file_path, {})
275
-
276
- results.append({
277
- 'file_path': file_path,
278
- 'file_name': os.path.basename(file_path),
279
- 'similarity_score': float(score),
280
- 'media_type': metadata.get('type', 'unknown'),
281
- 'file_size_mb': round(metadata.get('size', 0) / (1024*1024), 2)
282
- })
283
-
284
- logger.info(f" [OK] Found {len(results)} results")
285
- return results
286
 
287
- except Exception as e:
288
- logger.error(f"[ERROR] Search error: {e}")
289
- return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
  def save_index(self):
292
  """Save index to local disk"""
@@ -306,6 +329,7 @@ class LocalMediaIndexer:
306
  logger.info(f"💾 Index saved to {self.index_dir}")
307
 
308
  except Exception as e:
 
309
  logger.error(f"[ERROR] Error saving index: {e}")
310
 
311
  def load_index(self) -> bool:
@@ -330,5 +354,6 @@ class LocalMediaIndexer:
330
  return False
331
 
332
  except Exception as e:
 
333
  logger.error(f"[ERROR] Error loading index: {e}")
334
  return False
 
16
  from pathlib import Path
17
  from typing import List, Dict, Optional
18
  import logging
19
+ from opentelemetry import trace
20
+ from appsignal import send_error
21
 
22
  logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
+ tracer = trace.get_tracer(__name__)
25
 
26
  class LocalMediaIndexer:
27
  """
 
64
  self.processor = AutoProcessor.from_pretrained(model_name)
65
  logger.info("[OK] Model loaded successfully")
66
  except Exception as e:
67
+ send_error(e)
68
  logger.error(f"[ERROR] Error loading model: {e}")
69
  raise
70
 
 
109
  return frames
110
 
111
  except Exception as e:
112
+ send_error(e)
113
  logger.error(f"[ERROR] Error extracting frames from {video_path}: {e}")
114
  return []
115
 
 
123
  Returns:
124
  Numpy array embedding or None if error
125
  """
126
+ with tracer.start_as_current_span("get_image_embedding") as span:
127
+ try:
128
+ inputs = self.processor(images=image, return_tensors="pt").to(self.device)
129
+ with torch.no_grad():
130
+ image_features = self.model.get_image_features(**inputs)
131
+ # Normalize embeddings for cosine similarity
132
+ image_features = image_features / image_features.norm(dim=-1, keepdim=True)
133
+ return image_features.cpu().numpy().flatten()
134
+ except Exception as e:
135
+ send_error(e)
136
+ logger.error(f"[ERROR] Error getting embedding: {e}")
137
+ return None
138
 
139
  def index_local_directory(self, force_reindex: bool = False):
140
  """
 
143
  Args:
144
  force_reindex: If True, rebuild index even if it exists
145
  """
146
+ with tracer.start_as_current_span("index_local_directory") as span:
147
+ span.set_attribute("index.force_reindex", force_reindex)
148
+ span.set_attribute("index.media_dir", self.media_dir)
 
 
 
 
 
 
 
149
 
150
+ index_path = os.path.join(self.index_dir, "media_index.faiss")
151
+ paths_path = os.path.join(self.index_dir, "file_paths.json")
152
+ metadata_path = os.path.join(self.index_dir, "file_metadata.json")
 
153
 
154
+ # Try to load existing index
155
+ if not force_reindex and os.path.exists(index_path):
156
+ logger.info("📂 Loading existing index...")
157
+ if self.load_index():
158
+ logger.info(f"[OK] Loaded index with {len(self.file_paths)} files")
159
+ span.set_attribute("index.loaded_from_cache", True)
160
+ span.set_attribute("index.file_count", len(self.file_paths))
161
+ return
162
+
163
+ logger.info(f"🔨 Building new index from: {self.media_dir}")
164
+ embeddings = []
165
+ self.file_paths = []
166
+ self.file_metadata = {}
167
+
168
+ # Supported file extensions
169
+ image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'}
170
+ video_exts = {'.mp4', '.mov', '.avi', '.mkv', '.wmv', '.flv'}
171
+
172
+ # Walk through local directory
173
+ file_count = 0
174
+ for root, dirs, files in os.walk(self.media_dir):
175
+ for file in sorted(files):
176
+ file_path = os.path.join(root, file)
177
+ ext = Path(file).suffix.lower()
178
+
179
+ # Get file stats
180
+ try:
181
+ stat = os.stat(file_path)
182
+ file_size = stat.st_size
183
+ except Exception as e:
184
+ logger.warning(f"[WARNING] Cannot access {file}: {e}")
185
+ continue
186
+
187
+ try:
188
+ if ext in image_exts:
189
+ # Process image
190
+ logger.info(f"📸 Processing image: {file}")
191
+ image = Image.open(file_path).convert('RGB')
192
+ embedding = self.get_image_embedding(image)
193
+
194
+ if embedding is not None:
195
+ embeddings.append(embedding)
 
 
 
 
 
 
 
 
 
 
 
196
  self.file_paths.append(file_path)
197
  self.file_metadata[file_path] = {
198
+ 'type': 'image',
199
  'size': file_size,
200
+ 'name': file
 
201
  }
202
  file_count += 1
203
+ logger.info(f" [OK] Indexed image: {file}")
204
+
205
+ elif ext in video_exts:
206
+ # Process video frames
207
+ logger.info(f"🎬 Processing video: {file}")
208
+ frames = self.extract_video_frames(file_path)
209
+
210
+ if frames:
211
+ # Use average of frame embeddings
212
+ frame_embeddings = []
213
+ for frame in frames:
214
+ emb = self.get_image_embedding(frame)
215
+ if emb is not None:
216
+ frame_embeddings.append(emb)
217
+
218
+ if frame_embeddings:
219
+ avg_embedding = np.mean(frame_embeddings, axis=0)
220
+ embeddings.append(avg_embedding)
221
+ self.file_paths.append(file_path)
222
+ self.file_metadata[file_path] = {
223
+ 'type': 'video',
224
+ 'size': file_size,
225
+ 'name': file,
226
+ 'frames_indexed': len(frames)
227
+ }
228
+ file_count += 1
229
+ logger.info(f" [OK] Indexed video: {file}")
230
+
231
+ except Exception as e:
232
+ send_error(e)
233
+ logger.error(f"[ERROR] Error processing {file}: {e}")
234
+
235
+ if not embeddings:
236
+ logger.warning("[WARNING] No media files found to index!")
237
+ logger.warning(f" Check that {self.media_dir} contains images or videos")
238
+ span.set_attribute("index.file_count", 0)
239
+ return
240
 
241
+ # Create FAISS index
242
+ logger.info("🔨 Building FAISS index...")
243
+ embeddings_array = np.array(embeddings).astype('float32')
244
+ dimension = embeddings_array.shape[1]
245
 
246
+ # Use IndexFlatIP for cosine similarity (embeddings already normalized)
247
+ self.index = faiss.IndexFlatIP(dimension)
248
+ self.index.add(embeddings_array)
249
 
250
+ # Save index locally
251
+ self.save_index()
252
+ span.set_attribute("index.loaded_from_cache", False)
253
+ span.set_attribute("index.file_count", len(self.file_paths))
254
+ logger.info(f"[OK] Successfully indexed {len(self.file_paths)} files")
255
 
256
  def search(self, query: str, top_k: int = 5) -> List[Dict]:
257
  """
 
264
  Returns:
265
  List of results with file paths and similarity scores
266
  """
267
+ with tracer.start_as_current_span("indexer_search") as span:
268
+ span.set_attribute("search.query", query)
269
+ span.set_attribute("search.top_k", top_k)
270
 
271
+ if self.index is None:
272
+ logger.error("[ERROR] Index not loaded! Run index_local_directory() first")
273
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
+ try:
276
+ logger.info(f"🔍 Searching for: '{query}'")
277
+
278
+ # Get query embedding (computed locally)
279
+ inputs = self.processor(text=[query], return_tensors="pt").to(self.device)
280
+ with torch.no_grad():
281
+ text_features = self.model.get_text_features(**inputs)
282
+
283
+ # Normalize for cosine similarity
284
+ text_features = text_features / text_features.norm(dim=-1, keepdim=True)
285
+ query_embedding = text_features.cpu().numpy().astype('float32')
286
+
287
+ # Search locally
288
+ top_k = min(top_k, len(self.file_paths))
289
+ scores, indices = self.index.search(query_embedding, top_k)
290
+
291
+ results = []
292
+ for score, idx in zip(scores[0], indices[0]):
293
+ if idx < len(self.file_paths):
294
+ file_path = self.file_paths[idx]
295
+ metadata = self.file_metadata.get(file_path, {})
296
+
297
+ results.append({
298
+ 'file_path': file_path,
299
+ 'file_name': os.path.basename(file_path),
300
+ 'similarity_score': float(score),
301
+ 'media_type': metadata.get('type', 'unknown'),
302
+ 'file_size_mb': round(metadata.get('size', 0) / (1024*1024), 2)
303
+ })
304
+
305
+ span.set_attribute("search.result_count", len(results))
306
+ logger.info(f" [OK] Found {len(results)} results")
307
+ return results
308
+
309
+ except Exception as e:
310
+ send_error(e)
311
+ logger.error(f"[ERROR] Search error: {e}")
312
+ return []
313
 
314
  def save_index(self):
315
  """Save index to local disk"""
 
329
  logger.info(f"💾 Index saved to {self.index_dir}")
330
 
331
  except Exception as e:
332
+ send_error(e)
333
  logger.error(f"[ERROR] Error saving index: {e}")
334
 
335
  def load_index(self) -> bool:
 
354
  return False
355
 
356
  except Exception as e:
357
+ send_error(e)
358
  logger.error(f"[ERROR] Error loading index: {e}")
359
  return False
app.py CHANGED
@@ -21,6 +21,16 @@ from ai_indexer import LocalMediaIndexer
21
  import logging
22
  from huggingface_hub import snapshot_download
23
 
 
 
 
 
 
 
 
 
 
 
24
  # Load environment variables
25
  load_dotenv()
26
 
@@ -61,6 +71,7 @@ if IS_HUGGINGFACE_SPACE or not os.path.exists(MEDIA_DIR) or not os.listdir(MEDIA
61
  )
62
  logger.info("Media files downloaded successfully")
63
  except Exception as e:
 
64
  logger.error(f"Failed to download media files: {e}")
65
  logger.warning("Continuing without demo media files")
66
 
@@ -77,6 +88,7 @@ try:
77
  indexer.index_local_directory(force_reindex=False)
78
 
79
  except Exception as e:
 
80
  logger.error(f"❌ Failed to initialize indexer: {e}")
81
  raise
82
 
@@ -98,33 +110,40 @@ def semantic_search(query: str, media_type: str = "all", top_k: int = 5) -> str:
98
  Returns:
99
  JSON with ranked search results and similarity scores
100
  """
101
- try:
102
- if not query or not query.strip():
103
- return json.dumps({"error": "Query cannot be empty"}, indent=2)
104
-
105
- top_k = max(1, min(20, int(top_k)))
106
-
107
- # Perform semantic search (locally)
108
- logger.info(f"Searching for: '{query}'")
109
- results = indexer.search(query, top_k=top_k)
110
-
111
- # Filter by media type
112
- if media_type != "all":
113
- results = [r for r in results if r.get('media_type') == media_type]
114
-
115
- response = {
116
- "query": query,
117
- "media_type": media_type,
118
- "count": len(results),
119
- "results": results,
120
- "note": "All processing done locally on your machine"
121
- }
122
-
123
- return json.dumps(response, indent=2)
124
-
125
- except Exception as e:
126
- logger.error(f"[ERROR] Search error: {e}")
127
- return json.dumps({"error": str(e)}, indent=2)
 
 
 
 
 
 
 
128
 
129
  def get_media_details(file_path: str) -> str:
130
  """
@@ -136,33 +155,38 @@ def get_media_details(file_path: str) -> str:
136
  Returns:
137
  JSON with file details and metadata
138
  """
139
- try:
140
- if not os.path.exists(file_path):
141
- return json.dumps({"error": f"File not found: {file_path}"}, indent=2)
142
-
143
- stat = os.stat(file_path)
144
- ext = Path(file_path).suffix.lower()
145
-
146
- image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'}
147
- video_exts = {'.mp4', '.mov', '.avi', '.mkv', '.wmv', '.flv'}
148
-
149
- media_type = "image" if ext in image_exts else "video" if ext in video_exts else "unknown"
150
-
151
- details = {
152
- "file_path": file_path,
153
- "file_name": os.path.basename(file_path),
154
- "media_type": media_type,
155
- "file_size_mb": round(stat.st_size / (1024*1024), 2),
156
- "extension": ext,
157
- "exists": True,
158
- "in_index": file_path in indexer.file_paths
159
- }
160
-
161
- return json.dumps(details, indent=2)
162
-
163
- except Exception as e:
164
- logger.error(f"[ERROR] Error getting details: {e}")
165
- return json.dumps({"error": str(e)}, indent=2)
 
 
 
 
 
166
 
167
  def reindex_media(force: bool = False) -> str:
168
  """
@@ -175,22 +199,27 @@ def reindex_media(force: bool = False) -> str:
175
  Returns:
176
  Status message
177
  """
178
- try:
179
- logger.info("Starting reindex...")
180
- indexer.index_local_directory(force_reindex=force)
181
-
182
- response = {
183
- "status": "success",
184
- "message": f"Reindexed {len(indexer.file_paths)} files",
185
- "media_directory": MEDIA_DIR,
186
- "index_directory": INDEX_DIR
187
- }
188
-
189
- return json.dumps(response, indent=2)
190
-
191
- except Exception as e:
192
- logger.error(f"[ERROR] Reindex error: {e}")
193
- return json.dumps({"error": str(e)}, indent=2)
 
 
 
 
 
194
 
195
  def get_index_stats() -> str:
196
  """
@@ -199,32 +228,38 @@ def get_index_stats() -> str:
199
  Returns:
200
  JSON with index statistics
201
  """
202
- try:
203
- image_count = sum(1 for fp in indexer.file_paths
204
- if indexer.file_metadata.get(fp, {}).get('type') == 'image')
205
- video_count = sum(1 for fp in indexer.file_paths
206
- if indexer.file_metadata.get(fp, {}).get('type') == 'video')
207
-
208
- total_size = sum(meta.get('size', 0)
209
- for meta in indexer.file_metadata.values())
210
-
211
- stats = {
212
- "total_files": len(indexer.file_paths),
213
- "images": image_count,
214
- "videos": video_count,
215
- "total_size_mb": round(total_size / (1024*1024), 2),
216
- "media_directory": MEDIA_DIR,
217
- "index_directory": INDEX_DIR,
218
- "model_device": indexer.device,
219
- "model_used": "google/siglip-base-patch16-224",
220
- "privacy_note": "All data processed locally - nothing sent to cloud"
221
- }
222
-
223
- return json.dumps(stats, indent=2)
224
-
225
- except Exception as e:
226
- logger.error(f"[ERROR] Error getting stats: {e}")
227
- return json.dumps({"error": str(e)}, indent=2)
 
 
 
 
 
 
228
 
229
  # ============================================================================
230
  # UI Helper Functions
@@ -671,4 +706,7 @@ if __name__ == "__main__":
671
  launch_kwargs["server_port"] = 7860
672
  launch_kwargs["share"] = False
673
 
 
 
 
674
  demo.launch(**launch_kwargs)
 
21
  import logging
22
  from huggingface_hub import snapshot_download
23
 
24
+ # --- AppSignal APM ---
25
+ import appsignal
26
+ from appsignal import set_category, send_error
27
+ from opentelemetry.instrumentation.starlette import StarletteInstrumentor
28
+ from opentelemetry import trace
29
+
30
+ appsignal.start()
31
+
32
+ tracer = trace.get_tracer(__name__)
33
+
34
  # Load environment variables
35
  load_dotenv()
36
 
 
71
  )
72
  logger.info("Media files downloaded successfully")
73
  except Exception as e:
74
+ send_error(e)
75
  logger.error(f"Failed to download media files: {e}")
76
  logger.warning("Continuing without demo media files")
77
 
 
88
  indexer.index_local_directory(force_reindex=False)
89
 
90
  except Exception as e:
91
+ send_error(e)
92
  logger.error(f"❌ Failed to initialize indexer: {e}")
93
  raise
94
 
 
110
  Returns:
111
  JSON with ranked search results and similarity scores
112
  """
113
+ with tracer.start_as_current_span("semantic_search") as span:
114
+ set_category("mcp_tool.semantic_search")
115
+ span.set_attribute("search.query", query or "")
116
+ span.set_attribute("search.media_type", media_type)
117
+ span.set_attribute("search.top_k", top_k)
118
+ try:
119
+ if not query or not query.strip():
120
+ return json.dumps({"error": "Query cannot be empty"}, indent=2)
121
+
122
+ top_k = max(1, min(20, int(top_k)))
123
+
124
+ # Perform semantic search (locally)
125
+ logger.info(f"Searching for: '{query}'")
126
+ results = indexer.search(query, top_k=top_k)
127
+
128
+ # Filter by media type
129
+ if media_type != "all":
130
+ results = [r for r in results if r.get('media_type') == media_type]
131
+
132
+ response = {
133
+ "query": query,
134
+ "media_type": media_type,
135
+ "count": len(results),
136
+ "results": results,
137
+ "note": "All processing done locally on your machine"
138
+ }
139
+
140
+ span.set_attribute("search.result_count", len(results))
141
+ return json.dumps(response, indent=2)
142
+
143
+ except Exception as e:
144
+ send_error(e)
145
+ logger.error(f"[ERROR] Search error: {e}")
146
+ return json.dumps({"error": str(e)}, indent=2)
147
 
148
  def get_media_details(file_path: str) -> str:
149
  """
 
155
  Returns:
156
  JSON with file details and metadata
157
  """
158
+ with tracer.start_as_current_span("get_media_details") as span:
159
+ set_category("mcp_tool.get_media_details")
160
+ span.set_attribute("media.file_path", file_path or "")
161
+ try:
162
+ if not os.path.exists(file_path):
163
+ return json.dumps({"error": f"File not found: {file_path}"}, indent=2)
164
+
165
+ stat = os.stat(file_path)
166
+ ext = Path(file_path).suffix.lower()
167
+
168
+ image_exts = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'}
169
+ video_exts = {'.mp4', '.mov', '.avi', '.mkv', '.wmv', '.flv'}
170
+
171
+ media_type = "image" if ext in image_exts else "video" if ext in video_exts else "unknown"
172
+
173
+ details = {
174
+ "file_path": file_path,
175
+ "file_name": os.path.basename(file_path),
176
+ "media_type": media_type,
177
+ "file_size_mb": round(stat.st_size / (1024*1024), 2),
178
+ "extension": ext,
179
+ "exists": True,
180
+ "in_index": file_path in indexer.file_paths
181
+ }
182
+
183
+ span.set_attribute("media.type", media_type)
184
+ return json.dumps(details, indent=2)
185
+
186
+ except Exception as e:
187
+ send_error(e)
188
+ logger.error(f"[ERROR] Error getting details: {e}")
189
+ return json.dumps({"error": str(e)}, indent=2)
190
 
191
  def reindex_media(force: bool = False) -> str:
192
  """
 
199
  Returns:
200
  Status message
201
  """
202
+ with tracer.start_as_current_span("reindex_media") as span:
203
+ set_category("mcp_tool.reindex_media")
204
+ span.set_attribute("reindex.force", force)
205
+ try:
206
+ logger.info("Starting reindex...")
207
+ indexer.index_local_directory(force_reindex=force)
208
+
209
+ response = {
210
+ "status": "success",
211
+ "message": f"Reindexed {len(indexer.file_paths)} files",
212
+ "media_directory": MEDIA_DIR,
213
+ "index_directory": INDEX_DIR
214
+ }
215
+
216
+ span.set_attribute("reindex.file_count", len(indexer.file_paths))
217
+ return json.dumps(response, indent=2)
218
+
219
+ except Exception as e:
220
+ send_error(e)
221
+ logger.error(f"[ERROR] Reindex error: {e}")
222
+ return json.dumps({"error": str(e)}, indent=2)
223
 
224
  def get_index_stats() -> str:
225
  """
 
228
  Returns:
229
  JSON with index statistics
230
  """
231
+ with tracer.start_as_current_span("get_index_stats") as span:
232
+ set_category("mcp_tool.get_index_stats")
233
+ try:
234
+ image_count = sum(1 for fp in indexer.file_paths
235
+ if indexer.file_metadata.get(fp, {}).get('type') == 'image')
236
+ video_count = sum(1 for fp in indexer.file_paths
237
+ if indexer.file_metadata.get(fp, {}).get('type') == 'video')
238
+
239
+ total_size = sum(meta.get('size', 0)
240
+ for meta in indexer.file_metadata.values())
241
+
242
+ stats = {
243
+ "total_files": len(indexer.file_paths),
244
+ "images": image_count,
245
+ "videos": video_count,
246
+ "total_size_mb": round(total_size / (1024*1024), 2),
247
+ "media_directory": MEDIA_DIR,
248
+ "index_directory": INDEX_DIR,
249
+ "model_device": indexer.device,
250
+ "model_used": "google/siglip-base-patch16-224",
251
+ "privacy_note": "All data processed locally - nothing sent to cloud"
252
+ }
253
+
254
+ span.set_attribute("stats.total_files", len(indexer.file_paths))
255
+ span.set_attribute("stats.images", image_count)
256
+ span.set_attribute("stats.videos", video_count)
257
+ return json.dumps(stats, indent=2)
258
+
259
+ except Exception as e:
260
+ send_error(e)
261
+ logger.error(f"[ERROR] Error getting stats: {e}")
262
+ return json.dumps({"error": str(e)}, indent=2)
263
 
264
  # ============================================================================
265
  # UI Helper Functions
 
706
  launch_kwargs["server_port"] = 7860
707
  launch_kwargs["share"] = False
708
 
709
+ # Instrument Gradio's internal Starlette/FastAPI app (per AppSignal docs)
710
+ StarletteInstrumentor().instrument_app(demo.app)
711
+
712
  demo.launch(**launch_kwargs)
requirements.txt CHANGED
@@ -8,3 +8,5 @@ faiss-cpu>=1.7.4
8
  python-dotenv>=1.0.0
9
  sentencepiece>=0.2.0
10
  huggingface_hub>=0.20.0
 
 
 
8
  python-dotenv>=1.0.0
9
  sentencepiece>=0.2.0
10
  huggingface_hub>=0.20.0
11
+ appsignal
12
+ opentelemetry-instrumentation-starlette