Rulga committed on
Commit
2e6aca3
·
1 Parent(s): 75bf67b

Refactor annotation saving method to upload JSON as bytes, improve annotation retrieval logic, and enhance error handling for loading annotations

Browse files
Files changed (1) hide show
  1. src/analytics/chat_evaluator.py +28 -44
src/analytics/chat_evaluator.py CHANGED
@@ -239,17 +239,6 @@ class ChatEvaluator:
239
  notes: str = "") -> Tuple[bool, str]:
240
  """
241
  Save evaluation annotation
242
-
243
- Args:
244
- conversation_id: ID of the conversation
245
- question: User question
246
- original_answer: Original bot answer
247
- improved_answer: Improved answer (gold standard)
248
- ratings: Dictionary with ratings for different criteria
249
- notes: Optional evaluator notes
250
-
251
- Returns:
252
- (success, message)
253
  """
254
  try:
255
  # Create annotation object
@@ -266,12 +255,12 @@ class ChatEvaluator:
266
  # Create filename with conversation_id
267
  filename = f"{self.annotations_path}/annotation_{conversation_id}.json"
268
 
269
- # Convert to JSON string
270
- json_content = json.dumps(annotation, ensure_ascii=False, indent=2)
271
 
272
- # Upload to dataset
273
  self.api.upload_file(
274
- path_or_fileobj=io.StringIO(json_content),
275
  path_in_repo=filename,
276
  repo_id=self.dataset_id,
277
  repo_type="dataset"
@@ -329,39 +318,34 @@ class ChatEvaluator:
329
  logger.error(f"Error getting annotations: {e}")
330
  return []
331
 
332
- def get_annotation_by_conversation_id(self, conversation_id: str, force_reload=False) -> Optional[Dict[str, Any]]:
333
  """
334
- Get annotation for a specific conversation
335
-
336
- Args:
337
- conversation_id: Conversation ID to look for
338
- force_reload: If True, force reload from dataset
339
-
340
- Returns:
341
- Annotation object or None if not found
342
  """
343
- # If we have cached annotations and not forcing reload, look there first
344
- if self._annotations is not None and not force_reload:
345
- for annotation in self._annotations:
346
- if annotation.get("conversation_id") == conversation_id:
347
- return annotation
348
-
349
  try:
350
- # Try direct file access
351
- filename = f"{self.annotations_path}/annotation_{conversation_id}.json"
352
-
353
- # Download and parse annotation file
354
- content = self.api.hf_hub_download(
355
- repo_id=self.dataset_id,
356
- filename=filename,
357
- repo_type="dataset"
358
- )
359
-
360
- with open(content, 'r', encoding='utf-8') as f:
361
- return json.load(f)
362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  except Exception as e:
364
- logger.error(f"Error loading annotation for {conversation_id}: {e}")
365
  return None
366
 
367
  def export_training_data(self, output_file: str, min_rating: int = 4) -> Tuple[bool, str]:
@@ -457,4 +441,4 @@ class ChatEvaluator:
457
  improved_count = sum(1 for a in annotations if a.get("original_answer") != a.get("improved_answer"))
458
  metrics["improvement_rate"] = (improved_count / len(annotations)) * 100
459
 
460
- return metrics
 
239
  notes: str = "") -> Tuple[bool, str]:
240
  """
241
  Save evaluation annotation
 
 
 
 
 
 
 
 
 
 
 
242
  """
243
  try:
244
  # Create annotation object
 
255
  # Create filename with conversation_id
256
  filename = f"{self.annotations_path}/annotation_{conversation_id}.json"
257
 
258
+ # Convert to JSON bytes
259
+ json_content = json.dumps(annotation, ensure_ascii=False, indent=2).encode('utf-8')
260
 
261
+ # Upload to dataset using bytes buffer
262
  self.api.upload_file(
263
+ path_or_fileobj=io.BytesIO(json_content),
264
  path_in_repo=filename,
265
  repo_id=self.dataset_id,
266
  repo_type="dataset"
 
318
  logger.error(f"Error getting annotations: {e}")
319
  return []
320
 
321
+ def get_annotation(self, conversation_id: str) -> Optional[Dict[str, Any]]:
322
  """
323
+ Get specific annotation by conversation ID
 
 
 
 
 
 
 
324
  """
 
 
 
 
 
 
325
  try:
326
+ # First check if annotations are loaded
327
+ if self._annotations is not None:
328
+ for annotation in self._annotations:
329
+ if annotation.get("conversation_id") == conversation_id:
330
+ return annotation
 
 
 
 
 
 
 
331
 
332
+ # If not found in cache, try direct file access
333
+ filename = f"{self.annotations_path}/annotation_{conversation_id}.json"
334
+ try:
335
+ content = self.api.hf_hub_download(
336
+ repo_id=self.dataset_id,
337
+ filename=filename,
338
+ repo_type="dataset"
339
+ )
340
+
341
+ with open(content, 'r', encoding='utf-8') as f:
342
+ return json.load(f)
343
+ except Exception as e:
344
+ logger.error(f"Error loading annotation for {conversation_id}: {e}")
345
+ return None
346
+
347
  except Exception as e:
348
+ logger.error(f"Error getting annotation: {e}")
349
  return None
350
 
351
  def export_training_data(self, output_file: str, min_rating: int = 4) -> Tuple[bool, str]:
 
441
  improved_count = sum(1 for a in annotations if a.get("original_answer") != a.get("improved_answer"))
442
  metrics["improvement_rate"] = (improved_count / len(annotations)) * 100
443
 
444
+ return metrics