Rulga committed on
Commit
7d60219
·
1 Parent(s): c27741d

Refactor ChatEvaluator class; update dataset path handling, improve error logging, and enhance chat history retrieval

Browse files
Files changed (1) hide show
  1. src/analytics/chat_evaluator.py +35 -16
src/analytics/chat_evaluator.py CHANGED
@@ -6,18 +6,15 @@ import json
6
  import os
7
  import datetime
8
  from typing import List, Dict, Any, Tuple, Optional
9
- import pandas as pd
10
- from src.knowledge_base.dataset import DatasetManager
11
- from huggingface_hub import HfApi
12
  import io
13
  import logging
 
14
 
15
  logger = logging.getLogger(__name__)
16
 
17
  from config.settings import (
18
  DATASET_ID,
19
- DATASET_CHAT_HISTORY_PATH,
20
- DATASET_ANNOTATIONS_PATH,
21
  HF_TOKEN
22
  )
23
 
@@ -34,9 +31,9 @@ class ChatEvaluator:
34
  self.dataset_id = dataset_id or DATASET_ID
35
  self.api = HfApi(token=self.hf_token)
36
 
37
- # Use dataset paths
38
- self.chat_history_path = DATASET_CHAT_HISTORY_PATH
39
- self.annotations_path = DATASET_ANNOTATIONS_PATH
40
 
41
  # Ensure directories exist in dataset
42
  try:
@@ -52,7 +49,7 @@ class ChatEvaluator:
52
  # Check and create chat history directory
53
  if self.chat_history_path not in files:
54
  self.api.upload_file(
55
- path_or_fileobj=io.StringIO(""),
56
  path_in_repo=f"{self.chat_history_path}/.gitkeep",
57
  repo_id=self.dataset_id,
58
  repo_type="dataset"
@@ -61,7 +58,7 @@ class ChatEvaluator:
61
  # Check and create annotations directory
62
  if self.annotations_path not in files:
63
  self.api.upload_file(
64
- path_or_fileobj=io.StringIO(""),
65
  path_in_repo=f"{self.annotations_path}/.gitkeep",
66
  repo_id=self.dataset_id,
67
  repo_type="dataset"
@@ -71,13 +68,34 @@ class ChatEvaluator:
71
  raise
72
 
73
  def get_chat_history(self) -> List[Dict[str, Any]]:
74
- """
75
- Get all chat history data from dataset
76
- """
77
- success, chat_data = self.dataset_manager.get_chat_history()
78
- if not success or not chat_data:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  return []
80
- return chat_data
81
 
82
  def get_qa_pairs_for_evaluation(self, limit: int = 50) -> List[Dict[str, Any]]:
83
  """
@@ -354,3 +372,4 @@ class ChatEvaluator:
354
 
355
 
356
 
 
 
6
  import os
7
  import datetime
8
  from typing import List, Dict, Any, Tuple, Optional
 
 
 
9
  import io
10
  import logging
11
+ from huggingface_hub import HfApi
12
 
13
  logger = logging.getLogger(__name__)
14
 
15
  from config.settings import (
16
  DATASET_ID,
17
+ CHAT_HISTORY_PATH,
 
18
  HF_TOKEN
19
  )
20
 
 
31
  self.dataset_id = dataset_id or DATASET_ID
32
  self.api = HfApi(token=self.hf_token)
33
 
34
+ # Paths in dataset
35
+ self.chat_history_path = CHAT_HISTORY_PATH
36
+ self.annotations_path = "annotations"
37
 
38
  # Ensure directories exist in dataset
39
  try:
 
49
  # Check and create chat history directory
50
  if self.chat_history_path not in files:
51
  self.api.upload_file(
52
+ path_or_fileobj=io.BytesIO(b""),
53
  path_in_repo=f"{self.chat_history_path}/.gitkeep",
54
  repo_id=self.dataset_id,
55
  repo_type="dataset"
 
58
  # Check and create annotations directory
59
  if self.annotations_path not in files:
60
  self.api.upload_file(
61
+ path_or_fileobj=io.BytesIO(b""),
62
  path_in_repo=f"{self.annotations_path}/.gitkeep",
63
  repo_id=self.dataset_id,
64
  repo_type="dataset"
 
68
  raise
69
 
70
  def get_chat_history(self) -> List[Dict[str, Any]]:
71
+ """Get all chat history data from dataset"""
72
+ try:
73
+ chat_data = []
74
+ files = self.api.list_repo_files(self.dataset_id, repo_type="dataset")
75
+
76
+ # Filter chat history files
77
+ chat_files = [f for f in files if f.startswith(f"{self.chat_history_path}/")
78
+ and f.endswith('.json')]
79
+
80
+ for file in chat_files:
81
+ try:
82
+ # Download and parse chat file
83
+ content = self.api.hf_hub_download(
84
+ repo_id=self.dataset_id,
85
+ filename=file,
86
+ repo_type="dataset"
87
+ )
88
+ with open(content, 'r', encoding='utf-8') as f:
89
+ chat = json.load(f)
90
+ chat_data.append(chat)
91
+ except Exception as e:
92
+ logger.error(f"Error loading chat file {file}: {e}")
93
+ continue
94
+
95
+ return chat_data
96
+ except Exception as e:
97
+ logger.error(f"Error getting chat history: {e}")
98
  return []
 
99
 
100
  def get_qa_pairs_for_evaluation(self, limit: int = 50) -> List[Dict[str, Any]]:
101
  """
 
372
 
373
 
374
 
375
+