Rulga committed
Commit 4adde24 · 1 Parent(s): 7726728

Refactor chat history retrieval; remove local file reading and debug print statements for cleaner code

Files changed (2)
  1. app.py +115 -128
  2. src/analytics/chat_evaluator.py +7 -31
app.py CHANGED
@@ -935,143 +935,130 @@ with gr.Blocks() as demo:
             outputs=[analysis_output]
         )
 
-    with gr.Tab("Chat Evaluation"):
-        gr.Markdown("### Evaluation of Chat Responses")
-
         with gr.Row():
-            with gr.Column(scale=2):
-                show_evaluated = gr.Checkbox(label="Show Already Evaluated Pairs", value=False)
-                qa_table = gr.DataFrame(
-                    get_qa_pairs_dataframe(chat_evaluator),
-                    interactive=False,
-                    column_config={
-                        "ID": {
-                            "editable": False,
-                        },
-                        "Question": {
-                            "editable": False,
-                        },
-                        "Answer": {
-                            "editable": False,
-                        },
-                        "Evaluated": {
-                            "editable": False,
-                        }
-                    }
-                )
-
-                gr.Markdown("### Select Conversation to Evaluate")
-                selected_conversation = gr.Textbox(
-                    label="Conversation ID",
-                    placeholder="Select from table above",
-                    interactive=False
-                )
-
-                # Define event handlers outside of the UI definition
-                def on_table_select(evt: gr.SelectData):
-                    if evt.value:
-                        return evt.value[0]  # Return the ID from the first column
-                    return ""
-
-                def on_show_evaluated_change(show: bool):
-                    return get_qa_pairs_dataframe(chat_evaluator, show_evaluated=show)
-
-                # Connect event handlers
-                qa_table.select(
-                    fn=on_table_select,
-                    outputs=selected_conversation
-                )
-
-                show_evaluated.change(
-                    fn=on_show_evaluated_change,
-                    inputs=show_evaluated,
-                    outputs=qa_table
-                )
-
-                gr.Markdown("### Evaluate Response")
-                question_display = gr.Textbox(label="User Question", interactive=False)
-                original_answer = gr.TextArea(label="Original Bot Answer", interactive=False)
-                improved_answer = gr.TextArea(label="Improved Answer (Gold Standard)", interactive=True)
-
-                gr.Markdown("### Quality Ratings (1-5)")
-                with gr.Row():
-                    accuracy = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Factual Accuracy")
-                    completeness = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Completeness")
-                with gr.Row():
-                    relevance = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Relevance")
-                    clarity = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Clarity")
-                    legal_correctness = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Legal Correctness")
-
-                notes = gr.TextArea(label="Evaluator Notes", placeholder="Add your notes about this response...")
-                save_btn = gr.Button("Save Evaluation", variant="primary")
-                evaluation_status_msg = gr.Textbox(label="Status", interactive=False)
 
-        # Add event handlers
-        refresh_status_btn.click(
-            fn=lambda: get_evaluation_status(chat_evaluator),
-            inputs=[],
-            outputs=[evaluation_status]
         )
 
-        refresh_report_btn.click(
-            fn=lambda: generate_evaluation_report_html(chat_evaluator),
-            inputs=[],
-            outputs=[evaluation_report]
-        )
 
-        show_evaluated.change(
-            fn=lambda x: get_qa_pairs_dataframe(chat_evaluator, x),
-            inputs=[show_evaluated],
-            outputs=[qa_table]
-        )
 
-        # Table selection to conversation ID textbox
-        qa_table.select(
-            fn=lambda df, evt: evt.data[0] if evt and hasattr(evt, 'data') and len(evt.data) > 0 else "",
-            inputs=[qa_table],
-            outputs=[selected_conversation]
-        )
-
-        # Handle row deletion
-        def delete_qa_pair(evt):
-            if evt and hasattr(evt, 'data'):
-                conversation_id = evt.data[0]  # Get ID from first column
-                # Add logic for deleting the pair from database
-                return get_qa_pairs_dataframe(chat_evaluator)  # Update table
-            return None
-
-        qa_table.delete(
-            fn=delete_qa_pair,
-            inputs=[qa_table],
-            outputs=[qa_table]
-        )
-
-        # Load conversation for evaluation
-        load_btn.click(
-            fn=lambda x: load_qa_pair_for_evaluation(conversation_id=x, evaluator=chat_evaluator),
-            inputs=[selected_conversation],
-            outputs=[question_display, original_answer, improved_answer,
-                     accuracy, completeness, relevance, clarity, legal_correctness, notes]
-        )
 
-        # Save evaluation
-        save_btn.click(
-            fn=lambda conv_id, q, orig_a, imp_a, acc, comp, rel, clar, legal, notes:
-                save_evaluation(conv_id, q, orig_a, imp_a, acc, comp, rel, clar, legal, notes, evaluator=chat_evaluator),
-            inputs=[
-                selected_conversation, question_display, original_answer, improved_answer,
-                accuracy, completeness, relevance, clarity, legal_correctness, notes
-            ],
-            outputs=[evaluation_status_msg]
-        )
 
-        # Export training data
-        export_btn.click(
-            fn=lambda min_r, path: export_training_data_action(min_r, path, chat_evaluator),
-            inputs=[min_rating, export_path],
-            outputs=[export_status]
-        )
 
+    with gr.Tab("Chat Evaluation"):
+        gr.Markdown("### Evaluation of Chat Responses")
+
+        with gr.Row():
+            with gr.Column(scale=2):
+                # Status and reports section
                 with gr.Row():
+                    with gr.Column(scale=1):
+                        evaluation_status = gr.Textbox(label="Evaluation Status", interactive=False)
+                        refresh_status_btn = gr.Button("Refresh Status")
+
+                    with gr.Column(scale=1):
+                        evaluation_report = gr.HTML(label="Evaluation Report")
+                        refresh_report_btn = gr.Button("Generate Report")
 
+                # QA pairs table section
+                show_evaluated = gr.Checkbox(label="Show Already Evaluated Pairs", value=False)
+                qa_table = gr.DataFrame(
+                    get_qa_pairs_dataframe(chat_evaluator),
+                    interactive=False
+                    # Removed column_config for compatibility
                 )
 
+                # Conversation selection section
+                gr.Markdown("### Select Conversation to Evaluate")
+                with gr.Row():
+                    selected_conversation = gr.Textbox(
+                        label="Conversation ID",
+                        placeholder="Select from table above",
+                        interactive=True
+                    )
+                    load_btn = gr.Button("Load Conversation")
 
+                # Conversation content section
+                gr.Markdown("### Evaluate Response")
+                question_display = gr.Textbox(label="User Question", interactive=False)
+                original_answer = gr.TextArea(label="Original Bot Answer", interactive=False)
+                improved_answer = gr.TextArea(label="Improved Answer (Gold Standard)", interactive=True)
 
+                # Ratings section
+                gr.Markdown("### Quality Ratings (1-5)")
+                with gr.Row():
+                    accuracy = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Factual Accuracy")
+                    completeness = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Completeness")
+                with gr.Row():
+                    relevance = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Relevance")
+                    clarity = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Clarity")
+                    legal_correctness = gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Legal Correctness")
 
+                # Notes and save section
+                notes = gr.TextArea(label="Evaluator Notes", placeholder="Add your notes about this response...")
+                save_btn = gr.Button("Save Evaluation", variant="primary")
+                evaluation_status_msg = gr.Textbox(label="Status", interactive=False)
 
+                # Data export section
+                gr.Markdown("### Export Evaluation Data")
+                with gr.Row():
+                    min_rating = gr.Slider(minimum=1, maximum=5, value=4, step=0.5, label="Minimum Rating for Export")
+                    export_path = gr.Textbox(label="Export File Path", value="training_data.jsonl")
+                export_btn = gr.Button("Export Training Data")
+                export_status = gr.Textbox(label="Export Status", interactive=False)
 
+        # Event handlers
+        # Status update
+        refresh_status_btn.click(
+            fn=lambda: get_evaluation_status(chat_evaluator),
+            inputs=[],
+            outputs=[evaluation_status]
+        )
+
+        # Report generation
+        refresh_report_btn.click(
+            fn=lambda: generate_evaluation_report_html(chat_evaluator),
+            inputs=[],
+            outputs=[evaluation_report]
+        )
+
+        # Toggle evaluated pairs display
+        show_evaluated.change(
+            fn=lambda x: get_qa_pairs_dataframe(chat_evaluator, x),
+            inputs=[show_evaluated],
+            outputs=[qa_table]
+        )
+
+        # Table row selection function
+        def on_table_select(evt):
+            try:
+                return evt.value[0] if evt and hasattr(evt, 'value') and len(evt.value) > 0 else ""
+            except Exception as e:
+                print(f"Error selecting table row: {str(e)}")
+                return ""
+
+        # Table row selection handler
+        qa_table.select(
+            fn=on_table_select,
+            outputs=[selected_conversation]
+        )
+
+        # Load pair for evaluation
+        load_btn.click(
+            fn=lambda x: load_qa_pair_for_evaluation(conversation_id=x, evaluator=chat_evaluator),
+            inputs=[selected_conversation],
+            outputs=[question_display, original_answer, improved_answer,
+                     accuracy, completeness, relevance, clarity, legal_correctness, notes]
+        )
+
+        # Save evaluation
+        save_btn.click(
+            fn=lambda conv_id, q, orig_a, imp_a, acc, comp, rel, clar, legal, notes:
+                save_evaluation(conv_id, q, orig_a, imp_a, acc, comp, rel, clar, legal, notes, evaluator=chat_evaluator),
+            inputs=[
+                selected_conversation, question_display, original_answer, improved_answer,
+                accuracy, completeness, relevance, clarity, legal_correctness, notes
+            ],
+            outputs=[evaluation_status_msg]
+        )
+
+        # Export training data
+        export_btn.click(
+            fn=lambda min_r, path: export_training_data_action(min_r, path, chat_evaluator),
+            inputs=[min_rating, export_path],
+            outputs=[export_status]
+        )
+
     # Model change handler
     model_selector.change(
        fn=change_model,
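Note: the rewritten tab wires row selection through a named handler and qa_table.select(...). For reference, below is a minimal, self-contained sketch of that Gradio selection pattern. It assumes Gradio 4.x; the sample data and the demo_pairs name are placeholders standing in for get_qa_pairs_dataframe(chat_evaluator), and the handler reads evt.index instead of evt.value, so it is an illustrative variant rather than the exact code from this commit.

import gradio as gr
import pandas as pd

# Placeholder data standing in for get_qa_pairs_dataframe(chat_evaluator).
demo_pairs = pd.DataFrame({
    "ID": ["conv-001", "conv-002"],
    "Question": ["What documents are required?", "How long does registration take?"],
    "Evaluated": [False, True],
})

def on_table_select(evt: gr.SelectData):
    # evt.index is (row, column); look up the ID column for that row so the
    # result does not depend on which cell was clicked.
    try:
        row = evt.index[0]
        return demo_pairs.iloc[row]["ID"]
    except Exception:
        return ""

with gr.Blocks() as demo:
    qa_table = gr.DataFrame(demo_pairs, interactive=False)
    selected_conversation = gr.Textbox(label="Conversation ID")
    qa_table.select(fn=on_table_select, outputs=[selected_conversation])

if __name__ == "__main__":
    demo.launch()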
src/analytics/chat_evaluator.py CHANGED
@@ -23,7 +23,6 @@ class ChatEvaluator:
             dataset_manager: Dataset manager for retrieving chat history
             hf_token: Hugging Face token for uploading annotations
             dataset_id: Hugging Face dataset ID
-            chat_history_path: Path to local chat history directory
         """
         self.dataset_manager = dataset_manager or DatasetManager()
         self.hf_token = hf_token
@@ -37,36 +36,11 @@ class ChatEvaluator:
 
     def get_chat_history(self) -> List[Dict[str, Any]]:
         """
-        Get all chat history data from local files and dataset
+        Get all chat history data from dataset
         """
         success, chat_data = self.dataset_manager.get_chat_history()
-
-        # Add debug information
-        print(f"Debug - Chat history fetch success: {success}")
-        print(f"Debug - Number of chat records: {len(chat_data) if chat_data else 0}")
-
         if not success or not chat_data:
-            # Try reading local files
-            local_data = self._read_local_chat_history()
-            print(f"Debug - Local chat records found: {len(local_data)}")
-            return local_data
-        return chat_data
-
-    def _read_local_chat_history(self) -> List[Dict[str, Any]]:
-        """
-        Read chat history from local files
-        """
-        chat_data = []
-        if os.path.exists(self.chat_history_path):
-            for filename in os.listdir(self.chat_history_path):
-                if filename.endswith('.json'):
-                    try:
-                        filepath = os.path.join(self.chat_history_path, filename)
-                        with open(filepath, 'r', encoding='utf-8') as f:
-                            data = json.load(f)
-                        chat_data.append(data)
-                    except Exception as e:
-                        print(f"Error reading chat file {filename}: {str(e)}")
+            return []
         return chat_data
 
     def get_qa_pairs_for_evaluation(self, limit: int = 50) -> List[Dict[str, Any]]:
@@ -87,9 +61,7 @@ class ChatEvaluator:
         for chat in chat_data:
             conversation_id = chat.get("conversation_id", "unknown")
             timestamp = chat.get("timestamp", "")
-            messages = chat.get("messages", [])  # Changed from 'history' to 'messages'
-
-            print(f"Debug - Chat {conversation_id} has {len(messages)} messages")  # Debug print
+            messages = chat.get("messages", [])
 
             # Find user-assistant pairs in messages
             for i in range(len(messages) - 1):
@@ -349,3 +321,7 @@ class ChatEvaluator:
         return metrics
 
 
+
+
+
+
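Taken together, the simplified retrieval path is: ask the dataset manager for chat history, fall back to an empty list on failure, then pair each user message with the assistant message that follows it. The standalone sketch below mirrors that flow under stated assumptions: the DatasetManager stub and the role/content message schema are illustrative, not code taken from the repository.

from typing import Any, Dict, List, Tuple


class DatasetManager:
    """Stand-in for the real dataset manager used by ChatEvaluator."""

    def get_chat_history(self) -> Tuple[bool, List[Dict[str, Any]]]:
        # Returns (success, records); the record shape here is an assumption.
        return True, [{
            "conversation_id": "conv-001",
            "timestamp": "2024-01-01T00:00:00Z",
            "messages": [
                {"role": "user", "content": "What documents do I need?"},
                {"role": "assistant", "content": "You will need ..."},
            ],
        }]


def get_chat_history(dataset_manager: DatasetManager) -> List[Dict[str, Any]]:
    # Mirrors the refactored method: no local-file fallback, no debug prints.
    success, chat_data = dataset_manager.get_chat_history()
    if not success or not chat_data:
        return []
    return chat_data


def extract_qa_pairs(chat_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    # Pair each user message with the assistant message that follows it.
    pairs = []
    for chat in chat_data:
        conversation_id = chat.get("conversation_id", "unknown")
        messages = chat.get("messages", [])
        for i in range(len(messages) - 1):
            if messages[i].get("role") == "user" and messages[i + 1].get("role") == "assistant":
                pairs.append({
                    "conversation_id": conversation_id,
                    "question": messages[i].get("content", ""),
                    "answer": messages[i + 1].get("content", ""),
                })
    return pairs


if __name__ == "__main__":
    history = get_chat_history(DatasetManager())
    print(extract_qa_pairs(history))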