Spaces:

Rulga
/

status-law-gbot

Running

App Files Files Community

Rulga commited on Apr 8

Commit

75bf67b

1 Parent(s): 9a1d867

Enhance evaluation interface by adding force reload option, improving data refresh handling, and updating QA pairs display logic

Browse files

Files changed (3) hide show

app.py +17 -9
src/analytics/chat_evaluator.py +76 -40
web/evaluation_interface.py +67 -53

app.py CHANGED Viewed

@@ -1106,18 +1106,26 @@ with gr.Blocks() as demo:
                         with gr.Column(scale=1):
                             evaluation_status = gr.Textbox(label="Evaluation Status", interactive=False)
                             refresh_status_btn = gr.Button("Refresh Status")
                         with gr.Column(scale=1):
                             evaluation_report = gr.HTML(label="Evaluation Report")
                             refresh_report_btn = gr.Button("Generate Report")
-                    # QA pairs table section
-                    show_evaluated = gr.Checkbox(label="Show Already Evaluated Pairs", value=False)
-                    qa_table = gr.DataFrame(
-                        get_qa_pairs_dataframe(chat_evaluator),
-                        interactive=True,
-                        wrap=True
-                    )
                     # Conversation selection section
                     gr.Markdown("### Select Conversation to Evaluate")
@@ -1160,9 +1168,9 @@ with gr.Blocks() as demo:
             # Event handlers for Chat Evaluation
             refresh_status_btn.click(
-                fn=lambda: get_evaluation_status(chat_evaluator),
                 inputs=[],
-                outputs=[evaluation_status]
             )
             refresh_report_btn.click(

                         with gr.Column(scale=1):
                             evaluation_status = gr.Textbox(label="Evaluation Status", interactive=False)
                             refresh_status_btn = gr.Button("Refresh Status")
+                            # Add status message for data refresh
+                            refresh_data_status = gr.Textbox(
+                                label="Refresh Status",
+                                interactive=False,
+                                visible=True
+                            )
                         with gr.Column(scale=1):
                             evaluation_report = gr.HTML(label="Evaluation Report")
                             refresh_report_btn = gr.Button("Generate Report")
+                            # QA pairs table section
+                            show_evaluated = gr.Checkbox(label="Show Already Evaluated Pairs", value=False)
+                            import pandas as pd
+                            qa_table = gr.DataFrame(
+                            pd.DataFrame(columns=["Conversation ID", "Question", "Timestamp", "Evaluated"]),
+                            interactive=True,
+                            wrap=True
+                        )
                     # Conversation selection section
                     gr.Markdown("### Select Conversation to Evaluate")
             # Event handlers for Chat Evaluation
             refresh_status_btn.click(
+                fn=lambda: get_evaluation_status(chat_evaluator, force_reload=True),
                 inputs=[],
+                outputs=[evaluation_status, qa_table, refresh_data_status]
             )
             refresh_report_btn.click(

src/analytics/chat_evaluator.py CHANGED Viewed

@@ -23,7 +23,7 @@ from config.settings import (
 class ChatEvaluator:
     def __init__(self, hf_token: str = None, dataset_id: str = None):
         """
-        Initialize chat evaluator
         Args:
             hf_token: Hugging Face token
@@ -33,10 +33,15 @@ class ChatEvaluator:
         self.dataset_id = dataset_id or DATASET_ID
         self.api = HfApi(token=self.hf_token)
-        # Используем пути из settings
         self.chat_history_path = DATASET_CHAT_HISTORY_PATH
         self.annotations_path = DATASET_ANNOTATIONS_PATH
         # Ensure directories exist in dataset
         try:
             self._ensure_dataset_structure()
@@ -69,11 +74,27 @@ class ChatEvaluator:
             logger.error(f"Error ensuring dataset structure: {e}")
             raise
-    def get_chat_history(self) -> List[Dict[str, Any]]:
         """
         Get all chat histories from the dataset
         """
         try:
             # Get list of all files in chat history directory
             files = self.api.list_repo_files(self.dataset_id, repo_type="dataset")
@@ -81,7 +102,7 @@ class ChatEvaluator:
             # Filter for chat history files
             chat_path = f"{self.chat_history_path}/"
             chat_files = [f for f in files if f.startswith(chat_path) and f.endswith('.json')]
-            logger.debug(f"Found {len(chat_files)} chat files")  # Более компактный лог
             histories = []
             for file in chat_files:
@@ -102,6 +123,8 @@ class ChatEvaluator:
                     logger.error(f"Error processing chat file {file}: {e}")
                     continue
             return histories
         except Exception as e:
@@ -133,20 +156,26 @@ class ChatEvaluator:
         logger.debug(f"Extracted {len(qa_pairs)} QA pairs")
         return qa_pairs
-    def get_qa_pairs_for_evaluation(self, limit: int = 50) -> List[Dict[str, Any]]:
         """
         Extract question-answer pairs for evaluation
         Args:
             limit: Maximum number of pairs to return
         Returns:
             List of QA pairs with metadata
         """
-        chat_data = self.get_chat_history()
         qa_pairs = []
-        print(f"Debug - Processing {len(chat_data)} chat histories")  # Debug print
         for chat in chat_data:
             conversation_id = chat.get("conversation_id", "unknown")
@@ -170,24 +199,26 @@ class ChatEvaluator:
                             "question_timestamp": messages[i].get("timestamp", ""),
                             "answer_timestamp": messages[i+1].get("timestamp", "")
                         })
-                        # Check if we've reached the limit
-                        if len(qa_pairs) >= limit:
-                            print(f"Debug - Reached limit of {limit} QA pairs")  # Debug print
-                            return qa_pairs
-        print(f"Debug - Extracted {len(qa_pairs)} QA pairs")  # Debug print
-        return qa_pairs
-    def get_evaluation_status(self) -> Dict[str, int]:
         """
         Get status of evaluated QA pairs
         Returns:
             Dictionary with counts of evaluated and unevaluated QA pairs
         """
-        all_pairs = self.get_qa_pairs_for_evaluation(limit=1000)  # Get a large sample
-        evaluated_pairs = self.get_annotations()
         # Count evaluated conversation IDs
         evaluated_ids = set(item.get("conversation_id") for item in evaluated_pairs)
@@ -246,16 +277,27 @@ class ChatEvaluator:
                 repo_type="dataset"
             )
             return True, "Annotation saved successfully"
         except Exception as e:
             logger.error(f"Error saving annotation: {e}")
             return False, f"Failed to save annotation: {str(e)}"
-    def get_annotations(self) -> List[Dict[str, Any]]:
         """
         Get all saved annotations from dataset
         """
         try:
             annotations = []
             files = self.api.list_repo_files(self.dataset_id, repo_type="dataset")
@@ -277,25 +319,36 @@ class ChatEvaluator:
             # Sort by timestamp (newest first)
             annotations.sort(key=lambda x: x.get("timestamp", ""), reverse=True)
             return annotations
         except Exception as e:
             logger.error(f"Error getting annotations: {e}")
             return []
-    def get_annotation_by_conversation_id(self, conversation_id: str) -> Optional[Dict[str, Any]]:
         """
         Get annotation for a specific conversation
         Args:
             conversation_id: Conversation ID to look for
         Returns:
             Annotation object or None if not found
         """
         try:
-            # Используем DATASET_ANNOTATIONS_PATH для формирования пути
-            filename = f"{DATASET_ANNOTATIONS_PATH}/annotation_{conversation_id}.json"
             # Download and parse annotation file
             content = self.api.hf_hub_download(
@@ -404,21 +457,4 @@ class ChatEvaluator:
         improved_count = sum(1 for a in annotations if a.get("original_answer") != a.get("improved_answer"))
         metrics["improvement_rate"] = (improved_count / len(annotations)) * 100
-        return metrics

 class ChatEvaluator:
     def __init__(self, hf_token: str = None, dataset_id: str = None):
         """
+        Initialize chat evaluator with lazy loading
         Args:
             hf_token: Hugging Face token
         self.dataset_id = dataset_id or DATASET_ID
         self.api = HfApi(token=self.hf_token)
+        # Using paths from settings
         self.chat_history_path = DATASET_CHAT_HISTORY_PATH
         self.annotations_path = DATASET_ANNOTATIONS_PATH
+        # Cache for chat histories and QA pairs
+        self._chat_histories = None
+        self._qa_pairs = None
+        self._annotations = None
         # Ensure directories exist in dataset
         try:
             self._ensure_dataset_structure()
             logger.error(f"Error ensuring dataset structure: {e}")
             raise
+    def reset_cache(self):
+        """
+        Reset the cache to force reload of data
+        """
+        self._chat_histories = None
+        self._qa_pairs = None
+        self._annotations = None
+        logger.info("Chat evaluator cache has been reset")
+    def get_chat_history(self, force_reload=False) -> List[Dict[str, Any]]:
         """
         Get all chat histories from the dataset
+        Args:
+            force_reload: If True, ignore cache and reload from dataset
         """
+        # Return cached data if available and not forcing reload
+        if self._chat_histories is not None and not force_reload:
+            logger.debug("Returning cached chat histories")
+            return self._chat_histories
         try:
             # Get list of all files in chat history directory
             files = self.api.list_repo_files(self.dataset_id, repo_type="dataset")
             # Filter for chat history files
             chat_path = f"{self.chat_history_path}/"
             chat_files = [f for f in files if f.startswith(chat_path) and f.endswith('.json')]
+            logger.debug(f"Found {len(chat_files)} chat files")  # More compact log
             histories = []
             for file in chat_files:
                     logger.error(f"Error processing chat file {file}: {e}")
                     continue
+            # Cache the results
+            self._chat_histories = histories
             return histories
         except Exception as e:
         logger.debug(f"Extracted {len(qa_pairs)} QA pairs")
         return qa_pairs
+    def get_qa_pairs_for_evaluation(self, limit: int = 50, force_reload=False) -> List[Dict[str, Any]]:
         """
         Extract question-answer pairs for evaluation
         Args:
             limit: Maximum number of pairs to return
+            force_reload: If True, force reload from dataset
         Returns:
             List of QA pairs with metadata
         """
+        # Return cached data if available and not forcing reload
+        if self._qa_pairs is not None and not force_reload:
+            logger.debug("Returning cached QA pairs")
+            return self._qa_pairs[:limit]  # Respect the limit parameter
+        chat_data = self.get_chat_history(force_reload=force_reload)
         qa_pairs = []
+        logger.debug(f"Processing {len(chat_data)} chat histories")
         for chat in chat_data:
             conversation_id = chat.get("conversation_id", "unknown")
                             "question_timestamp": messages[i].get("timestamp", ""),
                             "answer_timestamp": messages[i+1].get("timestamp", "")
                         })
+        # Cache the results
+        self._qa_pairs = qa_pairs
+        logger.debug(f"Extracted {len(qa_pairs)} QA pairs")
+        # Return up to the limit
+        return qa_pairs[:limit]
+    def get_evaluation_status(self, force_reload=False) -> Dict[str, int]:
         """
         Get status of evaluated QA pairs
+        Args:
+            force_reload: If True, force reload from dataset
         Returns:
             Dictionary with counts of evaluated and unevaluated QA pairs
         """
+        all_pairs = self.get_qa_pairs_for_evaluation(limit=1000, force_reload=force_reload)  # Get a large sample
+        evaluated_pairs = self.get_annotations(force_reload=force_reload)
         # Count evaluated conversation IDs
         evaluated_ids = set(item.get("conversation_id") for item in evaluated_pairs)
                 repo_type="dataset"
             )
+            # Reset annotations cache
+            self._annotations = None
             return True, "Annotation saved successfully"
         except Exception as e:
             logger.error(f"Error saving annotation: {e}")
             return False, f"Failed to save annotation: {str(e)}"
+    def get_annotations(self, force_reload=False) -> List[Dict[str, Any]]:
         """
         Get all saved annotations from dataset
+        Args:
+            force_reload: If True, force reload from dataset
         """
+        # Return cached data if available and not forcing reload
+        if self._annotations is not None and not force_reload:
+            logger.debug("Returning cached annotations")
+            return self._annotations
         try:
             annotations = []
             files = self.api.list_repo_files(self.dataset_id, repo_type="dataset")
             # Sort by timestamp (newest first)
             annotations.sort(key=lambda x: x.get("timestamp", ""), reverse=True)
+            # Cache the results
+            self._annotations = annotations
             return annotations
         except Exception as e:
             logger.error(f"Error getting annotations: {e}")
             return []
+    def get_annotation_by_conversation_id(self, conversation_id: str, force_reload=False) -> Optional[Dict[str, Any]]:
         """
         Get annotation for a specific conversation
         Args:
             conversation_id: Conversation ID to look for
+            force_reload: If True, force reload from dataset
         Returns:
             Annotation object or None if not found
         """
+        # If we have cached annotations and not forcing reload, look there first
+        if self._annotations is not None and not force_reload:
+            for annotation in self._annotations:
+                if annotation.get("conversation_id") == conversation_id:
+                    return annotation
         try:
+            # Try direct file access
+            filename = f"{self.annotations_path}/annotation_{conversation_id}.json"
             # Download and parse annotation file
             content = self.api.hf_hub_download(
         improved_count = sum(1 for a in annotations if a.get("original_answer") != a.get("improved_answer"))
         metrics["improvement_rate"] = (improved_count / len(annotations)) * 100
+        return metrics

web/evaluation_interface.py CHANGED Viewed

@@ -9,78 +9,92 @@ import json
 import os
 from typing import Dict, Any, List, Tuple
-def get_evaluation_status(evaluator: ChatEvaluator) -> str:
     """
-    Format evaluation status for display
     Args:
         evaluator: ChatEvaluator instance
     Returns:
-        Formatted markdown string with status information
     """
-    status = evaluator.get_evaluation_status()
-    status_md = f"""
-    ## Evaluation Status
-    - **Total QA Pairs:** {status['total_qa_pairs']}
-    - **Evaluated Pairs:** {status['evaluated_pairs']} ({status['evaluated_pairs']/max(1, status['total_qa_pairs'])*100:.1f}%)
-    - **Unevaluated Pairs:** {status['unevaluated_pairs']}
-    - **Evaluated Conversations:** {status['evaluated_conversations']}
-    """
-    return status_md
-def get_qa_pairs_dataframe(evaluator: ChatEvaluator, show_evaluated: bool = False, limit: int = 50) -> pd.DataFrame:
     """
-    Get QA pairs as a pandas DataFrame for display
     Args:
         evaluator: ChatEvaluator instance
-        show_evaluated: Whether to show already evaluated pairs
-        limit: Maximum number of pairs to return
     Returns:
         DataFrame with QA pairs
     """
-    qa_pairs = evaluator.get_qa_pairs_for_evaluation(limit=200)  # Get more than needed for filtering
-    annotations = evaluator.get_annotations()
-    # Create set of evaluated conversation IDs
-    evaluated_ids = set(a.get("conversation_id") for a in annotations)
-    # Filter QA pairs based on show_evaluated parameter
-    if not show_evaluated:
-        qa_pairs = [pair for pair in qa_pairs if pair.get("conversation_id") not in evaluated_ids]
-    # Limit the results
-    qa_pairs = qa_pairs[:limit]
-    # Create DataFrame
-    if qa_pairs:
-        df = pd.DataFrame(qa_pairs)
-        # Add "Evaluated" column
-        df["evaluated"] = df["conversation_id"].apply(lambda x: "Yes" if x in evaluated_ids else "No")
-        # Select and rename columns for display
-        display_df = df[["conversation_id", "question", "original_answer", "evaluated"]].copy()
-        display_df = display_df.rename(columns={
-            "conversation_id": "ID",
-            "question": "Question",
-            "original_answer": "Answer",
-            "evaluated": "Evaluated"
-        })
-        # Truncate long text for better display
-        display_df["Question"] = display_df["Question"].apply(lambda x: (x[:150] + "...") if len(x) > 150 else x)
-        display_df["Answer"] = display_df["Answer"].apply(lambda x: (x[:150] + "...") if len(x) > 150 else x)
-        return display_df
-    # Return empty DataFrame if no pairs
-    return pd.DataFrame(columns=["ID", "Question", "Answer", "Evaluated"])
 def load_qa_pair_for_evaluation(conversation_id: str, evaluator: ChatEvaluator) -> Tuple[str, str, str, int, int, int, int, int, str]:
     """

 import os
 from typing import Dict, Any, List, Tuple
+def get_evaluation_status(evaluator, force_reload=False):
     """
+    Get evaluation status as formatted string and refresh QA data
     Args:
         evaluator: ChatEvaluator instance
+        force_reload: If True, force reload data from dataset
     Returns:
+        Status message, updated QA table and refresh message
     """
+    try:
+        # First, reset cache if forcing reload
+        if force_reload:
+            evaluator.reset_cache()
+        # Get status data
+        status = evaluator.get_evaluation_status(force_reload=force_reload)
+        # Get updated QA table
+        qa_table = get_qa_pairs_dataframe(evaluator, show_evaluated=False, force_reload=force_reload)
+        status_message = f"""
+        Total QA Pairs: {status['total_qa_pairs']}
+        Evaluated Pairs: {status['evaluated_pairs']}
+        Unevaluated Pairs: {status['unevaluated_pairs']}
+        Evaluated Conversations: {status['evaluated_conversations']}
+        """
+        refresh_message = "Data refreshed successfully" if force_reload else ""
+        return status_message, qa_table, refresh_message
+    except Exception as e:
+        logger.error(f"Error getting evaluation status: {e}")
+        # Import pandas here to avoid circular imports
+        import pandas as pd
+        empty_df = pd.DataFrame(columns=["Conversation ID", "Question", "Timestamp", "Evaluated"])
+        return f"Error getting status: {str(e)}", empty_df, f"Error: {str(e)}"
+def get_qa_pairs_dataframe(evaluator, show_evaluated=False, force_reload=False):
     """
+    Get QA pairs as DataFrame for the evaluation interface
     Args:
         evaluator: ChatEvaluator instance
+        show_evaluated: If True, include already evaluated pairs
+        force_reload: If True, force reload from dataset
     Returns:
         DataFrame with QA pairs
     """
+    try:
+        # Get QA pairs with potential force reload
+        qa_pairs = evaluator.get_qa_pairs_for_evaluation(limit=100, force_reload=force_reload)
+        # Get annotations
+        annotations = evaluator.get_annotations(force_reload=force_reload)
+        evaluated_ids = {a.get("conversation_id") for a in annotations}
+        # Filter out already evaluated pairs if needed
+        if not show_evaluated:
+            qa_pairs = [qa for qa in qa_pairs if qa["conversation_id"] not in evaluated_ids]
+        # Convert to DataFrame
+        if qa_pairs:
+            import pandas as pd
+            df = pd.DataFrame([
+                {
+                    "Conversation ID": qa["conversation_id"],
+                    "Question": qa["question"][:50] + "..." if len(qa["question"]) > 50 else qa["question"],
+                    "Timestamp": qa.get("timestamp", ""),
+                    "Evaluated": "Yes" if qa["conversation_id"] in evaluated_ids else "No"
+                }
+                for qa in qa_pairs
+            ])
+            return df
+        else:
+            import pandas as pd
+            return pd.DataFrame(columns=["Conversation ID", "Question", "Timestamp", "Evaluated"])
+    except Exception as e:
+        logger.error(f"Error getting QA pairs dataframe: {e}")
+        import pandas as pd
+        return pd.DataFrame(columns=["Conversation ID", "Question", "Timestamp", "Evaluated"])
 def load_qa_pair_for_evaluation(conversation_id: str, evaluator: ChatEvaluator) -> Tuple[str, str, str, int, int, int, int, int, str]:
     """