Spaces:

Rulga
/

status-law-gbot

Running

App Files Files Community

Rulga committed on Apr 7

Commit

a29ef8b

1 Parent(s): 6ea2d23

refactor chat history

Browse files

Files changed (2) hide show

config/settings.py +30 -1
src/knowledge_base/dataset.py +9 -25

config/settings.py CHANGED Viewed

@@ -11,7 +11,7 @@ API_CONFIG = {
     "inference_endpoint": os.getenv("HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co"),
     "token": HF_TOKEN,
     "is_paid_tier": True,
-    "timeout": 30,
     "headers": {
         "X-Use-Cache": "false",
         "Content-Type": "application/json",
@@ -241,3 +241,32 @@ EMBEDDING_MODEL = "intfloat/multilingual-e5-large"
 # Request settings
 USER_AGENT = "Status-Law-Assistant/1.0"

     "inference_endpoint": os.getenv("HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co"),
     "token": HF_TOKEN,
     "is_paid_tier": True,
+    "timeout": 15,
     "headers": {
         "X-Use-Cache": "false",
         "Content-Type": "application/json",
 # Request settings
 USER_AGENT = "Status-Law-Assistant/1.0"
+# Add these constants to settings.py
+CHAT_HISTORY_SCHEMA = {
+    "conversation_id": str,  # UUID format
+    "timestamp": str,        # ISO format
+    "messages": [            # Using 'messages' consistently instead of 'history'
+        {
+            "role": str,     # "user" or "assistant"
+            "content": str,
+            "timestamp": str  # ISO format
+        }
+    ]
+}
+ANNOTATION_SCHEMA = {
+    "conversation_id": str,  # UUID format
+    "timestamp": str,        # ISO format
+    "question": str,
+    "original_answer": str,
+    "improved_answer": str,
+    "ratings": {
+        "accuracy": int,         # 1-5
+        "completeness": int,     # 1-5
+        "relevance": int,        # 1-5
+        "clarity": int,          # 1-5
+        "legal_correctness": int # 1-5
+    },
+    "notes": str
+}

src/knowledge_base/dataset.py CHANGED Viewed

@@ -239,42 +239,26 @@ class DatasetManager:
         try:
             # Create filename with timestamp
             timestamp = datetime.now().isoformat()
-            filename = f"chat_history/{conversation_id}_{datetime.now().strftime('%Y%m%d-%H%M%S')}.json"
-            # Prepare data for saving with consistent structure
             chat_data = {
                 "conversation_id": conversation_id,
                 "timestamp": timestamp,
-                "messages": messages  # Using 'messages' consistently instead of 'history'
             }
-            # Use temporary file for safe writing
             with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False, encoding="utf-8") as temp:
                 json.dump(chat_data, temp, ensure_ascii=False, indent=2)
                 temp.flush()
-                temp_name = temp.name
-            try:
-                # Upload to Hugging Face Hub with explicit error handling
-                self.api.upload_file(
-                    path_or_fileobj=temp_name,
-                    path_in_repo=filename,
-                    repo_id=self.dataset_name,
-                    repo_type="dataset"
-                )
-            except Exception as upload_error:
-                return False, f"Failed to upload chat history: {str(upload_error)}"
-            finally:
-                # Clean up temporary file
-                if os.path.exists(temp_name):
-                    os.unlink(temp_name)
-            logger.info(f"Successfully saved chat history: {filename}")
-            return True, f"Chat history saved successfully as {filename}"
         except Exception as e:
-            logger.error(f"Error in save_chat_history: {str(e)}")
-            return False, f"Failed to save chat history: {str(e)}"
     def get_chat_history(self, conversation_id: Optional[str] = None) -> Tuple[bool, Any]:
         try:

         try:
             # Create filename with timestamp
             timestamp = datetime.now().isoformat()
+            filename = f"{self.chat_history_path}/{conversation_id}_{datetime.now().strftime('%Y%m%d-%H%M%S')}.json"
+            # Standardize the structure
             chat_data = {
                 "conversation_id": conversation_id,
                 "timestamp": timestamp,
+                "messages": messages  # Using 'messages' consistently
             }
+            # Validate structure matches schema
+            if not self._validate_chat_structure(chat_data):
+                return False, "Invalid chat history structure"
             with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False, encoding="utf-8") as temp:
                 json.dump(chat_data, temp, ensure_ascii=False, indent=2)
                 temp.flush()
+            return True, "Chat history saved successfully"
         except Exception as e:
+            return False, f"Error saving chat history: {str(e)}"
     def get_chat_history(self, conversation_id: Optional[str] = None) -> Tuple[bool, Any]:
         try: