Rulga committed on
Commit
a29ef8b
·
1 Parent(s): 6ea2d23

refactor chat history

Browse files
Files changed (2) hide show
  1. config/settings.py +30 -1
  2. src/knowledge_base/dataset.py +9 -25
config/settings.py CHANGED
@@ -11,7 +11,7 @@ API_CONFIG = {
11
  "inference_endpoint": os.getenv("HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co"),
12
  "token": HF_TOKEN,
13
  "is_paid_tier": True,
14
- "timeout": 30,
15
  "headers": {
16
  "X-Use-Cache": "false",
17
  "Content-Type": "application/json",
@@ -241,3 +241,32 @@ EMBEDDING_MODEL = "intfloat/multilingual-e5-large"
241
 
242
  # Request settings
243
  USER_AGENT = "Status-Law-Assistant/1.0"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  "inference_endpoint": os.getenv("HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co"),
12
  "token": HF_TOKEN,
13
  "is_paid_tier": True,
14
+ "timeout": 15,
15
  "headers": {
16
  "X-Use-Cache": "false",
17
  "Content-Type": "application/json",
 
241
 
242
  # Request settings
243
  USER_AGENT = "Status-Law-Assistant/1.0"
244
+
245
+ # Add these constants to settings.py
246
+ CHAT_HISTORY_SCHEMA = {
247
+ "conversation_id": str, # UUID format
248
+ "timestamp": str, # ISO format
249
+ "messages": [ # Using 'messages' consistently instead of 'history'
250
+ {
251
+ "role": str, # "user" or "assistant"
252
+ "content": str,
253
+ "timestamp": str # ISO format
254
+ }
255
+ ]
256
+ }
257
+
258
+ ANNOTATION_SCHEMA = {
259
+ "conversation_id": str, # UUID format
260
+ "timestamp": str, # ISO format
261
+ "question": str,
262
+ "original_answer": str,
263
+ "improved_answer": str,
264
+ "ratings": {
265
+ "accuracy": int, # 1-5
266
+ "completeness": int, # 1-5
267
+ "relevance": int, # 1-5
268
+ "clarity": int, # 1-5
269
+ "legal_correctness": int # 1-5
270
+ },
271
+ "notes": str
272
+ }
src/knowledge_base/dataset.py CHANGED
@@ -239,42 +239,26 @@ class DatasetManager:
239
  try:
240
  # Create filename with timestamp
241
  timestamp = datetime.now().isoformat()
242
- filename = f"chat_history/{conversation_id}_{datetime.now().strftime('%Y%m%d-%H%M%S')}.json"
243
 
244
- # Prepare data for saving with consistent structure
245
  chat_data = {
246
  "conversation_id": conversation_id,
247
  "timestamp": timestamp,
248
- "messages": messages # Using 'messages' consistently instead of 'history'
249
  }
250
 
251
- # Use temporary file for safe writing
 
 
 
252
  with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False, encoding="utf-8") as temp:
253
  json.dump(chat_data, temp, ensure_ascii=False, indent=2)
254
  temp.flush()
255
- temp_name = temp.name
256
-
257
- try:
258
- # Upload to Hugging Face Hub with explicit error handling
259
- self.api.upload_file(
260
- path_or_fileobj=temp_name,
261
- path_in_repo=filename,
262
- repo_id=self.dataset_name,
263
- repo_type="dataset"
264
- )
265
- except Exception as upload_error:
266
- return False, f"Failed to upload chat history: {str(upload_error)}"
267
- finally:
268
- # Clean up temporary file
269
- if os.path.exists(temp_name):
270
- os.unlink(temp_name)
271
 
272
- logger.info(f"Successfully saved chat history: {filename}")
273
- return True, f"Chat history saved successfully as {filename}"
274
-
275
  except Exception as e:
276
- logger.error(f"Error in save_chat_history: {str(e)}")
277
- return False, f"Failed to save chat history: {str(e)}"
278
 
279
  def get_chat_history(self, conversation_id: Optional[str] = None) -> Tuple[bool, Any]:
280
  try:
 
239
  try:
240
  # Create filename with timestamp
241
  timestamp = datetime.now().isoformat()
242
+ filename = f"{self.chat_history_path}/{conversation_id}_{datetime.now().strftime('%Y%m%d-%H%M%S')}.json"
243
 
244
+ # Standardize the structure
245
  chat_data = {
246
  "conversation_id": conversation_id,
247
  "timestamp": timestamp,
248
+ "messages": messages # Using 'messages' consistently
249
  }
250
 
251
+ # Validate structure matches schema
252
+ if not self._validate_chat_structure(chat_data):
253
+ return False, "Invalid chat history structure"
254
+
255
  with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False, encoding="utf-8") as temp:
256
  json.dump(chat_data, temp, ensure_ascii=False, indent=2)
257
  temp.flush()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
+ return True, "Chat history saved successfully"
 
 
260
  except Exception as e:
261
+ return False, f"Error saving chat history: {str(e)}"
 
262
 
263
  def get_chat_history(self, conversation_id: Optional[str] = None) -> Tuple[bool, Any]:
264
  try: