Spaces:
Running
Running
refactor chat history
Browse files
- config/settings.py +30 -1
- src/knowledge_base/dataset.py +9 -25
config/settings.py
CHANGED
|
@@ -11,7 +11,7 @@ API_CONFIG = {
|
|
| 11 |
"inference_endpoint": os.getenv("HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co"),
|
| 12 |
"token": HF_TOKEN,
|
| 13 |
"is_paid_tier": True,
|
| 14 |
-
"timeout":
|
| 15 |
"headers": {
|
| 16 |
"X-Use-Cache": "false",
|
| 17 |
"Content-Type": "application/json",
|
|
@@ -241,3 +241,32 @@ EMBEDDING_MODEL = "intfloat/multilingual-e5-large"
|
|
| 241 |
|
| 242 |
# Request settings
|
| 243 |
USER_AGENT = "Status-Law-Assistant/1.0"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
"inference_endpoint": os.getenv("HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co"),
|
| 12 |
"token": HF_TOKEN,
|
| 13 |
"is_paid_tier": True,
|
| 14 |
+
"timeout": 15,
|
| 15 |
"headers": {
|
| 16 |
"X-Use-Cache": "false",
|
| 17 |
"Content-Type": "application/json",
|
|
|
|
| 241 |
|
| 242 |
# Request settings
|
| 243 |
USER_AGENT = "Status-Law-Assistant/1.0"
|
| 244 |
+
|
| 245 |
+
# Add these constants to settings.py
|
| 246 |
+
CHAT_HISTORY_SCHEMA = {
|
| 247 |
+
"conversation_id": str, # UUID format
|
| 248 |
+
"timestamp": str, # ISO format
|
| 249 |
+
"messages": [ # Using 'messages' consistently instead of 'history'
|
| 250 |
+
{
|
| 251 |
+
"role": str, # "user" or "assistant"
|
| 252 |
+
"content": str,
|
| 253 |
+
"timestamp": str # ISO format
|
| 254 |
+
}
|
| 255 |
+
]
|
| 256 |
+
}
|
| 257 |
+
|
| 258 |
+
ANNOTATION_SCHEMA = {
|
| 259 |
+
"conversation_id": str, # UUID format
|
| 260 |
+
"timestamp": str, # ISO format
|
| 261 |
+
"question": str,
|
| 262 |
+
"original_answer": str,
|
| 263 |
+
"improved_answer": str,
|
| 264 |
+
"ratings": {
|
| 265 |
+
"accuracy": int, # 1-5
|
| 266 |
+
"completeness": int, # 1-5
|
| 267 |
+
"relevance": int, # 1-5
|
| 268 |
+
"clarity": int, # 1-5
|
| 269 |
+
"legal_correctness": int # 1-5
|
| 270 |
+
},
|
| 271 |
+
"notes": str
|
| 272 |
+
}
|
src/knowledge_base/dataset.py
CHANGED
|
@@ -239,42 +239,26 @@ class DatasetManager:
|
|
| 239 |
try:
|
| 240 |
# Create filename with timestamp
|
| 241 |
timestamp = datetime.now().isoformat()
|
| 242 |
-
filename = f"
|
| 243 |
|
| 244 |
-
#
|
| 245 |
chat_data = {
|
| 246 |
"conversation_id": conversation_id,
|
| 247 |
"timestamp": timestamp,
|
| 248 |
-
"messages": messages # Using 'messages' consistently
|
| 249 |
}
|
| 250 |
|
| 251 |
-
#
|
|
|
|
|
|
|
|
|
|
| 252 |
with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False, encoding="utf-8") as temp:
|
| 253 |
json.dump(chat_data, temp, ensure_ascii=False, indent=2)
|
| 254 |
temp.flush()
|
| 255 |
-
temp_name = temp.name
|
| 256 |
-
|
| 257 |
-
try:
|
| 258 |
-
# Upload to Hugging Face Hub with explicit error handling
|
| 259 |
-
self.api.upload_file(
|
| 260 |
-
path_or_fileobj=temp_name,
|
| 261 |
-
path_in_repo=filename,
|
| 262 |
-
repo_id=self.dataset_name,
|
| 263 |
-
repo_type="dataset"
|
| 264 |
-
)
|
| 265 |
-
except Exception as upload_error:
|
| 266 |
-
return False, f"Failed to upload chat history: {str(upload_error)}"
|
| 267 |
-
finally:
|
| 268 |
-
# Clean up temporary file
|
| 269 |
-
if os.path.exists(temp_name):
|
| 270 |
-
os.unlink(temp_name)
|
| 271 |
|
| 272 |
-
|
| 273 |
-
return True, f"Chat history saved successfully as {filename}"
|
| 274 |
-
|
| 275 |
except Exception as e:
|
| 276 |
-
|
| 277 |
-
return False, f"Failed to save chat history: {str(e)}"
|
| 278 |
|
| 279 |
def get_chat_history(self, conversation_id: Optional[str] = None) -> Tuple[bool, Any]:
|
| 280 |
try:
|
|
|
|
| 239 |
try:
|
| 240 |
# Create filename with timestamp
|
| 241 |
timestamp = datetime.now().isoformat()
|
| 242 |
+
filename = f"{self.chat_history_path}/{conversation_id}_{datetime.now().strftime('%Y%m%d-%H%M%S')}.json"
|
| 243 |
|
| 244 |
+
# Standardize the structure
|
| 245 |
chat_data = {
|
| 246 |
"conversation_id": conversation_id,
|
| 247 |
"timestamp": timestamp,
|
| 248 |
+
"messages": messages # Using 'messages' consistently
|
| 249 |
}
|
| 250 |
|
| 251 |
+
# Validate structure matches schema
|
| 252 |
+
if not self._validate_chat_structure(chat_data):
|
| 253 |
+
return False, "Invalid chat history structure"
|
| 254 |
+
|
| 255 |
with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False, encoding="utf-8") as temp:
|
| 256 |
json.dump(chat_data, temp, ensure_ascii=False, indent=2)
|
| 257 |
temp.flush()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
+
return True, "Chat history saved successfully"
|
|
|
|
|
|
|
| 260 |
except Exception as e:
|
| 261 |
+
return False, f"Error saving chat history: {str(e)}"
|
|
|
|
| 262 |
|
| 263 |
def get_chat_history(self, conversation_id: Optional[str] = None) -> Tuple[bool, Any]:
|
| 264 |
try:
|