Rulga committed on
Commit
a76b710
·
1 Parent(s): 5d0697a

Refactor app layout and add repair script for conversation IDs

Browse files

- Adjusted layout in app.py to improve UI:
- Changed column scaling for evaluation status and report sections.
- Moved refresh status textbox below the buttons.
- Set explicit height for QA pairs table.

- Introduced a new repair script (repair-script.py) to restore empty conversation IDs in chat history files.
- Implemented logging for tracking repairs and errors.
- Added retry logic for API calls to handle rate limits.
- Configured paths and parameters for dataset interaction.

- Updated log file (repair_conversation_ids.log) to reflect repair operations and errors encountered during execution.

Files changed (3) hide show
  1. app.py +16 -16
  2. repair-script.py +227 -0
  3. repair_conversation_ids.log +182 -0
app.py CHANGED
@@ -1105,32 +1105,32 @@ with gr.Blocks() as demo:
1105
  gr.Markdown("### Evaluation of Chat Responses")
1106
 
1107
  with gr.Row():
1108
- with gr.Column(scale=2):
1109
  # Status and reports section
1110
  with gr.Row():
1111
  with gr.Column(scale=1):
1112
  evaluation_status = gr.Textbox(label="Evaluation Status", interactive=False)
1113
  refresh_status_btn = gr.Button("Refresh Status")
1114
 
1115
- # Add status message for data refresh
1116
- refresh_data_status = gr.Textbox(
1117
- label="Refresh Status",
1118
- interactive=False,
1119
- visible=True
1120
- )
1121
-
1122
  with gr.Column(scale=1):
1123
  evaluation_report = gr.HTML(label="Evaluation Report")
1124
  refresh_report_btn = gr.Button("Generate Report")
1125
 
1126
- # QA pairs table section
1127
- show_evaluated = gr.Checkbox(label="Show Already Evaluated Pairs", value=False)
1128
- import pandas as pd
1129
- qa_table = gr.DataFrame(
1130
- pd.DataFrame(columns=["Conversation ID", "Question", "Timestamp", "Evaluated"]),
1131
- interactive=True,
1132
- wrap=True
1133
- )
 
 
 
 
 
 
 
1134
 
1135
  # Conversation selection section
1136
  gr.Markdown("### Select Conversation to Evaluate")
 
1105
  gr.Markdown("### Evaluation of Chat Responses")
1106
 
1107
  with gr.Row():
1108
+ with gr.Column(scale=1): # Changed to full width
1109
  # Status and reports section
1110
  with gr.Row():
1111
  with gr.Column(scale=1):
1112
  evaluation_status = gr.Textbox(label="Evaluation Status", interactive=False)
1113
  refresh_status_btn = gr.Button("Refresh Status")
1114
 
 
 
 
 
 
 
 
1115
  with gr.Column(scale=1):
1116
  evaluation_report = gr.HTML(label="Evaluation Report")
1117
  refresh_report_btn = gr.Button("Generate Report")
1118
 
1119
+ # Move refresh status below
1120
+ refresh_data_status = gr.Textbox(
1121
+ label="Refresh Status",
1122
+ interactive=False,
1123
+ visible=True
1124
+ )
1125
+
1126
+ # QA pairs table section - now full width
1127
+ show_evaluated = gr.Checkbox(label="Show Already Evaluated Pairs", value=False)
1128
+ qa_table = gr.DataFrame(
1129
+ pd.DataFrame(columns=["Conversation ID", "Question", "Timestamp", "Evaluated"]),
1130
+ interactive=True,
1131
+ wrap=True,
1132
+ height=400 # Added explicit height
1133
+ )
1134
 
1135
  # Conversation selection section
1136
  gr.Markdown("### Select Conversation to Evaluate")
repair-script.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # repair_conversation_ids.py
3
+ """
4
+ Script to restore empty conversation_ids in chat history files.
5
+ One-time operation with hardcoded paths.
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ import json
11
+ import codecs
12
+ import datetime
13
+ import logging
14
+ import tempfile
15
+ from huggingface_hub import HfApi
16
+ from dotenv import load_dotenv
17
+ import time
18
+ from tenacity import retry, stop_after_attempt, wait_exponential
19
+
20
# Load environment variables
load_dotenv()

# PATHS AND PARAMETERS CONFIGURATION
# =============================

# Modify these values according to your configuration
CHAT_HISTORY_PATH = './chat_history'  # Path to local chat history files
DATASET_ID = 'Rulga/status-law-knowledge-base'  # HuggingFace dataset ID
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")  # HuggingFace API access token

if not HF_TOKEN:
    raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")

# Dataset paths
DATASET_CHAT_HISTORY_PATH = "chat_history"
DATASET_VECTOR_STORE_PATH = "vector_store"
DATASET_FINE_TUNED_PATH = "fine_tuned_models"
DATASET_ANNOTATIONS_PATH = "annotations"
DATASET_ERROR_LOGS_PATH = "error_logs"
DATASET_PREFERENCES_PATH = "preferences/user_preferences.json"

# If True, script won't make actual changes (test mode)
DRY_RUN = False

# If True, script will update only local files
LOCAL_ONLY = False

# Temporary directory that downloaded dataset files are written to
TEMP_DIR = tempfile.mkdtemp()

# Configure stdout encoding.
# This must happen BEFORE logging is configured: the StreamHandler below keeps
# a reference to whatever object sys.stdout is at construction time, so
# switching the encoding afterwards would not affect log output.
if hasattr(sys.stdout, "reconfigure"):
    # Python 3.7+: change the encoding in place, keeping a single writer
    sys.stdout.reconfigure(encoding='utf-8')
else:
    sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer)

# Logging configuration: same records go to a UTF-8 log file and to stdout
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("repair_conversation_ids.log", encoding='utf-8'),
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)
64
+
65
def _is_rate_limit_error(exc):
    """Return True when *exc* looks like an HTTP 429 (rate limit) failure."""
    # HTTP errors raised through requests carry the response object
    response = getattr(exc, "response", None)
    if getattr(response, "status_code", None) == 429:
        return True
    # Fallback: match the message text the original check relied on
    return "429 Client Error: Too Many Requests" in str(exc)


def _retry_on_rate_limit(retry_state):
    """tenacity retry predicate: retry only attempts that failed with a rate-limit error."""
    outcome = retry_state.outcome
    if outcome is None or not outcome.failed:
        return False
    return _is_rate_limit_error(outcome.exception())


@retry(
    retry=_retry_on_rate_limit,
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=60, min=60, max=180),
    reraise=True  # surface the real error instead of tenacity's RetryError
)
def safe_api_call(func, *args, **kwargs):
    """
    Call ``func(*args, **kwargs)`` and retry it when the API reports a rate limit.

    Only rate-limit (HTTP 429) failures are retried, up to 3 attempts with an
    exponential wait between 60 and 180 seconds. Any other exception is
    propagated immediately so that programming errors (for example a wrong
    keyword argument) are not hidden behind minutes of waiting.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.

    Raises:
        Exception: The original exception raised by ``func``, either a
            non-rate-limit error or the last rate-limit error once the
            attempts are exhausted.
    """
    try:
        return func(*args, **kwargs)
    except Exception as e:
        if _is_rate_limit_error(e):
            logger.warning("Rate limit hit, waiting before retry...")
        raise  # The retry predicate decides whether another attempt is made
78
+
79
def repair_conversation_ids():
    """
    Restore conversation_ids in chat history files directly in HuggingFace dataset.

    Every ``None_*.json`` file under the dataset's chat history directory
    (excluding the ``archive/`` sub-directory) gets a freshly generated
    ``conversation_id`` and is re-published without the ``None_`` prefix.
    The untouched original is kept under ``archive/``.

    For each file the archive copy, the repaired file and the deletion of the
    original are sent as ONE commit, so a failure cannot leave the dataset in
    a half-migrated state and the number of commit requests (the operation
    that is rate limited) is one per file.

    A file is skipped when its target name already exists in the dataset, to
    avoid overwriting another conversation. In DRY_RUN mode nothing is
    uploaded, but files that would be repaired are still counted.

    Returns:
        int: Number of files repaired (or that would be repaired in DRY_RUN
        mode). 0 when the dataset cannot be accessed.
    """
    # Local import: the module-level import only brings HfApi into scope
    from huggingface_hub import CommitOperationAdd, CommitOperationDelete

    none_prefix = 'None_'
    # Repo paths always use forward slashes, regardless of the local OS
    history_prefix = f"{DATASET_CHAT_HISTORY_PATH}/"
    archive_prefix = f"{DATASET_CHAT_HISTORY_PATH}/archive/"

    try:
        api = HfApi(token=HF_TOKEN)

        # List all files with retry
        files = safe_api_call(
            api.list_repo_files,
            repo_id=DATASET_ID,
            repo_type="dataset"
        )
        existing_paths = set(files)

        # Archived copies are also named "None_..."; exclude them so that a
        # re-run does not process the archive again.
        chat_files = [
            f for f in files
            if f.startswith(history_prefix)
            and not f.startswith(archive_prefix)
            and f.endswith('.json')
            and f.rpartition('/')[2].startswith(none_prefix)
        ]

        logger.info(f"Found {len(chat_files)} files with 'None_' prefix in dataset")

        repaired_count = 0
        skipped_count = 0
        error_count = 0

        for file_path in chat_files:
            try:
                directory, _, filename = file_path.rpartition('/')

                # Strip only the leading prefix, not every occurrence
                new_filename = filename[len(none_prefix):]
                new_path = f"{directory}/{new_filename}"

                if new_path in existing_paths:
                    logger.warning(
                        f"Skipped: {filename} - target {new_path} already exists in dataset"
                    )
                    skipped_count += 1
                    continue

                # Add delay between files
                time.sleep(2)  # 2 seconds between files

                # Download file content with retry
                local_path = safe_api_call(
                    api.hf_hub_download,
                    repo_id=DATASET_ID,
                    repo_type="dataset",
                    filename=file_path,
                    local_dir=TEMP_DIR,
                    local_dir_use_symlinks=False
                )

                with open(local_path, 'r', encoding='utf-8') as f:
                    chat_data = json.load(f)

                # Generate new ID based on timestamp and file details
                timestamp_str = chat_data.get('timestamp', '')
                try:
                    timestamp_dt = datetime.datetime.fromisoformat(timestamp_str)
                    time_part = timestamp_dt.strftime('%Y%m%d%H%M%S')
                except (ValueError, TypeError):
                    # Missing or malformed timestamp: fall back to the current time
                    time_part = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

                # At most 10 characters of the file stem go into the ID
                filename_part = os.path.splitext(new_filename)[0][:10]

                new_id = f"conv_{time_part}_{filename_part}"
                chat_data['conversation_id'] = new_id

                if DRY_RUN:
                    logger.info(
                        f"TEST MODE: would repair {filename} -> {new_filename} - New ID: {new_id}"
                    )
                    repaired_count += 1
                    continue

                archive_timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
                archive_filename = f"archive/None_{archive_timestamp}_{filename}"
                archive_path = f"{DATASET_CHAT_HISTORY_PATH}/{archive_filename}"

                json_content = json.dumps(chat_data, ensure_ascii=False, indent=2)

                # Archive copy + repaired file + removal of the original in a
                # single atomic commit. Directories need no explicit creation.
                operations = [
                    CommitOperationAdd(
                        path_in_repo=archive_path,
                        path_or_fileobj=local_path
                    ),
                    CommitOperationAdd(
                        path_in_repo=new_path,
                        path_or_fileobj=json_content.encode('utf-8')
                    ),
                    CommitOperationDelete(path_in_repo=file_path),
                ]
                safe_api_call(
                    api.create_commit,
                    repo_id=DATASET_ID,
                    repo_type="dataset",
                    operations=operations,
                    commit_message=f"Repair conversation_id: {filename} -> {new_filename}"
                )

                logger.info(f"Repaired: {filename} -> {new_filename} (archived as {archive_filename}) - New ID: {new_id}")
                repaired_count += 1

            except Exception as e:
                # One bad file must not abort the whole run
                logger.error(f"Error processing {file_path}: {str(e)}")
                error_count += 1

        logger.info(f"Repair completed: {repaired_count} files repaired, {skipped_count} skipped, {error_count} errors")
        return repaired_count

    except Exception as e:
        logger.error(f"Error accessing dataset: {str(e)}")
        return 0
203
+
204
if __name__ == "__main__":
    # Local import: only the script entry point needs it
    import shutil

    # Display configuration information
    logger.info("=== CONFIGURATION ===")
    logger.info(f"Chat history path: {CHAT_HISTORY_PATH}")
    logger.info(f"Dataset ID: {DATASET_ID}")
    logger.info(f"Test mode: {'Yes' if DRY_RUN else 'No'}")
    logger.info(f"Local only: {'Yes' if LOCAL_ONLY else 'No'}")
    logger.info("==================")

    # Start repair process
    try:
        repaired = repair_conversation_ids()
    finally:
        # The download directory comes from tempfile.mkdtemp(), which never
        # removes it on its own; clean it up whether or not the run succeeded.
        shutil.rmtree(TEMP_DIR, ignore_errors=True)

    if DRY_RUN:
        logger.info(f"TEST MODE: Would have repaired {repaired} files")
    else:
        logger.info(f"Successfully repaired {repaired} files")
220
+
221
+
222
+
223
+
224
+
225
+
226
+
227
+
repair_conversation_ids.log ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-04-08 23:40:07,659 - __main__ - INFO - ==================
2
+ 2025-04-08 23:48:34,665 - __main__ - INFO - === CONFIGURATION ===
3
+ 2025-04-08 23:48:34,665 - __main__ - INFO - Chat history path: ./chat_history
4
+ 2025-04-08 23:48:34,665 - __main__ - INFO - Dataset ID: Rulga/status-law-knowledge-base
5
+ 2025-04-08 23:48:34,665 - __main__ - INFO - Test mode: No
6
+ 2025-04-08 23:48:34,665 - __main__ - INFO - Local only: No
7
+ 2025-04-08 23:48:34,665 - __main__ - INFO - ==================
8
+ 2025-04-08 23:48:34,665 - __main__ - INFO - Found 0 files with 'None_' prefix in ./chat_history
9
+ 2025-04-08 23:48:34,665 - __main__ - INFO - Repair completed: 0 files repaired, 0 skipped, 0 errors
10
+ 2025-04-08 23:48:34,665 - __main__ - INFO - Successfully repaired 0 files
11
+ 2025-04-08 23:58:37,736 - __main__ - INFO - === CONFIGURATION ===
12
+ 2025-04-08 23:58:37,736 - __main__ - INFO - Chat history path: ./chat_history
13
+ 2025-04-08 23:58:37,736 - __main__ - INFO - Dataset ID: Rulga/status-law-knowledge-base
14
+ 2025-04-08 23:58:37,736 - __main__ - INFO - Test mode: No
15
+ 2025-04-08 23:58:37,736 - __main__ - INFO - Local only: No
16
+ 2025-04-08 23:58:37,736 - __main__ - INFO - ==================
17
+ 2025-04-08 23:58:37,736 - __main__ - ERROR - Error accessing dataset: HfApi.list_repo_files() got an unexpected keyword argument 'path'
18
+ 2025-04-08 23:58:37,736 - __main__ - INFO - Successfully repaired 0 files
19
+ 2025-04-08 23:59:57,181 - __main__ - INFO - === CONFIGURATION ===
20
+ 2025-04-08 23:59:57,186 - __main__ - INFO - Chat history path: ./chat_history
21
+ 2025-04-08 23:59:57,186 - __main__ - INFO - Dataset ID: Rulga/status-law-knowledge-base
22
+ 2025-04-08 23:59:57,186 - __main__ - INFO - Test mode: No
23
+ 2025-04-08 23:59:57,186 - __main__ - INFO - Local only: No
24
+ 2025-04-08 23:59:57,187 - __main__ - INFO - ==================
25
+ 2025-04-08 23:59:58,278 - __main__ - INFO - Found 54 files with 'None_' prefix in dataset
26
+ 2025-04-09 00:00:04,290 - __main__ - INFO - Repaired: None_20250403-003446.json -> 20250403-003446.json (archived as archive/None_20250409_000000_None_20250403-003446.json) - New ID: conv_20250403003446_20250403-0
27
+ 2025-04-09 00:00:05,006 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
28
+ 2025-04-09 00:00:07,138 - __main__ - INFO - Repaired: None_20250403-004332.json -> 20250403-004332.json (archived as archive/None_20250409_000004_None_20250403-004332.json) - New ID: conv_20250403004332_20250403-0
29
+ 2025-04-09 00:00:07,781 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
30
+ 2025-04-09 00:00:09,995 - __main__ - INFO - Repaired: None_20250403-005218.json -> 20250403-005218.json (archived as archive/None_20250409_000007_None_20250403-005218.json) - New ID: conv_20250403005218_20250403-0
31
+ 2025-04-09 00:00:10,659 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
32
+ 2025-04-09 00:00:13,307 - __main__ - INFO - Repaired: None_20250403-005243.json -> 20250403-005243.json (archived as archive/None_20250409_000010_None_20250403-005243.json) - New ID: conv_20250403005243_20250403-0
33
+ 2025-04-09 00:00:13,974 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
34
+ 2025-04-09 00:00:16,190 - __main__ - INFO - Repaired: None_20250403-005335.json -> 20250403-005335.json (archived as archive/None_20250409_000013_None_20250403-005335.json) - New ID: conv_20250403005335_20250403-0
35
+ 2025-04-09 00:00:17,200 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
36
+ 2025-04-09 00:00:19,427 - __main__ - INFO - Repaired: None_20250403-115212.json -> 20250403-115212.json (archived as archive/None_20250409_000016_None_20250403-115212.json) - New ID: conv_20250403115212_20250403-1
37
+ 2025-04-09 00:00:20,074 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
38
+ 2025-04-09 00:00:22,569 - __main__ - INFO - Repaired: None_20250403-154557.json -> 20250403-154557.json (archived as archive/None_20250409_000019_None_20250403-154557.json) - New ID: conv_20250403154557_20250403-1
39
+ 2025-04-09 00:00:23,336 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
40
+ 2025-04-09 00:00:26,146 - __main__ - INFO - Repaired: None_20250403-170727.json -> 20250403-170727.json (archived as archive/None_20250409_000023_None_20250403-170727.json) - New ID: conv_20250403170727_20250403-1
41
+ 2025-04-09 00:00:26,935 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
42
+ 2025-04-09 00:00:29,465 - __main__ - INFO - Repaired: None_20250403-171345.json -> 20250403-171345.json (archived as archive/None_20250409_000026_None_20250403-171345.json) - New ID: conv_20250403171345_20250403-1
43
+ 2025-04-09 00:00:30,637 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
44
+ 2025-04-09 00:00:34,875 - __main__ - INFO - Repaired: None_20250403-180146.json -> 20250403-180146.json (archived as archive/None_20250409_000030_None_20250403-180146.json) - New ID: conv_20250403180146_20250403-1
45
+ 2025-04-09 00:00:35,815 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
46
+ 2025-04-09 00:00:39,596 - __main__ - INFO - Repaired: None_20250403-181618.json -> 20250403-181618.json (archived as archive/None_20250409_000035_None_20250403-181618.json) - New ID: conv_20250403181618_20250403-1
47
+ 2025-04-09 00:00:40,360 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
48
+ 2025-04-09 00:00:42,616 - __main__ - INFO - Repaired: None_20250403-182229.json -> 20250403-182229.json (archived as archive/None_20250409_000040_None_20250403-182229.json) - New ID: conv_20250403182229_20250403-1
49
+ 2025-04-09 00:00:44,078 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
50
+ 2025-04-09 00:00:46,335 - __main__ - INFO - Repaired: None_20250404-131413.json -> 20250404-131413.json (archived as archive/None_20250409_000043_None_20250404-131413.json) - New ID: conv_20250404131413_20250404-1
51
+ 2025-04-09 00:00:47,294 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
52
+ 2025-04-09 00:00:49,765 - __main__ - INFO - Repaired: None_20250404-131503.json -> 20250404-131503.json (archived as archive/None_20250409_000046_None_20250404-131503.json) - New ID: conv_20250404131503_20250404-1
53
+ 2025-04-09 00:00:50,496 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
54
+ 2025-04-09 00:00:52,495 - __main__ - INFO - Repaired: None_20250404-134635.json -> 20250404-134635.json (archived as archive/None_20250409_000050_None_20250404-134635.json) - New ID: conv_20250404134635_20250404-1
55
+ 2025-04-09 00:00:53,125 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
56
+ 2025-04-09 00:00:55,215 - __main__ - INFO - Repaired: None_20250404-140648.json -> 20250404-140648.json (archived as archive/None_20250409_000052_None_20250404-140648.json) - New ID: conv_20250404140648_20250404-1
57
+ 2025-04-09 00:00:55,825 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
58
+ 2025-04-09 00:00:57,976 - __main__ - INFO - Repaired: None_20250404-140914.json -> 20250404-140914.json (archived as archive/None_20250409_000055_None_20250404-140914.json) - New ID: conv_20250404140914_20250404-1
59
+ 2025-04-09 00:00:58,735 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
60
+ 2025-04-09 00:01:00,779 - __main__ - INFO - Repaired: None_20250404-140955.json -> 20250404-140955.json (archived as archive/None_20250409_000058_None_20250404-140955.json) - New ID: conv_20250404140955_20250404-1
61
+ 2025-04-09 00:01:01,448 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
62
+ 2025-04-09 00:01:03,863 - __main__ - INFO - Repaired: None_20250404-175337.json -> 20250404-175337.json (archived as archive/None_20250409_000101_None_20250404-175337.json) - New ID: conv_20250404175337_20250404-1
63
+ 2025-04-09 00:01:05,144 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
64
+ 2025-04-09 00:01:07,415 - __main__ - INFO - Repaired: None_20250404-180019.json -> 20250404-180019.json (archived as archive/None_20250409_000104_None_20250404-180019.json) - New ID: conv_20250404180019_20250404-1
65
+ 2025-04-09 00:01:08,666 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
66
+ 2025-04-09 00:01:11,586 - __main__ - INFO - Repaired: None_20250404-180053.json -> 20250404-180053.json (archived as archive/None_20250409_000108_None_20250404-180053.json) - New ID: conv_20250404180053_20250404-1
67
+ 2025-04-09 00:01:12,179 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
68
+ 2025-04-09 00:01:15,196 - __main__ - INFO - Repaired: None_20250404-180123.json -> 20250404-180123.json (archived as archive/None_20250409_000111_None_20250404-180123.json) - New ID: conv_20250404180123_20250404-1
69
+ 2025-04-09 00:01:15,966 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
70
+ 2025-04-09 00:01:20,105 - __main__ - INFO - Repaired: None_20250404-180651.json -> 20250404-180651.json (archived as archive/None_20250409_000115_None_20250404-180651.json) - New ID: conv_20250404180651_20250404-1
71
+ 2025-04-09 00:01:20,922 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
72
+ 2025-04-09 00:01:23,705 - __main__ - INFO - Repaired: None_20250404-180725.json -> 20250404-180725.json (archived as archive/None_20250409_000120_None_20250404-180725.json) - New ID: conv_20250404180725_20250404-1
73
+ 2025-04-09 00:01:24,605 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
74
+ 2025-04-09 00:01:27,163 - __main__ - INFO - Repaired: None_20250404-180940.json -> 20250404-180940.json (archived as archive/None_20250409_000124_None_20250404-180940.json) - New ID: conv_20250404180940_20250404-1
75
+ 2025-04-09 00:01:29,246 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
76
+ 2025-04-09 00:01:31,812 - __main__ - INFO - Repaired: None_20250404-181628.json -> 20250404-181628.json (archived as archive/None_20250409_000129_None_20250404-181628.json) - New ID: conv_20250404181628_20250404-1
77
+ 2025-04-09 00:01:32,711 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
78
+ 2025-04-09 00:01:36,233 - __main__ - INFO - Repaired: None_20250404-181730.json -> 20250404-181730.json (archived as archive/None_20250409_000132_None_20250404-181730.json) - New ID: conv_20250404181730_20250404-1
79
+ 2025-04-09 00:01:37,193 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
80
+ 2025-04-09 00:01:39,716 - __main__ - INFO - Repaired: None_20250404-184103.json -> 20250404-184103.json (archived as archive/None_20250409_000136_None_20250404-184103.json) - New ID: conv_20250404184103_20250404-1
81
+ 2025-04-09 00:01:40,429 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
82
+ 2025-04-09 00:01:43,604 - __main__ - INFO - Repaired: None_20250404-184613.json -> 20250404-184613.json (archived as archive/None_20250409_000140_None_20250404-184613.json) - New ID: conv_20250404184613_20250404-1
83
+ 2025-04-09 00:01:44,255 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
84
+ 2025-04-09 00:01:47,350 - __main__ - INFO - Repaired: None_20250404-184705.json -> 20250404-184705.json (archived as archive/None_20250409_000144_None_20250404-184705.json) - New ID: conv_20250404184705_20250404-1
85
+ 2025-04-09 00:01:47,984 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
86
+ 2025-04-09 00:01:57,394 - __main__ - INFO - Repaired: None_20250404-185243.json -> 20250404-185243.json (archived as archive/None_20250409_000147_None_20250404-185243.json) - New ID: conv_20250404185243_20250404-1
87
+ 2025-04-09 00:01:58,585 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
88
+ 2025-04-09 00:02:16,266 - __main__ - INFO - Repaired: None_20250404-185810.json -> 20250404-185810.json (archived as archive/None_20250409_000158_None_20250404-185810.json) - New ID: conv_20250404185810_20250404-1
89
+ 2025-04-09 00:02:16,955 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
90
+ 2025-04-09 00:02:21,675 - __main__ - INFO - Repaired: None_20250404-202051.json -> 20250404-202051.json (archived as archive/None_20250409_000216_None_20250404-202051.json) - New ID: conv_20250404202051_20250404-2
91
+ 2025-04-09 00:02:22,334 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
92
+ 2025-04-09 00:02:26,166 - __main__ - INFO - Repaired: None_20250407-174318.json -> 20250407-174318.json (archived as archive/None_20250409_000222_None_20250407-174318.json) - New ID: conv_20250407174318_20250407-1
93
+ 2025-04-09 00:02:26,829 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
94
+ 2025-04-09 00:02:30,186 - __main__ - INFO - Repaired: None_20250407-175851.json -> 20250407-175851.json (archived as archive/None_20250409_000226_None_20250407-175851.json) - New ID: conv_20250407175851_20250407-1
95
+ 2025-04-09 00:02:30,878 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
96
+ 2025-04-09 00:02:34,506 - __main__ - INFO - Repaired: None_20250408-135920.json -> 20250408-135920.json (archived as archive/None_20250409_000230_None_20250408-135920.json) - New ID: conv_20250408135920_20250408-1
97
+ 2025-04-09 00:02:35,226 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
98
+ 2025-04-09 00:02:38,338 - __main__ - INFO - Repaired: None_20250408-143211.json -> 20250408-143211.json (archived as archive/None_20250409_000234_None_20250408-143211.json) - New ID: conv_20250408143211_20250408-1
99
+ 2025-04-09 00:02:38,937 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
100
+ 2025-04-09 00:02:41,520 - __main__ - INFO - Repaired: None_20250408-152804.json -> 20250408-152804.json (archived as archive/None_20250409_000238_None_20250408-152804.json) - New ID: conv_20250408152804_20250408-1
101
+ 2025-04-09 00:02:42,156 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
102
+ 2025-04-09 00:02:44,487 - __main__ - INFO - Repaired: None_20250408-161228.json -> 20250408-161228.json (archived as archive/None_20250409_000241_None_20250408-161228.json) - New ID: conv_20250408161228_20250408-1
103
+ 2025-04-09 00:02:45,146 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
104
+ 2025-04-09 00:02:47,666 - __main__ - INFO - Repaired: None_20250408-165140.json -> 20250408-165140.json (archived as archive/None_20250409_000244_None_20250408-165140.json) - New ID: conv_20250408165140_20250408-1
105
+ 2025-04-09 00:02:48,358 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
106
+ 2025-04-09 00:02:50,936 - __main__ - INFO - Repaired: None_20250408-165215.json -> 20250408-165215.json (archived as archive/None_20250409_000248_None_20250408-165215.json) - New ID: conv_20250408165215_20250408-1
107
+ 2025-04-09 00:02:51,768 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
108
+ 2025-04-09 00:02:54,381 - __main__ - INFO - Repaired: None_20250408-165308.json -> 20250408-165308.json (archived as archive/None_20250409_000251_None_20250408-165308.json) - New ID: conv_20250408165308_20250408-1
109
+ 2025-04-09 00:02:55,249 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
110
+ 2025-04-09 00:02:56,626 - __main__ - ERROR - Error processing chat_history/None_20250408-165439.json: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/Rulga/status-law-knowledge-base/commit/main (Request ID: Root=1-67f59d0e-3aa06dce36a9407f6e3c4bc8;0019f945-c6e5-4f53-9177-185f7e813af8)
111
+
112
+ You have been rate-limited; you can retry this action in about 1 hour. If you're a new user, your limits will raise progressively over time. Get in touch with us at website@huggingface.co if you need access now.
113
+ 2025-04-09 00:02:57,206 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
114
+ 2025-04-09 00:02:57,800 - __main__ - ERROR - Error processing chat_history/None_20250408-165706.json: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/Rulga/status-law-knowledge-base/commit/main (Request ID: Root=1-67f59d0f-59323183245f43793c756098;ba4091db-b602-48f9-a14e-c20b2e7574e0)
115
+
116
+ You have been rate-limited; you can retry this action in about 1 hour. If you're a new user, your limits will raise progressively over time. Get in touch with us at website@huggingface.co if you need access now.
117
+ 2025-04-09 00:02:58,375 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
118
+ 2025-04-09 00:02:59,035 - __main__ - ERROR - Error processing chat_history/None_20250408-170353.json: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/Rulga/status-law-knowledge-base/commit/main (Request ID: Root=1-67f59d10-2d5693ed479b4b090f5c7581;fc9b6a57-6774-46bd-81eb-221706734b1d)
119
+
120
+ You have been rate-limited; you can retry this action in about 1 hour. If you're a new user, your limits will raise progressively over time. Get in touch with us at website@huggingface.co if you need access now.
121
+ 2025-04-09 00:02:59,846 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
122
+ 2025-04-09 00:03:00,892 - __main__ - ERROR - Error processing chat_history/None_20250408-170621.json: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/Rulga/status-law-knowledge-base/commit/main (Request ID: Root=1-67f59d12-27add14c41b10a8312a23491;4869be22-ff56-4df3-96e5-46e06e9298e6)
123
+
124
+ You have been rate-limited; you can retry this action in about 1 hour. If you're a new user, your limits will raise progressively over time. Get in touch with us at website@huggingface.co if you need access now.
125
+ 2025-04-09 00:03:02,025 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
126
+ 2025-04-09 00:03:02,946 - __main__ - ERROR - Error processing chat_history/None_20250408-171613.json: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/Rulga/status-law-knowledge-base/commit/main (Request ID: Root=1-67f59d14-79025b8858c271d03de46570;d25ad960-bcdc-4e63-95e0-bd9bd0926652)
127
+
128
+ You have been rate-limited; you can retry this action in about 1 hour. If you're a new user, your limits will raise progressively over time. Get in touch with us at website@huggingface.co if you need access now.
129
+ 2025-04-09 00:03:04,632 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
130
+ 2025-04-09 00:03:05,370 - __main__ - ERROR - Error processing chat_history/None_20250408-172112.json: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/Rulga/status-law-knowledge-base/commit/main (Request ID: Root=1-67f59d17-2aaa7a6a0cf49e5e013d773b;8ca42420-f626-453b-b34b-7a40394568e1)
131
+
132
+ You have been rate-limited; you can retry this action in about 1 hour. If you're a new user, your limits will raise progressively over time. Get in touch with us at website@huggingface.co if you need access now.
133
+ 2025-04-09 00:03:06,243 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
134
+ 2025-04-09 00:03:07,006 - __main__ - ERROR - Error processing chat_history/None_20250408-172458.json: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/Rulga/status-law-knowledge-base/commit/main (Request ID: Root=1-67f59d18-07501cf14e87189e47e8b191;842a9df7-49f1-485b-b2ac-6affde2bb8c4)
135
+
136
+ You have been rate-limited; you can retry this action in about 1 hour. If you're a new user, your limits will raise progressively over time. Get in touch with us at website@huggingface.co if you need access now.
137
+ 2025-04-09 00:03:08,078 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
138
+ 2025-04-09 00:03:08,659 - __main__ - ERROR - Error processing chat_history/None_20250408-172750.json: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/Rulga/status-law-knowledge-base/commit/main (Request ID: Root=1-67f59d1a-63b94c88106891ae7e3e6115;f5c72760-c8a7-4948-9a01-cd4c50a8118f)
139
+
140
+ You have been rate-limited; you can retry this action in about 1 hour. If you're a new user, your limits will raise progressively over time. Get in touch with us at website@huggingface.co if you need access now.
141
+ 2025-04-09 00:03:12,397 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
142
+ 2025-04-09 00:03:13,027 - __main__ - ERROR - Error processing chat_history/None_20250408-173221.json: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/Rulga/status-law-knowledge-base/commit/main (Request ID: Root=1-67f59d1e-2b1ba98d28b948b1758cf3c3;4e79d863-b173-4cad-8ee9-163959aac45e)
143
+
144
+ You have been rate-limited; you can retry this action in about 1 hour. If you're a new user, your limits will raise progressively over time. Get in touch with us at website@huggingface.co if you need access now.
145
+ 2025-04-09 00:03:14,135 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
146
+ 2025-04-09 00:03:14,745 - __main__ - ERROR - Error processing chat_history/None_20250408-174255.json: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/Rulga/status-law-knowledge-base/commit/main (Request ID: Root=1-67f59d20-0043043f73a568d845410df4;096e756f-f4f9-4fea-b838-e9bd722a55d5)
147
+
148
+ You have been rate-limited; you can retry this action in about 1 hour. If you're a new user, your limits will raise progressively over time. Get in touch with us at website@huggingface.co if you need access now.
149
+ 2025-04-09 00:03:15,713 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
150
+ 2025-04-09 00:03:16,356 - __main__ - ERROR - Error processing chat_history/None_20250408-174352.json: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/Rulga/status-law-knowledge-base/commit/main (Request ID: Root=1-67f59d22-73fd809c1a8a17446cceffb3;0a613739-047a-4b19-b055-6eeedb53be5c)
151
+
152
+ You have been rate-limited; you can retry this action in about 1 hour. If you're a new user, your limits will raise progressively over time. Get in touch with us at website@huggingface.co if you need access now.
153
+ 2025-04-09 00:03:16,906 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
154
+ 2025-04-09 00:03:17,836 - __main__ - ERROR - Error processing chat_history/None_20250408-174421.json: 429 Client Error: Too Many Requests for url: https://huggingface.co/api/datasets/Rulga/status-law-knowledge-base/commit/main (Request ID: Root=1-67f59d23-3ce9b0eb2dc7cc327a099b64;de2303ea-24c6-4379-b1d9-5c8ca2548ec9)
155
+
156
+ You have been rate-limited; you can retry this action in about 1 hour. If you're a new user, your limits will raise progressively over time. Get in touch with us at website@huggingface.co if you need access now.
157
+ 2025-04-09 00:03:17,840 - __main__ - INFO - Repair completed: 42 files repaired, 0 skipped, 12 errors
158
+ 2025-04-09 00:03:17,894 - __main__ - INFO - Successfully repaired 42 files
159
+ 2025-04-09 00:15:31,036 - __main__ - INFO - === CONFIGURATION ===
160
+ 2025-04-09 00:15:31,075 - __main__ - INFO - Chat history path: ./chat_history
161
+ 2025-04-09 00:15:31,075 - __main__ - INFO - Dataset ID: Rulga/status-law-knowledge-base
162
+ 2025-04-09 00:15:31,075 - __main__ - INFO - Test mode: No
163
+ 2025-04-09 00:15:31,075 - __main__ - INFO - Local only: No
164
+ 2025-04-09 00:15:31,075 - __main__ - INFO - ==================
165
+ 2025-04-09 00:15:31,895 - __main__ - INFO - Found 55 files with 'None_' prefix in dataset
166
+ 2025-04-09 00:15:34,926 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
167
+ 2025-04-09 00:15:35,608 - __main__ - WARNING - Rate limit hit, waiting before retry...
168
+ 2025-04-09 00:16:36,045 - __main__ - WARNING - Rate limit hit, waiting before retry...
169
+ 2025-04-09 00:18:36,424 - __main__ - WARNING - Rate limit hit, waiting before retry...
170
+ 2025-04-09 00:18:36,434 - __main__ - ERROR - Error processing chat_history/None_20250408-165439.json: RetryError[<Future at 0x155840b74a0 state=finished raised HfHubHTTPError>]
171
+ 2025-04-09 00:18:38,974 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
172
+ 2025-04-09 00:18:39,576 - __main__ - WARNING - Rate limit hit, waiting before retry...
173
+ 2025-04-09 00:19:39,969 - __main__ - WARNING - Rate limit hit, waiting before retry...
174
+ 2025-04-09 00:21:40,373 - __main__ - WARNING - Rate limit hit, waiting before retry...
175
+ 2025-04-09 00:21:40,373 - __main__ - ERROR - Error processing chat_history/None_20250408-165706.json: RetryError[<Future at 0x155841310a0 state=finished raised HfHubHTTPError>]
176
+ 2025-04-09 00:21:42,981 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
177
+ 2025-04-09 00:21:43,592 - __main__ - WARNING - Rate limit hit, waiting before retry...
178
+ 2025-04-09 00:22:44,012 - __main__ - WARNING - Rate limit hit, waiting before retry...
179
+ 2025-04-09 00:24:44,440 - __main__ - WARNING - Rate limit hit, waiting before retry...
180
+ 2025-04-09 00:24:44,440 - __main__ - ERROR - Error processing chat_history/None_20250408-170353.json: RetryError[<Future at 0x155831ba540 state=finished raised HfHubHTTPError>]
181
+ 2025-04-09 00:24:47,040 - huggingface_hub.hf_api - WARNING - No files have been modified since last commit. Skipping to prevent empty commit.
182
+ 2025-04-09 00:24:47,694 - __main__ - WARNING - Rate limit hit, waiting before retry...