Update app.py
Browse files
app.py
CHANGED
|
@@ -95,8 +95,119 @@ def get_all_files():
|
|
| 95 |
except Exception as e:
|
| 96 |
return []
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
def list_uploaded_files_paginated(page_num=0, progress=gr.Progress()):
|
| 99 |
-
"""List files with pagination -
|
| 100 |
try:
|
| 101 |
progress(0.1, desc="π Getting files...")
|
| 102 |
|
|
@@ -110,7 +221,7 @@ def list_uploaded_files_paginated(page_num=0, progress=gr.Progress()):
|
|
| 110 |
progress(0.5, desc="π Processing page...")
|
| 111 |
|
| 112 |
# Pagination settings
|
| 113 |
-
files_per_page = 100
|
| 114 |
start_idx = page_num * files_per_page
|
| 115 |
end_idx = start_idx + files_per_page
|
| 116 |
|
|
@@ -133,10 +244,23 @@ def list_uploaded_files_paginated(page_num=0, progress=gr.Progress()):
|
|
| 133 |
try:
|
| 134 |
# Get only the file name/title
|
| 135 |
file_name = getattr(file_obj, 'name', 'Unknown File')
|
|
|
|
|
|
|
|
|
|
| 136 |
global_index = start_idx + i
|
| 137 |
|
| 138 |
-
#
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
except Exception as file_error:
|
| 142 |
detailed_info += f"{start_idx + i}. β **Error loading file**\n\n"
|
|
@@ -173,140 +297,6 @@ def load_prev_page(current_page_info):
|
|
| 173 |
return list_uploaded_files_paginated(max(0, current_page - 1))
|
| 174 |
except:
|
| 175 |
return list_uploaded_files_paginated(0)
|
| 176 |
-
"""List all files uploaded to Pinecone Assistant with their metadata and timestamps"""
|
| 177 |
-
try:
|
| 178 |
-
progress(0.1, desc="π Connecting to Pinecone Assistant...")
|
| 179 |
-
assistant_name = os.getenv("PINECONE_ASSISTANT_NAME", "gstminutes")
|
| 180 |
-
assistant = pc.assistant.Assistant(assistant_name=assistant_name)
|
| 181 |
-
|
| 182 |
-
progress(0.3, desc="π Fetching file list...")
|
| 183 |
-
time.sleep(0.5)
|
| 184 |
-
|
| 185 |
-
# List all files in the assistant
|
| 186 |
-
files_response = assistant.list_files()
|
| 187 |
-
|
| 188 |
-
progress(0.7, desc="π Processing file information...")
|
| 189 |
-
time.sleep(0.3)
|
| 190 |
-
|
| 191 |
-
# Check if files_response is directly a list or has a files attribute
|
| 192 |
-
if hasattr(files_response, 'files'):
|
| 193 |
-
files_list = files_response.files
|
| 194 |
-
else:
|
| 195 |
-
files_list = files_response
|
| 196 |
-
|
| 197 |
-
if not files_list:
|
| 198 |
-
progress(1.0, desc="β
Complete - No files found")
|
| 199 |
-
return "π **No files found in Pinecone Assistant**", ""
|
| 200 |
-
total_files = len(files_list)
|
| 201 |
-
|
| 202 |
-
# Sort files by creation time (most recent first) using robust timestamp parsing
|
| 203 |
-
sorted_files = sorted(
|
| 204 |
-
files_list,
|
| 205 |
-
key=lambda x: parse_pinecone_timestamp(getattr(x, 'created_on', '')),
|
| 206 |
-
reverse=True
|
| 207 |
-
)
|
| 208 |
-
|
| 209 |
-
# Only show last 10 files for faster loading
|
| 210 |
-
recent_files = sorted_files[:10]
|
| 211 |
-
|
| 212 |
-
progress(0.9, desc="π Formatting results...")
|
| 213 |
-
|
| 214 |
-
# Create summary focusing on newly uploaded files
|
| 215 |
-
summary = f"π **Files Summary**\n\n"
|
| 216 |
-
summary += f"π **Total files:** {total_files}\n"
|
| 217 |
-
summary += f"οΏ½ **Showing:** Last 10 files (newest first)\n"
|
| 218 |
-
summary += f"οΏ½π **Last updated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
| 219 |
-
|
| 220 |
-
# Show latest files in summary for quick view
|
| 221 |
-
if len(recent_files) > 0:
|
| 222 |
-
summary += f"π **Latest uploads:**\n"
|
| 223 |
-
for i, file_obj in enumerate(recent_files[:5], 1):
|
| 224 |
-
file_name = getattr(file_obj, 'name', 'Unknown')
|
| 225 |
-
created_on = getattr(file_obj, 'created_on', 'Unknown')
|
| 226 |
-
try:
|
| 227 |
-
if created_on != 'Unknown':
|
| 228 |
-
created_formatted = parse_pinecone_timestamp(created_on).strftime('%m-%d %H:%M')
|
| 229 |
-
else:
|
| 230 |
-
created_formatted = 'Unknown'
|
| 231 |
-
except:
|
| 232 |
-
created_formatted = 'Unknown'
|
| 233 |
-
summary += f"{i}. {file_name} ({created_formatted})\n"
|
| 234 |
-
summary += "\n"
|
| 235 |
-
|
| 236 |
-
# Create detailed file list (only last 10 files)
|
| 237 |
-
detailed_info = "## π **Latest 10 Files**\n\n"
|
| 238 |
-
|
| 239 |
-
for i, file_obj in enumerate(recent_files, 1):
|
| 240 |
-
try:
|
| 241 |
-
# Get basic file information
|
| 242 |
-
file_name = getattr(file_obj, 'name', 'Unknown')
|
| 243 |
-
file_id = getattr(file_obj, 'id', 'Unknown')
|
| 244 |
-
file_size = getattr(file_obj, 'size', 0)
|
| 245 |
-
created_on = getattr(file_obj, 'created_on', 'Unknown')
|
| 246 |
-
updated_on = getattr(file_obj, 'updated_on', created_on)
|
| 247 |
-
|
| 248 |
-
# Format file size
|
| 249 |
-
if file_size > 1024 * 1024:
|
| 250 |
-
size_str = f"{file_size / (1024 * 1024):.2f} MB"
|
| 251 |
-
elif file_size > 1024:
|
| 252 |
-
size_str = f"{file_size / 1024:.2f} KB"
|
| 253 |
-
else:
|
| 254 |
-
size_str = f"{file_size} bytes"
|
| 255 |
-
|
| 256 |
-
# Format timestamps
|
| 257 |
-
try:
|
| 258 |
-
if created_on != 'Unknown':
|
| 259 |
-
created_formatted = parse_pinecone_timestamp(created_on).strftime('%Y-%m-%d %H:%M:%S UTC')
|
| 260 |
-
else:
|
| 261 |
-
created_formatted = 'Unknown'
|
| 262 |
-
|
| 263 |
-
if updated_on != 'Unknown' and updated_on != created_on:
|
| 264 |
-
updated_formatted = parse_pinecone_timestamp(updated_on).strftime('%Y-%m-%d %H:%M:%S UTC')
|
| 265 |
-
else:
|
| 266 |
-
updated_formatted = created_formatted
|
| 267 |
-
except Exception as e:
|
| 268 |
-
created_formatted = str(created_on)
|
| 269 |
-
updated_formatted = str(updated_on)
|
| 270 |
-
|
| 271 |
-
detailed_info += f"### {i}. π **{file_name}**\n"
|
| 272 |
-
detailed_info += f"- **π File ID:** `{file_id}`\n"
|
| 273 |
-
detailed_info += f"- **π Size:** {size_str}\n"
|
| 274 |
-
detailed_info += f"- **π
Uploaded:** {created_formatted}\n"
|
| 275 |
-
detailed_info += f"- **π Last Updated:** {updated_formatted}\n"
|
| 276 |
-
|
| 277 |
-
# Try to get metadata if available
|
| 278 |
-
try:
|
| 279 |
-
# Get file details for metadata
|
| 280 |
-
file_details = assistant.describe_file(file_id=file_id)
|
| 281 |
-
if hasattr(file_details, 'metadata') and file_details.metadata:
|
| 282 |
-
metadata = file_details.metadata
|
| 283 |
-
detailed_info += f"- **π·οΈ Metadata:**\n"
|
| 284 |
-
|
| 285 |
-
if isinstance(metadata, dict):
|
| 286 |
-
for key, value in metadata.items():
|
| 287 |
-
if isinstance(value, list):
|
| 288 |
-
detailed_info += f" - **{key.title()}:** {', '.join(map(str, value))}\n"
|
| 289 |
-
else:
|
| 290 |
-
detailed_info += f" - **{key.title()}:** {value}\n"
|
| 291 |
-
else:
|
| 292 |
-
detailed_info += f" - {metadata}\n"
|
| 293 |
-
except Exception as metadata_error:
|
| 294 |
-
detailed_info += f"- **π·οΈ Metadata:** Could not retrieve metadata\n"
|
| 295 |
-
|
| 296 |
-
detailed_info += "\n---\n\n"
|
| 297 |
-
|
| 298 |
-
except Exception as file_error:
|
| 299 |
-
detailed_info += f"### {i}. β **Error processing file**\n"
|
| 300 |
-
detailed_info += f"- **Error:** {str(file_error)}\n\n---\n\n"
|
| 301 |
-
|
| 302 |
-
progress(1.0, desc="β
File list retrieved successfully!")
|
| 303 |
-
time.sleep(0.3)
|
| 304 |
-
|
| 305 |
-
return summary, detailed_info
|
| 306 |
-
|
| 307 |
-
except Exception as e:
|
| 308 |
-
error_msg = f"β **Error retrieving file list:** {str(e)}"
|
| 309 |
-
return error_msg, ""
|
| 310 |
|
| 311 |
def refresh_file_list():
|
| 312 |
"""Refresh the file list"""
|
|
@@ -456,6 +446,10 @@ def clear_form():
|
|
| 456 |
"""Clear all form fields"""
|
| 457 |
return [None] + [""] * 30 + ["", "", "π’ **Ready to process documents**"]
|
| 458 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
def start_processing():
|
| 460 |
"""Show processing started status"""
|
| 461 |
return "π **Processing documents... Please wait**"
|
|
@@ -480,6 +474,12 @@ with gr.Blocks(
|
|
| 480 |
text-align: center;
|
| 481 |
background-color: #f8f9fa;
|
| 482 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
.tab-nav {
|
| 484 |
margin-bottom: 20px;
|
| 485 |
}
|
|
@@ -490,12 +490,13 @@ with gr.Blocks(
|
|
| 490 |
"""
|
| 491 |
# π Tax Document Ingestion System
|
| 492 |
|
| 493 |
-
Upload and
|
| 494 |
|
| 495 |
## π Features:
|
| 496 |
- β
**Multiple file upload** - Select and upload multiple documents at once
|
| 497 |
- π·οΈ **Metadata tagging** - Add sections, keywords, and descriptions
|
| 498 |
- π **Batch processing** - All files processed with individual metadata
|
|
|
|
| 499 |
- π **File management** - View uploaded files with timestamps and metadata
|
| 500 |
- π **Detailed reporting** - See success/failure status for each operation
|
| 501 |
|
|
@@ -574,15 +575,70 @@ with gr.Blocks(
|
|
| 574 |
value="",
|
| 575 |
max_height=400 # Add height limit for scrolling
|
| 576 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 577 |
|
| 578 |
-
# Tab
|
| 579 |
with gr.TabItem("π View Uploaded Files", id="view_tab"):
|
| 580 |
gr.Markdown("### π **Uploaded Files Management**")
|
| 581 |
gr.Markdown("View all files currently uploaded to the Pinecone Assistant with their metadata and timestamps.")
|
| 582 |
|
| 583 |
with gr.Row():
|
| 584 |
refresh_btn = gr.Button(
|
| 585 |
-
"
|
| 586 |
variant="primary",
|
| 587 |
size="lg"
|
| 588 |
)
|
|
@@ -653,6 +709,27 @@ with gr.Blocks(
|
|
| 653 |
outputs=[files_input] + metadata_fields + [status_output, results_output, processing_status]
|
| 654 |
)
|
| 655 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 656 |
# Event handlers for View Files tab
|
| 657 |
|
| 658 |
# Fetch files - load page 1
|
|
@@ -692,10 +769,22 @@ with gr.Blocks(
|
|
| 692 |
- Each file gets its own sections, keywords, and description
|
| 693 |
- Check the results section for upload status
|
| 694 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
**View Uploaded Files:**
|
| 696 |
-
- Click '
|
| 697 |
- View file details including upload timestamps and metadata
|
| 698 |
- Files are sorted by most recent first
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 699 |
|
| 700 |
### π **Support:**
|
| 701 |
For issues or questions, contact the development team.
|
|
|
|
| 95 |
except Exception as e:
|
| 96 |
return []
|
| 97 |
|
| 98 |
+
def get_file_choices():
    """Build the choices for the file-deletion dropdown.

    Returns:
        A list of ``(display_name, file_id)`` tuples, one per file returned
        by ``get_all_files()``. The display name is the file name, followed
        by the formatted upload time when ``created_on`` is present and can
        be parsed. An empty list is returned when there are no files or when
        building the list fails.
    """
    try:
        all_files = get_all_files()
        if not all_files:
            return []

        choices = []
        for file_obj in all_files:
            file_name = getattr(file_obj, 'name', 'Unknown File')
            file_id = getattr(file_obj, 'id', 'unknown')
            created_on = getattr(file_obj, 'created_on', '')

            # The timestamp is decoration only: fall back to the bare file
            # name when it is missing or cannot be parsed/formatted.
            display_name = file_name
            try:
                if created_on:
                    created_formatted = parse_pinecone_timestamp(created_on).strftime('%Y-%m-%d %H:%M')
                    display_name = f"{file_name} (uploaded: {created_formatted})"
            except Exception:
                # Deliberately not a bare ``except``: KeyboardInterrupt and
                # SystemExit must still propagate.
                display_name = file_name

            choices.append((display_name, file_id))

        return choices
    except Exception:
        # Best effort for the UI: a failure here yields an empty choice list
        # rather than an exception.
        return []
|
| 126 |
+
|
| 127 |
+
def refresh_delete_dropdown():
    """Reload the delete dropdown from the current file list.

    Returns:
        A ``gr.update`` that replaces the dropdown choices with the result
        of ``get_file_choices()``, clears any current selection, and makes
        the dropdown interactive only when at least one choice exists.
    """
    available = get_file_choices()
    has_files = bool(available)
    return gr.update(
        choices=available if has_files else [],
        value=None,
        interactive=has_files,
    )
|
| 133 |
+
|
| 134 |
+
def delete_selected_files(selected_file_ids, progress=gr.Progress()):
    """Delete the selected files from the Pinecone Assistant.

    Args:
        selected_file_ids: File IDs chosen in the delete dropdown. A single
            ID passed as a plain string is treated as a one-element
            selection, and repeated IDs are deleted only once.
        progress: Gradio progress tracker used to report each step.

    Returns:
        A ``(status_message, detailed_results)`` tuple of Markdown strings.
        ``detailed_results`` is empty when nothing was selected or when the
        operation fails outside the per-file deletion loop.
    """
    if not selected_file_ids:
        return "β **Error:** No files selected for deletion", ""

    try:
        # A bare string would otherwise be iterated character by character.
        if isinstance(selected_file_ids, str):
            selected_file_ids = [selected_file_ids]
        # Drop repeated IDs (order preserved) so the same file is not
        # deleted twice and then reported as a failure.
        file_ids = list(dict.fromkeys(selected_file_ids))

        progress(0.1, desc="π§ Initializing Pinecone Assistant...")
        assistant_name = os.getenv("PINECONE_ASSISTANT_NAME", "gstminutes")
        assistant = pc.assistant.Assistant(assistant_name=assistant_name)

        # Map IDs to names so results can show a readable title.
        all_files = get_all_files()
        file_id_to_name = {getattr(f, 'id', ''): getattr(f, 'name', 'Unknown') for f in all_files}

        total_files = len(file_ids)
        deleted_files = []
        failed_files = []

        progress(0.2, desc=f"ποΈ Starting deletion of {total_files} files...")

        for i, file_id in enumerate(file_ids):
            file_name = file_id_to_name.get(file_id, f"File ID: {file_id}")
            try:
                progress((0.2 + (i / total_files) * 0.7), desc=f"ποΈ Deleting: {file_name}...")
                assistant.delete_file(file_id=file_id)
            except Exception as delete_error:
                # One failed deletion must not abort the rest of the batch.
                failed_files.append({
                    'name': file_name,
                    'id': file_id,
                    'error': str(delete_error)
                })
            else:
                deleted_files.append({
                    'name': file_name,
                    'id': file_id,
                    'status': 'success'
                })
                # Small delay between deletions; none needed after the last.
                if i < total_files - 1:
                    time.sleep(0.2)

        # NOTE(review): the check-mark emoji in the three literals below was
        # restored from a mis-encoded, line-broken scrape - confirm against
        # the real file.
        progress(1.0, desc="✅ Deletion process completed!")

        success_count = len(deleted_files)
        error_count = len(failed_files)

        status_message = "".join([
            "π **Deletion Complete**\n\n",
            f"✅ **Successfully deleted:** {success_count} files\n",
            f"β **Failed to delete:** {error_count} files\n",
            f"π **Total processed:** {total_files} files\n\n",
        ])

        # Collect the Markdown pieces and join once instead of repeated +=.
        result_parts = ["## ποΈ **Deletion Results**\n\n"]

        if deleted_files:
            result_parts.append("### ✅ **Successfully Deleted Files:**\n")
            result_parts.extend(
                f"- **{file_info['name']}** (`{file_info['id']}`)\n"
                for file_info in deleted_files
            )
            result_parts.append("\n")

        if failed_files:
            result_parts.append("### β **Failed Deletions:**\n")
            for file_info in failed_files:
                result_parts.append(f"- **{file_info['name']}** (`{file_info['id']}`)\n")
                result_parts.append(f" - Error: {file_info['error']}\n")
            result_parts.append("\n")

        return status_message, "".join(result_parts)

    except Exception as e:
        # Top-level UI boundary: report the failure as Markdown, not a crash.
        error_msg = f"β **Critical Error during deletion:** {str(e)}"
        return error_msg, ""
|
| 208 |
+
|
| 209 |
def list_uploaded_files_paginated(page_num=0, progress=gr.Progress()):
|
| 210 |
+
"""List files with pagination - 100 files per page"""
|
| 211 |
try:
|
| 212 |
progress(0.1, desc="π Getting files...")
|
| 213 |
|
|
|
|
| 221 |
progress(0.5, desc="π Processing page...")
|
| 222 |
|
| 223 |
# Pagination settings
|
| 224 |
+
files_per_page = 100
|
| 225 |
start_idx = page_num * files_per_page
|
| 226 |
end_idx = start_idx + files_per_page
|
| 227 |
|
|
|
|
| 244 |
try:
|
| 245 |
# Get only the file name/title
|
| 246 |
file_name = getattr(file_obj, 'name', 'Unknown File')
|
| 247 |
+
file_id = getattr(file_obj, 'id', 'Unknown ID')
|
| 248 |
+
created_on = getattr(file_obj, 'created_on', '')
|
| 249 |
+
|
| 250 |
global_index = start_idx + i
|
| 251 |
|
| 252 |
+
# Format timestamp
|
| 253 |
+
try:
|
| 254 |
+
if created_on:
|
| 255 |
+
created_formatted = parse_pinecone_timestamp(created_on).strftime('%Y-%m-%d %H:%M')
|
| 256 |
+
else:
|
| 257 |
+
created_formatted = 'Unknown'
|
| 258 |
+
except:
|
| 259 |
+
created_formatted = 'Unknown'
|
| 260 |
+
|
| 261 |
+
# Display file info
|
| 262 |
+
detailed_info += f"{global_index}. **{file_name}**\n"
|
| 263 |
+
detailed_info += f" π
Uploaded: {created_formatted} | π ID: `{file_id}`\n\n"
|
| 264 |
|
| 265 |
except Exception as file_error:
|
| 266 |
detailed_info += f"{start_idx + i}. β **Error loading file**\n\n"
|
|
|
|
| 297 |
return list_uploaded_files_paginated(max(0, current_page - 1))
|
| 298 |
except:
|
| 299 |
return list_uploaded_files_paginated(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
|
| 301 |
def refresh_file_list():
|
| 302 |
"""Refresh the file list"""
|
|
|
|
| 446 |
"""Clear all form fields"""
|
| 447 |
return [None] + [""] * 30 + ["", "", "π’ **Ready to process documents**"]
|
| 448 |
|
| 449 |
+
def clear_delete_form():
    """Reset the delete form.

    Returns:
        A 3-tuple: a ``gr.update`` that sets the value to an empty list,
        followed by two empty strings.
    """
    cleared_selection = gr.update(value=[])
    return cleared_selection, "", ""
|
| 452 |
+
|
| 453 |
def start_processing():
|
| 454 |
"""Show processing started status"""
|
| 455 |
return "π **Processing documents... Please wait**"
|
|
|
|
| 474 |
text-align: center;
|
| 475 |
background-color: #f8f9fa;
|
| 476 |
}
|
| 477 |
+
.delete-container {
|
| 478 |
+
border: 2px dashed #f44336;
|
| 479 |
+
border-radius: 10px;
|
| 480 |
+
padding: 20px;
|
| 481 |
+
background-color: #ffebee;
|
| 482 |
+
}
|
| 483 |
.tab-nav {
|
| 484 |
margin-bottom: 20px;
|
| 485 |
}
|
|
|
|
| 490 |
"""
|
| 491 |
# π Tax Document Ingestion System
|
| 492 |
|
| 493 |
+
Upload, manage, and delete documents in the Pinecone Assistant for GST Minutes processing.
|
| 494 |
|
| 495 |
## π Features:
|
| 496 |
- β
**Multiple file upload** - Select and upload multiple documents at once
|
| 497 |
- π·οΈ **Metadata tagging** - Add sections, keywords, and descriptions
|
| 498 |
- π **Batch processing** - All files processed with individual metadata
|
| 499 |
+
- ποΈ **File deletion** - Delete multiple files by selecting from dropdown
|
| 500 |
- π **File management** - View uploaded files with timestamps and metadata
|
| 501 |
- π **Detailed reporting** - See success/failure status for each operation
|
| 502 |
|
|
|
|
| 575 |
value="",
|
| 576 |
max_height=400 # Add height limit for scrolling
|
| 577 |
)
|
| 578 |
+
|
| 579 |
+
# Tab 2: Delete Documents
|
| 580 |
+
with gr.TabItem("ποΈ Delete Documents", id="delete_tab"):
|
| 581 |
+
gr.Markdown("### ποΈ **Delete Multiple Documents**")
|
| 582 |
+
gr.Markdown("Select multiple files from the dropdown to delete them from the Pinecone Assistant.")
|
| 583 |
+
|
| 584 |
+
with gr.Row():
|
| 585 |
+
with gr.Column(scale=2):
|
| 586 |
+
file_dropdown = gr.Dropdown(
|
| 587 |
+
label="π Select Files to Delete (Multiple Selection)",
|
| 588 |
+
choices=[],
|
| 589 |
+
multiselect=True,
|
| 590 |
+
interactive=False,
|
| 591 |
+
elem_classes=["delete-container"]
|
| 592 |
+
)
|
| 593 |
+
|
| 594 |
+
with gr.Column(scale=1):
|
| 595 |
+
refresh_dropdown_btn = gr.Button(
|
| 596 |
+
"π Refresh File List",
|
| 597 |
+
variant="secondary",
|
| 598 |
+
size="lg"
|
| 599 |
+
)
|
| 600 |
+
|
| 601 |
+
with gr.Row():
|
| 602 |
+
with gr.Column(scale=1):
|
| 603 |
+
delete_btn = gr.Button(
|
| 604 |
+
"ποΈ Delete Selected Files",
|
| 605 |
+
variant="stop",
|
| 606 |
+
size="lg"
|
| 607 |
+
)
|
| 608 |
+
|
| 609 |
+
with gr.Column(scale=1):
|
| 610 |
+
clear_delete_btn = gr.Button(
|
| 611 |
+
"βΊ Clear Selection",
|
| 612 |
+
variant="secondary",
|
| 613 |
+
size="lg"
|
| 614 |
+
)
|
| 615 |
+
|
| 616 |
+
gr.Markdown("---")
|
| 617 |
+
|
| 618 |
+
# Delete results section
|
| 619 |
+
with gr.Row():
|
| 620 |
+
with gr.Column():
|
| 621 |
+
delete_status_output = gr.Markdown(
|
| 622 |
+
label="π Deletion Status",
|
| 623 |
+
value="*Select files to delete...*"
|
| 624 |
+
)
|
| 625 |
+
|
| 626 |
+
with gr.Row():
|
| 627 |
+
with gr.Column():
|
| 628 |
+
delete_results_output = gr.Markdown(
|
| 629 |
+
label="ποΈ Deletion Results",
|
| 630 |
+
value="",
|
| 631 |
+
max_height=400
|
| 632 |
+
)
|
| 633 |
|
| 634 |
+
# Tab 3: View Uploaded Files
|
| 635 |
with gr.TabItem("π View Uploaded Files", id="view_tab"):
|
| 636 |
gr.Markdown("### π **Uploaded Files Management**")
|
| 637 |
gr.Markdown("View all files currently uploaded to the Pinecone Assistant with their metadata and timestamps.")
|
| 638 |
|
| 639 |
with gr.Row():
|
| 640 |
refresh_btn = gr.Button(
|
| 641 |
+
"π Fetch Files",
|
| 642 |
variant="primary",
|
| 643 |
size="lg"
|
| 644 |
)
|
|
|
|
| 709 |
outputs=[files_input] + metadata_fields + [status_output, results_output, processing_status]
|
| 710 |
)
|
| 711 |
|
| 712 |
+
# Event handlers for Delete tab
|
| 713 |
+
|
| 714 |
+
# Refresh dropdown with current files
|
| 715 |
+
refresh_dropdown_btn.click(
|
| 716 |
+
fn=refresh_delete_dropdown,
|
| 717 |
+
outputs=[file_dropdown]
|
| 718 |
+
)
|
| 719 |
+
|
| 720 |
+
# Delete selected files
|
| 721 |
+
delete_btn.click(
|
| 722 |
+
fn=delete_selected_files,
|
| 723 |
+
inputs=[file_dropdown],
|
| 724 |
+
outputs=[delete_status_output, delete_results_output]
|
| 725 |
+
)
|
| 726 |
+
|
| 727 |
+
# Clear delete form
|
| 728 |
+
clear_delete_btn.click(
|
| 729 |
+
fn=clear_delete_form,
|
| 730 |
+
outputs=[file_dropdown, delete_status_output, delete_results_output]
|
| 731 |
+
)
|
| 732 |
+
|
| 733 |
# Event handlers for View Files tab
|
| 734 |
|
| 735 |
# Fetch files - load page 1
|
|
|
|
| 769 |
- Each file gets its own sections, keywords, and description
|
| 770 |
- Check the results section for upload status
|
| 771 |
|
| 772 |
+
**Delete Documents:**
|
| 773 |
+
- Click 'Refresh File List' to load current files in dropdown
|
| 774 |
+
- Select multiple files using the dropdown (supports multi-select)
|
| 775 |
+
- Click 'Delete Selected Files' to remove them permanently
|
| 776 |
+
- View deletion results for success/failure status
|
| 777 |
+
|
| 778 |
**View Uploaded Files:**
|
| 779 |
+
- Click 'Fetch Files' to see all uploaded files
|
| 780 |
- View file details including upload timestamps and metadata
|
| 781 |
- Files are sorted by most recent first
|
| 782 |
+
- Use pagination to navigate through large file lists
|
| 783 |
+
|
| 784 |
+
### β οΈ **Important Notes:**
|
| 785 |
+
- File deletion is **permanent** and cannot be undone
|
| 786 |
+
- Always verify your selection before deleting files
|
| 787 |
+
- The system maps file titles to IDs internally for deletion
|
| 788 |
|
| 789 |
### π **Support:**
|
| 790 |
For issues or questions, contact the development team.
|