datafreak committed on
Commit
dcd1c9e
·
verified ·
1 Parent(s): ab9c9c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +231 -142
app.py CHANGED
@@ -95,8 +95,119 @@ def get_all_files():
95
  except Exception as e:
96
  return []
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  def list_uploaded_files_paginated(page_num=0, progress=gr.Progress()):
99
- """List files with pagination - 10 files per page"""
100
  try:
101
  progress(0.1, desc="πŸ” Getting files...")
102
 
@@ -110,7 +221,7 @@ def list_uploaded_files_paginated(page_num=0, progress=gr.Progress()):
110
  progress(0.5, desc="πŸ“Š Processing page...")
111
 
112
  # Pagination settings
113
- files_per_page = 100 # Changed to 100
114
  start_idx = page_num * files_per_page
115
  end_idx = start_idx + files_per_page
116
 
@@ -133,10 +244,23 @@ def list_uploaded_files_paginated(page_num=0, progress=gr.Progress()):
133
  try:
134
  # Get only the file name/title
135
  file_name = getattr(file_obj, 'name', 'Unknown File')
 
 
 
136
  global_index = start_idx + i
137
 
138
- # Display only the title vertically
139
- detailed_info += f"{global_index}. **{file_name}**\n\n"
 
 
 
 
 
 
 
 
 
 
140
 
141
  except Exception as file_error:
142
  detailed_info += f"{start_idx + i}. ❌ **Error loading file**\n\n"
@@ -173,140 +297,6 @@ def load_prev_page(current_page_info):
173
  return list_uploaded_files_paginated(max(0, current_page - 1))
174
  except:
175
  return list_uploaded_files_paginated(0)
176
- """List all files uploaded to Pinecone Assistant with their metadata and timestamps"""
177
- try:
178
- progress(0.1, desc="πŸ” Connecting to Pinecone Assistant...")
179
- assistant_name = os.getenv("PINECONE_ASSISTANT_NAME", "gstminutes")
180
- assistant = pc.assistant.Assistant(assistant_name=assistant_name)
181
-
182
- progress(0.3, desc="πŸ“‹ Fetching file list...")
183
- time.sleep(0.5)
184
-
185
- # List all files in the assistant
186
- files_response = assistant.list_files()
187
-
188
- progress(0.7, desc="πŸ“Š Processing file information...")
189
- time.sleep(0.3)
190
-
191
- # Check if files_response is directly a list or has a files attribute
192
- if hasattr(files_response, 'files'):
193
- files_list = files_response.files
194
- else:
195
- files_list = files_response
196
-
197
- if not files_list:
198
- progress(1.0, desc="βœ… Complete - No files found")
199
- return "πŸ“‹ **No files found in Pinecone Assistant**", ""
200
- total_files = len(files_list)
201
-
202
- # Sort files by creation time (most recent first) using robust timestamp parsing
203
- sorted_files = sorted(
204
- files_list,
205
- key=lambda x: parse_pinecone_timestamp(getattr(x, 'created_on', '')),
206
- reverse=True
207
- )
208
-
209
- # Only show last 10 files for faster loading
210
- recent_files = sorted_files[:10]
211
-
212
- progress(0.9, desc="πŸ“ Formatting results...")
213
-
214
- # Create summary focusing on newly uploaded files
215
- summary = f"πŸ“Š **Files Summary**\n\n"
216
- summary += f"πŸ“ **Total files:** {total_files}\n"
217
- summary += f"οΏ½ **Showing:** Last 10 files (newest first)\n"
218
- summary += f"οΏ½πŸ• **Last updated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
219
-
220
- # Show latest files in summary for quick view
221
- if len(recent_files) > 0:
222
- summary += f"πŸ†• **Latest uploads:**\n"
223
- for i, file_obj in enumerate(recent_files[:5], 1):
224
- file_name = getattr(file_obj, 'name', 'Unknown')
225
- created_on = getattr(file_obj, 'created_on', 'Unknown')
226
- try:
227
- if created_on != 'Unknown':
228
- created_formatted = parse_pinecone_timestamp(created_on).strftime('%m-%d %H:%M')
229
- else:
230
- created_formatted = 'Unknown'
231
- except:
232
- created_formatted = 'Unknown'
233
- summary += f"{i}. {file_name} ({created_formatted})\n"
234
- summary += "\n"
235
-
236
- # Create detailed file list (only last 10 files)
237
- detailed_info = "## πŸ“‹ **Latest 10 Files**\n\n"
238
-
239
- for i, file_obj in enumerate(recent_files, 1):
240
- try:
241
- # Get basic file information
242
- file_name = getattr(file_obj, 'name', 'Unknown')
243
- file_id = getattr(file_obj, 'id', 'Unknown')
244
- file_size = getattr(file_obj, 'size', 0)
245
- created_on = getattr(file_obj, 'created_on', 'Unknown')
246
- updated_on = getattr(file_obj, 'updated_on', created_on)
247
-
248
- # Format file size
249
- if file_size > 1024 * 1024:
250
- size_str = f"{file_size / (1024 * 1024):.2f} MB"
251
- elif file_size > 1024:
252
- size_str = f"{file_size / 1024:.2f} KB"
253
- else:
254
- size_str = f"{file_size} bytes"
255
-
256
- # Format timestamps
257
- try:
258
- if created_on != 'Unknown':
259
- created_formatted = parse_pinecone_timestamp(created_on).strftime('%Y-%m-%d %H:%M:%S UTC')
260
- else:
261
- created_formatted = 'Unknown'
262
-
263
- if updated_on != 'Unknown' and updated_on != created_on:
264
- updated_formatted = parse_pinecone_timestamp(updated_on).strftime('%Y-%m-%d %H:%M:%S UTC')
265
- else:
266
- updated_formatted = created_formatted
267
- except Exception as e:
268
- created_formatted = str(created_on)
269
- updated_formatted = str(updated_on)
270
-
271
- detailed_info += f"### {i}. πŸ“„ **{file_name}**\n"
272
- detailed_info += f"- **πŸ†” File ID:** `{file_id}`\n"
273
- detailed_info += f"- **πŸ“ Size:** {size_str}\n"
274
- detailed_info += f"- **πŸ“… Uploaded:** {created_formatted}\n"
275
- detailed_info += f"- **πŸ”„ Last Updated:** {updated_formatted}\n"
276
-
277
- # Try to get metadata if available
278
- try:
279
- # Get file details for metadata
280
- file_details = assistant.describe_file(file_id=file_id)
281
- if hasattr(file_details, 'metadata') and file_details.metadata:
282
- metadata = file_details.metadata
283
- detailed_info += f"- **🏷️ Metadata:**\n"
284
-
285
- if isinstance(metadata, dict):
286
- for key, value in metadata.items():
287
- if isinstance(value, list):
288
- detailed_info += f" - **{key.title()}:** {', '.join(map(str, value))}\n"
289
- else:
290
- detailed_info += f" - **{key.title()}:** {value}\n"
291
- else:
292
- detailed_info += f" - {metadata}\n"
293
- except Exception as metadata_error:
294
- detailed_info += f"- **🏷️ Metadata:** Could not retrieve metadata\n"
295
-
296
- detailed_info += "\n---\n\n"
297
-
298
- except Exception as file_error:
299
- detailed_info += f"### {i}. ❌ **Error processing file**\n"
300
- detailed_info += f"- **Error:** {str(file_error)}\n\n---\n\n"
301
-
302
- progress(1.0, desc="βœ… File list retrieved successfully!")
303
- time.sleep(0.3)
304
-
305
- return summary, detailed_info
306
-
307
- except Exception as e:
308
- error_msg = f"❌ **Error retrieving file list:** {str(e)}"
309
- return error_msg, ""
310
 
311
  def refresh_file_list():
312
  """Refresh the file list"""
@@ -456,6 +446,10 @@ def clear_form():
456
  """Clear all form fields"""
457
  return [None] + [""] * 30 + ["", "", "🟒 **Ready to process documents**"]
458
 
 
 
 
 
459
  def start_processing():
460
  """Show processing started status"""
461
  return "πŸ”„ **Processing documents... Please wait**"
@@ -480,6 +474,12 @@ with gr.Blocks(
480
  text-align: center;
481
  background-color: #f8f9fa;
482
  }
 
 
 
 
 
 
483
  .tab-nav {
484
  margin-bottom: 20px;
485
  }
@@ -490,12 +490,13 @@ with gr.Blocks(
490
  """
491
  # πŸ“„ Tax Document Ingestion System
492
 
493
- Upload and manage documents in the Pinecone Assistant for GST Minutes processing.
494
 
495
  ## πŸš€ Features:
496
  - βœ… **Multiple file upload** - Select and upload multiple documents at once
497
  - 🏷️ **Metadata tagging** - Add sections, keywords, and descriptions
498
  - πŸ”„ **Batch processing** - All files processed with individual metadata
 
499
  - πŸ“Š **File management** - View uploaded files with timestamps and metadata
500
  - πŸ“‹ **Detailed reporting** - See success/failure status for each operation
501
 
@@ -574,15 +575,70 @@ with gr.Blocks(
574
  value="",
575
  max_height=400 # Add height limit for scrolling
576
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577
 
578
- # Tab 2: View Uploaded Files
579
  with gr.TabItem("πŸ“‹ View Uploaded Files", id="view_tab"):
580
  gr.Markdown("### πŸ“‹ **Uploaded Files Management**")
581
  gr.Markdown("View all files currently uploaded to the Pinecone Assistant with their metadata and timestamps.")
582
 
583
  with gr.Row():
584
  refresh_btn = gr.Button(
585
- "οΏ½ Fetch Files",
586
  variant="primary",
587
  size="lg"
588
  )
@@ -653,6 +709,27 @@ with gr.Blocks(
653
  outputs=[files_input] + metadata_fields + [status_output, results_output, processing_status]
654
  )
655
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656
  # Event handlers for View Files tab
657
 
658
  # Fetch files - load page 1
@@ -692,10 +769,22 @@ with gr.Blocks(
692
  - Each file gets its own sections, keywords, and description
693
  - Check the results section for upload status
694
 
 
 
 
 
 
 
695
  **View Uploaded Files:**
696
- - Click 'Refresh File List' to see all uploaded files
697
  - View file details including upload timestamps and metadata
698
  - Files are sorted by most recent first
 
 
 
 
 
 
699
 
700
  ### πŸ“ž **Support:**
701
  For issues or questions, contact the development team.
 
95
  except Exception as e:
96
  return []
97
 
98
def get_file_choices():
    """Build the choices for the delete dropdown.

    Each file returned by ``get_all_files()`` becomes one
    ``(display_name, file_id)`` tuple. The display name is the file name,
    followed by the upload time (``YYYY-MM-DD HH:MM``) when the file has a
    ``created_on`` value that ``parse_pinecone_timestamp`` can parse.

    Returns:
        A list of ``(display_name, file_id)`` tuples, or an empty list when
        there are no files or the file list could not be retrieved.
    """
    try:
        all_files = get_all_files()
        if not all_files:
            return []

        choices = []
        for file_obj in all_files:
            file_name = getattr(file_obj, 'name', 'Unknown File')
            file_id = getattr(file_obj, 'id', 'unknown')
            created_on = getattr(file_obj, 'created_on', '')

            # Default label is the bare file name; the upload time is only
            # appended when a timestamp exists and can be formatted.
            display_name = file_name
            if created_on:
                try:
                    created_formatted = parse_pinecone_timestamp(created_on).strftime('%Y-%m-%d %H:%M')
                except Exception:
                    # An unparseable timestamp must not hide the file from
                    # the dropdown. Catch Exception rather than using a bare
                    # except so KeyboardInterrupt/SystemExit still propagate.
                    pass
                else:
                    display_name = f"{file_name} (uploaded: {created_formatted})"

            choices.append((display_name, file_id))

        return choices
    except Exception:
        # Best-effort UI helper: any failure degrades to an empty dropdown
        # instead of breaking the page.
        return []
126
+
127
def refresh_delete_dropdown():
    """Reload the delete dropdown from the current file list.

    Returns:
        A ``gr.update`` that replaces the dropdown choices, clears the
        current selection, and enables the dropdown only when at least one
        file is available.
    """
    # Normalise "nothing available" to an empty list so one update call
    # covers both the empty and the populated case.
    available = get_file_choices() or []
    return gr.update(choices=available, value=None, interactive=bool(available))
133
+
134
def delete_selected_files(selected_file_ids, progress=gr.Progress()):
    """Delete every selected file from the Pinecone Assistant and report the outcome.

    Args:
        selected_file_ids: File IDs picked in the delete dropdown.
        progress: Gradio progress tracker, called before, during and after
            the deletion loop.

    Returns:
        A ``(status_message, detailed_results)`` tuple of Markdown strings.
        The first holds the success/failure counts; the second lists each
        deleted and each failed file. When nothing is selected, or an
        unexpected error occurs outside the per-file loop, the first element
        is an error message and the second is empty.
    """
    if not selected_file_ids:
        return "❌ **Error:** No files selected for deletion", ""

    try:
        progress(0.1, desc="πŸ”§ Initializing Pinecone Assistant...")
        assistant = pc.assistant.Assistant(
            assistant_name=os.getenv("PINECONE_ASSISTANT_NAME", "gstminutes")
        )

        # Look-up table so the report can show file names instead of raw IDs.
        names_by_id = {}
        for entry in get_all_files():
            entry_id = getattr(entry, 'id', '')
            names_by_id[entry_id] = getattr(entry, 'name', 'Unknown')

        total = len(selected_file_ids)
        succeeded = []
        failed = []

        progress(0.2, desc=f"πŸ—‘οΈ Starting deletion of {total} files...")

        for position, file_id in enumerate(selected_file_ids):
            try:
                label = names_by_id.get(file_id, f"File ID: {file_id}")
                # The loop covers the 0.2 -> 0.9 span of the progress bar.
                progress((0.2 + (position / total) * 0.7), desc=f"πŸ—‘οΈ Deleting: {label}...")

                assistant.delete_file(file_id=file_id)
                succeeded.append({
                    'name': label,
                    'id': file_id,
                    'status': 'success'
                })

                # Small pause between consecutive deletions.
                time.sleep(0.2)

            except Exception as delete_error:
                # One failing file must not abort the rest of the batch.
                failed.append({
                    'name': names_by_id.get(file_id, f"File ID: {file_id}"),
                    'id': file_id,
                    'error': str(delete_error)
                })

        progress(1.0, desc="βœ… Deletion process completed!")

        # Summary block with the counts.
        status_message = "".join([
            f"πŸ“Š **Deletion Complete**\n\n",
            f"βœ… **Successfully deleted:** {len(succeeded)} files\n",
            f"❌ **Failed to delete:** {len(failed)} files\n",
            f"πŸ“ **Total processed:** {total} files\n\n",
        ])

        # Per-file report: successes first, then failures with their errors.
        report = ["## πŸ—‘οΈ **Deletion Results**\n\n"]

        if succeeded:
            report.append("### βœ… **Successfully Deleted Files:**\n")
            for item in succeeded:
                report.append(f"- **{item['name']}** (`{item['id']}`)\n")
            report.append("\n")

        if failed:
            report.append("### ❌ **Failed Deletions:**\n")
            for item in failed:
                report.append(f"- **{item['name']}** (`{item['id']}`)\n")
                report.append(f" - Error: {item['error']}\n")
            report.append("\n")

        return status_message, "".join(report)

    except Exception as exc:
        # Failures outside the per-file loop (client setup, file listing,
        # progress reporting) are reported as one critical error.
        return f"❌ **Critical Error during deletion:** {str(exc)}", ""
208
+
209
  def list_uploaded_files_paginated(page_num=0, progress=gr.Progress()):
210
+ """List files with pagination - 100 files per page"""
211
  try:
212
  progress(0.1, desc="πŸ” Getting files...")
213
 
 
221
  progress(0.5, desc="πŸ“Š Processing page...")
222
 
223
  # Pagination settings
224
+ files_per_page = 100
225
  start_idx = page_num * files_per_page
226
  end_idx = start_idx + files_per_page
227
 
 
244
  try:
245
  # Get only the file name/title
246
  file_name = getattr(file_obj, 'name', 'Unknown File')
247
+ file_id = getattr(file_obj, 'id', 'Unknown ID')
248
+ created_on = getattr(file_obj, 'created_on', '')
249
+
250
  global_index = start_idx + i
251
 
252
+ # Format timestamp
253
+ try:
254
+ if created_on:
255
+ created_formatted = parse_pinecone_timestamp(created_on).strftime('%Y-%m-%d %H:%M')
256
+ else:
257
+ created_formatted = 'Unknown'
258
+ except:
259
+ created_formatted = 'Unknown'
260
+
261
+ # Display file info
262
+ detailed_info += f"{global_index}. **{file_name}**\n"
263
+ detailed_info += f" πŸ“… Uploaded: {created_formatted} | πŸ†” ID: `{file_id}`\n\n"
264
 
265
  except Exception as file_error:
266
  detailed_info += f"{start_idx + i}. ❌ **Error loading file**\n\n"
 
297
  return list_uploaded_files_paginated(max(0, current_page - 1))
298
  except:
299
  return list_uploaded_files_paginated(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
 
301
  def refresh_file_list():
302
  """Refresh the file list"""
 
446
  """Clear all form fields"""
447
  return [None] + [""] * 30 + ["", "", "🟒 **Ready to process documents**"]
448
 
449
def clear_delete_form():
    """Reset the delete tab to its empty state.

    Returns:
        A 3-tuple: a ``gr.update`` that empties the dropdown selection,
        followed by two empty strings for the status and results outputs.
    """
    cleared_selection = gr.update(value=[])
    blank_status = ""
    blank_results = ""
    return cleared_selection, blank_status, blank_results
452
+
453
  def start_processing():
454
  """Show processing started status"""
455
  return "πŸ”„ **Processing documents... Please wait**"
 
474
  text-align: center;
475
  background-color: #f8f9fa;
476
  }
477
+ .delete-container {
478
+ border: 2px dashed #f44336;
479
+ border-radius: 10px;
480
+ padding: 20px;
481
+ background-color: #ffebee;
482
+ }
483
  .tab-nav {
484
  margin-bottom: 20px;
485
  }
 
490
  """
491
  # πŸ“„ Tax Document Ingestion System
492
 
493
+ Upload, manage, and delete documents in the Pinecone Assistant for GST Minutes processing.
494
 
495
  ## πŸš€ Features:
496
  - βœ… **Multiple file upload** - Select and upload multiple documents at once
497
  - 🏷️ **Metadata tagging** - Add sections, keywords, and descriptions
498
  - πŸ”„ **Batch processing** - All files processed with individual metadata
499
+ - πŸ—‘οΈ **File deletion** - Delete multiple files by selecting from dropdown
500
  - πŸ“Š **File management** - View uploaded files with timestamps and metadata
501
  - πŸ“‹ **Detailed reporting** - See success/failure status for each operation
502
 
 
575
  value="",
576
  max_height=400 # Add height limit for scrolling
577
  )
578
+
579
+ # Tab 2: Delete Documents
580
+ with gr.TabItem("πŸ—‘οΈ Delete Documents", id="delete_tab"):
581
+ gr.Markdown("### πŸ—‘οΈ **Delete Multiple Documents**")
582
+ gr.Markdown("Select multiple files from the dropdown to delete them from the Pinecone Assistant.")
583
+
584
+ with gr.Row():
585
+ with gr.Column(scale=2):
586
+ file_dropdown = gr.Dropdown(
587
+ label="πŸ“‹ Select Files to Delete (Multiple Selection)",
588
+ choices=[],
589
+ multiselect=True,
590
+ interactive=False,
591
+ elem_classes=["delete-container"]
592
+ )
593
+
594
+ with gr.Column(scale=1):
595
+ refresh_dropdown_btn = gr.Button(
596
+ "πŸ”„ Refresh File List",
597
+ variant="secondary",
598
+ size="lg"
599
+ )
600
+
601
+ with gr.Row():
602
+ with gr.Column(scale=1):
603
+ delete_btn = gr.Button(
604
+ "πŸ—‘οΈ Delete Selected Files",
605
+ variant="stop",
606
+ size="lg"
607
+ )
608
+
609
+ with gr.Column(scale=1):
610
+ clear_delete_btn = gr.Button(
611
+ "β†Ί Clear Selection",
612
+ variant="secondary",
613
+ size="lg"
614
+ )
615
+
616
+ gr.Markdown("---")
617
+
618
+ # Delete results section
619
+ with gr.Row():
620
+ with gr.Column():
621
+ delete_status_output = gr.Markdown(
622
+ label="πŸ“Š Deletion Status",
623
+ value="*Select files to delete...*"
624
+ )
625
+
626
+ with gr.Row():
627
+ with gr.Column():
628
+ delete_results_output = gr.Markdown(
629
+ label="πŸ—‘οΈ Deletion Results",
630
+ value="",
631
+ max_height=400
632
+ )
633
 
634
+ # Tab 3: View Uploaded Files
635
  with gr.TabItem("πŸ“‹ View Uploaded Files", id="view_tab"):
636
  gr.Markdown("### πŸ“‹ **Uploaded Files Management**")
637
  gr.Markdown("View all files currently uploaded to the Pinecone Assistant with their metadata and timestamps.")
638
 
639
  with gr.Row():
640
  refresh_btn = gr.Button(
641
+ "πŸ”„ Fetch Files",
642
  variant="primary",
643
  size="lg"
644
  )
 
709
  outputs=[files_input] + metadata_fields + [status_output, results_output, processing_status]
710
  )
711
 
712
+ # Event handlers for Delete tab
713
+
714
+ # Refresh dropdown with current files
715
+ refresh_dropdown_btn.click(
716
+ fn=refresh_delete_dropdown,
717
+ outputs=[file_dropdown]
718
+ )
719
+
720
+ # Delete selected files
721
+ delete_btn.click(
722
+ fn=delete_selected_files,
723
+ inputs=[file_dropdown],
724
+ outputs=[delete_status_output, delete_results_output]
725
+ )
726
+
727
+ # Clear delete form
728
+ clear_delete_btn.click(
729
+ fn=clear_delete_form,
730
+ outputs=[file_dropdown, delete_status_output, delete_results_output]
731
+ )
732
+
733
  # Event handlers for View Files tab
734
 
735
  # Fetch files - load page 1
 
769
  - Each file gets its own sections, keywords, and description
770
  - Check the results section for upload status
771
 
772
+ **Delete Documents:**
773
+ - Click 'Refresh File List' to load current files in dropdown
774
+ - Select multiple files using the dropdown (supports multi-select)
775
+ - Click 'Delete Selected Files' to remove them permanently
776
+ - View deletion results for success/failure status
777
+
778
  **View Uploaded Files:**
779
+ - Click 'Fetch Files' to see all uploaded files
780
  - View file details including upload timestamps and metadata
781
  - Files are sorted by most recent first
782
+ - Use pagination to navigate through large file lists
783
+
784
+ ### ⚠️ **Important Notes:**
785
+ - File deletion is **permanent** and cannot be undone
786
+ - Always verify your selection before deleting files
787
+ - The system maps file titles to IDs internally for deletion
788
 
789
  ### πŸ“ž **Support:**
790
  For issues or questions, contact the development team.