datafreak committed on
Commit
c4ff7ad
·
verified ·
1 Parent(s): c41ebc7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +274 -33
app.py CHANGED
@@ -28,7 +28,200 @@ pc = Pinecone(api_key=pinecone_api_key)
28
  UPLOAD_FOLDER = "uploads"
29
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
30
 
31
- def list_uploaded_files(progress=gr.Progress()):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  """List all files uploaded to Pinecone Assistant with their metadata and timestamps"""
33
  try:
34
  progress(0.1, desc="πŸ” Connecting to Pinecone Assistant...")
@@ -44,27 +237,55 @@ def list_uploaded_files(progress=gr.Progress()):
44
  progress(0.7, desc="πŸ“Š Processing file information...")
45
  time.sleep(0.3)
46
 
47
- if not files_response or not hasattr(files_response, 'files') or not files_response.files:
 
 
 
 
 
 
48
  progress(1.0, desc="βœ… Complete - No files found")
49
  return "πŸ“‹ **No files found in Pinecone Assistant**", ""
50
-
51
- files_list = files_response.files
52
  total_files = len(files_list)
53
 
54
- # Sort files by creation time (most recent first)
55
- sorted_files = sorted(files_list, key=lambda x: getattr(x, 'created_on', ''), reverse=True)
 
 
 
 
 
 
 
56
 
57
  progress(0.9, desc="πŸ“ Formatting results...")
58
 
59
- # Create summary
60
  summary = f"πŸ“Š **Files Summary**\n\n"
61
  summary += f"πŸ“ **Total files:** {total_files}\n"
62
- summary += f"πŸ• **Last updated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- # Create detailed file list
65
- detailed_info = "## πŸ“‹ **File Details**\n\n"
66
 
67
- for i, file_obj in enumerate(sorted_files, 1):
68
  try:
69
  # Get basic file information
70
  file_name = getattr(file_obj, 'name', 'Unknown')
@@ -84,15 +305,15 @@ def list_uploaded_files(progress=gr.Progress()):
84
  # Format timestamps
85
  try:
86
  if created_on != 'Unknown':
87
- created_formatted = datetime.fromisoformat(created_on.replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M:%S UTC')
88
  else:
89
  created_formatted = 'Unknown'
90
 
91
  if updated_on != 'Unknown' and updated_on != created_on:
92
- updated_formatted = datetime.fromisoformat(updated_on.replace('Z', '+00:00')).strftime('%Y-%m-%d %H:%M:%S UTC')
93
  else:
94
  updated_formatted = created_formatted
95
- except:
96
  created_formatted = str(created_on)
97
  updated_formatted = str(updated_on)
98
 
@@ -399,7 +620,8 @@ with gr.Blocks(
399
  with gr.Column():
400
  results_output = gr.Markdown(
401
  label="πŸ“‹ Detailed Results",
402
- value=""
 
403
  )
404
 
405
  # Tab 2: View Uploaded Files
@@ -409,25 +631,37 @@ with gr.Blocks(
409
 
410
  with gr.Row():
411
  refresh_btn = gr.Button(
412
- "πŸ”„ Refresh File List",
413
  variant="primary",
414
  size="lg"
415
  )
416
- auto_refresh_btn = gr.Button(
417
- "πŸ“‹ Load Files on Startup",
418
- variant="secondary",
419
- size="lg"
420
- )
421
 
422
  # File list status
423
  with gr.Row():
424
  file_list_status = gr.Markdown(
425
- value="🟑 **Click 'Refresh File List' to load uploaded files**",
426
  visible=True
427
  )
428
 
429
  gr.Markdown("---")
430
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
  # File list results
432
  with gr.Row():
433
  with gr.Column(scale=1):
@@ -440,7 +674,8 @@ with gr.Blocks(
440
  with gr.Column():
441
  file_details = gr.Markdown(
442
  label="πŸ“‹ File Details",
443
- value="*Click refresh to load file details...*"
 
444
  )
445
 
446
  # Event handlers for Upload tab
@@ -469,22 +704,28 @@ with gr.Blocks(
469
 
470
  # Event handlers for View Files tab
471
 
472
- # Refresh file list
473
  refresh_btn.click(
474
  fn=refresh_file_list,
475
  outputs=[file_list_status]
476
  ).then(
477
- fn=list_uploaded_files,
478
- outputs=[file_summary, file_details]
 
479
  )
480
 
481
- # Auto load files on startup
482
- auto_refresh_btn.click(
483
- fn=refresh_file_list,
484
- outputs=[file_list_status]
485
- ).then(
486
- fn=list_uploaded_files,
487
- outputs=[file_summary, file_details]
 
 
 
 
 
488
  )
489
 
490
  # Footer
 
28
  UPLOAD_FOLDER = "uploads"
29
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
30
 
31
def parse_pinecone_timestamp(iso_string):
    """
    Parse an ISO 8601 timestamp string from Pinecone into a datetime.

    A trailing 'Z' is treated as UTC, and fractional seconds are normalised
    to exactly six digits (longer values such as nanoseconds are truncated,
    shorter ones are zero-padded) so the string is accepted by
    ``datetime.fromisoformat`` on every supported Python version.

    The result is always timezone-aware, so values produced for different
    inputs can be compared and sorted together. Timestamps without an
    explicit offset are assumed to be UTC.

    Args:
        iso_string (str): The ISO-formatted timestamp string.

    Returns:
        datetime: The parsed, timezone-aware datetime. For a non-string or
            empty input, the earliest representable datetime (tagged UTC)
            is returned so such entries sort as the oldest.

    Raises:
        ValueError: If a non-empty string is not a valid ISO 8601 timestamp.
    """
    import re
    from datetime import timezone

    if not isinstance(iso_string, str) or not iso_string:
        # Aware sentinel: a naive datetime.min cannot be compared with the
        # aware values returned for real timestamps.
        return datetime.min.replace(tzinfo=timezone.utc)

    # fromisoformat() on older Pythons does not understand the 'Z' suffix.
    if iso_string.endswith('Z'):
        iso_string = iso_string[:-1] + '+00:00'

    # Normalise the fractional seconds in place. Matching the digits that
    # follow the first '.' avoids having to locate the timezone sign, which
    # is ambiguous because the date part also contains '-' characters.
    iso_string = re.sub(
        r'\.(\d+)',
        lambda match: '.' + match.group(1)[:6].ljust(6, '0'),
        iso_string,
        count=1,
    )

    parsed = datetime.fromisoformat(iso_string)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
67
+
68
def get_all_files():
    """Fetch every file from the Pinecone Assistant, newest first.

    The assistant name is read from the ``PINECONE_ASSISTANT_NAME``
    environment variable and defaults to ``"gstminutes"``.

    Returns:
        list: File objects sorted by ``created_on`` in descending order.
            An empty list is returned when the assistant has no files or
            when the lookup fails; failures are logged rather than raised
            so the UI can keep rendering.
    """
    import logging
    from datetime import timezone

    def _created_sort_key(file_obj):
        # One file with a missing or malformed timestamp must not abort the
        # sort for every other file, so fall back to "oldest possible".
        try:
            created = parse_pinecone_timestamp(getattr(file_obj, 'created_on', ''))
        except (TypeError, ValueError):
            created = datetime.min
        # Naive and aware datetimes cannot be compared; treat naive as UTC.
        if created.tzinfo is None:
            created = created.replace(tzinfo=timezone.utc)
        return created

    try:
        assistant_name = os.getenv("PINECONE_ASSISTANT_NAME", "gstminutes")
        assistant = pc.assistant.Assistant(assistant_name=assistant_name)

        files_response = assistant.list_files()

        # The response is either a plain list or an object exposing `.files`.
        files_list = getattr(files_response, 'files', files_response)
        if not files_list:
            return []

        return sorted(files_list, key=_created_sort_key, reverse=True)

    except Exception:
        # Best-effort by design, but leave a trace: otherwise an auth or
        # network failure is indistinguishable from "no files uploaded".
        logging.getLogger(__name__).exception(
            "Failed to list files from Pinecone Assistant"
        )
        return []
97
+
98
def _format_file_size(num_bytes):
    """Render a byte count as a human-readable string (bytes, KB or MB)."""
    if num_bytes > 1024 * 1024:
        return f"{num_bytes / (1024 * 1024):.2f} MB"
    if num_bytes > 1024:
        return f"{num_bytes / 1024:.2f} KB"
    return f"{num_bytes} bytes"


def list_uploaded_files_paginated(page_num=0, progress=gr.Progress()):
    """List uploaded files one page at a time (10 files per page).

    Args:
        page_num (int): Zero-based page index. Values outside the available
            range are clamped to the first or last page, so a stale or
            out-of-range request never renders an empty page.
        progress: Gradio progress tracker used to report loading stages.

    Returns:
        tuple: ``(summary, detailed_info, pagination_info, prev_update,
        next_update)`` where the first three are markdown/text strings and
        the last two are ``gr.update`` objects toggling the visibility of
        the previous/next buttons. On failure the summary carries the error
        message and both buttons are hidden.
    """
    # NOTE(review): the emoji in the string literals below look mis-encoded
    # in this view; they are reproduced unchanged - confirm against the file.
    try:
        progress(0.1, desc="πŸ” Getting files...")

        all_files = get_all_files()

        if not all_files:
            progress(1.0, desc="βœ… Complete - No files found")
            return (
                "πŸ“‹ **No files found in Pinecone Assistant**",
                "",
                "No files available",
                gr.update(visible=False),
                gr.update(visible=False),
            )

        progress(0.5, desc="πŸ“Š Processing page...")

        # Pagination settings
        files_per_page = 10
        total_files = len(all_files)
        total_pages = (total_files + files_per_page - 1) // files_per_page

        # Clamp so negative or past-the-end requests land on a real page.
        page_num = min(max(page_num, 0), total_pages - 1)
        start_idx = page_num * files_per_page
        end_idx = start_idx + files_per_page
        page_files = all_files[start_idx:end_idx]

        # Summary: header plus a compact one-line entry per file on the page.
        summary_parts = [
            f"πŸ“Š **Files Summary (Page {page_num + 1} of {total_pages})**\n\n",
            f"πŸ“ **Total files:** {total_files}\n",
            f"πŸ“‹ **Showing:** {start_idx + 1}-{min(end_idx, total_files)} of {total_files}\n",
            f"πŸ• **Last updated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n",
            f"πŸ†• **Files on this page:**\n",
        ]
        for i, file_obj in enumerate(page_files, 1):
            file_name = getattr(file_obj, 'name', 'Unknown')
            created_on = getattr(file_obj, 'created_on', 'Unknown')
            try:
                if created_on != 'Unknown':
                    created_formatted = parse_pinecone_timestamp(created_on).strftime('%m-%d %H:%M')
                else:
                    created_formatted = 'Unknown'
            except Exception:
                created_formatted = 'Unknown'
            summary_parts.append(f"{start_idx + i}. {file_name} ({created_formatted})\n")
        summary_parts.append("\n")
        summary = "".join(summary_parts)

        # Detailed listing for the current page only.
        detail_parts = [f"## πŸ“‹ **File Details - Page {page_num + 1}**\n\n"]

        progress(0.8, desc="πŸ“ Formatting page results...")

        for i, file_obj in enumerate(page_files, 1):
            global_index = start_idx + i
            try:
                file_name = getattr(file_obj, 'name', 'Unknown')
                file_id = getattr(file_obj, 'id', 'Unknown')
                # A size reported as None is treated as 0 rather than
                # failing the whole entry.
                file_size = getattr(file_obj, 'size', 0) or 0
                created_on = getattr(file_obj, 'created_on', 'Unknown')
                updated_on = getattr(file_obj, 'updated_on', created_on)

                size_str = _format_file_size(file_size)

                # If either timestamp cannot be parsed, show both raw values.
                try:
                    if created_on != 'Unknown':
                        created_formatted = parse_pinecone_timestamp(created_on).strftime('%Y-%m-%d %H:%M:%S UTC')
                    else:
                        created_formatted = 'Unknown'

                    if updated_on != 'Unknown' and updated_on != created_on:
                        updated_formatted = parse_pinecone_timestamp(updated_on).strftime('%Y-%m-%d %H:%M:%S UTC')
                    else:
                        updated_formatted = created_formatted
                except Exception:
                    created_formatted = str(created_on)
                    updated_formatted = str(updated_on)

                entry = [
                    f"### {global_index}. πŸ“„ **{file_name}**\n",
                    f"- **πŸ†” File ID:** `{file_id}`\n",
                    f"- **πŸ“ Size:** {size_str}\n",
                    f"- **πŸ“… Uploaded:** {created_formatted}\n",
                    f"- **πŸ”„ Last Updated:** {updated_formatted}\n",
                    "\n---\n\n",
                ]
            except Exception as file_error:
                # Keep the page usable even if one file object is malformed.
                entry = [
                    f"### {global_index}. ❌ **Error processing file**\n",
                    f"- **Error:** {str(file_error)}\n\n---\n\n",
                ]
            detail_parts.extend(entry)

        detailed_info = "".join(detail_parts)

        # This text is parsed back by the next/previous handlers, so its
        # "Page N of M" shape must stay stable.
        pagination_info = f"πŸ“„ Page {page_num + 1} of {total_pages} | Total: {total_files} files"

        show_prev = page_num > 0
        show_next = page_num < total_pages - 1

        progress(1.0, desc="βœ… Page loaded successfully!")

        return summary, detailed_info, pagination_info, gr.update(visible=show_prev), gr.update(visible=show_next)

    except Exception as e:
        error_msg = f"❌ **Error retrieving file list:** {str(e)}"
        return error_msg, "", "Error", gr.update(visible=False), gr.update(visible=False)
207
+
208
def _current_page_index(current_page_info):
    """Extract the zero-based page index from text such as 'Page 3 of 7'.

    Returns None when the text is not a string or holds no page number.
    """
    import re

    if not isinstance(current_page_info, str):
        return None
    match = re.search(r"Page\s+(\d+)\s+of", current_page_info)
    if match is None:
        return None
    return int(match.group(1)) - 1


def load_next_page(current_page_info):
    """Load the page after the one described by the pagination text.

    Args:
        current_page_info (str): Pagination text containing "Page N of M".

    Returns:
        tuple: The result of ``list_uploaded_files_paginated`` for the next
        page, or for the first page when the current page cannot be
        determined from the text.
    """
    page_index = _current_page_index(current_page_info)
    if page_index is None:
        # Unreadable pagination state: restart from the first page.
        return list_uploaded_files_paginated(0)
    return list_uploaded_files_paginated(page_index + 1)


def load_prev_page(current_page_info):
    """Load the page before the one described by the pagination text.

    Args:
        current_page_info (str): Pagination text containing "Page N of M".

    Returns:
        tuple: The result of ``list_uploaded_files_paginated`` for the
        previous page (never below the first page), or for the first page
        when the current page cannot be determined from the text.
    """
    page_index = _current_page_index(current_page_info)
    if page_index is None:
        # Unreadable pagination state: restart from the first page.
        return list_uploaded_files_paginated(0)
    return list_uploaded_files_paginated(max(0, page_index - 1))
225
  """List all files uploaded to Pinecone Assistant with their metadata and timestamps"""
226
  try:
227
  progress(0.1, desc="πŸ” Connecting to Pinecone Assistant...")
 
237
  progress(0.7, desc="πŸ“Š Processing file information...")
238
  time.sleep(0.3)
239
 
240
+ # Check if files_response is directly a list or has a files attribute
241
+ if hasattr(files_response, 'files'):
242
+ files_list = files_response.files
243
+ else:
244
+ files_list = files_response
245
+
246
+ if not files_list:
247
  progress(1.0, desc="βœ… Complete - No files found")
248
  return "πŸ“‹ **No files found in Pinecone Assistant**", ""
 
 
249
  total_files = len(files_list)
250
 
251
+ # Sort files by creation time (most recent first) using robust timestamp parsing
252
+ sorted_files = sorted(
253
+ files_list,
254
+ key=lambda x: parse_pinecone_timestamp(getattr(x, 'created_on', '')),
255
+ reverse=True
256
+ )
257
+
258
+ # Only show last 10 files for faster loading
259
+ recent_files = sorted_files[:10]
260
 
261
  progress(0.9, desc="πŸ“ Formatting results...")
262
 
263
+ # Create summary focusing on newly uploaded files
264
  summary = f"πŸ“Š **Files Summary**\n\n"
265
  summary += f"πŸ“ **Total files:** {total_files}\n"
266
+ summary += f"οΏ½ **Showing:** Last 10 files (newest first)\n"
267
+ summary += f"οΏ½πŸ• **Last updated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
268
+
269
+ # Show latest files in summary for quick view
270
+ if len(recent_files) > 0:
271
+ summary += f"πŸ†• **Latest uploads:**\n"
272
+ for i, file_obj in enumerate(recent_files[:5], 1):
273
+ file_name = getattr(file_obj, 'name', 'Unknown')
274
+ created_on = getattr(file_obj, 'created_on', 'Unknown')
275
+ try:
276
+ if created_on != 'Unknown':
277
+ created_formatted = parse_pinecone_timestamp(created_on).strftime('%m-%d %H:%M')
278
+ else:
279
+ created_formatted = 'Unknown'
280
+ except:
281
+ created_formatted = 'Unknown'
282
+ summary += f"{i}. {file_name} ({created_formatted})\n"
283
+ summary += "\n"
284
 
285
+ # Create detailed file list (only last 10 files)
286
+ detailed_info = "## πŸ“‹ **Latest 10 Files**\n\n"
287
 
288
+ for i, file_obj in enumerate(recent_files, 1):
289
  try:
290
  # Get basic file information
291
  file_name = getattr(file_obj, 'name', 'Unknown')
 
305
  # Format timestamps
306
  try:
307
  if created_on != 'Unknown':
308
+ created_formatted = parse_pinecone_timestamp(created_on).strftime('%Y-%m-%d %H:%M:%S UTC')
309
  else:
310
  created_formatted = 'Unknown'
311
 
312
  if updated_on != 'Unknown' and updated_on != created_on:
313
+ updated_formatted = parse_pinecone_timestamp(updated_on).strftime('%Y-%m-%d %H:%M:%S UTC')
314
  else:
315
  updated_formatted = created_formatted
316
+ except Exception as e:
317
  created_formatted = str(created_on)
318
  updated_formatted = str(updated_on)
319
 
 
620
  with gr.Column():
621
  results_output = gr.Markdown(
622
  label="πŸ“‹ Detailed Results",
623
+ value="",
624
+ max_height=400 # Add height limit for scrolling
625
  )
626
 
627
  # Tab 2: View Uploaded Files
 
631
 
632
  with gr.Row():
633
  refresh_btn = gr.Button(
634
+ "οΏ½ Fetch Files",
635
  variant="primary",
636
  size="lg"
637
  )
 
 
 
 
 
638
 
639
  # File list status
640
  with gr.Row():
641
  file_list_status = gr.Markdown(
642
+ value="🟑 **Click 'Fetch Files' to load uploaded files**",
643
  visible=True
644
  )
645
 
646
  gr.Markdown("---")
647
 
648
+ # Pagination controls
649
+ with gr.Row():
650
+ prev_btn = gr.Button(
651
+ "⬅️ Previous 10",
652
+ variant="secondary",
653
+ visible=False
654
+ )
655
+ pagination_info = gr.Markdown(
656
+ value="πŸ“„ Page 1 of 1 | Total: 0 files",
657
+ elem_classes=["pagination-info"]
658
+ )
659
+ next_btn = gr.Button(
660
+ "Next 10 ➑️",
661
+ variant="secondary",
662
+ visible=False
663
+ )
664
+
665
  # File list results
666
  with gr.Row():
667
  with gr.Column(scale=1):
 
674
  with gr.Column():
675
  file_details = gr.Markdown(
676
  label="πŸ“‹ File Details",
677
+ value="*Click refresh to load file details...*",
678
+ max_height=600 # Add height limit for scrolling
679
  )
680
 
681
  # Event handlers for Upload tab
 
704
 
705
  # Event handlers for View Files tab
706
 
707
+ # Fetch files - load page 1
708
  refresh_btn.click(
709
  fn=refresh_file_list,
710
  outputs=[file_list_status]
711
  ).then(
712
+ fn=list_uploaded_files_paginated,
713
+ inputs=[],
714
+ outputs=[file_summary, file_details, pagination_info, prev_btn, next_btn]
715
  )
716
 
717
+ # Next page button
718
+ next_btn.click(
719
+ fn=load_next_page,
720
+ inputs=[pagination_info],
721
+ outputs=[file_summary, file_details, pagination_info, prev_btn, next_btn]
722
+ )
723
+
724
+ # Previous page button
725
+ prev_btn.click(
726
+ fn=load_prev_page,
727
+ inputs=[pagination_info],
728
+ outputs=[file_summary, file_details, pagination_info, prev_btn, next_btn]
729
  )
730
 
731
  # Footer