datafreak committed on
Commit
2a96d75
·
verified ·
1 Parent(s): 943bc5c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +1057 -0
app.py ADDED
@@ -0,0 +1,1057 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from pathlib import Path
4
+ from pinecone import Pinecone
5
+ from typing import List, Tuple
6
+ import tempfile
7
+ import shutil
8
+ from dotenv import load_dotenv
9
+ import time
10
+ from datetime import datetime
11
+ import json
12
+
13
+ # Load environment variables
14
+ load_dotenv()
15
+
16
# Authentication configuration
# SECURITY NOTE(review): a password literal committed to source control is
# readable by anyone with access to the repository. The value can now be
# supplied through the AUTH_PASSWORD environment variable (or .env); the
# literal is kept only as a backward-compatible fallback and should be
# rotated and removed.
AUTH_PASSWORD = os.getenv("AUTH_PASSWORD") or "gst_litigation@34$$&"


def authenticate(password):
    """Check a submitted password against the configured one.

    Args:
        password: The value entered by the user. Anything that is not a
            string is rejected.

    Returns:
        bool: True when ``password`` equals ``AUTH_PASSWORD``, else False.
    """
    # Local import: the file's import block does not bring in hmac.
    import hmac

    if not isinstance(password, str):
        return False

    # Constant-time comparison so the response time does not reveal how many
    # leading characters of a guess were correct. Encoding to bytes lets
    # compare_digest accept non-ASCII input.
    return hmac.compare_digest(
        password.encode("utf-8"),
        AUTH_PASSWORD.encode("utf-8"),
    )
22
+
23
# Fail fast at import time when mandatory configuration is absent
required_env_vars = ["PINECONE_API_KEY"]
missing_vars = list(filter(lambda name: not os.getenv(name), required_env_vars))

if missing_vars:
    raise ValueError("Missing required environment variables: " + ", ".join(missing_vars))

# Shared Pinecone client used by every handler in this module
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=pinecone_api_key)

# Local staging directory that uploaded files are copied into
UPLOAD_FOLDER = "uploads"
Path(UPLOAD_FOLDER).mkdir(parents=True, exist_ok=True)
37
+
38
def parse_pinecone_timestamp(iso_string):
    """
    Parse an ISO 8601 timestamp string into a timezone-aware datetime.

    Fractional seconds are normalised to exactly six digits (longer fractions
    such as nanoseconds are truncated, shorter ones are zero-padded) because
    ``datetime.fromisoformat`` on Python versions before 3.11 only accepts
    three or six fractional digits. A trailing ``Z`` is rewritten to
    ``+00:00`` for the same reason.

    The result is always timezone-aware so that values returned for
    different inputs can be compared and sorted together: timestamps without
    an offset are assumed to be UTC, and the "missing" sentinel is
    ``datetime.min`` in UTC.

    Args:
        iso_string (str): The ISO-formatted timestamp string.

    Returns:
        datetime: The parsed, timezone-aware datetime. ``datetime.min`` (UTC)
        when ``iso_string`` is empty or not a string, so such entries sort
        as the oldest.

    Raises:
        ValueError: If a non-empty string is not a valid ISO 8601 timestamp.
    """
    # Local import: the file only imports the datetime class itself.
    from datetime import timezone

    if not isinstance(iso_string, str) or not iso_string:
        return datetime.min.replace(tzinfo=timezone.utc)

    text = iso_string.strip()

    # Handle ISO strings ending with 'Z' (convert to +00:00)
    if text.endswith(('Z', 'z')):
        text = text[:-1] + '+00:00'

    # Split at the decimal point, then peel the leading digits off the rest.
    # Whatever follows those digits (an offset, or nothing) is kept verbatim,
    # so this works whether or not timezone information is present.
    head, dot, rest = text.partition('.')
    if dot:
        digit_count = len(rest) - len(rest.lstrip('0123456789'))
        fraction, tail = rest[:digit_count], rest[digit_count:]
        if fraction:
            text = f"{head}.{fraction[:6].ljust(6, '0')}{tail}"

    parsed = datetime.fromisoformat(text)
    if parsed.tzinfo is None:
        # No offset supplied: assume UTC so the value stays comparable.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
74
+
75
def get_all_files():
    """Get all files from the Pinecone Assistant, newest first.

    The assistant name is read from ``PINECONE_ASSISTANT_NAME`` (default
    ``"freeplan"``). Files are ordered by their ``created_on`` attribute;
    a file whose timestamp is missing or cannot be parsed is sorted as the
    oldest instead of aborting the whole listing.

    Returns:
        list: The file objects sorted by creation time (most recent first),
        or an empty list when there are no files or the lookup fails.
    """
    # Local imports: neither name is in the file's import block.
    import logging
    from datetime import timezone

    def _created_at(file_obj):
        """Sort key: creation time as naive UTC; bad values sort oldest."""
        try:
            stamp = parse_pinecone_timestamp(getattr(file_obj, 'created_on', ''))
            if stamp.tzinfo is not None:
                # Normalise so naive and aware values never get compared.
                stamp = stamp.astimezone(timezone.utc).replace(tzinfo=None)
            return stamp
        except (TypeError, ValueError, OverflowError):
            return datetime.min

    try:
        assistant_name = os.getenv("PINECONE_ASSISTANT_NAME", "freeplan")
        assistant = pc.assistant.Assistant(assistant_name=assistant_name)

        # List all files in the assistant
        files_response = assistant.list_files()

        # The response is either a list itself or an object with a `files` attribute
        files_list = getattr(files_response, 'files', files_response)

        if not files_list:
            return []

        return sorted(files_list, key=_created_at, reverse=True)

    except Exception:
        # Best-effort: callers treat [] as "no files", but keep a trace of
        # the failure so it is not completely invisible.
        logging.getLogger(__name__).exception("Failed to list Pinecone Assistant files")
        return []
104
+
105
def get_file_choices():
    """Get file choices for the dropdown.

    Returns:
        list: ``(display_name, file_id)`` tuples, one per file returned by
        ``get_all_files()``. The display name is the file name, followed by
        the upload time when it can be parsed. An empty list is returned
        when there are no files or on any unexpected error.
    """
    try:
        choices = []
        for file_obj in get_all_files():
            file_name = getattr(file_obj, 'name', 'Unknown File')
            file_id = getattr(file_obj, 'id', 'unknown')
            created_on = getattr(file_obj, 'created_on', '')

            # Fall back to the bare name when the timestamp is absent or unparseable
            display_name = file_name
            if created_on:
                try:
                    created_formatted = parse_pinecone_timestamp(created_on).strftime('%Y-%m-%d %H:%M')
                except Exception:
                    # Narrowed from a bare `except:` so KeyboardInterrupt /
                    # SystemExit are no longer swallowed here.
                    pass
                else:
                    display_name = f"{file_name} (uploaded: {created_formatted})"

            choices.append((display_name, file_id))

        return choices
    except Exception:
        return []
133
+
134
def refresh_delete_dropdown():
    """Reload the delete dropdown from the current file list.

    Returns:
        A Gradio update that replaces the dropdown choices, clears the
        selection, and enables the dropdown only when files are available.
    """
    available = get_file_choices()
    has_files = bool(available)
    return gr.update(
        choices=available if has_files else [],
        value=None,
        interactive=has_files,
    )
140
+
141
def delete_selected_files(selected_file_ids, progress=gr.Progress()):
    """Delete every selected file from the Pinecone Assistant.

    Args:
        selected_file_ids: File IDs chosen in the dropdown.
        progress: Gradio progress tracker used to report each step.

    Returns:
        tuple: ``(status_message, detailed_results)`` markdown strings. Each
        file is attempted independently; failures are listed with their
        error text rather than aborting the batch.
    """
    if not selected_file_ids:
        return "❌ **Error:** No files selected for deletion", ""

    try:
        progress(0.1, desc="🔧 Initializing Pinecone Assistant...")
        assistant = pc.assistant.Assistant(
            assistant_name=os.getenv("PINECONE_ASSISTANT_NAME", "freeplan")
        )

        # Lookup table so results can show names instead of raw IDs
        names_by_id = {}
        for known_file in get_all_files():
            names_by_id[getattr(known_file, 'id', '')] = getattr(known_file, 'name', 'Unknown')

        total_files = len(selected_file_ids)
        succeeded = []
        failed = []

        progress(0.2, desc=f"🗑️ Starting deletion of {total_files} files...")

        for position, file_id in enumerate(selected_file_ids):
            try:
                file_name = names_by_id.get(file_id, f"File ID: {file_id}")
                # Deletions occupy the 0.2 - 0.9 span of the progress bar
                progress((0.2 + (position / total_files) * 0.7), desc=f"🗑️ Deleting: {file_name}...")

                assistant.delete_file(file_id=file_id)
                succeeded.append({'name': file_name, 'id': file_id, 'status': 'success'})

                time.sleep(0.2)  # Small delay between deletions

            except Exception as delete_error:
                failed.append({
                    'name': names_by_id.get(file_id, f"File ID: {file_id}"),
                    'id': file_id,
                    'error': str(delete_error),
                })

        progress(1.0, desc="✅ Deletion process completed!")

        # Summary block
        status_message = "".join([
            "📊 **Deletion Complete**\n\n",
            f"✅ **Successfully deleted:** {len(succeeded)} files\n",
            f"❌ **Failed to delete:** {len(failed)} files\n",
            f"📁 **Total processed:** {total_files} files\n\n",
        ])

        # Per-file report
        report = ["## 🗑️ **Deletion Results**\n\n"]

        if succeeded:
            report.append("### ✅ **Successfully Deleted Files:**\n")
            report.extend(f"- **{entry['name']}** (`{entry['id']}`)\n" for entry in succeeded)
            report.append("\n")

        if failed:
            report.append("### ❌ **Failed Deletions:**\n")
            for entry in failed:
                report.append(f"- **{entry['name']}** (`{entry['id']}`)\n")
                report.append(f" - Error: {entry['error']}\n")
            report.append("\n")

        return status_message, "".join(report)

    except Exception as e:
        return f"❌ **Critical Error during deletion:** {str(e)}", ""
215
+
216
def list_uploaded_files_paginated(page_num=0, progress=gr.Progress()):
    """List files with pagination - 100 files per page.

    Args:
        page_num: Zero-based page index. Values outside the available range
            are clamped to the nearest valid page, so a stale page number
            (for example after files were deleted) never renders an empty
            page with a nonsensical "Showing" range.
        progress: Gradio progress tracker used to report each step.

    Returns:
        tuple: ``(summary, detailed_info, pagination_info, prev_update,
        next_update)`` - three markdown strings followed by visibility
        updates for the previous/next buttons.
    """
    try:
        progress(0.1, desc="🔍 Getting files...")

        # Get all files
        all_files = get_all_files()

        if not all_files:
            progress(1.0, desc="✅ Complete - No files found")
            return "📋 **No files found in Pinecone Assistant**", "", "No files available", gr.update(visible=False), gr.update(visible=False)

        progress(0.5, desc="📊 Processing page...")

        # Pagination settings
        files_per_page = 100
        total_files = len(all_files)
        total_pages = (total_files + files_per_page - 1) // files_per_page

        # Keep the requested page inside [0, total_pages - 1]
        page_num = max(0, min(page_num, total_pages - 1))
        start_idx = page_num * files_per_page
        end_idx = start_idx + files_per_page

        # Get files for current page
        page_files = all_files[start_idx:end_idx]

        # Create summary
        summary = f"📊 **Files Summary (Page {page_num + 1} of {total_pages})**\n\n"
        summary += f"📁 **Total files:** {total_files}\n"
        summary += f"📋 **Showing:** {start_idx + 1}-{min(end_idx, total_files)} of {total_files}\n\n"

        progress(0.8, desc="📝 Formatting file titles...")

        # Create file list - one numbered entry per file
        entries = [f"## 📋 **Latest Uploaded Files - Page {page_num + 1}**\n\n"]

        for i, file_obj in enumerate(page_files, 1):
            global_index = start_idx + i
            try:
                file_name = getattr(file_obj, 'name', 'Unknown File')
                file_id = getattr(file_obj, 'id', 'Unknown ID')
                created_on = getattr(file_obj, 'created_on', '')

                # Format timestamp; anything unparseable is shown as 'Unknown'
                created_formatted = 'Unknown'
                if created_on:
                    try:
                        created_formatted = parse_pinecone_timestamp(created_on).strftime('%Y-%m-%d %H:%M')
                    except Exception:
                        # Narrowed from a bare `except:` so interpreter-exit
                        # signals are not swallowed.
                        created_formatted = 'Unknown'

                # Display file info
                entries.append(
                    f"{global_index}. **{file_name}**\n"
                    f" 📅 Uploaded: {created_formatted} | 🆔 ID: `{file_id}`\n\n"
                )

            except Exception:
                entries.append(f"{global_index}. ❌ **Error loading file**\n\n")

        detailed_info = "".join(entries)

        # Pagination info - the "Page N of M" text is parsed back by
        # load_next_page / load_prev_page, so its format must stay stable.
        pagination_info = f"📄 Page {page_num + 1} of {total_pages} | Total: {total_files} files"

        # Show/hide next/prev buttons
        show_prev = page_num > 0
        show_next = page_num < total_pages - 1

        progress(1.0, desc="✅ Page loaded successfully!")

        return summary, detailed_info, pagination_info, gr.update(visible=show_prev), gr.update(visible=show_next)

    except Exception as e:
        error_msg = f"❌ **Error retrieving file list:** {str(e)}"
        return error_msg, "", "Error", gr.update(visible=False), gr.update(visible=False)
289
+
290
def load_next_page(current_page_info):
    """Load the next page of files.

    Args:
        current_page_info: The pagination text currently displayed, expected
            to contain ``"Page N of M"``.

    Returns:
        The result of ``list_uploaded_files_paginated`` for the following
        page, or for the first page when the current page number cannot be
        read from ``current_page_info``.
    """
    try:
        # Displayed page numbers are 1-based; the listing function is 0-based
        current_page = int(current_page_info.split("Page ")[1].split(" of")[0]) - 1
    except (AttributeError, IndexError, TypeError, ValueError):
        # Text is missing or not in the expected format (e.g. "Error")
        return list_uploaded_files_paginated(0)
    return list_uploaded_files_paginated(current_page + 1)
298
+
299
def load_prev_page(current_page_info):
    """Load the previous page of files.

    Args:
        current_page_info: The pagination text currently displayed, expected
            to contain ``"Page N of M"``.

    Returns:
        The result of ``list_uploaded_files_paginated`` for the preceding
        page (never below the first), or for the first page when the current
        page number cannot be read from ``current_page_info``.
    """
    try:
        # Displayed page numbers are 1-based; the listing function is 0-based
        current_page = int(current_page_info.split("Page ")[1].split(" of")[0]) - 1
    except (AttributeError, IndexError, TypeError, ValueError):
        # Text is missing or not in the expected format (e.g. "Error")
        return list_uploaded_files_paginated(0)
    return list_uploaded_files_paginated(max(0, current_page - 1))
307
+
308
def refresh_file_list():
    """Return the interim status text shown while the file list reloads."""
    status_text = "🔄 **Refreshing file list... Please wait**"
    return status_text
311
+
312
def process_files_with_progress(files, *metadata_inputs, progress=gr.Progress()):
    """Upload multiple files, each with its own metadata, reporting progress.

    Args:
        files: Paths of the selected files (at most 10).
        *metadata_inputs: Flat sequence of metadata text values, three per
            file in file order: sections, keywords, description. Missing
            trailing values are treated as empty.
        progress: Gradio progress tracker used to report each step.

    Returns:
        tuple: ``(status_message, detailed_results, processing_status)``
        markdown strings. Every code path returns all three values because
        the upload button wires this function to three output components.
    """
    # Validation failures must also return three values, otherwise Gradio
    # rejects the result for having too few outputs.
    if not files:
        return "❌ Error: No files selected", "", "❌ **Processing failed with error**"

    if len(files) > 10:
        return "❌ Error: Maximum 10 files allowed at a time", "", "❌ **Processing failed with error**"

    try:
        results = []
        errors = []
        total_files = len(files)

        # Initialize Pinecone Assistant
        progress(0, desc="🔧 Initializing Pinecone Assistant...")
        time.sleep(0.5)  # Small delay to show the progress
        assistant_name = os.getenv("PINECONE_ASSISTANT_NAME", "freeplan")
        assistant = pc.assistant.Assistant(assistant_name=assistant_name)

        # Process each file with its individual metadata
        for i, file_path in enumerate(files):
            try:
                filename = os.path.basename(file_path)
                progress((i / total_files), desc=f"📄 Processing {filename}... ({i+1}/{total_files})")

                # Three consecutive fields per file: sections, keywords,
                # description. Slicing tolerates a short or missing tail,
                # which is padded with empty strings.
                fields = list(metadata_inputs[i * 3:i * 3 + 3])
                fields += [""] * (3 - len(fields))
                sections, keywords, description = (value or "" for value in fields)

                # Skip if no metadata provided for this file
                if not sections.strip() and not keywords.strip() and not description.strip():
                    errors.append({
                        "filename": filename,
                        "error": "❌ Error: No metadata provided"
                    })
                    continue

                # Prepare metadata for this file
                progress((i / total_files), desc=f"🏷️ Preparing metadata for {filename}...")
                metadata = {
                    "sections": [s.strip() for s in sections.split(",") if s.strip()],
                    "keywords": [k.strip() for k in keywords.split(",") if k.strip()],
                    "description": description.strip()
                }

                # Copy to uploads directory
                progress((i / total_files), desc=f"📁 Copying {filename} to uploads...")
                destination_path = os.path.join(UPLOAD_FOLDER, filename)
                shutil.copy2(file_path, destination_path)

                # Upload to Pinecone Assistant
                progress((i / total_files), desc=f"☁️ Uploading {filename} to Pinecone...")
                response = assistant.upload_file(
                    file_path=destination_path,
                    metadata=metadata,
                    timeout=None
                )

                results.append({
                    "filename": filename,
                    "status": "✅ Success",
                    "metadata": metadata,
                    "response": str(response)
                })

            except Exception as file_error:
                # One bad file must not stop the rest of the batch
                errors.append({
                    "filename": os.path.basename(file_path),
                    "error": f"❌ Error: {str(file_error)}"
                })

        # Final progress update
        progress(1.0, desc="✅ Processing complete!")
        time.sleep(0.5)

        # Format results for display
        success_count = len(results)
        error_count = len(errors)

        status_message = "📊 **Processing Complete**\n\n"
        status_message += f"✅ **Successful uploads:** {success_count}\n"
        status_message += f"❌ **Failed uploads:** {error_count}\n"
        status_message += f"📁 **Total files processed:** {len(files)}\n\n"

        # Detailed results
        detailed_results = "## 📋 **Detailed Results**\n\n"

        if results:
            detailed_results += "### ✅ **Successful Uploads:**\n"
            for result in results:
                detailed_results += f"- **{result['filename']}**\n"
                detailed_results += f" - Sections: {', '.join(result['metadata']['sections'])}\n"
                detailed_results += f" - Keywords: {', '.join(result['metadata']['keywords'])}\n"
                detailed_results += f" - Description: {result['metadata']['description']}\n\n"

        if errors:
            detailed_results += "### ❌ **Failed Uploads:**\n"
            for error in errors:
                detailed_results += f"- **{error['filename']}** - {error['error']}\n"

        return status_message, detailed_results, "✅ **Processing completed successfully!**"

    except Exception as e:
        error_msg = f"❌ **Critical Error:** {str(e)}"
        return error_msg, "", "❌ **Processing failed with error**"
425
+
426
def update_metadata_fields(files):
    """Show one trio of metadata textboxes per selected file.

    Args:
        files: Paths of the currently selected files, or a falsy value when
            the selection is empty.

    Returns:
        list: Exactly 30 Gradio updates (3 per file for up to 10 files).
        Fields for selected files become visible and are labelled with the
        file name; every other field is hidden. All fields are hidden when
        nothing is selected or more than 10 files are selected.
    """
    total_slots = 30

    # Empty selection or over the limit: hide everything
    if not files or len(files) > 10:
        return [gr.update(visible=False)] * total_slots

    field_updates = []
    for path in files:
        filename = os.path.basename(path)
        # Sections, keywords, description - in that order
        field_updates += [
            gr.update(visible=True, label=f"📑 Sections for {filename}", placeholder="e.g., Case Summary, Evidence, Legal Arguments"),
            gr.update(visible=True, label=f"🔍 Keywords for {filename}", placeholder="e.g., litigation, court, plaintiff, defendant"),
            gr.update(visible=True, label=f"📝 Description for {filename}", placeholder="Brief description of this document"),
        ]

    # Pad with hidden fields up to the fixed slot count
    unused = total_slots - len(field_updates)
    field_updates.extend(gr.update(visible=False) for _ in range(unused))

    return field_updates[:total_slots]
451
+
452
+ def clear_form():
453
+ """Clear all form fields"""
454
+ return [None] + [""] * 30 + ["", "", "🟢 **Ready to process documents**"]
455
+
456
def clear_delete_form():
    """Return reset values for the delete form.

    Returns:
        tuple: An update that empties the dropdown selection, followed by
        empty strings for the status and results panes.
    """
    cleared_selection = gr.update(value=[])
    return cleared_selection, "", ""
459
+
460
def start_processing():
    """Return the status text shown while documents are being processed."""
    status_text = "🔄 **Processing documents... Please wait**"
    return status_text
463
+
464
def finish_processing():
    """Return the status text shown once processing has finished."""
    status_text = "✅ **Processing completed successfully!**"
    return status_text
467
+
468
def create_main_interface():
    """Create the main application interface.

    Builds a Gradio ``Blocks`` app with three tabs - upload, delete, and
    view - and wires each button to the module-level handler functions.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(
        title="📄 LITIGATION RESEARCH DOC SYSTEM",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1400px !important;
            margin: auto;
        }
        .upload-container {
            border: 2px dashed #4CAF50;
            border-radius: 10px;
            padding: 20px;
            text-align: center;
            background-color: #f8f9fa;
        }
        .delete-container {
            border: 2px dashed #f44336;
            border-radius: 10px;
            padding: 20px;
            background-color: #ffebee;
        }
        .tab-nav {
            margin-bottom: 20px;
        }
        """
    ) as app:

        # Page header and feature overview
        gr.Markdown(
            """
            # 📄 LITIGATION RESEARCH DOC SYSTEM

            Upload, manage, and delete documents in the Pinecone Assistant for litigation research processing.

            ## 🚀 Features:
            - ✅ **Multiple file upload** - Select and upload multiple documents at once
            - 🏷️ **Metadata tagging** - Add sections, keywords, and descriptions
            - 🔄 **Batch processing** - All files processed with individual metadata
            - 🗑️ **File deletion** - Delete multiple files by selecting from dropdown
            - 📊 **File management** - View uploaded files with timestamps and metadata
            - 📋 **Detailed reporting** - See success/failure status for each operation

            ---
            """
        )

        # Create tabs for different functionalities
        with gr.Tabs() as tabs:

            # Tab 1: Upload Documents
            with gr.TabItem("📤 Upload Documents", id="upload_tab"):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### 📁 **File Upload**")
                        files_input = gr.File(
                            label="Select Documents (Max 10 files)",
                            file_count="multiple",
                            file_types=[".pdf", ".doc", ".docx", ".txt"],
                            elem_classes=["upload-container"]
                        )

                    with gr.Column(scale=1):
                        gr.Markdown("### 🏷️ **Document Metadata (Individual for Each File)**")
                        gr.Markdown("*Upload files first, then metadata fields will appear for each document*")

                        # Dynamic metadata fields container
                        with gr.Column() as metadata_container:
                            # Create 30 text fields (enough for 10 files with 3 fields each).
                            # They start hidden; update_metadata_fields relabels and
                            # reveals them when the file selection changes.
                            metadata_fields = []
                            for i in range(30):
                                field = gr.Textbox(
                                    label=f"Field {i}",
                                    placeholder="",
                                    visible=False,
                                    lines=2
                                )
                                metadata_fields.append(field)

                with gr.Row():
                    with gr.Column(scale=1):
                        upload_btn = gr.Button(
                            "🚀 Upload Documents to Pinecone Assistant",
                            variant="primary",
                            size="lg"
                        )

                    with gr.Column(scale=1):
                        clear_btn = gr.Button(
                            "🗑️ Clear Form",
                            variant="secondary",
                            size="lg"
                        )

                # Processing status indicator
                with gr.Row():
                    processing_status = gr.Markdown(
                        value="🟢 **Ready to process documents**",
                        visible=True
                    )

                gr.Markdown("---")

                # Results section
                with gr.Row():
                    with gr.Column():
                        status_output = gr.Markdown(
                            label="📊 Upload Status",
                            value="*Ready to upload documents...*"
                        )

                with gr.Row():
                    with gr.Column():
                        results_output = gr.Markdown(
                            label="📋 Detailed Results",
                            value="",
                            max_height=400  # Add height limit for scrolling
                        )

            # Tab 2: Delete Documents
            with gr.TabItem("🗑️ Delete Documents", id="delete_tab"):
                gr.Markdown("### 🗑️ **Delete Multiple Documents**")
                gr.Markdown("Select multiple files from the dropdown to delete them from the Pinecone Assistant.")

                with gr.Row():
                    with gr.Column(scale=2):
                        # Starts empty and disabled; populated by the refresh button
                        file_dropdown = gr.Dropdown(
                            label="📋 Select Files to Delete (Multiple Selection)",
                            choices=[],
                            multiselect=True,
                            interactive=False,
                            elem_classes=["delete-container"]
                        )

                    with gr.Column(scale=1):
                        refresh_dropdown_btn = gr.Button(
                            "🔄 Refresh File List",
                            variant="secondary",
                            size="lg"
                        )

                with gr.Row():
                    with gr.Column(scale=1):
                        delete_btn = gr.Button(
                            "🗑️ Delete Selected Files",
                            variant="stop",
                            size="lg"
                        )

                    with gr.Column(scale=1):
                        clear_delete_btn = gr.Button(
                            "↺ Clear Selection",
                            variant="secondary",
                            size="lg"
                        )

                gr.Markdown("---")

                # Delete results section
                with gr.Row():
                    with gr.Column():
                        delete_status_output = gr.Markdown(
                            label="📊 Deletion Status",
                            value="*Select files to delete...*"
                        )

                with gr.Row():
                    with gr.Column():
                        delete_results_output = gr.Markdown(
                            label="🗑️ Deletion Results",
                            value="",
                            max_height=400
                        )

            # Tab 3: View Uploaded Files
            with gr.TabItem("📋 View Uploaded Files", id="view_tab"):
                gr.Markdown("### 📋 **Uploaded Files Management**")
                gr.Markdown("View all files currently uploaded to the Pinecone Assistant with their metadata and timestamps.")

                with gr.Row():
                    refresh_btn = gr.Button(
                        "🔄 Fetch Files",
                        variant="primary",
                        size="lg"
                    )

                # File list status
                with gr.Row():
                    file_list_status = gr.Markdown(
                        value="🟡 **Click 'Fetch Files' to load uploaded files**",
                        visible=True
                    )

                gr.Markdown("---")

                # Pagination controls. The buttons start hidden and are shown
                # or hidden by the updates list_uploaded_files_paginated returns.
                with gr.Row():
                    prev_btn = gr.Button(
                        "⬅️ Previous 100",
                        variant="secondary",
                        visible=False
                    )
                    # This text doubles as page state: load_next_page and
                    # load_prev_page parse the "Page N of M" part back out.
                    pagination_info = gr.Markdown(
                        value="📄 Page 1 of 1 | Total: 0 files",
                        elem_classes=["pagination-info"]
                    )
                    next_btn = gr.Button(
                        "Next 100 ➡️",
                        variant="secondary",
                        visible=False
                    )

                # File list results
                with gr.Row():
                    with gr.Column(scale=1):
                        file_summary = gr.Markdown(
                            label="📊 Files Summary",
                            value="*Click refresh to load file summary...*"
                        )

                with gr.Row():
                    with gr.Column():
                        file_details = gr.Markdown(
                            label="📋 File Details",
                            value="*Click refresh to load file details...*",
                            max_height=600  # Add height limit for scrolling
                        )

        # Event handlers for Upload tab

        # Update metadata fields when files are uploaded
        files_input.change(
            fn=update_metadata_fields,
            inputs=[files_input],
            outputs=metadata_fields
        )

        # Show processing status when upload starts, then run the upload;
        # the second step writes to three outputs.
        upload_btn.click(
            fn=start_processing,
            outputs=[processing_status]
        ).then(
            fn=process_files_with_progress,
            inputs=[files_input] + metadata_fields,
            outputs=[status_output, results_output, processing_status]
        )

        # Resets the file picker, all 30 metadata fields, and the three status panes
        clear_btn.click(
            fn=clear_form,
            outputs=[files_input] + metadata_fields + [status_output, results_output, processing_status]
        )

        # Event handlers for Delete tab

        # Refresh dropdown with current files
        refresh_dropdown_btn.click(
            fn=refresh_delete_dropdown,
            outputs=[file_dropdown]
        )

        # Delete selected files
        delete_btn.click(
            fn=delete_selected_files,
            inputs=[file_dropdown],
            outputs=[delete_status_output, delete_results_output]
        )

        # Clear delete form
        clear_delete_btn.click(
            fn=clear_delete_form,
            outputs=[file_dropdown, delete_status_output, delete_results_output]
        )

        # Event handlers for View Files tab

        # Fetch files - load page 1 (no inputs, so page_num uses its default of 0)
        refresh_btn.click(
            fn=refresh_file_list,
            outputs=[file_list_status]
        ).then(
            fn=list_uploaded_files_paginated,
            inputs=[],
            outputs=[file_summary, file_details, pagination_info, prev_btn, next_btn]
        )

        # Next page button
        next_btn.click(
            fn=load_next_page,
            inputs=[pagination_info],
            outputs=[file_summary, file_details, pagination_info, prev_btn, next_btn]
        )

        # Previous page button
        prev_btn.click(
            fn=load_prev_page,
            inputs=[pagination_info],
            outputs=[file_summary, file_details, pagination_info, prev_btn, next_btn]
        )

        # Footer
        gr.Markdown(
            """
            ---

            ### 💡 **Usage Tips:**

            **Upload Documents:**
            - Select up to 10 PDF, DOC, DOCX, or TXT files at once
            - Upload files first, then fill individual metadata for each document
            - Each file gets its own sections, keywords, and description
            - Check the results section for upload status

            **Delete Documents:**
            - Click 'Refresh File List' to load current files in dropdown
            - Select multiple files using the dropdown (supports multi-select)
            - Click 'Delete Selected Files' to remove them permanently
            - View deletion results for success/failure status

            **View Uploaded Files:**
            - Click 'Fetch Files' to see all uploaded files
            - View file details including upload timestamps and metadata
            - Files are sorted by most recent first
            - Use pagination to navigate through large file lists

            ### ⚠️ **Important Notes:**
            - File deletion is **permanent** and cannot be undone
            - Always verify your selection before deleting files
            - The system maps file titles to IDs internally for deletion

            ### 📞 **Support:**
            For issues or questions, contact the development team.
            """
        )

    return app
803
+
804
def create_login_interface():
    """Build the standalone password-prompt UI.

    Returns:
        tuple: ``(login_app, password_input, login_btn, error_message)`` -
        the ``gr.Blocks`` app plus the three components a caller needs in
        order to attach its own login handler.
    """
    login_css = """
    .gradio-container {
        max-width: 500px !important;
        margin: auto;
        padding-top: 100px;
    }
    .login-container {
        border: 2px solid #2196F3;
        border-radius: 15px;
        padding: 30px;
        text-align: center;
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        box-shadow: 0 10px 20px rgba(0,0,0,0.1);
    }
    .password-input {
        margin: 20px 0;
    }
    .login-button {
        margin-top: 15px;
    }
    .error-message {
        color: #f44336;
        font-weight: bold;
        margin-top: 10px;
    }
    """

    heading_markdown = """
    # 🔐 **LITIGATION RESEARCH DOC SYSTEM**
    ## **Authentication Required**

    Please enter the password to access the application.

    ---
    """

    notice_markdown = """
    ---

    ### 🛡️ **Security Notice:**
    - This system contains sensitive litigation research documentation
    - Authorized access only
    - All activities are logged

    ### 📞 **Need Access?**
    Contact your system administrator for credentials.
    """

    with gr.Blocks(
        title="🔐 Authentication Required",
        theme=gr.themes.Soft(),
        css=login_css,
    ) as login_app:

        # Everything lives in a single styled card
        with gr.Column(elem_classes=["login-container"]):
            gr.Markdown(heading_markdown)

            password_input = gr.Textbox(
                label="🔑 Password",
                type="password",
                placeholder="Enter password to access the system",
                elem_classes=["password-input"],
            )

            login_btn = gr.Button(
                "🚀 Login",
                variant="primary",
                size="lg",
                elem_classes=["login-button"],
            )

            # Hidden until a caller chooses to reveal it
            error_message = gr.Markdown(
                value="",
                visible=False,
                elem_classes=["error-message"],
            )

            gr.Markdown(notice_markdown)

    return login_app, password_input, login_btn, error_message
884
# Number of metadata textboxes created up front (all hidden initially).
# `update_metadata_fields` and `clear_form` are wired to the full list, so they
# presumably must return exactly one update per field -- verify those handlers
# before changing this value.
_METADATA_FIELD_COUNT = 30


def _handle_login_attempt(password):
    """Translate a password submission into visibility updates.

    Args:
        password: Text entered in the password box.

    Returns:
        A 4-tuple of ``gr.update`` objects, in the order
        ``(login_container, error_message, main_container, password_input)``.
        The password box is cleared after every attempt so the secret does not
        linger in the textbox once the login panel is hidden.
    """
    cleared_password = gr.update(value="")
    if authenticate(password):
        return (
            gr.update(visible=False),
            gr.update(value="", visible=False),
            gr.update(visible=True),
            cleared_password,
        )
    return (
        gr.update(visible=True),
        gr.update(value="Invalid password. Please try again.", visible=True),
        gr.update(visible=False),
        cleared_password,
    )


def _build_authenticated_app():
    """Build the single Blocks app holding both the login panel and the main UI.

    The login column is visible at start-up; the main column (Upload / Delete /
    View tabs) is created hidden and is revealed by the login handler.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    # Reuse same theme/CSS as main app for consistency
    app_css = """
    .gradio-container {
        max-width: 1400px !important;
        margin: auto;
    }
    .upload-container {
        border: 2px dashed #4CAF50;
        border-radius: 10px;
        padding: 20px;
        text-align: center;
        background-color: #f8f9fa;
    }
    .delete-container {
        border: 2px dashed #f44336;
        border-radius: 10px;
        padding: 20px;
        background-color: #ffebee;
    }
    .login-container {
        border: 2px solid #2196F3;
        border-radius: 15px;
        padding: 30px;
        text-align: center;
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        box-shadow: 0 10px 20px rgba(0,0,0,0.1);
    }
    """

    with gr.Blocks(
        title="LITIGATION RESEARCH DOC SYSTEM",
        theme=gr.themes.Soft(),
        css=app_css,
    ) as app:

        # --- LOGIN CONTAINER (visible initially) ---
        with gr.Column(elem_classes=["login-container"]) as login_container:
            gr.Markdown("""
            # LITIGATION RESEARCH DOC SYSTEM
            Authentication required
            """)

            password_input = gr.Textbox(
                label="Password",
                type="password",
                placeholder="Enter password",
            )

            login_btn = gr.Button("Login", variant="primary", size="lg")

            error_message = gr.Markdown(value="", visible=False)

        # --- MAIN APP CONTAINER (hidden until auth) ---
        # NOTE(review): authentication only toggles visibility of this column.
        # The event handlers below are registered regardless of login state, so
        # this is presumably not a hard access boundary -- confirm whether the
        # handlers need a server-side check as well.
        with gr.Column(visible=False) as main_container:
            gr.Markdown(
                """
                # LITIGATION RESEARCH DOC SYSTEM

                Upload, manage, and delete documents in the Pinecone Assistant.
                """
            )

            with gr.Tabs():
                # Upload Tab
                with gr.TabItem("Upload Documents", id="upload_tab"):
                    with gr.Row():
                        with gr.Column(scale=1):
                            gr.Markdown("### File Upload")
                            files_input = gr.File(
                                label="Select Documents (Max 10 files)",
                                file_count="multiple",
                                file_types=[".pdf", ".doc", ".docx", ".txt"],
                                elem_classes=["upload-container"],
                            )
                        with gr.Column(scale=1):
                            gr.Markdown("### Document Metadata")

                            # All fields are created hidden; the file-change
                            # handler receives the whole list as its outputs.
                            with gr.Column():
                                metadata_fields = [
                                    gr.Textbox(
                                        label=f"Field {i}",
                                        placeholder="",
                                        visible=False,
                                        lines=2,
                                    )
                                    for i in range(_METADATA_FIELD_COUNT)
                                ]

                    with gr.Row():
                        with gr.Column(scale=1):
                            upload_btn = gr.Button(
                                "Upload Documents to Pinecone Assistant",
                                variant="primary",
                            )
                        with gr.Column(scale=1):
                            clear_btn = gr.Button("Clear Form", variant="secondary")

                    processing_status = gr.Markdown(
                        value="Ready to process documents", visible=True
                    )

                    status_output = gr.Markdown(
                        label="Upload Status", value="Ready to upload documents..."
                    )
                    results_output = gr.Markdown(
                        label="Detailed Results", value="", max_height=400
                    )

                # Delete Tab
                with gr.TabItem("Delete Documents", id="delete_tab"):
                    gr.Markdown("### Delete Documents")
                    with gr.Row():
                        with gr.Column(scale=2):
                            file_dropdown = gr.Dropdown(
                                label="Select Files to Delete",
                                choices=[],
                                multiselect=True,
                                interactive=False,
                                elem_classes=["delete-container"],
                            )
                        with gr.Column(scale=1):
                            refresh_dropdown_btn = gr.Button(
                                "Refresh File List", variant="secondary"
                            )

                    with gr.Row():
                        with gr.Column(scale=1):
                            delete_btn = gr.Button(
                                "Delete Selected Files", variant="stop"
                            )
                        with gr.Column(scale=1):
                            clear_delete_btn = gr.Button(
                                "Clear Selection", variant="secondary"
                            )

                    delete_status_output = gr.Markdown(
                        label="Deletion Status", value="Select files to delete..."
                    )
                    delete_results_output = gr.Markdown(
                        label="Deletion Results", value="", max_height=400
                    )

                # View Tab
                with gr.TabItem("View Uploaded Files", id="view_tab"):
                    gr.Markdown("### Uploaded Files Management")
                    with gr.Row():
                        refresh_btn = gr.Button("Fetch Files", variant="primary")

                    file_list_status = gr.Markdown(
                        value="Click 'Fetch Files' to load uploaded files",
                        visible=True,
                    )

                    with gr.Row():
                        prev_btn = gr.Button(
                            "Previous 100", variant="secondary", visible=False
                        )
                        pagination_info = gr.Markdown(
                            value="Page 1 of 1 | Total: 0 files"
                        )
                        next_btn = gr.Button(
                            "Next 100", variant="secondary", visible=False
                        )

                    file_summary = gr.Markdown(
                        label="Files Summary",
                        value="Click refresh to load file summary...",
                    )
                    file_details = gr.Markdown(
                        label="File Details",
                        value="Click refresh to load file details...",
                        max_height=600,
                    )

        # --- EVENT HANDLERS ---
        # Login: the button click and pressing Enter in the password box share
        # one handler and one output list.
        login_outputs = [login_container, error_message, main_container, password_input]
        for login_trigger in (login_btn.click, password_input.submit):
            login_trigger(
                fn=_handle_login_attempt,
                inputs=[password_input],
                outputs=login_outputs,
            )

        # Wiring existing handlers from earlier in the file
        files_input.change(
            fn=update_metadata_fields,
            inputs=[files_input],
            outputs=metadata_fields,
        )

        # Show the "processing" status first, then run the actual upload.
        upload_btn.click(fn=start_processing, outputs=[processing_status]).then(
            fn=process_files_with_progress,
            inputs=[files_input] + metadata_fields,
            outputs=[status_output, results_output, processing_status],
        )

        clear_btn.click(
            fn=clear_form,
            outputs=[files_input]
            + metadata_fields
            + [status_output, results_output, processing_status],
        )

        refresh_dropdown_btn.click(fn=refresh_delete_dropdown, outputs=[file_dropdown])
        delete_btn.click(
            fn=delete_selected_files,
            inputs=[file_dropdown],
            outputs=[delete_status_output, delete_results_output],
        )
        clear_delete_btn.click(
            fn=clear_delete_form,
            outputs=[file_dropdown, delete_status_output, delete_results_output],
        )

        # The three listing/pagination events below update the same components.
        listing_outputs = [file_summary, file_details, pagination_info, prev_btn, next_btn]

        refresh_btn.click(fn=refresh_file_list, outputs=[file_list_status]).then(
            fn=list_uploaded_files_paginated,
            inputs=[],
            outputs=listing_outputs,
        )

        next_btn.click(fn=load_next_page, inputs=[pagination_info], outputs=listing_outputs)
        prev_btn.click(fn=load_prev_page, inputs=[pagination_info], outputs=listing_outputs)

    return app


def main():
    """Main application with authentication — single Gradio Blocks app.

    This builds both the login UI and the main application UI inside one
    `gr.Blocks` so we only call `.launch()` once. The main UI is hidden
    until authentication succeeds. This avoids launching two separate
    Gradio servers which was causing the server to close unexpectedly.
    """
    app = _build_authenticated_app()

    # Launch single app
    app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True, show_error=True)
1055
+
1056
# Script entry point: build and launch the app only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()