datafreak committed on
Commit
048c298
·
verified ·
1 Parent(s): 126b58a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -69
app.py CHANGED
@@ -5,47 +5,74 @@ from pinecone import Pinecone
5
  from typing import List, Tuple
6
  import tempfile
7
  import shutil
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # Initialize Pinecone
10
- pc = Pinecone(api_key="pcsk_4CboGg_BNMrddoKLGxfrzFLhequEQ7DmTCzT2BYXpiefUBHUKeLKXhbbmozifeVJiVWXrv")
 
11
 
12
  # Create uploads directory
13
  UPLOAD_FOLDER = "uploads"
14
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
15
 
16
- def process_files(files, sections, keywords, description):
17
- """Process multiple files and upload to Pinecone Assistant"""
18
  if not files:
19
- return "❌ Error: No files selected", "", ""
20
-
21
- if not sections.strip():
22
- return "❌ Error: Sections field is required", "", ""
23
 
24
- if not keywords.strip():
25
- return "❌ Error: Keywords field is required", "", ""
26
-
27
- if not description.strip():
28
- return "❌ Error: Description field is required", "", ""
29
 
30
  try:
31
  results = []
32
  errors = []
33
 
34
- # Prepare metadata (same for all files)
35
- metadata = {
36
- "sections": [s.strip() for s in sections.split(",") if s.strip()],
37
- "keywords": [k.strip() for k in keywords.split(",") if k.strip()],
38
- "description": description.strip()
39
- }
40
-
41
  # Initialize Pinecone Assistant
42
- assistant = pc.assistant.Assistant(assistant_name="gstminutes")
 
43
 
44
- for file_path in files:
 
45
  try:
46
- # Get filename from path
47
  filename = os.path.basename(file_path)
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # Copy to uploads directory
50
  destination_path = os.path.join(UPLOAD_FOLDER, filename)
51
  shutil.copy2(file_path, destination_path)
@@ -60,6 +87,7 @@ def process_files(files, sections, keywords, description):
60
  results.append({
61
  "filename": filename,
62
  "status": "βœ… Success",
 
63
  "response": str(response)
64
  })
65
 
@@ -84,30 +112,51 @@ def process_files(files, sections, keywords, description):
84
  if results:
85
  detailed_results += "### βœ… **Successful Uploads:**\n"
86
  for result in results:
87
- detailed_results += f"- **{result['filename']}** - {result['status']}\n"
88
- detailed_results += "\n"
 
 
89
 
90
  if errors:
91
  detailed_results += "### ❌ **Failed Uploads:**\n"
92
  for error in errors:
93
  detailed_results += f"- **{error['filename']}** - {error['error']}\n"
94
- detailed_results += "\n"
95
 
96
- # Metadata info
97
- metadata_info = "## 🏷️ **Applied Metadata**\n\n"
98
- metadata_info += f"**Sections:** {', '.join(metadata['sections'])}\n\n"
99
- metadata_info += f"**Keywords:** {', '.join(metadata['keywords'])}\n\n"
100
- metadata_info += f"**Description:** {metadata['description']}\n"
101
-
102
- return status_message, detailed_results, metadata_info
103
 
104
  except Exception as e:
105
  error_msg = f"❌ **Critical Error:** {str(e)}"
106
- return error_msg, "", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  def clear_form():
109
  """Clear all form fields"""
110
- return None, "", "", "", "", "", ""
111
 
112
  # Create Gradio interface
113
  with gr.Blocks(
@@ -148,35 +197,28 @@ with gr.Blocks(
148
  with gr.Column(scale=1):
149
  gr.Markdown("### πŸ“ **File Upload**")
150
  files_input = gr.File(
151
- label="Select Documents",
152
  file_count="multiple",
153
  file_types=[".pdf", ".doc", ".docx", ".txt"],
154
  elem_classes=["upload-container"]
155
  )
156
 
157
  with gr.Column(scale=1):
158
- gr.Markdown("### 🏷️ **Document Metadata**")
159
-
160
- sections_input = gr.Textbox(
161
- label="πŸ“‘ Sections (comma-separated)",
162
- placeholder="e.g., Introduction, Financial Data, Compliance, Summary",
163
- lines=2,
164
- info="Enter document sections separated by commas"
165
- )
166
-
167
- keywords_input = gr.Textbox(
168
- label="πŸ” Keywords (comma-separated)",
169
- placeholder="e.g., GST, tax, compliance, revenue, audit",
170
- lines=2,
171
- info="Enter relevant keywords separated by commas"
172
- )
173
-
174
- description_input = gr.Textbox(
175
- label="πŸ“ Description",
176
- placeholder="Brief description of the document(s) purpose and content",
177
- lines=3,
178
- info="Provide a clear description of the document content"
179
  )
 
180
 
181
  with gr.Row():
182
  with gr.Column(scale=1):
@@ -204,28 +246,30 @@ with gr.Blocks(
204
  )
205
 
206
  with gr.Row():
207
- with gr.Column(scale=1):
208
  results_output = gr.Markdown(
209
  label="πŸ“‹ Detailed Results",
210
  value=""
211
  )
212
-
213
- with gr.Column(scale=1):
214
- metadata_output = gr.Markdown(
215
- label="🏷️ Applied Metadata",
216
- value=""
217
- )
218
 
219
  # Event handlers
 
 
 
 
 
 
 
 
220
  upload_btn.click(
221
  fn=process_files,
222
- inputs=[files_input, sections_input, keywords_input, description_input],
223
- outputs=[status_output, results_output, metadata_output]
224
  )
225
 
226
  clear_btn.click(
227
  fn=clear_form,
228
- outputs=[files_input, sections_input, keywords_input, description_input, status_output, results_output, metadata_output]
229
  )
230
 
231
  # Footer
@@ -234,9 +278,9 @@ with gr.Blocks(
234
  ---
235
 
236
  ### πŸ’‘ **Usage Tips:**
237
- - Select multiple PDF, DOC, DOCX, or TXT files
238
- - Use descriptive sections and keywords for better organization
239
- - All selected files will use the same metadata
240
  - Check the results section for upload status
241
 
242
  ### πŸ“ž **Support:**
 
5
  from typing import List, Tuple
6
  import tempfile
7
  import shutil
8
+ from dotenv import load_dotenv
9
+
10
# Load environment variables (python-dotenv) before anything reads them.
load_dotenv()

# Abort startup when a required variable is unset or empty.
required_env_vars = ["PINECONE_API_KEY"]
missing_vars = [name for name in required_env_vars if not os.getenv(name)]
if missing_vars:
    raise ValueError(
        f"Missing required environment variables: {', '.join(missing_vars)}"
    )

# Create the Pinecone client from the key validated above.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=pinecone_api_key)

# Directory that receives a copy of every uploaded file; created if absent.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
27
 
28
+ def process_files(files, *metadata_inputs):
29
+ """Process multiple files with individual metadata for each file"""
30
  if not files:
31
+ return "❌ Error: No files selected", ""
 
 
 
32
 
33
+ if len(files) > 10:
34
+ return "❌ Error: Maximum 10 files allowed at a time", ""
 
 
 
35
 
36
  try:
37
  results = []
38
  errors = []
39
 
 
 
 
 
 
 
 
40
  # Initialize Pinecone Assistant
41
+ assistant_name = os.getenv("PINECONE_ASSISTANT_NAME", "gstminutes")
42
+ assistant = pc.assistant.Assistant(assistant_name=assistant_name)
43
 
44
+ # Process each file with its individual metadata
45
+ for i, file_path in enumerate(files):
46
  try:
 
47
  filename = os.path.basename(file_path)
48
 
49
+ # Get metadata for this specific file (3 fields per file: sections, keywords, description)
50
+ sections_idx = i * 3
51
+ keywords_idx = i * 3 + 1
52
+ description_idx = i * 3 + 2
53
+
54
+ if sections_idx < len(metadata_inputs):
55
+ sections = metadata_inputs[sections_idx] or ""
56
+ keywords = metadata_inputs[keywords_idx] or ""
57
+ description = metadata_inputs[description_idx] or ""
58
+ else:
59
+ sections = keywords = description = ""
60
+
61
+ # Skip if no metadata provided for this file
62
+ if not sections.strip() and not keywords.strip() and not description.strip():
63
+ errors.append({
64
+ "filename": filename,
65
+ "error": "❌ Error: No metadata provided"
66
+ })
67
+ continue
68
+
69
+ # Prepare metadata for this file
70
+ metadata = {
71
+ "sections": [s.strip() for s in sections.split(",") if s.strip()],
72
+ "keywords": [k.strip() for k in keywords.split(",") if k.strip()],
73
+ "description": description.strip()
74
+ }
75
+
76
  # Copy to uploads directory
77
  destination_path = os.path.join(UPLOAD_FOLDER, filename)
78
  shutil.copy2(file_path, destination_path)
 
87
  results.append({
88
  "filename": filename,
89
  "status": "βœ… Success",
90
+ "metadata": metadata,
91
  "response": str(response)
92
  })
93
 
 
112
  if results:
113
  detailed_results += "### βœ… **Successful Uploads:**\n"
114
  for result in results:
115
+ detailed_results += f"- **{result['filename']}**\n"
116
+ detailed_results += f" - Sections: {', '.join(result['metadata']['sections'])}\n"
117
+ detailed_results += f" - Keywords: {', '.join(result['metadata']['keywords'])}\n"
118
+ detailed_results += f" - Description: {result['metadata']['description']}\n\n"
119
 
120
  if errors:
121
  detailed_results += "### ❌ **Failed Uploads:**\n"
122
  for error in errors:
123
  detailed_results += f"- **{error['filename']}** - {error['error']}\n"
 
124
 
125
+ return status_message, detailed_results
 
 
 
 
 
 
126
 
127
  except Exception as e:
128
  error_msg = f"❌ **Critical Error:** {str(e)}"
129
+ return error_msg, ""
130
+
131
def update_metadata_fields(files, max_files=10):
    """Build the updates that show or hide the pre-created metadata textboxes.

    Three consecutive textboxes (sections, keywords, description) are assigned
    to each selected file, in selection order; every remaining textbox is
    hidden.

    Args:
        files: The current file selection. Each entry is passed to
            ``os.path.basename`` (assumes path strings -- confirm against the
            Gradio version in use). A falsy value means nothing is selected.
        max_files: Largest selection that receives metadata fields. The
            default of 10 corresponds to the 30 textboxes the UI creates.

    Returns:
        A list of exactly ``max_files * 3`` ``gr.update`` results, one per
        metadata textbox, in textbox order.
    """
    fields_per_file = 3
    total_fields = max_files * fields_per_file

    # Nothing selected, or too many files: hide every field. No message is
    # shown from here; this callback only outputs to the metadata textboxes.
    if not files or len(files) > max_files:
        return [gr.update(visible=False) for _ in range(total_fields)]

    updates = []
    for file_path in files:
        filename = os.path.basename(file_path)
        updates.extend([
            gr.update(
                visible=True,
                label=f"📑 Sections for {filename}",
                placeholder="e.g., Introduction, Financial Data, Compliance",
            ),
            gr.update(
                visible=True,
                label=f"🔍 Keywords for {filename}",
                placeholder="e.g., GST, tax, compliance, revenue",
            ),
            gr.update(
                visible=True,
                label=f"📝 Description for {filename}",
                placeholder="Brief description of this document",
            ),
        ])

    # Pad with hidden updates so the result always matches the number of
    # output components.
    updates.extend(
        gr.update(visible=False) for _ in range(total_fields - len(updates))
    )
    return updates
156
 
157
def clear_form(num_metadata_fields=30):
    """Return reset values for every component wired to the clear button.

    Args:
        num_metadata_fields: Number of metadata textboxes to blank. The
            default of 30 matches the textboxes the UI creates (10 files with
            3 fields each).

    Returns:
        A list holding ``None`` for the file picker, one empty string per
        metadata textbox, then two empty strings for the status and results
        outputs -- the same order as the clear button's ``outputs`` list.
    """
    return [None, *[""] * num_metadata_fields, "", ""]
160
 
161
  # Create Gradio interface
162
  with gr.Blocks(
 
197
  with gr.Column(scale=1):
198
  gr.Markdown("### πŸ“ **File Upload**")
199
  files_input = gr.File(
200
+ label="Select Documents (Max 10 files)",
201
  file_count="multiple",
202
  file_types=[".pdf", ".doc", ".docx", ".txt"],
203
  elem_classes=["upload-container"]
204
  )
205
 
206
  with gr.Column(scale=1):
207
+ gr.Markdown("### 🏷️ **Document Metadata (Individual for Each File)**")
208
+ gr.Markdown("*Upload files first, then metadata fields will appear for each document*")
209
+
210
+ # Dynamic metadata fields container
211
+ with gr.Column() as metadata_container:
212
+ # Create 30 text fields (enough for 10 files with 3 fields each)
213
+ metadata_fields = []
214
+ for i in range(30):
215
+ field = gr.Textbox(
216
+ label=f"Field {i}",
217
+ placeholder="",
218
+ visible=False,
219
+ lines=2
 
 
 
 
 
 
 
 
220
  )
221
+ metadata_fields.append(field)
222
 
223
  with gr.Row():
224
  with gr.Column(scale=1):
 
246
  )
247
 
248
  with gr.Row():
249
+ with gr.Column():
250
  results_output = gr.Markdown(
251
  label="πŸ“‹ Detailed Results",
252
  value=""
253
  )
 
 
 
 
 
 
254
 
255
  # Event handlers
256
+
257
+ # Update metadata fields when files are uploaded
258
+ files_input.change(
259
+ fn=update_metadata_fields,
260
+ inputs=[files_input],
261
+ outputs=metadata_fields
262
+ )
263
+
264
  upload_btn.click(
265
  fn=process_files,
266
+ inputs=[files_input] + metadata_fields,
267
+ outputs=[status_output, results_output]
268
  )
269
 
270
  clear_btn.click(
271
  fn=clear_form,
272
+ outputs=[files_input] + metadata_fields + [status_output, results_output]
273
  )
274
 
275
  # Footer
 
278
  ---
279
 
280
  ### πŸ’‘ **Usage Tips:**
281
+ - Select up to 10 PDF, DOC, DOCX, or TXT files at once
282
+ - Upload files first, then fill individual metadata for each document
283
+ - Each file gets its own sections, keywords, and description
284
  - Check the results section for upload status
285
 
286
  ### πŸ“ž **Support:**