datafreak committed on
Commit
126b58a
·
verified ·
1 Parent(s): 179eb46

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +254 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from pathlib import Path
4
+ from pinecone import Pinecone
5
+ from typing import List, Tuple
6
+ import tempfile
7
+ import shutil
8
+
9
# Initialize Pinecone
# SECURITY: the API key is taken from the PINECONE_API_KEY environment
# variable instead of being embedded in the source. The key that used to be
# hard-coded on this line was published with the commit and must be rotated.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

# Create uploads directory
# Files are copied into this folder (relative to the working directory)
# before being sent to the assistant; exist_ok avoids failing on restart.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
15
+
16
def _split_csv(raw):
    """Split a comma-separated string into stripped, non-empty items."""
    return [item.strip() for item in raw.split(",") if item.strip()]


def _source_path(file_item):
    """Return the filesystem path for one entry of the file input."""
    if isinstance(file_item, (str, os.PathLike)):
        return os.fspath(file_item)
    # NOTE(review): some Gradio versions appear to pass temp-file wrapper
    # objects whose ``name`` attribute is the path - confirm against the
    # installed Gradio version.
    return str(getattr(file_item, "name", file_item))


def _format_report(results, errors, total, metadata):
    """Build the three Markdown strings shown in the results section."""
    status_message = "".join([
        "📊 **Processing Complete**\n\n",
        f"✅ **Successful uploads:** {len(results)}\n",
        f"❌ **Failed uploads:** {len(errors)}\n",
        f"📁 **Total files processed:** {total}\n\n",
    ])

    detail_parts = ["## 📋 **Detailed Results**\n\n"]
    if results:
        detail_parts.append("### ✅ **Successful Uploads:**\n")
        detail_parts.extend(
            f"- **{entry['filename']}** - {entry['status']}\n" for entry in results
        )
        detail_parts.append("\n")
    if errors:
        detail_parts.append("### ❌ **Failed Uploads:**\n")
        detail_parts.extend(
            f"- **{entry['filename']}** - {entry['error']}\n" for entry in errors
        )
        detail_parts.append("\n")
    detailed_results = "".join(detail_parts)

    metadata_info = "".join([
        "## 🏷️ **Applied Metadata**\n\n",
        f"**Sections:** {', '.join(metadata['sections'])}\n\n",
        f"**Keywords:** {', '.join(metadata['keywords'])}\n\n",
        f"**Description:** {metadata['description']}\n",
    ])
    return status_message, detailed_results, metadata_info


def process_files(files, sections, keywords, description):
    """Process multiple files and upload to Pinecone Assistant.

    Each file is copied into ``UPLOAD_FOLDER`` under its base name and then
    uploaded to the ``gstminutes`` assistant with the same metadata. The
    copies are left in ``UPLOAD_FOLDER`` afterwards. A failure on one file is
    recorded and does not stop the remaining files.

    Args:
        files: Entries from the file input (path strings, path-like objects,
            or objects exposing the path as ``name``). Empty/None is rejected.
        sections: Comma-separated section names; required.
        keywords: Comma-separated keywords; required.
        description: Free-text description; required.

    Returns:
        A 3-tuple of Markdown strings ``(status, detailed_results,
        metadata_info)``. On a validation or critical error the first element
        carries the error message and the other two are empty strings.
    """
    if not files:
        return "❌ Error: No files selected", "", ""

    # Checked in display order; ``or ""`` keeps a None value from raising.
    required_fields = (
        ("Sections", sections),
        ("Keywords", keywords),
        ("Description", description),
    )
    for label, value in required_fields:
        if not (value or "").strip():
            return f"❌ Error: {label} field is required", "", ""

    try:
        # Prepare metadata (same for all files)
        metadata = {
            "sections": _split_csv(sections),
            "keywords": _split_csv(keywords),
            "description": description.strip(),
        }

        # Initialize Pinecone Assistant
        assistant = pc.assistant.Assistant(assistant_name="gstminutes")

        results = []
        errors = []
        for file_item in files:
            # Resolved before the try so the error entry can always name the
            # file without risking a second exception inside the handler.
            source = _source_path(file_item)
            filename = os.path.basename(source)
            try:
                # Copy to uploads directory
                destination_path = os.path.join(UPLOAD_FOLDER, filename)
                shutil.copy2(source, destination_path)

                # Upload to Pinecone Assistant
                response = assistant.upload_file(
                    file_path=destination_path,
                    metadata=metadata,
                    timeout=None,
                )
            except Exception as file_error:
                # Per-file boundary: report the failure and keep going.
                errors.append({
                    "filename": filename,
                    "error": f"❌ Error: {str(file_error)}",
                })
            else:
                results.append({
                    "filename": filename,
                    "status": "✅ Success",
                    "response": str(response),
                })

        return _format_report(results, errors, len(files), metadata)

    except Exception as e:
        # Top-level UI boundary: surface the failure instead of crashing.
        return f"❌ **Critical Error:** {str(e)}", "", ""
107
+
108
def clear_form():
    """Return reset values: None for the first output, then six empty strings."""
    blank_texts = ("",) * 6
    return (None, *blank_texts)
111
+
112
# Create Gradio interface
# Layout: header, file picker + metadata form, action buttons, results panes,
# footer. The component variables are wired to the handlers further down.
with gr.Blocks(
    title="📄 Tax Document Ingestion System",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1200px !important;
        margin: auto;
    }
    .upload-container {
        border: 2px dashed #4CAF50;
        border-radius: 10px;
        padding: 20px;
        text-align: center;
        background-color: #f8f9fa;
    }
    """
) as app:

    gr.Markdown(
        """
        # 📄 Tax Document Ingestion System

        Upload multiple documents with metadata to the Pinecone Assistant for GST Minutes processing.

        ## 🚀 Features:
        - ✅ **Multiple file upload** - Select and upload multiple documents at once
        - 🏷️ **Metadata tagging** - Add sections, keywords, and descriptions
        - 🔄 **Batch processing** - All files processed with the same metadata
        - 📊 **Detailed reporting** - See success/failure status for each file

        ---
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📁 **File Upload**")
            files_input = gr.File(
                label="Select Documents",
                file_count="multiple",
                file_types=[".pdf", ".doc", ".docx", ".txt"],
                elem_classes=["upload-container"]
            )

        with gr.Column(scale=1):
            gr.Markdown("### 🏷️ **Document Metadata**")

            sections_input = gr.Textbox(
                label="📑 Sections (comma-separated)",
                placeholder="e.g., Introduction, Financial Data, Compliance, Summary",
                lines=2,
                info="Enter document sections separated by commas"
            )

            keywords_input = gr.Textbox(
                label="🔍 Keywords (comma-separated)",
                placeholder="e.g., GST, tax, compliance, revenue, audit",
                lines=2,
                info="Enter relevant keywords separated by commas"
            )

            description_input = gr.Textbox(
                label="📝 Description",
                placeholder="Brief description of the document(s) purpose and content",
                lines=3,
                info="Provide a clear description of the document content"
            )

    with gr.Row():
        with gr.Column(scale=1):
            upload_btn = gr.Button(
                "🚀 Upload Documents to Pinecone Assistant",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
            clear_btn = gr.Button(
                "🗑️ Clear Form",
                variant="secondary",
                size="lg"
            )

    gr.Markdown("---")

    # Results section
    with gr.Row():
        with gr.Column():
            status_output = gr.Markdown(
                label="📊 Upload Status",
                value="*Ready to upload documents...*"
            )

    with gr.Row():
        with gr.Column(scale=1):
            results_output = gr.Markdown(
                label="📋 Detailed Results",
                value=""
            )

        with gr.Column(scale=1):
            metadata_output = gr.Markdown(
                label="🏷️ Applied Metadata",
                value=""
            )

    # Event handlers
    # process_files returns three strings, one per output component below.
    upload_btn.click(
        fn=process_files,
        inputs=[files_input, sections_input, keywords_input, description_input],
        outputs=[status_output, results_output, metadata_output]
    )

    # clear_form returns seven values; the order here must match its tuple
    # (file input first, then the three text inputs and three result panes).
    clear_btn.click(
        fn=clear_form,
        outputs=[files_input, sections_input, keywords_input, description_input, status_output, results_output, metadata_output]
    )

    # Footer
    gr.Markdown(
        """
        ---

        ### 💡 **Usage Tips:**
        - Select multiple PDF, DOC, DOCX, or TXT files
        - Use descriptive sections and keywords for better organization
        - All selected files will use the same metadata
        - Check the results section for upload status

        ### 📞 **Support:**
        For issues or questions, contact the development team.
        """
    )
246
+
247
if __name__ == "__main__":
    # Start the Gradio server only when run as a script: listen on all
    # interfaces on port 7860, no public share link, with debug output and
    # error display enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
        "show_error": True,
    }
    app.launch(**launch_options)
requirements.txt ADDED
Binary file (3.23 kB). View file