Alamgirapi committed on
Commit
0128cc3
·
verified ·
1 Parent(s): ffe7e3d

Upload flask_app.py

Browse files
Files changed (1) hide show
  1. flask_app.py +309 -0
flask_app.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, jsonify
2
+ import os
3
+ import json
4
+ from datetime import datetime
5
+ from werkzeug.utils import secure_filename
6
+ import mimetypes
7
+ from src.agenticRAG.components.document_parsing import DocumentChunker
8
+ from src.agenticRAG.components.vectorstore import VectorStoreManager
9
+
10
# Flask application object. template_folder='.' points template lookup at the
# application's own root directory rather than a "templates" sub-folder.
app = Flask(__name__, template_folder='.')

# Configuration
UPLOAD_FOLDER = 'KnowledgebaseFile'  # directory that receives uploaded documents
METADATA_FILE = 'knowledge_base_metadata.json'  # JSON list with one entry per document
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'docx', 'md', 'doc'}  # lower-case, without the dot

# Ensure upload folder exists (runs at import time; a no-op when already present)
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
19
+
20
def allowed_file(filename, allowed_extensions=None):
    """Return True when *filename* ends in an extension that may be uploaded.

    Args:
        filename: File name to check. Only the text after the last '.' is
            examined, case-insensitively. Empty or ``None`` names are rejected.
        allowed_extensions: Optional collection of lower-case extensions
            (without the dot). Defaults to the module-level ALLOWED_EXTENSIONS.

    Returns:
        bool: True if the name has a dot and its extension is allowed.
    """
    if not filename:
        return False
    if allowed_extensions is None:
        allowed_extensions = ALLOWED_EXTENSIONS
    _, dot, ext = filename.rpartition('.')
    # An empty separator means the name contained no '.' at all.
    return bool(dot) and ext.lower() in allowed_extensions
22
+
23
def get_file_size(filepath):
    """Return the size of the file at *filepath* in MB, rounded to 2 decimals.

    One MB is taken as 1024 * 1024 bytes.

    Raises:
        OSError: If the file does not exist or cannot be inspected
            (propagated from ``os.path.getsize``).
    """
    size_bytes = os.path.getsize(filepath)
    return round(size_bytes / (1024 * 1024), 2)
28
+
29
def get_file_type(filename):
    """Return a human-readable file type derived from *filename*'s extension.

    The extension is the text after the last '.', compared case-insensitively.
    Names without any '.' and unrecognised extensions both yield 'Unknown'
    (previously a name without a dot raised IndexError).
    """
    type_map = {
        'pdf': 'PDF',
        'txt': 'Text',
        'docx': 'Word Document',
        'doc': 'Word Document',
        'md': 'Markdown',
    }
    _, dot, ext = filename.rpartition('.')
    if not dot:
        # No '.' in the name, so there is no extension to look up.
        return 'Unknown'
    return type_map.get(ext.lower(), 'Unknown')
40
+
41
def load_metadata(path=None):
    """Load knowledge base metadata from a JSON file.

    Args:
        path: Optional path of the metadata file. Defaults to the module-level
            METADATA_FILE.

    Returns:
        list: The parsed list of document entries. An empty list is returned
        when the file is missing, cannot be parsed, or does not contain a
        JSON list, so callers can always iterate and append.
    """
    if path is None:
        path = METADATA_FILE
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        # Nothing has been uploaded yet.
        return []
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        # Still best-effort, but no longer silent.
        print(f"Warning: could not parse metadata file {path}: {exc}")
        return []
    return data if isinstance(data, list) else []
50
+
51
def save_metadata(metadata, path=None):
    """Save knowledge base metadata to a JSON file.

    The data is written to a sibling ``<path>.tmp`` file first and then moved
    over the target with ``os.replace``, so a failure during serialisation or
    writing leaves the previous metadata file untouched.

    Args:
        metadata: JSON-serialisable list of document entries.
        path: Optional destination path. Defaults to the module-level
            METADATA_FILE.

    Raises:
        TypeError: If *metadata* is not JSON-serialisable.
        OSError: If the file cannot be written or replaced.
    """
    if path is None:
        path = METADATA_FILE
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(metadata, f, indent=2)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file no longer exists; this only
        # cleans up a partial file left behind by a failure.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
55
+
56
def get_kb_statistics():
    """Get real knowledge base statistics.

    Returns:
        dict with:
            'total_docs': number of entries in the metadata file,
            'total_size': combined size in MB (2 decimals) of the entries
                whose file is present in UPLOAD_FOLDER,
            'last_update': 'YYYY-MM-DD' of the most recent upload among those
                files, or 'Never' when there is none.
    """
    metadata = load_metadata()

    total_bytes = 0
    last_update = None

    for doc in metadata:
        filepath = os.path.join(UPLOAD_FOLDER, doc['filename'])
        try:
            # Accumulate raw bytes and round once at the end. Summing per-file
            # rounded MB values loses every file smaller than ~5 KB.
            total_bytes += os.path.getsize(filepath)
        except OSError:
            # File is listed in the metadata but not readable on disk.
            continue

        try:
            doc_date = datetime.fromisoformat(doc['uploaded'])
        except (KeyError, TypeError, ValueError):
            # A malformed timestamp should not break the whole endpoint.
            continue
        if last_update is None or doc_date > last_update:
            last_update = doc_date

    return {
        'total_docs': len(metadata),
        'total_size': round(total_bytes / (1024 * 1024), 2),
        'last_update': last_update.strftime('%Y-%m-%d') if last_update else 'Never',
    }
77
+
78
def get_knowledge_base():
    """Get current knowledge base with real file information.

    Returns:
        list[dict]: One entry per metadata record whose file still exists in
        UPLOAD_FOLDER, with keys 'id', 'name', 'size' (e.g. "1.25 MB"),
        'type', 'uploaded', 'chunks' ('N/A' when not recorded) and
        'description'. Records whose file is missing are omitted.
    """
    kb_data = []

    for doc in load_metadata():
        filepath = os.path.join(UPLOAD_FOLDER, doc['filename'])
        if not os.path.exists(filepath):
            continue
        kb_data.append({
            'id': doc['id'],
            'name': doc['filename'],
            'size': f"{get_file_size(filepath)} MB",
            'type': get_file_type(doc['filename']),
            'uploaded': doc['uploaded'],
            'chunks': doc.get('chunks', 'N/A'),
            'description': doc['description'],
        })

    return kb_data
97
+
98
@app.route('/')
def index():
    """Serve the knowledge base UI page."""
    return render_template('knowledgebase_UI.html')
101
+
102
@app.route('/api/statistics')
def get_statistics():
    """API endpoint to get knowledge base statistics as JSON."""
    return jsonify(get_kb_statistics())
106
+
107
@app.route('/api/knowledge-base')
def get_kb_data():
    """API endpoint to get the knowledge base document list as JSON."""
    return jsonify(get_knowledge_base())
111
+
112
def _next_doc_id(metadata):
    """Return an id one greater than the largest id present in *metadata*."""
    return max((doc.get('id', 0) for doc in metadata), default=0) + 1


def _unique_filename(filename):
    """Return *filename*, suffixed _1, _2, ... until unused in UPLOAD_FOLDER."""
    base_name, ext = os.path.splitext(filename)
    candidate = filename
    counter = 1
    while os.path.exists(os.path.join(UPLOAD_FOLDER, candidate)):
        candidate = f"{base_name}_{counter}{ext}"
        counter += 1
    return candidate


@app.route('/api/upload', methods=['POST'])
def upload_files():
    """Handle file upload with description.

    Expects a multipart form with one or more 'files' parts and a non-empty
    'description' field. Each accepted file is saved to UPLOAD_FOLDER, chunked,
    added to the vector store and recorded in the metadata file.

    Returns:
        200 with {'message', 'files', 'skipped'}: 'files' lists the stored
            names, 'skipped' lists submitted names that were rejected because
            of their extension.
        400 with {'error'} when files or description are missing.
        500 with {'error'} when saving or indexing fails.
    """
    try:
        if 'files' not in request.files:
            return jsonify({'error': 'No files provided'}), 400

        files = request.files.getlist('files')
        description = request.form.get('description', '').strip()

        if not description:
            return jsonify({'error': 'Description is required'}), 400

        if not files or all(f.filename == '' for f in files):
            return jsonify({'error': 'No files selected'}), 400

        uploaded_files = []
        skipped_files = []
        metadata = load_metadata()

        for file in files:
            if not file or not file.filename:
                continue

            # Validate the sanitised name: secure_filename can strip characters
            # and leave a name without a usable extension.
            filename = secure_filename(file.filename)
            if not allowed_file(filename):
                skipped_files.append(file.filename)
                continue

            # Handle duplicate filenames
            filename = _unique_filename(filename)
            filepath = os.path.join(UPLOAD_FOLDER, filename)
            file.save(filepath)
            print(f"File saved: {filepath}")

            try:
                # Create DocumentChunker instance and split the file into chunks
                chunker = DocumentChunker(chunk_size=1000, chunk_overlap=100)
                chunks = chunker.process_file(filepath)
                print(f"Chunks created: {len(chunks)} for {filename}")

                # Add to vector store
                vector_store_manager = VectorStoreManager()
                vector_store_manager.load_vectorstore()

                # One timestamp per file so every chunk and the file entry agree.
                uploaded_at = datetime.now().isoformat()
                total_chunks = len(chunks)

                # One metadata dict per chunk, so texts and metadatas have
                # the same length.
                chunk_metadatas = [
                    {
                        'filename': filename,
                        'description': description,
                        'chunk_index': i,
                        'total_chunks': total_chunks,
                        'uploaded': uploaded_at,
                    }
                    for i in range(total_chunks)
                ]

                vector_store_manager.add_documents(chunks, metadatas=chunk_metadatas)
                vector_store_manager.save_vectorstore()
            except Exception:
                # Do not leave an unindexed, untracked file behind.
                if os.path.exists(filepath):
                    os.remove(filepath)
                raise

            # Create metadata entry for the file. The id is derived from the
            # largest existing id, so it stays unique after deletions.
            metadata.append({
                'id': _next_doc_id(metadata),
                'filename': filename,
                'description': description,
                'uploaded': uploaded_at,
                'chunks': total_chunks,
            })
            # Persist after every file so a failure on a later file does not
            # lose the record of files that were already indexed.
            save_metadata(metadata)
            uploaded_files.append(filename)

        return jsonify({
            'message': f'Successfully uploaded {len(uploaded_files)} files',
            'files': uploaded_files,
            'skipped': skipped_files,
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
194
+
195
+
196
@app.route('/api/delete/<int:doc_id>', methods=['DELETE'])
def delete_document(doc_id):
    """Delete a document from knowledge base.

    Removes the stored file (if still present) and the first metadata entry
    whose 'id' equals *doc_id*.

    Returns:
        200 with {'message'} on success, 404 with {'error'} when no entry has
        that id, 500 with {'error'} on any other failure.
    """
    # NOTE(review): this handler does not touch the vector store, so chunks
    # indexed for the file at upload time appear to remain there. Confirm
    # whether VectorStoreManager offers a removal API.
    try:
        metadata = load_metadata()
        doc_to_delete = next(
            (doc for doc in metadata if doc['id'] == doc_id), None
        )

        if doc_to_delete is None:
            return jsonify({'error': 'Document not found'}), 404

        # Delete the actual file; a file that is already gone is not an error.
        filepath = os.path.join(UPLOAD_FOLDER, doc_to_delete['filename'])
        try:
            os.remove(filepath)
        except FileNotFoundError:
            pass

        # Save updated metadata without the deleted entry
        save_metadata([doc for doc in metadata if doc is not doc_to_delete])

        return jsonify({'message': f'Successfully deleted {doc_to_delete["filename"]}'})

    except Exception as e:
        return jsonify({'error': str(e)}), 500
224
+
225
@app.route('/api/clear-all', methods=['DELETE'])
def clear_knowledge_base():
    """Clear entire knowledge base.

    Deletes every file listed in the metadata from UPLOAD_FOLDER and resets
    the metadata file to an empty list.

    Returns:
        200 with {'message'} on success, 500 with {'error'} on failure.
    """
    # NOTE(review): the vector store is not cleared here. Confirm whether
    # previously indexed chunks should be removed as well.
    try:
        # Delete all files; entries whose file is already gone are skipped.
        for doc in load_metadata():
            filepath = os.path.join(UPLOAD_FOLDER, doc['filename'])
            try:
                os.remove(filepath)
            except FileNotFoundError:
                pass

        # Clear metadata
        save_metadata([])

        return jsonify({'message': 'Knowledge base cleared successfully'})

    except Exception as e:
        return jsonify({'error': str(e)}), 500
244
+
245
@app.route('/api/document/<int:doc_id>')
def get_document_details(doc_id):
    """Get detailed information about a specific document.

    Returns:
        200 with the document's id, name, description, size, type, upload
        time, chunk count and on-disk path; 404 with {'error'} when no
        metadata entry has that id or its file is missing from disk; 500 with
        {'error'} on any other failure.
    """
    try:
        doc = next(
            (entry for entry in load_metadata() if entry['id'] == doc_id), None
        )
        if doc is None:
            return jsonify({'error': 'Document not found'}), 404

        filepath = os.path.join(UPLOAD_FOLDER, doc['filename'])
        if not os.path.exists(filepath):
            return jsonify({'error': 'File not found on disk'}), 404

        return jsonify({
            'id': doc['id'],
            'name': doc['filename'],
            'description': doc['description'],
            'size': f"{get_file_size(filepath)} MB",
            'type': get_file_type(doc['filename']),
            'uploaded': doc['uploaded'],
            'chunks': doc.get('chunks', 'N/A'),
            'path': filepath,
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500
272
+
273
@app.route('/api/search')
def search_documents():
    """Search documents by name or description.

    Reads the 'q' query parameter and performs a case-insensitive substring
    match against each document's file name and description. An empty or
    whitespace-only query returns the full knowledge base.

    Returns:
        200 with a JSON list shaped like the /api/knowledge-base response,
        500 with {'error'} on failure.
    """
    try:
        query = request.args.get('q', '').strip().lower()

        # get_knowledge_base() already builds the response entries and skips
        # files missing on disk, so search only needs to filter its output.
        documents = get_knowledge_base()
        if not query:
            return jsonify(documents)

        results = [
            doc for doc in documents
            if query in doc['name'].lower()
            or query in doc['description'].lower()
        ]
        return jsonify(results)

    except Exception as e:
        return jsonify({'error': str(e)}), 500
305
+
306
if __name__ == '__main__':
    print(f"Knowledge Base files will be stored in: {os.path.abspath(UPLOAD_FOLDER)}")
    print(f"Metadata will be stored in: {os.path.abspath(METADATA_FILE)}")
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows arbitrary code execution for anyone who
    # can reach the port. Opt in locally with FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled, host='0.0.0.0', port=5001)