Prathamesh Sable committed on
Commit
d1c1737
·
1 Parent(s): dc73fb8

can upload and remove files from frontend to vector database directly

Browse files
Files changed (4) hide show
  1. .gitignore +2 -1
  2. app.py +104 -20
  3. requirements.txt +2 -1
  4. templates/index.html +97 -1
.gitignore CHANGED
@@ -4,4 +4,5 @@ chroma_db
4
  .vscode
5
  chroma
6
  /trash
7
- uploads/
 
 
4
  .vscode
5
  chroma
6
  /trash
7
+ uploads/
8
+ /flask_session
app.py CHANGED
@@ -1,5 +1,7 @@
1
- from flask import Flask,request, jsonify
 
2
  from flask import render_template
 
3
  from werkzeug.utils import secure_filename
4
 
5
  from langchain.document_loaders import DirectoryLoader,PyPDFLoader,UnstructuredWordDocumentLoader,TextLoader,UnstructuredHTMLLoader,UnstructuredMarkdownLoader
@@ -35,11 +37,69 @@ if not os.path.exists(UPLOAD_FOLDER):
35
 
36
  app = Flask(__name__)
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  @app.route('/')
40
  def index():
41
  return render_template('index.html') # Serve the HTML file we created
42
 
 
 
 
 
43
 
44
  @app.route("/ai",methods=["POST"])
45
  def aiPost():
@@ -53,30 +113,54 @@ def aiPost():
53
  return response_answer.text
54
 
55
  # add files
56
- @app.route('/upload', methods=['POST'])
57
- def upload_files():
58
- if 'files' not in request.files:
59
- return jsonify({'error': 'No files in request'}), 400
 
 
 
 
60
 
61
- files = request.files.getlist('files')
 
62
 
63
- uploaded_files = []
64
- for file in files:
65
- if file.filename:
66
- # Secure the filename
67
- filename = secure_filename(file.filename)
68
- file_path = os.path.join(UPLOAD_FOLDER, filename)
69
- file.save(file_path)
70
- uploaded_files.append(filename)
71
-
72
- # Here you can call your RAG pipeline processing function
73
- # process_pdf(file_path)
74
 
75
  return jsonify({
76
  'message': 'Files uploaded successfully',
77
- 'files': uploaded_files
78
- })
79
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  def main():
82
  app.run(host="0.0.0.0",port=8000,debug=True)
 
1
+ import time
2
+ from flask import Flask,request, jsonify,session
3
  from flask import render_template
4
+ from flask_session import Session
5
  from werkzeug.utils import secure_filename
6
 
7
  from langchain.document_loaders import DirectoryLoader,PyPDFLoader,UnstructuredWordDocumentLoader,TextLoader,UnstructuredHTMLLoader,UnstructuredMarkdownLoader
 
37
 
38
  app = Flask(__name__)
39
 
40
+ app.config["SESSION_PERMANENT"] = True
41
+ app.config["SESSION_TYPE"] = "filesystem"
42
+
43
+ Session(app)
44
+
45
def add_to_chroma(chunks,embedding_function,CHROMA_PATH=CHROMA_PATH):
    """Add document chunks to the Chroma database persisted on disk.

    Args:
        chunks: Documents handed unchanged to ``add_documents``.
        embedding_function: Embedding function the ``Chroma`` store is
            opened with.
        CHROMA_PATH: Directory the Chroma database is persisted in.

    Returns:
        Always ``True``.
    """
    # Open the store that lives at CHROMA_PATH, then add the new chunks to it.
    vector_store = Chroma(
        embedding_function=embedding_function,
        persist_directory=CHROMA_PATH,
    )
    vector_store.add_documents(chunks)
    return True
55
+
56
def remove_file_from_chroma(file_id,CHROMA_PATH=CHROMA_PATH):
    """Delete every chunk whose ``file_id`` metadata equals *file_id*.

    Args:
        file_id: Value of the ``file_id`` metadata field to match.
        CHROMA_PATH: Directory the Chroma database is persisted in.

    Returns:
        Always ``True``, including when no chunk matched.
    """
    db = Chroma(persist_directory=CHROMA_PATH)

    # include=[] asks only for the ids, not documents or embeddings.
    matching_ids = db.get(where={"file_id": file_id}, include=[])["ids"]

    # Nothing is indexed under this file_id: skip the delete rather than
    # handing the store an empty id list.
    # NOTE(review): how an empty-id delete behaves depends on the Chroma
    # version in use - confirm before removing this guard.
    if matching_ids:
        db.delete(ids=matching_ids)
    return True
62
+
63
+
64
+ def add_file_to_chroma(file_path,file_id):
65
+ # get extension of file
66
+ extension = file_path.split(".")[-1]
67
+ match extension:
68
+ case "pdf":
69
+ loader = PyPDFLoader(file_path)
70
+ case "docx":
71
+ loader = UnstructuredWordDocumentLoader(file_path)
72
+ case "txt":
73
+ loader = TextLoader(file_path)
74
+ case "html":
75
+ loader = UnstructuredHTMLLoader(file_path)
76
+ case "md":
77
+ loader = UnstructuredMarkdownLoader(file_path)
78
+ case _:
79
+ raise ValueError(f"Unsupported file type: {extension}")
80
+ documents = loader.load()
81
+ text_splitter = RecursiveCharacterTextSplitter( chunk_size=1000,
82
+ chunk_overlap=100,
83
+ length_function=len,
84
+ add_start_index=True)
85
+ texts = text_splitter.split_documents(documents)
86
+ # add metadata file_id to documents
87
+ for text in texts:
88
+ text.metadata["file_id"] = file_id
89
+
90
+ add_to_chroma(texts,hugging_face_ef)
91
+ return True
92
+
93
+
94
 
95
  @app.route('/')
96
  def index():
97
  return render_template('index.html') # Serve the HTML file we created
98
 
99
@app.route('/wait',methods=["POST"])
def wait():
    """Sleep for the number of seconds given in the ``time`` form field.

    Returns:
        ``{"status": "ok"}`` with HTTP 200 after sleeping, or an error payload
        with HTTP 400 when ``time`` is missing, not an integer, or negative.
    """
    # NOTE(review): this blocks a worker for a client-chosen duration; it looks
    # like a test helper for the frontend loader - confirm it should ship.
    raw_seconds = request.form.get("time")
    try:
        seconds = int(raw_seconds)
    except (TypeError, ValueError):
        # TypeError: field missing (None); ValueError: not a number.
        return jsonify({"status":"error","error":"'time' must be an integer number of seconds"}),400

    if seconds < 0:
        # time.sleep() raises on negative values; reject them up front.
        return jsonify({"status":"error","error":"'time' must not be negative"}),400

    time.sleep(seconds)
    return jsonify({"status":"ok"}),200
103
 
104
  @app.route("/ai",methods=["POST"])
105
  def aiPost():
 
113
  return response_answer.text
114
 
115
  # add files
116
@app.route('/upload-file', methods=['POST'])
def upload_file():
    """Save one uploaded file, index it in Chroma and record it in the session.

    Expects a multipart form with the file under ``file`` and the client-side
    identifier under ``file_count``.

    Returns:
        A success payload with HTTP 200, or an error payload with HTTP 400
        when the file or its id is missing, the filename is empty, or the
        file type is not supported.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No files in request', 'status': 'error'}), 400

    file = request.files['file']
    file_id = request.form.get('file_count')
    if file_id is None:
        # Without an id the chunks could never be looked up or removed again.
        return jsonify({'error': 'No file_count in request', 'status': 'error'}), 400

    # secure_filename can reduce a hostile or empty name to "".
    filename = secure_filename(file.filename or "")
    if not filename:
        return jsonify({'error': 'No file selected', 'status': 'error'}), 400

    file_path = os.path.join(UPLOAD_FOLDER, filename)
    file.save(file_path)

    try:
        add_file_to_chroma(file_path,file_id)
    except ValueError as err:
        # Unsupported file type: do not keep the rejected upload on disk and
        # do not record it in the session.
        os.remove(file_path)
        return jsonify({'error': str(err), 'status': 'error'}), 400

    if not session.get("files"):
        session["files"] = dict()
    session['files'][file_id] = (file_path,filename)
    # Changes inside a nested dict are not detected automatically.
    session.modified = True

    return jsonify({
        'message': 'Files uploaded successfully',
        'status': 'success'
    }), 200
144
+
145
@app.route('/get-files',methods=["GET"])
def get_files():
    """Return the ``files`` entry of the current session as JSON.

    The value is whatever ``session.get("files")`` yields, so it is ``null``
    in the response when the session has no such entry.
    """
    tracked_files = session.get("files")
    payload = {"files": tracked_files}
    return jsonify(payload),200
148
+
149
@app.route('/chroma-status',methods=["GET"])
def chroma_status():
    """Return everything ``get()`` reports for the persisted Chroma store."""
    store = Chroma(persist_directory=CHROMA_PATH)
    contents = store.get()
    return jsonify({"data": contents}),200
154
+
155
@app.route('/remove-file',methods=["POST"])
def remove_file():
    """Forget an uploaded file and delete its chunks from Chroma.

    Expects the form field ``file_id`` with the id used at upload time.

    Returns:
        A success payload with HTTP 200, or an error payload with HTTP 404
        when the session has no file recorded under that id.
    """
    file_id = request.form.get('file_id')
    files = session.get("files") or {}
    if file_id not in files:
        # Covers a missing field, an unknown id and a session without uploads,
        # all of which previously surfaced as an unhandled KeyError.
        return jsonify({'error': 'Unknown file_id', 'status': 'error'}), 404

    files.pop(file_id)
    # Changes inside a nested dict are not detected automatically.
    session.modified = True

    remove_file_from_chroma(file_id)
    return jsonify({
        'message': 'File deleted successfully',
        'status': 'success'
    }), 200
164
 
165
  def main():
166
  app.run(host="0.0.0.0",port=8000,debug=True)
requirements.txt CHANGED
@@ -14,4 +14,5 @@ google-generativeai
14
  markdown
15
  python-docx
16
  flask
17
- werkzeug
 
 
14
  markdown
15
  python-docx
16
  flask
17
+ werkzeug
18
+ Flask-Session
templates/index.html CHANGED
@@ -20,6 +20,29 @@
20
  </head>
21
 
22
  <body class="bg-gray-100 dark:bg-gray-900 text-gray-900 dark:text-gray-100">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  <div class="flex flex-col h-screen">
24
  <!-- Combined Header -->
25
  <div class="bg-white dark:bg-gray-800 p-4 flex justify-between items-center w-full ">
@@ -127,8 +150,17 @@
127
  </div>
128
  </div>
129
  </div>
130
- <script src="https://code.jquery.com/jquery-3.7.1.min.js" integrity="sha256-/JqT3SQfawRcv/BIHPThkBvs0OEvtFFmqPF/lYI/Cxo=" crossorigin="anonymous"></script>
 
131
  <script>
 
 
 
 
 
 
 
 
132
 
133
  // File handling functionality
134
  const dropZone = document.getElementById('dropzone'); // label of input
@@ -231,6 +263,26 @@
231
  <i class="fas fa-times"></i>
232
  </button>`;
233
  fileList.appendChild(listItem);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  file_count++;
235
  });
236
  manageEmptyFileList();
@@ -254,10 +306,53 @@
254
  function removeFile(index) {
255
  console.log(index);
256
  file_list[index] = null;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  document.getElementById(`file_item_${index}`).remove();
258
  manageEmptyFileList();
259
  }
260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
  // Toggle sidebar for mobile view
263
  document.getElementById('menu-toggle').addEventListener('click', function () {
@@ -267,6 +362,7 @@
267
 
268
  window.onload = function () {
269
  manageEmptyFileList();
 
270
  }
271
  </script>
272
  </body>
 
20
  </head>
21
 
22
  <body class="bg-gray-100 dark:bg-gray-900 text-gray-900 dark:text-gray-100">
23
+
24
+
25
+ <div id="loader" class="fixed top-0 left-0 w-full h-full" style="background-color: rgba(50, 50, 50, 0.5);z-index: 1000;">
26
+ <div class="text-center" style="position: absolute;top: 50%;left: 50%;transform: translate(-50%, -50%);">
27
+ <div role="status">
28
+ <svg aria-hidden="true"
29
+ class="inline w-8 h-8 text-gray-200 animate-spin dark:text-gray-600 fill-blue-600"
30
+ viewBox="0 0 100 101" fill="none" xmlns="http://www.w3.org/2000/svg">
31
+ <path
32
+ d="M100 50.5908C100 78.2051 77.6142 100.591 50 100.591C22.3858 100.591 0 78.2051 0 50.5908C0 22.9766 22.3858 0.59082 50 0.59082C77.6142 0.59082 100 22.9766 100 50.5908ZM9.08144 50.5908C9.08144 73.1895 27.4013 91.5094 50 91.5094C72.5987 91.5094 90.9186 73.1895 90.9186 50.5908C90.9186 27.9921 72.5987 9.67226 50 9.67226C27.4013 9.67226 9.08144 27.9921 9.08144 50.5908Z"
33
+ fill="currentColor" />
34
+ <path
35
+ d="M93.9676 39.0409C96.393 38.4038 97.8624 35.9116 97.0079 33.5539C95.2932 28.8227 92.871 24.3692 89.8167 20.348C85.8452 15.1192 80.8826 10.7238 75.2124 7.41289C69.5422 4.10194 63.2754 1.94025 56.7698 1.05124C51.7666 0.367541 46.6976 0.446843 41.7345 1.27873C39.2613 1.69328 37.813 4.19778 38.4501 6.62326C39.0873 9.04874 41.5694 10.4717 44.0505 10.1071C47.8511 9.54855 51.7191 9.52689 55.5402 10.0491C60.8642 10.7766 65.9928 12.5457 70.6331 15.2552C75.2735 17.9648 79.3347 21.5619 82.5849 25.841C84.9175 28.9121 86.7997 32.2913 88.1811 35.8758C89.083 38.2158 91.5421 39.6781 93.9676 39.0409Z"
36
+ fill="currentFill" />
37
+ </svg>
38
+ <br>
39
+ <span id="loader_msg">Loading...</span>
40
+ </div>
41
+ </div>
42
+
43
+ </div>
44
+
45
+
46
  <div class="flex flex-col h-screen">
47
  <!-- Combined Header -->
48
  <div class="bg-white dark:bg-gray-800 p-4 flex justify-between items-center w-full ">
 
150
  </div>
151
  </div>
152
  </div>
153
+ <script src="https://code.jquery.com/jquery-3.7.1.min.js"
154
+ integrity="sha256-/JqT3SQfawRcv/BIHPThkBvs0OEvtFFmqPF/lYI/Cxo=" crossorigin="anonymous"></script>
155
  <script>
156
/**
 * Show the full-page loader overlay with the given status message.
 */
function set_loading(msg = "Loading...") {
    const overlay = $("#loader");
    const label = $("#loader_msg");
    label.text(msg);
    overlay.removeClass("hidden");
}
160
+
161
/**
 * Hide the full-page loader overlay.
 */
function remove_loading() {
    const overlay = $("#loader");
    overlay.addClass("hidden");
}
164
 
165
  // File handling functionality
166
  const dropZone = document.getElementById('dropzone'); // label of input
 
263
  <i class="fas fa-times"></i>
264
  </button>`;
265
  fileList.appendChild(listItem);
266
+ set_loading("Uploading File "+ file.name);
267
+
268
+ var formData = new FormData();
269
+ formData.append('file', file);
270
+ formData.append('file_count', file_count);
271
+ $.ajax({
272
+ url: '/upload-file',
273
+ type: 'POST',
274
+ contentType: false,
275
+ processData: false,
276
+ data: formData,
277
+ success: function (response) {
278
+ console.log(response);
279
+ },
280
+ error: function (xhr, status, error) {
281
+ console.log(error);
282
+ }
283
+ }).then(function () {
284
+ remove_loading();
285
+ })
286
  file_count++;
287
  });
288
  manageEmptyFileList();
 
306
  function removeFile(index) {
307
  console.log(index);
308
  file_list[index] = null;
309
+ set_loading("Removing File");
310
+ // ajax call to remove file
311
+ $.ajax({
312
+ url: '/remove-file',
313
+ type: 'POST',
314
+ data: {
315
+ file_id: index
316
+ },
317
+ success: function (response) {
318
+ console.log(response);
319
+ },
320
+ error: function (xhr, status, error) {
321
+ console.log(error);
322
+ }
323
+ }).then(function () {
324
+ remove_loading();
325
+ });
326
+
327
  document.getElementById(`file_item_${index}`).remove();
328
  manageEmptyFileList();
329
  }
330
 
331
/**
 * Manual check for the loader overlay: posts to /wait so the server
 * holds the request for `sec` seconds while the overlay is shown.
 */
function test(sec) {
    console.log("Before Ajax");
    set_loading();
    $.ajax({
        url: '/wait',
        type: 'POST',
        data: {
            time: sec
        },
        success: function (response) {
            console.log("Ajax Success");
            console.log(response);
        },
        error: function (xhr, status, error) {
            console.log(error);
        }
    }).always(function () {
        // always() runs on success and on failure, so the overlay is
        // hidden even when the request is rejected.
        console.log("Then Ajax");
        remove_loading();
    });

    console.log("After Ajax")
}
355
+
356
 
357
  // Toggle sidebar for mobile view
358
  document.getElementById('menu-toggle').addEventListener('click', function () {
 
362
 
363
  window.onload = function () {
364
  manageEmptyFileList();
365
+ remove_loading();
366
  }
367
  </script>
368
  </body>