Spaces:
Sleeping
Sleeping
Prathamesh Sable
committed on
Commit
·
d1c1737
1
Parent(s):
dc73fb8
can upload and remove files from frontend to vector database directly
Browse files- .gitignore +2 -1
- app.py +104 -20
- requirements.txt +2 -1
- templates/index.html +97 -1
.gitignore
CHANGED
|
@@ -4,4 +4,5 @@ chroma_db
|
|
| 4 |
.vscode
|
| 5 |
chroma
|
| 6 |
/trash
|
| 7 |
-
uploads/
|
|
|
|
|
|
| 4 |
.vscode
|
| 5 |
chroma
|
| 6 |
/trash
|
| 7 |
+
uploads/
|
| 8 |
+
/flask_session
|
app.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
from flask import render_template
|
|
|
|
| 3 |
from werkzeug.utils import secure_filename
|
| 4 |
|
| 5 |
from langchain.document_loaders import DirectoryLoader,PyPDFLoader,UnstructuredWordDocumentLoader,TextLoader,UnstructuredHTMLLoader,UnstructuredMarkdownLoader
|
|
@@ -35,11 +37,69 @@ if not os.path.exists(UPLOAD_FOLDER):
|
|
| 35 |
|
| 36 |
app = Flask(__name__)
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
@app.route("/ai",methods=["POST"])
|
| 45 |
def aiPost():
|
|
@@ -53,30 +113,54 @@ def aiPost():
|
|
| 53 |
return response_answer.text
|
| 54 |
|
| 55 |
# add files
|
| 56 |
-
@app.route('/upload', methods=['POST'])
|
| 57 |
-
def
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
|
|
|
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
|
| 75 |
return jsonify({
|
| 76 |
'message': 'Files uploaded successfully',
|
| 77 |
-
'
|
| 78 |
-
})
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
def main():
|
| 82 |
app.run(host="0.0.0.0",port=8000,debug=True)
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
from flask import Flask,request, jsonify,session
|
| 3 |
from flask import render_template
|
| 4 |
+
from flask_session import Session
|
| 5 |
from werkzeug.utils import secure_filename
|
| 6 |
|
| 7 |
from langchain.document_loaders import DirectoryLoader,PyPDFLoader,UnstructuredWordDocumentLoader,TextLoader,UnstructuredHTMLLoader,UnstructuredMarkdownLoader
|
|
|
|
| 37 |
|
| 38 |
app = Flask(__name__)
|
| 39 |
|
| 40 |
+
# Configure Flask-Session: permanent sessions using the "filesystem"
# session type, then bind the extension to the app.
app.config.update(
    SESSION_PERMANENT=True,
    SESSION_TYPE="filesystem",
)
Session(app)
|
| 44 |
+
|
| 45 |
+
def add_to_chroma(chunks, embedding_function, CHROMA_PATH=CHROMA_PATH):
    """Add ``chunks`` to the Chroma store persisted at ``CHROMA_PATH``.

    Args:
        chunks: Documents handed unchanged to ``Chroma.add_documents``.
        embedding_function: Embedding function passed to the ``Chroma``
            constructor.
        CHROMA_PATH: Persist directory; defaults to the module-level
            ``CHROMA_PATH``.

    Returns:
        Always ``True``.
    """
    # Open the store that lives in the persist directory.
    vector_store = Chroma(
        embedding_function=embedding_function,
        persist_directory=CHROMA_PATH,
    )
    # Hand the new chunks to the store.
    vector_store.add_documents(chunks)
    return True
|
| 55 |
+
|
| 56 |
+
def remove_file_from_chroma(file_id, CHROMA_PATH=CHROMA_PATH):
    """Delete every stored chunk whose ``file_id`` metadata equals ``file_id``.

    Args:
        file_id: Value matched against the ``file_id`` metadata key of the
            stored chunks.
        CHROMA_PATH: Persist directory; defaults to the module-level
            ``CHROMA_PATH``.

    Returns:
        Always ``True``, including when no chunk matched.
    """
    db = Chroma(persist_directory=CHROMA_PATH)

    # Only the ids are needed, so ask the store to include nothing else.
    matching_ids = db.get(where={"file_id": file_id}, include=[])["ids"]

    # Never call delete() with an empty id list: there is nothing to remove.
    # NOTE(review): how Chroma treats delete(ids=[]) appears to differ between
    # versions (error vs. unfiltered delete) - confirm against the pinned one.
    if matching_ids:
        db.delete(ids=matching_ids)
    return True
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def add_file_to_chroma(file_path, file_id):
    """Load a file, split it into chunks and add them to the Chroma store.

    The loader is chosen from the file extension (pdf, docx, txt, html, md).
    Every chunk gets ``file_id`` written to its ``file_id`` metadata key
    before the chunks are passed to ``add_to_chroma``.

    Args:
        file_path: Path of the file to ingest.
        file_id: Identifier stored in each chunk's metadata.

    Returns:
        Always ``True`` on success.

    Raises:
        ValueError: If the extension is missing or not one of the supported
            types.
    """
    loaders_by_extension = {
        "pdf": PyPDFLoader,
        "docx": UnstructuredWordDocumentLoader,
        "txt": TextLoader,
        "html": UnstructuredHTMLLoader,
        "md": UnstructuredMarkdownLoader,
    }

    # splitext only looks at the final path component, so dots in directory
    # names and extension-less files no longer yield a bogus "extension";
    # lower() makes "REPORT.PDF" behave like "report.pdf".
    extension = os.path.splitext(file_path)[1].lstrip(".").lower()
    loader_cls = loaders_by_extension.get(extension)
    if loader_cls is None:
        raise ValueError(f"Unsupported file type: {extension or '<none>'}")

    documents = loader_cls(file_path).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    texts = text_splitter.split_documents(documents)

    # Tag every chunk with the file id.
    for text in texts:
        text.metadata["file_id"] = file_id

    add_to_chroma(texts, hugging_face_ef)
    return True
|
| 92 |
+
|
| 93 |
+
|
| 94 |
|
| 95 |
@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page
|
| 98 |
|
| 99 |
+
@app.route('/wait',methods=["POST"])
def wait():
    """Sleep for the number of seconds given in the ``time`` form field.

    Returns:
        ``({"status": "ok"}, 200)`` after sleeping, or a 400 JSON error when
        ``time`` is missing, not an integer, or negative.
    """
    # Upper bound so a single request cannot block a worker indefinitely.
    max_wait_seconds = 30

    raw_seconds = request.form.get("time")
    try:
        seconds = int(raw_seconds)
    except (TypeError, ValueError):
        # TypeError: field missing (None); ValueError: not an integer string.
        return jsonify({"status": "error",
                        "error": "'time' must be an integer number of seconds"}), 400

    if seconds < 0:
        # time.sleep() raises ValueError on negative values.
        return jsonify({"status": "error",
                        "error": "'time' must not be negative"}), 400

    time.sleep(min(seconds, max_wait_seconds))
    return jsonify({"status":"ok"}),200
|
| 103 |
|
| 104 |
@app.route("/ai",methods=["POST"])
|
| 105 |
def aiPost():
|
|
|
|
| 113 |
return response_answer.text
|
| 114 |
|
| 115 |
# add files
|
| 116 |
+
@app.route('/upload-file', methods=['POST'])
def upload_file():
    """Save an uploaded file, index it in Chroma and record it in the session.

    Expects a multipart form with a ``file`` part and a ``file_count`` field;
    ``file_count`` is used as the file id in both the session and the chunk
    metadata.

    Returns:
        A 200 JSON success response, or a 400 JSON error when the file part,
        the ``file_count`` field or the filename is missing, or when
        ``add_file_to_chroma`` rejects the file with ``ValueError``.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No files in request', 'status': 'error'}), 400

    file = request.files['file']
    file_id = request.form.get('file_count')
    if file_id is None:
        # Without an id the entry could never be looked up or removed again.
        return jsonify({'error': 'Missing file_count', 'status': 'error'}), 400

    # Secure the filename; it can come back empty for names made only of
    # unsafe characters, which would make file_path point at the folder.
    filename = secure_filename(file.filename or "")
    if not filename:
        return jsonify({'error': 'No file selected', 'status': 'error'}), 400

    file_path = os.path.join(UPLOAD_FOLDER, filename)
    file.save(file_path)

    try:
        add_file_to_chroma(file_path, file_id)
    except ValueError as err:
        # Rejected by ingestion (e.g. unsupported type): do not keep the file
        # on disk and do not list it in the session.
        os.remove(file_path)
        return jsonify({'error': str(err), 'status': 'error'}), 400

    # Record the file only after it was indexed successfully.
    files = session.get("files") or {}
    files[file_id] = (file_path, filename)
    session["files"] = files
    # In-place changes to nested containers are not tracked automatically.
    session.modified = True

    return jsonify({
        'message': 'Files uploaded successfully',
        'status': 'success'
    }), 200
|
| 144 |
+
|
| 145 |
+
@app.route('/get-files',methods=["GET"])
def get_files():
    """Return the ``files`` entry of the current session as JSON."""
    payload = {"files": session.get("files")}
    return jsonify(payload), 200
|
| 148 |
+
|
| 149 |
+
@app.route('/chroma-status',methods=["GET"])
def chroma_status():
    """Return whatever ``Chroma.get()`` yields for the store at ``CHROMA_PATH``."""
    # Open the persisted store and fetch everything it returns by default.
    store = Chroma(persist_directory=CHROMA_PATH)
    contents = store.get()
    return jsonify({"data": contents}), 200
|
| 154 |
+
|
| 155 |
+
@app.route('/remove-file',methods=["POST"])
def remove_file():
    """Remove a previously uploaded file from Chroma and from the session.

    Expects a ``file_id`` form field matching the id used at upload time.

    Returns:
        A 200 JSON success response, a 400 JSON error when ``file_id`` is
        missing, or a 404 JSON error when the session has no such file.
    """
    file_id = request.form.get('file_id')
    if file_id is None:
        return jsonify({'error': 'Missing file_id', 'status': 'error'}), 400

    # The session may not have a "files" entry yet (nothing uploaded).
    files = session.get("files") or {}
    if file_id not in files:
        return jsonify({'error': 'Unknown file_id', 'status': 'error'}), 404

    # Delete the chunks first so a failure leaves the session entry intact.
    remove_file_from_chroma(file_id)

    files.pop(file_id)
    session["files"] = files
    # In-place changes to nested containers are not tracked automatically.
    session.modified = True

    return jsonify({
        'message': 'File deleted successfully',
        'status': 'success'
    }), 200
|
| 164 |
|
| 165 |
def main():
|
| 166 |
app.run(host="0.0.0.0",port=8000,debug=True)
|
requirements.txt
CHANGED
|
@@ -14,4 +14,5 @@ google-generativeai
|
|
| 14 |
markdown
|
| 15 |
python-docx
|
| 16 |
flask
|
| 17 |
-
werkzeug
|
|
|
|
|
|
| 14 |
markdown
|
| 15 |
python-docx
|
| 16 |
flask
|
| 17 |
+
werkzeug
|
| 18 |
+
Flask-Session
|
templates/index.html
CHANGED
|
@@ -20,6 +20,29 @@
|
|
| 20 |
</head>
|
| 21 |
|
| 22 |
<body class="bg-gray-100 dark:bg-gray-900 text-gray-900 dark:text-gray-100">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
<div class="flex flex-col h-screen">
|
| 24 |
<!-- Combined Header -->
|
| 25 |
<div class="bg-white dark:bg-gray-800 p-4 flex justify-between items-center w-full ">
|
|
@@ -127,8 +150,17 @@
|
|
| 127 |
</div>
|
| 128 |
</div>
|
| 129 |
</div>
|
| 130 |
-
<script src="https://code.jquery.com/jquery-3.7.1.min.js"
|
|
|
|
| 131 |
<script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
// File handling functionality
|
| 134 |
const dropZone = document.getElementById('dropzone'); // label of input
|
|
@@ -231,6 +263,26 @@
|
|
| 231 |
<i class="fas fa-times"></i>
|
| 232 |
</button>`;
|
| 233 |
fileList.appendChild(listItem);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
file_count++;
|
| 235 |
});
|
| 236 |
manageEmptyFileList();
|
|
@@ -254,10 +306,53 @@
|
|
| 254 |
// Drop the file at `index` from the in-memory list and remove its list item.
function removeFile(index) {
    console.log(index);
    file_list[index] = null;

    const listItem = document.getElementById(`file_item_${index}`);
    listItem.remove();

    manageEmptyFileList();
}
|
| 260 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
// Toggle sidebar for mobile view
|
| 263 |
document.getElementById('menu-toggle').addEventListener('click', function () {
|
|
@@ -267,6 +362,7 @@
|
|
| 267 |
|
| 268 |
// Refresh the file-list state once the page has loaded.
window.onload = () => {
    manageEmptyFileList();
};
|
| 271 |
</script>
|
| 272 |
</body>
|
|
|
|
| 20 |
</head>
|
| 21 |
|
| 22 |
<body class="bg-gray-100 dark:bg-gray-900 text-gray-900 dark:text-gray-100">
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
<div id="loader" class="fixed top-0 left-0 w-full h-full" style="background-color: rgba(50, 50, 50, 0.5);z-index: 1000;">
|
| 26 |
+
<div class="text-center" style="position: absolute;top: 50%;left: 50%;transform: translate(-50%, -50%);">
|
| 27 |
+
<div role="status">
|
| 28 |
+
<svg aria-hidden="true"
|
| 29 |
+
class="inline w-8 h-8 text-gray-200 animate-spin dark:text-gray-600 fill-blue-600"
|
| 30 |
+
viewBox="0 0 100 101" fill="none" xmlns="http://www.w3.org/2000/svg">
|
| 31 |
+
<path
|
| 32 |
+
d="M100 50.5908C100 78.2051 77.6142 100.591 50 100.591C22.3858 100.591 0 78.2051 0 50.5908C0 22.9766 22.3858 0.59082 50 0.59082C77.6142 0.59082 100 22.9766 100 50.5908ZM9.08144 50.5908C9.08144 73.1895 27.4013 91.5094 50 91.5094C72.5987 91.5094 90.9186 73.1895 90.9186 50.5908C90.9186 27.9921 72.5987 9.67226 50 9.67226C27.4013 9.67226 9.08144 27.9921 9.08144 50.5908Z"
|
| 33 |
+
fill="currentColor" />
|
| 34 |
+
<path
|
| 35 |
+
d="M93.9676 39.0409C96.393 38.4038 97.8624 35.9116 97.0079 33.5539C95.2932 28.8227 92.871 24.3692 89.8167 20.348C85.8452 15.1192 80.8826 10.7238 75.2124 7.41289C69.5422 4.10194 63.2754 1.94025 56.7698 1.05124C51.7666 0.367541 46.6976 0.446843 41.7345 1.27873C39.2613 1.69328 37.813 4.19778 38.4501 6.62326C39.0873 9.04874 41.5694 10.4717 44.0505 10.1071C47.8511 9.54855 51.7191 9.52689 55.5402 10.0491C60.8642 10.7766 65.9928 12.5457 70.6331 15.2552C75.2735 17.9648 79.3347 21.5619 82.5849 25.841C84.9175 28.9121 86.7997 32.2913 88.1811 35.8758C89.083 38.2158 91.5421 39.6781 93.9676 39.0409Z"
|
| 36 |
+
fill="currentFill" />
|
| 37 |
+
</svg>
|
| 38 |
+
<br>
|
| 39 |
+
<span id="loader_msg">Loading...</span>
|
| 40 |
+
</div>
|
| 41 |
+
</div>
|
| 42 |
+
|
| 43 |
+
</div>
|
| 44 |
+
|
| 45 |
+
|
| 46 |
<div class="flex flex-col h-screen">
|
| 47 |
<!-- Combined Header -->
|
| 48 |
<div class="bg-white dark:bg-gray-800 p-4 flex justify-between items-center w-full ">
|
|
|
|
| 150 |
</div>
|
| 151 |
</div>
|
| 152 |
</div>
|
| 153 |
+
<script src="https://code.jquery.com/jquery-3.7.1.min.js"
|
| 154 |
+
integrity="sha256-/JqT3SQfawRcv/BIHPThkBvs0OEvtFFmqPF/lYI/Cxo=" crossorigin="anonymous"></script>
|
| 155 |
<script>
|
| 156 |
+
// Show the #loader overlay (remove its `hidden` class) with the given message.
function set_loading(msg = "Loading...") {
    const $label = $("#loader_msg");
    const $overlay = $("#loader");

    $label.text(msg);
    $overlay.removeClass("hidden");
}
|
| 160 |
+
|
| 161 |
+
// Hide the #loader overlay by adding the `hidden` class.
function remove_loading() {
    const $overlay = $("#loader");
    $overlay.addClass("hidden");
}
|
| 164 |
|
| 165 |
// File handling functionality
|
| 166 |
const dropZone = document.getElementById('dropzone'); // label of input
|
|
|
|
| 263 |
<i class="fas fa-times"></i>
|
| 264 |
</button>`;
|
| 265 |
fileList.appendChild(listItem);
|
| 266 |
+
set_loading("Uploading File "+ file.name);
|
| 267 |
+
|
| 268 |
+
var formData = new FormData();
|
| 269 |
+
formData.append('file', file);
|
| 270 |
+
formData.append('file_count', file_count);
|
| 271 |
+
$.ajax({
|
| 272 |
+
url: '/upload-file',
|
| 273 |
+
type: 'POST',
|
| 274 |
+
contentType: false,
|
| 275 |
+
processData: false,
|
| 276 |
+
data: formData,
|
| 277 |
+
success: function (response) {
|
| 278 |
+
console.log(response);
|
| 279 |
+
},
|
| 280 |
+
error: function (xhr, status, error) {
|
| 281 |
+
console.log(error);
|
| 282 |
+
}
|
| 283 |
+
}).then(function () {
|
| 284 |
+
remove_loading();
|
| 285 |
+
})
|
| 286 |
file_count++;
|
| 287 |
});
|
| 288 |
manageEmptyFileList();
|
|
|
|
| 306 |
// Remove the file at `index`: clear it from the in-memory list, ask the
// server to delete it via POST /remove-file, and drop its list item.
function removeFile(index) {
    console.log(index);
    file_list[index] = null;
    set_loading("Removing File");

    // ajax call to remove file
    $.ajax({
        url: '/remove-file',
        type: 'POST',
        data: {
            file_id: index
        },
        success: function (response) {
            console.log(response);
        },
        error: function (xhr, status, error) {
            console.log(error);
        }
    }).always(function () {
        // always() fires on success AND failure; a single-callback then()
        // only fires on success and left the overlay up after an error.
        remove_loading();
    });

    document.getElementById(`file_item_${index}`).remove();
    manageEmptyFileList();
}
|
| 330 |
|
| 331 |
+
// Debug helper: show the loader, POST `sec` to /wait, and hide the loader
// again once the request has finished.
function test(sec) {
    console.log("Before Ajax");
    set_loading();

    // ajax call to the /wait endpoint
    $.ajax({
        url: '/wait',
        type: 'POST',
        data: {
            time: sec
        },
        success: function (response) {
            console.log("Ajax Success");
            console.log(response);
        },
        error: function (xhr, status, error) {
            console.log(error);
        }
    }).always(function () {
        // always() fires on success AND failure; a single-callback then()
        // only fires on success and left the overlay up after an error.
        console.log("Then Ajax");
        remove_loading();
    });

    console.log("After Ajax")
}
|
| 355 |
+
|
| 356 |
|
| 357 |
// Toggle sidebar for mobile view
|
| 358 |
document.getElementById('menu-toggle').addEventListener('click', function () {
|
|
|
|
| 362 |
|
| 363 |
// Once the page has loaded, refresh the file-list state and hide the loader.
window.onload = () => {
    manageEmptyFileList();
    remove_loading();
};
|
| 367 |
</script>
|
| 368 |
</body>
|