Spaces:
Running
Running
Commit ·
538c3da
1
Parent(s): c97c61d
Dynamic source update endpoint
Browse files- app.py +83 -10
- postman.json +45 -0
app.py
CHANGED
|
@@ -3,6 +3,7 @@ from flask_cors import CORS
|
|
| 3 |
import os
|
| 4 |
import logging
|
| 5 |
import functools
|
|
|
|
| 6 |
import pandas as pd
|
| 7 |
import threading
|
| 8 |
import time
|
|
@@ -23,7 +24,7 @@ from config import (
|
|
| 23 |
EXTERNAL_URL, URL_UPDATE_PERIOD_MINUTES, URL_FETCH_ENABLED,
|
| 24 |
RAG_CSV_MAX_RESULTS, RAG_CSV_CONFIDENCE_THRESHOLD
|
| 25 |
)
|
| 26 |
-
from utils import download_and_unzip_gdrive_file, download_gdrive_file, fetch_and_clean_url
|
| 27 |
|
| 28 |
# Logging Setup
|
| 29 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -38,6 +39,9 @@ rag_system = None
|
|
| 38 |
user_df = None
|
| 39 |
_APP_BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 40 |
|
|
|
|
|
|
|
|
|
|
| 41 |
# --- Helper: Load Users ---
|
| 42 |
def load_users_from_csv():
|
| 43 |
global user_df
|
|
@@ -164,6 +168,20 @@ def url_periodic_loop():
|
|
| 164 |
logger.info(f"[URL_UPDATE] Triggering scheduled periodic update...")
|
| 165 |
trigger_url_update()
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
# --- Startup Logic ---
|
| 168 |
def run_startup_tasks():
|
| 169 |
global rag_system
|
|
@@ -304,17 +322,72 @@ def update_faiss_index():
|
|
| 304 |
@app.route('/admin/rebuild_index', methods=['POST'])
|
| 305 |
@require_admin_auth
|
| 306 |
def rebuild_index():
|
| 307 |
-
global rag_system
|
| 308 |
try:
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
| 318 |
return jsonify({"error": str(e)}), 500
|
| 319 |
|
| 320 |
# Retained specific endpoint name to ensure the frontend doesn't break
|
|
|
|
| 3 |
import os
|
| 4 |
import logging
|
| 5 |
import functools
|
| 6 |
+
from datetime import datetime
|
| 7 |
import pandas as pd
|
| 8 |
import threading
|
| 9 |
import time
|
|
|
|
| 24 |
EXTERNAL_URL, URL_UPDATE_PERIOD_MINUTES, URL_FETCH_ENABLED,
|
| 25 |
RAG_CSV_MAX_RESULTS, RAG_CSV_CONFIDENCE_THRESHOLD
|
| 26 |
)
|
| 27 |
+
from utils import download_and_unzip_gdrive_file, download_gdrive_file, fetch_and_clean_url, FAISS_RAG_SUPPORTED_EXTENSIONS
|
| 28 |
|
| 29 |
# Logging Setup
|
| 30 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 39 |
user_df = None
|
| 40 |
_APP_BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 41 |
|
| 42 |
+
# Source file constants
|
| 43 |
+
SOURCES_ARCHIVE_DIR = os.path.join(_APP_BASE_DIR, 'sources_archive')
|
| 44 |
+
|
| 45 |
# --- Helper: Load Users ---
|
| 46 |
def load_users_from_csv():
|
| 47 |
global user_df
|
|
|
|
| 168 |
logger.info(f"[URL_UPDATE] Triggering scheduled periodic update...")
|
| 169 |
trigger_url_update()
|
| 170 |
|
| 171 |
+
def rebuild_rag_system():
    """Rebuild the RAG index and report the outcome as a small dict.

    When URL fetching is enabled and an external URL is configured, the
    rebuild is delegated to ``trigger_url_update()``; otherwise the index is
    rebuilt directly with ``initialize_and_get_rag_system(force_rebuild=True)``
    and the module-level ``rag_system`` is reassigned.

    Returns:
        dict: ``{"status": ...}`` on success, or a dict containing an
        ``"error"`` key on failure. In the URL branch the failing result of
        ``trigger_url_update()`` is returned unchanged.
    """
    global rag_system

    url_sources_active = URL_FETCH_ENABLED and EXTERNAL_URL
    if not url_sources_active:
        # Local-only path: force a rebuild and publish the new instance.
        rag_system = initialize_and_get_rag_system(force_rebuild=True)
        if rag_system is None:
            return {"error": "Failed to rebuild RAG system"}
        return {"status": "Index rebuilt successfully"}

    # NOTE(review): presumably trigger_url_update() rebuilds the index itself
    # and always returns a dict -- confirm against its definition.
    url_outcome = trigger_url_update()
    if "error" in url_outcome:
        return url_outcome
    return {"status": "Index rebuilt successfully using combined local & URL sources"}
|
| 184 |
+
|
| 185 |
# --- Startup Logic ---
|
| 186 |
def run_startup_tasks():
|
| 187 |
global rag_system
|
|
|
|
| 322 |
@app.route('/admin/rebuild_index', methods=['POST'])
@require_admin_auth
def rebuild_index():
    """Admin endpoint: rebuild the RAG index via ``rebuild_rag_system()``.

    Returns:
        A ``(json, status)`` pair: the result dict with HTTP 200 on success,
        the result dict with HTTP 500 when it contains an ``"error"`` key, or
        ``{"error": <message>}`` with HTTP 500 when an exception is raised.
    """
    try:
        result = rebuild_rag_system()
        status_code = 500 if "error" in result else 200
        return jsonify(result), status_code
    except Exception as e:
        # Log the traceback before answering; otherwise the failure is only
        # visible to the caller and leaves no trace in the server logs.
        logger.error(f"[REBUILD_INDEX] Error rebuilding index: {e}", exc_info=True)
        return jsonify({"error": str(e)}), 500
|
| 332 |
+
|
| 333 |
+
@app.route('/admin/upload/<path:filename>', methods=['POST'])
@require_admin_auth
def upload_vehicle_csv(filename):
    """Admin endpoint: replace a RAG source file and rebuild the index.

    The uploaded file is read from the multipart field ``file`` (falling back
    to ``upload`` or ``source_file``). It is first written to a temporary
    path inside ``RAG_SOURCES_DIR``; any existing file with the same name is
    copied into a timestamped folder under ``SOURCES_ARCHIVE_DIR``; the
    temporary file is then swapped into place with ``os.replace`` and
    ``rebuild_rag_system()`` is called.

    Args:
        filename: Target file name taken from the URL. It must be a bare file
            name (no directory components) whose extension is listed in
            ``FAISS_RAG_SUPPORTED_EXTENSIONS``.

    Returns:
        A ``(json, status)`` pair:
        * 400 if no file was sent, the name is invalid, the extension is
          unsupported, or the upload is empty;
        * 500 if saving/archiving fails or the rebuild reports an error
          (in the latter case the new file has already been put in place);
        * 200 with the saved and archive paths on success.
    """
    # Imported locally so the handler does not rely on a module-level
    # ``import shutil`` that is not among the imports visible in this file.
    import shutil

    uploaded_file = (
        request.files.get('file')
        or request.files.get('upload')
        or request.files.get('source_file')
    )
    if not uploaded_file:
        # The endpoint accepts every supported source type, not only CSV.
        return jsonify({"error": "No file uploaded. Use multipart/form-data with field 'file'."}), 400

    # Reject anything that is not a bare file name: <path:...> lets slashes
    # through, and basename() differing from the input reveals them.
    requested_name = (filename or '').strip()
    safe_filename = os.path.basename(requested_name)
    if not safe_filename or safe_filename != requested_name:
        return jsonify({"error": "Invalid filename."}), 400

    file_ext = safe_filename.rsplit('.', 1)[-1].lower() if '.' in safe_filename else ''
    if file_ext not in FAISS_RAG_SUPPORTED_EXTENSIONS:
        return jsonify({
            "error": "Unsupported file type.",
            "supported_extensions": sorted(FAISS_RAG_SUPPORTED_EXTENSIONS)
        }), 400

    target_path = os.path.join(RAG_SOURCES_DIR, safe_filename)
    temp_upload_path = os.path.join(RAG_SOURCES_DIR, f".{safe_filename}.uploading")
    archive_path = None

    try:
        # Inside the try so a filesystem failure here also yields a JSON 500.
        os.makedirs(RAG_SOURCES_DIR, exist_ok=True)
        os.makedirs(SOURCES_ARCHIVE_DIR, exist_ok=True)

        uploaded_file.save(temp_upload_path)

        if os.path.getsize(temp_upload_path) == 0:
            # The temporary file is removed by the finally clause below.
            return jsonify({"error": "Uploaded file is empty."}), 400

        if os.path.exists(target_path):
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            archive_dir = os.path.join(SOURCES_ARCHIVE_DIR, timestamp)
            os.makedirs(archive_dir, exist_ok=True)
            archive_path = os.path.join(archive_dir, safe_filename)
            # Copy instead of move: the live file must stay in place until
            # os.replace swaps it, so a failure between the two steps cannot
            # leave the sources directory without this file.
            shutil.copy2(target_path, archive_path)

        os.replace(temp_upload_path, target_path)

        result = rebuild_rag_system()
        if "error" in result:
            return jsonify(result), 500

        response_payload = {
            "status": "success",
            "message": f"{safe_filename} uploaded and index rebuilt successfully.",
            "filename": safe_filename,
            "saved_to": target_path,
            "archived_previous_to": archive_path
        }
        return jsonify(response_payload), 200
    except Exception as e:
        logger.error(f"[SOURCE_UPLOAD] Error updating {safe_filename}: {e}", exc_info=True)
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup; after a successful os.replace the temp path no
        # longer exists. A cleanup failure must not mask the real response.
        if os.path.exists(temp_upload_path):
            try:
                os.remove(temp_upload_path)
            except OSError as cleanup_error:
                logger.warning(
                    f"[SOURCE_UPLOAD] Could not remove temp file {temp_upload_path}: {cleanup_error}"
                )
|
| 392 |
|
| 393 |
# Retained specific endpoint name to ensure the frontend doesn't break
|
postman.json
CHANGED
|
@@ -125,6 +125,51 @@
|
|
| 125 |
},
|
| 126 |
"response": []
|
| 127 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
{
|
| 129 |
"name": "Public - Status",
|
| 130 |
"request": {
|
|
|
|
| 125 |
},
|
| 126 |
"response": []
|
| 127 |
},
|
| 128 |
+
{
|
| 129 |
+
"name": "Admin - Upload Source File",
|
| 130 |
+
"request": {
|
| 131 |
+
"auth": {
|
| 132 |
+
"type": "basic",
|
| 133 |
+
"basic": [
|
| 134 |
+
{
|
| 135 |
+
"key": "password",
|
| 136 |
+
"value": "1234",
|
| 137 |
+
"type": "string"
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"key": "username",
|
| 141 |
+
"value": "admin",
|
| 142 |
+
"type": "string"
|
| 143 |
+
}
|
| 144 |
+
]
|
| 145 |
+
},
|
| 146 |
+
"method": "POST",
|
| 147 |
+
"header": [],
|
| 148 |
+
"body": {
|
| 149 |
+
"mode": "formdata",
|
| 150 |
+
"formdata": [
|
| 151 |
+
{
|
| 152 |
+
"key": "file",
|
| 153 |
+
"type": "file",
|
| 154 |
+
"src": ""
|
| 155 |
+
}
|
| 156 |
+
]
|
| 157 |
+
},
|
| 158 |
+
"url": {
|
| 159 |
+
"raw": "{{base_url}}/admin/upload/vehicle.csv",
|
| 160 |
+
"host": [
|
| 161 |
+
"{{base_url}}"
|
| 162 |
+
],
|
| 163 |
+
"path": [
|
| 164 |
+
"admin",
|
| 165 |
+
"upload",
|
| 166 |
+
"vehicle.csv"
|
| 167 |
+
]
|
| 168 |
+
},
|
| 169 |
+
"description": "Uploads a replacement source file using the filename in the URL, archives the previous file with the same name, and rebuilds the index."
|
| 170 |
+
},
|
| 171 |
+
"response": []
|
| 172 |
+
},
|
| 173 |
{
|
| 174 |
"name": "Public - Status",
|
| 175 |
"request": {
|