Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from flask import Flask, request, render_template, send_file, redirect, url_for
|
| 2 |
import os
|
| 3 |
import re
|
| 4 |
import uuid
|
|
@@ -42,12 +42,12 @@ def clear_uploads_folder():
|
|
| 42 |
clear_uploads_folder()
|
| 43 |
print("β
Uploads folder cleared.")
|
| 44 |
|
| 45 |
-
# runtime cache keyed by search
|
| 46 |
index_data = {}
|
| 47 |
|
| 48 |
# ββ helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 49 |
def get_paths(sid: str):
|
| 50 |
-
"""Return per
|
| 51 |
up_folder = os.path.join(BASE_UPLOADS, sid)
|
| 52 |
res_folder = os.path.join(BASE_RESULTS, sid)
|
| 53 |
os.makedirs(up_folder, exist_ok=True)
|
|
@@ -70,7 +70,7 @@ def extract_text(file_path):
|
|
| 70 |
return ""
|
| 71 |
|
| 72 |
def rebuild_merged_and_index(sid: str):
|
| 73 |
-
"""Re
|
| 74 |
up_folder, _, merged_file, _ = get_paths(sid)
|
| 75 |
|
| 76 |
merged_text = ""
|
|
@@ -107,7 +107,7 @@ def index():
|
|
| 107 |
up_folder, _, _, _ = get_paths(sid)
|
| 108 |
paragraphs, embeddings, index_faiss = index_data.get(sid, ([], None, None))
|
| 109 |
|
| 110 |
-
uploaded_filenames = sorted(os.listdir(up_folder))
|
| 111 |
|
| 112 |
results = []
|
| 113 |
query = ""
|
|
@@ -127,12 +127,14 @@ def index():
|
|
| 127 |
q_embed = q_embed[np.newaxis, :]
|
| 128 |
faiss.normalize_L2(q_embed)
|
| 129 |
D, I = index_faiss.search(q_embed, k=min(k, len(paragraphs)))
|
| 130 |
-
|
|
|
|
|
|
|
| 131 |
|
| 132 |
_, res_folder, _, result_file = get_paths(sid)
|
| 133 |
with open(result_file, "w", encoding='utf-8') as f:
|
| 134 |
-
for
|
| 135 |
-
f.write(
|
| 136 |
|
| 137 |
return render_template(
|
| 138 |
"index.html",
|
|
@@ -140,10 +142,9 @@ def index():
|
|
| 140 |
query=query,
|
| 141 |
topk=k,
|
| 142 |
sid=sid,
|
| 143 |
-
uploaded_filenames=uploaded_filenames
|
| 144 |
)
|
| 145 |
|
| 146 |
-
|
| 147 |
@app.route("/upload", methods=["POST"])
|
| 148 |
def upload_file():
|
| 149 |
sid = request.args.get("sid")
|
|
@@ -181,7 +182,6 @@ def download_merged():
|
|
| 181 |
return ("Nothing to download", 404)
|
| 182 |
return send_file(merged_file, as_attachment=True)
|
| 183 |
|
| 184 |
-
|
| 185 |
@app.route("/reset")
|
| 186 |
def reset():
|
| 187 |
sid = request.args.get("sid")
|
|
@@ -201,12 +201,48 @@ def reset():
|
|
| 201 |
def ping():
|
| 202 |
return "pong", 200
|
| 203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
#if __name__ == "__main__":
|
| 205 |
# from waitress import serve
|
| 206 |
-
# # Use threads to approximate βworkersβ on Windows (Waitress is singleβprocess).
|
| 207 |
# serve(app, host="0.0.0.0", port=9001, threads=4)
|
| 208 |
|
| 209 |
-
|
| 210 |
if __name__ == "__main__":
|
| 211 |
app.run(host="0.0.0.0", port=7860)
|
| 212 |
-
|
|
|
|
| 1 |
+
from flask import Flask, request, render_template, send_file, redirect, url_for, jsonify
|
| 2 |
import os
|
| 3 |
import re
|
| 4 |
import uuid
|
|
|
|
| 42 |
clear_uploads_folder()
|
| 43 |
print("β
Uploads folder cleared.")
|
| 44 |
|
| 45 |
+
# runtime cache keyed by search-id β (paragraphs, embeddings, faiss-index)
|
| 46 |
index_data = {}
|
| 47 |
|
| 48 |
# ββ helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 49 |
def get_paths(sid: str):
|
| 50 |
+
"""Return per-search folders & files, creating them if needed."""
|
| 51 |
up_folder = os.path.join(BASE_UPLOADS, sid)
|
| 52 |
res_folder = os.path.join(BASE_RESULTS, sid)
|
| 53 |
os.makedirs(up_folder, exist_ok=True)
|
|
|
|
| 70 |
return ""
|
| 71 |
|
| 72 |
def rebuild_merged_and_index(sid: str):
|
| 73 |
+
"""Re-embed everything for *this* search id."""
|
| 74 |
up_folder, _, merged_file, _ = get_paths(sid)
|
| 75 |
|
| 76 |
merged_text = ""
|
|
|
|
| 107 |
up_folder, _, _, _ = get_paths(sid)
|
| 108 |
paragraphs, embeddings, index_faiss = index_data.get(sid, ([], None, None))
|
| 109 |
|
| 110 |
+
uploaded_filenames = sorted(os.listdir(up_folder))
|
| 111 |
|
| 112 |
results = []
|
| 113 |
query = ""
|
|
|
|
| 127 |
q_embed = q_embed[np.newaxis, :]
|
| 128 |
faiss.normalize_L2(q_embed)
|
| 129 |
D, I = index_faiss.search(q_embed, k=min(k, len(paragraphs)))
|
| 130 |
+
|
| 131 |
+
# Keep both the text and the FAISS paragraph index so the UI can fetch context.
|
| 132 |
+
results = [{"idx": int(i), "text": paragraphs[i]} for i in I[0]]
|
| 133 |
|
| 134 |
_, res_folder, _, result_file = get_paths(sid)
|
| 135 |
with open(result_file, "w", encoding='utf-8') as f:
|
| 136 |
+
for r in results:
|
| 137 |
+
f.write(r["text"] + "\n\n")
|
| 138 |
|
| 139 |
return render_template(
|
| 140 |
"index.html",
|
|
|
|
| 142 |
query=query,
|
| 143 |
topk=k,
|
| 144 |
sid=sid,
|
| 145 |
+
uploaded_filenames=uploaded_filenames
|
| 146 |
)
|
| 147 |
|
|
|
|
| 148 |
@app.route("/upload", methods=["POST"])
|
| 149 |
def upload_file():
|
| 150 |
sid = request.args.get("sid")
|
|
|
|
| 182 |
return ("Nothing to download", 404)
|
| 183 |
return send_file(merged_file, as_attachment=True)
|
| 184 |
|
|
|
|
| 185 |
@app.route("/reset")
|
| 186 |
def reset():
|
| 187 |
sid = request.args.get("sid")
|
|
|
|
| 201 |
def ping():
|
| 202 |
return "pong", 200
|
| 203 |
|
| 204 |
+
@app.route("/api/context")
def api_context():
    """Return an excerpt of the merged paragraphs centered on a paragraph index.

    Query params:
        sid: search id whose in-memory index should be consulted.
        idx: int, FAISS paragraph index to center on (required, >= 0).
        window: int, optional (default 3) - number of surrounding
            paragraphs to include on each side of ``idx``.

    Returns JSON with: paras (the slice), center (position of ``idx``
    within the slice), start/end (slice bounds), total (paragraph count).
    Errors: 400 for bad/missing params, 404 when no index exists for sid.
    """
    sid = request.args.get("sid")
    try:
        idx = int(request.args.get("idx", "-1"))
    except (TypeError, ValueError):
        return jsonify(error="Bad idx"), 400

    try:
        window = int(request.args.get("window", "3"))
    except (TypeError, ValueError):
        window = 3
    # Clamp: a negative window would yield start > end (empty slice,
    # negative center), which is meaningless for the caller.
    window = max(0, window)

    if not sid or idx < 0:
        return jsonify(error="Missing sid or idx"), 400

    # index_data maps sid -> (paragraphs, embeddings, faiss index);
    # only the paragraph texts are needed here.
    paragraphs, _, _ = index_data.get(sid, (None, None, None))
    if paragraphs is None:
        return jsonify(error="No index for this sid. Upload files first."), 404
    if idx >= len(paragraphs):
        return jsonify(error="idx out of range"), 400

    start = max(0, idx - window)
    end = min(len(paragraphs), idx + window + 1)
    context_paras = paragraphs[start:end]
    center_local = idx - start  # where the highlighted paragraph sits in that slice

    return jsonify(
        paras=context_paras,
        center=center_local,
        start=start,
        end=end,
        total=len(paragraphs)
    )
|
| 242 |
+
|
| 243 |
#if __name__ == "__main__":
|
| 244 |
# from waitress import serve
|
|
|
|
| 245 |
# serve(app, host="0.0.0.0", port=9001, threads=4)
|
| 246 |
|
|
|
|
| 247 |
# Direct-execution entry point: start Flask's built-in server,
# listening on all interfaces at port 7860.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
|
|
|