ntdservices committed on
Commit
856c6b8
·
verified ·
1 Parent(s): 3185437

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -14
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from flask import Flask, request, render_template, send_file, redirect, url_for
2
  import os
3
  import re
4
  import uuid
@@ -42,12 +42,12 @@ def clear_uploads_folder():
42
  clear_uploads_folder()
43
  print("βœ… Uploads folder cleared.")
44
 
45
- # runtime cache keyed by search‑id → (paragraphs, embeddings, faiss‑index)
46
  index_data = {}
47
 
48
  # ── helpers ────────────────────────────────────────────────────────────────────
49
  def get_paths(sid: str):
50
- """Return per‑search folders & files, creating them if needed."""
51
  up_folder = os.path.join(BASE_UPLOADS, sid)
52
  res_folder = os.path.join(BASE_RESULTS, sid)
53
  os.makedirs(up_folder, exist_ok=True)
@@ -70,7 +70,7 @@ def extract_text(file_path):
70
  return ""
71
 
72
  def rebuild_merged_and_index(sid: str):
73
- """Re‑embed everything for *this* search id."""
74
  up_folder, _, merged_file, _ = get_paths(sid)
75
 
76
  merged_text = ""
@@ -107,7 +107,7 @@ def index():
107
  up_folder, _, _, _ = get_paths(sid)
108
  paragraphs, embeddings, index_faiss = index_data.get(sid, ([], None, None))
109
 
110
- uploaded_filenames = sorted(os.listdir(up_folder)) # ✅ added this line
111
 
112
  results = []
113
  query = ""
@@ -127,12 +127,14 @@ def index():
127
  q_embed = q_embed[np.newaxis, :]
128
  faiss.normalize_L2(q_embed)
129
  D, I = index_faiss.search(q_embed, k=min(k, len(paragraphs)))
130
- results = [paragraphs[i] for i in I[0]]
 
 
131
 
132
  _, res_folder, _, result_file = get_paths(sid)
133
  with open(result_file, "w", encoding='utf-8') as f:
134
- for para in results:
135
- f.write(para + "\n\n")
136
 
137
  return render_template(
138
  "index.html",
@@ -140,10 +142,9 @@ def index():
140
  query=query,
141
  topk=k,
142
  sid=sid,
143
- uploaded_filenames=uploaded_filenames # ✅ pass to template
144
  )
145
 
146
-
147
  @app.route("/upload", methods=["POST"])
148
  def upload_file():
149
  sid = request.args.get("sid")
@@ -181,7 +182,6 @@ def download_merged():
181
  return ("Nothing to download", 404)
182
  return send_file(merged_file, as_attachment=True)
183
 
184
-
185
  @app.route("/reset")
186
  def reset():
187
  sid = request.args.get("sid")
@@ -201,12 +201,48 @@ def reset():
201
  def ping():
202
  return "pong", 200
203
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  #if __name__ == "__main__":
205
  # from waitress import serve
206
- # # Use threads to approximate “workers” on Windows (Waitress is single‑process).
207
  # serve(app, host="0.0.0.0", port=9001, threads=4)
208
 
209
-
210
  if __name__ == "__main__":
211
  app.run(host="0.0.0.0", port=7860)
212
-
 
1
+ from flask import Flask, request, render_template, send_file, redirect, url_for, jsonify
2
  import os
3
  import re
4
  import uuid
 
42
  clear_uploads_folder()
43
  print("βœ… Uploads folder cleared.")
44
 
45
+ # runtime cache keyed by search-id → (paragraphs, embeddings, faiss-index)
46
  index_data = {}
47
 
48
  # ── helpers ────────────────────────────────────────────────────────────────────
49
  def get_paths(sid: str):
50
+ """Return per-search folders & files, creating them if needed."""
51
  up_folder = os.path.join(BASE_UPLOADS, sid)
52
  res_folder = os.path.join(BASE_RESULTS, sid)
53
  os.makedirs(up_folder, exist_ok=True)
 
70
  return ""
71
 
72
  def rebuild_merged_and_index(sid: str):
73
+ """Re-embed everything for *this* search id."""
74
  up_folder, _, merged_file, _ = get_paths(sid)
75
 
76
  merged_text = ""
 
107
  up_folder, _, _, _ = get_paths(sid)
108
  paragraphs, embeddings, index_faiss = index_data.get(sid, ([], None, None))
109
 
110
+ uploaded_filenames = sorted(os.listdir(up_folder))
111
 
112
  results = []
113
  query = ""
 
127
  q_embed = q_embed[np.newaxis, :]
128
  faiss.normalize_L2(q_embed)
129
  D, I = index_faiss.search(q_embed, k=min(k, len(paragraphs)))
130
+
131
+ # Keep both the text and the FAISS paragraph index so the UI can fetch context.
132
+ results = [{"idx": int(i), "text": paragraphs[i]} for i in I[0]]
133
 
134
  _, res_folder, _, result_file = get_paths(sid)
135
  with open(result_file, "w", encoding='utf-8') as f:
136
+ for r in results:
137
+ f.write(r["text"] + "\n\n")
138
 
139
  return render_template(
140
  "index.html",
 
142
  query=query,
143
  topk=k,
144
  sid=sid,
145
+ uploaded_filenames=uploaded_filenames
146
  )
147
 
 
148
  @app.route("/upload", methods=["POST"])
149
  def upload_file():
150
  sid = request.args.get("sid")
 
182
  return ("Nothing to download", 404)
183
  return send_file(merged_file, as_attachment=True)
184
 
 
185
  @app.route("/reset")
186
  def reset():
187
  sid = request.args.get("sid")
 
201
  def ping():
202
  return "pong", 200
203
 
204
@app.route("/api/context")
def api_context():
    """Return the paragraphs surrounding one search hit as JSON.

    Query parameters:
        sid: search id whose cached paragraphs are read from ``index_data``.
        idx: zero-based paragraph index to centre the excerpt on (required).
        window: number of paragraphs to include on each side of ``idx``.
            Optional; falls back to 3 when absent or not an integer.
            Negative values are treated as 0.

    Returns:
        On success, a JSON object with ``paras`` (the excerpt), ``center``
        (position of the requested paragraph inside ``paras``), ``start`` and
        ``end`` (half-open slice bounds into the full paragraph list) and
        ``total`` (length of the full list).
        On failure, a JSON ``error`` message with status 400 (bad or missing
        parameters, ``idx`` out of range) or 404 (nothing cached for ``sid``).
    """
    sid = request.args.get("sid")

    # A missing idx becomes -1 and is rejected by the "Missing sid or idx"
    # check below; a non-numeric idx is rejected here.
    try:
        idx = int(request.args.get("idx", "-1"))
    except (TypeError, ValueError):
        return jsonify(error="Bad idx"), 400

    try:
        window = int(request.args.get("window", "3"))
    except (TypeError, ValueError):
        window = 3
    # A negative window would put `start` after `end`, producing an empty
    # excerpt and a negative `center`; clamp so the hit itself is always
    # returned.
    window = max(0, window)

    if not sid or idx < 0:
        return jsonify(error="Missing sid or idx"), 400

    paragraphs, _, _ = index_data.get(sid, (None, None, None))
    if paragraphs is None:
        return jsonify(error="No index for this sid. Upload files first."), 404

    total = len(paragraphs)
    if idx >= total:
        return jsonify(error="idx out of range"), 400

    # Half-open [start, end) slice, clipped to the bounds of the list.
    start = max(0, idx - window)
    end = min(total, idx + window + 1)

    return jsonify(
        paras=paragraphs[start:end],
        center=idx - start,  # where the requested paragraph sits in the slice
        start=start,
        end=end,
        total=total,
    )
242
+
243
  #if __name__ == "__main__":
244
  # from waitress import serve
 
245
  # serve(app, host="0.0.0.0", port=9001, threads=4)
246
 
 
247
  if __name__ == "__main__":
248
  app.run(host="0.0.0.0", port=7860)