thekusaldarshana committed on
Commit fdfa9e5 · verified · 1 Parent(s): a7ff38b

Update app.py

Files changed (1)
  1. app.py +411 -4
app.py CHANGED
@@ -1,8 +1,35 @@
  import gradio as gr
  from huggingface_hub import hf_hub_download
  import fasttext
  import os

  styles = """
  #button {
      background: linear-gradient(to right, #6A359C, #B589D6);
@@ -10,6 +37,9 @@ styles = """
  }
  """

  HF_TOKEN = os.getenv("HF_TOKEN")
  if not HF_TOKEN:
      raise EnvironmentError("❌ HF_TOKEN is not set. Please add it in Space Settings > Secrets.")
@@ -27,6 +57,9 @@ try:
  except Exception as e:
      raise RuntimeError(f"❌ Failed to load model: {str(e)}")

  def get_embedding(word: str):
      if not word or not word.strip():
          return {"error": "⚠️ කරුණාකර සිංහල වචනයක් ඇතුළත් කරන්න."}
@@ -36,9 +69,278 @@ def get_embedding(word: str):
      except Exception as e:
          return {"error": f"💥 Something went wrong..: {str(e)}"}

- # ------------------------------------------
- # 🖥️ Gradio App — Now your PUBLIC UI
- # ------------------------------------------
  with gr.Blocks(title="Embedding_Siyabasa", css=styles) as demo:
      gr.Markdown("""
      # 🇱🇰 Sinhala Word Embeddings
@@ -46,6 +348,7 @@ with gr.Blocks(title="Embedding_Siyabasa", css=styles) as demo:
      ඔබේ සිංහල වචනය ඇතුළත් කර, එහි 300D embedding vector එක ලබා ගන්න.
      """)

      with gr.Row():
          inp = gr.Textbox(label="සිංහල වචනය", placeholder="උදා: අම්මා, සියබස, නූතන, ප්‍රජාතන්ත්‍රවාදය")
          out = gr.JSON(label="Embedding Vector (300D)")
@@ -67,9 +370,113 @@ with gr.Blocks(title="Embedding_Siyabasa", css=styles) as demo:
          cache_examples=True
      )

      gr.Markdown("""
      ---
      ✨ *Remeinium AI - Intelligence for a greater tomorrow*
      """)

- demo.queue(default_concurrency_limit=10).launch()
 
 
+ # app.py
  import gradio as gr
  from huggingface_hub import hf_hub_download
  import fasttext
  import os
+ import numpy as np
+ from functools import lru_cache
+ import math
+ import tempfile
+ import io
+ from typing import List, Tuple, Optional
+
+ # visualization libs (attempt imports, fallbacks handled later)
+ try:
+     import umap
+     _HAS_UMAP = True
+ except Exception:
+     _HAS_UMAP = False
+
+ try:
+     from sklearn.manifold import TSNE
+     _HAS_TSNE = True
+ except Exception:
+     _HAS_TSNE = False
+
+ import matplotlib
+ matplotlib.use("Agg")
+ import matplotlib.pyplot as plt
+
+ # -------------------------
+ # your original styles + button id (kept)
+ # -------------------------
  styles = """
  #button {
      background: linear-gradient(to right, #6A359C, #B589D6);

  }
  """

+ # -------------------------
+ # HF token + model download (kept)
+ # -------------------------
  HF_TOKEN = os.getenv("HF_TOKEN")
  if not HF_TOKEN:
      raise EnvironmentError("❌ HF_TOKEN is not set. Please add it in Space Settings > Secrets.")
 
  except Exception as e:
      raise RuntimeError(f"❌ Failed to load model: {str(e)}")

+ # -------------------------
+ # original get_embedding (untouched)
+ # -------------------------
  def get_embedding(word: str):
      if not word or not word.strip():
          return {"error": "⚠️ කරුණාකර සිංහල වචනයක් ඇතුළත් කරන්න."}

      except Exception as e:
          return {"error": f"💥 Something went wrong..: {str(e)}"}

+ # -------------------------
+ # Utilities & precomputations
+ # -------------------------
+ def safe_strip(s: Optional[str]) -> str:
+     return "" if s is None else s.strip()
+
+ @lru_cache(maxsize=1)
+ def load_vocab_and_matrix(max_words: int = 100000):
+     try:
+         words = model.get_words()
+     except Exception:
+         # no usable fallback here: without the vocabulary we cannot build the matrix
+         raise RuntimeError("Failed to get words from fastText model via model.get_words().")
+
+     if max_words and len(words) > max_words:
+         words = words[:max_words]
+
+     vectors = []
+     for w in words:
+         vec = model.get_word_vector(w)
+         vectors.append(vec)
+     mat = np.vstack(vectors).astype(np.float32)  # N x D
+
+     # compute normalized vectors for cosine similarity (avoid division by zero)
+     norms = np.linalg.norm(mat, axis=1, keepdims=True)
+     norms[norms == 0.0] = 1.0
+     mat_norm = mat / norms
+
+     return words, mat, mat_norm
+
+ def cosine_similarity_vec(u: np.ndarray, mat_norm: np.ndarray) -> np.ndarray:
+     # normalize u
+     u_norm = np.linalg.norm(u)
+     if u_norm == 0:
+         return np.zeros(mat_norm.shape[0], dtype=np.float32)
+     u = (u / u_norm).astype(np.float32)
+     sims = np.dot(mat_norm, u)  # (N,)
+     return sims
+
+ def top_k_words_for_vector(vec: np.ndarray, words: List[str], mat_norm: np.ndarray, k: int = 10, filter_self: Optional[str] = None) -> List[Tuple[str, float]]:
+     sims = cosine_similarity_vec(vec, mat_norm)
+     # argsort descending; take one extra candidate in case the query word itself gets filtered out
+     idx = np.argsort(-sims)[: k + (1 if filter_self else 0)]
+     results = []
+     for i in idx:
+         w = words[i]
+         score = float(sims[i])
+         if filter_self and w == filter_self:
+             continue
+         results.append((w, round(score, 6)))
+         if len(results) >= k:
+             break
+     return results
+
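# Illustrative sketch, not part of the commit: because each row of mat_norm has
# unit length, a single matrix-vector product scores the whole vocabulary at once.
# For a toy 2-D "vocabulary" of three vectors:
#     toy = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]], dtype=np.float32)
#     toy_norm = toy / np.linalg.norm(toy, axis=1, keepdims=True)
#     cosine_similarity_vec(np.array([1.0, 0.0], dtype=np.float32), toy_norm)
#     # -> approximately [1.0, 0.0, 0.7071]: cosines of 0, 90 and 45 degrees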
+ # -------------------------
+ # Feature 1: Word similarity (two words)
+ # -------------------------
+ def word_similarity(a: str, b: str):
+     a = safe_strip(a)
+     b = safe_strip(b)
+     if not a or not b:
+         return {"error": "⚠️ Enter two valid Sinhala words"}
+     try:
+         va = model.get_word_vector(a)
+         vb = model.get_word_vector(b)
+         # cosine similarity
+         denom = (np.linalg.norm(va) * np.linalg.norm(vb))
+         if denom == 0:
+             sim = 0.0
+         else:
+             sim = float(np.dot(va, vb) / denom)
+         return {
+             "word_a": a,
+             "word_b": b,
+             "cosine_similarity": round(sim, 6),
+             "explanation": "1.0 = identical in vector space, -1.0 = opposite. Values near 0 mean unrelated."
+         }
+     except Exception as e:
+         return {"error": f"💥 Error computing similarity: {str(e)}"}
+
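# Usage sketch (hypothetical inputs, not part of the commit): word_similarity
# returns a plain dict, so it can be smoke-tested without the UI:
#     word_similarity("අම්මා", "තාත්තා")
#     # {'word_a': 'අම්මා', 'word_b': 'තාත්තා', 'cosine_similarity': <model-dependent>, 'explanation': '...'}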
+ # -------------------------
+ # Feature 2: Nearest neighbors / semantic search
+ # -------------------------
+ def nearest_neighbors(word: str, top_k: int = 10):
+     word = safe_strip(word)
+     if not word:
+         return {"error": "⚠️ Please enter a Sinhala word"}
+     try:
+         words, mat, mat_norm = load_vocab_and_matrix()
+         vec = model.get_word_vector(word)
+         # int() because Gradio sliders deliver floats, and k is used in slice arithmetic
+         results = top_k_words_for_vector(vec, words, mat_norm, k=int(top_k), filter_self=word)
+         return {"query": word, "neighbors": [{"word": w, "score": s} for w, s in results]}
+     except Exception as e:
+         return {"error": f"💥 Error finding neighbors: {str(e)}"}
+
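# Shape sketch (scores shown as placeholders, not real output):
#     nearest_neighbors("ගුරු", top_k=3)
#     # {'query': 'ගුරු', 'neighbors': [{'word': ..., 'score': ...}, ...]}
# The first call also pays the one-time cost of load_vocab_and_matrix(), which
# embeds up to 100000 vocabulary words; lru_cache then reuses that matrix.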
+ # -------------------------
+ # Feature 3: Sentence embeddings (average word vectors) + similarity
+ # -------------------------
+ def sentence_to_embedding(sentence: str):
+     s = safe_strip(sentence)
+     if not s:
+         return {"error": "⚠️ Please enter a Sinhala sentence"}
+     try:
+         # simple whitespace tokenization
+         tokens = [t for t in s.split() if t.strip()]
+         if len(tokens) == 0:
+             return {"error": "⚠️ Couldn't extract words from the sentence"}
+         vecs = [model.get_word_vector(t) for t in tokens]
+         mat = np.vstack(vecs)
+         avg = mat.mean(axis=0)
+         return {"sentence": s, "tokens": tokens, "embedding": avg.tolist()}
+     except Exception as e:
+         return {"error": f"💥 Error computing sentence embedding: {str(e)}"}
+
+ def sentence_similarity(s1: str, s2: str):
+     try:
+         r1 = sentence_to_embedding(s1)
+         r2 = sentence_to_embedding(s2)
+         if "error" in r1 or "error" in r2:
+             return {"error": r1.get("error") or r2.get("error")}
+         v1 = np.array(r1["embedding"], dtype=np.float32)
+         v2 = np.array(r2["embedding"], dtype=np.float32)
+         denom = (np.linalg.norm(v1) * np.linalg.norm(v2))
+         if denom == 0:
+             sim = 0.0
+         else:
+             sim = float(np.dot(v1, v2) / denom)
+         return {"sentence_a": s1, "sentence_b": s2, "cosine_similarity": round(sim, 6)}
+     except Exception as e:
+         return {"error": f"💥 Error computing sentence similarity: {str(e)}"}
+
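# Worked example, not part of the commit: the sentence vector is the unweighted
# mean of its token vectors, v(s) = (1/n) * sum of v(w_i). With two toy tokens:
#     np.vstack([np.array([1.0, 3.0]), np.array([3.0, 1.0])]).mean(axis=0)
#     # -> array([2., 2.])
# Averaging discards word order, so reordered sentences get identical embeddings.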
+ # -------------------------
+ # Feature 4: Visualization
+ # -------------------------
+ def visualize_words(words_text: str, use_neighbors: bool = False, neighbors_k: int = 10, projection_method: str = "umap"):
+     words_raw = [w.strip() for w in words_text.replace(",", "\n").splitlines() if w.strip()]
+     if not words_raw:
+         return {"error": "⚠️ Please enter at least one word"}
+     try:
+         words, mat, mat_norm = load_vocab_and_matrix()
+         selected_words = []
+         for w in words_raw:
+             selected_words.append(w)
+             if use_neighbors:
+                 vec = model.get_word_vector(w)
+                 nn = top_k_words_for_vector(vec, words, mat_norm, k=neighbors_k, filter_self=w)
+                 selected_words.extend([x for x, _ in nn])
+
+         # dedupe preserving order
+         seen = set()
+         final_words = []
+         for w in selected_words:
+             if w not in seen:
+                 final_words.append(w)
+                 seen.add(w)
+
+         # fetch vectors (if OOV, fastText builds the vector from subwords)
+         vecs = np.vstack([model.get_word_vector(w) for w in final_words])
+
+         # projection
+         if projection_method == "umap" and _HAS_UMAP:
+             reducer = umap.UMAP(n_components=2, random_state=42)
+             coords = reducer.fit_transform(vecs)
+         elif _HAS_TSNE:
+             tsne = TSNE(n_components=2, random_state=42, perplexity=min(30, len(final_words) - 1 or 2))
+             coords = tsne.fit_transform(vecs)
+         elif _HAS_UMAP:
+             # user requested t-SNE but only UMAP is available
+             reducer = umap.UMAP(n_components=2, random_state=42)
+             coords = reducer.fit_transform(vecs)
+         else:
+             return {"error": "⚠️ Neither UMAP nor t-SNE is available in this environment. Please install 'umap-learn' or 'scikit-learn'."}
+
+         # plot
+         fig, ax = plt.subplots(figsize=(8, 6))
+         ax.scatter(coords[:, 0], coords[:, 1], s=40)
+         for i, w in enumerate(final_words):
+             ax.annotate(w, (coords[i, 0], coords[i, 1]), fontsize=9, alpha=0.9)
+         ax.set_title("2D Projection of Sinhala Words (embedding space)")
+         ax.set_xticks([])
+         ax.set_yticks([])
+         buf = io.BytesIO()
+         fig.tight_layout()
+         fig.savefig(buf, format="png", dpi=150)
+         plt.close(fig)
+         buf.seek(0)
+         return buf
+     except Exception as e:
+         return {"error": f"💥 Error creating visualization: {str(e)}"}
+
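# Fallback sketch: the projection branch above reduces to
#     "umap" requested and umap installed  -> UMAP
#     otherwise, scikit-learn installed    -> t-SNE (even if "umap" was requested)
#     otherwise, umap installed            -> UMAP (even if "tsne" was requested)
#     otherwise                            -> error dict
# so installing either umap-learn or scikit-learn is enough for this feature.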
+ # -------------------------
+ # Feature 5: Practical demo - index uploaded documents and search
+ # -------------------------
+ def parse_uploaded_documents(file):
+     if file is None:
+         return {"error": "⚠️ Please upload a file (txt/csv)."}
+     try:
+         # gr.File may provide a filepath string or a file-like object, depending on the
+         # Gradio version; read the bytes once so both decode attempts see the same data
+         path = file if isinstance(file, str) else getattr(file, "name", None)
+         if path and os.path.exists(path):
+             with open(path, "rb") as f:
+                 data = f.read()
+         else:
+             data = file.read()
+         try:
+             raw = data.decode("utf-8")
+         except UnicodeDecodeError:
+             raw = data.decode("latin-1")
+     except Exception as e:
+         return {"error": f"💥 Something went wrong: {str(e)}"}
+
+     docs = []
+     # simple CSV detection: many commas vs newlines
+     if "," in raw and raw.count(",") > raw.count("\n"):
+         # parse as CSV rows
+         for line in raw.splitlines():
+             if not line.strip():
+                 continue
+             # take the entire line (or split and take the last column). Keep it simple.
+             docs.append(line.strip())
+     else:
+         for line in raw.splitlines():
+             if line.strip():
+                 docs.append(line.strip())
+     if not docs:
+         return {"error": "⚠️ Couldn't extract any documents from the file"}
+     return {"documents": docs}
+
+ def index_documents_for_search(docs: List[str]):
+     if not docs:
+         return {"error": "⚠️ The file was empty"}
+     try:
+         vecs = []
+         for d in docs:
+             tokens = [t for t in d.split() if t.strip()]
+             if not tokens:
+                 vecs.append(np.zeros((model.get_dimension(),), dtype=np.float32))
+                 continue
+             mats = np.vstack([model.get_word_vector(t) for t in tokens])
+             vecs.append(mats.mean(axis=0))
+         M = np.vstack(vecs).astype(np.float32)  # num_docs x D
+         norms = np.linalg.norm(M, axis=1, keepdims=True)
+         norms[norms == 0] = 1.0
+         M_norm = M / norms
+         return {"matrix": M, "matrix_norm": M_norm, "docs": docs}
+     except Exception as e:
+         return {"error": f"💥 Error indexing documents: {str(e)}"}
+
+ def search_documents(query: str, indexed):
+     """
+     indexed: dict returned by index_documents_for_search
+     returns the top matching docs, sorted by cosine similarity
+     """
+     q = safe_strip(query)
+     if not q:
+         return {"error": "⚠️ Enter a query to search"}
+     try:
+         q_tokens = [t for t in q.split() if t.strip()]
+         if not q_tokens:
+             return {"error": "⚠️ Couldn't extract tokens from the query"}
+         q_vecs = np.vstack([model.get_word_vector(t) for t in q_tokens])
+         q_avg = q_vecs.mean(axis=0)
+         q_norm = np.linalg.norm(q_avg)
+         if q_norm == 0:
+             sims = np.zeros(indexed["matrix_norm"].shape[0], dtype=np.float32)
+         else:
+             q_avg = (q_avg / q_norm).astype(np.float32)
+             sims = np.dot(indexed["matrix_norm"], q_avg)
+         # keep up to the UI slider's maximum (20); the caller truncates further
+         idx = np.argsort(-sims)[:20]
+         results = []
+         for i in idx:
+             results.append({"doc": indexed["docs"][i], "score": round(float(sims[i]), 6)})
+         return {"query": q, "results": results}
+     except Exception as e:
+         return {"error": f"💥 Error while searching: {str(e)}"}
+
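# End-to-end sketch (hypothetical documents, not part of the commit):
#     indexed = index_documents_for_search(["මම පාසලට යමි", "අම්මා ගෙදර සිටී"])
#     search_documents("පාසල", indexed)
#     # {'query': 'පාසල', 'results': [{'doc': ..., 'score': ...}, ...]}
# Documents and the query share the same mean-of-word-vectors embedding, so the
# ranking is the cosine similarity between those averaged vectors.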
+ # -------------------------
+ # Gradio UI - keep original section and add new blocks
+ # -------------------------
  with gr.Blocks(title="Embedding_Siyabasa", css=styles) as demo:
      gr.Markdown("""
      # 🇱🇰 Sinhala Word Embeddings

      ඔබේ සිංහල වචනය ඇතුළත් කර, එහි 300D embedding vector එක ලබා ගන්න.
      """)

+     # Original simple embedding (kept exactly as before)
      with gr.Row():
          inp = gr.Textbox(label="සිංහල වචනය", placeholder="උදා: අම්මා, සියබස, නූතන, ප්‍රජාතන්ත්‍රවාදය")
          out = gr.JSON(label="Embedding Vector (300D)")

          cache_examples=True
      )

+     # -------------------------
+     # NEW: Word similarity
+     # -------------------------
+     gr.Markdown("## 🔎 Word Similarity: cosine similarity between two words")
+     with gr.Row():
+         ws_a = gr.Textbox(label="Word A", placeholder="උදා: අම්මා")
+         ws_b = gr.Textbox(label="Word B", placeholder="උදා: තාත්තා")
+     ws_out = gr.JSON(label="Similarity Result")
+     ws_btn = gr.Button("🔁 Compare", elem_id="button")
+     ws_btn.click(fn=word_similarity, inputs=[ws_a, ws_b], outputs=ws_out)
+
+     # -------------------------
+     # NEW: Nearest neighbors (word -> top-N)
+     # -------------------------
+     gr.Markdown("## 🧭 Nearest Neighbors (semantic search)")
+     with gr.Row():
+         nn_word = gr.Textbox(label="Query Word (සිංහල)", placeholder="උදා: ගුරු")
+         nn_k = gr.Slider(minimum=1, maximum=50, step=1, value=10, label="Top K (neighbors)")
+     nn_out = gr.JSON(label="Top-K Neighbors")
+     nn_btn = gr.Button("🔎 Find Neighbors", elem_id="button")
+     nn_btn.click(fn=nearest_neighbors, inputs=[nn_word, nn_k], outputs=nn_out)
+
+     # -------------------------
+     # NEW: Sentence embeddings
+     # -------------------------
+     gr.Markdown("## 🧾 Sentence Embeddings")
+     with gr.Row():
+         sent_inp = gr.Textbox(label="සිංහල වාක්‍යය", placeholder="උදා: මම පාසලට යමි.")
+         sent_out = gr.JSON(label="Sentence Embedding (avg)")
+     sent_btn = gr.Button("🧠 Get Sentence Embedding", elem_id="button")
+     sent_btn.click(fn=sentence_to_embedding, inputs=sent_inp, outputs=sent_out)
+
+     # sentence similarity
+     with gr.Row():
+         sa = gr.Textbox(label="Sentence A")
+         sb = gr.Textbox(label="Sentence B")
+     ssim_out = gr.JSON(label="Sentence Similarity")
+     ssim_btn = gr.Button("🔁 Compare Sentences", elem_id="button")
+     ssim_btn.click(fn=sentence_similarity, inputs=[sa, sb], outputs=ssim_out)
+
+     # -------------------------
+     # NEW: Visualization (UMAP / t-SNE)
+     # -------------------------
+     gr.Markdown("## 📊 Visualization")
+     with gr.Row():
+         viz_words = gr.Textbox(label="Words (comma or newline separated)", placeholder="උදා: අම්මා, සියබස, පාසල")
+         viz_use_neighbors = gr.Checkbox(label="Expand with nearest neighbors", value=False)
+         viz_k = gr.Slider(minimum=1, maximum=40, step=1, value=10, label="Neighbors per word (if expanded)")
+         viz_method = gr.Radio(choices=["umap", "tsne"], value="umap", label="Projection method")
+     viz_img = gr.Image(type="pil", label="Projection (PNG)")
+     viz_btn = gr.Button("🎨 Create Visualization", elem_id="button")
+     viz_err = gr.Textbox(visible=False)
+     def _viz_wrapper(words_text, use_neighbors, k, method):
+         res = visualize_words(words_text, use_neighbors, neighbors_k=int(k), projection_method=method)
+         if isinstance(res, dict) and "error" in res:
+             return gr.update(value=None), gr.update(value=f"Error: {res['error']}")
+         # res is a BytesIO holding a PNG; gr.Image(type="pil") expects a PIL image
+         from PIL import Image
+         return Image.open(res), ""
+     viz_btn.click(fn=_viz_wrapper, inputs=[viz_words, viz_use_neighbors, viz_k, viz_method], outputs=[viz_img, viz_err])
+
+     # -------------------------
+     # NEW: Practical demo - upload docs and semantic search
+     # -------------------------
+     gr.Markdown("## 📚 Practical demo — Upload Sinhala documents and semantic search")
+     with gr.Row():
+         upload = gr.File(label="Upload a .txt or .csv (each line is a doc)", file_count="single")
+         docs_list = gr.Dataframe(headers=["Document (first 200 chars)"], interactive=False)
+     idx_btn = gr.Button("📥 Index Documents", elem_id="button")
+     # store the indexed dataset in a state object
+     indexed_state = gr.State(value=None)
+     idx_status = gr.Textbox(visible=False)
+
+     def _index_upload(file):
+         parsed = parse_uploaded_documents(file)
+         if "error" in parsed:
+             return None, gr.update(value=[]), {"error": parsed["error"]}
+         docs = parsed["documents"]
+         indexed = index_documents_for_search(docs)
+         if "error" in indexed:
+             return None, gr.update(value=[]), {"error": indexed["error"]}
+         # put the indexed object into state and display a preview
+         preview = [[(d[:200] + "..." if len(d) > 200 else d)] for d in docs[:30]]
+         return indexed, gr.update(value=preview), {"success": f"Indexed {len(docs)} documents."}
+
+     idx_btn.click(fn=_index_upload, inputs=[upload], outputs=[indexed_state, docs_list, idx_status])
+
+     with gr.Row():
+         q = gr.Textbox(label="Search query (Sinhala)")
+         topn = gr.Slider(1, 20, value=5, step=1, label="Top N results")
+     results_out = gr.JSON(label="Search Results")
+
+     def _search_wrapper(query, topn_, state):
+         if state is None:
+             return {"error": "⚠️ Please index documents first (upload + Index Documents)."}
+         indexed = state
+         # run the search; results come back sorted by cosine similarity
+         res = search_documents(query, indexed)
+         if "error" in res:
+             return res
+         # truncate to the requested Top N
+         if "results" in res:
+             res["results"] = res["results"][:int(topn_)]
+         return res
+
+     gr.Button("🔎 Search Documents", elem_id="button").click(fn=_search_wrapper, inputs=[q, topn, indexed_state], outputs=[results_out])
+
      gr.Markdown("""
      ---
      ✨ *Remeinium AI - Intelligence for a greater tomorrow*
      """)

+ # Keep queue and launch (same as before)
+ demo.queue(default_concurrency_limit=10).launch()
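The new imports above imply extra Python dependencies for the Space. A plausible requirements.txt sketch (exact version pins and any pre-existing entries are unknown):

gradio
huggingface_hub
fasttext
numpy
matplotlib
scikit-learn
umap-learn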