EngrMuhammadBilal committed on
Commit
83e3775
·
verified ·
1 Parent(s): 355c578

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -133
app.py CHANGED
@@ -10,132 +10,125 @@ from docx import Document
10
  from docx.shared import Pt
11
  from string import Template
12
 
13
- # ---------- Branding ----------
 
 
14
  APP_NAME = "ScholarLens"
15
  TAGLINE = "Query your literature, get page-level proof"
16
 
17
- # ---------- Palette (guarantees light text on dark, dark text on light) ----------
18
  PALETTE = {
19
  "navy": "#083D77", # dark background
20
  "gold": "#F2B400", # primary buttons / accents
21
  "ice": "#FBF8F9", # off-white
22
  "maroon": "#8B1E1E", # emphasis chips / separators
23
  "amber": "#F5C26B", # secondary accent
24
- "text_on_dark": "#EAF2FF", # light text for dark surfaces
25
- "text_on_light": "#0B1220" # dark text for light (gold/amber/ice)
26
  }
27
 
28
  def build_custom_css():
29
- """Safe CSS builder using string.Template so { } braces don't break Python."""
30
  tmpl = Template(r"""
31
- :root {
32
- --navy: $navy;
33
- --gold: $gold;
34
- --ice: $ice;
35
- --maroon: $maroon;
36
- --amber: $amber;
37
- --text-dark: $text_dark;
38
- --text-light: $text_light;
 
 
 
 
 
 
 
 
 
 
 
 
39
  }
40
 
41
- /* Global surfaces */
42
- body, .gradio-container {
43
- background: var(--navy) !important;
44
- color: var(--text-light) !important;
45
  }
46
 
47
- /* Blocks and tabs */
48
- .gradio-container .block,
49
- .gradio-container .tabs,
50
- .gradio-container .tabs > .tabitem {
51
- background: color-mix(in srgb, var(--navy) 80%, black 20%) !important;
52
  color: var(--text-light) !important;
53
  border-radius: 12px;
54
- border: 1px solid color-mix(in srgb, var(--navy) 70%, white 10%);
55
  }
56
 
57
- /* Hero stripe styled with your palette */
58
- #hero {
59
- background:
60
- linear-gradient(90deg, var(--navy) 0%, var(--gold) 25%, var(--ice) 45%, var(--maroon) 65%, var(--amber) 85%, transparent 100%);
61
- border: 1px solid color-mix(in srgb, var(--gold) 40%, white 20%);
62
- border-radius: 14px;
63
- padding: 14px 16px;
64
- color: var(--text-light);
65
  }
66
 
67
- /* Links */
68
- a, .prose a { color: var(--amber) !important; text-decoration: none; }
69
- a:hover { text-decoration: underline; }
70
-
71
  /* KPI chips */
72
- .kpi {
73
- text-align:center; padding:12px; border-radius:10px;
74
- border:1px solid color-mix(in srgb, var(--ice) 35%, var(--navy) 65%);
75
- background: color-mix(in srgb, var(--navy) 75%, black 25%);
76
- color: var(--text-light);
77
- }
78
 
79
  /* Buttons */
80
- button, .gr-button { border-radius: 10px !important; font-weight: 600 !important; }
81
- .gr-button, button { color: var(--text-light) !important; }
82
- .gr-button-primary {
83
- background: var(--gold) !important;
84
- color: var(--text-dark) !important; /* dark text on light gold */
85
- border: 1px solid color-mix(in srgb, var(--gold) 70%, black 10%) !important;
86
  }
87
- .gr-button-secondary {
88
- background: color-mix(in srgb, var(--amber) 60%, var(--gold) 40%) !important;
89
- color: var(--text-dark) !important;
 
90
  }
91
 
92
  /* Inputs */
93
- input, textarea, .gr-textbox, .gr-text-area, .gr-slider, .gr-dropdown, .gr-file {
94
- background: color-mix(in srgb, var(--navy) 65%, black 35%) !important;
95
  color: var(--text-light) !important;
96
- border: 1px solid color-mix(in srgb, var(--ice) 25%, var(--navy) 75%) !important;
97
  border-radius: 10px !important;
98
  }
99
- input::placeholder, textarea::placeholder {
100
- color: color-mix(in srgb, var(--text-light) 60%, transparent) !important;
101
- }
102
 
103
- /* Text blocks */
104
- label, .label, .prose h1, .prose h2, .prose h3, .prose p, .markdown-body {
105
- color: var(--text-light) !important;
106
- }
107
 
108
- /* Dataframe / table */
109
- .dataframe, table, .table, .gr-dataframe * { color: var(--text-light) !important; background: transparent !important; }
110
- .dataframe th {
111
- background: color-mix(in srgb, var(--navy) 70%, black 30%) !important;
112
- border-bottom: 1px solid color-mix(in srgb, var(--ice) 20%, var(--navy) 80%) !important;
113
- }
114
- .dataframe td {
115
- border-bottom: 1px solid color-mix(in srgb, var(--ice) 15%, var(--navy) 85%) !important;
116
  }
117
 
118
- /* Accordions */
119
- .accordion, .gr-accordion {
120
- background: color-mix(in srgb, var(--navy) 70%, black 30%) !important;
121
- border: 1px solid color-mix(in srgb, var(--ice) 20%, var(--navy) 80%) !important;
122
- border-radius: 10px !important;
123
  }
124
 
125
- /* Footer */
126
- footer, .footer { opacity:.75; color: var(--text-light); }
127
  """)
128
  return tmpl.substitute(
129
- navy=PALETTE["navy"],
130
- gold=PALETTE["gold"],
131
- ice=PALETTE["ice"],
132
- maroon=PALETTE["maroon"],
133
- amber=PALETTE["amber"],
134
- text_dark=PALETTE["text_on_light"],
135
- text_light=PALETTE["text_on_dark"],
136
  )
137
 
138
- # ---------- Config ----------
 
 
139
  EMBED_MODEL_NAME = "intfloat/multilingual-e5-small"
140
  CHUNK_SIZE = 1200
141
  CHUNK_OVERLAP = 200
@@ -156,7 +149,9 @@ embedder = None
156
  faiss_index = None
157
  docstore: List[Dict[str, Any]] = []
158
 
159
- # ---------- PDF utils ----------
 
 
160
  def extract_text_from_pdf(pdf_path: str) -> List[Tuple[int, str]]:
161
  pages = []
162
  with fitz.open(pdf_path) as doc:
@@ -165,10 +160,7 @@ def extract_text_from_pdf(pdf_path: str) -> List[Tuple[int, str]]:
165
  if not txt.strip():
166
  blocks = page.get_text("blocks")
167
  if isinstance(blocks, list):
168
- txt = "\n".join(
169
- b[4] for b in blocks
170
- if isinstance(b, (list, tuple)) and len(b) > 4
171
- )
172
  pages.append((i, txt or ""))
173
  return pages
174
 
@@ -183,7 +175,9 @@ def chunk_text(text: str, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP) -> List[
183
  start = max(end - overlap, start + 1)
184
  return out
185
 
186
- # ---------- Embeddings / FAISS ----------
 
 
187
  def load_embedder():
188
  global embedder
189
  if embedder is None:
@@ -226,7 +220,9 @@ def load_index() -> bool:
226
  return True
227
  return False
228
 
229
- # ---------- Ingest ----------
 
 
230
  def _collect_pdf_paths(upload_paths: List[str]) -> List[str]:
231
  """Accept PDFs and ZIPs of PDFs."""
232
  if not upload_paths:
@@ -259,10 +255,8 @@ def ingest_pdfs(paths: List[str]) -> Tuple[Any, List[Dict[str, Any]]]:
259
  continue
260
  for ci, ch in enumerate(chunk_text(ptxt)):
261
  entries.append({
262
- "text": ch,
263
- "source": base,
264
- "page_start": pno,
265
- "page_end": pno,
266
  "chunk_id": f"{base}::p{pno}::c{ci}",
267
  })
268
  except Exception as e:
@@ -274,7 +268,9 @@ def ingest_pdfs(paths: List[str]) -> Tuple[Any, List[Dict[str, Any]]]:
274
  index = build_faiss(embs)
275
  return index, entries
276
 
277
- # ---------- Retrieval with optional keyword filter ----------
 
 
278
  def retrieve(query: str, top_k=5, must_contain: str = ""):
279
  global faiss_index, docstore
280
  if faiss_index is None or not docstore:
@@ -304,7 +300,9 @@ def retrieve(query: str, top_k=5, must_contain: str = ""):
304
  hits.append(item)
305
  return hits
306
 
307
- # ---------- Groq LLM ----------
 
 
308
  def groq_answer(query: str, contexts, model_name="llama-3.3-70b-versatile", temperature=0.2, max_tokens=1000):
309
  try:
310
  if not os.environ.get("GROQ_API_KEY"):
@@ -331,9 +329,7 @@ def groq_answer(query: str, contexts, model_name="llama-3.3-70b-versatile", temp
331
  )
332
 
333
  resp = client.chat.completions.create(
334
- model=model_name,
335
- temperature=float(temperature),
336
- max_tokens=int(max_tokens),
337
  messages=[{"role":"system","content":system_prompt},{"role":"user","content":user_prompt}],
338
  )
339
  return resp.choices[0].message.content.strip()
@@ -341,12 +337,13 @@ def groq_answer(query: str, contexts, model_name="llama-3.3-70b-versatile", temp
341
  import traceback
342
  return f"Groq API error: {e}\n```\n{traceback.format_exc()}\n```"
343
 
344
- # ---------- Export helpers ----------
 
 
345
  def export_answer_to_docx(question: str, answer_md: str, rows: List[List[str]]) -> str:
346
- """Save Q&A with sources table to a .docx and return path (rows = [Source, Page, Score, Snippet])."""
347
  doc = Document()
348
- styles = doc.styles
349
  try:
 
350
  styles['Normal'].font.name = 'Calibri'
351
  styles['Normal'].font.size = Pt(11)
352
  except Exception:
@@ -362,10 +359,7 @@ def export_answer_to_docx(question: str, answer_md: str, rows: List[List[str]])
362
  doc.add_heading("References (Top Passages)", level=2)
363
  table = doc.add_table(rows=1, cols=4)
364
  hdr = table.rows[0].cells
365
- hdr[0].text = "Source"
366
- hdr[1].text = "Page"
367
- hdr[2].text = "Score"
368
- hdr[3].text = "Snippet"
369
  for r in rows:
370
  row = table.add_row().cells
371
  for i, val in enumerate(r):
@@ -375,7 +369,9 @@ def export_answer_to_docx(question: str, answer_md: str, rows: List[List[str]])
375
  doc.save(path)
376
  return path
377
 
378
- # ---------- UI helpers ----------
 
 
379
  def build_index_from_uploads(paths: List[str]) -> str:
380
  global faiss_index, docstore
381
  pdfs = _collect_pdf_paths(paths)
@@ -433,40 +429,34 @@ def do_export_docx(question, answer_md, sources_rows):
433
  if not answer_md or not sources_rows:
434
  return None
435
  try:
436
- path = export_answer_to_docx(question, answer_md, sources_rows)
437
- return path
438
  except Exception:
439
  return None
440
 
441
- # ---------- Theme (simple; main styling via CSS for compatibility) ----------
 
 
442
  theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate")
443
 
444
- # ---------- Gradio UI ----------
445
- with gr.Blocks(
446
- title=f"{APP_NAME} | RAG over PDFs",
447
- theme=theme,
448
- css=build_custom_css()
449
- ) as demo:
450
- # --- Header / Hero ---
451
  with gr.Group(elem_id="hero"):
452
- gr.Markdown(
453
- f"""
454
  <div style="display:flex;align-items:center;gap:16px;">
455
  <div style="font-size:36px">📚🔎 <b>{APP_NAME}</b></div>
456
  <div style="opacity:.9;">{TAGLINE}</div>
457
  </div>
458
  <p style="opacity:.85;margin-top:6px;">
459
  Upload your papers, build an index, and ask research questions with verifiable, page-level citations.
460
- </p>
461
- """)
462
 
463
- # --- KPI row ---
464
  with gr.Row():
465
  gr.Markdown("**Meaning-aware retrieval**<br><span class='kpi'>E5 + FAISS</span>", elem_classes=["kpi"])
466
  gr.Markdown("**Cited answers**<br><span class='kpi'>Page-level proof</span>", elem_classes=["kpi"])
467
  gr.Markdown("**Runs anywhere**<br><span class='kpi'>HF Spaces or Colab</span>", elem_classes=["kpi"])
468
 
469
- # --- Key / Settings ---
470
  with gr.Row():
471
  api_box = gr.Textbox(label="(Optional) Set GROQ_API_KEY", type="password", placeholder="sk_...")
472
  set_btn = gr.Button("Set Key")
@@ -474,7 +464,7 @@ Upload your papers, build an index, and ask research questions with verifiable,
474
  set_btn.click(set_api_key, inputs=[api_box], outputs=[set_out])
475
 
476
  with gr.Tabs():
477
- # ---------------- Tab 1: Build / Load ----------------
478
  with gr.Tab("1) Build or Load Index"):
479
  gr.Markdown("Upload PDFs or a ZIP of PDFs, then click **Build Index**.")
480
  file_u = gr.Files(label="Upload PDFs or ZIP", file_types=[".pdf", ".zip"], type="filepath")
@@ -496,7 +486,7 @@ Upload your papers, build an index, and ask research questions with verifiable,
496
  zpath = gr.File(label="Index bundle", interactive=False)
497
  download_btn.click(fn=download_index_zip, outputs=[zpath])
498
 
499
- # ---------------- Tab 2: Ask ----------------
500
  with gr.Tab("2) Ask Questions"):
501
  with gr.Row():
502
  with gr.Column(scale=1):
@@ -515,7 +505,7 @@ Upload your papers, build an index, and ask research questions with verifiable,
515
  ["List camera model, sensor type, resolution, and FPS across studies. Cite pages.", "camera, fps, resolution"],
516
  ["Extract limitations and future work across the corpus, with page references.", ""],
517
  ["Compare GTAW setups: current range, travel speed, torch standoff, sensors.", "GTAW, current, speed, torch"],
518
- ["Summarize the main results tables with metrics and page citations.", "table, accuracy, mAP, F1"]
519
  ],
520
  inputs=[q, must],
521
  label="Quick examples",
@@ -529,15 +519,13 @@ Upload your papers, build an index, and ask research questions with verifiable,
529
  export_btn = gr.Button("Export Answer to DOCX", visible=False)
530
  exported = gr.File(label="Download answer", visible=True)
531
 
532
- # wire buttons
533
  ask_btn.click(fn=ask_rag, inputs=[q, topk, model_dd, temp, must], outputs=[ans, src, snippets_md, export_btn])
534
  export_btn.click(fn=do_export_docx, inputs=[q, ans, src], outputs=[exported])
535
  clear_btn.click(lambda: ("", [], "", gr.update(visible=False)), outputs=[ans, src, snippets_md, export_btn])
536
 
537
- # ---------------- Tab 3: About ----------------
538
  with gr.Tab("About"):
539
- gr.Markdown(
540
- """
541
  **ScholarLens** helps researchers move from reading to results with answers grounded in the papers you upload.
542
 
543
  - Meaning-aware retrieval (E5 + FAISS)
@@ -547,10 +535,9 @@ Upload your papers, build an index, and ask research questions with verifiable,
547
  - Powered by Groq models
548
 
549
  *Privacy note:* your files stay on this Space. Only the Groq call is external.
550
- """
551
- )
552
 
553
- # Broad compatibility for Spaces
554
  demo.queue()
555
  if __name__ == "__main__":
556
  demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
10
  from docx.shared import Pt
11
  from string import Template
12
 
13
+ # =========================
14
+ # Branding & Palette
15
+ # =========================
16
  APP_NAME = "ScholarLens"
17
  TAGLINE = "Query your literature, get page-level proof"
18
 
 
19
  PALETTE = {
20
  "navy": "#083D77", # dark background
21
  "gold": "#F2B400", # primary buttons / accents
22
  "ice": "#FBF8F9", # off-white
23
  "maroon": "#8B1E1E", # emphasis chips / separators
24
  "amber": "#F5C26B", # secondary accent
25
+ "text_on_dark": "#EAF2FF", # light text on dark
26
+ "text_on_light": "#0B1220" # dark text on light (gold/amber)
27
  }
28
 
29
  def build_custom_css():
30
+ """Build the dark-UI CSS (light text, gold primary button) with string.Template so literal CSS braces need no escaping in Python."""
31
  tmpl = Template(r"""
32
+ :root{
33
+ --navy: $navy; --gold: $gold; --ice: $ice; --maroon: $maroon; --amber: $amber;
34
+ --text-dark: $text_dark; --text-light: $text_light;
35
+
36
+ /* Gradio tokens (force our palette) */
37
+ --body-background-fill: var(--navy);
38
+ --body-text-color: var(--text-light);
39
+ --block-background-fill: rgba(8,61,119,0.82);
40
+ --block-title-text-color: var(--text-light);
41
+ --border-color-primary: rgba(255,255,255,0.12);
42
+
43
+ --button-primary-background-fill: var(--gold);
44
+ --button-primary-text-color: var(--text-dark);
45
+ --button-primary-border-color: #c89200;
46
+
47
+ --button-secondary-background-fill: var(--amber);
48
+ --button-secondary-text-color: var(--text-dark);
49
+ --button-secondary-border-color: #caa157;
50
+
51
+ --link-text-color: var(--amber);
52
  }
53
 
54
+ /* Global surfaces & text */
55
+ body, .gradio-container{
56
+ background: var(--body-background-fill) !important;
57
+ color: var(--body-text-color) !important;
58
  }
59
 
60
+ /* Blocks / tabs */
61
+ .gradio-container .block, .gradio-container .tabs, .gradio-container .tabs>.tabitem{
62
+ background: var(--block-background-fill) !important;
 
 
63
  color: var(--text-light) !important;
64
  border-radius: 12px;
65
+ border: 1px solid var(--border-color-primary);
66
  }
67
 
68
+ /* Hero stripe using your palette */
69
+ #hero{
70
+ background: linear-gradient(90deg, var(--navy) 0%, var(--gold) 25%, var(--ice) 45%, var(--maroon) 65%, var(--amber) 85%, transparent 100%);
71
+ border: 1px solid rgba(242,180,0,0.6);
72
+ border-radius: 14px; padding: 14px 16px; color: var(--text-light);
 
 
 
73
  }
74
 
 
 
 
 
75
  /* KPI chips */
76
+ .kpi{ text-align:center; padding:12px; border-radius:10px;
77
+ border:1px solid rgba(255,255,255,.14); background: rgba(8,61,119,0.65); color: var(--text-light);}
 
 
 
 
78
 
79
  /* Buttons */
80
+ .gradio-container .gr-button, .gradio-container button{ border-radius:10px !important; font-weight:600 !important; }
81
+ .gradio-container .gr-button-primary, .gradio-container button.primary{
82
+ background: var(--button-primary-background-fill) !important;
83
+ color: var(--button-primary-text-color) !important;
84
+ border: 1px solid var(--button-primary-border-color) !important;
 
85
  }
86
+ .gradio-container .gr-button-secondary, .gradio-container button.secondary{
87
+ background: var(--button-secondary-background-fill) !important;
88
+ color: var(--button-secondary-text-color) !important;
89
+ border: 1px solid var(--button-secondary-border-color) !important;
90
  }
91
 
92
  /* Inputs */
93
+ input, textarea, select, .gr-textbox, .gr-text-area, .gr-dropdown, .gr-file, .gr-slider{
94
+ background: rgba(8,61,119,0.55) !important;
95
  color: var(--text-light) !important;
96
+ border: 1px solid rgba(255,255,255,0.18) !important;
97
  border-radius: 10px !important;
98
  }
99
+ input::placeholder, textarea::placeholder{ color: rgba(234,242,255,0.65) !important; }
 
 
100
 
101
+ /* Links & text */
102
+ a, .prose a{ color: var(--amber) !important; text-decoration:none; } a:hover{ text-decoration:underline; }
103
+ label, .label, .prose h1, .prose h2, .prose h3, .prose p, .markdown-body{ color: var(--text-light) !important; }
 
104
 
105
+ /* Dataframe */
106
+ .dataframe, table, .table, .gr-dataframe *{ color: var(--text-light) !important; background: transparent !important; }
107
+ .dataframe th{ background: rgba(8,61,119,0.72) !important; border-bottom: 1px solid rgba(255,255,255,0.18) !important; }
108
+ .dataframe td{ border-bottom: 1px solid rgba(255,255,255,0.12) !important; }
109
+
110
+ /* Accordion */
111
+ .accordion, .gr-accordion{
112
+ background: rgba(8,61,119,0.65) !important; border: 1px solid rgba(255,255,255,0.14) !important; border-radius: 10px !important;
113
  }
114
 
115
+ /* Tabs active underline color */
116
+ .gradio-container .tabs .tab-nav button.selected{
117
+ box-shadow: inset 0 -3px 0 0 var(--gold) !important; color: var(--text-light) !important;
 
 
118
  }
119
 
120
+ /* Optional: center content */
121
+ .gradio-container{ max-width: 1120px; margin: 0 auto; }
122
  """)
123
  return tmpl.substitute(
124
+ navy=PALETTE["navy"], gold=PALETTE["gold"], ice=PALETTE["ice"],
125
+ maroon=PALETTE["maroon"], amber=PALETTE["amber"],
126
+ text_dark=PALETTE["text_on_light"], text_light=PALETTE["text_on_dark"]
 
 
 
 
127
  )
128
 
129
+ # =========================
130
+ # Engine config
131
+ # =========================
132
  EMBED_MODEL_NAME = "intfloat/multilingual-e5-small"
133
  CHUNK_SIZE = 1200
134
  CHUNK_OVERLAP = 200
 
149
  faiss_index = None
150
  docstore: List[Dict[str, Any]] = []
151
 
152
+ # =========================
153
+ # PDF utils
154
+ # =========================
155
  def extract_text_from_pdf(pdf_path: str) -> List[Tuple[int, str]]:
156
  pages = []
157
  with fitz.open(pdf_path) as doc:
 
160
  if not txt.strip():
161
  blocks = page.get_text("blocks")
162
  if isinstance(blocks, list):
163
+ txt = "\n".join(b[4] for b in blocks if isinstance(b, (list, tuple)) and len(b) > 4)
 
 
 
164
  pages.append((i, txt or ""))
165
  return pages
166
 
 
175
  start = max(end - overlap, start + 1)
176
  return out
177
 
178
+ # =========================
179
+ # Embeddings / FAISS
180
+ # =========================
181
  def load_embedder():
182
  global embedder
183
  if embedder is None:
 
220
  return True
221
  return False
222
 
223
+ # =========================
224
+ # Ingest
225
+ # =========================
226
  def _collect_pdf_paths(upload_paths: List[str]) -> List[str]:
227
  """Accept PDFs and ZIPs of PDFs."""
228
  if not upload_paths:
 
255
  continue
256
  for ci, ch in enumerate(chunk_text(ptxt)):
257
  entries.append({
258
+ "text": ch, "source": base,
259
+ "page_start": pno, "page_end": pno,
 
 
260
  "chunk_id": f"{base}::p{pno}::c{ci}",
261
  })
262
  except Exception as e:
 
268
  index = build_faiss(embs)
269
  return index, entries
270
 
271
+ # =========================
272
+ # Retrieval
273
+ # =========================
274
  def retrieve(query: str, top_k=5, must_contain: str = ""):
275
  global faiss_index, docstore
276
  if faiss_index is None or not docstore:
 
300
  hits.append(item)
301
  return hits
302
 
303
+ # =========================
304
+ # Groq LLM
305
+ # =========================
306
  def groq_answer(query: str, contexts, model_name="llama-3.3-70b-versatile", temperature=0.2, max_tokens=1000):
307
  try:
308
  if not os.environ.get("GROQ_API_KEY"):
 
329
  )
330
 
331
  resp = client.chat.completions.create(
332
+ model=model_name, temperature=float(temperature), max_tokens=int(max_tokens),
 
 
333
  messages=[{"role":"system","content":system_prompt},{"role":"user","content":user_prompt}],
334
  )
335
  return resp.choices[0].message.content.strip()
 
337
  import traceback
338
  return f"Groq API error: {e}\n```\n{traceback.format_exc()}\n```"
339
 
340
+ # =========================
341
+ # Export helpers
342
+ # =========================
343
  def export_answer_to_docx(question: str, answer_md: str, rows: List[List[str]]) -> str:
 
344
  doc = Document()
 
345
  try:
346
+ styles = doc.styles
347
  styles['Normal'].font.name = 'Calibri'
348
  styles['Normal'].font.size = Pt(11)
349
  except Exception:
 
359
  doc.add_heading("References (Top Passages)", level=2)
360
  table = doc.add_table(rows=1, cols=4)
361
  hdr = table.rows[0].cells
362
+ hdr[0].text = "Source"; hdr[1].text = "Page"; hdr[2].text = "Score"; hdr[3].text = "Snippet"
 
 
 
363
  for r in rows:
364
  row = table.add_row().cells
365
  for i, val in enumerate(r):
 
369
  doc.save(path)
370
  return path
371
 
372
+ # =========================
373
+ # UI helpers
374
+ # =========================
375
  def build_index_from_uploads(paths: List[str]) -> str:
376
  global faiss_index, docstore
377
  pdfs = _collect_pdf_paths(paths)
 
429
  if not answer_md or not sources_rows:
430
  return None
431
  try:
432
+ return export_answer_to_docx(question, answer_md, sources_rows)
 
433
  except Exception:
434
  return None
435
 
436
+ # =========================
437
+ # UI
438
+ # =========================
439
  theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate")
440
 
441
+ with gr.Blocks(title=f"{APP_NAME} | RAG over PDFs", theme=theme, css=build_custom_css()) as demo:
442
+ # Header / Hero
 
 
 
 
 
443
  with gr.Group(elem_id="hero"):
444
+ gr.Markdown(f"""
 
445
  <div style="display:flex;align-items:center;gap:16px;">
446
  <div style="font-size:36px">📚🔎 <b>{APP_NAME}</b></div>
447
  <div style="opacity:.9;">{TAGLINE}</div>
448
  </div>
449
  <p style="opacity:.85;margin-top:6px;">
450
  Upload your papers, build an index, and ask research questions with verifiable, page-level citations.
451
+ </p>""")
 
452
 
453
+ # KPI Row
454
  with gr.Row():
455
  gr.Markdown("**Meaning-aware retrieval**<br><span class='kpi'>E5 + FAISS</span>", elem_classes=["kpi"])
456
  gr.Markdown("**Cited answers**<br><span class='kpi'>Page-level proof</span>", elem_classes=["kpi"])
457
  gr.Markdown("**Runs anywhere**<br><span class='kpi'>HF Spaces or Colab</span>", elem_classes=["kpi"])
458
 
459
+ # API Key
460
  with gr.Row():
461
  api_box = gr.Textbox(label="(Optional) Set GROQ_API_KEY", type="password", placeholder="sk_...")
462
  set_btn = gr.Button("Set Key")
 
464
  set_btn.click(set_api_key, inputs=[api_box], outputs=[set_out])
465
 
466
  with gr.Tabs():
467
+ # Tab 1: Build / Load
468
  with gr.Tab("1) Build or Load Index"):
469
  gr.Markdown("Upload PDFs or a ZIP of PDFs, then click **Build Index**.")
470
  file_u = gr.Files(label="Upload PDFs or ZIP", file_types=[".pdf", ".zip"], type="filepath")
 
486
  zpath = gr.File(label="Index bundle", interactive=False)
487
  download_btn.click(fn=download_index_zip, outputs=[zpath])
488
 
489
+ # Tab 2: Ask
490
  with gr.Tab("2) Ask Questions"):
491
  with gr.Row():
492
  with gr.Column(scale=1):
 
505
  ["List camera model, sensor type, resolution, and FPS across studies. Cite pages.", "camera, fps, resolution"],
506
  ["Extract limitations and future work across the corpus, with page references.", ""],
507
  ["Compare GTAW setups: current range, travel speed, torch standoff, sensors.", "GTAW, current, speed, torch"],
508
+ ["Summarize results tables with metrics and page citations.", "table, accuracy, mAP, F1"]
509
  ],
510
  inputs=[q, must],
511
  label="Quick examples",
 
519
  export_btn = gr.Button("Export Answer to DOCX", visible=False)
520
  exported = gr.File(label="Download answer", visible=True)
521
 
 
522
  ask_btn.click(fn=ask_rag, inputs=[q, topk, model_dd, temp, must], outputs=[ans, src, snippets_md, export_btn])
523
  export_btn.click(fn=do_export_docx, inputs=[q, ans, src], outputs=[exported])
524
  clear_btn.click(lambda: ("", [], "", gr.update(visible=False)), outputs=[ans, src, snippets_md, export_btn])
525
 
526
+ # Tab 3: About
527
  with gr.Tab("About"):
528
+ gr.Markdown("""
 
529
  **ScholarLens** helps researchers move from reading to results with answers grounded in the papers you upload.
530
 
531
  - Meaning-aware retrieval (E5 + FAISS)
 
535
  - Powered by Groq models
536
 
537
  *Privacy note:* your files stay on this Space. Only the Groq call is external.
538
+ """)
 
539
 
540
+ # Run
541
  demo.queue()
542
  if __name__ == "__main__":
543
  demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))