EngrMuhammadBilal committed on
Commit
1d9c80e
·
verified ·
1 Parent(s): b623c81

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -18
app.py CHANGED
@@ -13,7 +13,7 @@ from docx.shared import Pt
13
  APP_NAME = "ScholarLens"
14
  TAGLINE = "Query your literature, get page-level proof"
15
 
16
- # ---------- Config (same engine, nicer UI) ----------
17
  EMBED_MODEL_NAME = "intfloat/multilingual-e5-small"
18
  CHUNK_SIZE = 1200
19
  CHUNK_OVERLAP = 200
@@ -23,9 +23,8 @@ MAX_CONTEXT_CHARS = 16000
23
  INDEX_PATH = "rag_index.faiss"
24
  STORE_PATH = "rag_store.pkl"
25
 
26
- # You can edit the default model here. All are selectable in the UI.
27
  MODEL_CHOICES = [
28
- "llama-3.3-70b-versatile",
29
  "llama-3.1-8b-instant",
30
  "mixtral-8x7b-32768",
31
  ]
@@ -44,7 +43,10 @@ def extract_text_from_pdf(pdf_path: str) -> List[Tuple[int, str]]:
44
  if not txt.strip():
45
  blocks = page.get_text("blocks")
46
  if isinstance(blocks, list):
47
- txt = "\n".join(b[4] for b in blocks if isinstance(b, (list, tuple)) and len(b) > 4)
 
 
 
48
  pages.append((i, txt or ""))
49
  return pages
50
 
@@ -182,7 +184,7 @@ def retrieve(query: str, top_k=5, must_contain: str = ""):
182
  return hits
183
 
184
  # ---------- Groq LLM ----------
185
- def groq_answer(query: str, contexts, model_name="llama-3.3-70b-versatile", temperature=0.2, max_tokens=1000):
186
  try:
187
  if not os.environ.get("GROQ_API_KEY"):
188
  return "GROQ_API_KEY is not set. Add it in your Space secrets or the key box."
@@ -236,7 +238,6 @@ def export_answer_to_docx(question: str, answer_md: str, rows: List[List[str]])
236
  doc.add_paragraph(f"Question: {question}")
237
 
238
  doc.add_heading("Answer", level=2)
239
- # Write as plain text to keep it simple in Word
240
  for line in answer_md.splitlines():
241
  doc.add_paragraph(line)
242
 
@@ -278,13 +279,11 @@ def ask_rag(question: str, top_k, model_name: str, temperature: float, must_cont
278
  ctx = retrieve(question, top_k=int(top_k) if top_k else TOP_K_DEFAULT, must_contain=must_contain)
279
  ans = groq_answer(question, ctx, model_name=model_name, temperature=temperature)
280
 
281
- # sources table
282
  rows = []
283
  for c in ctx:
284
  preview = c["text"][:200].replace("\n"," ") + ("..." if len(c["text"])>200 else "")
285
  rows.append([c["source"], str(c["page_start"]), f"{c['score']:.3f}", preview])
286
 
287
- # snippets pretty print
288
  details = []
289
  for c in ctx:
290
  details.append(f"**{c['source']} p.{c['page_start']}**\n> {c['text'].strip()[:1000]}")
@@ -321,28 +320,32 @@ def do_export_docx(question, answer_md, sources_rows):
321
  except Exception:
322
  return None
323
 
324
- # ---------- Theme ----------
325
  theme = gr.themes.Soft(
326
  primary_hue="indigo",
327
  secondary_hue="blue",
328
  neutral_hue="slate",
329
- ).set(
330
- body_background_fill="#0B1220", # dark-friendly hero
331
- block_background_fill="#0F172A",
332
- block_shadow="*shadow-lg",
333
- radius_size="8px",
334
  )
335
 
336
  # ---------- Gradio UI ----------
337
- with gr.Blocks(title=f"{APP_NAME} | RAG over PDFs", theme=theme, css="""
 
 
 
338
  #hero {
339
  background: radial-gradient(1200px 600px at 20% -10%, rgba(99,102,241,.25), transparent),
340
  radial-gradient(1000px 500px at 120% 10%, rgba(14,165,233,.20), transparent);
341
  border: 1px solid rgba(99,102,241,.20);
 
 
342
  }
343
  .kpi {text-align:center;padding:12px;border-radius:10px;border:1px solid rgba(255,255,255,.08);}
344
  .footer {opacity:.8;}
345
- """) as demo:
 
 
 
 
346
  # --- Header / Hero ---
347
  with gr.Group(elem_id="hero"):
348
  gr.Markdown(
@@ -396,7 +399,7 @@ Upload your papers, build an index, and ask research questions with verifiable,
396
  with gr.Tab("2) Ask Questions"):
397
  with gr.Row():
398
  with gr.Column(scale=1):
399
- q = gr.Textbox(label="Your question", lines=3, placeholder="e.g., Compare GTAW experimental parameters with citations")
400
  must = gr.Textbox(label="Must contain (comma-separated keywords)", placeholder="camera, CMOS, frame rate")
401
  with gr.Accordion("Advanced settings", open=False):
402
  topk = gr.Slider(1, 20, value=TOP_K_DEFAULT, step=1, label="Top-K passages")
@@ -446,7 +449,7 @@ Upload your papers, build an index, and ask research questions with verifiable,
446
  """
447
  )
448
 
449
- # broad compatibility for Spaces
450
  demo.queue()
451
  if __name__ == "__main__":
452
  demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
13
  APP_NAME = "ScholarLens"
14
  TAGLINE = "Query your literature, get page-level proof"
15
 
16
+ # ---------- Config ----------
17
  EMBED_MODEL_NAME = "intfloat/multilingual-e5-small"
18
  CHUNK_SIZE = 1200
19
  CHUNK_OVERLAP = 200
 
23
  INDEX_PATH = "rag_index.faiss"
24
  STORE_PATH = "rag_store.pkl"
25
 
 
26
  MODEL_CHOICES = [
27
+ "llama-3.1-70b-versatile",
28
  "llama-3.1-8b-instant",
29
  "mixtral-8x7b-32768",
30
  ]
 
43
  if not txt.strip():
44
  blocks = page.get_text("blocks")
45
  if isinstance(blocks, list):
46
+ txt = "\n".join(
47
+ b[4] for b in blocks
48
+ if isinstance(b, (list, tuple)) and len(b) > 4
49
+ )
50
  pages.append((i, txt or ""))
51
  return pages
52
 
 
184
  return hits
185
 
186
  # ---------- Groq LLM ----------
187
+ def groq_answer(query: str, contexts, model_name="llama-3.1-70b-versatile", temperature=0.2, max_tokens=1000):
188
  try:
189
  if not os.environ.get("GROQ_API_KEY"):
190
  return "GROQ_API_KEY is not set. Add it in your Space secrets or the key box."
 
238
  doc.add_paragraph(f"Question: {question}")
239
 
240
  doc.add_heading("Answer", level=2)
 
241
  for line in answer_md.splitlines():
242
  doc.add_paragraph(line)
243
 
 
279
  ctx = retrieve(question, top_k=int(top_k) if top_k else TOP_K_DEFAULT, must_contain=must_contain)
280
  ans = groq_answer(question, ctx, model_name=model_name, temperature=temperature)
281
 
 
282
  rows = []
283
  for c in ctx:
284
  preview = c["text"][:200].replace("\n"," ") + ("..." if len(c["text"])>200 else "")
285
  rows.append([c["source"], str(c["page_start"]), f"{c['score']:.3f}", preview])
286
 
 
287
  details = []
288
  for c in ctx:
289
  details.append(f"**{c['source']} p.{c['page_start']}**\n> {c['text'].strip()[:1000]}")
 
320
  except Exception:
321
  return None
322
 
323
+ # ---------- Theme (no .set used; styling via CSS) ----------
324
  theme = gr.themes.Soft(
325
  primary_hue="indigo",
326
  secondary_hue="blue",
327
  neutral_hue="slate",
 
 
 
 
 
328
  )
329
 
330
  # ---------- Gradio UI ----------
331
+ with gr.Blocks(
332
+ title=f"{APP_NAME} | RAG over PDFs",
333
+ theme=theme,
334
+ css="""
335
  #hero {
336
  background: radial-gradient(1200px 600px at 20% -10%, rgba(99,102,241,.25), transparent),
337
  radial-gradient(1000px 500px at 120% 10%, rgba(14,165,233,.20), transparent);
338
  border: 1px solid rgba(99,102,241,.20);
339
+ border-radius: 12px;
340
+ padding: 14px 16px;
341
  }
342
  .kpi {text-align:center;padding:12px;border-radius:10px;border:1px solid rgba(255,255,255,.08);}
343
  .footer {opacity:.8;}
344
+ /* Dark-friendly background */
345
+ body, .gradio-container { background: #0B1220 !important; }
346
+ .gradio-container .block, .gradio-container .tabs { background: #0F172A !important; }
347
+ """
348
+ ) as demo:
349
  # --- Header / Hero ---
350
  with gr.Group(elem_id="hero"):
351
  gr.Markdown(
 
399
  with gr.Tab("2) Ask Questions"):
400
  with gr.Row():
401
  with gr.Column(scale=1):
402
+ q = gr.Textbox(label="Your question", lines=3, placeholder="e.g., Compare GTAW parameters with citations")
403
  must = gr.Textbox(label="Must contain (comma-separated keywords)", placeholder="camera, CMOS, frame rate")
404
  with gr.Accordion("Advanced settings", open=False):
405
  topk = gr.Slider(1, 20, value=TOP_K_DEFAULT, step=1, label="Top-K passages")
 
449
  """
450
  )
451
 
452
+ # Broad compatibility for Spaces
453
  demo.queue()
454
  if __name__ == "__main__":
455
  demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))