Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
# ================================================================
|
| 2 |
-
#
|
| 3 |
-
# FIX: Global Search now uses arXiv relevance sort (not date)
|
| 4 |
-
# → searching "Attention is All You Need" returns the correct paper
|
| 5 |
# ================================================================
|
| 6 |
import os, re, time, json, pickle, threading
|
| 7 |
import requests
|
|
@@ -32,10 +30,10 @@ GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
|
|
| 32 |
S2_API_KEY = os.environ.get("S2_API_KEY", "")
|
| 33 |
groq_client = Groq(api_key=GROQ_API_KEY)
|
| 34 |
|
| 35 |
-
print("
|
| 36 |
embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
| 37 |
_ = embedder.encode(["warmup"])
|
| 38 |
-
print("
|
| 39 |
|
| 40 |
PAPERS = []
|
| 41 |
ACTIVE_PAPERS = []
|
|
@@ -45,57 +43,52 @@ AUTO_LOG = []
|
|
| 45 |
CURRENT_YEAR = datetime.now().year
|
| 46 |
|
| 47 |
PERSIST_DIR = "/tmp"
|
| 48 |
-
FAVORITES_PATH =
|
| 49 |
-
SEEN_IDS_PATH =
|
| 50 |
os.makedirs(PERSIST_DIR, exist_ok=True)
|
| 51 |
|
| 52 |
CATEGORIES = {
|
| 53 |
-
"
|
| 54 |
-
"
|
| 55 |
-
"
|
| 56 |
-
"
|
| 57 |
-
"
|
| 58 |
-
"
|
| 59 |
-
"
|
| 60 |
-
"
|
| 61 |
-
"
|
| 62 |
-
"
|
| 63 |
-
"
|
| 64 |
}
|
| 65 |
CROSSREF_SUBJECTS = {
|
| 66 |
-
"
|
| 67 |
-
"
|
| 68 |
-
"
|
| 69 |
-
"
|
| 70 |
-
"
|
| 71 |
-
"
|
| 72 |
-
"
|
| 73 |
-
"
|
| 74 |
-
"
|
| 75 |
-
"
|
| 76 |
-
"
|
| 77 |
}
|
| 78 |
-
LANG_CHOICES = ["
|
| 79 |
-
SORT_CHOICES = [
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
"📊 الأقل اقتباساً / Least Cited",
|
| 84 |
-
]
|
| 85 |
-
AR_FORMAT_RULES = """
|
| 86 |
-
قواعد التنسيق:
|
| 87 |
-
- ابدأ كل قسم بـ ## على سطر منفرد مع سطر فارغ قبله وبعده
|
| 88 |
-
- اكتب كل قسم في فقرة من 3-4 جمل بالعربية الفصحى
|
| 89 |
- لا تكرر عنوان القسم داخل النص
|
| 90 |
-
- لا تضف --- أو *** أو رموز زائدة
|
| 91 |
"""
|
| 92 |
|
| 93 |
# ================================================================
|
| 94 |
# HELPERS
|
| 95 |
# ================================================================
|
| 96 |
def detect_lang(text):
|
| 97 |
-
try:
|
| 98 |
-
|
|
|
|
|
|
|
| 99 |
|
| 100 |
def clean_md(text):
|
| 101 |
text = re.sub(r"[#*`>\[\]!_~]", "", text)
|
|
@@ -109,37 +102,39 @@ def fix_ar_format(text):
|
|
| 109 |
def cit_badge(n):
|
| 110 |
if n is None or n == "": return "—"
|
| 111 |
n = int(n)
|
| 112 |
-
if n >= 1000: return
|
| 113 |
-
if n >= 100: return
|
| 114 |
-
if n >= 10: return
|
| 115 |
-
if n > 0: return
|
| 116 |
return "·"
|
| 117 |
|
| 118 |
def build_table(papers_list):
|
| 119 |
-
rows = "| # |
|
| 120 |
rows += "|---|---|---|---|---|---|\n"
|
| 121 |
choices = []
|
| 122 |
for i, p in enumerate(papers_list):
|
| 123 |
-
first
|
| 124 |
-
badge
|
| 125 |
-
rows
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
| 129 |
return rows, choices
|
| 130 |
|
| 131 |
def s2_headers():
|
| 132 |
h = {"User-Agent": "ScientificPaperBot/7.4"}
|
| 133 |
-
if S2_API_KEY:
|
|
|
|
| 134 |
return h
|
| 135 |
|
| 136 |
def cr_headers():
|
| 137 |
return {"User-Agent": "ScientificPaperBot/7.4 (mailto:researcher@example.com)"}
|
| 138 |
|
| 139 |
# ================================================================
|
| 140 |
-
#
|
| 141 |
# ================================================================
|
| 142 |
-
def parse_crossref_date(item
|
| 143 |
for field in ["issued", "published", "published-print", "published-online", "created"]:
|
| 144 |
dp = (item.get(field) or {}).get("date-parts", [[]])
|
| 145 |
if not dp or not dp[0]: continue
|
|
@@ -149,7 +144,7 @@ def parse_crossref_date(item: dict) -> str:
|
|
| 149 |
if not (1900 <= year <= CURRENT_YEAR + 1): continue
|
| 150 |
month = max(1, min(12, int(pts[1]) if len(pts) >= 2 else 1))
|
| 151 |
day = max(1, min(31, int(pts[2]) if len(pts) >= 3 else 1))
|
| 152 |
-
return
|
| 153 |
except (ValueError, TypeError, IndexError):
|
| 154 |
continue
|
| 155 |
return "N/A"
|
|
@@ -175,48 +170,52 @@ def save_favorite(paper):
|
|
| 175 |
if paper["id"] not in {p["id"] for p in favs}:
|
| 176 |
favs.append(paper)
|
| 177 |
with open(FAVORITES_PATH, "wb") as f: pickle.dump(favs, f)
|
| 178 |
-
return
|
| 179 |
-
return "
|
| 180 |
|
| 181 |
def export_favorites_csv():
|
| 182 |
favs = load_favorites()
|
| 183 |
if not favs: return None
|
| 184 |
-
df
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
df.to_csv(path, index=False, encoding="utf-8-sig")
|
| 189 |
return path
|
| 190 |
|
| 191 |
def gr_export_fav(): return export_favorites_csv()
|
| 192 |
|
| 193 |
# ================================================================
|
| 194 |
-
#
|
| 195 |
# ================================================================
|
| 196 |
def export_explanation_pdf(explanation_text, paper_title="paper"):
|
| 197 |
if not explanation_text or len(explanation_text) < 30: return None
|
| 198 |
safe = re.sub(r"[^\w\s-]", "", paper_title)[:50].strip().replace(" ", "_")
|
| 199 |
-
path =
|
| 200 |
doc = SimpleDocTemplate(path, pagesize=A4,
|
| 201 |
rightMargin=2*cm, leftMargin=2*cm,
|
| 202 |
-
topMargin=2*cm,
|
| 203 |
-
styles
|
| 204 |
-
h2_style
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
spaceAfter=10)
|
| 212 |
story = []
|
| 213 |
for line in explanation_text.split("\n"):
|
| 214 |
line = line.strip()
|
| 215 |
if not line: story.append(Spacer(1, 6)); continue
|
| 216 |
clean = re.sub(r"\*\*(.+?)\*\*", r"\1", line)
|
| 217 |
-
clean = re.sub(r"\*(.+?)\*",
|
| 218 |
-
clean = re.sub(r"`(.+?)`",
|
| 219 |
-
clean = re.sub(r"^#{1,6}\s*",
|
| 220 |
clean = re.sub(r"[🎯❓🔧📊🌟🔗📄👥📅📡🤖#*_~]", "", clean).strip()
|
| 221 |
if not clean: continue
|
| 222 |
if line.startswith("## ") or line.startswith("# "):
|
|
@@ -224,95 +223,109 @@ def export_explanation_pdf(explanation_text, paper_title="paper"):
|
|
| 224 |
color=colors.HexColor("#e2e8f0"), spaceAfter=4))
|
| 225 |
story.append(Paragraph(clean, h2_style))
|
| 226 |
elif line.startswith(">"):
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
story.append(Paragraph(
|
| 231 |
-
|
|
|
|
| 232 |
else:
|
| 233 |
-
story.append(Paragraph(clean,
|
| 234 |
-
story += [
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
|
|
|
|
|
|
| 238 |
try:
|
| 239 |
doc.build(story); return path
|
| 240 |
except Exception as e:
|
| 241 |
-
print(
|
| 242 |
|
| 243 |
def gr_export_pdf(explanation_text, choice):
|
| 244 |
if not explanation_text or len(explanation_text) < 50:
|
| 245 |
-
return None, "
|
| 246 |
title = choice.split(". ", 1)[-1] if choice else "paper"
|
| 247 |
path = export_explanation_pdf(explanation_text, title)
|
| 248 |
-
return (path, "
|
| 249 |
|
| 250 |
# ================================================================
|
| 251 |
# SOURCE 1 — arXiv
|
| 252 |
-
#
|
| 253 |
-
#
|
| 254 |
-
#
|
| 255 |
# ================================================================
|
| 256 |
def fetch_arxiv_papers(query, category, max_results=20, days_back=365,
|
| 257 |
sort_by="submittedDate"):
|
| 258 |
parts = []
|
| 259 |
-
# ✅ If query looks like a paper title (>3 words), use ti: prefix for precision
|
| 260 |
words = query.strip().split()
|
| 261 |
if len(words) >= 3 and sort_by == "relevance":
|
| 262 |
-
parts.append(
|
| 263 |
elif query.strip():
|
| 264 |
-
parts.append(
|
| 265 |
if category.strip():
|
| 266 |
-
parts.append(
|
| 267 |
-
sq
|
| 268 |
-
params = {
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
try:
|
| 271 |
resp = requests.get("http://export.arxiv.org/api/query", params=params, timeout=30)
|
| 272 |
resp.raise_for_status()
|
| 273 |
-
except Exception as e:
|
|
|
|
| 274 |
|
| 275 |
ns_a = "http://www.w3.org/2005/Atom"
|
| 276 |
ns_x = "http://arxiv.org/schemas/atom"
|
| 277 |
root = ET.fromstring(resp.content)
|
| 278 |
cutoff = datetime.now() - timedelta(days=days_back)
|
| 279 |
papers = []
|
| 280 |
-
for entry in root.findall(
|
| 281 |
try:
|
| 282 |
-
pid = entry.find(
|
| 283 |
-
title = entry.find(
|
| 284 |
-
abstract = entry.find(
|
| 285 |
-
published = entry.find(
|
| 286 |
-
authors = [a.find(
|
| 287 |
-
for a in entry.findall(
|
| 288 |
cats = set()
|
| 289 |
-
pc = entry.find(
|
| 290 |
if pc is not None: cats.add(pc.get("term",""))
|
| 291 |
-
for c in entry.findall(
|
| 292 |
cats.discard("")
|
| 293 |
papers.append({
|
| 294 |
-
"id":
|
| 295 |
-
"
|
| 296 |
-
"
|
| 297 |
-
"
|
| 298 |
-
"
|
| 299 |
-
"
|
| 300 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
})
|
| 302 |
-
except Exception as e:
|
|
|
|
| 303 |
return papers
|
| 304 |
|
| 305 |
# ================================================================
|
| 306 |
-
# SOURCE 2 — CrossRef
|
| 307 |
# ================================================================
|
| 308 |
-
def fetch_crossref_papers(query, category_label="", max_results=20,
|
| 309 |
-
use_title=False):
|
| 310 |
subject = CROSSREF_SUBJECTS.get(category_label, "")
|
| 311 |
-
full_query =
|
|
|
|
| 312 |
params = {
|
| 313 |
-
|
| 314 |
-
"rows":
|
| 315 |
-
"sort":
|
| 316 |
"select": ("title,author,abstract,published,published-print,"
|
| 317 |
"published-online,issued,created,DOI,"
|
| 318 |
"is-referenced-by-count,link,subject"),
|
|
@@ -325,8 +338,9 @@ def fetch_crossref_papers(query, category_label="", max_results=20, days_back=36
|
|
| 325 |
if r.status_code == 200:
|
| 326 |
items = r.json().get("message",{}).get("items",[]); break
|
| 327 |
if r.status_code == 429: time.sleep(2**attempt); continue
|
| 328 |
-
print(
|
| 329 |
-
except Exception as e:
|
|
|
|
| 330 |
|
| 331 |
cutoff = datetime.now() - timedelta(days=days_back)
|
| 332 |
papers, seen_ids = [], set()
|
|
@@ -339,71 +353,76 @@ def fetch_crossref_papers(query, category_label="", max_results=20, days_back=36
|
|
| 339 |
pub = parse_crossref_date(item)
|
| 340 |
if pub == "N/A": continue
|
| 341 |
cit = int(item.get("is-referenced-by-count", 0) or 0)
|
| 342 |
-
authors = [
|
| 343 |
-
|
|
|
|
|
|
|
| 344 |
authors = [a for a in authors if a.strip()] or ["Unknown"]
|
| 345 |
-
abstract = re.sub(r"<[^>]+>","",
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
|
|
|
| 349 |
if pid in seen_ids: continue
|
| 350 |
seen_ids.add(pid)
|
| 351 |
-
pdf_url
|
| 352 |
-
|
| 353 |
-
try: recent = datetime.strptime(pub[:10],"%Y-%m-%d") >= cutoff
|
| 354 |
except: recent = False
|
| 355 |
papers.append({
|
| 356 |
-
"id":
|
| 357 |
-
"
|
|
|
|
|
|
|
|
|
|
| 358 |
"categories": item.get("subject",[])[:3],
|
| 359 |
-
"citations":
|
| 360 |
-
"
|
|
|
|
|
|
|
|
|
|
| 361 |
})
|
| 362 |
papers.sort(key=lambda x: x["citations"], reverse=True)
|
| 363 |
return papers
|
| 364 |
|
| 365 |
# ================================================================
|
| 366 |
-
#
|
| 367 |
# ================================================================
|
| 368 |
def global_paper_search(query, source_choice, max_results=10):
|
| 369 |
if not query or not query.strip():
|
| 370 |
-
return "
|
| 371 |
-
q
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
if source_choice in ("arXiv", "كلاهما / Both"):
|
| 375 |
-
# ✅ sort_by="relevance" → returns most relevant, not newest
|
| 376 |
papers += fetch_arxiv_papers(q, "", int(max_results), 3650,
|
| 377 |
sort_by="relevance")
|
| 378 |
-
|
| 379 |
-
if source_choice in ("CrossRef", "كلاهما / Both"):
|
| 380 |
-
# ✅ use_title=True → uses query.title for precise title match
|
| 381 |
papers += fetch_crossref_papers(q, "", int(max_results), 3650,
|
| 382 |
use_title=True)
|
| 383 |
-
|
| 384 |
if not papers:
|
| 385 |
-
return
|
| 386 |
|
| 387 |
-
# Deduplicate
|
| 388 |
seen, unique = set(), []
|
| 389 |
for p in papers:
|
| 390 |
key = re.sub(r"\W","",p["title"].lower())[:60]
|
| 391 |
if key not in seen: seen.add(key); unique.append(p)
|
| 392 |
-
|
| 393 |
-
# Sort by citation count (most cited first for well-known papers)
|
| 394 |
unique.sort(key=lambda x: x.get("citations") or 0, reverse=True)
|
| 395 |
|
| 396 |
-
|
|
|
|
|
|
|
| 397 |
for i, p in enumerate(unique, 1):
|
| 398 |
-
cit =
|
| 399 |
-
cats = "
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
+"
|
|
|
|
|
|
|
|
|
|
| 407 |
return md
|
| 408 |
|
| 409 |
# ================================================================
|
|
@@ -420,7 +439,8 @@ def enrich_citations(papers):
|
|
| 420 |
id_map, batch_ids = {}, []
|
| 421 |
for p in arxiv_papers:
|
| 422 |
clean = re.sub(r"v\d+$","", p["id"].split("/")[-1].strip())
|
| 423 |
-
id_map[clean] = p
|
|
|
|
| 424 |
for i in range(0, len(batch_ids), 500):
|
| 425 |
try:
|
| 426 |
r = requests.post(
|
|
@@ -432,43 +452,47 @@ def enrich_citations(papers):
|
|
| 432 |
for item in r.json():
|
| 433 |
if not item: continue
|
| 434 |
ext = item.get("externalIds") or {}
|
| 435 |
-
clean = re.sub(r"v\d+$","",
|
|
|
|
| 436 |
if clean and clean in id_map:
|
| 437 |
c = item.get("citationCount")
|
| 438 |
if c is not None: id_map[clean]["citations"] = int(c)
|
| 439 |
elif r.status_code == 429: time.sleep(4)
|
| 440 |
-
except Exception as e: print(
|
| 441 |
for p in [x for x in arxiv_papers if (x.get("citations") or 0)==0][:15]:
|
| 442 |
clean = re.sub(r"v\d+$","", p["id"].split("/")[-1].strip())
|
| 443 |
for attempt in range(2):
|
| 444 |
try:
|
| 445 |
r = requests.get(
|
| 446 |
-
|
| 447 |
-
params={"fields":"citationCount"},
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
|
|
|
|
|
|
| 453 |
time.sleep(0.12)
|
| 454 |
for p in [x for x in arxiv_papers if (x.get("citations") or 0)==0]:
|
| 455 |
try:
|
| 456 |
r = requests.get("https://api.crossref.org/works",
|
| 457 |
-
params={"query.title":p["title"],"rows":1,
|
| 458 |
-
"select":"is-referenced-by-count,title"},
|
| 459 |
headers=cr_headers(), timeout=8)
|
| 460 |
-
if r.status_code==200:
|
| 461 |
-
items=r.json().get("message",{}).get("items",[])
|
| 462 |
if items:
|
| 463 |
-
found=(items[0].get("title") or [""])[0].lower()
|
| 464 |
-
qw=set(p["title"].lower().split()[:5])
|
| 465 |
-
fw=set(found.split()[:10])
|
| 466 |
-
p["citations"]=
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
|
|
|
| 470 |
time.sleep(0.12)
|
| 471 |
-
except: p["citations"]=0
|
| 472 |
for p in papers:
|
| 473 |
if p.get("citations") is None: p["citations"] = 0
|
| 474 |
return papers
|
|
@@ -480,10 +504,11 @@ def build_papers_index(papers):
|
|
| 480 |
global FAISS_INDEX, PAPERS
|
| 481 |
PAPERS = papers
|
| 482 |
if not papers: FAISS_INDEX = None; return
|
| 483 |
-
texts = [
|
| 484 |
embs = embedder.encode(texts, convert_to_numpy=True,
|
| 485 |
normalize_embeddings=True).astype("float32")
|
| 486 |
-
idx = faiss.IndexFlatIP(embs.shape[1])
|
|
|
|
| 487 |
FAISS_INDEX = idx
|
| 488 |
|
| 489 |
def search_papers(query, top_k=5):
|
|
@@ -491,8 +516,8 @@ def search_papers(query, top_k=5):
|
|
| 491 |
qe = embedder.encode([query], convert_to_numpy=True,
|
| 492 |
normalize_embeddings=True).astype("float32")
|
| 493 |
scores, ids = FAISS_INDEX.search(qe, min(top_k, len(PAPERS)))
|
| 494 |
-
return [{"paper":PAPERS[i],"score":float(s)}
|
| 495 |
-
for s,i in zip(scores[0],ids[0]) if i>=0 and float(s)>0.1]
|
| 496 |
|
| 497 |
# ================================================================
|
| 498 |
# AUTO-FETCH
|
|
@@ -507,142 +532,154 @@ def auto_fetch_worker(query, category, interval):
|
|
| 507 |
new_ps = [p for p in papers if p["id"] not in seen]
|
| 508 |
if new_ps:
|
| 509 |
save_seen_ids(seen | {p["id"] for p in papers})
|
| 510 |
-
AUTO_LOG.append(
|
| 511 |
-
|
|
|
|
|
|
|
| 512 |
|
| 513 |
def start_auto_fetch(query, cat_label, interval_min):
|
| 514 |
global AUTO_RUNNING
|
| 515 |
-
if AUTO_RUNNING: return "
|
| 516 |
AUTO_RUNNING = True
|
| 517 |
-
threading.Thread(
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
|
|
|
| 521 |
|
| 522 |
def stop_auto_fetch():
|
| 523 |
-
global AUTO_RUNNING; AUTO_RUNNING = False; return "
|
| 524 |
|
| 525 |
def get_auto_log():
|
| 526 |
-
return "\n\n".join(reversed(AUTO_LOG[-10:])) if AUTO_LOG else "
|
| 527 |
|
| 528 |
# ================================================================
|
| 529 |
# TRENDS
|
| 530 |
# ================================================================
|
| 531 |
def analyze_trends(papers):
|
| 532 |
-
if not papers: return None, "
|
| 533 |
date_counts = Counter(p["published"][:7] for p in papers if p["published"]!="N/A")
|
| 534 |
stopwords = {"the","a","an","of","in","for","on","with","and","or","to","using",
|
| 535 |
"based","via","from","by","is","are","our","we","this","that","which",
|
| 536 |
"towards","approach","method","new","into","over","learning","deep",
|
| 537 |
"model","models","data","neural","large","language","paper","study",
|
| 538 |
"analysis","results","show","also","can","used","two","its","their"}
|
| 539 |
-
all_words
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
|
|
|
|
|
|
| 553 |
C = ["#3b82f6","#8b5cf6","#10b981","#f59e0b","#ef4444","#06b6d4",
|
| 554 |
"#ec4899","#14b8a6","#f97316","#a855f7","#22d3ee","#84cc16",
|
| 555 |
"#fbbf24","#34d399","#f87171"]
|
| 556 |
BG,PNL,BR,W = "#0f172a","#1e293b","#334155","white"
|
| 557 |
-
fig,
|
| 558 |
fig.patch.set_facecolor(BG)
|
| 559 |
-
fig.suptitle("
|
| 560 |
def style(ax):
|
| 561 |
ax.set_facecolor(PNL)
|
| 562 |
for sp in ax.spines.values(): sp.set_edgecolor(BR)
|
| 563 |
ax.tick_params(colors=W, labelsize=8)
|
| 564 |
-
ax=axes[0,0]; style(ax)
|
| 565 |
if date_counts:
|
| 566 |
-
ms,cs=zip(*sorted(date_counts.items()))
|
| 567 |
-
|
|
|
|
| 568 |
for b,c in zip(bars,cs):
|
| 569 |
-
ax.text(b.get_x()+b.get_width()/2,b.get_height()+.05,str(c),
|
| 570 |
-
ha="center",va="bottom",color=W,fontsize=8)
|
| 571 |
-
if len(cs)>2:
|
| 572 |
-
z=np.polyfit(range(len(cs)),cs,1)
|
| 573 |
-
ax.plot(ms,np.poly1d(z)(range(len(cs))),"--",
|
| 574 |
-
lw=1.5,alpha=.8,label="Trend")
|
| 575 |
-
ax.legend(fontsize=8,facecolor=PNL,labelcolor=W)
|
| 576 |
-
ax.set_title("
|
| 577 |
-
ax.set_ylabel("Count",color=W,fontsize=9)
|
| 578 |
-
ax
|
|
|
|
| 579 |
if top_words:
|
| 580 |
-
wds,wcts=zip(*top_words)
|
| 581 |
-
ax.barh(list(wds),list(wcts),color=C[:len(wds)],edgecolor="#475569",lw=.6)
|
| 582 |
-
for b,c in zip(ax.patches,wcts):
|
| 583 |
-
ax.text(b.get_width()+.1,b.get_y()+b.get_height()/2,str(c),
|
| 584 |
-
va="center",color=W,fontsize=8)
|
| 585 |
-
ax.set_title("
|
| 586 |
-
ax.set_xlabel("Frequency",color=W,fontsize=9)
|
| 587 |
-
ax=axes[0,2]; ax.set_facecolor(PNL)
|
| 588 |
if sources:
|
| 589 |
-
sl,sv=zip(*sources.items())
|
| 590 |
-
_,txts,ats=ax.pie(sv,labels=sl,autopct="%1.0f%%",
|
| 591 |
-
|
| 592 |
-
|
|
|
|
| 593 |
for at in ats: at.set_color(W); at.set_fontsize(9)
|
| 594 |
-
ax.set_title("
|
| 595 |
-
ax=axes[1,0]; style(ax)
|
| 596 |
if top_cited:
|
| 597 |
-
lbls=[p["title"][:35]+"
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
|
|
|
| 605 |
else:
|
| 606 |
-
ax.text(.5,.5,"No citation data",ha="center",va="center",
|
| 607 |
-
color="#94a3b8",fontsize=11,transform=ax.transAxes)
|
| 608 |
-
ax.set_title("
|
| 609 |
-
ax=axes[1,1]; style(ax)
|
| 610 |
if any(bcounts):
|
| 611 |
-
ax.bar(blabels,bcounts,color=C[2],edgecolor="#475569",lw=.8)
|
| 612 |
-
for b,c in zip(ax.patches,bcounts):
|
| 613 |
-
if c>0:
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
ax.
|
| 617 |
-
ax.
|
| 618 |
-
|
| 619 |
-
|
|
|
|
| 620 |
else:
|
| 621 |
-
ax.text(.5,.5,"No citation data",ha="center",va="center",
|
| 622 |
-
color="#94a3b8",fontsize=11,transform=ax.transAxes)
|
| 623 |
-
ax.set_title("
|
| 624 |
-
ax=axes[1,2]; style(ax)
|
| 625 |
if top_authors:
|
| 626 |
-
an,ac=zip(*top_authors)
|
| 627 |
-
ax.barh(list(an)[::-1],list(ac)[::-1],color=C[3],edgecolor="#475569",lw=.6)
|
| 628 |
-
for b,c in zip(ax.patches,list(ac)[::-1]):
|
| 629 |
-
ax.text(b.get_width()+.05,b.get_y()+b.get_height()/2,str(c),
|
| 630 |
-
va="center",color=W,fontsize=8)
|
| 631 |
-
ax.set_xlabel("Papers",color=W,fontsize=9)
|
| 632 |
-
ax.set_title("
|
| 633 |
plt.tight_layout(pad=3)
|
| 634 |
-
path=
|
| 635 |
-
plt.savefig(path,bbox_inches="tight",dpi=150,facecolor=BG)
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
|
|
|
| 642 |
if top5:
|
| 643 |
-
stats += "###
|
| 644 |
for i,p in enumerate(top5,1):
|
| 645 |
-
stats +=
|
|
|
|
| 646 |
return path, stats
|
| 647 |
|
| 648 |
# ================================================================
|
|
@@ -654,50 +691,53 @@ def _llm(messages, max_tokens=1200):
|
|
| 654 |
model="llama-3.3-70b-versatile",
|
| 655 |
messages=messages, temperature=0.3, max_tokens=max_tokens)
|
| 656 |
return r.choices[0].message.content.strip()
|
| 657 |
-
except Exception as e: return
|
| 658 |
|
| 659 |
def explain_paper(paper, lang="ar"):
|
| 660 |
cit = paper.get("citations","N/A")
|
| 661 |
-
if lang=="ar":
|
| 662 |
return fix_ar_format(_llm([
|
| 663 |
-
{"role":"system","content":
|
| 664 |
-
f"أنت خبير أكاديمي يشرح الأبحاث بالعربية الفصحى.\n{AR_FORMAT_RULES}"},
|
| 665 |
{"role":"user","content":
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
"##
|
| 671 |
-
"##
|
| 672 |
return _llm([{"role":"user","content":
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
"
|
| 676 |
-
"##
|
| 677 |
|
| 678 |
def compare_papers(pa, pb, lang="ar"):
|
| 679 |
-
body = (
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
if lang=="ar":
|
| 684 |
return fix_ar_format(_llm([{"role":"user","content":
|
| 685 |
-
|
| 686 |
-
"##
|
| 687 |
-
"##
|
| 688 |
return _llm([{"role":"user","content":
|
| 689 |
-
|
| 690 |
-
"## Strengths\n## Limits\n## Verdict"}], 1400)
|
| 691 |
|
| 692 |
def summarize_papers(papers, topic, lang="ar"):
|
| 693 |
-
text = "".join(
|
| 694 |
-
|
| 695 |
-
|
|
|
|
|
|
|
| 696 |
return fix_ar_format(_llm([{"role":"user","content":
|
| 697 |
-
|
| 698 |
-
"
|
|
|
|
|
|
|
| 699 |
return _llm([{"role":"user","content":
|
| 700 |
-
|
| 701 |
"## Trends\n## Key Papers\n## Themes\n## Gaps"}], 900)
|
| 702 |
|
| 703 |
def generate_bibliography(papers, style="APA"):
|
|
@@ -706,38 +746,44 @@ def generate_bibliography(papers, style="APA"):
|
|
| 706 |
auth = ", ".join(p["authors"][:6]) + (" et al." if len(p["authors"])>6 else "")
|
| 707 |
year = p["published"][:4] if p["published"] not in ("N/A","") else "n.d."
|
| 708 |
t,u = p["title"], p["url"]
|
| 709 |
-
if
|
| 710 |
-
|
|
|
|
| 711 |
ae = " and ".join(p["authors"][:3]) + (" et al." if len(p["authors"])>3 else "")
|
| 712 |
-
entries.append(
|
| 713 |
-
elif style=="Chicago":
|
|
|
|
| 714 |
else:
|
| 715 |
key = re.sub(r"\W","", (p["authors"][0].split()[-1]
|
| 716 |
-
|
| 717 |
-
entries.append(
|
| 718 |
-
|
|
|
|
| 719 |
bib = "\n\n".join(entries)
|
| 720 |
-
path =
|
| 721 |
-
with open(path,"w",encoding="utf-8") as f: f.write(bib)
|
| 722 |
return bib, path
|
| 723 |
|
| 724 |
def chat_about_papers(question, history):
|
| 725 |
if not PAPERS:
|
| 726 |
-
return ("
|
| 727 |
-
|
| 728 |
-
lang=detect_lang(question)
|
|
|
|
|
|
|
| 729 |
if relevant:
|
| 730 |
-
context = "الأوراق ذات الصلة:\n\n" if lang=="ar" else "Relevant papers:\n\n"
|
| 731 |
for r in relevant:
|
| 732 |
-
p=r["paper"]
|
| 733 |
-
cit=
|
| 734 |
-
context +=
|
| 735 |
-
|
| 736 |
-
|
|
|
|
| 737 |
msgs = [{"role":"system","content":sys_msg}]
|
| 738 |
for t in history[-4:]: msgs.append({"role":t["role"],"content":t["content"]})
|
| 739 |
msgs.append({"role":"user","content":
|
| 740 |
-
|
| 741 |
out = _llm(msgs, 800)
|
| 742 |
return fix_ar_format(out) if lang=="ar" else out
|
| 743 |
|
|
@@ -745,9 +791,10 @@ def text_to_audio(text, lang="ar"):
|
|
| 745 |
clean = clean_md(text)
|
| 746 |
if not clean: return None
|
| 747 |
try:
|
| 748 |
-
tts=gTTS(text=clean, lang=lang, slow=False)
|
| 749 |
-
path=
|
| 750 |
-
|
|
|
|
| 751 |
|
| 752 |
# ================================================================
|
| 753 |
# GRADIO HANDLERS
|
|
@@ -755,17 +802,17 @@ def text_to_audio(text, lang="ar"):
|
|
| 755 |
def gr_fetch(query, category_label, max_results, days_back, source_choice,
|
| 756 |
progress=gr.Progress()):
|
| 757 |
global ACTIVE_PAPERS
|
| 758 |
-
progress(0.05, desc="
|
| 759 |
papers, warn = [], ""
|
| 760 |
-
if source_choice in ("arXiv",
|
| 761 |
-
progress(0.15, desc="
|
| 762 |
papers += fetch_arxiv_papers(query, CATEGORIES.get(category_label,""),
|
| 763 |
int(max_results), int(days_back),
|
| 764 |
sort_by="submittedDate")
|
| 765 |
-
if source_choice in ("CrossRef",
|
| 766 |
-
progress(0.35, desc="
|
| 767 |
cr = fetch_crossref_papers(query, category_label, int(max_results), int(days_back))
|
| 768 |
-
if not cr: warn = "\n\n>
|
| 769 |
papers += cr
|
| 770 |
seen, unique = set(), []
|
| 771 |
for p in papers:
|
|
@@ -773,30 +820,32 @@ def gr_fetch(query, category_label, max_results, days_back, source_choice,
|
|
| 773 |
if key not in seen: seen.add(key); unique.append(p)
|
| 774 |
papers = unique
|
| 775 |
if not papers:
|
| 776 |
-
return ("
|
| 777 |
-
gr.update(choices=[],value=None), gr.update(choices=[],value=None),
|
| 778 |
-
gr.update(choices=[],value=None), gr.update(choices=[],value=None),
|
| 779 |
-
|
|
|
|
| 780 |
papers = enrich_citations(papers)
|
| 781 |
-
progress(0.85, desc="
|
| 782 |
build_papers_index(papers)
|
| 783 |
ACTIVE_PAPERS = list(papers)
|
| 784 |
tbl, choices = build_table(papers)
|
| 785 |
recent = sum(1 for p in papers if p.get("recent"))
|
| 786 |
tot_cit = sum(p.get("citations") or 0 for p in papers)
|
| 787 |
zero_cit = sum(1 for p in papers if (p.get("citations") or 0)==0)
|
| 788 |
-
note = (
|
| 789 |
if zero_cit else "")
|
| 790 |
-
md = (
|
| 791 |
-
|
| 792 |
-
|
|
|
|
| 793 |
upd = gr.update(choices=choices, value=choices[0] if choices else None)
|
| 794 |
progress(1.0)
|
| 795 |
-
return md, upd, upd, upd, upd,
|
| 796 |
|
| 797 |
def gr_filter_papers(year_from, year_to, cit_min, cit_max, sort_by):
|
| 798 |
global ACTIVE_PAPERS
|
| 799 |
-
if not PAPERS: return "
|
| 800 |
filtered = []
|
| 801 |
for p in PAPERS:
|
| 802 |
try:
|
|
@@ -806,114 +855,121 @@ def gr_filter_papers(year_from, year_to, cit_min, cit_max, sort_by):
|
|
| 806 |
cit = int(p.get("citations") or 0)
|
| 807 |
if cit < int(cit_min) or cit > int(cit_max): continue
|
| 808 |
filtered.append(p)
|
| 809 |
-
if "Newest"
|
| 810 |
-
|
| 811 |
-
elif "
|
| 812 |
-
|
| 813 |
-
elif "Most" in sort_by or "الأكثر" in sort_by:
|
| 814 |
-
filtered.sort(key=lambda x:x.get("citations") or 0, reverse=True)
|
| 815 |
-
elif "Least" in sort_by or "الأقل" in sort_by:
|
| 816 |
-
filtered.sort(key=lambda x:x.get("citations") or 0)
|
| 817 |
if not filtered:
|
| 818 |
-
ACTIVE_PAPERS=[]
|
| 819 |
-
return "
|
| 820 |
ACTIVE_PAPERS = list(filtered)
|
| 821 |
tbl, choices = build_table(filtered)
|
| 822 |
tot = sum(p.get("citations") or 0 for p in filtered)
|
| 823 |
-
md = (
|
| 824 |
-
|
| 825 |
-
|
| 826 |
-
|
| 827 |
-
|
| 828 |
|
| 829 |
def gr_search_fetched(query):
|
| 830 |
-
if not query or not query.strip(): return "
|
| 831 |
-
if not PAPERS: return "
|
| 832 |
results = search_papers(query.strip(), top_k=8)
|
| 833 |
-
if not results: return
|
| 834 |
-
|
|
|
|
| 835 |
for r in results:
|
| 836 |
-
p,s
|
| 837 |
-
bar
|
| 838 |
-
cit
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
| 846 |
return md
|
| 847 |
|
| 848 |
def _get_paper(choice):
|
| 849 |
pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
|
| 850 |
-
try: return pool[int(choice.split(".")[0])-1]
|
| 851 |
except: return None
|
| 852 |
|
| 853 |
def gr_explain(choice, lang_choice):
|
| 854 |
-
if not choice: return "
|
| 855 |
paper = _get_paper(choice)
|
| 856 |
-
if not paper: return "
|
| 857 |
-
lang
|
| 858 |
-
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
| 864 |
-
|
| 865 |
-
|
| 866 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 867 |
return header + explain_paper(paper, lang)
|
| 868 |
|
| 869 |
def gr_audio(txt, lang_choice):
|
| 870 |
-
if not txt or len(txt)<50: return None
|
| 871 |
-
return text_to_audio(txt, "ar" if "
|
| 872 |
|
| 873 |
def gr_save_fav(choice):
|
| 874 |
-
if not choice: return "
|
| 875 |
paper = _get_paper(choice)
|
| 876 |
-
return save_favorite(paper) if paper else "
|
| 877 |
|
| 878 |
def gr_show_favs():
|
| 879 |
favs = load_favorites()
|
| 880 |
-
if not favs: return "
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
|
|
|
|
|
|
|
|
|
| 884 |
for p in favs]
|
| 885 |
-
return
|
|
|
|
| 886 |
|
| 887 |
def gr_compare(ca, cb, lc):
|
| 888 |
-
if not ca or not cb: return "
|
| 889 |
pa = _get_paper(ca); pb = _get_paper(cb)
|
| 890 |
-
if not pa or not pb: return "
|
| 891 |
-
if pa["id"]==pb["id"]: return "
|
| 892 |
-
return compare_papers(pa, pb, "ar" if "
|
| 893 |
|
| 894 |
def gr_overview(query, lc):
|
| 895 |
-
if not PAPERS: return "
|
| 896 |
pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
|
| 897 |
-
return (
|
| 898 |
-
|
|
|
|
| 899 |
|
| 900 |
def gr_trends():
|
| 901 |
-
if not PAPERS: return None, "
|
| 902 |
return analyze_trends(ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS)
|
| 903 |
|
| 904 |
def gr_bib(style, progress=gr.Progress()):
|
| 905 |
-
if not PAPERS: return "
|
| 906 |
-
progress(0.5, desc="
|
| 907 |
pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
|
| 908 |
text, path = generate_bibliography(pool, style)
|
| 909 |
progress(1.0)
|
| 910 |
-
|
|
|
|
| 911 |
|
| 912 |
def gr_chat_fn(message, history):
|
| 913 |
if not message.strip(): return history, ""
|
| 914 |
hd = []
|
| 915 |
for pair in history:
|
| 916 |
-
if pair[0]: hd.append({"role":"user","content":pair[0]})
|
| 917 |
if pair[1]: hd.append({"role":"assistant","content":pair[1]})
|
| 918 |
history.append((message, chat_about_papers(message, hd)))
|
| 919 |
return history, ""
|
|
@@ -925,185 +981,185 @@ CSS = """
|
|
| 925 |
footer{display:none!important}
|
| 926 |
h1{text-align:center}
|
| 927 |
.status-bar{font-size:.85rem;color:#94a3b8;padding:2px 0}
|
| 928 |
-
.legend{font-size:.8rem;color:#cbd5e1;background:#1e293b;
|
| 929 |
-
|
| 930 |
-
.
|
|
|
|
|
|
|
|
|
|
| 931 |
"""
|
| 932 |
|
| 933 |
with gr.Blocks(
|
| 934 |
theme=gr.themes.Soft(primary_hue="blue", secondary_hue="purple"),
|
| 935 |
-
title="
|
| 936 |
) as demo:
|
| 937 |
|
| 938 |
-
gr.Markdown("#
|
| 939 |
-
gr.Markdown(
|
| 940 |
-
|
| 941 |
-
|
| 942 |
-
status_bar = gr.Markdown("_لم يتم جلب أوراق بعد_", elem_classes="status-bar")
|
| 943 |
|
| 944 |
with gr.Tabs():
|
| 945 |
|
| 946 |
-
# ── TAB 1: BROWSE
|
| 947 |
-
with gr.Tab("
|
| 948 |
with gr.Row():
|
| 949 |
with gr.Column(scale=3):
|
| 950 |
-
t_query = gr.Textbox(label="
|
| 951 |
placeholder="ARIMA, inflation, LLM...",
|
| 952 |
value="economic forecasting")
|
| 953 |
-
t_category = gr.Dropdown(label="
|
| 954 |
choices=list(CATEGORIES.keys()),
|
| 955 |
-
value="
|
| 956 |
-
t_source = gr.Radio(label="
|
| 957 |
-
choices=["arXiv","CrossRef","
|
| 958 |
value="arXiv")
|
| 959 |
with gr.Column(scale=1):
|
| 960 |
-
t_max = gr.Slider(5, 50, value=15, step=5, label="
|
| 961 |
-
t_days = gr.Slider(1, 1500, value=365, step=30, label="
|
| 962 |
-
btn_fetch = gr.Button("
|
| 963 |
-
papers_table_md = gr.Markdown("
|
| 964 |
-
paper_selector = gr.Dropdown(label="
|
| 965 |
with gr.Group(elem_classes="filter-box"):
|
| 966 |
-
gr.Markdown("###
|
| 967 |
with gr.Row():
|
| 968 |
-
f_year_from = gr.Slider(2000,2026,value=2020,step=1,label="
|
| 969 |
-
f_year_to = gr.Slider(2000,2026,value=2026,step=1,label="
|
| 970 |
with gr.Row():
|
| 971 |
-
f_cit_min = gr.Slider(0,5000,value=0, step=5,label="
|
| 972 |
-
f_cit_max = gr.Slider(0,5000,value=5000,step=5,label="
|
| 973 |
with gr.Row():
|
| 974 |
f_sort = gr.Dropdown(choices=SORT_CHOICES,
|
| 975 |
-
value=
|
| 976 |
-
btn_filter = gr.Button("
|
| 977 |
-
gr.Markdown("---\n###
|
| 978 |
with gr.Row():
|
| 979 |
-
search_in_box = gr.Textbox(label="
|
| 980 |
placeholder="ARIMA, transformer...",scale=5)
|
| 981 |
-
btn_search_in = gr.Button("
|
| 982 |
search_in_out = gr.Markdown()
|
| 983 |
|
| 984 |
-
# ── TAB 2: GLOBAL SEARCH
|
| 985 |
-
with gr.Tab("
|
| 986 |
gr.Markdown(
|
| 987 |
-
"###
|
| 988 |
-
">
|
| 989 |
-
"
|
| 990 |
)
|
| 991 |
with gr.Group(elem_classes="gs-box"):
|
| 992 |
with gr.Row():
|
| 993 |
gs_query = gr.Textbox(
|
| 994 |
-
label="
|
| 995 |
-
placeholder="Attention is All You Need | ARIMA
|
| 996 |
scale=4)
|
| 997 |
-
gs_source = gr.Radio(
|
| 998 |
-
|
| 999 |
-
|
| 1000 |
-
|
| 1001 |
-
|
| 1002 |
-
|
| 1003 |
-
gs_out = gr.Markdown("_أدخل عنوان ورقة أو كلمات مفتاحية..._")
|
| 1004 |
|
| 1005 |
# ── TAB 3: EXPLAIN ─────────────────────────────────
|
| 1006 |
-
with gr.Tab("
|
| 1007 |
with gr.Row():
|
| 1008 |
-
paper_sel2 = gr.Dropdown(label="
|
| 1009 |
choices=[], interactive=True, scale=4)
|
| 1010 |
-
lang_exp = gr.Radio(LANG_CHOICES, value=
|
| 1011 |
-
label="
|
| 1012 |
with gr.Row():
|
| 1013 |
-
btn_explain = gr.Button("
|
| 1014 |
-
btn_fav = gr.Button("
|
| 1015 |
-
btn_audio = gr.Button("
|
| 1016 |
-
btn_export_pdf = gr.Button("
|
| 1017 |
with gr.Row():
|
| 1018 |
fav_status = gr.Markdown()
|
| 1019 |
pdf_status = gr.Markdown()
|
| 1020 |
-
explanation_out = gr.Markdown("
|
| 1021 |
-
audio_out = gr.Audio(label="
|
| 1022 |
-
pdf_out = gr.File(label="
|
| 1023 |
|
| 1024 |
# ── TAB 4: COMPARE ─────────────────────────────────
|
| 1025 |
-
with gr.Tab("
|
| 1026 |
with gr.Row():
|
| 1027 |
-
cmp_a = gr.Dropdown(label="
|
| 1028 |
-
cmp_b = gr.Dropdown(label="
|
| 1029 |
-
lang_cmp = gr.Radio(LANG_CHOICES, value=
|
| 1030 |
-
label="
|
| 1031 |
-
btn_compare = gr.Button("
|
| 1032 |
-
compare_out = gr.Markdown("
|
| 1033 |
|
| 1034 |
# ── TAB 5: OVERVIEW ────────────────────────────────
|
| 1035 |
-
with gr.Tab("
|
| 1036 |
with gr.Row():
|
| 1037 |
-
lang_ov = gr.Radio(LANG_CHOICES, value=
|
| 1038 |
-
label="
|
| 1039 |
-
btn_overview = gr.Button("
|
| 1040 |
-
overview_out = gr.Markdown("
|
| 1041 |
|
| 1042 |
# ── TAB 6: TRENDS ──────────────────────────────────
|
| 1043 |
-
with gr.Tab("
|
| 1044 |
-
btn_trends = gr.Button("
|
| 1045 |
-
trend_chart = gr.Image(label="
|
| 1046 |
-
trend_stats = gr.Markdown("
|
| 1047 |
|
| 1048 |
# ── TAB 7: BIBLIOGRAPHY ────────────────────────────
|
| 1049 |
-
with gr.Tab("
|
| 1050 |
-
bib_style = gr.Radio(["APA","IEEE","Chicago","BibTeX"],
|
| 1051 |
-
|
| 1052 |
-
btn_bib = gr.Button("
|
| 1053 |
bib_out = gr.Markdown()
|
| 1054 |
-
bib_file = gr.File(label="
|
| 1055 |
|
| 1056 |
# ── TAB 8: FAVORITES ───────────────────────────────
|
| 1057 |
-
with gr.Tab("
|
| 1058 |
-
btn_show_fav = gr.Button("
|
| 1059 |
-
favs_md = gr.Markdown("
|
| 1060 |
-
btn_export_fav = gr.Button("
|
| 1061 |
-
fav_csv_file = gr.File(label="
|
| 1062 |
|
| 1063 |
# ── TAB 9: AUTO-FETCH ──────────────────────────────
|
| 1064 |
-
with gr.Tab("
|
| 1065 |
with gr.Row():
|
| 1066 |
-
auto_q = gr.Textbox(label="
|
| 1067 |
value="economic forecasting", scale=3)
|
| 1068 |
-
auto_cat = gr.Dropdown(label="
|
| 1069 |
choices=list(CATEGORIES.keys()),
|
| 1070 |
-
value="
|
| 1071 |
auto_interval = gr.Slider(5,120,value=60,step=5,
|
| 1072 |
-
label="
|
| 1073 |
with gr.Row():
|
| 1074 |
-
btn_start_auto = gr.Button("
|
| 1075 |
-
btn_stop_auto = gr.Button("
|
| 1076 |
-
btn_refresh_log = gr.Button("
|
| 1077 |
auto_status = gr.Markdown()
|
| 1078 |
-
auto_log_md = gr.Markdown("
|
| 1079 |
|
| 1080 |
# ── TAB 10: CHAT ───────────────────────────────────
|
| 1081 |
-
with gr.Tab("
|
| 1082 |
-
chatbot_ui = gr.Chatbot(label="
|
| 1083 |
-
bubble_full_width=False)
|
| 1084 |
with gr.Row():
|
| 1085 |
-
chat_in = gr.Textbox(label="
|
| 1086 |
-
placeholder="
|
| 1087 |
-
btn_send = gr.Button("
|
| 1088 |
-
btn_clear = gr.Button("
|
| 1089 |
|
| 1090 |
# ── TAB 11: ABOUT ──────────────────────────────────
|
| 1091 |
-
with gr.Tab("
|
| 1092 |
gr.Markdown("""
|
| 1093 |
-
##
|
| 1094 |
-
|
| 1095 |
-
### ✅ جديد في v7.4
|
| 1096 |
-
| الميزة | التفاصيل |
|
| 1097 |
-
|---|---|
|
| 1098 |
-
| 🌐 بحث عالمي محسّن | يستخدم `sort_by="relevance"` + `ti:"..."` للعثور على الورقة بالعنوان الدقيق |
|
| 1099 |
-
| 📄 تصدير PDF | شرح كامل بتنسيق احترافي بزر واحد |
|
| 1100 |
|
| 1101 |
-
###
|
| 1102 |
-
|
|
| 1103 |
|---|---|---|
|
| 1104 |
-
|
|
| 1105 |
-
|
|
| 1106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1107 |
""")
|
| 1108 |
|
| 1109 |
# ── WIRING ──────────────────────────────────────────────
|
|
@@ -1125,17 +1181,17 @@ with gr.Blocks(
|
|
| 1125 |
btn_gs.click(global_paper_search, inputs=[gs_query, gs_source, gs_max], outputs=[gs_out])
|
| 1126 |
gs_query.submit(global_paper_search, inputs=[gs_query, gs_source, gs_max], outputs=[gs_out])
|
| 1127 |
|
| 1128 |
-
btn_explain.click(gr_explain, inputs=[paper_sel2, lang_exp],
|
| 1129 |
-
btn_fav.click(gr_save_fav, inputs=[paper_sel2],
|
| 1130 |
-
btn_audio.click(gr_audio, inputs=[explanation_out, lang_exp],
|
| 1131 |
btn_export_pdf.click(gr_export_pdf,
|
| 1132 |
inputs=[explanation_out, paper_sel2],
|
| 1133 |
outputs=[pdf_out, pdf_status])
|
| 1134 |
|
| 1135 |
-
btn_compare.click(gr_compare, inputs=[cmp_a, cmp_b, lang_cmp],
|
| 1136 |
-
btn_overview.click(gr_overview, inputs=[t_query, lang_ov],
|
| 1137 |
btn_trends.click(gr_trends, outputs=[trend_chart, trend_stats])
|
| 1138 |
-
btn_bib.click(gr_bib, inputs=[bib_style],
|
| 1139 |
|
| 1140 |
btn_show_fav.click(gr_show_favs, outputs=[favs_md])
|
| 1141 |
btn_export_fav.click(gr_export_fav, outputs=[fav_csv_file])
|
|
|
|
| 1 |
# ================================================================
|
| 2 |
+
# Scientific Paper Discovery Bot v7.4 — SyntaxError FIXED
|
|
|
|
|
|
|
| 3 |
# ================================================================
|
| 4 |
import os, re, time, json, pickle, threading
|
| 5 |
import requests
|
|
|
|
| 30 |
S2_API_KEY = os.environ.get("S2_API_KEY", "")
|
| 31 |
groq_client = Groq(api_key=GROQ_API_KEY)
|
| 32 |
|
| 33 |
+
print("Loading embedder...")
|
| 34 |
embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
| 35 |
_ = embedder.encode(["warmup"])
|
| 36 |
+
print("Embedder ready!")
|
| 37 |
|
| 38 |
PAPERS = []
|
| 39 |
ACTIVE_PAPERS = []
|
|
|
|
| 43 |
CURRENT_YEAR = datetime.now().year
|
| 44 |
|
| 45 |
PERSIST_DIR = "/tmp"
|
| 46 |
+
FAVORITES_PATH = PERSIST_DIR + "/favorites.pkl"
|
| 47 |
+
SEEN_IDS_PATH = PERSIST_DIR + "/seen_ids.json"
|
| 48 |
os.makedirs(PERSIST_DIR, exist_ok=True)
|
| 49 |
|
| 50 |
CATEGORIES = {
|
| 51 |
+
"All": "",
|
| 52 |
+
"Economics": "econ",
|
| 53 |
+
"Quant Fin": "q-fin",
|
| 54 |
+
"AI": "cs.AI",
|
| 55 |
+
"ML": "cs.LG",
|
| 56 |
+
"NLP": "cs.CL",
|
| 57 |
+
"Statistics": "stat",
|
| 58 |
+
"Biology": "q-bio",
|
| 59 |
+
"Physics": "physics",
|
| 60 |
+
"Math": "math",
|
| 61 |
+
"CS": "cs",
|
| 62 |
}
|
| 63 |
CROSSREF_SUBJECTS = {
|
| 64 |
+
"All": "",
|
| 65 |
+
"Economics": "economics",
|
| 66 |
+
"Quant Fin": "finance",
|
| 67 |
+
"AI": "artificial intelligence",
|
| 68 |
+
"ML": "machine learning",
|
| 69 |
+
"NLP": "natural language processing",
|
| 70 |
+
"Statistics": "statistics",
|
| 71 |
+
"Biology": "biology",
|
| 72 |
+
"Physics": "physics",
|
| 73 |
+
"Math": "mathematics",
|
| 74 |
+
"CS": "computer science",
|
| 75 |
}
|
| 76 |
+
LANG_CHOICES = ["Arabic", "English"]
|
| 77 |
+
SORT_CHOICES = ["Newest", "Oldest", "Most Cited", "Least Cited"]
|
| 78 |
+
AR_RULES = """
|
| 79 |
+
- ابدأ كل قسم بـ ## مع سطر فارغ قبله وبعده
|
| 80 |
+
- اكتب كل قسم في فقرة 3-4 جمل بالعربية الفصحى
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
- لا تكرر عنوان القسم داخل النص
|
|
|
|
| 82 |
"""
|
| 83 |
|
| 84 |
# ================================================================
|
| 85 |
# HELPERS
|
| 86 |
# ================================================================
|
| 87 |
def detect_lang(text):
|
| 88 |
+
try:
|
| 89 |
+
return "ar" if detect(str(text)[:300]).startswith("ar") else "en"
|
| 90 |
+
except:
|
| 91 |
+
return "en"
|
| 92 |
|
| 93 |
def clean_md(text):
|
| 94 |
text = re.sub(r"[#*`>\[\]!_~]", "", text)
|
|
|
|
| 102 |
def cit_badge(n):
|
| 103 |
if n is None or n == "": return "—"
|
| 104 |
n = int(n)
|
| 105 |
+
if n >= 1000: return "🥇 " + "{:,}".format(n)
|
| 106 |
+
if n >= 100: return "🏆 " + "{:,}".format(n)
|
| 107 |
+
if n >= 10: return "⭐ " + "{:,}".format(n)
|
| 108 |
+
if n > 0: return "📄 " + str(n)
|
| 109 |
return "·"
|
| 110 |
|
| 111 |
def build_table(papers_list):
|
| 112 |
+
rows = "| # | Title | Author | Date | Citations | Source |\n"
|
| 113 |
rows += "|---|---|---|---|---|---|\n"
|
| 114 |
choices = []
|
| 115 |
for i, p in enumerate(papers_list):
|
| 116 |
+
first = p["authors"][0] if p["authors"] else "N/A"
|
| 117 |
+
badge = "NEW" if p.get("recent") else "📄"
|
| 118 |
+
rows += "| {} | {} {} | {} | {} | {} | {} |\n".format(
|
| 119 |
+
i+1, badge, p["title"], first,
|
| 120 |
+
p["published"], cit_badge(p.get("citations")),
|
| 121 |
+
p.get("source","arXiv"))
|
| 122 |
+
choices.append("{}. {}".format(i+1, p["title"]))
|
| 123 |
return rows, choices
|
| 124 |
|
| 125 |
def s2_headers():
|
| 126 |
h = {"User-Agent": "ScientificPaperBot/7.4"}
|
| 127 |
+
if S2_API_KEY:
|
| 128 |
+
h["x-api-key"] = S2_API_KEY
|
| 129 |
return h
|
| 130 |
|
| 131 |
def cr_headers():
|
| 132 |
return {"User-Agent": "ScientificPaperBot/7.4 (mailto:researcher@example.com)"}
|
| 133 |
|
| 134 |
# ================================================================
|
| 135 |
+
# CrossRef date parser — rejects garbage years
|
| 136 |
# ================================================================
|
| 137 |
+
def parse_crossref_date(item):
|
| 138 |
for field in ["issued", "published", "published-print", "published-online", "created"]:
|
| 139 |
dp = (item.get(field) or {}).get("date-parts", [[]])
|
| 140 |
if not dp or not dp[0]: continue
|
|
|
|
| 144 |
if not (1900 <= year <= CURRENT_YEAR + 1): continue
|
| 145 |
month = max(1, min(12, int(pts[1]) if len(pts) >= 2 else 1))
|
| 146 |
day = max(1, min(31, int(pts[2]) if len(pts) >= 3 else 1))
|
| 147 |
+
return "{:04d}-{:02d}-{:02d}".format(year, month, day)
|
| 148 |
except (ValueError, TypeError, IndexError):
|
| 149 |
continue
|
| 150 |
return "N/A"
|
|
|
|
| 170 |
if paper["id"] not in {p["id"] for p in favs}:
|
| 171 |
favs.append(paper)
|
| 172 |
with open(FAVORITES_PATH, "wb") as f: pickle.dump(favs, f)
|
| 173 |
+
return "Saved: " + paper["title"]
|
| 174 |
+
return "Already saved."
|
| 175 |
|
| 176 |
def export_favorites_csv():
|
| 177 |
favs = load_favorites()
|
| 178 |
if not favs: return None
|
| 179 |
+
df = pd.DataFrame([{
|
| 180 |
+
"Title": p["title"],
|
| 181 |
+
"Authors": ", ".join(p["authors"][:3]),
|
| 182 |
+
"Date": p["published"],
|
| 183 |
+
"Citations": p.get("citations","N/A"),
|
| 184 |
+
"URL": p["url"],
|
| 185 |
+
"Source": p.get("source","arXiv")
|
| 186 |
+
} for p in favs])
|
| 187 |
+
path = PERSIST_DIR + "/favorites.csv"
|
| 188 |
df.to_csv(path, index=False, encoding="utf-8-sig")
|
| 189 |
return path
|
| 190 |
|
| 191 |
def gr_export_fav(): return export_favorites_csv()
|
| 192 |
|
| 193 |
# ================================================================
|
| 194 |
+
# PDF EXPORT
|
| 195 |
# ================================================================
|
| 196 |
def export_explanation_pdf(explanation_text, paper_title="paper"):
|
| 197 |
if not explanation_text or len(explanation_text) < 30: return None
|
| 198 |
safe = re.sub(r"[^\w\s-]", "", paper_title)[:50].strip().replace(" ", "_")
|
| 199 |
+
path = PERSIST_DIR + "/explanation_" + safe + ".pdf"
|
| 200 |
doc = SimpleDocTemplate(path, pagesize=A4,
|
| 201 |
rightMargin=2*cm, leftMargin=2*cm,
|
| 202 |
+
topMargin=2*cm, bottomMargin=2*cm)
|
| 203 |
+
styles = getSampleStyleSheet()
|
| 204 |
+
h2_style = ParagraphStyle("H2", parent=styles["Heading2"],
|
| 205 |
+
fontSize=11, textColor=colors.HexColor("#2563eb"),
|
| 206 |
+
spaceBefore=14, spaceAfter=6)
|
| 207 |
+
bd_style = ParagraphStyle("BD", parent=styles["Normal"],
|
| 208 |
+
fontSize=10, leading=16, spaceAfter=8)
|
| 209 |
+
mt_style = ParagraphStyle("MT", parent=styles["Normal"],
|
| 210 |
+
fontSize=9, textColor=colors.HexColor("#64748b"))
|
|
|
|
| 211 |
story = []
|
| 212 |
for line in explanation_text.split("\n"):
|
| 213 |
line = line.strip()
|
| 214 |
if not line: story.append(Spacer(1, 6)); continue
|
| 215 |
clean = re.sub(r"\*\*(.+?)\*\*", r"\1", line)
|
| 216 |
+
clean = re.sub(r"\*(.+?)\*", r"\1", clean)
|
| 217 |
+
clean = re.sub(r"`(.+?)`", r"\1", clean)
|
| 218 |
+
clean = re.sub(r"^#{1,6}\s*", "", clean)
|
| 219 |
clean = re.sub(r"[🎯❓🔧📊🌟🔗📄👥📅📡🤖#*_~]", "", clean).strip()
|
| 220 |
if not clean: continue
|
| 221 |
if line.startswith("## ") or line.startswith("# "):
|
|
|
|
| 223 |
color=colors.HexColor("#e2e8f0"), spaceAfter=4))
|
| 224 |
story.append(Paragraph(clean, h2_style))
|
| 225 |
elif line.startswith(">"):
|
| 226 |
+
q_st = ParagraphStyle("Q", parent=styles["Normal"],
|
| 227 |
+
fontSize=9, leftIndent=20,
|
| 228 |
+
textColor=colors.HexColor("#475569"), leading=14)
|
| 229 |
+
story.append(Paragraph(
|
| 230 |
+
re.sub(r"[🎯❓🔧📊🌟🔗📄👥📅📡🤖#*_~]","",line.lstrip(">").strip()),
|
| 231 |
+
q_st))
|
| 232 |
else:
|
| 233 |
+
story.append(Paragraph(clean, bd_style))
|
| 234 |
+
story += [
|
| 235 |
+
Spacer(1, 20),
|
| 236 |
+
HRFlowable(width="100%", thickness=0.5, color=colors.HexColor("#e2e8f0")),
|
| 237 |
+
Paragraph("Generated by Paper Discovery v7.4 — " +
|
| 238 |
+
datetime.now().strftime("%Y-%m-%d %H:%M"), mt_style)
|
| 239 |
+
]
|
| 240 |
try:
|
| 241 |
doc.build(story); return path
|
| 242 |
except Exception as e:
|
| 243 |
+
print("PDF error: " + str(e)); return None
|
| 244 |
|
| 245 |
def gr_export_pdf(explanation_text, choice):
|
| 246 |
if not explanation_text or len(explanation_text) < 50:
|
| 247 |
+
return None, "Explain a paper first."
|
| 248 |
title = choice.split(". ", 1)[-1] if choice else "paper"
|
| 249 |
path = export_explanation_pdf(explanation_text, title)
|
| 250 |
+
return (path, "PDF ready!") if path else (None, "PDF failed.")
|
| 251 |
|
| 252 |
# ================================================================
|
| 253 |
# SOURCE 1 — arXiv
|
| 254 |
+
# KEY FIX: sort_by parameter
|
| 255 |
+
# Browse → "submittedDate" latest papers
|
| 256 |
+
# Global → "relevance" exact title match
|
| 257 |
# ================================================================
|
| 258 |
def fetch_arxiv_papers(query, category, max_results=20, days_back=365,
|
| 259 |
sort_by="submittedDate"):
|
| 260 |
parts = []
|
|
|
|
| 261 |
words = query.strip().split()
|
| 262 |
if len(words) >= 3 and sort_by == "relevance":
|
| 263 |
+
parts.append('ti:"' + query.strip() + '"')
|
| 264 |
elif query.strip():
|
| 265 |
+
parts.append("all:" + query.strip())
|
| 266 |
if category.strip():
|
| 267 |
+
parts.append("cat:" + category.strip())
|
| 268 |
+
sq = " AND ".join(parts) if parts else "all:machine learning"
|
| 269 |
+
params = {
|
| 270 |
+
"search_query": sq,
|
| 271 |
+
"start": 0,
|
| 272 |
+
"max_results": max_results,
|
| 273 |
+
"sortBy": sort_by,
|
| 274 |
+
"sortOrder": "descending",
|
| 275 |
+
}
|
| 276 |
try:
|
| 277 |
resp = requests.get("http://export.arxiv.org/api/query", params=params, timeout=30)
|
| 278 |
resp.raise_for_status()
|
| 279 |
+
except Exception as e:
|
| 280 |
+
print("arXiv error: " + str(e)); return []
|
| 281 |
|
| 282 |
ns_a = "http://www.w3.org/2005/Atom"
|
| 283 |
ns_x = "http://arxiv.org/schemas/atom"
|
| 284 |
root = ET.fromstring(resp.content)
|
| 285 |
cutoff = datetime.now() - timedelta(days=days_back)
|
| 286 |
papers = []
|
| 287 |
+
for entry in root.findall("{" + ns_a + "}entry"):
|
| 288 |
try:
|
| 289 |
+
pid = entry.find("{" + ns_a + "}id").text.split("/abs/")[-1].strip()
|
| 290 |
+
title = entry.find("{" + ns_a + "}title").text.strip().replace("\n"," ")
|
| 291 |
+
abstract = entry.find("{" + ns_a + "}summary").text.strip().replace("\n"," ")
|
| 292 |
+
published = entry.find("{" + ns_a + "}published").text[:10]
|
| 293 |
+
authors = [a.find("{" + ns_a + "}name").text
|
| 294 |
+
for a in entry.findall("{" + ns_a + "}author")]
|
| 295 |
cats = set()
|
| 296 |
+
pc = entry.find("{" + ns_x + "}primary_category")
|
| 297 |
if pc is not None: cats.add(pc.get("term",""))
|
| 298 |
+
for c in entry.findall("{" + ns_x + "}category"): cats.add(c.get("term",""))
|
| 299 |
cats.discard("")
|
| 300 |
papers.append({
|
| 301 |
+
"id": pid,
|
| 302 |
+
"title": title,
|
| 303 |
+
"authors": authors[:6],
|
| 304 |
+
"abstract": abstract[:1200],
|
| 305 |
+
"published": published,
|
| 306 |
+
"categories": list(cats)[:4],
|
| 307 |
+
"citations": None,
|
| 308 |
+
"url": "https://arxiv.org/abs/" + pid,
|
| 309 |
+
"pdf_url": "https://arxiv.org/pdf/" + pid,
|
| 310 |
+
"recent": datetime.strptime(published, "%Y-%m-%d") >= cutoff,
|
| 311 |
+
"source": "arXiv",
|
| 312 |
})
|
| 313 |
+
except Exception as e:
|
| 314 |
+
print("arXiv parse: " + str(e))
|
| 315 |
return papers
|
| 316 |
|
| 317 |
# ================================================================
|
| 318 |
+
# SOURCE 2 — CrossRef
|
| 319 |
# ================================================================
|
| 320 |
+
def fetch_crossref_papers(query, category_label="", max_results=20,
|
| 321 |
+
days_back=365, use_title=False):
|
| 322 |
subject = CROSSREF_SUBJECTS.get(category_label, "")
|
| 323 |
+
full_query = (query + " " + subject).strip() if subject else query
|
| 324 |
+
key = "query.title" if use_title else "query"
|
| 325 |
params = {
|
| 326 |
+
key: full_query,
|
| 327 |
+
"rows": min(max_results * 3, 200),
|
| 328 |
+
"sort": "relevance",
|
| 329 |
"select": ("title,author,abstract,published,published-print,"
|
| 330 |
"published-online,issued,created,DOI,"
|
| 331 |
"is-referenced-by-count,link,subject"),
|
|
|
|
| 338 |
if r.status_code == 200:
|
| 339 |
items = r.json().get("message",{}).get("items",[]); break
|
| 340 |
if r.status_code == 429: time.sleep(2**attempt); continue
|
| 341 |
+
print("CrossRef " + str(r.status_code)); return []
|
| 342 |
+
except Exception as e:
|
| 343 |
+
print("CrossRef attempt " + str(attempt) + ": " + str(e)); time.sleep(1)
|
| 344 |
|
| 345 |
cutoff = datetime.now() - timedelta(days=days_back)
|
| 346 |
papers, seen_ids = [], set()
|
|
|
|
| 353 |
pub = parse_crossref_date(item)
|
| 354 |
if pub == "N/A": continue
|
| 355 |
cit = int(item.get("is-referenced-by-count", 0) or 0)
|
| 356 |
+
authors = [
|
| 357 |
+
(a.get("given","") + " " + a.get("family","")).strip()
|
| 358 |
+
for a in item.get("author",[])[:6]
|
| 359 |
+
]
|
| 360 |
authors = [a for a in authors if a.strip()] or ["Unknown"]
|
| 361 |
+
abstract = re.sub(r"<[^>]+>","",
|
| 362 |
+
item.get("abstract","No abstract.")).strip()[:1200]
|
| 363 |
+
doi = item.get("DOI","")
|
| 364 |
+
url = "https://doi.org/" + doi if doi else "#"
|
| 365 |
+
pid = doi or re.sub(r"\W","",title)[:40]
|
| 366 |
if pid in seen_ids: continue
|
| 367 |
seen_ids.add(pid)
|
| 368 |
+
pdf_url = next((l.get("URL","") for l in item.get("link",[])
|
| 369 |
+
if "pdf" in l.get("content-type","").lower()), "")
|
| 370 |
+
try: recent = datetime.strptime(pub[:10], "%Y-%m-%d") >= cutoff
|
| 371 |
except: recent = False
|
| 372 |
papers.append({
|
| 373 |
+
"id": pid,
|
| 374 |
+
"title": title,
|
| 375 |
+
"authors": authors,
|
| 376 |
+
"abstract": abstract,
|
| 377 |
+
"published": pub[:10],
|
| 378 |
"categories": item.get("subject",[])[:3],
|
| 379 |
+
"citations": cit,
|
| 380 |
+
"url": url,
|
| 381 |
+
"pdf_url": pdf_url,
|
| 382 |
+
"recent": recent,
|
| 383 |
+
"source": "CrossRef",
|
| 384 |
})
|
| 385 |
papers.sort(key=lambda x: x["citations"], reverse=True)
|
| 386 |
return papers
|
| 387 |
|
| 388 |
# ================================================================
|
| 389 |
+
# GLOBAL PAPER SEARCH — relevance sorted
|
| 390 |
# ================================================================
|
| 391 |
def global_paper_search(query, source_choice, max_results=10):
|
| 392 |
if not query or not query.strip():
|
| 393 |
+
return "Enter a title or keywords."
|
| 394 |
+
q = query.strip(); papers = []
|
| 395 |
+
if source_choice in ("arXiv", "Both"):
|
|
|
|
|
|
|
|
|
|
| 396 |
papers += fetch_arxiv_papers(q, "", int(max_results), 3650,
|
| 397 |
sort_by="relevance")
|
| 398 |
+
if source_choice in ("CrossRef", "Both"):
|
|
|
|
|
|
|
| 399 |
papers += fetch_crossref_papers(q, "", int(max_results), 3650,
|
| 400 |
use_title=True)
|
|
|
|
| 401 |
if not papers:
|
| 402 |
+
return "No results for: " + q
|
| 403 |
|
|
|
|
| 404 |
seen, unique = set(), []
|
| 405 |
for p in papers:
|
| 406 |
key = re.sub(r"\W","",p["title"].lower())[:60]
|
| 407 |
if key not in seen: seen.add(key); unique.append(p)
|
|
|
|
|
|
|
| 408 |
unique.sort(key=lambda x: x.get("citations") or 0, reverse=True)
|
| 409 |
|
| 410 |
+
NL = "\n"
|
| 411 |
+
md = "## Search Results: " + q + NL + NL
|
| 412 |
+
md += "**" + str(len(unique)) + " papers found**" + NL + NL + "---" + NL + NL
|
| 413 |
for i, p in enumerate(unique, 1):
|
| 414 |
+
cit = (" | " + cit_badge(p.get("citations"))) if p.get("citations") else ""
|
| 415 |
+
cats = " | ".join(p.get("categories",[])[:2])
|
| 416 |
+
auth = ", ".join(p["authors"][:3])
|
| 417 |
+
abst = p["abstract"][:450]
|
| 418 |
+
link = "[View](" + p["url"] + ")"
|
| 419 |
+
pdf = (" [PDF](" + p["pdf_url"] + ")") if p.get("pdf_url") else ""
|
| 420 |
+
src = p.get("source","")
|
| 421 |
+
md += ("### " + str(i) + ". " + p["title"] + NL + NL +
|
| 422 |
+
auth + " | " + p["published"] + cit + " | " + src +
|
| 423 |
+
(" | " + cats if cats else "") + NL + NL +
|
| 424 |
+
"> " + abst + "..." + NL + NL +
|
| 425 |
+
link + pdf + NL + NL + "---" + NL + NL)
|
| 426 |
return md
|
| 427 |
|
| 428 |
# ================================================================
|
|
|
|
| 439 |
id_map, batch_ids = {}, []
|
| 440 |
for p in arxiv_papers:
|
| 441 |
clean = re.sub(r"v\d+$","", p["id"].split("/")[-1].strip())
|
| 442 |
+
id_map[clean] = p
|
| 443 |
+
batch_ids.append("arXiv:" + clean)
|
| 444 |
for i in range(0, len(batch_ids), 500):
|
| 445 |
try:
|
| 446 |
r = requests.post(
|
|
|
|
| 452 |
for item in r.json():
|
| 453 |
if not item: continue
|
| 454 |
ext = item.get("externalIds") or {}
|
| 455 |
+
clean = re.sub(r"v\d+$","",
|
| 456 |
+
ext.get("ArXiv","").split("/")[-1].strip())
|
| 457 |
if clean and clean in id_map:
|
| 458 |
c = item.get("citationCount")
|
| 459 |
if c is not None: id_map[clean]["citations"] = int(c)
|
| 460 |
elif r.status_code == 429: time.sleep(4)
|
| 461 |
+
except Exception as e: print("S2 batch: " + str(e))
|
| 462 |
for p in [x for x in arxiv_papers if (x.get("citations") or 0)==0][:15]:
|
| 463 |
clean = re.sub(r"v\d+$","", p["id"].split("/")[-1].strip())
|
| 464 |
for attempt in range(2):
|
| 465 |
try:
|
| 466 |
r = requests.get(
|
| 467 |
+
"https://api.semanticscholar.org/graph/v1/paper/arXiv:" + clean,
|
| 468 |
+
params={"fields":"citationCount"},
|
| 469 |
+
headers=s2_headers(), timeout=10)
|
| 470 |
+
if r.status_code == 200:
|
| 471 |
+
c = r.json().get("citationCount")
|
| 472 |
+
p["citations"] = int(c) if c else 0; break
|
| 473 |
+
if r.status_code == 429: time.sleep(2**attempt); continue
|
| 474 |
+
p["citations"] = 0; break
|
| 475 |
+
except: p["citations"] = 0; break
|
| 476 |
time.sleep(0.12)
|
| 477 |
for p in [x for x in arxiv_papers if (x.get("citations") or 0)==0]:
|
| 478 |
try:
|
| 479 |
r = requests.get("https://api.crossref.org/works",
|
| 480 |
+
params={"query.title": p["title"], "rows": 1,
|
| 481 |
+
"select": "is-referenced-by-count,title"},
|
| 482 |
headers=cr_headers(), timeout=8)
|
| 483 |
+
if r.status_code == 200:
|
| 484 |
+
items = r.json().get("message",{}).get("items",[])
|
| 485 |
if items:
|
| 486 |
+
found = (items[0].get("title") or [""])[0].lower()
|
| 487 |
+
qw = set(p["title"].lower().split()[:5])
|
| 488 |
+
fw = set(found.split()[:10])
|
| 489 |
+
p["citations"] = (
|
| 490 |
+
int(items[0].get("is-referenced-by-count",0) or 0)
|
| 491 |
+
if len(qw & fw) >= 2 else 0)
|
| 492 |
+
else: p["citations"] = 0
|
| 493 |
+
else: p["citations"] = 0
|
| 494 |
time.sleep(0.12)
|
| 495 |
+
except: p["citations"] = 0
|
| 496 |
for p in papers:
|
| 497 |
if p.get("citations") is None: p["citations"] = 0
|
| 498 |
return papers
|
|
|
|
| 504 |
global FAISS_INDEX, PAPERS
|
| 505 |
PAPERS = papers
|
| 506 |
if not papers: FAISS_INDEX = None; return
|
| 507 |
+
texts = [p["title"] + " " + p["abstract"] for p in papers]
|
| 508 |
embs = embedder.encode(texts, convert_to_numpy=True,
|
| 509 |
normalize_embeddings=True).astype("float32")
|
| 510 |
+
idx = faiss.IndexFlatIP(embs.shape[1])
|
| 511 |
+
idx.add(embs)
|
| 512 |
FAISS_INDEX = idx
|
| 513 |
|
| 514 |
def search_papers(query, top_k=5):
|
|
|
|
| 516 |
qe = embedder.encode([query], convert_to_numpy=True,
|
| 517 |
normalize_embeddings=True).astype("float32")
|
| 518 |
scores, ids = FAISS_INDEX.search(qe, min(top_k, len(PAPERS)))
|
| 519 |
+
return [{"paper": PAPERS[i], "score": float(s)}
|
| 520 |
+
for s, i in zip(scores[0], ids[0]) if i >= 0 and float(s) > 0.1]
|
| 521 |
|
| 522 |
# ================================================================
|
| 523 |
# AUTO-FETCH
|
|
|
|
| 532 |
new_ps = [p for p in papers if p["id"] not in seen]
|
| 533 |
if new_ps:
|
| 534 |
save_seen_ids(seen | {p["id"] for p in papers})
|
| 535 |
+
AUTO_LOG.append(
|
| 536 |
+
"[" + datetime.now().strftime("%H:%M") + "] NEW " +
|
| 537 |
+
str(len(new_ps)) + " — " + query)
|
| 538 |
+
if len(AUTO_LOG) > 20: AUTO_LOG.pop(0)
|
| 539 |
|
| 540 |
def start_auto_fetch(query, cat_label, interval_min):
|
| 541 |
global AUTO_RUNNING
|
| 542 |
+
if AUTO_RUNNING: return "Already running."
|
| 543 |
AUTO_RUNNING = True
|
| 544 |
+
threading.Thread(
|
| 545 |
+
target=auto_fetch_worker,
|
| 546 |
+
args=(query, CATEGORIES.get(cat_label,""), int(interval_min)*60),
|
| 547 |
+
daemon=True).start()
|
| 548 |
+
return "Auto-fetch started every " + str(interval_min) + " min for: " + query
|
| 549 |
|
| 550 |
def stop_auto_fetch():
|
| 551 |
+
global AUTO_RUNNING; AUTO_RUNNING = False; return "Stopped."
|
| 552 |
|
| 553 |
def get_auto_log():
|
| 554 |
+
return "\n\n".join(reversed(AUTO_LOG[-10:])) if AUTO_LOG else "No log."
|
| 555 |
|
| 556 |
# ================================================================
|
| 557 |
# TRENDS
|
| 558 |
# ================================================================
|
| 559 |
def analyze_trends(papers):
|
| 560 |
+
if not papers: return None, "No papers."
|
| 561 |
date_counts = Counter(p["published"][:7] for p in papers if p["published"]!="N/A")
|
| 562 |
stopwords = {"the","a","an","of","in","for","on","with","and","or","to","using",
|
| 563 |
"based","via","from","by","is","are","our","we","this","that","which",
|
| 564 |
"towards","approach","method","new","into","over","learning","deep",
|
| 565 |
"model","models","data","neural","large","language","paper","study",
|
| 566 |
"analysis","results","show","also","can","used","two","its","their"}
|
| 567 |
+
all_words = [w.lower() for p in papers
|
| 568 |
+
for w in re.findall(r"[a-zA-Z]{4,}", p["title"])
|
| 569 |
+
if w.lower() not in stopwords]
|
| 570 |
+
top_words = Counter(all_words).most_common(15)
|
| 571 |
+
sources = Counter(p.get("source","arXiv") for p in papers)
|
| 572 |
+
cit_papers = [p for p in papers if (p.get("citations") or 0)>0]
|
| 573 |
+
top_cited = sorted(cit_papers, key=lambda x:x["citations"], reverse=True)[:10]
|
| 574 |
+
all_auth = [a for p in papers for a in p["authors"][:3]]
|
| 575 |
+
top_authors = Counter(all_auth).most_common(10)
|
| 576 |
+
cvals = [p["citations"] for p in cit_papers]
|
| 577 |
+
buckets = [0,1,5,10,50,100,500,10000]
|
| 578 |
+
blabels = ["0","1-4","5-9","10-49","50-99","100-499","500+"]
|
| 579 |
+
bcounts = ([sum(1 for c in cvals if buckets[i]<=c<buckets[i+1])
|
| 580 |
+
for i in range(len(buckets)-1)] if cvals else [0]*7)
|
| 581 |
+
avg_cit = round(sum(cvals)/max(len(cvals),1),1) if cvals else 0
|
| 582 |
+
total_cit = sum(p.get("citations") or 0 for p in papers)
|
| 583 |
C = ["#3b82f6","#8b5cf6","#10b981","#f59e0b","#ef4444","#06b6d4",
|
| 584 |
"#ec4899","#14b8a6","#f97316","#a855f7","#22d3ee","#84cc16",
|
| 585 |
"#fbbf24","#34d399","#f87171"]
|
| 586 |
BG,PNL,BR,W = "#0f172a","#1e293b","#334155","white"
|
| 587 |
+
fig, axes = plt.subplots(2, 3, figsize=(20,12))
|
| 588 |
fig.patch.set_facecolor(BG)
|
| 589 |
+
fig.suptitle("Research Trends", color=W, fontsize=16, fontweight="bold", y=1.01)
|
| 590 |
def style(ax):
|
| 591 |
ax.set_facecolor(PNL)
|
| 592 |
for sp in ax.spines.values(): sp.set_edgecolor(BR)
|
| 593 |
ax.tick_params(colors=W, labelsize=8)
|
| 594 |
+
ax = axes[0,0]; style(ax)
|
| 595 |
if date_counts:
|
| 596 |
+
ms,cs = zip(*sorted(date_counts.items()))
|
| 597 |
+
ms,cs = list(ms), list(cs)
|
| 598 |
+
bars = ax.bar(ms, cs, color=C[0], edgecolor="#60a5fa", lw=0.8)
|
| 599 |
for b,c in zip(bars,cs):
|
| 600 |
+
ax.text(b.get_x()+b.get_width()/2, b.get_height()+.05, str(c),
|
| 601 |
+
ha="center", va="bottom", color=W, fontsize=8)
|
| 602 |
+
if len(cs) > 2:
|
| 603 |
+
z = np.polyfit(range(len(cs)), cs, 1)
|
| 604 |
+
ax.plot(ms, np.poly1d(z)(range(len(cs))), "--",
|
| 605 |
+
color="#f59e0b", lw=1.5, alpha=.8, label="Trend")
|
| 606 |
+
ax.legend(fontsize=8, facecolor=PNL, labelcolor=W)
|
| 607 |
+
ax.set_title("Papers per Month", color=W, fontsize=12, fontweight="bold", pad=10)
|
| 608 |
+
ax.set_ylabel("Count", color=W, fontsize=9)
|
| 609 |
+
ax.tick_params(rotation=45)
|
| 610 |
+
ax = axes[0,1]; style(ax)
|
| 611 |
if top_words:
|
| 612 |
+
wds,wcts = zip(*top_words)
|
| 613 |
+
ax.barh(list(wds), list(wcts), color=C[:len(wds)], edgecolor="#475569", lw=.6)
|
| 614 |
+
for b,c in zip(ax.patches, wcts):
|
| 615 |
+
ax.text(b.get_width()+.1, b.get_y()+b.get_height()/2, str(c),
|
| 616 |
+
va="center", color=W, fontsize=8)
|
| 617 |
+
ax.set_title("Top Keywords", color=W, fontsize=12, fontweight="bold", pad=10)
|
| 618 |
+
ax.set_xlabel("Frequency", color=W, fontsize=9)
|
| 619 |
+
ax = axes[0,2]; ax.set_facecolor(PNL)
|
| 620 |
if sources:
|
| 621 |
+
sl,sv = zip(*sources.items())
|
| 622 |
+
_,txts,ats = ax.pie(sv, labels=sl, autopct="%1.0f%%",
|
| 623 |
+
colors=C[:len(sl)], startangle=90,
|
| 624 |
+
textprops={"color":W,"fontsize":10},
|
| 625 |
+
wedgeprops={"edgecolor":BR,"linewidth":1.5})
|
| 626 |
for at in ats: at.set_color(W); at.set_fontsize(9)
|
| 627 |
+
ax.set_title("Source Distribution", color=W, fontsize=12, fontweight="bold", pad=10)
|
| 628 |
+
ax = axes[1,0]; style(ax)
|
| 629 |
if top_cited:
|
| 630 |
+
lbls = [(p["title"][:35]+"..." if len(p["title"])>35 else p["title"])
|
| 631 |
+
for p in top_cited]
|
| 632 |
+
cv = [p["citations"] for p in top_cited]
|
| 633 |
+
ax.barh(lbls[::-1], cv[::-1], color=C[1], edgecolor="#475569", lw=.6)
|
| 634 |
+
mx = max(cv) if cv else 1
|
| 635 |
+
for b,c in zip(ax.patches, cv[::-1]):
|
| 636 |
+
ax.text(b.get_width()+mx*.01, b.get_y()+b.get_height()/2,
|
| 637 |
+
"{:,}".format(c), va="center", color=W, fontsize=8)
|
| 638 |
+
ax.set_xlabel("Citations", color=W, fontsize=9)
|
| 639 |
else:
|
| 640 |
+
ax.text(.5,.5,"No citation data", ha="center", va="center",
|
| 641 |
+
color="#94a3b8", fontsize=11, transform=ax.transAxes)
|
| 642 |
+
ax.set_title("Top 10 Cited", color=W, fontsize=12, fontweight="bold", pad=10)
|
| 643 |
+
ax = axes[1,1]; style(ax)
|
| 644 |
if any(bcounts):
|
| 645 |
+
ax.bar(blabels, bcounts, color=C[2], edgecolor="#475569", lw=.8)
|
| 646 |
+
for b,c in zip(ax.patches, bcounts):
|
| 647 |
+
if c > 0:
|
| 648 |
+
ax.text(b.get_x()+b.get_width()/2, b.get_height()+.1, str(c),
|
| 649 |
+
ha="center", va="bottom", color=W, fontsize=9)
|
| 650 |
+
ax.set_xlabel("Citation Range", color=W, fontsize=9)
|
| 651 |
+
ax.set_ylabel("Papers", color=W, fontsize=9)
|
| 652 |
+
ax.annotate("Avg " + str(avg_cit) + " | Total " + "{:,}".format(total_cit),
|
| 653 |
+
xy=(.98,.96), xycoords="axes fraction",
|
| 654 |
+
ha="right", va="top", color="#94a3b8", fontsize=8)
|
| 655 |
else:
|
| 656 |
+
ax.text(.5,.5,"No citation data", ha="center", va="center",
|
| 657 |
+
color="#94a3b8", fontsize=11, transform=ax.transAxes)
|
| 658 |
+
ax.set_title("Citation Distribution", color=W, fontsize=12, fontweight="bold", pad=10)
|
| 659 |
+
ax = axes[1,2]; style(ax)
|
| 660 |
if top_authors:
|
| 661 |
+
an,ac = zip(*top_authors)
|
| 662 |
+
ax.barh(list(an)[::-1], list(ac)[::-1], color=C[3], edgecolor="#475569", lw=.6)
|
| 663 |
+
for b,c in zip(ax.patches, list(ac)[::-1]):
|
| 664 |
+
ax.text(b.get_width()+.05, b.get_y()+b.get_height()/2, str(c),
|
| 665 |
+
va="center", color=W, fontsize=8)
|
| 666 |
+
ax.set_xlabel("Papers", color=W, fontsize=9)
|
| 667 |
+
ax.set_title("Top Authors", color=W, fontsize=12, fontweight="bold", pad=10)
|
| 668 |
plt.tight_layout(pad=3)
|
| 669 |
+
path = PERSIST_DIR + "/trends.png"
|
| 670 |
+
plt.savefig(path, bbox_inches="tight", dpi=150, facecolor=BG)
|
| 671 |
+
plt.close()
|
| 672 |
+
top5 = sorted(cit_papers, key=lambda x:x["citations"], reverse=True)[:5]
|
| 673 |
+
stats = ("### Stats\n\n| Metric | Value |\n|---|---|\n" +
|
| 674 |
+
"| Total | **" + str(len(papers)) + "** |\n" +
|
| 675 |
+
"| New | **" + str(sum(1 for p in papers if p.get("recent"))) + "** |\n" +
|
| 676 |
+
"| Citations | **" + "{:,}".format(total_cit) + "** |\n" +
|
| 677 |
+
"| Average | **" + str(avg_cit) + "** |\n\n")
|
| 678 |
if top5:
|
| 679 |
+
stats += "### Top Cited\n\n"
|
| 680 |
for i,p in enumerate(top5,1):
|
| 681 |
+
stats += (str(i) + ". [" + p["title"] + "](" + p["url"] + ")" +
|
| 682 |
+
" — **" + "{:,}".format(p["citations"]) + "**\n\n")
|
| 683 |
return path, stats
|
| 684 |
|
| 685 |
# ================================================================
|
|
|
|
| 691 |
model="llama-3.3-70b-versatile",
|
| 692 |
messages=messages, temperature=0.3, max_tokens=max_tokens)
|
| 693 |
return r.choices[0].message.content.strip()
|
| 694 |
+
except Exception as e: return "LLM Error: " + str(e)
|
| 695 |
|
| 696 |
def explain_paper(paper, lang="ar"):
    """Ask the LLM to explain one paper, in Arabic (default) or English.

    Arabic output is post-processed by fix_ar_format; English is returned raw.
    """
    cit = paper.get("citations", "N/A")
    authors = ", ".join(paper["authors"][:3])
    if lang == "ar":
        system_msg = "أنت خبير أكاديمي يشرح الأبحاث بالعربية الفصحى.\n" + AR_RULES
        user_msg = (
            "اشرح الورقة:\nالعنوان: " + paper["title"] + "\n" +
            "المؤلفون: " + authors + "\n" +
            "التاريخ: " + paper["published"] + " | الاقتباسات: " + str(cit) + "\n" +
            "الملخص: " + paper["abstract"] + "\n\n" +
            "## موضوع الورقة\n\n## المشكلة\n\n## المنهجية\n\n" +
            "## النتائج\n\n## الأهمية\n\n## التطبيقات"
        )
        raw = _llm([{"role": "system", "content": system_msg},
                    {"role": "user", "content": user_msg}])
        return fix_ar_format(raw)
    prompt = (
        "Explain:\nTitle: " + paper["title"] + "\nAuthors: " + authors +
        "\nDate: " + paper["published"] + " | Citations: " + str(cit) +
        "\nAbstract: " + paper["abstract"] + "\n\n" +
        "## Topic\n## Problem\n## Methodology\n## Findings\n## Contribution\n## Applications"
    )
    return _llm([{"role": "user", "content": prompt}])
def compare_papers(pa, pb, lang="ar"):
    """LLM comparison of two papers (Arabic by default, post-formatted)."""
    def _desc(letter, p):
        # One-line header plus a 500-char abstract excerpt for a single paper.
        return ("Paper " + letter + ": " + p["title"] +
                " | Citations: " + str(p.get("citations", "N/A")) +
                "\n" + p["abstract"][:500])

    body = _desc("A", pa) + "\n\n" + _desc("B", pb)
    if lang == "ar":
        prompt = ("قارن بين الورقتين.\n" + AR_RULES + "\n\n" + body + "\n\n" +
                  "## الهدف\n\n## المنهجية\n\n## النتائج\n\n" +
                  "## القوة\n\n## القيود\n\n## الخلاصة")
        return fix_ar_format(_llm([{"role": "user", "content": prompt}], 1400))
    prompt = ("Compare:\n" + body + "\n\n" +
              "## Topic\n## Methodology\n## Results\n## Strengths\n## Limits\n## Verdict")
    return _llm([{"role": "user", "content": prompt}], 1400)
def summarize_papers(papers, topic, lang="ar"):
    """Summarize up to 8 papers into an academic overview via the LLM."""
    chunks = []
    for idx, p in enumerate(papers[:8], 1):
        # Numbered title + date + 300-char abstract excerpt per paper.
        chunks.append(str(idx) + ". " + p["title"] + " (" + p["published"] + "): " +
                      p["abstract"][:300] + "...\n\n")
    text = "".join(chunks)
    if lang == "ar":
        prompt = ("نظرة عامة أكاديمية حول \"" + topic + "\".\n" + AR_RULES +
                  "\n\n" + text + "\n\n" +
                  "## الاتجاهات\n\n## أبرز الأوراق\n\n" +
                  "## المواضيع المشتركة\n\n## الفجوات")
        return fix_ar_format(_llm([{"role": "user", "content": prompt}], 900))
    prompt = ("Academic overview of \"" + topic + "\":\n" + text + "\n\n" +
              "## Trends\n## Key Papers\n## Themes\n## Gaps")
    return _llm([{"role": "user", "content": prompt}], 900)
def generate_bibliography(papers, style="APA"):
|
|
|
|
| 746 |
auth = ", ".join(p["authors"][:6]) + (" et al." if len(p["authors"])>6 else "")
|
| 747 |
year = p["published"][:4] if p["published"] not in ("N/A","") else "n.d."
|
| 748 |
t,u = p["title"], p["url"]
|
| 749 |
+
if style == "APA":
|
| 750 |
+
entries.append(str(i) + ". " + auth + " (" + year + "). *" + t + "*. " + u)
|
| 751 |
+
elif style == "IEEE":
|
| 752 |
ae = " and ".join(p["authors"][:3]) + (" et al." if len(p["authors"])>3 else "")
|
| 753 |
+
entries.append("[" + str(i) + "] " + ae + ', "' + t + '," ' + year + ". [Online]: " + u)
|
| 754 |
+
elif style == "Chicago":
|
| 755 |
+
entries.append(str(i) + ". " + auth + '. "' + t + '." (' + year + "). " + u)
|
| 756 |
else:
|
| 757 |
key = re.sub(r"\W","", (p["authors"][0].split()[-1]
|
| 758 |
+
if p["authors"] else "Auth")) + year
|
| 759 |
+
entries.append("@article{" + key + str(i) + ",\n title={" + t +
|
| 760 |
+
"},\n author={" + auth + "},\n year={" + year +
|
| 761 |
+
"},\n url={" + u + "}\n}")
|
| 762 |
bib = "\n\n".join(entries)
|
| 763 |
+
path = PERSIST_DIR + "/bibliography_" + style + ".txt"
|
| 764 |
+
with open(path, "w", encoding="utf-8") as f: f.write(bib)
|
| 765 |
return bib, path
|
| 766 |
|
| 767 |
def chat_about_papers(question, history):
    """RAG-style chat: retrieve relevant loaded papers, then answer via the LLM.

    history is a list of {"role", "content"} dicts; only the last 4 turns are kept.
    """
    lang = detect_lang(question)
    if not PAPERS:
        return "يرجى جلب الأوراق أولاً." if lang == "ar" else "Fetch papers first."
    hits = search_papers(question, top_k=4)
    context = ""
    if hits:
        parts = ["الأوراق ذات الصلة:\n\n" if lang == "ar" else "Relevant papers:\n\n"]
        for hit in hits:
            p = hit["paper"]
            cit = (" | " + str(p["citations"]) + " citations") if p.get("citations") else ""
            parts.append("**" + p["title"] + "** (" + p["published"] + ")" + cit +
                         "\n" + p["abstract"][:400] + "\n🔗 " + p["url"] + "\n\n")
        context = "".join(parts)
    if lang == "ar":
        sys_msg = "أنت مساعد بحثي. أجب بالعربية الفصحى.\n" + AR_RULES
    else:
        sys_msg = "You are an academic assistant. Answer in English."
    msgs = [{"role": "system", "content": sys_msg}]
    msgs.extend({"role": t["role"], "content": t["content"]} for t in history[-4:])
    user_content = (context + "\nسؤال: " + question) if context else question
    msgs.append({"role": "user", "content": user_content})
    answer = _llm(msgs, 800)
    return fix_ar_format(answer) if lang == "ar" else answer
|
|
|
|
| 791 |
clean = clean_md(text)
|
| 792 |
if not clean: return None
|
| 793 |
try:
|
| 794 |
+
tts = gTTS(text=clean, lang=lang, slow=False)
|
| 795 |
+
path = PERSIST_DIR + "/audio_" + lang + ".mp3"
|
| 796 |
+
tts.save(path); return path
|
| 797 |
+
except Exception as e: print("TTS: " + str(e)); return None
|
| 798 |
|
| 799 |
# ================================================================
|
| 800 |
# GRADIO HANDLERS
|
|
|
|
| 802 |
def gr_fetch(query, category_label, max_results, days_back, source_choice,
|
| 803 |
progress=gr.Progress()):
|
| 804 |
global ACTIVE_PAPERS
|
| 805 |
+
progress(0.05, desc="Connecting...")
|
| 806 |
papers, warn = [], ""
|
| 807 |
+
if source_choice in ("arXiv", "Both"):
|
| 808 |
+
progress(0.15, desc="Fetching arXiv...")
|
| 809 |
papers += fetch_arxiv_papers(query, CATEGORIES.get(category_label,""),
|
| 810 |
int(max_results), int(days_back),
|
| 811 |
sort_by="submittedDate")
|
| 812 |
+
if source_choice in ("CrossRef", "Both"):
|
| 813 |
+
progress(0.35, desc="Fetching CrossRef...")
|
| 814 |
cr = fetch_crossref_papers(query, category_label, int(max_results), int(days_back))
|
| 815 |
+
if not cr: warn = "\n\n> CrossRef: no results."
|
| 816 |
papers += cr
|
| 817 |
seen, unique = set(), []
|
| 818 |
for p in papers:
|
|
|
|
| 820 |
if key not in seen: seen.add(key); unique.append(p)
|
| 821 |
papers = unique
|
| 822 |
if not papers:
|
| 823 |
+
return ("No results." + warn,
|
| 824 |
+
gr.update(choices=[], value=None), gr.update(choices=[], value=None),
|
| 825 |
+
gr.update(choices=[], value=None), gr.update(choices=[], value=None),
|
| 826 |
+
"0 papers")
|
| 827 |
+
progress(0.60, desc="Fetching citations...")
|
| 828 |
papers = enrich_citations(papers)
|
| 829 |
+
progress(0.85, desc="FAISS indexing...")
|
| 830 |
build_papers_index(papers)
|
| 831 |
ACTIVE_PAPERS = list(papers)
|
| 832 |
tbl, choices = build_table(papers)
|
| 833 |
recent = sum(1 for p in papers if p.get("recent"))
|
| 834 |
tot_cit = sum(p.get("citations") or 0 for p in papers)
|
| 835 |
zero_cit = sum(1 for p in papers if (p.get("citations") or 0)==0)
|
| 836 |
+
note = ("\n\n> " + str(zero_cit) + " papers with 0 citations (new/unindexed)."
|
| 837 |
if zero_cit else "")
|
| 838 |
+
md = ("## Fetched **" + str(len(papers)) + "** papers\n\n" +
|
| 839 |
+
"New: **" + str(recent) + "** | Citations: **" +
|
| 840 |
+
"{:,}".format(tot_cit) + "**" + warn + note +
|
| 841 |
+
"\n\n---\n\n" + tbl)
|
| 842 |
upd = gr.update(choices=choices, value=choices[0] if choices else None)
|
| 843 |
progress(1.0)
|
| 844 |
+
return md, upd, upd, upd, upd, str(len(papers)) + " papers | " + "{:,}".format(tot_cit) + " cit."
|
| 845 |
|
| 846 |
def gr_filter_papers(year_from, year_to, cit_min, cit_max, sort_by):
|
| 847 |
global ACTIVE_PAPERS
|
| 848 |
+
if not PAPERS: return "Fetch papers first.", gr.update(), "0"
|
| 849 |
filtered = []
|
| 850 |
for p in PAPERS:
|
| 851 |
try:
|
|
|
|
| 855 |
cit = int(p.get("citations") or 0)
|
| 856 |
if cit < int(cit_min) or cit > int(cit_max): continue
|
| 857 |
filtered.append(p)
|
| 858 |
+
if sort_by == "Newest": filtered.sort(key=lambda x: x["published"], reverse=True)
|
| 859 |
+
elif sort_by == "Oldest": filtered.sort(key=lambda x: x["published"])
|
| 860 |
+
elif sort_by == "Most Cited": filtered.sort(key=lambda x: x.get("citations") or 0, reverse=True)
|
| 861 |
+
elif sort_by == "Least Cited":filtered.sort(key=lambda x: x.get("citations") or 0)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 862 |
if not filtered:
|
| 863 |
+
ACTIVE_PAPERS = []
|
| 864 |
+
return "No matching papers.", gr.update(choices=[], value=None), "0"
|
| 865 |
ACTIVE_PAPERS = list(filtered)
|
| 866 |
tbl, choices = build_table(filtered)
|
| 867 |
tot = sum(p.get("citations") or 0 for p in filtered)
|
| 868 |
+
md = ("## " + str(len(filtered)) + "/" + str(len(PAPERS)) + " papers" +
|
| 869 |
+
" | " + str(year_from) + "-" + str(year_to) +
|
| 870 |
+
" | cit " + str(cit_min) + "-" + str(cit_max) +
|
| 871 |
+
" | total " + "{:,}".format(tot) + "\n\n---\n\n" + tbl)
|
| 872 |
+
return md, gr.update(choices=choices, value=choices[0] if choices else None), str(len(filtered)) + "/" + str(len(PAPERS))
|
| 873 |
|
| 874 |
def gr_search_fetched(query):
    """Semantic (FAISS) search within the currently loaded papers.

    Returns a markdown report of up to 8 hits with relevance score,
    author/date/citation metadata, an abstract excerpt and links.
    """
    if not query or not query.strip():
        return "Enter a query."
    if not PAPERS:
        return "Fetch papers first."
    results = search_papers(query.strip(), top_k=8)
    if not results:
        return "No results for: " + query
    NL = "\n"
    md = "## Search: " + query + " — " + str(len(results)) + " results" + NL + NL
    for r in results:
        p, s = r["paper"], r["score"]
        # FIX: removed an unused `bar` string ("green " * round(s*10)) that was
        # computed on every hit but never rendered anywhere.
        cit = (" | " + cit_badge(p.get("citations"))) if p.get("citations") else ""
        link = "[View](" + p["url"] + ")"
        pdf = (" [PDF](" + p["pdf_url"] + ")") if p.get("pdf_url") else ""
        md += ("### " + "{:.0f}".format(s * 100) + "% — " + p["title"] + NL + NL +
               ", ".join(p["authors"][:2]) + " | " + p["published"] + cit +
               " | " + p.get("source", "") + NL + NL +
               "> " + p["abstract"][:350] + "..." + NL + NL +
               link + pdf + NL + NL + "---" + NL + NL)
    return md
def _get_paper(choice):
    """Resolve a dropdown choice string like "3. Some title" to a paper dict.

    Uses the filtered set when one is active, otherwise all fetched papers.
    Returns None when the choice is missing, malformed, or out of range.
    """
    pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
    try:
        return pool[int(choice.split(".")[0]) - 1]
    # FIX: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt. Only the failures a bad choice can actually
    # produce are caught: None/non-str choice (AttributeError), a
    # non-numeric prefix (ValueError), or a stale index (IndexError).
    except (AttributeError, ValueError, IndexError):
        return None
def gr_explain(choice, lang_choice):
    """Render a metadata header plus an LLM explanation for the selected paper."""
    if not choice:
        return "Fetch papers and select one."
    paper = _get_paper(choice)
    if not paper:
        return "Selection error."
    lang = "ar" if "Arabic" in lang_choice else "en"
    pdf_link = (" [PDF](" + paper["pdf_url"] + ")") if paper.get("pdf_url") else ""
    # Markdown sections, each separated by a blank line when joined.
    sections = [
        "# " + paper["title"],
        "**Authors:** " + ", ".join(paper["authors"]),
        ("**Date:** " + paper["published"] +
         " | **Citations:** " + cit_badge(paper.get("citations")) +
         " | **Source:** " + paper.get("source", "arXiv")),
        "[View Paper](" + paper["url"] + ")" + pdf_link,
        "---",
        "> " + paper["abstract"],
        "---",
        "## Explanation (Llama 3.3 70B)",
    ]
    return "\n\n".join(sections) + "\n\n" + explain_paper(paper, lang)
def gr_audio(txt, lang_choice):
    """Convert an explanation to speech; skip empty or trivially short text."""
    if not txt or len(txt) < 50:
        return None
    lang = "ar" if "Arabic" in lang_choice else "en"
    return text_to_audio(txt, lang)
def gr_save_fav(choice):
    """Persist the currently selected paper to the favorites store."""
    if not choice:
        return "Select a paper first."
    paper = _get_paper(choice)
    if paper:
        return save_favorite(paper)
    return "Error."
def gr_show_favs():
    """Render the saved favorites list as markdown."""
    favs = load_favorites()
    if not favs:
        return "No saved papers."
    entries = []
    for p in favs:
        first_author = p["authors"][0] if p["authors"] else "N/A"
        entries.append(
            "**" + p["title"] + "**" + "\n" +
            first_author +
            " | " + p["published"] + " | " + p.get("source", "") +
            " | " + cit_badge(p.get("citations")) +
            " | [Link](" + p["url"] + ")")
    header = "### Favorites — " + str(len(favs)) + " papers" + "\n\n"
    return header + "\n\n---\n\n".join(entries)
def gr_compare(ca, cb, lc):
    """Validate the two dropdown selections, then run the LLM comparison."""
    if not ca or not cb:
        return "Select two papers."
    pa = _get_paper(ca)
    pb = _get_paper(cb)
    if not pa or not pb:
        return "Selection error."
    if pa["id"] == pb["id"]:
        return "Select two different papers."
    lang = "ar" if "Arabic" in lc else "en"
    return compare_papers(pa, pb, lang)
def gr_overview(query, lc):
    """Generate an overview report over the active (or all loaded) papers."""
    if not PAPERS:
        return "Fetch papers first."
    pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
    lang = "ar" if "Arabic" in lc else "en"
    # Fall back to a generic topic when the query box is empty.
    return "## Overview\n\n" + summarize_papers(pool, query or "research", lang)
def gr_trends():
    """Run trend analysis on the active paper set (falls back to all papers)."""
    if not PAPERS:
        return None, "Fetch papers first."
    pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
    return analyze_trends(pool)
def gr_bib(style, progress=gr.Progress()):
    """Generate a bibliography in the chosen style.

    Returns (markdown preview capped at 3000 chars, path to the full file).
    The gr.Progress() default is the Gradio idiom for progress tracking.
    """
    if not PAPERS:
        return "Fetch papers first.", None
    progress(0.5, desc="Generating...")
    pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
    text, path = generate_bibliography(pool, style)
    progress(1.0)
    preview = text[:3000]
    if len(text) > 3000:
        preview += "..."
    return "```\n" + preview + "\n```", path
def gr_chat_fn(message, history):
    """Gradio chat callback.

    Converts the (user, assistant) tuple history into role dicts, gets a
    reply, appends the new turn, and clears the input box.
    """
    if not message.strip():
        return history, ""
    dialogue = []
    for turn in history:
        if turn[0]:
            dialogue.append({"role": "user", "content": turn[0]})
        if turn[1]:
            dialogue.append({"role": "assistant", "content": turn[1]})
    reply = chat_about_papers(message, dialogue)
    history.append((message, reply))
    return history, ""
|
|
|
|
| 981 |
footer{display:none!important}
|
| 982 |
h1{text-align:center}
|
| 983 |
.status-bar{font-size:.85rem;color:#94a3b8;padding:2px 0}
|
| 984 |
+
.legend{font-size:.8rem;color:#cbd5e1;background:#1e293b;
|
| 985 |
+
border-radius:8px;padding:6px 14px;margin-bottom:6px}
|
| 986 |
+
.filter-box{background:#1e293b;border-radius:10px;
|
| 987 |
+
padding:12px 16px;margin-top:8px}
|
| 988 |
+
.gs-box{background:#1e293b;border-radius:10px;padding:14px 18px;
|
| 989 |
+
margin-bottom:10px;border:1px solid #334155}
|
| 990 |
"""
|
| 991 |
|
| 992 |
with gr.Blocks(
|
| 993 |
theme=gr.themes.Soft(primary_hue="blue", secondary_hue="purple"),
|
| 994 |
+
title="Scientific Paper Discovery v7.4", css=CSS
|
| 995 |
) as demo:
|
| 996 |
|
| 997 |
+
gr.Markdown("# Scientific Paper Discovery v7.4\narXiv · CrossRef · Llama-3.3-70B · FAISS")
|
| 998 |
+
gr.Markdown("Citations: 🥇 >=1000 | 🏆 >=100 | ⭐ >=10 | 📄 <10 | · = 0",
|
| 999 |
+
elem_classes="legend")
|
| 1000 |
+
status_bar = gr.Markdown("No papers loaded yet.", elem_classes="status-bar")
|
|
|
|
| 1001 |
|
| 1002 |
with gr.Tabs():
|
| 1003 |
|
| 1004 |
+
# ── TAB 1: BROWSE ──────────────────────────────────
|
| 1005 |
+
with gr.Tab("Browse / Search"):
|
| 1006 |
with gr.Row():
|
| 1007 |
with gr.Column(scale=3):
|
| 1008 |
+
t_query = gr.Textbox(label="Topic",
|
| 1009 |
placeholder="ARIMA, inflation, LLM...",
|
| 1010 |
value="economic forecasting")
|
| 1011 |
+
t_category = gr.Dropdown(label="Category",
|
| 1012 |
choices=list(CATEGORIES.keys()),
|
| 1013 |
+
value="Economics")
|
| 1014 |
+
t_source = gr.Radio(label="Source",
|
| 1015 |
+
choices=["arXiv","CrossRef","Both"],
|
| 1016 |
value="arXiv")
|
| 1017 |
with gr.Column(scale=1):
|
| 1018 |
+
t_max = gr.Slider(5, 50, value=15, step=5, label="Max papers")
|
| 1019 |
+
t_days = gr.Slider(1, 1500, value=365, step=30, label="Last N days")
|
| 1020 |
+
btn_fetch = gr.Button("Fetch Papers", variant="primary", size="lg")
|
| 1021 |
+
papers_table_md = gr.Markdown("Results appear here.")
|
| 1022 |
+
paper_selector = gr.Dropdown(label="Select paper", choices=[], interactive=True)
|
| 1023 |
with gr.Group(elem_classes="filter-box"):
|
| 1024 |
+
gr.Markdown("### Filter & Sort")
|
| 1025 |
with gr.Row():
|
| 1026 |
+
f_year_from = gr.Slider(2000,2026,value=2020,step=1,label="Year from")
|
| 1027 |
+
f_year_to = gr.Slider(2000,2026,value=2026,step=1,label="Year to")
|
| 1028 |
with gr.Row():
|
| 1029 |
+
f_cit_min = gr.Slider(0,5000,value=0, step=5,label="Citations min")
|
| 1030 |
+
f_cit_max = gr.Slider(0,5000,value=5000,step=5,label="Citations max")
|
| 1031 |
with gr.Row():
|
| 1032 |
f_sort = gr.Dropdown(choices=SORT_CHOICES,
|
| 1033 |
+
value="Most Cited",label="Sort",scale=3)
|
| 1034 |
+
btn_filter = gr.Button("Apply",variant="primary",scale=1)
|
| 1035 |
+
gr.Markdown("---\n### Semantic Search (FAISS — in loaded papers)")
|
| 1036 |
with gr.Row():
|
| 1037 |
+
search_in_box = gr.Textbox(label="Search in loaded papers",
|
| 1038 |
placeholder="ARIMA, transformer...",scale=5)
|
| 1039 |
+
btn_search_in = gr.Button("Search",scale=1)
|
| 1040 |
search_in_out = gr.Markdown()
|
| 1041 |
|
| 1042 |
+
# ── TAB 2: GLOBAL SEARCH ───────────────────────────
|
| 1043 |
+
with gr.Tab("Global Search"):
|
| 1044 |
gr.Markdown(
|
| 1045 |
+
"### Search any paper by title or keywords\n\n"
|
| 1046 |
+
"> Uses arXiv **relevance** sort + CrossRef **title** search.\n"
|
| 1047 |
+
"> Example: `Attention is All You Need`"
|
| 1048 |
)
|
| 1049 |
with gr.Group(elem_classes="gs-box"):
|
| 1050 |
with gr.Row():
|
| 1051 |
gs_query = gr.Textbox(
|
| 1052 |
+
label="Title or keywords",
|
| 1053 |
+
placeholder="Attention is All You Need | ARIMA forecasting ...",
|
| 1054 |
scale=4)
|
| 1055 |
+
gs_source = gr.Radio(label="Source",
|
| 1056 |
+
choices=["arXiv","CrossRef","Both"],
|
| 1057 |
+
value="Both", scale=2)
|
| 1058 |
+
gs_max = gr.Slider(5,30,value=10,step=5,label="Max results",scale=1)
|
| 1059 |
+
btn_gs = gr.Button("Search Now", variant="primary", size="lg")
|
| 1060 |
+
gs_out = gr.Markdown("Enter a title or keywords...")
|
|
|
|
| 1061 |
|
| 1062 |
# ── TAB 3: EXPLAIN ─────────────────────────────────
|
| 1063 |
+
with gr.Tab("Explain"):
|
| 1064 |
with gr.Row():
|
| 1065 |
+
paper_sel2 = gr.Dropdown(label="Select paper",
|
| 1066 |
choices=[], interactive=True, scale=4)
|
| 1067 |
+
lang_exp = gr.Radio(LANG_CHOICES, value="Arabic",
|
| 1068 |
+
label="Language", scale=1)
|
| 1069 |
with gr.Row():
|
| 1070 |
+
btn_explain = gr.Button("Explain", variant="primary")
|
| 1071 |
+
btn_fav = gr.Button("Save Fav")
|
| 1072 |
+
btn_audio = gr.Button("Listen")
|
| 1073 |
+
btn_export_pdf = gr.Button("Export PDF", variant="secondary")
|
| 1074 |
with gr.Row():
|
| 1075 |
fav_status = gr.Markdown()
|
| 1076 |
pdf_status = gr.Markdown()
|
| 1077 |
+
explanation_out = gr.Markdown("Fetch papers and select one.")
|
| 1078 |
+
audio_out = gr.Audio(label="Audio", type="filepath")
|
| 1079 |
+
pdf_out = gr.File(label="Download PDF")
|
| 1080 |
|
| 1081 |
# ── TAB 4: COMPARE ─────────────────────────────────
|
| 1082 |
+
with gr.Tab("Compare"):
|
| 1083 |
with gr.Row():
|
| 1084 |
+
cmp_a = gr.Dropdown(label="Paper A", choices=[], interactive=True)
|
| 1085 |
+
cmp_b = gr.Dropdown(label="Paper B", choices=[], interactive=True)
|
| 1086 |
+
lang_cmp = gr.Radio(LANG_CHOICES, value="Arabic",
|
| 1087 |
+
label="Language", scale=1)
|
| 1088 |
+
btn_compare = gr.Button("Compare", variant="primary")
|
| 1089 |
+
compare_out = gr.Markdown("Select two papers.")
|
| 1090 |
|
| 1091 |
# ── TAB 5: OVERVIEW ────────────────────────────────
|
| 1092 |
+
with gr.Tab("Overview"):
|
| 1093 |
with gr.Row():
|
| 1094 |
+
lang_ov = gr.Radio(LANG_CHOICES, value="Arabic",
|
| 1095 |
+
label="Language", scale=1)
|
| 1096 |
+
btn_overview = gr.Button("Generate Report", variant="primary", scale=3)
|
| 1097 |
+
overview_out = gr.Markdown("Fetch papers first.")
|
| 1098 |
|
| 1099 |
# ── TAB 6: TRENDS ──────────────────────────────────
|
| 1100 |
+
with gr.Tab("Trends"):
|
| 1101 |
+
btn_trends = gr.Button("Analyze Trends", variant="primary", size="lg")
|
| 1102 |
+
trend_chart = gr.Image(label="Trends Dashboard", type="filepath")
|
| 1103 |
+
trend_stats = gr.Markdown("Fetch papers first.")
|
| 1104 |
|
| 1105 |
# ── TAB 7: BIBLIOGRAPHY ────────────────────────────
|
| 1106 |
+
with gr.Tab("Bibliography"):
|
| 1107 |
+
bib_style = gr.Radio(["APA","IEEE","Chicago","BibTeX"],
|
| 1108 |
+
value="APA", label="Style")
|
| 1109 |
+
btn_bib = gr.Button("Generate Bibliography", variant="primary")
|
| 1110 |
bib_out = gr.Markdown()
|
| 1111 |
+
bib_file = gr.File(label="Download")
|
| 1112 |
|
| 1113 |
# ── TAB 8: FAVORITES ───────────────────────────────
|
| 1114 |
+
with gr.Tab("Favorites"):
|
| 1115 |
+
btn_show_fav = gr.Button("Show Favorites")
|
| 1116 |
+
favs_md = gr.Markdown("Press to show.")
|
| 1117 |
+
btn_export_fav = gr.Button("Export CSV", variant="secondary")
|
| 1118 |
+
fav_csv_file = gr.File(label="CSV File")
|
| 1119 |
|
| 1120 |
# ── TAB 9: AUTO-FETCH ──────────────────────────────
|
| 1121 |
+
with gr.Tab("Auto-Fetch"):
|
| 1122 |
with gr.Row():
|
| 1123 |
+
auto_q = gr.Textbox(label="Topic",
|
| 1124 |
value="economic forecasting", scale=3)
|
| 1125 |
+
auto_cat = gr.Dropdown(label="Category",
|
| 1126 |
choices=list(CATEGORIES.keys()),
|
| 1127 |
+
value="Economics", scale=2)
|
| 1128 |
auto_interval = gr.Slider(5,120,value=60,step=5,
|
| 1129 |
+
label="Every (min)",scale=1)
|
| 1130 |
with gr.Row():
|
| 1131 |
+
btn_start_auto = gr.Button("Start", variant="primary")
|
| 1132 |
+
btn_stop_auto = gr.Button("Stop", variant="stop")
|
| 1133 |
+
btn_refresh_log = gr.Button("Refresh Log")
|
| 1134 |
auto_status = gr.Markdown()
|
| 1135 |
+
auto_log_md = gr.Markdown("No log.")
|
| 1136 |
|
| 1137 |
# ── TAB 10: CHAT ───────────────────────────────────
|
| 1138 |
+
with gr.Tab("Chat"):
|
| 1139 |
+
chatbot_ui = gr.Chatbot(label="Research Assistant",
|
| 1140 |
+
height=480, bubble_full_width=False)
|
| 1141 |
with gr.Row():
|
| 1142 |
+
chat_in = gr.Textbox(label="Question", scale=5,
|
| 1143 |
+
placeholder="Key findings? | ما أبرز النتائج؟")
|
| 1144 |
+
btn_send = gr.Button("Send", variant="primary", scale=1)
|
| 1145 |
+
btn_clear = gr.Button("Clear", size="sm")
|
| 1146 |
|
| 1147 |
# ── TAB 11: ABOUT ──────────────────────────────────
|
| 1148 |
+
with gr.Tab("About"):
|
| 1149 |
gr.Markdown("""
|
| 1150 |
+
## Scientific Paper Discovery — v7.4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1151 |
|
| 1152 |
+
### Search Mode Comparison
|
| 1153 |
+
| Mode | sortBy | Best for |
|
| 1154 |
|---|---|---|
|
| 1155 |
+
| Browse tab | `submittedDate` | Latest papers on a topic |
|
| 1156 |
+
| Global Search | `relevance` + `ti:` | Finding a paper by exact title |
|
| 1157 |
+
| FAISS (internal) | Cosine similarity | Semantic search in loaded papers |
|
| 1158 |
+
|
| 1159 |
+
### v7.4 Fixes
|
| 1160 |
+
- **arXiv Global Search** now uses `sortBy=relevance` + `ti:"..."` prefix
|
| 1161 |
+
- **CrossRef Global Search** now uses `query.title` for precise title matching
|
| 1162 |
+
- **SyntaxError fix**: removed backslashes from inside f-strings
|
| 1163 |
""")
|
| 1164 |
|
| 1165 |
# ── WIRING ──────────────────────────────────────────────
|
|
|
|
| 1181 |
btn_gs.click(global_paper_search, inputs=[gs_query, gs_source, gs_max], outputs=[gs_out])
|
| 1182 |
gs_query.submit(global_paper_search, inputs=[gs_query, gs_source, gs_max], outputs=[gs_out])
|
| 1183 |
|
| 1184 |
+
btn_explain.click(gr_explain, inputs=[paper_sel2, lang_exp], outputs=[explanation_out])
|
| 1185 |
+
btn_fav.click(gr_save_fav, inputs=[paper_sel2], outputs=[fav_status])
|
| 1186 |
+
btn_audio.click(gr_audio, inputs=[explanation_out, lang_exp], outputs=[audio_out])
|
| 1187 |
btn_export_pdf.click(gr_export_pdf,
|
| 1188 |
inputs=[explanation_out, paper_sel2],
|
| 1189 |
outputs=[pdf_out, pdf_status])
|
| 1190 |
|
| 1191 |
+
btn_compare.click(gr_compare, inputs=[cmp_a, cmp_b, lang_cmp], outputs=[compare_out])
|
| 1192 |
+
btn_overview.click(gr_overview, inputs=[t_query, lang_ov], outputs=[overview_out])
|
| 1193 |
btn_trends.click(gr_trends, outputs=[trend_chart, trend_stats])
|
| 1194 |
+
btn_bib.click(gr_bib, inputs=[bib_style], outputs=[bib_out, bib_file])
|
| 1195 |
|
| 1196 |
btn_show_fav.click(gr_show_favs, outputs=[favs_md])
|
| 1197 |
btn_export_fav.click(gr_export_fav, outputs=[fav_csv_file])
|