# ================================================================
# Scientific Paper Discovery Bot v7.4
# ================================================================
import os, re, time, json, pickle, threading
import requests
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
from collections import Counter
import numpy as np
import faiss
import pandas as pd
import matplotlib
matplotlib.use("Agg")  # headless backend: figures are saved to PNG, never shown
import matplotlib.pyplot as plt
import gradio as gr
from sentence_transformers import SentenceTransformer
from groq import Groq
from gtts import gTTS
from langdetect import detect, DetectorFactory
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import cm
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, HRFlowable

DetectorFactory.seed = 0  # make langdetect deterministic across runs

GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
S2_API_KEY = os.environ.get("S2_API_KEY", "")
groq_client = Groq(api_key=GROQ_API_KEY)

print("Loading embedder...")
embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
_ = embedder.encode(["warmup"])  # first encode is slow; warm the model at startup
print("Embedder ready!")

# Shared mutable state used by the Gradio handlers.
PAPERS = []           # last fetched corpus; the FAISS index is built over this
ACTIVE_PAPERS = []    # corpus after UI filtering; dropdown indices map into this
FAISS_INDEX = None
AUTO_RUNNING = False  # flag polled by the background auto-fetch thread
AUTO_LOG = []
CURRENT_YEAR = datetime.now().year

PERSIST_DIR = "/tmp"
FAVORITES_PATH = PERSIST_DIR + "/favorites.pkl"
SEEN_IDS_PATH = PERSIST_DIR + "/seen_ids.json"
os.makedirs(PERSIST_DIR, exist_ok=True)

# UI label -> arXiv category query value.
CATEGORIES = {
    "๐ŸŒ All": "",
    "๐Ÿ“Š Economics": "econ",
    "๐Ÿ’ฐ Quant Finance": "q-fin",
    "๐Ÿค– AI": "cs.AI",
    "๐Ÿง  Machine Learning": "cs.LG",
    "๐Ÿ’ฌ NLP": "cs.CL",
    "๐Ÿ“ˆ Statistics": "stat",
    "๐Ÿ”ฌ Biology": "q-bio",
    "โš›๏ธ Physics": "physics",
    "๐Ÿ“ Mathematics": "math",
    "๐Ÿ’ป Computer Science": "cs",
}

# UI label -> CrossRef free-text subject keyword (appended to the query).
CROSSREF_SUBJECTS = {
    "๐ŸŒ All": "",
    "๐Ÿ“Š Economics": "economics",
    "๐Ÿ’ฐ Quant Finance": "finance",
    "๐Ÿค– AI": "artificial intelligence",
    "๐Ÿง  Machine Learning": "machine learning",
    "๐Ÿ’ฌ NLP": "natural language processing",
    "๐Ÿ“ˆ Statistics": "statistics",
    "๐Ÿ”ฌ Biology": "biology",
    "โš›๏ธ Physics": "physics",
    "๐Ÿ“ Mathematics": "mathematics",
    "๐Ÿ’ป Computer Science": "computer science",
}

LANG_CHOICES = ["Arabic", "English"]
SORT_CHOICES = ["Newest", "Oldest", "Most Cited", "Least Cited"]

# Formatting rules injected into every Arabic LLM prompt.
AR_RULES = """
- ุงุจุฏุฃ ูƒู„ ู‚ุณู… ุจู€ ## ู…ุน ุณุทุฑ ูุงุฑุบ ู‚ุจู„ู‡ ูˆุจุนุฏู‡
- ุงูƒุชุจ ูƒู„ ู‚ุณู… ููŠ ูู‚ุฑุฉ 3-4 ุฌู…ู„ ุจุงู„ุนุฑุจูŠุฉ ุงู„ูุตุญู‰
- ู„ุง ุชูƒุฑุฑ ุนู†ูˆุงู† ุงู„ู‚ุณู… ุฏุงุฎู„ ุงู„ู†ุต
"""


# ================================================================
# HELPERS
# ================================================================
def detect_lang(text):
    """Best-effort language sniff: "ar" for Arabic, otherwise "en"."""
    try:
        return "ar" if detect(str(text)[:300]).startswith("ar") else "en"
    except Exception:
        # langdetect raises on empty/ambiguous input; default to English
        return "en"


def clean_md(text):
    """Strip markdown punctuation and newlines; cap length (used as TTS input)."""
    text = re.sub(r"[#*`>\[\]!_~]", "", text)
    return re.sub(r"\n+", " ", text).strip()[:2500]


def fix_ar_format(text):
    """Normalize LLM output so every '##' heading is surrounded by blank lines."""
    text = re.sub(r"\n(##)", r"\n\n\1", text)
    text = re.sub(r"(## [^\n]+)\n([^\n#])", r"\1\n\n\2", text)
    return re.sub(r"\n{3,}", "\n\n", text).strip()


def cit_badge(n):
    """Render a citation count as a tiered emoji badge string.

    None/"" -> em dash, 0 -> middle dot, otherwise a medal/star/page prefix.
    """
    if n is None or n == "":
        return "โ€”"
    n = int(n)
    if n >= 1000:
        return "๐Ÿฅ‡ " + "{:,}".format(n)
    if n >= 100:
        return "๐Ÿ† " + "{:,}".format(n)
    if n >= 10:
        return "โญ " + "{:,}".format(n)
    if n > 0:
        return "๐Ÿ“„ " + str(n)
    return "ยท"


def build_table(papers_list):
    """Build (markdown table, dropdown choice labels) for a list of paper dicts."""
    rows = "| # | Title | Author | Date | Citations | Source |\n"
    rows += "|---|---|---|---|---|---|\n"
    choices = []
    for i, p in enumerate(papers_list):
        first = p["authors"][0] if p["authors"] else "N/A"
        badge = "NEW" if p.get("recent") else "๐Ÿ“„"
        rows += "| {} | {} {} | {} | {} | {} | {} |\n".format(
            i + 1, badge, p["title"], first, p["published"],
            cit_badge(p.get("citations")), p.get("source", "arXiv"))
        # "N. Title" — _get_paper() later parses the index back out of this label
        choices.append("{}. {}".format(i + 1, p["title"]))
    return rows, choices


def s2_headers():
    """Headers for Semantic Scholar; includes the API key when configured."""
    h = {"User-Agent": "ScientificPaperBot/7.4"}
    if S2_API_KEY:
        h["x-api-key"] = S2_API_KEY
    return h


def cr_headers():
    """Headers for CrossRef; the mailto puts us in their polite pool."""
    return {"User-Agent": "ScientificPaperBot/7.4 (mailto:researcher@example.com)"}


# ================================================================
# CrossRef date parser — rejects garbage years
# ================================================================
def parse_crossref_date(item):
    """Extract an ISO yyyy-mm-dd date from a CrossRef work record.

    Tries the date fields in priority order, skipping entries whose year falls
    outside [1900, CURRENT_YEAR + 1]; month/day are clamped to valid ranges.
    Returns "N/A" when no usable date is found.
    """
    for field in ["issued", "published", "published-print",
                  "published-online", "created"]:
        dp = (item.get(field) or {}).get("date-parts", [[]])
        if not dp or not dp[0]:
            continue
        pts = dp[0]
        try:
            year = int(pts[0])
            if not (1900 <= year <= CURRENT_YEAR + 1):
                continue
            month = max(1, min(12, int(pts[1]) if len(pts) >= 2 else 1))
            day = max(1, min(31, int(pts[2]) if len(pts) >= 3 else 1))
            return "{:04d}-{:02d}-{:02d}".format(year, month, day)
        except (ValueError, TypeError, IndexError):
            continue
    return "N/A"


# ================================================================
# SEEN / FAVORITES
# ================================================================
def load_seen_ids():
    """Set of paper ids already reported by auto-fetch (empty if none saved)."""
    try:
        with open(SEEN_IDS_PATH) as f:
            return set(json.load(f))
    except Exception:
        return set()


def save_seen_ids(ids):
    """Persist the seen-id set as a JSON list."""
    with open(SEEN_IDS_PATH, "w") as f:
        json.dump(list(ids), f)


def load_favorites():
    """Load the pickled favorites list; empty list on first run / bad file."""
    try:
        with open(FAVORITES_PATH, "rb") as f:
            return pickle.load(f)
    except Exception:
        return []


def save_favorite(paper):
    """Append a paper to favorites (deduped by id); return a status message."""
    favs = load_favorites()
    if paper["id"] not in {p["id"] for p in favs}:
        favs.append(paper)
        with open(FAVORITES_PATH, "wb") as f:
            pickle.dump(favs, f)
        return "Saved: " + paper["title"]
    return "Already saved."
def export_favorites_csv():
    """Dump saved favorites to a CSV; return the path, or None when empty."""
    favs = load_favorites()
    if not favs:
        return None
    df = pd.DataFrame([{
        "Title": p["title"],
        "Authors": ", ".join(p["authors"][:3]),
        "Date": p["published"],
        "Citations": p.get("citations", "N/A"),
        "URL": p["url"],
        "Source": p.get("source", "arXiv"),
    } for p in favs])
    path = PERSIST_DIR + "/favorites.csv"
    # utf-8-sig writes a BOM so Excel renders Arabic/emoji correctly
    df.to_csv(path, index=False, encoding="utf-8-sig")
    return path


def gr_export_fav():
    """Gradio wrapper around export_favorites_csv."""
    return export_favorites_csv()


# ================================================================
# PDF EXPORT
# ================================================================
def export_explanation_pdf(explanation_text, paper_title="paper"):
    """Render a markdown-ish LLM explanation into a styled A4 PDF.

    Returns the output path, or None for too-short input / build failure.
    """
    if not explanation_text or len(explanation_text) < 30:
        return None
    safe = re.sub(r"[^\w\s-]", "", paper_title)[:50].strip().replace(" ", "_")
    path = PERSIST_DIR + "/explanation_" + safe + ".pdf"
    doc = SimpleDocTemplate(path, pagesize=A4, rightMargin=2 * cm, leftMargin=2 * cm,
                            topMargin=2 * cm, bottomMargin=2 * cm)
    styles = getSampleStyleSheet()
    h2_style = ParagraphStyle("H2", parent=styles["Heading2"], fontSize=11,
                              textColor=colors.HexColor("#2563eb"),
                              spaceBefore=14, spaceAfter=6)
    bd_style = ParagraphStyle("BD", parent=styles["Normal"], fontSize=10,
                              leading=16, spaceAfter=8)
    mt_style = ParagraphStyle("MT", parent=styles["Normal"], fontSize=9,
                              textColor=colors.HexColor("#64748b"))
    story = []
    for line in explanation_text.split("\n"):
        line = line.strip()
        if not line:
            story.append(Spacer(1, 6))
            continue
        # Strip markdown emphasis/inline code/heading markers and emoji
        clean = re.sub(r"\*\*(.+?)\*\*", r"\1", line)
        clean = re.sub(r"\*(.+?)\*", r"\1", clean)
        clean = re.sub(r"`(.+?)`", r"\1", clean)
        clean = re.sub(r"^#{1,6}\s*", "", clean)
        clean = re.sub(r"[๐ŸŽฏโ“๐Ÿ”ง๐Ÿ“Š๐ŸŒŸ๐Ÿ”—๐Ÿ“„๐Ÿ‘ฅ๐Ÿ“…๐Ÿ“ก๐Ÿค–#*_~]", "", clean).strip()
        if not clean:
            continue
        if line.startswith("## ") or line.startswith("# "):
            story.append(HRFlowable(width="100%", thickness=0.5,
                                    color=colors.HexColor("#e2e8f0"), spaceAfter=4))
            story.append(Paragraph(clean, h2_style))
        elif line.startswith(">"):
            # Blockquote: indented, muted paragraph
            q_st = ParagraphStyle("Q", parent=styles["Normal"], fontSize=9,
                                  leftIndent=20, textColor=colors.HexColor("#475569"),
                                  leading=14)
            story.append(Paragraph(
                re.sub(r"[๐ŸŽฏโ“๐Ÿ”ง๐Ÿ“Š๐ŸŒŸ๐Ÿ”—๐Ÿ“„๐Ÿ‘ฅ๐Ÿ“…๐Ÿ“ก๐Ÿค–#*_~]", "",
                       line.lstrip(">").strip()), q_st))
        else:
            story.append(Paragraph(clean, bd_style))
    story += [
        Spacer(1, 20),
        HRFlowable(width="100%", thickness=0.5, color=colors.HexColor("#e2e8f0")),
        Paragraph("Generated by Paper Discovery v7.4 โ€” "
                  + datetime.now().strftime("%Y-%m-%d %H:%M"), mt_style)
    ]
    try:
        doc.build(story)
        return path
    except Exception as e:
        print("PDF error: " + str(e))
        return None


def gr_export_pdf(explanation_text, choice):
    """Gradio handler: export the current explanation; return (file, status)."""
    if not explanation_text or len(explanation_text) < 50:
        return None, "Explain a paper first."
    title = choice.split(". ", 1)[-1] if choice else "paper"
    path = export_explanation_pdf(explanation_text, title)
    return (path, "PDF ready!") if path else (None, "PDF failed.")


# ================================================================
# SOURCE 1 — arXiv
# sort_by: "submittedDate" for browsing latest papers,
#          "relevance" for exact-title global search
# ================================================================
def fetch_arxiv_papers(query, category, max_results=20, days_back=365,
                       sort_by="submittedDate"):
    """Query the arXiv Atom API and return a list of normalized paper dicts.

    3+ word queries under relevance sort become exact title matches (ti:"...").
    "recent" is True for papers published within days_back of now.
    Citations are left as None — filled later by enrich_citations().
    """
    parts = []
    words = query.strip().split()
    if len(words) >= 3 and sort_by == "relevance":
        parts.append('ti:"' + query.strip() + '"')
    elif query.strip():
        parts.append("all:" + query.strip())
    if category.strip():
        parts.append("cat:" + category.strip())
    sq = " AND ".join(parts) if parts else "all:machine learning"
    params = {
        "search_query": sq,
        "start": 0,
        "max_results": max_results,
        "sortBy": sort_by,
        "sortOrder": "descending",
    }
    try:
        # NOTE(review): endpoint is plain http (arXiv redirects to https)
        resp = requests.get("http://export.arxiv.org/api/query",
                            params=params, timeout=30)
        resp.raise_for_status()
    except Exception as e:
        print("arXiv error: " + str(e))
        return []
    ns_a = "http://www.w3.org/2005/Atom"
    ns_x = "http://arxiv.org/schemas/atom"
    root = ET.fromstring(resp.content)
    cutoff = datetime.now() - timedelta(days=days_back)
    papers = []
    for entry in root.findall("{" + ns_a + "}entry"):
        try:
            pid = entry.find("{" + ns_a + "}id").text.split("/abs/")[-1].strip()
            title = entry.find("{" + ns_a + "}title").text.strip().replace("\n", " ")
            abstract = entry.find("{" + ns_a + "}summary").text.strip().replace("\n", " ")
            published = entry.find("{" + ns_a + "}published").text[:10]
            authors = [a.find("{" + ns_a + "}name").text
                       for a in entry.findall("{" + ns_a + "}author")]
            cats = set()
            pc = entry.find("{" + ns_x + "}primary_category")
            if pc is not None:
                cats.add(pc.get("term", ""))
            for c in entry.findall("{" + ns_x + "}category"):
                cats.add(c.get("term", ""))
            cats.discard("")
            papers.append({
                "id": pid,
                "title": title,
                "authors": authors[:6],
                "abstract": abstract[:1200],
                "published": published,
                "categories": list(cats)[:4],
                "citations": None,
                "url": "https://arxiv.org/abs/" + pid,
                "pdf_url": "https://arxiv.org/pdf/" + pid,
                "recent": datetime.strptime(published, "%Y-%m-%d") >= cutoff,
                "source": "arXiv",
            })
        except Exception as e:
            # Skip malformed entries but keep the rest of the feed
            print("arXiv parse: " + str(e))
    return papers


# ================================================================
# SOURCE 2 — CrossRef
# ================================================================
def fetch_crossref_papers(query, category_label="", max_results=20, days_back=365,
                          use_title=False):
    """Query the CrossRef works API and return normalized paper dicts.

    use_title=True searches titles only (global search); otherwise the
    category's subject keyword is appended to a general query. Retries with
    exponential backoff on HTTP 429. Results are sorted by citations desc.
    """
    subject = CROSSREF_SUBJECTS.get(category_label, "")
    full_query = (query + " " + subject).strip() if subject else query
    key = "query.title" if use_title else "query"
    params = {
        key: full_query,
        # over-fetch: many rows are discarded below (no title/date, dupes)
        "rows": min(max_results * 3, 200),
        "sort": "relevance",
        "select": ("title,author,abstract,published,published-print,"
                   "published-online,issued,created,DOI,"
                   "is-referenced-by-count,link,subject"),
    }
    items = []
    for attempt in range(3):
        try:
            r = requests.get("https://api.crossref.org/works", params=params,
                             headers=cr_headers(), timeout=30)
            if r.status_code == 200:
                items = r.json().get("message", {}).get("items", [])
                break
            if r.status_code == 429:
                time.sleep(2 ** attempt)
                continue
            print("CrossRef " + str(r.status_code))
            return []
        except Exception as e:
            print("CrossRef attempt " + str(attempt) + ": " + str(e))
            time.sleep(1)
    cutoff = datetime.now() - timedelta(days=days_back)
    papers, seen_ids = [], set()
    for item in items:
        if len(papers) >= max_results:
            break
        title_list = item.get("title", [])
        if not title_list:
            continue
        title = title_list[0].strip()
        if not title or title.lower().startswith("title pending"):
            continue
        pub = parse_crossref_date(item)
        if pub == "N/A":
            continue
        cit = int(item.get("is-referenced-by-count", 0) or 0)
        authors = [(a.get("given", "") + " " + a.get("family", "")).strip()
                   for a in item.get("author", [])[:6]]
        authors = [a for a in authors if a.strip()] or ["Unknown"]
        # CrossRef abstracts come as JATS XML; strip the tags
        abstract = re.sub(r"<[^>]+>", "",
                          item.get("abstract", "No abstract.")).strip()[:1200]
        doi = item.get("DOI", "")
        url = "https://doi.org/" + doi if doi else "#"
        pid = doi or re.sub(r"\W", "", title)[:40]
        if pid in seen_ids:
            continue
        seen_ids.add(pid)
        pdf_url = next((l.get("URL", "") for l in item.get("link", [])
                        if "pdf" in l.get("content-type", "").lower()), "")
        try:
            recent = datetime.strptime(pub[:10], "%Y-%m-%d") >= cutoff
        except ValueError:
            recent = False
        papers.append({
            "id": pid,
            "title": title,
            "authors": authors,
            "abstract": abstract,
            "published": pub[:10],
            "categories": item.get("subject", [])[:3],
            "citations": cit,
            "url": url,
            "pdf_url": pdf_url,
            "recent": recent,
            "source": "CrossRef",
        })
    papers.sort(key=lambda x: x["citations"], reverse=True)
    return papers


# ================================================================
# GLOBAL PAPER SEARCH — relevance sorted
# ================================================================
def global_paper_search(query, source_choice, max_results=10):
    """Search arXiv and/or CrossRef globally (10-year window), dedupe by
    normalized title, sort by citations, and return a markdown result list."""
    if not query or not query.strip():
        return "Enter a title or keywords."
    q = query.strip()
    papers = []
    if source_choice in ("arXiv", "Both"):
        papers += fetch_arxiv_papers(q, "", int(max_results), 3650,
                                     sort_by="relevance")
    if source_choice in ("CrossRef", "Both"):
        papers += fetch_crossref_papers(q, "", int(max_results), 3650,
                                        use_title=True)
    if not papers:
        return "No results for: " + q
    seen, unique = set(), []
    for p in papers:
        key = re.sub(r"\W", "", p["title"].lower())[:60]
        if key not in seen:
            seen.add(key)
            unique.append(p)
    unique.sort(key=lambda x: x.get("citations") or 0, reverse=True)
    NL = "\n"
    md = "## Search Results: " + q + NL + NL
    md += "**" + str(len(unique)) + " papers found**" + NL + NL + "---" + NL + NL
    for i, p in enumerate(unique, 1):
        cit = (" | " + cit_badge(p.get("citations"))) if p.get("citations") else ""
        cats = " | ".join(p.get("categories", [])[:2])
        auth = ", ".join(p["authors"][:3])
        abst = p["abstract"][:450]
        link = "[View](" + p["url"] + ")"
        pdf = (" [PDF](" + p["pdf_url"] + ")") if p.get("pdf_url") else ""
        src = p.get("source", "")
        md += ("### " + str(i) + ". " + p["title"] + NL + NL
               + auth + " | " + p["published"] + cit + " | " + src
               + (" | " + cats if cats else "") + NL + NL
               + "> " + abst + "..." + NL + NL
               + link + pdf + NL + NL + "---" + NL + NL)
    return md


# ================================================================
# CITATION ENGINE — 3-layer
# ================================================================
def enrich_citations(papers):
    """Fill in citation counts for arXiv papers (mutates and returns papers).

    Three fallback layers:
      1. Semantic Scholar batch endpoint (500 ids per POST).
      2. Per-paper S2 lookups for the first 15 still at zero.
      3. CrossRef fuzzy title match for anything still at zero.
    Remaining None counts are normalized to 0.
    """
    arxiv_papers = [p for p in papers if p.get("source") == "arXiv"
                    and (p.get("citations") is None or p.get("citations") == 0)]
    if not arxiv_papers:
        for p in papers:
            if p.get("citations") is None:
                p["citations"] = 0
        return papers
    id_map, batch_ids = {}, []
    for p in arxiv_papers:
        # drop any trailing version suffix (e.g. 2101.00001v2 -> 2101.00001)
        clean = re.sub(r"v\d+$", "", p["id"].split("/")[-1].strip())
        id_map[clean] = p
        batch_ids.append("arXiv:" + clean)
    # Layer 1: batch endpoint.
    for i in range(0, len(batch_ids), 500):
        try:
            r = requests.post(
                "https://api.semanticscholar.org/graph/v1/paper/batch",
                json={"ids": batch_ids[i:i + 500]},
                params={"fields": "citationCount,externalIds"},
                headers=s2_headers(), timeout=30)
            if r.status_code == 200:
                for item in r.json():
                    if not item:
                        continue
                    ext = item.get("externalIds") or {}
                    clean = re.sub(r"v\d+$", "",
                                   ext.get("ArXiv", "").split("/")[-1].strip())
                    if clean and clean in id_map:
                        c = item.get("citationCount")
                        if c is not None:
                            id_map[clean]["citations"] = int(c)
            elif r.status_code == 429:
                time.sleep(4)
        except Exception as e:
            print("S2 batch: " + str(e))
    # Layer 2: individual lookups, capped at 15 to respect rate limits.
    for p in [x for x in arxiv_papers if (x.get("citations") or 0) == 0][:15]:
        clean = re.sub(r"v\d+$", "", p["id"].split("/")[-1].strip())
        for attempt in range(2):
            try:
                r = requests.get(
                    "https://api.semanticscholar.org/graph/v1/paper/arXiv:" + clean,
                    params={"fields": "citationCount"},
                    headers=s2_headers(), timeout=10)
                if r.status_code == 200:
                    c = r.json().get("citationCount")
                    p["citations"] = int(c) if c else 0
                    break
                if r.status_code == 429:
                    time.sleep(2 ** attempt)
                    continue
                p["citations"] = 0
                break
            except Exception:
                p["citations"] = 0
                break
        time.sleep(0.12)
    # Layer 3: CrossRef title search with a crude word-overlap sanity check.
    for p in [x for x in arxiv_papers if (x.get("citations") or 0) == 0]:
        try:
            r = requests.get("https://api.crossref.org/works",
                             params={"query.title": p["title"], "rows": 1,
                                     "select": "is-referenced-by-count,title"},
                             headers=cr_headers(), timeout=8)
            if r.status_code == 200:
                items = r.json().get("message", {}).get("items", [])
                if items:
                    found = (items[0].get("title") or [""])[0].lower()
                    qw = set(p["title"].lower().split()[:5])
                    fw = set(found.split()[:10])
                    # require >= 2 overlapping words before trusting the match
                    p["citations"] = (
                        int(items[0].get("is-referenced-by-count", 0) or 0)
                        if len(qw & fw) >= 2 else 0)
                else:
                    p["citations"] = 0
            else:
                p["citations"] = 0
            time.sleep(0.12)
        except Exception:
            p["citations"] = 0
    for p in papers:
        if p.get("citations") is None:
            p["citations"] = 0
    return papers


# ================================================================
# FAISS
# ================================================================
def build_papers_index(papers):
    """Embed title+abstract for each paper and (re)build the global index."""
    global FAISS_INDEX, PAPERS
    PAPERS = papers
    if not papers:
        FAISS_INDEX = None
        return
    texts = [p["title"] + " " + p["abstract"] for p in papers]
    embs = embedder.encode(texts, convert_to_numpy=True,
                           normalize_embeddings=True).astype("float32")
    # normalized embeddings + inner product == cosine similarity
    idx = faiss.IndexFlatIP(embs.shape[1])
    idx.add(embs)
    FAISS_INDEX = idx


def search_papers(query, top_k=5):
    """Semantic search over the indexed corpus; drops scores <= 0.1."""
    if FAISS_INDEX is None or not PAPERS:
        return []
    qe = embedder.encode([query], convert_to_numpy=True,
                         normalize_embeddings=True).astype("float32")
    scores, ids = FAISS_INDEX.search(qe, min(top_k, len(PAPERS)))
    return [{"paper": PAPERS[i], "score": float(s)}
            for s, i in zip(scores[0], ids[0]) if i >= 0 and float(s) > 0.1]


# ================================================================
# AUTO-FETCH
# ================================================================
def auto_fetch_worker(query, category, interval):
    """Background loop: every `interval` seconds, fetch 1-day-old papers and
    log how many have not been seen before. Exits when AUTO_RUNNING clears."""
    global AUTO_RUNNING
    while AUTO_RUNNING:
        time.sleep(interval)
        if not AUTO_RUNNING:
            break
        papers = fetch_arxiv_papers(query, category, 30, 1)
        seen = load_seen_ids()
        new_ps = [p for p in papers if p["id"] not in seen]
        if new_ps:
            save_seen_ids(seen | {p["id"] for p in papers})
            AUTO_LOG.append(
                "[" + datetime.now().strftime("%H:%M") + "] NEW "
                + str(len(new_ps)) + " โ€” " + query)
            if len(AUTO_LOG) > 20:
                AUTO_LOG.pop(0)


def start_auto_fetch(query, cat_label, interval_min):
    """Start the daemon auto-fetch thread (no-op if one is already running)."""
    global AUTO_RUNNING
    if AUTO_RUNNING:
        return "Already running."
    AUTO_RUNNING = True
    threading.Thread(
        target=auto_fetch_worker,
        args=(query, CATEGORIES.get(cat_label, ""), int(interval_min) * 60),
        daemon=True).start()
    return "Auto-fetch started every " + str(interval_min) + " min for: " + query


def stop_auto_fetch():
    """Signal the worker loop to stop at its next wake-up."""
    global AUTO_RUNNING
    AUTO_RUNNING = False
    return "Stopped."


def get_auto_log():
    """Last 10 auto-fetch log lines, newest first."""
    return "\n\n".join(reversed(AUTO_LOG[-10:])) if AUTO_LOG else "No log."


# ================================================================
# TRENDS
# ================================================================
def analyze_trends(papers):
    """Render a 6-panel dark-theme dashboard PNG plus markdown stats.

    Returns (png_path, stats_markdown), or (None, "No papers.") when empty.
    """
    if not papers:
        return None, "No papers."
    date_counts = Counter(p["published"][:7] for p in papers
                          if p["published"] != "N/A")
    stopwords = {"the","a","an","of","in","for","on","with","and","or","to","using",
                 "based","via","from","by","is","are","our","we","this","that","which",
                 "towards","approach","method","new","into","over","learning","deep",
                 "model","models","data","neural","large","language","paper","study",
                 "analysis","results","show","also","can","used","two","its","their"}
    all_words = [w.lower() for p in papers
                 for w in re.findall(r"[a-zA-Z]{4,}", p["title"])
                 if w.lower() not in stopwords]
    top_words = Counter(all_words).most_common(15)
    sources = Counter(p.get("source", "arXiv") for p in papers)
    cit_papers = [p for p in papers if (p.get("citations") or 0) > 0]
    top_cited = sorted(cit_papers, key=lambda x: x["citations"], reverse=True)[:10]
    all_auth = [a for p in papers for a in p["authors"][:3]]
    top_authors = Counter(all_auth).most_common(10)
    cvals = [p["citations"] for p in cit_papers]
    buckets = [0, 1, 5, 10, 50, 100, 500, 10000]
    blabels = ["0", "1-4", "5-9", "10-49", "50-99", "100-499", "500+"]
    # NOTE(review): the source for the next ~15 lines was corrupted (an HTML
    # strip ate everything between a '<' and the following '>'). The bucket
    # fill, totals, palette, style() helper, figure creation and the first
    # chart are reconstructed from how later code uses these names — verify
    # against the original v7.4 file.
    bcounts = ([sum(1 for c in cvals if buckets[i] <= c < buckets[i + 1])
                for i in range(len(blabels))] if cvals else [0] * len(blabels))
    total_cit = sum(cvals)
    avg_cit = round(total_cit / len(papers), 1)
    # Dark dashboard palette: background, panel, border, text.
    BG, PNL, BR, W = "#0f172a", "#1e293b", "#475569", "#e2e8f0"
    C = ["#3b82f6", "#22c55e", "#f59e0b", "#a855f7", "#ef4444",
         "#06b6d4", "#f97316", "#84cc16", "#ec4899", "#14b8a6",
         "#eab308", "#8b5cf6", "#10b981", "#f43f5e", "#0ea5e9"]

    def style(ax):
        # Apply the shared dark-panel look to one subplot.
        ax.set_facecolor(PNL)
        for sp in ax.spines.values():
            sp.set_color(BR)
        ax.tick_params(colors=W, labelsize=8)
        ax.grid(alpha=.15, color=BR)

    fig, axes = plt.subplots(2, 3, figsize=(19, 10), facecolor=BG)

    # Panel (0,0): papers per month with a linear trend line.
    ax = axes[0, 0]
    style(ax)
    if date_counts:
        ms, cs = zip(*sorted(date_counts.items()))
        ax.plot(ms, cs, marker="o", color=C[0], lw=2, label="Papers")
        if len(cs) > 2:
            z = np.polyfit(range(len(cs)), cs, 1)
            ax.plot(ms, np.poly1d(z)(range(len(cs))), "--", color="#f59e0b",
                    lw=1.5, alpha=.8, label="Trend")
        ax.legend(fontsize=8, facecolor=PNL, labelcolor=W)
    ax.set_title("Papers per Month", color=W, fontsize=12,
                 fontweight="bold", pad=10)
    ax.set_ylabel("Count", color=W, fontsize=9)
    ax.tick_params(rotation=45)

    # Panel (0,1): keyword frequencies.
    ax = axes[0, 1]
    style(ax)
    if top_words:
        wds, wcts = zip(*top_words)
        ax.barh(list(wds), list(wcts), color=C[:len(wds)],
                edgecolor="#475569", lw=.6)
        for b, c in zip(ax.patches, wcts):
            ax.text(b.get_width() + .1, b.get_y() + b.get_height() / 2, str(c),
                    va="center", color=W, fontsize=8)
    ax.set_title("Top Keywords", color=W, fontsize=12, fontweight="bold", pad=10)
    ax.set_xlabel("Frequency", color=W, fontsize=9)

    # Panel (0,2): source pie chart.
    ax = axes[0, 2]
    ax.set_facecolor(PNL)
    if sources:
        sl, sv = zip(*sources.items())
        _, txts, ats = ax.pie(sv, labels=sl, autopct="%1.0f%%",
                              colors=C[:len(sl)], startangle=90,
                              textprops={"color": W, "fontsize": 10},
                              wedgeprops={"edgecolor": BR, "linewidth": 1.5})
        for at in ats:
            at.set_color(W)
            at.set_fontsize(9)
    ax.set_title("Source Distribution", color=W, fontsize=12,
                 fontweight="bold", pad=10)

    # Panel (1,0): top-10 most cited papers.
    ax = axes[1, 0]
    style(ax)
    if top_cited:
        lbls = [(p["title"][:35] + "..." if len(p["title"]) > 35 else p["title"])
                for p in top_cited]
        cv = [p["citations"] for p in top_cited]
        ax.barh(lbls[::-1], cv[::-1], color=C[1], edgecolor="#475569", lw=.6)
        mx = max(cv) if cv else 1
        for b, c in zip(ax.patches, cv[::-1]):
            ax.text(b.get_width() + mx * .01, b.get_y() + b.get_height() / 2,
                    "{:,}".format(c), va="center", color=W, fontsize=8)
        ax.set_xlabel("Citations", color=W, fontsize=9)
    else:
        ax.text(.5, .5, "No citation data", ha="center", va="center",
                color="#94a3b8", fontsize=11, transform=ax.transAxes)
    ax.set_title("Top 10 Cited", color=W, fontsize=12, fontweight="bold", pad=10)

    # Panel (1,1): citation histogram with avg/total annotation.
    ax = axes[1, 1]
    style(ax)
    if any(bcounts):
        ax.bar(blabels, bcounts, color=C[2], edgecolor="#475569", lw=.8)
        for b, c in zip(ax.patches, bcounts):
            if c > 0:
                ax.text(b.get_x() + b.get_width() / 2, b.get_height() + .1,
                        str(c), ha="center", va="bottom", color=W, fontsize=9)
        ax.set_xlabel("Citation Range", color=W, fontsize=9)
        ax.set_ylabel("Papers", color=W, fontsize=9)
        ax.annotate("Avg " + str(avg_cit) + " | Total " + "{:,}".format(total_cit),
                    xy=(.98, .96), xycoords="axes fraction", ha="right",
                    va="top", color="#94a3b8", fontsize=8)
    else:
        ax.text(.5, .5, "No citation data", ha="center", va="center",
                color="#94a3b8", fontsize=11, transform=ax.transAxes)
    ax.set_title("Citation Distribution", color=W, fontsize=12,
                 fontweight="bold", pad=10)

    # Panel (1,2): most frequent authors.
    ax = axes[1, 2]
    style(ax)
    if top_authors:
        an, ac = zip(*top_authors)
        ax.barh(list(an)[::-1], list(ac)[::-1], color=C[3],
                edgecolor="#475569", lw=.6)
        for b, c in zip(ax.patches, list(ac)[::-1]):
            ax.text(b.get_width() + .05, b.get_y() + b.get_height() / 2, str(c),
                    va="center", color=W, fontsize=8)
        ax.set_xlabel("Papers", color=W, fontsize=9)
    ax.set_title("Top Authors", color=W, fontsize=12, fontweight="bold", pad=10)

    plt.tight_layout(pad=3)
    path = PERSIST_DIR + "/trends.png"
    plt.savefig(path, bbox_inches="tight", dpi=150, facecolor=BG)
    plt.close()

    top5 = sorted(cit_papers, key=lambda x: x["citations"], reverse=True)[:5]
    stats = ("### Stats\n\n| Metric | Value |\n|---|---|\n"
             + "| Total | **" + str(len(papers)) + "** |\n"
             + "| New | **" + str(sum(1 for p in papers if p.get("recent"))) + "** |\n"
             + "| Citations | **" + "{:,}".format(total_cit) + "** |\n"
             + "| Average | **" + str(avg_cit) + "** |\n\n")
    if top5:
        stats += "### Top Cited\n\n"
        for i, p in enumerate(top5, 1):
            stats += (str(i) + ". [" + p["title"] + "](" + p["url"] + ")"
                      + " โ€” **" + "{:,}".format(p["citations"]) + "**\n\n")
    return path, stats


# ================================================================
# LLM
# ================================================================
def _llm(messages, max_tokens=1200):
    """Single Groq chat completion; returns the text or an error string."""
    try:
        r = groq_client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=messages,
            temperature=0.3,
            max_tokens=max_tokens)
        return r.choices[0].message.content.strip()
    except Exception as e:
        return "LLM Error: " + str(e)


def explain_paper(paper, lang="ar"):
    """LLM explanation of one paper — Arabic (post-formatted) or English."""
    cit = paper.get("citations", "N/A")
    if lang == "ar":
        return fix_ar_format(_llm([
            {"role": "system", "content":
                "ุฃู†ุช ุฎุจูŠุฑ ุฃูƒุงุฏูŠู…ูŠ ูŠุดุฑุญ ุงู„ุฃุจุญุงุซ ุจุงู„ุนุฑุจูŠุฉ ุงู„ูุตุญู‰.\n" + AR_RULES},
            {"role": "user", "content":
                "ุงุดุฑุญ ุงู„ูˆุฑู‚ุฉ:\nุงู„ุนู†ูˆุงู†: " + paper["title"] + "\n"
                + "ุงู„ู…ุคู„ููˆู†: " + ", ".join(paper["authors"][:3]) + "\n"
                + "ุงู„ุชุงุฑูŠุฎ: " + paper["published"] + " | ุงู„ุงู‚ุชุจุงุณุงุช: " + str(cit) + "\n"
                + "ุงู„ู…ู„ุฎุต: " + paper["abstract"] + "\n\n"
                + "## ู…ูˆุถูˆุน ุงู„ูˆุฑู‚ุฉ\n\n## ุงู„ู…ุดูƒู„ุฉ\n\n## ุงู„ู…ู†ู‡ุฌูŠุฉ\n\n"
                + "## ุงู„ู†ุชุงุฆุฌ\n\n## ุงู„ุฃู‡ู…ูŠุฉ\n\n## ุงู„ุชุทุจูŠู‚ุงุช"}]))
    return _llm([{"role": "user", "content":
        "Explain:\nTitle: " + paper["title"]
        + "\nAuthors: " + ", ".join(paper["authors"][:3])
        + "\nDate: " + paper["published"] + " | Citations: " + str(cit)
        + "\nAbstract: " + paper["abstract"] + "\n\n"
        + "## Topic\n## Problem\n## Methodology\n## Findings\n## Contribution\n## Applications"}])


def compare_papers(pa, pb, lang="ar"):
    """LLM side-by-side comparison of two papers."""
    body = ("Paper A: " + pa["title"] + " | Citations: "
            + str(pa.get("citations", "N/A")) + "\n"
            + pa["abstract"][:500] + "\n\nPaper B: " + pb["title"]
            + " | Citations: " + str(pb.get("citations", "N/A")) + "\n"
            + pb["abstract"][:500])
    if lang == "ar":
        return fix_ar_format(_llm([{"role": "user", "content":
            "ู‚ุงุฑู† ุจูŠู† ุงู„ูˆุฑู‚ุชูŠู†.\n" + AR_RULES + "\n\n" + body + "\n\n"
            + "## ุงู„ู‡ุฏู\n\n## ุงู„ู…ู†ู‡ุฌูŠุฉ\n\n## ุงู„ู†ุชุงุฆุฌ\n\n"
            + "## ุงู„ู‚ูˆุฉ\n\n## ุงู„ู‚ูŠูˆุฏ\n\n## ุงู„ุฎู„ุงุตุฉ"}], 1400))
    return _llm([{"role": "user", "content":
        "Compare:\n" + body + "\n\n"
        + "## Topic\n## Methodology\n## Results\n## Strengths\n## Limits\n## Verdict"}],
        1400)


def summarize_papers(papers, topic, lang="ar"):
    """LLM literature overview over (at most) the first 8 papers."""
    text = "".join(
        str(i) + ". " + p["title"] + " (" + p["published"] + "): "
        + p["abstract"][:300] + "...\n\n"
        for i, p in enumerate(papers[:8], 1))
    if lang == "ar":
        return fix_ar_format(_llm([{"role": "user", "content":
            "ู†ุธุฑุฉ ุนุงู…ุฉ ุฃูƒุงุฏูŠู…ูŠุฉ ุญูˆู„ \"" + topic + "\".\n" + AR_RULES + "\n\n"
            + text + "\n\n" + "## ุงู„ุงุชุฌุงู‡ุงุช\n\n## ุฃุจุฑุฒ ุงู„ุฃูˆุฑุงู‚\n\n"
            + "## ุงู„ู…ูˆุงุถูŠุน ุงู„ู…ุดุชุฑูƒุฉ\n\n## ุงู„ูุฌูˆุงุช"}], 900))
    return _llm([{"role": "user", "content":
        "Academic overview of \"" + topic + "\":\n" + text + "\n\n"
        + "## Trends\n## Key Papers\n## Themes\n## Gaps"}], 900)


def generate_bibliography(papers, style="APA"):
    """Format papers as APA / IEEE / Chicago / BibTeX (fallback) entries.

    Returns (bibliography_text, saved_file_path).
    """
    entries = []
    for i, p in enumerate(papers, 1):
        auth = ", ".join(p["authors"][:6]) + (" et al." if len(p["authors"]) > 6 else "")
        year = p["published"][:4] if p["published"] not in ("N/A", "") else "n.d."
        t, u = p["title"], p["url"]
        if style == "APA":
            entries.append(str(i) + ". " + auth + " (" + year + "). *" + t + "*. " + u)
        elif style == "IEEE":
            ae = " and ".join(p["authors"][:3]) + (" et al." if len(p["authors"]) > 3 else "")
            entries.append("[" + str(i) + "] " + ae + ', "' + t + '," ' + year
                           + ". [Online]: " + u)
        elif style == "Chicago":
            entries.append(str(i) + ". " + auth + '. "' + t + '." (' + year + "). " + u)
        else:
            # BibTeX: key = first author's last name + year + index
            key = re.sub(r"\W", "",
                         (p["authors"][0].split()[-1] if p["authors"] else "Auth")) + year
            entries.append("@article{" + key + str(i) + ",\n title={" + t
                           + "},\n author={" + auth + "},\n year={" + year
                           + "},\n url={" + u + "}\n}")
    bib = "\n\n".join(entries)
    path = PERSIST_DIR + "/bibliography_" + style + ".txt"
    with open(path, "w", encoding="utf-8") as f:
        f.write(bib)
    return bib, path


def chat_about_papers(question, history):
    """RAG chat over the fetched corpus; answers in the question's language.

    history is a list of {"role", "content"} dicts (last 4 turns are reused).
    """
    if not PAPERS:
        return ("ูŠุฑุฌู‰ ุฌู„ุจ ุงู„ุฃูˆุฑุงู‚ ุฃูˆู„ุงู‹." if detect_lang(question) == "ar"
                else "Fetch papers first.")
    lang = detect_lang(question)
    relevant = search_papers(question, top_k=4)
    context = ""
    if relevant:
        context = ("ุงู„ุฃูˆุฑุงู‚ ุฐุงุช ุงู„ุตู„ุฉ:\n\n" if lang == "ar" else "Relevant papers:\n\n")
        for r in relevant:
            p = r["paper"]
            cit = (" | " + str(p["citations"]) + " citations") if p.get("citations") else ""
            context += ("**" + p["title"] + "** (" + p["published"] + ")" + cit + "\n"
                        + p["abstract"][:400] + "\n๐Ÿ”— " + p["url"] + "\n\n")
    sys_msg = (("ุฃู†ุช ู…ุณุงุนุฏ ุจุญุซูŠ. ุฃุฌุจ ุจุงู„ุนุฑุจูŠุฉ ุงู„ูุตุญู‰.\n" + AR_RULES)
               if lang == "ar" else "You are an academic assistant. Answer in English.")
    msgs = [{"role": "system", "content": sys_msg}]
    for t in history[-4:]:
        msgs.append({"role": t["role"], "content": t["content"]})
    msgs.append({"role": "user", "content":
                 (context + "\nุณุคุงู„: " + question) if context else question})
    out = _llm(msgs, 800)
    return fix_ar_format(out) if lang == "ar" else out


def text_to_audio(text, lang="ar"):
    """Convert (markdown-stripped) text to an MP3 via gTTS; None on failure."""
    clean = clean_md(text)
    if not clean:
        return None
    try:
        tts = gTTS(text=clean, lang=lang, slow=False)
        path = PERSIST_DIR + "/audio_" + lang + ".mp3"
        tts.save(path)
        return path
    except Exception as e:
        print("TTS: " + str(e))
        return None


# ================================================================
# GRADIO HANDLERS
# ================================================================
def gr_fetch(query, category_label, max_results, days_back, source_choice,
             progress=gr.Progress()):
    """Main fetch handler: pull from the chosen source(s), dedupe by title,
    enrich citations, rebuild the FAISS index, and return the results table
    plus identical dropdown updates for the four paper selectors.
    """
    global ACTIVE_PAPERS
    progress(0.05, desc="Connecting...")
    papers, warn = [], ""
    if source_choice in ("arXiv", "Both"):
        progress(0.15, desc="Fetching arXiv...")
        papers += fetch_arxiv_papers(query, CATEGORIES.get(category_label, ""),
                                     int(max_results), int(days_back),
                                     sort_by="submittedDate")
    if source_choice in ("CrossRef", "Both"):
        progress(0.35, desc="Fetching CrossRef...")
        cr = fetch_crossref_papers(query, category_label,
                                   int(max_results), int(days_back))
        if not cr:
            warn = "\n\n> CrossRef: no results."
        papers += cr
    seen, unique = set(), []
    for p in papers:
        key = re.sub(r"\W", "", p["title"].lower())[:60]
        if key not in seen:
            seen.add(key)
            unique.append(p)
    papers = unique
    if not papers:
        return ("No results." + warn,
                gr.update(choices=[], value=None), gr.update(choices=[], value=None),
                gr.update(choices=[], value=None), gr.update(choices=[], value=None),
                "0 papers")
    progress(0.60, desc="Fetching citations...")
    papers = enrich_citations(papers)
    progress(0.85, desc="FAISS indexing...")
    build_papers_index(papers)
    ACTIVE_PAPERS = list(papers)
    tbl, choices = build_table(papers)
    recent = sum(1 for p in papers if p.get("recent"))
    tot_cit = sum(p.get("citations") or 0 for p in papers)
    zero_cit = sum(1 for p in papers if (p.get("citations") or 0) == 0)
    note = ("\n\n> " + str(zero_cit) + " papers with 0 citations (new/unindexed)."
            if zero_cit else "")
    md = ("## Fetched **" + str(len(papers)) + "** papers\n\n"
          + "New: **" + str(recent) + "** | Citations: **"
          + "{:,}".format(tot_cit) + "**" + warn + note + "\n\n---\n\n" + tbl)
    upd = gr.update(choices=choices, value=choices[0] if choices else None)
    progress(1.0)
    return (md, upd, upd, upd, upd,
            str(len(papers)) + " papers | " + "{:,}".format(tot_cit) + " cit.")
def gr_filter_papers(year_from, year_to, cit_min, cit_max, sort_by):
    """Filter the fetched PAPERS by year/citation range, sort, rebuild table.

    Returns (markdown, dropdown update, status string). Papers whose date
    cannot be parsed pass the year filter deliberately (best-effort).
    """
    global ACTIVE_PAPERS
    if not PAPERS:
        return "Fetch papers first.", gr.update(), "0"
    filtered = []
    for p in PAPERS:
        try:
            y = int(p["published"][:4])
            if y < int(year_from) or y > int(year_to):
                continue
        except Exception:
            pass  # unparseable date: keep the paper
        cit = int(p.get("citations") or 0)
        if cit < int(cit_min) or cit > int(cit_max):
            continue
        filtered.append(p)
    if sort_by == "Newest":
        filtered.sort(key=lambda x: x["published"], reverse=True)
    elif sort_by == "Oldest":
        filtered.sort(key=lambda x: x["published"])
    elif sort_by == "Most Cited":
        filtered.sort(key=lambda x: x.get("citations") or 0, reverse=True)
    elif sort_by == "Least Cited":
        filtered.sort(key=lambda x: x.get("citations") or 0)
    if not filtered:
        ACTIVE_PAPERS = []
        return "No matching papers.", gr.update(choices=[], value=None), "0"
    ACTIVE_PAPERS = list(filtered)
    tbl, choices = build_table(filtered)
    tot = sum(p.get("citations") or 0 for p in filtered)
    md = ("## " + str(len(filtered)) + "/" + str(len(PAPERS)) + " papers"
          + " | " + str(year_from) + "-" + str(year_to)
          + " | cit " + str(cit_min) + "-" + str(cit_max)
          + " | total " + "{:,}".format(tot) + "\n\n---\n\n" + tbl)
    return (md,
            gr.update(choices=choices, value=choices[0] if choices else None),
            str(len(filtered)) + "/" + str(len(PAPERS)))


def gr_search_fetched(query):
    """FAISS semantic search over the currently loaded papers (markdown out)."""
    if not query or not query.strip():
        return "Enter a query."
    if not PAPERS:
        return "Fetch papers first."
    results = search_papers(query.strip(), top_k=8)
    if not results:
        return "No results for: " + query
    NL = "\n"
    md = "## Search: " + query + " — " + str(len(results)) + " results" + NL + NL
    for r in results:
        p, s = r["paper"], r["score"]
        cit = (" | " + cit_badge(p.get("citations"))) if p.get("citations") else ""
        link = "[View](" + p["url"] + ")"
        pdf = (" [PDF](" + p["pdf_url"] + ")") if p.get("pdf_url") else ""
        md += ("### " + "{:.0f}".format(s * 100) + "% — " + p["title"] + NL + NL
               + ", ".join(p["authors"][:2]) + " | " + p["published"] + cit
               + " | " + p.get("source", "") + NL + NL
               + "> " + p["abstract"][:350] + "..." + NL + NL
               + link + pdf + NL + NL + "---" + NL + NL)
    return md


def _get_paper(choice):
    """Map a dropdown label like '3. Title' back to its paper dict, or None."""
    pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
    try:
        return pool[int(choice.split(".")[0]) - 1]
    except Exception:
        return None


def gr_explain(choice, lang_choice):
    """Build the paper header (metadata + links) and append the LLM explanation."""
    if not choice:
        return "Fetch papers and select one."
    paper = _get_paper(choice)
    if not paper:
        return "Selection error."
    lang = "ar" if "Arabic" in lang_choice else "en"
    NL = "\n"
    pdf_link = (" [PDF](" + paper["pdf_url"] + ")") if paper.get("pdf_url") else ""
    header = ("# " + paper["title"] + NL + NL
              + "**Authors:** " + ", ".join(paper["authors"]) + NL + NL
              + "**Date:** " + paper["published"]
              + " | **Citations:** " + cit_badge(paper.get("citations"))
              + " | **Source:** " + paper.get("source", "arXiv") + NL + NL
              + "[View Paper](" + paper["url"] + ")" + pdf_link + NL + NL
              + "---" + NL + NL
              + "> " + paper["abstract"] + NL + NL
              + "---" + NL + NL
              + "## Explanation (Llama 3.3 70B)" + NL + NL)
    return header + explain_paper(paper, lang)


def gr_audio(txt, lang_choice):
    """Synthesize the explanation text to audio; skip very short texts."""
    if not txt or len(txt) < 50:
        return None
    return text_to_audio(txt, "ar" if "Arabic" in lang_choice else "en")


def gr_save_fav(choice):
    """Persist the selected paper to the favorites store."""
    if not choice:
        return "Select a paper first."
    paper = _get_paper(choice)
    return save_favorite(paper) if paper else "Error."
def gr_show_favs():
    """List all saved favorites as a markdown block."""
    favs = load_favorites()
    if not favs:
        return "No saved papers."
    NL = "\n"
    lines = [("**" + p["title"] + "**" + NL
              + (p["authors"][0] if p["authors"] else "N/A")
              + " | " + p["published"]
              + " | " + p.get("source", "")
              + " | " + cit_badge(p.get("citations"))
              + " | [Link](" + p["url"] + ")")
             for p in favs]
    return ("### Favorites — " + str(len(favs)) + " papers" + NL + NL
            + (NL + NL + "---" + NL + NL).join(lines))


def gr_compare(ca, cb, lc):
    """Run the LLM side-by-side comparison of two distinct papers."""
    if not ca or not cb:
        return "Select two papers."
    pa = _get_paper(ca)
    pb = _get_paper(cb)
    if not pa or not pb:
        return "Selection error."
    if pa["id"] == pb["id"]:
        return "Select two different papers."
    return compare_papers(pa, pb, "ar" if "Arabic" in lc else "en")


def gr_overview(query, lc):
    """Generate an academic overview of the active (or all) loaded papers."""
    if not PAPERS:
        return "Fetch papers first."
    pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
    return ("## Overview\n\n"
            + summarize_papers(pool, query or "research",
                               "ar" if "Arabic" in lc else "en"))


def gr_trends():
    """Produce the trends chart + stats for the active (or all) papers."""
    if not PAPERS:
        return None, "Fetch papers first."
    return analyze_trends(ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS)


def gr_bib(style, progress=gr.Progress()):
    """Generate a bibliography in the chosen style; preview capped at 3000 chars."""
    if not PAPERS:
        return "Fetch papers first.", None
    progress(0.5, desc="Generating...")
    pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
    text, path = generate_bibliography(pool, style)
    progress(1.0)
    short = text[:3000] + ("..." if len(text) > 3000 else "")
    return "```\n" + short + "\n```", path


def gr_chat_fn(message, history):
    """Chatbot callback: convert tuple history to role dicts, append the reply."""
    if not message.strip():
        return history, ""
    hd = []
    for pair in history:
        if pair[0]:
            hd.append({"role": "user", "content": pair[0]})
        if pair[1]:
            hd.append({"role": "assistant", "content": pair[1]})
    history.append((message, chat_about_papers(message, hd)))
    return history, ""


# ================================================================
# UI
# ================================================================
CSS = """
footer{display:none!important}
h1{text-align:center}
.status-bar{font-size:.85rem;color:#94a3b8;padding:2px 0}
.legend{font-size:.8rem;color:#cbd5e1;background:#1e293b;
border-radius:8px;padding:6px 14px;margin-bottom:6px}
.filter-box{background:#1e293b;border-radius:10px;
padding:12px 16px;margin-top:8px}
.gs-box{background:#1e293b;border-radius:10px;padding:14px 18px;
margin-bottom:10px;border:1px solid #334155}
"""

with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="purple"),
    title="Scientific Paper Discovery v7.4",
    css=CSS,
) as demo:
    gr.Markdown("# Scientific Paper Discovery v7.4\n"
                "arXiv · CrossRef · Llama-3.3-70B · FAISS")
    gr.Markdown("Citations: 🥇 >=1000 | 🏆 >=100 | ⭐ >=10 | 📄 <10 | · = 0",
                elem_classes="legend")
    status_bar = gr.Markdown("No papers loaded yet.", elem_classes="status-bar")

    with gr.Tabs():
        # ── TAB 1: BROWSE ──────────────────────────────────────
        with gr.Tab("Browse / Search"):
            with gr.Row():
                with gr.Column(scale=3):
                    t_query = gr.Textbox(label="Topic",
                                         placeholder="ARIMA, inflation, LLM...",
                                         value="economic forecasting")
                    t_category = gr.Dropdown(label="Category",
                                             choices=list(CATEGORIES.keys()),
                                             value="📊 Economics")
                    t_source = gr.Radio(label="Source",
                                        choices=["arXiv", "CrossRef", "Both"],
                                        value="arXiv")
                with gr.Column(scale=1):
                    t_max = gr.Slider(5, 50, value=15, step=5, label="Max papers")
                    t_days = gr.Slider(1, 1500, value=365, step=30,
                                       label="Last N days")
            btn_fetch = gr.Button("Fetch Papers", variant="primary", size="lg")
            papers_table_md = gr.Markdown("Results appear here.")
            paper_selector = gr.Dropdown(label="Select paper", choices=[],
                                         interactive=True)

            with gr.Group(elem_classes="filter-box"):
                gr.Markdown("### Filter & Sort")
                with gr.Row():
                    f_year_from = gr.Slider(2000, 2026, value=2020, step=1,
                                            label="Year from")
                    f_year_to = gr.Slider(2000, 2026, value=2026, step=1,
                                          label="Year to")
                with gr.Row():
                    f_cit_min = gr.Slider(0, 5000, value=0, step=5,
                                          label="Citations min")
                    f_cit_max = gr.Slider(0, 5000, value=5000, step=5,
                                          label="Citations max")
                with gr.Row():
                    f_sort = gr.Dropdown(choices=SORT_CHOICES, value="Most Cited",
                                         label="Sort", scale=3)
                    btn_filter = gr.Button("Apply", variant="primary", scale=1)

            gr.Markdown("---\n### Semantic Search (FAISS — in loaded papers)")
            with gr.Row():
                search_in_box = gr.Textbox(label="Search in loaded papers",
                                           placeholder="ARIMA, transformer...",
                                           scale=5)
                btn_search_in = gr.Button("Search", scale=1)
            search_in_out = gr.Markdown()

        # ── TAB 2: GLOBAL SEARCH ───────────────────────────────
        with gr.Tab("Global Search"):
            gr.Markdown(
                "### Search any paper by title or keywords\n\n"
                "> Uses arXiv **relevance** sort + CrossRef **title** search.\n"
                "> Example: `Attention is All You Need`"
            )
            with gr.Group(elem_classes="gs-box"):
                with gr.Row():
                    gs_query = gr.Textbox(
                        label="Title or keywords",
                        placeholder="Attention is All You Need | ARIMA forecasting ...",
                        scale=4)
                    gs_source = gr.Radio(label="Source",
                                         choices=["arXiv", "CrossRef", "Both"],
                                         value="Both", scale=2)
                    gs_max = gr.Slider(5, 30, value=10, step=5,
                                       label="Max results", scale=1)
                btn_gs = gr.Button("Search Now", variant="primary", size="lg")
            gs_out = gr.Markdown("Enter a title or keywords...")

        # ── TAB 3: EXPLAIN ─────────────────────────────────────
        with gr.Tab("Explain"):
            with gr.Row():
                paper_sel2 = gr.Dropdown(label="Select paper", choices=[],
                                         interactive=True, scale=4)
                lang_exp = gr.Radio(LANG_CHOICES, value="Arabic",
                                    label="Language", scale=1)
            with gr.Row():
                btn_explain = gr.Button("Explain", variant="primary")
                btn_fav = gr.Button("Save Fav")
                btn_audio = gr.Button("Listen")
                btn_export_pdf = gr.Button("Export PDF", variant="secondary")
            with gr.Row():
                fav_status = gr.Markdown()
                pdf_status = gr.Markdown()
            explanation_out = gr.Markdown("Fetch papers and select one.")
            audio_out = gr.Audio(label="Audio", type="filepath")
            pdf_out = gr.File(label="Download PDF")

        # ── TAB 4: COMPARE ─────────────────────────────────────
        with gr.Tab("Compare"):
            with gr.Row():
                cmp_a = gr.Dropdown(label="Paper A", choices=[], interactive=True)
                cmp_b = gr.Dropdown(label="Paper B", choices=[], interactive=True)
                lang_cmp = gr.Radio(LANG_CHOICES, value="Arabic",
                                    label="Language", scale=1)
            btn_compare = gr.Button("Compare", variant="primary")
            compare_out = gr.Markdown("Select two papers.")

        # ── TAB 5: CHAT ────────────────────────────────────────
        with gr.Tab("Chat"):
            chatbot_ui = gr.Chatbot(label="Research Assistant", height=480,
                                    bubble_full_width=False)
            with gr.Row():
                chat_in = gr.Textbox(label="Question", scale=5,
                                     placeholder="Key findings? | ما أبرز النتائج؟")
                btn_send = gr.Button("Send", variant="primary", scale=1)
            btn_clear = gr.Button("Clear", size="sm")

        # ── TAB 6: OVERVIEW ────────────────────────────────────
        with gr.Tab("Overview"):
            with gr.Row():
                lang_ov = gr.Radio(LANG_CHOICES, value="Arabic",
                                   label="Language", scale=1)
                btn_overview = gr.Button("Generate Report", variant="primary",
                                         scale=3)
            overview_out = gr.Markdown("Fetch papers first.")

        # ── TAB 7: TRENDS ──────────────────────────────────────
        with gr.Tab("Trends"):
            btn_trends = gr.Button("Analyze Trends", variant="primary", size="lg")
            trend_chart = gr.Image(label="Trends Dashboard", type="filepath")
            trend_stats = gr.Markdown("Fetch papers first.")

        # ── TAB 8: BIBLIOGRAPHY ────────────────────────────────
        with gr.Tab("Bibliography"):
            bib_style = gr.Radio(["APA", "IEEE", "Chicago", "BibTeX"],
                                 value="APA", label="Style")
            btn_bib = gr.Button("Generate Bibliography", variant="primary")
            bib_out = gr.Markdown()
            bib_file = gr.File(label="Download")

        # ── TAB 9: FAVORITES ───────────────────────────────────
        with gr.Tab("Favorites"):
            btn_show_fav = gr.Button("Show Favorites")
            favs_md = gr.Markdown("Press to show.")
            btn_export_fav = gr.Button("Export CSV", variant="secondary")
            fav_csv_file = gr.File(label="CSV File")

        # ── TAB 10: AUTO-FETCH ─────────────────────────────────
        with gr.Tab("Auto-Fetch"):
            with gr.Row():
                auto_q = gr.Textbox(label="Topic", value="economic forecasting",
                                    scale=3)
                auto_cat = gr.Dropdown(label="Category",
                                       choices=list(CATEGORIES.keys()),
                                       value="📊 Economics", scale=2)
                auto_interval = gr.Slider(5, 120, value=60, step=5,
                                          label="Every (min)", scale=1)
            with gr.Row():
                btn_start_auto = gr.Button("Start", variant="primary")
                btn_stop_auto = gr.Button("Stop", variant="stop")
                btn_refresh_log = gr.Button("Refresh Log")
            auto_status = gr.Markdown()
            auto_log_md = gr.Markdown("No log.")

        # ── TAB 11: ABOUT ──────────────────────────────────────
        with gr.Tab("About"):
            gr.Markdown("""
# 🔬 Scientific Paper Discovery
### Version 7.4 — Intelligent Research Assistant

---

## 🧠 About This Tool

**Scientific Paper Discovery** is an AI-powered academic research assistant
that enables researchers, students, and scientists to **discover, understand,
and organize** scientific literature with unprecedented ease. It combines
state-of-the-art language models with multi-source academic APIs to deliver
a seamless research experience.

---

## ⚙️ Core Technologies

| Component | Technology | Role |
|---|---|---|
| 🤖 Language Model | **Llama 3.3 70B** via Groq API | Paper explanation, comparison & chat |
| 🔍 Semantic Search | **FAISS** + MiniLM-L12-v2 | Vector similarity search |
| 📡 Source 1 | **arXiv API** | Preprints across all sciences |
| 📚 Source 2 | **CrossRef API** | Peer-reviewed journal articles |
| 📊 Citations | **Semantic Scholar** (3-layer) | Real citation counts |
| 🎙️ Text-to-Speech | **gTTS** | Audio playback of explanations |
| 📄 PDF Export | **ReportLab** | Professional PDF generation |

---

## 🗂️ Feature Overview

| Tab | Feature | Description |
|---|---|---|
| 🔍 Browse | Paper Fetching | Fetch latest papers by topic & category |
| 🌍 Global Search | Title Search | Find any paper by exact title (relevance-sorted) |
| 📖 Explain | AI Explanation | Full structured explanation in Arabic or English |
| ⚖️ Compare | Paper Comparison | Side-by-side AI comparison of two papers |
| 💬 Chat | Research Chat | Ask questions about loaded papers |
| 🌍 Overview | Batch Summary | Academic overview of all loaded papers |
| 📊 Trends | Analytics | Citation, keyword & author trend charts |
| 📚 Bibliography | Citation Export | APA, IEEE, Chicago, BibTeX formats |
| ⭐ Favorites | Saved Papers | Bookmark & export favorite papers |
| 🔔 Auto-Fetch | Monitoring | Automatic periodic paper discovery |

---

## 🔎 Search Mode Guide

| Mode | Algorithm | Best For |
|---|---|---|
| Browse | `sortBy=submittedDate` | Discovering latest papers on a topic |
| 🌍 Global Search | `sortBy=relevance` + `ti:"..."` | Finding a specific paper by title |
| FAISS (internal) | Cosine similarity | Semantic search within loaded papers |

---

## 📌 Citation Badges

| Badge | Meaning |
|---|---|
| 🥇 | ≥ 1,000 citations — Highly influential |
| 🏆 | ≥ 100 citations — Well-cited |
| ⭐ | ≥ 10 citations — Notable |
| 📄 | < 10 citations — Recent or niche |
| · | 0 citations — New or unindexed |

---

*Built with ❤️ for the research community — v7.4*
""")

    # ── WIRING ──────────────────────────────────────────────────
    # The four paper dropdowns (browse selector, explain, compare A/B) are all
    # refreshed by a single fetch, hence the repeated update outputs.
    FETCH_OUT = [papers_table_md, paper_selector, paper_sel2, cmp_a, cmp_b,
                 status_bar]
    btn_fetch.click(gr_fetch,
                    inputs=[t_query, t_category, t_max, t_days, t_source],
                    outputs=FETCH_OUT)
    btn_filter.click(gr_filter_papers,
                     inputs=[f_year_from, f_year_to, f_cit_min, f_cit_max, f_sort],
                     outputs=[papers_table_md, paper_selector, status_bar])
    # Keep the Explain/Compare dropdowns in sync with the Browse selector.
    paper_selector.change(lambda x: [gr.update(value=x)] * 3,
                          inputs=[paper_selector],
                          outputs=[paper_sel2, cmp_a, cmp_b])
    btn_search_in.click(gr_search_fetched, inputs=[search_in_box],
                        outputs=[search_in_out])
    search_in_box.submit(gr_search_fetched, inputs=[search_in_box],
                         outputs=[search_in_out])
    btn_gs.click(global_paper_search, inputs=[gs_query, gs_source, gs_max],
                 outputs=[gs_out])
    gs_query.submit(global_paper_search, inputs=[gs_query, gs_source, gs_max],
                    outputs=[gs_out])
    btn_explain.click(gr_explain, inputs=[paper_sel2, lang_exp],
                      outputs=[explanation_out])
    btn_fav.click(gr_save_fav, inputs=[paper_sel2], outputs=[fav_status])
    btn_audio.click(gr_audio, inputs=[explanation_out, lang_exp],
                    outputs=[audio_out])
    btn_export_pdf.click(gr_export_pdf, inputs=[explanation_out, paper_sel2],
                         outputs=[pdf_out, pdf_status])
    btn_compare.click(gr_compare, inputs=[cmp_a, cmp_b, lang_cmp],
                      outputs=[compare_out])
    btn_overview.click(gr_overview, inputs=[t_query, lang_ov],
                       outputs=[overview_out])
    btn_trends.click(gr_trends, outputs=[trend_chart, trend_stats])
    btn_bib.click(gr_bib, inputs=[bib_style], outputs=[bib_out, bib_file])
    btn_show_fav.click(gr_show_favs, outputs=[favs_md])
    btn_export_fav.click(gr_export_fav, outputs=[fav_csv_file])
    btn_start_auto.click(start_auto_fetch,
                         inputs=[auto_q, auto_cat, auto_interval],
                         outputs=[auto_status])
    btn_stop_auto.click(stop_auto_fetch, outputs=[auto_status])
    btn_refresh_log.click(get_auto_log, outputs=[auto_log_md])
    btn_send.click(gr_chat_fn, inputs=[chat_in, chatbot_ui],
                   outputs=[chatbot_ui, chat_in])
    chat_in.submit(gr_chat_fn, inputs=[chat_in, chatbot_ui],
                   outputs=[chatbot_ui, chat_in])
    btn_clear.click(lambda: ([], ""), outputs=[chatbot_ui, chat_in])


if __name__ == "__main__":
    demo.launch()