sitayeb commited on
Commit
566a5de
·
verified ·
1 Parent(s): 48e63cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +593 -537
app.py CHANGED
@@ -1,7 +1,5 @@
1
  # ================================================================
2
- # 🔬 Scientific Paper Discovery Bot v7.4
3
- # FIX: Global Search now uses arXiv relevance sort (not date)
4
- # → searching "Attention is All You Need" returns the correct paper
5
  # ================================================================
6
  import os, re, time, json, pickle, threading
7
  import requests
@@ -32,10 +30,10 @@ GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
32
  S2_API_KEY = os.environ.get("S2_API_KEY", "")
33
  groq_client = Groq(api_key=GROQ_API_KEY)
34
 
35
- print("Loading embedder...")
36
  embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
37
  _ = embedder.encode(["warmup"])
38
- print("Embedder ready!")
39
 
40
  PAPERS = []
41
  ACTIVE_PAPERS = []
@@ -45,57 +43,52 @@ AUTO_LOG = []
45
  CURRENT_YEAR = datetime.now().year
46
 
47
  PERSIST_DIR = "/tmp"
48
- FAVORITES_PATH = f"{PERSIST_DIR}/favorites.pkl"
49
- SEEN_IDS_PATH = f"{PERSIST_DIR}/seen_ids.json"
50
  os.makedirs(PERSIST_DIR, exist_ok=True)
51
 
52
  CATEGORIES = {
53
- "🌐 الكل / All": "",
54
- "📊 الاقتصاد / Economics": "econ",
55
- "💰 المالية الكمية / Quant Finance": "q-fin",
56
- "🤖 الذكاء الاصطناعي / AI": "cs.AI",
57
- "🧠 تعلم الآلة / ML": "cs.LG",
58
- "💬 معالجة اللغة / NLP": "cs.CL",
59
- "📈 الإحصاء / Statistics": "stat",
60
- "🔬 علم الأحياء / Bio": "q-bio",
61
- "⚛️ الفيزياء / Physics": "physics",
62
- "📐 الرياضيات / Math": "math",
63
- "💻 علوم الحاسوب / CS": "cs",
64
  }
65
  CROSSREF_SUBJECTS = {
66
- "🌐 الكل / All": "",
67
- "📊 الاقتصاد / Economics": "economics",
68
- "💰 المالية الكمية / Quant Finance": "finance",
69
- "🤖 الذكاء الاصطناعي / AI": "artificial intelligence",
70
- "🧠 تعلم الآلة / ML": "machine learning",
71
- "💬 معالجة اللغة / NLP": "natural language processing",
72
- "📈 الإحصاء / Statistics": "statistics",
73
- "🔬 علم الأحياء / Bio": "biology",
74
- "⚛️ الفيزياء / Physics": "physics",
75
- "📐 الرياضيات / Math": "mathematics",
76
- "💻 علوم الحاسوب / CS": "computer science",
77
  }
78
- LANG_CHOICES = ["🇸🇦 عربي / Arabic", "🇬🇧 English"]
79
- SORT_CHOICES = [
80
- "📅 الأحدث / Newest",
81
- "📅 الأقدم / Oldest",
82
- "🏆 الأكثر اقتباساً / Most Cited",
83
- "📊 الأقل اقتباساً / Least Cited",
84
- ]
85
- AR_FORMAT_RULES = """
86
- قواعد التنسيق:
87
- - ابدأ كل قسم بـ ## على سطر منفرد مع سطر فارغ قبله وبعده
88
- - اكتب كل قسم في فقرة من 3-4 جمل بالعربية الفصحى
89
  - لا تكرر عنوان القسم داخل النص
90
- - لا تضف --- أو *** أو رموز زائدة
91
  """
92
 
93
  # ================================================================
94
  # HELPERS
95
  # ================================================================
96
def detect_lang(text):
    """Return "ar" if *text* is detected as Arabic, else "en".

    Only the first 300 characters are sampled for speed. Any detection
    failure (empty, too-short, or ambiguous input) falls back to English.
    """
    try:
        return "ar" if detect(str(text)[:300]).startswith("ar") else "en"
    # Narrowed from a bare ``except:`` — a bare except also swallows
    # KeyboardInterrupt/SystemExit; langdetect signals failure with an
    # ordinary Exception subclass, so Exception is the right net here.
    except Exception:
        return "en"
 
 
99
 
100
  def clean_md(text):
101
  text = re.sub(r"[#*`>\[\]!_~]", "", text)
@@ -109,37 +102,39 @@ def fix_ar_format(text):
109
def cit_badge(n):
    """Format a citation count as a short emoji badge string.

    ``None``/empty input yields an em dash, zero yields a middle dot,
    otherwise the count is prefixed with a tier icon.
    """
    if n is None or n == "":
        return "—"
    count = int(n)
    # Tier icons, highest threshold first.
    for threshold, icon in ((1000, "🥇"), (100, "🏆"), (10, "⭐")):
        if count >= threshold:
            return f"{icon} {count:,}"
    if count > 0:
        return f"📄 {count}"
    return "·"
117
 
118
def build_table(papers_list):
    """Render papers as a Markdown table plus dropdown choice labels.

    Returns ``(markdown_table, choices)`` where ``choices`` is a list of
    "N. title" strings aligned with the table rows.
    """
    header = ("| # | 🏷️ العنوان | 👥 مؤلف رئيسي | 📅 التاريخ | 📊 الاقتباسات | 📡 المصدر |\n"
              "|---|---|---|---|---|---|\n")
    lines = []
    choices = []
    for idx, paper in enumerate(papers_list, start=1):
        lead_author = paper["authors"][0] if paper["authors"] else "N/A"
        marker = "🆕" if paper.get("recent") else "📄"
        lines.append(
            f"| {idx} | {marker} {paper['title']} | {lead_author} | "
            f"{paper['published']} | {cit_badge(paper.get('citations'))} | "
            f"{paper.get('source','arXiv')} |\n"
        )
        choices.append(f"{idx}. {paper['title']}")
    return header + "".join(lines), choices
130
 
131
def s2_headers():
    """Build HTTP headers for Semantic Scholar requests.

    The ``x-api-key`` header is attached only when an API key was
    configured via the environment.
    """
    headers = {"User-Agent": "ScientificPaperBot/7.4"}
    if S2_API_KEY:
        headers["x-api-key"] = S2_API_KEY
    return headers
135
 
136
def cr_headers():
    """HTTP headers for CrossRef; the mailto contact opts into the polite pool."""
    user_agent = "ScientificPaperBot/7.4 (mailto:researcher@example.com)"
    return {"User-Agent": user_agent}
138
 
139
  # ================================================================
140
- # CrossRef date parser — rejects garbage years (2048, 2116...)
141
  # ================================================================
142
- def parse_crossref_date(item: dict) -> str:
143
  for field in ["issued", "published", "published-print", "published-online", "created"]:
144
  dp = (item.get(field) or {}).get("date-parts", [[]])
145
  if not dp or not dp[0]: continue
@@ -149,7 +144,7 @@ def parse_crossref_date(item: dict) -> str:
149
  if not (1900 <= year <= CURRENT_YEAR + 1): continue
150
  month = max(1, min(12, int(pts[1]) if len(pts) >= 2 else 1))
151
  day = max(1, min(31, int(pts[2]) if len(pts) >= 3 else 1))
152
- return f"{year:04d}-{month:02d}-{day:02d}"
153
  except (ValueError, TypeError, IndexError):
154
  continue
155
  return "N/A"
@@ -175,48 +170,52 @@ def save_favorite(paper):
175
  if paper["id"] not in {p["id"] for p in favs}:
176
  favs.append(paper)
177
  with open(FAVORITES_PATH, "wb") as f: pickle.dump(favs, f)
178
- return f" تم الحفظ: {paper['title']}"
179
- return "ℹ️ موجودة بالفعل."
180
 
181
def export_favorites_csv():
    """Dump saved favorites to a CSV file and return its path.

    Returns ``None`` when there are no favorites. The file is written
    with a UTF-8 BOM (``utf-8-sig``) so spreadsheet apps render the
    Arabic/Unicode titles correctly.
    """
    favorites = load_favorites()
    if not favorites:
        return None
    records = [
        {
            "Title": p["title"],
            "Authors": ", ".join(p["authors"][:3]),
            "Date": p["published"],
            "Citations": p.get("citations", "N/A"),
            "URL": p["url"],
            "Source": p.get("source", "arXiv"),
        }
        for p in favorites
    ]
    out_path = f"{PERSIST_DIR}/favorites.csv"
    pd.DataFrame(records).to_csv(out_path, index=False, encoding="utf-8-sig")
    return out_path
190
 
191
  def gr_export_fav(): return export_favorites_csv()
192
 
193
  # ================================================================
194
- # PDF EXPORT
195
  # ================================================================
196
  def export_explanation_pdf(explanation_text, paper_title="paper"):
197
  if not explanation_text or len(explanation_text) < 30: return None
198
  safe = re.sub(r"[^\w\s-]", "", paper_title)[:50].strip().replace(" ", "_")
199
- path = f"{PERSIST_DIR}/explanation_{safe}.pdf"
200
  doc = SimpleDocTemplate(path, pagesize=A4,
201
  rightMargin=2*cm, leftMargin=2*cm,
202
- topMargin=2*cm, bottomMargin=2*cm)
203
- styles = getSampleStyleSheet()
204
- h2_style = ParagraphStyle("H2", parent=styles["Heading2"],
205
- fontSize=11, textColor=colors.HexColor("#2563eb"),
206
- spaceBefore=14, spaceAfter=6)
207
- body_style = ParagraphStyle("Body", parent=styles["Normal"],
208
- fontSize=10, leading=16, spaceAfter=8)
209
- meta_style = ParagraphStyle("Meta", parent=styles["Normal"],
210
- fontSize=9, textColor=colors.HexColor("#64748b"),
211
- spaceAfter=10)
212
  story = []
213
  for line in explanation_text.split("\n"):
214
  line = line.strip()
215
  if not line: story.append(Spacer(1, 6)); continue
216
  clean = re.sub(r"\*\*(.+?)\*\*", r"\1", line)
217
- clean = re.sub(r"\*(.+?)\*", r"\1", clean)
218
- clean = re.sub(r"`(.+?)`", r"\1", clean)
219
- clean = re.sub(r"^#{1,6}\s*", "", clean)
220
  clean = re.sub(r"[🎯❓🔧📊🌟🔗📄👥📅📡🤖#*_~]", "", clean).strip()
221
  if not clean: continue
222
  if line.startswith("## ") or line.startswith("# "):
@@ -224,95 +223,109 @@ def export_explanation_pdf(explanation_text, paper_title="paper"):
224
  color=colors.HexColor("#e2e8f0"), spaceAfter=4))
225
  story.append(Paragraph(clean, h2_style))
226
  elif line.startswith(">"):
227
- q_style = ParagraphStyle("Q", parent=styles["Normal"],
228
- fontSize=9, leftIndent=20,
229
- textColor=colors.HexColor("#475569"), leading=14)
230
- story.append(Paragraph(re.sub(r"[🎯❓🔧📊🌟🔗📄👥📅📡🤖#*_~]","",
231
- line.lstrip(">").strip()), q_style))
 
232
  else:
233
- story.append(Paragraph(clean, body_style))
234
- story += [Spacer(1,20),
235
- HRFlowable(width="100%", thickness=0.5, color=colors.HexColor("#e2e8f0")),
236
- Paragraph(f"Generated by 🔬 Paper Discovery v7.4 — {datetime.now().strftime('%Y-%m-%d %H:%M')}",
237
- meta_style)]
 
 
238
  try:
239
  doc.build(story); return path
240
  except Exception as e:
241
- print(f"PDF error: {e}"); return None
242
 
243
def gr_export_pdf(explanation_text, choice):
    """Gradio callback: render an explanation to PDF.

    Returns ``(path, status_message)``; ``path`` is ``None`` when the
    explanation is too short or PDF generation failed.
    """
    # Guard: require a substantive explanation before attempting export.
    if not explanation_text or len(explanation_text) < 50:
        return None, "⚠️ اشرح الورقة أولاً ثم صدّر PDF."
    # Dropdown choices look like "3. Some Title" — strip the numeric prefix.
    title = choice.split(". ", 1)[-1] if choice else "paper"
    path = export_explanation_pdf(explanation_text, title)
    if path:
        return path, "✅ تم إنشاء PDF!"
    return None, "❌ فشل إنشاء PDF."
249
 
250
  # ================================================================
251
  # SOURCE 1 — arXiv
252
- # KEY FIX: sort_by parameter
253
- # Browse mode → "submittedDate" (latest papers in topic)
254
- # Search mode → "relevance" (most relevant to query/title)
255
  # ================================================================
256
  def fetch_arxiv_papers(query, category, max_results=20, days_back=365,
257
  sort_by="submittedDate"):
258
  parts = []
259
- # ✅ If query looks like a paper title (>3 words), use ti: prefix for precision
260
  words = query.strip().split()
261
  if len(words) >= 3 and sort_by == "relevance":
262
- parts.append(f'ti:"{query.strip()}"') # exact title search
263
  elif query.strip():
264
- parts.append(f"all:{query.strip()}")
265
  if category.strip():
266
- parts.append(f"cat:{category.strip()}")
267
- sq = " AND ".join(parts) if parts else "all:machine learning"
268
- params = {"search_query": sq, "start": 0, "max_results": max_results,
269
- "sortBy": sort_by, "sortOrder": "descending"}
 
 
 
 
 
270
  try:
271
  resp = requests.get("http://export.arxiv.org/api/query", params=params, timeout=30)
272
  resp.raise_for_status()
273
- except Exception as e: print(f"arXiv: {e}"); return []
 
274
 
275
  ns_a = "http://www.w3.org/2005/Atom"
276
  ns_x = "http://arxiv.org/schemas/atom"
277
  root = ET.fromstring(resp.content)
278
  cutoff = datetime.now() - timedelta(days=days_back)
279
  papers = []
280
- for entry in root.findall(f"{{{ns_a}}}entry"):
281
  try:
282
- pid = entry.find(f"{{{ns_a}}}id").text.split("/abs/")[-1].strip()
283
- title = entry.find(f"{{{ns_a}}}title").text.strip().replace("\n"," ")
284
- abstract = entry.find(f"{{{ns_a}}}summary").text.strip().replace("\n"," ")
285
- published = entry.find(f"{{{ns_a}}}published").text[:10]
286
- authors = [a.find(f"{{{ns_a}}}name").text
287
- for a in entry.findall(f"{{{ns_a}}}author")]
288
  cats = set()
289
- pc = entry.find(f"{{{ns_x}}}primary_category")
290
  if pc is not None: cats.add(pc.get("term",""))
291
- for c in entry.findall(f"{{{ns_x}}}category"): cats.add(c.get("term",""))
292
  cats.discard("")
293
  papers.append({
294
- "id": pid, "title": title, "authors": authors[:6],
295
- "abstract": abstract[:1200], "published": published,
296
- "categories": list(cats)[:4], "citations": None,
297
- "url": f"https://arxiv.org/abs/{pid}",
298
- "pdf_url": f"https://arxiv.org/pdf/{pid}",
299
- "recent": datetime.strptime(published,"%Y-%m-%d") >= cutoff,
300
- "source": "arXiv",
 
 
 
 
301
  })
302
- except Exception as e: print(f"arXiv parse: {e}")
 
303
  return papers
304
 
305
  # ================================================================
306
- # SOURCE 2 — CrossRef (fixed date parser + title filter)
307
  # ================================================================
308
- def fetch_crossref_papers(query, category_label="", max_results=20, days_back=365,
309
- use_title=False):
310
  subject = CROSSREF_SUBJECTS.get(category_label, "")
311
- full_query = f"{query} {subject}".strip() if subject else query
 
312
  params = {
313
- "query.title" if use_title else "query": full_query,
314
- "rows": min(max_results * 3, 200),
315
- "sort": "relevance",
316
  "select": ("title,author,abstract,published,published-print,"
317
  "published-online,issued,created,DOI,"
318
  "is-referenced-by-count,link,subject"),
@@ -325,8 +338,9 @@ def fetch_crossref_papers(query, category_label="", max_results=20, days_back=36
325
  if r.status_code == 200:
326
  items = r.json().get("message",{}).get("items",[]); break
327
  if r.status_code == 429: time.sleep(2**attempt); continue
328
- print(f"CrossRef {r.status_code}"); return []
329
- except Exception as e: print(f"CrossRef {attempt+1}: {e}"); time.sleep(1)
 
330
 
331
  cutoff = datetime.now() - timedelta(days=days_back)
332
  papers, seen_ids = [], set()
@@ -339,71 +353,76 @@ def fetch_crossref_papers(query, category_label="", max_results=20, days_back=36
339
  pub = parse_crossref_date(item)
340
  if pub == "N/A": continue
341
  cit = int(item.get("is-referenced-by-count", 0) or 0)
342
- authors = [f"{a.get('given','').strip()} {a.get('family','').strip()}".strip()
343
- for a in item.get("author",[])[:6]]
 
 
344
  authors = [a for a in authors if a.strip()] or ["Unknown"]
345
- abstract = re.sub(r"<[^>]+>","", item.get("abstract","No abstract.")).strip()[:1200]
346
- doi = item.get("DOI","")
347
- url = f"https://doi.org/{doi}" if doi else "#"
348
- pid = doi or re.sub(r"\W","",title)[:40]
 
349
  if pid in seen_ids: continue
350
  seen_ids.add(pid)
351
- pdf_url = next((l.get("URL","") for l in item.get("link",[])
352
- if "pdf" in l.get("content-type","").lower()), "")
353
- try: recent = datetime.strptime(pub[:10],"%Y-%m-%d") >= cutoff
354
  except: recent = False
355
  papers.append({
356
- "id": pid, "title": title, "authors": authors,
357
- "abstract": abstract, "published": pub[:10],
 
 
 
358
  "categories": item.get("subject",[])[:3],
359
- "citations": cit, "url": url, "pdf_url": pdf_url,
360
- "recent": recent, "source": "CrossRef",
 
 
 
361
  })
362
  papers.sort(key=lambda x: x["citations"], reverse=True)
363
  return papers
364
 
365
  # ================================================================
366
- # GLOBAL PAPER SEARCH — title-aware, relevance-sorted
367
  # ================================================================
368
  def global_paper_search(query, source_choice, max_results=10):
369
  if not query or not query.strip():
370
- return "⚠️ أدخل عنوان أو كلمات مفتاحية للبحث."
371
- q = query.strip()
372
- papers = []
373
-
374
- if source_choice in ("arXiv", "كلاهما / Both"):
375
- # ✅ sort_by="relevance" → returns most relevant, not newest
376
  papers += fetch_arxiv_papers(q, "", int(max_results), 3650,
377
  sort_by="relevance")
378
-
379
- if source_choice in ("CrossRef", "كلاهما / Both"):
380
- # ✅ use_title=True → uses query.title for precise title match
381
  papers += fetch_crossref_papers(q, "", int(max_results), 3650,
382
  use_title=True)
383
-
384
  if not papers:
385
- return f" لا نتائج لـ `{q}`. جرب كلمات مختلفة."
386
 
387
- # Deduplicate
388
  seen, unique = set(), []
389
  for p in papers:
390
  key = re.sub(r"\W","",p["title"].lower())[:60]
391
  if key not in seen: seen.add(key); unique.append(p)
392
-
393
- # Sort by citation count (most cited first for well-known papers)
394
  unique.sort(key=lambda x: x.get("citations") or 0, reverse=True)
395
 
396
- md = f"## 🔎 نتائج البحث — `{q}`\n\n**{len(unique)}** ورقة\n\n---\n\n"
 
 
397
  for i, p in enumerate(unique, 1):
398
- cit = f" | {cit_badge(p.get('citations'))}" if p.get("citations") is not None else ""
399
- cats = " · ".join(p.get("categories",[])[:2])
400
- md += (f"### {i}. {p['title']}\n\n"
401
- f"👥 {', '.join(p['authors'][:3])} | 📅 {p['published']}{cit}"
402
- f" | 📡 {p.get('source','')} | 🏷️ {cats}\n\n"
403
- f"> {p['abstract'][:450]}...\n\n"
404
- f"🔗 [View]({p['url']})"
405
- +(f" 📥 [PDF]({p['pdf_url']})" if p.get("pdf_url") else "")
406
- +"\n\n---\n\n")
 
 
 
407
  return md
408
 
409
  # ================================================================
@@ -420,7 +439,8 @@ def enrich_citations(papers):
420
  id_map, batch_ids = {}, []
421
  for p in arxiv_papers:
422
  clean = re.sub(r"v\d+$","", p["id"].split("/")[-1].strip())
423
- id_map[clean] = p; batch_ids.append(f"arXiv:{clean}")
 
424
  for i in range(0, len(batch_ids), 500):
425
  try:
426
  r = requests.post(
@@ -432,43 +452,47 @@ def enrich_citations(papers):
432
  for item in r.json():
433
  if not item: continue
434
  ext = item.get("externalIds") or {}
435
- clean = re.sub(r"v\d+$","", ext.get("ArXiv","").split("/")[-1].strip())
 
436
  if clean and clean in id_map:
437
  c = item.get("citationCount")
438
  if c is not None: id_map[clean]["citations"] = int(c)
439
  elif r.status_code == 429: time.sleep(4)
440
- except Exception as e: print(f"S2 batch: {e}")
441
  for p in [x for x in arxiv_papers if (x.get("citations") or 0)==0][:15]:
442
  clean = re.sub(r"v\d+$","", p["id"].split("/")[-1].strip())
443
  for attempt in range(2):
444
  try:
445
  r = requests.get(
446
- f"https://api.semanticscholar.org/graph/v1/paper/arXiv:{clean}",
447
- params={"fields":"citationCount"}, headers=s2_headers(), timeout=10)
448
- if r.status_code==200:
449
- c=r.json().get("citationCount"); p["citations"]=int(c) if c else 0; break
450
- if r.status_code==429: time.sleep(2**attempt); continue
451
- p["citations"]=0; break
452
- except: p["citations"]=0; break
 
 
453
  time.sleep(0.12)
454
  for p in [x for x in arxiv_papers if (x.get("citations") or 0)==0]:
455
  try:
456
  r = requests.get("https://api.crossref.org/works",
457
- params={"query.title":p["title"],"rows":1,
458
- "select":"is-referenced-by-count,title"},
459
  headers=cr_headers(), timeout=8)
460
- if r.status_code==200:
461
- items=r.json().get("message",{}).get("items",[])
462
  if items:
463
- found=(items[0].get("title") or [""])[0].lower()
464
- qw=set(p["title"].lower().split()[:5])
465
- fw=set(found.split()[:10])
466
- p["citations"]=(int(items[0].get("is-referenced-by-count",0) or 0)
467
- if len(qw&fw)>=2 else 0)
468
- else: p["citations"]=0
469
- else: p["citations"]=0
 
470
  time.sleep(0.12)
471
- except: p["citations"]=0
472
  for p in papers:
473
  if p.get("citations") is None: p["citations"] = 0
474
  return papers
@@ -480,10 +504,11 @@ def build_papers_index(papers):
480
  global FAISS_INDEX, PAPERS
481
  PAPERS = papers
482
  if not papers: FAISS_INDEX = None; return
483
- texts = [f"{p['title']} {p['abstract']}" for p in papers]
484
  embs = embedder.encode(texts, convert_to_numpy=True,
485
  normalize_embeddings=True).astype("float32")
486
- idx = faiss.IndexFlatIP(embs.shape[1]); idx.add(embs)
 
487
  FAISS_INDEX = idx
488
 
489
  def search_papers(query, top_k=5):
@@ -491,8 +516,8 @@ def search_papers(query, top_k=5):
491
  qe = embedder.encode([query], convert_to_numpy=True,
492
  normalize_embeddings=True).astype("float32")
493
  scores, ids = FAISS_INDEX.search(qe, min(top_k, len(PAPERS)))
494
- return [{"paper":PAPERS[i],"score":float(s)}
495
- for s,i in zip(scores[0],ids[0]) if i>=0 and float(s)>0.1]
496
 
497
  # ================================================================
498
  # AUTO-FETCH
@@ -507,142 +532,154 @@ def auto_fetch_worker(query, category, interval):
507
  new_ps = [p for p in papers if p["id"] not in seen]
508
  if new_ps:
509
  save_seen_ids(seen | {p["id"] for p in papers})
510
- AUTO_LOG.append(f"[{datetime.now().strftime('%H:%M')}] 🆕 {len(new_ps)} — {query}")
511
- if len(AUTO_LOG)>20: AUTO_LOG.pop(0)
 
 
512
 
513
def start_auto_fetch(query, cat_label, interval_min):
    """Launch the background auto-fetch worker thread.

    No-op (with a warning message) when a worker is already running.
    Returns a user-facing status string.
    """
    global AUTO_RUNNING
    if AUTO_RUNNING:
        return "⚠️ يعمل بالفعل."
    AUTO_RUNNING = True
    # Daemon thread so it never blocks process shutdown.
    worker_args = (query, CATEGORIES.get(cat_label, ""), int(interval_min) * 60)
    worker = threading.Thread(target=auto_fetch_worker, args=worker_args, daemon=True)
    worker.start()
    return f"✅ كل **{interval_min} دقيقة** — يراقب: `{query}`"
 
521
 
522
def stop_auto_fetch():
    """Signal the auto-fetch worker loop to stop; returns a status string."""
    global AUTO_RUNNING
    AUTO_RUNNING = False
    return "🛑 تم الإيقاف."
524
 
525
def get_auto_log():
    """Return the last 10 auto-fetch log entries, newest first, as Markdown."""
    if not AUTO_LOG:
        return "_لا يوجد سجل._"
    return "\n\n".join(reversed(AUTO_LOG[-10:]))
527
 
528
  # ================================================================
529
  # TRENDS
530
  # ================================================================
531
  def analyze_trends(papers):
532
- if not papers: return None, "⚠️ لا توجد أوراق."
533
  date_counts = Counter(p["published"][:7] for p in papers if p["published"]!="N/A")
534
  stopwords = {"the","a","an","of","in","for","on","with","and","or","to","using",
535
  "based","via","from","by","is","are","our","we","this","that","which",
536
  "towards","approach","method","new","into","over","learning","deep",
537
  "model","models","data","neural","large","language","paper","study",
538
  "analysis","results","show","also","can","used","two","its","their"}
539
- all_words = [w.lower() for p in papers for w in re.findall(r"[a-zA-Z]{4,}",p["title"])
540
- if w.lower() not in stopwords]
541
- top_words = Counter(all_words).most_common(15)
542
- sources = Counter(p.get("source","arXiv") for p in papers)
543
- cit_papers = [p for p in papers if (p.get("citations") or 0)>0]
544
- top_cited = sorted(cit_papers, key=lambda x:x["citations"], reverse=True)[:10]
545
- all_auth = [a for p in papers for a in p["authors"][:3]]
546
- top_authors= Counter(all_auth).most_common(10)
547
- cvals = [p["citations"] for p in cit_papers]
548
- buckets=[0,1,5,10,50,100,500,10000]; blabels=["0","1–4","5–9","10–49","50–99","100–499","500+"]
549
- bcounts=([sum(1 for c in cvals if buckets[i]<=c<buckets[i+1])
550
- for i in range(len(buckets)-1)] if cvals else [0]*7)
551
- avg_cit = round(sum(cvals)/max(len(cvals),1),1) if cvals else 0
552
- total_cit= sum(p.get("citations") or 0 for p in papers)
 
 
553
  C = ["#3b82f6","#8b5cf6","#10b981","#f59e0b","#ef4444","#06b6d4",
554
  "#ec4899","#14b8a6","#f97316","#a855f7","#22d3ee","#84cc16",
555
  "#fbbf24","#34d399","#f87171"]
556
  BG,PNL,BR,W = "#0f172a","#1e293b","#334155","white"
557
- fig,axes = plt.subplots(2,3,figsize=(20,12))
558
  fig.patch.set_facecolor(BG)
559
- fig.suptitle("📊 Research Trends Dashboard",color=W,fontsize=16,fontweight="bold",y=1.01)
560
  def style(ax):
561
  ax.set_facecolor(PNL)
562
  for sp in ax.spines.values(): sp.set_edgecolor(BR)
563
  ax.tick_params(colors=W, labelsize=8)
564
- ax=axes[0,0]; style(ax)
565
  if date_counts:
566
- ms,cs=zip(*sorted(date_counts.items())); ms,cs=list(ms),list(cs)
567
- bars=ax.bar(ms,cs,color=C[0],edgecolor="#60a5fa",lw=0.8)
 
568
  for b,c in zip(bars,cs):
569
- ax.text(b.get_x()+b.get_width()/2,b.get_height()+.05,str(c),
570
- ha="center",va="bottom",color=W,fontsize=8)
571
- if len(cs)>2:
572
- z=np.polyfit(range(len(cs)),cs,1)
573
- ax.plot(ms,np.poly1d(z)(range(len(cs))),"--",color="#f59e0b",
574
- lw=1.5,alpha=.8,label="Trend")
575
- ax.legend(fontsize=8,facecolor=PNL,labelcolor=W)
576
- ax.set_title("📅 Papers per Month",color=W,fontsize=12,fontweight="bold",pad=10)
577
- ax.set_ylabel("Count",color=W,fontsize=9); ax.tick_params(rotation=45)
578
- ax=axes[0,1]; style(ax)
 
579
  if top_words:
580
- wds,wcts=zip(*top_words)
581
- ax.barh(list(wds),list(wcts),color=C[:len(wds)],edgecolor="#475569",lw=.6)
582
- for b,c in zip(ax.patches,wcts):
583
- ax.text(b.get_width()+.1,b.get_y()+b.get_height()/2,str(c),
584
- va="center",color=W,fontsize=8)
585
- ax.set_title("🔑 Top Keywords",color=W,fontsize=12,fontweight="bold",pad=10)
586
- ax.set_xlabel("Frequency",color=W,fontsize=9)
587
- ax=axes[0,2]; ax.set_facecolor(PNL)
588
  if sources:
589
- sl,sv=zip(*sources.items())
590
- _,txts,ats=ax.pie(sv,labels=sl,autopct="%1.0f%%",colors=C[:len(sl)],startangle=90,
591
- textprops={"color":W,"fontsize":10},
592
- wedgeprops={"edgecolor":BR,"linewidth":1.5})
 
593
  for at in ats: at.set_color(W); at.set_fontsize(9)
594
- ax.set_title("📡 Source Distribution",color=W,fontsize=12,fontweight="bold",pad=10)
595
- ax=axes[1,0]; style(ax)
596
  if top_cited:
597
- lbls=[p["title"][:35]+"" if len(p["title"])>35 else p["title"] for p in top_cited]
598
- cv=[p["citations"] for p in top_cited]
599
- ax.barh(lbls[::-1],cv[::-1],color=C[1],edgecolor="#475569",lw=.6)
600
- mx=max(cv) if cv else 1
601
- for b,c in zip(ax.patches,cv[::-1]):
602
- ax.text(b.get_width()+mx*.01,b.get_y()+b.get_height()/2,f"{c:,}",
603
- va="center",color=W,fontsize=8)
604
- ax.set_xlabel("Citations",color=W,fontsize=9)
 
605
  else:
606
- ax.text(.5,.5,"No citation data",ha="center",va="center",
607
- color="#94a3b8",fontsize=11,transform=ax.transAxes)
608
- ax.set_title("🏆 Top 10 Cited",color=W,fontsize=12,fontweight="bold",pad=10)
609
- ax=axes[1,1]; style(ax)
610
  if any(bcounts):
611
- ax.bar(blabels,bcounts,color=C[2],edgecolor="#475569",lw=.8)
612
- for b,c in zip(ax.patches,bcounts):
613
- if c>0: ax.text(b.get_x()+b.get_width()/2,b.get_height()+.1,str(c),
614
- ha="center",va="bottom",color=W,fontsize=9)
615
- ax.set_xlabel("Citation Range",color=W,fontsize=9)
616
- ax.set_ylabel("Papers",color=W,fontsize=9)
617
- ax.annotate(f"Avg {avg_cit} | Total {total_cit:,}",
618
- xy=(.98,.96),xycoords="axes fraction",
619
- ha="right",va="top",color="#94a3b8",fontsize=8)
 
620
  else:
621
- ax.text(.5,.5,"No citation data",ha="center",va="center",
622
- color="#94a3b8",fontsize=11,transform=ax.transAxes)
623
- ax.set_title("📊 Citation Distribution",color=W,fontsize=12,fontweight="bold",pad=10)
624
- ax=axes[1,2]; style(ax)
625
  if top_authors:
626
- an,ac=zip(*top_authors)
627
- ax.barh(list(an)[::-1],list(ac)[::-1],color=C[3],edgecolor="#475569",lw=.6)
628
- for b,c in zip(ax.patches,list(ac)[::-1]):
629
- ax.text(b.get_width()+.05,b.get_y()+b.get_height()/2,str(c),
630
- va="center",color=W,fontsize=8)
631
- ax.set_xlabel("Papers",color=W,fontsize=9)
632
- ax.set_title("👥 Top Authors",color=W,fontsize=12,fontweight="bold",pad=10)
633
  plt.tight_layout(pad=3)
634
- path=f"{PERSIST_DIR}/trends.png"
635
- plt.savefig(path,bbox_inches="tight",dpi=150,facecolor=BG); plt.close()
636
- top5 = sorted(cit_papers,key=lambda x:x["citations"],reverse=True)[:5]
637
- stats = (f"### 📊 إحصائيات\n\n| المؤشر | القيمة |\n|---|---|\n"
638
- f"| 📦 الإجمالي | **{len(papers)}** |\n"
639
- f"| 🆕 جديدة | **{sum(1 for p in papers if p.get('recent'))}** |\n"
640
- f"| 🔢 الاقتباسات | **{total_cit:,}** |\n"
641
- f"| 📈 متوسط | **{avg_cit}** |\n\n")
 
642
  if top5:
643
- stats += "### 🏆 الأكثر اقتباساً\n\n"
644
  for i,p in enumerate(top5,1):
645
- stats += f"{i}. [{p['title']}]({p['url']}) **{p['citations']:,}**\n\n"
 
646
  return path, stats
647
 
648
  # ================================================================
@@ -654,50 +691,53 @@ def _llm(messages, max_tokens=1200):
654
  model="llama-3.3-70b-versatile",
655
  messages=messages, temperature=0.3, max_tokens=max_tokens)
656
  return r.choices[0].message.content.strip()
657
- except Exception as e: return f"⚠️ LLM Error: {e}"
658
 
659
  def explain_paper(paper, lang="ar"):
660
  cit = paper.get("citations","N/A")
661
- if lang=="ar":
662
  return fix_ar_format(_llm([
663
- {"role":"system","content":
664
- f"أنت خبير أكاديمي يشرح الأبحاث بالعربية الفصحى.\n{AR_FORMAT_RULES}"},
665
  {"role":"user","content":
666
- f"اشرح الورقة:\nالعنوان: {paper['title']}\n"
667
- f"المؤلفون: {', '.join(paper['authors'][:3])}\n"
668
- f"التاريخ: {paper['published']} | الاقتباسات: {cit}\n"
669
- f"الملخص: {paper['abstract']}\n\n"
670
- "## 🎯 موضوع الورقة\n\n## المشكلة\n\n## 🔧 المنهجية\n\n"
671
- "## 📊 النتائج\n\n## 🌟 الأهمية\n\n## 🔗 التطبيقات"}]))
672
  return _llm([{"role":"user","content":
673
- f"Explain:\nTitle: {paper['title']}\nAuthors: {', '.join(paper['authors'][:3])}\n"
674
- f"Date: {paper['published']} | Citations: {cit}\nAbstract: {paper['abstract']}\n\n"
675
- "## 🎯 Topic\n## Problem\n## 🔧 Methodology\n## 📊 Findings\n"
676
- "## 🌟 Contribution\n## 🔗 Applications"}])
677
 
678
  def compare_papers(pa, pb, lang="ar"):
679
- body = (f"الأولى: {pa['title']} | اقتباسات: {pa.get('citations','N/A')}\n"
680
- f"{pa['abstract'][:500]}\n\n"
681
- f"الثانية: {pb['title']} | اقتباسات: {pb.get('citations','N/A')}\n"
682
- f"{pb['abstract'][:500]}")
683
- if lang=="ar":
684
  return fix_ar_format(_llm([{"role":"user","content":
685
- f"قارن بين الورقتين.\n{AR_FORMAT_RULES}\n\n{body}\n\n"
686
- "## 🎯 الهدف\n\n## 🔧 المنهجية\n\n## 📊 النتائج\n\n"
687
- "## 💪 القوة\n\n## ⚠️ القيود\n\n## 🏆 الخلاصة"}], 1400))
688
  return _llm([{"role":"user","content":
689
- f"Compare:\n{body}\n\n## Topic\n## Methodology\n## Results\n"
690
- "## Strengths\n## Limits\n## Verdict"}], 1400)
691
 
692
  def summarize_papers(papers, topic, lang="ar"):
693
- text = "".join(f"{i}. {p['title']} ({p['published']}): {p['abstract'][:300]}...\n\n"
694
- for i,p in enumerate(papers[:8],1))
695
- if lang=="ar":
 
 
696
  return fix_ar_format(_llm([{"role":"user","content":
697
- f"نظرة عامة أكاديمية حول \"{topic}\".\n{AR_FORMAT_RULES}\n\n{text}\n\n"
698
- "## 1. الاتجاهات\n\n## 2. أبرز الأوراق\n\n## 3. المواضيع المشتركة\n\n## 4. الفجوات"}], 900))
 
 
699
  return _llm([{"role":"user","content":
700
- f"Academic overview of \"{topic}\":\n{text}\n\n"
701
  "## Trends\n## Key Papers\n## Themes\n## Gaps"}], 900)
702
 
703
  def generate_bibliography(papers, style="APA"):
@@ -706,38 +746,44 @@ def generate_bibliography(papers, style="APA"):
706
  auth = ", ".join(p["authors"][:6]) + (" et al." if len(p["authors"])>6 else "")
707
  year = p["published"][:4] if p["published"] not in ("N/A","") else "n.d."
708
  t,u = p["title"], p["url"]
709
- if style=="APA": entries.append(f"{i}. {auth} ({year}). *{t}*. {u}")
710
- elif style=="IEEE":
 
711
  ae = " and ".join(p["authors"][:3]) + (" et al." if len(p["authors"])>3 else "")
712
- entries.append(f'[{i}] {ae}, "{t}," {year}. [Online]: {u}')
713
- elif style=="Chicago": entries.append(f'{i}. {auth}. "{t}." ({year}). {u}')
 
714
  else:
715
  key = re.sub(r"\W","", (p["authors"][0].split()[-1]
716
- if p["authors"] else "Auth"))+year
717
- entries.append(f"@article{{{key}{i},\n title={{{t}}},\n "
718
- f"author={{{auth}}},\n year={{{year}}},\n url={{{u}}}\n}}")
 
719
  bib = "\n\n".join(entries)
720
- path = f"{PERSIST_DIR}/bibliography_{style}.txt"
721
- with open(path,"w",encoding="utf-8") as f: f.write(bib)
722
  return bib, path
723
 
724
  def chat_about_papers(question, history):
725
  if not PAPERS:
726
- return ("⚠️ يرجى جلب الأوراق أولاً."
727
- if detect_lang(question)=="ar" else "⚠️ Fetch papers first.")
728
- lang=detect_lang(question); relevant=search_papers(question, top_k=4); context=""
 
 
729
  if relevant:
730
- context = "الأوراق ذات الصلة:\n\n" if lang=="ar" else "Relevant papers:\n\n"
731
  for r in relevant:
732
- p=r["paper"]
733
- cit=f" | {p['citations']:,} citations" if p.get("citations") else ""
734
- context += f"**{p['title']}** ({p['published']}){cit}\n{p['abstract'][:400]}\n🔗 {p['url']}\n\n"
735
- sys_msg = (f"أنت مساعد بحثي. أجب بالعربية الفصحى.\n{AR_FORMAT_RULES}"
736
- if lang=="ar" else "You are an academic assistant. Answer in English.")
 
737
  msgs = [{"role":"system","content":sys_msg}]
738
  for t in history[-4:]: msgs.append({"role":t["role"],"content":t["content"]})
739
  msgs.append({"role":"user","content":
740
- f"{context}\nسؤال: {question}" if context else question})
741
  out = _llm(msgs, 800)
742
  return fix_ar_format(out) if lang=="ar" else out
743
 
@@ -745,9 +791,10 @@ def text_to_audio(text, lang="ar"):
745
  clean = clean_md(text)
746
  if not clean: return None
747
  try:
748
- tts=gTTS(text=clean, lang=lang, slow=False)
749
- path=f"{PERSIST_DIR}/audio_{lang}.mp3"; tts.save(path); return path
750
- except Exception as e: print(f"TTS: {e}"); return None
 
751
 
752
  # ================================================================
753
  # GRADIO HANDLERS
@@ -755,17 +802,17 @@ def text_to_audio(text, lang="ar"):
755
  def gr_fetch(query, category_label, max_results, days_back, source_choice,
756
  progress=gr.Progress()):
757
  global ACTIVE_PAPERS
758
- progress(0.05, desc="🌐 Connecting...")
759
  papers, warn = [], ""
760
- if source_choice in ("arXiv","كلاهما / Both"):
761
- progress(0.15, desc="📡 Fetching arXiv...")
762
  papers += fetch_arxiv_papers(query, CATEGORIES.get(category_label,""),
763
  int(max_results), int(days_back),
764
  sort_by="submittedDate")
765
- if source_choice in ("CrossRef","كلاهما / Both"):
766
- progress(0.35, desc="📚 Fetching CrossRef...")
767
  cr = fetch_crossref_papers(query, category_label, int(max_results), int(days_back))
768
- if not cr: warn = "\n\n> ⚠️ CrossRef: لا نتائج — جرب موضوعاً مختلفاً."
769
  papers += cr
770
  seen, unique = set(), []
771
  for p in papers:
@@ -773,30 +820,32 @@ def gr_fetch(query, category_label, max_results, days_back, source_choice,
773
  if key not in seen: seen.add(key); unique.append(p)
774
  papers = unique
775
  if not papers:
776
- return (" لا توجد نتائج."+warn,
777
- gr.update(choices=[],value=None), gr.update(choices=[],value=None),
778
- gr.update(choices=[],value=None), gr.update(choices=[],value=None), "❌ 0")
779
- progress(0.60, desc="📊 جلب الاقتباسات (3-layer)...")
 
780
  papers = enrich_citations(papers)
781
- progress(0.85, desc="🔢 FAISS indexing...")
782
  build_papers_index(papers)
783
  ACTIVE_PAPERS = list(papers)
784
  tbl, choices = build_table(papers)
785
  recent = sum(1 for p in papers if p.get("recent"))
786
  tot_cit = sum(p.get("citations") or 0 for p in papers)
787
  zero_cit = sum(1 for p in papers if (p.get("citations") or 0)==0)
788
- note = (f"\n\n> ℹ️ **{zero_cit}** ورقة بدون اقتباسات (جديدة أو غير مفهرسة)."
789
  if zero_cit else "")
790
- md = (f"## تم جلب **{len(papers)}** ورقة\n\n"
791
- f"🆕 جديدة: **{recent}** &nbsp;|&nbsp; 📊 الاقتباسات: **{tot_cit:,}**"
792
- +warn+note+f"\n\n---\n\n{tbl}")
 
793
  upd = gr.update(choices=choices, value=choices[0] if choices else None)
794
  progress(1.0)
795
- return md, upd, upd, upd, upd, f"✅ {len(papers)} | 🆕 {recent} | 📊 {tot_cit:,}"
796
 
797
  def gr_filter_papers(year_from, year_to, cit_min, cit_max, sort_by):
798
  global ACTIVE_PAPERS
799
- if not PAPERS: return "⚠️ اجلب الأوراق أولاً.", gr.update(), "⚠️"
800
  filtered = []
801
  for p in PAPERS:
802
  try:
@@ -806,114 +855,121 @@ def gr_filter_papers(year_from, year_to, cit_min, cit_max, sort_by):
806
  cit = int(p.get("citations") or 0)
807
  if cit < int(cit_min) or cit > int(cit_max): continue
808
  filtered.append(p)
809
- if "Newest" in sort_by or "الأحدث" in sort_by:
810
- filtered.sort(key=lambda x:x["published"], reverse=True)
811
- elif "Oldest" in sort_by or "الأقدم" in sort_by:
812
- filtered.sort(key=lambda x:x["published"])
813
- elif "Most" in sort_by or "الأكثر" in sort_by:
814
- filtered.sort(key=lambda x:x.get("citations") or 0, reverse=True)
815
- elif "Least" in sort_by or "الأقل" in sort_by:
816
- filtered.sort(key=lambda x:x.get("citations") or 0)
817
  if not filtered:
818
- ACTIVE_PAPERS=[]
819
- return " لا توجد أوراق تطابق الفلتر.", gr.update(choices=[],value=None), "0"
820
  ACTIVE_PAPERS = list(filtered)
821
  tbl, choices = build_table(filtered)
822
  tot = sum(p.get("citations") or 0 for p in filtered)
823
- md = (f"## 🔽 **{len(filtered)}** من **{len(PAPERS)}** &nbsp;|&nbsp; "
824
- f"📅 {year_from}–{year_to} &nbsp;|&nbsp; "
825
- f"📊 {cit_min}–{cit_max} &nbsp;|&nbsp; مجموع {tot:,}\n\n---\n\n{tbl}")
826
- return md, gr.update(choices=choices, value=choices[0] if choices else None), \
827
- f"🔽 {len(filtered)}/{len(PAPERS)}"
828
 
829
  def gr_search_fetched(query):
830
- if not query or not query.strip(): return "⚠️ أدخل كلمة بحث."
831
- if not PAPERS: return "⚠️ اجلب الأوراق أولاً."
832
  results = search_papers(query.strip(), top_k=8)
833
- if not results: return f" لا نتائج لـ `{query}`."
834
- md = f"## 🔍 `{query}` — {len(results)} نتائج\n\n"
 
835
  for r in results:
836
- p,s = r["paper"],r["score"]
837
- bar = "🟩"*round(s*10)+"⬜"*(10-round(s*10))
838
- cit = f" | {cit_badge(p.get('citations'))}" if p.get("citations") else ""
839
- md += (f"### {bar} `{s*100:.0f}%` — {p['title']}\n\n"
840
- f"👥 {', '.join(p['authors'][:2])} | 📅 {p['published']}{cit}"
841
- f" | {p.get('source','')}\n\n"
842
- f"> {p['abstract'][:350]}...\n\n"
843
- f"🔗 [View]({p['url']})"
844
- +(f" 📥 [PDF]({p['pdf_url']})" if p.get("pdf_url") else "")
845
- +"\n\n---\n\n")
846
  return md
847
 
848
  def _get_paper(choice):
849
  pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
850
- try: return pool[int(choice.split(".")[0])-1]
851
  except: return None
852
 
853
  def gr_explain(choice, lang_choice):
854
- if not choice: return "⚠️ اجلب الأوراق ثم اختر ورقة."
855
  paper = _get_paper(choice)
856
- if not paper: return "⚠️ خطأ في الاختيار."
857
- lang = "ar" if "عربي" in lang_choice else "en"
858
- header = (f"# 📄 {paper['title']}\n\n"
859
- f"**👥** {', '.join(paper['authors'])} &nbsp;|&nbsp; "
860
- f"**📅** {paper['published']} &nbsp;|&nbsp; "
861
- f"**📊** {cit_badge(paper.get('citations'))} &nbsp;|&nbsp; "
862
- f"**📡** {paper.get('source','arXiv')}\n\n"
863
- f"🔗 [View Paper]({paper['url']})"
864
- +(f" 📥 [PDF]({paper['pdf_url']})" if paper.get("pdf_url") else "")
865
- f"\n\n---\n\n> {paper['abstract']}\n\n---\n\n"
866
- f"## 🤖 {'الشرح' if lang=='ar' else 'Explanation'} Llama 3.3 70B\n\n")
 
 
 
 
867
  return header + explain_paper(paper, lang)
868
 
869
  def gr_audio(txt, lang_choice):
870
- if not txt or len(txt)<50: return None
871
- return text_to_audio(txt, "ar" if "عربي" in lang_choice else "en")
872
 
873
  def gr_save_fav(choice):
874
- if not choice: return "⚠️ اختر ورقة أولاً."
875
  paper = _get_paper(choice)
876
- return save_favorite(paper) if paper else "⚠️ خطأ."
877
 
878
  def gr_show_favs():
879
  favs = load_favorites()
880
- if not favs: return "_لا توجد أوراق محفوظة._"
881
- lines = [f"⭐ **{p['title']}**\n👥 {p['authors'][0] if p['authors'] else 'N/A'} | "
882
- f"📅 {p['published']} | 📡 {p.get('source','')} | "
883
- f"📊 {cit_badge(p.get('citations'))} | 🔗 [رابط]({p['url']})"
 
 
 
884
  for p in favs]
885
- return f"### المفضلة {len(favs)} ورقة\n\n" + "\n\n---\n\n".join(lines)
 
886
 
887
  def gr_compare(ca, cb, lc):
888
- if not ca or not cb: return "⚠️ اختر ورقتين أولاً."
889
  pa = _get_paper(ca); pb = _get_paper(cb)
890
- if not pa or not pb: return "⚠️ خطأ في الاختيار."
891
- if pa["id"]==pb["id"]: return "⚠️ اختر ورقتين مختلفتين."
892
- return compare_papers(pa, pb, "ar" if "عربي" in lc else "en")
893
 
894
  def gr_overview(query, lc):
895
- if not PAPERS: return "⚠️ اجلب الأوراق أولاً."
896
  pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
897
- return (f"## 🌐 نظرة عامة\n\n"
898
- f"{summarize_papers(pool, query or 'research', 'ar' if 'عربي' in lc else 'en')}")
 
899
 
900
  def gr_trends():
901
- if not PAPERS: return None, "⚠️ اجلب الأوراق أولاً."
902
  return analyze_trends(ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS)
903
 
904
  def gr_bib(style, progress=gr.Progress()):
905
- if not PAPERS: return "⚠️ اجلب الأوراق أولاً.", None
906
- progress(0.5, desc="توليد...")
907
  pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
908
  text, path = generate_bibliography(pool, style)
909
  progress(1.0)
910
- return f"```\n{text[:3000]+('...' if len(text)>3000 else '')}\n```", path
 
911
 
912
  def gr_chat_fn(message, history):
913
  if not message.strip(): return history, ""
914
  hd = []
915
  for pair in history:
916
- if pair[0]: hd.append({"role":"user","content":pair[0]})
917
  if pair[1]: hd.append({"role":"assistant","content":pair[1]})
918
  history.append((message, chat_about_papers(message, hd)))
919
  return history, ""
@@ -925,185 +981,185 @@ CSS = """
925
  footer{display:none!important}
926
  h1{text-align:center}
927
  .status-bar{font-size:.85rem;color:#94a3b8;padding:2px 0}
928
- .legend{font-size:.8rem;color:#cbd5e1;background:#1e293b;border-radius:8px;padding:6px 14px;margin-bottom:6px}
929
- .filter-box{background:#1e293b;border-radius:10px;padding:12px 16px;margin-top:8px}
930
- .gs-box{background:#1e293b;border-radius:10px;padding:14px 18px;margin-bottom:10px;border:1px solid #334155}
 
 
 
931
  """
932
 
933
  with gr.Blocks(
934
  theme=gr.themes.Soft(primary_hue="blue", secondary_hue="purple"),
935
- title="🔬 Paper Discovery v7.4", css=CSS
936
  ) as demo:
937
 
938
- gr.Markdown("# 🔬 Scientific Paper Discovery v7.4\n**arXiv · CrossRef · Llama-3.3-70B · FAISS**")
939
- gr.Markdown(
940
- "📊 **الاقتباسات:** 🥇 ≥1,000 &nbsp;|&nbsp; 🏆 ≥100 &nbsp;|&nbsp; ⭐ ≥10 &nbsp;|&nbsp; 📄 <10 &nbsp;|&nbsp; · = 0",
941
- elem_classes="legend")
942
- status_bar = gr.Markdown("_لم يتم جلب أوراق بعد_", elem_classes="status-bar")
943
 
944
  with gr.Tabs():
945
 
946
- # ── TAB 1: BROWSE / FETCH ──────────────────────────
947
- with gr.Tab("🔍 البحث / Browse"):
948
  with gr.Row():
949
  with gr.Column(scale=3):
950
- t_query = gr.Textbox(label="🔎 الموضوع",
951
  placeholder="ARIMA, inflation, LLM...",
952
  value="economic forecasting")
953
- t_category = gr.Dropdown(label="📂 التصنيف",
954
  choices=list(CATEGORIES.keys()),
955
- value="📊 الاقتصاد / Economics")
956
- t_source = gr.Radio(label="📡 المصدر",
957
- choices=["arXiv","CrossRef","كلاهما / Both"],
958
  value="arXiv")
959
  with gr.Column(scale=1):
960
- t_max = gr.Slider(5, 50, value=15, step=5, label="📊 عدد الأوراق")
961
- t_days = gr.Slider(1, 1500, value=365, step=30, label="📅 آخر N يوم")
962
- btn_fetch = gr.Button("🚀 جلب الأوراق", variant="primary", size="lg")
963
- papers_table_md = gr.Markdown("_ستظهر النتائج هنا._")
964
- paper_selector = gr.Dropdown(label="📄 اختر ورقة", choices=[], interactive=True)
965
  with gr.Group(elem_classes="filter-box"):
966
- gr.Markdown("### 🔽 فلترة وترتيب")
967
  with gr.Row():
968
- f_year_from = gr.Slider(2000,2026,value=2020,step=1,label="📅 من سنة")
969
- f_year_to = gr.Slider(2000,2026,value=2026,step=1,label="📅 إلى سنة")
970
  with gr.Row():
971
- f_cit_min = gr.Slider(0,5000,value=0, step=5,label="📊 اقتباسات من")
972
- f_cit_max = gr.Slider(0,5000,value=5000,step=5,label="📊 اقتباسات إلى")
973
  with gr.Row():
974
  f_sort = gr.Dropdown(choices=SORT_CHOICES,
975
- value=SORT_CHOICES[2],label="🔃 الترتيب",scale=3)
976
- btn_filter = gr.Button("✅ تطبيق",variant="primary",scale=1)
977
- gr.Markdown("---\n### 🔍 بحث دلالي داخلي (FAISS)")
978
  with gr.Row():
979
- search_in_box = gr.Textbox(label="🔍 ابحث في الأوراق المحملة",
980
  placeholder="ARIMA, transformer...",scale=5)
981
- btn_search_in = gr.Button("بحث 🔍",scale=1)
982
  search_in_out = gr.Markdown()
983
 
984
- # ── TAB 2: GLOBAL SEARCH ─────────────────────────
985
- with gr.Tab("🌐 بحث عالمي / Global Search"):
986
  gr.Markdown(
987
- "### 🌐 ابحث عن أي ورقة بالعنوان أو الكلمات المفتاحية\n\n"
988
- "> يستخدم **arXiv relevance** و **CrossRef title search** "
989
- "للعثور على الورقة الصحيحة مباشرة."
990
  )
991
  with gr.Group(elem_classes="gs-box"):
992
  with gr.Row():
993
  gs_query = gr.Textbox(
994
- label="🔎 العنوان أو الكلمات المفتاحية",
995
- placeholder="Attention is All You Need | ARIMA inflation Algeria ...",
996
  scale=4)
997
- gs_source = gr.Radio(
998
- label="📡 المصدر",
999
- choices=["arXiv","CrossRef","كلاهما / Both"],
1000
- value="كلاهما / Both", scale=2)
1001
- gs_max = gr.Slider(5,30,value=10,step=5,label="📊 عدد النتائج",scale=1)
1002
- btn_gs = gr.Button("🔎 بحث الآن", variant="primary", size="lg")
1003
- gs_out = gr.Markdown("_أدخل عنوان ورقة أو كلمات مفتاحية..._")
1004
 
1005
  # ── TAB 3: EXPLAIN ─────────────────────────────────
1006
- with gr.Tab("📖 الشرح / Explain"):
1007
  with gr.Row():
1008
- paper_sel2 = gr.Dropdown(label="📄 اختر الورقة",
1009
  choices=[], interactive=True, scale=4)
1010
- lang_exp = gr.Radio(LANG_CHOICES, value=LANG_CHOICES[0],
1011
- label="🌐 اللغة", scale=1)
1012
  with gr.Row():
1013
- btn_explain = gr.Button("📖 اشرح", variant="primary")
1014
- btn_fav = gr.Button(" حفظ")
1015
- btn_audio = gr.Button("🔊 استمع")
1016
- btn_export_pdf = gr.Button("📄 تصدير PDF", variant="secondary")
1017
  with gr.Row():
1018
  fav_status = gr.Markdown()
1019
  pdf_status = gr.Markdown()
1020
- explanation_out = gr.Markdown("_اجلب الأوراق واختر ورقة._")
1021
- audio_out = gr.Audio(label="🔊", type="filepath")
1022
- pdf_out = gr.File(label="📄 تحميل PDF")
1023
 
1024
  # ── TAB 4: COMPARE ─────────────────────────────────
1025
- with gr.Tab("⚖️ المقارنة / Compare"):
1026
  with gr.Row():
1027
- cmp_a = gr.Dropdown(label="📄 الأولى", choices=[], interactive=True)
1028
- cmp_b = gr.Dropdown(label="📄 الثانية", choices=[], interactive=True)
1029
- lang_cmp = gr.Radio(LANG_CHOICES, value=LANG_CHOICES[0],
1030
- label="🌐 اللغة", scale=1)
1031
- btn_compare = gr.Button("⚖️ قارن الآن", variant="primary")
1032
- compare_out = gr.Markdown("_اختر ورقتين._")
1033
 
1034
  # ── TAB 5: OVERVIEW ────────────────────────────────
1035
- with gr.Tab("🌐 نظرة عامة"):
1036
  with gr.Row():
1037
- lang_ov = gr.Radio(LANG_CHOICES, value=LANG_CHOICES[0],
1038
- label="🌐 اللغة", scale=1)
1039
- btn_overview = gr.Button("🤖 توليد التقرير", variant="primary", scale=3)
1040
- overview_out = gr.Markdown("_اجلب الأوراق أولاً._")
1041
 
1042
  # ── TAB 6: TRENDS ──────────────────────────────────
1043
- with gr.Tab("📊 الاتجاهات / Trends"):
1044
- btn_trends = gr.Button("📊 تحليل الاتجاهات", variant="primary", size="lg")
1045
- trend_chart = gr.Image(label="📊 لوحة الاتجاهات", type="filepath")
1046
- trend_stats = gr.Markdown("_اجلب الأوراق أولاً._")
1047
 
1048
  # ── TAB 7: BIBLIOGRAPHY ────────────────────────────
1049
- with gr.Tab("📚 المراجع"):
1050
- bib_style = gr.Radio(["APA","IEEE","Chicago","BibTeX"], value="APA",
1051
- label="📐 النمط")
1052
- btn_bib = gr.Button("📚 توليد المراجع", variant="primary")
1053
  bib_out = gr.Markdown()
1054
- bib_file = gr.File(label="📥 تحميل")
1055
 
1056
  # ── TAB 8: FAVORITES ───────────────────────────────
1057
- with gr.Tab("⭐ المفضلة"):
1058
- btn_show_fav = gr.Button("📋 عرض المفضلة")
1059
- favs_md = gr.Markdown("_اضغط عرض._")
1060
- btn_export_fav = gr.Button("📥 تصدير CSV", variant="secondary")
1061
- fav_csv_file = gr.File(label="📄 CSV")
1062
 
1063
  # ── TAB 9: AUTO-FETCH ──────────────────────────────
1064
- with gr.Tab("🔔 تحديث تلقائي"):
1065
  with gr.Row():
1066
- auto_q = gr.Textbox(label="🔎 الموضوع",
1067
  value="economic forecasting", scale=3)
1068
- auto_cat = gr.Dropdown(label="📂 التصنيف",
1069
  choices=list(CATEGORIES.keys()),
1070
- value="📊 الاقتصاد / Economics", scale=2)
1071
  auto_interval = gr.Slider(5,120,value=60,step=5,
1072
- label="⏱️ كل (دقيقة)",scale=1)
1073
  with gr.Row():
1074
- btn_start_auto = gr.Button("▶️ بدء", variant="primary")
1075
- btn_stop_auto = gr.Button("⏹️ إيقاف", variant="stop")
1076
- btn_refresh_log = gr.Button("🔄 السجل")
1077
  auto_status = gr.Markdown()
1078
- auto_log_md = gr.Markdown("_لا يوجد سجل._")
1079
 
1080
  # ── TAB 10: CHAT ───────────────────────────────────
1081
- with gr.Tab("💬 محادثة / Chat"):
1082
- chatbot_ui = gr.Chatbot(label="مساعد الأبحاث", height=480,
1083
- bubble_full_width=False)
1084
  with gr.Row():
1085
- chat_in = gr.Textbox(label="سؤالك", scale=5,
1086
- placeholder="🇸🇦 ما أبرز النتائج؟ | 🇬🇧 Key findings?")
1087
- btn_send = gr.Button("إرسال ✉️", variant="primary", scale=1)
1088
- btn_clear = gr.Button("🗑️ مسح", size="sm")
1089
 
1090
  # ── TAB 11: ABOUT ──────────────────────────────────
1091
- with gr.Tab("ℹ️ حول"):
1092
  gr.Markdown("""
1093
- ## 🔬 Scientific Paper Discovery — v7.4
1094
-
1095
- ### ✅ جديد في v7.4
1096
- | الميزة | التفاصيل |
1097
- |---|---|
1098
- | 🌐 بحث عالمي محسّن | يستخدم `sort_by="relevance"` + `ti:"..."` للعثور على الورقة بالعنوان الدقيق |
1099
- | 📄 تصدير PDF | شرح كامل بتنسيق احترافي بزر واحد |
1100
 
1101
- ### 🔧 مقارنة أوضاع البحث
1102
- | الوضع | يستخدم | مناسب لـ |
1103
  |---|---|---|
1104
- | 🔍 Browse | `sortBy=submittedDate` | استكشاف أحدث أوراق موضوع |
1105
- | 🌐 Global Search | `sortBy=relevance` + `ti:` | البحث عن ورقة بعنوانها |
1106
- | 🔍 FAISS (داخلي) | Cosine similarity | البحث في الأوراق المحملة |
 
 
 
 
 
1107
  """)
1108
 
1109
  # ── WIRING ──────────────────────────────────────────────
@@ -1125,17 +1181,17 @@ with gr.Blocks(
1125
  btn_gs.click(global_paper_search, inputs=[gs_query, gs_source, gs_max], outputs=[gs_out])
1126
  gs_query.submit(global_paper_search, inputs=[gs_query, gs_source, gs_max], outputs=[gs_out])
1127
 
1128
- btn_explain.click(gr_explain, inputs=[paper_sel2, lang_exp], outputs=[explanation_out])
1129
- btn_fav.click(gr_save_fav, inputs=[paper_sel2], outputs=[fav_status])
1130
- btn_audio.click(gr_audio, inputs=[explanation_out, lang_exp], outputs=[audio_out])
1131
  btn_export_pdf.click(gr_export_pdf,
1132
  inputs=[explanation_out, paper_sel2],
1133
  outputs=[pdf_out, pdf_status])
1134
 
1135
- btn_compare.click(gr_compare, inputs=[cmp_a, cmp_b, lang_cmp], outputs=[compare_out])
1136
- btn_overview.click(gr_overview, inputs=[t_query, lang_ov], outputs=[overview_out])
1137
  btn_trends.click(gr_trends, outputs=[trend_chart, trend_stats])
1138
- btn_bib.click(gr_bib, inputs=[bib_style], outputs=[bib_out, bib_file])
1139
 
1140
  btn_show_fav.click(gr_show_favs, outputs=[favs_md])
1141
  btn_export_fav.click(gr_export_fav, outputs=[fav_csv_file])
 
1
  # ================================================================
2
+ # Scientific Paper Discovery Bot v7.4 — SyntaxError FIXED
 
 
3
  # ================================================================
4
  import os, re, time, json, pickle, threading
5
  import requests
 
30
  S2_API_KEY = os.environ.get("S2_API_KEY", "")
31
  groq_client = Groq(api_key=GROQ_API_KEY)
32
 
33
+ print("Loading embedder...")
34
  embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
35
  _ = embedder.encode(["warmup"])
36
+ print("Embedder ready!")
37
 
38
  PAPERS = []
39
  ACTIVE_PAPERS = []
 
43
  CURRENT_YEAR = datetime.now().year
44
 
45
  PERSIST_DIR = "/tmp"
46
+ FAVORITES_PATH = PERSIST_DIR + "/favorites.pkl"
47
+ SEEN_IDS_PATH = PERSIST_DIR + "/seen_ids.json"
48
  os.makedirs(PERSIST_DIR, exist_ok=True)
49
 
50
  CATEGORIES = {
51
+ "All": "",
52
+ "Economics": "econ",
53
+ "Quant Fin": "q-fin",
54
+ "AI": "cs.AI",
55
+ "ML": "cs.LG",
56
+ "NLP": "cs.CL",
57
+ "Statistics": "stat",
58
+ "Biology": "q-bio",
59
+ "Physics": "physics",
60
+ "Math": "math",
61
+ "CS": "cs",
62
  }
63
  CROSSREF_SUBJECTS = {
64
+ "All": "",
65
+ "Economics": "economics",
66
+ "Quant Fin": "finance",
67
+ "AI": "artificial intelligence",
68
+ "ML": "machine learning",
69
+ "NLP": "natural language processing",
70
+ "Statistics": "statistics",
71
+ "Biology": "biology",
72
+ "Physics": "physics",
73
+ "Math": "mathematics",
74
+ "CS": "computer science",
75
  }
76
+ LANG_CHOICES = ["Arabic", "English"]
77
+ SORT_CHOICES = ["Newest", "Oldest", "Most Cited", "Least Cited"]
78
+ AR_RULES = """
79
+ - ابدأ كل قسم بـ ## مع سطر فارغ قبله وبعده
80
+ - اكتب كل قسم في فقرة 3-4 جمل بالعربية الفصحى
 
 
 
 
 
 
81
  - لا تكرر عنوان القسم داخل النص
 
82
  """
83
 
84
  # ================================================================
85
  # HELPERS
86
  # ================================================================
87
  def detect_lang(text):
88
+ try:
89
+ return "ar" if detect(str(text)[:300]).startswith("ar") else "en"
90
+ except:
91
+ return "en"
92
 
93
  def clean_md(text):
94
  text = re.sub(r"[#*`>\[\]!_~]", "", text)
 
102
  def cit_badge(n):
103
  if n is None or n == "": return "—"
104
  n = int(n)
105
+ if n >= 1000: return "🥇 " + "{:,}".format(n)
106
+ if n >= 100: return "🏆 " + "{:,}".format(n)
107
+ if n >= 10: return "⭐ " + "{:,}".format(n)
108
+ if n > 0: return "📄 " + str(n)
109
  return "·"
110
 
111
  def build_table(papers_list):
112
+ rows = "| # | Title | Author | Date | Citations | Source |\n"
113
  rows += "|---|---|---|---|---|---|\n"
114
  choices = []
115
  for i, p in enumerate(papers_list):
116
+ first = p["authors"][0] if p["authors"] else "N/A"
117
+ badge = "NEW" if p.get("recent") else "📄"
118
+ rows += "| {} | {} {} | {} | {} | {} | {} |\n".format(
119
+ i+1, badge, p["title"], first,
120
+ p["published"], cit_badge(p.get("citations")),
121
+ p.get("source","arXiv"))
122
+ choices.append("{}. {}".format(i+1, p["title"]))
123
  return rows, choices
124
 
125
  def s2_headers():
126
  h = {"User-Agent": "ScientificPaperBot/7.4"}
127
+ if S2_API_KEY:
128
+ h["x-api-key"] = S2_API_KEY
129
  return h
130
 
131
  def cr_headers():
132
  return {"User-Agent": "ScientificPaperBot/7.4 (mailto:researcher@example.com)"}
133
 
134
  # ================================================================
135
+ # CrossRef date parser — rejects garbage years
136
  # ================================================================
137
+ def parse_crossref_date(item):
138
  for field in ["issued", "published", "published-print", "published-online", "created"]:
139
  dp = (item.get(field) or {}).get("date-parts", [[]])
140
  if not dp or not dp[0]: continue
 
144
  if not (1900 <= year <= CURRENT_YEAR + 1): continue
145
  month = max(1, min(12, int(pts[1]) if len(pts) >= 2 else 1))
146
  day = max(1, min(31, int(pts[2]) if len(pts) >= 3 else 1))
147
+ return "{:04d}-{:02d}-{:02d}".format(year, month, day)
148
  except (ValueError, TypeError, IndexError):
149
  continue
150
  return "N/A"
 
170
  if paper["id"] not in {p["id"] for p in favs}:
171
  favs.append(paper)
172
  with open(FAVORITES_PATH, "wb") as f: pickle.dump(favs, f)
173
+ return "Saved: " + paper["title"]
174
+ return "Already saved."
175
 
176
  def export_favorites_csv():
177
  favs = load_favorites()
178
  if not favs: return None
179
+ df = pd.DataFrame([{
180
+ "Title": p["title"],
181
+ "Authors": ", ".join(p["authors"][:3]),
182
+ "Date": p["published"],
183
+ "Citations": p.get("citations","N/A"),
184
+ "URL": p["url"],
185
+ "Source": p.get("source","arXiv")
186
+ } for p in favs])
187
+ path = PERSIST_DIR + "/favorites.csv"
188
  df.to_csv(path, index=False, encoding="utf-8-sig")
189
  return path
190
 
191
  def gr_export_fav(): return export_favorites_csv()
192
 
193
  # ================================================================
194
+ # PDF EXPORT
195
  # ================================================================
196
  def export_explanation_pdf(explanation_text, paper_title="paper"):
197
  if not explanation_text or len(explanation_text) < 30: return None
198
  safe = re.sub(r"[^\w\s-]", "", paper_title)[:50].strip().replace(" ", "_")
199
+ path = PERSIST_DIR + "/explanation_" + safe + ".pdf"
200
  doc = SimpleDocTemplate(path, pagesize=A4,
201
  rightMargin=2*cm, leftMargin=2*cm,
202
+ topMargin=2*cm, bottomMargin=2*cm)
203
+ styles = getSampleStyleSheet()
204
+ h2_style = ParagraphStyle("H2", parent=styles["Heading2"],
205
+ fontSize=11, textColor=colors.HexColor("#2563eb"),
206
+ spaceBefore=14, spaceAfter=6)
207
+ bd_style = ParagraphStyle("BD", parent=styles["Normal"],
208
+ fontSize=10, leading=16, spaceAfter=8)
209
+ mt_style = ParagraphStyle("MT", parent=styles["Normal"],
210
+ fontSize=9, textColor=colors.HexColor("#64748b"))
 
211
  story = []
212
  for line in explanation_text.split("\n"):
213
  line = line.strip()
214
  if not line: story.append(Spacer(1, 6)); continue
215
  clean = re.sub(r"\*\*(.+?)\*\*", r"\1", line)
216
+ clean = re.sub(r"\*(.+?)\*", r"\1", clean)
217
+ clean = re.sub(r"`(.+?)`", r"\1", clean)
218
+ clean = re.sub(r"^#{1,6}\s*", "", clean)
219
  clean = re.sub(r"[🎯❓🔧📊🌟🔗📄👥📅📡🤖#*_~]", "", clean).strip()
220
  if not clean: continue
221
  if line.startswith("## ") or line.startswith("# "):
 
223
  color=colors.HexColor("#e2e8f0"), spaceAfter=4))
224
  story.append(Paragraph(clean, h2_style))
225
  elif line.startswith(">"):
226
+ q_st = ParagraphStyle("Q", parent=styles["Normal"],
227
+ fontSize=9, leftIndent=20,
228
+ textColor=colors.HexColor("#475569"), leading=14)
229
+ story.append(Paragraph(
230
+ re.sub(r"[🎯❓🔧📊🌟🔗📄👥📅📡🤖#*_~]","",line.lstrip(">").strip()),
231
+ q_st))
232
  else:
233
+ story.append(Paragraph(clean, bd_style))
234
+ story += [
235
+ Spacer(1, 20),
236
+ HRFlowable(width="100%", thickness=0.5, color=colors.HexColor("#e2e8f0")),
237
+ Paragraph("Generated by Paper Discovery v7.4 — " +
238
+ datetime.now().strftime("%Y-%m-%d %H:%M"), mt_style)
239
+ ]
240
  try:
241
  doc.build(story); return path
242
  except Exception as e:
243
+ print("PDF error: " + str(e)); return None
244
 
245
  def gr_export_pdf(explanation_text, choice):
246
  if not explanation_text or len(explanation_text) < 50:
247
+ return None, "Explain a paper first."
248
  title = choice.split(". ", 1)[-1] if choice else "paper"
249
  path = export_explanation_pdf(explanation_text, title)
250
+ return (path, "PDF ready!") if path else (None, "PDF failed.")
251
 
252
  # ================================================================
253
  # SOURCE 1 — arXiv
254
+ # KEY FIX: sort_by parameter
255
+ # Browse → "submittedDate" latest papers
256
+ # Global → "relevance" exact title match
257
  # ================================================================
258
  def fetch_arxiv_papers(query, category, max_results=20, days_back=365,
259
  sort_by="submittedDate"):
260
  parts = []
 
261
  words = query.strip().split()
262
  if len(words) >= 3 and sort_by == "relevance":
263
+ parts.append('ti:"' + query.strip() + '"')
264
  elif query.strip():
265
+ parts.append("all:" + query.strip())
266
  if category.strip():
267
+ parts.append("cat:" + category.strip())
268
+ sq = " AND ".join(parts) if parts else "all:machine learning"
269
+ params = {
270
+ "search_query": sq,
271
+ "start": 0,
272
+ "max_results": max_results,
273
+ "sortBy": sort_by,
274
+ "sortOrder": "descending",
275
+ }
276
  try:
277
  resp = requests.get("http://export.arxiv.org/api/query", params=params, timeout=30)
278
  resp.raise_for_status()
279
+ except Exception as e:
280
+ print("arXiv error: " + str(e)); return []
281
 
282
  ns_a = "http://www.w3.org/2005/Atom"
283
  ns_x = "http://arxiv.org/schemas/atom"
284
  root = ET.fromstring(resp.content)
285
  cutoff = datetime.now() - timedelta(days=days_back)
286
  papers = []
287
+ for entry in root.findall("{" + ns_a + "}entry"):
288
  try:
289
+ pid = entry.find("{" + ns_a + "}id").text.split("/abs/")[-1].strip()
290
+ title = entry.find("{" + ns_a + "}title").text.strip().replace("\n"," ")
291
+ abstract = entry.find("{" + ns_a + "}summary").text.strip().replace("\n"," ")
292
+ published = entry.find("{" + ns_a + "}published").text[:10]
293
+ authors = [a.find("{" + ns_a + "}name").text
294
+ for a in entry.findall("{" + ns_a + "}author")]
295
  cats = set()
296
+ pc = entry.find("{" + ns_x + "}primary_category")
297
  if pc is not None: cats.add(pc.get("term",""))
298
+ for c in entry.findall("{" + ns_x + "}category"): cats.add(c.get("term",""))
299
  cats.discard("")
300
  papers.append({
301
+ "id": pid,
302
+ "title": title,
303
+ "authors": authors[:6],
304
+ "abstract": abstract[:1200],
305
+ "published": published,
306
+ "categories": list(cats)[:4],
307
+ "citations": None,
308
+ "url": "https://arxiv.org/abs/" + pid,
309
+ "pdf_url": "https://arxiv.org/pdf/" + pid,
310
+ "recent": datetime.strptime(published, "%Y-%m-%d") >= cutoff,
311
+ "source": "arXiv",
312
  })
313
+ except Exception as e:
314
+ print("arXiv parse: " + str(e))
315
  return papers
316
 
317
  # ================================================================
318
+ # SOURCE 2 — CrossRef
319
  # ================================================================
320
+ def fetch_crossref_papers(query, category_label="", max_results=20,
321
+ days_back=365, use_title=False):
322
  subject = CROSSREF_SUBJECTS.get(category_label, "")
323
+ full_query = (query + " " + subject).strip() if subject else query
324
+ key = "query.title" if use_title else "query"
325
  params = {
326
+ key: full_query,
327
+ "rows": min(max_results * 3, 200),
328
+ "sort": "relevance",
329
  "select": ("title,author,abstract,published,published-print,"
330
  "published-online,issued,created,DOI,"
331
  "is-referenced-by-count,link,subject"),
 
338
  if r.status_code == 200:
339
  items = r.json().get("message",{}).get("items",[]); break
340
  if r.status_code == 429: time.sleep(2**attempt); continue
341
+ print("CrossRef " + str(r.status_code)); return []
342
+ except Exception as e:
343
+ print("CrossRef attempt " + str(attempt) + ": " + str(e)); time.sleep(1)
344
 
345
  cutoff = datetime.now() - timedelta(days=days_back)
346
  papers, seen_ids = [], set()
 
353
  pub = parse_crossref_date(item)
354
  if pub == "N/A": continue
355
  cit = int(item.get("is-referenced-by-count", 0) or 0)
356
+ authors = [
357
+ (a.get("given","") + " " + a.get("family","")).strip()
358
+ for a in item.get("author",[])[:6]
359
+ ]
360
  authors = [a for a in authors if a.strip()] or ["Unknown"]
361
+ abstract = re.sub(r"<[^>]+>","",
362
+ item.get("abstract","No abstract.")).strip()[:1200]
363
+ doi = item.get("DOI","")
364
+ url = "https://doi.org/" + doi if doi else "#"
365
+ pid = doi or re.sub(r"\W","",title)[:40]
366
  if pid in seen_ids: continue
367
  seen_ids.add(pid)
368
+ pdf_url = next((l.get("URL","") for l in item.get("link",[])
369
+ if "pdf" in l.get("content-type","").lower()), "")
370
+ try: recent = datetime.strptime(pub[:10], "%Y-%m-%d") >= cutoff
371
  except: recent = False
372
  papers.append({
373
+ "id": pid,
374
+ "title": title,
375
+ "authors": authors,
376
+ "abstract": abstract,
377
+ "published": pub[:10],
378
  "categories": item.get("subject",[])[:3],
379
+ "citations": cit,
380
+ "url": url,
381
+ "pdf_url": pdf_url,
382
+ "recent": recent,
383
+ "source": "CrossRef",
384
  })
385
  papers.sort(key=lambda x: x["citations"], reverse=True)
386
  return papers
387
 
388
  # ================================================================
389
+ # GLOBAL PAPER SEARCH — relevance sorted
390
  # ================================================================
391
  def global_paper_search(query, source_choice, max_results=10):
392
  if not query or not query.strip():
393
+ return "Enter a title or keywords."
394
+ q = query.strip(); papers = []
395
+ if source_choice in ("arXiv", "Both"):
 
 
 
396
  papers += fetch_arxiv_papers(q, "", int(max_results), 3650,
397
  sort_by="relevance")
398
+ if source_choice in ("CrossRef", "Both"):
 
 
399
  papers += fetch_crossref_papers(q, "", int(max_results), 3650,
400
  use_title=True)
 
401
  if not papers:
402
+ return "No results for: " + q
403
 
 
404
  seen, unique = set(), []
405
  for p in papers:
406
  key = re.sub(r"\W","",p["title"].lower())[:60]
407
  if key not in seen: seen.add(key); unique.append(p)
 
 
408
  unique.sort(key=lambda x: x.get("citations") or 0, reverse=True)
409
 
410
+ NL = "\n"
411
+ md = "## Search Results: " + q + NL + NL
412
+ md += "**" + str(len(unique)) + " papers found**" + NL + NL + "---" + NL + NL
413
  for i, p in enumerate(unique, 1):
414
+ cit = (" | " + cit_badge(p.get("citations"))) if p.get("citations") else ""
415
+ cats = " | ".join(p.get("categories",[])[:2])
416
+ auth = ", ".join(p["authors"][:3])
417
+ abst = p["abstract"][:450]
418
+ link = "[View](" + p["url"] + ")"
419
+ pdf = (" [PDF](" + p["pdf_url"] + ")") if p.get("pdf_url") else ""
420
+ src = p.get("source","")
421
+ md += ("### " + str(i) + ". " + p["title"] + NL + NL +
422
+ auth + " | " + p["published"] + cit + " | " + src +
423
+ (" | " + cats if cats else "") + NL + NL +
424
+ "> " + abst + "..." + NL + NL +
425
+ link + pdf + NL + NL + "---" + NL + NL)
426
  return md
427
 
428
  # ================================================================
 
439
  id_map, batch_ids = {}, []
440
  for p in arxiv_papers:
441
  clean = re.sub(r"v\d+$","", p["id"].split("/")[-1].strip())
442
+ id_map[clean] = p
443
+ batch_ids.append("arXiv:" + clean)
444
  for i in range(0, len(batch_ids), 500):
445
  try:
446
  r = requests.post(
 
452
  for item in r.json():
453
  if not item: continue
454
  ext = item.get("externalIds") or {}
455
+ clean = re.sub(r"v\d+$","",
456
+ ext.get("ArXiv","").split("/")[-1].strip())
457
  if clean and clean in id_map:
458
  c = item.get("citationCount")
459
  if c is not None: id_map[clean]["citations"] = int(c)
460
  elif r.status_code == 429: time.sleep(4)
461
+ except Exception as e: print("S2 batch: " + str(e))
462
  for p in [x for x in arxiv_papers if (x.get("citations") or 0)==0][:15]:
463
  clean = re.sub(r"v\d+$","", p["id"].split("/")[-1].strip())
464
  for attempt in range(2):
465
  try:
466
  r = requests.get(
467
+ "https://api.semanticscholar.org/graph/v1/paper/arXiv:" + clean,
468
+ params={"fields":"citationCount"},
469
+ headers=s2_headers(), timeout=10)
470
+ if r.status_code == 200:
471
+ c = r.json().get("citationCount")
472
+ p["citations"] = int(c) if c else 0; break
473
+ if r.status_code == 429: time.sleep(2**attempt); continue
474
+ p["citations"] = 0; break
475
+ except: p["citations"] = 0; break
476
  time.sleep(0.12)
477
  for p in [x for x in arxiv_papers if (x.get("citations") or 0)==0]:
478
  try:
479
  r = requests.get("https://api.crossref.org/works",
480
+ params={"query.title": p["title"], "rows": 1,
481
+ "select": "is-referenced-by-count,title"},
482
  headers=cr_headers(), timeout=8)
483
+ if r.status_code == 200:
484
+ items = r.json().get("message",{}).get("items",[])
485
  if items:
486
+ found = (items[0].get("title") or [""])[0].lower()
487
+ qw = set(p["title"].lower().split()[:5])
488
+ fw = set(found.split()[:10])
489
+ p["citations"] = (
490
+ int(items[0].get("is-referenced-by-count",0) or 0)
491
+ if len(qw & fw) >= 2 else 0)
492
+ else: p["citations"] = 0
493
+ else: p["citations"] = 0
494
  time.sleep(0.12)
495
+ except: p["citations"] = 0
496
  for p in papers:
497
  if p.get("citations") is None: p["citations"] = 0
498
  return papers
 
504
  global FAISS_INDEX, PAPERS
505
  PAPERS = papers
506
  if not papers: FAISS_INDEX = None; return
507
+ texts = [p["title"] + " " + p["abstract"] for p in papers]
508
  embs = embedder.encode(texts, convert_to_numpy=True,
509
  normalize_embeddings=True).astype("float32")
510
+ idx = faiss.IndexFlatIP(embs.shape[1])
511
+ idx.add(embs)
512
  FAISS_INDEX = idx
513
 
514
  def search_papers(query, top_k=5):
 
516
  qe = embedder.encode([query], convert_to_numpy=True,
517
  normalize_embeddings=True).astype("float32")
518
  scores, ids = FAISS_INDEX.search(qe, min(top_k, len(PAPERS)))
519
+ return [{"paper": PAPERS[i], "score": float(s)}
520
+ for s, i in zip(scores[0], ids[0]) if i >= 0 and float(s) > 0.1]
521
 
522
  # ================================================================
523
  # AUTO-FETCH
 
532
  new_ps = [p for p in papers if p["id"] not in seen]
533
  if new_ps:
534
  save_seen_ids(seen | {p["id"] for p in papers})
535
+ AUTO_LOG.append(
536
+ "[" + datetime.now().strftime("%H:%M") + "] NEW " +
537
+ str(len(new_ps)) + " — " + query)
538
+ if len(AUTO_LOG) > 20: AUTO_LOG.pop(0)
539
 
540
def start_auto_fetch(query, cat_label, interval_min):
    """Launch the background auto-fetch worker thread.

    No-op (with a message) if a worker is already running. The thread is
    daemonized so it never blocks interpreter shutdown.
    """
    global AUTO_RUNNING
    if AUTO_RUNNING:
        return "Already running."
    AUTO_RUNNING = True
    # Resolve UI label -> arXiv category code, and minutes -> seconds.
    category = CATEGORIES.get(cat_label, "")
    interval_sec = int(interval_min) * 60
    worker = threading.Thread(
        target=auto_fetch_worker,
        args=(query, category, interval_sec),
        daemon=True)
    worker.start()
    return "Auto-fetch started every " + str(interval_min) + " min for: " + query
549
 
550
def stop_auto_fetch():
    """Signal the auto-fetch worker loop to stop on its next iteration."""
    global AUTO_RUNNING
    AUTO_RUNNING = False
    return "Stopped."
552
 
553
def get_auto_log():
    """Return the last 10 auto-fetch log entries, newest first, as markdown."""
    if not AUTO_LOG:
        return "No log."
    recent_entries = AUTO_LOG[-10:]
    return "\n\n".join(reversed(recent_entries))
555
 
556
  # ================================================================
557
  # TRENDS
558
  # ================================================================
559
def analyze_trends(papers):
    """Render a 2x3 dark-themed matplotlib dashboard for the given papers.

    Panels: papers/month (with linear trend line), top title keywords,
    source pie, top-10 cited bar, citation histogram, top authors.
    Returns (png_path, stats_markdown); (None, "No papers.") for empty input.
    """
    if not papers: return None, "No papers."
    # Monthly counts keyed by the YYYY-MM prefix of the published date.
    date_counts = Counter(p["published"][:7] for p in papers if p["published"]!="N/A")
    stopwords = {"the","a","an","of","in","for","on","with","and","or","to","using",
                 "based","via","from","by","is","are","our","we","this","that","which",
                 "towards","approach","method","new","into","over","learning","deep",
                 "model","models","data","neural","large","language","paper","study",
                 "analysis","results","show","also","can","used","two","its","their"}
    # Keyword frequencies from titles: words of 4+ letters, stopwords removed.
    all_words = [w.lower() for p in papers
                 for w in re.findall(r"[a-zA-Z]{4,}", p["title"])
                 if w.lower() not in stopwords]
    top_words = Counter(all_words).most_common(15)
    sources = Counter(p.get("source","arXiv") for p in papers)
    # Citation aggregates consider only papers with at least one citation.
    cit_papers = [p for p in papers if (p.get("citations") or 0)>0]
    top_cited = sorted(cit_papers, key=lambda x:x["citations"], reverse=True)[:10]
    # Author counts use only the first three authors of each paper.
    all_auth = [a for p in papers for a in p["authors"][:3]]
    top_authors = Counter(all_auth).most_common(10)
    cvals = [p["citations"] for p in cit_papers]
    # Half-open histogram buckets [lo, hi); 10000 acts as the open upper bound.
    buckets = [0,1,5,10,50,100,500,10000]
    blabels = ["0","1-4","5-9","10-49","50-99","100-499","500+"]
    bcounts = ([sum(1 for c in cvals if buckets[i]<=c<buckets[i+1])
                for i in range(len(buckets)-1)] if cvals else [0]*7)
    avg_cit = round(sum(cvals)/max(len(cvals),1),1) if cvals else 0
    total_cit = sum(p.get("citations") or 0 for p in papers)
    # Color palette and dark-theme constants (background/panel/border/text).
    C = ["#3b82f6","#8b5cf6","#10b981","#f59e0b","#ef4444","#06b6d4",
         "#ec4899","#14b8a6","#f97316","#a855f7","#22d3ee","#84cc16",
         "#fbbf24","#34d399","#f87171"]
    BG,PNL,BR,W = "#0f172a","#1e293b","#334155","white"
    fig, axes = plt.subplots(2, 3, figsize=(20,12))
    fig.patch.set_facecolor(BG)
    fig.suptitle("Research Trends", color=W, fontsize=16, fontweight="bold", y=1.01)
    def style(ax):
        # Apply the shared dark panel style to one axes.
        ax.set_facecolor(PNL)
        for sp in ax.spines.values(): sp.set_edgecolor(BR)
        ax.tick_params(colors=W, labelsize=8)
    # Panel (0,0): papers per month with value labels and a trend line.
    ax = axes[0,0]; style(ax)
    if date_counts:
        ms,cs = zip(*sorted(date_counts.items()))
        ms,cs = list(ms), list(cs)
        bars = ax.bar(ms, cs, color=C[0], edgecolor="#60a5fa", lw=0.8)
        for b,c in zip(bars,cs):
            ax.text(b.get_x()+b.get_width()/2, b.get_height()+.05, str(c),
                    ha="center", va="bottom", color=W, fontsize=8)
        if len(cs) > 2:
            # Degree-1 least-squares fit over month indices as the trend line.
            z = np.polyfit(range(len(cs)), cs, 1)
            ax.plot(ms, np.poly1d(z)(range(len(cs))), "--",
                    color="#f59e0b", lw=1.5, alpha=.8, label="Trend")
            ax.legend(fontsize=8, facecolor=PNL, labelcolor=W)
    ax.set_title("Papers per Month", color=W, fontsize=12, fontweight="bold", pad=10)
    ax.set_ylabel("Count", color=W, fontsize=9)
    ax.tick_params(rotation=45)
    # Panel (0,1): horizontal bars of top title keywords.
    ax = axes[0,1]; style(ax)
    if top_words:
        wds,wcts = zip(*top_words)
        ax.barh(list(wds), list(wcts), color=C[:len(wds)], edgecolor="#475569", lw=.6)
        for b,c in zip(ax.patches, wcts):
            ax.text(b.get_width()+.1, b.get_y()+b.get_height()/2, str(c),
                    va="center", color=W, fontsize=8)
    ax.set_title("Top Keywords", color=W, fontsize=12, fontweight="bold", pad=10)
    ax.set_xlabel("Frequency", color=W, fontsize=9)
    # Panel (0,2): pie chart of paper sources (arXiv vs CrossRef etc.).
    ax = axes[0,2]; ax.set_facecolor(PNL)
    if sources:
        sl,sv = zip(*sources.items())
        _,txts,ats = ax.pie(sv, labels=sl, autopct="%1.0f%%",
                            colors=C[:len(sl)], startangle=90,
                            textprops={"color":W,"fontsize":10},
                            wedgeprops={"edgecolor":BR,"linewidth":1.5})
        for at in ats: at.set_color(W); at.set_fontsize(9)
    ax.set_title("Source Distribution", color=W, fontsize=12, fontweight="bold", pad=10)
    # Panel (1,0): top-10 cited papers; titles truncated to 35 chars.
    ax = axes[1,0]; style(ax)
    if top_cited:
        lbls = [(p["title"][:35]+"..." if len(p["title"])>35 else p["title"])
                for p in top_cited]
        cv = [p["citations"] for p in top_cited]
        # Reversed so the highest-cited paper renders at the top of the barh.
        ax.barh(lbls[::-1], cv[::-1], color=C[1], edgecolor="#475569", lw=.6)
        mx = max(cv) if cv else 1
        for b,c in zip(ax.patches, cv[::-1]):
            ax.text(b.get_width()+mx*.01, b.get_y()+b.get_height()/2,
                    "{:,}".format(c), va="center", color=W, fontsize=8)
        ax.set_xlabel("Citations", color=W, fontsize=9)
    else:
        ax.text(.5,.5,"No citation data", ha="center", va="center",
                color="#94a3b8", fontsize=11, transform=ax.transAxes)
    ax.set_title("Top 10 Cited", color=W, fontsize=12, fontweight="bold", pad=10)
    # Panel (1,1): citation histogram with an avg/total annotation.
    ax = axes[1,1]; style(ax)
    if any(bcounts):
        ax.bar(blabels, bcounts, color=C[2], edgecolor="#475569", lw=.8)
        for b,c in zip(ax.patches, bcounts):
            if c > 0:
                ax.text(b.get_x()+b.get_width()/2, b.get_height()+.1, str(c),
                        ha="center", va="bottom", color=W, fontsize=9)
        ax.set_xlabel("Citation Range", color=W, fontsize=9)
        ax.set_ylabel("Papers", color=W, fontsize=9)
        ax.annotate("Avg " + str(avg_cit) + " | Total " + "{:,}".format(total_cit),
                    xy=(.98,.96), xycoords="axes fraction",
                    ha="right", va="top", color="#94a3b8", fontsize=8)
    else:
        ax.text(.5,.5,"No citation data", ha="center", va="center",
                color="#94a3b8", fontsize=11, transform=ax.transAxes)
    ax.set_title("Citation Distribution", color=W, fontsize=12, fontweight="bold", pad=10)
    # Panel (1,2): most frequent (first-three) authors.
    ax = axes[1,2]; style(ax)
    if top_authors:
        an,ac = zip(*top_authors)
        ax.barh(list(an)[::-1], list(ac)[::-1], color=C[3], edgecolor="#475569", lw=.6)
        for b,c in zip(ax.patches, list(ac)[::-1]):
            ax.text(b.get_width()+.05, b.get_y()+b.get_height()/2, str(c),
                    va="center", color=W, fontsize=8)
        ax.set_xlabel("Papers", color=W, fontsize=9)
    ax.set_title("Top Authors", color=W, fontsize=12, fontweight="bold", pad=10)
    plt.tight_layout(pad=3)
    # Persist the figure to the writable tmp dir; overwritten on each call.
    path = PERSIST_DIR + "/trends.png"
    plt.savefig(path, bbox_inches="tight", dpi=150, facecolor=BG)
    plt.close()
    # Markdown sidebar: summary table plus the five most-cited papers.
    top5 = sorted(cit_papers, key=lambda x:x["citations"], reverse=True)[:5]
    stats = ("### Stats\n\n| Metric | Value |\n|---|---|\n" +
             "| Total | **" + str(len(papers)) + "** |\n" +
             "| New | **" + str(sum(1 for p in papers if p.get("recent"))) + "** |\n" +
             "| Citations | **" + "{:,}".format(total_cit) + "** |\n" +
             "| Average | **" + str(avg_cit) + "** |\n\n")
    if top5:
        stats += "### Top Cited\n\n"
        for i,p in enumerate(top5,1):
            stats += (str(i) + ". [" + p["title"] + "](" + p["url"] + ")" +
                      " — **" + "{:,}".format(p["citations"]) + "**\n\n")
    return path, stats
684
 
685
  # ================================================================
 
691
  model="llama-3.3-70b-versatile",
692
  messages=messages, temperature=0.3, max_tokens=max_tokens)
693
  return r.choices[0].message.content.strip()
694
+ except Exception as e: return "LLM Error: " + str(e)
695
 
696
def explain_paper(paper, lang="ar"):
    """Ask the LLM for a structured explanation of one paper.

    lang == "ar" sends an Arabic system+user prompt (output post-processed
    by fix_ar_format); anything else sends the English prompt. The prompt
    embeds title, first 3 authors, date, citation count and abstract, and
    dictates the section headings of the answer.
    """
    cit = paper.get("citations","N/A")
    if lang == "ar":
        return fix_ar_format(_llm([
            {"role":"system","content": "أنت خبير أكاديمي يشرح الأبحاث بالعربية الفصحى.\n" + AR_RULES},
            {"role":"user","content":
                "اشرح الورقة:\nالعنوان: " + paper["title"] + "\n" +
                "المؤلفون: " + ", ".join(paper["authors"][:3]) + "\n" +
                "التاريخ: " + paper["published"] + " | الاقتباسات: " + str(cit) + "\n" +
                "الملخص: " + paper["abstract"] + "\n\n" +
                "## موضوع الورقة\n\n## المشكلة\n\n## المنهجية\n\n" +
                "## النتائج\n\n## الأهمية\n\n## التطبيقات"}]))
    return _llm([{"role":"user","content":
        "Explain:\nTitle: " + paper["title"] + "\nAuthors: " +
        ", ".join(paper["authors"][:3]) + "\nDate: " + paper["published"] +
        " | Citations: " + str(cit) + "\nAbstract: " + paper["abstract"] + "\n\n" +
        "## Topic\n## Problem\n## Methodology\n## Findings\n## Contribution\n## Applications"}])
713
 
714
def compare_papers(pa, pb, lang="ar"):
    """LLM comparison of two papers (title, citations, 500-char abstracts).

    The shared English `body` is embedded in either an Arabic or English
    prompt that fixes the section headings of the answer; Arabic output is
    normalized with fix_ar_format. Uses a 1400-token completion budget.
    """
    body = ("Paper A: " + pa["title"] + " | Citations: " + str(pa.get("citations","N/A")) +
            "\n" + pa["abstract"][:500] + "\n\nPaper B: " +
            pb["title"] + " | Citations: " + str(pb.get("citations","N/A")) +
            "\n" + pb["abstract"][:500])
    if lang == "ar":
        return fix_ar_format(_llm([{"role":"user","content":
            "قارن بين الورقتين.\n" + AR_RULES + "\n\n" + body + "\n\n" +
            "## الهدف\n\n## المنهجية\n\n## النتائج\n\n" +
            "## القوة\n\n## القيود\n\n## الخلاصة"}], 1400))
    return _llm([{"role":"user","content":
        "Compare:\n" + body + "\n\n" +
        "## Topic\n## Methodology\n## Results\n## Strengths\n## Limits\n## Verdict"}], 1400)
727
 
728
def summarize_papers(papers, topic, lang="ar"):
    """LLM literature overview of up to the first 8 papers on `topic`.

    Each paper contributes a numbered "title (date): abstract[:300]..." line;
    the Arabic/English prompt then dictates the overview's section headings.
    Uses a 900-token completion budget; Arabic output goes through
    fix_ar_format.
    """
    text = "".join(
        str(i) + ". " + p["title"] + " (" + p["published"] + "): " +
        p["abstract"][:300] + "...\n\n"
        for i,p in enumerate(papers[:8],1))
    if lang == "ar":
        return fix_ar_format(_llm([{"role":"user","content":
            "نظرة عامة أكاديمية حول \"" + topic + "\".\n" + AR_RULES +
            "\n\n" + text + "\n\n" +
            "## الاتجاهات\n\n## أبرز الأوراق\n\n" +
            "## المواضيع المشتركة\n\n## الفجوات"}], 900))
    return _llm([{"role":"user","content":
        "Academic overview of \"" + topic + "\":\n" + text + "\n\n" +
        "## Trends\n## Key Papers\n## Themes\n## Gaps"}], 900)
742
 
743
  def generate_bibliography(papers, style="APA"):
 
746
  auth = ", ".join(p["authors"][:6]) + (" et al." if len(p["authors"])>6 else "")
747
  year = p["published"][:4] if p["published"] not in ("N/A","") else "n.d."
748
  t,u = p["title"], p["url"]
749
+ if style == "APA":
750
+ entries.append(str(i) + ". " + auth + " (" + year + "). *" + t + "*. " + u)
751
+ elif style == "IEEE":
752
  ae = " and ".join(p["authors"][:3]) + (" et al." if len(p["authors"])>3 else "")
753
+ entries.append("[" + str(i) + "] " + ae + ', "' + t + '," ' + year + ". [Online]: " + u)
754
+ elif style == "Chicago":
755
+ entries.append(str(i) + ". " + auth + '. "' + t + '." (' + year + "). " + u)
756
  else:
757
  key = re.sub(r"\W","", (p["authors"][0].split()[-1]
758
+ if p["authors"] else "Auth")) + year
759
+ entries.append("@article{" + key + str(i) + ",\n title={" + t +
760
+ "},\n author={" + auth + "},\n year={" + year +
761
+ "},\n url={" + u + "}\n}")
762
  bib = "\n\n".join(entries)
763
+ path = PERSIST_DIR + "/bibliography_" + style + ".txt"
764
+ with open(path, "w", encoding="utf-8") as f: f.write(bib)
765
  return bib, path
766
 
767
def chat_about_papers(question, history):
    """RAG-style chat over the fetched papers.

    Detects the question language, retrieves the top-4 FAISS matches as
    context, keeps the last 4 turns of `history` (dicts with role/content),
    and answers via the LLM. Returns a plain string; Arabic answers are
    post-processed with fix_ar_format.
    """
    if not PAPERS:
        # Nothing indexed yet — prompt the user in their own language.
        return ("يرجى جلب الأوراق أولاً." if detect_lang(question)=="ar"
                else "Fetch papers first.")
    lang = detect_lang(question)
    relevant = search_papers(question, top_k=4)
    context = ""
    if relevant:
        context = ("الأوراق ذات الصلة:\n\n" if lang=="ar" else "Relevant papers:\n\n")
        for r in relevant:
            p = r["paper"]
            cit = (" | " + str(p["citations"]) + " citations") if p.get("citations") else ""
            # Per-paper context: title, date, citations, truncated abstract, URL.
            context += ("**" + p["title"] + "** (" + p["published"] + ")" + cit +
                        "\n" + p["abstract"][:400] + "\n🔗 " + p["url"] + "\n\n")
    sys_msg = (("أنت مساعد بحثي. أجب بالعربية الفصحى.\n" + AR_RULES) if lang=="ar"
               else "You are an academic assistant. Answer in English.")
    msgs = [{"role":"system","content":sys_msg}]
    # Only the last 4 turns are forwarded to keep the prompt small.
    for t in history[-4:]: msgs.append({"role":t["role"],"content":t["content"]})
    msgs.append({"role":"user","content":
        (context + "\nسؤال: " + question) if context else question})
    out = _llm(msgs, 800)
    return fix_ar_format(out) if lang=="ar" else out
789
 
 
791
  clean = clean_md(text)
792
  if not clean: return None
793
  try:
794
+ tts = gTTS(text=clean, lang=lang, slow=False)
795
+ path = PERSIST_DIR + "/audio_" + lang + ".mp3"
796
+ tts.save(path); return path
797
+ except Exception as e: print("TTS: " + str(e)); return None
798
 
799
  # ================================================================
800
  # GRADIO HANDLERS
 
802
  def gr_fetch(query, category_label, max_results, days_back, source_choice,
803
  progress=gr.Progress()):
804
  global ACTIVE_PAPERS
805
+ progress(0.05, desc="Connecting...")
806
  papers, warn = [], ""
807
+ if source_choice in ("arXiv", "Both"):
808
+ progress(0.15, desc="Fetching arXiv...")
809
  papers += fetch_arxiv_papers(query, CATEGORIES.get(category_label,""),
810
  int(max_results), int(days_back),
811
  sort_by="submittedDate")
812
+ if source_choice in ("CrossRef", "Both"):
813
+ progress(0.35, desc="Fetching CrossRef...")
814
  cr = fetch_crossref_papers(query, category_label, int(max_results), int(days_back))
815
+ if not cr: warn = "\n\n> CrossRef: no results."
816
  papers += cr
817
  seen, unique = set(), []
818
  for p in papers:
 
820
  if key not in seen: seen.add(key); unique.append(p)
821
  papers = unique
822
  if not papers:
823
+ return ("No results." + warn,
824
+ gr.update(choices=[], value=None), gr.update(choices=[], value=None),
825
+ gr.update(choices=[], value=None), gr.update(choices=[], value=None),
826
+ "0 papers")
827
+ progress(0.60, desc="Fetching citations...")
828
  papers = enrich_citations(papers)
829
+ progress(0.85, desc="FAISS indexing...")
830
  build_papers_index(papers)
831
  ACTIVE_PAPERS = list(papers)
832
  tbl, choices = build_table(papers)
833
  recent = sum(1 for p in papers if p.get("recent"))
834
  tot_cit = sum(p.get("citations") or 0 for p in papers)
835
  zero_cit = sum(1 for p in papers if (p.get("citations") or 0)==0)
836
+ note = ("\n\n> " + str(zero_cit) + " papers with 0 citations (new/unindexed)."
837
  if zero_cit else "")
838
+ md = ("## Fetched **" + str(len(papers)) + "** papers\n\n" +
839
+ "New: **" + str(recent) + "** | Citations: **" +
840
+ "{:,}".format(tot_cit) + "**" + warn + note +
841
+ "\n\n---\n\n" + tbl)
842
  upd = gr.update(choices=choices, value=choices[0] if choices else None)
843
  progress(1.0)
844
+ return md, upd, upd, upd, upd, str(len(papers)) + " papers | " + "{:,}".format(tot_cit) + " cit."
845
 
846
  def gr_filter_papers(year_from, year_to, cit_min, cit_max, sort_by):
847
  global ACTIVE_PAPERS
848
+ if not PAPERS: return "Fetch papers first.", gr.update(), "0"
849
  filtered = []
850
  for p in PAPERS:
851
  try:
 
855
  cit = int(p.get("citations") or 0)
856
  if cit < int(cit_min) or cit > int(cit_max): continue
857
  filtered.append(p)
858
+ if sort_by == "Newest": filtered.sort(key=lambda x: x["published"], reverse=True)
859
+ elif sort_by == "Oldest": filtered.sort(key=lambda x: x["published"])
860
+ elif sort_by == "Most Cited": filtered.sort(key=lambda x: x.get("citations") or 0, reverse=True)
861
+ elif sort_by == "Least Cited":filtered.sort(key=lambda x: x.get("citations") or 0)
 
 
 
 
862
  if not filtered:
863
+ ACTIVE_PAPERS = []
864
+ return "No matching papers.", gr.update(choices=[], value=None), "0"
865
  ACTIVE_PAPERS = list(filtered)
866
  tbl, choices = build_table(filtered)
867
  tot = sum(p.get("citations") or 0 for p in filtered)
868
+ md = ("## " + str(len(filtered)) + "/" + str(len(PAPERS)) + " papers" +
869
+ " | " + str(year_from) + "-" + str(year_to) +
870
+ " | cit " + str(cit_min) + "-" + str(cit_max) +
871
+ " | total " + "{:,}".format(tot) + "\n\n---\n\n" + tbl)
872
+ return md, gr.update(choices=choices, value=choices[0] if choices else None), str(len(filtered)) + "/" + str(len(PAPERS))
873
 
874
def gr_search_fetched(query):
    """Semantic (FAISS) search over the already-fetched papers.

    Returns a markdown listing of up to 8 matches with similarity %, authors,
    date, citation badge, source, truncated abstract and links. Returns a
    short instruction string when the query is blank, no papers are loaded,
    or nothing matches.
    """
    if not query or not query.strip(): return "Enter a query."
    if not PAPERS: return "Fetch papers first."
    results = search_papers(query.strip(), top_k=8)
    if not results: return "No results for: " + query
    NL = "\n"
    md = "## Search: " + query + " — " + str(len(results)) + " results" + NL + NL
    for r in results:
        p,s = r["paper"], r["score"]
        # (removed unused score-bar string that was computed but never rendered)
        cit = (" | " + cit_badge(p.get("citations"))) if p.get("citations") else ""
        link = "[View](" + p["url"] + ")"
        pdf = (" [PDF](" + p["pdf_url"] + ")") if p.get("pdf_url") else ""
        md += ("### " + "{:.0f}".format(s*100) + "% — " + p["title"] + NL + NL +
               ", ".join(p["authors"][:2]) + " | " + p["published"] + cit +
               " | " + p.get("source","") + NL + NL +
               "> " + p["abstract"][:350] + "..." + NL + NL +
               link + pdf + NL + NL + "---" + NL + NL)
    return md
893
 
894
def _get_paper(choice):
    """Resolve a dropdown choice like "3. Some title" to a paper dict.

    Parses the leading 1-based index and looks it up in the filtered pool
    (falling back to all papers). Returns None for a missing/ill-formed
    choice or a stale index. Narrowed from a bare `except:` to the specific
    failures the lookup can produce, so real bugs are no longer swallowed.
    """
    pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
    try:
        return pool[int(choice.split(".")[0]) - 1]
    except (AttributeError, ValueError, IndexError):
        # choice is None / not "N. ..." / index out of range.
        return None
898
 
899
def gr_explain(choice, lang_choice):
    """Build the metadata header for the selected paper and append the
    LLM-generated explanation in the chosen language."""
    if not choice:
        return "Fetch papers and select one."
    paper = _get_paper(choice)
    if not paper:
        return "Selection error."
    lang = "ar" if "Arabic" in lang_choice else "en"
    pdf_link = (" [PDF](" + paper["pdf_url"] + ")") if paper.get("pdf_url") else ""
    # Assemble the header as blank-line-separated markdown segments.
    meta = ("**Date:** " + paper["published"] +
            " | **Citations:** " + cit_badge(paper.get("citations")) +
            " | **Source:** " + paper.get("source","arXiv"))
    segments = [
        "# " + paper["title"],
        "**Authors:** " + ", ".join(paper["authors"]),
        meta,
        "[View Paper](" + paper["url"] + ")" + pdf_link,
        "---",
        "> " + paper["abstract"],
        "---",
        "## Explanation (Llama 3.3 70B)",
    ]
    header = "\n\n".join(segments) + "\n\n"
    return header + explain_paper(paper, lang)
918
 
919
def gr_audio(txt, lang_choice):
    """Narrate the explanation text via TTS; skips empty/very short text."""
    if not txt or len(txt) < 50:
        return None
    tts_lang = "ar" if "Arabic" in lang_choice else "en"
    return text_to_audio(txt, tts_lang)
922
 
923
def gr_save_fav(choice):
    """Persist the selected paper to the favorites store; returns a status string."""
    if not choice:
        return "Select a paper first."
    paper = _get_paper(choice)
    if not paper:
        return "Error."
    return save_favorite(paper)
927
 
928
def gr_show_favs():
    """Render the saved favorites as a markdown list.

    Fix: the heading previously concatenated "### Favorites" directly with
    the count, rendering as e.g. "Favorites3 papers"; a separator is added.
    """
    favs = load_favorites()
    if not favs: return "No saved papers."
    NL = "\n"
    lines = [("**" + p["title"] + "**" + NL +
              (p["authors"][0] if p["authors"] else "N/A") +
              " | " + p["published"] + " | " + p.get("source","") +
              " | " + cit_badge(p.get("citations")) +
              " | [Link](" + p["url"] + ")")
             for p in favs]
    return ("### Favorites — " + str(len(favs)) + " papers" + NL + NL +
            (NL + NL + "---" + NL + NL).join(lines))
940
 
941
def gr_compare(ca, cb, lc):
    """Run the LLM comparison between two distinct selected papers."""
    if not ca or not cb:
        return "Select two papers."
    first = _get_paper(ca)
    second = _get_paper(cb)
    if not first or not second:
        return "Selection error."
    if first["id"] == second["id"]:
        return "Select two different papers."
    target_lang = "ar" if "Arabic" in lc else "en"
    return compare_papers(first, second, target_lang)
947
 
948
def gr_overview(query, lc):
    """Generate an LLM literature overview of the active (or full) paper pool."""
    if not PAPERS:
        return "Fetch papers first."
    pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
    topic = query or "research"
    target_lang = "ar" if "Arabic" in lc else "en"
    return "## Overview\n\n" + summarize_papers(pool, topic, target_lang)
954
 
955
def gr_trends():
    """Build the trends dashboard for the active (or full) paper pool."""
    if not PAPERS:
        return None, "Fetch papers first."
    pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
    return analyze_trends(pool)
958
 
959
def gr_bib(style, progress=gr.Progress()):
    """Generate a bibliography in the chosen style.

    Returns (markdown code-block preview truncated to 3000 chars, file path).
    """
    if not PAPERS:
        return "Fetch papers first.", None
    progress(0.5, desc="Generating...")
    pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
    text, path = generate_bibliography(pool, style)
    progress(1.0)
    preview = text[:3000]
    if len(text) > 3000:
        preview += "..."
    return "```\n" + preview + "\n```", path
967
 
968
def gr_chat_fn(message, history):
    """Handle one chat turn: convert tuple history to role/content dicts,
    ask the paper-chat backend, and append the new (user, bot) pair."""
    if not message.strip():
        return history, ""
    dict_history = []
    for user_msg, bot_msg in history:
        if user_msg:
            dict_history.append({"role": "user", "content": user_msg})
        if bot_msg:
            dict_history.append({"role": "assistant", "content": bot_msg})
    reply = chat_about_papers(message, dict_history)
    history.append((message, reply))
    return history, ""
 
981
  footer{display:none!important}
982
  h1{text-align:center}
983
  .status-bar{font-size:.85rem;color:#94a3b8;padding:2px 0}
984
+ .legend{font-size:.8rem;color:#cbd5e1;background:#1e293b;
985
+ border-radius:8px;padding:6px 14px;margin-bottom:6px}
986
+ .filter-box{background:#1e293b;border-radius:10px;
987
+ padding:12px 16px;margin-top:8px}
988
+ .gs-box{background:#1e293b;border-radius:10px;padding:14px 18px;
989
+ margin-bottom:10px;border:1px solid #334155}
990
  """
991
 
992
  with gr.Blocks(
993
  theme=gr.themes.Soft(primary_hue="blue", secondary_hue="purple"),
994
+ title="Scientific Paper Discovery v7.4", css=CSS
995
  ) as demo:
996
 
997
+ gr.Markdown("# Scientific Paper Discovery v7.4\narXiv · CrossRef · Llama-3.3-70B · FAISS")
998
+ gr.Markdown("Citations: 🥇 >=1000 | 🏆 >=100 | ⭐ >=10 | 📄 <10 | · = 0",
999
+ elem_classes="legend")
1000
+ status_bar = gr.Markdown("No papers loaded yet.", elem_classes="status-bar")
 
1001
 
1002
  with gr.Tabs():
1003
 
1004
+ # ── TAB 1: BROWSE ──────────────────────────────────
1005
+ with gr.Tab("Browse / Search"):
1006
  with gr.Row():
1007
  with gr.Column(scale=3):
1008
+ t_query = gr.Textbox(label="Topic",
1009
  placeholder="ARIMA, inflation, LLM...",
1010
  value="economic forecasting")
1011
+ t_category = gr.Dropdown(label="Category",
1012
  choices=list(CATEGORIES.keys()),
1013
+ value="Economics")
1014
+ t_source = gr.Radio(label="Source",
1015
+ choices=["arXiv","CrossRef","Both"],
1016
  value="arXiv")
1017
  with gr.Column(scale=1):
1018
+ t_max = gr.Slider(5, 50, value=15, step=5, label="Max papers")
1019
+ t_days = gr.Slider(1, 1500, value=365, step=30, label="Last N days")
1020
+ btn_fetch = gr.Button("Fetch Papers", variant="primary", size="lg")
1021
+ papers_table_md = gr.Markdown("Results appear here.")
1022
+ paper_selector = gr.Dropdown(label="Select paper", choices=[], interactive=True)
1023
  with gr.Group(elem_classes="filter-box"):
1024
+ gr.Markdown("### Filter & Sort")
1025
  with gr.Row():
1026
+ f_year_from = gr.Slider(2000,2026,value=2020,step=1,label="Year from")
1027
+ f_year_to = gr.Slider(2000,2026,value=2026,step=1,label="Year to")
1028
  with gr.Row():
1029
+ f_cit_min = gr.Slider(0,5000,value=0, step=5,label="Citations min")
1030
+ f_cit_max = gr.Slider(0,5000,value=5000,step=5,label="Citations max")
1031
  with gr.Row():
1032
  f_sort = gr.Dropdown(choices=SORT_CHOICES,
1033
+ value="Most Cited",label="Sort",scale=3)
1034
+ btn_filter = gr.Button("Apply",variant="primary",scale=1)
1035
+ gr.Markdown("---\n### Semantic Search (FAISS — in loaded papers)")
1036
  with gr.Row():
1037
+ search_in_box = gr.Textbox(label="Search in loaded papers",
1038
  placeholder="ARIMA, transformer...",scale=5)
1039
+ btn_search_in = gr.Button("Search",scale=1)
1040
  search_in_out = gr.Markdown()
1041
 
1042
+ # ── TAB 2: GLOBAL SEARCH ───────────────────────────
1043
+ with gr.Tab("Global Search"):
1044
  gr.Markdown(
1045
+ "### Search any paper by title or keywords\n\n"
1046
+ "> Uses arXiv **relevance** sort + CrossRef **title** search.\n"
1047
+ "> Example: `Attention is All You Need`"
1048
  )
1049
  with gr.Group(elem_classes="gs-box"):
1050
  with gr.Row():
1051
  gs_query = gr.Textbox(
1052
+ label="Title or keywords",
1053
+ placeholder="Attention is All You Need | ARIMA forecasting ...",
1054
  scale=4)
1055
+ gs_source = gr.Radio(label="Source",
1056
+ choices=["arXiv","CrossRef","Both"],
1057
+ value="Both", scale=2)
1058
+ gs_max = gr.Slider(5,30,value=10,step=5,label="Max results",scale=1)
1059
+ btn_gs = gr.Button("Search Now", variant="primary", size="lg")
1060
+ gs_out = gr.Markdown("Enter a title or keywords...")
 
1061
 
1062
  # ── TAB 3: EXPLAIN ─────────────────────────────────
1063
+ with gr.Tab("Explain"):
1064
  with gr.Row():
1065
+ paper_sel2 = gr.Dropdown(label="Select paper",
1066
  choices=[], interactive=True, scale=4)
1067
+ lang_exp = gr.Radio(LANG_CHOICES, value="Arabic",
1068
+ label="Language", scale=1)
1069
  with gr.Row():
1070
+ btn_explain = gr.Button("Explain", variant="primary")
1071
+ btn_fav = gr.Button("Save Fav")
1072
+ btn_audio = gr.Button("Listen")
1073
+ btn_export_pdf = gr.Button("Export PDF", variant="secondary")
1074
  with gr.Row():
1075
  fav_status = gr.Markdown()
1076
  pdf_status = gr.Markdown()
1077
+ explanation_out = gr.Markdown("Fetch papers and select one.")
1078
+ audio_out = gr.Audio(label="Audio", type="filepath")
1079
+ pdf_out = gr.File(label="Download PDF")
1080
 
1081
  # ── TAB 4: COMPARE ─────────────────────────────────
1082
+ with gr.Tab("Compare"):
1083
  with gr.Row():
1084
+ cmp_a = gr.Dropdown(label="Paper A", choices=[], interactive=True)
1085
+ cmp_b = gr.Dropdown(label="Paper B", choices=[], interactive=True)
1086
+ lang_cmp = gr.Radio(LANG_CHOICES, value="Arabic",
1087
+ label="Language", scale=1)
1088
+ btn_compare = gr.Button("Compare", variant="primary")
1089
+ compare_out = gr.Markdown("Select two papers.")
1090
 
1091
  # ── TAB 5: OVERVIEW ────────────────────────────────
1092
+ with gr.Tab("Overview"):
1093
  with gr.Row():
1094
+ lang_ov = gr.Radio(LANG_CHOICES, value="Arabic",
1095
+ label="Language", scale=1)
1096
+ btn_overview = gr.Button("Generate Report", variant="primary", scale=3)
1097
+ overview_out = gr.Markdown("Fetch papers first.")
1098
 
1099
  # ── TAB 6: TRENDS ──────────────────────────────────
1100
+ with gr.Tab("Trends"):
1101
+ btn_trends = gr.Button("Analyze Trends", variant="primary", size="lg")
1102
+ trend_chart = gr.Image(label="Trends Dashboard", type="filepath")
1103
+ trend_stats = gr.Markdown("Fetch papers first.")
1104
 
1105
  # ── TAB 7: BIBLIOGRAPHY ────────────────────────────
1106
+ with gr.Tab("Bibliography"):
1107
+ bib_style = gr.Radio(["APA","IEEE","Chicago","BibTeX"],
1108
+ value="APA", label="Style")
1109
+ btn_bib = gr.Button("Generate Bibliography", variant="primary")
1110
  bib_out = gr.Markdown()
1111
+ bib_file = gr.File(label="Download")
1112
 
1113
  # ── TAB 8: FAVORITES ───────────────────────────────
1114
+ with gr.Tab("Favorites"):
1115
+ btn_show_fav = gr.Button("Show Favorites")
1116
+ favs_md = gr.Markdown("Press to show.")
1117
+ btn_export_fav = gr.Button("Export CSV", variant="secondary")
1118
+ fav_csv_file = gr.File(label="CSV File")
1119
 
1120
  # ── TAB 9: AUTO-FETCH ──────────────────────────────
1121
+ with gr.Tab("Auto-Fetch"):
1122
  with gr.Row():
1123
+ auto_q = gr.Textbox(label="Topic",
1124
  value="economic forecasting", scale=3)
1125
+ auto_cat = gr.Dropdown(label="Category",
1126
  choices=list(CATEGORIES.keys()),
1127
+ value="Economics", scale=2)
1128
  auto_interval = gr.Slider(5,120,value=60,step=5,
1129
+ label="Every (min)",scale=1)
1130
  with gr.Row():
1131
+ btn_start_auto = gr.Button("Start", variant="primary")
1132
+ btn_stop_auto = gr.Button("Stop", variant="stop")
1133
+ btn_refresh_log = gr.Button("Refresh Log")
1134
  auto_status = gr.Markdown()
1135
+ auto_log_md = gr.Markdown("No log.")
1136
 
1137
  # ── TAB 10: CHAT ───────────────────────────────────
1138
+ with gr.Tab("Chat"):
1139
+ chatbot_ui = gr.Chatbot(label="Research Assistant",
1140
+ height=480, bubble_full_width=False)
1141
  with gr.Row():
1142
+ chat_in = gr.Textbox(label="Question", scale=5,
1143
+ placeholder="Key findings? | ما أبرز النتائج؟")
1144
+ btn_send = gr.Button("Send", variant="primary", scale=1)
1145
+ btn_clear = gr.Button("Clear", size="sm")
1146
 
1147
  # ── TAB 11: ABOUT ──────────────────────────────────
1148
+ with gr.Tab("About"):
1149
  gr.Markdown("""
1150
+ ## Scientific Paper Discovery — v7.4
 
 
 
 
 
 
1151
 
1152
+ ### Search Mode Comparison
1153
+ | Mode | sortBy | Best for |
1154
  |---|---|---|
1155
+ | Browse tab | `submittedDate` | Latest papers on a topic |
1156
+ | Global Search | `relevance` + `ti:` | Finding a paper by exact title |
1157
+ | FAISS (internal) | Cosine similarity | Semantic search in loaded papers |
1158
+
1159
+ ### v7.4 Fixes
1160
+ - **arXiv Global Search** now uses `sortBy=relevance` + `ti:"..."` prefix
1161
+ - **CrossRef Global Search** now uses `query.title` for precise title matching
1162
+ - **SyntaxError fix**: removed backslashes from inside f-strings
1163
  """)
1164
 
1165
  # ── WIRING ──────────────────────────────────────────────
 
1181
  btn_gs.click(global_paper_search, inputs=[gs_query, gs_source, gs_max], outputs=[gs_out])
1182
  gs_query.submit(global_paper_search, inputs=[gs_query, gs_source, gs_max], outputs=[gs_out])
1183
 
1184
+ btn_explain.click(gr_explain, inputs=[paper_sel2, lang_exp], outputs=[explanation_out])
1185
+ btn_fav.click(gr_save_fav, inputs=[paper_sel2], outputs=[fav_status])
1186
+ btn_audio.click(gr_audio, inputs=[explanation_out, lang_exp], outputs=[audio_out])
1187
  btn_export_pdf.click(gr_export_pdf,
1188
  inputs=[explanation_out, paper_sel2],
1189
  outputs=[pdf_out, pdf_status])
1190
 
1191
+ btn_compare.click(gr_compare, inputs=[cmp_a, cmp_b, lang_cmp], outputs=[compare_out])
1192
+ btn_overview.click(gr_overview, inputs=[t_query, lang_ov], outputs=[overview_out])
1193
  btn_trends.click(gr_trends, outputs=[trend_chart, trend_stats])
1194
+ btn_bib.click(gr_bib, inputs=[bib_style], outputs=[bib_out, bib_file])
1195
 
1196
  btn_show_fav.click(gr_show_favs, outputs=[favs_md])
1197
  btn_export_fav.click(gr_export_fav, outputs=[fav_csv_file])