ScholarAgent

Sleeping

App Files Files Community

pdx97 committed on Mar 8

Commit

6cdbdc2

verified ·

1 Parent(s): f0e61d0

Updated fetch function in app.py

Browse files

Force strict keyword search using "title" and "abstract"

Files changed (1) hide show

app.py +81 -24

app.py CHANGED Viewed

@@ -4,13 +4,58 @@ import yaml
 import gradio as gr
 from smolagents import CodeAgent, HfApiModel, tool
 @tool
-def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
-    """Fetches the latest research papers from arXiv based on provided keywords.
     Args:
         keywords: A list of keywords to search for relevant papers.
-        num_results: The number of papers to fetch (default is 3).
     Returns:
         A list of dictionaries containing:
@@ -21,33 +66,45 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
             - "link": A direct link to the paper on arXiv.
     """
     try:
-        print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")  # Debug input
-        #Properly format query with +AND+ for multiple keywords
-        query = "+AND+".join([f"all:{kw}" for kw in keywords])
-        query_encoded = urllib.parse.quote(query)  # Encode spaces and special characters
-        url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results={num_results}&sortBy=submittedDate&sortOrder=descending"
-        print(f"DEBUG: Query URL - {url}")  # Debug URL
-        feed = feedparser.parse(url)
         papers = []
         for entry in feed.entries:
-            papers.append({
-                "title": entry.title,
-                "authors": ", ".join(author.name for author in entry.authors),
-                "year": entry.published[:4],  # Extract year
-                "abstract": entry.summary,
-                "link": entry.link
-            })
-        return papers
     except Exception as e:
-        print(f"ERROR: {str(e)}")  # Debug errors
-        return [f"Error fetching research papers: {str(e)}"]

 import gradio as gr
 from smolagents import CodeAgent, HfApiModel, tool
+# @tool
+# def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
+#     """Fetches the latest research papers from arXiv based on provided keywords.
+#     Args:
+#         keywords: A list of keywords to search for relevant papers.
+#         num_results: The number of papers to fetch (default is 3).
+#     Returns:
+#         A list of dictionaries containing:
+#             - "title": The title of the research paper.
+#             - "authors": The authors of the paper.
+#             - "year": The publication year.
+#             - "abstract": A summary of the research paper.
+#             - "link": A direct link to the paper on arXiv.
+#     """
+#     try:
+#         print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")  # Debug input
+#         #Properly format query with +AND+ for multiple keywords
+#         query = "+AND+".join([f"all:{kw}" for kw in keywords])
+#         query_encoded = urllib.parse.quote(query)  # Encode spaces and special characters
+#         url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results={num_results}&sortBy=submittedDate&sortOrder=descending"
+#         print(f"DEBUG: Query URL - {url}")  # Debug URL
+#         feed = feedparser.parse(url)
+#         papers = []
+#         for entry in feed.entries:
+#             papers.append({
+#                 "title": entry.title,
+#                 "authors": ", ".join(author.name for author in entry.authors),
+#                 "year": entry.published[:4],  # Extract year
+#                 "abstract": entry.summary,
+#                 "link": entry.link
+#             })
+#         return papers
+#     except Exception as e:
+#         print(f"ERROR: {str(e)}")  # Debug errors
+#         return [f"Error fetching research papers: {str(e)}"]
 @tool
+def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
+    """Fetches the latest research papers from arXiv based on **strict keyword presence**.
     Args:
         keywords: A list of keywords to search for relevant papers.
+        num_results: The number of papers to fetch (default is 5).
     Returns:
         A list of dictionaries containing:
             - "link": A direct link to the paper on arXiv.
     """
     try:
+        print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")
+        # Force strict keyword search using "title" and "abstract" only
+        query = "+AND+".join([f"(ti:\"{kw}\"+OR+abs:\"{kw}\")" for kw in keywords])
+        query_encoded = urllib.parse.quote(query)  # Encode query for URL
+        url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=20&sortBy=submittedDate&sortOrder=descending"
+        print(f"DEBUG: Query URL - {url}")
+        feed = feedparser.parse(url)
         papers = []
         for entry in feed.entries:
+            title = entry.title.lower()
+            abstract = entry.summary.lower()
+            # Ensure ALL keywords appear in **either** title or abstract
+            if all(kw.lower() in title or kw.lower() in abstract for kw in keywords):
+                papers.append({
+                    "title": entry.title,
+                    "authors": ", ".join(author.name for author in entry.authors),
+                    "year": entry.published[:4],  # Extract year
+                    "abstract": entry.summary,
+                    "link": entry.link
+                })
+        # If no relevant papers found, return "No results found."
+        if not papers:
+            return [{"error": "No results found. Try different keywords."}]
+        # Prioritize papers where keywords appear in the **title**
+        papers.sort(key=lambda x: sum(kw.lower() in x["title"].lower() for kw in keywords), reverse=True)
+        return papers[:num_results]  # Return top-matching papers
     except Exception as e:
+        print(f"ERROR: {str(e)}")
+        return [{"error": f"Error fetching research papers: {str(e)}"}]