Spaces:
Sleeping
Sleeping
Updated fetch function in app.py
Browse files
Force strict keyword search using "title" and "abstract"
app.py
CHANGED
|
@@ -4,13 +4,58 @@ import yaml
|
|
| 4 |
import gradio as gr
|
| 5 |
from smolagents import CodeAgent, HfApiModel, tool
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
@tool
|
| 8 |
-
def fetch_latest_arxiv_papers(keywords: list, num_results: int =
|
| 9 |
-
"""Fetches the latest research papers from arXiv based on
|
| 10 |
|
| 11 |
Args:
|
| 12 |
keywords: A list of keywords to search for relevant papers.
|
| 13 |
-
num_results: The number of papers to fetch (default is
|
| 14 |
|
| 15 |
Returns:
|
| 16 |
A list of dictionaries containing:
|
|
@@ -21,33 +66,45 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
|
|
| 21 |
- "link": A direct link to the paper on arXiv.
|
| 22 |
"""
|
| 23 |
try:
|
| 24 |
-
print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")
|
| 25 |
-
|
| 26 |
-
#
|
| 27 |
-
query = "+AND+".join([f"
|
| 28 |
-
query_encoded = urllib.parse.quote(query) # Encode
|
| 29 |
-
|
| 30 |
-
url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=
|
| 31 |
-
|
| 32 |
-
print(f"DEBUG: Query URL - {url}")
|
| 33 |
-
|
| 34 |
-
feed = feedparser.parse(url)
|
| 35 |
|
|
|
|
| 36 |
papers = []
|
|
|
|
| 37 |
for entry in feed.entries:
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
-
return papers
|
| 47 |
|
| 48 |
except Exception as e:
|
| 49 |
-
print(f"ERROR: {str(e)}")
|
| 50 |
-
return [f"Error fetching research papers: {str(e)}"]
|
| 51 |
|
| 52 |
|
| 53 |
|
|
|
|
| 4 |
import gradio as gr
|
| 5 |
from smolagents import CodeAgent, HfApiModel, tool
|
| 6 |
|
| 7 |
+
# @tool
|
| 8 |
+
# def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
|
| 9 |
+
# """Fetches the latest research papers from arXiv based on provided keywords.
|
| 10 |
+
|
| 11 |
+
# Args:
|
| 12 |
+
# keywords: A list of keywords to search for relevant papers.
|
| 13 |
+
# num_results: The number of papers to fetch (default is 3).
|
| 14 |
+
|
| 15 |
+
# Returns:
|
| 16 |
+
# A list of dictionaries containing:
|
| 17 |
+
# - "title": The title of the research paper.
|
| 18 |
+
# - "authors": The authors of the paper.
|
| 19 |
+
# - "year": The publication year.
|
| 20 |
+
# - "abstract": A summary of the research paper.
|
| 21 |
+
# - "link": A direct link to the paper on arXiv.
|
| 22 |
+
# """
|
| 23 |
+
# try:
|
| 24 |
+
# print(f"DEBUG: Searching arXiv papers with keywords: {keywords}") # Debug input
|
| 25 |
+
|
| 26 |
+
# #Properly format query with +AND+ for multiple keywords
|
| 27 |
+
# query = "+AND+".join([f"all:{kw}" for kw in keywords])
|
| 28 |
+
# query_encoded = urllib.parse.quote(query) # Encode spaces and special characters
|
| 29 |
+
|
| 30 |
+
# url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results={num_results}&sortBy=submittedDate&sortOrder=descending"
|
| 31 |
+
|
| 32 |
+
# print(f"DEBUG: Query URL - {url}") # Debug URL
|
| 33 |
+
|
| 34 |
+
# feed = feedparser.parse(url)
|
| 35 |
+
|
| 36 |
+
# papers = []
|
| 37 |
+
# for entry in feed.entries:
|
| 38 |
+
# papers.append({
|
| 39 |
+
# "title": entry.title,
|
| 40 |
+
# "authors": ", ".join(author.name for author in entry.authors),
|
| 41 |
+
# "year": entry.published[:4], # Extract year
|
| 42 |
+
# "abstract": entry.summary,
|
| 43 |
+
# "link": entry.link
|
| 44 |
+
# })
|
| 45 |
+
|
| 46 |
+
# return papers
|
| 47 |
+
|
| 48 |
+
# except Exception as e:
|
| 49 |
+
# print(f"ERROR: {str(e)}") # Debug errors
|
| 50 |
+
# return [f"Error fetching research papers: {str(e)}"]
|
| 51 |
+
|
| 52 |
@tool
def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
    """Fetches the latest research papers from arXiv based on **strict keyword presence**.

    Every keyword must occur (case-insensitively) in the title or the abstract of a
    returned paper. Papers whose titles contain more of the keywords are listed
    first; ties keep the newest-first order returned by arXiv.

    Args:
        keywords: A list of keywords to search for relevant papers.
        num_results: The number of papers to fetch (default is 5).

    Returns:
        A list of dictionaries containing:
            - "title": The title of the research paper.
            - "authors": The authors of the paper.
            - "year": The publication year.
            - "abstract": A summary of the research paper.
            - "link": A direct link to the paper on arXiv.
        If nothing matches or the request fails, the list instead holds a single
        dictionary with an "error" message.
    """
    try:
        print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")

        # A bare string would otherwise be iterated character by character.
        if isinstance(keywords, str):
            keywords = [keywords]

        # Collapse whitespace and drop embedded double quotes (they would terminate
        # the quoted phrase in the arXiv query); skip keywords that end up empty.
        terms = []
        for kw in keywords or []:
            cleaned = " ".join(str(kw).replace('"', " ").split())
            if cleaned:
                terms.append(cleaned)
        if not terms:
            return [{"error": "No keywords provided. Pass at least one keyword."}]

        # Force strict keyword search using "title" and "abstract" only.
        # The query is written with plain spaces and encoded exactly once by
        # urlencode (space -> '+'). Pre-joining with '+' and then percent-quoting
        # would send '%2B', i.e. literal plus signs, to the API.
        query = " AND ".join(f'(ti:"{term}" OR abs:"{term}")' for term in terms)
        params = urllib.parse.urlencode({
            "search_query": query,
            "start": 0,
            # Fetch a pool of at least 20 candidates for re-ranking, but never
            # fewer than the caller asked for.
            "max_results": max(20, num_results),
            "sortBy": "submittedDate",
            "sortOrder": "descending",
        })
        url = f"http://export.arxiv.org/api/query?{params}"

        print(f"DEBUG: Query URL - {url}")

        feed = feedparser.parse(url)

        needles = [term.lower() for term in terms]
        ranked = []  # (number of keywords found in the title, paper dict)
        for entry in feed.entries:
            # arXiv hard-wraps titles and abstracts; normalise whitespace so a
            # multi-word keyword still matches across a line break.
            title = " ".join(entry.title.lower().split())
            abstract = " ".join(entry.summary.lower().split())

            # Ensure ALL keywords appear in **either** title or abstract
            if all(needle in title or needle in abstract for needle in needles):
                title_hits = sum(needle in title for needle in needles)
                ranked.append((title_hits, {
                    "title": entry.title,
                    # Tolerate entries without an author list instead of failing
                    # the whole request.
                    "authors": ", ".join(
                        author.name for author in getattr(entry, "authors", [])
                    ),
                    "year": entry.published[:4],  # Extract year
                    "abstract": entry.summary,
                    "link": entry.link,
                }))

        # If no relevant papers found, return "No results found."
        if not ranked:
            return [{"error": "No results found. Try different keywords."}]

        # Prioritize papers where keywords appear in the **title**; the sort is
        # stable, so equally ranked papers stay newest-first.
        ranked.sort(key=lambda pair: pair[0], reverse=True)

        return [paper for _, paper in ranked[:num_results]]  # Return top-matching papers

    except Exception as e:
        print(f"ERROR: {str(e)}")
        return [{"error": f"Error fetching research papers: {str(e)}"}]
|
| 108 |
|
| 109 |
|
| 110 |
|