ScholarAgent

Sleeping

pdx97 committed on Mar 13

Commit

1fea399

verified ·

1 Parent(s): 1d916f0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -347,7 +347,7 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
         # ✅ Construct the query for ArXiv API
         query = "+AND+".join([f"all:{kw}" for kw in keywords])
         query_encoded = urllib.parse.quote(query)
-        url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=50&sortBy=submittedDate&sortOrder=descending"
         # ✅ Fetch papers from ArXiv
         feed = feedparser.parse(url)
@@ -370,7 +370,7 @@ def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
         # ✅ TF-IDF Vectorization
         corpus = [paper["title"] + " " + paper["abstract"] for paper in papers]
-        vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'))
         tfidf_matrix = vectorizer.fit_transform(corpus)
         # ✅ Transform Query into TF-IDF Vector

         # ✅ Construct the query for ArXiv API
         query = "+AND+".join([f"all:{kw}" for kw in keywords])
         query_encoded = urllib.parse.quote(query)
+        url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=5&sortBy=submittedDate&sortOrder=descending"
         # ✅ Fetch papers from ArXiv
         feed = feedparser.parse(url)
         # ✅ TF-IDF Vectorization
         corpus = [paper["title"] + " " + paper["abstract"] for paper in papers]
+        vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'),max_features=3000)
         tfidf_matrix = vectorizer.fit_transform(corpus)
         # ✅ Transform Query into TF-IDF Vector