| import requests | |
| import xml.etree.ElementTree as ET | |
| from scholarly import scholarly | |
class DataLoader:
    """Fetch research-paper metadata from ArXiv and Google Scholar.

    Both fetch methods return a list of dictionaries with the keys
    ``"title"``, ``"summary"`` and ``"link"``.
    """

    # Clark-notation namespace prefix used by every element of an Atom feed.
    _ATOM_NS = "{http://www.w3.org/2005/Atom}"
    # Endpoint of the ArXiv query API.
    _ARXIV_API_URL = "http://export.arxiv.org/api/query"
    # Seconds to wait for ArXiv before giving up instead of hanging forever.
    _REQUEST_TIMEOUT = 10

    # Optional agent used to suggest related topics. Declared at class level
    # so reading ``self.search_agent`` never raises AttributeError, while
    # code that assigns the attribute after construction keeps working.
    search_agent = None

    def __init__(self, search_agent=None):
        """Create a loader.

        Args:
            search_agent: Optional object exposing
                ``generate_reply(messages=[...])``. When given, it is used
                by ``fetch_arxiv_papers`` to widen searches that return too
                few results.
        """
        print("DataLoader Init")
        if search_agent is not None:
            self.search_agent = search_agent

    @staticmethod
    def _reply_text(reply):
        """Return the text content of an agent reply, or ``""`` if none.

        NOTE(review): assumes the agent may answer with a plain string, a
        dict carrying a ``"content"`` key, or ``None`` -- confirm against the
        agent implementation actually used.
        """
        if isinstance(reply, str):
            return reply
        if isinstance(reply, dict):
            return reply.get("content") or ""
        return ""

    @classmethod
    def _parse_arxiv_feed(cls, xml_text):
        """Convert an Atom feed document into a list of paper dictionaries.

        A missing ``title``, ``summary`` or ``id`` element yields an empty
        string for that field instead of raising; surrounding whitespace is
        stripped from every field.

        Raises:
            xml.etree.ElementTree.ParseError: If ``xml_text`` is not
                well-formed XML.
        """
        ns = cls._ATOM_NS
        root = ET.fromstring(xml_text)
        return [
            {
                "title": entry.findtext(f"{ns}title", default="").strip(),
                "summary": entry.findtext(f"{ns}summary", default="").strip(),
                "link": entry.findtext(f"{ns}id", default="").strip(),
            }
            for entry in root.findall(f"{ns}entry")
        ]

    def _search_arxiv(self, query, max_results):
        """Query the ArXiv API once and return the parsed entries.

        Returns an empty list when the server answers with a non-200 status.
        Network errors raised by ``requests`` propagate to the caller.
        """
        response = requests.get(
            self._ARXIV_API_URL,
            # Passing ``params`` lets requests percent-encode the query, so
            # characters such as '&' or '#' cannot corrupt the URL.
            params={
                "search_query": f"all:{query}",
                "start": 0,
                "max_results": max_results,
            },
            timeout=self._REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            return []
        return self._parse_arxiv_feed(response.text)

    def fetch_arxiv_papers(self, query, max_results=5):
        """Fetch the top research papers from ArXiv for the user query.

        If fewer than ``max_results`` papers are found and a search agent is
        configured, the agent is asked for related topics and each suggested
        topic is searched in turn until enough papers are collected.

        Args:
            query: Free-text search string.
            max_results: Maximum number of papers to return (default 5).

        Returns:
            list: Up to ``max_results`` dictionaries containing paper
            details (title, summary, link).
        """
        if max_results <= 0:
            return []

        papers = self._search_arxiv(query, max_results)
        if len(papers) >= max_results or not self.search_agent:
            return papers[:max_results]

        reply = self.search_agent.generate_reply(
            messages=[{"role": "user", "content": f"Suggest 3 related research topics for '{query}'"}]
        )

        # Track links already collected so a paper matched by several topics
        # is not returned twice.
        seen_links = {paper["link"] for paper in papers}
        for line in self._reply_text(reply).split("\n"):
            if len(papers) >= max_results:
                break
            topic = line.strip()
            if not topic:
                continue
            for paper in self._search_arxiv(topic, max_results):
                if paper["link"] not in seen_links:
                    seen_links.add(paper["link"])
                    papers.append(paper)

        return papers[:max_results]

    def fetch_google_scholar_papers(self, query, max_results=5):
        """Fetch the top research papers from Google Scholar.

        Args:
            query: Free-text search string.
            max_results: Maximum number of papers to return (default 5).

        Returns:
            list: Up to ``max_results`` dictionaries containing paper
            details (title, summary, link).
        """
        if max_results <= 0:
            return []

        papers = []
        for paper in scholarly.search_pubs(query):
            bib = paper.get("bib", {})
            papers.append({
                "title": bib.get("title", "No title available"),
                "summary": bib.get("abstract", "No summary available"),
                "link": paper.get("pub_url", "No link available"),
            })
            # Stop as soon as the limit is reached so no extra result is
            # pulled from the search iterator only to be discarded.
            if len(papers) >= max_results:
                break
        return papers