Spaces:
Sleeping
Sleeping
| from scholarly import scholarly | |
| import csv | |
| from scholarly import ProxyGenerator, scholarly | |
| import os | |
| import requests | |
| api_key = os.getenv('ELSEVIER_API_KEY') | |
| # Initialize a global variable to track if the proxy setup has been done | |
| proxy_setup_done = False | |
| def setup_proxy(): | |
| global proxy_setup_done | |
| # Check if the proxy setup has already been done | |
| if not proxy_setup_done: | |
| # Set up a ProxyGenerator object to use free proxies | |
| pg = ProxyGenerator() | |
| pg.FreeProxies() | |
| scholarly.use_proxy(pg) | |
| # Mark the setup as done | |
| proxy_setup_done = True | |
| print("Proxy setup completed.") | |
| else: | |
| print("Proxy setup was already completed earlier in this session.") | |
| # Example usage | |
| setup_proxy() | |
| def fetch_papers(search_string, min_results=8): | |
| search_query = scholarly.search_pubs(search_string) | |
| papers_details = [] | |
| for _ in range(min_results): | |
| try: | |
| paper = next(search_query) | |
| paper_details = { | |
| 'title': paper['bib']['title'], | |
| 'author': paper['bib'].get('author'), | |
| 'pub_year': paper['bib'].get('pub_year'), | |
| 'publication_url': paper.get('pub_url', 'Not Available'), | |
| 'journal_name': paper['bib'].get('journal', 'Not Available'), | |
| # Attempting to extract DOI, publication date, and making an educated guess on paper type | |
| 'doi': paper.get('doi', 'Not Available'), | |
| 'publication_date': paper['bib'].get('pub_year', 'Not Available'), # Simplified to publication year | |
| 'paper_type': 'Journal' if 'journal' in paper['bib'] else 'Conference' if 'conference' in paper['bib'] else 'Primary Study' # Simplistic categorization | |
| } | |
| papers_details.append(paper_details) | |
| except StopIteration: | |
| break # Exit if there are no more results | |
| return papers_details | |
| def save_papers_to_csv(papers_details, filename='papers.csv'): | |
| fieldnames = ['title', 'author', 'pub_year', 'publication_url', 'journal_name', 'doi', 'publication_date', 'paper_type'] | |
| with open(filename, mode='w', newline='', encoding='utf-8') as file: | |
| writer = csv.DictWriter(file, fieldnames=fieldnames) | |
| writer.writeheader() | |
| for paper in papers_details: | |
| writer.writerow(paper) | |
| def search_elsevier(search_string, start_year, end_year, limit): | |
| url = "https://api.elsevier.com/content/search/scopus" | |
| headers = { | |
| "X-ELS-APIKey": api_key, | |
| "Accept": "application/json" | |
| } | |
| query = f"TITLE-ABS-KEY({search_string}) AND PUBYEAR = {start_year}" | |
| params = { | |
| "query": query, | |
| "count": limit, | |
| } | |
| response = requests.get(url, headers=headers, params=params) | |
| if response.status_code == 200: | |
| response_data = response.json() | |
| papers = response_data.get('search-results', {}).get('entry', []) | |
| parsed_papers = [] | |
| for paper in papers: | |
| parsed_paper = { | |
| "affiliation-country": next((affil.get("affiliation-country", "Not Available") for affil in paper.get("affiliation", [])), "Not Available"), | |
| "affilname": next((affil.get("affilname", "Not Available") for affil in paper.get("affiliation", [])), "Not Available"), | |
| "creator": paper.get("dc:creator", "Not Available"), | |
| "identifier": paper.get("dc:identifier", "Not Available"), | |
| "title": paper.get("dc:title", "Not Available"), | |
| "link": next((link["@href"] for link in paper.get("link", []) if link["@ref"] == "scopus"), "Not Available"), | |
| "year": paper.get("prism:coverDate", "Not Available").split("-")[0], | |
| "openaccess": paper.get("openaccess", "0") == "1", | |
| "publicationName": paper.get("prism:publicationName", "Not Available"), | |
| "aggregationType": paper.get("prism:aggregationType", "Not Available"), | |
| "volume": paper.get("prism:volume", "Not Available"), | |
| "doi": paper.get("prism:doi", "Not Available") | |
| } | |
| parsed_papers.append(parsed_paper) | |
| return parsed_papers | |
| else: | |
| print(f"Failed to fetch papers: {response.status_code} {response.text}") | |
| return {"error": "Failed to fetch papers from Elsevier", "status_code": response.status_code, "message": response.text} |