Spaces:
Runtime error
Runtime error
| import os | |
| from paperqa import Docs | |
| import requests | |
| import paperqa | |
| import json | |
| from bs4 import BeautifulSoup | |
| import urllib.request | |
class PDBQuery:
    """Collect literature metadata for a single RCSB PDB entry.

    The class fetches the entry record from the RCSB REST API, extracts the
    PubMed IDs listed under its ``citation`` section, turns each ID into a
    one-line citation string via the NCBI E-utilities ``esummary`` endpoint,
    and can dump the related web pages to ``web_data.txt``.

    Attributes set by the methods:
        pdbid: The PDB identifier passed to the constructor.
        pubids: PubMed IDs found by the most recent ``get_pubids`` call.
        citations: Citation strings built by the most recent
            ``create_citation`` call.
        refs: Initialised to an empty list; not used by the methods here.
        data / result: The most recently fetched entry record (``None``
            until a fetch has happened).
    """

    # Seconds to wait on any single HTTP request, so that a stalled server
    # cannot block the caller forever (requests has no default timeout).
    REQUEST_TIMEOUT = 30

    def __init__(self, pdbid):
        """Store *pdbid* and start with empty result containers."""
        self.pdbid = pdbid
        self.pubids = []
        self.refs = []
        self.citations = []
        self.data = None
        self.result = None

    def get_pdb_descriptions(self, pdbid):
        """Fetch and return the RCSB core entry record for *pdbid*.

        The decoded JSON is also kept on ``self.data``.

        Raises:
            requests.HTTPError: If the server answers with an error status
                (for example an unknown PDB ID), instead of handing an error
                payload back as if it were an entry.
        """
        response = requests.get(
            f"https://data.rcsb.org/rest/v1/core/entry/{pdbid}",
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        self.data = response.json()
        return self.data

    def get_pubids(self):
        """Return the PubMed IDs listed in the entry's ``citation`` section.

        The entry record is fetched on demand when no record has been loaded
        yet, so this method can be called before ``create_citation``. The
        list is rebuilt on every call rather than appended to, so repeated
        calls do not produce duplicates. Citations without a PubMed ID are
        skipped.
        """
        if self.result is None:
            self.result = self.get_pdb_descriptions(self.pdbid)
        self.pubids = [
            paper["pdbx_database_id_pub_med"]
            for paper in self.result.get("citation", [])
            if paper.get("pdbx_database_id_pub_med") is not None
        ]
        return self.pubids

    @staticmethod
    def _format_citation(summary):
        """Render one esummary article record as a citation string.

        *summary* is the per-article mapping of an esummary JSON response;
        the keys read are ``authors`` (each with a ``name``), ``source``,
        ``pubdate``, ``volume``, ``issue``, ``pages`` and ``elocationid``.
        """
        # Join the names directly: going through str(list) and stripping
        # quote/bracket characters corrupts names such as "O'Brien A".
        names = ", ".join(author["name"] for author in summary["authors"])
        journal = summary["source"]
        pub_date = summary["pubdate"]
        volume = summary["volume"]
        issue = summary["issue"]
        pages = summary["pages"]
        doi = summary["elocationid"]
        # Only the first four characters of the publication date are kept.
        # The article title is not part of the citation string.
        return f"{names}.{journal} {pub_date[0:4]};{volume}({issue}):{pages}. {doi}"

    def create_citation(self):
        """Build and return citation strings for the entry's PubMed articles.

        Re-fetches the entry record, refreshes ``self.pubids``, then queries
        the esummary endpoint once per PubMed ID. ``self.citations`` is
        rebuilt from scratch on each call.

        Raises:
            requests.HTTPError: If any request returns an error status.
            KeyError: If an esummary response lacks an expected field.
        """
        self.result = self.get_pdb_descriptions(self.pdbid)
        self.citations = []
        for pid in self.get_pubids():
            puburl = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id={pid}&retmode=json'
            response = requests.get(puburl, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            # The per-article record is keyed by the ID as a string.
            summary = response.json()["result"][str(pid)]
            self.citations.append(self._format_citation(summary))
        return self.citations

    def write_webdata(self):
        """Write the PubMed pages and the entry record to ``web_data.txt``.

        One PubMed URL is requested per ID currently in ``self.pubids``,
        followed by the RCSB REST record for ``self.pdbid``. Each response is
        parsed with BeautifulSoup and its prettified text is written to the
        file; the file is overwritten on every call.
        """
        url_list = [f'https://pubmed.ncbi.nlm.nih.gov/{pid}' for pid in self.pubids]
        # Finally add the RCSB entry record for this PDB ID.
        url_list.append(f"https://data.rcsb.org/rest/v1/core/entry/{self.pdbid}")
        with open('web_data.txt', "w", encoding="utf-8") as out:
            for url in url_list:
                page = requests.get(url, timeout=self.REQUEST_TIMEOUT)
                # NOTE(review): prettify() re-indents the document but keeps
                # the HTML tags; if the intent was to strip markup, get_text()
                # would be the call to use - confirm before changing output.
                html = BeautifulSoup(page.text, 'html.parser').prettify()
                # Write the whole document at once, not character by character.
                out.write(html)