File size: 2,984 Bytes
b15b045
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65a6215
b15b045
 
65a6215
b15b045
 
 
 
 
 
 
 
 
 
65a6215
b15b045
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b42a81
b15b045
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
from paperqa import Docs
import requests
import paperqa
import json
from bs4 import BeautifulSoup
import urllib.request

class PDBQuery:
    """Build literature citations for a PDB entry.

    Fetches the entry record from the RCSB REST API, extracts the PubMed
    IDs of its primary citations, formats human-readable citation strings
    via NCBI E-utilities (esummary), and can dump the associated web pages
    to ``web_data.txt`` for downstream text processing.
    """

    def __init__(self, pdbid):
        # PDB accession code, e.g. "4HHB"
        self.pdbid = pdbid
        # PubMed IDs extracted from the entry's citation records
        # (the original assigned this twice; once is enough)
        self.pubids = []
        self.refs = []
        # Formatted citation strings accumulated by create_citation()
        self.citations = []

    def get_pdb_descriptions(self, pdbid):
        """Return the RCSB core-entry record for *pdbid* as a dict.

        Also caches the parsed JSON on ``self.data``.
        Raises ``requests.HTTPError`` for an unknown/invalid PDB id
        instead of attempting to JSON-decode an error page.
        """
        pdbrest = f"https://data.rcsb.org/rest/v1/core/entry/{pdbid}"
        r = requests.get(pdbrest)
        r.raise_for_status()
        self.data = r.json()
        return self.data

    def get_pubids(self):
        """Append the PubMed IDs found in ``self.result`` and return the list.

        Requires ``self.result`` to already hold an RCSB entry record
        (normally populated by create_citation()). Entries lacking a
        ``pdbx_database_id_pub_med`` key are skipped.
        """
        for paper in self.result.get('citation', []):
            pid = paper.get("pdbx_database_id_pub_med")
            if pid is not None:
                self.pubids.append(pid)
        return self.pubids

    def create_citation(self):
        """Build a formatted citation string for each PubMed ID of the entry.

        Returns the accumulated ``self.citations`` list.
        """
        self.result = self.get_pdb_descriptions(self.pdbid)
        self.pubids = self.get_pubids()

        for pid in self.pubids:
            puburl = (
                "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
                f"?db=pubmed&id={pid}&retmode=json"
            )
            # Single request — the original fetched the same URL twice.
            search_response = requests.get(puburl).json()

            # One binding for the per-article record instead of nine
            # repeated nested lookups.
            record = search_response["result"][str(pid)]
            title = record["title"]
            journal = record["source"]
            pub_date = record["pubdate"]
            volume = record["volume"]
            issue = record["issue"]
            pages = record["pages"]
            doi = record["elocationid"]

            # Equivalent to the original str(list).replace(...) hack:
            # str() of a list separates items with ", ".
            names = ", ".join(author["name"] for author in record["authors"])

            # esummary titles may carry italic markup; strip it for plain text.
            corrected_title = title.replace("<i>", "").replace("</i>", "")

            # BUGFIX: the cleaned title was computed but never included in
            # the citation — a citation without its title is incomplete.
            self.citations.append(
                f"{names}. {corrected_title} {journal} "
                f"{pub_date[0:4]};{volume}({issue}):{pages}. {doi}"
            )

        return self.citations

    def write_webdata(self):
        """Write the prettified HTML of each PubMed article page, plus the
        RCSB REST record for this entry, to ``web_data.txt``.

        Requires ``self.pubids`` to be populated (see create_citation()).
        """
        url_list = [f'https://pubmed.ncbi.nlm.nih.gov/{pid}' for pid in self.pubids]
        # finally add a basic PDB databank search page
        url_list.append(f"https://data.rcsb.org/rest/v1/core/entry/{self.pdbid}")

        with open('web_data.txt', "w", encoding="utf-8") as out:
            for url in url_list:
                r = requests.get(url)
                bs = BeautifulSoup(r.text, 'html.parser')
                # prettify() normalizes the markup; the original noted this
                # improved downstream results considerably.
                # BUGFIX: write the whole string at once instead of the
                # original character-by-character loop.
                out.write(bs.prettify())