# Listing header: file size 1,454 Bytes, 9b8e6f6
import rispy
import pandas as pd
import io
class RisFileException(Exception):
    """Error raised when a RIS file cannot be parsed as requested."""
class RisFile:
    """RIS file parser built on top of the rispy module.

    ``file`` may be a filesystem path (``str`` or path-like object) or an
    already open text stream such as ``io.StringIO``.
    """

    def __init__(self, file):
        self.file = file
        # Text stream handed to rispy; assigned by parse_info() before parsing.
        self.fHandle = None

    def _fetch_info(self, kwd: list) -> pd.DataFrame:
        """Collect the requested fields from every record in ``self.fHandle``.

        Args:
            kwd: Field names to extract; each must be one of ``self.keywords``.

        Returns:
            A DataFrame with one row per record and one column per field.

        Raises:
            RisFileException: If any requested field is not a known keyword.
        """
        # Validate once, before reading anything, so an invalid keyword is
        # reported even when the file contains no records.
        valid = self.keywords
        if any(key not in valid for key in kwd):
            raise RisFileException(f'Not valid info that can be parsed from ris file, all keywords: {valid}')

        # entry[key] is a plain lookup: a record lacking a requested field
        # raises from the indexing itself.
        records = [
            {key: entry[key] for key in kwd}
            for entry in rispy.load(self.fHandle)
        ]
        return pd.DataFrame(records)

    def parse_info(self, kwd: list) -> pd.DataFrame:
        """Parse the values of the given fields from the RIS source.

        Args:
            kwd: Field names to extract; each must be one of ``self.keywords``.

        Returns:
            A DataFrame with one row per record and one column per field.

        Raises:
            RisFileException: If a field name is not a known keyword, or if
                ``self.file`` is neither a path nor a text stream.
        """
        source = self.file
        if isinstance(source, str) or hasattr(source, '__fspath__'):
            # Paths are opened here and closed as soon as parsing finishes.
            with open(source, 'r') as handle:
                self.fHandle = handle
                return self._fetch_info(kwd)
        if isinstance(source, io.TextIOBase):
            # Caller-owned text stream (io.StringIO, an open text file, ...):
            # use it directly and leave closing it to the caller.
            self.fHandle = source
            return self._fetch_info(kwd)
        raise RisFileException(
            f'Unsupported file type {type(source).__name__!r}: '
            'expected a path or a text stream'
        )

    @property
    def keywords(self):
        """Return the set of all field names that rispy can parse."""
        return set(rispy.TAG_KEY_MAPPING.values())
if __name__ == "__main__":
    # Manual smoke test: print the parseable keywords, then a few fields
    # from a sample RIS file.
    import sys

    # The RIS path may be given as the first command-line argument; without
    # one, fall back to the original developer-local sample file.
    ris_path = sys.argv[1] if len(sys.argv) > 1 else '/home/silen/git_proj/ReviewGPT/test/G1/Paper035'
    risFile = RisFile(file=ris_path)
    print(risFile.keywords)
    print(risFile.parse_info(kwd=['doi', 'title', 'abstract']))
|