import io
import os

import pandas as pd
import rispy


class RisFileException(Exception):
    """Raised when the requested information cannot be parsed from a RIS file."""


class RisFile:
    """Parser for RIS files, built on top of the rispy module."""

    def __init__(self, file):
        """Remember the RIS source to parse.

        Args:
            file: Either a filesystem path (``str`` or ``os.PathLike``) or an
                already-open text stream (``io.StringIO`` or any other
                ``io.TextIOBase``).
        """
        self.file = file
        # Text handle handed to rispy; set by parse_info() before parsing.
        self.fHandle = None

    def _fetch_info(self, kwd: list) -> pd.DataFrame:
        """Collect the requested fields from every entry read via self.fHandle.

        parse_info() is expected to have assigned self.fHandle beforehand.

        Args:
            kwd: Field names to extract; each must be one of self.keywords.

        Returns:
            A DataFrame with one row per RIS entry and one column per
            requested field.

        Raises:
            RisFileException: If a requested field is not a known rispy key.
            KeyError: If an entry does not contain a requested field.
        """
        # Drop duplicate field names while keeping the caller's order.
        fields = list(dict.fromkeys(kwd))

        # Validate once, up front, so that invalid fields are reported even
        # when the file contains no entries, and the keyword set is built
        # a single time instead of once per field per entry.
        valid = self.keywords
        for key in fields:
            if key not in valid:
                raise RisFileException(f'Not valid info that can be parsed from ris file, all keywords: {valid}')

        collected = [
            {key: entry[key] for key in fields}
            for entry in rispy.load(self.fHandle)
        ]
        # Passing columns keeps the requested columns even for an empty file.
        return pd.DataFrame(collected, columns=fields)

    def parse_info(self, kwd: list) -> pd.DataFrame:
        """Parse the values of the given fields from the RIS source.

        A path is opened (and closed) here. A text stream is used as given:
        it is neither rewound nor closed by this method.

        Args:
            kwd: Field names to extract; each must be one of self.keywords.

        Returns:
            A DataFrame with one row per RIS entry and one column per
            requested field.

        Raises:
            RisFileException: If a requested field is not a known rispy key,
                or if self.file is neither a path nor a text stream.
            KeyError: If an entry does not contain a requested field.
        """
        if isinstance(self.file, (str, os.PathLike)):
            with open(self.file, 'r') as self.fHandle:
                return self._fetch_info(kwd)
        if isinstance(self.file, io.TextIOBase):
            self.fHandle = self.file
            return self._fetch_info(kwd)
        # Fail loudly instead of silently returning None for unsupported input.
        raise RisFileException(
            f'Unsupported file object of type {type(self.file).__name__}; '
            'expected a path or a text stream'
        )

    @property
    def keywords(self):
        """Return the set of field names rispy can parse (values of rispy.TAG_KEY_MAPPING)."""
        return set(rispy.TAG_KEY_MAPPING.values())


if __name__ == "__main__":
    risFile = RisFile(file='/home/silen/git_proj/ReviewGPT/test/G1/Paper035')
    print(risFile.keywords)
    print(risFile.parse_info(kwd=['doi', 'title', 'abstract']))