# ReviewGPT / utils / ris_parser.py
# Silen Wang
# v0.1 demo
# 9b8e6f6
import rispy
import pandas as pd
import io
class RisFileException(Exception):
    '''
    Error type raised by RisFile, e.g. when a requested field name is not
    one of the keywords that can be parsed from a RIS file.
    '''
class RisFile:
    '''
    RIS file parser built on the rispy module.

    Wraps either a path to a RIS file or an already-open text stream and
    extracts selected fields of every record into a pandas DataFrame.
    '''

    def __init__(self, file):
        '''
        Remember the RIS source; nothing is opened or read here.

        :param file: path to a RIS file (``str`` or path-like object), or an
            open text stream such as ``io.StringIO``.
        '''
        self.file = file
        # Text stream currently being parsed; only set while parse_info runs.
        self.fHandle = None

    def _fetch_info(self, kwd: list):
        '''
        Load every record from ``self.fHandle`` and keep the requested fields.

        :param kwd: field names to extract; each must be in ``self.keywords``.
        :return: DataFrame with one row per record and one column per
            requested field (duplicates in ``kwd`` collapse to one column).
        :raises RisFileException: if a name in ``kwd`` is not a known keyword.
        :raises KeyError: if a record does not contain a requested field.
        '''
        # Build the keyword set once and validate before reading, so that an
        # invalid field name is reported even when the file holds no records.
        valid = self.keywords
        for key in kwd:
            if key not in valid:
                raise RisFileException(f'Not valid info that can be parsed from ris file, all keywords: {valid}')

        # Preserve the caller's order while dropping repeated names.
        columns = list(dict.fromkeys(kwd))
        collected = [
            {key: entry[key] for key in columns}
            for entry in rispy.load(self.fHandle)
        ]
        # Passing columns keeps the header present for an empty file too.
        return pd.DataFrame(collected, columns=columns)

    def _parse_handle(self, handle, kwd: list):
        '''
        Run _fetch_info against ``handle`` and always drop the reference after.
        '''
        self.fHandle = handle
        try:
            return self._fetch_info(kwd)
        finally:
            # Do not keep a (possibly closed) stream on the instance.
            self.fHandle = None

    def parse_info(self, kwd: list):
        '''
        Parse the values of the given fields from the RIS source.

        :param kwd: field names to extract (see ``keywords``).
        :return: DataFrame with one row per record.
        :raises RisFileException: if a field name is not a known keyword, or
            if ``self.file`` is neither a path nor a text stream.
        :raises KeyError: if a record does not contain a requested field.
        '''
        source = self.file
        if isinstance(source, str) or hasattr(source, '__fspath__'):
            # NOTE(review): the file is opened with the platform default
            # encoding, as before - confirm whether UTF-8 should be forced.
            with open(source, 'r') as handle:
                return self._parse_handle(handle, kwd)
        if isinstance(source, io.TextIOBase):
            return self._parse_handle(source, kwd)
        # Previously this case fell through and silently returned None.
        raise RisFileException(
            f'Unsupported ris source type: {type(source).__name__}; '
            'expected a file path or a text stream'
        )

    @property
    def keywords(self):
        '''
        Set of all field names that rispy can parse, taken from the values
        of ``rispy.TAG_KEY_MAPPING``.
        '''
        return set(rispy.TAG_KEY_MAPPING.values())
if __name__ == "__main__":
    # Manual smoke test: print the parseable keywords, then a few fields of
    # every record in one RIS file.
    import sys

    # Default to the author's original sample file; an optional first
    # command-line argument overrides it so the demo runs on other machines.
    ris_path = sys.argv[1] if len(sys.argv) > 1 else '/home/silen/git_proj/ReviewGPT/test/G1/Paper035'
    risFile = RisFile(file=ris_path)
    print(risFile.keywords)
    print(risFile.parse_info(kwd=['doi', 'title', 'abstract']))