File size: 3,611 Bytes
c4ad2cf
 
 
 
9b8e6f6
 
 
 
 
 
 
c4ad2cf
9b8e6f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4ad2cf
9b8e6f6
c4ad2cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
'''
这个文件里的所有内容其实应该精简以后全部放在ReviewerBot下
'''

from json import loads
import pandas as pd
import io

from utils.review  import Reviewer
from utils.pubmed  import PubMedFetcher
from utils.ris_parser  import RisFile
from utils.pdf_parser  import PdfFile


def get_paper_info(inputMethod, email=None, pmids=None, ris_data=None):
    '''
    Collect paper information so it can be reviewed later.

    Parameters:
        inputMethod: 'PMID' to fetch records through PubMedFetcher, or
            'RIS File' to parse an uploaded RIS export.
        email: forwarded to PubMedFetcher (only used for 'PMID').
        pmids: one string holding one PMID per line (only used for 'PMID').
        ris_data: raw bytes of a UTF-8 encoded RIS file (only used for
            'RIS File').

    Returns:
        A pandas DataFrame. For 'RIS File' the parsed 'doi', 'title' and
        'abstract' columns are renamed to 'DOI', 'Title' and 'Abstract'.
        For 'PMID' the columns are whatever PubMedFetcher.fetch_abstract()
        yields (presumably 'PMID', 'Title', 'Abstract' -- TODO confirm
        against PubMedFetcher).

    Raises:
        ValueError: if inputMethod is not supported, or the input required
            by the chosen method is missing or blank.
    '''

    if inputMethod == 'PMID':
        if pmids is None:
            raise ValueError("pmids is required when inputMethod is 'PMID'.")
        # splitlines() copes with '\r\n' as well as '\n'; blank lines and
        # surrounding whitespace are dropped so no empty PMID is requested.
        pmid_list = [line.strip() for line in pmids.splitlines() if line.strip()]
        if not pmid_list:
            raise ValueError('No PMID found in pmids.')
        fetcher = PubMedFetcher(email=email, pmids=pmid_list)
        return pd.DataFrame(fetcher.fetch_abstract())

    if inputMethod == 'RIS File':
        if ris_data is None:
            raise ValueError("ris_data is required when inputMethod is 'RIS File'.")
        # 'utf-8-sig' decodes plain UTF-8 identically but also strips a
        # leading byte-order mark, which would otherwise end up in the text.
        fileHandle = io.StringIO(ris_data.decode('utf-8-sig'))
        risFile = RisFile(file=fileHandle)
        return risFile.parse_info(kwd=['doi', 'title', 'abstract']).rename(columns={
            'doi': 'DOI',
            'title': 'Title',
            'abstract': 'Abstract'
        })

    # Previously an unknown method fell through to an unbound local variable.
    raise ValueError(f'Unsupported inputMethod: {inputMethod!r}')


def _record_identifier(rec):
    '''
    Return (label, value) for a record's identifier, preferring PMID over DOI.
    '''
    for label in ('PMID', 'DOI'):
        if label in rec:
            return label, rec[label]
    raise ValueError('No PMID nor DOI in record.')


def _format_screen_answer(label, value, title, answer):
    '''
    Render one screening verdict as a Markdown fragment.
    '''
    return '\n'.join([
        f'**{label}: {value}**',
        f'- Title: {title}',
        f'- Inclusion: {answer["Inclusion"]}',
        '- Explanation:',
        # NOTE(review): assumes answer["Explanation"] is a list of strings;
        # a plain string would be iterated character by character.
        '\n'.join([f'    + {exp}' for exp in answer["Explanation"]])
    ])


def review(task, paper_info, prompts, openai_key, review_model):
    '''
    Run an abstract-based literature task through Reviewer:
    1. 'Screen': decide for each paper whether it should be included
    2. 'Summarise': summarise all papers together

    Parameters:
        task: 'Screen' or 'Summarise'.
        paper_info: DataFrame whose rows provide 'Abstract', either 'PMID'
            or 'DOI', and (for 'Screen') 'Title'.
        prompts: passed through to Reviewer.screen / Reviewer.summarise.
        openai_key: API key handed to Reviewer.
        review_model: model name handed to Reviewer.

    Returns:
        For 'Screen', a Markdown string with one section per paper, joined
        by a horizontal-rule separator. For 'Summarise', the message content
        of the first choice in the response.

    Raises:
        ValueError: if task is not supported, or a record has neither a
            PMID nor a DOI.
    '''

    # Reject unknown tasks up front instead of silently returning None.
    if task not in ('Screen', 'Summarise'):
        raise ValueError(f'Unsupported task: {task!r}')

    reviewer = Reviewer(api_key=openai_key, model=review_model)
    if task == 'Screen':  # inclusion screening
        answers = []
        for _, rec in paper_info.iterrows():
            # Resolve the identifier first so a record without one fails
            # before a model request is spent on it.
            label, value = _record_identifier(rec)
            response = reviewer.screen(prompts, rec['Abstract'])
            # The message content is parsed as JSON and read via its
            # "Inclusion" and "Explanation" keys.
            answer = loads(response['choices'][0]['message']['content'])
            answers.append(_format_screen_answer(label, value, rec['Title'], answer))
        return '\n\n------------------------\n\n'.join(answers)

    # task == 'Summarise'
    papers = [
        (_record_identifier(rec)[1], rec['Abstract'])
        for _, rec in paper_info.iterrows()
    ]
    response = reviewer.summarise(papers, prompts)
    return response['choices'][0]['message']['content']


def parse_pdf_info(pdf_data, openai_key):
    '''
    Parse a PDF through PdfFile. Kept as its own step because the parsing
    only needs to be done once.

    Parameters:
        pdf_data: raw bytes of the PDF; wrapped in an in-memory binary stream.
        openai_key: API key handed to PdfFile.

    Returns:
        Whatever PdfFile.parse_info() returns.
    '''
    return PdfFile(file=io.BytesIO(pdf_data), api_key=openai_key).parse_info()


def study(paper_data, prompts, openai_key, review_model):
    '''
    Read a paper from its already-parsed PDF data.

    Parameters:
        paper_data: parsed PDF data, passed to Reviewer.study.
        prompts: passed to Reviewer.study.
        openai_key: API key handed to Reviewer.
        review_model: model name handed to Reviewer.

    Returns:
        The message content of the first choice in the response.
    '''
    reply = Reviewer(api_key=openai_key, model=review_model).study(prompts, paper_data)
    first_choice = reply['choices'][0]
    return first_choice['message']['content']


def query(prompts, openai_key, review_model):
    '''
    Send prompts to Reviewer.query and return the reply text.

    Parameters:
        prompts: passed to Reviewer.query.
        openai_key: API key handed to Reviewer.
        review_model: model name handed to Reviewer.

    Returns:
        The message content of the first choice in the response.
    '''
    reply = Reviewer(api_key=openai_key, model=review_model).query(prompts)
    first_choice = reply['choices'][0]
    return first_choice['message']['content']