juniorjukeko commited on
Commit
c00c005
·
1 Parent(s): 87ff9aa

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -0
app.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain.document_loaders import PyPDFLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.chat_models import ChatOpenAI
5
+ from langchain import OpenAI
6
+ from langchain import PromptTemplate
7
+ from langchain.chains.summarize import load_summarize_chain
8
+ import gradio as gr
9
+
10
# HTML banner rendered at the top of the Gradio app (passed to gr.HTML in main()).
# NOTE: runtime string — typos ("bellow") intentionally left untouched here.
title = '''
<div style="text-align: left; font-family:Arial; color:Black; font-size: 16px; max-width: 750px;">
<h1>Small PDF Summarizer</h1>
<p style="text-align: left;">Upload a .PDF from your computer, click the "Upload PDF" button and fill OpenAI API Key. <br />
Output will be on the textbox bellow. You can also change some LLM configurations from the 'config' tab<br/>
</div>
'''

# HTML help text shown on the "Config" tab, explaining the custom-prompt feature.
desc_1 = '''
<div style="text-align: left; font-family:Arial; color:Black; font-size: 14px;">
<h3>Custom Prompt Template</h3>
<p style="text-align: left;">You can customize input prompt for the map and combine prompt of langchain's Map-Reduce Summarization pipeline
using the texboxt bellow.<br/>
Prompt which will be fed into LLM use the format of : <b>{textbox input} + {pdf_text} + "SUMMARY:"</b> <br/>
In essence each page of PDF will be summarized using map prompt, and each summary then be combined for final output using combine prompt.<br/>
<a href="https://python.langchain.com/docs/use_cases/summarization">More Info on Map-Reduce for Summarization</a>
</div>
'''

# Default per-page ("map") prompt for the map-reduce summarization chain.
# {text} is filled by langchain's PromptTemplate with one document chunk.
MAP_PROMPT = """
You will be given a page of text which section is enclosed in triple backticks (```).
Your goal is to give a summary of this section, ignoring references and footnote if present.
Your response should be at least 200 words only if input classified as academic text.
Your response must fully encompass what was said in the page.

```{text}```
SUMMARY:
"""
# Default "combine" prompt: merges the per-page summaries into the final output.
COMBINE_PROMPT = """
Write a full summary of the following text enclosed in triple backticks (```).
Full summary consists of a descriptive summary of at least 100 words (if possible),
followed by numbered list which covers key points of the text.

```{text}```
SUMMARY:
"""
# Tooltip text ("info" parameter) for the widgets on the Config tab.
config_info = {'temperature': 'Higher means more randomness to the output.',
               'max_tokens' : 'The maximum number of tokens to generate in the output.',
               'llm_list' : ''}
# Selectable OpenAI models mapped to their API family:
# 'chat' -> ChatOpenAI wrapper, 'instruct' -> completion-style OpenAI wrapper.
model_list = {'gpt-3.5-turbo':'chat',
              'gpt-4':'chat',
              'gpt-3.5-turbo-instruct':'instruct',
              'text-davinci-003':'instruct'}

# Shared splitter used when loading the PDF: ~10k-char chunks with 250-char overlap,
# splitting preferentially at paragraph/line boundaries.
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=250)
55
+
56
def summarize_pdf(pdf_file, api_key,
                  model_name, temperature, llm_max_tokens,
                  custom_map_prompt, custom_combine_prompt):
    """Summarize an uploaded PDF with langchain's map-reduce chain.

    Args:
        pdf_file: Gradio file object; only its ``.name`` (a temp path) is used.
        api_key: OpenAI API key, exported to the environment for the LLM client.
        model_name: key into ``model_list`` selecting chat vs. instruct wrapper.
        temperature: sampling temperature forwarded to the model.
        llm_max_tokens: max output tokens per LLM call (coerced to int).
        custom_map_prompt: optional user prompt text for the per-page (map) step;
            empty string means "use the default MAP_PROMPT".
        custom_combine_prompt: optional user prompt text for the combine step;
            empty string means "use the default COMBINE_PROMPT".

    Returns:
        The final combined summary string.

    Raises:
        gr.Error: from file_check() when the PDF exceeds 1 MB or 15 pages.
    """
    # file_check() reads the page count from this module-level global.
    global pdf_docs
    # Read the PDF and split it into chunks with the shared text_splitter.
    loader = PyPDFLoader(pdf_file.name)
    pdf_docs = loader.load_and_split(text_splitter)
    file_check(pdf_file)

    # Build the LLM client; model_list decides chat vs. completion API.
    os.environ["OPENAI_API_KEY"] = api_key
    if model_list[model_name] == 'chat':
        gpt_llm = ChatOpenAI(temperature=temperature, model_name=model_name, max_tokens=int(llm_max_tokens))
    else:
        gpt_llm = OpenAI(temperature=temperature, model_name=model_name, max_tokens=int(llm_max_tokens))

    # Pick prompt templates: user-supplied text (wrapped by generate_template)
    # or the module defaults when the textbox was left empty.
    if custom_map_prompt != "":
        map_template = PromptTemplate(template=generate_template(custom_map_prompt), input_variables=["text"])
    else:
        map_template = PromptTemplate(template=MAP_PROMPT, input_variables=["text"])

    if custom_combine_prompt != "":
        combine_template = PromptTemplate(template=generate_template(custom_combine_prompt), input_variables=["text"])
    else:
        combine_template = PromptTemplate(template=COMBINE_PROMPT, input_variables=["text"])

    map_reduce_chain = load_summarize_chain(
        gpt_llm,
        chain_type="map_reduce",
        # BUG FIX: original passed the undefined names `map_prompt` /
        # `combine_prompt`, raising NameError on every call; the templates
        # built above are what must be handed to the chain.
        map_prompt=map_template,
        combine_prompt=combine_template,
        return_intermediate_steps=True,
        token_max=3840  # limit the maximum number of tokens in the combined document (combine prompt).
    )
    map_reduce_outputs = map_reduce_chain({"input_documents": pdf_docs})
    return map_reduce_outputs['output_text']
93
+
94
def file_check(pdf_file):
    """Validate the uploaded PDF against the app's limits.

    Raises gr.Error when the file on disk is larger than 1 MB, or when the
    already-loaded module-global ``pdf_docs`` holds more than 15 pages.
    Returns None when both checks pass.
    """
    size_in_mb = os.path.getsize(pdf_file.name) / 1024 ** 2
    if size_in_mb > 1:
        raise gr.Error("Maximum File Size is 1MB!")
    if len(pdf_docs) > 15:
        raise gr.Error("Maximum File Length is 15 Pages!")
101
+
102
def generate_template(custom_prompt):
    """Turn user prompt text into a full langchain template string.

    Appends the standard suffix — a blank line, the ```{text}``` placeholder
    block, and a trailing "SUMMARY:" cue — so the result matches the shape of
    MAP_PROMPT / COMBINE_PROMPT and can be fed to PromptTemplate.
    """
    suffix = '''

```{text}```
SUMMARY:
'''
    return custom_prompt + suffix
109
+
110
def main():
    """Build and launch the Gradio UI for the PDF summarizer."""
    with gr.Blocks() as demo:
        gr.HTML(title)
        # --- Main tab: file upload, API key, trigger button, output box ---
        with gr.Tab("Main"):
            with gr.Column():
                pdf_doc = gr.File(label="Uploaded PDF:", file_types=['.pdf'], type="file")
                API_KEY = gr.Textbox(label="OpenAI API Key:", lines=1, type="password")
                summarize_button = gr.Button(value="Summarize!")
                summarized_text = gr.Textbox(label="Summary", lines=10, show_copy_button=True)


        # --- Config tab: model choice, sampling knobs, custom prompts ---
        with gr.Tab("Config"):
            # NOTE(review): passes dict_keys directly as choices — presumably
            # accepted by this gradio version; a list() wrap is the safer form.
            llm_model = gr.Dropdown(choices=model_list.keys(), label="LLM model used", value='gpt-3.5-turbo', interactive=True)
            with gr.Row():
                temperature = gr.Slider(minimum=0, maximum=0.5, step=0.1, label="temperature", info=config_info['temperature'])
                llm_max_tokens = gr.Radio(choices=[128, 256, 512], value=256, interactive=True, label="LLM max tokens", info=config_info['max_tokens'])
            gr.HTML(desc_1)
            with gr.Row():
                user_map_prompt = gr.Textbox(label="Map PROMPT", lines=10, interactive=True)
                user_comb_prompt = gr.Textbox(label="Combine PROMPT", lines=10, interactive=True)

            # Read-only view of the built-in default prompt templates.
            with gr.Accordion("Default Template", open=False):
                with gr.Row():
                    default_map_prompt = gr.Textbox(label="Map PROMPT", value=MAP_PROMPT, lines=10, interactive=False)
                    default_comb_prompt = gr.Textbox(label="Combine PROMPT", value=COMBINE_PROMPT, lines=10, interactive=False)
            # Preview of how the user's custom text is expanded by generate_template.
            with gr.Accordion("User Custom Prompt Preview", open=False):
                prompt_preview_button = gr.Button(value="View Custom Prompt")
                with gr.Row():
                    custom_map_view = gr.Textbox(label="Map PROMPT", lines=10, interactive=False)
                    custom_comb_view = gr.Textbox(label="Combine PROMPT", lines=10, interactive=False)

            # One button drives both previews: each click expands the map and
            # combine textboxes through generate_template.
            prompt_preview_button.click(generate_template, inputs=[user_map_prompt], outputs=[custom_map_view])
            prompt_preview_button.click(generate_template, inputs=[user_comb_prompt], outputs=[custom_comb_view])

        # Wire the summarize button to summarize_pdf; argument order must match
        # summarize_pdf's parameter order.
        list_inputs = [pdf_doc, API_KEY, llm_model, temperature, llm_max_tokens, user_map_prompt, user_comb_prompt]
        summarize_button.click(summarize_pdf, inputs=list_inputs, outputs=[summarized_text])

    # queue() serializes requests; share=False keeps the app local-only.
    demo.queue().launch(share=False)

if __name__ == "__main__":
    main()