Spaces:
Sleeping
Sleeping
Commit · d29770b
1 Parent(s): 8650c17
updates on the experiment
Browse files
- app.py +5 -3
- backend.py +39 -20
- offline_results/exp_ga.csv +0 -0
- offline_results/exp_gb.csv +0 -0
- offline_results/exp_test.csv +0 -0
app.py
CHANGED
|
@@ -35,7 +35,9 @@ with gr.Blocks(theme="dark") as demo:
|
|
| 35 |
openai_key = gr.Textbox(
|
| 36 |
label='Enter your OpenAI API key here',
|
| 37 |
type='password')
|
| 38 |
-
|
|
|
|
|
|
|
| 39 |
file = gr.File(label='Upload your .txt or .pdf file here', file_types=['.txt', '.pdf'], file_count = 'multiple')
|
| 40 |
btn_submit_txt_online = gr.Button(value='Submit')
|
| 41 |
# btn_submit_txt.style(full_width=True)
|
|
@@ -43,7 +45,7 @@ with gr.Blocks(theme="dark") as demo:
|
|
| 43 |
with gr.Group():
|
| 44 |
gr.Markdown(f'<center><h2>Or Load Offline</h2></center>')
|
| 45 |
questions = gr.CheckboxGroup(choices = QUESTIONS, value = QUESTIONS, label="Questions (Please don't change this part now)", info="Please select the question you want to ask")
|
| 46 |
-
answer_type = gr.Radio(choices = ["ChatGPT_txt", "GPT4_txt", 'New_GPT_4_pdf'], label="Answer_type", info="Please select the type of answer you want to show")
|
| 47 |
btn_submit_txt_offline = gr.Button(value='Show Answers')
|
| 48 |
# btn_submit_txt.style(full_width=True)
|
| 49 |
|
|
@@ -119,7 +121,7 @@ with gr.Blocks(theme="dark") as demo:
|
|
| 119 |
# Submit button
|
| 120 |
btn_submit_txt_online.click(
|
| 121 |
backend.process_file_online,
|
| 122 |
-
inputs=[file, questions, openai_key],
|
| 123 |
outputs=[filename_box, question_box, answer_box, reference_box, highlighted_text, correct_answer, correct_reference],
|
| 124 |
)
|
| 125 |
|
|
|
|
| 35 |
openai_key = gr.Textbox(
|
| 36 |
label='Enter your OpenAI API key here',
|
| 37 |
type='password')
|
| 38 |
+
|
| 39 |
+
model_selection = gr.Radio(choices = ["ChatGPT", "GPT4"], label="Model Selection", info="Please select the model you want to use")
|
| 40 |
+
|
| 41 |
file = gr.File(label='Upload your .txt or .pdf file here', file_types=['.txt', '.pdf'], file_count = 'multiple')
|
| 42 |
btn_submit_txt_online = gr.Button(value='Submit')
|
| 43 |
# btn_submit_txt.style(full_width=True)
|
|
|
|
| 45 |
with gr.Group():
|
| 46 |
gr.Markdown(f'<center><h2>Or Load Offline</h2></center>')
|
| 47 |
questions = gr.CheckboxGroup(choices = QUESTIONS, value = QUESTIONS, label="Questions (Please don't change this part now)", info="Please select the question you want to ask")
|
| 48 |
+
answer_type = gr.Radio(choices = ["ChatGPT_txt", "GPT4_txt", 'New_GPT_4_pdf', 'Exp_text', 'Exp_Group_A', 'Exp_Group_B'], label="Answer_type", info="Please select the type of answer you want to show")
|
| 49 |
btn_submit_txt_offline = gr.Button(value='Show Answers')
|
| 50 |
# btn_submit_txt.style(full_width=True)
|
| 51 |
|
|
|
|
| 121 |
# Submit button
|
| 122 |
btn_submit_txt_online.click(
|
| 123 |
backend.process_file_online,
|
| 124 |
+
inputs=[file, questions, openai_key, model_selection],
|
| 125 |
outputs=[filename_box, question_box, answer_box, reference_box, highlighted_text, correct_answer, correct_reference],
|
| 126 |
)
|
| 127 |
|
backend.py
CHANGED
|
@@ -73,7 +73,7 @@ class Backend:
|
|
| 73 |
|
| 74 |
return text
|
| 75 |
|
| 76 |
-
def process_file_online(self, file, questions, openai_key, progress = gr.Progress()):
|
| 77 |
# record the questions
|
| 78 |
self.questions = questions
|
| 79 |
|
|
@@ -83,10 +83,16 @@ class Backend:
|
|
| 83 |
# make the prompt
|
| 84 |
prompt_list = [self.prompt.get(text, questions, 'v3') for text in self.text_list]
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
# interact with openai
|
| 87 |
self.res_list = []
|
| 88 |
for prompt in progress.tqdm(prompt_list, desc = 'Generating answers...'):
|
| 89 |
-
res = self.agent(prompt, with_history = False, temperature = 0.1, model =
|
| 90 |
res = self.prompt.process_result(res, 'v3')
|
| 91 |
self.res_list.append(res)
|
| 92 |
|
|
@@ -362,29 +368,42 @@ class Backend:
|
|
| 362 |
df = pd.read_csv('./offline_results/results_all_gpt4.csv')
|
| 363 |
elif answer_type == 'New_GPT_4_pdf':
|
| 364 |
df = pd.read_csv('./offline_results/results_new_pdf.csv')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
|
| 366 |
# make the prompt
|
| 367 |
self.res_list = self.phase_df(df)
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
self.filename_list = df['fn'].tolist()
|
| 374 |
-
self.
|
| 375 |
|
| 376 |
-
self.text_list = []
|
| 377 |
-
for file in progress.tqdm(self.filename_list):
|
| 378 |
-
if file.split('.')[-1] == 'pdf':
|
| 379 |
-
# convert pdf to txt
|
| 380 |
-
text = self.phrase_pdf(os.path.join(txt_root_path, file))
|
| 381 |
-
|
| 382 |
-
else:
|
| 383 |
-
text_path = os.path.join(txt_root_path, file)
|
| 384 |
-
with open(text_path, 'r', encoding='utf-8') as f:
|
| 385 |
-
text = f.read()
|
| 386 |
-
|
| 387 |
-
self.text_list.append(text)
|
| 388 |
|
| 389 |
# Use the first file as default
|
| 390 |
# Use the first question for multiple questions
|
|
|
|
| 73 |
|
| 74 |
return text
|
| 75 |
|
| 76 |
+
def process_file_online(self, file, questions, openai_key, model_selection, progress = gr.Progress()):
|
| 77 |
# record the questions
|
| 78 |
self.questions = questions
|
| 79 |
|
|
|
|
| 83 |
# make the prompt
|
| 84 |
prompt_list = [self.prompt.get(text, questions, 'v3') for text in self.text_list]
|
| 85 |
|
| 86 |
+
# select the model
|
| 87 |
+
if model_selection == 'ChatGPT':
|
| 88 |
+
model = 'gpt-3.5-turbo-16k'
|
| 89 |
+
elif model_selection == 'GPT4':
|
| 90 |
+
model = 'gpt-4-1106-preview'
|
| 91 |
+
|
| 92 |
# interact with openai
|
| 93 |
self.res_list = []
|
| 94 |
for prompt in progress.tqdm(prompt_list, desc = 'Generating answers...'):
|
| 95 |
+
res = self.agent(prompt, with_history = False, temperature = 0.1, model = model, api_key = openai_key)
|
| 96 |
res = self.prompt.process_result(res, 'v3')
|
| 97 |
self.res_list.append(res)
|
| 98 |
|
|
|
|
| 368 |
df = pd.read_csv('./offline_results/results_all_gpt4.csv')
|
| 369 |
elif answer_type == 'New_GPT_4_pdf':
|
| 370 |
df = pd.read_csv('./offline_results/results_new_pdf.csv')
|
| 371 |
+
elif answer_type == 'Exp_text':
|
| 372 |
+
df = pd.read_csv('./offline_results/exp_test.csv')
|
| 373 |
+
elif answer_type == 'Exp_Group_A':
|
| 374 |
+
df = pd.read_csv('./offline_results/exp_ga.csv')
|
| 375 |
+
elif answer_type == 'Exp_Group_B':
|
| 376 |
+
df = pd.read_csv('./offline_results/exp_gb.csv')
|
| 377 |
|
| 378 |
# make the prompt
|
| 379 |
self.res_list = self.phase_df(df)
|
| 380 |
+
|
| 381 |
+
if answer_type in ['ChatGPT_txt', 'GPT4_txt', 'New_GPT_4_pdf']:
|
| 382 |
+
if answer_type == 'ChatGPT_txt' or answer_type == 'GPT4_txt':
|
| 383 |
+
txt_root_path = './20230808-AI coding-1st round'
|
| 384 |
+
self.filename_list = df['fn'].tolist()
|
| 385 |
+
elif answer_type == 'New_GPT_4_pdf':
|
| 386 |
+
txt_root_path = './new_pdfs'
|
| 387 |
+
self.filename_list = df['fn'].tolist()
|
| 388 |
+
self.filename_list = ['.'.join(f.split('.')[:-1]) + '.txt' for f in self.filename_list]
|
| 389 |
+
|
| 390 |
+
self.text_list = []
|
| 391 |
+
for file in progress.tqdm(self.filename_list):
|
| 392 |
+
if file.split('.')[-1] == 'pdf':
|
| 393 |
+
# convert pdf to txt
|
| 394 |
+
text = self.phrase_pdf(os.path.join(txt_root_path, file))
|
| 395 |
+
|
| 396 |
+
else:
|
| 397 |
+
text_path = os.path.join(txt_root_path, file)
|
| 398 |
+
with open(text_path, 'r', encoding='utf-8') as f:
|
| 399 |
+
text = f.read()
|
| 400 |
+
|
| 401 |
+
self.text_list.append(text)
|
| 402 |
+
|
| 403 |
+
elif answer_type in ['Exp_text', 'Exp_Group_A', 'Exp_Group_B']:
|
| 404 |
self.filename_list = df['fn'].tolist()
|
| 405 |
+
self.text_list = df['content'].tolist()
|
| 406 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
|
| 408 |
# Use the first file as default
|
| 409 |
# Use the first question for multiple questions
|
offline_results/exp_ga.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
offline_results/exp_gb.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
offline_results/exp_test.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|