Spaces:
Runtime error
Runtime error
kartheikiyer
committed on
Commit
·
c0025fa
1
Parent(s):
2f5de34
more df index fixes
Browse files
- .ipynb_checkpoints/app_gradio-checkpoint.py +14 -1
- app_gradio.py +14 -1
.ipynb_checkpoints/app_gradio-checkpoint.py
CHANGED
|
@@ -533,12 +533,16 @@ def compileinfo(query, atom_qns, atom_qn_ans, atom_qn_strs):
|
|
| 533 |
def deep_research(question, top_k, ec):
|
| 534 |
|
| 535 |
full_answer = '## ' + question
|
|
|
|
| 536 |
|
| 537 |
gen_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
|
| 538 |
messages = [("system",df_atomic_prompt,),("human", question),]
|
| 539 |
rscope_text = gen_client.invoke(messages).content
|
| 540 |
|
| 541 |
full_answer = full_answer +' \n'+ rscope_text
|
|
|
|
|
|
|
|
|
|
| 542 |
|
| 543 |
rscope_messages = [("system","""In the given text, what are the main atomic questions being asked? Please answer as a concise list.""",),("human", rscope_text),]
|
| 544 |
rscope_qns = gen_client.invoke(rscope_messages).content
|
|
@@ -563,11 +567,15 @@ def deep_research(question, top_k, ec):
|
|
| 563 |
atom_qn_strs.append(linkstr)
|
| 564 |
full_answer = full_answer +' \n### '+atom_qns[i]
|
| 565 |
full_answer = full_answer +' \n'+smallans
|
|
|
|
|
|
|
|
|
|
| 566 |
|
| 567 |
finalans, finallinks = compileinfo(question, atom_qns, atom_qn_ans, atom_qn_strs)
|
| 568 |
full_answer = full_answer +' \n'+'### Summary:\n'+finalans
|
| 569 |
|
| 570 |
full_df = pd.concat(atom_qn_dfs, ignore_index=True)
|
|
|
|
| 571 |
|
| 572 |
rag_answer = {}
|
| 573 |
rag_answer['answer'] = full_answer
|
|
@@ -602,26 +610,30 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
|
|
| 602 |
ec.rerank = True
|
| 603 |
|
| 604 |
if prompt_type == "Deep Research (BETA)":
|
| 605 |
-
gr.Info("Starting deep research
|
| 606 |
formatted_df, rag_answer = deep_research(query, top_k = top_k, ec=ec)
|
| 607 |
yield formatted_df, rag_answer['answer'], None, None, None
|
| 608 |
|
| 609 |
else:
|
| 610 |
# progress(0.2, desc=search_text_list[np.random.choice(len(search_text_list))])
|
|
|
|
| 611 |
rs, small_df = ec.retrieve(query, top_k = top_k, return_scores=True)
|
| 612 |
formatted_df = ec.return_formatted_df(rs, small_df)
|
| 613 |
yield formatted_df, None, None, None, None
|
| 614 |
|
| 615 |
# progress(0.4, desc=gen_text_list[np.random.choice(len(gen_text_list))])
|
|
|
|
| 616 |
rag_answer = run_rag_qa(query, formatted_df, prompt_type)
|
| 617 |
yield formatted_df, rag_answer['answer'], None, None, None
|
| 618 |
|
| 619 |
# progress(0.6, desc="Generating consensus")
|
|
|
|
| 620 |
consensus_answer = evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])
|
| 621 |
consensus = '## Consensus \n'+consensus_answer.consensus + '\n\n'+consensus_answer.explanation + '\n\n > Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score
|
| 622 |
yield formatted_df, rag_answer['answer'], consensus, None, None
|
| 623 |
|
| 624 |
# progress(0.8, desc="Analyzing question type")
|
|
|
|
| 625 |
question_type_gen = guess_question_type(query)
|
| 626 |
if '<categorization>' in question_type_gen:
|
| 627 |
question_type_gen = question_type_gen.split('<categorization>')[1]
|
|
@@ -632,6 +644,7 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
|
|
| 632 |
yield formatted_df, rag_answer['answer'], consensus, qn_type, None
|
| 633 |
|
| 634 |
# progress(1.0, desc="Visualizing embeddings")
|
|
|
|
| 635 |
fig = make_embedding_plot(formatted_df, top_k, consensus_answer)
|
| 636 |
|
| 637 |
yield formatted_df, rag_answer['answer'], consensus, qn_type, fig
|
|
|
|
| 533 |
def deep_research(question, top_k, ec):
|
| 534 |
|
| 535 |
full_answer = '## ' + question
|
| 536 |
+
yield None, None
|
| 537 |
|
| 538 |
gen_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
|
| 539 |
messages = [("system",df_atomic_prompt,),("human", question),]
|
| 540 |
rscope_text = gen_client.invoke(messages).content
|
| 541 |
|
| 542 |
full_answer = full_answer +' \n'+ rscope_text
|
| 543 |
+
rag_answer = {}
|
| 544 |
+
rag_answer['answer'] = full_answer
|
| 545 |
+
yield None, rag_answer
|
| 546 |
|
| 547 |
rscope_messages = [("system","""In the given text, what are the main atomic questions being asked? Please answer as a concise list.""",),("human", rscope_text),]
|
| 548 |
rscope_qns = gen_client.invoke(rscope_messages).content
|
|
|
|
| 567 |
atom_qn_strs.append(linkstr)
|
| 568 |
full_answer = full_answer +' \n### '+atom_qns[i]
|
| 569 |
full_answer = full_answer +' \n'+smallans
|
| 570 |
+
rag_answer = {}
|
| 571 |
+
rag_answer['answer'] = full_answer
|
| 572 |
+
yield None, rag_answer
|
| 573 |
|
| 574 |
finalans, finallinks = compileinfo(question, atom_qns, atom_qn_ans, atom_qn_strs)
|
| 575 |
full_answer = full_answer +' \n'+'### Summary:\n'+finalans
|
| 576 |
|
| 577 |
full_df = pd.concat(atom_qn_dfs, ignore_index=True)
|
| 578 |
+
full_df.index = full_df.index + 1
|
| 579 |
|
| 580 |
rag_answer = {}
|
| 581 |
rag_answer['answer'] = full_answer
|
|
|
|
| 610 |
ec.rerank = True
|
| 611 |
|
| 612 |
if prompt_type == "Deep Research (BETA)":
|
| 613 |
+
gr.Info("Starting deep research - this takes a few mins, so grab a drink or stretch your legs.")
|
| 614 |
formatted_df, rag_answer = deep_research(query, top_k = top_k, ec=ec)
|
| 615 |
yield formatted_df, rag_answer['answer'], None, None, None
|
| 616 |
|
| 617 |
else:
|
| 618 |
# progress(0.2, desc=search_text_list[np.random.choice(len(search_text_list))])
|
| 619 |
+
gr.Info(search_text_list[np.random.choice(len(search_text_list))])
|
| 620 |
rs, small_df = ec.retrieve(query, top_k = top_k, return_scores=True)
|
| 621 |
formatted_df = ec.return_formatted_df(rs, small_df)
|
| 622 |
yield formatted_df, None, None, None, None
|
| 623 |
|
| 624 |
# progress(0.4, desc=gen_text_list[np.random.choice(len(gen_text_list))])
|
| 625 |
+
gr.Info(gen_text_list[np.random.choice(len(gen_text_list))])
|
| 626 |
rag_answer = run_rag_qa(query, formatted_df, prompt_type)
|
| 627 |
yield formatted_df, rag_answer['answer'], None, None, None
|
| 628 |
|
| 629 |
# progress(0.6, desc="Generating consensus")
|
| 630 |
+
gr.Info("Generating consensus")
|
| 631 |
consensus_answer = evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])
|
| 632 |
consensus = '## Consensus \n'+consensus_answer.consensus + '\n\n'+consensus_answer.explanation + '\n\n > Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score
|
| 633 |
yield formatted_df, rag_answer['answer'], consensus, None, None
|
| 634 |
|
| 635 |
# progress(0.8, desc="Analyzing question type")
|
| 636 |
+
gr.Info("Analyzing question type")
|
| 637 |
question_type_gen = guess_question_type(query)
|
| 638 |
if '<categorization>' in question_type_gen:
|
| 639 |
question_type_gen = question_type_gen.split('<categorization>')[1]
|
|
|
|
| 644 |
yield formatted_df, rag_answer['answer'], consensus, qn_type, None
|
| 645 |
|
| 646 |
# progress(1.0, desc="Visualizing embeddings")
|
| 647 |
+
gr.Info("Visualizing embeddings")
|
| 648 |
fig = make_embedding_plot(formatted_df, top_k, consensus_answer)
|
| 649 |
|
| 650 |
yield formatted_df, rag_answer['answer'], consensus, qn_type, fig
|
app_gradio.py
CHANGED
|
@@ -533,12 +533,16 @@ def compileinfo(query, atom_qns, atom_qn_ans, atom_qn_strs):
|
|
| 533 |
def deep_research(question, top_k, ec):
|
| 534 |
|
| 535 |
full_answer = '## ' + question
|
|
|
|
| 536 |
|
| 537 |
gen_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
|
| 538 |
messages = [("system",df_atomic_prompt,),("human", question),]
|
| 539 |
rscope_text = gen_client.invoke(messages).content
|
| 540 |
|
| 541 |
full_answer = full_answer +' \n'+ rscope_text
|
|
|
|
|
|
|
|
|
|
| 542 |
|
| 543 |
rscope_messages = [("system","""In the given text, what are the main atomic questions being asked? Please answer as a concise list.""",),("human", rscope_text),]
|
| 544 |
rscope_qns = gen_client.invoke(rscope_messages).content
|
|
@@ -563,11 +567,15 @@ def deep_research(question, top_k, ec):
|
|
| 563 |
atom_qn_strs.append(linkstr)
|
| 564 |
full_answer = full_answer +' \n### '+atom_qns[i]
|
| 565 |
full_answer = full_answer +' \n'+smallans
|
|
|
|
|
|
|
|
|
|
| 566 |
|
| 567 |
finalans, finallinks = compileinfo(question, atom_qns, atom_qn_ans, atom_qn_strs)
|
| 568 |
full_answer = full_answer +' \n'+'### Summary:\n'+finalans
|
| 569 |
|
| 570 |
full_df = pd.concat(atom_qn_dfs, ignore_index=True)
|
|
|
|
| 571 |
|
| 572 |
rag_answer = {}
|
| 573 |
rag_answer['answer'] = full_answer
|
|
@@ -602,26 +610,30 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
|
|
| 602 |
ec.rerank = True
|
| 603 |
|
| 604 |
if prompt_type == "Deep Research (BETA)":
|
| 605 |
-
gr.Info("Starting deep research
|
| 606 |
formatted_df, rag_answer = deep_research(query, top_k = top_k, ec=ec)
|
| 607 |
yield formatted_df, rag_answer['answer'], None, None, None
|
| 608 |
|
| 609 |
else:
|
| 610 |
# progress(0.2, desc=search_text_list[np.random.choice(len(search_text_list))])
|
|
|
|
| 611 |
rs, small_df = ec.retrieve(query, top_k = top_k, return_scores=True)
|
| 612 |
formatted_df = ec.return_formatted_df(rs, small_df)
|
| 613 |
yield formatted_df, None, None, None, None
|
| 614 |
|
| 615 |
# progress(0.4, desc=gen_text_list[np.random.choice(len(gen_text_list))])
|
|
|
|
| 616 |
rag_answer = run_rag_qa(query, formatted_df, prompt_type)
|
| 617 |
yield formatted_df, rag_answer['answer'], None, None, None
|
| 618 |
|
| 619 |
# progress(0.6, desc="Generating consensus")
|
|
|
|
| 620 |
consensus_answer = evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])
|
| 621 |
consensus = '## Consensus \n'+consensus_answer.consensus + '\n\n'+consensus_answer.explanation + '\n\n > Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score
|
| 622 |
yield formatted_df, rag_answer['answer'], consensus, None, None
|
| 623 |
|
| 624 |
# progress(0.8, desc="Analyzing question type")
|
|
|
|
| 625 |
question_type_gen = guess_question_type(query)
|
| 626 |
if '<categorization>' in question_type_gen:
|
| 627 |
question_type_gen = question_type_gen.split('<categorization>')[1]
|
|
@@ -632,6 +644,7 @@ def run_pathfinder(query, top_k, extra_keywords, toggles, prompt_type, rag_type,
|
|
| 632 |
yield formatted_df, rag_answer['answer'], consensus, qn_type, None
|
| 633 |
|
| 634 |
# progress(1.0, desc="Visualizing embeddings")
|
|
|
|
| 635 |
fig = make_embedding_plot(formatted_df, top_k, consensus_answer)
|
| 636 |
|
| 637 |
yield formatted_df, rag_answer['answer'], consensus, qn_type, fig
|
|
|
|
| 533 |
def deep_research(question, top_k, ec):
|
| 534 |
|
| 535 |
full_answer = '## ' + question
|
| 536 |
+
yield None, None
|
| 537 |
|
| 538 |
gen_client = openai_llm(temperature=0,model_name='gpt-4o-mini', openai_api_key = openai_key)
|
| 539 |
messages = [("system",df_atomic_prompt,),("human", question),]
|
| 540 |
rscope_text = gen_client.invoke(messages).content
|
| 541 |
|
| 542 |
full_answer = full_answer +' \n'+ rscope_text
|
| 543 |
+
rag_answer = {}
|
| 544 |
+
rag_answer['answer'] = full_answer
|
| 545 |
+
yield None, rag_answer
|
| 546 |
|
| 547 |
rscope_messages = [("system","""In the given text, what are the main atomic questions being asked? Please answer as a concise list.""",),("human", rscope_text),]
|
| 548 |
rscope_qns = gen_client.invoke(rscope_messages).content
|
|
|
|
| 567 |
atom_qn_strs.append(linkstr)
|
| 568 |
full_answer = full_answer +' \n### '+atom_qns[i]
|
| 569 |
full_answer = full_answer +' \n'+smallans
|
| 570 |
+
rag_answer = {}
|
| 571 |
+
rag_answer['answer'] = full_answer
|
| 572 |
+
yield None, rag_answer
|
| 573 |
|
| 574 |
finalans, finallinks = compileinfo(question, atom_qns, atom_qn_ans, atom_qn_strs)
|
| 575 |
full_answer = full_answer +' \n'+'### Summary:\n'+finalans
|
| 576 |
|
| 577 |
full_df = pd.concat(atom_qn_dfs, ignore_index=True)
|
| 578 |
+
full_df.index = full_df.index + 1
|
| 579 |
|
| 580 |
rag_answer = {}
|
| 581 |
rag_answer['answer'] = full_answer
|
|
|
|
| 610 |
ec.rerank = True
|
| 611 |
|
| 612 |
if prompt_type == "Deep Research (BETA)":
|
| 613 |
+
gr.Info("Starting deep research - this takes a few mins, so grab a drink or stretch your legs.")
|
| 614 |
formatted_df, rag_answer = deep_research(query, top_k = top_k, ec=ec)
|
| 615 |
yield formatted_df, rag_answer['answer'], None, None, None
|
| 616 |
|
| 617 |
else:
|
| 618 |
# progress(0.2, desc=search_text_list[np.random.choice(len(search_text_list))])
|
| 619 |
+
gr.Info(search_text_list[np.random.choice(len(search_text_list))])
|
| 620 |
rs, small_df = ec.retrieve(query, top_k = top_k, return_scores=True)
|
| 621 |
formatted_df = ec.return_formatted_df(rs, small_df)
|
| 622 |
yield formatted_df, None, None, None, None
|
| 623 |
|
| 624 |
# progress(0.4, desc=gen_text_list[np.random.choice(len(gen_text_list))])
|
| 625 |
+
gr.Info(gen_text_list[np.random.choice(len(gen_text_list))])
|
| 626 |
rag_answer = run_rag_qa(query, formatted_df, prompt_type)
|
| 627 |
yield formatted_df, rag_answer['answer'], None, None, None
|
| 628 |
|
| 629 |
# progress(0.6, desc="Generating consensus")
|
| 630 |
+
gr.Info("Generating consensus")
|
| 631 |
consensus_answer = evaluate_overall_consensus(query, [formatted_df['abstract'][i+1] for i in range(len(formatted_df))])
|
| 632 |
consensus = '## Consensus \n'+consensus_answer.consensus + '\n\n'+consensus_answer.explanation + '\n\n > Relevance of retrieved papers to answer: %.1f' %consensus_answer.relevance_score
|
| 633 |
yield formatted_df, rag_answer['answer'], consensus, None, None
|
| 634 |
|
| 635 |
# progress(0.8, desc="Analyzing question type")
|
| 636 |
+
gr.Info("Analyzing question type")
|
| 637 |
question_type_gen = guess_question_type(query)
|
| 638 |
if '<categorization>' in question_type_gen:
|
| 639 |
question_type_gen = question_type_gen.split('<categorization>')[1]
|
|
|
|
| 644 |
yield formatted_df, rag_answer['answer'], consensus, qn_type, None
|
| 645 |
|
| 646 |
# progress(1.0, desc="Visualizing embeddings")
|
| 647 |
+
gr.Info("Visualizing embeddings")
|
| 648 |
fig = make_embedding_plot(formatted_df, top_k, consensus_answer)
|
| 649 |
|
| 650 |
yield formatted_df, rag_answer['answer'], consensus, qn_type, fig
|