Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -68,7 +68,7 @@ class Journal:
|
|
| 68 |
def __repr__(self):
|
| 69 |
return f"Journal(name='{self.name}', bytes='{self.bytes}')"
|
| 70 |
|
| 71 |
-
llm = ChatOpenAI(temperature=0, model="gpt-
|
| 72 |
|
| 73 |
textex_chain = create_extraction_chain(textex_schema, llm)
|
| 74 |
tablex_chain = create_extraction_chain(tablex_schema, llm)
|
|
@@ -104,7 +104,7 @@ if uploaded_files:
|
|
| 104 |
if on_h:
|
| 105 |
chunk_size_h = st.selectbox(
|
| 106 |
'Tokens amounts per process :',
|
| 107 |
-
(
|
| 108 |
)
|
| 109 |
parseButtonH = st.button("Get Result", key='table_H')
|
| 110 |
|
|
@@ -116,7 +116,7 @@ if uploaded_files:
|
|
| 116 |
if on_v:
|
| 117 |
chunk_size_v = st.selectbox(
|
| 118 |
'Tokens amounts per process :',
|
| 119 |
-
(
|
| 120 |
)
|
| 121 |
parseButtonV = st.button("Get Result", key='table_V')
|
| 122 |
with col3:
|
|
@@ -127,7 +127,7 @@ if uploaded_files:
|
|
| 127 |
if on_t:
|
| 128 |
chunk_size_t = st.selectbox(
|
| 129 |
'Tokens amounts per process :',
|
| 130 |
-
(
|
| 131 |
)
|
| 132 |
parseButtonT = st.button("Get Result", key="no_Table")
|
| 133 |
|
|
@@ -161,7 +161,10 @@ if uploaded_files:
|
|
| 161 |
try:
|
| 162 |
df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0])).replace("\'", '\"')), index=[0]).fillna('')
|
| 163 |
except:
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
| 165 |
# df = pd.DataFrame(repair_json(tablex_chain.run(inp)[0]))
|
| 166 |
chunkdf.append(df)
|
| 167 |
|
|
@@ -203,7 +206,7 @@ if uploaded_files:
|
|
| 203 |
embeddings = OpenAIEmbeddings()
|
| 204 |
|
| 205 |
db = Chroma.from_documents(docs, embeddings)
|
| 206 |
-
llm_table = ChatOpenAI(model_name="gpt-
|
| 207 |
qa_chain = RetrievalQA.from_chain_type(llm_table, retriever=db.as_retriever())
|
| 208 |
|
| 209 |
# List of questions
|
|
@@ -232,6 +235,7 @@ if uploaded_files:
|
|
| 232 |
if output_list[0]['result'].split('\n')[i] != "":
|
| 233 |
try:
|
| 234 |
row = literal_eval(repair_json(output_list[0]['result'].split('\n')[i]))[0]
|
|
|
|
| 235 |
row = {**row, **{
|
| 236 |
'Title' : concat['title'][0],
|
| 237 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
|
@@ -397,26 +401,33 @@ if uploaded_files:
|
|
| 397 |
'Recommendation' : summary,
|
| 398 |
}
|
| 399 |
}
|
| 400 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
row.update({
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
|
|
|
|
|
|
| 420 |
else:
|
| 421 |
L.append(row)
|
| 422 |
except SyntaxError:
|
|
@@ -616,7 +627,17 @@ if uploaded_files:
|
|
| 616 |
chunkdf = []
|
| 617 |
for i, chunk in enumerate(text_chunk):
|
| 618 |
inp = chunk
|
| 619 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 620 |
chunkdf.append(df)
|
| 621 |
|
| 622 |
concat = pd.concat(chunkdf, axis=0).reset_index().drop('index', axis=1).fillna('')
|
|
@@ -687,11 +708,10 @@ if uploaded_files:
|
|
| 687 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 688 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 689 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 690 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 691 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 692 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 693 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 694 |
-
'Recommendation' : summary,
|
| 695 |
}}
|
| 696 |
if len(row['Genes'].strip().split(',')) > 1:
|
| 697 |
for g in row['Genes'].strip().split(','):
|
|
@@ -705,8 +725,8 @@ if uploaded_files:
|
|
| 705 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 706 |
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 707 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 708 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 709 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 710 |
'Recommendation' : summary,
|
| 711 |
})
|
| 712 |
else:
|
|
@@ -718,10 +738,10 @@ if uploaded_files:
|
|
| 718 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 719 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 720 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 721 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 722 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 723 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 724 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 725 |
'Recommendation' : summary,
|
| 726 |
}}
|
| 727 |
if len(row['Genes'].strip().split(',')) > 1:
|
|
@@ -734,10 +754,10 @@ if uploaded_files:
|
|
| 734 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 735 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 736 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 737 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 738 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 739 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 740 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 741 |
'Recommendation' : summary,
|
| 742 |
})
|
| 743 |
else:
|
|
@@ -750,10 +770,10 @@ if uploaded_files:
|
|
| 750 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 751 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 752 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 753 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 754 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 755 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 756 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 757 |
'Recommendation' : summary,
|
| 758 |
}
|
| 759 |
}
|
|
@@ -770,10 +790,10 @@ if uploaded_files:
|
|
| 770 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 771 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 772 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 773 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 774 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 775 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 776 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 777 |
'Recommendation' : summary,
|
| 778 |
}
|
| 779 |
}
|
|
@@ -793,10 +813,10 @@ if uploaded_files:
|
|
| 793 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 794 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 795 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 796 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 797 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 798 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 799 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 800 |
'Recommendation' : summary,
|
| 801 |
}}
|
| 802 |
if row['SNPs'] != "Not available":
|
|
@@ -813,10 +833,10 @@ if uploaded_files:
|
|
| 813 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 814 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 815 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 816 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 817 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 818 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 819 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 820 |
'Recommendation' : summary,
|
| 821 |
})
|
| 822 |
else:
|
|
@@ -828,10 +848,10 @@ if uploaded_files:
|
|
| 828 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 829 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 830 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 831 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 832 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 833 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 834 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 835 |
'Recommendation' : summary,
|
| 836 |
}}
|
| 837 |
if row['SNPs'] != "Not available":
|
|
@@ -848,10 +868,10 @@ if uploaded_files:
|
|
| 848 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 849 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 850 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 851 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 852 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 853 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 854 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 855 |
'Recommendation' : summary,
|
| 856 |
})
|
| 857 |
else:
|
|
@@ -864,10 +884,10 @@ if uploaded_files:
|
|
| 864 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 865 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 866 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 867 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 868 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 869 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 870 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 871 |
'Recommendation' : summary,
|
| 872 |
}
|
| 873 |
}
|
|
@@ -884,10 +904,10 @@ if uploaded_files:
|
|
| 884 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 885 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 886 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 887 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 888 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 889 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 890 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 891 |
'Recommendation' : summary,
|
| 892 |
}
|
| 893 |
}
|
|
@@ -907,10 +927,10 @@ if uploaded_files:
|
|
| 907 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 908 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 909 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 910 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 911 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 912 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 913 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 914 |
'Recommendation' : summary,
|
| 915 |
}
|
| 916 |
}
|
|
@@ -927,10 +947,10 @@ if uploaded_files:
|
|
| 927 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 928 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 929 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 930 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 931 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 932 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 933 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 934 |
'Recommendation' : summary,
|
| 935 |
}
|
| 936 |
}
|
|
@@ -948,10 +968,10 @@ if uploaded_files:
|
|
| 948 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 949 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 950 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 951 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 952 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 953 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 954 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 955 |
'Recommendation' : summary,
|
| 956 |
}
|
| 957 |
}
|
|
@@ -968,10 +988,10 @@ if uploaded_files:
|
|
| 968 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 969 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 970 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 971 |
-
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 972 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 973 |
-
'Study Methodology' : ' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title(),
|
| 974 |
-
'Study Level' : ' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title(),
|
| 975 |
'Recommendation' : summary,
|
| 976 |
}
|
| 977 |
}
|
|
@@ -1039,7 +1059,10 @@ if uploaded_files:
|
|
| 1039 |
time.sleep(0.1)
|
| 1040 |
st.write("☑ Generating Summary ...")
|
| 1041 |
|
| 1042 |
-
|
|
|
|
|
|
|
|
|
|
| 1043 |
for col in list(concat.columns):
|
| 1044 |
concat[col] = concat[col].apply(lambda x: x if x not in ['N/A', 'not mentioned', 'Not mentioned', 'Unknown'] else '')
|
| 1045 |
|
|
@@ -1096,7 +1119,7 @@ if uploaded_files:
|
|
| 1096 |
'Recommendation' : summary,
|
| 1097 |
})
|
| 1098 |
|
| 1099 |
-
csv = pd.concat([csv, pd.DataFrame(L)], ignore_index=True)
|
| 1100 |
status.update(label="Gene and SNPs succesfully collected.")
|
| 1101 |
st.dataframe(csv)
|
| 1102 |
with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
|
|
|
|
| 68 |
def __repr__(self):
|
| 69 |
return f"Journal(name='{self.name}', bytes='{self.bytes}')"
|
| 70 |
|
| 71 |
+
llm = ChatOpenAI(temperature=0, model="gpt-4-0125-preview")
|
| 72 |
|
| 73 |
textex_chain = create_extraction_chain(textex_schema, llm)
|
| 74 |
tablex_chain = create_extraction_chain(tablex_schema, llm)
|
|
|
|
| 104 |
if on_h:
|
| 105 |
chunk_size_h = st.selectbox(
|
| 106 |
'Tokens amounts per process :',
|
| 107 |
+
(120000, 96000, 64000, 32000), key='table_h'
|
| 108 |
)
|
| 109 |
parseButtonH = st.button("Get Result", key='table_H')
|
| 110 |
|
|
|
|
| 116 |
if on_v:
|
| 117 |
chunk_size_v = st.selectbox(
|
| 118 |
'Tokens amounts per process :',
|
| 119 |
+
(120000, 96000, 64000, 32000), key='table_v'
|
| 120 |
)
|
| 121 |
parseButtonV = st.button("Get Result", key='table_V')
|
| 122 |
with col3:
|
|
|
|
| 127 |
if on_t:
|
| 128 |
chunk_size_t = st.selectbox(
|
| 129 |
'Tokens amounts per process :',
|
| 130 |
+
(120000, 96000, 64000, 32000), key='no_table'
|
| 131 |
)
|
| 132 |
parseButtonT = st.button("Get Result", key="no_Table")
|
| 133 |
|
|
|
|
| 161 |
try:
|
| 162 |
df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0])).replace("\'", '\"')), index=[0]).fillna('')
|
| 163 |
except:
|
| 164 |
+
try:
|
| 165 |
+
df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0]) + ']').replace("\'", '\"')), index=[0]).fillna('')
|
| 166 |
+
except SyntaxError:
|
| 167 |
+
df = pd.DataFrame(literal_eval('[' + str(json.dumps(tablex_chain.run(inp)[0]) + ']').replace("\'", '\"')), index=[0]).fillna('')
|
| 168 |
# df = pd.DataFrame(repair_json(tablex_chain.run(inp)[0]))
|
| 169 |
chunkdf.append(df)
|
| 170 |
|
|
|
|
| 206 |
embeddings = OpenAIEmbeddings()
|
| 207 |
|
| 208 |
db = Chroma.from_documents(docs, embeddings)
|
| 209 |
+
llm_table = ChatOpenAI(model_name="gpt-4-0125-preview", temperature=0)
|
| 210 |
qa_chain = RetrievalQA.from_chain_type(llm_table, retriever=db.as_retriever())
|
| 211 |
|
| 212 |
# List of questions
|
|
|
|
| 235 |
if output_list[0]['result'].split('\n')[i] != "":
|
| 236 |
try:
|
| 237 |
row = literal_eval(repair_json(output_list[0]['result'].split('\n')[i]))[0]
|
| 238 |
+
st.write(row)
|
| 239 |
row = {**row, **{
|
| 240 |
'Title' : concat['title'][0],
|
| 241 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
|
|
|
| 401 |
'Recommendation' : summary,
|
| 402 |
}
|
| 403 |
}
|
| 404 |
+
if 'SNPs' in list(row.keys()):
|
| 405 |
+
if row['SNPs'] != "Not available":
|
| 406 |
+
row.update({
|
| 407 |
+
'SNPs' : "Not available"
|
| 408 |
+
})
|
| 409 |
+
else:
|
| 410 |
row.update({
|
| 411 |
+
'SNPs' : "Not available"
|
| 412 |
+
})
|
| 413 |
+
|
| 414 |
+
if 'Genes' in list(row.keys()):
|
| 415 |
+
if len(row['Genes'].strip().split(',')) > 1:
|
| 416 |
+
for g in row['Genes'].strip().split(','):
|
| 417 |
+
L.append({
|
| 418 |
+
'Title' : concat['title'][0],
|
| 419 |
+
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 420 |
+
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 421 |
+
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 422 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 423 |
+
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 424 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 425 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 426 |
+
'Recommendation' : summary,
|
| 427 |
+
'Genes' : g.strip().upper().replace('Unknown', ''),
|
| 428 |
+
"SNPs" : "Not available",
|
| 429 |
+
"Diseases" : ''.join(list(row['Diseases'].title() if row['Diseases'] not in ['T2D', 'T2DM', 'NAFLD', 'CVD'] else row['Diseases'])).replace('Unknown', '').replace('Unknown', '')
|
| 430 |
+
})
|
| 431 |
else:
|
| 432 |
L.append(row)
|
| 433 |
except SyntaxError:
|
|
|
|
| 627 |
chunkdf = []
|
| 628 |
for i, chunk in enumerate(text_chunk):
|
| 629 |
inp = chunk
|
| 630 |
+
# Assuming tablex_chain.run(inp)[0] returns a dictionary
|
| 631 |
+
original_dict = tablex_chain.run(inp)[0]
|
| 632 |
+
# Convert the dictionary to a JSON string
|
| 633 |
+
json_str = json.dumps(original_dict)
|
| 634 |
+
# Replace single quotes with double quotes in the JSON string
|
| 635 |
+
json_str_fixed = json_str.replace("'", '"')
|
| 636 |
+
# Use literal_eval to safely evaluate the JSON string as a Python dictionary
|
| 637 |
+
fixed_dict = literal_eval(json_str_fixed)
|
| 638 |
+
# Create a DataFrame from the fixed dictionary
|
| 639 |
+
df = pd.DataFrame(fixed_dict, index=[0]).fillna('')
|
| 640 |
+
# df = pd.DataFrame(literal_eval(str(json.dumps(tablex_chain.run(inp)[0])).replace("\'", '\"')), index=[0]).fillna('')
|
| 641 |
chunkdf.append(df)
|
| 642 |
|
| 643 |
concat = pd.concat(chunkdf, axis=0).reset_index().drop('index', axis=1).fillna('')
|
|
|
|
| 708 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 709 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 710 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 711 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 712 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 713 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 714 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
|
|
|
| 715 |
}}
|
| 716 |
if len(row['Genes'].strip().split(',')) > 1:
|
| 717 |
for g in row['Genes'].strip().split(','):
|
|
|
|
| 725 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 726 |
'Population' : ' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title(),
|
| 727 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 728 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 729 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 730 |
'Recommendation' : summary,
|
| 731 |
})
|
| 732 |
else:
|
|
|
|
| 738 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 739 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 740 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 741 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 742 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 743 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 744 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 745 |
'Recommendation' : summary,
|
| 746 |
}}
|
| 747 |
if len(row['Genes'].strip().split(',')) > 1:
|
|
|
|
| 754 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 755 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 756 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 757 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 758 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 759 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 760 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 761 |
'Recommendation' : summary,
|
| 762 |
})
|
| 763 |
else:
|
|
|
|
| 770 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 771 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 772 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 773 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 774 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 775 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 776 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 777 |
'Recommendation' : summary,
|
| 778 |
}
|
| 779 |
}
|
|
|
|
| 790 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 791 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 792 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 793 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 794 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 795 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 796 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 797 |
'Recommendation' : summary,
|
| 798 |
}
|
| 799 |
}
|
|
|
|
| 813 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 814 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 815 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 816 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 817 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 818 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 819 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 820 |
'Recommendation' : summary,
|
| 821 |
}}
|
| 822 |
if row['SNPs'] != "Not available":
|
|
|
|
| 833 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 834 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 835 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 836 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 837 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 838 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 839 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 840 |
'Recommendation' : summary,
|
| 841 |
})
|
| 842 |
else:
|
|
|
|
| 848 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 849 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 850 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 851 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 852 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 853 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 854 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 855 |
'Recommendation' : summary,
|
| 856 |
}}
|
| 857 |
if row['SNPs'] != "Not available":
|
|
|
|
| 868 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 869 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 870 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 871 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 872 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 873 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 874 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 875 |
'Recommendation' : summary,
|
| 876 |
})
|
| 877 |
else:
|
|
|
|
| 884 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 885 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 886 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 887 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 888 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 889 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 890 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 891 |
'Recommendation' : summary,
|
| 892 |
}
|
| 893 |
}
|
|
|
|
| 904 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 905 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 906 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 907 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 908 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 909 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 910 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 911 |
'Recommendation' : summary,
|
| 912 |
}
|
| 913 |
}
|
|
|
|
| 927 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 928 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 929 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 930 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 931 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 932 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 933 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 934 |
'Recommendation' : summary,
|
| 935 |
}
|
| 936 |
}
|
|
|
|
| 947 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 948 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 949 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 950 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 951 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 952 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 953 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 954 |
'Recommendation' : summary,
|
| 955 |
}
|
| 956 |
}
|
|
|
|
| 968 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 969 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 970 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 971 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 972 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 973 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 974 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 975 |
'Recommendation' : summary,
|
| 976 |
}
|
| 977 |
}
|
|
|
|
| 988 |
'Authors' : concat['authors'][0] if 'authors' in list(concat.columns) else '',
|
| 989 |
'Publisher Name' : concat['publisher_name'][0] if 'publisher_name' in list(concat.columns) else '',
|
| 990 |
'Publication Year' : get_valid_year(' '.join(concat['year_of_publication'].values.tolist())) if 'year_of_publication' in concat.columns else concat.assign(year_of_publication='')['year_of_publication'],
|
| 991 |
+
'Population' : upper_abbreviation(' '.join(concat['population_race'].values.tolist()).replace('Unknown', '').title()) if 'population_race' in concat.columns else concat.assign(population_race='')['population_race'],
|
| 992 |
'Sample Size' : sample_size_postproc(' '.join(concat['sample_size'].values.tolist()).replace('Unknown', '').title()) if 'sample_size' in concat.columns else concat.assign(sample_size='')['sample_size'],
|
| 993 |
+
'Study Methodology' : upper_abbreviation(' '.join(concat['study_methodology'].values.tolist()).replace('Unknown', '').title()) if 'study_methodology' in concat.columns else concat.assign(study_methodology='')['study_methodology'],
|
| 994 |
+
'Study Level' : upper_abbreviation(' '.join(concat['study_level'].values.tolist()).replace('Unknown', '').title()) if 'study_level' in concat.columns else concat.assign(study_level='')['study_level'],
|
| 995 |
'Recommendation' : summary,
|
| 996 |
}
|
| 997 |
}
|
|
|
|
| 1059 |
time.sleep(0.1)
|
| 1060 |
st.write("☑ Generating Summary ...")
|
| 1061 |
|
| 1062 |
+
if 'SNPs' in list(concat.columns):
|
| 1063 |
+
concat['SNPs'] = concat['SNPs'].apply(lambda x: x if x.startswith('rs') else '')
|
| 1064 |
+
else:
|
| 1065 |
+
concat['SNPs'] = ''
|
| 1066 |
for col in list(concat.columns):
|
| 1067 |
concat[col] = concat[col].apply(lambda x: x if x not in ['N/A', 'not mentioned', 'Not mentioned', 'Unknown'] else '')
|
| 1068 |
|
|
|
|
| 1119 |
'Recommendation' : summary,
|
| 1120 |
})
|
| 1121 |
|
| 1122 |
+
csv = pd.concat([csv, pd.DataFrame(L)], ignore_index=True).drop_duplicates(subset='Genes')
|
| 1123 |
status.update(label="Gene and SNPs succesfully collected.")
|
| 1124 |
st.dataframe(csv)
|
| 1125 |
with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
|