Spaces:
Runtime error
Runtime error
| """Read an Excel sheet of KP (key-phrase) matches and let the user evaluate the quality of the matching. Support for a second sheet is currently commented out.""" | |
| import streamlit as st | |
| import ast | |
| import pandas as pd | |
| import random | |
| from time import sleep | |
# Similarity threshold. The chosen label is later interpolated into the
# worksheet name ("<threshold> Threshold"); index=2 pre-selects "0.85".
threshold = st.radio(
    "Select threshold",
    ["0.7", "0.8", "0.85", "0.87", "0.9", "0.95"],
    index=2,
)

# Upper bound on how many key-phrases are sampled for review.
num_kp = st.slider(
    "Number of key-phrases to select",
    min_value=10,
    max_value=100,
    value=50,
    step=5,
)

# Excel workbook holding the key-phrase matches.
xl1 = st.file_uploader("Choose first file", key="xl1")
# A second uploader is currently disabled:
# xl2 = st.file_uploader("Choose second file", key="xl2")
def merge_dicts(x: "pd.Series") -> dict:
    """Merge every dict held in *x* into a single dict.

    Null entries are skipped via ``dropna()``. This is used as the
    aggregation function when rows sharing the same "KP" are grouped.

    Args:
        x: Series whose non-null entries are dicts.

    Returns:
        One dict containing the items of all entries. When a key occurs in
        more than one entry, the value from the last entry wins.
    """
    merged = {}
    for entry in x.dropna():
        merged.update(entry.items())
    return merged
def clean_dict(x: str) -> str:
    """Return *x* with every single quote replaced by a double quote.

    Applied to the raw "Matched KPs" cell text before it is parsed with
    ``ast.literal_eval``.

    NOTE(review): the replacement is unconditional, so an apostrophe inside
    a key-phrase is rewritten as well and may leave the text unparseable.
    """
    return x.replace("'", '"')
def _parse_matched_kps(cell: str) -> dict:
    """Convert one raw "Matched KPs" cell into a dict of usable matches.

    The cell text is normalised with ``clean_dict`` and parsed with
    ``ast.literal_eval``. Entries whose value is the string "null" are
    discarded, as are entries scoring 0.99 or more (direct matches).
    """
    parsed = ast.literal_eval(clean_dict(cell))
    return {
        kp: score
        for kp, score in parsed.items()
        if score != "null" and score < 0.99
    }


if xl1 is not None:
    # Read the worksheet that corresponds to the selected threshold.
    df1 = pd.read_excel(xl1, sheet_name=f"{threshold} Threshold")

    # Empty cells come back as NaN and cannot be parsed as text; drop those
    # rows instead of crashing on them.
    df1 = df1.dropna(subset=["Matched KPs"]).copy()

    # Parse the cell text into dicts and remove "null" / direct matches in a
    # single pass.
    df1["Matched KPs"] = df1["Matched KPs"].apply(_parse_matched_kps)

    # Keep only the rows that still have at least one match left.
    has_matches = df1["Matched KPs"].map(len) > 0
    df1 = df1[has_matches]

    # Merge the dicts of rows that share the same KP, then order the KPs by
    # how many matches they ended up with (most first).
    new_df = df1[["KP", "Matched KPs"]].groupby("KP").agg(merge_dicts)
    new_df["dict len"] = new_df["Matched KPs"].apply(len)
    new_df = new_df.sort_values(by="dict len", ascending=False).reset_index()

    with st.form("First excel file"):
        choices = []
        i = 0  # running count of rendered checkboxes; doubles as widget key

        # Never ask for more samples than there are key-phrases.
        num_kp = min(num_kp, new_df.shape[0])

        # Fixed seed so the same key-phrases are shown on every rerun.
        for _, row in new_df.sample(n=num_kp, random_state=42).iterrows():
            matches = row["Matched KPs"]
            # Direct matches (>= 0.99) were already filtered out while
            # parsing, so only the emptiness check is needed here.
            if not matches:
                continue

            kp_col, match_col = st.columns(2)
            with kp_col:
                st.write(row["KP"])
            with match_col:
                shown = list(matches)
                # Show at most 5 matches per key-phrase, picked by a seeded
                # shuffle so the selection is reproducible.
                if len(shown) > 5:
                    random.Random(42).shuffle(shown)
                    shown = shown[:5]
                for matched_kp in shown:
                    choices.append(
                        st.checkbox(
                            f"{matched_kp}: {matches[matched_kp]:0.2f}",
                            key=i,
                        )
                    )
                    i += 1
            st.markdown("""---""")

        submitted = st.form_submit_button("Submit")
        if submitted:
            selected = sum(1 for ticked in choices if ticked)
            # Guard against division by zero when no checkbox was rendered.
            ratio = selected / i if i else 0.0
            st.write(selected, i, f"{ratio : 0.3f}")