Spaces:
Sleeping
Sleeping
| # -*- coding:utf-8 -*- | |
| import io | |
| import pandas | |
| import streamlit as st | |
| from pycaprio import Pycaprio, mappings | |
| from zipfile import ZipFile | |
| from requests.exceptions import JSONDecodeError | |
| from n4a_analytics_lib.analytics import (GlobalStatistics, | |
| IaaStatistics) | |
| from n4a_analytics_lib.constants import KAPPA_LEGEND | |
def convert_df(df_ex: pandas.DataFrame) -> bytes:
    """Serialize a dataframe to CSV text and return it as UTF-8 bytes.

    Args:
        df_ex: Dataframe to export (the index is written as the first column,
            which is the pandas default).

    Returns:
        The CSV document encoded as UTF-8.
    """
    csv_text = df_ex.to_csv(encoding="utf-8")
    return csv_text.encode("utf-8")
def check_login(username: str, password: str) -> bool:
    """Tell whether both credentials were actually filled in.

    This only checks that neither value is empty; it does not validate the
    credentials against any service.

    Args:
        username: Login name typed by the user.
        password: Password typed by the user.

    Returns:
        True when both strings are non-empty, False otherwise.
    """
    return len(username) > 0 and len(password) > 0
def display_data(col: st.columns) -> None:
    """Render the global statistics stored in the session into a column.

    Reads the object stored under ``st.session_state['gs_obj']`` and shows,
    in order: the total number of curated annotations, its ``df_i``
    dataframe, a selector over its ``documents``, and the plot built by
    ``create_plot`` for the selected entry.

    Args:
        col: Streamlit container the widgets are written to.
    """
    stats = st.session_state['gs_obj']

    total = stats.total_annotations_project
    col.metric("Total curated annotations", f"{total} Named entities")
    col.dataframe(stats.df_i)

    # The selector drives which data the bar plot below is built from.
    chosen = col.selectbox('Select specific data to display bar plot:',
                           stats.documents,
                           key="selector_data")
    figure = stats.create_plot(chosen)
    col.pyplot(figure)
def template_agreement_dataframe(title: str,
                                 df: pandas.DataFrame,
                                 total_pov: int,
                                 total_annotations: int,
                                 percentage_pov: float,
                                 mode: str) -> None:
    """Render one agreement section: heading, ratio, CSV download and table.

    Args:
        title: Sub-header displayed above the section.
        df: Dataframe shown on the page and offered as a CSV download.
        total_pov: Number of annotations belonging to this section.
        total_annotations: Total number of annotations.
        percentage_pov: Share of ``total_pov`` in the total, displayed as-is.
        mode: Suffix used in the download file name and in the widget key,
            so each section gets a distinct download button.
    """
    st.subheader(title)

    summary = f"{total_pov} / {total_annotations} annotations ({percentage_pov} %)"
    st.markdown(summary)

    csv_payload = convert_df(df)
    st.download_button(
        label="Press to Download CSV",
        data=csv_payload,
        file_name=f"csv_annotators_{mode}.csv",
        mime="text/csv",
        key=f'download-csv_{mode}',
    )

    st.dataframe(df)
def init_session_iaa(data: st.file_uploader,
                     baseline: st.file_uploader,
                     col: st.columns) -> None:
    """Build the inter-annotator agreement (IAA) report page.

    Creates an ``IaaStatistics`` object from the uploaded project and the
    baseline text, then renders: a summary of the baseline text, the Fleiss
    and pairwise Cohen kappa scores with their legend, an optional confusion
    matrix, the agree / disagree tables with CSV downloads, and the
    agreement pie plots.

    Args:
        data: Uploaded project archive, passed to ``IaaStatistics`` as
            ``zip_project``.
        baseline: Uploaded baseline text; its ``getvalue()`` content and
            ``name`` are used.
        col: Streamlit container receiving the baseline text summary.
    """
    # NOTE(review): the glyphs at the start of several headings below look
    # like mis-encoded emoji; they are kept unchanged here — confirm the
    # intended characters against the original file.
    project_analyzed = IaaStatistics(zip_project=data, baseline_text=baseline.getvalue())
    baseline_analyzer = project_analyzed.analyze_text()

    # analyze_text() is indexed positionally: the labels below show which
    # count each position is displayed as.
    col.markdown(f"""
    ### BASELINE TEXT: {baseline.name}
    - sentences: {baseline_analyzer[0]}
    - words: {baseline_analyzer[1]}
    - characters: {baseline_analyzer[2]}
    """)

    st.markdown("## ๐ IAA metrics")
    col1_kappa, col2_kappa = st.columns(2)

    # Kappa for the whole group of annotators.
    col1_kappa.subheader("Fleiss Kappa (global score for group):")
    col1_kappa.markdown(interpret_kappa(project_analyzed.fleiss_kappa), unsafe_allow_html=True)

    # Kappa for each pair of annotators.
    col1_kappa.subheader("Cohen Kappa (score for annotators pair):")
    for coders, c_k in project_analyzed.compute_pairs_cohen_kappa().items():
        col1_kappa.markdown(f"* {coders[0]} <> {coders[1]} : {interpret_kappa(c_k)}", unsafe_allow_html=True)

    # Legend explaining the kappa colour code.
    col2_kappa.markdown(KAPPA_LEGEND, unsafe_allow_html=True)

    # Optional confusion matrix with user-adjustable figure size.
    if st.checkbox('Display confusion matrix'):
        # The default width (14) must lie inside [min, max]; the previous
        # upper bound of 10 made Streamlit reject the slider, so the bound
        # is widened to 20.
        width = st.slider("matrix width", 1, 20, 14)
        height = st.slider("matrix height", 1, 10, 4)
        st.pyplot(project_analyzed.plot_confusion_matrix(width=width, height=height).figure)

    # Annotations on which annotators agree (table + CSV download).
    template_agreement_dataframe(title="โ ๏ธ Agree annotations",
                                 df=project_analyzed.df_agree,
                                 total_pov=project_analyzed.total_agree,
                                 total_annotations=project_analyzed.total_annotations,
                                 percentage_pov=project_analyzed.percent_agree,
                                 mode="agree")

    # Annotations on which annotators disagree (table + CSV download).
    template_agreement_dataframe(title="โ Disagree annotations",
                                 df=project_analyzed.df_disagree,
                                 total_pov=project_analyzed.total_disagree,
                                 total_annotations=project_analyzed.total_annotations,
                                 percentage_pov=project_analyzed.percent_disagree,
                                 mode="disagree")

    # Pie plots of the agreement per label.
    st.subheader("๐ท๏ธ Global Labels Statistics")
    st.pyplot(project_analyzed.plot_agreement_pies().figure)
def init_session_statistics(remote: bool, local: bool, data: tuple) -> None:
    """Reset the session and load the global statistics object.

    The session state is emptied, the ``gs_local`` / ``gs_remote`` flags are
    stored, and ``gs_obj`` is populated either from the remote INCEpTION
    host or from a local archive.

    Args:
        remote: True to fetch curated documents from the remote host.
        local: True to build the statistics from a local archive.
        data: In remote mode, a ``(username, password)`` pair; in local mode,
            the project archive handed to ``GlobalStatistics``.
    """
    # Empty the session in place. Rebinding ``st.session_state`` to a new
    # dict would replace Streamlit's per-session object with a plain
    # module-level dict shared by every user of the app.
    st.session_state.clear()

    st.session_state["gs_local"] = local
    st.session_state["gs_remote"] = remote

    # Remote mode: the data has to be fetched from the API host first.
    if remote and not local:
        st.success('Fetch curated documents from host INCEpTION API in progress...')
        try:
            fetch_curated_data_from_remote(
                username=data[0],
                password=data[1]
            )
        except JSONDecodeError:
            # Treated by this app as "username / password incorrect".
            st.error('Username or Password is incorrect please retry.')
            st.session_state.clear()

    # Local mode: build the statistics straight from the provided archive.
    if local and not remote:
        st.session_state["gs_obj"] = GlobalStatistics(zip_project=data, remote=False)
def fetch_curated_data_from_remote(username: str,
                                   password: str,
                                   endpoint: str = "https://inception.dhlab.epfl.ch/prod",
                                   project_title: str = "ner4archives-template") -> None:
    """Download every curated document of a project and load the statistics.

    Each document whose state is ``CURATION_COMPLETE`` is exported as a ZIP
    archive. All archives are merged into the first one (entries whose name
    is already present are skipped) and the merged archive is used to build
    the ``GlobalStatistics`` object stored in ``st.session_state["gs_obj"]``.

    Args:
        username: INCEpTION account name.
        password: INCEpTION account password.
        endpoint: Base URL of the INCEpTION host.
        project_title: Name of the project to fetch.

    Raises:
        IndexError: If no project is named ``project_title`` or the project
            has no curated document.
    """
    client = Pycaprio(inception_host=endpoint, authentication=(str(username), str(password)))

    matching_projects = [p for p in client.api.projects() if p.project_name == project_title]
    if not matching_projects:
        raise IndexError(f"no project named {project_title!r} found on {endpoint}")
    project_id = matching_projects[0].project_id

    # Export only the documents whose curation is finished.
    curations = [
        client.api.curation(project_id, document,
                            curation_format=mappings.InceptionFormat.UIMA_CAS_XMI_XML_1_1)
        for document in client.api.documents(project_id)
        if document.document_state == mappings.DocumentState.CURATION_COMPLETE
    ]
    if not curations:
        raise IndexError(f"project {project_title!r} has no curated document")

    # The first archive is opened in append mode so the others can be merged
    # into it; the others are only read and are closed as soon as copied.
    with ZipFile(io.BytesIO(curations[0]), mode="a") as merged:
        known_names = set(merged.namelist())
        for extra in curations[1:]:
            with ZipFile(io.BytesIO(extra), mode="r") as archive:
                for name in archive.namelist():
                    if name not in known_names:
                        merged.writestr(name, archive.read(name))
                        known_names.add(name)

        # Built while the merged archive is still open.
        # NOTE(review): assumes GlobalStatistics reads everything it needs
        # from the archive during construction — confirm.
        st.session_state["gs_obj"] = GlobalStatistics(zip_project=merged, remote=True)
def interpret_kappa(score: float) -> str:
    """Format a kappa score as a coloured HTML percentage.

    The colour bands are contiguous, so every score gets a colour:
    below 0, then up to 0.20, 0.40, 0.60, 0.80, and above 0.80 (which
    includes a perfect score of 1.0). A score that compares false against
    every bound (NaN) keeps an empty colour.

    Args:
        score: Kappa value, normally between -1 and 1.

    Returns:
        A ``<span>`` element showing ``score`` as a percentage rounded to two
        decimals, styled with the colour of its band.
    """
    color = ""
    if score < 0:
        color = "#e74c3c;"
    elif score <= 0.20:
        color = "#f39c12;"
    elif score <= 0.40:
        color = "#f4d03f;"
    elif score <= 0.60:
        color = "#5dade2;"
    elif score <= 0.80:
        color = "#58d68d;"
    elif score > 0.80:
        color = "#28b463;"
    return f"<span style='font-size:30px; color: {color}'>{round(score*100, 2)} %</span>"