Spaces:

ntphuc149
/

QA_Data_Validator

Sleeping

App Files Files Community

Truong-Phuc Nguyen committed on May 6, 2024

Commit

00b5c9c

verified ·

1 Parent(s): 3188aec

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -16

app.py CHANGED Viewed

@@ -1,24 +1,36 @@
 import streamlit as st
 import pandas as pd
 import re
 st.set_page_config(layout='wide')
-def load_data():
-    return pd.read_csv(filepath_or_buffer='./data.csv')
-df = load_data()
 if 'idx' not in st.session_state:
     st.session_state.idx = 0
 st.markdown("<h1 style='text-align: center;'>Investigation Legal Documents Dataset Checker</h1>", unsafe_allow_html=True)
 col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10 = st.columns([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
 btn_prev = col_1.button(label='Previous sample', use_container_width=True)
 btn_next = col_2.button(label='Next sample', use_container_width=True)
 btn_save = col_3.button(label='Save changes', use_container_width=True)
 if btn_prev:
     if st.session_state.idx > 0:
@@ -30,17 +42,23 @@ if btn_next:
 st.markdown(f"<h3 style='text-align: center;'>Sample: {st.session_state.idx+1}/{len(df)}</h3>", unsafe_allow_html=True)
-context = st.text_area(label='Your context: ', value=df['contexts'][st.session_state.idx], height=300)
-question = st.text_area(label='Your question: ', value=df['questions'][st.session_state.idx], height=100)
-answer = st.text_area(label='Your answer: ', value=df['answers'][st.session_state.idx], height=100)
-if answer.strip() and context.strip():
-    highlighted_context = re.sub(re.escape(answer), "<mark>" + answer + "</mark>", context, flags=re.IGNORECASE)
-    st.markdown(highlighted_context, unsafe_allow_html=True)
-if btn_save:
-    df.loc[st.session_state.idx, 'contexts'] = context
-    df.loc[st.session_state.idx, 'questions'] = question
-    df.loc[st.session_state.idx, 'answers'] = answer
-    df.to_csv('./data.csv', index=False)

 import streamlit as st
 import pandas as pd
 import re
+import base64
 st.set_page_config(layout='wide')
def load_data(file):
    """Load the QA dataset to review from an uploaded CSV.

    Args:
        file: A path or file-like object accepted by ``pandas.read_csv``,
            or ``None`` when nothing has been uploaded yet.

    Returns:
        The parsed ``DataFrame``. When ``file`` is ``None`` or holds no
        parsable data, an empty frame with the ``context``, ``question``
        and ``answer`` columns is returned instead.
    """
    empty = pd.DataFrame(columns=['context', 'question', 'answer'])
    if file is None:
        return empty
    try:
        return pd.read_csv(file)
    except pd.errors.EmptyDataError:
        # A file with no content has no header row to parse; treat it like
        # "no file" so a ``df.empty`` check can handle it without a crash.
        return empty
def download_csv(dataframe):
    """Render a link that downloads ``dataframe`` as ``checked_data.csv``.

    The frame is serialised to CSV without its index, base64-encoded and
    embedded in a ``data:`` URI inside an anchor tag, which is then emitted
    through ``st.markdown`` as raw HTML.

    Args:
        dataframe: The ``DataFrame`` to export.
    """
    csv_text = dataframe.to_csv(index=False)
    payload = base64.b64encode(csv_text.encode('utf-8')).decode('ascii')
    # ``text/csv`` is the registered media type for CSV (``file/csv`` is
    # not); the charset parameter matches the UTF-8 encoding applied above.
    href = (
        f'<a href="data:text/csv;charset=utf-8;base64,{payload}" '
        'download="checked_data.csv">Download CSV File</a>'
    )
    st.markdown(href, unsafe_allow_html=True)
+uploaded_file = st.file_uploader("Upload CSV file", type=['csv'])
+df = load_data(uploaded_file)
 if 'idx' not in st.session_state:
     st.session_state.idx = 0
 st.markdown("<h1 style='text-align: center;'>Investigation Legal Documents Dataset Checker</h1>", unsafe_allow_html=True)
 col_1, col_2, col_3, col_4, col_5, col_6, col_7, col_8, col_9, col_10 = st.columns([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
 btn_prev = col_1.button(label='Previous sample', use_container_width=True)
 btn_next = col_2.button(label='Next sample', use_container_width=True)
 btn_save = col_3.button(label='Save changes', use_container_width=True)
+btn_download = col_4.button(label='Download CSV', use_container_width=True)
 if btn_prev:
     if st.session_state.idx > 0:
 st.markdown(f"<h3 style='text-align: center;'>Sample: {st.session_state.idx+1}/{len(df)}</h3>", unsafe_allow_html=True)
# Editing area: show the current sample, highlight the answer inside the
# context, and write edits back when "Save changes" was pressed.
if not df.empty:
    # The session index can outlive the file it was set for; clamp it so a
    # shorter upload does not index past the last row.
    if st.session_state.idx >= len(df):
        st.session_state.idx = len(df) - 1
    row = st.session_state.idx

    context = st.text_area(label='Your context: ', value=df['context'][row], height=300)
    question = st.text_area(label='Your question: ', value=df['question'][row], height=100)
    answer = st.text_area(label='Your answer: ', value=df['answer'][row], height=100)

    if answer.strip() and context.strip():
        # Use a function replacement so the matched text is wrapped as-is:
        # a plain replacement string would have its backslashes/group
        # references interpreted by ``re.sub`` and would overwrite the
        # context's own casing with the answer's.
        # NOTE(review): the context is rendered as raw HTML; escape it first
        # if uploaded files can be untrusted.
        highlighted_context = re.sub(
            re.escape(answer),
            lambda match: "<mark>" + match.group(0) + "</mark>",
            context,
            flags=re.IGNORECASE,
        )
        st.markdown(highlighted_context, unsafe_allow_html=True)

    if btn_save:
        df.loc[row, 'context'] = context
        df.loc[row, 'question'] = question
        df.loc[row, 'answer'] = answer
        if uploaded_file is not None:
            # Rewrite the upload buffer from the start, then drop any bytes
            # left over from the previous (longer) content.
            # NOTE(review): whether this buffer survives the next rerun
            # depends on Streamlit's handling of uploads — confirm.
            uploaded_file.seek(0)
            df.to_csv(uploaded_file, index=False)
            uploaded_file.truncate()

if btn_download:
    download_csv(df)