Spaces:

SonFox2920
/

FC_annotation_check

Sleeping

App Files Files Community

FC_annotation_check / annotate.py

SonFox2920

Upload annotate.py

abe716b verified over 1 year ago

raw

history blame contribute delete

7.54 kB

	import streamlit as st
	import pandas as pd

	# Page-level configuration, issued before any other Streamlit call in this
	# script; the wide layout gives the side-by-side Context/Claim columns more room.
	st.set_page_config(layout="wide")

	# Load the dataset
	def load_data():
	    """Read the CSV supplied through the sidebar file uploader.

	    Returns:
	        ``(DataFrame, file name)`` when a file has been uploaded, otherwise
	        ``None``. The frame is guaranteed to have ``Label_n`` and
	        ``Annotator`` columns; either one is created filled with ``None``
	        when the CSV does not already contain it.
	    """
	    csv_upload = st.sidebar.file_uploader("Upload CSV file", type=["csv"])
	    if csv_upload is None:
	        # Nothing uploaded yet; the caller checks for None.
	        return None

	    frame = pd.read_csv(csv_upload)
	    # Make sure the columns the annotation workflow writes to are present.
	    for required_col in ('Label_n', 'Annotator'):
	        if required_col not in frame.columns:
	            frame[required_col] = None
	    return frame, csv_upload.name

	# Save the annotations from session_state to DataFrame
	def save_annotations_to_df(df):
	    """Copy the labels kept in ``st.session_state.selected_labels`` into ``df``.

	    The i-th label is written to the i-th row of the ``Label_n`` column.
	    Rows are addressed by position rather than by index label, so the
	    result does not depend on the frame having a default 0..n-1 index, and
	    labels beyond the last row are ignored instead of appending new rows.

	    Args:
	        df: DataFrame being annotated; modified in place.

	    Returns:
	        The same DataFrame, for call-chaining convenience.
	    """
	    labels = st.session_state.selected_labels

	    if 'Label_n' not in df.columns:
	        df['Label_n'] = None
	    label_pos = df.columns.get_loc('Label_n')

	    # zip() stops at the shorter side: surplus labels never enlarge the frame,
	    # and rows without a corresponding label keep their current value.
	    for row_pos, label in zip(range(len(df)), labels):
	        df.iat[row_pos, label_pos] = label
	    return df

	# Create the annotation app
	def annotation_app():
	    """Build the three-tab annotation UI: Mission, Annotate and Save.

	    * Mission shows the task instructions.
	    * Annotate shows one CSV row at a time (``Context`` and ``Claim``
	      columns) with a label selector and Previous/Next buttons that wrap
	      around at either end of the file.
	    * Save offers the labelled data as a CSV download once every row has a
	      label and an annotator name has been entered.

	    The script stops early (``st.stop()``) when no CSV has been uploaded,
	    when the CSV lacks a required column, or when it has no rows.

	    Values kept in ``st.session_state``:
	        current_index: position of the row currently displayed.
	        selected_labels: one entry per row, the chosen label or a missing value.
	        annotation_source: name of the upload the two values above belong to.
	    """
	    label_choices = ('SUPPORTED', 'REFUTED', 'NEI')
	    context_col = 'Context'
	    claim_col = 'Claim'
	    label_col = 'Label_n'

	    tab0, tab1, tab2 = st.tabs(["Mission", "Annotate", "Save"])

	    with tab0:
	        st.title("Nhiệm vụ")
	        st.write("""
	Nhiệm vụ của bạn là gán nhãn cho các câu tuyên bố dựa trên ngữ cảnh được cung cấp trước đó. Có ba nhãn mà bạn cần phải chọn:
	<span style='color:#7FFF00'>SUPPORTED</span> (Được hỗ trợ), <span style='color:#DC143C'>REFUTED</span> (Bị phủ nhận), hoặc <span style='color:#FFD700'>NEI</span> (Không đủ thông tin). Dưới đây là các bước để thực hiện nhiệm vụ này:

	1. Đọc ngữ cảnh (context): Hiểu rõ nội dung, thông tin của đoạn văn bản được cung cấp.

	2. Xem câu tuyên bố (claim): Dựa trên thông tin, nội dung của đoạn văn bản, bạn sẽ đánh giá câu tuyên bố.

	3. Phân loại câu tuyên bố: Chọn nhãn phù hợp cho câu tuyên bố:
	- <span style='color:#7FFF00'>SUPPORTED</span> (Được hỗ trợ): Khi câu tuyên bố là chính xác theo thông tin trong ngữ cảnh.
	- <span style='color:#DC143C'>REFUTED</span> (Bị phủ nhận): Khi câu tuyên bố là sai theo thông tin trong ngữ cảnh.
	- <span style='color:#FFD700'>NEI</span> (Không đủ thông tin): Khi thông tin của câu tuyên bố không thể xác nhận được đúng hay sai dựa trên ngữ cảnh.

	4. Lưu dữ liệu: Sau khi đã chọn nhãn cho tất cả các câu tuyên bố trong tệp CSV, hãy lưu lại kết quả với thông tin người gán nhãn.

	5. Di chuyển giữa các câu tuyên bố: Sử dụng các nút "Previous" và "Next" để di chuyển giữa các câu tuyên bố trong tệp.

	Sau khi hoàn thành, bạn có thể lưu lại toàn bộ dữ liệu với tên của mình để xác nhận nhiệm vụ đã được hoàn thành.
	""", unsafe_allow_html=True)

	    data = load_data()
	    if data is None:
	        st.sidebar.warning("Please upload a CSV file.")
	        st.stop()

	    df, original_filename = data

	    # Drop "Unnamed..." columns (usually a saved index). The copy makes the
	    # column writes further down act on an independent frame, not a slice.
	    df = df.loc[:, ~df.columns.str.contains('^Unnamed')].copy()

	    # Fail with a readable message instead of a KeyError/IndexError below.
	    missing_cols = [col for col in (context_col, claim_col) if col not in df.columns]
	    if missing_cols:
	        st.error(f"The uploaded CSV is missing required column(s): {', '.join(missing_cols)}")
	        st.stop()
	    if df.empty:
	        st.warning("The uploaded CSV contains no rows to annotate.")
	        st.stop()

	    _sync_session_state(df, original_filename, label_col)

	    row_count = len(df)
	    current_index = st.session_state.current_index
	    current_row = df.iloc[current_index]

	    # NOTE(review): the nesting of the containers below was reconstructed from a
	    # whitespace-flattened copy of this file; confirm it matches the intended layout.
	    with tab1:
	        c3 = st.container(border=True)
	        with c3:
	            left_column, right_column = st.columns([0.65, 0.35])

	            with left_column:
	                c3_1 = st.container(border=True, height=550)
	                with c3_1:
	                    st.subheader("Context")
	                    st.write(current_row[context_col])

	            with right_column:
	                c3_2 = st.container(border=True, height=450)
	                with c3_2:
	                    st.subheader("Claim")
	                    st.write(current_row[claim_col])

	                    st.subheader("Label")
	                    c3_3 = st.container(border=True, height=200)
	                    with c3_3:
	                        current_label = st.session_state.selected_labels[current_index]

	                        selected_label = st.selectbox(
	                            "Select the label for this claim",
	                            # The leading empty option acts as the "nothing chosen" placeholder.
	                            options=('',) + label_choices,
	                            index=_label_option_index(current_label, label_choices),
	                            format_func=lambda x: 'Select label for claim ...' if x == '' else x,
	                        )

	                        # Only a real choice overwrites the stored label; the
	                        # placeholder leaves the previous value untouched.
	                        if selected_label:
	                            st.session_state.selected_labels[current_index] = selected_label

	                        # Navigation buttons; both wrap around at the ends of the file.
	                        previous, next_ = st.columns(2, gap='large')

	                        with previous:
	                            if st.button("Previous"):
	                                st.session_state.current_index = (current_index - 1) % row_count
	                                st.rerun()

	                        with next_:
	                            if st.button("Next"):
	                                if selected_label:
	                                    st.session_state.selected_labels[current_index] = selected_label
	                                st.session_state.current_index = (current_index + 1) % row_count
	                                st.rerun()

	    with tab2:
	        annotator_name = st.text_input("Enter your name to save annotations").strip()

	        # Stamp every row with the annotator's name once one has been entered.
	        if annotator_name:
	            df['Annotator'] = annotator_name

	        # Bring the labels chosen in the Annotate tab into the frame before exporting.
	        df = save_annotations_to_df(df)

	        all_annotated = df[label_col].notna().all()

	        if not all_annotated:
	            st.warning("Please ensure all claims are annotated before saving.")
	        elif not annotator_name:
	            st.warning("Please enter your name before saving.")
	        else:
	            st.write("Annotated DataFrame:")
	            st.dataframe(df)

	            csv = df.to_csv(index=False).encode('utf-8')
	            save_filename = _build_save_filename(df, annotator_name, original_filename)

	            downloaded = st.download_button(
	                label="Save and Download",
	                data=csv,
	                file_name=save_filename,
	                mime='text/csv',
	            )
	            # Confirm only after the button was actually pressed.
	            if downloaded:
	                st.success(f"Data saved successfully as {save_filename}!")


	def _sync_session_state(df, source_name, label_col):
	    """Reset the navigation state when it does not belong to the current upload.

	    The state is rebuilt from ``df[label_col]`` when the uploaded file name
	    changed or the stored label list no longer has one entry per row; an
	    out-of-range ``current_index`` is put back to the first row.
	    """
	    state = st.session_state
	    is_stale = (
	        state.get('annotation_source') != source_name
	        or len(state.get('selected_labels', ())) != len(df)
	    )
	    if is_stale:
	        state.annotation_source = source_name
	        state.selected_labels = list(df[label_col])
	        state.current_index = 0
	    elif 'current_index' not in state or not 0 <= state.current_index < len(df):
	        state.current_index = 0


	def _label_option_index(label, label_choices):
	    """Return the selectbox position for *label*; 0 is the empty placeholder.

	    Missing values and anything that is not one of *label_choices* map to
	    the placeholder instead of raising.
	    """
	    try:
	        return label_choices.index(label) + 1
	    except ValueError:
	        return 0


	def _build_save_filename(df, annotator_name, original_filename):
	    """Return the download name ``annotated_<title>_<annotator>.csv``.

	    ``<title>`` is the first value of the ``Title`` column when that column
	    exists and the value is present; otherwise the uploaded file's name
	    without its extension is used.
	    """
	    title = None
	    if 'Title' in df.columns and len(df) > 0:
	        first_title = df['Title'].iloc[0]
	        if pd.notna(first_title):
	            title = first_title
	    if title is None:
	        title = original_filename.rsplit('.', 1)[0]
	    return f"annotated_{title}_{annotator_name}.csv"
	# Script entry point: build the annotation UI when this file is executed
	# directly rather than imported as a module.
	if __name__ == '__main__':
	    annotation_app()