Spaces:

NghiTran1009
/

annotator-tool

Sleeping

NghiTran1009 committed on Mar 11, 2025

Commit

aeb7075

verified ·

1 Parent(s): 9fac977

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

"""Streamlit annotator tool for reviewing and correcting chunking predictions.

The script loads a pickled CRF model and a CSV of test tokens, lets the user
pick one sentence, shows the model's predicted chunk labels in an editable
table, and saves the corrected rows to ``corrected_data.csv``.
"""
from pathlib import Path

import joblib
import pandas as pd
import streamlit as st

MODEL_PATH = "chunking_crf_model.pkl"
TEST_DATA_PATH = "chunking_test.csv"
OUTPUT_PATH = "corrected_data.csv"


@st.cache_resource
def load_model():
    """Load the trained CRF model once and reuse it across Streamlit reruns."""
    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code -- only ship a model file from a trusted source.
    return joblib.load(MODEL_PATH)


@st.cache_data
def load_test_data() -> pd.DataFrame:
    """Read the test CSV once instead of on every widget interaction.

    The result is cached, so changes to the file on disk are only picked up
    after the Streamlit cache is cleared or the app restarts.
    """
    return pd.read_csv(TEST_DATA_PATH)


def extract_features(sentence) -> list:
    """Build one feature dict per token from the ``Token`` and ``POS`` columns.

    Args:
        sentence: Mapping or DataFrame exposing parallel ``"Token"`` and
            ``"POS"`` sequences.

    Returns:
        A list of ``{"word": token, "pos": pos}`` dicts, one per token.
    """
    # NOTE(review): presumably these keys must match the features the model
    # was trained with -- confirm against the training script.
    return [
        {"word": token, "pos": pos}
        for token, pos in zip(sentence["Token"], sentence["POS"])
    ]


def save_corrections(edited: pd.DataFrame, path: str = OUTPUT_PATH) -> None:
    """Merge the edited sentence into the corrections file and write it back.

    Rows already stored for the same ``Sentence_ID`` are replaced; rows for
    other sentences are kept, so saving one sentence no longer discards the
    corrections made to previous ones.

    Args:
        edited: The rows returned by the data editor for the current sentence.
        path: Destination CSV file.
    """
    merged = edited
    if Path(path).exists():
        try:
            previous = pd.read_csv(path, encoding="utf-8")
        except pd.errors.EmptyDataError:
            # A zero-byte file has nothing worth keeping.
            previous = None
        if previous is not None:
            if "Sentence_ID" in previous.columns and "Sentence_ID" in edited.columns:
                # Drop the stale version of this sentence before appending.
                stale = previous["Sentence_ID"].isin(edited["Sentence_ID"])
                previous = previous[~stale]
            merged = pd.concat([previous, edited], ignore_index=True)
    merged.to_csv(path, index=False, encoding="utf-8")


crf = load_model()

# Load the test data and split it into one DataFrame per sentence.
df_test = load_test_data()
sentences = [group for _, group in df_test.groupby("Sentence_ID")]

# Web interface.
st.title("📝 Tool Annotator - Chỉnh sửa dữ liệu chunking")

if not sentences:
    # With no options the selectbox returns None and indexing would fail.
    st.warning("⚠️ Không tìm thấy câu nào trong chunking_test.csv.")
    st.stop()

# Pick the sentence to annotate (by position in the grouped list).
sentence_id = st.selectbox("Chọn câu", range(len(sentences)))
# Work on a copy so adding the prediction column never writes into a slice
# of the shared test DataFrame.
sentence = sentences[sentence_id].copy()

# Show the original rows.
st.write("🔹 **Câu gốc**")
st.table(sentence)

# Predict chunk labels for the selected sentence.
X_test = [extract_features(sentence)]
y_pred = crf.predict(X_test)[0]

# Attach the predictions as a new column.
sentence["Predicted_Chunk"] = y_pred

# Show the predictions in an editable table.
st.write("🔹 **Dữ liệu dự đoán**")
edited_df = st.data_editor(sentence, key="edit_table")

# Persist the corrected rows.
if st.button("Lưu chỉnh sửa"):
    save_corrections(edited_df)
    st.success("✅ Dữ liệu đã được lưu thành corrected_data.csv!")