Spaces:

SonFox2920
/

Vnese_crawl

Build error

App Files Community

SonFox2920 committed on Mar 27, 2025

Commit

c375407

verified ·

1 Parent(s): f7dfc65

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -20

app.py CHANGED Viewed

@@ -201,7 +201,7 @@ class ClaimGenerator:
                         2. *Mượn cấu trúc và n-gram đặc trưng của SUPPORTED để tạo sự đánh lừa*
                            - Sử dụng n-gram đặc trưng của SUPPORTED:
-                             - "một số", "của người", "đối với", "lãnh thổ", "thế kỷ"
                              - "trong lịch sử", "của người dân", "khoảng thời gian"
                            - Tạo câu có cấu trúc giống SUPPORTED nhưng thay đổi thông tin cốt lõi.
                            - Ví dụ: "Trong lịch sử phát triển của [Title], một số yếu tố đã dẫn đến..." (dùng cụm từ SUPPORTED)
@@ -494,7 +494,7 @@ class ClaimGenerator:
             'results': results
         }
-def process_dataframe_with_claims(df: pd.DataFrame) -> pd.DataFrame:
     """Process each summary in the DataFrame and generate claims with fallback handling."""
     all_rows = []
     progress_bar = st.progress(0)
@@ -503,7 +503,7 @@ def process_dataframe_with_claims(df: pd.DataFrame) -> pd.DataFrame:
         context = row['Summary']
         title = row['Title']
-        for claim_type in ["SUPPORTED", "REFUTED", "NEI"]:
             generator = ClaimGenerator(claim_type)
             result = generator.generate_hard_claims(context, title, predict)
@@ -710,6 +710,7 @@ def create_id(topic, stt):
     # Generate unique ID
     topic_num = chars_to_nums(topic_abbr)
     return f'uit_{topic_num}_{stt}'
 def wikipedia_scrape(title_input, stt, filename):
     try:
         page = wikipedia.page(title_input)
@@ -783,25 +784,36 @@ def main():
             st.subheader("Danh sách bài viết từ Wikipedia:")
             st.dataframe(df, use_container_width=True)
             # Add automatic claim generation for all summaries
             if st.button("Tạo Claims cho tất cả bài viết"):
-                with st.spinner("Đang tạo claims cho tất cả bài viết..."):
-                    # Process the DataFrame and generate claims
-                    df_with_claims = process_dataframe_with_claims(df)
-                    st.subheader("Bảng dữ liệu với Claims:")
-                    st.dataframe(df_with_claims, use_container_width=True)
-                    # Download options
-                    csv = convert_df_to_csv(df_with_claims)
-                    claims_filename = f"uit_{uploaded_file.name.split('.')[0]}_with_claims.csv"
-                    st.download_button(
-                        label="Download CSV với Claims",
-                        data=csv,
-                        file_name=claims_filename,
-                        mime="text/csv",
-                    )
             # Original download option for basic DataFrame
             csv = convert_df_to_csv(df)

                         2. *Mượn cấu trúc và n-gram đặc trưng của SUPPORTED để tạo sự đánh lừa*
                            - Sử dụng n-gram đặc trưng của SUPPORTED:
+                             - "một số", "so với", "đối với", "lãnh thổ", "thế kỷ"
                              - "trong lịch sử", "của người dân", "khoảng thời gian"
                            - Tạo câu có cấu trúc giống SUPPORTED nhưng thay đổi thông tin cốt lõi.
                            - Ví dụ: "Trong lịch sử phát triển của [Title], một số yếu tố đã dẫn đến..." (dùng cụm từ SUPPORTED)
             'results': results
         }
+def process_dataframe_with_claims(df: pd.DataFrame, selected_claim_types: List[str]) -> pd.DataFrame:
     """Process each summary in the DataFrame and generate claims with fallback handling."""
     all_rows = []
     progress_bar = st.progress(0)
         context = row['Summary']
         title = row['Title']
+        for claim_type in selected_claim_types:
             generator = ClaimGenerator(claim_type)
             result = generator.generate_hard_claims(context, title, predict)
     # Generate unique ID
     topic_num = chars_to_nums(topic_abbr)
     return f'uit_{topic_num}_{stt}'
 def wikipedia_scrape(title_input, stt, filename):
     try:
         page = wikipedia.page(title_input)
             st.subheader("Danh sách bài viết từ Wikipedia:")
             st.dataframe(df, use_container_width=True)
+            # Add claim type selection
+            claim_types = ["SUPPORTED", "REFUTED", "NEI"]
+            selected_claim_types = st.multiselect(
+                "Chọn loại claim cần tạo:",
+                claim_types,
+                default=claim_types
+            )
             # Add automatic claim generation for all summaries
             if st.button("Tạo Claims cho tất cả bài viết"):
+                if not selected_claim_types:
+                    st.error("Vui lòng chọn ít nhất một loại claim để tạo.")
+                else:
+                    with st.spinner("Đang tạo claims cho tất cả bài viết..."):
+                        # Process the DataFrame and generate claims
+                        df_with_claims = process_dataframe_with_claims(df, selected_claim_types)
+                        st.subheader("Bảng dữ liệu với Claims:")
+                        st.dataframe(df_with_claims, use_container_width=True)
+                        # Download options
+                        csv = convert_df_to_csv(df_with_claims)
+                        claims_filename = f"uit_{uploaded_file.name.split('.')[0]}_with_claims.csv"
+                        st.download_button(
+                            label="Download CSV với Claims",
+                            data=csv,
+                            file_name=claims_filename,
+                            mime="text/csv",
+                        )
             # Original download option for basic DataFrame
             csv = convert_df_to_csv(df)