Spaces:

Di12
/

sentiment_analysis

Sleeping

App Files Community

Di12 committed on Jul 1, 2025

Commit

dca0786

verified ·

1 Parent(s): 71672d1

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -24

app.py CHANGED Viewed

@@ -219,35 +219,62 @@ def process_input(text_input, file):
         comments = [p.strip() for p in parts if p and p.strip()]
     elif file:
-        # Nếu file là bytes, chuyển đổi thành đối tượng BytesIO
         if isinstance(file, bytes):
-            file_content = io.BytesIO(file)
             try:
-                # Thử đọc như tệp văn bản
-                content = file_content.read().decode('utf-8') + "\n"
                 parts = re.split(r'[.?!]\s*|\n+', content)
-                comments = [p.strip() for p in parts if p and p.strip()]
-            except UnicodeDecodeError:
-                # Nếu không thành công, thử đọc như CSV
-                file_content.seek(0)
-                try:
-                    df = pd.read_csv(file_content, header=None, names=["Comment"], encoding='utf-8')
-                    comments += df["Comment"].dropna().astype(str).tolist()
-                except Exception as e:
-                    raise gr.Error(f"Lỗi khi đọc tệp CSV: {str(e)}")
-        # Nếu file là đường dẫn chuỗi
         elif isinstance(file, str):
-            try:
-                with open(file, 'r', encoding='utf-8') as f:
-                    content = f.read()
-                    parts = re.split(r'[.?!]\s*|\n+', content)
-                    comments = [p.strip() for p in parts if p and p.strip()]
-            except Exception as e:
-                raise gr.Error(f"Lỗi khi đọc tệp: {str(e)}")
         else:
-            raise gr.Error("Định dạng tệp không được hỗ trợ.")
     if len(comments) == 0:
         raise gr.Error("Vui lòng nhập ít nhất một bình luận hoặc tải lên tệp chứa bình luận.")

         comments = [p.strip() for p in parts if p and p.strip()]
     elif file:
+        # # Nếu file là bytes, chuyển đổi thành đối tượng BytesIO
+        # if isinstance(file, bytes):
+        #     file_content = io.BytesIO(file)
+        #     try:
+        #         # Thử đọc như tệp văn bản
+        #         content = file_content.read().decode('utf-8') + "\n"
+        #         parts = re.split(r'[.?!]\s*|\n+', content)
+        #         comments = [p.strip() for p in parts if p and p.strip()]
+        #     except UnicodeDecodeError:
+        #         # Nếu không thành công, thử đọc như CSV
+        #         file_content.seek(0)
+        #         try:
+        #             df = pd.read_csv(file_content, header=None, names=["Comment"], encoding='utf-8')
+        #             comments += df["Comment"].dropna().astype(str).tolist()
+        #         except Exception as e:
+        #             raise gr.Error(f"Lỗi khi đọc tệp CSV: {str(e)}")
+        # # Nếu file là đường dẫn chuỗi
+        # elif isinstance(file, str):
+        #     try:
+        #         with open(file, 'r', encoding='utf-8') as f:
+        #             content = f.read()
+        #             parts = re.split(r'[.?!]\s*|\n+', content)
+        #             comments = [p.strip() for p in parts if p and p.strip()]
+        #     except Exception as e:
+        #         raise gr.Error(f"Lỗi khi đọc tệp: {str(e)}")
+        # Đọc file CSV hoặc TXT
+        df = None
         if isinstance(file, bytes):
+            # thử đọc CSV
             try:
+                df = pd.read_csv(io.BytesIO(file), header=None, names=["Comment"], encoding="utf-8")
+            except Exception:
+                # fallback: đọc như text, rồi tách câu
+                content = io.BytesIO(file).read().decode('utf-8', errors='ignore')
                 parts = re.split(r'[.?!]\s*|\n+', content)
+                comments = [p.strip() for p in parts if p.strip()]
         elif isinstance(file, str):
+            # file path
+            if file.lower().endswith('.csv'):
+                df = pd.read_csv(file, header=None, names=["Comment"], encoding='utf-8')
+            else:
+                content = open(file, 'r', encoding='utf-8').read()
+                parts = re.split(r'[.?!]\s*|\n+', content)
+                comments = [p.strip() for p in parts if p.strip()]
         else:
+            raise gr.Error("Định dạng tệp không được hỗ trợ.")
+        # Nếu đọc được CSV, chuyển mỗi dòng thành comment nguyên bản
+        if df is not None:
+            df = df.dropna(subset=["Comment"])
+            if df["Comment"].empty:
+                raise gr.Error("File CSV không chứa comment hợp lệ.")
+            comments = df["Comment"].astype(str).tolist()
     if len(comments) == 0:
         raise gr.Error("Vui lòng nhập ít nhất một bình luận hoặc tải lên tệp chứa bình luận.")