Spaces:

Di12
/

sentiment_analysis

Sleeping

App Files Files Community

Di12 committed on Jun 30, 2025

Commit

4c1c582

verified ·

1 Parent(s): 5bb637b

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -31

app.py CHANGED Viewed

@@ -210,37 +210,46 @@ def predict_sentiment(model, sentence, vocab, label_mapping=None):
     return (label_mapping[idx], probs) if label_mapping else (idx, probs)
 def process_input(text_input, file):
-    comments = []
-    if text_input:
-        comments += [line.strip() for line in text_input.splitlines() if line.strip()]
-    elif file:
-        # Nếu file là bytes, chuyển đổi thành đối tượng BytesIO
-        if isinstance(file, bytes):
-            file_content = io.BytesIO(file)
-            try:
-                # Thử đọc như tệp văn bản
-                content = file_content.read().decode('utf-8')
-                comments += [line.strip() for line in content.splitlines() if line.strip()]
-            except UnicodeDecodeError:
-                # Nếu không thành công, thử đọc như CSV
-                file_content.seek(0)
-                try:
-                    df = pd.read_csv(file_content, header=None, names=["Comment"], encoding='utf-8')
-                    comments += df["Comment"].dropna().astype(str).tolist()
-                except Exception as e:
-                    raise gr.Error(f"Lỗi khi đọc tệp CSV: {str(e)}")
-        # Nếu file là đường dẫn chuỗi
-        elif isinstance(file, str):
-            try:
-                with open(file, 'r', encoding='utf-8') as f:
-                    content = f.read()
-                    comments += [line.strip() for line in content.splitlines() if line.strip()]
-            except Exception as e:
-                raise gr.Error(f"Lỗi khi đọc tệp: {str(e)}")
-        else:
-            raise gr.Error("Định dạng tệp không được hỗ trợ.")
     if len(comments) == 0:
         raise gr.Error("Vui lòng nhập ít nhất một bình luận hoặc tải lên tệp chứa bình luận.")

     return (label_mapping[idx], probs) if label_mapping else (idx, probs)
 def process_input(text_input, file):
+    content = ""
+    if text:
+        content += text + "\n"
+    if file is not None:
+        content += file.read().decode('utf-8') + "\n"
+    # Tách câu: theo dấu ., ?, ! hoặc xuống dòng
+    parts = re.split(r'[.?!]\s*|\n+', content)
+    comments = [p.strip() for p in parts if p and p.strip()]
+    # comments = []
+    # if text_input:
+    #     comments += [line.strip() for line in text_input.splitlines() if line.strip()]
+    # elif file:
+    #     # Nếu file là bytes, chuyển đổi thành đối tượng BytesIO
+    #     if isinstance(file, bytes):
+    #         file_content = io.BytesIO(file)
+    #         try:
+    #             # Thử đọc như tệp văn bản
+    #             content = file_content.read().decode('utf-8')
+    #             comments += [line.strip() for line in content.splitlines() if line.strip()]
+    #         except UnicodeDecodeError:
+    #             # Nếu không thành công, thử đọc như CSV
+    #             file_content.seek(0)
+    #             try:
+    #                 df = pd.read_csv(file_content, header=None, names=["Comment"], encoding='utf-8')
+    #                 comments += df["Comment"].dropna().astype(str).tolist()
+    #             except Exception as e:
+    #                 raise gr.Error(f"Lỗi khi đọc tệp CSV: {str(e)}")
+    #     # Nếu file là đường dẫn chuỗi
+    #     elif isinstance(file, str):
+    #         try:
+    #             with open(file, 'r', encoding='utf-8') as f:
+    #                 content = f.read()
+    #                 comments += [line.strip() for line in content.splitlines() if line.strip()]
+    #         except Exception as e:
+    #             raise gr.Error(f"Lỗi khi đọc tệp: {str(e)}")
+    #     else:
+    #         raise gr.Error("Định dạng tệp không được hỗ trợ.")
     if len(comments) == 0:
         raise gr.Error("Vui lòng nhập ít nhất một bình luận hoặc tải lên tệp chứa bình luận.")