Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -79,7 +79,6 @@ def clean_text(text: str) -> str:
|
|
| 79 |
|
| 80 |
return text
|
| 81 |
|
| 82 |
-
# Vocabulary class unchanged...
|
| 83 |
class Vocabulary:
|
| 84 |
def __init__(self):
|
| 85 |
self.word2id = dict()
|
|
@@ -187,11 +186,10 @@ bidirectional = False
|
|
| 187 |
dropout = 0.3
|
| 188 |
pad_idx = vocab["<pad>"]
|
| 189 |
unk_idx = vocab["<unk>"]
|
| 190 |
-
n_classes = 3
|
| 191 |
|
| 192 |
label_map = {0: 'tiêu cực', 1: 'bình thường', 2: 'tích cực'}
|
| 193 |
|
| 194 |
-
# Ensure model and its weights moved to correct device
|
| 195 |
def load_model(path: str):
|
| 196 |
model = RNN(input_dim, embedding_dim, hidden_dim, n_layers, bidirectional, dropout, pad_idx, n_classes)
|
| 197 |
model.load_state_dict(torch.load(path, map_location=device))
|
|
@@ -201,7 +199,6 @@ def load_model(path: str):
|
|
| 201 |
|
| 202 |
model = load_model(model_path)
|
| 203 |
|
| 204 |
-
# Prediction helper
|
| 205 |
def predict_sentiment(model, sentence, vocab, label_mapping=None):
|
| 206 |
tensor = vocab.corpus_to_tensor([sentence])[0]
|
| 207 |
length = torch.LongTensor([tensor.size(0)]).to(device)
|
|
@@ -216,11 +213,9 @@ def predict_sentiment(model, sentence, vocab, label_mapping=None):
|
|
| 216 |
def process_input(text_input, file):
|
| 217 |
comments = []
|
| 218 |
|
| 219 |
-
# Xử lý văn bản nhập trực tiếp
|
| 220 |
if text_input:
|
| 221 |
comments += [line.strip() for line in text_input.splitlines() if line.strip()]
|
| 222 |
|
| 223 |
-
# Xử lý tệp tải lên
|
| 224 |
if file is not None:
|
| 225 |
file_name = file.name
|
| 226 |
|
|
@@ -234,14 +229,12 @@ def process_input(text_input, file):
|
|
| 234 |
comments += [line.strip() for line in content.splitlines() if line.strip()]
|
| 235 |
|
| 236 |
elif file_name.endswith('.csv'):
|
| 237 |
-
df = pd.read_csv(file, encoding='utf-8')
|
| 238 |
-
comments
|
| 239 |
|
| 240 |
-
# Kiểm tra nếu không có bình luận nào được cung cấp
|
| 241 |
if not comments:
|
| 242 |
return pd.DataFrame(columns=["Comment", "Dự đoán", "Xác suất"])
|
| 243 |
|
| 244 |
-
# Dự đoán cảm xúc cho từng bình luận
|
| 245 |
results = []
|
| 246 |
for comment in comments:
|
| 247 |
label, probability = predict_sentiment(model, comment, vocab, label_map)
|
|
|
|
| 79 |
|
| 80 |
return text
|
| 81 |
|
|
|
|
| 82 |
class Vocabulary:
|
| 83 |
def __init__(self):
|
| 84 |
self.word2id = dict()
|
|
|
|
| 186 |
dropout = 0.3
|
| 187 |
pad_idx = vocab["<pad>"]
|
| 188 |
unk_idx = vocab["<unk>"]
|
| 189 |
+
n_classes = 3
|
| 190 |
|
| 191 |
label_map = {0: 'tiêu cực', 1: 'bình thường', 2: 'tích cực'}
|
| 192 |
|
|
|
|
| 193 |
def load_model(path: str):
|
| 194 |
model = RNN(input_dim, embedding_dim, hidden_dim, n_layers, bidirectional, dropout, pad_idx, n_classes)
|
| 195 |
model.load_state_dict(torch.load(path, map_location=device))
|
|
|
|
| 199 |
|
| 200 |
model = load_model(model_path)
|
| 201 |
|
|
|
|
| 202 |
def predict_sentiment(model, sentence, vocab, label_mapping=None):
|
| 203 |
tensor = vocab.corpus_to_tensor([sentence])[0]
|
| 204 |
length = torch.LongTensor([tensor.size(0)]).to(device)
|
|
|
|
| 213 |
def process_input(text_input, file):
|
| 214 |
comments = []
|
| 215 |
|
|
|
|
| 216 |
if text_input:
|
| 217 |
comments += [line.strip() for line in text_input.splitlines() if line.strip()]
|
| 218 |
|
|
|
|
| 219 |
if file is not None:
|
| 220 |
file_name = file.name
|
| 221 |
|
|
|
|
| 229 |
comments += [line.strip() for line in content.splitlines() if line.strip()]
|
| 230 |
|
| 231 |
elif file_name.endswith('.csv'):
|
| 232 |
+
df = pd.read_csv(file, header=None, names=['Comment'], encoding='utf-8')
|
| 233 |
+
comments = df['Comment'].dropna().astype(str).tolist()
|
| 234 |
|
|
|
|
| 235 |
if not comments:
|
| 236 |
return pd.DataFrame(columns=["Comment", "Dự đoán", "Xác suất"])
|
| 237 |
|
|
|
|
| 238 |
results = []
|
| 239 |
for comment in comments:
|
| 240 |
label, probability = predict_sentiment(model, comment, vocab, label_map)
|