Spaces:

Ptul2x5
/

Student_Feedback_Sentiment

Paused

App Files Files Community

Ptul2x5 committed on Nov 13, 2025

Commit

68f763b

verified ·

1 Parent(s): 0b80577

Update

Browse files

Files changed (9) hide show

PhoBERTPairABSA.py +24 -0
app.py +188 -218
database_manager.py +5 -49
forms.py +1 -8
model_config.py +342 -0
models.py +3 -4
requirements.txt +0 -7
static/css/style.css +26 -0
static/js/app.js +105 -8

PhoBERTPairABSA.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import torch
+from torch import nn
+from transformers import AutoModel
class PhoBERTPairABSA(nn.Module):
    """Pair-ABSA classifier: scores the sentiment of one topic within a sentence.

    A pretrained encoder loaded with ``AutoModel.from_pretrained`` feeds the
    hidden state of the first token into a small feed-forward head.

    Args:
        base_model: Name or path handed to ``AutoModel.from_pretrained``.
        num_cls: Number of output logits produced by the head.
        dropout: Dropout probability used at both dropout layers of the head.
    """

    def __init__(self, base_model="vinai/phobert-base", num_cls=4, dropout=0.2):
        super().__init__()
        self.backbone = AutoModel.from_pretrained(base_model)
        width = self.backbone.config.hidden_size
        # The position of each layer determines its state-dict key
        # (classifier.0 ... classifier.5), so the order must not change.
        head_layers = [
            nn.Dropout(dropout),
            nn.Linear(width, width),
            nn.GELU(),
            nn.LayerNorm(width),
            nn.Dropout(dropout),
            nn.Linear(width, num_cls),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, input_ids, attention_mask):
        """Return the head's logits for each sequence in the batch."""
        encoded = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
        # Only the first token's hidden state is used as the sequence summary.
        first_token_state = encoded.last_hidden_state[:, 0, :]
        return self.classifier(first_token_state)

app.py CHANGED Viewed

@@ -12,66 +12,60 @@ from flask_login import LoginManager, login_user, logout_user, login_required, c
 from functools import wraps
 from models import db, User, Feedback
 from forms import RegistrationForm, LoginForm
-from PhoBERTMultiTask import PhoBERTMultiTask
 from datetime import datetime, timedelta
 import pytz
 from database_manager import db_manager
 app = Flask(__name__)
-# Cấu hình
 app.config['SECRET_KEY'] = 'your-secret-key-change-this-in-production'
-# Database backup functions
 def backup_database(force: bool = False):
     """Backup database to Hugging Face Hub"""
     try:
         return db_manager.backup_database(force=force)
-    except Exception as e:
-        print(f"❌ Backup error: {e}")
         return False
 def restore_database():
     """Restore database from Hugging Face Hub"""
     try:
         return db_manager.restore_database()
-    except Exception as e:
-        print(f"❌ Restore error: {e}")
         return False
 def run_scheduler():
     """Run scheduled backup every hour"""
     while True:
         schedule.run_pending()
-        time.sleep(60)  # Check every minute
-# Schedule backup every hour
 schedule.every().hour.do(backup_database)
-# Start scheduler in background thread
 scheduler_thread = Thread(target=run_scheduler, daemon=True)
 scheduler_thread.start()
-# Backup on shutdown
 atexit.register(backup_database)
-# Thiết lập múi giờ Việt Nam
 VIETNAM_TIMEZONE = pytz.timezone('Asia/Ho_Chi_Minh')
 def utc_to_vietnam_time(utc_datetime):
     """Chuyển đổi thời gian UTC sang múi giờ Việt Nam"""
     if utc_datetime is None:
         return None
-    # Nếu datetime không có timezone info, coi như UTC
     if utc_datetime.tzinfo is None:
         utc_datetime = pytz.utc.localize(utc_datetime)
     return utc_datetime.astimezone(VIETNAM_TIMEZONE)
-# Sử dụng đường dẫn database phù hợp với Hugging Face Spaces
 db_path = os.path.join(os.getcwd(), 'instance', 'feedback_analysis.db')
 os.makedirs(os.path.dirname(db_path), exist_ok=True)
 app.config['SQLALCHEMY_DATABASE_URI'] = f'sqlite:///{db_path}'
 app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
-# Khởi tạo extensions
 db.init_app(app)
 login_manager = LoginManager()
 login_manager.init_app(app)
@@ -79,7 +73,6 @@ login_manager.login_view = 'login'
 login_manager.login_message = 'Vui lòng đăng nhập để sử dụng hệ thống phân tích feedback.'
 login_manager.login_message_category = 'info'
-# Thêm hàm chuyển đổi múi giờ vào template context
 @app.context_processor
 def utility_processor():
     return dict(utc_to_vietnam_time=utc_to_vietnam_time)
@@ -88,8 +81,131 @@ def utility_processor():
 def load_user(user_id):
     return User.query.get(int(user_id))
-# Decorator để yêu cầu quyền admin
 def admin_required(f):
     @wraps(f)
     def decorated_function(*args, **kwargs):
         if not current_user.is_authenticated:
@@ -101,30 +217,27 @@ def admin_required(f):
     return decorated_function
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# === Load tokenizer & model ===
-MODEL_REPO = "Ptul2x5/Student_Feedback_Sentiment"  # 🔹 Repo Hugging Face của bạn
 try:
-    # Disable hf_transfer to avoid the error
     os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '0'
     tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO, use_fast=False)
-    # Load model weights from Hugging Face
-    MODEL_URL = f"https://huggingface.co/{MODEL_REPO}/resolve/main/multitask_model.bin"
-    state_dict = torch.hub.load_state_dict_from_url(MODEL_URL, map_location=device)
-    model = PhoBERTMultiTask(num_sentiment=3, num_topic=4)
     model.load_state_dict(state_dict, strict=False)
     model.to(device)
     model.eval()
-except Exception as e:
-    print(f"Error loading model: {e}")
     model = None
     tokenizer = None
-# ====== ROUTES ======
 @app.route("/", methods=["GET"])
 @login_required
 def home():
@@ -141,10 +254,7 @@ def register():
         user.set_password(form.password.data)
         db.session.add(user)
         db.session.commit()
-        # Backup database after user registration
         backup_database()
         flash('Đăng ký thành công! Vui lòng đăng nhập.', 'success')
         return redirect(url_for('login'))
@@ -177,34 +287,27 @@ def logout():
 @app.route("/api/health", methods=["GET"])
 def health():
-    return jsonify({"status": "healthy", "message": "✅ PhoBERT MultiTask API is running!"})
 @app.route("/my-statistics")
 @login_required
 def my_statistics():
-    """Trang thống kê feedback cá nhân của user"""
     try:
-        # Lấy thống kê feedback của user hiện tại
         user_feedbacks = Feedback.query.filter_by(user_id=current_user.id).all()
         total_feedbacks = len(user_feedbacks)
-        # Thống kê sentiment
         sentiment_stats = db.session.query(
             Feedback.sentiment,
             db.func.count(Feedback.id).label('count')
         ).filter_by(user_id=current_user.id).group_by(Feedback.sentiment).all()
         sentiment_stats = [{'sentiment': item.sentiment, 'count': item.count} for item in sentiment_stats]
-        # Thống kê topic
         topic_stats = db.session.query(
             Feedback.topic,
             db.func.count(Feedback.id).label('count')
         ).filter_by(user_id=current_user.id).group_by(Feedback.topic).all()
         topic_stats = [{'topic': item.topic, 'count': item.count} for item in topic_stats]
-        # Thống kê theo ngày (30 ngày gần nhất)
-        from datetime import datetime, timedelta
         thirty_days_ago = datetime.now() - timedelta(days=30)
         daily_stats = db.session.query(
             db.func.date(Feedback.created_at).label('date'),
@@ -217,7 +320,6 @@ def my_statistics():
         ).order_by('date').all()
         daily_stats = [{'date': str(item.date), 'count': item.count} for item in daily_stats]
-        # Feedback gần nhất của user
         recent_feedbacks = Feedback.query.filter_by(user_id=current_user.id)\
                                        .order_by(Feedback.created_at.desc()).limit(10).all()
@@ -234,31 +336,23 @@ def my_statistics():
 @app.route("/admin/database")
 @admin_required
 def view_database():
-    """Trang xem database với giao diện đẹp"""
     try:
-        # Lấy thống kê tổng quan
         total_users = User.query.count()
         total_feedbacks = Feedback.query.count()
-        # Lấy feedbacks gần nhất
         recent_feedbacks = Feedback.query.order_by(Feedback.created_at.desc()).limit(10).all()
-        # Thống kê sentiment
         sentiment_stats = db.session.query(
             Feedback.sentiment,
             db.func.count(Feedback.id).label('count')
         ).group_by(Feedback.sentiment).all()
         sentiment_stats = [{'sentiment': item.sentiment, 'count': item.count} for item in sentiment_stats]
-        # Thống kê topic
         topic_stats = db.session.query(
             Feedback.topic,
             db.func.count(Feedback.id).label('count')
         ).group_by(Feedback.topic).all()
         topic_stats = [{'topic': item.topic, 'count': item.count} for item in topic_stats]
-        # Thống kê theo ngày (7 ngày gần nhất)
-        from datetime import datetime, timedelta
         seven_days_ago = datetime.now() - timedelta(days=7)
         daily_stats = db.session.query(
             db.func.date(Feedback.created_at).label('date'),
@@ -282,7 +376,6 @@ def view_database():
 @app.route("/api/feedback-history", methods=["GET"])
 @login_required
 def get_feedback_history():
-    """Lấy lịch sử feedback của user hiện tại với filter thời gian"""
     try:
         page = request.args.get('page', 1, type=int)
         per_page = request.args.get('per_page', 10, type=int)
@@ -290,53 +383,35 @@ def get_feedback_history():
         start_date = request.args.get('start_date', None, type=str)
         end_date = request.args.get('end_date', None, type=str)
-        # Tạo query base
         query = Feedback.query.filter_by(user_id=current_user.id)
-        # Áp dụng filter thời gian
         if time_filter != 'all':
             vietnam_now = utc_to_vietnam_time(datetime.utcnow())
             if time_filter == 'today':
-                # Từ đầu ngày hôm nay đến hiện tại
                 today_start = vietnam_now.replace(hour=0, minute=0, second=0, microsecond=0)
                 today_start_utc = today_start.astimezone(pytz.utc).replace(tzinfo=None)
                 query = query.filter(Feedback.created_at >= today_start_utc)
             elif time_filter == 'week':
-                # 1 tuần trước đến hiện tại
                 week_ago = vietnam_now - timedelta(days=7)
                 week_ago_utc = week_ago.astimezone(pytz.utc).replace(tzinfo=None)
                 query = query.filter(Feedback.created_at >= week_ago_utc)
             elif time_filter == 'month':
-                # 1 tháng trước đến hiện tại
                 month_ago = vietnam_now - timedelta(days=30)
                 month_ago_utc = month_ago.astimezone(pytz.utc).replace(tzinfo=None)
                 query = query.filter(Feedback.created_at >= month_ago_utc)
             elif time_filter == 'custom' and start_date and end_date:
-                # Filter theo ngày tùy chỉnh
                 try:
                     start_datetime = datetime.strptime(start_date, '%Y-%m-%d')
                     end_datetime = datetime.strptime(end_date, '%Y-%m-%d')
-                    # Chuyển đổi sang UTC
                     start_datetime_utc = VIETNAM_TIMEZONE.localize(start_datetime).astimezone(pytz.utc).replace(tzinfo=None)
                     end_datetime_utc = VIETNAM_TIMEZONE.localize(end_datetime.replace(hour=23, minute=59, second=59)).astimezone(pytz.utc).replace(tzinfo=None)
-                    query = query.filter(Feedback.created_at >= start_datetime_utc,
-                                       Feedback.created_at <= end_datetime_utc)
                 except ValueError:
                     return jsonify({'error': 'Định dạng ngày không hợp lệ'}), 400
-        # Đếm tổng số feedback theo filter (không phân trang)
         total_count = query.count()
-        # Sắp xếp theo thời gian mới nhất và phân trang
-        feedbacks = query.order_by(Feedback.created_at.desc())\
-                         .paginate(page=page, per_page=per_page, error_out=False)
         feedback_list = []
         for feedback in feedbacks.items:
@@ -352,7 +427,7 @@ def get_feedback_history():
         return jsonify({
             'feedbacks': feedback_list,
-            'total': total_count,  # Sử dụng total_count thay vì feedbacks.total
             'pages': feedbacks.pages,
             'current_page': page,
             'has_next': feedbacks.has_next,
@@ -371,63 +446,24 @@ def predict():
         if not text:
             return jsonify({"error": "Missing 'text' field"}), 400
-        # Validate input length
         if len(text) > 1000:
             return jsonify({"error": "Text quá dài. Vui lòng nhập tối đa 1000 ký tự."}), 400
-        # Tokenize
-        if tokenizer is None:
-            return jsonify({"error": "Tokenizer not loaded. Please restart the application."}), 500
-        inputs = tokenizer(
-            text, return_tensors="pt", truncation=True, padding=True, max_length=128
-        ).to(device)
-        # Inference
-        if model is None:
-            return jsonify({"error": "Model not loaded. Please restart the application."}), 500
-        with torch.no_grad():
-            logits_sent, logits_topic = model(inputs["input_ids"], inputs["attention_mask"])
-            sent = torch.argmax(logits_sent, dim=1).item()
-            topic = torch.argmax(logits_topic, dim=1).item()
-        # Mapping
-        SENTIMENT_MAP = {0: "negative", 1: "neutral", 2: "positive"}
-        TOPIC_MAP = {0: "lecturer", 1: "training_program", 2: "facility", 3: "others"}
-        sentiment = SENTIMENT_MAP[sent]
-        topic_result = TOPIC_MAP[topic]
-        sentiment_confidence = float(torch.softmax(logits_sent, dim=1).max().item())
-        topic_confidence = float(torch.softmax(logits_topic, dim=1).max().item())
-        # Lưu feedback vào database
         try:
-            feedback = Feedback(
-                text=text,
-                sentiment=sentiment,
-                topic=topic_result,
-                sentiment_confidence=sentiment_confidence,
-                topic_confidence=topic_confidence,
-                user_id=current_user.id
-            )
-            db.session.add(feedback)
             db.session.commit()
-            # Backup database after adding feedback
             backup_database()
-        except Exception as db_error:
-            print(f"Database error: {db_error}")
-            # Không dừng quá trình nếu lưu DB thất bại
         return jsonify({
-            "sentiment": sentiment,
-            "topic": topic_result,
-            "confidence": {
-                "sentiment": sentiment_confidence,
-                "topic": topic_confidence
-            }
         })
     except Exception as e:
         return jsonify({"error": f"Có lỗi xảy ra khi xử lý: {str(e)}"}), 500
@@ -435,7 +471,6 @@ def predict():
 @app.route('/admin/backup', methods=['POST'])
 @admin_required
 def manual_backup():
-    """Manual backup endpoint for admin"""
     try:
         if backup_database():
             return jsonify({"success": True, "message": "Backup completed successfully"})
@@ -447,7 +482,6 @@ def manual_backup():
 @app.route('/admin/restore', methods=['POST'])
 @admin_required
 def manual_restore():
-    """Manual restore endpoint for admin"""
     try:
         if restore_database():
             return jsonify({"success": True, "message": "Database restored successfully"})
@@ -456,17 +490,11 @@ def manual_restore():
     except Exception as e:
         return jsonify({"success": False, "message": f"Restore error: {str(e)}"}), 500
-# Khởi tạo database
 with app.app_context():
-    # Initialize database from backup if needed
     db_manager.initialize_database_if_needed()
     db.create_all()
-    # Create initial backup after database setup
     db_manager.backup_database()
-    # Add is_admin column if not exists
     try:
         db.session.execute(db.text("SELECT is_admin FROM users LIMIT 1"))
     except Exception:
@@ -476,11 +504,9 @@ with app.app_context():
         except Exception:
             pass
-    # Create default admin user if database is empty
     try:
         total_users = User.query.count()
         admin_user = User.query.filter_by(username='admin').first()
         if not admin_user and total_users == 0:
             admin_user = User(username='admin', is_admin=True)
             admin_user.set_password('123456')
@@ -495,7 +521,6 @@ with app.app_context():
 @app.route("/analyze-csv", methods=["POST"])
 @login_required
 def analyze_csv():
-    """Phân tích nhiều feedback từ file CSV"""
     try:
         if 'csvFile' not in request.files:
             return jsonify({'error': 'Không tìm thấy file CSV'}), 400
@@ -507,22 +532,18 @@ def analyze_csv():
         if not file.filename.lower().endswith('.csv'):
             return jsonify({'error': 'File phải có định dạng CSV'}), 400
-        # Đọc và validate file CSV
         try:
-            # Thử decode file với UTF-8
             file_content = file.stream.read().decode("UTF8")
         except UnicodeDecodeError:
-            return jsonify({'error': 'File CSV phải được mã hóa UTF-8. Vui lòng lưu file với encoding UTF-8 và thử lại.'}), 400
         try:
             stream = io.StringIO(file_content, newline=None)
             csv_input = csv.DictReader(stream)
-            # Kiểm tra file có header không
             if not csv_input.fieldnames:
-                return jsonify({'error': 'File CSV không có header (tên cột). Vui lòng thêm header vào file CSV.'}), 400
-            # Tìm cột chứa feedback
             feedback_column = None
             available_columns = []
             for col in csv_input.fieldnames:
@@ -533,25 +554,22 @@ def analyze_csv():
             if not feedback_column:
                 return jsonify({
-                    'error': f'Không tìm thấy cột chứa feedback. Các cột có sẵn: {", ".join(available_columns)}. Tên cột phải là: feedback, text, content hoặc comment'
                 }), 400
-            # Kiểm tra file có dữ liệu không
             rows = list(csv_input)
             if not rows:
-                return jsonify({'error': 'File CSV không có dữ liệu (chỉ có header). Vui lòng thêm dữ liệu vào file.'}), 400
         except csv.Error as e:
-            return jsonify({'error': f'File CSV không đúng định dạng: {str(e)}. Vui lòng kiểm tra lại file CSV.'}), 400
         except Exception as e:
             return jsonify({'error': f'Lỗi khi đọc file CSV: {str(e)}'}), 400
-        feedbacks = []
         results = []
         processed_count = 0
         error_count = 0
-        for row_num, row in enumerate(rows, start=1):  # Bắt đầu từ 1 vì hiển thị số dòng thực tế (trừ header)
             feedback_text = row[feedback_column].strip()
             if not feedback_text:
@@ -564,86 +582,47 @@ def analyze_csv():
                 continue
             try:
-                # Phân tích feedback
-                if tokenizer is None:
                     results.append({
                         "row": row_num,
                         "feedback": feedback_text,
-                        "sentiment": "N/A",
-                        "topic": "N/A",
-                        "sentiment_confidence": 0.0,
-                        "topic_confidence": 0.0,
-                        "error": "Tokenizer not loaded"
                     })
                     continue
-                inputs = tokenizer(feedback_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
-                inputs = {k: v.to(device) for k, v in inputs.items()}
-                with torch.no_grad():
-                    if model is None:
-                        results.append({
-                            "row": row_num,
-                            "feedback": feedback_text,
-                            "sentiment": "N/A",
-                            "topic": "N/A",
-                            "sentiment_confidence": 0.0,
-                            "topic_confidence": 0.0,
-                            "error": "Model not loaded"
-                        })
-                        continue
-                    # Gọi model với đúng parameters
-                    sentiment_logits, topic_logits = model(inputs["input_ids"], inputs["attention_mask"])
-                    sentiment_probs = torch.softmax(sentiment_logits, dim=-1)
-                    topic_probs = torch.softmax(topic_logits, dim=-1)
-                    sentiment_pred = torch.argmax(sentiment_probs, dim=-1).item()
-                    topic_pred = torch.argmax(topic_probs, dim=-1).item()
-                    sentiment_confidence = sentiment_probs[0][sentiment_pred].item()
-                    topic_confidence = topic_probs[0][topic_pred].item()
-                # Map predictions
-                sentiment_labels = ['negative', 'neutral', 'positive']
-                topic_labels = ['lecturer', 'training_program', 'facility', 'others']
-                sentiment = sentiment_labels[sentiment_pred]
-                topic = topic_labels[topic_pred]
-                # Lưu vào database
                 try:
-                    feedback = Feedback(
-                        text=feedback_text,
-                        sentiment=sentiment,
-                        topic=topic,
-                        sentiment_confidence=sentiment_confidence,
-                        topic_confidence=topic_confidence,
-                        user_id=current_user.id
-                    )
-                    db.session.add(feedback)
-                    feedbacks.append(feedback)
                     results.append({
                         'row': row_num,
                         'text': feedback_text[:100] + '...' if len(feedback_text) > 100 else feedback_text,
-                        'sentiment': sentiment,
-                        'topic': topic,
-                        'sentiment_confidence': round(sentiment_confidence * 100, 1),
-                        'topic_confidence': round(topic_confidence * 100, 1),
                         'success': True
                     })
                     processed_count += 1
-                except Exception as db_error:
-                    print(f"Database error for row {row_num}: {db_error}")
                     error_count += 1
                     results.append({
                         'row': row_num,
                         'text': feedback_text[:100] + '...' if len(feedback_text) > 100 else feedback_text,
-                        'error': f'Lỗi lưu database: {str(db_error)}'
                     })
             except Exception as e:
@@ -654,27 +633,19 @@ def analyze_csv():
                     'error': f'Lỗi phân tích: {str(e)}'
                 })
-        # Commit tất cả feedback vào database
         try:
             db.session.commit()
-            print(f"✅ Đã lưu {processed_count} feedback vào database")
-            # Backup database after CSV processing
             backup_database()
         except Exception as commit_error:
             db.session.rollback()
-            print(f"❌ Lỗi khi commit database: {commit_error}")
-            return jsonify({
-                'error': f'Lỗi khi lưu dữ liệu: {str(commit_error)}'
-            }), 500
         return jsonify({
             'success': True,
             'total_rows': len(results),
             'processed_count': processed_count,
             'error_count': error_count,
-            'results': results[:50],  # Chỉ trả về 50 kết quả đầu tiên để tránh response quá lớn
             'message': f'Đã xử lý {processed_count}/{len(results)} feedback thành công'
         })
@@ -685,6 +656,5 @@ def analyze_csv():
         }), 500
 if __name__ == "__main__":
-    # Hugging Face Spaces configuration
     debug = os.environ.get("DEBUG", "False").lower() == "true"
     app.run(host="0.0.0.0", port=7860, debug=debug)

 from functools import wraps
 from models import db, User, Feedback
 from forms import RegistrationForm, LoginForm
+from PhoBERTPairABSA import PhoBERTPairABSA
 from datetime import datetime, timedelta
 import pytz
 from database_manager import db_manager
+from model_config import (
+    get_prompt, ASPECTS_EN, ASPECTS_VI, LABEL_MAP, MAX_LEN, PRED_THRESHOLD,
+    MIN_SENT_PROB, MIN_MARGIN,
+    _is_garbage, _aspect_has_kw, _has_any_kw, _norm_match, ASPECT_REVERSE_MAPPING,
+    BASE_MODEL, NUM_CLASSES, DROPOUT
+)
 app = Flask(__name__)
 # Session-signing key: prefer the SECRET_KEY environment variable so the real
 # value never has to live in source control. The literal fallback keeps
 # existing deployments working, but it is public and must be overridden in
 # production.
 app.config['SECRET_KEY'] = os.environ.get(
     'SECRET_KEY', 'your-secret-key-change-this-in-production'
 )
 def backup_database(force: bool = False):
     """Back up the database through ``db_manager`` without ever raising.

     Args:
         force: Forwarded unchanged to ``db_manager.backup_database``.

     Returns:
         Whatever ``db_manager.backup_database`` returns, or ``False`` when it
         raised.
     """
     try:
         return db_manager.backup_database(force=force)
     except Exception:
         # Best-effort by design (this also runs from the scheduler thread and
         # at interpreter exit), but record the traceback so a failing backup
         # does not go unnoticed.
         import logging
         logging.getLogger(__name__).exception("Database backup failed")
         return False
 def restore_database():
     """Restore the database through ``db_manager`` without ever raising.

     Returns:
         Whatever ``db_manager.restore_database`` returns, or ``False`` when it
         raised.
     """
     try:
         return db_manager.restore_database()
     except Exception:
         # Callers only branch on the boolean result, so keep the traceback in
         # the log instead of discarding it.
         import logging
         logging.getLogger(__name__).exception("Database restore failed")
         return False
 def run_scheduler():
     """Drive the ``schedule`` job queue forever; intended for a background thread.

     Runs whatever jobs are due, sleeps for a minute, and checks again.
     This function never returns.
     """
     poll_interval_seconds = 60
     while True:
         schedule.run_pending()
         time.sleep(poll_interval_seconds)
 schedule.every().hour.do(backup_database)
 scheduler_thread = Thread(target=run_scheduler, daemon=True)
 scheduler_thread.start()
 atexit.register(backup_database)
 VIETNAM_TIMEZONE = pytz.timezone('Asia/Ho_Chi_Minh')
 def utc_to_vietnam_time(utc_datetime):
     """Convert a UTC datetime to the Vietnam time zone.

     A naive datetime (no ``tzinfo``) is interpreted as UTC before conversion.
     ``None`` is passed through unchanged.
     """
     if utc_datetime is None:
         return None
     is_naive = utc_datetime.tzinfo is None
     aware_utc = pytz.utc.localize(utc_datetime) if is_naive else utc_datetime
     return aware_utc.astimezone(VIETNAM_TIMEZONE)
 db_path = os.path.join(os.getcwd(), 'instance', 'feedback_analysis.db')
 os.makedirs(os.path.dirname(db_path), exist_ok=True)
 app.config['SQLALCHEMY_DATABASE_URI'] = f'sqlite:///{db_path}'
 app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
 db.init_app(app)
 login_manager = LoginManager()
 login_manager.init_app(app)
 login_manager.login_message = 'Vui lòng đăng nhập để sử dụng hệ thống phân tích feedback.'
 login_manager.login_message_category = 'info'
 @app.context_processor
 def utility_processor():
     """Make ``utc_to_vietnam_time`` available inside the template context."""
     template_helpers = {"utc_to_vietnam_time": utc_to_vietnam_time}
     return template_helpers
 def load_user(user_id):
     """Fetch the ``User`` whose primary key is ``user_id``.

     NOTE(review): presumably registered as the Flask-Login user loader; the
     decorator sits outside the visible part of the file - confirm.

     Args:
         user_id: Identifier to look up; converted with ``int``.

     Returns:
         The matching ``User`` (or ``None`` if the query finds nothing), and
         ``None`` when ``user_id`` cannot be converted to an integer, so a
         malformed or stale value is treated as "no user" instead of raising.
     """
     try:
         primary_key = int(user_id)
     except (TypeError, ValueError):
         return None
     return User.query.get(primary_key)
def analyze_feedback(text):
    """Run the Pair-ABSA model on one feedback text, once per aspect.

    Every aspect in ``ASPECTS_EN`` is scored by feeding the (prompt, text)
    pair to the model. Class 0 of the output is treated as "aspect not
    present"; the remaining classes are sentiments looked up via ``LABEL_MAP``.

    Selection works in three stages:
      1. An aspect's confidence is ``1 - P(class 0)``, plus a small bonus when
         the normalised text contains one of the aspect's keywords.
      2. Keyword-backed aspects must reach ``PRED_THRESHOLD``; aspects without
         keyword support must reach a much higher fixed bar. If any kept
         aspect is extremely confident, keyword-less aspects below that level
         are dropped.
      3. A kept aspect is reported only if its best sentiment is probable
         enough (``MIN_SENT_PROB``) and clearly ahead of the runner-up
         (``MIN_MARGIN``, slightly relaxed for keyword-backed aspects).

    Args:
        text: Feedback text; coerced with ``str`` and stripped.

    Returns:
        A list of dicts with keys ``topic``, ``sentiment``, ``confidence``,
        ``sentiment_confidence`` and ``margin``, ordered by descending
        ``confidence``. The list is empty when the model or tokenizer is not
        loaded, when ``_is_garbage`` rejects the text, or when no aspect
        passes the filters.
    """
    # Tuning constants. The keyword bonus is deliberately small (it was
    # reduced from 0.05) to limit false positives caused by keywords alone.
    KW_BOOST = 0.02
    NO_KW_THRESHOLD = 0.85      # bar for aspects with no keyword support
    DOMINANT_THRESHOLD = 0.95   # "very confident" aspect level
    KW_MARGIN_RELIEF = 0.02     # margin relaxation for keyword-backed aspects

    if tokenizer is None or model is None:
        return []
    text = str(text).strip()
    if _is_garbage(text):
        return []

    normalized_text = _norm_match(text)
    aspect_threshold = float(PRED_THRESHOLD)

    logits_list = []
    has_keywords = []
    with torch.no_grad():
        for aspect_en in ASPECTS_EN:
            aspect_vi = ASPECT_REVERSE_MAPPING.get(aspect_en, "khac")
            prompt = get_prompt(aspect_en, sentence=text, use_subprompt=True)
            inputs = tokenizer(
                prompt, text,
                return_tensors="pt",
                # Truncate only the feedback text (second sequence) so the
                # prompt is always kept intact.
                truncation="only_second",
                padding=True,
                max_length=MAX_LEN,
            ).to(device)
            logits = model(inputs["input_ids"], inputs["attention_mask"]).squeeze(0)
            logits_list.append(logits)
            has_keywords.append(bool(_aspect_has_kw(aspect_vi, normalized_text)))

    if not logits_list:
        # No aspects configured: nothing to stack, nothing to report.
        return []

    probs = torch.softmax(torch.stack(logits_list, dim=0), dim=-1)
    conf_not_none = 1.0 - probs[:, 0]

    # Add the keyword bonus (capped at 1.0) only where a keyword was found.
    kw_mask = torch.tensor(has_keywords, dtype=torch.bool, device=conf_not_none.device)
    conf_boosted = torch.where(
        kw_mask, (conf_not_none + KW_BOOST).clamp(max=1.0), conf_not_none
    )

    # Stage 2a: per-aspect threshold depends on keyword support.
    keep_indices = [
        i for i, has_kw in enumerate(has_keywords)
        if conf_boosted[i] >= (aspect_threshold if has_kw else NO_KW_THRESHOLD)
    ]

    # Stage 2b: once some aspect is extremely confident, keyword-less aspects
    # survive only if they are extremely confident themselves.
    # (A former follow-up loop that tried to re-add keyword-backed aspects at
    # threshold + 0.05 could never fire, because every keyword-backed aspect
    # at or above the plain threshold is already kept; it has been removed.)
    if any(conf_boosted[i] >= DOMINANT_THRESHOLD for i in keep_indices):
        keep_indices = [
            i for i in keep_indices
            if has_keywords[i] or conf_boosted[i] >= DOMINANT_THRESHOLD
        ]

    if not keep_indices:
        return []

    # Stage 3: walk aspects from most to least confident, so the result list
    # is already in its final order and needs no further sorting.
    ranked = sorted(keep_indices, key=lambda j: float(conf_boosted[j]), reverse=True)
    results = []
    for i in ranked:
        sent_probs = probs[i, 1:].clone()
        top_idx = int(torch.argmax(sent_probs).item())
        top_p = float(sent_probs[top_idx].item())
        sent_probs[top_idx] = -1.0  # mask the winner to read off the runner-up
        second_p = float(sent_probs.max().item())
        margin = top_p - second_p

        required_margin = MIN_MARGIN - KW_MARGIN_RELIEF if has_keywords[i] else MIN_MARGIN
        if top_p < MIN_SENT_PROB or margin < required_margin:
            continue

        results.append({
            "topic": ASPECTS_EN[i],
            # +1 skips class 0 ("aspect not present") in LABEL_MAP.
            "sentiment": LABEL_MAP[top_idx + 1],
            "confidence": float(conf_boosted[i].item()),
            "sentiment_confidence": top_p,
            "margin": margin,
        })
    return results
def save_feedback_to_db(text, results, user_id):
    """Stage one ``Feedback`` row per analysis result in the current session.

    Rows are only added to ``db.session``; this function does not commit.

    Args:
        text: The original feedback text stored on every row.
        results: Iterable of result dicts with ``sentiment``, ``topic`` and
            ``confidence`` keys, and optionally ``sentiment_confidence``.
        user_id: Owner id stored on every row.
    """
    for entry in results:
        topic_score = entry['confidence']
        # Fall back to the topic confidence when no sentiment score is given.
        sentiment_score = entry.get('sentiment_confidence', topic_score)
        row = Feedback(
            text=text,
            sentiment=entry['sentiment'],
            topic=entry['topic'],
            sentiment_confidence=sentiment_score,
            topic_confidence=topic_score,
            user_id=user_id,
        )
        db.session.add(row)
 def admin_required(f):
+    """Decorator để yêu cầu quyền admin"""
     @wraps(f)
     def decorated_function(*args, **kwargs):
         if not current_user.is_authenticated:
     return decorated_function
 import logging

 # Use the GPU when one is available, otherwise fall back to the CPU.
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 MODEL_REPO = "Ptul2x5/Student_Feedback_Sentiment"
 try:
     # Disable hf_transfer before anything is downloaded from the Hub.
     os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '0'
     tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO, use_fast=False)
     MODEL_URL = f"https://huggingface.co/{MODEL_REPO}/resolve/main/model.bin"
     # NOTE(review): load_state_dict_from_url unpickles the downloaded file, so
     # MODEL_REPO must remain a trusted source.
     loaded = torch.hub.load_state_dict_from_url(MODEL_URL, map_location=device)
     # The checkpoint is either a bare state dict or a dict that wraps it
     # under the "model_state" key.
     if isinstance(loaded, dict) and "model_state" in loaded:
         state_dict = loaded["model_state"]
     else:
         state_dict = loaded
     model = PhoBERTPairABSA(base_model=BASE_MODEL, num_cls=NUM_CLASSES, dropout=DROPOUT)
     # strict=False tolerates key mismatches; report them so a partially
     # initialised model does not go unnoticed.
     load_report = model.load_state_dict(state_dict, strict=False)
     if load_report.missing_keys or load_report.unexpected_keys:
         logging.getLogger(__name__).warning(
             "Checkpoint/model key mismatch: missing=%s unexpected=%s",
             load_report.missing_keys,
             load_report.unexpected_keys,
         )
     model.to(device)
     model.eval()
 except Exception:
     # Keep the app running without a model (analysis then yields no results),
     # but log why loading failed.
     logging.getLogger(__name__).exception("Failed to load tokenizer/model")
     model = None
     tokenizer = None
 @app.route("/", methods=["GET"])
 @login_required
 def home():
         user.set_password(form.password.data)
         db.session.add(user)
         db.session.commit()
         backup_database()
         flash('Đăng ký thành công! Vui lòng đăng nhập.', 'success')
         return redirect(url_for('login'))
 @app.route("/api/health", methods=["GET"])
 def health():
     """Health-check endpoint: always answers with a fixed JSON status."""
     status_payload = {"status": "healthy"}
     return jsonify(status_payload)
 @app.route("/my-statistics")
 @login_required
 def my_statistics():
     try:
         user_feedbacks = Feedback.query.filter_by(user_id=current_user.id).all()
         total_feedbacks = len(user_feedbacks)
         sentiment_stats = db.session.query(
             Feedback.sentiment,
             db.func.count(Feedback.id).label('count')
         ).filter_by(user_id=current_user.id).group_by(Feedback.sentiment).all()
         sentiment_stats = [{'sentiment': item.sentiment, 'count': item.count} for item in sentiment_stats]
         topic_stats = db.session.query(
             Feedback.topic,
             db.func.count(Feedback.id).label('count')
         ).filter_by(user_id=current_user.id).group_by(Feedback.topic).all()
         topic_stats = [{'topic': item.topic, 'count': item.count} for item in topic_stats]
         thirty_days_ago = datetime.now() - timedelta(days=30)
         daily_stats = db.session.query(
             db.func.date(Feedback.created_at).label('date'),
         ).order_by('date').all()
         daily_stats = [{'date': str(item.date), 'count': item.count} for item in daily_stats]
         recent_feedbacks = Feedback.query.filter_by(user_id=current_user.id)\
                                        .order_by(Feedback.created_at.desc()).limit(10).all()
 @app.route("/admin/database")
 @admin_required
 def view_database():
     try:
         total_users = User.query.count()
         total_feedbacks = Feedback.query.count()
         recent_feedbacks = Feedback.query.order_by(Feedback.created_at.desc()).limit(10).all()
         sentiment_stats = db.session.query(
             Feedback.sentiment,
             db.func.count(Feedback.id).label('count')
         ).group_by(Feedback.sentiment).all()
         sentiment_stats = [{'sentiment': item.sentiment, 'count': item.count} for item in sentiment_stats]
         topic_stats = db.session.query(
             Feedback.topic,
             db.func.count(Feedback.id).label('count')
         ).group_by(Feedback.topic).all()
         topic_stats = [{'topic': item.topic, 'count': item.count} for item in topic_stats]
         seven_days_ago = datetime.now() - timedelta(days=7)
         daily_stats = db.session.query(
             db.func.date(Feedback.created_at).label('date'),
 @app.route("/api/feedback-history", methods=["GET"])
 @login_required
 def get_feedback_history():
     try:
         page = request.args.get('page', 1, type=int)
         per_page = request.args.get('per_page', 10, type=int)
         start_date = request.args.get('start_date', None, type=str)
         end_date = request.args.get('end_date', None, type=str)
         query = Feedback.query.filter_by(user_id=current_user.id)
         if time_filter != 'all':
             vietnam_now = utc_to_vietnam_time(datetime.utcnow())
             if time_filter == 'today':
                 today_start = vietnam_now.replace(hour=0, minute=0, second=0, microsecond=0)
                 today_start_utc = today_start.astimezone(pytz.utc).replace(tzinfo=None)
                 query = query.filter(Feedback.created_at >= today_start_utc)
             elif time_filter == 'week':
                 week_ago = vietnam_now - timedelta(days=7)
                 week_ago_utc = week_ago.astimezone(pytz.utc).replace(tzinfo=None)
                 query = query.filter(Feedback.created_at >= week_ago_utc)
             elif time_filter == 'month':
                 month_ago = vietnam_now - timedelta(days=30)
                 month_ago_utc = month_ago.astimezone(pytz.utc).replace(tzinfo=None)
                 query = query.filter(Feedback.created_at >= month_ago_utc)
             elif time_filter == 'custom' and start_date and end_date:
                 try:
                     start_datetime = datetime.strptime(start_date, '%Y-%m-%d')
                     end_datetime = datetime.strptime(end_date, '%Y-%m-%d')
                     start_datetime_utc = VIETNAM_TIMEZONE.localize(start_datetime).astimezone(pytz.utc).replace(tzinfo=None)
                     end_datetime_utc = VIETNAM_TIMEZONE.localize(end_datetime.replace(hour=23, minute=59, second=59)).astimezone(pytz.utc).replace(tzinfo=None)
+                    query = query.filter(Feedback.created_at >= start_datetime_utc, Feedback.created_at <= end_datetime_utc)
                 except ValueError:
                     return jsonify({'error': 'Định dạng ngày không hợp lệ'}), 400
         total_count = query.count()
+        feedbacks = query.order_by(Feedback.created_at.desc()).paginate(page=page, per_page=per_page, error_out=False)
         feedback_list = []
         for feedback in feedbacks.items:
         return jsonify({
             'feedbacks': feedback_list,
+            'total': total_count,
             'pages': feedbacks.pages,
             'current_page': page,
             'has_next': feedbacks.has_next,
         if not text:
             return jsonify({"error": "Missing 'text' field"}), 400
         if len(text) > 1000:
             return jsonify({"error": "Text quá dài. Vui lòng nhập tối đa 1000 ký tự."}), 400
+        if tokenizer is None or model is None:
+            return jsonify({"error": "Model or tokenizer not loaded. Please restart the application."}), 500
+        results = analyze_feedback(text)
         try:
+            save_feedback_to_db(text, results, current_user.id)
             db.session.commit()
             backup_database()
+        except Exception:
+            pass
         return jsonify({
+            "results": results,
+            "has_multiple_topics": len(results) > 1
         })
     except Exception as e:
         return jsonify({"error": f"Có lỗi xảy ra khi xử lý: {str(e)}"}), 500
 @app.route('/admin/backup', methods=['POST'])
 @admin_required
 def manual_backup():
     try:
         if backup_database():
             return jsonify({"success": True, "message": "Backup completed successfully"})
 @app.route('/admin/restore', methods=['POST'])
 @admin_required
 def manual_restore():
     try:
         if restore_database():
             return jsonify({"success": True, "message": "Database restored successfully"})
     except Exception as e:
         return jsonify({"success": False, "message": f"Restore error: {str(e)}"}), 500
 with app.app_context():
     db_manager.initialize_database_if_needed()
     db.create_all()
     db_manager.backup_database()
     try:
         db.session.execute(db.text("SELECT is_admin FROM users LIMIT 1"))
     except Exception:
         except Exception:
             pass
     try:
         total_users = User.query.count()
         admin_user = User.query.filter_by(username='admin').first()
         if not admin_user and total_users == 0:
             admin_user = User(username='admin', is_admin=True)
             admin_user.set_password('123456')
 @app.route("/analyze-csv", methods=["POST"])
 @login_required
 def analyze_csv():
     try:
         if 'csvFile' not in request.files:
             return jsonify({'error': 'Không tìm thấy file CSV'}), 400
         if not file.filename.lower().endswith('.csv'):
             return jsonify({'error': 'File phải có định dạng CSV'}), 400
         try:
             file_content = file.stream.read().decode("UTF8")
         except UnicodeDecodeError:
+            return jsonify({'error': 'File CSV phải được mã hóa UTF-8'}), 400
         try:
             stream = io.StringIO(file_content, newline=None)
             csv_input = csv.DictReader(stream)
             if not csv_input.fieldnames:
+                return jsonify({'error': 'File CSV không có header'}), 400
             feedback_column = None
             available_columns = []
             for col in csv_input.fieldnames:
             if not feedback_column:
                 return jsonify({
+                    'error': f'Không tìm thấy cột chứa feedback. Các cột: {", ".join(available_columns)}'
                 }), 400
             rows = list(csv_input)
             if not rows:
+                return jsonify({'error': 'File CSV không có dữ liệu'}), 400
         except csv.Error as e:
+            return jsonify({'error': f'File CSV không đúng định dạng: {str(e)}'}), 400
         except Exception as e:
             return jsonify({'error': f'Lỗi khi đọc file CSV: {str(e)}'}), 400
         results = []
         processed_count = 0
         error_count = 0
+        for row_num, row in enumerate(rows, start=1):
             feedback_text = row[feedback_column].strip()
             if not feedback_text:
                 continue
             try:
+                if tokenizer is None or model is None:
                     results.append({
                         "row": row_num,
                         "feedback": feedback_text,
+                        "error": "Model or tokenizer not loaded"
                     })
                     continue
+                row_topics = analyze_feedback(feedback_text)
                 try:
+                    save_feedback_to_db(feedback_text, row_topics, current_user.id)
+                    if row_topics:
+                        first = row_topics[0]
+                        first_topic = first['topic']
+                        first_sentiment = first['sentiment']
+                        first_sentiment_conf = first.get('sentiment_confidence', first['confidence'])
+                        first_topic_conf = first['confidence']
+                    else:
+                        first_topic = 'others'
+                        first_sentiment = 'neutral'
+                        first_sentiment_conf = 0.0
+                        first_topic_conf = 0.0
                     results.append({
                         'row': row_num,
                         'text': feedback_text[:100] + '...' if len(feedback_text) > 100 else feedback_text,
+                        'sentiment': first_sentiment,
+                        'topic': first_topic,
+                        'sentiment_confidence': round(first_sentiment_conf * 100, 1),
+                        'topic_confidence': round(first_topic_conf * 100, 1),
                         'success': True
                     })
                     processed_count += 1
+                except Exception as db_err:
                     error_count += 1
                     results.append({
                         'row': row_num,
                         'text': feedback_text[:100] + '...' if len(feedback_text) > 100 else feedback_text,
+                        'error': f'Lỗi lưu database: {str(db_err)}'
                     })
             except Exception as e:
                     'error': f'Lỗi phân tích: {str(e)}'
                 })
         try:
             db.session.commit()
             backup_database()
         except Exception as commit_error:
             db.session.rollback()
+            return jsonify({'error': f'Lỗi khi lưu dữ liệu: {str(commit_error)}'}), 500
         return jsonify({
             'success': True,
             'total_rows': len(results),
             'processed_count': processed_count,
             'error_count': error_count,
+            'results': results[:50],
             'message': f'Đã xử lý {processed_count}/{len(results)} feedback thành công'
         })
         }), 500
 if __name__ == "__main__":
+    # Debug mode is enabled only when the DEBUG environment variable equals
+    # "true" (case-insensitive); it defaults to off when the variable is unset.
     debug = os.environ.get("DEBUG", "False").lower() == "true"
+    # Listen on all interfaces, port 7860.
     app.run(host="0.0.0.0", port=7860, debug=debug)

database_manager.py CHANGED Viewed

@@ -4,31 +4,20 @@ import json
 from datetime import datetime
 from typing import Optional
 from huggingface_hub import HfApi, login
-from datasets import Dataset
 import sqlite3
 import tempfile
 class DatabaseManager:
     def __init__(self, hf_token: Optional[str] = None, repo_id: Optional[str] = None):
-        """
-        Initialize Database Manager for Hugging Face Hub storage
-        Args:
-            hf_token: Hugging Face token (can be set via environment variable HF_TOKEN)
-            repo_id: Hugging Face repository ID for storing the database
-        """
         self.repo_id = repo_id or os.getenv('REPO_ID', 'your-username/student-feedback-db')
         self.hf_token = hf_token or os.getenv('HF_TOKEN')
         self.db_path = 'instance/feedback_analysis.db'
         self.backup_dir = 'backups'
-        # Check if running locally (no HF_TOKEN)
         self.is_local = not self.hf_token
-        # Create backup directory if it doesn't exist
         os.makedirs(self.backup_dir, exist_ok=True)
         if self.hf_token:
             try:
                 login(token=self.hf_token)
@@ -39,12 +28,11 @@ class DatabaseManager:
             self.api = None
     def sqlite_to_json(self, db_path: str) -> dict:
-        """Convert SQLite database to JSON format for Hugging Face Dataset"""
         try:
             conn = sqlite3.connect(db_path)
             cursor = conn.cursor()
-            # Get all tables
             cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
             tables = cursor.fetchall()
@@ -53,28 +41,20 @@ class DatabaseManager:
             for table in tables:
                 table_name = table[0]
-                # Skip system tables
                 if table_name in ['sqlite_sequence', 'sqlite_master']:
                     continue
-                # Get table schema
                 cursor.execute(f"PRAGMA table_info({table_name})")
                 columns = [col[1] for col in cursor.fetchall()]
-                # Get all data from table
                 cursor.execute(f"SELECT * FROM {table_name}")
                 rows = cursor.fetchall()
-                # Convert to list of dictionaries
                 table_data = []
                 for row in rows:
                     row_dict = {}
                     for i, value in enumerate(row):
-                        # Convert datetime objects to strings
-                        if isinstance(value, datetime):
-                            value = value.isoformat()
-                        # Convert boolean fields
-                        elif columns[i] == 'is_admin':
                             value = bool(value) if value is not None else False
                         row_dict[columns[i]] = value
                     table_data.append(row_dict)
@@ -84,7 +64,6 @@ class DatabaseManager:
                     'data': table_data
                 }
-            # Handle sqlite_sequence separately
             try:
                 cursor.execute("SELECT * FROM sqlite_sequence")
                 sequence_rows = cursor.fetchall()
@@ -100,31 +79,25 @@ class DatabaseManager:
                         'data': sequence_data
                     }
             except Exception:
-                # sqlite_sequence might not exist, that's ok
                 pass
             conn.close()
             return data
-        except Exception as e:
-            print(f"❌ Error converting SQLite to JSON: {e}")
             return {}
     def json_to_sqlite(self, json_data: dict, db_path: str):
         """Convert JSON data back to SQLite database"""
         try:
-            # Remove existing database if it exists
             if os.path.exists(db_path):
                 os.remove(db_path)
-            # Create directory if it doesn't exist
             os.makedirs(os.path.dirname(db_path), exist_ok=True)
             conn = sqlite3.connect(db_path)
             cursor = conn.cursor()
             for table_name, table_info in json_data.items():
-                # Skip system tables
                 if table_name in ['sqlite_sequence', 'sqlite_master']:
                     continue
@@ -134,7 +107,6 @@ class DatabaseManager:
                 if not columns:
                     continue
-                # Create table
                 column_defs = []
                 for col in columns:
                     if col == 'id':
@@ -147,7 +119,6 @@ class DatabaseManager:
                 create_sql = f"CREATE TABLE IF NOT EXISTS {table_name} ({', '.join(column_defs)})"
                 cursor.execute(create_sql)
-                # Insert data
                 if data:
                     placeholders = ', '.join(['?' for _ in columns])
                     insert_sql = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
@@ -156,7 +127,6 @@ class DatabaseManager:
                         values = []
                         for col in columns:
                             value = row.get(col)
-                            # Convert boolean fields properly
                             if col == 'is_admin':
                                 if isinstance(value, bool):
                                     values.append(int(value))
@@ -168,7 +138,6 @@ class DatabaseManager:
                                 values.append(value)
                         cursor.execute(insert_sql, values)
-            # Handle sqlite_sequence separately if it exists in backup
             if 'sqlite_sequence' in json_data:
                 sequence_data = json_data['sqlite_sequence']['data']
                 for seq_row in sequence_data:
@@ -180,7 +149,6 @@ class DatabaseManager:
             conn.commit()
             conn.close()
         except Exception:
             pass
@@ -193,21 +161,17 @@ class DatabaseManager:
             return False
         try:
-            # Convert database to JSON
             json_data = self.sqlite_to_json(self.db_path)
             if not json_data:
                 return False
-            # Create temporary file for JSON data
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             temp_file = f"{self.backup_dir}/feedback_backup_{timestamp}.json"
             with open(temp_file, 'w', encoding='utf-8') as f:
                 json.dump(json_data, f, indent=2, ensure_ascii=False)
-            # Upload to Hugging Face Hub
             try:
                 self.api.upload_file(
                     path_or_fileobj=temp_file,
@@ -218,14 +182,12 @@ class DatabaseManager:
                 )
             except Exception as upload_error:
                 if "No files have been modified" in str(upload_error):
-                    return True  # This is actually success
                 else:
                     raise upload_error
-            # Clean up temporary file
             os.remove(temp_file)
             return True
         except Exception:
             return False
@@ -235,7 +197,6 @@ class DatabaseManager:
             return False
         try:
-            # Download latest backup
             temp_dir = tempfile.mkdtemp()
             temp_file = os.path.join(temp_dir, 'feedback_backup.json')
@@ -246,17 +207,12 @@ class DatabaseManager:
                 repo_type="dataset"
             )
-            # Convert JSON back to SQLite
             with open(temp_file, 'r', encoding='utf-8') as f:
                 json_data = json.load(f)
             self.json_to_sqlite(json_data, self.db_path)
-            # Clean up
             shutil.rmtree(temp_dir)
             return True
         except Exception:
             return False

 from datetime import datetime
 from typing import Optional
 from huggingface_hub import HfApi, login
 import sqlite3
 import tempfile
 class DatabaseManager:
     def __init__(self, hf_token: Optional[str] = None, repo_id: Optional[str] = None):
+        """Initialize Database Manager for Hugging Face Hub storage"""
         self.repo_id = repo_id or os.getenv('REPO_ID', 'your-username/student-feedback-db')
         self.hf_token = hf_token or os.getenv('HF_TOKEN')
         self.db_path = 'instance/feedback_analysis.db'
         self.backup_dir = 'backups'
         self.is_local = not self.hf_token
         os.makedirs(self.backup_dir, exist_ok=True)
         if self.hf_token:
             try:
                 login(token=self.hf_token)
             self.api = None
     def sqlite_to_json(self, db_path: str) -> dict:
+        """Convert SQLite database to JSON format"""
         try:
             conn = sqlite3.connect(db_path)
             cursor = conn.cursor()
             cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
             tables = cursor.fetchall()
             for table in tables:
                 table_name = table[0]
                 if table_name in ['sqlite_sequence', 'sqlite_master']:
                     continue
                 cursor.execute(f"PRAGMA table_info({table_name})")
                 columns = [col[1] for col in cursor.fetchall()]
                 cursor.execute(f"SELECT * FROM {table_name}")
                 rows = cursor.fetchall()
                 table_data = []
                 for row in rows:
                     row_dict = {}
                     for i, value in enumerate(row):
+                        if columns[i] == 'is_admin':
                             value = bool(value) if value is not None else False
                         row_dict[columns[i]] = value
                     table_data.append(row_dict)
                     'data': table_data
                 }
             try:
                 cursor.execute("SELECT * FROM sqlite_sequence")
                 sequence_rows = cursor.fetchall()
                         'data': sequence_data
                     }
             except Exception:
                 pass
             conn.close()
             return data
+        except Exception:
             return {}
     def json_to_sqlite(self, json_data: dict, db_path: str):
         """Convert JSON data back to SQLite database"""
         try:
             if os.path.exists(db_path):
                 os.remove(db_path)
             os.makedirs(os.path.dirname(db_path), exist_ok=True)
             conn = sqlite3.connect(db_path)
             cursor = conn.cursor()
             for table_name, table_info in json_data.items():
                 if table_name in ['sqlite_sequence', 'sqlite_master']:
                     continue
                 if not columns:
                     continue
                 column_defs = []
                 for col in columns:
                     if col == 'id':
                 create_sql = f"CREATE TABLE IF NOT EXISTS {table_name} ({', '.join(column_defs)})"
                 cursor.execute(create_sql)
                 if data:
                     placeholders = ', '.join(['?' for _ in columns])
                     insert_sql = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
                         values = []
                         for col in columns:
                             value = row.get(col)
                             if col == 'is_admin':
                                 if isinstance(value, bool):
                                     values.append(int(value))
                                 values.append(value)
                         cursor.execute(insert_sql, values)
             if 'sqlite_sequence' in json_data:
                 sequence_data = json_data['sqlite_sequence']['data']
                 for seq_row in sequence_data:
             conn.commit()
             conn.close()
         except Exception:
             pass
             return False
         try:
             json_data = self.sqlite_to_json(self.db_path)
             if not json_data:
                 return False
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             temp_file = f"{self.backup_dir}/feedback_backup_{timestamp}.json"
             with open(temp_file, 'w', encoding='utf-8') as f:
                 json.dump(json_data, f, indent=2, ensure_ascii=False)
             try:
                 self.api.upload_file(
                     path_or_fileobj=temp_file,
                 )
             except Exception as upload_error:
                 if "No files have been modified" in str(upload_error):
+                    return True
                 else:
                     raise upload_error
             os.remove(temp_file)
             return True
         except Exception:
             return False
             return False
         try:
             temp_dir = tempfile.mkdtemp()
             temp_file = os.path.join(temp_dir, 'feedback_backup.json')
                 repo_type="dataset"
             )
             with open(temp_file, 'r', encoding='utf-8') as f:
                 json_data = json.load(f)
             self.json_to_sqlite(json_data, self.db_path)
             shutil.rmtree(temp_dir)
             return True
         except Exception:
             return False

forms.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from flask_wtf import FlaskForm
-from wtforms import StringField, PasswordField, SubmitField, TextAreaField
 from wtforms.validators import DataRequired, Length, EqualTo, ValidationError
 from models import User
@@ -37,10 +37,3 @@ class LoginForm(FlaskForm):
     submit = SubmitField('Đăng nhập')
-class FeedbackForm(FlaskForm):
-    text = TextAreaField('Feedback của bạn', validators=[
-        DataRequired(message='Vui lòng nhập feedback'),
-        Length(min=10, max=1000, message='Feedback phải từ 10-1000 ký tự')
-    ])
-    submit = SubmitField('Phân Tích Feedback')

 from flask_wtf import FlaskForm
+from wtforms import StringField, PasswordField, SubmitField
 from wtforms.validators import DataRequired, Length, EqualTo, ValidationError
 from models import User
     submit = SubmitField('Đăng nhập')

model_config.py ADDED Viewed

	@@ -0,0 +1,342 @@

+"""Configuration for PhoBERT Pair-ABSA Model"""
+import unicodedata
+import re
+# Model and inference settings; how each value is consumed is not visible in this module.
+BASE_MODEL = "vinai/phobert-base"
+MAX_LEN = 256
+DROPOUT = 0.3
+# Matches the number of entries in LABEL_MAP below.
+NUM_CLASSES = 4
+# Decision thresholds (presumably applied to predicted probabilities; confirm against the caller).
+PRED_THRESHOLD = 0.60
+MIN_SENT_PROB = 0.50
+MIN_MARGIN = 0.08
+# Aspect identifiers: Vietnamese keys and their English names, aligned by index.
+ASPECTS_VI = ["giang_vien", "chuong_trinh", "co_so_vat_chat", "khac"]
+ASPECTS_EN = ["lecturer", "training_program", "facility", "others"]
+# Both lookup directions are derived from the aligned lists above.
+ASPECT_MAPPING = dict(zip(ASPECTS_VI, ASPECTS_EN))
+ASPECT_REVERSE_MAPPING = dict(zip(ASPECTS_EN, ASPECTS_VI))
+# Class index -> sentiment label.
+LABEL_MAP = dict(enumerate(("none", "negative", "neutral", "positive")))
+def _norm_store(s: str) -> str:
+    """Normalize text for storage/dedup while keeping diacritics.
+
+    The value is converted with str(), composed to Unicode NFC, stripped of
+    leading/trailing whitespace, and every whitespace run is collapsed to a
+    single space.
+    """
+    composed = unicodedata.normalize("NFC", str(s))
+    return re.sub(r"\s+", " ", composed.strip())
+def _norm_match(s: str) -> str:
+    """Normalize text for keyword matching: lowercase and strip diacritics.
+
+    Args:
+        s: Text to normalize.
+
+    Returns:
+        The lowercased text with combining marks removed and "đ" folded to
+        "d", so accented and unaccented spellings compare equal.
+    """
+    # "đ" has no canonical decomposition, so NFD alone leaves it untouched;
+    # fold it explicitly so unaccented input ("di day") matches "đi dạy".
+    s = s.lower().replace("đ", "d")
+    s = unicodedata.normalize("NFD", s)
+    # Category "Mn" covers the combining tone/vowel marks produced by NFD.
+    return "".join(ch for ch in s if unicodedata.category(ch) != "Mn")
+def _no_diacritics_set(kws: set) -> set:
+    """Return the keywords together with their _norm_match-normalized variants."""
+    normalized = set(map(_norm_match, kws))
+    return kws | normalized
+ASPECT_PROMPTS = {
+    "giang_vien": {
+        "_default": (
+            "ĐÁNH GIÁ phần liên quan GIẢNG VIÊN (giảng dạy, thái độ, hỗ trợ, chấm điểm, đúng giờ). Nếu câu không nhắc rõ đến GIẢNG VIÊN -> NONE. Mỗi aspect đánh giá độc lập (ví dụ: giảng viên đi dạy trễ nhưng mạng wifi tốt -> giảng viên NEGATIVE, cơ sở vật chất POSITIVE). NEGATIVE khi phàn nàn trễ, khó hiểu, thiếu hỗ trợ; POSITIVE khi được khen đúng giờ, nhiệt tình, dễ hiểu; không rõ -> NEUTRAL."
+        ),
+        "giang_day": (
+            "ĐÁNH GIÁ GIẢNG DẠY của GIẢNG VIÊN. Nếu câu không nói về bài giảng, cách truyền đạt, phương pháp -> NONE. Mỗi aspect độc lập. NEGATIVE khi khó hiểu, quá nhanh/chậm, thiếu ví dụ; POSITIVE khi dễ hiểu, nhiều ví dụ, rõ ràng; không rõ -> NEUTRAL."
+        ),
+        "dung_gio": (
+            "ĐÁNH GIÁ ĐÚNG GIỜ của GIẢNG VIÊN. Nếu câu không nhắc việc vào lớp, bắt đầu/kết thúc tiết -> NONE. Mỗi aspect độc lập. NEGATIVE khi trễ, bỏ tiết; POSITIVE khi đúng giờ, giữ lịch; không rõ -> NEUTRAL."
+        ),
+        "ho_tro": (
+            "ĐÁNH GIÁ HỖ TRỢ/TƯ VẤN của GIẢNG VIÊN. Nếu câu không nhắc hỗ trợ, phản hồi, giải đáp -> NONE. Mỗi aspect độc lập. NEGATIVE khi chậm phản hồi, không giúp; POSITIVE khi nhiệt tình, phản hồi nhanh; không rõ -> NEUTRAL."
+        ),
+        "cham_diem": (
+            "ĐÁNH GIÁ CHẤM ĐIỂM của GIẢNG VIÊN. Nếu câu không nói điểm, rubric, phúc khảo -> NONE. Mỗi aspect độc lập. NEGATIVE khi không công bằng, khó hiểu; POSITIVE khi minh bạch, công bằng; không rõ -> NEUTRAL."
+        ),
+        "thai_do": (
+            "ĐÁNH GIÁ THÁI ĐỘ/TÁC PHONG của GIẢNG VIÊN. Nếu câu không nhắc thái độ, giao tiếp -> NONE. Mỗi aspect độc lập. NEGATIVE khi thô lỗ, thiếu tôn trọng; POSITIVE khi thân thiện, tôn trọng; không rõ -> NEUTRAL."
+        ),
+    },
+    "chuong_trinh": {
+        "_default": (
+            "ĐÁNH GIÁ CHƯƠNG TRÌNH ĐÀO TẠO (môn học, tín chỉ, nội dung, lộ trình, lịch). Nếu câu không nhắc rõ đến chương trình -> NONE. Mỗi aspect đánh giá độc lập (ví dụ: lịch học dày nhưng giảng viên hỗ trợ tốt -> chương trình NEGATIVE, giảng viên POSITIVE). NEGATIVE khi quá tải, lạc hậu, trùng lặp; POSITIVE khi hợp lý, cập nhật, thực tế; không rõ -> NEUTRAL."
+        ),
+        "noi_dung": (
+            "ĐÁNH GIÁ NỘI DUNG CHƯƠNG TRÌNH. Nếu câu không nói nội dung môn, học liệu, lộ trình -> NONE. Mỗi aspect độc lập. NEGATIVE khi lạc hậu, trùng lặp, thiếu thực tế; POSITIVE khi cập nhật, hữu ích; không rõ -> NEUTRAL."
+        ),
+        "lich_hoc": (
+            "ĐÁNH GIÁ LỊCH HỌC/KẾ HOẠCH. Nếu câu không nhắc lịch, thời khóa biểu, xếp ca -> NONE. Mỗi aspect độc lập. NEGATIVE khi dồn dập, trùng lịch, đổi lịch liên tục; POSITIVE khi rõ ràng, hợp lý; không rõ -> NEUTRAL."
+        ),
+        "tin_chi": (
+            "ĐÁNH GIÁ TÍN CHỈ/HỌC PHẦN. Nếu câu không nói tín chỉ, đăng ký học phần, tiên quyết -> NONE. Mỗi aspect độc lập. NEGATIVE khi bất hợp lý, khó đăng ký; POSITIVE khi phân bổ hợp lý, dễ đăng ký; không rõ -> NEUTRAL."
+        ),
+        "de_cuong": (
+            "ĐÁNH GIÁ ĐỀ CƯƠNG/GIÁO TRÌNH. Nếu câu không nhắc đề cương, tài liệu, rubric -> NONE. Mỗi aspect độc lập. NEGATIVE khi thiếu rõ ràng, thiếu tài liệu; POSITIVE khi đầy đủ, minh bạch; không rõ -> NEUTRAL."
+        ),
+    },
+    "co_so_vat_chat": {
+        "_default": (
+            "ĐÁNH GIÁ CƠ SỞ VẬT CHẤT (mạng, phòng học, phòng thí nghiệm, thiết bị, thư viện, gửi xe, vệ sinh, cổng đào tạo). Nếu câu không nhắc rõ đến cơ sở vật chất -> NONE. Mỗi aspect đánh giá độc lập (ví dụ: phòng học nóng nhưng thầy cô dạy dễ hiểu -> cơ sở vật chất NEGATIVE, giảng viên POSITIVE). NEGATIVE khi phàn nàn hỏng, thiếu, bẩn; POSITIVE khi khen đầy đủ, sạch, hiện đại; không rõ -> NEUTRAL."
+        ),
+        "mang": (
+            "ĐÁNH GIÁ MẠNG/WI-FI. Nếu câu không nói mạng, wifi, internet -> NONE. Mỗi aspect độc lập. NEGATIVE khi chậm, rớt kết nối; POSITIVE khi nhanh, ổn định; không rõ -> NEUTRAL."
+        ),
+        "phong_hoc": (
+            "ĐÁNH GIÁ PHÒNG HỌC. Nếu câu không nói phòng học, bàn ghế, điều hòa, tiếng ồn -> NONE. Mỗi aspect độc lập. NEGATIVE khi nóng, ồn, xuống cấp; POSITIVE khi mát, sạch, đủ tiện nghi; không rõ -> NEUTRAL."
+        ),
+        "phong_thi_nghiem": (
+            "ĐÁNH GIÁ PHÒNG THÍ NGHIỆM/THỰC HÀNH. Nếu câu không nhắc lab, thiết bị thực hành -> NONE. Mỗi aspect độc lập. NEGATIVE khi thiếu máy, phần mềm lỗi; POSITIVE khi đầy đủ, hiện đại; không rõ -> NEUTRAL."
+        ),
+        "thiet_bi": (
+            "ĐÁNH GIÁ THIẾT BỊ GIẢNG DẠY. Nếu câu không nói máy chiếu, micro, loa, TV -> NONE. Mỗi aspect độc lập. NEGATIVE khi hỏng, âm kém; POSITIVE khi hoạt động tốt, rõ ràng; không rõ -> NEUTRAL."
+        ),
+        "thu_vien": (
+            "ĐÁNH GIÁ THƯ VIỆN. Nếu câu không nhắc thư viện, tài liệu, chỗ ngồi -> NONE. Mỗi aspect độc lập. NEGATIVE khi thiếu tài liệu, chật, ồn; POSITIVE khi phong phú, yên tĩnh; không rõ -> NEUTRAL."
+        ),
+        "giu_xe_ve_sinh": (
+            "ĐÁNH GIÁ GIỮ XE/NHÀ VỆ SINH. Nếu câu không nói gửi xe hoặc nhà vệ sinh -> NONE. Mỗi aspect độc lập. NEGATIVE khi bẩn, đắt, mùi khó chịu; POSITIVE khi sạch, thuận tiện; không rõ -> NEUTRAL."
+        ),
+        "cong_quan_ly_dao_tao": (
+            "ĐÁNH GIÁ CỔNG/TRANG QUẢN LÝ ĐÀO TẠO. Nếu câu không nhắc cổng đào tạo, đăng nhập, tra cứu -> NONE. Mỗi aspect độc lập. NEGATIVE khi quá tải, treo, khó dùng; POSITIVE khi ổn định, dễ dùng; không rõ -> NEUTRAL."
+        ),
+    },
+    "khac": {
+        "_default": (
+            "ĐÁNH GIÁ NHÓM KHÁC (học phí, học bổng, hành chính, CLB, KTX, một cửa, đăng ký tín chỉ, điểm rèn luyện). Nếu câu không nhắc rõ đến nhóm này -> NONE. Mỗi aspect đánh giá độc lập (ví dụ: học phí tăng nhưng phòng học tốt -> nhóm khác NEGATIVE, cơ sở vật chất POSITIVE). NEGATIVE khi phàn nàn khó khăn, chậm trễ; POSITIVE khi khen rõ ràng, nhanh chóng; không rõ -> NEUTRAL."
+        ),
+        "hoc_phi": (
+            "ĐÁNH GIÁ HỌC PHÍ. Nếu câu không nhắc học phí, mức thu, đóng tiền -> NONE. Mỗi aspect độc lập. NEGATIVE khi đắt, tăng, thiếu minh bạch; POSITIVE khi hợp lý, minh bạch; không rõ -> NEUTRAL."
+        ),
+        "hoc_bong": (
+            "ĐÁNH GIÁ HỌC BỔNG. Nếu câu không nói tiêu chí, quy trình, kết quả học bổng -> NONE. Mỗi aspect độc lập. NEGATIVE khi khó, chậm, không rõ; POSITIVE khi dễ, minh bạch, kịp thời; không rõ -> NEUTRAL."
+        ),
+        "hanh_chinh": (
+            "ĐÁNH GIÁ THỦ TỤC HÀNH CHÍNH/CTSV. Nếu câu không nhắc hồ sơ, giấy tờ, xử lý -> NONE. Mỗi aspect độc lập. NEGATIVE khi rườm rà, chậm, thiếu phản hồi; POSITIVE khi nhanh, rõ ràng; không rõ -> NEUTRAL."
+        ),
+        "clb": (
+            "ĐÁNH GIÁ CLB/HOẠT ĐỘNG NGOẠI KHÓA. Nếu câu không nói CLB, sự kiện, hoạt động sinh viên -> NONE. Mỗi aspect độc lập. NEGATIVE khi ít hoạt động, thiếu hấp dẫn; POSITIVE khi sôi nổi, hữu ích; không rõ -> NEUTRAL."
+        ),
+        "ktx": (
+            "ĐÁNH GIÁ KÝ TÚC XÁ. Nếu câu không nhắc phòng KTX, an ninh, điện nước -> NONE. Mỗi aspect độc lập. NEGATIVE khi chật, mất an ninh, thiếu điện nước; POSITIVE khi sạch, an toàn, đầy đủ; không rõ -> NEUTRAL."
+        ),
+        "mot_cua": (
+            "ĐÁNH GIÁ VĂN PHÒNG MỘT CỬA. Nếu câu không nhắc một cửa, tiếp nhận, trả kết quả -> NONE. Mỗi aspect độc lập. NEGATIVE khi chờ lâu, đông, xử lý chậm; POSITIVE khi nhanh, rõ ràng; không rõ -> NEUTRAL."
+        ),
+        "dang_ky_tin": (
+            "ĐÁNH GIÁ ĐĂNG KÝ TÍN CHỈ. Nếu câu không nói đăng ký môn, hệ thống đăng ký -> NONE. Mỗi aspect độc lập. NEGATIVE khi quá tải, lỗi, khó dùng; POSITIVE khi ổn định, dễ dùng; không rõ -> NEUTRAL."
+        ),
+        "diem_ren_luyen": (
+            "ĐÁNH GIÁ ĐIỂM RÈN LUYỆN. Nếu câu không nhắc DRL, minh chứng, quy trình -> NONE. Mỗi aspect độc lập. NEGATIVE khi khó, không công bằng; POSITIVE khi rõ ràng, công bằng; không rõ -> NEUTRAL."
+        ),
+    },
+}
+SUBTOPIC_KW = {
+    "giang_vien": {
+        "dung_gio": _no_diacritics_set({
+            "đi dạy","lên lớp","vào lớp","bắt đầu tiết","kết thúc tiết",
+            "giảng viên","giáo viên","thầy giáo","cô giáo","thầy cô",
+            "giảng viên đi dạy","giảng viên lên lớp","giảng viên vào lớp",
+            "thầy đi dạy","cô đi dạy","thầy lên lớp","cô lên lớp"
+        }),
+        "cham_diem": _no_diacritics_set({
+            "chấm điểm","thang điểm","điểm thi","điểm thành phần","điểm tổng kết","phúc khảo",
+            "điểm giữa kỳ","điểm cuối kỳ","điểm nhóm","điểm cá nhân","điểm bonus",
+            "điểm chuyên cần","điểm chuyên đề","rubric","grading",
+            "giảng viên","giáo viên","thầy giáo","cô giáo",
+            "giảng viên chấm điểm","thầy chấm điểm","cô chấm điểm","giáo viên chấm điểm",
+            "thầy giáo chấm điểm","cô giáo chấm điểm"
+        }),
+        "ho_tro": _no_diacritics_set({
+            "tư vấn học tập","giải đáp học tập","phản hồi học tập","cvht",
+            "cố vấn học tập","hướng dẫn học tập","trao đổi học tập","hỏi đáp học tập",
+            "tư vấn sinh viên","giải đáp sinh viên","phản hồi sinh viên",
+            "cố vấn sinh viên","hướng dẫn sinh viên",
+            "giảng viên","giáo viên","thầy giáo","cô giáo",
+            "giảng viên tư vấn","giảng viên hướng dẫn","giảng viên giải đáp",
+            "thầy tư vấn","cô tư vấn","thầy hướng dẫn","cô hướng dẫn",
+            "thầy giáo tư vấn","cô giáo tư vấn"
+        }),
+        "thai_do": _no_diacritics_set({
+            "thái độ","ứng xử","tác phong","phong thái","giao tiếp","cách nói",
+            "ngữ điệu","hành vi","cử chỉ","cách cư xử","thái độ giảng viên",
+            "phong cách","tương tác","thái độ lớp","ngôn ngữ cơ thể",
+            "giảng viên","giáo viên","thầy giáo","cô giáo",
+            "thái độ thầy","thái độ cô","thái độ giáo viên",
+            "thầy giáo thái độ","cô giáo thái độ","giảng viên thái độ"
+        }),
+        "giang_day": _no_diacritics_set({
+            "giảng dạy","truyền đạt","diễn đạt","ví dụ","bài giảng","slide","ghi chú",
+            "ôn tập","bài học","phương pháp","thực hành","lý thuyết",
+            "thảo luận","minh họa","slide giảng","slide bài","giải thích","phong cách giảng",
+            "giảng viên","giáo viên","thầy giáo","cô giáo",
+            "giảng viên giảng dạy","thầy giảng","cô giảng","giáo viên giảng dạy",
+            "thầy giáo giảng dạy","cô giáo giảng dạy"
+        }),
+    },
+    "chuong_trinh": {
+        "lich_hoc": _no_diacritics_set({
+            "lịch học","thời khóa biểu","thời khoá biểu","kế hoạch học tập","xếp lịch","trùng lịch",
+            "đổi lịch","lịch thi","lịch học thêm","ca tối","online","offline","ca sáng",
+            "ca chiều","học bù","thi dồn","thi liên tục","xếp ca","thời gian học","lịch kiểm tra"
+        }),
+        "tin_chi": _no_diacritics_set({
+            "tín chỉ","học phần","tiên quyết","song hành","đăng ký học phần","nợ môn",
+            "đủ tín","số tín","khối lượng học","điều kiện học phần","mã môn","tải học",
+            "phân bổ học phần","lộ trình học","số học phần"
+        }),
+        "de_cuong": _no_diacritics_set({
+            "đề cương","syllabus","giáo trình","tài liệu bắt buộc môn học","tài liệu tham khảo môn học",
+            "mục tiêu học phần","kế hoạch môn","outline","kế hoạch giảng dạy","phân bổ điểm",
+            "tài liệu học môn học","hướng dẫn môn học","phân phối chương trình","khung điểm môn học","thang đánh giá môn học"
+        }),
+        "noi_dung": _no_diacritics_set({
+            "nội dung","thực tế","thực tiễn","lộ trình","khung chương trình",
+            "cập nhật","định hướng nghề","kiến thức","module",
+            "chuyên đề","cấu trúc môn","chương trình học","đề mục","môn học","học liệu"
+        }),
+    },
+    "co_so_vat_chat": {
+        "mang": _no_diacritics_set({
+            "mạng wifi","wifi","wi-fi","wi fi","đăng nhập wifi", "ping wifi","băng thông wifi","wifi trường"
+        }),
+        "phong_hoc": _no_diacritics_set({
+            "phòng học","ánh sáng","đèn phòng học","máy lạnh","điều hòa","điều hoà","quạt",
+            "bàn ghế phòng học","ổ điện phòng học","ổ cắm phòng học","cách âm","sàn nhà","rèm cửa","trần nhà","bảng viết"
+        }),
+        "phong_thi_nghiem": _no_diacritics_set({
+            "phòng thí nghiệm","phòng thực hành","lab","phòng lab","máy thực hành",
+            "cài phần mềm","thiết bị thí nghiệm","dụng cụ lab","phòng máy","thiết bị lab"
+        }),
+        "thiet_bi": _no_diacritics_set({
+            "máy chiếu","micro","mic","loa","âm thanh","tivi","cáp","hdmi","adapter",
+            "thiết bị giảng dạy","máy quay","camera lớp","loa bluetooth","âm lượng",
+            "đầu nối","bộ chia","thiết bị phòng học","tv phòng học"
+        }),
+        "thu_vien": _no_diacritics_set({
+            "thư viện","mượn sách","trả sách","tài liệu số thư viện","ebook thư viện","chỗ ngồi thư viện",
+            "bàn đọc","yên tĩnh thư viện","giờ mở cửa thư viện","mượn giáo trình","tra cứu sách","wifi thư viện",
+            "tra cứu thư viện","kệ sách","tài nguyên số thư viện","khu đọc","mượn tài liệu thư viện","mượn thiết bị thư viện",
+            "tài liệu thư viện","tài liệu mượn thư viện","sách thư viện"
+        }),
+        "giu_xe_ve_sinh": _no_diacritics_set({
+            "bãi giữ xe","nhà giữ xe","gửi xe","thẻ xe","quẹt thẻ","phí gửi xe",
+            "nhà vệ sinh","toilet","giấy vệ sinh","nước rửa tay",
+            "ống nước nhà vệ sinh","cống thoát nhà vệ sinh","sàn nhà vệ sinh","wc nhà vệ sinh"
+        }),
+        "cong_quan_ly_dao_tao": _no_diacritics_set({
+            "trang quản lý đào tạo","cổng đào tạo","hệ thống đào tạo","portal","cổng thông tin",
+            "đăng nhập cổng đào tạo","quên mật khẩu","reset mật khẩu","quá tải","treo","tra cứu điểm",
+            "web đào tạo","cổng sinh viên","hệ thống online","trang web đào tạo"
+        }),
+    },
+    "khac": {
+        "hoc_phi": _no_diacritics_set({
+            "học phí","thu thêm","biên lai","miễn giảm","chính sách học phí","công khai học phí",
+            "đóng tiền","nộp học phí","thu tiền","hoá đơn học phí","chính sách","phiếu thu","biên nhận",
+            "đóng học","nộp lệ phí","phí học","thanh toán học phí","biên lai học phí","phiếu thu học phí"
+        }),
+        "hoc_bong": _no_diacritics_set({
+            "học bổng","học bổng kkht","tiêu chí học bổng","điểm chuẩn học bổng",
+            "nộp hồ sơ học bổng","kết quả học bổng","trễ hạn học bổng","xét học bổng",
+            "điều kiện học bổng","quỹ học bổng","thông báo học bổng","hồ sơ học bổng","điểm xét"
+        }),
+        "hanh_chinh": _no_diacritics_set({
+            "thủ tục hành chính","hành chính","giấy tờ hành chính","đóng dấu","xác nhận sinh viên","giấy xác nhận",
+            "phòng ctsv","tiếp nhận hồ sơ hành chính","trả kết quả hành chính","xin giấy tờ hành chính","nộp hồ sơ hành chính","biểu mẫu hành chính",
+            "phòng đào tạo hành chính","chứng nhận hành chính","xác minh hành chính","giấy phép hành chính","bản sao hành chính","văn thư hành chính"
+        }),
+        "clb": _no_diacritics_set({
+            "câu lạc bộ","clb","tuyển thành viên","hoạt động clb","ngoại khóa","sự kiện","workshop",
+            "đăng ký clb","đoàn hội","event","team","cuộc thi","hoạt động sv",
+            "hoạt động ngoại khoá","nhóm sinh viên","sự kiện trường","đăng ký tham gia"
+        }),
+        "ktx": _no_diacritics_set({
+            "ký túc xá","kí túc xá","ktx","ở ghép","phòng ktx","bảo vệ ktx","giờ giới nghiêm",
+            "điện ktx","nước ktx","khu ở ktx","an ninh ktx","phòng chung ktx",
+            "toà ktx","khu vực ở ktx","quản lý ktx"
+        }),
+        "mot_cua": _no_diacritics_set({
+            "văn phòng một cửa","vp1c","phòng một cửa","nộp hồ sơ một cửa","số thứ tự","lấy giấy một cửa","trả giấy một cửa","trả kết quả một cửa",
+            "hồ sơ một cửa","giấy tờ một cửa","số lượt một cửa","quầy tiếp nhận một cửa","một cửa"
+        }),
+        "dang_ky_tin": _no_diacritics_set({
+            "đăng ký môn","đăng ký tín chỉ","đk tín","đk môn","server đăng ký",
+            "xếp lịch tự động","lọc trùng lịch","hệ thống đăng ký",
+            "đăng ký online","chọn môn","mở lớp","đóng lớp","sắp lịch","hệ thống đăng ký tín chỉ"
+        }),
+        "diem_ren_luyen": _no_diacritics_set({
+            "điểm rèn luyện","drl","đánh giá rèn luyện","minh chứng drl","chấm drl",
+            "minh chứng","điểm rl","bảng drl","đánh giá cá nhân","đánh giá tập thể"
+        }),
+    },
+}
+_VI_LETTER = re.compile(r"[A-Za-zÀ-ỹĐđ]")
+def _is_garbage(txt: str) -> bool:
+    """Tell whether a text should be skipped as garbage.
+
+    A text is garbage when, after stripping, it has fewer than 4 characters,
+    fewer than 2 whitespace-separated words, or when less than 40% of its
+    characters match _VI_LETTER.
+    """
+    stripped = str(txt).strip()
+    too_short = len(stripped) < 4 or len(stripped.split()) < 2
+    if too_short:
+        return True
+    letter_count = len(_VI_LETTER.findall(stripped))
+    # len(stripped) >= 4 at this point, so the division is always defined.
+    return letter_count / len(stripped) < 0.4
+def _aspect_has_kw(aspect_vi: str, s_norm: str) -> bool:
+    """Check whether any keyword of the aspect occurs in the normalized sentence.
+
+    Keywords are normalized with _norm_match and matched as plain substrings
+    of s_norm; an aspect missing from SUBTOPIC_KW yields False.
+    """
+    keyword_sets = SUBTOPIC_KW.get(aspect_vi, {}).values()
+    candidates = (_norm_match(kw) for kws in keyword_sets for kw in kws)
+    # Keywords shorter than 3 characters are ignored to limit false positives.
+    return any(len(cand) >= 3 and cand in s_norm for cand in candidates)
+def _pick_subprompt(aspect: str, sentence: str) -> str:
+    """Pick the sub-topic prompt of an aspect that matches the sentence.
+
+    Args:
+        aspect: Aspect key used to index ASPECT_PROMPTS and SUBTOPIC_KW.
+        sentence: Raw sentence; it is normalized with _norm_match before matching.
+
+    Returns:
+        The prompt of the first sub-topic (in SUBTOPIC_KW order) that has a
+        keyword occurring in the sentence, else the aspect's "_default"
+        prompt. An aspect without prompts yields "" (as get_prompt does)
+        instead of raising KeyError.
+    """
+    prompts = ASPECT_PROMPTS.get(aspect, {})
+    default_prompt = prompts.get("_default", "")
+    s = _norm_match(str(sentence))
+    # NOTE(review): the first matching sub-topic wins, so keywords shared by
+    # several sub-topics always resolve to the earliest one; confirm intended.
+    for sub, kws in SUBTOPIC_KW.get(aspect, {}).items():
+        for kw in kws:
+            kw_norm = _norm_match(kw)
+            # Only keywords of at least 3 characters are matched.
+            if len(kw_norm) >= 3 and kw_norm in s:
+                return prompts.get(sub, default_prompt)
+    return default_prompt
+def _has_any_kw(s_norm: str) -> bool:
+    """Check whether the normalized sentence contains a keyword of any aspect in ASPECTS_VI."""
+    return any(_aspect_has_kw(aspect, s_norm) for aspect in ASPECTS_VI)
+def get_prompt(aspect_en: str, sentence: str = "", use_subprompt: bool = False) -> str:
+    """Return the prompt for an English aspect name.
+
+    The aspect is mapped to its Vietnamese key through ASPECT_REVERSE_MAPPING
+    (unknown names fall back to "khac"). With use_subprompt set and a
+    non-empty sentence, the choice is delegated to _pick_subprompt; otherwise
+    the aspect's "_default" prompt is returned, or "" when there is none.
+    """
+    aspect_vi = ASPECT_REVERSE_MAPPING.get(aspect_en, "khac")
+    if not (use_subprompt and sentence):
+        return ASPECT_PROMPTS.get(aspect_vi, {}).get("_default", "")
+    return _pick_subprompt(aspect_vi, sentence)

models.py CHANGED Viewed

@@ -14,7 +14,6 @@ class User(UserMixin, db.Model):
     is_admin = db.Column(db.Boolean, default=False, nullable=False)
     created_at = db.Column(db.DateTime, default=datetime.utcnow)
-    # Relationship với feedbacks
     feedbacks = db.relationship('Feedback', backref='user', lazy=True)
     def set_password(self, password):
@@ -33,11 +32,11 @@ class Feedback(db.Model):
     id = db.Column(db.Integer, primary_key=True)
     text = db.Column(db.Text, nullable=False)
-    sentiment = db.Column(db.String(20), nullable=False)  # positive, neutral, negative
-    topic = db.Column(db.String(50), nullable=False)      # lecturer, training_program, facility, others
     sentiment_confidence = db.Column(db.Float, nullable=False)
     topic_confidence = db.Column(db.Float, nullable=False)
-    user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False)  # Bắt buộc vì đã login_required
     created_at = db.Column(db.DateTime, default=datetime.utcnow)
     def __repr__(self):

     is_admin = db.Column(db.Boolean, default=False, nullable=False)
     created_at = db.Column(db.DateTime, default=datetime.utcnow)
     feedbacks = db.relationship('Feedback', backref='user', lazy=True)
     def set_password(self, password):
     id = db.Column(db.Integer, primary_key=True)
     text = db.Column(db.Text, nullable=False)
+    sentiment = db.Column(db.String(20), nullable=False)
+    topic = db.Column(db.String(50), nullable=False)
     sentiment_confidence = db.Column(db.Float, nullable=False)
     topic_confidence = db.Column(db.Float, nullable=False)
+    user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False)
     created_at = db.Column(db.DateTime, default=datetime.utcnow)
     def __repr__(self):

requirements.txt CHANGED Viewed

@@ -14,14 +14,7 @@ transformers==4.44.0
 tokenizers==0.19.1
 huggingface-hub>=0.23.2
 safetensors
-datasets>=2.19.0
 # Data Processing and Utilities
-numpy
-requests
-tqdm
 pytz==2023.3
 schedule>=1.2.0
-# Performance Optimization
-hf_transfer>=0.1.4

 tokenizers==0.19.1
 huggingface-hub>=0.23.2
 safetensors
 # Data Processing and Utilities
 pytz==2023.3
 schedule>=1.2.0

static/css/style.css CHANGED Viewed

@@ -904,3 +904,29 @@ a.text-secondary.fw-bold:hover {
     margin-left: 0 !important; /* icon sát trái */
 }

     margin-left: 0 !important; /* icon sát trái */
 }
+/* Multiple Topics Display Styles */
+/* Vertical stack of result items separated by a 1rem gap. */
+.multiple-topics-container {
+    display: flex;
+    flex-direction: column;
+    gap: 1rem;
+}
+/* One result item: rounded gray card with a 4px left accent border. */
+.topic-sentiment-item {
+    background: var(--gray-100);
+    border-radius: 12px;
+    padding: 1rem;
+    border-left: 4px solid var(--gray-400);
+    transition: all 0.3s ease;
+}
+/* Hover feedback: darker background, 4px shift to the right, soft shadow. */
+.topic-sentiment-item:hover {
+    background: var(--gray-200);
+    transform: translateX(4px);
+    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
+}
+/* Slightly enlarged text for .fw-semibold elements inside an item. */
+.topic-sentiment-item .fw-semibold {
+    color: var(--text-primary);
+    font-size: 1.05rem;
+}

static/js/app.js CHANGED Viewed

@@ -146,20 +146,21 @@ document.addEventListener('DOMContentLoaded', function() {
                 body: JSON.stringify({ text: feedbackText })
             });
             const data = await response.json();
             if (response.ok) {
-                utils.updateResult('sentiment', data.sentiment);
-                utils.updateResult('topic', data.topic);
-                elements.originalText.textContent = feedbackText;
-                elements.results.style.display = 'block';
-                elements.results.classList.add('fade-in');
-                utils.scrollToResults();
                 // Clear the textarea after successful analysis
                 elements.textarea.value = '';
                 utils.updateCharCounter();
-                // Reload feedback history after successful analysis
-                loadFeedbackHistory(1);
             } else {
                 utils.showError(data.error || 'Có lỗi xảy ra khi phân tích feedback!');
             }
@@ -256,8 +257,10 @@ async function loadFeedbackHistory(page = 1, shouldScroll = false) {
             ...filterParams
         });
         const response = await fetch(`/api/feedback-history?${queryParams.toString()}`);
         const data = await response.json();
         if (response.ok) {
             displayFeedbackHistory(data.feedbacks);
             displayPagination(data, page);
@@ -461,6 +464,97 @@ function getSentimentColor(sentiment) {
     return colors[sentiment] || 'secondary';
 }
 // Time Filter Functions
 function initTimeFilter() {
     const timeFilterInputs = document.querySelectorAll('input[name="timeFilter"]');
@@ -578,15 +672,18 @@ function initAnalysisModeToggle() {
     const csvModeInput = document.getElementById('csvMode');
     const singleForm = document.getElementById('singleFeedbackForm');
     const csvForm = document.getElementById('csvUploadForm');
     // Show single form by default
     singleForm.style.display = 'block';
     csvForm.style.display = 'none';
     singleModeInput.addEventListener('change', function() {
         if (this.checked) {
             singleForm.style.display = 'block';
             csvForm.style.display = 'none';
         }
     });

                 body: JSON.stringify({ text: feedbackText })
             });
             const data = await response.json();
+            console.log('Predict response:', data);
             if (response.ok) {
+                // Display multiple topics with sentiments
+                displayMultipleResults(data.results, feedbackText);
                 // Clear the textarea after successful analysis
                 elements.textarea.value = '';
                 utils.updateCharCounter();
+                // Reload feedback history after successful analysis with delay
+                // to ensure database has committed the new data
+                console.log('Reloading feedback history...');
+                setTimeout(() => {
+                    loadFeedbackHistory(1, false);
+                }, 500);
             } else {
                 utils.showError(data.error || 'Có lỗi xảy ra khi phân tích feedback!');
             }
             ...filterParams
         });
+        console.log('Loading feedback history, page:', page);
         const response = await fetch(`/api/feedback-history?${queryParams.toString()}`);
         const data = await response.json();
+        console.log('Feedback history response:', data);
         if (response.ok) {
             displayFeedbackHistory(data.feedbacks);
             displayPagination(data, page);
     return colors[sentiment] || 'secondary';
 }
+// Display multiple topics with sentiments.
+// results: array of objects read via .topic, .sentiment and .confidence.
+// text: the raw feedback string; it is HTML-escaped before being placed in markup.
+function displayMultipleResults(results, text) {
+    const elements = {
+        results: document.getElementById('results'),
+        originalText: document.getElementById('originalText')
+    };
+    // Validate elements exist
+    if (!elements.results) {
+        console.error('❌ Results element not found');
+        return;
+    }
+    // The feedback text is user-supplied and is written through innerHTML
+    // below, so it must be escaped to prevent HTML/script injection.
+    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
+        '&': '&amp;',
+        '<': '&lt;',
+        '>': '&gt;',
+        '"': '&quot;',
+        "'": '&#39;'
+    }[ch]));
+    // Reveal the results panel and scroll to it (shared by both outcomes).
+    const reveal = () => {
+        elements.results.style.display = 'block';
+        elements.results.classList.add('fade-in');
+        Utils.scrollToSection(elements.results, 105);
+    };
+    // Display original text first (before innerHTML clears it);
+    // textContent never interprets markup, so no escaping is needed here.
+    if (elements.originalText) {
+        elements.originalText.textContent = text;
+    }
+    if (!results || results.length === 0) {
+        // No topics detected or all below threshold
+        elements.results.innerHTML = `
+            <div class="alert alert-warning">
+                <i class="fas fa-info-circle me-2"></i>
+                Không phát hiện topic rõ ràng trong feedback này.
+            </div>
+        `;
+        reveal();
+        return;
+    }
+    // Build HTML for multiple topics
+    let html = '<div class="mb-4"><h5 class="mb-3"><i class="fas fa-check-circle me-2"></i>Kết quả phân tích:</h5></div>';
+    html += '<div class="multiple-topics-container">';
+    results.forEach(result => {
+        // NOTE(review): icon/label values come from the config helpers and are
+        // inserted as-is; confirm they never carry server-provided text.
+        const sentimentConfig = getSentimentConfig(result.sentiment);
+        const topicConfig = getTopicConfig(result.topic);
+        const sentimentColor = getSentimentColor(result.sentiment);
+        html += `
+            <div class="topic-sentiment-item mb-3">
+                <div class="d-flex align-items-center">
+                    <div class="flex-grow-1">
+                        <div class="d-flex align-items-center mb-2">
+                            <i class="fas ${topicConfig.icon} me-2" style="color: #6B7280;"></i>
+                            <span class="fw-semibold">${topicConfig.label}</span>
+                        </div>
+                        <div>
+                            <span class="badge bg-${sentimentColor} me-2">
+                                <i class="fas ${sentimentConfig.icon} me-1"></i>
+                                ${sentimentConfig.label}
+                            </span>
+                            <small class="text-muted">
+                                <i class="fas fa-percentage me-1"></i>
+                                Độ tin cậy: ${(result.confidence * 100).toFixed(1)}%
+                            </small>
+                        </div>
+                    </div>
+                </div>
+            </div>
+        `;
+    });
+    html += '</div>';
+    // Add original text to the end (escaped: it is raw user input)
+    html += `
+        <div class="card shadow-sm">
+            <div class="card-header bg-primary text-white">
+                <h5 class="card-title mb-0">
+                    <i class="fas fa-quote-left me-2"></i>
+                    Feedback Gốc
+                </h5>
+            </div>
+            <div class="card-body">
+                <blockquote class="blockquote mb-0">
+                    <p class="mb-0">${escapeHtml(text)}</p>
+                </blockquote>
+            </div>
+        </div>
+    `;
+    elements.results.innerHTML = html;
+    reveal();
+}
 // Time Filter Functions
 function initTimeFilter() {
     const timeFilterInputs = document.querySelectorAll('input[name="timeFilter"]');
     const csvModeInput = document.getElementById('csvMode');
     const singleForm = document.getElementById('singleFeedbackForm');
     const csvForm = document.getElementById('csvUploadForm');
+    const results = document.getElementById('results');
     // Show single form by default
     singleForm.style.display = 'block';
     csvForm.style.display = 'none';
+    if (results) results.style.display = 'none'; // Hide results when switching to single mode
     singleModeInput.addEventListener('change', function() {
         if (this.checked) {
             singleForm.style.display = 'block';
             csvForm.style.display = 'none';
+            if (results) results.style.display = 'none'; // Hide CSV results
         }
     });