|
|
import os |
|
|
import torch |
|
|
import csv |
|
|
import io |
|
|
import atexit |
|
|
import schedule |
|
|
import time |
|
|
from threading import Thread |
|
|
from transformers import AutoTokenizer |
|
|
from flask import Flask, request, jsonify, render_template, redirect, url_for, flash |
|
|
from flask_login import LoginManager, login_user, logout_user, login_required, current_user |
|
|
from functools import wraps |
|
|
from models import db, User, Feedback |
|
|
from forms import RegistrationForm, LoginForm |
|
|
from PhoBERTPairABSA import PhoBERTPairABSA |
|
|
from datetime import datetime, timedelta |
|
|
import pytz |
|
|
from database_manager import db_manager |
|
|
from model_config import ( |
|
|
get_prompt, ASPECTS_EN, ASPECTS_VI, LABEL_MAP, MAX_LEN, PRED_THRESHOLD, |
|
|
MIN_SENT_PROB, MIN_MARGIN, |
|
|
_is_garbage, _aspect_has_kw, _has_any_kw, _norm_match, ASPECT_REVERSE_MAPPING, |
|
|
BASE_MODEL, NUM_CLASSES, DROPOUT |
|
|
) |
|
|
|
|
|
# Flask application object shared by every route and extension in this module.
app = Flask(__name__)

# Session-signing key. It is read from the SECRET_KEY environment variable when
# that is set, so a deployment does not have to run with the placeholder that
# is committed here; the placeholder stays as the fallback for local runs.
app.config['SECRET_KEY'] = os.environ.get(
    'SECRET_KEY', 'your-secret-key-change-this-in-production'
)
|
|
|
|
|
def backup_database(force: bool = False):
    """Back up the database through ``db_manager`` on a best-effort basis.

    The original docstring describes the destination as the Hugging Face Hub;
    the actual transport lives in ``db_manager``.

    Args:
        force: Passed through unchanged to ``db_manager.backup_database``.

    Returns:
        Whatever ``db_manager.backup_database`` returns, or ``False`` when it
        raises.
    """
    try:
        return db_manager.backup_database(force=force)
    except Exception:
        # A failed backup must never break the caller (request handlers, the
        # scheduler thread, the atexit hook), but it is logged so the failure
        # does not go unnoticed.
        app.logger.exception("Database backup failed")
        return False
|
|
|
|
|
def restore_database():
    """Restore the database through ``db_manager`` on a best-effort basis.

    The original docstring describes the source as the Hugging Face Hub; the
    actual transport lives in ``db_manager``.

    Returns:
        Whatever ``db_manager.restore_database`` returns, or ``False`` when it
        raises.
    """
    try:
        return db_manager.restore_database()
    except Exception:
        # Keep the best-effort contract (callers only check truthiness), but
        # record why the restore failed instead of discarding the traceback.
        app.logger.exception("Database restore failed")
        return False
|
|
|
|
|
def run_scheduler():
    """Poll the ``schedule`` job registry forever, running whatever is due.

    This function never returns; it is meant to be the target of a background
    thread. Jobs are checked once per polling interval.
    """
    poll_interval_seconds = 60
    while True:
        schedule.run_pending()
        time.sleep(poll_interval_seconds)
|
|
|
|
|
# Register the hourly backup job with the scheduler.
schedule.every().hour.do(backup_database)

# Poll the job registry from a daemon thread, which does not keep the process
# alive at shutdown.
scheduler_thread = Thread(daemon=True, target=run_scheduler)
scheduler_thread.start()

# Attempt one more backup when the interpreter exits.
atexit.register(backup_database)
|
|
|
|
|
# Time zone that UTC timestamps are converted to by utc_to_vietnam_time().
VIETNAM_TIMEZONE = pytz.timezone('Asia/Ho_Chi_Minh')


def utc_to_vietnam_time(utc_datetime):
    """Convert a UTC datetime to the Vietnam time zone.

    Args:
        utc_datetime: A datetime, or ``None``. A naive datetime is interpreted
            as UTC; an aware one is converted from its own zone.

    Returns:
        The equivalent aware datetime in ``VIETNAM_TIMEZONE``, or ``None``
        when the input is ``None``.
    """
    if utc_datetime is None:
        return None

    if utc_datetime.tzinfo is not None:
        aware = utc_datetime
    else:
        aware = pytz.utc.localize(utc_datetime)
    return aware.astimezone(VIETNAM_TIMEZONE)
|
|
|
|
|
# The SQLite file lives in ./instance relative to the current working
# directory; the directory is created on first start.
db_path = os.path.join(os.getcwd(), 'instance', 'feedback_analysis.db')
os.makedirs(os.path.dirname(db_path), exist_ok=True)

app.config.update(
    SQLALCHEMY_DATABASE_URI=f'sqlite:///{db_path}',
    SQLALCHEMY_TRACK_MODIFICATIONS=False,
)

# Bind the SQLAlchemy handle from models.py to this application.
db.init_app(app)

# Flask-Login: anonymous visitors are sent to the 'login' endpoint and shown
# the message below with the 'info' flash category.
login_manager = LoginManager()
login_manager.init_app(app)
login_manager.login_view = 'login'
login_manager.login_message_category = 'info'
login_manager.login_message = 'Vui lòng đăng nhập để sử dụng hệ thống phân tích feedback.'
|
|
|
|
|
@app.context_processor
def utility_processor():
    """Make ``utc_to_vietnam_time`` callable from every rendered template."""
    return {'utc_to_vietnam_time': utc_to_vietnam_time}
|
|
|
|
|
@login_manager.user_loader
def load_user(user_id):
    """Resolve the user id stored in the session to a ``User`` row.

    Args:
        user_id: The identifier Flask-Login read back from the session.

    Returns:
        The matching ``User``, or ``None`` when the id is not a valid integer
        or no such row exists. Flask-Login treats ``None`` as "not logged in".
    """
    try:
        primary_key = int(user_id)
    except (TypeError, ValueError):
        # A malformed or tampered session value must log the visitor out, not
        # turn every request into a 500.
        return None
    return User.query.get(primary_key)
|
|
|
|
|
def analyze_feedback(text):
    """Run pair-wise aspect-based sentiment analysis on one feedback string.

    The text is scored once per aspect in ``ASPECTS_EN``. For every aspect the
    model output is soft-maxed; class index 0 is used as the "aspect not
    present" class (the confidence that an aspect is present is
    ``1 - probs[:, 0]``) and the remaining indices are sentiment classes that
    are named through ``LABEL_MAP``.

    Args:
        text: The feedback to analyse; it is coerced with ``str()`` and
            stripped.

    Returns:
        A list of dicts ordered by descending ``confidence``, each with the
        keys ``topic``, ``sentiment``, ``confidence``,
        ``sentiment_confidence`` and ``margin``. The list is empty when the
        model or tokenizer is not loaded, when ``_is_garbage`` rejects the
        text, or when no aspect passes the thresholds.
    """
    if tokenizer is None or model is None:
        return []

    text = str(text).strip()
    if _is_garbage(text):
        return []

    normalized_text = _norm_match(text)
    keyword_threshold = float(PRED_THRESHOLD)

    # Tuning constants of the selection heuristic.
    keyword_boost = 0.02          # added to the confidence of keyword-backed aspects
    no_keyword_threshold = 0.85   # cut-off for aspects without a keyword hit
    dominant_threshold = 0.95     # a hit this strong prunes weak keyword-less hits
    keyword_margin_relief = 0.02  # keyword-backed aspects may have a smaller margin

    aspect_logits = []
    has_keywords = []
    with torch.no_grad():
        for aspect_en in ASPECTS_EN:
            aspect_vi = ASPECT_REVERSE_MAPPING.get(aspect_en, "khac")
            prompt = get_prompt(aspect_en, sentence=text, use_subprompt=True)

            inputs = tokenizer(
                prompt, text,
                return_tensors="pt",
                truncation="only_second",
                padding=True,
                max_length=MAX_LEN,
            ).to(device)

            logits = model(inputs["input_ids"], inputs["attention_mask"]).squeeze(0)
            aspect_logits.append(logits)
            has_keywords.append(_aspect_has_kw(aspect_vi, normalized_text))

    if not aspect_logits:
        # No aspects configured: torch.stack() would raise on an empty list.
        return []

    probs = torch.softmax(torch.stack(aspect_logits, dim=0), dim=-1)

    # Confidence that the aspect is present at all, with a small bonus
    # (capped at 1.0) when the text contains one of the aspect's keywords.
    confidence = 1.0 - probs[:, 0]
    for idx, has_kw in enumerate(has_keywords):
        if has_kw:
            confidence[idx] = min(1.0, confidence[idx] + keyword_boost)

    # First pass: keyword-backed aspects use the configured threshold, the
    # others need a much higher confidence.
    kept = []
    for idx, has_kw in enumerate(has_keywords):
        cutoff = keyword_threshold if has_kw else no_keyword_threshold
        if confidence[idx] >= cutoff:
            kept.append(idx)

    # Second pass: once at least one aspect is a dominant hit, keyword-less
    # aspects survive only if they are dominant themselves.
    dominant = {idx for idx in kept if confidence[idx] >= dominant_threshold}
    if dominant:
        kept = [idx for idx in kept if has_keywords[idx] or idx in dominant]

    # NOTE: an earlier revision followed this with a "relaxed threshold" pass
    # for keyword-backed aspects. Its cut-off (PRED_THRESHOLD - 0.05 + 0.10)
    # was stricter than the first-pass cut-off, so it could never add an
    # aspect; it was removed as unreachable.

    if not kept:
        return []

    results = []
    # Iterating in descending confidence order yields an already sorted list.
    for idx in sorted(kept, key=lambda j: float(confidence[j]), reverse=True):
        sentiment_probs = probs[idx, 1:].clone()
        top_idx = int(torch.argmax(sentiment_probs).item())
        top_p = float(sentiment_probs[top_idx].item())

        # Mask the winner to read off the runner-up probability.
        sentiment_probs[top_idx] = -1.0
        runner_up_p = float(sentiment_probs.max().item())
        margin = top_p - runner_up_p

        required_margin = MIN_MARGIN
        if has_keywords[idx]:
            required_margin = MIN_MARGIN - keyword_margin_relief

        # Drop aspects whose sentiment is too uncertain or too ambiguous.
        if top_p < MIN_SENT_PROB or margin < required_margin:
            continue

        results.append({
            "topic": ASPECTS_EN[idx],
            # +1 because index 0 of the full distribution is the "none" class.
            "sentiment": LABEL_MAP[top_idx + 1],
            "confidence": float(confidence[idx].item()),
            "sentiment_confidence": top_p,
            "margin": margin,
        })

    return results
|
|
|
|
|
def save_feedback_to_db(text, results, user_id):
    """Stage one ``Feedback`` row per analysis result on the current session.

    Nothing is committed here; the caller is responsible for
    ``db.session.commit()``.

    Args:
        text: The feedback text the results belong to.
        results: Iterable of result dicts with the keys ``sentiment``,
            ``topic``, ``confidence`` and optionally
            ``sentiment_confidence`` (which falls back to ``confidence``).
        user_id: Id of the user the rows are attributed to.
    """
    for entry in results:
        topic_confidence = entry['confidence']
        sentiment_confidence = entry.get('sentiment_confidence', topic_confidence)
        row = Feedback(
            text=text,
            sentiment=entry['sentiment'],
            topic=entry['topic'],
            sentiment_confidence=sentiment_confidence,
            topic_confidence=topic_confidence,
            user_id=user_id,
        )
        db.session.add(row)
|
|
|
|
|
def admin_required(f):
    """Decorator that restricts a view to authenticated admin users.

    Anonymous visitors are redirected to the login page (with the current URL
    as ``next``); authenticated non-admins get a flash message and are sent to
    the home page.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        # Happy path first: a logged-in admin goes straight to the view.
        if current_user.is_authenticated and current_user.is_admin:
            return f(*args, **kwargs)

        if not current_user.is_authenticated:
            return redirect(url_for('login', next=request.url))

        flash('Bạn không có quyền truy cập trang này. Chỉ admin mới được phép.', 'danger')
        return redirect(url_for('home'))

    return wrapper
|
|
|
|
|
# Inference device: the first CUDA GPU when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_REPO = "Ptul2x5/Student_Feedback_Sentiment"
# Defined outside the try block so the name exists even when loading fails.
MODEL_URL = f"https://huggingface.co/{MODEL_REPO}/resolve/main/model.bin"

# Load tokenizer and weights once at import time. On any failure both globals
# are set to None; the request handlers check for that and report it.
try:
    os.environ['HF_HUB_ENABLE_HF_TRANSFER'] = '0'
    tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO, use_fast=False)

    # NOTE(review): this downloads and unpickles a checkpoint from the
    # network; only point MODEL_REPO at a repository you trust.
    loaded = torch.hub.load_state_dict_from_url(MODEL_URL, map_location=device)

    # The checkpoint is either a bare state dict or a dict that wraps it
    # under the "model_state" key.
    if isinstance(loaded, dict) and "model_state" in loaded:
        state_dict = loaded["model_state"]
    else:
        state_dict = loaded

    model = PhoBERTPairABSA(base_model=BASE_MODEL, num_cls=NUM_CLASSES, dropout=DROPOUT)
    load_report = model.load_state_dict(state_dict, strict=False)

    # strict=False tolerates mismatched keys; report them so a partially
    # initialised model is not served unnoticed.
    missing_keys = getattr(load_report, "missing_keys", None)
    unexpected_keys = getattr(load_report, "unexpected_keys", None)
    if missing_keys or unexpected_keys:
        app.logger.warning(
            "Checkpoint keys did not fully match the model "
            "(missing=%s, unexpected=%s)",
            missing_keys, unexpected_keys,
        )

    model.to(device)
    model.eval()
except Exception:
    # Keep the application bootable without a model, but record the reason.
    app.logger.exception("Could not load tokenizer/model from %s", MODEL_REPO)
    model = None
    tokenizer = None
|
|
|
|
|
@app.route("/", methods=["GET"])
@login_required
def home():
    """Serve the main page (``index.html``) to a logged-in user."""
    page = render_template("index.html")
    return page
|
|
|
|
|
@app.route("/register", methods=["GET", "POST"])
def register():
    """Show the registration form and create the account on a valid submit.

    Logged-in users are redirected to the home page. After a successful
    registration the database is backed up and the user is sent to the login
    page.
    """
    if current_user.is_authenticated:
        return redirect(url_for('home'))

    form = RegistrationForm()
    # First visit or failed validation: (re-)render the form.
    if not form.validate_on_submit():
        return render_template('register.html', form=form)

    new_user = User(username=form.username.data)
    new_user.set_password(form.password.data)
    db.session.add(new_user)
    db.session.commit()
    backup_database()

    flash('Đăng ký thành công! Vui lòng đăng nhập.', 'success')
    return redirect(url_for('login'))
|
|
|
|
|
def _is_safe_redirect_target(target):
    """Return True when *target* stays on this application's own host.

    Both relative paths and absolute URLs are accepted as long as they
    resolve to the host of the current request.
    """
    from urllib.parse import urljoin, urlparse

    # Browsers treat a backslash like a slash, so "/\\evil.example" could be
    # read as a protocol-relative URL; reject such values outright.
    if '\\' in target:
        return False

    own_host = urlparse(request.host_url)
    resolved = urlparse(urljoin(request.host_url, target))
    return resolved.scheme in ('http', 'https') and resolved.netloc == own_host.netloc


@app.route("/login", methods=["GET", "POST"])
def login():
    """Show the login form and authenticate the user on a valid submit.

    Logged-in users are redirected to the home page. After a successful login
    the user is sent to the ``next`` query parameter if, and only if, it
    points back to this host; otherwise to the home page. Wrong credentials
    flash an error and re-render the form.
    """
    if current_user.is_authenticated:
        return redirect(url_for('home'))

    form = LoginForm()
    if form.validate_on_submit():
        user = User.query.filter_by(username=form.username.data).first()
        if user and user.check_password(form.password.data):
            login_user(user, remember=True)
            flash('Đăng nhập thành công!', 'success')

            # `next` comes straight from the query string; following it
            # blindly would be an open redirect.
            next_page = request.args.get('next')
            if next_page and _is_safe_redirect_target(next_page):
                return redirect(next_page)
            return redirect(url_for('home'))

        flash('Tên đăng nhập hoặc mật khẩu không đúng.', 'danger')

    return render_template('login.html', form=form)
|
|
|
|
|
@app.route("/logout")
@login_required
def logout():
    """End the current session, flash a confirmation and go to the home page."""
    logout_user()
    flash('Đã đăng xuất thành công!', 'info')
    home_url = url_for('home')
    return redirect(home_url)
|
|
|
|
|
@app.route("/api/health", methods=["GET"])
def health():
    """Liveness probe: always answers with ``{"status": "healthy"}``."""
    return jsonify(status="healthy")
|
|
|
|
|
@app.route("/my-statistics")
@login_required
def my_statistics():
    """Render the statistics page for the logged-in user's own feedback.

    The template receives the total number of rows, the ten most recent rows,
    per-sentiment and per-topic counts, and per-day counts for the last
    30 days. On any error a message is flashed and the user is redirected to
    the home page.
    """
    try:
        own_feedback = Feedback.query.filter_by(user_id=current_user.id)

        # Count in the database instead of loading every row just for len().
        total_feedbacks = own_feedback.count()

        sentiment_rows = db.session.query(
            Feedback.sentiment,
            db.func.count(Feedback.id).label('count')
        ).filter_by(user_id=current_user.id).group_by(Feedback.sentiment).all()
        sentiment_stats = [
            {'sentiment': item.sentiment, 'count': item.count}
            for item in sentiment_rows
        ]

        topic_rows = db.session.query(
            Feedback.topic,
            db.func.count(Feedback.id).label('count')
        ).filter_by(user_id=current_user.id).group_by(Feedback.topic).all()
        topic_stats = [
            {'topic': item.topic, 'count': item.count}
            for item in topic_rows
        ]

        # The rest of this module treats Feedback.created_at as naive UTC
        # (see get_feedback_history), so the cut-off is computed in UTC too
        # rather than in the server's local time.
        thirty_days_ago = datetime.utcnow() - timedelta(days=30)
        daily_rows = db.session.query(
            db.func.date(Feedback.created_at).label('date'),
            db.func.count(Feedback.id).label('count')
        ).filter(
            Feedback.user_id == current_user.id,
            Feedback.created_at >= thirty_days_ago
        ).group_by(
            db.func.date(Feedback.created_at)
        ).order_by('date').all()
        daily_stats = [
            {'date': str(item.date), 'count': item.count}
            for item in daily_rows
        ]

        recent_feedbacks = (
            own_feedback.order_by(Feedback.created_at.desc()).limit(10).all()
        )

        return render_template('my_statistics.html',
                               total_feedbacks=total_feedbacks,
                               recent_feedbacks=recent_feedbacks,
                               sentiment_stats=sentiment_stats,
                               topic_stats=topic_stats,
                               daily_stats=daily_stats)
    except Exception as e:
        # Leave the session usable for the next request and keep a traceback.
        db.session.rollback()
        app.logger.exception("Could not build personal statistics")
        flash(f'Lỗi khi tải dữ liệu: {str(e)}', 'danger')
        return redirect(url_for('home'))
|
|
|
|
|
@app.route("/admin/database")
@admin_required
def view_database():
    """Render the admin overview of all users' feedback.

    The template receives the number of users and feedback rows, the ten most
    recent rows, per-sentiment and per-topic counts, and per-day counts for
    the last 7 days. On any error a message is flashed and the admin is
    redirected to the home page.
    """
    try:
        total_users = User.query.count()
        total_feedbacks = Feedback.query.count()
        recent_feedbacks = (
            Feedback.query.order_by(Feedback.created_at.desc()).limit(10).all()
        )

        sentiment_rows = db.session.query(
            Feedback.sentiment,
            db.func.count(Feedback.id).label('count')
        ).group_by(Feedback.sentiment).all()
        sentiment_stats = [
            {'sentiment': item.sentiment, 'count': item.count}
            for item in sentiment_rows
        ]

        topic_rows = db.session.query(
            Feedback.topic,
            db.func.count(Feedback.id).label('count')
        ).group_by(Feedback.topic).all()
        topic_stats = [
            {'topic': item.topic, 'count': item.count}
            for item in topic_rows
        ]

        # The rest of this module treats Feedback.created_at as naive UTC
        # (see get_feedback_history), so the cut-off is computed in UTC too
        # rather than in the server's local time.
        seven_days_ago = datetime.utcnow() - timedelta(days=7)
        daily_rows = db.session.query(
            db.func.date(Feedback.created_at).label('date'),
            db.func.count(Feedback.id).label('count')
        ).filter(Feedback.created_at >= seven_days_ago).group_by(
            db.func.date(Feedback.created_at)
        ).order_by('date').all()
        daily_stats = [
            {'date': str(item.date), 'count': item.count}
            for item in daily_rows
        ]

        return render_template('database_view.html',
                               total_users=total_users,
                               total_feedbacks=total_feedbacks,
                               recent_feedbacks=recent_feedbacks,
                               sentiment_stats=sentiment_stats,
                               topic_stats=topic_stats,
                               daily_stats=daily_stats)
    except Exception as e:
        # Leave the session usable for the next request and keep a traceback.
        db.session.rollback()
        app.logger.exception("Could not build the admin database overview")
        flash(f'Lỗi khi tải dữ liệu: {str(e)}', 'danger')
        return redirect(url_for('home'))
|
|
|
|
|
@app.route("/api/feedback-history", methods=["GET"])
@login_required
def get_feedback_history():
    """Return one page of the logged-in user's feedback history as JSON.

    Query parameters:
        page: Page number, default 1.
        per_page: Page size, default 10, capped at 100.
        time_filter: ``all`` (default), ``today``, ``week``, ``month`` or
            ``custom``.
        start_date, end_date: ``YYYY-MM-DD`` days in Vietnam local time; only
            used with ``time_filter=custom`` and only when both are given.
            Both days are inclusive.

    Returns:
        JSON with ``feedbacks``, ``total``, ``pages``, ``current_page``,
        ``has_next`` and ``has_prev``; a 400 for an unparsable date and a 500
        for any other failure.
    """
    max_per_page = 100  # upper bound for the client-controlled page size

    def to_naive_utc(aware_dt):
        # created_at is compared as naive UTC throughout this function.
        return aware_dt.astimezone(pytz.utc).replace(tzinfo=None)

    try:
        page = request.args.get('page', 1, type=int)
        per_page = request.args.get('per_page', 10, type=int)
        time_filter = request.args.get('time_filter', 'all', type=str)
        start_date = request.args.get('start_date', None, type=str)
        end_date = request.args.get('end_date', None, type=str)

        query = Feedback.query.filter_by(user_id=current_user.id)

        if time_filter != 'all':
            vietnam_now = utc_to_vietnam_time(datetime.utcnow())

            if time_filter == 'today':
                # Midnight of the current day in Vietnam.
                day_start = vietnam_now.replace(hour=0, minute=0, second=0, microsecond=0)
                query = query.filter(Feedback.created_at >= to_naive_utc(day_start))
            elif time_filter == 'week':
                week_ago = vietnam_now - timedelta(days=7)
                query = query.filter(Feedback.created_at >= to_naive_utc(week_ago))
            elif time_filter == 'month':
                month_ago = vietnam_now - timedelta(days=30)
                query = query.filter(Feedback.created_at >= to_naive_utc(month_ago))
            elif time_filter == 'custom' and start_date and end_date:
                try:
                    range_start = VIETNAM_TIMEZONE.localize(
                        datetime.strptime(start_date, '%Y-%m-%d')
                    )
                    # Half-open interval [start, end + 1 day): covers the whole
                    # end day, including its final fractional second.
                    range_end = VIETNAM_TIMEZONE.localize(
                        datetime.strptime(end_date, '%Y-%m-%d') + timedelta(days=1)
                    )
                except (ValueError, OverflowError):
                    return jsonify({'error': 'Định dạng ngày không hợp lệ'}), 400
                query = query.filter(
                    Feedback.created_at >= to_naive_utc(range_start),
                    Feedback.created_at < to_naive_utc(range_end),
                )

        feedbacks = query.order_by(Feedback.created_at.desc()).paginate(
            page=page, per_page=per_page, error_out=False, max_per_page=max_per_page
        )

        feedback_list = [
            {
                'id': feedback.id,
                'text': feedback.text,
                'sentiment': feedback.sentiment,
                'topic': feedback.topic,
                'sentiment_confidence': feedback.sentiment_confidence,
                'topic_confidence': feedback.topic_confidence,
                'created_at': utc_to_vietnam_time(feedback.created_at).strftime('%H:%M:%S %d/%m/%Y'),
            }
            for feedback in feedbacks.items
        ]

        return jsonify({
            'feedbacks': feedback_list,
            # The pagination object already holds the total row count.
            'total': feedbacks.total,
            'pages': feedbacks.pages,
            'current_page': page,
            'has_next': feedbacks.has_next,
            'has_prev': feedbacks.has_prev
        })
    except Exception as e:
        app.logger.exception("Could not load feedback history")
        return jsonify({"error": f"Có lỗi xảy ra: {str(e)}"}), 500
|
|
|
|
|
@app.route("/predict", methods=["POST"])
@login_required
def predict():
    """Analyse one feedback text posted as JSON and store the results.

    Expects a JSON object with a string field ``text`` (at most 1000
    characters after stripping).

    Returns:
        JSON with ``results`` (see ``analyze_feedback``) and
        ``has_multiple_topics``. Responds 400 for a missing, non-string, empty
        or over-long text, and 500 when the model is not loaded or the
        analysis fails. A failure to store the results does not fail the
        request.
    """
    try:
        # silent=True: a missing or malformed JSON body yields None instead of
        # an HTTP exception that the handler below would report as a 500.
        data = request.get_json(silent=True)
        text = data.get("text", "") if isinstance(data, dict) else ""
        if not isinstance(text, str):
            return jsonify({"error": "Missing 'text' field"}), 400
        text = text.strip()

        if not text:
            return jsonify({"error": "Missing 'text' field"}), 400

        if len(text) > 1000:
            return jsonify({"error": "Text quá dài. Vui lòng nhập tối đa 1000 ký tự."}), 400

        if tokenizer is None or model is None:
            return jsonify({"error": "Model or tokenizer not loaded. Please restart the application."}), 500

        results = analyze_feedback(text)

        # Persisting is best-effort: the caller still gets the analysis.
        try:
            save_feedback_to_db(text, results, current_user.id)
            db.session.commit()
        except Exception:
            # Roll back so the session is usable again, and keep a traceback.
            db.session.rollback()
            app.logger.exception("Could not store prediction results")
        else:
            backup_database()

        return jsonify({
            "results": results,
            "has_multiple_topics": len(results) > 1
        })
    except Exception as e:
        app.logger.exception("Prediction failed")
        return jsonify({"error": f"Có lỗi xảy ra khi xử lý: {str(e)}"}), 500
|
|
|
|
|
@app.route('/admin/backup', methods=['POST'])
@admin_required
def manual_backup():
    """Trigger a database backup on demand (admin only).

    Returns:
        JSON with ``success`` and ``message``; status 500 when the backup
        reports failure or raises.
    """
    try:
        succeeded = backup_database()
        if not succeeded:
            return jsonify({"success": False, "message": "Backup failed"}), 500
        return jsonify({"success": True, "message": "Backup completed successfully"})
    except Exception as e:
        return jsonify({"success": False, "message": f"Backup error: {str(e)}"}), 500
|
|
|
|
|
@app.route('/admin/restore', methods=['POST'])
@admin_required
def manual_restore():
    """Trigger a database restore on demand (admin only).

    Returns:
        JSON with ``success`` and ``message``; status 500 when the restore
        reports failure or raises.
    """
    try:
        succeeded = restore_database()
        if not succeeded:
            return jsonify({"success": False, "message": "Restore failed"}), 500
        return jsonify({"success": True, "message": "Database restored successfully"})
    except Exception as e:
        return jsonify({"success": False, "message": f"Restore error: {str(e)}"}), 500
|
|
|
|
|
# One-time bootstrap at import: prepare the database file, create the tables,
# take an initial backup, migrate the users table and seed the admin account.
with app.app_context():
    db_manager.initialize_database_if_needed()
    db.create_all()
    db_manager.backup_database()

    # Lightweight migration: add users.is_admin when the column is missing.
    try:
        db.session.execute(db.text("SELECT is_admin FROM users LIMIT 1"))
    except Exception:
        # The probe failed, which is taken to mean the column does not exist.
        db.session.rollback()
        try:
            db.session.execute(db.text("ALTER TABLE users ADD COLUMN is_admin BOOLEAN DEFAULT 0"))
            db.session.commit()
        except Exception:
            db.session.rollback()
            app.logger.exception("Could not add the users.is_admin column")

    # Seed an 'admin' account on an empty database, and make sure an existing
    # 'admin' account carries the admin flag.
    try:
        total_users = User.query.count()
        admin_user = User.query.filter_by(username='admin').first()
        if not admin_user and total_users == 0:
            # The initial password can be supplied through ADMIN_PASSWORD; the
            # historical default is kept only as a fallback.
            admin_password = os.environ.get('ADMIN_PASSWORD')
            if not admin_password:
                admin_password = '123456'
                app.logger.warning(
                    "ADMIN_PASSWORD is not set; the admin account was created "
                    "with the default password. Change it immediately."
                )
            admin_user = User(username='admin', is_admin=True)
            admin_user.set_password(admin_password)
            db.session.add(admin_user)
            db.session.commit()
        elif admin_user and not admin_user.is_admin:
            admin_user.is_admin = True
            db.session.commit()
    except Exception:
        db.session.rollback()
        app.logger.exception("Could not create or update the admin account")
|
|
|
|
|
@app.route("/analyze-csv", methods=["POST"])
@login_required
def analyze_csv():
    """Analyse every row of an uploaded CSV file and store the results.

    Expects a multipart upload under the field ``csvFile``: a UTF-8 encoded
    ``.csv`` file with a header row containing one of the columns
    ``feedback``, ``text``, ``content`` or ``comment`` (case-insensitive).

    Returns:
        JSON with ``success``, ``total_rows``, ``processed_count``,
        ``error_count``, ``message`` and the first 50 per-row ``results``.
        Each per-row entry has ``row`` and ``text`` plus either the analysis
        fields of the strongest topic or an ``error`` string. Responds 400
        for an unusable upload and 500 when saving fails.
    """
    def preview(value):
        # Per-row entries carry at most 100 characters of the feedback.
        return value[:100] + '...' if len(value) > 100 else value

    try:
        if 'csvFile' not in request.files:
            return jsonify({'error': 'Không tìm thấy file CSV'}), 400

        file = request.files['csvFile']
        if file.filename == '':
            return jsonify({'error': 'Chưa chọn file'}), 400

        if not file.filename.lower().endswith('.csv'):
            return jsonify({'error': 'File phải có định dạng CSV'}), 400

        try:
            # utf-8-sig also accepts plain UTF-8 but drops a leading BOM, which
            # would otherwise be glued to the first header name and make the
            # column lookup below fail.
            file_content = file.stream.read().decode("utf-8-sig")
        except UnicodeDecodeError:
            return jsonify({'error': 'File CSV phải được mã hóa UTF-8'}), 400

        try:
            stream = io.StringIO(file_content, newline=None)
            csv_input = csv.DictReader(stream)

            if not csv_input.fieldnames:
                return jsonify({'error': 'File CSV không có header'}), 400

            accepted_names = ('feedback', 'text', 'content', 'comment')
            feedback_column = next(
                (col for col in csv_input.fieldnames
                 if col.lower().strip() in accepted_names),
                None,
            )

            if not feedback_column:
                available_columns = list(csv_input.fieldnames)
                return jsonify({
                    'error': f'Không tìm thấy cột chứa feedback. Các cột: {", ".join(available_columns)}'
                }), 400

            rows = list(csv_input)
            if not rows:
                return jsonify({'error': 'File CSV không có dữ liệu'}), 400
        except csv.Error as e:
            return jsonify({'error': f'File CSV không đúng định dạng: {str(e)}'}), 400
        except Exception as e:
            return jsonify({'error': f'Lỗi khi đọc file CSV: {str(e)}'}), 400

        # The model state cannot change while this request runs.
        model_ready = tokenizer is not None and model is not None

        results = []
        processed_count = 0
        error_count = 0

        for row_num, row in enumerate(rows, start=1):
            # DictReader fills the cells of a short row with None.
            feedback_text = (row.get(feedback_column) or '').strip()

            if not feedback_text:
                error_count += 1
                results.append({
                    'row': row_num,
                    'text': '',
                    'error': 'Feedback trống'
                })
                continue

            if not model_ready:
                # Reported like every other per-row failure: same keys, and
                # counted as an error.
                error_count += 1
                results.append({
                    'row': row_num,
                    'text': preview(feedback_text),
                    'error': 'Model or tokenizer not loaded'
                })
                continue

            try:
                row_topics = analyze_feedback(feedback_text)

                try:
                    save_feedback_to_db(feedback_text, row_topics, current_user.id)

                    # Only the strongest topic is echoed back per row; all
                    # topics are stored.
                    if row_topics:
                        first = row_topics[0]
                        first_topic = first['topic']
                        first_sentiment = first['sentiment']
                        first_sentiment_conf = first.get('sentiment_confidence', first['confidence'])
                        first_topic_conf = first['confidence']
                    else:
                        first_topic = 'others'
                        first_sentiment = 'neutral'
                        first_sentiment_conf = 0.0
                        first_topic_conf = 0.0

                    results.append({
                        'row': row_num,
                        'text': preview(feedback_text),
                        'sentiment': first_sentiment,
                        'topic': first_topic,
                        'sentiment_confidence': round(first_sentiment_conf * 100, 1),
                        'topic_confidence': round(first_topic_conf * 100, 1),
                        'success': True
                    })
                    processed_count += 1
                except Exception as db_err:
                    error_count += 1
                    results.append({
                        'row': row_num,
                        'text': preview(feedback_text),
                        'error': f'Lỗi lưu database: {str(db_err)}'
                    })

            except Exception as e:
                error_count += 1
                results.append({
                    'row': row_num,
                    'text': preview(feedback_text),
                    'error': f'Lỗi phân tích: {str(e)}'
                })

        # One commit for the whole file, then one backup.
        try:
            db.session.commit()
            backup_database()
        except Exception as commit_error:
            db.session.rollback()
            return jsonify({'error': f'Lỗi khi lưu dữ liệu: {str(commit_error)}'}), 500

        return jsonify({
            'success': True,
            'total_rows': len(results),
            'processed_count': processed_count,
            'error_count': error_count,
            'results': results[:50],
            'message': f'Đã xử lý {processed_count}/{len(results)} feedback thành công'
        })

    except Exception as e:
        db.session.rollback()
        app.logger.exception("CSV analysis failed")
        return jsonify({
            'error': f'Có lỗi xảy ra khi xử lý file CSV: {str(e)}'
        }), 500
|
|
|
|
|
if __name__ == "__main__":
    # Debug mode is opt-in: only DEBUG=true (any letter case) turns it on.
    debug_flag = os.environ.get("DEBUG", "False")
    debug = debug_flag.lower() == "true"
    # Listen on all interfaces on port 7860.
    app.run(host="0.0.0.0", port=7860, debug=debug)