Spaces:
Runtime error
Runtime error
Upload 7 files
Browse files- Dockerfile +22 -0
- database.py +46 -0
- inference.py +126 -0
- main.py +274 -0
- models.py +76 -0
- requirements.txt +65 -0
- schemas.py +54 -0
Dockerfile
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.12-slim

# Working directory inside the container
WORKDIR /app

# System libraries needed by the 'psycopg2' PostgreSQL driver
RUN apt-get update && apt-get install -y libpq-dev gcc && rm -rf /var/lib/apt/lists/*

# Copy requirements.txt first so the dependency layer is cached by Docker
COPY requirements.txt .

# Install the Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# The modules import each other as `backend.<module>` (for example
# `from backend.database import get_db`), so the sources must live in a
# directory named `backend` below the working directory. Copying them flat
# into /app and starting `main:app` fails with ModuleNotFoundError.
COPY . ./backend/

# HuggingFace Spaces serves Docker apps on port 7860 by default
ENV PORT=7860

# Start the server on the configured port (must be 7860 on Spaces)
CMD ["sh", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port ${PORT}"]
|
database.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import create_engine
|
| 2 |
+
from sqlalchemy.orm import sessionmaker, DeclarativeBase, Session
|
| 3 |
+
from typing import Generator
|
| 4 |
+
import os
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
|
| 7 |
+
# 1. Resolve the directory that contains this module
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
env_path = os.path.join(CURRENT_DIR, ".env")

# 2. Load environment variables from that exact .env file
load_dotenv(dotenv_path=env_path)

# 3. Read the connection string from the environment
DATABASE_URL = os.getenv("DATABASE_URL")

# 4. Safety stop: refuse to import the module without a connection string
if not DATABASE_URL:
    raise ValueError("⛔ CẢNH BÁO: Không tìm thấy DATABASE_URL. Hãy kiểm tra lại file .env hoặc biến môi trường trên server deploy!")

# 5. Create the engine for PostgreSQL (or MySQL)
# Note: the SQLite-specific logic has been removed entirely
engine = create_engine(DATABASE_URL, pool_pre_ping=True)

# Session factory bound to the engine; autocommit and autoflush are disabled
SessionLocal = sessionmaker(
    bind=engine,
    autocommit=False,
    autoflush=False,
)
|
| 30 |
+
|
| 31 |
+
class Base(DeclarativeBase):
    """Declarative base class; its metadata is what ``init_db`` creates tables from."""

    pass
|
| 33 |
+
|
| 34 |
+
def get_db() -> Generator[Session, None, None]:
    """Yield a fresh database session and close it once the consumer is done.

    Written as a generator so it can be used as a FastAPI dependency: the
    session is handed to the request handler and closed afterwards, whether
    or not the handler raised.
    """
    # Leaving the ``with`` block closes the session, including on error.
    with SessionLocal() as session:
        yield session
|
| 41 |
+
|
| 42 |
+
def init_db() -> None:
    """Create all tables defined via Base metadata."""
    # Imported for its side effect only: loading the module registers the
    # model classes on Base.metadata before create_all runs.
    from backend import models  # noqa: F401 — ensure models are registered

    Base.metadata.create_all(bind=engine)
|
inference.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
inference.py - Xử lý Aspect-Based Sentiment Analysis (ABSA)
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import time
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
from transformers import AutoModel, AutoTokenizer
|
| 9 |
+
import os
|
| 10 |
+
from safetensors.torch import load_file
|
| 11 |
+
import re
|
| 12 |
+
|
| 13 |
+
# ---------------------------------------------------------------------------
# REAL MODEL INITIALISATION (loaded once when the server starts)
# ---------------------------------------------------------------------------

CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
# Points at ml_models/visobert_absa, located next to this module
MODEL_DIR = os.path.join(CURRENT_DIR, "ml_models/visobert_absa")
|
| 20 |
+
|
| 21 |
+
def clean_text(text: str) -> str:
    """Normalise a review string before tokenisation.

    Lower-cases the text, replaces every character that is neither a word
    character nor whitespace with a space, then collapses whitespace runs
    and trims the ends. Falsy input (``None``, ``""``) yields ``""``.

    Per the original author this mirrors the training-time preprocessing
    in EDA_Preprocess.ipynb (not verifiable from this file).
    """
    if not text:
        return ""
    lowered = str(text).lower()
    without_punctuation = re.sub(r'[^\w\s]', ' ', lowered)
    return re.sub(r'\s+', ' ', without_punctuation).strip()
|
| 32 |
+
|
| 33 |
+
# Progress message printed at import time, before the model is loaded below
print("[INFO] Đang cấu hình và load mô hình HuggingFace ABSA...")
|
| 34 |
+
|
| 35 |
+
class VisoBertMultiTask(nn.Module):
    """Pretrained encoder followed by one linear head that scores every aspect.

    The head emits ``n_aspects * num_classes`` values per input, which
    ``forward`` reshapes to ``(-1, n_aspects, num_classes)``.
    """

    def __init__(self, model_name: str, n_aspects: int):
        """Load the encoder named ``model_name`` and size the head for ``n_aspects``."""
        super().__init__()
        self.num_classes = 4
        self.n_aspects = n_aspects
        self.encoder = AutoModel.from_pretrained(model_name)
        self.head = nn.Linear(
            self.encoder.config.hidden_size,
            self.n_aspects * self.num_classes,
        )
        # Placeholder buffer so a checkpoint that stores ``loss_weights``
        # finds a matching entry when its state dict is loaded.
        self.register_buffer("loss_weights", torch.ones(4, dtype=torch.float32))

    def forward(self, input_ids=None, attention_mask=None, labels=None):
        """Return the per-aspect logits; ``labels`` is accepted but unused."""
        encoded = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        # Representation of the first token of each sequence
        first_token = encoded.last_hidden_state[:, 0, :]
        scores = self.head(first_token)
        return scores.view(-1, self.n_aspects, self.num_classes)
|
| 51 |
+
|
| 52 |
+
# Per the original author, the deep-learning model was retrained on all 38 aspects.
# The position of a name in this list is the aspect index used to label the
# model's predictions in process_review, so the order must not change.
ASPECTS_LIST = [
    'AMBIENCE#GENERAL', 'FACILITIES#CLEANLINESS', 'FACILITIES#COMFORT', 'FACILITIES#DESIGN&FEATURES', 'FACILITIES#GENERAL',
    'FACILITIES#MISCELLANEOUS', 'FACILITIES#PRICES', 'FACILITIES#QUALITY', 'FOOD&DRINKS#MISCELLANEOUS', 'FOOD&DRINKS#PRICES',
    'FOOD&DRINKS#QUALITY', 'FOOD&DRINKS#STYLE&OPTIONS', 'HOTEL#CLEANLINESS', 'HOTEL#COMFORT', 'HOTEL#DESIGN&FEATURES',
    'HOTEL#GENERAL', 'HOTEL#MISCELLANEOUS', 'HOTEL#PRICES', 'HOTEL#QUALITY', 'LOCATION#GENERAL', 'RESTAURANT#GENERAL',
    'RESTAURANT#MISCELLANEOUS', 'RESTAURANT#PRICES', 'ROOMS#CLEANLINESS', 'ROOMS#COMFORT', 'ROOMS#DESIGN&FEATURES',
    'ROOMS#GENERAL', 'ROOMS#MISCELLANEOUS', 'ROOMS#PRICES', 'ROOMS#QUALITY', 'ROOM_AMENITIES#CLEANLINESS',
    'ROOM_AMENITIES#COMFORT', 'ROOM_AMENITIES#DESIGN&FEATURES', 'ROOM_AMENITIES#GENERAL', 'ROOM_AMENITIES#MISCELLANEOUS',
    'ROOM_AMENITIES#PRICES', 'ROOM_AMENITIES#QUALITY', 'SERVICE#GENERAL'
]
|
| 63 |
+
|
| 64 |
+
# Maps the sentiment label ids to names (per the deep_learning notebook):
# 0 = None, 1 = Negative, 2 = Neutral, 3 = Positive
# Label 0 is deliberately absent: it means the aspect is not mentioned.
SENTIMENT_MAP = {
    1: "negative",
    2: "neutral",
    3: "positive"
}
|
| 71 |
+
|
| 72 |
+
# Load the tokenizer and model once at import time. On any failure both
# globals are set to None, and process_review reports the model as unavailable.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    # uitnlp/visobert is the backbone the original author names as used for training
    model = VisoBertMultiTask(model_name="uitnlp/visobert", n_aspects=len(ASPECTS_LIST))

    # Load the fine-tuned weights from safetensors
    weights_path = os.path.join(MODEL_DIR, "model.safetensors")
    if not os.path.exists(weights_path):
        # Without the checkpoint the classification head keeps its random
        # initialisation, so predictions would be meaningless. Treat that
        # as a load failure instead of silently serving it.
        raise FileNotFoundError(f"Không tìm thấy file trọng số: {weights_path}")

    state_dict = load_file(weights_path)
    # strict=False tolerates key mismatches; report them so they are not hidden.
    load_result = model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys:
        print(f"[WARN] state_dict thiếu các key: {load_result.missing_keys}")
    if load_result.unexpected_keys:
        print(f"[WARN] state_dict có các key không dùng đến: {load_result.unexpected_keys}")
    print("[INFO] Đã load state_dict thành công!")

    model.eval()
    print("[INFO] Mô hình đã sẵn sàng!")
except Exception as e:
    # The message now names the directory that was actually searched.
    print(f"[WARN] Không thể load model HuggingFace, kiểm tra lại thư mục {MODEL_DIR}. Lỗi: {e}")
    model = None
    tokenizer = None
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def process_review(text: str) -> list[dict[str, str]]:
    """Predict the aspects mentioned in a review and the sentiment of each.

    Args:
        text: Raw review text; it is passed through ``clean_text`` first.

    Returns:
        A list of ``{"aspect": str, "sentiment": str}`` dicts, one for every
        aspect whose predicted label is not 0 ("not mentioned").

    Raises:
        RuntimeError: If the model or tokenizer failed to load at import time.
    """
    if model is None or tokenizer is None:
        raise RuntimeError("Mô hình Deep Learning hiện không khả dụng. Vui lòng liên hệ quản trị viên hoặc kiểm tra lại cấu hình AI.")

    # Tokenise the cleaned text, truncated to at most 256 tokens
    encoded = tokenizer(
        clean_text(text),
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=256,
    )

    with torch.no_grad():
        logits = model(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
        )

    # Highest-scoring label per aspect; [0] selects the single input text
    labels_per_aspect = torch.argmax(logits, dim=-1)[0].tolist()

    # Skip label 0 (aspect not mentioned) and map the rest to names
    return [
        {"aspect": ASPECTS_LIST[position], "sentiment": SENTIMENT_MAP[label]}
        for position, label in enumerate(labels_per_aspect)
        if label != 0 and label in SENTIMENT_MAP
    ]
|
| 126 |
+
|
main.py
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from collections import defaultdict
|
| 5 |
+
from datetime import datetime, timedelta, timezone
|
| 6 |
+
from typing import Annotated
|
| 7 |
+
|
| 8 |
+
from fastapi import Depends, FastAPI, HTTPException, status
|
| 9 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
+
from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
|
| 11 |
+
from jose import JWTError, jwt
|
| 12 |
+
from passlib.context import CryptContext
|
| 13 |
+
from sqlalchemy import desc
|
| 14 |
+
from sqlalchemy.orm import Session
|
| 15 |
+
|
| 16 |
+
from backend.database import get_db, init_db
|
| 17 |
+
from backend.inference import process_review
|
| 18 |
+
from backend.models import Review, ReviewAspect, User
|
| 19 |
+
from backend.schemas import ReviewCreate, ReviewResponse, Token, UserCreate, UserResponse
|
| 20 |
+
|
| 21 |
+
from dotenv import load_dotenv
|
| 22 |
+
|
| 23 |
+
# ---------------------------------------------------------------------------
|
| 24 |
+
# Security configuration
|
| 25 |
+
# ---------------------------------------------------------------------------
|
| 26 |
+
load_dotenv()

# Publicly known fallback; tokens signed with it can be forged by anyone.
_DEFAULT_SECRET_KEY = "change-me-in-production-use-a-long-random-string"

SECRET_KEY: str = os.getenv("SECRET_KEY", _DEFAULT_SECRET_KEY)
if SECRET_KEY == _DEFAULT_SECRET_KEY:
    # Keep development working, but make the insecure configuration visible.
    print("[WARN] SECRET_KEY is not set; using the insecure built-in default. Set SECRET_KEY in the environment before deploying.")
ALGORITHM = os.getenv("ALGORITHM", "HS256")
# os.getenv returns strings, so the default is a string as well.
ACCESS_TOKEN_EXPIRE_MINUTES = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))

# Password hashing context (bcrypt) and the bearer-token scheme used by the routes
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/login")
|
| 34 |
+
|
| 35 |
+
# ---------------------------------------------------------------------------
|
| 36 |
+
# App initialisation
|
| 37 |
+
# ---------------------------------------------------------------------------
|
| 38 |
+
|
| 39 |
+
# The FastAPI application object; title, description and version are metadata.
app = FastAPI(
    title="ABSA Tourism API",
    description="Aspect-Based Sentiment Analysis for hotel & restaurant reviews.",
    version="1.0.0",
)

# CORS is fully open: any origin, method and header.
# NOTE(review): wildcard origins are combined with allow_credentials=True.
# Authentication in this file uses bearer tokens (OAuth2PasswordBearer), so
# credentialed cross-origin requests may not be needed — confirm and tighten.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
@app.on_event("startup")
def on_startup() -> None:
    """Run ``init_db`` when the application starts."""
    init_db()
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# ---------------------------------------------------------------------------
|
| 60 |
+
# Auth helpers
|
| 61 |
+
# ---------------------------------------------------------------------------
|
| 62 |
+
|
| 63 |
+
_BCRYPT_MAX_BYTES = 72
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _validate_password_bytes(password: str) -> str:
|
| 67 |
+
"""Strip surrounding whitespace and guard against bcrypt 72-byte limit."""
|
| 68 |
+
password = password.strip()
|
| 69 |
+
if len(password.encode("utf-8")) > _BCRYPT_MAX_BYTES:
|
| 70 |
+
raise HTTPException(
|
| 71 |
+
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
| 72 |
+
detail=f"Password must not exceed {_BCRYPT_MAX_BYTES} bytes.",
|
| 73 |
+
)
|
| 74 |
+
return password
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _hash_password(plain: str) -> str:
    """Return the hash of ``plain`` produced by the module's password context."""
    digest = pwd_context.hash(plain)
    return digest
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _verify_password(plain: str, hashed: str) -> bool:
    """Report whether ``plain`` matches the stored hash ``hashed``."""
    matches = pwd_context.verify(plain, hashed)
    return matches
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _create_access_token(subject: str) -> str:
    """Build a signed JWT for ``subject``.

    The token carries ``sub`` (the subject) and ``exp`` (now in UTC plus
    ``ACCESS_TOKEN_EXPIRE_MINUTES``), signed with ``SECRET_KEY`` using
    ``ALGORITHM``.
    """
    lifetime = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    claims = {
        "sub": subject,
        "exp": datetime.now(timezone.utc) + lifetime,
    }
    return jwt.encode(claims, SECRET_KEY, algorithm=ALGORITHM)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# ---------------------------------------------------------------------------
|
| 92 |
+
# Dependencies
|
| 93 |
+
# ---------------------------------------------------------------------------
|
| 94 |
+
|
| 95 |
+
def get_current_user(
    token: Annotated[str, Depends(oauth2_scheme)],
    db: Annotated[Session, Depends(get_db)],
) -> User:
    """Resolve the bearer token of a request to its ``User`` row.

    Args:
        token: Bearer token extracted by ``oauth2_scheme``.
        db: Database session for the request.

    Returns:
        The user whose username equals the token's ``sub`` claim.

    Raises:
        HTTPException: 401 when the token cannot be decoded, has no ``sub``
            claim, or names a user that does not exist.
    """
    unauthorized = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials.",
        headers={"WWW-Authenticate": "Bearer"},
    )

    try:
        claims = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
    except JWTError:
        raise unauthorized

    username: str | None = claims.get("sub")
    if username is None:
        raise unauthorized

    account = db.query(User).filter(User.username == username).first()
    if account is None:
        raise unauthorized
    return account
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# Convenience type aliases for annotated dependency injection:
# CurrentUser resolves through get_current_user, DBSession through get_db.
CurrentUser = Annotated[User, Depends(get_current_user)]
DBSession = Annotated[Session, Depends(get_db)]
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
# ---------------------------------------------------------------------------
|
| 124 |
+
# Auth routes
|
| 125 |
+
# ---------------------------------------------------------------------------
|
| 126 |
+
|
| 127 |
+
@app.post(
    "/register",
    response_model=UserResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Register a new user",
    tags=["Auth"],
)
def register(payload: UserCreate, db: DBSession) -> User:
    """Create a user account with a hashed password.

    Args:
        payload: Requested username and password.
        db: Database session for the request.

    Returns:
        The newly created ``User``.

    Raises:
        HTTPException: 422 when the password exceeds the bcrypt byte limit;
            409 when the username is already taken.
    """
    # Function-scope import: only this handler needs the exception type.
    from sqlalchemy.exc import IntegrityError

    password = _validate_password_bytes(payload.password)

    conflict = HTTPException(
        status_code=status.HTTP_409_CONFLICT,
        detail="Username already taken.",
    )

    if db.query(User).filter(User.username == payload.username).first():
        raise conflict

    user = User(
        username=payload.username,
        hashed_password=_hash_password(password),
    )
    db.add(user)
    try:
        db.commit()
    except IntegrityError as exc:
        # Another request inserted the same username between the check above
        # and this commit. Report the conflict instead of an unhandled 500.
        db.rollback()
        raise conflict from exc
    db.refresh(user)
    return user
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
@app.post(
    "/login",
    response_model=Token,
    summary="Obtain a JWT bearer token",
    tags=["Auth"],
)
def login(
    form: Annotated[OAuth2PasswordRequestForm, Depends()],
    db: DBSession,
) -> Token:
    """Exchange a username and password for a bearer token.

    Args:
        form: OAuth2 password form with ``username`` and ``password``.
        db: Database session for the request.

    Returns:
        A ``Token`` whose access token names the authenticated user.

    Raises:
        HTTPException: 422 when the password exceeds the bcrypt byte limit;
            401 when the user is unknown or the password does not match.
    """
    password = _validate_password_bytes(form.password)
    user = db.query(User).filter(User.username == form.username).first()

    # The password is only verified when a matching user row exists.
    if user is None or not _verify_password(password, user.hashed_password):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password.",
            headers={"WWW-Authenticate": "Bearer"},
        )

    access_token = _create_access_token(user.username)
    return Token(access_token=access_token)
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
# ---------------------------------------------------------------------------
|
| 177 |
+
# ABSA routes
|
| 178 |
+
# ---------------------------------------------------------------------------
|
| 179 |
+
|
| 180 |
+
@app.post(
    "/predict",
    response_model=ReviewResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Run ABSA inference and persist the result",
    tags=["ABSA"],
)
def predict(
    payload: ReviewCreate,
    current_user: CurrentUser,
    db: DBSession,
) -> Review:
    """Analyse a review and store it together with its predicted aspects.

    Args:
        payload: The review text to analyse.
        current_user: The authenticated user who owns the review.
        db: Database session for the request.

    Returns:
        The persisted ``Review`` including its ``aspects``.

    Raises:
        HTTPException: 502 when inference fails for any reason.
    """
    try:
        results: list[dict[str, str]] = process_review(payload.review_text)
    except Exception as exc:
        # Boundary handler: surface any inference failure as a 502 and keep
        # the original exception as the cause for debugging.
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Inference error: {exc}",
        ) from exc

    # Attach the aspects through the relationship so the review and all of
    # its aspects are written in one transaction. A failure can then no
    # longer leave a review stored without its aspects.
    review = Review(
        user_id=current_user.id,
        review_text=payload.review_text,
        aspects=[
            ReviewAspect(
                aspect=res.get("aspect"),
                sentiment=res.get("sentiment"),
            )
            for res in results
        ],
    )
    db.add(review)
    db.commit()
    db.refresh(review)
    return review
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
@app.get(
    "/reviews",
    response_model=list[ReviewResponse],
    summary="List all reviews for the current user",
    tags=["ABSA"],
)
def list_reviews(current_user: CurrentUser, db: DBSession) -> list[Review]:
    """Return the current user's reviews ordered by ``created_at`` descending."""
    own_reviews = db.query(Review).filter(Review.user_id == current_user.id)
    newest_first = own_reviews.order_by(desc(Review.created_at))
    return newest_first.all()
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
@app.get(
    "/analytics",
    summary="Sentiment counts grouped by aspect for the current user",
    tags=["ABSA"],
)
def analytics(
    current_user: CurrentUser,
    db: DBSession,
) -> dict[str, dict[str, int]]:
    """Count sentiments per aspect across the current user's reviews.

    Returns:
        A mapping such as ``{"SERVICE#GENERAL": {"positive": N, "negative": M}}``.
        Only aspects and sentiments that actually occur are present.
    """
    pairs = (
        db.query(ReviewAspect.aspect, ReviewAspect.sentiment)
        .join(Review)
        .filter(
            Review.user_id == current_user.id,
            ReviewAspect.aspect.isnot(None),
            ReviewAspect.sentiment.isnot(None),
        )
        .all()
    )

    # Plain nested dicts, so the result is JSON-serialisable as-is
    tally: dict[str, dict[str, int]] = {}
    for aspect, sentiment in pairs:
        per_aspect = tally.setdefault(aspect, {})
        per_aspect[sentiment] = per_aspect.get(sentiment, 0) + 1
    return tally
|
| 263 |
+
|
| 264 |
+
@app.delete(
    "/reviews",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Xóa toàn bộ review của user hiện tại",
    tags=["ABSA"],
)
def clear_all_reviews(current_user: CurrentUser, db: DBSession):
    """Delete every review that belongs to the current user.

    NOTE(review): this is a query-level delete; removal of the related
    ``review_aspects`` rows presumably relies on the ``ON DELETE CASCADE``
    foreign key being present in the database — confirm.
    """
    # Select all reviews of this user and remove them in one statement
    owned = db.query(Review).filter(Review.user_id == current_user.id)
    owned.delete()
    db.commit()
|
models.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime, timezone
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
|
| 4 |
+
from sqlalchemy import DateTime, ForeignKey, Integer, String, Text
|
| 5 |
+
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
| 6 |
+
|
| 7 |
+
from backend.database import Base
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class User(Base):
    """Account row in the ``users`` table."""

    __tablename__ = "users"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
    # Unique, indexed login name (at most 150 characters)
    username: Mapped[str] = mapped_column(String(150), unique=True, index=True, nullable=False)
    # Stores the password hash, never the plain password
    hashed_password: Mapped[str] = mapped_column(String(255), nullable=False)

    # Relationships
    # Reviews owned by this user. passive_deletes=True leaves deletion of the
    # child rows to the database-level ON DELETE CASCADE on reviews.user_id.
    reviews: Mapped[List["Review"]] = relationship(
        "Review",
        back_populates="user",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )

    def __repr__(self) -> str:
        """Return a debug representation with the id and username."""
        return f"<User id={self.id} username={self.username!r}>"
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class ReviewAspect(Base):
    """One (aspect, sentiment) pair attached to a review, in ``review_aspects``."""

    __tablename__ = "review_aspects"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
    # Owning review; the foreign key is declared with ON DELETE CASCADE
    review_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("reviews.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    # Aspect name and sentiment name, both required
    aspect: Mapped[str] = mapped_column(String(100), nullable=False)
    sentiment: Mapped[str] = mapped_column(String(50), nullable=False)

    # Relationships
    review: Mapped["Review"] = relationship("Review", back_populates="aspects")

    def __repr__(self) -> str:
        """Return a debug representation with ids, aspect and sentiment."""
        return f"<ReviewAspect id={self.id} review_id={self.review_id} aspect={self.aspect!r} sentiment={self.sentiment!r}>"
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class Review(Base):
    """A submitted review text owned by a user, stored in ``reviews``."""

    __tablename__ = "reviews"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
    # Owning user; the foreign key is declared with ON DELETE CASCADE
    user_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    review_text: Mapped[str] = mapped_column(Text, nullable=False)
    # Timezone-aware creation time; defaults to the current UTC time
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        nullable=False,
    )

    # Relationships
    user: Mapped["User"] = relationship("User", back_populates="reviews")
    # Predicted aspects of this review. passive_deletes=True leaves deletion
    # of the child rows to the ON DELETE CASCADE on review_aspects.review_id.
    aspects: Mapped[List["ReviewAspect"]] = relationship(
        "ReviewAspect",
        back_populates="review",
        cascade="all, delete-orphan",
        passive_deletes=True,
    )

    def __repr__(self) -> str:
        """Return a debug representation with the id."""
        return f"<Review id={self.id}>"
|
requirements.txt
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
annotated-doc==0.0.4
|
| 2 |
+
annotated-types==0.7.0
|
| 3 |
+
anyio==4.13.0
|
| 4 |
+
# passlib 1.7.4 is not compatible with bcrypt 5.x: bcrypt 5 raises ValueError
# for inputs longer than 72 bytes, which passlib's backend self-check uses,
# so hashing fails. Stay on the last release passlib works with.
bcrypt==4.0.1
|
| 5 |
+
certifi==2026.2.25
|
| 6 |
+
cffi==2.0.0
|
| 7 |
+
click==8.3.2
|
| 8 |
+
colorama==0.4.6
|
| 9 |
+
cryptography==46.0.7
|
| 10 |
+
ecdsa==0.19.2
|
| 11 |
+
fastapi==0.135.3
|
| 12 |
+
filelock==3.25.2
|
| 13 |
+
fsspec==2026.3.0
|
| 14 |
+
greenlet==3.4.0
|
| 15 |
+
h11==0.16.0
|
| 16 |
+
hf-xet==1.4.3
|
| 17 |
+
httpcore==1.0.9
|
| 18 |
+
httpx==0.28.1
|
| 19 |
+
huggingface_hub==1.10.1
|
| 20 |
+
idna==3.11
|
| 21 |
+
Jinja2==3.1.6
|
| 22 |
+
joblib==1.5.3
|
| 23 |
+
markdown-it-py==4.0.0
|
| 24 |
+
MarkupSafe==3.0.3
|
| 25 |
+
mdurl==0.1.2
|
| 26 |
+
mpmath==1.3.0
|
| 27 |
+
networkx==3.6.1
|
| 28 |
+
numpy==2.4.4
|
| 29 |
+
packaging==26.0
|
| 30 |
+
pandas==3.0.2
|
| 31 |
+
passlib==1.7.4
|
| 32 |
+
pyasn1==0.6.3
|
| 33 |
+
pycparser==3.0
|
| 34 |
+
pydantic==2.13.0
|
| 35 |
+
pydantic-settings==2.13.1
|
| 36 |
+
pydantic_core==2.46.0
|
| 37 |
+
Pygments==2.20.0
|
| 38 |
+
python-dateutil==2.9.0.post0
|
| 39 |
+
python-dotenv==1.2.2
|
| 40 |
+
python-jose==3.5.0
|
| 41 |
+
python-multipart==0.0.26
|
| 42 |
+
PyYAML==6.0.3
|
| 43 |
+
psycopg2-binary==2.9.11
|
| 44 |
+
regex==2026.4.4
|
| 45 |
+
rich==15.0.0
|
| 46 |
+
rsa==4.9.1
|
| 47 |
+
safetensors==0.7.0
|
| 48 |
+
scikit-learn==1.8.0
|
| 49 |
+
scipy==1.17.1
|
| 50 |
+
setuptools==81.0.0
|
| 51 |
+
shellingham==1.5.4
|
| 52 |
+
six==1.17.0
|
| 53 |
+
SQLAlchemy==2.0.49
|
| 54 |
+
starlette==1.0.0
|
| 55 |
+
sympy==1.14.0
|
| 56 |
+
threadpoolctl==3.6.0
|
| 57 |
+
tokenizers==0.22.2
|
| 58 |
+
torch==2.11.0
|
| 59 |
+
tqdm==4.67.3
|
| 60 |
+
transformers==5.5.4
|
| 61 |
+
typer==0.24.1
|
| 62 |
+
typing-inspection==0.4.2
|
| 63 |
+
typing_extensions==4.15.0
|
| 64 |
+
tzdata==2026.1
|
| 65 |
+
uvicorn==0.44.0
|
schemas.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
from typing import Literal
|
| 3 |
+
|
| 4 |
+
from pydantic import BaseModel, ConfigDict, Field
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# ---------------------------------------------------------------------------
|
| 8 |
+
# User schemas
|
| 9 |
+
# ---------------------------------------------------------------------------
|
| 10 |
+
|
| 11 |
+
class UserCreate(BaseModel):
    """Request body for registering a user."""

    # 3 to 150 characters
    username: str = Field(..., min_length=3, max_length=150)
    # 6 to 72 characters; these limits count characters, not encoded bytes
    password: str = Field(..., min_length=6, max_length=72)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class UserResponse(BaseModel):
    """Public view of a user: id and username only."""

    # from_attributes lets the model be populated from object attributes
    model_config = ConfigDict(from_attributes=True)

    id: int
    username: str
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# ---------------------------------------------------------------------------
|
| 24 |
+
# Auth schemas
|
| 25 |
+
# ---------------------------------------------------------------------------
|
| 26 |
+
|
| 27 |
+
class Token(BaseModel):
    """Access-token response; ``token_type`` is always ``"bearer"``."""

    access_token: str
    token_type: Literal["bearer"] = "bearer"
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# ---------------------------------------------------------------------------
|
| 33 |
+
# Review schemas
|
| 34 |
+
# ---------------------------------------------------------------------------
|
| 35 |
+
|
| 36 |
+
class ReviewCreate(BaseModel):
    """Request body carrying the review text to analyse."""

    # Must contain at least one character
    review_text: str = Field(..., min_length=1)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class AspectResponse(BaseModel):
    """One aspect and its sentiment as returned to the client."""

    # from_attributes lets the model be populated from object attributes
    model_config = ConfigDict(from_attributes=True)

    id: int
    aspect: str
    sentiment: str
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class ReviewResponse(BaseModel):
    """A stored review together with its aspects."""

    # from_attributes lets the model be populated from object attributes
    model_config = ConfigDict(from_attributes=True)

    id: int
    review_text: str
    created_at: datetime
    # Defaults to an empty list when no aspects are supplied
    aspects: list[AspectResponse] = []
|