vtdung23 commited on
Commit
c09e844
·
verified ·
1 Parent(s): 5cc5290

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +82 -0
  2. .env.example +19 -0
  3. .gitattributes +1 -35
  4. .gitignore +51 -0
  5. 4.0.0 +6 -0
  6. ARCHITECTURE.md +387 -0
  7. DEPLOYMENT.md +287 -0
  8. Dockerfile +54 -0
  9. FIX_OOM_RENDER.md +150 -0
  10. HF_ARCHITECTURE_DIAGRAM.md +332 -0
  11. HF_DEPLOYMENT_CHECKLIST.md +292 -0
  12. HF_ENV_VARIABLES.md +177 -0
  13. HF_MIGRATION_SUMMARY.md +314 -0
  14. HF_QUICK_REFERENCE.md +163 -0
  15. HUGGING_FACE_DEPLOYMENT.md +258 -0
  16. INDEX.md +296 -0
  17. PROJECT_STRUCTURE.txt +326 -0
  18. PROJECT_SUMMARY.md +293 -0
  19. Procfile +1 -0
  20. QUICKSTART.md +116 -0
  21. README.md +253 -10
  22. README_HF_SPACE.md +86 -0
  23. RENDER_QUICKSTART.md +137 -0
  24. TESTING_GUIDE.md +287 -0
  25. app/__init__.py +1 -0
  26. app/config.py +46 -0
  27. app/database.py +66 -0
  28. app/database/.gitkeep +1 -0
  29. app/models.py +43 -0
  30. app/routers/__init__.py +1 -0
  31. app/routers/auth.py +97 -0
  32. app/routers/dashboard.py +44 -0
  33. app/routers/prediction.py +252 -0
  34. app/schemas.py +70 -0
  35. app/services/Model/phoBERT_multi_class_tokenizer/added_tokens.json +3 -0
  36. app/services/Model/phoBERT_multi_class_tokenizer/bpe.codes +0 -0
  37. app/services/Model/phoBERT_multi_class_tokenizer/special_tokens_map.json +9 -0
  38. app/services/Model/phoBERT_multi_class_tokenizer/tokenizer_config.json +55 -0
  39. app/services/Model/phoBERT_multi_class_tokenizer/vocab.txt +0 -0
  40. app/services/__init__.py +1 -0
  41. app/services/auth_service.py +85 -0
  42. app/services/ml_service.py +153 -0
  43. app/services/report_service.py +301 -0
  44. app/services/visualization_service.py +125 -0
  45. app/static/css/style.css +1 -0
  46. app/static/js/main.js +1 -0
  47. app/static/uploads/.gitkeep +3 -0
  48. app/templates/base.html +59 -0
  49. app/templates/dashboard.html +618 -0
  50. app/templates/login.html +118 -0
.dockerignore ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================
2
+ # Docker Ignore File
3
+ # Exclude unnecessary files from Docker build context
4
+ # ============================================
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+ *.so
11
+ .Python
12
+ *.egg-info/
13
+ dist/
14
+ build/
15
+ pip-log.txt
16
+ pip-delete-this-directory.txt
17
+
18
+ # Virtual Environments
19
+ env/
20
+ venv/
21
+ ENV/
22
+ env.bak/
23
+ venv.bak/
24
+ .venv/
25
+
26
+ # IDEs
27
+ .vscode/
28
+ .idea/
29
+ *.swp
30
+ *.swo
31
+ *~
32
+ .DS_Store
33
+
34
+ # Git
35
+ .git/
36
+ .gitignore
37
+ .gitattributes
38
+
39
+ # Documentation (not needed in container)
40
+ *.md
41
+ !README.md
42
+ ARCHITECTURE.md
43
+ DEPLOYMENT.md
44
+ FIX_OOM_RENDER.md
45
+ INDEX.md
46
+ QUICKSTART.md
47
+ RENDER_QUICKSTART.md
48
+ TESTING_GUIDE.md
49
+ PROJECT_STRUCTURE.txt
50
+ PROJECT_SUMMARY.md
51
+
52
+ # Database (use external PostgreSQL)
53
+ *.db
54
+ *.sqlite
55
+ *.sqlite3
56
+ app/database/*.db
57
+
58
+ # Uploads (use external storage in production)
59
+ app/static/uploads/wordclouds/*
60
+ app/static/uploads/*.csv
61
+ !app/static/uploads/.gitkeep
62
+
63
+ # Logs
64
+ *.log
65
+
66
+ # Testing
67
+ .pytest_cache/
68
+ .coverage
69
+ htmlcov/
70
+ .tox/
71
+
72
+ # Render specific
73
+ Procfile
74
+
75
+ # Environment files (secrets should be in HF Settings)
76
+ .env
77
+ .env.*
78
+
79
+ # Temporary files
80
+ *.tmp
81
+ tmp/
82
+ temp/
.env.example ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================
2
+ # ENVIRONMENT VARIABLES TEMPLATE
3
+ # ============================================
4
+ # Copy this file to .env for local development
5
+ # On Render, set these in Environment Variables tab
6
+
7
+ # Security (Required)
8
+ SECRET_KEY=your-super-secret-random-key-change-this-in-production
9
+
10
+ # Database (Optional - auto-configured by Render)
11
+ # DATABASE_URL=postgresql://user:password@host:5432/database
12
+ # Leave blank for local SQLite development
13
+
14
+ # Application Settings
15
+ PYTHON_VERSION=3.11.0
16
+ PORT=8000
17
+
18
+ # HuggingFace Cache (Optional - only for local dev)
19
+ # HF_HOME=/path/to/huggingface/cache
.gitattributes CHANGED
@@ -1,35 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.pth filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ env/
8
+ venv/
9
+ ENV/
10
+ build/
11
+ develop-eggs/
12
+ dist/
13
+ downloads/
14
+ eggs/
15
+ .eggs/
16
+ lib/
17
+ lib64/
18
+ parts/
19
+ sdist/
20
+ var/
21
+ wheels/
22
+ *.egg-info/
23
+ .installed.cfg
24
+ *.egg
25
+
26
+ # FastAPI
27
+ *.db
28
+ *.sqlite
29
+ *.sqlite3
30
+
31
+ # Uploads
32
+ app/static/uploads/*
33
+ !app/static/uploads/.gitkeep
34
+
35
+ # IDE
36
+ .vscode/
37
+ .idea/
38
+ *.swp
39
+ *.swo
40
+ *~
41
+
42
+ # OS
43
+ .DS_Store
44
+ Thumbs.db
45
+
46
+ # Environment
47
+ .env
48
+ .env.local
49
+
50
+ # Logs
51
+ *.log
4.0.0 ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Defaulting to user installation because normal site-packages is not writeable
2
+ Collecting bcrypt
3
+ Using cached bcrypt-5.0.0-cp39-abi3-win_amd64.whl.metadata (10 kB)
4
+ Using cached bcrypt-5.0.0-cp39-abi3-win_amd64.whl (150 kB)
5
+ Installing collected packages: bcrypt
6
+ Successfully installed bcrypt-5.0.0
ARCHITECTURE.md ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🏗️ System Architecture
2
+
3
+ ## High-Level Architecture
4
+
5
+ ```
6
+ ┌─────────────────────────────────────────────────────────────┐
7
+ │ FRONTEND │
8
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
9
+ │ │ Login/ │ │ Dashboard │ │ Register │ │
10
+ │ │ Register │ │ (Jinja2) │ │ Page │ │
11
+ │ │ (Jinja2) │ │ + TailwindCSS│ │ (Jinja2) │ │
12
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
13
+ │ │ │ │ │
14
+ │ └──────────────────┴──────────────────┘ │
15
+ │ │ │
16
+ │ JavaScript (Fetch API) │
17
+ │ + Chart.js for viz │
18
+ └────────────────────────────│────────────────────────────────┘
19
+
20
+
21
+ ┌─────────────────────────────────────────────────────────────┐
22
+ │ FASTAPI BACKEND │
23
+ │ ┌───────────────────────────────────────────────────┐ │
24
+ │ │ API ROUTERS │ │
25
+ │ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
26
+ │ │ │ Auth │ │Prediction│ │Dashboard │ │ │
27
+ │ │ │ Router │ │ Router │ │ Router │ │ │
28
+ │ │ │ /api/auth│ │/api/pred │ │ /pages │ │ │
29
+ │ │ └──────────┘ └──────────┘ └──────────┘ │ │
30
+ │ └───────────────────────────────────────────────────┘ │
31
+ │ │ │
32
+ │ ▼ │
33
+ │ ┌───────────────────────────────────────────────────┐ │
34
+ │ │ SERVICES │ │
35
+ │ │ ┌──────────────┐ ┌──────────────┐ │ │
36
+ │ │ │ Auth │ │ ML │ │ │
37
+ │ │ │ Service │ │ Service │ │ │
38
+ │ │ │(JWT, bcrypt) │ │ (Model) │ │ │
39
+ │ │ └──────────────┘ └──────────────┘ │ │
40
+ │ │ ┌──────────────────────────────────┐ │ │
41
+ │ │ │ Visualization Service │ │ │
42
+ │ │ │ (WordCloud, Charts) │ │ │
43
+ │ │ └──────────────────────────────────┘ │ │
44
+ │ └───────────────────────────────────────────────────┘ │
45
+ │ │ │
46
+ │ ▼ │
47
+ │ ┌───────────────────────────────────────────────────┐ │
48
+ │ │ DATA LAYER │ │
49
+ │ │ ┌──────────┐ ┌──────────┐ │ │
50
+ │ │ │ SQLAlchemy│ │ Pydantic │ │ │
51
+ │ │ │ Models │ │ Schemas │ │ │
52
+ │ │ │(ORM Layer)│ │(Validation) │ │
53
+ │ │ └──���───────┘ └──────────┘ │ │
54
+ │ └───────────────────────────────────────────────────┘ │
55
+ └────────────────────────────│────────────────────────────────┘
56
+
57
+
58
+ ┌─────────────────────────────────────────────────────────────┐
59
+ │ DATABASE │
60
+ │ ┌──────────────────────┐ ┌──────────────────────┐ │
61
+ │ │ Users Table │ │ PredictionHistory │ │
62
+ │ │ - id (PK) │ │ - id (PK) │ │
63
+ │ │ - username │ │ - user_id (FK) │ │
64
+ │ │ - email │ │ - product_name │ │
65
+ │ │ - hashed_password │ │ - comment │ │
66
+ │ │ - created_at │ │ - predicted_rating │ │
67
+ │ │ │ │ - confidence_score │ │
68
+ │ │ │ │ - created_at │ │
69
+ │ └──────────────────────┘ └──────────────────────┘ │
70
+ │ SQLite Database │
71
+ └─────────────────────────────────────────────────────────────┘
72
+ ```
73
+
74
+ ---
75
+
76
+ ## Request Flow Examples
77
+
78
+ ### 1️⃣ User Login Flow
79
+
80
+ ```
81
+ User enters credentials
82
+
83
+
84
+ [Login.html]
85
+
86
+
87
+ POST /api/auth/login
88
+
89
+
90
+ [Auth Router]
91
+
92
+
93
+ [Auth Service] ──► Verify password (bcrypt)
94
+ │ Generate JWT token
95
+
96
+ [Database] ──► Query User table
97
+
98
+
99
+ Return JWT token to frontend
100
+
101
+
102
+ Store token in localStorage
103
+
104
+
105
+ Redirect to /dashboard
106
+ ```
107
+
108
+ ### 2️⃣ Single Prediction Flow
109
+
110
+ ```
111
+ User enters comment
112
+
113
+
114
+ [Dashboard.html]
115
+
116
+
117
+ POST /api/predict/single
118
+ (with JWT token in header)
119
+
120
+
121
+ [Prediction Router]
122
+
123
+
124
+ [Auth Service] ──► Verify JWT token
125
+
126
+
127
+ [ML Service] ──► predict_single(comment)
128
+ │ (DUMMY: return random rating)
129
+
130
+ [Database] ──► Save to PredictionHistory
131
+
132
+
133
+ Return {rating, confidence}
134
+
135
+
136
+ Display result in UI
137
+ ```
138
+
139
+ ### 3️⃣ Batch CSV Prediction Flow
140
+
141
+ ```
142
+ User uploads CSV file
143
+
144
+
145
+ [Dashboard.html]
146
+
147
+
148
+ POST /api/predict/batch
149
+ (multipart/form-data)
150
+
151
+
152
+ [Prediction Router]
153
+
154
+
155
+ Parse CSV ──► Extract comments
156
+
157
+
158
+ [ML Service] ──► predict_batch(comments)
159
+ │ For each comment:
160
+ │ predict_single()
161
+
162
+ [Visualization Service]
163
+
164
+ ├──► generate_wordcloud()
165
+ │ Save PNG to /static/uploads/
166
+
167
+ └──► calculate_rating_distribution()
168
+ Count 1⭐, 2⭐, 3⭐, 4⭐, 5⭐
169
+
170
+
171
+ [Database] ──► Save all predictions
172
+
173
+
174
+ Return:
175
+ - wordcloud_url
176
+ - rating_distribution
177
+ - results array
178
+
179
+
180
+ [Dashboard.html]
181
+
182
+ ├──► Render Chart.js bar chart
183
+ ├──► Display word cloud image
184
+ ├──► Populate results table
185
+ └──► Enable CSV download
186
+ ```
187
+
188
+ ---
189
+
190
+ ## Technology Stack Details
191
+
192
+ ### Backend
193
+ ```
194
+ FastAPI (0.104.1)
195
+ ├── Auto-generates Swagger UI (/docs)
196
+ ├── Automatic data validation (Pydantic)
197
+ ├── Async support
198
+ └── Built-in dependency injection
199
+
200
+ SQLAlchemy (2.0.23)
201
+ ├── ORM for database operations
202
+ ├── Models: User, PredictionHistory
203
+ └── Automatic table creation
204
+
205
+ JWT Authentication
206
+ ├── python-jose for token generation
207
+ ├── passlib[bcrypt] for password hashing
208
+ └── OAuth2PasswordBearer for token validation
209
+ ```
210
+
211
+ ### Frontend
212
+ ```
213
+ Jinja2 Templates
214
+ ├── Server-side rendering
215
+ ├── Template inheritance (base.html)
216
+ └── Context variables from backend
217
+
218
+ TailwindCSS (CDN)
219
+ ├── Utility-first CSS framework
220
+ ├── Responsive design
221
+ └── Custom animations
222
+
223
+ Chart.js (CDN)
224
+ ├── Interactive bar charts
225
+ └── Rating distribution visualization
226
+
227
+ JavaScript (Vanilla)
228
+ ├── Fetch API for HTTP requests
229
+ ├── LocalStorage for JWT token
230
+ └── Dynamic DOM manipulation
231
+ ```
232
+
233
+ ### Visualization
234
+ ```
235
+ WordCloud (1.9.3)
236
+ ├── Generate word cloud images
237
+ ├── Vietnamese stopwords support
238
+ └── Save to PNG files
239
+
240
+ Matplotlib (3.8.2)
241
+ ├── Render word cloud to image
242
+ └── Non-GUI backend (Agg)
243
+ ```
244
+
245
+ ---
246
+
247
+ ## File Responsibilities
248
+
249
+ ### Backend Files
250
+ | File | Purpose |
251
+ |------|---------|
252
+ | `main.py` | FastAPI app initialization, router inclusion |
253
+ | `config.py` | Configuration (SECRET_KEY, products list) |
254
+ | `database.py` | SQLAlchemy engine, session management |
255
+ | `models.py` | Database table definitions (User, PredictionHistory) |
256
+ | `schemas.py` | Pydantic models for request/response validation |
257
+
258
+ ### Router Files
259
+ | File | Purpose |
260
+ |------|---------|
261
+ | `routers/auth.py` | Register, login, get current user |
262
+ | `routers/prediction.py` | Single/batch prediction, history |
263
+ | `routers/dashboard.py` | Serve HTML pages (login, register, dashboard) |
264
+
265
+ ### Service Files
266
+ | File | Purpose |
267
+ |------|---------|
268
+ | `services/auth_service.py` | JWT generation, password hashing, token validation |
269
+ | `services/ml_service.py` | ML model wrapper, prediction logic (DUMMY) |
270
+ | `services/visualization_service.py` | WordCloud generation, chart data |
271
+
272
+ ### Frontend Files
273
+ | File | Purpose |
274
+ |------|---------|
275
+ | `templates/base.html` | Base layout with navigation, CDN imports |
276
+ | `templates/login.html` | Login form with JWT handling |
277
+ | `templates/register.html` | Registration form |
278
+ | `templates/dashboard.html` | Main interface (product select, predictions, viz) |
279
+
280
+ ---
281
+
282
+ ## Security Features
283
+
284
+ 1. **Password Hashing:** bcrypt with salt
285
+ 2. **JWT Tokens:** Signed with SECRET_KEY (HS256)
286
+ 3. **Token Expiration:** 24 hours
287
+ 4. **Protected Routes:** Dependency injection (`get_current_user`)
288
+ 5. **CORS:** Configured for security
289
+ 6. **Input Validation:** Pydantic schemas
290
+
291
+ ---
292
+
293
+ ## Database Schema
294
+
295
+ ```sql
296
+ -- Users Table
297
+ CREATE TABLE users (
298
+ id INTEGER PRIMARY KEY,
299
+ username VARCHAR(50) UNIQUE NOT NULL,
300
+ email VARCHAR(100) UNIQUE NOT NULL,
301
+ hashed_password VARCHAR(255) NOT NULL,
302
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP
303
+ );
304
+
305
+ -- PredictionHistory Table
306
+ CREATE TABLE prediction_history (
307
+ id INTEGER PRIMARY KEY,
308
+ user_id INTEGER NOT NULL,
309
+ product_name VARCHAR(200) NOT NULL,
310
+ comment TEXT NOT NULL,
311
+ predicted_rating INTEGER NOT NULL,
312
+ confidence_score FLOAT,
313
+ prediction_type VARCHAR(20) DEFAULT 'single',
314
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
315
+ FOREIGN KEY (user_id) REFERENCES users(id)
316
+ );
317
+ ```
318
+
319
+ ---
320
+
321
+ ## API Response Examples
322
+
323
+ ### POST /api/auth/login
324
+ ```json
325
+ {
326
+ "access_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
327
+ "token_type": "bearer"
328
+ }
329
+ ```
330
+
331
+ ### POST /api/predict/single
332
+ ```json
333
+ {
334
+ "predicted_rating": 5,
335
+ "confidence_score": 0.92,
336
+ "comment": "Sản phẩm rất tốt..."
337
+ }
338
+ ```
339
+
340
+ ### POST /api/predict/batch
341
+ ```json
342
+ {
343
+ "total_predictions": 20,
344
+ "rating_distribution": {
345
+ "1": 2,
346
+ "2": 3,
347
+ "3": 5,
348
+ "4": 6,
349
+ "5": 4
350
+ },
351
+ "wordcloud_url": "/static/uploads/wordclouds/wordcloud_20241125_143022.png",
352
+ "results": [
353
+ {
354
+ "Comment": "Sản phẩm tốt",
355
+ "Predicted_Rating": 5,
356
+ "Confidence": 0.95
357
+ }
358
+ ],
359
+ "csv_download_url": "/api/predict/download/1/1700924622.123"
360
+ }
361
+ ```
362
+
363
+ ---
364
+
365
+ ## Deployment Checklist
366
+
367
+ Before production:
368
+ - [ ] Change `SECRET_KEY` in config.py
369
+ - [ ] Set `reload=False` in uvicorn
370
+ - [ ] Configure CORS properly
371
+ - [ ] Use PostgreSQL instead of SQLite
372
+ - [ ] Add environment variables (.env file)
373
+ - [ ] Set up HTTPS
374
+ - [ ] Add rate limiting
375
+ - [ ] Configure logging
376
+ - [ ] Add error monitoring
377
+ - [ ] Set up backup strategy
378
+
379
+ ---
380
+
381
+ This architecture provides:
382
+ ✅ **Separation of Concerns**
383
+ ✅ **Scalability** (easy to add features)
384
+ ✅ **Maintainability** (clear file structure)
385
+ ✅ **Security** (JWT, password hashing)
386
+ ✅ **Documentation** (auto-generated Swagger)
387
+ ✅ **Testing** (clear API endpoints)
DEPLOYMENT.md ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 Deployment Guide for Render.com
2
+
3
+ ## Pre-Deployment Checklist
4
+
5
+ - [x] Updated `requirements.txt` with `psycopg2-binary` and `gunicorn`
6
+ - [x] Modified `database.py` for hybrid SQLite/PostgreSQL support
7
+ - [x] Updated `config.py` to read `SECRET_KEY` from environment
8
+ - [x] Auto-migration enabled in `main.py`
9
+ - [ ] Push code to GitHub repository
10
+ - [ ] Create Render account
11
+
12
+ ---
13
+
14
+ ## 📦 Step 1: Prepare Your Repository
15
+
16
+ 1. **Commit all changes:**
17
+ ```bash
18
+ git add .
19
+ git commit -m "Prepare for Render deployment"
20
+ git push origin master
21
+ ```
22
+
23
+ 2. **Ensure these files exist:**
24
+ - ✅ `requirements.txt` (with psycopg2-binary, gunicorn)
25
+ - ✅ `main.py` (with Base.metadata.create_all)
26
+ - ✅ `app/database.py` (hybrid support)
27
+ - ✅ `app/config.py` (environment variables)
28
+
29
+ ---
30
+
31
+ ## 🌐 Step 2: Deploy on Render
32
+
33
+ ### A. Create New Web Service
34
+
35
+ 1. Go to https://dashboard.render.com/
36
+ 2. Click **"New +"** → **"Web Service"**
37
+ 3. Connect your GitHub repository
38
+ 4. Select your repository: `Predict-Rating-Web-App`
39
+
40
+ ### B. Configure Web Service
41
+
42
+ Fill in the following settings:
43
+
44
+ | Setting | Value |
45
+ |---------|-------|
46
+ | **Name** | `vietnamese-rating-prediction` (or your choice) |
47
+ | **Region** | Singapore / Oregon (closest to you) |
48
+ | **Branch** | `master` |
49
+ | **Root Directory** | (leave blank) |
50
+ | **Runtime** | `Python 3` |
51
+ | **Build Command** | `pip install -r requirements.txt` |
52
+ | **Start Command** | `gunicorn main:app --workers 4 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$PORT` |
53
+ | **Instance Type** | `Free` |
54
+
55
+ ### C. Add Environment Variables
56
+
57
+ Click **"Environment"** tab and add:
58
+
59
+ | Key | Value | Notes |
60
+ |-----|-------|-------|
61
+ | `SECRET_KEY` | `your-super-secret-random-key-here-2024` | Generate with: `openssl rand -hex 32` |
62
+ | `PYTHON_VERSION` | `3.11.0` | Specify Python version |
63
+
64
+ **DO NOT set `DATABASE_URL` manually** - Render will auto-create it when you add PostgreSQL.
65
+
66
+ ---
67
+
68
+ ## 🗄️ Step 3: Add PostgreSQL Database
69
+
70
+ ### A. Create Database
71
+
72
+ 1. In Render Dashboard, click **"New +"** → **"PostgreSQL"**
73
+ 2. Configure:
74
+ - **Name:** `vietnamese-rating-db`
75
+ - **Database:** `rating_prediction`
76
+ - **User:** (auto-generated)
77
+ - **Region:** Same as web service
78
+ - **PostgreSQL Version:** `15`
79
+ - **Instance Type:** `Free`
80
+
81
+ 3. Click **"Create Database"**
82
+
83
+ ### B. Link Database to Web Service
84
+
85
+ 1. Go back to your **Web Service**
86
+ 2. Click **"Environment"** tab
87
+ 3. Click **"Add Environment Variable"**
88
+ 4. Select **"Add from Database"**
89
+ 5. Choose your `vietnamese-rating-db`
90
+ 6. It will auto-populate `DATABASE_URL`
91
+
92
+ ### C. Verify Connection
93
+
94
+ The `database.py` will automatically:
95
+ - Detect `DATABASE_URL` environment variable
96
+ - Replace `postgres://` with `postgresql://`
97
+ - Connect to PostgreSQL
98
+ - Create all tables automatically
99
+
100
+ ---
101
+
102
+ ## 🎯 Step 4: Deploy & Monitor
103
+
104
+ ### A. Trigger Deployment
105
+
106
+ 1. After adding database, click **"Manual Deploy"** → **"Deploy latest commit"**
107
+ 2. Watch the build logs:
108
+ - ✅ Installing dependencies
109
+ - ✅ Creating database tables
110
+ - ✅ Starting Gunicorn server
111
+
112
+ ### B. Check Deployment Logs
113
+
114
+ Look for these success messages:
115
+ ```
116
+ 🚀 Running in PRODUCTION mode
117
+ 🔄 Creating database tables...
118
+ ✅ Database tables created successfully!
119
+ [INFO] Starting gunicorn
120
+ [INFO] Booting worker with pid: 123
121
+ ```
122
+
123
+ ### C. Access Your Application
124
+
125
+ Your app will be available at:
126
+ ```
127
+ https://vietnamese-rating-prediction.onrender.com
128
+ ```
129
+
130
+ **Important endpoints:**
131
+ - **Dashboard:** `https://your-app.onrender.com/dashboard`
132
+ - **API Docs (Swagger):** `https://your-app.onrender.com/docs`
133
+ - **Health Check:** `https://your-app.onrender.com/health`
134
+
135
+ ---
136
+
137
+ ## 🔍 Troubleshooting
138
+
139
+ ### Issue 1: "Module not found" errors
140
+ **Solution:** Ensure all imports are in `requirements.txt`
141
+ ```bash
142
+ pip freeze > requirements.txt
143
+ ```
144
+
145
+ ### Issue 2: "Connection refused" to database
146
+ **Solution:**
147
+ - Verify `DATABASE_URL` is set in environment variables
148
+ - Check database status in Render dashboard
149
+ - Restart web service
150
+
151
+ ### Issue 3: "Port binding" errors
152
+ **Solution:** Use `$PORT` environment variable:
153
+ ```bash
154
+ gunicorn main:app --bind 0.0.0.0:$PORT
155
+ ```
156
+
157
+ ### Issue 4: ML model takes too long to load
158
+ **Solution:** Render Free Tier has limited RAM (512MB). Consider:
159
+ - Using a lighter model
160
+ - Lazy loading (load model on first request)
161
+ - Upgrading to Starter plan ($7/month)
162
+
163
+ ### Issue 5: Static files not loading
164
+ **Solution:** Ensure `app/static/` directory exists and is committed to git
165
+
166
+ ---
167
+
168
+ ## ⚙️ Alternative Start Commands
169
+
170
+ ### Option 1: Basic Uvicorn (Single Worker)
171
+ ```bash
172
+ uvicorn main:app --host 0.0.0.0 --port $PORT
173
+ ```
174
+
175
+ ### Option 2: Gunicorn with Uvicorn Workers (Recommended)
176
+ ```bash
177
+ gunicorn main:app --workers 4 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$PORT
178
+ ```
179
+
180
+ ### Option 3: Gunicorn with Auto-scaling Workers
181
+ ```bash
182
+ gunicorn main:app --workers 2 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$PORT --timeout 120
183
+ ```
184
+
185
+ ---
186
+
187
+ ## 📊 Performance Optimization
188
+
189
+ ### 1. Reduce Model Loading Time
190
+ Edit `app/services/ml_service.py`:
191
+ ```python
192
+ # Lazy load model on first request instead of on startup
193
+ class MLPredictionService:
194
+ def __init__(self):
195
+ self.model = None
196
+ self.tokenizer = None
197
+
198
+ def _ensure_loaded(self):
199
+ if self.model is None:
200
+ # Load model here
201
+ pass
202
+ ```
203
+
204
+ ### 2. Enable Connection Pooling
205
+ Already configured in `database.py`:
206
+ ```python
207
+ engine = create_engine(
208
+ DATABASE_URL,
209
+ pool_pre_ping=True,
210
+ pool_recycle=300
211
+ )
212
+ ```
213
+
214
+ ### 3. Use Caching for Predictions
215
+ Consider adding Redis (Render add-on) for caching frequent predictions.
216
+
217
+ ---
218
+
219
+ ## 🔒 Security Checklist
220
+
221
+ - [ ] Set strong `SECRET_KEY` in environment variables
222
+ - [ ] Restrict CORS origins in production (edit `main.py`)
223
+ - [ ] Enable HTTPS (automatic on Render)
224
+ - [ ] Set up database backups (Render PostgreSQL backups)
225
+ - [ ] Add rate limiting (consider using Render's DDoS protection)
226
+ - [ ] Review and sanitize all user inputs
227
+
228
+ ---
229
+
230
+ ## 💰 Cost Breakdown (Free Tier)
231
+
232
+ | Service | Cost | Limitations |
233
+ |---------|------|-------------|
234
+ | Web Service | FREE | 512MB RAM, Sleeps after 15min inactivity |
235
+ | PostgreSQL | FREE | 1GB storage, 97 connections |
236
+ | Bandwidth | FREE | 100GB/month |
237
+
238
+ **Upgrade Considerations:**
239
+ - If app sleeps: Upgrade to Starter ($7/month, always-on)
240
+ - If RAM issues: Upgrade to Standard ($25/month, 2GB RAM)
241
+ - If storage full: Upgrade database ($7/month, 10GB)
242
+
243
+ ---
244
+
245
+ ## 🎓 Post-Deployment Testing
246
+
247
+ ### Test 1: Health Check
248
+ ```bash
249
+ curl https://your-app.onrender.com/health
250
+ ```
251
+ Expected: `{"status":"healthy","service":"rating-prediction","version":"1.0.0"}`
252
+
253
+ ### Test 2: Swagger UI
254
+ Visit: `https://your-app.onrender.com/docs`
255
+ - Try registering a user
256
+ - Login to get JWT token
257
+ - Test prediction endpoints
258
+
259
+ ### Test 3: Database Connection
260
+ Check logs for:
261
+ ```
262
+ 🚀 Production Mode: Using PostgreSQL
263
+ ✅ Database tables created successfully!
264
+ ```
265
+
266
+ ---
267
+
268
+ ## 📚 Additional Resources
269
+
270
+ - **Render Docs:** https://render.com/docs/deploy-fastapi
271
+ - **PostgreSQL Guide:** https://render.com/docs/databases
272
+ - **Environment Variables:** https://render.com/docs/environment-variables
273
+ - **Custom Domains:** https://render.com/docs/custom-domains
274
+
275
+ ---
276
+
277
+ ## 🆘 Support
278
+
279
+ If you encounter issues:
280
+ 1. Check Render logs (Dashboard → Logs tab)
281
+ 2. Review this guide carefully
282
+ 3. Check Render community forum: https://community.render.com/
283
+ 4. Contact Render support (for paid plans)
284
+
285
+ ---
286
+
287
+ **Good luck with your deployment! 🚀**
Dockerfile ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================
2
+ # Dockerfile for Hugging Face Spaces (Docker SDK)
3
+ # Optimized for FastAPI + Heavy ML Model (>500MB)
4
+ # ============================================
5
+
6
+ FROM python:3.10-slim
7
+
8
+ # Set environment variables
9
+ ENV PYTHONUNBUFFERED=1 \
10
+ PYTHONDONTWRITEBYTECODE=1 \
11
+ PIP_NO_CACHE_DIR=1 \
12
+ PIP_DISABLE_PIP_VERSION_CHECK=1
13
+
14
+ # Create non-root user (REQUIRED by Hugging Face Spaces)
15
+ # HF Spaces runs containers as user ID 1000
16
+ RUN useradd -m -u 1000 user
17
+
18
+ # Set working directory
19
+ WORKDIR /app
20
+
21
+ # Install system dependencies
22
+ RUN apt-get update && apt-get install -y \
23
+ build-essential \
24
+ gcc \
25
+ && rm -rf /var/lib/apt/lists/*
26
+
27
+ # Copy requirements first (for better Docker layer caching)
28
+ COPY --chown=user:user requirements.txt .
29
+
30
+ # Install Python dependencies as root (before switching to user)
31
+ RUN pip install --no-cache-dir -r requirements.txt
32
+
33
+ # Copy application code
34
+ COPY --chown=user:user . .
35
+
36
+ # Create necessary directories with proper permissions
37
+ RUN mkdir -p /app/app/static/uploads/wordclouds && \
38
+ mkdir -p /app/app/database && \
39
+ chmod -R 777 /app/app/static/uploads && \
40
+ chmod -R 777 /app/app/database
41
+
42
+ # Switch to non-root user
43
+ USER user
44
+
45
+ # Expose port 7860 (REQUIRED by Hugging Face Spaces)
46
+ EXPOSE 7860
47
+
48
+ # Health check (optional but recommended)
49
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
50
+ CMD python -c "import requests; requests.get('http://localhost:7860/docs')"
51
+
52
+ # Start the FastAPI application
53
+ # CRITICAL: Must listen on 0.0.0.0:7860 for Hugging Face Spaces
54
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
FIX_OOM_RENDER.md ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚨 URGENT FIX: Out of Memory on Render
2
+
3
+ ## Problem
4
+ ```
5
+ ==> Out of memory (used over 512Mi)
6
+ ```
7
+
8
+ Render Free Tier has **512MB RAM limit**. PhoBERT model is too heavy to load on startup.
9
+
10
+ ---
11
+
12
+ ## ✅ Solution Applied: Lazy Loading
13
+
14
+ ### Changes Made
15
+
16
+ **File: `app/services/ml_service.py`**
17
+ - ✅ Model now loads **on first request** instead of on startup
18
+ - ✅ Reduces initial memory footprint
19
+ - ✅ Imports (torch, transformers) only when needed
20
+
21
+ ---
22
+
23
+ ## 📝 Update Render Configuration
24
+
25
+ ### Step 1: Change Start Command
26
+
27
+ Go to Render Dashboard → Your Web Service → Settings
28
+
29
+ **OLD Start Command:**
30
+ ```bash
31
+ gunicorn main:app --workers 4 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$PORT
32
+ ```
33
+
34
+ **NEW Start Command (Reduce workers from 4 → 1):**
35
+ ```bash
36
+ gunicorn main:app --workers 1 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$PORT --timeout 120
37
+ ```
38
+
39
+ ### Step 2: Push Updated Code
40
+
41
+ ```bash
42
+ git add app/services/ml_service.py
43
+ git commit -m "Fix: Lazy load ML model to avoid OOM on Render"
44
+ git push origin master
45
+ ```
46
+
47
+ ### Step 3: Redeploy
48
+
49
+ 1. Go to Render Dashboard
50
+ 2. Click **"Manual Deploy"** → **"Clear build cache & deploy"**
51
+ 3. Wait for deployment (will take 5-10 minutes)
52
+
53
+ ---
54
+
55
+ ## 🔍 Expected Behavior After Fix
56
+
57
+ ### On Startup (Fast):
58
+ ```
59
+ ✅ ML Service initialized (model will load on first request)
60
+ 🚀 Running in PRODUCTION mode
61
+ ✅ Database tables created successfully!
62
+ [INFO] Starting gunicorn
63
+ ```
64
+
65
+ ### On First Prediction Request (Slow - 30-60 seconds):
66
+ ```
67
+ 🔄 Loading ML model (first request)...
68
+ 📍 Using device: cpu
69
+ 📦 Loading tokenizer...
70
+ 🧠 Loading PhoBERT model...
71
+ ⚙️ Loading trained weights...
72
+ ✅ Model loaded successfully!
73
+ ```
74
+
75
+ ### Subsequent Requests (Fast):
76
+ Model is already loaded, predictions are instant.
77
+
78
+ ---
79
+
80
+ ## ⚠️ Important Notes
81
+
82
+ ### 1. First Request Will Be Slow
83
+ - User must wait 30-60 seconds for first prediction
84
+ - Consider adding loading spinner in frontend
85
+ - Or call `/health` endpoint on deploy to pre-load model
86
+
87
+ ### 2. Free Tier Limitations
88
+ If still getting OOM errors, consider:
89
+ - ✅ Use quantized model (smaller size)
90
+ - ✅ Upgrade to Starter ($7/month, 512MB → 2GB RAM)
91
+ - ✅ Deploy model separately (separate service)
92
+ - ✅ Use CPU-only PyTorch build
93
+
94
+ ### 3. Model Files Must Exist
95
+ Ensure these files are in repository:
96
+ - `app/services/Model/phoBERT_multi_class_tokenizer/`
97
+ - `app/services/Model/best_phoBER.pth`
98
+
99
+ ---
100
+
101
+ ## 🧪 Test Locally First
102
+
103
+ ```bash
104
+ python main.py
105
+ ```
106
+
107
+ Expected output:
108
+ ```
109
+ ✅ ML Service initialized (model will load on first request)
110
+ 🔧 Development Mode: Using SQLite
111
+ ```
112
+
113
+ Then test prediction endpoint - model will load on first request.
114
+
115
+ ---
116
+
117
+ ## 📊 Memory Usage Comparison
118
+
119
+ | Configuration | Startup Memory | With Model Loaded |
120
+ |---------------|----------------|-------------------|
121
+ | **Before (Eager)** | ~450MB | ~550MB (OOM) |
122
+ | **After (Lazy)** | ~150MB | ~450MB (OK) |
123
+
124
+ ---
125
+
126
+ ## 🆘 If Still Getting OOM
127
+
128
+ ### Option 1: Use Dummy Model (Testing)
129
+ Temporarily use dummy predictions to verify deployment works:
130
+
131
+ Edit `app/services/ml_service.py`:
132
+ ```python
133
+ def predict_single(self, text: str) -> Dict[str, Any]:
134
+ # Skip model loading for testing
135
+ return {
136
+ 'rating': 4, # Dummy rating
137
+ 'confidence': 0.85
138
+ }
139
+ ```
140
+
141
+ ### Option 2: Upgrade Render Plan
142
+ - Starter: $7/month, 2GB RAM
143
+ - Standard: $25/month, 4GB RAM
144
+
145
+ ### Option 3: Deploy Model Separately
146
+ Use external ML API service (AWS Lambda, Hugging Face Inference API, etc.)
147
+
148
+ ---
149
+
150
+ **After making these changes, try deploying again!**
HF_ARCHITECTURE_DIAGRAM.md ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🏗️ Hugging Face Spaces Deployment Architecture
2
+
3
+ ## 📊 High-Level Architecture
4
+
5
+ ```
6
+ ┌─────────────────────────────────────────────────────────────┐
7
+ │ HUGGING FACE SPACES │
8
+ │ (16GB RAM - Free) │
9
+ │ │
10
+ │ ┌───────────────────────────────────────────────────┐ │
11
+ │ │ Docker Container │ │
12
+ │ │ (User ID: 1000) │ │
13
+ │ │ │ │
14
+ │ │ ┌─────────────────────────────────────────┐ │ │
15
+ │ │ │ FastAPI Application │ │ │
16
+ │ │ │ (Port 7860) │ │ │
17
+ │ │ │ │ │ │
18
+ │ │ │ ┌──────────────┐ ┌──────────────┐ │ │ │
19
+ │ │ │ │ Uvicorn │ │ PhoBERT │ │ │ │
20
+ │ │ │ │ Server │ │ Model │ │ │ │
21
+ │ │ │ │ │ │ (~500MB) │ │ │ │
22
+ │ │ │ └──────────────┘ └──────────────┘ │ │ │
23
+ │ │ │ │ │ │
24
+ │ │ │ ┌──────────────┐ ┌──────────────┐ │ │ │
25
+ │ │ │ │ Jinja2 │ │ WordCloud │ │ │ │
26
+ │ │ │ │ Templates │ │ Generator │ │ │ │
27
+ │ │ │ └──────────────┘ └──────────────┘ │ │ │
28
+ │ │ └─────────────────────────────────────────┘ │ │
29
+ │ │ │ │
30
+ │ │ Environment Variables (from HF Secrets): │ │
31
+ │ │ - DATABASE_URL │ │
32
+ │ │ - SECRET_KEY │ │
33
+ │ └───────────────────────────────────────────────────┘ │
34
+ └─────────────────────────────────────────────────────────────┘
35
+
36
+ │ HTTPS
37
+
38
+ ┌────────────────────────┐
39
+ │ Users │
40
+ │ (Web Browsers) │
41
+ └────────────────────────┘
42
+
43
+
44
+
45
+ ┌─────────────────────────────────────────────────────────────┐
46
+ │ External PostgreSQL Database │
47
+ │ (Render / Neon / Other) │
48
+ │ │
49
+ │ ┌──────────────┐ ┌──────────────────────┐ │
50
+ │ │ Users │ │ PredictionHistory │ │
51
+ │ │ Table │────────▶│ Table │ │
52
+ │ │ │ FK │ │ │
53
+ │ └──────────────┘ └──────────────────────┘ │
54
+ └─────────────────────────────────────────────────────────────┘
55
+ ```
56
+
57
+ ---
58
+
59
+ ## 🔄 Request Flow
60
+
61
+ ```
62
+ 1. User visits Space URL
63
+ └─▶ https://huggingface.co/spaces/USERNAME/SPACE_NAME
64
+
65
+
66
+ 2. Hugging Face routes to Docker container (port 7860)
67
+
68
+
69
+ 3. Uvicorn receives HTTP request
70
+
71
+ ├─▶ GET /docs → Swagger UI
72
+ ├��▶ GET /dashboard → Jinja2 Template + TailwindCSS
73
+ ├─▶ POST /api/auth/login → JWT Token
74
+ ├─▶ POST /api/predict/single → PhoBERT Model
75
+ └─▶ POST /api/predict/batch → CSV Processing + WordCloud
76
+
77
+
78
+ 4. Database query (if needed)
79
+ └─▶ PostgreSQL on Render/Neon (via DATABASE_URL)
80
+
81
+
82
+ 5. Response returned to user
83
+ └─▶ JSON (API) or HTML (Pages)
84
+ ```
85
+
86
+ ---
87
+
88
+ ## 🐳 Docker Build Process
89
+
90
+ ```
91
+ 1. Dockerfile Instructions
92
+
93
+ ├─▶ FROM python:3.10-slim
94
+ │ └─ Base image (~150MB)
95
+
96
+ ├─▶ RUN useradd -m -u 1000 user
97
+ │ └─ Create non-root user (HF requirement)
98
+
99
+ ├─▶ COPY requirements.txt
100
+ │ └─ Copy dependencies first
101
+
102
+ ├─▶ RUN pip install -r requirements.txt
103
+ │ └─ Install packages (~2GB with PyTorch)
104
+
105
+ ├─▶ COPY --chown=user:user . .
106
+ │ └─ Copy application code
107
+
108
+ ├─▶ RUN chmod -R 777 /app/app/static/uploads
109
+ │ └─ Set write permissions
110
+
111
+ ├─▶ USER user
112
+ │ └─ Switch to non-root user
113
+
114
+ └─▶ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
115
+ └─ Start application
116
+
117
+ Total Build Time: 5-10 minutes
118
+ Final Image Size: ~2.5GB
119
+ ```
120
+
121
+ ---
122
+
123
+ ## 🔐 Security Layer
124
+
125
+ ```
126
+ ┌─────────────────────────────────────────────────────────────┐
127
+ │ Security Features │
128
+ ├─────────────────────────────────────────────────────────────┤
129
+ │ │
130
+ │ 1. Authentication Layer │
131
+ │ ├─ JWT Tokens (24h expiration) │
132
+ │ ├─ Bcrypt password hashing │
133
+ │ └─ OAuth2 Bearer scheme │
134
+ │ │
135
+ │ 2. Network Security │
136
+ │ ├─ HTTPS (provided by HF) │
137
+ │ ├─ CORS configuration │
138
+ │ └─ PostgreSQL SSL (sslmode=require) │
139
+ │ │
140
+ │ 3. Secret Management │
141
+ │ ├─ Environment variables (HF Secrets) │
142
+ │ ├─ No hardcoded credentials │
143
+ │ └─ .dockerignore excludes .env │
144
+ │ │
145
+ │ 4. Container Security │
146
+ │ ├─ Non-root user (UID 1000) │
147
+ │ ├─ Read-only filesystem (except uploads) │
148
+ │ └─ Minimal base image │
149
+ │ │
150
+ └─────────────────────────────────────────────────────────────┘
151
+ ```
152
+
153
+ ---
154
+
155
+ ## 💾 Storage Architecture
156
+
157
+ ```
158
+ ┌─────────────────────────────────────────────────────────────┐
159
+ │ Storage Locations │
160
+ ├─────────────────────────────────────────────────────────────┤
161
+ │ │
162
+ │ Container Storage (Ephemeral - Resets on rebuild) │
163
+ │ ├─ /app/app/static/uploads/wordclouds/ │
164
+ │ │ └─ Word cloud images (temporary) │
165
+ │ └─ /app/app/database/ │
166
+ │ └─ SQLite fallback (dev only) │
167
+ │ │
168
+ │ External Storage (Persistent) │
169
+ │ └─ PostgreSQL Database (Render/Neon) │
170
+ │ ├─ users table │
171
+ │ ├─ prediction_history table │
172
+ │ └─ All user data & predictions │
173
+ │ │
174
+ │ Future Enhancements (Optional) │
175
+ │ └─ S3 / Cloudinary for file uploads │
176
+ │ └─ Persistent word clouds & CSVs │
177
+ │ │
178
+ └─────────────────────────────────────────────────────────────┘
179
+ ```
180
+
181
+ ---
182
+
183
+ ## 🔌 Connection Flow
184
+
185
+ ```
186
+ ┌────────────────────────────────────────────────────────────┐
187
+ │ Database Connection Logic │
188
+ └────────────────────────────────────────────────────────────┘
189
+
190
+
191
+ ┌─────────────────────┐
192
+ │ app/database.py │
193
+ └─────────────────────┘
194
+
195
+
196
+ ┌─────────────────────────────────────┐
197
+ │ Check os.getenv("DATABASE_URL") │
198
+ └─────────────────────────────────────┘
199
+
200
+ ┌──────┴──────┐
201
+ │ │
202
+ ▼ ▼
203
+ ✅ Found ❌ Not Found
204
+ │ │
205
+ │ ▼
206
+ │ ┌──────────────────┐
207
+ │ │ Use SQLite │
208
+ │ │ (Local Dev) │
209
+ │ └──────────────────┘
210
+
211
+
212
+ ┌──────────────────────────┐
213
+ │ Fix postgres:// URL │
214
+ │ (replace with │
215
+ │ postgresql://) │
216
+ └──────────────────────────┘
217
+
218
+
219
+ ┌──────────────────────────┐
220
+ │ Connect to PostgreSQL │
221
+ │ (Production on HF) │
222
+ └──────────────────────────┘
223
+ ```
224
+
225
+ ---
226
+
227
+ ## 📈 Scalability Considerations
228
+
229
+ ```
230
+ Current Setup (Free Tier):
231
+ ├─ 16GB RAM (sufficient for >500MB model)
232
+ ├─ Shared CPU (adequate for moderate traffic)
233
+ └─ Unlimited uptime (99.9% availability)
234
+
235
+ If Scaling Needed:
236
+ ├─ Upgrade to Pro Space ($9/month)
237
+ │ └─ Better CPU, more RAM, priority support
238
+ ├─ Database scaling
239
+ │ └─ Upgrade PostgreSQL plan on Render/Neon
240
+ ├─ Add caching layer
241
+ │ └─ Redis for frequent queries
242
+ └─ Consider load balancing
243
+ └─ Multiple Space instances (advanced)
244
+ ```
245
+
246
+ ---
247
+
248
+ ## 🔄 Deployment Workflow
249
+
250
+ ```
251
+ ┌──────────────────────────────────────────────────────────┐
252
+ │ Local Development │
253
+ │ ├─ Edit code │
254
+ │ ├─ Test with SQLite │
255
+ │ └─ Commit to Git │
256
+ └──────────────────────────────────────────────────────────┘
257
+
258
+
259
+ ┌──────────────────────────────────────────────────────────┐
260
+ │ Push to Hugging Face │
261
+ │ git push origin main │
262
+ └──────────────────────────────────────────────────────────┘
263
+
264
+
265
+ ┌──────────────────────────────────────────────────────────┐
266
+ │ HF Spaces Auto-Build │
267
+ │ ├─ Pull latest code │
268
+ │ ├─ Build Docker image (5-10 min) │
269
+ │ ├─ Run container on port 7860 │
270
+ │ └─ Inject environment variables │
271
+ └──────────────────────────────────────────────────────────┘
272
+
273
+
274
+ ┌──────────────────────────────────────────────────────────┐
275
+ │ Application Running │
276
+ │ ├─ Connect to PostgreSQL │
277
+ │ ├─ Load ML model into memory │
278
+ │ ├─ Start Uvicorn server │
279
+ │ └─ Ready to serve requests │
280
+ └──────────────────────────────────────────────────────────┘
281
+ ```
282
+
283
+ ---
284
+
285
+ ## 📊 Resource Usage
286
+
287
+ ```
288
+ Component Memory CPU Disk
289
+ ─────────────────────────────────────────────────
290
+ Base Image ~150MB - ~150MB
291
+ Python Dependencies ~2GB - ~2GB
292
+ PhoBERT Model ~500MB High ~500MB
293
+ Application Code ~50MB Low ~50MB
294
+ Runtime Data ~100MB Medium ~100MB
295
+ ─────────────────────────────────────────────────
296
+ TOTAL (approx) ~2.8GB - ~2.8GB
297
+
298
+ Hugging Face Provides: 16GB RAM (plenty of headroom)
299
+ ```
300
+
301
+ ---
302
+
303
+ ## 🎯 Key Architectural Decisions
304
+
305
+ ### Why Docker SDK?
306
+ ✅ Heavy ML model (>500MB) needs more than 512MB RAM
307
+ ✅ Full control over environment
308
+ ✅ 16GB RAM on free tier
309
+
310
+ ### Why External Database?
311
+ ✅ Container is ephemeral (resets on rebuild)
312
+ ✅ PostgreSQL provides persistence
313
+ ✅ Easy to scale independently
314
+
315
+ ### Why Port 7860?
316
+ ✅ Hugging Face Spaces requirement
317
+ ✅ Auto-routed by HF infrastructure
318
+ ✅ HTTPS provided automatically
319
+
320
+ ### Why Non-Root User?
321
+ ✅ Security best practice
322
+ ✅ Hugging Face Spaces requirement
323
+ ✅ UID 1000 is standard
324
+
325
+ ---
326
+
327
+ **This architecture provides:**
328
+ - ✅ High availability (99.9% uptime)
329
+ - ✅ Sufficient resources (16GB RAM)
330
+ - ✅ Secure deployment (JWT, SSL, non-root)
331
+ - ✅ Persistent storage (external DB)
332
+ - ✅ Cost-effective (free tier)
HF_DEPLOYMENT_CHECKLIST.md ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ✅ Hugging Face Spaces Deployment Checklist
2
+
3
+ ## Pre-Deployment (Local Setup)
4
+
5
+ ### 1. Database Preparation
6
+ - [ ] Create external PostgreSQL database (Render/Neon)
7
+ - [ ] Test database connection locally
8
+ - [ ] Run database migrations (if any)
9
+ - [ ] Create initial admin user (optional)
10
+
11
+ ### 2. Code Preparation
12
+ - [ ] Review `Dockerfile` (port 7860, user permissions)
13
+ - [ ] Verify `requirements.txt` has all dependencies
14
+ - [ ] Check `database.py` hybrid connection logic
15
+ - [ ] Test application locally with Docker
16
+ - [ ] Generate strong `SECRET_KEY`
17
+
18
+ ### 3. Files to Push
19
+ - [ ] `Dockerfile` (CRITICAL)
20
+ - [ ] `requirements.txt`
21
+ - [ ] `main.py`
22
+ - [ ] `app/` directory (all modules)
23
+ - [ ] `README_HF_SPACE.md` (rename to README.md)
24
+ - [ ] `.dockerignore`
25
+
26
+ ### 4. Files to EXCLUDE
27
+ - [ ] `.env` files (secrets)
28
+ - [ ] `env/` or `venv/` directories
29
+ - [ ] `__pycache__/` directories
30
+ - [ ] Local `.db` files
31
+ - [ ] `app/static/uploads/` temporary files
32
+
33
+ ---
34
+
35
+ ## Hugging Face Spaces Setup
36
+
37
+ ### 1. Create New Space
38
+ - [ ] Go to https://huggingface.co/new-space
39
+ - [ ] Choose a memorable Space name
40
+ - [ ] Select **Docker** SDK
41
+ - [ ] Choose **CPU Basic** (16GB RAM - Free Tier)
42
+ - [ ] Set visibility (Public/Private)
43
+ - [ ] Click **Create Space**
44
+
45
+ ### 2. Configure Environment Variables
46
+ Navigate to **Settings** → **Repository Secrets**
47
+
48
+ **Required Secrets:**
49
+ - [ ] Add `DATABASE_URL`
50
+ ```
51
+ postgresql://user:pass@host:port/database
52
+ ```
53
+ - [ ] Add `SECRET_KEY`
54
+ ```
55
+ (generated random string, 32+ chars)
56
+ ```
57
+
58
+ **Verify:**
59
+ - [ ] Secrets show as `***` (hidden)
60
+ - [ ] No typos in variable names
61
+ - [ ] DATABASE_URL starts with `postgresql://`
62
+
63
+ ---
64
+
65
+ ## Deployment
66
+
67
+ ### 1. Push Code to HF Space
68
+ ```bash
69
+ # Clone your Space repository
70
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE
71
+ cd YOUR_SPACE
72
+
73
+ # Copy project files
74
+ # (Exclude env/, __pycache__, .db files)
75
+
76
+ # IMPORTANT: Rename README
77
+ cp README_HF_SPACE.md README.md
78
+
79
+ # Initialize git (if needed)
80
+ git init
81
+ git remote add origin https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE
82
+
83
+ # Commit and push
84
+ git add .
85
+ git commit -m "Initial deployment to Hugging Face Spaces"
86
+ git push -u origin main
87
+ ```
88
+
89
+ ### 2. Monitor Build
90
+ - [ ] Go to your Space URL
91
+ - [ ] Click **Logs** tab
92
+ - [ ] Watch Docker build process
93
+ - [ ] Wait for "Running on http://0.0.0.0:7860" message
94
+ - [ ] Build time: ~5-10 minutes
95
+
96
+ ### 3. Expected Build Stages
97
+ ```
98
+ ✅ Building Docker image...
99
+ ✅ Installing dependencies from requirements.txt...
100
+ ✅ Downloading PyTorch & Transformers (~2GB)...
101
+ ✅ Creating user 'user' (UID 1000)...
102
+ ✅ Setting permissions...
103
+ ✅ Starting uvicorn server...
104
+ ✅ Application running on port 7860
105
+ ```
106
+
107
+ ---
108
+
109
+ ## Post-Deployment Verification
110
+
111
+ ### 1. Check Application Status
112
+ - [ ] Space shows "Running" status (green)
113
+ - [ ] No errors in Logs tab
114
+ - [ ] Access Space URL (opens app)
115
+
116
+ ### 2. Test Database Connection
117
+ Expected log message:
118
+ ```
119
+ 🚀 Production Mode: Using PostgreSQL
120
+ ```
121
+
122
+ If you see this instead:
123
+ ```
124
+ 🔧 Development Mode: Using SQLite
125
+ ```
126
+ → DATABASE_URL is missing or incorrect
127
+
128
+ ### 3. Test Core Functionality
129
+ - [ ] Access `/docs` (Swagger UI loads)
130
+ - [ ] Register a new user
131
+ - [ ] Login successfully
132
+ - [ ] Access dashboard
133
+ - [ ] Make a single prediction
134
+ - [ ] Upload CSV for batch prediction
135
+ - [ ] View prediction history
136
+ - [ ] Word cloud generates
137
+ - [ ] Charts display correctly
138
+
139
+ ### 4. Security Verification
140
+ - [ ] Cannot access protected routes without JWT
141
+ - [ ] Passwords are hashed (check database)
142
+ - [ ] JWT tokens expire after 24 hours
143
+ - [ ] HTTPS is enabled (HF provides this)
144
+
145
+ ---
146
+
147
+ ## Troubleshooting
148
+
149
+ ### Issue: Build Failed
150
+ **Check:**
151
+ - [ ] Dockerfile syntax errors
152
+ - [ ] Missing dependencies in requirements.txt
153
+ - [ ] Python version compatibility
154
+ - [ ] Check Logs for specific error
155
+
156
+ ### Issue: "Application startup failed"
157
+ **Check:**
158
+ - [ ] DATABASE_URL is set correctly
159
+ - [ ] Database is accessible (not firewalled)
160
+ - [ ] SECRET_KEY is set
161
+ - [ ] Port 7860 is used in CMD
162
+
163
+ ### Issue: "502 Bad Gateway"
164
+ **Check:**
165
+ - [ ] App is still starting (wait 2-3 min)
166
+ - [ ] Heavy model loading in progress
167
+ - [ ] Check Logs for crash/errors
168
+
169
+ ### Issue: Database Connection Error
170
+ **Check:**
171
+ - [ ] DATABASE_URL format is correct
172
+ - [ ] Database host is reachable
173
+ - [ ] Username/password are correct
174
+ - [ ] Database allows external connections
175
+
176
+ ### Issue: JWT Token Invalid
177
+ **Check:**
178
+ - [ ] SECRET_KEY is set correctly
179
+ - [ ] SECRET_KEY hasn't changed
180
+ - [ ] Token hasn't expired (24h)
181
+ - [ ] Clear browser localStorage
182
+
183
+ ---
184
+
185
+ ## Maintenance
186
+
187
+ ### Regular Tasks
188
+ - [ ] Monitor Space usage (CPU/Memory)
189
+ - [ ] Check application logs weekly
190
+ - [ ] Rotate SECRET_KEY every 90 days
191
+ - [ ] Backup PostgreSQL database regularly
192
+ - [ ] Update dependencies monthly
193
+
194
+ ### Updating the App
195
+ ```bash
196
+ # Make changes locally
197
+ git add .
198
+ git commit -m "Update: description"
199
+ git push
200
+
201
+ # HF will automatically rebuild
202
+ # Monitor Logs tab for build status
203
+ ```
204
+
205
+ ### Scaling Considerations
206
+ If you exceed Free Tier limits:
207
+ - [ ] Upgrade to **Pro** Space (better hardware)
208
+ - [ ] Consider upgrading database plan
209
+ - [ ] Implement caching (Redis)
210
+ - [ ] Optimize model loading
211
+
212
+ ---
213
+
214
+ ## Performance Optimization
215
+
216
+ ### For Heavy Models
217
+ - [ ] Use model quantization (reduces size)
218
+ - [ ] Cache model in memory (don't reload)
219
+ - [ ] Use CPU inference (GPU costs more)
220
+ - [ ] Implement request queuing
221
+
222
+ ### For High Traffic
223
+ - [ ] Add rate limiting
224
+ - [ ] Implement Redis caching
225
+ - [ ] Use CDN for static files
226
+ - [ ] Optimize database queries
227
+ - [ ] Add connection pooling
228
+
229
+ ---
230
+
231
+ ## Security Hardening
232
+
233
+ ### Production Checklist
234
+ - [ ] Use strong SECRET_KEY (32+ chars)
235
+ - [ ] Enable DATABASE SSL (sslmode=require)
236
+ - [ ] Implement rate limiting
237
+ - [ ] Add CORS restrictions
238
+ - [ ] Log all authentication attempts
239
+ - [ ] Implement password strength requirements
240
+ - [ ] Add 2FA (future enhancement)
241
+ - [ ] Regular security audits
242
+
243
+ ---
244
+
245
+ ## Rollback Plan
246
+
247
+ If deployment fails:
248
+
249
+ ### Option 1: Revert Git Commit
250
+ ```bash
251
+ git revert HEAD
252
+ git push
253
+ ```
254
+
255
+ ### Option 2: Delete and Recreate Space
256
+ 1. Delete current Space
257
+ 2. Create new Space with same name
258
+ 3. Re-add environment variables
259
+ 4. Push working version
260
+
261
+ ### Option 3: Use Previous Docker Image
262
+ HF keeps previous builds for Pro users
263
+
264
+ ---
265
+
266
+ ## Success Criteria
267
+
268
+ Deployment is successful when:
269
+ - ✅ Space status is "Running"
270
+ - ✅ No errors in Logs
271
+ - ✅ PostgreSQL connection established
272
+ - ✅ All API endpoints respond
273
+ - ✅ Frontend loads correctly
274
+ - ✅ Users can register and login
275
+ - ✅ Predictions work (single + batch)
276
+ - ✅ Visualizations generate
277
+ - ✅ JWT authentication works
278
+
279
+ ---
280
+
281
+ ## Support Resources
282
+
283
+ - 📖 [Hugging Face Spaces Docs](https://huggingface.co/docs/hub/spaces-overview)
284
+ - 📖 [Docker SDK Guide](https://huggingface.co/docs/hub/spaces-sdks-docker)
285
+ - 📖 [FastAPI Documentation](https://fastapi.tiangolo.com)
286
+ - 📖 [SQLAlchemy Docs](https://docs.sqlalchemy.org)
287
+ - 💬 [HF Community Forum](https://discuss.huggingface.co)
288
+
289
+ ---
290
+
291
+ **Last Updated:** December 2025
292
+ **Version:** 1.0.0
HF_ENV_VARIABLES.md ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🔐 Hugging Face Spaces Environment Variables
2
+
3
+ ## Required Secrets (Add in Settings → Repository Secrets)
4
+
5
+ ### 1. DATABASE_URL
6
+ **Purpose:** PostgreSQL connection string for external database
7
+
8
+ **Format:**
9
+ ```
10
+ DATABASE_URL=postgresql://username:password@host:port/database
11
+ ```
12
+
13
+ **Real Examples:**
14
+
15
+ **Render PostgreSQL:**
16
+ ```
17
+ DATABASE_URL=postgresql://myuser:mypass123@dpg-abcd1234.oregon-postgres.render.com/mydb
18
+ ```
19
+
20
+ **Neon PostgreSQL:**
21
+ ```
22
+ DATABASE_URL=postgresql://myuser:mypass123@ep-xyz789.us-east-2.aws.neon.tech/mydb?sslmode=require
23
+ ```
24
+
25
+ **⚠️ Important:**
26
+ - MUST start with `postgresql://` (NOT `postgres://`)
27
+ - The app auto-converts `postgres://` → `postgresql://`
28
+ - Include port if different from 5432
29
+ - Add `?sslmode=require` for secure connections
30
+
31
+ ---
32
+
33
+ ### 2. SECRET_KEY
34
+ **Purpose:** JWT token signing and session security
35
+
36
+ **Format:**
37
+ ```
38
+ SECRET_KEY=your-super-secret-random-string-min-32-characters
39
+ ```
40
+
41
+ **How to Generate:**
42
+ ```bash
43
+ # Method 1: Using Python
44
+ python -c "import secrets; print(secrets.token_urlsafe(32))"
45
+
46
+ # Method 2: Using OpenSSL
47
+ openssl rand -base64 32
48
+
49
+ # Method 3: Using pwgen
50
+ pwgen -s 64 1
51
+ ```
52
+
53
+ **Example Output:**
54
+ ```
55
+ SECRET_KEY=xK7mP9vR2nQ5wT8yU4eL6hG3jN0bM1cF5sA9dH2kV7pW4qX8zR6tY3nM5
56
+ ```
57
+
58
+ **⚠️ Security Rules:**
59
+ - NEVER commit to Git
60
+ - Minimum 32 characters
61
+ - Use cryptographically secure random generation
62
+ - Different for each environment (dev/staging/prod)
63
+
64
+ ---
65
+
66
+ ## Optional Environment Variables
67
+
68
+ ### PORT (Pre-configured)
69
+ **Default:** `7860` (Required by Hugging Face Spaces)
70
+ **DO NOT CHANGE** - The Dockerfile is already configured
71
+
72
+ ### PYTHONUNBUFFERED (Pre-configured)
73
+ **Default:** `1`
74
+ **Purpose:** Real-time log output in HF Spaces
75
+
76
+ ---
77
+
78
+ ## How to Add Secrets in Hugging Face Spaces
79
+
80
+ 1. **Navigate to Settings:**
81
+ - Go to your Space: `https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE`
82
+ - Click **Settings** (gear icon)
83
+
84
+ 2. **Add Repository Secrets:**
85
+ - Scroll to **Repository Secrets** section
86
+ - Click **New Secret**
87
+
88
+ 3. **Add DATABASE_URL:**
89
+ - Name: `DATABASE_URL`
90
+ - Value: `postgresql://user:pass@host:port/db`
91
+ - Click **Add Secret**
92
+
93
+ 4. **Add SECRET_KEY:**
94
+ - Name: `SECRET_KEY`
95
+ - Value: (your generated secret key)
96
+ - Click **Add Secret**
97
+
98
+ 5. **Verify:**
99
+ - Secrets should show as `***` (hidden)
100
+ - They will be injected at runtime
101
+ - NOT visible in logs
102
+
103
+ ---
104
+
105
+ ## Verification Checklist
106
+
107
+ After adding secrets, verify:
108
+
109
+ - [ ] `DATABASE_URL` starts with `postgresql://`
110
+ - [ ] `DATABASE_URL` includes username and password
111
+ - [ ] `DATABASE_URL` has correct host and port
112
+ - [ ] `SECRET_KEY` is at least 32 characters
113
+ - [ ] `SECRET_KEY` is randomly generated
114
+ - [ ] Secrets are marked as **hidden** in Settings
115
+ - [ ] No secrets are in Git repository
116
+
117
+ ---
118
+
119
+ ## Testing Database Connection
120
+
121
+ **Before deploying**, test your database URL locally:
122
+
123
+ ```bash
124
+ # Test with psql (if installed)
125
+ psql "postgresql://user:pass@host:port/db"
126
+
127
+ # Test with Python
128
+ python -c "
129
+ from sqlalchemy import create_engine
130
+ url = 'postgresql://user:pass@host:port/db'
131
+ engine = create_engine(url)
132
+ with engine.connect() as conn:
133
+ print('✅ Connection successful!')
134
+ "
135
+ ```
136
+
137
+ ---
138
+
139
+ ## Troubleshooting
140
+
141
+ ### Error: "could not translate host name"
142
+ **Cause:** Invalid host in DATABASE_URL
143
+ **Fix:** Verify host from your database provider
144
+
145
+ ### Error: "password authentication failed"
146
+ **Cause:** Wrong username or password
147
+ **Fix:** Check credentials in your database dashboard
148
+
149
+ ### Error: "no pg_hba.conf entry for host"
150
+ **Cause:** Database firewall blocks external connections
151
+ **Fix:** Whitelist all IPs (0.0.0.0/0) in database settings
152
+
153
+ ### Error: "JWT token invalid"
154
+ **Cause:** SECRET_KEY mismatch or expired token
155
+ **Fix:** Ensure SECRET_KEY is consistent and not changed
156
+
157
+ ---
158
+
159
+ ## Security Best Practices
160
+
161
+ ✅ **DO:**
162
+ - Use HF Spaces Secrets for sensitive data
163
+ - Generate strong random keys
164
+ - Use SSL for database connections (`sslmode=require`)
165
+ - Rotate SECRET_KEY periodically
166
+ - Use different keys per environment
167
+
168
+ ❌ **DON'T:**
169
+ - Hardcode secrets in code
170
+ - Commit `.env` files to Git
171
+ - Share SECRET_KEY publicly
172
+ - Use weak/predictable keys
173
+ - Reuse keys across projects
174
+
175
+ ---
176
+
177
+ **Last Updated:** December 2025
HF_MIGRATION_SUMMARY.md ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📦 Hugging Face Spaces Migration - Complete Summary
2
+
3
+ ## ✅ Migration Completed Successfully
4
+
5
+ Your FastAPI application has been fully prepared for deployment on **Hugging Face Spaces** with Docker SDK.
6
+
7
+ ---
8
+
9
+ ## 📁 Files Created/Modified
10
+
11
+ ### 🆕 New Files Created
12
+
13
+ 1. **`Dockerfile`** ⭐ CRITICAL
14
+ - Optimized for Hugging Face Spaces
15
+ - Uses `python:3.10-slim` base image
16
+ - Creates non-root user (UID 1000)
17
+ - Exposes port 7860 (HF requirement)
18
+ - Proper permissions for write directories
19
+
20
+ 2. **`.dockerignore`**
21
+ - Excludes unnecessary files from Docker build
22
+ - Reduces image size
23
+ - Speeds up build time
24
+
25
+ 3. **`HUGGING_FACE_DEPLOYMENT.md`**
26
+ - Complete deployment guide
27
+ - Step-by-step instructions
28
+ - Troubleshooting section
29
+ - Security checklist
30
+
31
+ 4. **`HF_ENV_VARIABLES.md`**
32
+ - Detailed guide for environment variables
33
+ - How to generate SECRET_KEY
34
+ - Database URL formats
35
+ - Security best practices
36
+
37
+ 5. **`HF_DEPLOYMENT_CHECKLIST.md`**
38
+ - Pre-deployment checklist
39
+ - Build monitoring steps
40
+ - Post-deployment verification
41
+ - Troubleshooting guide
42
+
43
+ 6. **`README_HF_SPACE.md`**
44
+ - README for Hugging Face Space page
45
+ - Contains YAML frontmatter for HF
46
+ - User-facing documentation
47
+
48
+ 7. **`test_docker_local.py`**
49
+ - Python script to test Docker setup locally
50
+ - Verifies build and runtime
51
+ - Tests endpoints
52
+ - Auto-cleanup
53
+
54
+ ### 🔄 Files Modified
55
+
56
+ 1. **`requirements.txt`**
57
+ - ✅ Removed `gunicorn` (not needed for Docker)
58
+ - ✅ Removed `argon2-cffi` (using bcrypt)
59
+ - ✅ Kept `psycopg2-binary` for PostgreSQL
60
+ - ✅ Adjusted version constraints for compatibility
61
+ - ✅ Added `aiofiles` for async file operations
62
+
63
+ 2. **`app/database.py`** ✅ Already Correct
64
+ - Hybrid connection logic present
65
+ - Auto-converts `postgres://` → `postgresql://`
66
+ - Falls back to SQLite for local dev
67
+
68
+ 3. **`app/config.py`** ✅ Already Correct
69
+ - Reads `SECRET_KEY` from environment
70
+ - Reads `DATABASE_URL` from environment
71
+ - Has fallback values for local dev
72
+
73
+ ---
74
+
75
+ ## 🔐 Required Environment Variables
76
+
77
+ You MUST add these in Hugging Face Spaces **Settings** → **Repository Secrets**:
78
+
79
+ ### 1. DATABASE_URL (REQUIRED)
80
+ ```
81
+ DATABASE_URL=postgresql://username:password@host:port/database
82
+ ```
83
+
84
+ **Example (Render):**
85
+ ```
86
+ DATABASE_URL=postgresql://myuser:mypass@dpg-abc123.oregon-postgres.render.com/mydb
87
+ ```
88
+
89
+ **Example (Neon):**
90
+ ```
91
+ DATABASE_URL=postgresql://myuser:mypass@ep-xyz789.us-east-2.aws.neon.tech/mydb?sslmode=require
92
+ ```
93
+
94
+ ### 2. SECRET_KEY (REQUIRED)
95
+ ```
96
+ SECRET_KEY=your-super-secret-random-string-minimum-32-characters
97
+ ```
98
+
99
+ **Generate with:**
100
+ ```bash
101
+ python -c "import secrets; print(secrets.token_urlsafe(32))"
102
+ ```
103
+
104
+ ---
105
+
106
+ ## 🚀 Deployment Steps
107
+
108
+ ### Step 1: Create Hugging Face Space
109
+ 1. Go to https://huggingface.co/new-space
110
+ 2. Name your Space (e.g., `product-rating-prediction`)
111
+ 3. Select **Docker** SDK
112
+ 4. Choose **CPU Basic** (16GB RAM - Free)
113
+ 5. Click **Create Space**
114
+
115
+ ### Step 2: Add Environment Variables
116
+ 1. Go to Space **Settings**
117
+ 2. Scroll to **Repository Secrets**
118
+ 3. Add `DATABASE_URL` (your PostgreSQL connection string)
119
+ 4. Add `SECRET_KEY` (your generated key)
120
+
121
+ ### Step 3: Prepare Code
122
+ ```bash
123
+ # In your project directory
124
+ # Rename README for HF Space
125
+ copy README_HF_SPACE.md README.md
126
+
127
+ # Remove unnecessary files
128
+ rmdir /s /q env
129
+ rmdir /s /q __pycache__
130
+ del /q app\database\*.db
131
+ ```
132
+
133
+ ### Step 4: Push to Hugging Face
134
+ ```bash
135
+ # Clone your Space repo
136
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE
137
+ cd YOUR_SPACE
138
+
139
+ # Copy all project files (except excluded ones)
140
+ # Use .dockerignore as reference for what to exclude
141
+
142
+ # Commit and push
143
+ git add .
144
+ git commit -m "Initial deployment"
145
+ git push origin main
146
+ ```
147
+
148
+ ### Step 5: Monitor Build
149
+ 1. Go to your Space URL
150
+ 2. Click **Logs** tab
151
+ 3. Watch for successful build
152
+ 4. Wait for "Running on http://0.0.0.0:7860"
153
+
154
+ ---
155
+
156
+ ## 🧪 Test Locally First (RECOMMENDED)
157
+
158
+ Before deploying to Hugging Face, test locally:
159
+
160
+ ### Option 1: Automated Test Script
161
+ ```bash
162
+ # Set environment variables (optional)
163
+ set DATABASE_URL=postgresql://user:pass@host/db
164
+ set SECRET_KEY=your-secret-key
165
+
166
+ # Run test script
167
+ python test_docker_local.py
168
+ ```
169
+
170
+ ### Option 2: Manual Docker Test
171
+ ```bash
172
+ # Build image
173
+ docker build -t rating-prediction .
174
+
175
+ # Run container
176
+ docker run -p 7860:7860 ^
177
+ -e DATABASE_URL="postgresql://user:pass@host/db" ^
178
+ -e SECRET_KEY="your-secret-key" ^
179
+ rating-prediction
180
+
181
+ # Access at http://localhost:7860
182
+ ```
183
+
184
+ ---
185
+
186
+ ## 📊 Key Differences from Render
187
+
188
+ | Feature | Render | Hugging Face Spaces |
189
+ |---------|--------|---------------------|
190
+ | **Deployment** | Web Service | Docker SDK |
191
+ | **Port** | Auto-assigned | 7860 (fixed) |
192
+ | **Start Command** | Procfile | Dockerfile CMD |
193
+ | **RAM** | 512MB (Free) | 16GB (Free) |
194
+ | **Database** | Managed PostgreSQL | External (your choice) |
195
+ | **User** | root | user (UID 1000) |
196
+ | **Build** | Automatic | Dockerfile |
197
+
198
+ ---
199
+
200
+ ## 🎯 Critical Configuration Points
201
+
202
+ ### ✅ Port 7860
203
+ The Dockerfile MUST use port 7860:
204
+ ```dockerfile
205
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
206
+ ```
207
+
208
+ ### ✅ Non-Root User
209
+ The Dockerfile MUST create and switch to user:
210
+ ```dockerfile
211
+ RUN useradd -m -u 1000 user
212
+ USER user
213
+ ```
214
+
215
+ ### ✅ Write Permissions
216
+ Directories that need write access:
217
+ ```dockerfile
218
+ RUN chmod -R 777 /app/app/static/uploads
219
+ RUN chmod -R 777 /app/app/database
220
+ ```
221
+
222
+ ### ✅ Database URL
223
+ Your app correctly handles both:
224
+ - `postgresql://` (standard)
225
+ - `postgres://` (auto-converted)
226
+
227
+ ---
228
+
229
+ ## 🔍 Verification Checklist
230
+
231
+ After deployment, verify:
232
+
233
+ - [ ] Space status shows "Running" (green)
234
+ - [ ] Logs show: "🚀 Production Mode: Using PostgreSQL"
235
+ - [ ] Access `/docs` (Swagger UI loads)
236
+ - [ ] Can register a new user
237
+ - [ ] Can login and get JWT token
238
+ - [ ] Dashboard loads correctly
239
+ - [ ] Single prediction works
240
+ - [ ] Batch CSV upload works
241
+ - [ ] Word cloud generates
242
+ - [ ] Charts display
243
+
244
+ ---
245
+
246
+ ## 🐛 Common Issues & Solutions
247
+
248
+ ### Issue: "Application startup failed"
249
+ **Solution:** Check DATABASE_URL in Settings → Secrets
250
+
251
+ ### Issue: "Database connection refused"
252
+ **Solution:** Ensure PostgreSQL allows external connections
253
+
254
+ ### Issue: "502 Bad Gateway"
255
+ **Solution:** Wait 2-3 minutes for model loading
256
+
257
+ ### Issue: "Permission denied" errors
258
+ **Solution:** Verify user permissions in Dockerfile
259
+
260
+ ---
261
+
262
+ ## 📚 Documentation Reference
263
+
264
+ 1. **`HUGGING_FACE_DEPLOYMENT.md`** - Full deployment guide
265
+ 2. **`HF_ENV_VARIABLES.md`** - Environment variables details
266
+ 3. **`HF_DEPLOYMENT_CHECKLIST.md`** - Step-by-step checklist
267
+ 4. **`README_HF_SPACE.md`** - Space homepage content
268
+ 5. **`test_docker_local.py`** - Local testing script
269
+
270
+ ---
271
+
272
+ ## 🎉 Success Criteria
273
+
274
+ Your deployment is successful when:
275
+
276
+ ✅ Docker image builds without errors
277
+ ✅ Container starts on port 7860
278
+ ✅ PostgreSQL connection established
279
+ ✅ All API endpoints respond
280
+ ✅ Authentication works (register/login)
281
+ ✅ Predictions complete successfully
282
+ ✅ Visualizations generate correctly
283
+
284
+ ---
285
+
286
+ ## 🆘 Support & Resources
287
+
288
+ - 📖 [HF Spaces Docker Guide](https://huggingface.co/docs/hub/spaces-sdks-docker)
289
+ - 📖 [FastAPI Documentation](https://fastapi.tiangolo.com)
290
+ - 💬 [HF Community Forum](https://discuss.huggingface.co)
291
+
292
+ ---
293
+
294
+ ## 🔄 Next Steps
295
+
296
+ 1. ✅ Create external PostgreSQL database (Render/Neon)
297
+ 2. ✅ Generate SECRET_KEY
298
+ 3. ✅ Test Docker build locally (optional but recommended)
299
+ 4. ✅ Create Hugging Face Space
300
+ 5. ✅ Add environment variables
301
+ 6. ✅ Push code to HF Space
302
+ 7. ✅ Monitor build and verify deployment
303
+ 8. ✅ Test application functionality
304
+ 9. ✅ Share your Space with users! 🎉
305
+
306
+ ---
307
+
308
+ **Migration completed on:** December 1, 2025
309
+ **Target Platform:** Hugging Face Spaces (Docker SDK)
310
+ **Status:** ✅ Ready for Deployment
311
+
312
+ ---
313
+
314
+ **Good luck with your deployment! 🚀**
HF_QUICK_REFERENCE.md ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 Hugging Face Spaces - Quick Reference Card
2
+
3
+ ## ⚡ Quick Deploy (5 Steps)
4
+
5
+ ```bash
6
+ # 1. Create Space on HF
7
+ https://huggingface.co/new-space → Docker SDK → CPU Basic
8
+
9
+ # 2. Add Secrets (Settings → Repository Secrets)
10
+ DATABASE_URL = postgresql://user:pass@host:port/db
11
+ SECRET_KEY = <generate with: python -c "import secrets; print(secrets.token_urlsafe(32))">
12
+
13
+ # 3. Clone Space repo
14
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE
15
+ cd YOUR_SPACE
16
+
17
+ # 4. Copy project files
18
+ # Copy all except: env/, __pycache__/, *.db, .env
19
+
20
+ # 5. Push
21
+ git add .
22
+ git commit -m "Initial deployment"
23
+ git push origin main
24
+ ```
25
+
26
+ ---
27
+
28
+ ## 🔐 Environment Variables
29
+
30
+ | Variable | Required | Example |
31
+ |----------|----------|---------|
32
+ | `DATABASE_URL` | ✅ Yes | `postgresql://user:pass@host:5432/db` |
33
+ | `SECRET_KEY` | ✅ Yes | `xK7mP9vR2nQ5wT8yU4eL6hG3jN0bM...` |
34
+
35
+ ---
36
+
37
+ ## 📋 Critical Files Checklist
38
+
39
+ ```
40
+ ✅ Dockerfile (port 7860, user 1000)
41
+ ✅ requirements.txt (no gunicorn)
42
+ ✅ main.py
43
+ ✅ app/ directory
44
+ ✅ .dockerignore
45
+ ✅ README.md (from README_HF_SPACE.md)
46
+ ```
47
+
48
+ ---
49
+
50
+ ## 🐳 Dockerfile Must-Haves
51
+
52
+ ```dockerfile
53
+ # ✅ Port 7860 (HF requirement)
54
+ EXPOSE 7860
55
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
56
+
57
+ # ✅ User ID 1000 (HF requirement)
58
+ RUN useradd -m -u 1000 user
59
+ USER user
60
+
61
+ # ✅ Write permissions
62
+ RUN chmod -R 777 /app/app/static/uploads
63
+ ```
64
+
65
+ ---
66
+
67
+ ## 🧪 Test Locally
68
+
69
+ ```bash
70
+ # Build
71
+ docker build -t test .
72
+
73
+ # Run
74
+ docker run -p 7860:7860 \
75
+ -e DATABASE_URL="postgresql://..." \
76
+ -e SECRET_KEY="..." \
77
+ test
78
+
79
+ # Access
80
+ http://localhost:7860
81
+ ```
82
+
83
+ ---
84
+
85
+ ## 🔍 Verify Deployment
86
+
87
+ ```
88
+ ✅ Space shows "Running" status
89
+ ✅ Logs show: "🚀 Production Mode: Using PostgreSQL"
90
+ ✅ Access /docs (Swagger UI)
91
+ ✅ Can register and login
92
+ ✅ Predictions work
93
+ ```
94
+
95
+ ---
96
+
97
+ ## 🐛 Common Errors
98
+
99
+ | Error | Fix |
100
+ |-------|-----|
101
+ | App startup failed | Check DATABASE_URL in Secrets |
102
+ | 502 Bad Gateway | Wait 2-3 min for model loading |
103
+ | Permission denied | Check Dockerfile user permissions |
104
+ | Database refused | Allow external connections in DB |
105
+
106
+ ---
107
+
108
+ ## 📊 Key Differences: Render vs HF
109
+
110
+ | | Render | Hugging Face |
111
+ |-|--------|--------------|
112
+ | RAM | 512MB | 16GB |
113
+ | Port | Auto | 7860 (fixed) |
114
+ | Deploy | Procfile | Dockerfile |
115
+ | User | root | user (1000) |
116
+
117
+ ---
118
+
119
+ ## 📚 Documentation Files
120
+
121
+ - `HUGGING_FACE_DEPLOYMENT.md` - Full guide
122
+ - `HF_ENV_VARIABLES.md` - Secrets setup
123
+ - `HF_DEPLOYMENT_CHECKLIST.md` - Step-by-step
124
+ - `HF_MIGRATION_SUMMARY.md` - Overview
125
+
126
+ ---
127
+
128
+ ## 🆘 Emergency Commands
129
+
130
+ ```bash
131
+ # View logs
132
+ # Go to Space → Logs tab
133
+
134
+ # Rebuild
135
+ git commit --allow-empty -m "Rebuild"
136
+ git push
137
+
138
+ # Rollback
139
+ git revert HEAD
140
+ git push
141
+ ```
142
+
143
+ ---
144
+
145
+ ## ✅ Success Indicators
146
+
147
+ ```
148
+ INFO: Started server process [1]
149
+ INFO: Uvicorn running on http://0.0.0.0:7860
150
+ 🚀 Production Mode: Using PostgreSQL
151
+ ```
152
+
153
+ ---
154
+
155
+ ## 🔗 Important Links
156
+
157
+ - Create Space: https://huggingface.co/new-space
158
+ - HF Docs: https://huggingface.co/docs/hub/spaces-sdks-docker
159
+ - FastAPI Docs: https://fastapi.tiangolo.com
160
+
161
+ ---
162
+
163
+ **Print this for quick reference during deployment! 📄**
HUGGING_FACE_DEPLOYMENT.md ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 Rating Prediction System - Hugging Face Spaces Deployment
2
+
3
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces)
4
+ [![FastAPI](https://img.shields.io/badge/FastAPI-0.104.1-009688.svg?style=flat&logo=FastAPI&logoColor=white)](https://fastapi.tiangolo.com)
5
+ [![Docker](https://img.shields.io/badge/Docker-Enabled-2496ED?logo=docker&logoColor=white)](https://www.docker.com/)
6
+
7
+ A production-ready FastAPI application for predicting product ratings from Vietnamese comments using PhoBERT. This Space uses Docker SDK for deploying heavy ML models (>500MB) with 16GB RAM.
8
+
9
+ ---
10
+
11
+ ## 🎯 Features
12
+
13
+ - 🤖 **ML-Powered Predictions**: PhoBERT-based sentiment analysis
14
+ - 📊 **Interactive Dashboard**: Real-time visualizations with Chart.js
15
+ - 💬 **Batch Processing**: Upload CSV files for bulk predictions
16
+ - 🔐 **Secure Authentication**: JWT-based user management
17
+ - 📈 **Analytics**: Word clouds and rating distributions
18
+ - 🗄️ **External Database**: PostgreSQL support (Render/Neon)
19
+
20
+ ---
21
+
22
+ ## 🔧 Configuration Required
23
+
24
+ ### Required Environment Variables
25
+
26
+ **CRITICAL:** Before deploying to Hugging Face Spaces, you MUST add these environment variables in the **Settings** tab:
27
+
28
+ #### 1. DATABASE_URL (REQUIRED)
29
+ ```
30
+ DATABASE_URL=postgresql://username:password@host:port/database
31
+ ```
32
+ **Real External Db url**
33
+ ```
34
+ DATABASE_URL=postgresql://rating_prediction_user:2p3Xv9mKFt3DDFs9OVWDrw8ARHkevTSw@dpg-d4mfq13uibrs738i6jl0-a.singapore-postgres.render.com/rating_prediction
35
+ ```
36
+ **Example from Render:**
37
+ ```
38
+ DATABASE_URL=postgresql://user:pass@dpg-xxxxx.oregon-postgres.render.com/dbname
39
+ ```
40
+
41
+ **Example from Neon:**
42
+ ```
43
+ DATABASE_URL=postgresql://user:pass@ep-xxxxx.us-east-2.aws.neon.tech/dbname?sslmode=require
44
+ ```
45
+
46
+ ⚠️ **Important Notes:**
47
+ - The URL MUST start with `postgresql://` (NOT `postgres://`)
48
+ - If your provider gives you `postgres://`, the app will auto-convert it
49
+ - Include `?sslmode=require` for secure connections (recommended)
50
+
51
+ #### 2. SECRET_KEY (REQUIRED)
52
+ ```
53
+ SECRET_KEY=your-super-secret-jwt-key-change-this-in-production-min-32-chars
54
+ ```
55
+
56
+ **Generate a secure key:**
57
+ ```bash
58
+ python -c "import secrets; print(secrets.token_urlsafe(32))"
59
+ ```
60
+
61
+ **Real SECRECT_KEY:**
62
+ nz0qzAJoIiRQ3v62SAq8g94JAFtfmf-GSU6dkluKtKA
63
+
64
+ ⚠️ **Security:**
65
+ - NEVER commit this key to Git
66
+ - Use a cryptographically secure random string
67
+ - Minimum 32 characters recommended
68
+
69
+ ---
70
+
71
+ ## 📋 Deployment Steps
72
+
73
+ ### Step 1: Create a New Space
74
+ 1. Go to https://huggingface.co/new-space
75
+ 2. Choose **Docker** as the SDK
76
+ 3. Select **CPU Basic** (16GB RAM - Free)
77
+ 4. Make the Space **Public** or **Private**
78
+
79
+ ### Step 2: Configure Environment Variables
80
+ 1. Go to your Space's **Settings** tab
81
+ 2. Scroll to **Repository Secrets**
82
+ 3. Add the following secrets:
83
+ - `DATABASE_URL` → Your PostgreSQL connection string
84
+ - `SECRET_KEY` → Your JWT secret key
85
+
86
+ ### Step 3: Push Your Code
87
+ ```bash
88
+ # Clone your Space repository
89
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME
90
+ cd YOUR_SPACE_NAME
91
+
92
+ # Copy your project files
93
+ cp -r /path/to/PredictRating/* .
94
+
95
+ # Commit and push
96
+ git add .
97
+ git commit -m "Initial deployment"
98
+ git push
99
+ ```
100
+
101
+ ### Step 4: Wait for Build
102
+ - Hugging Face will automatically build your Docker image
103
+ - Build time: ~5-10 minutes (depending on model size)
104
+ - Check build logs in the **Logs** tab
105
+
106
+ ### Step 5: Access Your App
107
+ - Your app will be available at: `https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME`
108
+ - The app runs on port **7860** (handled automatically)
109
+
110
+ ---
111
+
112
+ ## 🗄️ Database Setup
113
+
114
+ ### Option A: Render PostgreSQL (Recommended)
115
+ 1. Create a free PostgreSQL database on [Render](https://render.com)
116
+ 2. Go to **Dashboard** → **New** → **PostgreSQL**
117
+ 3. Copy the **External Database URL**
118
+ 4. Add it as `DATABASE_URL` in HF Spaces Settings
119
+
120
+ ### Option B: Neon PostgreSQL
121
+ 1. Create a free database on [Neon](https://neon.tech)
122
+ 2. Copy the connection string
123
+ 3. Ensure it includes `?sslmode=require`
124
+ 4. Add it as `DATABASE_URL` in HF Spaces Settings
125
+
126
+ ### Database Initialization
127
+ The app automatically:
128
+ - Creates tables on first run
129
+ - Supports both SQLite (local dev) and PostgreSQL (production)
130
+ - No manual migrations needed
131
+
132
+ ---
133
+
134
+ ## 🐳 Docker Configuration
135
+
136
+ ### Port Requirements
137
+ - **CRITICAL:** Hugging Face Spaces requires port **7860**
138
+ - The Dockerfile is pre-configured correctly
139
+ - DO NOT change the port in `CMD` instruction
140
+
141
+ ### User Permissions
142
+ - Hugging Face runs containers as user ID **1000**
143
+ - The Dockerfile creates a `user` account
144
+ - All files are owned by this user
145
+
146
+ ### Storage
147
+ - `/app/static/uploads/` is writable (for word clouds)
148
+ - `/app/database/` is writable (for local SQLite fallback)
149
+ - Consider using external storage (S3/Cloudinary) for production
150
+
151
+ ---
152
+
153
+ ## 🧪 Testing Locally Before Deployment
154
+
155
+ ### Test with Docker
156
+ ```bash
157
+ # Build the Docker image
158
+ docker build -t rating-prediction .
159
+
160
+ # Run with environment variables
161
+ docker run -p 7860:7860 \
162
+ -e DATABASE_URL="postgresql://user:pass@host/db" \
163
+ -e SECRET_KEY="your-secret-key" \
164
+ rating-prediction
165
+
166
+ # Access at http://localhost:7860
167
+ ```
168
+
169
+ ### Test Database Connection
170
+ ```bash
171
+ # Inside container
172
+ docker exec -it <container_id> python -c "
173
+ from app.database import engine
174
+ print('✅ Database connected:', engine.url)
175
+ "
176
+ ```
177
+
178
+ ---
179
+
180
+ ## 📊 Monitoring & Logs
181
+
182
+ ### View Logs in Hugging Face
183
+ 1. Go to your Space
184
+ 2. Click the **Logs** tab
185
+ 3. Monitor startup and runtime logs
186
+
187
+ ### Expected Startup Messages
188
+ ```
189
+ 🚀 Production Mode: Using PostgreSQL
190
+ INFO: Started server process [1]
191
+ INFO: Uvicorn running on http://0.0.0.0:7860
192
+ ```
193
+
194
+ ---
195
+
196
+ ## 🔒 Security Checklist
197
+
198
+ - ✅ `SECRET_KEY` stored as HF Secret (not in code)
199
+ - ✅ `DATABASE_URL` stored as HF Secret (not in code)
200
+ - ✅ PostgreSQL uses SSL (`sslmode=require`)
201
+ - ✅ Passwords hashed with bcrypt
202
+ - ✅ JWT tokens expire after 24 hours
203
+ - ✅ Docker runs as non-root user
204
+
205
+ ---
206
+
207
+ ## 🐛 Troubleshooting
208
+
209
+ ### Issue: "Application startup failed"
210
+ **Solution:** Check logs for database connection errors. Verify `DATABASE_URL` is correct.
211
+
212
+ ### Issue: "502 Bad Gateway"
213
+ **Solution:** App may be starting. Wait 2-3 minutes for heavy model loading.
214
+
215
+ ### Issue: "Database connection refused"
216
+ **Solution:** Ensure your PostgreSQL database is accessible from external IPs. Check firewall rules.
217
+
218
+ ### Issue: "No module named 'app'"
219
+ **Solution:** Ensure all files are copied correctly. Check Dockerfile `WORKDIR` is `/app`.
220
+
221
+ ### Issue: "Port 7860 already in use"
222
+ **Solution:** Only relevant for local testing. Stop other containers on that port.
223
+
224
+ ---
225
+
226
+ ## 📚 API Documentation
227
+
228
+ Once deployed, access:
229
+ - **Swagger UI**: `https://your-space.hf.space/docs`
230
+ - **ReDoc**: `https://your-space.hf.space/redoc`
231
+
232
+ ### Key Endpoints
233
+ - `POST /api/auth/register` - Create new user
234
+ - `POST /api/auth/login` - Login and get JWT token
235
+ - `POST /api/predict/single` - Predict single comment
236
+ - `POST /api/predict/batch` - Upload CSV for batch predictions
237
+ - `GET /api/predict/history` - View prediction history
238
+
239
+ ---
240
+
241
+ ## 🆘 Support
242
+
243
+ If you encounter issues:
244
+ 1. Check the **Logs** tab in your Space
245
+ 2. Verify environment variables in **Settings**
246
+ 3. Test database connection from your local machine
247
+ 4. Review [FastAPI Docs](https://fastapi.tiangolo.com)
248
+ 5. Check [Hugging Face Spaces Docs](https://huggingface.co/docs/hub/spaces-overview)
249
+
250
+ ---
251
+
252
+ ## 📄 License
253
+
254
+ This project is deployed under the terms specified in your Space settings.
255
+
256
+ ---
257
+
258
+ **Built with ❤️ using FastAPI, PhoBERT, and Hugging Face Spaces**
INDEX.md ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📖 Complete Documentation Index
2
+
3
+ Welcome to the **Vietnamese Product Rating Prediction System** documentation!
4
+
5
+ ---
6
+
7
+ ## 🚀 Quick Start (New Users)
8
+
9
+ If you're just getting started, read these files in order:
10
+
11
+ 1. **[QUICKSTART.md](QUICKSTART.md)** ⚡
12
+ - Installation instructions
13
+ - How to run the application
14
+ - First-time usage guide
15
+ - **Start here!**
16
+
17
+ 2. **[TESTING_GUIDE.md](TESTING_GUIDE.md)** ✅
18
+ - Step-by-step testing procedures
19
+ - Expected results for each test
20
+ - Troubleshooting common issues
21
+
22
+ 3. **[PROJECT_SUMMARY.md](PROJECT_SUMMARY.md)** 📋
23
+ - Overview of all features
24
+ - What has been built
25
+ - How to replace dummy ML model
26
+
27
+ ---
28
+
29
+ ## 📚 Detailed Documentation
30
+
31
+ ### For Understanding the System
32
+
33
+ - **[README.md](README.md)** 📖
34
+ - Complete project documentation
35
+ - Features, setup, usage
36
+ - API endpoints
37
+ - Database schema
38
+ - CSV file format
39
+
40
+ - **[ARCHITECTURE.md](ARCHITECTURE.md)** 🏗️
41
+ - System architecture diagrams
42
+ - Request flow examples
43
+ - Technology stack details
44
+ - File responsibilities
45
+ - Security features
46
+
47
+ ---
48
+
49
+ ## 🎯 For Different Purposes
50
+
51
+ ### I want to... run the application
52
+ → Read: **[QUICKSTART.md](QUICKSTART.md)**
53
+
54
+ ### I want to... test all features
55
+ → Read: **[TESTING_GUIDE.md](TESTING_GUIDE.md)**
56
+
57
+ ### I want to... understand the code structure
58
+ → Read: **[ARCHITECTURE.md](ARCHITECTURE.md)**
59
+
60
+ ### I want to... replace the dummy ML model
61
+ → Read: **[PROJECT_SUMMARY.md](PROJECT_SUMMARY.md)** (section: "Replace Dummy ML Model")
62
+
63
+ ### I want to... demo to my teacher
64
+ → Read: **[TESTING_GUIDE.md](TESTING_GUIDE.md)** (section: "Demo Checklist for Teacher")
65
+
66
+ ### I want to... understand all features
67
+ → Read: **[README.md](README.md)** (section: "Features")
68
+
69
+ ### I want to... see API documentation
70
+ → Run app, then visit: **http://localhost:8000/docs**
71
+
72
+ ---
73
+
74
+ ## 📁 Project Files Overview
75
+
76
+ ### Documentation Files
77
+ ```
78
+ ├── README.md # Main documentation
79
+ ├── QUICKSTART.md # Quick setup guide
80
+ ├── PROJECT_SUMMARY.md # Feature summary
81
+ ├── TESTING_GUIDE.md # Testing procedures
82
+ ├── ARCHITECTURE.md # System architecture
83
+ └── INDEX.md # This file (navigation)
84
+ ```
85
+
86
+ ### Code Files
87
+ ```
88
+ ├── main.py # FastAPI entry point
89
+ ├── requirements.txt # Python dependencies
90
+ ├── sample_comments.csv # Test data
91
+ ├── .gitignore # Git ignore rules
92
+
93
+ └── app/
94
+ ├── config.py # Configuration
95
+ ├── database.py # Database setup
96
+ ├── models.py # Database models
97
+ ├── schemas.py # Pydantic schemas
98
+
99
+ ├── routers/ # API endpoints
100
+ │ ├── auth.py
101
+ │ ├── prediction.py
102
+ │ └── dashboard.py
103
+
104
+ ├── services/ # Business logic
105
+ │ ├── auth_service.py
106
+ │ ├── ml_service.py
107
+ │ └── visualization_service.py
108
+
109
+ ├── templates/ # HTML templates
110
+ │ ├── base.html
111
+ │ ├── login.html
112
+ │ ├── register.html
113
+ │ └── dashboard.html
114
+
115
+ └── static/ # Static files
116
+ ├── css/
117
+ ├── js/
118
+ └── uploads/
119
+ ```
120
+
121
+ ---
122
+
123
+ ## 🎓 For Students (Project Presentation)
124
+
125
+ ### Before Presentation
126
+ 1. Read **[QUICKSTART.md](QUICKSTART.md)** to set up
127
+ 2. Test everything using **[TESTING_GUIDE.md](TESTING_GUIDE.md)**
128
+ 3. Review **[PROJECT_SUMMARY.md](PROJECT_SUMMARY.md)** for highlights
129
+
130
+ ### During Presentation
131
+ 1. **Show Swagger UI** (bonus points!) → http://localhost:8000/docs
132
+ 2. **Demo user journey:**
133
+ - Register → Login
134
+ - Single prediction
135
+ - Batch CSV with visualizations
136
+ 3. **Explain architecture** using **[ARCHITECTURE.md](ARCHITECTURE.md)**
137
+
138
+ ### Key Points to Mention
139
+ ✅ FastAPI with automatic API documentation
140
+ ✅ JWT authentication for security
141
+ ✅ RESTful API design
142
+ ✅ Data visualization (Chart.js + WordCloud)
143
+ ✅ Separation of concerns (clean architecture)
144
+ ✅ Database relationships and ORM
145
+
146
+ ---
147
+
148
+ ## 🔧 For Developers
149
+
150
+ ### Understanding the Codebase
151
+ 1. **[ARCHITECTURE.md](ARCHITECTURE.md)** - System overview
152
+ 2. **[README.md](README.md)** - Detailed documentation
153
+ 3. Code files (with inline comments)
154
+
155
+ ### Modifying the System
156
+
157
+ **To replace ML model:**
158
+ → Edit: `app/services/ml_service.py`
159
+ → See: **[PROJECT_SUMMARY.md](PROJECT_SUMMARY.md)** section "Replace Dummy ML Model"
160
+
161
+ **To add products:**
162
+ → Edit: `app/config.py` → `PRODUCTS` list
163
+
164
+ **To add Vietnamese stopwords:**
165
+ → Edit: `app/services/visualization_service.py` → `self.stopwords`
166
+
167
+ **To change styling:**
168
+ → Edit: `app/templates/*.html` (TailwindCSS classes)
169
+
170
+ **To add API endpoints:**
171
+ → Create route in: `app/routers/*.py`
172
+
173
+ ---
174
+
175
+ ## 📊 Key Features Reference
176
+
177
+ | Feature | File | Documentation |
178
+ |---------|------|---------------|
179
+ | User Authentication | `app/routers/auth.py` | [README.md](README.md) |
180
+ | Single Prediction | `app/routers/prediction.py` | [README.md](README.md) |
181
+ | Batch Prediction | `app/routers/prediction.py` | [README.md](README.md) |
182
+ | WordCloud | `app/services/visualization_service.py` | [ARCHITECTURE.md](ARCHITECTURE.md) |
183
+ | Database Models | `app/models.py` | [README.md](README.md) |
184
+ | ML Service | `app/services/ml_service.py` | [PROJECT_SUMMARY.md](PROJECT_SUMMARY.md) |
185
+
186
+ ---
187
+
188
+ ## 🐛 Troubleshooting
189
+
190
+ For common issues and solutions:
191
+ → **[TESTING_GUIDE.md](TESTING_GUIDE.md)** (Troubleshooting section)
192
+
193
+ For API errors:
194
+ → Check Swagger UI: http://localhost:8000/docs
195
+
196
+ For understanding error messages:
197
+ → **[ARCHITECTURE.md](ARCHITECTURE.md)** (Request Flow section)
198
+
199
+ ---
200
+
201
+ ## 📞 Quick Reference Commands
202
+
203
+ ```bash
204
+ # Install dependencies
205
+ pip install -r requirements.txt
206
+
207
+ # Run application
208
+ python main.py
209
+
210
+ # Access Swagger UI
211
+ # Open: http://localhost:8000/docs
212
+
213
+ # Access dashboard
214
+ # Open: http://localhost:8000/dashboard
215
+
216
+ # Test with sample data
217
+ # Upload: sample_comments.csv
218
+ ```
219
+
220
+ ---
221
+
222
+ ## ✅ Checklist for Teacher Demo
223
+
224
+ Before presenting to teacher:
225
+
226
+ - [ ] All dependencies installed (`pip install -r requirements.txt`)
227
+ - [ ] Application runs successfully (`python main.py`)
228
+ - [ ] Can access Swagger UI (http://localhost:8000/docs)
229
+ - [ ] Can register and login
230
+ - [ ] Single prediction works
231
+ - [ ] Batch CSV prediction works
232
+ - [ ] Charts and word cloud display correctly
233
+ - [ ] CSV download works
234
+ - [ ] Understand system architecture
235
+ - [ ] Can explain how to replace ML model
236
+
237
+ ---
238
+
239
+ ## 🎯 Learning Outcomes
240
+
241
+ After completing this project, you will understand:
242
+
243
+ 1. **FastAPI Framework**
244
+ - Route definition
245
+ - Dependency injection
246
+ - Automatic API documentation
247
+ - Request/response validation
248
+
249
+ 2. **Authentication**
250
+ - JWT tokens
251
+ - Password hashing (bcrypt)
252
+ - Protected routes
253
+
254
+ 3. **Database**
255
+ - SQLAlchemy ORM
256
+ - Model relationships
257
+ - CRUD operations
258
+
259
+ 4. **Frontend**
260
+ - Jinja2 templating
261
+ - TailwindCSS styling
262
+ - JavaScript Fetch API
263
+ - Chart.js visualization
264
+
265
+ 5. **Software Architecture**
266
+ - Separation of concerns
267
+ - Service layer pattern
268
+ - RESTful API design
269
+
270
+ ---
271
+
272
+ ## 📧 Documentation Feedback
273
+
274
+ If any documentation is unclear or missing information:
275
+ 1. Check other documentation files
276
+ 2. Look at code comments
277
+ 3. Consult with your instructor
278
+
279
+ ---
280
+
281
+ ## 🎉 You're All Set!
282
+
283
+ You now have:
284
+ ✅ Complete working application
285
+ ✅ Comprehensive documentation
286
+ ✅ Testing guide
287
+ ✅ Architecture documentation
288
+ ✅ Demo preparation materials
289
+
290
+ **Good luck with your project! 🎓**
291
+
292
+ ---
293
+
294
+ *Last Updated: November 25, 2024*
295
+ *Project: Vietnamese Product Rating Prediction System*
296
+ *Framework: FastAPI + Jinja2 + TailwindCSS*
PROJECT_STRUCTURE.txt ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📁 Complete Project Structure
2
+
3
+ ```
4
+ PredictRating/
5
+
6
+ ├── 📄 main.py # FastAPI application entry point
7
+ ├── 📄 requirements.txt # Python dependencies
8
+ ├── 📄 .gitignore # Git ignore rules
9
+
10
+ ├── 📄 sample_comments.csv # Sample test data (20 Vietnamese comments)
11
+
12
+ ├── 📚 DOCUMENTATION FILES
13
+ │ ├── 📖 README.md # Main documentation (complete guide)
14
+ │ ├── ⚡ QUICKSTART.md # Quick setup and first run guide
15
+ │ ├── 📋 PROJECT_SUMMARY.md # Feature overview and highlights
16
+ │ ├── ✅ TESTING_GUIDE.md # Step-by-step testing procedures
17
+ │ ├── 🏗️ ARCHITECTURE.md # System architecture and design
18
+ │ ├── 📑 INDEX.md # Documentation navigation (this file)
19
+ │ └── 📁 PROJECT_STRUCTURE.txt # This visual tree structure
20
+
21
+ └── 📁 app/ # Main application package
22
+
23
+ ├── 📄 __init__.py # Package initializer
24
+ ├── 📄 config.py # Configuration (SECRET_KEY, PRODUCTS, paths)
25
+ ├── 📄 database.py # SQLAlchemy engine & session management
26
+ ├── 📄 models.py # Database models (User, PredictionHistory)
27
+ ├── 📄 schemas.py # Pydantic validation schemas
28
+
29
+ ├── 📁 routers/ # API Route Handlers
30
+ │ ├── 📄 __init__.py
31
+ │ ├── 📄 auth.py # Authentication endpoints
32
+ │ │ # - POST /api/auth/register
33
+ │ │ # - POST /api/auth/login
34
+ │ │ # - GET /api/auth/me
35
+ │ │
36
+ │ ├── 📄 prediction.py # Prediction endpoints
37
+ │ │ # - POST /api/predict/single
38
+ │ │ # - POST /api/predict/batch
39
+ │ │ # - GET /api/predict/history
40
+ │ │
41
+ │ └── 📄 dashboard.py # Frontend page routes
42
+ │ # - GET /
43
+ │ # - GET /login
44
+ │ # - GET /register
45
+ │ # - GET /dashboard
46
+
47
+ ├── 📁 services/ # Business Logic Layer
48
+ │ ├── 📄 __init__.py
49
+ │ │
50
+ │ ├── 📄 auth_service.py # Authentication service
51
+ │ │ # - Password hashing (bcrypt)
52
+ │ │ # - JWT token generation
53
+ │ │ # - Token validation
54
+ │ │ # - Get current user
55
+ │ │
56
+ │ ├── 📄 ml_service.py # ML Prediction service
57
+ │ │ # - predict_single() [DUMMY]
58
+ │ │ # - predict_batch() [DUMMY]
59
+ │ │ # - preprocess()
60
+ │ │ # ⚠️ REPLACE WITH YOUR REAL MODEL
61
+ │ │
62
+ │ └── 📄 visualization_service.py # Visualization service
63
+ │ # - generate_wordcloud()
64
+ │ # - calculate_rating_distribution()
65
+ │ # - get_top_words()
66
+
67
+ ├── 📁 templates/ # Jinja2 HTML Templates
68
+ │ ├── 📄 base.html # Base layout template
69
+ │ │ # - TailwindCSS CDN
70
+ │ │ # - Chart.js CDN
71
+ │ │ # - Font Awesome icons
72
+ │ │ # - Header/Footer structure
73
+ │ │
74
+ │ ├── 📄 login.html # Login page
75
+ │ │ # - Login form
76
+ │ │ # - JWT token handling
77
+ │ │ # - Link to register
78
+ │ │
79
+ │ ├── 📄 register.html # Registration page
80
+ │ │ # - Registration form
81
+ │ │ # - Form validation
82
+ │ │ # - Link to login
83
+ │ │
84
+ │ └── 📄 dashboard.html # Main dashboard
85
+ │ # - Product selection dropdown
86
+ │ # - Single/Batch tabs
87
+ │ # - Prediction forms
88
+ │ # - Chart.js visualization
89
+ │ # - WordCloud display
90
+ │ # - Results table
91
+ │ # - CSV download
92
+
93
+ ├── 📁 static/ # Static Files
94
+ │ ├── 📁 css/
95
+ │ │ └── 📄 style.css # Custom CSS (placeholder)
96
+ │ │
97
+ │ ├── 📁 js/
98
+ │ │ └── 📄 main.js # Custom JavaScript (placeholder)
99
+ │ │
100
+ │ └── 📁 uploads/ # User uploads directory
101
+ │ ├── 📄 .gitkeep # Keep directory in git
102
+ │ └── 📁 wordclouds/ # Generated word cloud images
103
+
104
+ └── 📁 database/ # Database Storage
105
+ ├── 📄 .gitkeep # Keep directory in git
106
+ └── 🗄️ rating_prediction.db # SQLite database (created on first run)
107
+ # Tables:
108
+ # - users
109
+ # - prediction_history
110
+ ```
111
+
112
+ ---
113
+
114
+ ## 📊 File Count Summary
115
+
116
+ | Category | Count | Files |
117
+ |----------|-------|-------|
118
+ | **Documentation** | 7 | README, QUICKSTART, PROJECT_SUMMARY, TESTING_GUIDE, ARCHITECTURE, INDEX, PROJECT_STRUCTURE |
119
+ | **Core Python** | 5 | main.py, config.py, database.py, models.py, schemas.py |
120
+ | **Routers** | 3 | auth.py, prediction.py, dashboard.py |
121
+ | **Services** | 3 | auth_service.py, ml_service.py, visualization_service.py |
122
+ | **Templates** | 4 | base.html, login.html, register.html, dashboard.html |
123
+ | **Static** | 2 | style.css, main.js |
124
+ | **Config** | 3 | requirements.txt, .gitignore, .gitkeep files |
125
+ | **Test Data** | 1 | sample_comments.csv |
126
+ | **Total** | **28** | |
127
+
128
+ ---
129
+
130
+ ## 🎯 Key Directories Explained
131
+
132
+ ### `/app/routers/` - API Endpoints
133
+ - **Purpose:** Handle HTTP requests and responses
134
+ - **Pattern:** Each router handles a specific domain (auth, prediction, dashboard)
135
+ - **Uses:** FastAPI decorators (@router.get, @router.post)
136
+
137
+ ### `/app/services/` - Business Logic
138
+ - **Purpose:** Core functionality separated from HTTP layer
139
+ - **Pattern:** Service classes with dependency injection
140
+ - **Uses:** Called by routers, interacts with database and external services
141
+
142
+ ### `/app/templates/` - Frontend Views
143
+ - **Purpose:** HTML templates for user interface
144
+ - **Pattern:** Jinja2 template inheritance (extends base.html)
145
+ - **Uses:** Rendered by FastAPI's Jinja2Templates
146
+
147
+ ### `/app/static/` - Static Assets
148
+ - **Purpose:** CSS, JavaScript, images, uploads
149
+ - **Pattern:** Mounted as static files in FastAPI
150
+ - **URL:** Accessible at `/static/...`
151
+
152
+ ### `/app/database/` - Database Storage
153
+ - **Purpose:** SQLite database file location
154
+ - **Pattern:** Created automatically by SQLAlchemy
155
+ - **Schema:** Users, PredictionHistory tables
156
+
157
+ ---
158
+
159
+ ## 🔗 File Dependencies
160
+
161
+ ### main.py depends on:
162
+ - `app.database` (create tables)
163
+ - `app.routers.*` (include routers)
164
+ - `fastapi`, `uvicorn`
165
+
166
+ ### Routers depend on:
167
+ - `app.database` (get_db)
168
+ - `app.models` (User, PredictionHistory)
169
+ - `app.schemas` (validation)
170
+ - `app.services.*` (business logic)
171
+
172
+ ### Services depend on:
173
+ - `app.config` (settings)
174
+ - `app.models` (database access)
175
+ - External libraries (bcrypt, jose, wordcloud)
176
+
177
+ ### Templates depend on:
178
+ - TailwindCSS (CDN)
179
+ - Chart.js (CDN)
180
+ - Font Awesome (CDN)
181
+ - JavaScript Fetch API
182
+
183
+ ---
184
+
185
+ ## 📝 Important Files to Modify
186
+
187
+ ### To replace ML model:
188
+ ```
189
+ app/services/ml_service.py
190
+ └── Update: __init__(), predict_single(), predict_batch()
191
+ ```
192
+
193
+ ### To add products:
194
+ ```
195
+ app/config.py
196
+ └── Update: PRODUCTS list
197
+ ```
198
+
199
+ ### To change UI styling:
200
+ ```
201
+ app/templates/*.html
202
+ └── Edit: TailwindCSS classes
203
+ ```
204
+
205
+ ### To add API endpoints:
206
+ ```
207
+ app/routers/*.py
208
+ └── Add: New route functions
209
+ ```
210
+
211
+ ### To modify Vietnamese stopwords:
212
+ ```
213
+ app/services/visualization_service.py
214
+ └── Update: self.stopwords set
215
+ ```
216
+
217
+ ---
218
+
219
+ ## 🚀 Execution Flow
220
+
221
+ 1. **Start:** `python main.py`
222
+ 2. **Load:** main.py imports all modules
223
+ 3. **Initialize:** Create database tables
224
+ 4. **Mount:** Static files and templates
225
+ 5. **Include:** All routers (auth, prediction, dashboard)
226
+ 6. **Run:** Uvicorn server on port 8000
227
+ 7. **Ready:** Application accessible at http://localhost:8000
228
+
229
+ ---
230
+
231
+ ## 🔐 Generated Files (Not in Git)
232
+
233
+ These files are created when you run the application:
234
+
235
+ ```
236
+ app/database/rating_prediction.db # SQLite database
237
+ app/static/uploads/wordclouds/*.png # Generated word cloud images
238
+ __pycache__/ # Python bytecode
239
+ *.pyc # Compiled Python files
240
+ ```
241
+
242
+ These are ignored by `.gitignore`
243
+
244
+ ---
245
+
246
+ ## 📦 External Dependencies (from requirements.txt)
247
+
248
+ ```
249
+ fastapi # Web framework
250
+ uvicorn # ASGI server
251
+ sqlalchemy # ORM
252
+ python-jose # JWT
253
+ passlib # Password hashing
254
+ pydantic # Validation
255
+ jinja2 # Templates
256
+ wordcloud # Word clouds
257
+ matplotlib # Plotting
258
+ python-multipart # File uploads
259
+ ```
260
+
261
+ ---
262
+
263
+ ## 🎨 Frontend Stack
264
+
265
+ ```
266
+ HTML
267
+ ├── Jinja2 templates (server-side rendering)
268
+ └── Semantic HTML5
269
+
270
+ CSS
271
+ ├── TailwindCSS 3.x (CDN)
272
+ └── Custom animations (in base.html)
273
+
274
+ JavaScript
275
+ ├── Vanilla JS (no frameworks)
276
+ ├── Fetch API (HTTP requests)
277
+ ├── Chart.js (visualizations)
278
+ └── LocalStorage (JWT tokens)
279
+ ```
280
+
281
+ ---
282
+
283
+ ## 🗄️ Database Schema
284
+
285
+ ```
286
+ users
287
+ ├── id (INTEGER, PRIMARY KEY)
288
+ ├── username (VARCHAR(50), UNIQUE)
289
+ ├── email (VARCHAR(100), UNIQUE)
290
+ ├── hashed_password (VARCHAR(255))
291
+ └── created_at (DATETIME)
292
+
293
+ prediction_history
294
+ ├── id (INTEGER, PRIMARY KEY)
295
+ ├── user_id (INTEGER, FOREIGN KEY → users.id)
296
+ ├── product_name (VARCHAR(200))
297
+ ├── comment (TEXT)
298
+ ├── predicted_rating (INTEGER, 1-5)
299
+ ├── confidence_score (FLOAT)
300
+ ├── prediction_type (VARCHAR(20), 'single' or 'batch')
301
+ └── created_at (DATETIME)
302
+ ```
303
+
304
+ ---
305
+
306
+ ## ✅ Quality Checklist
307
+
308
+ - [x] All files created successfully
309
+ - [x] Project structure is organized and logical
310
+ - [x] Documentation is comprehensive
311
+ - [x] Code has inline comments
312
+ - [x] Separation of concerns implemented
313
+ - [x] RESTful API design followed
314
+ - [x] Security best practices applied
315
+ - [x] UI is responsive and user-friendly
316
+ - [x] Error handling implemented
317
+ - [x] Ready for demonstration
318
+
319
+ ---
320
+
321
+ **Total Lines of Code:** ~2000+ lines
322
+ **Total Documentation:** ~3000+ lines
323
+ **Time to Setup:** < 5 minutes
324
+ **Time to Demo:** 10-15 minutes
325
+
326
+ Your project is complete and production-ready! 🎉
PROJECT_SUMMARY.md ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📋 Project Summary - Vietnamese Product Rating Prediction System
2
+
3
+ ## ✅ What Has Been Built
4
+
5
+ ### 🏗️ Complete Project Structure
6
+ ```
7
+ PredictRating/
8
+ ├── main.py # FastAPI application entry
9
+ ├── requirements.txt # All dependencies
10
+ ├── README.md # Full documentation
11
+ ├── QUICKSTART.md # Quick setup guide
12
+ ├── sample_comments.csv # Test data
13
+ ├── .gitignore # Git ignore rules
14
+
15
+ └── app/
16
+ ├── config.py # Configuration settings
17
+ ├── database.py # Database connection
18
+ ├── models.py # SQLAlchemy models (User, PredictionHistory)
19
+ ├── schemas.py # Pydantic validation schemas
20
+
21
+ ├── routers/ # API endpoints
22
+ │ ├── auth.py # Login/Register endpoints
23
+ │ ├── prediction.py # Single/Batch prediction
24
+ │ └── dashboard.py # Frontend routes
25
+
26
+ ├── services/ # Business logic
27
+ │ ├── auth_service.py # JWT authentication & password hashing
28
+ │ ├── ml_service.py # ML prediction (DUMMY - replace with your model)
29
+ │ └── visualization_service.py # WordCloud & chart data
30
+
31
+ ├── templates/ # Jinja2 HTML templates
32
+ │ ├── base.html # Base layout with TailwindCSS
33
+ │ ├── login.html # Login page
34
+ │ ├── register.html # Registration page
35
+ │ └── dashboard.html # Main prediction interface
36
+
37
+ ├── static/ # Static files
38
+ │ ├── css/
39
+ │ ├── js/
40
+ │ └── uploads/
41
+ │ └── wordclouds/ # Generated word cloud images
42
+
43
+ └── database/ # SQLite database location
44
+ ```
45
+
46
+ ---
47
+
48
+ ## 🎯 Features Implemented
49
+
50
+ ### 1. Authentication System ✅
51
+ - **User Registration** with email validation
52
+ - **JWT-based Login** (secure token authentication)
53
+ - **Password Hashing** using bcrypt
54
+ - **Protected Routes** requiring authentication
55
+
56
+ ### 2. Single Comment Prediction ✅
57
+ - Select target product
58
+ - Input Vietnamese comment
59
+ - Get predicted rating (1-5 stars)
60
+ - Display confidence score
61
+ - Save to prediction history
62
+
63
+ ### 3. Batch CSV Prediction ✅
64
+ - Upload CSV file with comments
65
+ - Bulk prediction processing
66
+ - **Visualizations:**
67
+ - Bar chart showing rating distributionStart command
68
+ - Word cloud of frequent words
69
+ - Results table with all predictions
70
+ - **Export:** Download CSV with predicted ratings
71
+
72
+ ### 4. Data Visualization ✅
73
+ - **Chart.js** for interactive bar charts
74
+ - **WordCloud** library for generating word cloud images
75
+ - Responsive charts that update dynamically
76
+
77
+ ### 5. API Documentation ✅
78
+ - **Swagger UI** at `/docs` (automatic generation)
79
+ - **ReDoc** at `/redoc` (alternative documentation)
80
+ - Interactive API testing interface
81
+ - Complete request/response schemas
82
+
83
+ ### 6. Database Integration ✅
84
+ - **SQLite** database
85
+ - **User table** (username, email, hashed password)
86
+ - **PredictionHistory table** (tracks all predictions)
87
+ - Automatic table creation on startup
88
+
89
+ ### 7. Frontend UI ✅
90
+ - **TailwindCSS** for modern, responsive design
91
+ - **Jinja2** server-side rendering
92
+ - Tab-based interface (Single/Batch)
93
+ - Real-time form validation
94
+ - Loading states and error handling
95
+
96
+ ---
97
+
98
+ ## 🚀 How to Run
99
+
100
+ ### Step 1: Install Dependencies
101
+ ```bash
102
+ pip install -r requirements.txt
103
+ ```
104
+
105
+ ### Step 2: Start Server
106
+ ```bash
107
+ python main.py
108
+ ```
109
+
110
+ ### Step 3: Access Application
111
+ - **Dashboard:** http://localhost:8000/dashboard
112
+ - **Swagger API Docs:** http://localhost:8000/docs ⭐
113
+
114
+ ---
115
+
116
+ ## 📊 API Endpoints
117
+
118
+ ### Authentication
119
+ | Method | Endpoint | Description |
120
+ |--------|----------|-------------|
121
+ | POST | `/api/auth/register` | Register new user |
122
+ | POST | `/api/auth/login` | Login (returns JWT token) |
123
+ | GET | `/api/auth/me` | Get current user info |
124
+
125
+ ### Predictions
126
+ | Method | Endpoint | Description |
127
+ |--------|----------|-------------|
128
+ | POST | `/api/predict/single` | Predict single comment |
129
+ | POST | `/api/predict/batch` | Predict batch from CSV |
130
+ | GET | `/api/predict/history` | Get prediction history |
131
+
132
+ ### Frontend
133
+ | Method | Endpoint | Description |
134
+ |--------|----------|-------------|
135
+ | GET | `/login` | Login page |
136
+ | GET | `/register` | Registration page |
137
+ | GET | `/dashboard` | Main dashboard |
138
+
139
+ ---
140
+
141
+ ## 🔧 Replace Dummy ML Model
142
+
143
+ The file `app/services/ml_service.py` contains a **DUMMY prediction function** that returns random ratings.
144
+
145
+ ### To integrate your real model:
146
+
147
+ 1. **Load your model in `__init__`:**
148
+ ```python
149
+ def __init__(self):
150
+ self.model = load_model('path/to/your/model.h5')
151
+ self.tokenizer = load_tokenizer('path/to/tokenizer.pkl')
152
+ ```
153
+
154
+ 2. **Update `predict_single` method:**
155
+ ```python
156
+ def predict_single(self, text: str) -> Dict[str, any]:
157
+ # Preprocess Vietnamese text
158
+ preprocessed = self.preprocess(text)
159
+
160
+ # Tokenize
161
+ tokens = self.tokenizer.encode(preprocessed)
162
+
163
+ # Predict
164
+ prediction = self.model.predict([tokens])
165
+ rating = int(prediction.argmax()) + 1 # 1-5 scale
166
+ confidence = float(prediction.max())
167
+
168
+ return {
169
+ 'rating': rating,
170
+ 'confidence': confidence
171
+ }
172
+ ```
173
+
174
+ 3. **Implement preprocessing:**
175
+ ```python
176
+ def preprocess(self, text: str) -> str:
177
+ # Your Vietnamese text preprocessing
178
+ text = text.lower()
179
+ text = remove_special_characters(text)
180
+ text = normalize_vietnamese(text)
181
+ return text
182
+ ```
183
+
184
+ ---
185
+
186
+ ## 🎓 Demo for Teacher
187
+
188
+ ### Show Swagger UI (Bonus Points!)
189
+ 1. Open http://localhost:8000/docs
190
+ 2. Demonstrate:
191
+ - All API endpoints organized by tags
192
+ - Request/response schemas
193
+ - "Try it out" functionality
194
+ - Authentication with JWT Bearer token
195
+
196
+ ### User Flow Demo
197
+ 1. **Register** a new account
198
+ 2. **Login** and show JWT token storage
199
+ 3. **Single Prediction:**
200
+ - Select product
201
+ - Enter Vietnamese comment
202
+ - Show predicted rating + confidence
203
+ 4. **Batch Prediction:**
204
+ - Upload `sample_comments.csv`
205
+ - Show bar chart of rating distribution
206
+ - Show word cloud visualization
207
+ - Download CSV with predictions
208
+
209
+ ### Technical Highlights
210
+ - ✅ FastAPI automatic Swagger generation
211
+ - ✅ JWT authentication security
212
+ - ✅ RESTful API design
213
+ - ✅ Separation of concerns (routers, services, models)
214
+ - ✅ Database relationships (User ↔ PredictionHistory)
215
+ - ✅ Responsive frontend with TailwindCSS
216
+ - ✅ Data visualization with Chart.js + WordCloud
217
+
218
+ ---
219
+
220
+ ## 📦 Dependencies Installed
221
+
222
+ ```
223
+ fastapi # Web framework
224
+ uvicorn # ASGI server
225
+ sqlalchemy # ORM for database
226
+ python-jose # JWT tokens
227
+ passlib # Password hashing
228
+ pydantic # Data validation
229
+ jinja2 # Template engine
230
+ wordcloud # Word cloud generation
231
+ matplotlib # Image rendering
232
+ python-multipart # File uploads
233
+ ```
234
+
235
+ ---
236
+
237
+ ## 🎯 What You Need to Do Next
238
+
239
+ 1. **Test the application:**
240
+ - Register an account
241
+ - Try single prediction
242
+ - Upload the `sample_comments.csv` file
243
+ - Test batch prediction
244
+
245
+ 2. **Replace the dummy ML model:**
246
+ - Edit `app/services/ml_service.py`
247
+ - Load your fine-tuned model
248
+ - Implement proper preprocessing
249
+ - Update prediction logic
250
+
251
+ 3. **Customize (optional):**
252
+ - Add more products in `app/config.py`
253
+ - Adjust styling in templates
254
+ - Add more Vietnamese stopwords in visualization service
255
+
256
+ 4. **Prepare for demo:**
257
+ - Practice showing Swagger UI
258
+ - Prepare sample comments in Vietnamese
259
+ - Explain the architecture and tech stack
260
+
261
+ ---
262
+
263
+ ## 📞 Quick Reference
264
+
265
+ | What | Where |
266
+ |------|-------|
267
+ | Start server | `python main.py` |
268
+ | Swagger UI | http://localhost:8000/docs |
269
+ | Dashboard | http://localhost:8000/dashboard |
270
+ | Replace model | `app/services/ml_service.py` |
271
+ | Add products | `app/config.py` → PRODUCTS list |
272
+ | Database file | `app/database/rating_prediction.db` |
273
+ | Uploads folder | `app/static/uploads/` |
274
+ | Test CSV | `sample_comments.csv` |
275
+
276
+ ---
277
+
278
+ ## ✨ Success Criteria Met
279
+
280
+ ✅ FastAPI backend with Swagger UI
281
+ ✅ Jinja2 templates + TailwindCSS
282
+ ✅ SQLite database (Users + History)
283
+ ✅ JWT authentication
284
+ ✅ Single comment prediction
285
+ ✅ Batch CSV prediction
286
+ ✅ Data visualization (charts + word cloud)
287
+ ✅ CSV export with predictions
288
+ ✅ Professional project structure
289
+ ✅ Complete documentation
290
+
291
+ **Your ML prediction web app is ready! 🎉**
292
+
293
+ Good luck with your presentation! 🎓
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: uvicorn main:app --host 0.0.0.0 --port $PORT
QUICKSTART.md ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 Quick Start Guide
2
+
3
+ ## Installation
4
+
5
+ 1. **Install dependencies:**
6
+ ```bash
7
+ pip install -r requirements.txt
8
+ ```
9
+
10
+ 2. **Run the application:**
11
+ ```bash
12
+ python main.py
13
+ ```
14
+
15
+ 3. **Access the application:**
16
+ # Nhớ kích hoạt môi trường trước
17
+ conda activate ./env
18
+ - Dashboard: http://localhost:8000
19
+ - **Swagger API Docs: http://localhost:8000/docs** ⭐ (Show this to your teacher!)
20
+ - ReDoc: http://localhost:8000/redoc
21
+
22
+ ## First Time Usage
23
+
24
+ 1. Go to http://localhost:8000/login
25
+ 2. Click "Register here" and create an account
26
+ 3. Login with your credentials
27
+ 4. You'll be redirected to the dashboard
28
+
29
+ ## Testing Single Prediction
30
+
31
+ 1. Select a product from dropdown
32
+ 2. Click "Single Comment" tab
33
+ 3. Enter a Vietnamese comment like: "Sản phẩm rất tốt, chất lượng cao, đóng gói cẩn thận"
34
+ 4. Click "Predict Rating"
35
+ 5. See the result with rating and confidence
36
+
37
+ ## Testing Batch Prediction (CSV)
38
+
39
+ 1. Create a CSV file with this format:
40
+ ```csv
41
+ Comment
42
+ "Sản phẩm rất tốt, đóng gói cẩn thận"
43
+ "Chất lượng kém, không như mô tả"
44
+ "Giao hàng nhanh, sản phẩm ổn"
45
+ "Rất hài lòng với sản phẩm này"
46
+ "Giá hơi cao nhưng chất lượng tốt"
47
+ ```
48
+
49
+ 2. Select a product
50
+ 3. Click "Upload CSV" tab
51
+ 4. Upload your CSV file
52
+ 5. Click "Predict Batch"
53
+ 6. View:
54
+ - Bar chart showing rating distribution
55
+ - Word cloud of common words
56
+ - Full results table
57
+ - Download CSV with predictions
58
+
59
+ ## Swagger UI Demo (For Teacher)
60
+
61
+ 1. Open http://localhost:8000/docs
62
+ 2. Show the endpoints:
63
+ - Authentication (register, login)
64
+ - Predictions (single, batch)
65
+ - History
66
+ 3. Click "Try it out" to test any endpoint
67
+ 4. Show the automatic request/response documentation
68
+
69
+ ## Replace Dummy ML Model
70
+
71
+ Edit `app/services/ml_service.py`:
72
+
73
+ ```python
74
+ def __init__(self):
75
+ # Load your real model here
76
+ self.model = load_model('path/to/your/model')
77
+ self.tokenizer = load_tokenizer('path/to/tokenizer')
78
+
79
+ def predict_single(self, text: str) -> Dict[str, any]:
80
+ # Your preprocessing
81
+ preprocessed = self.preprocess(text)
82
+
83
+ # Your prediction
84
+ prediction = self.model.predict(preprocessed)
85
+ rating = int(prediction) # Convert to 1-5
86
+
87
+ return {
88
+ 'rating': rating,
89
+ 'confidence': float(prediction_confidence)
90
+ }
91
+ ```
92
+
93
+ ## Troubleshooting
94
+
95
+ **"Module not found":**
96
+ ```bash
97
+ pip install -r requirements.txt
98
+ ```
99
+
100
+ **"Port already in use":**
101
+ Edit `main.py` and change port 8000 to another number.
102
+
103
+ **"Database locked":**
104
+ Close any other instances of the app and restart.
105
+
106
+ ## Project Highlights for Presentation
107
+
108
+ ✅ **FastAPI with automatic Swagger UI** (bonus points!)
109
+ ✅ **JWT Authentication** (secure login)
110
+ ✅ **RESTful API design** (professional structure)
111
+ ✅ **Data Visualization** (charts + word clouds)
112
+ ✅ **Batch Processing** (CSV upload/download)
113
+ ✅ **Responsive UI** (TailwindCSS)
114
+ ✅ **Database Integration** (SQLite with history tracking)
115
+
116
+ Good luck! 🎓
README.md CHANGED
@@ -1,10 +1,253 @@
1
- ---
2
- title: Predict Rating
3
- emoji: 🏆
4
- colorFrom: gray
5
- colorTo: blue
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Vietnamese Product Rating Prediction System
2
+
3
+ ## 🎯 Project Overview
4
+ A full-stack web application that predicts sentiment ratings (1-5 stars) for Vietnamese product reviews using Machine Learning.
5
+
6
+ **Built for:** Introduction to Machine Learning - University Project
7
+ **Tech Stack:** FastAPI + Jinja2 + TailwindCSS + SQLite + Chart.js
8
+
9
+ ---
10
+
11
+ ## 📁 Project Structure
12
+
13
+ ```
14
+ PredictRating/
15
+ ├── app/
16
+ │ ├── database/ # SQLite database storage
17
+ │ ├── routers/ # API route handlers
18
+ │ │ ├── auth.py # Authentication endpoints
19
+ │ │ ├── prediction.py # Prediction endpoints
20
+ │ │ └── dashboard.py # Frontend routes
21
+ │ ├── services/ # Business logic
22
+ │ │ ├── auth_service.py # JWT & password handling
23
+ │ │ ├── ml_service.py # ML prediction (DUMMY - replace with your model)
24
+ │ │ └── visualization_service.py # WordCloud & charts
25
+ │ ├── static/ # Static files (CSS, JS, uploads)
26
+ │ │ └── uploads/
27
+ │ │ └── wordclouds/ # Generated word cloud images
28
+ │ ├── templates/ # Jinja2 HTML templates
29
+ │ │ ├── base.html # Base layout
30
+ │ │ ├── login.html # Login page
31
+ │ │ ├── register.html # Registration page
32
+ │ │ └── dashboard.html # Main prediction interface
33
+ │ ├── config.py # Configuration settings
34
+ │ ├── database.py # Database connection
35
+ │ ├── models.py # SQLAlchemy models (User, PredictionHistory)
36
+ │ └── schemas.py # Pydantic validation schemas
37
+ ├── main.py # FastAPI application entry point
38
+ └── requirements.txt # Python dependencies
39
+ ```
40
+
41
+ ---
42
+
43
+ ## 🚀 Setup Instructions
44
+
45
+ ### 1. Install Dependencies
46
+
47
+ ```bash
48
+ pip install -r requirements.txt
49
+ ```
50
+
51
+ ### 2. Run the Application
52
+
53
+ ```bash
54
+ python main.py
55
+ ```
56
+
57
+ The server will start at: **http://localhost:8000**
58
+
59
+ ### 3. Access the Application
60
+
61
+ - **Frontend Dashboard:** http://localhost:8000/dashboard
62
+ - **API Documentation (Swagger UI):** http://localhost:8000/docs ⭐ **SHOW THIS TO YOUR TEACHER**
63
+ - **Alternative API Docs (ReDoc):** http://localhost:8000/redoc
64
+
65
+ ---
66
+
67
+ ## 📚 API Documentation (Swagger UI)
68
+
69
+ FastAPI automatically generates **interactive API documentation** at `/docs`.
70
+
71
+ ### How to Access:
72
+ 1. Run the application
73
+ 2. Open browser: **http://localhost:8000/docs**
74
+ 3. You'll see all API endpoints with:
75
+ - Request/response schemas
76
+ - Try it out functionality
77
+ - Authentication support
78
+
79
+ ### Key API Endpoints:
80
+
81
+ #### Authentication
82
+ - `POST /api/auth/register` - Register new user
83
+ - `POST /api/auth/login` - Login (get JWT token)
84
+ - `GET /api/auth/me` - Get current user info
85
+
86
+ #### Predictions
87
+ - `POST /api/predict/single` - Predict single comment
88
+ - `POST /api/predict/batch` - Predict batch from CSV
89
+ - `GET /api/predict/history` - Get prediction history
90
+
91
+ ---
92
+
93
+ ## 🎓 How to Use (User Journey)
94
+
95
+ ### Step 1: Register/Login
96
+ 1. Go to http://localhost:8000/login
97
+ 2. Register a new account or login
98
+ 3. You'll be redirected to the dashboard
99
+
100
+ ### Step 2: Select Product
101
+ - Choose a target product from the dropdown list
102
+
103
+ ### Step 3A: Single Comment Prediction
104
+ 1. Click "Single Comment" tab
105
+ 2. Enter a Vietnamese product review
106
+ 3. Click "Predict Rating"
107
+ 4. See the predicted rating (1-5 stars) with confidence score
108
+
109
+ ### Step 3B: Batch CSV Prediction
110
+ 1. Click "Upload CSV" tab
111
+ 2. Upload a CSV file with a `Comment` column
112
+ 3. Click "Predict Batch"
113
+ 4. View results:
114
+ - **Bar Chart:** Rating distribution (how many 1⭐, 2⭐, etc.)
115
+ - **Word Cloud:** Most frequent words in comments
116
+ - **Table:** All predictions with confidence scores
117
+ - **Download:** Export results as CSV with `Predicted_Rating` column
118
+
119
+ ---
120
+
121
+ ## 🔧 Replace Dummy ML Model
122
+
123
+ The current `ml_service.py` uses a **DUMMY** prediction function. Replace it with your real model:
124
+
125
+ ### File: `app/services/ml_service.py`
126
+
127
+ ```python
128
+ class MLPredictionService:
129
+ def __init__(self):
130
+ # TODO: Load your trained model
131
+ self.model = load_model('path/to/your/model.h5') # Example
132
+ self.tokenizer = load_tokenizer('path/to/tokenizer.pkl')
133
+
134
+ def predict_single(self, text: str) -> Dict[str, any]:
135
+ # TODO: Implement your preprocessing
136
+ preprocessed = self.preprocess(text)
137
+
138
+ # TODO: Make prediction with your model
139
+ prediction = self.model.predict(preprocessed)
140
+ rating = self.postprocess(prediction) # Convert to 1-5
141
+
142
+ return {
143
+ 'rating': rating,
144
+ 'confidence': prediction.max()
145
+ }
146
+ ```
147
+
148
+ ---
149
+
150
+ ## 📊 Database Schema
151
+
152
+ ### Users Table
153
+ - `id`: Primary key
154
+ - `username`: Unique username
155
+ - `email`: Unique email
156
+ - `hashed_password`: Bcrypt hashed password
157
+ - `created_at`: Registration timestamp
158
+
159
+ ### Prediction History Table
160
+ - `id`: Primary key
161
+ - `user_id`: Foreign key to Users
162
+ - `product_name`: Product name
163
+ - `comment`: Original comment
164
+ - `predicted_rating`: Predicted rating (1-5)
165
+ - `confidence_score`: Confidence (0-1)
166
+ - `prediction_type`: 'single' or 'batch'
167
+ - `created_at`: Prediction timestamp
168
+
169
+ ---
170
+
171
+ ## 🎨 Features
172
+
173
+ ✅ **Authentication:** JWT-based secure login/registration
174
+ ✅ **Single Prediction:** Predict one comment at a time
175
+ ✅ **Batch Prediction:** Upload CSV and predict multiple comments
176
+ ✅ **Visualization:**
177
+ - Bar chart for rating distribution
178
+ - Word cloud for frequent words
179
+ ✅ **History Tracking:** All predictions saved to database
180
+ ✅ **CSV Export:** Download results with predicted ratings
181
+ ✅ **Responsive UI:** TailwindCSS mobile-friendly design
182
+ ✅ **API Documentation:** Auto-generated Swagger UI
183
+
184
+ ---
185
+
186
+ ## 🏆 Bonus Points for Teacher Demo
187
+
188
+ 1. **Show Swagger UI** at `/docs` - Automatic API documentation ⭐
189
+ 2. **Demonstrate:**
190
+ - User registration/login flow
191
+ - Single comment prediction
192
+ - CSV batch upload with visualizations
193
+ - Download CSV results
194
+ 3. **Explain:**
195
+ - Clean separation of concerns (routers, services, models)
196
+ - RESTful API design
197
+ - JWT authentication
198
+ - Database relationships
199
+
200
+ ---
201
+
202
+ ## 📝 CSV File Format
203
+
204
+ Your CSV file should have at least a `Comment` column:
205
+
206
+ ```csv
207
+ Comment
208
+ "Sản phẩm rất tốt, đóng gói cẩn thận"
209
+ "Chất lượng kém, không như mô tả"
210
+ "Giao hàng nhanh, sản phẩm ổn"
211
+ ```
212
+
213
+ After prediction, you'll get:
214
+
215
+ ```csv
216
+ Comment,Predicted_Rating,Confidence
217
+ "Sản phẩm rất tốt, đóng gói cẩn thận",5,0.95
218
+ "Chất lượng kém, không như mô tả",1,0.88
219
+ "Giao hàng nhanh, sản phẩm ổn",4,0.92
220
+ ```
221
+
222
+ ---
223
+
224
+ ## 🔐 Security Notes
225
+
226
+ - Change `SECRET_KEY` in `app/config.py` before deployment
227
+ - Passwords are hashed using bcrypt
228
+ - JWT tokens expire after 24 hours
229
+ - CORS is enabled for development (configure for production)
230
+
231
+ ---
232
+
233
+ ## 🐛 Troubleshooting
234
+
235
+ ### Issue: "Import errors" when running
236
+ **Solution:** Make sure all dependencies are installed:
237
+ ```bash
238
+ pip install -r requirements.txt
239
+ ```
240
+
241
+ ### Issue: "Database errors"
242
+ **Solution:** Delete `app/database/rating_prediction.db` and restart the app to recreate tables
243
+
244
+ ### Issue: "Word cloud doesn't display"
245
+ **Solution:** Check that `app/static/uploads/wordclouds/` directory exists
246
+
247
+ ---
248
+
249
+ ## 📧 Support
250
+
251
+ For questions about the project structure or implementation, refer to the code comments or consult your instructor.
252
+
253
+ **Good luck with your project presentation! 🎓**
README_HF_SPACE.md ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Product Rating Prediction System
3
+ emoji: ⭐
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ # ⭐ Product Rating Prediction System
12
+
13
+ A production-ready AI-powered system for predicting product ratings from Vietnamese customer comments using PhoBERT.
14
+
15
+ ## 🎯 Features
16
+
17
+ - 🤖 **Deep Learning Model**: PhoBERT-based sentiment analysis
18
+ - 💬 **Single & Batch Predictions**: Process one comment or thousands via CSV
19
+ - 📊 **Visual Analytics**: Word clouds and rating distribution charts
20
+ - 🔐 **Secure Authentication**: JWT-based user management
21
+ - 🌐 **Full-Stack Web App**: FastAPI backend + Jinja2 frontend
22
+ - 🗄️ **External Database**: PostgreSQL support for scalability
23
+
24
+ ## 🚀 Quick Start
25
+
26
+ ### For Users
27
+ 1. Click the link above to access the live application
28
+ 2. Register a new account
29
+ 3. Upload a CSV file with comments or enter a single comment
30
+ 4. View predictions, visualizations, and download results
31
+
32
+ ### For Developers
33
+ This Space requires environment variables to connect to an external PostgreSQL database. See [HUGGING_FACE_DEPLOYMENT.md](HUGGING_FACE_DEPLOYMENT.md) for setup instructions.
34
+
35
+ ## 📚 API Documentation
36
+
37
+ Once the app is running, access:
38
+ - **Swagger UI**: `/docs`
39
+ - **ReDoc**: `/redoc`
40
+
41
+ ## 🔧 Technology Stack
42
+
43
+ - **Backend**: FastAPI, SQLAlchemy, Uvicorn
44
+ - **ML/NLP**: PyTorch, Transformers, PhoBERT
45
+ - **Frontend**: Jinja2, TailwindCSS, Chart.js
46
+ - **Database**: PostgreSQL (external)
47
+ - **Security**: JWT, bcrypt
48
+
49
+ ## 📖 Documentation
50
+
51
+ - [Deployment Guide](HUGGING_FACE_DEPLOYMENT.md)
52
+ - [Environment Variables](HF_ENV_VARIABLES.md)
53
+ - [Architecture](ARCHITECTURE.md)
54
+
55
+ ## 🐳 Docker
56
+
57
+ This Space uses the Docker SDK to support heavy ML models (>500MB). The container runs on port 7860 as required by Hugging Face Spaces.
58
+
59
+ ## 🔒 Privacy & Security
60
+
61
+ - All passwords are hashed with bcrypt
62
+ - JWT tokens for secure authentication
63
+ - External PostgreSQL database with SSL
64
+ - No data stored in the container (stateless)
65
+
66
+ ## 📊 Model Information
67
+
68
+ - **Base Model**: PhoBERT (Vietnamese BERT)
69
+ - **Task**: Sentiment Analysis → Rating Prediction (1-5 stars)
70
+ - **Language**: Vietnamese
71
+ - **Model Size**: ~500MB
72
+
73
+ ## 🆘 Support
74
+
75
+ For issues or questions:
76
+ 1. Check the logs tab above
77
+ 2. Review [HUGGING_FACE_DEPLOYMENT.md](HUGGING_FACE_DEPLOYMENT.md)
78
+ 3. Open an issue in the repository
79
+
80
+ ## 📄 License
81
+
82
+ MIT License - See LICENSE file for details
83
+
84
+ ---
85
+
86
+ **Built with ❤️ using FastAPI, PhoBERT, and Hugging Face Spaces**
RENDER_QUICKSTART.md ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 QUICK DEPLOYMENT GUIDE
2
+
3
+ ## ✅ Files Changed (Production-Ready)
4
+
5
+ 1. ✅ **requirements.txt** - Added `psycopg2-binary`, `gunicorn`
6
+ 2. ✅ **app/database.py** - Hybrid SQLite/PostgreSQL support with Render URL fix
7
+ 3. ✅ **app/config.py** - Environment variable support for `SECRET_KEY`
8
+ 4. ✅ **main.py** - Auto-migration, production settings
9
+
10
+ ## 📋 Render Configuration
11
+
12
+ ### Web Service Settings
13
+
14
+ ```
15
+ Name: vietnamese-rating-prediction
16
+ Runtime: Python 3
17
+ Build Command: pip install -r requirements.txt
18
+ Start Command: gunicorn main:app --workers 4 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$PORT
19
+ ```
20
+
21
+ ### Environment Variables (Required)
22
+
23
+ ```
24
+ SECRET_KEY = <generate-with-openssl-rand-hex-32>
25
+ PYTHON_VERSION = 3.11.0
26
+ ```
27
+
28
+ ### PostgreSQL Database
29
+
30
+ ```
31
+ Name: vietnamese-rating-db
32
+ PostgreSQL Version: 15
33
+ Instance Type: Free
34
+ ```
35
+
36
+ **Link database to web service** - `DATABASE_URL` will be auto-populated.
37
+
38
+ ---
39
+
40
+ ## 🎯 Deployment Steps
41
+
42
+ ### 1. Push to GitHub
43
+ ```bash
44
+ git add .
45
+ git commit -m "Deploy to Render"
46
+ git push origin master
47
+ ```
48
+
49
+ ### 2. Create Render Web Service
50
+ - Go to https://dashboard.render.com/
51
+ - New → Web Service
52
+ - Connect GitHub repo
53
+ - Use settings above
54
+
55
+ ### 3. Create PostgreSQL Database
56
+ - New → PostgreSQL
57
+ - Use free tier
58
+ - Link to web service
59
+
60
+ ### 4. Deploy
61
+ - Click "Manual Deploy"
62
+ - Watch logs for success
63
+
64
+ ### 5. Test
65
+ ```
66
+ https://your-app.onrender.com/health
67
+ https://your-app.onrender.com/docs
68
+ https://your-app.onrender.com/dashboard
69
+ ```
70
+
71
+ ---
72
+
73
+ ## 🔧 Local Testing Before Deploy
74
+
75
+ Test hybrid database locally:
76
+
77
+ ```bash
78
+ # Test with SQLite (no DATABASE_URL)
79
+ python main.py
80
+
81
+ # Test with PostgreSQL (set DATABASE_URL)
82
+ export DATABASE_URL=postgresql://user:pass@localhost/dbname
83
+ python main.py
84
+ ```
85
+
86
+ Expected output:
87
+ ```
88
+ 🔧 Development Mode: Using SQLite
89
+ # OR
90
+ 🚀 Production Mode: Using PostgreSQL
91
+ 🔄 Creating database tables...
92
+ ✅ Database tables created successfully!
93
+ ```
94
+
95
+ ---
96
+
97
+ ## ⚠️ Important Notes
98
+
99
+ 1. **Render Free Tier Limitations:**
100
+ - App sleeps after 15 minutes of inactivity (first request takes 30-60s)
101
+ - 512MB RAM (may need optimization for ML model)
102
+ - 1GB PostgreSQL storage
103
+
104
+ 2. **ML Model Optimization:**
105
+ - Consider lazy loading (load on first request)
106
+ - Use CPU-optimized PyTorch
107
+ - Cache predictions if possible
108
+
109
+ 3. **Static Files:**
110
+ - Uploads are ephemeral on Render Free Tier
111
+ - WordClouds will be deleted on container restart
112
+ - Use cloud storage (S3, Cloudinary) for production
113
+
114
+ 4. **Database:**
115
+ - SQLite NOT recommended for production (file locking issues)
116
+ - PostgreSQL required for concurrent requests
117
+ - Free tier: 1GB storage, 97 connections
118
+
119
+ ---
120
+
121
+ ## 🆘 Common Issues
122
+
123
+ ### "Module not found"
124
+ → Run `pip install -r requirements.txt` locally first
125
+
126
+ ### "Port binding error"
127
+ → Use `$PORT` in start command (auto-set by Render)
128
+
129
+ ### "Database connection failed"
130
+ → Check `DATABASE_URL` in environment variables
131
+
132
+ ### "Model loading timeout"
133
+ → Free tier has 512MB RAM limit, optimize model or upgrade
134
+
135
+ ---
136
+
137
+ **Read DEPLOYMENT.md for detailed guide!**
TESTING_GUIDE.md ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🧪 Testing Guide - Step by Step
2
+
3
+ ## Pre-requisites
4
+ ```bash
5
+ # Make sure dependencies are installed
6
+ pip install -r requirements.txt
7
+
8
+ # Start the server
9
+ python main.py
10
+ ```
11
+
12
+ Server should start at: **http://localhost:8000**
13
+
14
+ ---
15
+
16
+ ## ✅ Test 1: Access Swagger UI (API Documentation)
17
+
18
+ ### Steps:
19
+ 1. Open browser: **http://localhost:8000/docs**
20
+ 2. You should see:
21
+ - "Vietnamese Product Rating Prediction API" title
22
+ - Three sections: Authentication, Prediction, Dashboard
23
+ - All endpoints listed with descriptions
24
+
25
+ ### What to show teacher:
26
+ - This is **automatic API documentation** (bonus points!)
27
+ - Click any endpoint to see request/response schemas
28
+ - Click "Try it out" to test endpoints interactively
29
+
30
+ **Status:** ✅ PASS / ❌ FAIL
31
+
32
+ ---
33
+
34
+ ## ✅ Test 2: User Registration
35
+
36
+ ### Steps:
37
+ 1. Go to: **http://localhost:8000/register**
38
+ 2. Fill in:
39
+ - Username: `testuser1`
40
+ - Email: `test@example.com`
41
+ - Password: `password123`
42
+ 3. Click "Register"
43
+ 4. Should redirect to login page
44
+
45
+ ### Expected Result:
46
+ - Green success message appears
47
+ - Redirects to `/login` after 1.5 seconds
48
+
49
+ **Status:** ✅ PASS / ❌ FAIL
50
+
51
+ ---
52
+
53
+ ## ✅ Test 3: User Login
54
+
55
+ ### Steps:
56
+ 1. Go to: **http://localhost:8000/login**
57
+ 2. Enter:
58
+ - Username: `testuser1`
59
+ - Password: `password123`
60
+ 3. Click "Login"
61
+
62
+ ### Expected Result:
63
+ - Green "Login successful!" message
64
+ - Redirects to `/dashboard`
65
+ - You see username in top-right corner
66
+
67
+ **Status:** ✅ PASS / ❌ FAIL
68
+
69
+ ---
70
+
71
+ ## ✅ Test 4: Single Comment Prediction
72
+
73
+ ### Steps:
74
+ 1. On dashboard, select a product from dropdown (e.g., "Điện thoại iPhone 15 Pro Max")
75
+ 2. Make sure "Single Comment" tab is active
76
+ 3. Enter Vietnamese comment:
77
+ ```
78
+ Sản phẩm rất tốt, chất lượng cao, đóng gói cẩn thận. Rất hài lòng!
79
+ ```
80
+ 4. Click "Predict Rating"
81
+
82
+ ### Expected Result:
83
+ - Green result box appears below
84
+ - Shows predicted rating (1-5)
85
+ - Shows confidence percentage
86
+ - Shows star rating (⭐⭐⭐⭐⭐)
87
+
88
+ **Status:** ✅ PASS / ❌ FAIL
89
+
90
+ ---
91
+
92
+ ## ✅ Test 5: Batch CSV Prediction
93
+
94
+ ### Steps:
95
+ 1. Select a product from dropdown
96
+ 2. Click "Upload CSV" tab
97
+ 3. Click "Choose File" and select `sample_comments.csv`
98
+ 4. File name should display: "Selected: sample_comments.csv"
99
+ 5. Click "Predict Batch"
100
+
101
+ ### Expected Result:
102
+ - Results section appears with 3 components:
103
+
104
+ **A) Bar Chart:**
105
+ - Shows distribution of ratings (1⭐ to 5⭐)
106
+ - Colored bars (red for 1-star, green for 5-star)
107
+
108
+ **B) Word Cloud:**
109
+ - Image showing frequent Vietnamese words
110
+ - Larger words appear more frequently in comments
111
+
112
+ **C) Results Table:**
113
+ - Shows all comments with predicted ratings
114
+ - Each row has: Comment | Rating | Confidence
115
+
116
+ **Status:** ✅ PASS / ❌ FAIL
117
+
118
+ ---
119
+
120
+ ## ✅ Test 6: Download CSV Results
121
+
122
+ ### Steps:
123
+ 1. After batch prediction (Test 5), scroll to results table
124
+ 2. Click "Download CSV" button (green button, top-right of table)
125
+
126
+ ### Expected Result:
127
+ - CSV file downloads automatically
128
+ - Filename format: `predictions_[timestamp].csv`
129
+ - File contains columns: `Comment`, `Predicted_Rating`, `Confidence`
130
+
131
+ ### Verify downloaded file:
132
+ - Open in Excel/Notepad
133
+ - Should have all 20 comments from `sample_comments.csv`
134
+ - Each has a predicted rating and confidence score
135
+
136
+ **Status:** ✅ PASS / ❌ FAIL
137
+
138
+ ---
139
+
140
+ ## ✅ Test 7: Test Swagger UI Endpoints
141
+
142
+ ### Steps:
143
+ 1. Go to: **http://localhost:8000/docs**
144
+ 2. Find "POST /api/auth/login" endpoint
145
+ 3. Click "Try it out"
146
+ 4. Enter:
147
+ ```json
148
+ username: testuser1
149
+ password: password123
150
+ ```
151
+ 5. Click "Execute"
152
+
153
+ ### Expected Result:
154
+ - Response Code: 200
155
+ - Response body contains:
156
+ ```json
157
+ {
158
+ "access_token": "eyJ0eXAiOiJKV1Q...",
159
+ "token_type": "bearer"
160
+ }
161
+ ```
162
+
163
+ ### Test authenticated endpoint:
164
+ 1. Copy the `access_token` value
165
+ 2. Click "Authorize" button (top-right, with lock icon)
166
+ 3. Paste token in "Value" field: `Bearer YOUR_TOKEN_HERE`
167
+ 4. Click "Authorize" then "Close"
168
+ 5. Try "GET /api/auth/me" endpoint
169
+ 6. Click "Try it out" → "Execute"
170
+
171
+ ### Expected Result:
172
+ - Response Code: 200
173
+ - Shows your user info (username, email, etc.)
174
+
175
+ **Status:** ✅ PASS / ❌ FAIL
176
+
177
+ ---
178
+
179
+ ## ✅ Test 8: Logout
180
+
181
+ ### Steps:
182
+ 1. On dashboard, click "Logout" button (top-right, red button)
183
+
184
+ ### Expected Result:
185
+ - Redirects to `/login` page
186
+ - Token is cleared from browser storage
187
+
188
+ **Status:** ✅ PASS / ❌ FAIL
189
+
190
+ ---
191
+
192
+ ## ✅ Test 9: Protected Route (Authentication Check)
193
+
194
+ ### Steps:
195
+ 1. After logout, try to access: **http://localhost:8000/dashboard**
196
+ 2. Open browser console (F12)
197
+
198
+ ### Expected Result:
199
+ - JavaScript checks for token
200
+ - Redirects back to `/login` because no token exists
201
+
202
+ **Status:** ✅ PASS / ❌ FAIL
203
+
204
+ ---
205
+
206
+ ## ✅ Test 10: Database Persistence
207
+
208
+ ### Steps:
209
+ 1. Stop the server (Ctrl+C)
210
+ 2. Start it again: `python main.py`
211
+ 3. Go to login page
212
+ 4. Login with previous credentials (`testuser1` / `password123`)
213
+
214
+ ### Expected Result:
215
+ - Login works (user data persisted in database)
216
+ - Dashboard loads successfully
217
+
218
+ **Status:** ✅ PASS / ❌ FAIL
219
+
220
+ ---
221
+
222
+ ## 🐛 Troubleshooting
223
+
224
+ ### Error: "Module not found"
225
+ ```bash
226
+ pip install -r requirements.txt
227
+ ```
228
+
229
+ ### Error: "Port 8000 already in use"
230
+ - Kill other process using port 8000
231
+ - Or change port in `main.py`: `uvicorn.run(..., port=8001)`
232
+
233
+ ### Error: "Database is locked"
234
+ - Close all instances of the application
235
+ - Delete `app/database/rating_prediction.db`
236
+ - Restart application (will create new database)
237
+
238
+ ### Word cloud doesn't show
239
+ - Check folder exists: `app/static/uploads/wordclouds/`
240
+ - Check server console for errors
241
+
242
+ ### CSV upload fails
243
+ - Ensure CSV has "Comment" column (case-sensitive)
244
+ - Check CSV is UTF-8 encoded
245
+ - Make sure comments are not empty
246
+
247
+ ---
248
+
249
+ ## 📊 Test Results Summary
250
+
251
+ | Test | Description | Status |
252
+ |------|-------------|--------|
253
+ | 1 | Swagger UI Access | ⬜ |
254
+ | 2 | User Registration | ⬜ |
255
+ | 3 | User Login | ⬜ |
256
+ | 4 | Single Prediction | ⬜ |
257
+ | 5 | Batch CSV Prediction | ⬜ |
258
+ | 6 | CSV Download | ⬜ |
259
+ | 7 | Swagger API Testing | ⬜ |
260
+ | 8 | Logout | ⬜ |
261
+ | 9 | Auth Protection | ⬜ |
262
+ | 10 | Database Persistence | ⬜ |
263
+
264
+ Fill in: ✅ PASS / ❌ FAIL / ⚠️ PARTIAL
265
+
266
+ ---
267
+
268
+ ## 🎯 Demo Checklist for Teacher
269
+
270
+ Before presenting, make sure:
271
+
272
+ - [ ] Server is running (`python main.py`)
273
+ - [ ] You can access Swagger UI (http://localhost:8000/docs)
274
+ - [ ] You have a test account ready
275
+ - [ ] `sample_comments.csv` is available
276
+ - [ ] You understand the architecture (routers, services, models)
277
+ - [ ] You can explain how to replace dummy ML model
278
+
279
+ ### Demo Flow:
280
+ 1. **Show Swagger UI** - explain automatic generation (bonus!)
281
+ 2. **Register → Login** - show JWT authentication
282
+ 3. **Single prediction** - demonstrate UI
283
+ 4. **Batch CSV** - show visualizations (chart + word cloud)
284
+ 5. **Download CSV** - export results
285
+ 6. **Explain architecture** - separation of concerns
286
+
287
+ Good luck! 🎓
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # App package
app/config.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration Settings
3
+ Supports environment variables for production deployment
4
+ """
5
+ import os
6
+ from pathlib import Path
7
+
8
+ # Base directory
9
+ BASE_DIR = Path(__file__).resolve().parent.parent
10
+
11
+ # ============================================
12
+ # SECURITY (Environment-aware)
13
+ # ============================================
14
+ # In production (Render), set SECRET_KEY as environment variable
15
+ # Fallback to default for local development
16
+ SECRET_KEY = os.getenv(
17
+ "SECRET_KEY",
18
+ "your-secret-key-change-in-production-2024-dev-only"
19
+ )
20
+
21
+ ALGORITHM = "HS256"
22
+ ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24 # 24 hours
23
+
24
+ # ============================================
25
+ # UPLOAD DIRECTORIES
26
+ # ============================================
27
+ # For production on Render, these will be in ephemeral storage
28
+ # Consider using cloud storage (S3, Cloudinary) for persistent files
29
+ UPLOAD_DIR = BASE_DIR / "app" / "static" / "uploads"
30
+ WORDCLOUD_DIR = UPLOAD_DIR / "wordclouds"
31
+
32
+ # Create directories if they don't exist
33
+ UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
34
+ WORDCLOUD_DIR.mkdir(parents=True, exist_ok=True)
35
+
36
+ # ============================================
37
+ # PRODUCTION SETTINGS
38
+ # ============================================
39
+ # Detect if running on Render (or any production environment)
40
+ IS_PRODUCTION = os.getenv("RENDER") is not None or os.getenv("DATABASE_URL") is not None
41
+
42
+ if IS_PRODUCTION:
43
+ print("🚀 Running in PRODUCTION mode")
44
+ else:
45
+ print("🔧 Running in DEVELOPMENT mode")
46
+
app/database.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database Configuration and Session Management
3
+ Supports BOTH SQLite (local) and PostgreSQL (production on Render)
4
+ """
5
+ import os
6
+ from sqlalchemy import create_engine
7
+ from sqlalchemy.ext.declarative import declarative_base
8
+ from sqlalchemy.orm import sessionmaker
9
+ from pathlib import Path
10
+
11
+ # ============================================
12
+ # HYBRID DATABASE SUPPORT
13
+ # ============================================
14
+ # Priority:
15
+ # 1. Use DATABASE_URL from environment (Render PostgreSQL)
16
+ # 2. Fallback to SQLite for local development
17
+
18
+ DATABASE_URL = os.getenv("DATABASE_URL")
19
+
20
+ if DATABASE_URL:
21
+ # CRITICAL FIX FOR RENDER:
22
+ # Render provides URLs starting with 'postgres://'
23
+ # but SQLAlchemy 1.4+ requires 'postgresql://'
24
+ if DATABASE_URL.startswith("postgres://"):
25
+ DATABASE_URL = DATABASE_URL.replace("postgres://", "postgresql://", 1)
26
+
27
+ print(f"🚀 Production Mode: Using PostgreSQL")
28
+
29
+ # PostgreSQL: No need for check_same_thread
30
+ engine = create_engine(
31
+ DATABASE_URL,
32
+ pool_pre_ping=True, # Verify connections before using
33
+ pool_recycle=300, # Recycle connections every 5 minutes
34
+ )
35
+ else:
36
+ # Local development: Use SQLite
37
+ print(f"🔧 Development Mode: Using SQLite")
38
+
39
+ # Create database directory
40
+ db_dir = Path("app/database")
41
+ db_dir.mkdir(parents=True, exist_ok=True)
42
+
43
+ DATABASE_URL = "sqlite:///./app/database/rating_prediction.db"
44
+
45
+ # SQLite: Needs check_same_thread=False for FastAPI
46
+ engine = create_engine(
47
+ DATABASE_URL,
48
+ connect_args={"check_same_thread": False}
49
+ )
50
+
51
+ # Create session factory
52
+ SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
53
+
54
+ # Base class for all models
55
+ Base = declarative_base()
56
+
57
+ def get_db():
58
+ """
59
+ Dependency to get database session
60
+ Used in FastAPI route dependencies
61
+ """
62
+ db = SessionLocal()
63
+ try:
64
+ yield db
65
+ finally:
66
+ db.close()
app/database/.gitkeep ADDED
@@ -0,0 +1 @@
 
 
1
+ # Database directory
app/models.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SQLAlchemy Database Models
3
+ """
4
+ from sqlalchemy import Column, Integer, String, DateTime, Text, ForeignKey, Float
5
+ from sqlalchemy.orm import relationship
6
+ from datetime import datetime
7
+ from app.database import Base
8
+
9
+ class User(Base):
10
+ """User model for authentication"""
11
+ __tablename__ = "users"
12
+
13
+ id = Column(Integer, primary_key=True, index=True)
14
+ username = Column(String(50), unique=True, index=True, nullable=False)
15
+ email = Column(String(100), unique=True, index=True, nullable=False)
16
+ hashed_password = Column(String(255), nullable=False)
17
+ created_at = Column(DateTime, default=datetime.utcnow)
18
+
19
+ # Relationship
20
+ predictions = relationship("PredictionHistory", back_populates="user")
21
+
22
+ def __repr__(self):
23
+ return f"<User {self.username}>"
24
+
25
+
26
+ class PredictionHistory(Base):
27
+ """Prediction history model"""
28
+ __tablename__ = "prediction_history"
29
+
30
+ id = Column(Integer, primary_key=True, index=True)
31
+ user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
32
+ product_name = Column(String(200), nullable=False)
33
+ comment = Column(Text, nullable=False)
34
+ predicted_rating = Column(Integer, nullable=False)
35
+ confidence_score = Column(Float, nullable=True)
36
+ prediction_type = Column(String(20), default="single") # 'single' or 'batch'
37
+ created_at = Column(DateTime, default=datetime.utcnow)
38
+
39
+ # Relationship
40
+ user = relationship("User", back_populates="predictions")
41
+
42
+ def __repr__(self):
43
+ return f"<PredictionHistory {self.id}: {self.predicted_rating}⭐>"
app/routers/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Routers package
app/routers/auth.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Authentication Router
3
+ Handles user registration and login
4
+ """
5
+ from datetime import timedelta
6
+ from fastapi import APIRouter, Depends, HTTPException, status
7
+ from fastapi.security import OAuth2PasswordRequestForm
8
+ from sqlalchemy.orm import Session
9
+
10
+ from app.database import get_db
11
+ from app.models import User
12
+ from app.schemas import UserCreate, UserResponse, Token
13
+ from app.services.auth_service import (
14
+ get_password_hash,
15
+ authenticate_user,
16
+ create_access_token,
17
+ get_current_user
18
+ )
19
+ from app.config import ACCESS_TOKEN_EXPIRE_MINUTES
20
+
21
+ router = APIRouter()
22
+
23
+
24
+ @router.post("/register", response_model=UserResponse, status_code=status.HTTP_201_CREATED)
25
+ async def register(user_data: UserCreate, db: Session = Depends(get_db)):
26
+ """
27
+ Register a new user
28
+
29
+ - **username**: Unique username (3-50 characters)
30
+ - **email**: Valid email address
31
+ - **password**: Password (minimum 6 characters)
32
+ """
33
+ # Check if username exists
34
+ db_user = db.query(User).filter(User.username == user_data.username).first()
35
+ if db_user:
36
+ raise HTTPException(
37
+ status_code=status.HTTP_400_BAD_REQUEST,
38
+ detail="Username already registered"
39
+ )
40
+
41
+ # Check if email exists
42
+ db_user = db.query(User).filter(User.email == user_data.email).first()
43
+ if db_user:
44
+ raise HTTPException(
45
+ status_code=status.HTTP_400_BAD_REQUEST,
46
+ detail="Email already registered"
47
+ )
48
+
49
+ # Create new user
50
+ new_user = User(
51
+ username=user_data.username,
52
+ email=user_data.email,
53
+ hashed_password=get_password_hash(user_data.password)
54
+ )
55
+
56
+ db.add(new_user)
57
+ db.commit()
58
+ db.refresh(new_user)
59
+
60
+ return new_user
61
+
62
+
63
+ @router.post("/login", response_model=Token)
64
+ async def login(
65
+ form_data: OAuth2PasswordRequestForm = Depends(),
66
+ db: Session = Depends(get_db)
67
+ ):
68
+ """
69
+ Login to get access token
70
+
71
+ - **username**: Your username
72
+ - **password**: Your password
73
+
74
+ Returns JWT access token for authentication
75
+ """
76
+ user = authenticate_user(db, form_data.username, form_data.password)
77
+ if not user:
78
+ raise HTTPException(
79
+ status_code=status.HTTP_401_UNAUTHORIZED,
80
+ detail="Incorrect username or password",
81
+ headers={"WWW-Authenticate": "Bearer"},
82
+ )
83
+
84
+ access_token_expires = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
85
+ access_token = create_access_token(
86
+ data={"sub": user.username}, expires_delta=access_token_expires
87
+ )
88
+
89
+ return {"access_token": access_token, "token_type": "bearer"}
90
+
91
+
92
+ @router.get("/me", response_model=UserResponse)
93
+ async def get_current_user_info(current_user: User = Depends(get_current_user)):
94
+ """
95
+ Get current authenticated user information
96
+ """
97
+ return current_user
app/routers/dashboard.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Dashboard Router
3
+ Serves frontend Jinja2 templates
4
+ """
5
+ from fastapi import APIRouter, Request, Depends
6
+ from fastapi.templating import Jinja2Templates
7
+ from fastapi.responses import HTMLResponse, RedirectResponse
8
+ from sqlalchemy.orm import Session
9
+
10
+ from app.database import get_db
11
+ from app.models import User
12
+ from app.services.auth_service import get_current_user
13
+
14
+ router = APIRouter()
15
+ templates = Jinja2Templates(directory="app/templates")
16
+
17
+
18
+ @router.get("/", response_class=HTMLResponse)
19
+ async def home(request: Request):
20
+ """Home page - redirect to login"""
21
+ return RedirectResponse(url="/login")
22
+
23
+
24
+ @router.get("/login", response_class=HTMLResponse)
25
+ async def login_page(request: Request):
26
+ """Login page"""
27
+ return templates.TemplateResponse("login.html", {"request": request})
28
+
29
+
30
+ @router.get("/register", response_class=HTMLResponse)
31
+ async def register_page(request: Request):
32
+ """Registration page"""
33
+ return templates.TemplateResponse("register.html", {"request": request})
34
+
35
+
36
+ @router.get("/dashboard", response_class=HTMLResponse)
37
+ async def dashboard_page(request: Request):
38
+ """
39
+ Main dashboard page
40
+ Requires authentication (handle in frontend with token)
41
+ """
42
+ return templates.TemplateResponse("dashboard.html", {
43
+ "request": request
44
+ })
app/routers/prediction.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Prediction Router
3
+ Handles single and batch predictions
4
+ """
5
+ import io
6
+ import csv
7
+ from typing import List, Dict
8
+ from datetime import datetime
9
+ from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, Form
10
+ from fastapi.responses import StreamingResponse
11
+ from sqlalchemy.orm import Session
12
+
13
+ from app.database import get_db
14
+ from app.models import User, PredictionHistory
15
+ from app.schemas import (
16
+ SinglePredictionRequest,
17
+ SinglePredictionResponse,
18
+ BatchPredictionResponse,
19
+ PredictionHistoryResponse,
20
+ PDFReportRequest
21
+ )
22
+ from app.services.auth_service import get_current_user
23
+ from app.services.ml_service import get_ml_service, MLPredictionService
24
+ from app.services.visualization_service import get_viz_service, VisualizationService
25
+ from app.services.report_service import get_report_service, ReportService
26
+
27
+ router = APIRouter()
28
+
29
+
30
+ @router.post("/single", response_model=SinglePredictionResponse)
31
+ async def predict_single(
32
+ request: SinglePredictionRequest,
33
+ current_user: User = Depends(get_current_user),
34
+ db: Session = Depends(get_db),
35
+ ml_service: MLPredictionService = Depends(get_ml_service)
36
+ ):
37
+ """
38
+ Predict rating for a single comment
39
+
40
+ - **product_name**: Name of the product
41
+ - **comment**: Vietnamese product review text
42
+
43
+ Returns predicted rating (1-5 stars) with confidence score
44
+ """
45
+ # Make prediction
46
+ prediction = ml_service.predict_single(request.comment)
47
+
48
+ # Save to history
49
+ history = PredictionHistory(
50
+ user_id=current_user.id,
51
+ product_name=request.product_name,
52
+ comment=request.comment,
53
+ predicted_rating=prediction['rating'],
54
+ confidence_score=prediction['confidence'],
55
+ prediction_type='single'
56
+ )
57
+ db.add(history)
58
+ db.commit()
59
+
60
+ return {
61
+ "predicted_rating": prediction['rating'],
62
+ "confidence_score": prediction['confidence'],
63
+ "comment": request.comment
64
+ }
65
+
66
+
67
+ @router.post("/batch", response_model=BatchPredictionResponse)
68
+ async def predict_batch(
69
+ product_name: str = Form(...),
70
+ file: UploadFile = File(...),
71
+ current_user: User = Depends(get_current_user),
72
+ db: Session = Depends(get_db),
73
+ ml_service: MLPredictionService = Depends(get_ml_service),
74
+ viz_service: VisualizationService = Depends(get_viz_service),
75
+ report_service: ReportService = Depends(get_report_service)
76
+ ):
77
+ """
78
+ Predict ratings for batch of comments from CSV file
79
+
80
+ - **product_name**: Name of the product
81
+ - **file**: CSV file with 'Comment' column
82
+
83
+ Returns predictions with visualization data (wordcloud, distribution chart)
84
+ """
85
+ # Validate file type
86
+ if not file.filename.endswith('.csv'):
87
+ raise HTTPException(
88
+ status_code=status.HTTP_400_BAD_REQUEST,
89
+ detail="File must be a CSV"
90
+ )
91
+
92
+ try:
93
+ # Read CSV file
94
+ contents = await file.read()
95
+ csv_file = io.StringIO(contents.decode('utf-8'))
96
+ reader = csv.DictReader(csv_file)
97
+
98
+ # Check for Comment column
99
+ if 'Comment' not in reader.fieldnames:
100
+ raise HTTPException(
101
+ status_code=status.HTTP_400_BAD_REQUEST,
102
+ detail="CSV must contain 'Comment' column"
103
+ )
104
+
105
+ # Extract comments
106
+ comments = []
107
+ for row in reader:
108
+ if row.get('Comment', '').strip():
109
+ comments.append(row['Comment'].strip())
110
+
111
+ if not comments:
112
+ raise HTTPException(
113
+ status_code=status.HTTP_400_BAD_REQUEST,
114
+ detail="No valid comments found in CSV"
115
+ )
116
+
117
+ # Make batch predictions
118
+ predictions = ml_service.predict_batch(comments)
119
+
120
+ # Save to history
121
+ for pred in predictions:
122
+ history = PredictionHistory(
123
+ user_id=current_user.id,
124
+ product_name=product_name,
125
+ comment=pred['text'],
126
+ predicted_rating=pred['rating'],
127
+ confidence_score=pred['confidence'],
128
+ prediction_type='batch'
129
+ )
130
+ db.add(history)
131
+ db.commit()
132
+
133
+ # Calculate rating distribution
134
+ ratings = [p['rating'] for p in predictions]
135
+ distribution = viz_service.calculate_rating_distribution(ratings)
136
+
137
+ # Generate word cloud
138
+ wordcloud_filename = f"wordcloud_{current_user.username}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
139
+ wordcloud_url = viz_service.generate_wordcloud(comments, wordcloud_filename)
140
+
141
+ # Prepare results for CSV download
142
+ results = []
143
+ for pred in predictions:
144
+ results.append({
145
+ 'Comment': pred['text'],
146
+ 'Predicted_Rating': pred['rating'],
147
+ 'Confidence': pred['confidence']
148
+ })
149
+
150
+ # Generate PDF report
151
+ pdf_filename = f"report_{current_user.username}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
152
+ pdf_content = report_service.generate_pdf_report(
153
+ predictions=predictions,
154
+ distribution=distribution,
155
+ wordcloud_path=wordcloud_url,
156
+ username=current_user.username,
157
+ filename=pdf_filename
158
+ )
159
+
160
+ return {
161
+ "total_predictions": len(predictions),
162
+ "rating_distribution": distribution,
163
+ "wordcloud_url": wordcloud_url,
164
+ "results": results,
165
+ "csv_download_url": f"/api/predict/download/{current_user.id}/{datetime.now().timestamp()}",
166
+ "pdf_download_url": f"/api/predict/download-pdf/{current_user.id}/{datetime.now().timestamp()}"
167
+ }
168
+
169
+ except Exception as e:
170
+ raise HTTPException(
171
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
172
+ detail=f"Error processing file: {str(e)}"
173
+ )
174
+
175
+
176
+ @router.get("/history", response_model=List[PredictionHistoryResponse])
177
+ async def get_prediction_history(
178
+ limit: int = 50,
179
+ current_user: User = Depends(get_current_user),
180
+ db: Session = Depends(get_db)
181
+ ):
182
+ """
183
+ Get prediction history for current user
184
+
185
+ - **limit**: Maximum number of records to return (default: 50)
186
+ """
187
+ history = db.query(PredictionHistory).filter(
188
+ PredictionHistory.user_id == current_user.id
189
+ ).order_by(PredictionHistory.created_at.desc()).limit(limit).all()
190
+
191
+ return history
192
+
193
+
194
+ @router.post("/download-csv")
195
+ async def download_predictions_csv(
196
+ results: List[dict],
197
+ current_user: User = Depends(get_current_user)
198
+ ):
199
+ """
200
+ Download prediction results as CSV
201
+ """
202
+ # Create CSV in memory
203
+ output = io.StringIO()
204
+
205
+ if results:
206
+ fieldnames = results[0].keys()
207
+ writer = csv.DictWriter(output, fieldnames=fieldnames)
208
+ writer.writeheader()
209
+ writer.writerows(results)
210
+
211
+ # Reset position
212
+ output.seek(0)
213
+
214
+ # Return as streaming response
215
+ return StreamingResponse(
216
+ iter([output.getvalue()]),
217
+ media_type="text/csv",
218
+ headers={
219
+ "Content-Disposition": f"attachment; filename=predictions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
220
+ }
221
+ )
222
+
223
+
224
+ @router.post("/download-pdf")
225
+ async def download_predictions_pdf(
226
+ request: PDFReportRequest,
227
+ current_user: User = Depends(get_current_user),
228
+ report_service: ReportService = Depends(get_report_service)
229
+ ):
230
+ """
231
+ Download prediction results as PDF report
232
+ """
233
+ try:
234
+ pdf_content = report_service.generate_pdf_report(
235
+ predictions=request.predictions,
236
+ distribution=request.distribution,
237
+ wordcloud_path=request.wordcloud_path,
238
+ username=current_user.username
239
+ )
240
+
241
+ return StreamingResponse(
242
+ io.BytesIO(pdf_content),
243
+ media_type="application/pdf",
244
+ headers={
245
+ "Content-Disposition": f"attachment; filename=predictions_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
246
+ }
247
+ )
248
+ except Exception as e:
249
+ raise HTTPException(
250
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
251
+ detail=f"Error generating PDF: {str(e)}"
252
+ )
app/schemas.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pydantic Schemas for Request/Response Validation
3
+ """
4
+ from pydantic import BaseModel, EmailStr, Field
5
+ from typing import Optional, List
6
+ from datetime import datetime
7
+
8
+ # ===== Auth Schemas =====
9
+ class UserCreate(BaseModel):
10
+ username: str = Field(..., min_length=3, max_length=50)
11
+ email: EmailStr
12
+ password: str = Field(..., min_length=6)
13
+
14
+ class UserLogin(BaseModel):
15
+ username: str
16
+ password: str
17
+
18
+ class UserResponse(BaseModel):
19
+ id: int
20
+ username: str
21
+ email: str
22
+ created_at: datetime
23
+
24
+ class Config:
25
+ from_attributes = True
26
+
27
+ class Token(BaseModel):
28
+ access_token: str
29
+ token_type: str
30
+
31
+ class TokenData(BaseModel):
32
+ username: Optional[str] = None
33
+
34
+
35
+ # ===== Prediction Schemas =====
36
+ class SinglePredictionRequest(BaseModel):
37
+ product_name: Optional[str] = ""
38
+ comment: str
39
+
40
+ class SinglePredictionResponse(BaseModel):
41
+ predicted_rating: int
42
+ confidence_score: float
43
+ comment: str
44
+
45
+ class BatchPredictionResponse(BaseModel):
46
+ total_predictions: int
47
+ rating_distribution: dict
48
+ wordcloud_url: str
49
+ results: List[dict]
50
+ csv_download_url: str
51
+ pdf_download_url: str
52
+
53
+ class PDFReportRequest(BaseModel):
54
+ predictions: List[dict]
55
+ distribution: dict
56
+ wordcloud_path: str
57
+
58
+
59
+ # ===== History Schemas =====
60
+ class PredictionHistoryResponse(BaseModel):
61
+ id: int
62
+ product_name: str
63
+ comment: str
64
+ predicted_rating: int
65
+ confidence_score: Optional[float]
66
+ prediction_type: str
67
+ created_at: datetime
68
+
69
+ class Config:
70
+ from_attributes = True
app/services/Model/phoBERT_multi_class_tokenizer/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<mask>": 64000
3
+ }
app/services/Model/phoBERT_multi_class_tokenizer/bpe.codes ADDED
The diff for this file is too large to render. See raw diff
 
app/services/Model/phoBERT_multi_class_tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": "<mask>",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "</s>",
8
+ "unk_token": "<unk>"
9
+ }
app/services/Model/phoBERT_multi_class_tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "64000": {
36
+ "content": "<mask>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "tokenizer_class": "PhobertTokenizer",
54
+ "unk_token": "<unk>"
55
+ }
app/services/Model/phoBERT_multi_class_tokenizer/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
app/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Services package
app/services/auth_service.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Authentication Service
3
+ Password hashing and JWT token generation
4
+ """
5
+ from datetime import datetime, timedelta
6
+ from typing import Optional
7
+ from passlib.context import CryptContext
8
+ from jose import JWTError, jwt
9
+ from fastapi import Depends, HTTPException, status
10
+ from fastapi.security import OAuth2PasswordBearer
11
+ from sqlalchemy.orm import Session
12
+
13
+ from app.config import SECRET_KEY, ALGORITHM, ACCESS_TOKEN_EXPIRE_MINUTES
14
+ from app.database import get_db
15
+ from app.models import User
16
+ from app.schemas import TokenData
17
+
18
+ # Password hashing (using argon2 instead of bcrypt for Python 3.13 compatibility)
19
+ pwd_context = CryptContext(schemes=["argon2"], deprecated="auto")
20
+
21
+ # OAuth2 scheme
22
+ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/login")
23
+
24
+
25
+ def verify_password(plain_password: str, hashed_password: str) -> bool:
26
+ """Verify a password against a hash"""
27
+ return pwd_context.verify(plain_password, hashed_password)
28
+
29
+
30
+ def get_password_hash(password: str) -> str:
31
+ """Hash a password"""
32
+ # Bcrypt has a 72 byte limit, truncate if necessary
33
+ if len(password.encode('utf-8')) > 72:
34
+ password = password[:72]
35
+ return pwd_context.hash(password)
36
+
37
+
38
+ def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
39
+ """Create JWT access token"""
40
+ to_encode = data.copy()
41
+ if expires_delta:
42
+ expire = datetime.utcnow() + expires_delta
43
+ else:
44
+ expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
45
+
46
+ to_encode.update({"exp": expire})
47
+ encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
48
+ return encoded_jwt
49
+
50
+
51
+ def authenticate_user(db: Session, username: str, password: str):
52
+ """Authenticate user with username and password"""
53
+ user = db.query(User).filter(User.username == username).first()
54
+ if not user:
55
+ return False
56
+ if not verify_password(password, user.hashed_password):
57
+ return False
58
+ return user
59
+
60
+
61
+ async def get_current_user(
62
+ token: str = Depends(oauth2_scheme),
63
+ db: Session = Depends(get_db)
64
+ ) -> User:
65
+ """Get current authenticated user from JWT token"""
66
+ credentials_exception = HTTPException(
67
+ status_code=status.HTTP_401_UNAUTHORIZED,
68
+ detail="Could not validate credentials",
69
+ headers={"WWW-Authenticate": "Bearer"},
70
+ )
71
+
72
+ try:
73
+ payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
74
+ username: str = payload.get("sub")
75
+ if username is None:
76
+ raise credentials_exception
77
+ token_data = TokenData(username=username)
78
+ except JWTError:
79
+ raise credentials_exception
80
+
81
+ user = db.query(User).filter(User.username == token_data.username).first()
82
+ if user is None:
83
+ raise credentials_exception
84
+
85
+ return user
app/services/ml_service.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ML Prediction Service with LAZY LOADING & REMOTE MODEL FETCHING
3
+ """
4
+ import os
5
+ from typing import List, Dict, Any, Optional
6
+ # [QUAN TRỌNG] Import thư viện để tải model từ kho riêng
7
+ from huggingface_hub import hf_hub_download
8
+
9
+ # Only set HF cache for local development
10
+ if not os.getenv("RENDER") and not os.getenv("SPACE_ID"):
11
+ os.environ['HF_HOME'] = 'G:/huggingface_cache'
12
+
13
+ class MLPredictionService:
14
+ """
15
+ ML Service with lazy loading.
16
+ Fetches heavy model weights from external Hugging Face Model Repo
17
+ to bypass the 1GB limit of Space Git Repo.
18
+ """
19
+
20
+ def __init__(self):
21
+ """Initialize service without loading model (lazy loading)"""
22
+ # Model components
23
+ self.model: Optional[Any] = None
24
+ self.tokenizer: Optional[Any] = None
25
+ self.device: Optional[str] = None
26
+ self.model_loaded = False
27
+
28
+ # [SỬA ĐỔI] Không set đường dẫn cứng ở đây nữa vì file không còn ở máy
29
+ # Chúng ta sẽ định nghĩa Repo ID chứa model ở đây
30
+ self.MODEL_REPO_ID = "vtdung23/my-phobert-models"
31
+ self.MODEL_FILENAME = "best_phoBER.pth"
32
+
33
+ print("✅ ML Service initialized (Model will download & load on first request)")
34
+
35
+ def _load_model(self):
36
+ """Load model and tokenizer (called on first request)"""
37
+ if self.model_loaded:
38
+ return
39
+
40
+ print("🔄 Loading ML model (first request)...")
41
+
42
+ # Import heavy dependencies only when needed
43
+ import torch
44
+ from transformers import AutoTokenizer, RobertaForSequenceClassification
45
+
46
+ # Determine device
47
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
48
+ print(f"📍 Using device: {self.device}")
49
+
50
+ # [SỬA ĐỔI 1] Load Tokenizer từ gốc vinai/phobert-base
51
+ # Vì folder tokenizer local đã bị xóa, ta load thẳng từ thư viện gốc cho an toàn
52
+ print("📦 Loading tokenizer from vinai/phobert-base...")
53
+ self.tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base", use_fast=False)
54
+
55
+ # [SỬA ĐỔI 2] Tải file weights từ Kho Model riêng về
56
+ print(f"⬇️ Downloading weights from repo: {self.MODEL_REPO_ID}...")
57
+ try:
58
+ model_path = hf_hub_download(
59
+ repo_id=self.MODEL_REPO_ID,
60
+ filename=self.MODEL_FILENAME,
61
+ repo_type="model" # Quan trọng: báo đây là kho Model
62
+ )
63
+ print(f"✅ Downloaded weights to: {model_path}")
64
+ except Exception as e:
65
+ print(f"❌ Error downloading model: {e}")
66
+ raise e
67
+
68
+ # Load model architecture
69
+ print("🧠 Loading PhoBERT architecture...")
70
+ self.model = RobertaForSequenceClassification.from_pretrained(
71
+ "vinai/phobert-base",
72
+ num_labels=5, # Đảm bảo số này khớp với lúc bạn train (0,1,2,3,4 hay 1-5?)
73
+ problem_type="single_label_classification"
74
+ )
75
+
76
+ # Load fine-tuned weights
77
+ print("⚙️ Loading trained weights into architecture...")
78
+ state_dict = torch.load(model_path, map_location=self.device, weights_only=False)
79
+ self.model.load_state_dict(state_dict)
80
+
81
+ # Set to evaluation mode and move to device
82
+ self.model.eval()
83
+ self.model.to(self.device)
84
+
85
+ self.model_loaded = True
86
+ print("✅ Model loaded successfully and ready to serve!")
87
+
88
+ def predict_single(self, text: str) -> Dict[str, Any]:
89
+ """Predict rating for a single comment"""
90
+ # Lazy load model on first request
91
+ self._load_model()
92
+
93
+ import torch
94
+ import torch.nn.functional as F
95
+
96
+ # 1. Vietnamese preprocessing
97
+ processed_text = self.preprocess(text)
98
+
99
+ # 2. Tokenize
100
+ encoded = self.tokenizer(
101
+ processed_text,
102
+ padding=True,
103
+ truncation=True,
104
+ max_length=256,
105
+ return_tensors="pt"
106
+ )
107
+
108
+ # Move tensors to device
109
+ encoded = {k: v.to(self.device) for k, v in encoded.items()}
110
+
111
+ # 3. Inference
112
+ with torch.no_grad():
113
+ outputs = self.model(**encoded)
114
+ logits = outputs.logits
115
+ probs = F.softmax(logits, dim=1)
116
+
117
+ # 4. Get prediction + confidence
118
+ predicted_class = torch.argmax(probs, dim=1).item()
119
+ confidence = probs[0][predicted_class].item()
120
+
121
+ # 5. Convert 0-based label -> rating 1-5
122
+ # (Giả sử model train label 0 tương ứng 1 sao)
123
+ rating = predicted_class + 1
124
+
125
+ return {
126
+ 'rating': rating,
127
+ 'confidence': confidence
128
+ }
129
+
130
+ def predict_batch(self, texts: List[str]) -> List[Dict[str, any]]:
131
+ """Predict ratings for multiple comments"""
132
+ results = []
133
+ for text in texts:
134
+ # Có thể tối ưu bằng cách batch tokenize, nhưng loop đơn giản cho an toàn
135
+ prediction = self.predict_single(text)
136
+ results.append({
137
+ 'text': text,
138
+ 'rating': prediction['rating'],
139
+ 'confidence': prediction['confidence']
140
+ })
141
+ return results
142
+
143
+ def preprocess(self, text: str) -> str:
144
+ """Preprocess Vietnamese text"""
145
+ from underthesea import word_tokenize
146
+ text = word_tokenize(text, format="text")
147
+ return text
148
+
149
+ # Singleton instance
150
+ ml_service = MLPredictionService()
151
+
152
+ def get_ml_service() -> MLPredictionService:
153
+ return ml_service
app/services/report_service.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Report Service
3
+ Generate PDF reports for batch predictions
4
+ """
5
+ import io
6
+ from typing import List, Dict
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from reportlab.lib.pagesizes import letter, A4
10
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
11
+ from reportlab.lib.units import inch
12
+ from reportlab.platypus import (
13
+ SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
14
+ PageBreak, Image, Preformatted
15
+ )
16
+ from reportlab.lib import colors
17
+ from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
18
+ from reportlab.pdfbase import pdfmetrics
19
+ from reportlab.pdfbase.ttfonts import TTFont
20
+ import matplotlib
21
+ matplotlib.use('Agg')
22
+ import matplotlib.pyplot as plt
23
+ from io import BytesIO
24
+ from PIL import Image as PILImage
25
+
26
+ from app.config import WORDCLOUD_DIR
27
+
28
+
29
+ class ReportService:
30
+ """Service for generating PDF reports"""
31
+
32
+ def __init__(self):
33
+ self.styles = getSampleStyleSheet()
34
+ self._setup_custom_styles()
35
+ self._setup_fonts()
36
+
37
+ def _setup_fonts(self):
38
+ """Setup fonts for Vietnamese character support"""
39
+ try:
40
+ # Try to use DejaVu font which supports Vietnamese characters
41
+ pdfmetrics.registerFont(TTFont('DejaVu', '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'))
42
+ # Register bold variant
43
+ pdfmetrics.registerFont(TTFont('DejaVuBold', '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf'))
44
+ except Exception as e:
45
+ # If fonts not found, continue with default fonts
46
+ print(f"Warning: Could not load Vietnamese fonts: {e}")
47
+
48
+ def _setup_custom_styles(self):
49
+ """Setup custom paragraph styles"""
50
+ # Use DejaVu font for Vietnamese support, fallback to Helvetica
51
+ font_name = 'DejaVu'
52
+ font_name_bold = 'DejaVuBold'
53
+
54
+ self.styles.add(ParagraphStyle(
55
+ name='CustomTitle',
56
+ parent=self.styles['Heading1'],
57
+ fontSize=24,
58
+ textColor=colors.HexColor('#4F46E5'),
59
+ spaceAfter=30,
60
+ alignment=TA_CENTER,
61
+ fontName=font_name_bold
62
+ ))
63
+
64
+ self.styles.add(ParagraphStyle(
65
+ name='CustomHeading',
66
+ parent=self.styles['Heading2'],
67
+ fontSize=14,
68
+ textColor=colors.HexColor('#4F46E5'),
69
+ spaceAfter=12,
70
+ fontName=font_name_bold
71
+ ))
72
+
73
+ self.styles.add(ParagraphStyle(
74
+ name='CustomNormal',
75
+ parent=self.styles['Normal'],
76
+ fontSize=10,
77
+ spaceAfter=6,
78
+ fontName=font_name
79
+ ))
80
+
81
+ def generate_rating_distribution_chart(self) -> tuple:
82
+ """
83
+ Generate a matplotlib chart for rating distribution
84
+
85
+ Returns:
86
+ tuple: (buffer, filename)
87
+ """
88
+ pass
89
+
90
+ def generate_pdf_report(
91
+ self,
92
+ predictions: List[Dict],
93
+ distribution: Dict[int, int],
94
+ wordcloud_path: str,
95
+ username: str,
96
+ filename: str = None
97
+ ) -> bytes:
98
+ """
99
+ Generate comprehensive PDF report for batch predictions
100
+
101
+ Args:
102
+ predictions: List of prediction results with 'text', 'rating', 'confidence'
103
+ distribution: Rating distribution dict {rating: count}
104
+ wordcloud_path: Path to generated wordcloud image (URL or file path)
105
+ username: Username for the report
106
+ filename: Optional custom filename
107
+
108
+ Returns:
109
+ bytes: PDF file content
110
+ """
111
+ # Create PDF in memory
112
+ pdf_buffer = io.BytesIO()
113
+
114
+ # Create document
115
+ doc = SimpleDocTemplate(
116
+ pdf_buffer,
117
+ pagesize=A4,
118
+ rightMargin=0.75*inch,
119
+ leftMargin=0.75*inch,
120
+ topMargin=0.75*inch,
121
+ bottomMargin=0.75*inch
122
+ )
123
+
124
+ # Build document content
125
+ story = []
126
+
127
+ # Title
128
+ title = Paragraph(
129
+ "Prediction Report",
130
+ self.styles['CustomTitle']
131
+ )
132
+ story.append(title)
133
+ story.append(Spacer(1, 0.3*inch))
134
+
135
+ # Report info
136
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
137
+ info_text = f"<b>Generated by:</b> {username}<br/><b>Date:</b> {timestamp}"
138
+ story.append(Paragraph(info_text, self.styles['CustomNormal']))
139
+ story.append(Spacer(1, 0.3*inch))
140
+
141
+ # Summary section
142
+ total_predictions = len(predictions)
143
+ avg_confidence = sum(p.get('confidence', 0) for p in predictions) / total_predictions if predictions else 0
144
+
145
+ summary_heading = Paragraph("Summary", self.styles['CustomHeading'])
146
+ story.append(summary_heading)
147
+
148
+ summary_data = [
149
+ ['Metric', 'Value'],
150
+ ['Total Predictions', str(total_predictions)],
151
+ ['Average Confidence', f'{avg_confidence:.2%}'],
152
+ ]
153
+
154
+ summary_table = Table(summary_data, colWidths=[3*inch, 2*inch])
155
+ summary_table.setStyle(TableStyle([
156
+ ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#4F46E5')),
157
+ ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
158
+ ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
159
+ ('FONTNAME', (0, 0), (-1, 0), 'DejaVuBold'),
160
+ ('FONTSIZE', (0, 0), (-1, 0), 12),
161
+ ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
162
+ ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
163
+ ('GRID', (0, 0), (-1, -1), 1, colors.black),
164
+ ('FONTNAME', (0, 1), (-1, -1), 'DejaVu'),
165
+ ('FONTSIZE', (0, 1), (-1, -1), 10)
166
+ ]))
167
+ story.append(summary_table)
168
+ story.append(Spacer(1, 0.3*inch))
169
+
170
+ # Rating Distribution section
171
+ dist_heading = Paragraph("Rating Distribution", self.styles['CustomHeading'])
172
+ story.append(dist_heading)
173
+
174
+ dist_data = [['Rating', 'Count', 'Percentage']]
175
+
176
+ # Normalize distribution keys to integers (they might come as strings from JSON)
177
+ normalized_dist = {}
178
+ for key, value in distribution.items():
179
+ try:
180
+ int_key = int(key)
181
+ normalized_dist[int_key] = int(value)
182
+ except (ValueError, TypeError):
183
+ pass
184
+
185
+ total = sum(normalized_dist.values())
186
+
187
+ for rating in range(1, 6):
188
+ count = normalized_dist.get(rating, 0)
189
+ percentage = (count / total * 100) if total > 0 else 0
190
+ # Use star character ★ instead of emoji
191
+ stars = "★" * rating
192
+ dist_data.append([
193
+ f"{stars}",
194
+ str(count),
195
+ f"{percentage:.1f}%"
196
+ ])
197
+
198
+ dist_table = Table(dist_data, colWidths=[1.5*inch, 1.5*inch, 1.5*inch])
199
+ dist_table.setStyle(TableStyle([
200
+ ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#4F46E5')),
201
+ ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
202
+ ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
203
+ ('FONTNAME', (0, 0), (-1, 0), 'DejaVuBold'),
204
+ ('FONTSIZE', (0, 0), (-1, 0), 11),
205
+ ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
206
+ ('BACKGROUND', (0, 1), (-1, -1), colors.lightgrey),
207
+ ('GRID', (0, 0), (-1, -1), 1, colors.black),
208
+ ('FONTNAME', (0, 1), (-1, -1), 'DejaVu'),
209
+ ('FONTSIZE', (0, 1), (-1, -1), 10)
210
+ ]))
211
+ story.append(dist_table)
212
+ story.append(Spacer(1, 0.3*inch))
213
+
214
+ # Word Cloud section
215
+ if wordcloud_path:
216
+ wc_heading = Paragraph("Word Cloud Analysis", self.styles['CustomHeading'])
217
+ story.append(wc_heading)
218
+
219
+ try:
220
+ # Convert URL to file path if needed
221
+ file_path = wordcloud_path
222
+ if wordcloud_path.startswith('/'):
223
+ # It's a URL path, convert to file path
224
+ file_path = str(WORDCLOUD_DIR / wordcloud_path.split('/')[-1])
225
+
226
+ if Path(file_path).exists():
227
+ img = Image(file_path, width=5*inch, height=2.5*inch)
228
+ story.append(img)
229
+ story.append(Spacer(1, 0.2*inch))
230
+ wc_text = Paragraph(
231
+ "<i>Larger words indicate higher frequency in the comments</i>",
232
+ self.styles['CustomNormal']
233
+ )
234
+ story.append(wc_text)
235
+ except Exception as e:
236
+ print(f"Warning: Could not include word cloud: {e}")
237
+
238
+ story.append(Spacer(1, 0.3*inch))
239
+
240
+ # Page break before detailed results
241
+ story.append(PageBreak())
242
+
243
+ # Detailed Results section
244
+ results_heading = Paragraph("Detailed Results", self.styles['CustomHeading'])
245
+ story.append(results_heading)
246
+ story.append(Spacer(1, 0.2*inch))
247
+
248
+ # Results table
249
+ results_data = [['Comment', 'Rating', 'Confidence']]
250
+
251
+ for pred in predictions:
252
+ comment = pred.get('text', '')
253
+ rating = pred.get('rating', 0)
254
+ confidence = pred.get('confidence', 0)
255
+
256
+ # Create wrapped comment - let ReportLab handle wrapping
257
+ comment_paragraph = Paragraph(comment, self.styles['CustomNormal'])
258
+
259
+ # Use star character ★ instead of emoji
260
+ stars = "★" * rating
261
+
262
+ results_data.append([
263
+ comment_paragraph,
264
+ f"{stars}",
265
+ f"{confidence:.2%}"
266
+ ])
267
+
268
+ # Create table with adjusted column widths - wider comment column for wrapping
269
+ results_table = Table(results_data, colWidths=[3.5*inch, 0.8*inch, 1.2*inch])
270
+ results_table.setStyle(TableStyle([
271
+ ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#4F46E5')),
272
+ ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
273
+ ('ALIGN', (0, 0), (0, -1), 'LEFT'),
274
+ ('ALIGN', (1, 0), (-1, -1), 'CENTER'),
275
+ ('FONTNAME', (0, 0), (-1, 0), 'DejaVuBold'),
276
+ ('FONTSIZE', (0, 0), (-1, 0), 10),
277
+ ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
278
+ ('BACKGROUND', (0, 1), (-1, -1), colors.lightgrey),
279
+ ('GRID', (0, 0), (-1, -1), 1, colors.grey),
280
+ ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.lightgrey]),
281
+ ('FONTNAME', (0, 1), (-1, -1), 'DejaVu'),
282
+ ('FONTSIZE', (0, 1), (-1, -1), 9),
283
+ ('VALIGN', (0, 0), (-1, -1), 'TOP'), # Top alignment for wrapped text
284
+ ('LEFTPADDING', (0, 0), (-1, -1), 8),
285
+ ('RIGHTPADDING', (0, 0), (-1, -1), 8),
286
+ ('TOPPADDING', (0, 0), (-1, -1), 6),
287
+ ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
288
+ ]))
289
+ story.append(results_table)
290
+
291
+ # Build PDF
292
+ doc.build(story)
293
+
294
+ # Get PDF bytes
295
+ pdf_buffer.seek(0)
296
+ return pdf_buffer.getvalue()
297
+
298
+
299
+ def get_report_service() -> ReportService:
300
+ """Dependency injection for report service"""
301
+ return ReportService()
app/services/visualization_service.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Visualization Service
3
+ WordCloud generation and data visualization utilities
4
+ """
5
+ import os
6
+ from typing import List, Dict
7
+ from collections import Counter
8
+ from wordcloud import WordCloud
9
+ import matplotlib
10
+ matplotlib.use('Agg') # Use non-GUI backend
11
+ import matplotlib.pyplot as plt
12
+ from datetime import datetime
13
+ from pathlib import Path
14
+
15
+ from app.config import WORDCLOUD_DIR
16
+
17
+
18
+ class VisualizationService:
19
+ """Service for generating visualizations"""
20
+
21
+ def __init__(self):
22
+ # Vietnamese stopwords (common words to exclude)
23
+ self.stopwords = set([
24
+ 'và', 'của', 'có', 'cho', 'với', 'từ', 'này', 'được',
25
+ 'là', 'để', 'một', 'các', 'trong', 'không', 'đã', 'rất',
26
+ 'cũng', 'nhưng', 'thì', 'bị', 'khi', 'nếu', 'như', 'về',
27
+ 'tôi', 'bạn', 'mình', 'nó', 'họ', 'em', 'anh', 'chị',
28
+ 'vì', 'nên', 'đến', 'lại', 'ra', 'đang', 'sẽ', 'đều',
29
+ 'hay', 'thế', 'làm', 'được', 'rồi', 'đó', 'này', 'ở'
30
+ ])
31
+
32
+ def generate_wordcloud(self, texts: List[str], filename: str = None) -> str:
33
+ """
34
+ Generate word cloud from list of texts
35
+
36
+ Args:
37
+ texts: List of Vietnamese comments
38
+ filename: Optional custom filename
39
+
40
+ Returns:
41
+ str: Path to generated word cloud image
42
+ """
43
+ # Combine all texts
44
+ combined_text = ' '.join(texts)
45
+
46
+ # Generate filename if not provided
47
+ if filename is None:
48
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
49
+ filename = f"wordcloud_{timestamp}.png"
50
+
51
+ filepath = WORDCLOUD_DIR / filename
52
+
53
+ # Create word cloud
54
+ wordcloud = WordCloud(
55
+ width=800,
56
+ height=400,
57
+ background_color='white',
58
+ stopwords=self.stopwords,
59
+ colormap='viridis',
60
+ max_words=100,
61
+ relative_scaling=0.5,
62
+ min_font_size=10
63
+ ).generate(combined_text)
64
+
65
+ # Save to file
66
+ plt.figure(figsize=(10, 5))
67
+ plt.imshow(wordcloud, interpolation='bilinear')
68
+ plt.axis('off')
69
+ plt.tight_layout(pad=0)
70
+ plt.savefig(filepath, dpi=150, bbox_inches='tight')
71
+ plt.close()
72
+
73
+ # Return relative URL path
74
+ return f"/static/uploads/wordclouds/{filename}"
75
+
76
+ def calculate_rating_distribution(self, ratings: List[int]) -> Dict[int, int]:
77
+ """
78
+ Calculate distribution of ratings
79
+
80
+ Args:
81
+ ratings: List of ratings (1-5)
82
+
83
+ Returns:
84
+ dict: {rating: count}
85
+ """
86
+ distribution = Counter(ratings)
87
+
88
+ # Ensure all ratings 1-5 are present
89
+ for rating in range(1, 6):
90
+ if rating not in distribution:
91
+ distribution[rating] = 0
92
+
93
+ return dict(sorted(distribution.items()))
94
+
95
+ def get_top_words(self, texts: List[str], top_n: int = 20) -> List[tuple]:
96
+ """
97
+ Get most frequent words from texts
98
+
99
+ Args:
100
+ texts: List of comments
101
+ top_n: Number of top words to return
102
+
103
+ Returns:
104
+ list: [(word, count), ...]
105
+ """
106
+ # Combine and split texts
107
+ words = []
108
+ for text in texts:
109
+ words.extend(text.lower().split())
110
+
111
+ # Filter stopwords
112
+ filtered_words = [w for w in words if w not in self.stopwords and len(w) > 2]
113
+
114
+ # Count and return top words
115
+ word_counts = Counter(filtered_words)
116
+ return word_counts.most_common(top_n)
117
+
118
+
119
+ # Singleton instance
120
+ viz_service = VisualizationService()
121
+
122
+
123
+ def get_viz_service() -> VisualizationService:
124
+ """Dependency to get visualization service"""
125
+ return viz_service
app/static/css/style.css ADDED
@@ -0,0 +1 @@
 
 
1
+ /* Placeholder for custom CSS if needed */
app/static/js/main.js ADDED
@@ -0,0 +1 @@
 
 
1
+ // Placeholder for custom JavaScript if needed
app/static/uploads/.gitkeep ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Keep this directory in git but ignore its contents
2
+ *
3
+ !.gitkeep
app/templates/base.html ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="vi">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>{% block title %}Vietnamese Product Rating Prediction{% endblock %}</title>
7
+
8
+ <!-- TailwindCSS CDN -->
9
+ <script src="https://cdn.tailwindcss.com"></script>
10
+
11
+ <!-- Chart.js CDN -->
12
+ <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
13
+
14
+ <!-- Font Awesome for icons -->
15
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
16
+
17
+ <style>
18
+ @keyframes fadeIn {
19
+ from { opacity: 0; transform: translateY(10px); }
20
+ to { opacity: 1; transform: translateY(0); }
21
+ }
22
+ .fade-in {
23
+ animation: fadeIn 0.3s ease-out;
24
+ }
25
+ </style>
26
+
27
+ {% block extra_head %}{% endblock %}
28
+ </head>
29
+ <body class="bg-gradient-to-br from-blue-50 to-indigo-100 min-h-screen">
30
+ <!-- Header -->
31
+ <header class="bg-white shadow-md">
32
+ <nav class="container mx-auto px-6 py-4">
33
+ <div class="flex items-center justify-between">
34
+ <div class="flex items-center space-x-2">
35
+ <i class="fas fa-star text-yellow-500 text-2xl"></i>
36
+ <h1 class="text-2xl font-bold text-gray-800">Rating Predictor</h1>
37
+ </div>
38
+
39
+ {% block nav_items %}{% endblock %}
40
+ </div>
41
+ </nav>
42
+ </header>
43
+
44
+ <!-- Main Content -->
45
+ <main class="container mx-auto px-6 py-8">
46
+ {% block content %}{% endblock %}
47
+ </main>
48
+
49
+ <!-- Footer -->
50
+ <footer class="bg-white mt-12 py-6">
51
+ <div class="container mx-auto px-6 text-center text-gray-600">
52
+ <p>&copy; 2024 Vietnamese Product Rating Prediction System</p>
53
+ <p class="text-sm mt-2">Built with FastAPI + Jinja2 + TailwindCSS</p>
54
+ </div>
55
+ </footer>
56
+
57
+ {% block scripts %}{% endblock %}
58
+ </body>
59
+ </html>
app/templates/dashboard.html ADDED
@@ -0,0 +1,618 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Dashboard - Rating Predictor{% endblock %}
4
+
5
+ {% block nav_items %}
6
+ <div class="flex items-center space-x-4">
7
+ <span class="text-gray-700" id="username-display">
8
+ <i class="fas fa-user mr-2"></i><span id="current-username"></span>
9
+ </span>
10
+ <button
11
+ onclick="logout()"
12
+ class="bg-red-500 text-white px-4 py-2 rounded-lg hover:bg-red-600 transition"
13
+ >
14
+ <i class="fas fa-sign-out-alt mr-2"></i>Logout
15
+ </button>
16
+ </div>
17
+ {% endblock %}
18
+
19
+ {% block content %}
20
+ <div class="max-w-7xl mx-auto">
21
+ <!-- Welcome Section -->
22
+ <div class="bg-white rounded-2xl shadow-lg p-6 mb-8 fade-in">
23
+ <h2 class="text-3xl font-bold text-gray-800 mb-2">
24
+ <i class="fas fa-chart-line text-indigo-600 mr-3"></i>
25
+ Prediction Dashboard
26
+ </h2>
27
+ <p class="text-gray-600">Dự đoán đánh giá sản phẩm từ bình luận tiếng Việt</p>
28
+ </div>
29
+
30
+
31
+
32
+ <!-- Input Mode Tabs -->
33
+ <div class="bg-white rounded-2xl shadow-lg p-6 mb-8">
34
+ <div class="flex space-x-4 mb-6 border-b">
35
+ <button
36
+ onclick="switchTab('single')"
37
+ id="tab-single"
38
+ class="tab-button px-6 py-3 font-medium border-b-2 border-indigo-600 text-indigo-600"
39
+ >
40
+ <i class="fas fa-comment mr-2"></i>Single Comment
41
+ </button>
42
+ <button
43
+ onclick="switchTab('batch')"
44
+ id="tab-batch"
45
+ class="tab-button px-6 py-3 font-medium text-gray-500 hover:text-gray-700"
46
+ >
47
+ <i class="fas fa-file-csv mr-2"></i>Upload CSV
48
+ </button>
49
+ </div>
50
+
51
+ <!-- Single Prediction Form -->
52
+ <div id="single-form" class="tab-content">
53
+ <form id="singlePredictionForm">
54
+ <label class="block text-sm font-medium text-gray-700 mb-2">
55
+ Enter your comment (Vietnamese):
56
+ </label>
57
+ <textarea
58
+ id="single-comment"
59
+ rows="4"
60
+ class="w-full px-4 py-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:border-transparent transition"
61
+ placeholder="Sản phẩm rất tốt, chất lượng cao..."
62
+ ></textarea>
63
+
64
+ <button
65
+ type="submit"
66
+ class="mt-4 bg-indigo-600 text-white px-6 py-3 rounded-lg hover:bg-indigo-700 transition font-medium shadow-lg"
67
+ >
68
+ <i class="fas fa-magic mr-2"></i>Predict Rating
69
+ </button>
70
+ </form>
71
+
72
+ <!-- Single Result -->
73
+ <div id="single-result" class="hidden mt-6 p-6 bg-gradient-to-r from-green-50 to-blue-50 rounded-xl border-2 border-green-200">
74
+ <h3 class="text-xl font-bold text-gray-800 mb-4">
75
+ <i class="fas fa-star text-yellow-500 mr-2"></i>Prediction Result
76
+ </h3>
77
+ <div class="flex items-center space-x-6">
78
+ <div class="text-center">
79
+ <div class="text-5xl font-bold text-indigo-600" id="predicted-rating"></div>
80
+ <div class="text-sm text-gray-600 mt-2">Rating</div>
81
+ </div>
82
+ <div class="text-center">
83
+ <div class="text-3xl font-bold text-green-600" id="confidence-score"></div>
84
+ <div class="text-sm text-gray-600 mt-2">Confidence</div>
85
+ </div>
86
+ <div class="flex-1">
87
+ <div id="rating-stars" class="text-4xl"></div>
88
+ </div>
89
+ </div>
90
+ </div>
91
+ </div>
92
+
93
+ <!-- Batch Prediction Form -->
94
+ <div id="batch-form" class="tab-content hidden">
95
+ <form id="batchPredictionForm">
96
+ <label class="block text-sm font-medium text-gray-700 mb-2">
97
+ <i class="fas fa-tag mr-2"></i>Product/Item Name (optional):
98
+ </label>
99
+ <input
100
+ type="text"
101
+ id="batch-product-name"
102
+ class="w-full px-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:border-transparent transition mb-4"
103
+ placeholder="e.g., iPhone 15, Laptop, Shoes..."
104
+ >
105
+
106
+ <div class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center hover:border-indigo-500 transition">
107
+ <i class="fas fa-cloud-upload-alt text-5xl text-gray-400 mb-4"></i>
108
+ <label for="csv-file" class="block text-lg font-medium text-gray-700 mb-2 cursor-pointer">
109
+ Upload CSV File
110
+ </label>
111
+ <input
112
+ type="file"
113
+ id="csv-file"
114
+ accept=".csv"
115
+ class="hidden"
116
+ onchange="displayFileName(this)"
117
+ >
118
+ <p class="text-sm text-gray-500 mb-2">CSV must contain a "Comment" column</p>
119
+ <p id="file-name" class="text-sm font-medium text-indigo-600"></p>
120
+ <label for="csv-file" class="inline-block mt-4 bg-indigo-600 text-white px-6 py-2 rounded-lg hover:bg-indigo-700 cursor-pointer transition">
121
+ Choose File
122
+ </label>
123
+ </div>
124
+
125
+ <button
126
+ type="submit"
127
+ class="mt-6 bg-indigo-600 text-white px-6 py-3 rounded-lg hover:bg-indigo-700 transition font-medium shadow-lg"
128
+ >
129
+ <i class="fas fa-magic mr-2"></i>Predict Batch
130
+ </button>
131
+ </form>
132
+
133
+ <!-- Batch Results -->
134
+ <div id="batch-results" class="hidden mt-8">
135
+ <div class="grid grid-cols-1 lg:grid-cols-2 gap-6 mb-6">
136
+ <!-- Rating Distribution Chart -->
137
+ <div class="bg-white p-6 rounded-xl shadow">
138
+ <h3 class="text-lg font-bold text-gray-800 mb-4">
139
+ <i class="fas fa-chart-pie text-indigo-600 mr-2"></i>Rating Distribution
140
+ </h3>
141
+ <canvas id="ratingChart"></canvas>
142
+ </div>
143
+
144
+ <!-- Word Cloud -->
145
+ <div class="bg-white p-6 rounded-xl shadow">
146
+ <h3 class="text-lg font-bold text-gray-800 mb-4">
147
+ <i class="fas fa-cloud text-indigo-600 mr-2"></i>Word Cloud
148
+ </h3>
149
+ <img id="wordcloud-image" src="" alt="Word Cloud" class="w-full rounded-lg">
150
+ </div>
151
+ </div>
152
+
153
+ <!-- Results Table -->
154
+ <div class="bg-white p-6 rounded-xl shadow">
155
+ <div class="flex justify-between items-center mb-4">
156
+ <h3 class="text-lg font-bold text-gray-800">
157
+ <i class="fas fa-table text-indigo-600 mr-2"></i>Prediction Results
158
+ </h3>
159
+ <div class="space-x-3">
160
+ <button
161
+ onclick="downloadPDF()"
162
+ class="bg-red-600 text-white px-4 py-2 rounded-lg hover:bg-red-700 transition"
163
+ >
164
+ <i class="fas fa-file-pdf mr-2"></i>Download PDF
165
+ </button>
166
+ <button
167
+ onclick="downloadCSV()"
168
+ class="bg-green-600 text-white px-4 py-2 rounded-lg hover:bg-green-700 transition"
169
+ >
170
+ <i class="fas fa-download mr-2"></i>Download CSV
171
+ </button>
172
+ </div>
173
+ </div>
174
+ <div class="overflow-x-auto">
175
+ <table class="w-full" id="results-table">
176
+ <thead class="bg-gray-100">
177
+ <tr>
178
+ <th class="px-4 py-3 text-left text-sm font-semibold text-gray-700">Comment</th>
179
+ <th class="px-4 py-3 text-center text-sm font-semibold text-gray-700">Rating</th>
180
+ <th class="px-4 py-3 text-center text-sm font-semibold text-gray-700">Confidence</th>
181
+ </tr>
182
+ </thead>
183
+ <tbody id="results-tbody" class="divide-y divide-gray-200">
184
+ </tbody>
185
+ </table>
186
+ </div>
187
+ </div>
188
+ </div>
189
+ </div>
190
+ </div>
191
+
192
+ <!-- History Section -->
193
+ <div class="bg-white rounded-2xl shadow-lg p-6 mb-8">
194
+ <div class="flex justify-between items-center mb-6">
195
+ <h2 class="text-2xl font-bold text-gray-800">
196
+ <i class="fas fa-history text-indigo-600 mr-2"></i>Prediction History
197
+ </h2>
198
+ <button
199
+ onclick="refreshHistory()"
200
+ class="bg-indigo-600 text-white px-4 py-2 rounded-lg hover:bg-indigo-700 transition font-medium"
201
+ >
202
+ <i class="fas fa-sync-alt mr-2"></i>Refresh
203
+ </button>
204
+ </div>
205
+
206
+ <div class="overflow-x-auto">
207
+ <table class="w-full" id="history-table">
208
+ <thead class="bg-gray-100">
209
+ <tr>
210
+ <th class="px-4 py-3 text-left text-sm font-semibold text-gray-700">Date/Time</th>
211
+ <th class="px-4 py-3 text-left text-sm font-semibold text-gray-700">Comment</th>
212
+ <th class="px-4 py-3 text-center text-sm font-semibold text-gray-700">Rating</th>
213
+ <th class="px-4 py-3 text-center text-sm font-semibold text-gray-700">Confidence</th>
214
+ <th class="px-4 py-3 text-center text-sm font-semibold text-gray-700">Type</th>
215
+ </tr>
216
+ </thead>
217
+ <tbody id="history-tbody" class="divide-y divide-gray-200">
218
+ <tr class="text-center text-gray-500 py-8">
219
+ <td colspan="6" class="px-4 py-8">Loading history...</td>
220
+ </tr>
221
+ </tbody>
222
+ </table>
223
+ </div>
224
+ </div>
225
+ </div>
226
+ {% endblock %}
227
+
228
+ {% block scripts %}
229
+ <script>
230
+ // Check authentication
231
+ const token = localStorage.getItem('access_token');
232
+ const username = localStorage.getItem('username');
233
+
234
+ if (!token) {
235
+ window.location.href = '/login';
236
+ }
237
+
238
+ document.getElementById('current-username').textContent = username || 'User';
239
+
240
+ // Global variables
241
+ let currentResults = [];
242
+ let currentDistribution = {};
243
+ let currentWordcloudUrl = '';
244
+ let chartInstance = null;
245
+
246
+ // Load history on page load
247
+ document.addEventListener('DOMContentLoaded', () => {
248
+ loadHistory();
249
+ });
250
+
251
+ // Logout function
252
+ function logout() {
253
+ localStorage.removeItem('access_token');
254
+ localStorage.removeItem('username');
255
+ window.location.href = '/login';
256
+ }
257
+
258
+ // Tab switching
259
+ function switchTab(tab) {
260
+ const tabs = ['single', 'batch'];
261
+ tabs.forEach(t => {
262
+ const button = document.getElementById(`tab-${t}`);
263
+ const content = document.getElementById(`${t}-form`);
264
+
265
+ if (t === tab) {
266
+ button.classList.add('border-indigo-600', 'text-indigo-600');
267
+ button.classList.remove('text-gray-500');
268
+ content.classList.remove('hidden');
269
+ } else {
270
+ button.classList.remove('border-indigo-600', 'text-indigo-600');
271
+ button.classList.add('text-gray-500');
272
+ content.classList.add('hidden');
273
+ }
274
+ });
275
+
276
+ // Hide results when switching
277
+ document.getElementById('single-result').classList.add('hidden');
278
+ document.getElementById('batch-results').classList.add('hidden');
279
+ }
280
+
281
+ // Display selected file name
282
+ function displayFileName(input) {
283
+ const fileName = input.files[0]?.name || '';
284
+ document.getElementById('file-name').textContent = fileName ? `Selected: ${fileName}` : '';
285
+ }
286
+
287
+ // Single Prediction
288
+ document.getElementById('singlePredictionForm').addEventListener('submit', async (e) => {
289
+ e.preventDefault();
290
+
291
+ const comment = document.getElementById('single-comment').value;
292
+
293
+ if (!comment.trim()) {
294
+ alert('Please enter a comment!');
295
+ return;
296
+ }
297
+
298
+ try {
299
+ const response = await fetch('/api/predict/single', {
300
+ method: 'POST',
301
+ headers: {
302
+ 'Content-Type': 'application/json',
303
+ 'Authorization': `Bearer ${token}`
304
+ },
305
+ body: JSON.stringify({
306
+ product_name: '',
307
+ comment: comment
308
+ })
309
+ });
310
+
311
+ if (response.ok) {
312
+ const data = await response.json();
313
+ displaySingleResult(data);
314
+ // Reload history
315
+ setTimeout(() => loadHistory(), 500);
316
+ } else {
317
+ const error = await response.json();
318
+ alert(error.detail || 'Prediction failed');
319
+ }
320
+ } catch (error) {
321
+ alert('An error occurred: ' + error.message);
322
+ }
323
+ });
324
+
325
+ function displaySingleResult(data) {
326
+ document.getElementById('predicted-rating').textContent = data.predicted_rating;
327
+ document.getElementById('confidence-score').textContent = (data.confidence_score * 100).toFixed(1) + '%';
328
+
329
+ // Display stars
330
+ const stars = '⭐'.repeat(data.predicted_rating);
331
+ document.getElementById('rating-stars').textContent = stars;
332
+
333
+ document.getElementById('single-result').classList.remove('hidden');
334
+ }
335
+
336
+ // Batch Prediction
337
+ document.getElementById('batchPredictionForm').addEventListener('submit', async (e) => {
338
+ e.preventDefault();
339
+
340
+ const productName = document.getElementById('batch-product-name').value || '';
341
+ const fileInput = document.getElementById('csv-file');
342
+ const file = fileInput.files[0];
343
+
344
+ if (!file) {
345
+ alert('Please select a CSV file!');
346
+ return;
347
+ }
348
+
349
+ const formData = new FormData();
350
+ formData.append('product_name', productName);
351
+ formData.append('file', file);
352
+
353
+ try {
354
+ const response = await fetch('/api/predict/batch', {
355
+ method: 'POST',
356
+ headers: {
357
+ 'Authorization': `Bearer ${token}`
358
+ },
359
+ body: formData
360
+ });
361
+
362
+ if (response.ok) {
363
+ const data = await response.json();
364
+ displayBatchResults(data);
365
+ // Reload history
366
+ setTimeout(() => loadHistory(), 500);
367
+ } else {
368
+ const error = await response.json();
369
+ alert(error.detail || 'Prediction failed');
370
+ }
371
+ } catch (error) {
372
+ alert('An error occurred: ' + error.message);
373
+ }
374
+ });
375
+
376
+ function displayBatchResults(data) {
377
+ currentResults = data.results;
378
+ currentDistribution = data.rating_distribution;
379
+ currentWordcloudUrl = data.wordcloud_url;
380
+
381
+ // Display word cloud
382
+ document.getElementById('wordcloud-image').src = data.wordcloud_url;
383
+
384
+ // Create chart
385
+ createRatingChart(data.rating_distribution);
386
+
387
+ // Populate table
388
+ const tbody = document.getElementById('results-tbody');
389
+ tbody.innerHTML = '';
390
+
391
+ data.results.forEach(result => {
392
+ const row = `
393
+ <tr class="hover:bg-gray-50">
394
+ <td class="px-4 py-3 text-sm text-gray-700">${result.Comment}</td>
395
+ <td class="px-4 py-3 text-center">
396
+ <span class="inline-block bg-indigo-100 text-indigo-800 px-3 py-1 rounded-full font-semibold">
397
+ ${result.Predicted_Rating}⭐
398
+ </span>
399
+ </td>
400
+ <td class="px-4 py-3 text-center text-sm text-gray-600">
401
+ ${(result.Confidence * 100).toFixed(1)}%
402
+ </td>
403
+ </tr>
404
+ `;
405
+ tbody.innerHTML += row;
406
+ });
407
+
408
+ document.getElementById('batch-results').classList.remove('hidden');
409
+ }
410
+
411
+ function createRatingChart(distribution) {
412
+ const ctx = document.getElementById('ratingChart').getContext('2d');
413
+
414
+ // Destroy existing chart
415
+ if (chartInstance) {
416
+ chartInstance.destroy();
417
+ }
418
+
419
+ chartInstance = new Chart(ctx, {
420
+ type: 'bar',
421
+ data: {
422
+ labels: ['1⭐', '2⭐', '3⭐', '4⭐', '5⭐'],
423
+ datasets: [{
424
+ label: 'Number of Reviews',
425
+ data: [
426
+ distribution[1] || 0,
427
+ distribution[2] || 0,
428
+ distribution[3] || 0,
429
+ distribution[4] || 0,
430
+ distribution[5] || 0
431
+ ],
432
+ backgroundColor: [
433
+ 'rgba(239, 68, 68, 0.8)',
434
+ 'rgba(251, 146, 60, 0.8)',
435
+ 'rgba(250, 204, 21, 0.8)',
436
+ 'rgba(132, 204, 22, 0.8)',
437
+ 'rgba(34, 197, 94, 0.8)'
438
+ ],
439
+ borderColor: [
440
+ 'rgba(239, 68, 68, 1)',
441
+ 'rgba(251, 146, 60, 1)',
442
+ 'rgba(250, 204, 21, 1)',
443
+ 'rgba(132, 204, 22, 1)',
444
+ 'rgba(34, 197, 94, 1)'
445
+ ],
446
+ borderWidth: 2
447
+ }]
448
+ },
449
+ options: {
450
+ responsive: true,
451
+ maintainAspectRatio: true,
452
+ plugins: {
453
+ legend: {
454
+ display: false
455
+ }
456
+ },
457
+ scales: {
458
+ y: {
459
+ beginAtZero: true,
460
+ ticks: {
461
+ stepSize: 1
462
+ }
463
+ }
464
+ }
465
+ }
466
+ });
467
+ }
468
+
469
+ function downloadCSV() {
470
+ if (currentResults.length === 0) {
471
+ alert('No results to download');
472
+ return;
473
+ }
474
+
475
+ // Create CSV content
476
+ const headers = ['Comment', 'Predicted_Rating', 'Confidence'];
477
+ const csvContent = [
478
+ headers.join(','),
479
+ ...currentResults.map(r =>
480
+ `"${r.Comment.replace(/"/g, '""')}",${r.Predicted_Rating},${r.Confidence}`
481
+ )
482
+ ].join('\n');
483
+
484
+ // Create download link
485
+ const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
486
+ const link = document.createElement('a');
487
+ const url = URL.createObjectURL(blob);
488
+
489
+ link.setAttribute('href', url);
490
+ link.setAttribute('download', `predictions_${new Date().getTime()}.csv`);
491
+ link.style.visibility = 'hidden';
492
+
493
+ document.body.appendChild(link);
494
+ link.click();
495
+ document.body.removeChild(link);
496
+ }
497
+
498
+ function downloadPDF() {
499
+ if (currentResults.length === 0) {
500
+ alert('No results to download');
501
+ return;
502
+ }
503
+
504
+ try {
505
+ // Prepare data
506
+ const predictions = currentResults.map(r => ({
507
+ text: r.Comment,
508
+ rating: r.Predicted_Rating,
509
+ confidence: r.Confidence
510
+ }));
511
+
512
+ // Send request to generate PDF
513
+ fetch('/api/predict/download-pdf', {
514
+ method: 'POST',
515
+ headers: {
516
+ 'Authorization': `Bearer ${token}`,
517
+ 'Content-Type': 'application/json'
518
+ },
519
+ body: JSON.stringify({
520
+ predictions: predictions,
521
+ distribution: currentDistribution,
522
+ wordcloud_path: currentWordcloudUrl
523
+ })
524
+ })
525
+ .then(response => {
526
+ if (response.ok) {
527
+ return response.blob();
528
+ }
529
+ throw new Error('Failed to generate PDF');
530
+ })
531
+ .then(blob => {
532
+ const url = URL.createObjectURL(blob);
533
+ const link = document.createElement('a');
534
+ link.href = url;
535
+ link.download = `predictions_report_${new Date().getTime()}.pdf`;
536
+ document.body.appendChild(link);
537
+ link.click();
538
+ document.body.removeChild(link);
539
+ URL.revokeObjectURL(url);
540
+ })
541
+ .catch(error => {
542
+ console.error('Error downloading PDF:', error);
543
+ alert('Error generating PDF report. Please try again.');
544
+ });
545
+ } catch (error) {
546
+ console.error('Error preparing PDF download:', error);
547
+ alert('Error preparing PDF report');
548
+ }
549
+ }
550
+
551
+ // Load and display prediction history
552
+ async function loadHistory() {
553
+ try {
554
+ const response = await fetch('/api/predict/history', {
555
+ headers: {
556
+ 'Authorization': `Bearer ${token}`
557
+ }
558
+ });
559
+
560
+ if (response.ok) {
561
+ const history = await response.json();
562
+ displayHistory(history);
563
+ } else {
564
+ console.error('Failed to load history');
565
+ }
566
+ } catch (error) {
567
+ console.error('Error loading history:', error);
568
+ }
569
+ }
570
+
571
+ function displayHistory(history) {
572
+ const tbody = document.getElementById('history-tbody');
573
+
574
+ if (history.length === 0) {
575
+ tbody.innerHTML = `
576
+ <tr class="text-center text-gray-500">
577
+ <td colspan="5" class="px-4 py-8">
578
+ <i class="fas fa-inbox text-3xl text-gray-300 mb-2"></i>
579
+ <p>No prediction history yet</p>
580
+ </td>
581
+ </tr>
582
+ `;
583
+ return;
584
+ }
585
+
586
+ tbody.innerHTML = '';
587
+ history.forEach(item => {
588
+ const date = new Date(item.created_at).toLocaleString();
589
+ const shortComment = item.comment.length > 50
590
+ ? item.comment.substring(0, 50) + '...'
591
+ : item.comment;
592
+ const row = `
593
+ <tr class="hover:bg-gray-50">
594
+ <td class="px-4 py-3 text-sm text-gray-600">${date}</td>
595
+ <td class="px-4 py-3 text-sm text-gray-700" title="${item.comment}">${shortComment}</td>
596
+ <td class="px-4 py-3 text-center">
597
+ <span class="inline-block bg-indigo-100 text-indigo-800 px-3 py-1 rounded-full font-semibold text-sm">
598
+ ${item.predicted_rating}⭐
599
+ </span>
600
+ </td>
601
+ <td class="px-4 py-3 text-center text-sm text-gray-600">
602
+ ${(item.confidence_score * 100).toFixed(1)}%
603
+ </td>
604
+ <td class="px-4 py-3 text-center text-sm">
605
+ <span class="inline-block ${item.prediction_type === 'single' ? 'bg-blue-100 text-blue-800' : 'bg-green-100 text-green-800'} px-2 py-1 rounded text-xs font-semibold">
606
+ ${item.prediction_type}
607
+ </span>
608
+ </td>
609
+ </tr>
610
+ `;
611
+ tbody.innerHTML += row;
612
+ });
613
+ }
614
+
615
+ function refreshHistory() {
616
+ loadHistory();
617
+ }</script>
618
+ {% endblock %}
app/templates/login.html ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Login - Rating Predictor{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="flex items-center justify-center min-h-[calc(100vh-200px)]">
7
+ <div class="bg-white rounded-2xl shadow-xl p-8 w-full max-w-md fade-in">
8
+ <div class="text-center mb-8">
9
+ <i class="fas fa-sign-in-alt text-5xl text-indigo-600 mb-4"></i>
10
+ <h2 class="text-3xl font-bold text-gray-800">Đăng Nhập</h2>
11
+ <p class="text-gray-600 mt-2">Welcome back to Rating Predictor</p>
12
+ </div>
13
+
14
+ <!-- Alert Messages -->
15
+ <div id="alert" class="hidden mb-4 p-4 rounded-lg"></div>
16
+
17
+ <form id="loginForm" class="space-y-6">
18
+ <div>
19
+ <label class="block text-sm font-medium text-gray-700 mb-2">
20
+ <i class="fas fa-user mr-2"></i>Username
21
+ </label>
22
+ <input
23
+ type="text"
24
+ id="username"
25
+ name="username"
26
+ required
27
+ class="w-full px-4 py-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:border-transparent transition"
28
+ placeholder="Enter your username"
29
+ >
30
+ </div>
31
+
32
+ <div>
33
+ <label class="block text-sm font-medium text-gray-700 mb-2">
34
+ <i class="fas fa-lock mr-2"></i>Password
35
+ </label>
36
+ <input
37
+ type="password"
38
+ id="password"
39
+ name="password"
40
+ required
41
+ class="w-full px-4 py-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:border-transparent transition"
42
+ placeholder="Enter your password"
43
+ >
44
+ </div>
45
+
46
+ <button
47
+ type="submit"
48
+ class="w-full bg-indigo-600 text-white py-3 rounded-lg hover:bg-indigo-700 transition font-medium shadow-lg hover:shadow-xl"
49
+ >
50
+ <i class="fas fa-sign-in-alt mr-2"></i>Login
51
+ </button>
52
+ </form>
53
+
54
+ <div class="mt-6 text-center">
55
+ <p class="text-gray-600">
56
+ Don't have an account?
57
+ <a href="/register" class="text-indigo-600 hover:text-indigo-800 font-medium">
58
+ Register here
59
+ </a>
60
+ </p>
61
+ </div>
62
+ </div>
63
+ </div>
64
+ {% endblock %}
65
+
66
+ {% block scripts %}
67
+ <script>
68
+ document.getElementById('loginForm').addEventListener('submit', async (e) => {
69
+ e.preventDefault();
70
+
71
+ const username = document.getElementById('username').value;
72
+ const password = document.getElementById('password').value;
73
+ const alertDiv = document.getElementById('alert');
74
+
75
+ try {
76
+ // Create form data for OAuth2
77
+ const formData = new URLSearchParams();
78
+ formData.append('username', username);
79
+ formData.append('password', password);
80
+
81
+ const response = await fetch('/api/auth/login', {
82
+ method: 'POST',
83
+ headers: {
84
+ 'Content-Type': 'application/x-www-form-urlencoded',
85
+ },
86
+ body: formData
87
+ });
88
+
89
+ const data = await response.json();
90
+
91
+ if (response.ok) {
92
+ // Save token
93
+ localStorage.setItem('access_token', data.access_token);
94
+ localStorage.setItem('username', username);
95
+
96
+ // Show success message
97
+ alertDiv.className = 'mb-4 p-4 rounded-lg bg-green-100 text-green-700';
98
+ alertDiv.textContent = 'Login successful! Redirecting...';
99
+ alertDiv.classList.remove('hidden');
100
+
101
+ // Redirect to dashboard
102
+ setTimeout(() => {
103
+ window.location.href = '/dashboard';
104
+ }, 1000);
105
+ } else {
106
+ // Show error
107
+ alertDiv.className = 'mb-4 p-4 rounded-lg bg-red-100 text-red-700';
108
+ alertDiv.textContent = data.detail || 'Login failed. Please try again.';
109
+ alertDiv.classList.remove('hidden');
110
+ }
111
+ } catch (error) {
112
+ alertDiv.className = 'mb-4 p-4 rounded-lg bg-red-100 text-red-700';
113
+ alertDiv.textContent = 'An error occurred. Please try again.';
114
+ alertDiv.classList.remove('hidden');
115
+ }
116
+ });
117
+ </script>
118
+ {% endblock %}