vtdung23 committed on
Commit
92db116
·
0 Parent(s):

Enhanced app with Dark Mode, Toast Notifications, SHAP Explanation, N-gram Analysis, Keyword Highlighting

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +82 -0
  2. .env.example +19 -0
  3. .gitattributes +1 -0
  4. .vscode/settings.json +5 -0
  5. 4.0.0 +6 -0
  6. ARCHITECTURE.md +387 -0
  7. DEPLOYMENT.md +287 -0
  8. Dockerfile +61 -0
  9. HUGGING_FACE_DEPLOYMENT.md +258 -0
  10. INDEX.md +296 -0
  11. PROJECT_STRUCTURE.txt +326 -0
  12. PROJECT_SUMMARY.md +293 -0
  13. Procfile +1 -0
  14. QUICKSTART.md +116 -0
  15. README.md +12 -0
  16. README_HF_SPACE.md +86 -0
  17. RENDER_QUICKSTART.md +137 -0
  18. __pycache__/main.cpython-311.pyc +0 -0
  19. __pycache__/main.cpython-313.pyc +0 -0
  20. app/__init__.py +1 -0
  21. app/__pycache__/__init__.cpython-311.pyc +0 -0
  22. app/__pycache__/__init__.cpython-313.pyc +0 -0
  23. app/__pycache__/config.cpython-311.pyc +0 -0
  24. app/__pycache__/config.cpython-313.pyc +0 -0
  25. app/__pycache__/database.cpython-311.pyc +0 -0
  26. app/__pycache__/database.cpython-313.pyc +0 -0
  27. app/__pycache__/models.cpython-311.pyc +0 -0
  28. app/__pycache__/models.cpython-313.pyc +0 -0
  29. app/__pycache__/schemas.cpython-311.pyc +0 -0
  30. app/__pycache__/schemas.cpython-313.pyc +0 -0
  31. app/config.py +46 -0
  32. app/database.py +66 -0
  33. app/database/.gitkeep +1 -0
  34. app/database/rating_prediction.db +0 -0
  35. app/models.py +43 -0
  36. app/routers/__init__.py +1 -0
  37. app/routers/__pycache__/__init__.cpython-311.pyc +0 -0
  38. app/routers/__pycache__/__init__.cpython-313.pyc +0 -0
  39. app/routers/__pycache__/auth.cpython-311.pyc +0 -0
  40. app/routers/__pycache__/auth.cpython-313.pyc +0 -0
  41. app/routers/__pycache__/dashboard.cpython-311.pyc +0 -0
  42. app/routers/__pycache__/dashboard.cpython-313.pyc +0 -0
  43. app/routers/__pycache__/prediction.cpython-311.pyc +0 -0
  44. app/routers/__pycache__/prediction.cpython-313.pyc +0 -0
  45. app/routers/auth.py +97 -0
  46. app/routers/dashboard.py +44 -0
  47. app/routers/prediction.py +359 -0
  48. app/schemas.py +114 -0
  49. app/services/Model/phoBERT_multi_class_tokenizer/added_tokens.json +3 -0
  50. app/services/Model/phoBERT_multi_class_tokenizer/bpe.codes +0 -0
.dockerignore ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================
2
+ # Docker Ignore File
3
+ # Exclude unnecessary files from Docker build context
4
+ # ============================================
5
+
6
+ # Python
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+ *.so
11
+ .Python
12
+ *.egg-info/
13
+ dist/
14
+ build/
15
+ pip-log.txt
16
+ pip-delete-this-directory.txt
17
+
18
+ # Virtual Environments
19
+ env/
20
+ venv/
21
+ ENV/
22
+ env.bak/
23
+ venv.bak/
24
+ .venv/
25
+
26
+ # IDEs
27
+ .vscode/
28
+ .idea/
29
+ *.swp
30
+ *.swo
31
+ *~
32
+ .DS_Store
33
+
34
+ # Git
35
+ .git/
36
+ .gitignore
37
+ .gitattributes
38
+
39
+ # Documentation (not needed in container)
40
+ *.md
41
+ !README.md
42
+ ARCHITECTURE.md
43
+ DEPLOYMENT.md
44
+ FIX_OOM_RENDER.md
45
+ INDEX.md
46
+ QUICKSTART.md
47
+ RENDER_QUICKSTART.md
48
+ TESTING_GUIDE.md
49
+ PROJECT_STRUCTURE.txt
50
+ PROJECT_SUMMARY.md
51
+
52
+ # Database (use external PostgreSQL)
53
+ *.db
54
+ *.sqlite
55
+ *.sqlite3
56
+ app/database/*.db
57
+
58
+ # Uploads (use external storage in production)
59
+ app/static/uploads/wordclouds/*
60
+ app/static/uploads/*.csv
61
+ !app/static/uploads/.gitkeep
62
+
63
+ # Logs
64
+ *.log
65
+
66
+ # Testing
67
+ .pytest_cache/
68
+ .coverage
69
+ htmlcov/
70
+ .tox/
71
+
72
+ # Render specific
73
+ Procfile
74
+
75
+ # Environment files (secrets should be in HF Settings)
76
+ .env
77
+ .env.*
78
+
79
+ # Temporary files
80
+ *.tmp
81
+ tmp/
82
+ temp/
.env.example ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================
2
+ # ENVIRONMENT VARIABLES TEMPLATE
3
+ # ============================================
4
+ # Copy this file to .env for local development
5
+ # On Render, set these in Environment Variables tab
6
+
7
+ # Security (Required)
8
+ SECRET_KEY=your-super-secret-random-key-change-this-in-production
9
+
10
+ # Database (Optional - auto-configured by Render)
11
+ # DATABASE_URL=postgresql://user:password@host:5432/database
12
+ # Leave blank for local SQLite development
13
+
14
+ # Application Settings
15
+ PYTHON_VERSION=3.11.0
16
+ PORT=8000
17
+
18
+ # HuggingFace Cache (Optional - only for local dev)
19
+ # HF_HOME=/path/to/huggingface/cache
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.pth filter=lfs diff=lfs merge=lfs -text
.vscode/settings.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "python-envs.defaultEnvManager": "ms-python.python:conda",
3
+ "python-envs.defaultPackageManager": "ms-python.python:conda",
4
+ "python-envs.pythonProjects": []
5
+ }
4.0.0 ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Defaulting to user installation because normal site-packages is not writeable
2
+ Collecting bcrypt
3
+ Using cached bcrypt-5.0.0-cp39-abi3-win_amd64.whl.metadata (10 kB)
4
+ Using cached bcrypt-5.0.0-cp39-abi3-win_amd64.whl (150 kB)
5
+ Installing collected packages: bcrypt
6
+ Successfully installed bcrypt-5.0.0
ARCHITECTURE.md ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🏗️ System Architecture
2
+
3
+ ## High-Level Architecture
4
+
5
+ ```
6
+ ┌─────────────────────────────────────────────────────────────┐
7
+ │ FRONTEND │
8
+ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
9
+ │ │ Login/ │ │ Dashboard │ │ Register │ │
10
+ │ │ Register │ │ (Jinja2) │ │ Page │ │
11
+ │ │ (Jinja2) │ │ + TailwindCSS│ │ (Jinja2) │ │
12
+ │ └──────────────┘ └──────────────┘ └──────────────┘ │
13
+ │ │ │ │ │
14
+ │ └──────────────────┴──────────────────┘ │
15
+ │ │ │
16
+ │ JavaScript (Fetch API) │
17
+ │ + Chart.js for viz │
18
+ └────────────────────────────│────────────────────────────────┘
19
+
20
+
21
+ ┌─────────────────────────────────────────────────────────────┐
22
+ │ FASTAPI BACKEND │
23
+ │ ┌───────────────────────────────────────────────────┐ │
24
+ │ │ API ROUTERS │ │
25
+ │ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
26
+ │ │ │ Auth │ │Prediction│ │Dashboard │ │ │
27
+ │ │ │ Router │ │ Router │ │ Router │ │ │
28
+ │ │ │ /api/auth│ │/api/pred │ │ /pages │ │ │
29
+ │ │ └──────────┘ └──────────┘ └──────────┘ │ │
30
+ │ └───────────────────────────────────────────────────┘ │
31
+ │ │ │
32
+ │ ▼ │
33
+ │ ┌───────────────────────────────────────────────────┐ │
34
+ │ │ SERVICES │ │
35
+ │ │ ┌──────────────┐ ┌──────────────┐ │ │
36
+ │ │ │ Auth │ │ ML │ │ │
37
+ │ │ │ Service │ │ Service │ │ │
38
+ │ │ │(JWT, bcrypt) │ │ (Model) │ │ │
39
+ │ │ └──────────────┘ └──────────────┘ │ │
40
+ │ │ ┌──────────────────────────────────┐ │ │
41
+ │ │ │ Visualization Service │ │ │
42
+ │ │ │ (WordCloud, Charts) │ │ │
43
+ │ │ └──────────────────────────────────┘ │ │
44
+ │ └───────────────────────────────────────────────────┘ │
45
+ │ │ │
46
+ │ ▼ │
47
+ │ ┌───────────────────────────────────────────────────┐ │
48
+ │ │ DATA LAYER │ │
49
+ │ │ ┌──────────┐ ┌──────────┐ │ │
50
+ │ │ │ SQLAlchemy│ │ Pydantic │ │ │
51
+ │ │ │ Models │ │ Schemas │ │ │
52
+ │ │ │(ORM Layer)│ │(Validation) │ │
53
+ │ │ └──────────┘ └──────────┘ │ │
54
+ │ └───────────────────────────────────────────────────┘ │
55
+ └────────────────────────────│────────────────────────────────┘
56
+
57
+
58
+ ┌─────────────────────────────────────────────────────────────┐
59
+ │ DATABASE │
60
+ │ ┌──────────────────────┐ ┌──────────────────────┐ │
61
+ │ │ Users Table │ │ PredictionHistory │ │
62
+ │ │ - id (PK) │ │ - id (PK) │ │
63
+ │ │ - username │ │ - user_id (FK) │ │
64
+ │ │ - email │ │ - product_name │ │
65
+ │ │ - hashed_password │ │ - comment │ │
66
+ │ │ - created_at │ │ - predicted_rating │ │
67
+ │ │ │ │ - confidence_score │ │
68
+ │ │ │ │ - created_at │ │
69
+ │ └──────────────────────┘ └──────────────────────┘ │
70
+ │ SQLite Database │
71
+ └─────────────────────────────────────────────────────────────┘
72
+ ```
73
+
74
+ ---
75
+
76
+ ## Request Flow Examples
77
+
78
+ ### 1️⃣ User Login Flow
79
+
80
+ ```
81
+ User enters credentials
82
+
83
+
84
+ [Login.html]
85
+
86
+
87
+ POST /api/auth/login
88
+
89
+
90
+ [Auth Router]
91
+
92
+
93
+ [Auth Service] ──► Verify password (bcrypt)
94
+ │ Generate JWT token
95
+
96
+ [Database] ──► Query User table
97
+
98
+
99
+ Return JWT token to frontend
100
+
101
+
102
+ Store token in localStorage
103
+
104
+
105
+ Redirect to /dashboard
106
+ ```
107
+
108
+ ### 2️⃣ Single Prediction Flow
109
+
110
+ ```
111
+ User enters comment
112
+
113
+
114
+ [Dashboard.html]
115
+
116
+
117
+ POST /api/predict/single
118
+ (with JWT token in header)
119
+
120
+
121
+ [Prediction Router]
122
+
123
+
124
+ [Auth Service] ──► Verify JWT token
125
+
126
+
127
+ [ML Service] ──► predict_single(comment)
128
+ │ (DUMMY: return random rating)
129
+
130
+ [Database] ──► Save to PredictionHistory
131
+
132
+
133
+ Return {rating, confidence}
134
+
135
+
136
+ Display result in UI
137
+ ```
138
+
139
+ ### 3️⃣ Batch CSV Prediction Flow
140
+
141
+ ```
142
+ User uploads CSV file
143
+
144
+
145
+ [Dashboard.html]
146
+
147
+
148
+ POST /api/predict/batch
149
+ (multipart/form-data)
150
+
151
+
152
+ [Prediction Router]
153
+
154
+
155
+ Parse CSV ──► Extract comments
156
+
157
+
158
+ [ML Service] ──► predict_batch(comments)
159
+ │ For each comment:
160
+ │ predict_single()
161
+
162
+ [Visualization Service]
163
+
164
+ ├──► generate_wordcloud()
165
+ │ Save PNG to /static/uploads/
166
+
167
+ └──► calculate_rating_distribution()
168
+ Count 1⭐, 2⭐, 3⭐, 4⭐, 5⭐
169
+
170
+
171
+ [Database] ──► Save all predictions
172
+
173
+
174
+ Return:
175
+ - wordcloud_url
176
+ - rating_distribution
177
+ - results array
178
+
179
+
180
+ [Dashboard.html]
181
+
182
+ ├──► Render Chart.js bar chart
183
+ ├──► Display word cloud image
184
+ ├──► Populate results table
185
+ └──► Enable CSV download
186
+ ```
187
+
188
+ ---
189
+
190
+ ## Technology Stack Details
191
+
192
+ ### Backend
193
+ ```
194
+ FastAPI (0.104.1)
195
+ ├── Auto-generates Swagger UI (/docs)
196
+ ├── Automatic data validation (Pydantic)
197
+ ├── Async support
198
+ └── Built-in dependency injection
199
+
200
+ SQLAlchemy (2.0.23)
201
+ ├── ORM for database operations
202
+ ├── Models: User, PredictionHistory
203
+ └── Automatic table creation
204
+
205
+ JWT Authentication
206
+ ├── python-jose for token generation
207
+ ├── passlib[bcrypt] for password hashing
208
+ └── OAuth2PasswordBearer for token validation
209
+ ```
210
+
211
+ ### Frontend
212
+ ```
213
+ Jinja2 Templates
214
+ ├── Server-side rendering
215
+ ├── Template inheritance (base.html)
216
+ └── Context variables from backend
217
+
218
+ TailwindCSS (CDN)
219
+ ├── Utility-first CSS framework
220
+ ├── Responsive design
221
+ └── Custom animations
222
+
223
+ Chart.js (CDN)
224
+ ├── Interactive bar charts
225
+ └── Rating distribution visualization
226
+
227
+ JavaScript (Vanilla)
228
+ ├── Fetch API for HTTP requests
229
+ ├── LocalStorage for JWT token
230
+ └── Dynamic DOM manipulation
231
+ ```
232
+
233
+ ### Visualization
234
+ ```
235
+ WordCloud (1.9.3)
236
+ ├── Generate word cloud images
237
+ ├── Vietnamese stopwords support
238
+ └── Save to PNG files
239
+
240
+ Matplotlib (3.8.2)
241
+ ├── Render word cloud to image
242
+ └── Non-GUI backend (Agg)
243
+ ```
244
+
245
+ ---
246
+
247
+ ## File Responsibilities
248
+
249
+ ### Backend Files
250
+ | File | Purpose |
251
+ |------|---------|
252
+ | `main.py` | FastAPI app initialization, router inclusion |
253
+ | `config.py` | Configuration (SECRET_KEY, products list) |
254
+ | `database.py` | SQLAlchemy engine, session management |
255
+ | `models.py` | Database table definitions (User, PredictionHistory) |
256
+ | `schemas.py` | Pydantic models for request/response validation |
257
+
258
+ ### Router Files
259
+ | File | Purpose |
260
+ |------|---------|
261
+ | `routers/auth.py` | Register, login, get current user |
262
+ | `routers/prediction.py` | Single/batch prediction, history |
263
+ | `routers/dashboard.py` | Serve HTML pages (login, register, dashboard) |
264
+
265
+ ### Service Files
266
+ | File | Purpose |
267
+ |------|---------|
268
+ | `services/auth_service.py` | JWT generation, password hashing, token validation |
269
+ | `services/ml_service.py` | ML model wrapper, prediction logic (DUMMY) |
270
+ | `services/visualization_service.py` | WordCloud generation, chart data |
271
+
272
+ ### Frontend Files
273
+ | File | Purpose |
274
+ |------|---------|
275
+ | `templates/base.html` | Base layout with navigation, CDN imports |
276
+ | `templates/login.html` | Login form with JWT handling |
277
+ | `templates/register.html` | Registration form |
278
+ | `templates/dashboard.html` | Main interface (product select, predictions, viz) |
279
+
280
+ ---
281
+
282
+ ## Security Features
283
+
284
+ 1. **Password Hashing:** bcrypt with salt
285
+ 2. **JWT Tokens:** Signed with SECRET_KEY (HS256)
286
+ 3. **Token Expiration:** 24 hours
287
+ 4. **Protected Routes:** Dependency injection (`get_current_user`)
288
+ 5. **CORS:** Configured for security
289
+ 6. **Input Validation:** Pydantic schemas
290
+
291
+ ---
292
+
293
+ ## Database Schema
294
+
295
+ ```sql
296
+ -- Users Table
297
+ CREATE TABLE users (
298
+ id INTEGER PRIMARY KEY,
299
+ username VARCHAR(50) UNIQUE NOT NULL,
300
+ email VARCHAR(100) UNIQUE NOT NULL,
301
+ hashed_password VARCHAR(255) NOT NULL,
302
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP
303
+ );
304
+
305
+ -- PredictionHistory Table
306
+ CREATE TABLE prediction_history (
307
+ id INTEGER PRIMARY KEY,
308
+ user_id INTEGER NOT NULL,
309
+ product_name VARCHAR(200) NOT NULL,
310
+ comment TEXT NOT NULL,
311
+ predicted_rating INTEGER NOT NULL,
312
+ confidence_score FLOAT,
313
+ prediction_type VARCHAR(20) DEFAULT 'single',
314
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
315
+ FOREIGN KEY (user_id) REFERENCES users(id)
316
+ );
317
+ ```
318
+
319
+ ---
320
+
321
+ ## API Response Examples
322
+
323
+ ### POST /api/auth/login
324
+ ```json
325
+ {
326
+ "access_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
327
+ "token_type": "bearer"
328
+ }
329
+ ```
330
+
331
+ ### POST /api/predict/single
332
+ ```json
333
+ {
334
+ "predicted_rating": 5,
335
+ "confidence_score": 0.92,
336
+ "comment": "Sản phẩm rất tốt..."
337
+ }
338
+ ```
339
+
340
+ ### POST /api/predict/batch
341
+ ```json
342
+ {
343
+ "total_predictions": 20,
344
+ "rating_distribution": {
345
+ "1": 2,
346
+ "2": 3,
347
+ "3": 5,
348
+ "4": 6,
349
+ "5": 4
350
+ },
351
+ "wordcloud_url": "/static/uploads/wordclouds/wordcloud_20241125_143022.png",
352
+ "results": [
353
+ {
354
+ "Comment": "Sản phẩm tốt",
355
+ "Predicted_Rating": 5,
356
+ "Confidence": 0.95
357
+ }
358
+ ],
359
+ "csv_download_url": "/api/predict/download/1/1700924622.123"
360
+ }
361
+ ```
362
+
363
+ ---
364
+
365
+ ## Deployment Checklist
366
+
367
+ Before production:
368
+ - [ ] Change `SECRET_KEY` in config.py
369
+ - [ ] Set `reload=False` in uvicorn
370
+ - [ ] Configure CORS properly
371
+ - [ ] Use PostgreSQL instead of SQLite
372
+ - [ ] Add environment variables (.env file)
373
+ - [ ] Set up HTTPS
374
+ - [ ] Add rate limiting
375
+ - [ ] Configure logging
376
+ - [ ] Add error monitoring
377
+ - [ ] Set up backup strategy
378
+
379
+ ---
380
+
381
+ This architecture provides:
382
+ ✅ **Separation of Concerns**
383
+ ✅ **Scalability** (easy to add features)
384
+ ✅ **Maintainability** (clear file structure)
385
+ ✅ **Security** (JWT, password hashing)
386
+ ✅ **Documentation** (auto-generated Swagger)
387
+ ✅ **Testing** (clear API endpoints)
DEPLOYMENT.md ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 Deployment Guide for Render.com
2
+
3
+ ## Pre-Deployment Checklist
4
+
5
+ - [x] Updated `requirements.txt` with `psycopg2-binary` and `gunicorn`
6
+ - [x] Modified `database.py` for hybrid SQLite/PostgreSQL support
7
+ - [x] Updated `config.py` to read `SECRET_KEY` from environment
8
+ - [x] Auto-migration enabled in `main.py`
9
+ - [ ] Push code to GitHub repository
10
+ - [ ] Create Render account
11
+
12
+ ---
13
+
14
+ ## 📦 Step 1: Prepare Your Repository
15
+
16
+ 1. **Commit all changes:**
17
+ ```bash
18
+ git add .
19
+ git commit -m "Prepare for Render deployment"
20
+ git push origin master
21
+ ```
22
+
23
+ 2. **Ensure these files exist:**
24
+ - ✅ `requirements.txt` (with psycopg2-binary, gunicorn)
25
+ - ✅ `main.py` (with Base.metadata.create_all)
26
+ - ✅ `app/database.py` (hybrid support)
27
+ - ✅ `app/config.py` (environment variables)
28
+
29
+ ---
30
+
31
+ ## 🌐 Step 2: Deploy on Render
32
+
33
+ ### A. Create New Web Service
34
+
35
+ 1. Go to https://dashboard.render.com/
36
+ 2. Click **"New +"** → **"Web Service"**
37
+ 3. Connect your GitHub repository
38
+ 4. Select your repository: `Predict-Rating-Web-App`
39
+
40
+ ### B. Configure Web Service
41
+
42
+ Fill in the following settings:
43
+
44
+ | Setting | Value |
45
+ |---------|-------|
46
+ | **Name** | `vietnamese-rating-prediction` (or your choice) |
47
+ | **Region** | Singapore / Oregon (closest to you) |
48
+ | **Branch** | `master` |
49
+ | **Root Directory** | (leave blank) |
50
+ | **Runtime** | `Python 3` |
51
+ | **Build Command** | `pip install -r requirements.txt` |
52
+ | **Start Command** | `gunicorn main:app --workers 4 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$PORT` |
53
+ | **Instance Type** | `Free` |
54
+
55
+ ### C. Add Environment Variables
56
+
57
+ Click **"Environment"** tab and add:
58
+
59
+ | Key | Value | Notes |
60
+ |-----|-------|-------|
61
+ | `SECRET_KEY` | `your-super-secret-random-key-here-2024` | Generate with: `openssl rand -hex 32` |
62
+ | `PYTHON_VERSION` | `3.11.0` | Specify Python version |
63
+
64
+ **DO NOT set `DATABASE_URL` manually** - Render will auto-create it when you add PostgreSQL.
65
+
66
+ ---
67
+
68
+ ## 🗄️ Step 3: Add PostgreSQL Database
69
+
70
+ ### A. Create Database
71
+
72
+ 1. In Render Dashboard, click **"New +"** → **"PostgreSQL"**
73
+ 2. Configure:
74
+ - **Name:** `vietnamese-rating-db`
75
+ - **Database:** `rating_prediction`
76
+ - **User:** (auto-generated)
77
+ - **Region:** Same as web service
78
+ - **PostgreSQL Version:** `15`
79
+ - **Instance Type:** `Free`
80
+
81
+ 3. Click **"Create Database"**
82
+
83
+ ### B. Link Database to Web Service
84
+
85
+ 1. Go back to your **Web Service**
86
+ 2. Click **"Environment"** tab
87
+ 3. Click **"Add Environment Variable"**
88
+ 4. Select **"Add from Database"**
89
+ 5. Choose your `vietnamese-rating-db`
90
+ 6. It will auto-populate `DATABASE_URL`
91
+
92
+ ### C. Verify Connection
93
+
94
+ The `database.py` will automatically:
95
+ - Detect `DATABASE_URL` environment variable
96
+ - Replace `postgres://` with `postgresql://`
97
+ - Connect to PostgreSQL
98
+ - Create all tables automatically
99
+
100
+ ---
101
+
102
+ ## 🎯 Step 4: Deploy & Monitor
103
+
104
+ ### A. Trigger Deployment
105
+
106
+ 1. After adding database, click **"Manual Deploy"** → **"Deploy latest commit"**
107
+ 2. Watch the build logs:
108
+ - ✅ Installing dependencies
109
+ - ✅ Creating database tables
110
+ - ✅ Starting Gunicorn server
111
+
112
+ ### B. Check Deployment Logs
113
+
114
+ Look for these success messages:
115
+ ```
116
+ 🚀 Running in PRODUCTION mode
117
+ 🔄 Creating database tables...
118
+ ✅ Database tables created successfully!
119
+ [INFO] Starting gunicorn
120
+ [INFO] Booting worker with pid: 123
121
+ ```
122
+
123
+ ### C. Access Your Application
124
+
125
+ Your app will be available at:
126
+ ```
127
+ https://vietnamese-rating-prediction.onrender.com
128
+ ```
129
+
130
+ **Important endpoints:**
131
+ - **Dashboard:** `https://your-app.onrender.com/dashboard`
132
+ - **API Docs (Swagger):** `https://your-app.onrender.com/docs`
133
+ - **Health Check:** `https://your-app.onrender.com/health`
134
+
135
+ ---
136
+
137
+ ## 🔍 Troubleshooting
138
+
139
+ ### Issue 1: "Module not found" errors
140
+ **Solution:** Ensure all imports are in `requirements.txt`
141
+ ```bash
142
+ pip freeze > requirements.txt
143
+ ```
144
+
145
+ ### Issue 2: "Connection refused" to database
146
+ **Solution:**
147
+ - Verify `DATABASE_URL` is set in environment variables
148
+ - Check database status in Render dashboard
149
+ - Restart web service
150
+
151
+ ### Issue 3: "Port binding" errors
152
+ **Solution:** Use `$PORT` environment variable:
153
+ ```bash
154
+ gunicorn main:app --bind 0.0.0.0:$PORT
155
+ ```
156
+
157
+ ### Issue 4: ML model takes too long to load
158
+ **Solution:** Render Free Tier has limited RAM (512MB). Consider:
159
+ - Using a lighter model
160
+ - Lazy loading (load model on first request)
161
+ - Upgrading to Starter plan ($7/month)
162
+
163
+ ### Issue 5: Static files not loading
164
+ **Solution:** Ensure `app/static/` directory exists and is committed to git
165
+
166
+ ---
167
+
168
+ ## ⚙️ Alternative Start Commands
169
+
170
+ ### Option 1: Basic Uvicorn (Single Worker)
171
+ ```bash
172
+ uvicorn main:app --host 0.0.0.0 --port $PORT
173
+ ```
174
+
175
+ ### Option 2: Gunicorn with Uvicorn Workers (Recommended)
176
+ ```bash
177
+ gunicorn main:app --workers 4 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$PORT
178
+ ```
179
+
180
+ ### Option 3: Gunicorn with Fewer Workers and a Longer Timeout
181
+ ```bash
182
+ gunicorn main:app --workers 2 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$PORT --timeout 120
183
+ ```
184
+
185
+ ---
186
+
187
+ ## 📊 Performance Optimization
188
+
189
+ ### 1. Reduce Model Loading Time
190
+ Edit `app/services/ml_service.py`:
191
+ ```python
192
+ # Lazy load model on first request instead of on startup
193
+ class MLPredictionService:
194
+ def __init__(self):
195
+ self.model = None
196
+ self.tokenizer = None
197
+
198
+ def _ensure_loaded(self):
199
+ if self.model is None:
200
+ # Load model here
201
+ pass
202
+ ```
203
+
204
+ ### 2. Enable Connection Pooling
205
+ Already configured in `database.py`:
206
+ ```python
207
+ engine = create_engine(
208
+ DATABASE_URL,
209
+ pool_pre_ping=True,
210
+ pool_recycle=300
211
+ )
212
+ ```
213
+
214
+ ### 3. Use Caching for Predictions
215
+ Consider adding Redis (Render add-on) for caching frequent predictions.
216
+
217
+ ---
218
+
219
+ ## 🔒 Security Checklist
220
+
221
+ - [ ] Set strong `SECRET_KEY` in environment variables
222
+ - [ ] Restrict CORS origins in production (edit `main.py`)
223
+ - [ ] Enable HTTPS (automatic on Render)
224
+ - [ ] Set up database backups (Render PostgreSQL backups)
225
+ - [ ] Add rate limiting (consider using Render's DDoS protection)
226
+ - [ ] Review and sanitize all user inputs
227
+
228
+ ---
229
+
230
+ ## 💰 Cost Breakdown (Free Tier)
231
+
232
+ | Service | Cost | Limitations |
233
+ |---------|------|-------------|
234
+ | Web Service | FREE | 512MB RAM, Sleeps after 15min inactivity |
235
+ | PostgreSQL | FREE | 1GB storage, 97 connections |
236
+ | Bandwidth | FREE | 100GB/month |
237
+
238
+ **Upgrade Considerations:**
239
+ - If app sleeps: Upgrade to Starter ($7/month, always-on)
240
+ - If RAM issues: Upgrade to Standard ($25/month, 2GB RAM)
241
+ - If storage full: Upgrade database ($7/month, 10GB)
242
+
243
+ ---
244
+
245
+ ## 🎓 Post-Deployment Testing
246
+
247
+ ### Test 1: Health Check
248
+ ```bash
249
+ curl https://your-app.onrender.com/health
250
+ ```
251
+ Expected: `{"status":"healthy","service":"rating-prediction","version":"1.0.0"}`
252
+
253
+ ### Test 2: Swagger UI
254
+ Visit: `https://your-app.onrender.com/docs`
255
+ - Try registering a user
256
+ - Login to get JWT token
257
+ - Test prediction endpoints
258
+
259
+ ### Test 3: Database Connection
260
+ Check logs for:
261
+ ```
262
+ 🚀 Production Mode: Using PostgreSQL
263
+ ✅ Database tables created successfully!
264
+ ```
265
+
266
+ ---
267
+
268
+ ## 📚 Additional Resources
269
+
270
+ - **Render Docs:** https://render.com/docs/deploy-fastapi
271
+ - **PostgreSQL Guide:** https://render.com/docs/databases
272
+ - **Environment Variables:** https://render.com/docs/environment-variables
273
+ - **Custom Domains:** https://render.com/docs/custom-domains
274
+
275
+ ---
276
+
277
+ ## 🆘 Support
278
+
279
+ If you encounter issues:
280
+ 1. Check Render logs (Dashboard → Logs tab)
281
+ 2. Review this guide carefully
282
+ 3. Check Render community forum: https://community.render.com/
283
+ 4. Contact Render support (for paid plans)
284
+
285
+ ---
286
+
287
+ **Good luck with your deployment! 🚀**
Dockerfile ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================
2
+ # Dockerfile for Hugging Face Spaces (Docker SDK)
3
+ # Optimized for FastAPI + Heavy ML Model (>500MB)
4
+ # ============================================
5
+
6
+ FROM python:3.10-slim
7
+
8
+ RUN apt-get update && apt-get install -y \
9
+ fonts-dejavu \
10
+ fonts-dejavu-core \
11
+ fonts-dejavu-extra \
12
+ fontconfig \
13
+ && rm -rf /var/lib/apt/lists/*
14
+
15
+ # Set environment variables
16
+ ENV PYTHONUNBUFFERED=1 \
17
+ PYTHONDONTWRITEBYTECODE=1 \
18
+ PIP_NO_CACHE_DIR=1 \
19
+ PIP_DISABLE_PIP_VERSION_CHECK=1
20
+
21
+ # Create non-root user (REQUIRED by Hugging Face Spaces)
22
+ # HF Spaces runs containers as user ID 1000
23
+ RUN useradd -m -u 1000 user
24
+
25
+ # Set working directory
26
+ WORKDIR /app
27
+
28
+ # Install system dependencies
29
+ RUN apt-get update && apt-get install -y \
30
+ build-essential \
31
+ gcc \
32
+ && rm -rf /var/lib/apt/lists/*
33
+
34
+ # Copy requirements first (for better Docker layer caching)
35
+ COPY --chown=user:user requirements.txt .
36
+
37
+ # Install Python dependencies as root (before switching to user)
38
+ RUN pip install --no-cache-dir -r requirements.txt
39
+
40
+ # Copy application code
41
+ COPY --chown=user:user . .
42
+
43
+ # Create necessary directories with proper permissions
44
+ RUN mkdir -p /app/app/static/uploads/wordclouds && \
45
+ mkdir -p /app/app/database && \
46
+ chmod -R 777 /app/app/static/uploads && \
47
+ chmod -R 777 /app/app/database
48
+
49
+ # Switch to non-root user
50
+ USER user
51
+
52
+ # Expose port 7860 (REQUIRED by Hugging Face Spaces)
53
+ EXPOSE 7860
54
+
55
+ # Health check (optional but recommended)
56
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
57
+ CMD python -c "import requests; requests.get('http://localhost:7860/docs')"
58
+
59
+ # Start the FastAPI application
60
+ # CRITICAL: Must listen on 0.0.0.0:7860 for Hugging Face Spaces
61
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
HUGGING_FACE_DEPLOYMENT.md ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 Rating Prediction System - Hugging Face Spaces Deployment
2
+
3
+ [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces)
4
+ [![FastAPI](https://img.shields.io/badge/FastAPI-0.104.1-009688.svg?style=flat&logo=FastAPI&logoColor=white)](https://fastapi.tiangolo.com)
5
+ [![Docker](https://img.shields.io/badge/Docker-Enabled-2496ED?logo=docker&logoColor=white)](https://www.docker.com/)
6
+
7
+ A production-ready FastAPI application for predicting product ratings from Vietnamese comments using PhoBERT. This Space uses Docker SDK for deploying heavy ML models (>500MB) with 16GB RAM.
8
+
9
+ ---
10
+
11
+ ## 🎯 Features
12
+
13
+ - 🤖 **ML-Powered Predictions**: PhoBERT-based sentiment analysis
14
+ - 📊 **Interactive Dashboard**: Real-time visualizations with Chart.js
15
+ - 💬 **Batch Processing**: Upload CSV files for bulk predictions
16
+ - 🔐 **Secure Authentication**: JWT-based user management
17
+ - 📈 **Analytics**: Word clouds and rating distributions
18
+ - 🗄️ **External Database**: PostgreSQL support (Render/Neon)
19
+
20
+ ---
21
+
22
+ ## 🔧 Configuration Required
23
+
24
+ ### Required Environment Variables
25
+
26
+ **CRITICAL:** Before deploying to Hugging Face Spaces, you MUST add these environment variables in the **Settings** tab:
27
+
28
+ #### 1. DATABASE_URL (REQUIRED)
29
+ ```
30
+ DATABASE_URL=postgresql://username:password@host:port/database
31
+ ```
32
+ **Real external DB URL** (redacted: never commit live credentials; store the real value as a Space secret and rotate the password that was exposed here)
33
+ ```
34
+ DATABASE_URL=postgresql://<DB_USER>:<DB_PASSWORD>@<DB_HOST>.singapore-postgres.render.com/rating_prediction
35
+ ```
36
+ **Example from Render:**
37
+ ```
38
+ DATABASE_URL=postgresql://user:pass@dpg-xxxxx.oregon-postgres.render.com/dbname
39
+ ```
40
+
41
+ **Example from Neon:**
42
+ ```
43
+ DATABASE_URL=postgresql://user:pass@ep-xxxxx.us-east-2.aws.neon.tech/dbname?sslmode=require
44
+ ```
45
+
46
+ ⚠️ **Important Notes:**
47
+ - SQLAlchemy expects the URL to start with `postgresql://`; the legacy `postgres://` scheme is not accepted as-is
48
+ - If your provider gives you `postgres://`, the app will auto-convert it
49
+ - Include `?sslmode=require` for secure connections (recommended)
50
+
51
+ #### 2. SECRET_KEY (REQUIRED)
52
+ ```
53
+ SECRET_KEY=your-super-secret-jwt-key-change-this-in-production-min-32-chars
54
+ ```
55
+
56
+ **Generate a secure key:**
57
+ ```bash
58
+ python -c "import secrets; print(secrets.token_urlsafe(32))"
59
+ ```
60
+
61
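+ **Real SECRET_KEY:** (redacted: keep the real value only in the Space's secrets and rotate the key that was previously committed here)
62
+ <REDACTED - generate a fresh key with the command above>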
63
+
64
+ ⚠️ **Security:**
65
+ - NEVER commit this key to Git
66
+ - Use a cryptographically secure random string
67
+ - Minimum 32 characters recommended
68
+
69
+ ---
70
+
71
+ ## 📋 Deployment Steps
72
+
73
+ ### Step 1: Create a New Space
74
+ 1. Go to https://huggingface.co/new-space
75
+ 2. Choose **Docker** as the SDK
76
+ 3. Select **CPU Basic** (16GB RAM - Free)
77
+ 4. Make the Space **Public** or **Private**
78
+
79
+ ### Step 2: Configure Environment Variables
80
+ 1. Go to your Space's **Settings** tab
81
+ 2. Scroll to **Repository Secrets**
82
+ 3. Add the following secrets:
83
+ - `DATABASE_URL` → Your PostgreSQL connection string
84
+ - `SECRET_KEY` → Your JWT secret key
85
+
86
+ ### Step 3: Push Your Code
87
+ ```bash
88
+ # Clone your Space repository
89
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME
90
+ cd YOUR_SPACE_NAME
91
+
92
+ # Copy your project files
93
+ cp -r /path/to/PredictRating/* .
94
+
95
+ # Commit and push
96
+ git add .
97
+ git commit -m "Initial deployment"
98
+ git push
99
+ ```
100
+
101
+ ### Step 4: Wait for Build
102
+ - Hugging Face will automatically build your Docker image
103
+ - Build time: ~5-10 minutes (depending on model size)
104
+ - Check build logs in the **Logs** tab
105
+
106
+ ### Step 5: Access Your App
107
+ - Your app will be available at: `https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME`
108
+ - The app runs on port **7860** (handled automatically)
109
+
110
+ ---
111
+
112
+ ## 🗄️ Database Setup
113
+
114
+ ### Option A: Render PostgreSQL (Recommended)
115
+ 1. Create a free PostgreSQL database on [Render](https://render.com)
116
+ 2. Go to **Dashboard** → **New** → **PostgreSQL**
117
+ 3. Copy the **External Database URL**
118
+ 4. Add it as `DATABASE_URL` in HF Spaces Settings
119
+
120
+ ### Option B: Neon PostgreSQL
121
+ 1. Create a free database on [Neon](https://neon.tech)
122
+ 2. Copy the connection string
123
+ 3. Ensure it includes `?sslmode=require`
124
+ 4. Add it as `DATABASE_URL` in HF Spaces Settings
125
+
126
+ ### Database Initialization
127
+ The app automatically:
128
+ - Creates tables on first run
129
+ - Supports both SQLite (local dev) and PostgreSQL (production)
130
+ - No manual migrations needed
131
+
132
+ ---
133
+
134
+ ## 🐳 Docker Configuration
135
+
136
+ ### Port Requirements
137
+ - **CRITICAL:** Hugging Face Spaces requires port **7860**
138
+ - The Dockerfile is pre-configured correctly
139
+ - DO NOT change the port in the Dockerfile's `CMD` instruction
140
+
141
+ ### User Permissions
142
+ - Hugging Face runs containers as user ID **1000**
143
+ - The Dockerfile creates a `user` account
144
+ - All files are owned by this user
145
+
146
+ ### Storage
147
+ - `/app/static/uploads/` is writable (for word clouds)
148
+ - `/app/database/` is writable (for local SQLite fallback)
149
+ - Consider using external storage (S3/Cloudinary) for production
150
+
151
+ ---
152
+
153
+ ## 🧪 Testing Locally Before Deployment
154
+
155
+ ### Test with Docker
156
+ ```bash
157
+ # Build the Docker image
158
+ docker build -t rating-prediction .
159
+
160
+ # Run with environment variables
161
+ docker run -p 7860:7860 \
162
+ -e DATABASE_URL="postgresql://user:pass@host/db" \
163
+ -e SECRET_KEY="your-secret-key" \
164
+ rating-prediction
165
+
166
+ # Access at http://localhost:7860
167
+ ```
168
+
169
+ ### Test Database Connection
170
+ ```bash
171
+ # Inside container
172
+ docker exec -it <container_id> python -c "
173
+ from app.database import engine
174
+ print('✅ Database connected:', engine.url)
175
+ "
176
+ ```
177
+
178
+ ---
179
+
180
+ ## 📊 Monitoring & Logs
181
+
182
+ ### View Logs in Hugging Face
183
+ 1. Go to your Space
184
+ 2. Click the **Logs** tab
185
+ 3. Monitor startup and runtime logs
186
+
187
+ ### Expected Startup Messages
188
+ ```
189
+ 🚀 Production Mode: Using PostgreSQL
190
+ INFO: Started server process [1]
191
+ INFO: Uvicorn running on http://0.0.0.0:7860
192
+ ```
193
+
194
+ ---
195
+
196
+ ## 🔒 Security Checklist
197
+
198
+ - ✅ `SECRET_KEY` stored as HF Secret (not in code)
199
+ - ✅ `DATABASE_URL` stored as HF Secret (not in code)
200
+ - ✅ PostgreSQL uses SSL (`sslmode=require`)
201
+ - ✅ Passwords hashed with bcrypt
202
+ - ✅ JWT tokens expire after 24 hours
203
+ - ✅ Docker runs as non-root user
204
+
205
+ ---
206
+
207
+ ## 🐛 Troubleshooting
208
+
209
+ ### Issue: "Application startup failed"
210
+ **Solution:** Check logs for database connection errors. Verify `DATABASE_URL` is correct.
211
+
212
+ ### Issue: "502 Bad Gateway"
213
+ **Solution:** App may be starting. Wait 2-3 minutes for heavy model loading.
214
+
215
+ ### Issue: "Database connection refused"
216
+ **Solution:** Ensure your PostgreSQL database is accessible from external IPs. Check firewall rules.
217
+
218
+ ### Issue: "No module named 'app'"
219
+ **Solution:** Ensure all files are copied correctly. Check Dockerfile `WORKDIR` is `/app`.
220
+
221
+ ### Issue: "Port 7860 already in use"
222
+ **Solution:** Only relevant for local testing. Stop other containers on that port.
223
+
224
+ ---
225
+
226
+ ## 📚 API Documentation
227
+
228
+ Once deployed, access:
229
+ - **Swagger UI**: `https://your-space.hf.space/docs`
230
+ - **ReDoc**: `https://your-space.hf.space/redoc`
231
+
232
+ ### Key Endpoints
233
+ - `POST /api/auth/register` - Create new user
234
+ - `POST /api/auth/login` - Login and get JWT token
235
+ - `POST /api/predict/single` - Predict single comment
236
+ - `POST /api/predict/batch` - Upload CSV for batch predictions
237
+ - `GET /api/predict/history` - View prediction history
238
+
239
+ ---
240
+
241
+ ## 🆘 Support
242
+
243
+ If you encounter issues:
244
+ 1. Check the **Logs** tab in your Space
245
+ 2. Verify environment variables in **Settings**
246
+ 3. Test database connection from your local machine
247
+ 4. Review [FastAPI Docs](https://fastapi.tiangolo.com)
248
+ 5. Check [Hugging Face Spaces Docs](https://huggingface.co/docs/hub/spaces-overview)
249
+
250
+ ---
251
+
252
+ ## 📄 License
253
+
254
+ This project is deployed under the terms specified in your Space settings.
255
+
256
+ ---
257
+
258
+ **Built with ❤️ using FastAPI, PhoBERT, and Hugging Face Spaces**
INDEX.md ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📖 Complete Documentation Index
2
+
3
+ Welcome to the **Vietnamese Product Rating Prediction System** documentation!
4
+
5
+ ---
6
+
7
+ ## 🚀 Quick Start (New Users)
8
+
9
+ If you're just getting started, read these files in order:
10
+
11
+ 1. **[QUICKSTART.md](QUICKSTART.md)** ⚡
12
+ - Installation instructions
13
+ - How to run the application
14
+ - First-time usage guide
15
+ - **Start here!**
16
+
17
+ 2. **[TESTING_GUIDE.md](TESTING_GUIDE.md)** ✅
18
+ - Step-by-step testing procedures
19
+ - Expected results for each test
20
+ - Troubleshooting common issues
21
+
22
+ 3. **[PROJECT_SUMMARY.md](PROJECT_SUMMARY.md)** 📋
23
+ - Overview of all features
24
+ - What has been built
25
+ - How to replace dummy ML model
26
+
27
+ ---
28
+
29
+ ## 📚 Detailed Documentation
30
+
31
+ ### For Understanding the System
32
+
33
+ - **[README.md](README.md)** 📖
34
+ - Complete project documentation
35
+ - Features, setup, usage
36
+ - API endpoints
37
+ - Database schema
38
+ - CSV file format
39
+
40
+ - **[ARCHITECTURE.md](ARCHITECTURE.md)** 🏗️
41
+ - System architecture diagrams
42
+ - Request flow examples
43
+ - Technology stack details
44
+ - File responsibilities
45
+ - Security features
46
+
47
+ ---
48
+
49
+ ## 🎯 For Different Purposes
50
+
51
+ ### I want to... run the application
52
+ → Read: **[QUICKSTART.md](QUICKSTART.md)**
53
+
54
+ ### I want to... test all features
55
+ → Read: **[TESTING_GUIDE.md](TESTING_GUIDE.md)**
56
+
57
+ ### I want to... understand the code structure
58
+ → Read: **[ARCHITECTURE.md](ARCHITECTURE.md)**
59
+
60
+ ### I want to... replace the dummy ML model
61
+ → Read: **[PROJECT_SUMMARY.md](PROJECT_SUMMARY.md)** (section: "Replace Dummy ML Model")
62
+
63
+ ### I want to... demo to my teacher
64
+ → Read: **[TESTING_GUIDE.md](TESTING_GUIDE.md)** (section: "Demo Checklist for Teacher")
65
+
66
+ ### I want to... understand all features
67
+ → Read: **[README.md](README.md)** (section: "Features")
68
+
69
+ ### I want to... see API documentation
70
+ → Run app, then visit: **http://localhost:8000/docs**
71
+
72
+ ---
73
+
74
+ ## 📁 Project Files Overview
75
+
76
+ ### Documentation Files
77
+ ```
78
+ ├── README.md # Main documentation
79
+ ├── QUICKSTART.md # Quick setup guide
80
+ ├── PROJECT_SUMMARY.md # Feature summary
81
+ ├── TESTING_GUIDE.md # Testing procedures
82
+ ├── ARCHITECTURE.md # System architecture
83
+ └── INDEX.md # This file (navigation)
84
+ ```
85
+
86
+ ### Code Files
87
+ ```
88
+ ├── main.py # FastAPI entry point
89
+ ├── requirements.txt # Python dependencies
90
+ ├── sample_comments.csv # Test data
91
+ ├── .gitignore # Git ignore rules
92
+
93
+ └── app/
94
+ ├── config.py # Configuration
95
+ ├── database.py # Database setup
96
+ ├── models.py # Database models
97
+ ├── schemas.py # Pydantic schemas
98
+
99
+ ├── routers/ # API endpoints
100
+ │ ├── auth.py
101
+ │ ├── prediction.py
102
+ │ └── dashboard.py
103
+
104
+ ├── services/ # Business logic
105
+ │ ├── auth_service.py
106
+ │ ├── ml_service.py
107
+ │ └── visualization_service.py
108
+
109
+ ├── templates/ # HTML templates
110
+ │ ├── base.html
111
+ │ ├── login.html
112
+ │ ├── register.html
113
+ │ └── dashboard.html
114
+
115
+ └── static/ # Static files
116
+ ├── css/
117
+ ├── js/
118
+ └── uploads/
119
+ ```
120
+
121
+ ---
122
+
123
+ ## 🎓 For Students (Project Presentation)
124
+
125
+ ### Before Presentation
126
+ 1. Read **[QUICKSTART.md](QUICKSTART.md)** to set up
127
+ 2. Test everything using **[TESTING_GUIDE.md](TESTING_GUIDE.md)**
128
+ 3. Review **[PROJECT_SUMMARY.md](PROJECT_SUMMARY.md)** for highlights
129
+
130
+ ### During Presentation
131
+ 1. **Show Swagger UI** (bonus points!) → http://localhost:8000/docs
132
+ 2. **Demo user journey:**
133
+ - Register → Login
134
+ - Single prediction
135
+ - Batch CSV with visualizations
136
+ 3. **Explain architecture** using **[ARCHITECTURE.md](ARCHITECTURE.md)**
137
+
138
+ ### Key Points to Mention
139
+ ✅ FastAPI with automatic API documentation
140
+ ✅ JWT authentication for security
141
+ ✅ RESTful API design
142
+ ✅ Data visualization (Chart.js + WordCloud)
143
+ ✅ Separation of concerns (clean architecture)
144
+ ✅ Database relationships and ORM
145
+
146
+ ---
147
+
148
+ ## 🔧 For Developers
149
+
150
+ ### Understanding the Codebase
151
+ 1. **[ARCHITECTURE.md](ARCHITECTURE.md)** - System overview
152
+ 2. **[README.md](README.md)** - Detailed documentation
153
+ 3. Code files (with inline comments)
154
+
155
+ ### Modifying the System
156
+
157
+ **To replace ML model:**
158
+ → Edit: `app/services/ml_service.py`
159
+ → See: **[PROJECT_SUMMARY.md](PROJECT_SUMMARY.md)** section "Replace Dummy ML Model"
160
+
161
+ **To add products:**
162
+ → Edit: `app/config.py` → `PRODUCTS` list
163
+
164
+ **To add Vietnamese stopwords:**
165
+ → Edit: `app/services/visualization_service.py` → `self.stopwords`
166
+
167
+ **To change styling:**
168
+ → Edit: `app/templates/*.html` (TailwindCSS classes)
169
+
170
+ **To add API endpoints:**
171
+ → Create route in: `app/routers/*.py`
172
+
173
+ ---
174
+
175
+ ## 📊 Key Features Reference
176
+
177
+ | Feature | File | Documentation |
178
+ |---------|------|---------------|
179
+ | User Authentication | `app/routers/auth.py` | [README.md](README.md) |
180
+ | Single Prediction | `app/routers/prediction.py` | [README.md](README.md) |
181
+ | Batch Prediction | `app/routers/prediction.py` | [README.md](README.md) |
182
+ | WordCloud | `app/services/visualization_service.py` | [ARCHITECTURE.md](ARCHITECTURE.md) |
183
+ | Database Models | `app/models.py` | [README.md](README.md) |
184
+ | ML Service | `app/services/ml_service.py` | [PROJECT_SUMMARY.md](PROJECT_SUMMARY.md) |
185
+
186
+ ---
187
+
188
+ ## 🐛 Troubleshooting
189
+
190
+ For common issues and solutions:
191
+ → **[TESTING_GUIDE.md](TESTING_GUIDE.md)** (Troubleshooting section)
192
+
193
+ For API errors:
194
+ → Check Swagger UI: http://localhost:8000/docs
195
+
196
+ For understanding error messages:
197
+ → **[ARCHITECTURE.md](ARCHITECTURE.md)** (Request Flow section)
198
+
199
+ ---
200
+
201
+ ## 📞 Quick Reference Commands
202
+
203
+ ```bash
204
+ # Install dependencies
205
+ pip install -r requirements.txt
206
+
207
+ # Run application
208
+ python main.py
209
+
210
+ # Access Swagger UI
211
+ # Open: http://localhost:8000/docs
212
+
213
+ # Access dashboard
214
+ # Open: http://localhost:8000/dashboard
215
+
216
+ # Test with sample data
217
+ # Upload: sample_comments.csv
218
+ ```
219
+
220
+ ---
221
+
222
+ ## ✅ Checklist for Teacher Demo
223
+
224
+ Before presenting to teacher:
225
+
226
+ - [ ] All dependencies installed (`pip install -r requirements.txt`)
227
+ - [ ] Application runs successfully (`python main.py`)
228
+ - [ ] Can access Swagger UI (http://localhost:8000/docs)
229
+ - [ ] Can register and login
230
+ - [ ] Single prediction works
231
+ - [ ] Batch CSV prediction works
232
+ - [ ] Charts and word cloud display correctly
233
+ - [ ] CSV download works
234
+ - [ ] Understand system architecture
235
+ - [ ] Can explain how to replace ML model
236
+
237
+ ---
238
+
239
+ ## 🎯 Learning Outcomes
240
+
241
+ After completing this project, you will understand:
242
+
243
+ 1. **FastAPI Framework**
244
+ - Route definition
245
+ - Dependency injection
246
+ - Automatic API documentation
247
+ - Request/response validation
248
+
249
+ 2. **Authentication**
250
+ - JWT tokens
251
+ - Password hashing (bcrypt)
252
+ - Protected routes
253
+
254
+ 3. **Database**
255
+ - SQLAlchemy ORM
256
+ - Model relationships
257
+ - CRUD operations
258
+
259
+ 4. **Frontend**
260
+ - Jinja2 templating
261
+ - TailwindCSS styling
262
+ - JavaScript Fetch API
263
+ - Chart.js visualization
264
+
265
+ 5. **Software Architecture**
266
+ - Separation of concerns
267
+ - Service layer pattern
268
+ - RESTful API design
269
+
270
+ ---
271
+
272
+ ## 📧 Documentation Feedback
273
+
274
+ If any documentation is unclear or missing information:
275
+ 1. Check other documentation files
276
+ 2. Look at code comments
277
+ 3. Consult with your instructor
278
+
279
+ ---
280
+
281
+ ## 🎉 You're All Set!
282
+
283
+ You now have:
284
+ ✅ Complete working application
285
+ ✅ Comprehensive documentation
286
+ ✅ Testing guide
287
+ ✅ Architecture documentation
288
+ ✅ Demo preparation materials
289
+
290
+ **Good luck with your project! 🎓**
291
+
292
+ ---
293
+
294
+ *Last Updated: November 25, 2024*
295
+ *Project: Vietnamese Product Rating Prediction System*
296
+ *Framework: FastAPI + Jinja2 + TailwindCSS*
PROJECT_STRUCTURE.txt ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📁 Complete Project Structure
2
+
3
+ ```
4
+ PredictRating/
5
+
6
+ ├── 📄 main.py # FastAPI application entry point
7
+ ├── 📄 requirements.txt # Python dependencies
8
+ ├── 📄 .gitignore # Git ignore rules
9
+
10
+ ├── 📄 sample_comments.csv # Sample test data (20 Vietnamese comments)
11
+
12
+ ├── 📚 DOCUMENTATION FILES
13
+ │ ├── 📖 README.md # Main documentation (complete guide)
14
+ │ ├── ⚡ QUICKSTART.md # Quick setup and first run guide
15
+ │ ├── 📋 PROJECT_SUMMARY.md # Feature overview and highlights
16
+ │ ├── ✅ TESTING_GUIDE.md # Step-by-step testing procedures
17
+ │ ├── 🏗️ ARCHITECTURE.md # System architecture and design
18
+ │ ├── 📑 INDEX.md # Documentation navigation index
19
+ │ └── 📁 PROJECT_STRUCTURE.txt # This visual tree structure
20
+
21
+ └── 📁 app/ # Main application package
22
+
23
+ ├── 📄 __init__.py # Package initializer
24
+ ├── 📄 config.py # Configuration (SECRET_KEY, PRODUCTS, paths)
25
+ ├── 📄 database.py # SQLAlchemy engine & session management
26
+ ├── 📄 models.py # Database models (User, PredictionHistory)
27
+ ├── 📄 schemas.py # Pydantic validation schemas
28
+
29
+ ├── 📁 routers/ # API Route Handlers
30
+ │ ├── 📄 __init__.py
31
+ │ ├── 📄 auth.py # Authentication endpoints
32
+ │ │ # - POST /api/auth/register
33
+ │ │ # - POST /api/auth/login
34
+ │ │ # - GET /api/auth/me
35
+ │ │
36
+ │ ├── 📄 prediction.py # Prediction endpoints
37
+ │ │ # - POST /api/predict/single
38
+ │ │ # - POST /api/predict/batch
39
+ │ │ # - GET /api/predict/history
40
+ │ │
41
+ │ └── 📄 dashboard.py # Frontend page routes
42
+ │ # - GET /
43
+ │ # - GET /login
44
+ │ # - GET /register
45
+ │ # - GET /dashboard
46
+
47
+ ├── 📁 services/ # Business Logic Layer
48
+ │ ├── 📄 __init__.py
49
+ │ │
50
+ │ ├── 📄 auth_service.py # Authentication service
51
+ │ │ # - Password hashing (bcrypt)
52
+ │ │ # - JWT token generation
53
+ │ │ # - Token validation
54
+ │ │ # - Get current user
55
+ │ │
56
+ │ ├── 📄 ml_service.py # ML Prediction service
57
+ │ │ # - predict_single() [DUMMY]
58
+ │ │ # - predict_batch() [DUMMY]
59
+ │ │ # - preprocess()
60
+ │ │ # ⚠️ REPLACE WITH YOUR REAL MODEL
61
+ │ │
62
+ │ └── 📄 visualization_service.py # Visualization service
63
+ │ # - generate_wordcloud()
64
+ │ # - calculate_rating_distribution()
65
+ │ # - get_top_words()
66
+
67
+ ├── 📁 templates/ # Jinja2 HTML Templates
68
+ │ ├── 📄 base.html # Base layout template
69
+ │ │ # - TailwindCSS CDN
70
+ │ │ # - Chart.js CDN
71
+ │ │ # - Font Awesome icons
72
+ │ │ # - Header/Footer structure
73
+ │ │
74
+ │ ├── 📄 login.html # Login page
75
+ │ │ # - Login form
76
+ │ │ # - JWT token handling
77
+ │ │ # - Link to register
78
+ │ │
79
+ │ ├── 📄 register.html # Registration page
80
+ │ │ # - Registration form
81
+ │ │ # - Form validation
82
+ │ │ # - Link to login
83
+ │ │
84
+ │ └── 📄 dashboard.html # Main dashboard
85
+ │ # - Product selection dropdown
86
+ │ # - Single/Batch tabs
87
+ │ # - Prediction forms
88
+ │ # - Chart.js visualization
89
+ │ # - WordCloud display
90
+ │ # - Results table
91
+ │ # - CSV download
92
+
93
+ ├── 📁 static/ # Static Files
94
+ │ ├── 📁 css/
95
+ │ │ └── 📄 style.css # Custom CSS (placeholder)
96
+ │ │
97
+ │ ├── 📁 js/
98
+ │ │ └── 📄 main.js # Custom JavaScript (placeholder)
99
+ │ │
100
+ │ └── 📁 uploads/ # User uploads directory
101
+ │ ├── 📄 .gitkeep # Keep directory in git
102
+ │ └── 📁 wordclouds/ # Generated word cloud images
103
+
104
+ └── 📁 database/ # Database Storage
105
+ ├── 📄 .gitkeep # Keep directory in git
106
+ └── 🗄️ rating_prediction.db # SQLite database (created on first run)
107
+ # Tables:
108
+ # - users
109
+ # - prediction_history
110
+ ```
111
+
112
+ ---
113
+
114
+ ## 📊 File Count Summary
115
+
116
+ | Category | Count | Files |
117
+ |----------|-------|-------|
118
+ | **Documentation** | 7 | README, QUICKSTART, PROJECT_SUMMARY, TESTING_GUIDE, ARCHITECTURE, INDEX, PROJECT_STRUCTURE |
119
+ | **Core Python** | 5 | main.py, config.py, database.py, models.py, schemas.py |
120
+ | **Routers** | 3 | auth.py, prediction.py, dashboard.py |
121
+ | **Services** | 3 | auth_service.py, ml_service.py, visualization_service.py |
122
+ | **Templates** | 4 | base.html, login.html, register.html, dashboard.html |
123
+ | **Static** | 2 | style.css, main.js |
124
+ | **Config** | 3 | requirements.txt, .gitignore, .gitkeep files |
125
+ | **Test Data** | 1 | sample_comments.csv |
126
+ | **Total** | **28** | |
127
+
128
+ ---
129
+
130
+ ## 🎯 Key Directories Explained
131
+
132
+ ### `/app/routers/` - API Endpoints
133
+ - **Purpose:** Handle HTTP requests and responses
134
+ - **Pattern:** Each router handles a specific domain (auth, prediction, dashboard)
135
+ - **Uses:** FastAPI decorators (@router.get, @router.post)
136
+
137
+ ### `/app/services/` - Business Logic
138
+ - **Purpose:** Core functionality separated from HTTP layer
139
+ - **Pattern:** Service classes with dependency injection
140
+ - **Uses:** Called by routers, interacts with database and external services
141
+
142
+ ### `/app/templates/` - Frontend Views
143
+ - **Purpose:** HTML templates for user interface
144
+ - **Pattern:** Jinja2 template inheritance (extends base.html)
145
+ - **Uses:** Rendered by FastAPI's Jinja2Templates
146
+
147
+ ### `/app/static/` - Static Assets
148
+ - **Purpose:** CSS, JavaScript, images, uploads
149
+ - **Pattern:** Mounted as static files in FastAPI
150
+ - **URL:** Accessible at `/static/...`
151
+
152
+ ### `/app/database/` - Database Storage
153
+ - **Purpose:** SQLite database file location
154
+ - **Pattern:** Created automatically by SQLAlchemy
155
+ - **Schema:** Users, PredictionHistory tables
156
+
157
+ ---
158
+
159
+ ## 🔗 File Dependencies
160
+
161
+ ### main.py depends on:
162
+ - `app.database` (create tables)
163
+ - `app.routers.*` (include routers)
164
+ - `fastapi`, `uvicorn`
165
+
166
+ ### Routers depend on:
167
+ - `app.database` (get_db)
168
+ - `app.models` (User, PredictionHistory)
169
+ - `app.schemas` (validation)
170
+ - `app.services.*` (business logic)
171
+
172
+ ### Services depend on:
173
+ - `app.config` (settings)
174
+ - `app.models` (database access)
175
+ - External libraries (bcrypt, jose, wordcloud)
176
+
177
+ ### Templates depend on:
178
+ - TailwindCSS (CDN)
179
+ - Chart.js (CDN)
180
+ - Font Awesome (CDN)
181
+ - JavaScript Fetch API
182
+
183
+ ---
184
+
185
+ ## 📝 Important Files to Modify
186
+
187
+ ### To replace ML model:
188
+ ```
189
+ app/services/ml_service.py
190
+ └── Update: __init__(), predict_single(), predict_batch()
191
+ ```
192
+
193
+ ### To add products:
194
+ ```
195
+ app/config.py
196
+ └── Update: PRODUCTS list
197
+ ```
198
+
199
+ ### To change UI styling:
200
+ ```
201
+ app/templates/*.html
202
+ └── Edit: TailwindCSS classes
203
+ ```
204
+
205
+ ### To add API endpoints:
206
+ ```
207
+ app/routers/*.py
208
+ └── Add: New route functions
209
+ ```
210
+
211
+ ### To modify Vietnamese stopwords:
212
+ ```
213
+ app/services/visualization_service.py
214
+ └── Update: self.stopwords set
215
+ ```
216
+
217
+ ---
218
+
219
+ ## 🚀 Execution Flow
220
+
221
+ 1. **Start:** `python main.py`
222
+ 2. **Load:** main.py imports all modules
223
+ 3. **Initialize:** Create database tables
224
+ 4. **Mount:** Static files and templates
225
+ 5. **Include:** All routers (auth, prediction, dashboard)
226
+ 6. **Run:** Uvicorn server on port 8000
227
+ 7. **Ready:** Application accessible at http://localhost:8000
228
+
229
+ ---
230
+
231
+ ## 🔐 Generated Files (Not in Git)
232
+
233
+ These files are created when you run the application:
234
+
235
+ ```
236
+ app/database/rating_prediction.db # SQLite database
237
+ app/static/uploads/wordclouds/*.png # Generated word cloud images
238
+ __pycache__/ # Python bytecode
239
+ *.pyc # Compiled Python files
240
+ ```
241
+
242
+ These are ignored by `.gitignore`
243
+
244
+ ---
245
+
246
+ ## 📦 External Dependencies (from requirements.txt)
247
+
248
+ ```
249
+ fastapi # Web framework
250
+ uvicorn # ASGI server
251
+ sqlalchemy # ORM
252
+ python-jose # JWT
253
+ passlib # Password hashing
254
+ pydantic # Validation
255
+ jinja2 # Templates
256
+ wordcloud # Word clouds
257
+ matplotlib # Plotting
258
+ python-multipart # File uploads
259
+ ```
260
+
261
+ ---
262
+
263
+ ## 🎨 Frontend Stack
264
+
265
+ ```
266
+ HTML
267
+ ├── Jinja2 templates (server-side rendering)
268
+ └── Semantic HTML5
269
+
270
+ CSS
271
+ ├── TailwindCSS 3.x (CDN)
272
+ └── Custom animations (in base.html)
273
+
274
+ JavaScript
275
+ ├── Vanilla JS (no frameworks)
276
+ ├── Fetch API (HTTP requests)
277
+ ├── Chart.js (visualizations)
278
+ └── LocalStorage (JWT tokens)
279
+ ```
280
+
281
+ ---
282
+
283
+ ## 🗄️ Database Schema
284
+
285
+ ```
286
+ users
287
+ ├── id (INTEGER, PRIMARY KEY)
288
+ ├── username (VARCHAR(50), UNIQUE)
289
+ ├── email (VARCHAR(100), UNIQUE)
290
+ ├── hashed_password (VARCHAR(255))
291
+ └── created_at (DATETIME)
292
+
293
+ prediction_history
294
+ ├── id (INTEGER, PRIMARY KEY)
295
+ ├── user_id (INTEGER, FOREIGN KEY → users.id)
296
+ ├── product_name (VARCHAR(200))
297
+ ├── comment (TEXT)
298
+ ├── predicted_rating (INTEGER, 1-5)
299
+ ├── confidence_score (FLOAT)
300
+ ├── prediction_type (VARCHAR(20), 'single' or 'batch')
301
+ └── created_at (DATETIME)
302
+ ```
303
+
304
+ ---
305
+
306
+ ## ✅ Quality Checklist
307
+
308
+ - [x] All files created successfully
309
+ - [x] Project structure is organized and logical
310
+ - [x] Documentation is comprehensive
311
+ - [x] Code has inline comments
312
+ - [x] Separation of concerns implemented
313
+ - [x] RESTful API design followed
314
+ - [x] Security best practices applied
315
+ - [x] UI is responsive and user-friendly
316
+ - [x] Error handling implemented
317
+ - [x] Ready for demonstration
318
+
319
+ ---
320
+
321
+ **Total Lines of Code:** ~2000+ lines
322
+ **Total Documentation:** ~3000+ lines
323
+ **Time to Setup:** < 5 minutes
324
+ **Time to Demo:** 10-15 minutes
325
+
326
+ Your project is complete and production-ready! 🎉
PROJECT_SUMMARY.md ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 📋 Project Summary - Vietnamese Product Rating Prediction System
2
+
3
+ ## ✅ What Has Been Built
4
+
5
+ ### 🏗️ Complete Project Structure
6
+ ```
7
+ PredictRating/
8
+ ├── main.py # FastAPI application entry
9
+ ├── requirements.txt # All dependencies
10
+ ├── README.md # Full documentation
11
+ ├── QUICKSTART.md # Quick setup guide
12
+ ├── sample_comments.csv # Test data
13
+ ├── .gitignore # Git ignore rules
14
+
15
+ └── app/
16
+ ├── config.py # Configuration settings
17
+ ├── database.py # Database connection
18
+ ├── models.py # SQLAlchemy models (User, PredictionHistory)
19
+ ├── schemas.py # Pydantic validation schemas
20
+
21
+ ├── routers/ # API endpoints
22
+ │ ├── auth.py # Login/Register endpoints
23
+ │ ├── prediction.py # Single/Batch prediction
24
+ │ └── dashboard.py # Frontend routes
25
+
26
+ ├── services/ # Business logic
27
+ │ ├── auth_service.py # JWT authentication & password hashing
28
+ │ ├── ml_service.py # ML prediction (DUMMY - replace with your model)
29
+ │ └── visualization_service.py # WordCloud & chart data
30
+
31
+ ├── templates/ # Jinja2 HTML templates
32
+ │ ├── base.html # Base layout with TailwindCSS
33
+ │ ├── login.html # Login page
34
+ │ ├── register.html # Registration page
35
+ │ └── dashboard.html # Main prediction interface
36
+
37
+ ├── static/ # Static files
38
+ │ ├── css/
39
+ │ ├── js/
40
+ │ └── uploads/
41
+ │ └── wordclouds/ # Generated word cloud images
42
+
43
+ └── database/ # SQLite database location
44
+ ```
45
+
46
+ ---
47
+
48
+ ## 🎯 Features Implemented
49
+
50
+ ### 1. Authentication System ✅
51
+ - **User Registration** with email validation
52
+ - **JWT-based Login** (secure token authentication)
53
+ - **Password Hashing** using bcrypt
54
+ - **Protected Routes** requiring authentication
55
+
56
+ ### 2. Single Comment Prediction ✅
57
+ - Select target product
58
+ - Input Vietnamese comment
59
+ - Get predicted rating (1-5 stars)
60
+ - Display confidence score
61
+ - Save to prediction history
62
+
63
+ ### 3. Batch CSV Prediction ✅
64
+ - Upload CSV file with comments
65
+ - Bulk prediction processing
66
+ - **Visualizations:**
67
+ - Bar chart showing rating distribution
68
+ - Word cloud of frequent words
69
+ - Results table with all predictions
70
+ - **Export:** Download CSV with predicted ratings
71
+
72
+ ### 4. Data Visualization ✅
73
+ - **Chart.js** for interactive bar charts
74
+ - **WordCloud** library for generating word cloud images
75
+ - Responsive charts that update dynamically
76
+
77
+ ### 5. API Documentation ✅
78
+ - **Swagger UI** at `/docs` (automatic generation)
79
+ - **ReDoc** at `/redoc` (alternative documentation)
80
+ - Interactive API testing interface
81
+ - Complete request/response schemas
82
+
83
+ ### 6. Database Integration ✅
84
+ - **SQLite** database
85
+ - **User table** (username, email, hashed password)
86
+ - **PredictionHistory table** (tracks all predictions)
87
+ - Automatic table creation on startup
88
+
89
+ ### 7. Frontend UI ✅
90
+ - **TailwindCSS** for modern, responsive design
91
+ - **Jinja2** server-side rendering
92
+ - Tab-based interface (Single/Batch)
93
+ - Real-time form validation
94
+ - Loading states and error handling
95
+
96
+ ---
97
+
98
+ ## 🚀 How to Run
99
+
100
+ ### Step 1: Install Dependencies
101
+ ```bash
102
+ pip install -r requirements.txt
103
+ ```
104
+
105
+ ### Step 2: Start Server
106
+ ```bash
107
+ python main.py
108
+ ```
109
+
110
+ ### Step 3: Access Application
111
+ - **Dashboard:** http://localhost:8000/dashboard
112
+ - **Swagger API Docs:** http://localhost:8000/docs ⭐
113
+
114
+ ---
115
+
116
+ ## 📊 API Endpoints
117
+
118
+ ### Authentication
119
+ | Method | Endpoint | Description |
120
+ |--------|----------|-------------|
121
+ | POST | `/api/auth/register` | Register new user |
122
+ | POST | `/api/auth/login` | Login (returns JWT token) |
123
+ | GET | `/api/auth/me` | Get current user info |
124
+
125
+ ### Predictions
126
+ | Method | Endpoint | Description |
127
+ |--------|----------|-------------|
128
+ | POST | `/api/predict/single` | Predict single comment |
129
+ | POST | `/api/predict/batch` | Predict batch from CSV |
130
+ | GET | `/api/predict/history` | Get prediction history |
131
+
132
+ ### Frontend
133
+ | Method | Endpoint | Description |
134
+ |--------|----------|-------------|
135
+ | GET | `/login` | Login page |
136
+ | GET | `/register` | Registration page |
137
+ | GET | `/dashboard` | Main dashboard |
138
+
139
+ ---
140
+
141
+ ## 🔧 Replace Dummy ML Model
142
+
143
+ The file `app/services/ml_service.py` contains a **DUMMY prediction function** that returns random ratings.
144
+
145
+ ### To integrate your real model:
146
+
147
+ 1. **Load your model in `__init__`:**
148
+ ```python
149
+ def __init__(self):
150
+ self.model = load_model('path/to/your/model.h5')
151
+ self.tokenizer = load_tokenizer('path/to/tokenizer.pkl')
152
+ ```
153
+
154
+ 2. **Update `predict_single` method:**
155
+ ```python
156
+ def predict_single(self, text: str) -> Dict[str, any]:
157
+ # Preprocess Vietnamese text
158
+ preprocessed = self.preprocess(text)
159
+
160
+ # Tokenize
161
+ tokens = self.tokenizer.encode(preprocessed)
162
+
163
+ # Predict
164
+ prediction = self.model.predict([tokens])
165
+ rating = int(prediction.argmax()) + 1 # 1-5 scale
166
+ confidence = float(prediction.max())
167
+
168
+ return {
169
+ 'rating': rating,
170
+ 'confidence': confidence
171
+ }
172
+ ```
173
+
174
+ 3. **Implement preprocessing:**
175
+ ```python
176
+ def preprocess(self, text: str) -> str:
177
+ # Your Vietnamese text preprocessing
178
+ text = text.lower()
179
+ text = remove_special_characters(text)
180
+ text = normalize_vietnamese(text)
181
+ return text
182
+ ```
183
+
184
+ ---
185
+
186
+ ## 🎓 Demo for Teacher
187
+
188
+ ### Show Swagger UI (Bonus Points!)
189
+ 1. Open http://localhost:8000/docs
190
+ 2. Demonstrate:
191
+ - All API endpoints organized by tags
192
+ - Request/response schemas
193
+ - "Try it out" functionality
194
+ - Authentication with JWT Bearer token
195
+
196
+ ### User Flow Demo
197
+ 1. **Register** a new account
198
+ 2. **Login** and show JWT token storage
199
+ 3. **Single Prediction:**
200
+ - Select product
201
+ - Enter Vietnamese comment
202
+ - Show predicted rating + confidence
203
+ 4. **Batch Prediction:**
204
+ - Upload `sample_comments.csv`
205
+ - Show bar chart of rating distribution
206
+ - Show word cloud visualization
207
+ - Download CSV with predictions
208
+
209
+ ### Technical Highlights
210
+ - ✅ FastAPI automatic Swagger generation
211
+ - ✅ JWT authentication security
212
+ - ✅ RESTful API design
213
+ - ✅ Separation of concerns (routers, services, models)
214
+ - ✅ Database relationships (User ↔ PredictionHistory)
215
+ - ✅ Responsive frontend with TailwindCSS
216
+ - ✅ Data visualization with Chart.js + WordCloud
217
+
218
+ ---
219
+
220
+ ## 📦 Dependencies Installed
221
+
222
+ ```
223
+ fastapi # Web framework
224
+ uvicorn # ASGI server
225
+ sqlalchemy # ORM for database
226
+ python-jose # JWT tokens
227
+ passlib # Password hashing
228
+ pydantic # Data validation
229
+ jinja2 # Template engine
230
+ wordcloud # Word cloud generation
231
+ matplotlib # Image rendering
232
+ python-multipart # File uploads
233
+ ```
234
+
235
+ ---
236
+
237
+ ## 🎯 What You Need to Do Next
238
+
239
+ 1. **Test the application:**
240
+ - Register an account
241
+ - Try single prediction
242
+ - Upload the `sample_comments.csv` file
243
+ - Test batch prediction
244
+
245
+ 2. **Replace the dummy ML model:**
246
+ - Edit `app/services/ml_service.py`
247
+ - Load your fine-tuned model
248
+ - Implement proper preprocessing
249
+ - Update prediction logic
250
+
251
+ 3. **Customize (optional):**
252
+ - Add more products in `app/config.py`
253
+ - Adjust styling in templates
254
+ - Add more Vietnamese stopwords in visualization service
255
+
256
+ 4. **Prepare for demo:**
257
+ - Practice showing Swagger UI
258
+ - Prepare sample comments in Vietnamese
259
+ - Explain the architecture and tech stack
260
+
261
+ ---
262
+
263
+ ## 📞 Quick Reference
264
+
265
+ | What | Where |
266
+ |------|-------|
267
+ | Start server | `python main.py` |
268
+ | Swagger UI | http://localhost:8000/docs |
269
+ | Dashboard | http://localhost:8000/dashboard |
270
+ | Replace model | `app/services/ml_service.py` |
271
+ | Add products | `app/config.py` → PRODUCTS list |
272
+ | Database file | `app/database/rating_prediction.db` |
273
+ | Uploads folder | `app/static/uploads/` |
274
+ | Test CSV | `sample_comments.csv` |
275
+
276
+ ---
277
+
278
+ ## ✨ Success Criteria Met
279
+
280
+ ✅ FastAPI backend with Swagger UI
281
+ ✅ Jinja2 templates + TailwindCSS
282
+ ✅ SQLite database (Users + History)
283
+ ✅ JWT authentication
284
+ ✅ Single comment prediction
285
+ ✅ Batch CSV prediction
286
+ ✅ Data visualization (charts + word cloud)
287
+ ✅ CSV export with predictions
288
+ ✅ Professional project structure
289
+ ✅ Complete documentation
290
+
291
+ **Your ML prediction web app is ready! 🎉**
292
+
293
+ Good luck with your presentation! 🎓
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: uvicorn main:app --host 0.0.0.0 --port $PORT
QUICKSTART.md ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 Quick Start Guide
2
+
3
+ ## Installation
4
+
5
+ 1. **Install dependencies:**
6
+ ```bash
7
+ pip install -r requirements.txt
8
+ ```
9
+
10
+ 2. **Run the application:**
11
+ ```bash
12
+ python main.py
13
+ ```
14
+
15
+ 3. **Access the application:**
16
+ Remember to activate the conda environment before running the app:
17
+ `conda activate ./env`
18
+ - Dashboard: http://localhost:8000
19
+ - **Swagger API Docs: http://localhost:8000/docs** ⭐ (Show this to your teacher!)
20
+ - ReDoc: http://localhost:8000/redoc
21
+
22
+ ## First Time Usage
23
+
24
+ 1. Go to http://localhost:8000/login
25
+ 2. Click "Register here" and create an account
26
+ 3. Login with your credentials
27
+ 4. You'll be redirected to the dashboard
28
+
29
+ ## Testing Single Prediction
30
+
31
+ 1. Select a product from dropdown
32
+ 2. Click "Single Comment" tab
33
+ 3. Enter a Vietnamese comment like: "Sản phẩm rất tốt, chất lượng cao, đóng gói cẩn thận"
34
+ 4. Click "Predict Rating"
35
+ 5. See the result with rating and confidence
36
+
37
+ ## Testing Batch Prediction (CSV)
38
+
39
+ 1. Create a CSV file with this format:
40
+ ```csv
41
+ Comment
42
+ "Sản phẩm rất tốt, đóng gói cẩn thận"
43
+ "Chất lượng kém, không như mô tả"
44
+ "Giao hàng nhanh, sản phẩm ổn"
45
+ "Rất hài lòng với sản phẩm này"
46
+ "Giá hơi cao nhưng chất lượng tốt"
47
+ ```
48
+
49
+ 2. Select a product
50
+ 3. Click "Upload CSV" tab
51
+ 4. Upload your CSV file
52
+ 5. Click "Predict Batch"
53
+ 6. View:
54
+ - Bar chart showing rating distribution
55
+ - Word cloud of common words
56
+ - Full results table
57
+ - Download CSV with predictions
58
+
59
+ ## Swagger UI Demo (For Teacher)
60
+
61
+ 1. Open http://localhost:8000/docs
62
+ 2. Show the endpoints:
63
+ - Authentication (register, login)
64
+ - Predictions (single, batch)
65
+ - History
66
+ 3. Click "Try it out" to test any endpoint
67
+ 4. Show the automatic request/response documentation
68
+
69
+ ## Replace Dummy ML Model
70
+
71
+ Edit `app/services/ml_service.py`:
72
+
73
+ ```python
74
+ def __init__(self):
75
+ # Load your real model here
76
+ self.model = load_model('path/to/your/model')
77
+ self.tokenizer = load_tokenizer('path/to/tokenizer')
78
+
79
+ def predict_single(self, text: str) -> Dict[str, Any]:
80
+ # Your preprocessing
81
+ preprocessed = self.preprocess(text)
82
+
83
+ # Your prediction
84
+ prediction = self.model.predict(preprocessed)
85
+ rating = int(prediction) # Convert to 1-5
86
+
87
+ return {
88
+ 'rating': rating,
89
+ 'confidence': float(prediction_confidence)
90
+ }
91
+ ```
92
+
93
+ ## Troubleshooting
94
+
95
+ **"Module not found":**
96
+ ```bash
97
+ pip install -r requirements.txt
98
+ ```
99
+
100
+ **"Port already in use":**
101
+ Edit `main.py` and change port 8000 to another number.
102
+
103
+ **"Database locked":**
104
+ Close any other instances of the app and restart.
105
+
106
+ ## Project Highlights for Presentation
107
+
108
+ ✅ **FastAPI with automatic Swagger UI** (bonus points!)
109
+ ✅ **JWT Authentication** (secure login)
110
+ ✅ **RESTful API design** (professional structure)
111
+ ✅ **Data Visualization** (charts + word clouds)
112
+ ✅ **Batch Processing** (CSV upload/download)
113
+ ✅ **Responsive UI** (TailwindCSS)
114
+ ✅ **Database Integration** (SQLite with history tracking)
115
+
116
+ Good luck! 🎓
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Predict Rating
3
+ emoji: 📈
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ ---
10
+
11
+ # Predict Rating App
12
+ This is a FastAPI application deployed on Hugging Face Spaces using Docker.
README_HF_SPACE.md ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Product Rating Prediction System
3
+ emoji: ⭐
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ # ⭐ Product Rating Prediction System
12
+
13
+ A production-ready AI-powered system for predicting product ratings from Vietnamese customer comments using PhoBERT.
14
+
15
+ ## 🎯 Features
16
+
17
+ - 🤖 **Deep Learning Model**: PhoBERT-based sentiment analysis
18
+ - 💬 **Single & Batch Predictions**: Process one comment or thousands via CSV
19
+ - 📊 **Visual Analytics**: Word clouds and rating distribution charts
20
+ - 🔐 **Secure Authentication**: JWT-based user management
21
+ - 🌐 **Full-Stack Web App**: FastAPI backend + Jinja2 frontend
22
+ - 🗄️ **External Database**: PostgreSQL support for scalability
23
+
24
+ ## 🚀 Quick Start
25
+
26
+ ### For Users
27
+ 1. Click the link above to access the live application
28
+ 2. Register a new account
29
+ 3. Upload a CSV file with comments or enter a single comment
30
+ 4. View predictions, visualizations, and download results
31
+
32
+ ### For Developers
33
+ This Space requires environment variables to connect to an external PostgreSQL database. See [HUGGING_FACE_DEPLOYMENT.md](HUGGING_FACE_DEPLOYMENT.md) for setup instructions.
34
+
35
+ ## 📚 API Documentation
36
+
37
+ Once the app is running, access:
38
+ - **Swagger UI**: `/docs`
39
+ - **ReDoc**: `/redoc`
40
+
41
+ ## 🔧 Technology Stack
42
+
43
+ - **Backend**: FastAPI, SQLAlchemy, Uvicorn
44
+ - **ML/NLP**: PyTorch, Transformers, PhoBERT
45
+ - **Frontend**: Jinja2, TailwindCSS, Chart.js
46
+ - **Database**: PostgreSQL (external)
47
+ - **Security**: JWT, bcrypt
48
+
49
+ ## 📖 Documentation
50
+
51
+ - [Deployment Guide](HUGGING_FACE_DEPLOYMENT.md)
52
+ - [Environment Variables](HF_ENV_VARIABLES.md)
53
+ - [Architecture](ARCHITECTURE.md)
54
+
55
+ ## 🐳 Docker
56
+
57
+ This Space uses the Docker SDK to support heavy ML models (>500MB). The container runs on port 7860 as required by Hugging Face Spaces.
58
+
59
+ ## 🔒 Privacy & Security
60
+
61
+ - All passwords are hashed with bcrypt
62
+ - JWT tokens for secure authentication
63
+ - External PostgreSQL database with SSL
64
+ - No data stored in the container (stateless)
65
+
66
+ ## 📊 Model Information
67
+
68
+ - **Base Model**: PhoBERT (Vietnamese BERT)
69
+ - **Task**: Sentiment Analysis → Rating Prediction (1-5 stars)
70
+ - **Language**: Vietnamese
71
+ - **Model Size**: ~500MB
72
+
73
+ ## 🆘 Support
74
+
75
+ For issues or questions:
76
+ 1. Check the logs tab above
77
+ 2. Review [HUGGING_FACE_DEPLOYMENT.md](HUGGING_FACE_DEPLOYMENT.md)
78
+ 3. Open an issue in the repository
79
+
80
+ ## 📄 License
81
+
82
+ MIT License - See LICENSE file for details
83
+
84
+ ---
85
+
86
+ **Built with ❤️ using FastAPI, PhoBERT, and Hugging Face Spaces**
RENDER_QUICKSTART.md ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚀 QUICK DEPLOYMENT GUIDE
2
+
3
+ ## ✅ Files Changed (Production-Ready)
4
+
5
+ 1. ✅ **requirements.txt** - Added `psycopg2-binary`, `gunicorn`
6
+ 2. ✅ **app/database.py** - Hybrid SQLite/PostgreSQL support with Render URL fix
7
+ 3. ✅ **app/config.py** - Environment variable support for `SECRET_KEY`
8
+ 4. ✅ **main.py** - Auto-migration, production settings
9
+
10
+ ## 📋 Render Configuration
11
+
12
+ ### Web Service Settings
13
+
14
+ ```
15
+ Name: vietnamese-rating-prediction
16
+ Runtime: Python 3
17
+ Build Command: pip install -r requirements.txt
18
+ Start Command: gunicorn main:app --workers 4 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:$PORT
19
+ ```
20
+
21
+ ### Environment Variables (Required)
22
+
23
+ ```
24
+ SECRET_KEY = <generate-with-openssl-rand-hex-32>
25
+ PYTHON_VERSION = 3.11.0
26
+ ```
27
+
28
+ ### PostgreSQL Database
29
+
30
+ ```
31
+ Name: vietnamese-rating-db
32
+ PostgreSQL Version: 15
33
+ Instance Type: Free
34
+ ```
35
+
36
+ **Link database to web service** - `DATABASE_URL` will be auto-populated.
37
+
38
+ ---
39
+
40
+ ## 🎯 Deployment Steps
41
+
42
+ ### 1. Push to GitHub
43
+ ```bash
44
+ git add .
45
+ git commit -m "Deploy to Render"
46
+ git push origin master
47
+ ```
48
+
49
+ ### 2. Create Render Web Service
50
+ - Go to https://dashboard.render.com/
51
+ - New → Web Service
52
+ - Connect GitHub repo
53
+ - Use settings above
54
+
55
+ ### 3. Create PostgreSQL Database
56
+ - New → PostgreSQL
57
+ - Use free tier
58
+ - Link to web service
59
+
60
+ ### 4. Deploy
61
+ - Click "Manual Deploy"
62
+ - Watch logs for success
63
+
64
+ ### 5. Test
65
+ ```
66
+ https://your-app.onrender.com/health
67
+ https://your-app.onrender.com/docs
68
+ https://your-app.onrender.com/dashboard
69
+ ```
70
+
71
+ ---
72
+
73
+ ## 🔧 Local Testing Before Deploy
74
+
75
+ Test hybrid database locally:
76
+
77
+ ```bash
78
+ # Test with SQLite (no DATABASE_URL)
79
+ python main.py
80
+
81
+ # Test with PostgreSQL (set DATABASE_URL)
82
+ export DATABASE_URL=postgresql://user:pass@localhost/dbname
83
+ python main.py
84
+ ```
85
+
86
+ Expected output:
87
+ ```
88
+ 🔧 Development Mode: Using SQLite
89
+ # OR
90
+ 🚀 Production Mode: Using PostgreSQL
91
+ 🔄 Creating database tables...
92
+ ✅ Database tables created successfully!
93
+ ```
94
+
95
+ ---
96
+
97
+ ## ⚠️ Important Notes
98
+
99
+ 1. **Render Free Tier Limitations:**
100
+ - App sleeps after 15 minutes of inactivity (first request takes 30-60s)
101
+ - 512MB RAM (may need optimization for ML model)
102
+ - 1GB PostgreSQL storage
103
+
104
+ 2. **ML Model Optimization:**
105
+ - Consider lazy loading (load on first request)
106
+ - Use CPU-optimized PyTorch
107
+ - Cache predictions if possible
108
+
109
+ 3. **Static Files:**
110
+ - Uploads are ephemeral on Render Free Tier
111
+ - WordClouds will be deleted on container restart
112
+ - Use cloud storage (S3, Cloudinary) for production
113
+
114
+ 4. **Database:**
115
+ - SQLite NOT recommended for production (file locking issues)
116
+ - PostgreSQL required for concurrent requests
117
+ - Free tier: 1GB storage, 97 connections
118
+
119
+ ---
120
+
121
+ ## 🆘 Common Issues
122
+
123
+ ### "Module not found"
124
+ → Run `pip install -r requirements.txt` locally first
125
+
126
+ ### "Port binding error"
127
+ → Use `$PORT` in start command (auto-set by Render)
128
+
129
+ ### "Database connection failed"
130
+ → Check `DATABASE_URL` in environment variables
131
+
132
+ ### "Model loading timeout"
133
+ → The free tier has a 512MB RAM limit; optimize the model or upgrade the plan.
134
+
135
+ ---
136
+
137
+ **Read DEPLOYMENT.md for detailed guide!**
__pycache__/main.cpython-311.pyc ADDED
Binary file (3.08 kB). View file
 
__pycache__/main.cpython-313.pyc ADDED
Binary file (2.58 kB). View file
 
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # App package
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (160 Bytes). View file
 
app/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (148 Bytes). View file
 
app/__pycache__/config.cpython-311.pyc ADDED
Binary file (1.33 kB). View file
 
app/__pycache__/config.cpython-313.pyc ADDED
Binary file (1.25 kB). View file
 
app/__pycache__/database.cpython-311.pyc ADDED
Binary file (2.01 kB). View file
 
app/__pycache__/database.cpython-313.pyc ADDED
Binary file (1.73 kB). View file
 
app/__pycache__/models.cpython-311.pyc ADDED
Binary file (2.94 kB). View file
 
app/__pycache__/models.cpython-313.pyc ADDED
Binary file (2.46 kB). View file
 
app/__pycache__/schemas.cpython-311.pyc ADDED
Binary file (4.58 kB). View file
 
app/__pycache__/schemas.cpython-313.pyc ADDED
Binary file (6.59 kB). View file
 
app/config.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Application configuration.

Values are read from environment variables when present so the same module
works for local development and for hosted deployments. Importing this
module has side effects: it creates the upload directories and prints the
detected run mode.
"""
import os
import warnings
from pathlib import Path

# Project root: this file lives in <root>/app/, so go two levels up.
BASE_DIR = Path(__file__).resolve().parent.parent

# ============================================
# SECURITY (Environment-aware)
# ============================================
# Placeholder key that is committed to the repository. It is only acceptable
# for local development because anyone who can read the source knows it.
_DEV_SECRET_KEY = "your-secret-key-change-in-production-2024-dev-only"

# In production, provide SECRET_KEY through the environment; the placeholder
# above is used only when the variable is absent.
SECRET_KEY = os.getenv("SECRET_KEY", _DEV_SECRET_KEY)

ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 60 * 24  # 24 hours

# ============================================
# UPLOAD DIRECTORIES
# ============================================
# On hosts with ephemeral disks these files do not survive a restart;
# consider cloud storage (S3, Cloudinary) for anything that must persist.
UPLOAD_DIR = BASE_DIR / "app" / "static" / "uploads"
WORDCLOUD_DIR = UPLOAD_DIR / "wordclouds"

# Create directories if they don't exist.
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
WORDCLOUD_DIR.mkdir(parents=True, exist_ok=True)

# ============================================
# PRODUCTION SETTINGS
# ============================================
# Treated as production when either the RENDER or the DATABASE_URL
# environment variable is set (to any value, including an empty string).
IS_PRODUCTION = os.getenv("RENDER") is not None or os.getenv("DATABASE_URL") is not None

if IS_PRODUCTION:
    print("🚀 Running in PRODUCTION mode")
    if SECRET_KEY == _DEV_SECRET_KEY:
        # Do not crash the deployment, but make the misconfiguration loud:
        # a key that is public in the repository offers no protection.
        warnings.warn(
            "SECRET_KEY is not set; the development default is being used "
            "in production. Set the SECRET_KEY environment variable.",
            RuntimeWarning,
            stacklevel=2,
        )
else:
    print("🔧 Running in DEVELOPMENT mode")
46
+
app/database.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database Configuration and Session Management
3
+ Supports BOTH SQLite (local) and PostgreSQL (production on Render)
4
+ """
5
+ import os
6
+ from sqlalchemy import create_engine
7
+ from sqlalchemy.ext.declarative import declarative_base
8
+ from sqlalchemy.orm import sessionmaker
9
+ from pathlib import Path
10
+
11
+ # ============================================
12
+ # HYBRID DATABASE SUPPORT
13
+ # ============================================
14
+ # Priority:
15
+ # 1. Use DATABASE_URL from environment (Render PostgreSQL)
16
+ # 2. Fallback to SQLite for local development
17
+
18
DATABASE_URL = os.getenv("DATABASE_URL")

if not DATABASE_URL:
    # No external database configured: fall back to a local SQLite file.
    print("🔧 Development Mode: Using SQLite")

    # The SQLite file lives under app/database (relative to the working
    # directory), so make sure that folder exists before connecting.
    db_dir = Path("app/database")
    db_dir.mkdir(parents=True, exist_ok=True)

    DATABASE_URL = "sqlite:///./app/database/rating_prediction.db"

    # check_same_thread=False lets the connection be used from a thread
    # other than the one that created it.
    engine = create_engine(
        DATABASE_URL,
        connect_args={"check_same_thread": False},
    )
else:
    # Some hosts hand out URLs with the legacy 'postgres://' scheme, which
    # SQLAlchemy 1.4+ rejects; rewrite only the leading scheme.
    legacy_scheme = "postgres://"
    if DATABASE_URL.startswith(legacy_scheme):
        DATABASE_URL = "postgresql://" + DATABASE_URL[len(legacy_scheme):]

    print("🚀 Production Mode: Using PostgreSQL")

    engine = create_engine(
        DATABASE_URL,
        pool_pre_ping=True,  # test each pooled connection before handing it out
        pool_recycle=300,    # replace connections older than 5 minutes
    )

# Factory for request-scoped sessions bound to the engine chosen above.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base that every ORM model inherits from.
Base = declarative_base()
56
+
57
def get_db():
    """
    Yield a database session and close it afterwards.

    Written as a generator so it can be used as a FastAPI dependency: the
    session is handed to the caller at the ``yield`` and is closed in the
    ``finally`` clause whether or not the caller raised.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
app/database/.gitkeep ADDED
@@ -0,0 +1 @@
 
 
1
+ # Database directory
app/database/rating_prediction.db ADDED
Binary file (36.9 kB). View file
 
app/models.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SQLAlchemy Database Models
3
+ """
4
+ from sqlalchemy import Column, Integer, String, DateTime, Text, ForeignKey, Float
5
+ from sqlalchemy.orm import relationship
6
+ from datetime import datetime
7
+ from app.database import Base
8
+
9
class User(Base):
    """Account record stored in the ``users`` table."""
    __tablename__ = "users"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True, index=True)
    # Login name; unique and required.
    username = Column(String(50), nullable=False, unique=True, index=True)
    # Contact address; unique and required.
    email = Column(String(100), nullable=False, unique=True, index=True)
    # Password hash (never the plain-text password).
    hashed_password = Column(String(255), nullable=False)
    # Filled from datetime.utcnow when no value is supplied (naive UTC).
    created_at = Column(DateTime, default=datetime.utcnow)

    # All predictions made by this user; mirrored by PredictionHistory.user.
    predictions = relationship("PredictionHistory", back_populates="user")

    def __repr__(self):
        return "<User {}>".format(self.username)
24
+
25
+
26
class PredictionHistory(Base):
    """One stored prediction, kept in the ``prediction_history`` table."""
    __tablename__ = "prediction_history"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True, index=True)
    # Owner of the prediction (references users.id).
    user_id = Column(Integer, ForeignKey("users.id"), nullable=False)
    # Product the comment refers to.
    product_name = Column(String(200), nullable=False)
    # Review text that was scored.
    comment = Column(Text, nullable=False)
    # Rating produced by the model.
    predicted_rating = Column(Integer, nullable=False)
    # Confidence reported alongside the rating; may be absent.
    confidence_score = Column(Float, nullable=True)
    # 'single' or 'batch'
    prediction_type = Column(String(20), default="single")
    # Filled from datetime.utcnow when no value is supplied (naive UTC).
    created_at = Column(DateTime, default=datetime.utcnow)

    # Owning user; mirrored by User.predictions.
    user = relationship("User", back_populates="predictions")

    def __repr__(self):
        return "<PredictionHistory {}: {}⭐>".format(self.id, self.predicted_rating)
app/routers/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Routers package
app/routers/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (168 Bytes). View file
 
app/routers/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (156 Bytes). View file
 
app/routers/__pycache__/auth.cpython-311.pyc ADDED
Binary file (4.36 kB). View file
 
app/routers/__pycache__/auth.cpython-313.pyc ADDED
Binary file (3.85 kB). View file
 
app/routers/__pycache__/dashboard.cpython-311.pyc ADDED
Binary file (2.45 kB). View file
 
app/routers/__pycache__/dashboard.cpython-313.pyc ADDED
Binary file (2.1 kB). View file
 
app/routers/__pycache__/prediction.cpython-311.pyc ADDED
Binary file (11.1 kB). View file
 
app/routers/__pycache__/prediction.cpython-313.pyc ADDED
Binary file (13.6 kB). View file
 
app/routers/auth.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Authentication Router
3
+ Handles user registration and login
4
+ """
5
+ from datetime import timedelta
6
+ from fastapi import APIRouter, Depends, HTTPException, status
7
+ from fastapi.security import OAuth2PasswordRequestForm
8
+ from sqlalchemy.orm import Session
9
+
10
+ from app.database import get_db
11
+ from app.models import User
12
+ from app.schemas import UserCreate, UserResponse, Token
13
+ from app.services.auth_service import (
14
+ get_password_hash,
15
+ authenticate_user,
16
+ create_access_token,
17
+ get_current_user
18
+ )
19
+ from app.config import ACCESS_TOKEN_EXPIRE_MINUTES
20
+
21
+ router = APIRouter()
22
+
23
+
24
@router.post("/register", response_model=UserResponse, status_code=status.HTTP_201_CREATED)
async def register(user_data: UserCreate, db: Session = Depends(get_db)):
    """
    Register a new user

    - **username**: Unique username (3-50 characters)
    - **email**: Valid email address
    - **password**: Password (minimum 6 characters)
    """
    # Uniqueness checks, run in order: username first, then email. Each one
    # pairs the lookup condition with the error reported when a row matches.
    uniqueness_checks = (
        (User.username == user_data.username, "Username already registered"),
        (User.email == user_data.email, "Email already registered"),
    )
    for condition, message in uniqueness_checks:
        existing = db.query(User).filter(condition).first()
        if existing:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=message,
            )

    # Only the hash of the password is stored.
    password_hash = get_password_hash(user_data.password)
    account = User(
        username=user_data.username,
        email=user_data.email,
        hashed_password=password_hash,
    )

    db.add(account)
    db.commit()
    # Reload so database-generated fields (e.g. id) are populated.
    db.refresh(account)

    return account
61
+
62
+
63
@router.post("/login", response_model=Token)
async def login(
    form_data: OAuth2PasswordRequestForm = Depends(),
    db: Session = Depends(get_db)
):
    """
    Login to get access token

    - **username**: Your username
    - **password**: Your password

    Returns JWT access token for authentication
    """
    account = authenticate_user(db, form_data.username, form_data.password)

    if account:
        # Token lifetime comes from configuration; the username is carried
        # in the "sub" claim.
        lifetime = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
        token = create_access_token(
            data={"sub": account.username},
            expires_delta=lifetime,
        )
        return {"access_token": token, "token_type": "bearer"}

    # Falsy result from authenticate_user: reject without saying which of
    # the two credentials was wrong.
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Incorrect username or password",
        headers={"WWW-Authenticate": "Bearer"},
    )
90
+
91
+
92
@router.get("/me", response_model=UserResponse)
async def get_current_user_info(current_user: User = Depends(get_current_user)):
    """
    Get current authenticated user information
    """
    # The get_current_user dependency resolves the caller; the record is
    # returned as-is and serialized through UserResponse.
    authenticated_user = current_user
    return authenticated_user
app/routers/dashboard.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Dashboard Router
3
+ Serves frontend Jinja2 templates
4
+ """
5
+ from fastapi import APIRouter, Request, Depends
6
+ from fastapi.templating import Jinja2Templates
7
+ from fastapi.responses import HTMLResponse, RedirectResponse
8
+ from sqlalchemy.orm import Session
9
+
10
+ from app.database import get_db
11
+ from app.models import User
12
+ from app.services.auth_service import get_current_user
13
+
14
+ router = APIRouter()
15
+ templates = Jinja2Templates(directory="app/templates")
16
+
17
+
18
@router.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Home page - redirect to login"""
    # The site root has no page of its own; visitors are sent to /login.
    login_redirect = RedirectResponse(url="/login")
    return login_redirect
22
+
23
+
24
@router.get("/login", response_class=HTMLResponse)
async def login_page(request: Request):
    """Login page"""
    # The template context carries only the incoming request.
    context = {"request": request}
    return templates.TemplateResponse("login.html", context)
28
+
29
+
30
@router.get("/register", response_class=HTMLResponse)
async def register_page(request: Request):
    """Registration page"""
    # The template context carries only the incoming request.
    context = {"request": request}
    return templates.TemplateResponse("register.html", context)
34
+
35
+
36
@router.get("/dashboard", response_class=HTMLResponse)
async def dashboard_page(request: Request):
    """
    Main dashboard page
    Requires authentication (handle in frontend with token)
    """
    # No server-side auth check happens here: the page is rendered for any
    # caller and the template context carries only the incoming request.
    context = {"request": request}
    return templates.TemplateResponse("dashboard.html", context)
app/routers/prediction.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Prediction Router
3
+ Handles single and batch predictions with enhanced features:
4
+ - Keyword highlighting
5
+ - SHAP/Interpretability explanation
6
+ - N-gram analysis
7
+ """
8
+ import io
9
+ import csv
10
+ from typing import List, Dict
11
+ from datetime import datetime
12
+ from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, Form
13
+ from fastapi.responses import StreamingResponse
14
+ from sqlalchemy.orm import Session
15
+
16
+ from app.database import get_db
17
+ from app.models import User, PredictionHistory
18
+ from app.schemas import (
19
+ SinglePredictionRequest,
20
+ SinglePredictionResponse,
21
+ BatchPredictionResponse,
22
+ PredictionHistoryResponse,
23
+ PDFReportRequest,
24
+ NgramAnalysisRequest,
25
+ NgramAnalysisResponse
26
+ )
27
+ from app.services.auth_service import get_current_user
28
+ from app.services.ml_service import get_ml_service, MLPredictionService
29
+ from app.services.visualization_service import get_viz_service, VisualizationService
30
+ from app.services.report_service import get_report_service, ReportService
31
+
32
+ router = APIRouter()
33
+
34
+
35
def highlight_text(text: str, positive_keywords: List[str], negative_keywords: List[str]) -> str:
    """
    Return ``text`` as HTML with sentiment keywords wrapped in ``<span>`` tags.

    Negative keywords get the CSS class ``highlight-negative`` and positive
    keywords ``highlight-positive``. Matching is case-sensitive and literal.

    The text is scanned once, so markup that has already been emitted is
    never searched again. Longer keywords are preferred at a given position,
    and a keyword present in both lists is treated as negative. Everything
    that is not generated markup is HTML-escaped, because the input is
    user-supplied text.

    Args:
        text: Raw comment text (``None`` is treated as empty).
        positive_keywords: Keywords to mark as positive (may be empty/None).
        negative_keywords: Keywords to mark as negative (may be empty/None).

    Returns:
        An HTML-safe string containing the highlighted comment.
    """
    # Local imports: this block cannot rely on the file-level import list.
    import html
    import re

    text = text or ""

    # keyword -> CSS class. Negatives are written last so they win on ties.
    # Empty strings are skipped: they would match between every character.
    css_class_for = {}
    for word in positive_keywords or []:
        if word:
            css_class_for[word] = "highlight-positive"
    for word in negative_keywords or []:
        if word:
            css_class_for[word] = "highlight-negative"

    if not text or not css_class_for:
        return html.escape(text)

    # Longest alternatives first so "không tốt" beats its substring "tốt".
    ordered_words = sorted(css_class_for, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(word) for word in ordered_words))

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        word = match.group(0)
        # Plain text between the previous match and this one.
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(
            f'<span class="{css_class_for[word]}">{html.escape(word)}</span>'
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:]))

    return "".join(pieces)
53
+
54
+
55
@router.post("/single", response_model=SinglePredictionResponse)
async def predict_single(
    request: SinglePredictionRequest,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
    ml_service: MLPredictionService = Depends(get_ml_service)
):
    """
    Predict rating for a single comment with optional explanation

    - **product_name**: Name of the product
    - **comment**: Vietnamese product review text
    - **include_explanation**: Whether to include SHAP-like explanation

    Returns predicted rating (1-5 stars) with confidence score,
    keyword highlighting, and optionally word importance explanation
    """
    if not request.include_explanation:
        # Plain prediction; keywords are still analysed for highlighting.
        prediction = ml_service.predict_single(request.comment)
        keywords = ml_service.keyword_analyzer.analyze(request.comment)
        explanation = None
    else:
        # One call returns rating, confidence, explanation and keywords.
        enriched = ml_service.predict_with_explanation(request.comment)
        prediction = {
            'rating': enriched['rating'],
            'confidence': enriched['confidence']
        }
        explanation = enriched.get('explanation')
        keywords = enriched.get('keywords')

    # The keyword result may be a dict, an object with attributes, or empty.
    if isinstance(keywords, dict):
        positive_words = keywords.get('positive_keywords', [])
        negative_words = keywords.get('negative_keywords', [])
    elif keywords:
        positive_words = keywords.positive_keywords
        negative_words = keywords.negative_keywords
    else:
        positive_words = []
        negative_words = []

    highlighted_comment = highlight_text(request.comment, positive_words, negative_words)

    # Record the prediction in the caller's history.
    record = PredictionHistory(
        user_id=current_user.id,
        product_name=request.product_name,
        comment=request.comment,
        predicted_rating=prediction['rating'],
        confidence_score=prediction['confidence'],
        prediction_type='single'
    )
    db.add(record)
    db.commit()

    response = {
        "predicted_rating": prediction['rating'],
        "confidence_score": prediction['confidence'],
        "comment": request.comment,
        "highlighted_comment": highlighted_comment,
        "explanation": explanation,
        "keywords": keywords
    }
    return response
116
+
117
+
118
@router.post("/batch", response_model=BatchPredictionResponse)
async def predict_batch(
    product_name: str = Form(None),
    file: UploadFile = File(...),
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
    ml_service: MLPredictionService = Depends(get_ml_service),
    viz_service: VisualizationService = Depends(get_viz_service),
    report_service: ReportService = Depends(get_report_service)
):
    """
    Predict ratings for batch of comments from CSV file with enhanced analysis

    - **product_name**: Name of the product
    - **file**: CSV file with 'Comment' column

    Returns predictions with:
    - Visualization data (wordcloud, distribution chart)
    - N-gram analysis (unigrams, bigrams, trigrams)
    - Keyword frequency analysis
    """
    # Validate file type. The filename can be missing, and the extension
    # check should not depend on letter case.
    if not (file.filename or "").lower().endswith('.csv'):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="File must be a CSV"
        )

    try:
        contents = await file.read()

        # 'utf-8-sig' also accepts plain UTF-8 but strips a leading BOM,
        # which would otherwise turn the header into '\ufeffComment'.
        try:
            decoded = contents.decode('utf-8-sig')
        except UnicodeDecodeError as exc:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="CSV file must be UTF-8 encoded"
            ) from exc

        reader = csv.DictReader(io.StringIO(decoded))

        # fieldnames is None when the file has no header row at all.
        if not reader.fieldnames or 'Comment' not in reader.fieldnames:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="CSV must contain 'Comment' column"
            )

        # Keep non-blank comments. Short rows yield None for missing cells,
        # hence the `or ''` before stripping.
        comments = []
        for row in reader:
            comment = (row.get('Comment') or '').strip()
            if comment:
                comments.append(comment)

        if not comments:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="No valid comments found in CSV"
            )

        # Make batch predictions with analysis
        batch_result = ml_service.predict_batch_with_analysis(comments)
        predictions = batch_result['predictions']
        ngrams = batch_result['ngrams']
        keyword_frequency = batch_result['keyword_frequency']

        final_product_name = product_name if product_name else "Unknown Product"

        # Save every prediction to the caller's history in one commit.
        for pred in predictions:
            db.add(PredictionHistory(
                user_id=current_user.id,
                product_name=final_product_name,
                comment=pred['text'],
                predicted_rating=pred['rating'],
                confidence_score=pred['confidence'],
                prediction_type='batch'
            ))
        db.commit()

        # Calculate rating distribution
        ratings = [pred['rating'] for pred in predictions]
        distribution = viz_service.calculate_rating_distribution(ratings)

        # One timestamp for all artefacts produced by this request.
        now = datetime.now()
        stamp = now.strftime('%Y%m%d_%H%M%S')

        # Generate word cloud
        wordcloud_filename = f"wordcloud_{current_user.username}_{stamp}.png"
        wordcloud_url = viz_service.generate_wordcloud(comments, wordcloud_filename)

        # Rows in the shape expected for the CSV download.
        results = [
            {
                'Comment': pred['text'],
                'Predicted_Rating': pred['rating'],
                'Confidence': pred['confidence']
            }
            for pred in predictions
        ]

        # Generate PDF report. The call is kept as before; its return value
        # is not used by this endpoint.
        pdf_filename = f"report_{current_user.username}_{stamp}.pdf"
        report_service.generate_pdf_report(
            predictions=predictions,
            distribution=distribution,
            wordcloud_path=wordcloud_url,
            username=current_user.username,
            filename=pdf_filename
        )

        return {
            "total_predictions": len(predictions),
            "rating_distribution": distribution,
            "wordcloud_url": wordcloud_url,
            "results": results,
            "csv_download_url": f"/api/predict/download/{current_user.id}/{now.timestamp()}",
            "pdf_download_url": f"/api/predict/download-pdf/{current_user.id}/{now.timestamp()}",
            "ngrams": ngrams,
            "keyword_frequency": keyword_frequency
        }

    except HTTPException:
        # Deliberate 4xx responses raised above must reach the client
        # unchanged instead of being rewrapped as a 500.
        raise
    except Exception as exc:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error processing file: {str(exc)}"
        ) from exc
235
+
236
+
237
@router.get("/history", response_model=List[PredictionHistoryResponse])
async def get_prediction_history(
    limit: int = 50,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db)
):
    """
    Get prediction history for current user

    - **limit**: Maximum number of records to return (default: 50).
      Must not be negative.

    Returns the user's records ordered newest first (by `created_at`).
    Responds with 400 when `limit` is negative.
    """
    # A negative LIMIT is backend-dependent: SQLite treats it as "no upper
    # bound" (returning the whole table for this user) while PostgreSQL
    # rejects it. Refuse it explicitly so the endpoint behaves the same
    # everywhere and cannot be used to dump unbounded history.
    if limit < 0:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="limit must be a non-negative integer"
        )

    history = (
        db.query(PredictionHistory)
        .filter(PredictionHistory.user_id == current_user.id)
        .order_by(PredictionHistory.created_at.desc())
        .limit(limit)
        .all()
    )

    return history
253
+
254
+
255
@router.post("/download-csv")
async def download_predictions_csv(
    results: List[dict],
    current_user: User = Depends(get_current_user)
):
    """
    Download prediction results as CSV

    - **results**: List of row dictionaries; each key becomes a column.

    The header is the union of the keys of all rows, in first-seen order,
    so rows that carry extra keys no longer abort the export. Keys missing
    from a row are written as empty cells. An empty list yields an empty
    file.
    """
    # Create CSV in memory
    output = io.StringIO()

    if results:
        # dict.fromkeys de-duplicates while preserving insertion order, which
        # keeps the first row's column order and appends any later-seen keys.
        fieldnames = list(dict.fromkeys(key for row in results for key in row))
        writer = csv.DictWriter(output, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(results)

    # getvalue() returns the whole buffer regardless of the stream position,
    # so no seek is needed before building the response.
    return StreamingResponse(
        iter([output.getvalue()]),
        media_type="text/csv",
        headers={
            "Content-Disposition": f"attachment; filename=predictions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        }
    )
283
+
284
+
285
@router.post("/download-pdf")
async def download_predictions_pdf(
    request: PDFReportRequest,
    current_user: User = Depends(get_current_user),
    report_service: ReportService = Depends(get_report_service)
):
    """
    Build a PDF report from the submitted prediction results and stream it
    back as a file attachment.

    Any failure while generating or wrapping the report is reported as a
    500 response carrying the error text.
    """
    try:
        report_bytes = report_service.generate_pdf_report(
            predictions=request.predictions,
            distribution=request.distribution,
            wordcloud_path=request.wordcloud_path,
            username=current_user.username,
        )
        body = io.BytesIO(report_bytes)
        # Timestamped name so repeated downloads do not overwrite each other.
        response_headers = {
            "Content-Disposition": f"attachment; filename=predictions_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
        }
        return StreamingResponse(
            body,
            media_type="application/pdf",
            headers=response_headers,
        )
    except Exception as exc:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error generating PDF: {str(exc)}"
        )
314
+
315
+
316
@router.post("/analyze-ngrams", response_model=NgramAnalysisResponse)
async def analyze_ngrams(
    request: NgramAnalysisRequest,
    current_user: User = Depends(get_current_user),
    ml_service: MLPredictionService = Depends(get_ml_service)
):
    """
    Run n-gram (unigram, bigram, trigram) frequency analysis over the
    submitted texts.

    - **texts**: List of Vietnamese text comments

    Responds with 400 when the list is empty; otherwise returns whatever
    the ML service's n-gram analysis produces.
    """
    if request.texts:
        return ml_service.analyze_ngrams(request.texts)

    # Nothing to analyse: reject the request instead of calling the service.
    raise HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail="No texts provided for analysis"
    )
338
+
339
+
340
@router.post("/explain")
async def explain_prediction(
    request: SinglePredictionRequest,
    current_user: User = Depends(get_current_user),
    ml_service: MLPredictionService = Depends(get_ml_service)
):
    """
    Predict a rating for one comment and return the explanation data
    alongside it.

    The response echoes the comment and exposes the service's rating,
    confidence, explanation and keywords under API-facing key names.
    """
    explained = ml_service.predict_with_explanation(request.comment)

    # Re-key the service result into the response shape used by the API.
    return dict(
        predicted_rating=explained['rating'],
        confidence_score=explained['confidence'],
        comment=request.comment,
        explanation=explained['explanation'],
        keywords=explained['keywords'],
    )
app/schemas.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pydantic Schemas for Request/Response Validation
3
+ """
4
+ from pydantic import BaseModel, EmailStr, Field
5
+ from typing import Optional, List, Dict, Any
6
+ from datetime import datetime
7
+
8
+ # ===== Auth Schemas =====
9
class UserCreate(BaseModel):
    # Registration payload. All three fields are required; the length
    # bounds below and the e-mail format are checked during validation.
    username: str = Field(default=..., min_length=3, max_length=50)
    email: EmailStr
    password: str = Field(default=..., min_length=6)
13
+
14
class UserLogin(BaseModel):
    # Login payload: both fields are required plain strings with no
    # additional constraints at the schema level.
    username: str
    password: str
17
+
18
class UserResponse(BaseModel):
    # Public view of a user record (no password field is exposed).
    id: int
    username: str
    email: str
    created_at: datetime

    # Pydantic v2 configuration: allow building this model from objects by
    # reading their attributes (e.g. database rows) rather than only from
    # dicts. Replaces the deprecated class-based `Config`, which emits a
    # deprecation warning under Pydantic v2.
    model_config = {"from_attributes": True}
26
+
27
class Token(BaseModel):
    # Token response body: the token string and its type label.
    access_token: str
    token_type: str
30
+
31
class TokenData(BaseModel):
    # Data carried by a token; the username is optional and defaults to None.
    username: Optional[str] = None
33
+
34
+
35
+ # ===== Prediction Schemas =====
36
class SinglePredictionRequest(BaseModel):
    # Request body for predicting a single comment.
    # Only `comment` is required; the product name defaults to an empty
    # string and the explanation flag defaults to off.
    product_name: Optional[str] = ""
    comment: str
    include_explanation: Optional[bool] = False
40
+
41
class ExplanationData(BaseModel):
    # Explanation payload for a prediction: a list of words, a list of
    # importance scores, and an overall sentiment label.
    # NOTE(review): words and importance_scores are presumably parallel
    # lists of equal length - the schema does not enforce this.
    words: List[str]
    importance_scores: List[float]
    overall_sentiment: str
45
+
46
class KeywordData(BaseModel):
    # Positive/negative keywords found for a comment, with a count for
    # each polarity.
    positive_keywords: List[str]
    negative_keywords: List[str]
    positive_count: int
    negative_count: int
51
+
52
class SinglePredictionResponse(BaseModel):
    # Result of a single-comment prediction.
    predicted_rating: int
    confidence_score: float
    comment: str
    # Optional extras; each one is None when not supplied.
    highlighted_comment: Optional[str] = None
    explanation: Optional[ExplanationData] = None
    keywords: Optional[KeywordData] = None
59
+
60
class NgramItem(BaseModel):
    # One n-gram together with its occurrence count.
    ngram: str
    count: int
63
+
64
class NgramAnalysis(BaseModel):
    # N-gram frequency lists grouped by n-gram size (1, 2 and 3 words).
    unigrams: List[NgramItem]
    bigrams: List[NgramItem]
    trigrams: List[NgramItem]
68
+
69
class KeywordFrequencyItem(BaseModel):
    # One keyword together with its occurrence count.
    word: str
    count: int
72
+
73
class KeywordFrequency(BaseModel):
    # Keyword counts split by polarity.
    positive: List[KeywordFrequencyItem]
    negative: List[KeywordFrequencyItem]
76
+
77
class BatchPredictionResponse(BaseModel):
    # Response body of a batch prediction.
    total_predictions: int
    # Left as a plain dict: key and value types are not constrained here.
    rating_distribution: dict
    wordcloud_url: str
    # One free-form dict per predicted comment.
    results: List[dict]
    csv_download_url: str
    pdf_download_url: str
    # Optional analysis sections; None when not provided.
    ngrams: Optional[NgramAnalysis] = None
    keyword_frequency: Optional[KeywordFrequency] = None
86
+
87
class PDFReportRequest(BaseModel):
    # Request body for generating a PDF report: the prediction rows, the
    # rating distribution, and the path of the word-cloud image to include.
    predictions: List[dict]
    distribution: dict
    wordcloud_path: str
91
+
92
+
93
+ # ===== History Schemas =====
94
class PredictionHistoryResponse(BaseModel):
    # One stored prediction as returned by the history endpoint.
    id: int
    product_name: str
    comment: str
    predicted_rating: int
    # May be None, but has no default, so the source object must still
    # provide the attribute.
    confidence_score: Optional[float]
    prediction_type: str
    created_at: datetime

    # Pydantic v2 configuration: allow building this model from objects by
    # reading their attributes (e.g. database rows) rather than only from
    # dicts. Replaces the deprecated class-based `Config`, which emits a
    # deprecation warning under Pydantic v2.
    model_config = {"from_attributes": True}
105
+
106
+
107
+ # ===== Analysis Schemas =====
108
class NgramAnalysisRequest(BaseModel):
    # Request body for n-gram analysis: the list of texts to analyse.
    texts: List[str]
110
+
111
class NgramAnalysisResponse(NgramAnalysis):
    # Response body for n-gram analysis. It has exactly the fields of
    # NgramAnalysis (unigrams, bigrams, trigrams), so it inherits them
    # instead of re-declaring the same three lists; this keeps the two
    # schemas from drifting apart.
    pass
app/services/Model/phoBERT_multi_class_tokenizer/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<mask>": 64000
3
+ }
app/services/Model/phoBERT_multi_class_tokenizer/bpe.codes ADDED
The diff for this file is too large to render. See raw diff