Spaces:
Paused
Paul committed on
Commit · 75146bf
Parent(s): a29f4ee
Initial commit
Browse files
- CURL_EXAMPLES.md +177 -0
- DEPLOYMENT.md +111 -0
- GOOGLE_DRIVE_SETUP.md +202 -0
- QUICKSTART.md +93 -0
- README copy.md +125 -0
- VERCEL_ISSUES.md +286 -0
- __pycache__/app.cpython-313.pyc +0 -0
- __pycache__/main.cpython-313.pyc +0 -0
- __pycache__/ml_service.cpython-313.pyc +0 -0
- __pycache__/schemas.cpython-313.pyc +0 -0
- api/index.py +15 -0
- main.py +90 -0
- ml_service.py +102 -0
- model/.DS_Store +0 -0
- model/config.json +80 -0
- model/mlb.pkl +0 -0
- model/special_tokens_map.json +7 -0
- model/tokenizer.json +0 -0
- model/tokenizer_config.json +56 -0
- model/vocab.txt +0 -0
- requirements.txt +10 -0
- schemas.py +19 -0
- start.sh +16 -0
- vercel.json +14 -0
CURL_EXAMPLES.md
ADDED
@@ -0,0 +1,177 @@
# CURL Examples for ML Text Classification API

## Local Testing

### 1. Health Check
```bash
curl http://localhost:8000/health
```

Expected Response:
```json
{"status": "healthy"}
```

### 2. Root Endpoint
```bash
curl http://localhost:8000/
```

Expected Response:
```json
{
  "message": "ML Text Classification API",
  "version": "1.0.0",
  "endpoints": {
    "health": "/health",
    "predict": "/predict"
  }
}
```

### 3. Prediction Endpoint (Main)
```bash
curl -X POST "http://localhost:8000/predict" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hi ||| Hi anh"}'
```

Expected Response:
```json
{
  "results": [
    {"label": "label_name_1", "score": 0.95},
    {"label": "label_name_2", "score": 0.03},
    ...
  ]
}
```

## Vercel / Production Deployment

After deploying, replace `your-project.vercel.app` with your actual domain:

### 1. Health Check
```bash
curl https://your-project.vercel.app/health
```

### 2. Root Endpoint
```bash
curl https://your-project.vercel.app/
```

### 3. Prediction Endpoint
```bash
curl -X POST "https://your-project.vercel.app/predict" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hi ||| Hi anh"}'
```

## Advanced Examples

### Using jq for Pretty Output
```bash
curl -X POST "http://localhost:8000/predict" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hi ||| Hi anh"}' | jq '.'
```

### Sorting Results by Score
```bash
curl -X POST "http://localhost:8000/predict" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hi ||| Hi anh"}' | jq '.results | sort_by(.score) | reverse'
```

### Getting Top 3 Predictions
```bash
curl -X POST "http://localhost:8000/predict" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hi ||| Hi anh"}' | jq '.results | sort_by(.score) | reverse | .[0:3]'
```
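The same top-3 selection can also be done client-side in Python instead of jq; a minimal sketch using `requests` (the same library the Python section further below uses):

```python
import requests

# Same endpoint and payload as the curl examples above
response = requests.post(
    "http://localhost:8000/predict",
    json={"text": "Hi ||| Hi anh"},
)
results = response.json()["results"]

# Python equivalent of: jq '.results | sort_by(.score) | reverse | .[0:3]'
top3 = sorted(results, key=lambda r: r["score"], reverse=True)[:3]
print(top3)
```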
## Testing with Different Text

### Example 1
```bash
curl -X POST "http://localhost:8000/predict" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hello ||| Hi there"}'
```

### Example 2
```bash
curl -X POST "http://localhost:8000/predict" \
  -H "Content-Type: application/json" \
  -d '{"text": "Xin chào ||| Chào bạn"}'
```

## Error Handling

### Empty Text (400 Error)
```bash
curl -X POST "http://localhost:8000/predict" \
  -H "Content-Type: application/json" \
  -d '{"text": ""}'
```

Expected Response:
```json
{
  "detail": "Text field is required and cannot be empty"
}
```

### Missing Text Field (422 Error)
```bash
curl -X POST "http://localhost:8000/predict" \
  -H "Content-Type: application/json" \
  -d '{}'
```

Expected Response:
```json
{
  "detail": [
    {
      "type": "missing",
      "loc": ["body", "text"],
      "msg": "Field required",
      "input": {}
    }
  ]
}
```

## Python Requests Examples

If you prefer Python instead of curl:

```python
import requests

# Prediction
response = requests.post(
    "http://localhost:8000/predict",
    json={"text": "Hi ||| Hi anh"}
)
print(response.json())
```

## JavaScript/Fetch Examples

```javascript
fetch('http://localhost:8000/predict', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    text: 'Hi ||| Hi anh'
  })
})
.then(response => response.json())
.then(data => console.log(data));
```
DEPLOYMENT.md
ADDED
@@ -0,0 +1,111 @@
# Deployment Guide

## Vercel Deployment

### Quick Deploy

1. Install Vercel CLI:
```bash
npm i -g vercel
```

2. Login to Vercel:
```bash
vercel login
```

3. Deploy:
```bash
vercel
```

### Important Limitations

⚠️ **WARNING:** Your model weights are **256MB**, which exceeds Vercel's free tier limit (50MB).

**Current Stack:**
- FastAPI ✅ (Fully configured)
- PyTorch (~800MB)
- Transformers (~100-300MB)
- Your model: **256MB**
- **Total: ~1.4GB+**

### Solutions

#### Option 1: Use Vercel Pro with Larger Limits
Upgrading to Vercel Pro raises the limits, but you may still run into them.

#### Option 2: Alternative Platforms (Recommended)

**AWS Lambda + Lambda Layers:**
- Better for ML workloads
- Supports larger packages
- Cost-effective

**Google Cloud Run:**
- Docker-based deployment
- Auto-scaling
- Better ML support

**Railway / Render:**
- Docker deployments
- No strict size limits
- Easy setup

**Hugging Face Inference API:**
- Host model separately
- Call via API
- Free tier available

#### Option 3: Hybrid Approach
- Deploy FastAPI to Vercel (small)
- Host model on Hugging Face / AWS SageMaker
- Call model API from FastAPI (see the sketch after this list)
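A minimal sketch of what the hybrid approach could look like, with FastAPI acting as a thin proxy. The `MODEL_API_URL` endpoint is a placeholder for wherever the model ends up hosted, and `requests` is an extra dependency; this is an illustration, not code from this repo:

```python
import os

import requests  # extra dependency: pip install requests
from fastapi import FastAPI, HTTPException

app = FastAPI()

# Placeholder URL for the remotely hosted model service (assumption)
MODEL_API_URL = os.getenv("MODEL_API_URL", "https://example.com/predict")


@app.post("/predict")
def predict_proxy(payload: dict):
    """Forward the request to the remote model service and relay its answer."""
    resp = requests.post(MODEL_API_URL, json=payload, timeout=30)
    if resp.status_code != 200:
        raise HTTPException(status_code=502, detail="Model service error")
    return resp.json()
```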
### Testing Before Deploy

Check your deployment size:
```bash
du -sh model/
```

If >50MB, Vercel will likely fail.

### Current Configuration

✅ **main.py** - FastAPI application (Vercel entry point)
✅ **vercel.json** - Vercel configuration
✅ **requirements.txt** - Python dependencies

## Endpoints After Deployment

Once deployed (regardless of platform):

**Health Check:**
```
GET https://your-app-url/health
```

**Root:**
```
GET https://your-app-url/
```

**Prediction:**
```
POST https://your-app-url/predict
Content-Type: application/json

{
  "text": "Hi ||| Hi anh"
}
```

## Example CURL

```bash
curl -X POST "https://your-app-url/predict" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hi ||| Hi anh"}'
```
GOOGLE_DRIVE_SETUP.md
ADDED
@@ -0,0 +1,202 @@
# Google Drive Model Setup

## Overview
The ML model (`model.safetensors`, 256MB) is stored on Google Drive instead of in the repository. The application will automatically download it on first run.

## Setup Instructions

### Step 1: Upload Model to Google Drive

1. Upload `model.safetensors` to your Google Drive
2. Right-click the file and select "Share"
3. Set permissions to "Anyone with the link"
4. Copy the file ID from the sharing URL

### Step 2: Get Google Drive File ID

From the sharing URL:
```
https://drive.google.com/file/d/FILE_ID_HERE/view?usp=sharing
                                ^^^^^^^^^^^^
                                This is your FILE_ID
```

Or from a direct link:
```
https://drive.google.com/uc?id=FILE_ID_HERE
                               ^^^^^^^^^^^^
                               This is your FILE_ID
```
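If you want to extract the ID programmatically rather than by eye, a small sketch that handles both URL shapes shown above:

```python
import re


def extract_gdrive_id(url: str):
    """Pull the FILE_ID out of either Google Drive URL format above."""
    match = re.search(r"/file/d/([\w-]+)", url) or re.search(r"[?&]id=([\w-]+)", url)
    return match.group(1) if match else None


print(extract_gdrive_id("https://drive.google.com/file/d/FILE_ID_HERE/view?usp=sharing"))
print(extract_gdrive_id("https://drive.google.com/uc?id=FILE_ID_HERE"))
```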
### Step 3: Configure Application

You have **3 options** to provide the Google Drive file ID:

#### Option 1: Environment Variable (Recommended)
Create a `.env` file in the project root:
```bash
GDRIVE_MODEL_ID=YOUR_FILE_ID_HERE
```

#### Option 2: Update ml_service.py
Edit the `get_ml_service()` function:
```python
def get_ml_service() -> MLInferenceService:
    """Get or create the global ML service instance."""
    global _ml_service
    if _ml_service is None:
        _ml_service = MLInferenceService(
            gdrive_file_id="YOUR_FILE_ID_HERE"  # Add your ID here
        )
        _ml_service.load_model()
    return _ml_service
```

#### Option 3: Set Environment Variable in Shell
```bash
export GDRIVE_MODEL_ID=YOUR_FILE_ID_HERE
python main.py
```

### Step 4: Install Dependencies

Make sure `gdown` is installed:
```bash
pip install -r requirements.txt
```

### Step 5: Test

Run the application:
```bash
source venv/bin/activate
python main.py
```

On first run, you should see:
```
Downloading model from Google Drive...
[Progress bar]
Model downloaded successfully to ./model/model.safetensors
```

## Files in Repository

### ❌ Excluded (Too Large for GitHub)
- `model/model.safetensors` (256MB)

### ✅ Included
- `model/config.json`
- `model/mlb.pkl`
- `model/tokenizer.json`
- `model/tokenizer_config.json`
- `model/special_tokens_map.json`
- `model/vocab.txt`

## How It Works

1. On first API request, `load_model()` is called
2. If `model.safetensors` doesn't exist locally:
   - Check if `GDRIVE_MODEL_ID` is set
   - Download from Google Drive using `gdown`
   - Save to `./model/model.safetensors`
3. Load model using transformers
4. Create inference pipeline

**Subsequent runs will use the cached local file** - no re-download needed.

## .gitignore Configuration

Make sure your `.gitignore` includes:
```
model/*.safetensors
# model/*.pkl   # uncomment to exclude mlb.pkl too
```

## Troubleshooting

### "File not found or insufficient permissions"
- Check the file ID is correct
- Verify file sharing is set to "Anyone with the link"
- Try opening the sharing URL in an incognito window

### "Download interrupted"
- The file is large (256MB), so ensure a stable internet connection
- The script will retry on the next run
- You can manually download the file and place it in the `model/` folder

### "gdown not found"
```bash
pip install gdown
```

### Local Testing Without Download
If you have the model file locally, just place it in the `model/` folder:
```bash
cp /path/to/your/model.safetensors ./model/
```

## Deployment Considerations

### Vercel / Serverless
When deploying to serverless platforms, either:
1. Upload the model during build time, OR
2. Set `GDRIVE_MODEL_ID` as an environment variable

Either way, the first request will be slow (download + load); subsequent requests are fast (cached model).

### Docker / VMs
For Docker deployments, you have options:
1. **Build-time download** - Download in Dockerfile
2. **Runtime download** - Use GDRIVE_MODEL_ID env var
3. **Volume mount** - Mount model directory

Example Dockerfile:
```dockerfile
FROM python:3.13

WORKDIR /app
COPY requirements.txt .
RUN pip install -r requirements.txt

COPY . .

# Option 1: Download at build time
RUN gdown YOUR_FILE_ID -O model/model.safetensors

# Option 2: Let runtime download
# ENV GDRIVE_MODEL_ID=YOUR_FILE_ID

CMD ["python", "main.py"]
```

## Security Notes

- ✅ Google Drive ID is not sensitive (public file)
- ✅ File ID can be safely committed to git
- ⚠️ Consider rate limiting your API if sharing publicly
- ⚠️ Monitor your Google Drive quota if you run many deployments

## Alternative Storage Options

If Google Drive doesn't meet your needs:

1. **AWS S3** - Use boto3 to download from S3 (see the sketch after this list)
2. **Hugging Face Hub** - Transformers library supports this natively
3. **HTTP Server** - Host on any HTTP server
4. **IPFS** - Decentralized storage
5. **Dropbox / OneDrive** - Similar to Google Drive approach
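For the S3 option, a minimal boto3 sketch mirroring the `_download_from_gdrive` flow in ml_service.py (the bucket and key names are placeholders, and boto3 is an extra dependency not in requirements.txt):

```python
import os

import boto3  # extra dependency: pip install boto3


def download_model_from_s3(bucket, key, dest="./model/model.safetensors"):
    """Download the model from S3 unless it is already cached locally."""
    if os.path.exists(dest):
        return  # already downloaded, same caching behavior as the gdown flow
    os.makedirs(os.path.dirname(dest), exist_ok=True)
    boto3.client("s3").download_file(bucket, key, dest)


download_model_from_s3("my-model-bucket", "models/model.safetensors")  # placeholder names
```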
## Example Usage

```python
from ml_service import get_ml_service

# Will auto-download from Google Drive if needed
ml_service = get_ml_service()

# Use normally
predictions = ml_service.predict("Hi ||| Hi anh")
print(predictions)
```
QUICKSTART.md
ADDED
@@ -0,0 +1,93 @@
# Quick Start Guide

## ✅ How to Run the Application

### Step 1: Activate Virtual Environment
```bash
source venv/bin/activate
```

### Step 2: Run the Server
```bash
python main.py
```

You should see:
```
INFO:     Started server process
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000
```

**Note:** The model loads on first request, which may take 10-30 seconds.

### Step 3: Test the API

Open a **new terminal** (keep the server running) and test:

**Health Check:**
```bash
curl http://localhost:8000/health
```

**Main Prediction Endpoint:**
```bash
curl -X POST "http://localhost:8000/predict" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hi ||| Hi anh"}'
```

**Get API Info:**
```bash
curl http://localhost:8000/
```

## 🌐 Interactive API Documentation

Visit: **http://localhost:8000/docs**

This provides a Swagger UI to test all endpoints interactively.

## 📁 Alternative: Using Uvicorn Directly

Instead of `python main.py`, you can also use:

```bash
source venv/bin/activate
uvicorn main:app --reload --host 0.0.0.0 --port 8000
```

The `--reload` flag enables auto-reload on code changes.

## 🛑 Stopping the Server

Press `Ctrl + C` in the terminal running the server.

## 🚀 Deploying to Vercel

See `DEPLOYMENT.md` for detailed instructions.

**Quick deploy:**
```bash
vercel login
vercel
```

## 📝 Key Commands Summary

| Task | Command |
|------|---------|
| Setup | `python3 -m venv venv && source venv/bin/activate && pip install -r requirements.txt` |
| Run | `source venv/bin/activate && python main.py` |
| Health | `curl http://localhost:8000/health` |
| Predict | `curl -X POST http://localhost:8000/predict -H "Content-Type: application/json" -d '{"text": "Hi \|\|\| Hi anh"}'` |
| Docs | Open `http://localhost:8000/docs` in browser |

## ⚠️ Important Notes

- **Python 3.13**: The project uses Python 3.13. If you have issues, ensure you're using the correct Python version.
- **First Request**: The model loads on the first `/predict` request. This takes 10-30 seconds.
- **Port 8000**: Make sure port 8000 is free. If not, modify `main.py` to use a different port (see the sketch below).
- **Model Size**: The model is 256MB, which exceeds Vercel's free tier limits. Consider alternative hosting (see `DEPLOYMENT.md`).
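For reference, the port change mentioned in the notes above is a one-line edit to the `__main__` block at the bottom of `main.py`; for example, to serve on 8080 instead:

```python
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8080)  # was port=8000
```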
README copy.md
ADDED
@@ -0,0 +1,125 @@
# ML Text Classification API

A FastAPI-based REST API for multi-label text classification using DistilBERT.

## Quick Start

### Step 1: Setup

1. Create a virtual environment (recommended):
```bash
python3 -m venv venv
source venv/bin/activate  # On Windows: venv\Scripts\activate
```

2. Install dependencies:
```bash
pip install -r requirements.txt
```

### Step 2: Configure Google Drive Model

The model file (256MB) is stored on Google Drive. Configure it before first run:

**Option 1: Set environment variable**
```bash
export GDRIVE_MODEL_ID=YOUR_FILE_ID_HERE
```

**Option 2: Create `.env` file**
```bash
echo "GDRIVE_MODEL_ID=YOUR_FILE_ID_HERE" > .env
```

See [GOOGLE_DRIVE_SETUP.md](GOOGLE_DRIVE_SETUP.md) for detailed instructions on getting your Google Drive file ID.

3. Run the application:
```bash
python main.py
```

**Note:** If the `pip` command is not found, use `python3 -m pip install -r requirements.txt`.

Or using uvicorn directly:
```bash
uvicorn main:app --reload --host 0.0.0.0 --port 8000
```

## API Endpoints

### Root
- **GET** `/` - Get API information

### Health Check
- **GET** `/health` - Check API health status

### Prediction
- **POST** `/predict` - Classify text

Request body:
```json
{
  "text": "Hi ||| Hi anh"
}
```

Response:
```json
{
  "results": [
    {
      "label": "label_name_1",
      "score": 0.85
    },
    {
      "label": "label_name_2",
      "score": 0.65
    }
  ]
}
```

## Testing

### Local Testing

Test the API using curl:
```bash
curl -X POST "http://localhost:8000/predict" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hi ||| Hi anh"}'
```

Or use the interactive docs at `http://localhost:8000/docs`

### Vercel Deployment

#### CURL Examples

After deploying to Vercel:

**1. Health Check:**
```bash
curl https://your-project.vercel.app/health
```

**2. Root Endpoint:**
```bash
curl https://your-project.vercel.app/
```

**3. Prediction Endpoint:**
```bash
curl -X POST "https://your-project.vercel.app/predict" \
  -H "Content-Type: application/json" \
  -d '{"text": "Hi ||| Hi anh"}'
```

**⚠️ Important:** Vercel has strict size limitations and **is NOT suitable for ML workloads** with PyTorch/Transformers models. Your deployment (~1.4GB) exceeds Vercel limits.

**Recommended alternatives:**
- **Railway.app** - Best for ML (easy deployment, Docker support)
- **Render.com** - Free tier available
- **Google Cloud Run** - Production-grade serverless
- See [VERCEL_ISSUES.md](VERCEL_ISSUES.md) for a detailed deployment guide
VERCEL_ISSUES.md
ADDED
@@ -0,0 +1,286 @@
# Vercel Deployment Issues & Solutions

## Error: "data is too long"

This error occurs when:
1. Model files are too large for Vercel's limits
2. Response data exceeds size limits
3. Cold start timeout issues

## Vercel Limitations

- **Function size**: 50MB (uncompressed)
- **Response body**: 4.5MB max
- **Cold start timeout**: 10s (free tier), 60s (pro)
- **Total deployment**: 100MB

## Your Current Situation

### Model Size Breakdown:
- PyTorch: ~800MB
- Transformers: ~200-300MB
- Your model: 256MB
- Other dependencies: ~100MB
- **Total: ~1.4GB** ❌

### This Exceeds ALL Vercel Limits!

## Solutions

### ❌ Option 1: Vercel (Not Recommended)
Vercel is **NOT suitable** for your ML workload.

### ✅ Option 2: Alternative Platforms (Recommended)

#### Option 2A: Railway.app
**Best for ML deployments**

```bash
# Install Railway CLI
npm i -g @railway/cli

# Login
railway login

# Deploy
railway up
```

**Why Railway:**
- Docker-based (your app runs in a container)
- No strict size limits
- Better for Python/ML workloads
- Free tier: $5 credit/month
- Auto-deploy from GitHub

#### Option 2B: Render.com
**Similar to Railway**

```bash
# Just push to GitHub
git push origin main

# Connect GitHub repo to Render
# Render auto-detects FastAPI
```

**Why Render:**
- Free tier available
- Docker support
- Easy GitHub integration
- Auto-scaling

#### Option 2C: Google Cloud Run
**Best for production ML**

```bash
# Create Dockerfile
# Build and push
gcloud run deploy lovebird-api \
  --source . \
  --region asia-southeast1 \
  --allow-unauthenticated
```

**Why Cloud Run:**
- Serverless (like Vercel)
- Better ML support
- Auto-scaling to zero
- Pay per use

#### Option 2D: AWS Lambda + EFS/S3
**Enterprise-grade solution**

Pros:
- Model can be stored in S3
- Load into Lambda on cold start
- EFS for persistent storage

Cons:
- More complex setup
- Cold start still slow

### ✅ Option 3: Hybrid Approach

#### 3A: FastAPI on Vercel + Hugging Face Inference API

1. Deploy lightweight FastAPI to Vercel (no model)
2. Upload your model to Hugging Face
3. Call the HF Inference API from FastAPI (see the sketch below)
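A sketch of step 3, calling the Hugging Face Inference API with `requests`. The model repo name and token are placeholders, and the exact response shape depends on the hosted model:

```python
import os

import requests

HF_MODEL = "your-username/your-model"  # placeholder: your uploaded model repo
HF_TOKEN = os.environ["HF_TOKEN"]      # placeholder: an HF access token


def hf_predict(text: str):
    """Call the hosted model instead of loading it locally."""
    resp = requests.post(
        f"https://api-inference.huggingface.co/models/{HF_MODEL}",
        headers={"Authorization": f"Bearer {HF_TOKEN}"},
        json={"inputs": text},
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json()
```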
**Pros:**
- Fast cold starts
- Automatic scaling
- Free tier available
- No model management

**Cons:**
- Requires hosting model elsewhere
- Network latency
- May have costs

#### 3B: Split Architecture

```
FastAPI (Vercel)
    ↓ calls →
ML Service (Railway/Render)
    ↓ hosts →
Your Model
```

**Architecture:**
- Vercel: FastAPI endpoints (lightweight)
- Railway/Render: ML inference service
- Keep code in sync via API calls

## Quick Comparison

| Platform | Suitable? | Cost | Setup | Performance |
|----------|-----------|------|-------|-------------|
| Vercel | ❌ No | Free/Paid | Easy | Too slow |
| Railway | ✅ Yes | $5/mo | Easy | Fast |
| Render | ✅ Yes | Free | Easy | Fast |
| Cloud Run | ✅ Yes | Pay-per-use | Medium | Fast |
| Lambda | ⚠️ Complex | Pay-per-use | Hard | Cold start |
| HF API | ✅ Yes | Free/Paid | Easy | Network latency |

## Recommended Next Steps

### For Testing: Render.com
1. Push code to GitHub
2. Sign up at render.com
3. Create "Web Service"
4. Connect GitHub repo
5. Deploy (free tier works)

### For Production: Railway.app
1. Install Railway CLI
2. `railway login`
3. `railway init`
4. `railway up`
5. Done!

### For Enterprise: Google Cloud Run
1. Create Dockerfile
2. Build container
3. Deploy to Cloud Run
4. Auto-scales based on traffic

## Migration Guide

### From Vercel to Railway

1. **Keep your code**: No changes needed
2. **Add Dockerfile** (optional, Railway auto-detects):
```dockerfile
FROM python:3.13-slim

WORKDIR /app
COPY requirements.txt .
RUN pip install -r requirements.txt

COPY . .

CMD ["python", "main.py"]
```

3. **Deploy**: `railway up`
4. **Set env vars**: Railway dashboard

### From Vercel to Render

1. Push to GitHub
2. Create Render account
3. New → Web Service
4. Connect GitHub repo
5. Set environment variables
6. Deploy

## Environment Variables Setup

On any platform, make sure to set:

```bash
GDRIVE_MODEL_ID=YOUR_FILE_ID_HERE
```

## Model Loading Strategy

### Current (Works on Railway/Render):
```python
# Downloads model on first request
# Caches in memory for subsequent requests
# Cold start: 30-60s (first request)
# Warm: < 1s (subsequent requests)
```

### Optimization Options:

#### 1. Pre-load Model on Startup
```python
# In main.py
@app.on_event("startup")
async def startup_event():
    get_ml_service()  # Load model immediately
```

#### 2. Use Model Caching Layer (Redis/Memcached)
```python
# Store model in Redis between requests
# Reduces cold starts
```

#### 3. Keep Container Warm
```python
# Set up health checks that keep container alive
# Prevents cold starts
```

## Monitoring & Debugging

### Check Deployment Logs
```bash
# Railway
railway logs

# Render
# Dashboard → Logs tab

# Vercel
vercel logs
```

### Check Model Loading
Look for these in logs:
```
Downloading model from Google Drive...
Model downloaded successfully
Device set to use...
```

### Common Issues

#### "Model not found"
- Check GDRIVE_MODEL_ID is set
- Verify file sharing permissions
- Check internet access in container

#### "Timeout on cold start"
- Normal! Cold start takes 30-60s
- Use health checks to keep warm
- Or upgrade tier for faster starts

#### "Out of memory"
- Model too large for tier
- Upgrade to higher tier
- Or use HF Inference API instead

## Summary

**Don't use Vercel for this project.** It's designed for static sites and small serverless functions, not ML workloads.

**Use Railway or Render** for easy deployment with your current code.

**Consider Hugging Face Inference API** for production scale without managing infrastructure.
__pycache__/app.cpython-313.pyc
ADDED
Binary file (331 Bytes).

__pycache__/main.cpython-313.pyc
ADDED
Binary file (2.87 kB).

__pycache__/ml_service.cpython-313.pyc
ADDED
Binary file (4.42 kB).

__pycache__/schemas.cpython-313.pyc
ADDED
Binary file (1.45 kB).
api/index.py
ADDED
@@ -0,0 +1,15 @@
"""
Vercel serverless entry point for FastAPI.
This is a wrapper that exports the FastAPI app.
"""
import sys
from pathlib import Path

# Add parent directory to path to import our modules
sys.path.insert(0, str(Path(__file__).parent.parent))

from main import app

# Export app for Vercel
__all__ = ["app"]
main.py
ADDED
@@ -0,0 +1,90 @@
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from ml_service import get_ml_service
from schemas import PredictionRequest, PredictionResponse, PredictionItem
import asyncio

# Initialize FastAPI app
app = FastAPI(
    title="ML Text Classification API",
    description="API for multi-label text classification using DistilBERT",
    version="1.0.0"
)

# Configure CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/")
async def root():
    """Root endpoint."""
    return {
        "message": "ML Text Classification API",
        "version": "1.0.0",
        "endpoints": {
            "health": "/health",
            "predict": "/predict"
        }
    }


@app.get("/health")
async def health_check():
    """Health check endpoint."""
    return {"status": "healthy"}


@app.post("/predict", response_model=PredictionResponse)
async def predict(prediction_request: PredictionRequest):
    """
    Predict labels for the given text.

    Args:
        prediction_request: Request containing the text to classify

    Returns:
        PredictionResponse with classification results
    """
    if not prediction_request.text:
        raise HTTPException(
            status_code=400,
            detail="Text field is required and cannot be empty"
        )

    try:
        # Get ML service and predict in executor to avoid blocking
        ml_service = get_ml_service()

        # Run blocking ML inference in thread pool
        loop = asyncio.get_event_loop()
        predictions = await loop.run_in_executor(
            None,  # Use default executor
            ml_service.predict,
            prediction_request.text
        )

        # Convert to Pydantic models
        results = [
            PredictionItem(label=item['label'], score=item['score'])
            for item in predictions
        ]

        return PredictionResponse(results=results)

    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Prediction error: {str(e)}"
        )


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
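A quick smoke test of the endpoints above, using FastAPI's bundled test client (a sketch; `TestClient` needs the `httpx` package, and the `/predict` call assumes the model files are available locally):

```python
from fastapi.testclient import TestClient

from main import app

client = TestClient(app)

# /health needs no model, so this should always pass
assert client.get("/health").json() == {"status": "healthy"}

# /predict triggers model loading on the first call, so it may take a while
resp = client.post("/predict", json={"text": "Hi ||| Hi anh"})
print(resp.status_code, resp.json())
```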
ml_service.py
ADDED
@@ -0,0 +1,102 @@
import pickle
import os
import gdown
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from typing import List, Dict, Any, Optional


class MLInferenceService:
    """Service for loading and running ML model inference."""

    def __init__(self, model_dir: str = "./model", gdrive_file_id: Optional[str] = None):
        self.model_dir = model_dir
        self.gdrive_file_id = gdrive_file_id or os.getenv("GDRIVE_MODEL_ID")
        self.model = None
        self.tokenizer = None
        self.clf = None
        self.label_names = []

    def load_model(self):
        """Load the model, tokenizer, and label names."""
        if self.model is not None:
            return

        # If Google Drive ID provided, download model file
        if self.gdrive_file_id:
            self._download_from_gdrive()

        # Load model and tokenizer
        self.model = AutoModelForSequenceClassification.from_pretrained(self.model_dir)
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_dir)

        # Load MultiLabelBinarizer to get label names
        with open(f"{self.model_dir}/mlb.pkl", "rb") as f:
            mlb = pickle.load(f)
        self.label_names = list(mlb.classes_)

        # Create pipeline for inference
        self.clf = pipeline(
            "text-classification",
            model=self.model,
            tokenizer=self.tokenizer,
            return_all_scores=True
        )

    def _download_from_gdrive(self):
        """Download model.safetensors from Google Drive if it does not exist locally."""
        model_path = f"{self.model_dir}/model.safetensors"

        # Skip download if file already exists
        if os.path.exists(model_path):
            return

        # Ensure model directory exists
        os.makedirs(self.model_dir, exist_ok=True)

        # Download from Google Drive
        print("Downloading model from Google Drive...")
        gdrive_url = f"https://drive.google.com/uc?id={self.gdrive_file_id}"
        gdown.download(gdrive_url, model_path, quiet=False)
        print(f"Model downloaded successfully to {model_path}")

    def predict(self, text: str) -> List[Dict[str, Any]]:
        """
        Predict labels for the given text.

        Args:
            text: Input text to classify

        Returns:
            List of dictionaries with 'label' and 'score' keys
        """
        if self.clf is None:
            raise RuntimeError("Model not loaded. Call load_model() first.")

        # Process text: replace ||| with [SEP]
        processed_text = text.replace('|||', '[SEP]')

        # Get predictions
        result = self.clf(processed_text)

        # Map label indices to label names and filter by score >= 0.5
        output = [
            {'label': self.label_names[i], 'score': item['score']}
            for i, item in enumerate(result[0])
            if item['score'] >= 0.5
        ]

        return output


# Global singleton instance
_ml_service = None


def get_ml_service() -> MLInferenceService:
    """Get or create the global ML service instance."""
    global _ml_service
    if _ml_service is None:
        _ml_service = MLInferenceService()
        _ml_service.load_model()
    return _ml_service
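One caveat with the module-level singleton above: two requests arriving at the same time on a threaded server could both see `_ml_service is None` and load the model twice. A lock is one common guard; a sketch of that variant (an assumption on my part, not part of the committed code):

```python
import threading

_ml_service = None
_ml_service_lock = threading.Lock()


def get_ml_service_threadsafe() -> MLInferenceService:
    """Thread-safe variant of get_ml_service() above (hypothetical helper)."""
    global _ml_service
    if _ml_service is None:
        with _ml_service_lock:
            if _ml_service is None:  # re-check: another thread may have loaded it
                service = MLInferenceService()
                service.load_model()
                _ml_service = service
    return _ml_service
```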
model/.DS_Store
ADDED
Binary file (6.15 kB).
model/config.json
ADDED
@@ -0,0 +1,80 @@
{
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "dtype": "float32",
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "LABEL_12",
    "13": "LABEL_13",
    "14": "LABEL_14",
    "15": "LABEL_15",
    "16": "LABEL_16",
    "17": "LABEL_17",
    "18": "LABEL_18",
    "19": "LABEL_19",
    "20": "LABEL_20",
    "21": "LABEL_21",
    "22": "LABEL_22",
    "23": "LABEL_23",
    "24": "LABEL_24",
    "25": "LABEL_25"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_11": 11,
    "LABEL_12": 12,
    "LABEL_13": 13,
    "LABEL_14": 14,
    "LABEL_15": 15,
    "LABEL_16": 16,
    "LABEL_17": 17,
    "LABEL_18": 18,
    "LABEL_19": 19,
    "LABEL_2": 2,
    "LABEL_20": 20,
    "LABEL_21": 21,
    "LABEL_22": 22,
    "LABEL_23": 23,
    "LABEL_24": 24,
    "LABEL_25": 25,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "LABEL_7": 7,
    "LABEL_8": 8,
    "LABEL_9": 9
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "problem_type": "multi_label_classification",
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.57.1",
  "vocab_size": 30522
}
model/mlb.pkl
ADDED
Binary file (957 Bytes).
model/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
{
  "cls_token": "[CLS]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "unk_token": "[UNK]"
}
model/tokenizer.json
ADDED
The diff for this file is too large to render.
model/tokenizer_config.json
ADDED
@@ -0,0 +1,56 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": false,
  "cls_token": "[CLS]",
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "DistilBertTokenizer",
  "unk_token": "[UNK]"
}
model/vocab.txt
ADDED
The diff for this file is too large to render.
requirements.txt
ADDED
@@ -0,0 +1,10 @@
fastapi
uvicorn[standard]
pydantic
transformers
torch
scikit-learn
numpy
safetensors
gdown
schemas.py
ADDED
@@ -0,0 +1,19 @@
from pydantic import BaseModel, Field
from typing import List


class PredictionItem(BaseModel):
    """Schema for a single prediction result."""
    label: str = Field(..., description="Label name")
    score: float = Field(..., description="Prediction score/confidence")


class PredictionRequest(BaseModel):
    """Schema for prediction request."""
    text: str = Field(..., description="Input text to classify", min_length=1)


class PredictionResponse(BaseModel):
    """Schema for prediction response."""
    results: List[PredictionItem] = Field(..., description="List of predictions")
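Because of the `min_length=1` constraint, an empty string is rejected by Pydantic validation before the endpoint body runs; a quick sketch showing the behavior:

```python
from pydantic import ValidationError

from schemas import PredictionRequest

try:
    PredictionRequest(text="")
except ValidationError as exc:
    print(exc)  # validation error: text is shorter than the minimum length
```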
start.sh
ADDED
@@ -0,0 +1,16 @@
#!/bin/bash

# Kill any existing process on port 8000
echo "Checking for existing server on port 8000..."
EXISTING_PID=$(lsof -ti:8000)
if [ ! -z "$EXISTING_PID" ]; then
    echo "Killing existing process $EXISTING_PID"
    kill -9 $EXISTING_PID 2>/dev/null
    sleep 2
fi

# Activate virtual environment and start server
echo "Starting server..."
source venv/bin/activate
python main.py
vercel.json
ADDED
@@ -0,0 +1,14 @@
{
  "builds": [
    {
      "src": "api/index.py",
      "use": "@vercel/python"
    }
  ],
  "routes": [
    {
      "src": "/(.*)",
      "dest": "/api/index.py"
    }
  ]
}