Spaces:
Runtime error
Runtime error
Commit ·
949a823
1
Parent(s): 25cbe01
Deployment of FastAPI model service
Browse files- .gitattributes +1 -35
- Dockerfile +0 -0
- api_client.py +15 -0
- main.py +153 -0
- preprocessing.py +16 -0
- requirements.txt +8 -0
.gitattributes
CHANGED
|
@@ -1,35 +1 @@
|
|
| 1 |
-
*.
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
backend/src/models/*.pth filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Dockerfile
ADDED
|
File without changes
|
api_client.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# model-fastapi/api_client.py
import tweepy
import os  # Used to read the bearer token from the environment

# Read the token from the TWITTER_BEARER_TOKEN environment variable.
# SECURITY FIX: the original code hard-coded a live bearer token and passed it
# as the *name* of the environment variable, which (a) leaked the credential
# in source control and (b) made the lookup always return None, so the module
# always raised. The leaked token must be revoked/rotated. The variable name
# below also matches the check performed in main.py's predict_stress endpoint.
BEARER_TOKEN = os.environ.get("TWITTER_BEARER_TOKEN")

if not BEARER_TOKEN:
    raise ValueError("TWITTER_BEARER_TOKEN tidak ditemukan di environment variables.")

# Shared read-only client for the X/Twitter v2 API.
client = tweepy.Client(bearer_token=BEARER_TOKEN)

def get_tweets_by_username(username: str, count: int = 100):
    """Fetch up to ``count`` recent tweets for ``username``.

    Not implemented yet — currently returns None, which callers treat as
    "no tweets retrieved".
    """
    # TODO: implement via client.get_user(...) + client.get_users_tweets(...).
    pass
|
main.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import Dict, Optional, List
|
| 4 |
+
import numpy as np # Untuk operasi np.argmax
|
| 5 |
+
import os # Untuk os.environ (Bearer Token)
|
| 6 |
+
|
| 7 |
+
# Pustaka ML
|
| 8 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 9 |
+
import torch
|
| 10 |
+
|
| 11 |
+
# Import dari file lokal
|
| 12 |
+
from .api_client import get_tweets_by_username
|
| 13 |
+
from .preprocessing import preprocess_text
|
| 14 |
+
|
| 15 |
+
app = FastAPI()

# --- PATH CONFIGURATION ---
MODEL_DIR = "./model_assets"
WEIGHTS_FILE = "./model_assets/best_indobertweet.pth"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- MODEL LOADING ---
# Three stages: 1. load tokenizer, 2. load model skeleton, 3. load .pth weights.
try:
    print("Mencoba memuat model...")
    # 1. Tokenizer & architecture from the local assets directory.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR, num_labels=2)

    # 2. Fine-tuned weights. map_location=DEVICE lets this load on CPU-only hosts.
    # SECURITY: weights_only=True restricts unpickling to tensor data, preventing
    # arbitrary code execution from a tampered checkpoint; a plain state_dict
    # needs nothing more than that.
    state_dict = torch.load(WEIGHTS_FILE, map_location=DEVICE, weights_only=True)
    model.load_state_dict(state_dict)

    # 3. Finalise: move to the target device and switch to inference mode.
    model.to(DEVICE)
    model.eval()
    print(f"Model berhasil dimuat ke device: {DEVICE}")

except Exception as e:
    print(f"FATAL ERROR: Gagal memuat model. Pastikan file di {MODEL_DIR} sudah benar.")
    print(e)
    # Consider re-raising here so the server refuses to start without a model.
|
| 44 |
+
|
| 45 |
+
# --- DEFINISI PYDANTIC MODELS ---
|
| 46 |
+
|
| 47 |
+
# 1. Request schema (payload received from the Express.js backend)
class StressRequest(BaseModel):
    # X/Twitter handle to analyse (without the leading '@').
    x_username: str
    # How many recent tweets to fetch; defaults to 100.
    tweet_count: int = 100
|
| 51 |
+
|
| 52 |
+
# 2. Result schema (data persisted to the DB and sent to the frontend)
class ResultData(BaseModel):
    # Analysed account handle.
    x_username: str
    # Number of tweets actually retrieved.
    total_tweets: int
    # Score 0-100 (positive-class probability scaled by 100).
    stress_level: int
    # Placeholder: keyword/trend map.
    keywords: Dict[str, float]
    # 0: safe, 1: low, 2: medium, 3: high.
    stress_status: int
|
| 59 |
+
|
| 60 |
+
# 3. Final API response schema
class APIResponse(BaseModel):
    # Human-readable outcome description (success message or error reason).
    message: str
    # Analysis payload; None when the request failed. The explicit `= None`
    # default is required: under Pydantic v2 an Optional[...] annotation alone
    # still makes the field mandatory.
    data: Optional[ResultData] = None
|
| 64 |
+
|
| 65 |
+
# --- UTILITY FUNCTIONS ---
|
| 66 |
+
|
| 67 |
+
def calculate_stress_status(stress_level: float) -> int:
    """Map a 0-100 stress probability score onto a discrete status code.

    Returns 3 (high) for >= 75, 2 (medium) for >= 50, 1 (low) for >= 25,
    and 0 (safe) otherwise.
    """
    # Thresholds checked from highest to lowest; first match wins.
    for threshold, status in ((75, 3), (50, 2), (25, 1)):
        if stress_level >= threshold:
            return status
    return 0  # safe
|
| 77 |
+
|
| 78 |
+
# --- ENDPOINT UTAMA ---
|
| 79 |
+
|
| 80 |
+
@app.post("/api/predict_stress", response_model=APIResponse)
def predict_stress(request: StressRequest):
    """Fetch a user's recent tweets, score each one with the classifier,
    and aggregate the per-tweet stress probabilities into a 0-100 level."""
    username = request.x_username
    tweet_count = request.tweet_count

    # Fail fast when the Twitter credential is missing from the environment.
    if not os.environ.get("TWITTER_BEARER_TOKEN"):
        return APIResponse(
            message="Error: TWITTER_BEARER_TOKEN tidak diatur sebagai environment variable.",
            data=None,
        )

    # 1. Retrieve the raw tweets.
    tweets = get_tweets_by_username(username, tweet_count)
    if not tweets:
        return APIResponse(
            message=f"Gagal mengambil tweet dari @{username}. Akun mungkin private atau tidak ditemukan.",
            data=None,
        )

    # 2. Clean every tweet, then run inference on each one independently.
    cleaned = [preprocess_text(t) for t in tweets]
    positive_probs = []  # per-tweet probability of the "stress" class (index 1)

    with torch.no_grad():
        for text in cleaned:
            encoded = tokenizer(
                text,
                truncation=True,
                padding="max_length",
                max_length=128,  # must match the training configuration
                return_tensors="pt",
            )
            outputs = model(
                input_ids=encoded["input_ids"].to(DEVICE),
                attention_mask=encoded["attention_mask"].to(DEVICE),
            )
            probs = torch.softmax(outputs.logits, dim=1).cpu().numpy()[0]
            positive_probs.append(probs[1])

    # 3. Aggregate: mean stress probability across tweets (0.0-1.0), or 0
    # when preprocessing left nothing to score.
    avg_stress_score = np.mean(positive_probs) if positive_probs else 0

    # Convert to the 0-100 scale and derive the discrete status.
    stress_level_100 = int(round(avg_stress_score * 100))
    status = calculate_stress_status(stress_level_100)

    # 4. Build the response payload.
    result_data = ResultData(
        x_username=username,
        total_tweets=len(tweets),
        stress_level=stress_level_100,
        keywords={"placeholder": 0.0},  # keyword mining to be implemented later
        stress_status=status,
    )

    return APIResponse(
        message=f"Analisis stres untuk @{username} berhasil. Ditemukan {result_data.total_tweets} tweets.",
        data=result_data,
    )
|
preprocessing.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# model-fastapi/preprocessing.py
|
| 2 |
+
import re
|
| 3 |
+
import emoji
|
| 4 |
+
|
| 5 |
+
def clean_text(t):
    """Normalise tweet text: lower-case it and strip URLs, @mentions,
    hashtag markers, and redundant whitespace."""
    text = str(t).lower()
    # Remove URLs (http/https links and bare www links).
    text = re.sub(r"http\S+|www\.\S+", "", text)
    # Drop @mentions entirely.
    text = re.sub(r"@\w+", "", text)
    # Keep the hashtag word but drop the leading '#'.
    text = re.sub(r"#(\w+)", r"\1", text)
    # Collapse whitespace runs and trim the ends.
    text = re.sub(r"\s+", " ", text).strip()
    # If emoji stripping is needed (as in the Colab notebook), use the
    # imported library: text = emoji.replace_emoji(text, replace="")
    return text
|
| 14 |
+
|
| 15 |
+
def preprocess_text(text: str) -> str:
    """Public preprocessing entry point; delegates to clean_text."""
    return clean_text(text)
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
python-multipart
|
| 4 |
+
transformers
|
| 5 |
+
torch
|
| 6 |
+
pandas
|
| 7 |
+
tweepy
|
| 8 |
+
emoji
|