gaidasalsaa commited on
Commit
949a823
·
1 Parent(s): 25cbe01

Deployment of FastAPI model service

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -35
  2. Dockerfile +0 -0
  3. api_client.py +15 -0
  4. main.py +153 -0
  5. preprocessing.py +16 -0
  6. requirements.txt +8 -0
.gitattributes CHANGED
@@ -1,35 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ backend/src/models/*.pth filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile ADDED
File without changes
api_client.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# model-fastapi/api_client.py
import os  # access environment variables

import tweepy

# Read the bearer token from the TWITTER_BEARER_TOKEN environment variable.
# SECURITY: never hard-code credentials in source. A previous revision leaked
# a real bearer token here (it was passed as the *key* to os.environ.get,
# which also meant the lookup could never succeed) — that token must be
# revoked/rotated. main.py checks this same variable name before calling us.
BEARER_TOKEN = os.environ.get("TWITTER_BEARER_TOKEN")

# Fail fast at import time: without a token the client is useless.
if not BEARER_TOKEN:
    raise ValueError("TWITTER_BEARER_TOKEN tidak ditemukan di environment variables.")

# Shared read-only Twitter API v2 client used by the fetch helpers below.
client = tweepy.Client(bearer_token=BEARER_TOKEN)
14
def get_tweets_by_username(username: str, count: int = 100):
    """Return a list of recent tweet texts for ``username``.

    Looks the user up by handle, then fetches up to ``count`` of their most
    recent tweets. The Twitter API v2 caps a single page at 100 results and
    requires at least 5, so ``count`` is clamped into [5, 100].

    Returns an empty list when the user cannot be found, has no tweets, or
    the API call fails — callers (main.py) treat a falsy result as
    "no data / account private or not found".
    """
    try:
        user = client.get_user(username=username)
        if user.data is None:
            return []
        response = client.get_users_tweets(
            id=user.data.id,
            max_results=max(5, min(count, 100)),  # v2 bound: 5 <= max_results <= 100
        )
        if response.data is None:
            return []
        return [tweet.text for tweet in response.data]
    except tweepy.TweepyException:
        # Best-effort: auth/network/rate-limit failures surface as "no tweets"
        # rather than a 500 from the endpoint.
        return []
main.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from typing import Dict, Optional, List
4
+ import numpy as np # Untuk operasi np.argmax
5
+ import os # Untuk os.environ (Bearer Token)
6
+
7
+ # Pustaka ML
8
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
9
+ import torch
10
+
11
+ # Import dari file lokal
12
+ from .api_client import get_tweets_by_username
13
+ from .preprocessing import preprocess_text
14
+
15
app = FastAPI()

# --- PATH CONFIGURATION ---
# MODEL_DIR holds the tokenizer/config assets; WEIGHTS_FILE is the fine-tuned
# IndoBERTweet checkpoint (a state_dict) saved during training.
MODEL_DIR = "./model_assets"
WEIGHTS_FILE = "./model_assets/best_indobertweet.pth"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- MODEL LOADING ---
# Three stages: 1. load tokenizer, 2. load model structure, 3. load .pth weights
try:
    print("Mencoba memuat model...")
    # 1. Load tokenizer & model structure (binary stress classifier: 2 labels)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR, num_labels=2)

    # 2. Load the .pth weights.
    # map_location=DEVICE resolves to CPU when no GPU is available, so the
    # checkpoint still loads on CPU-only servers.
    state_dict = torch.load(WEIGHTS_FILE, map_location=DEVICE)
    model.load_state_dict(state_dict)

    # 3. Finalize: move to the chosen device and switch to inference mode
    # (eval() disables dropout so predictions are deterministic).
    model.to(DEVICE)
    model.eval()
    print(f"Model berhasil dimuat ke device: {DEVICE}")

except Exception as e:
    # NOTE(review): the failure is only logged — the server still starts, and
    # any request will then crash with a NameError on `model`/`tokenizer`.
    # Consider re-raising here to fail fast if the model cannot be loaded.
    print(f"FATAL ERROR: Gagal memuat model. Pastikan file di {MODEL_DIR} sudah benar.")
    print(e)
44
+
45
+ # --- DEFINISI PYDANTIC MODELS ---
46
+
47
+ # 1. Skema Permintaan (DATA YANG DITERIMA DARI Express.js)
48
class StressRequest(BaseModel):
    """Request body received from the Express.js backend."""
    x_username: str  # X/Twitter handle to analyse (without the leading @)
    tweet_count: int = 100  # how many recent tweets to fetch
51
+
52
+ # 2. Skema Data Hasil (DATA YANG DISIMPAN KE DB & DIKIRIM KE Frontend)
53
class ResultData(BaseModel):
    """Analysis result stored in the DB and sent to the frontend."""
    x_username: str  # analysed handle
    total_tweets: int  # number of tweets actually fetched
    stress_level: int  # score 0-100 (mean positive-class probability * 100)
    keywords: Dict[str, float]  # placeholder: keyword/trend mining not implemented yet
    stress_status: int  # 0: safe, 1: low, 2: medium, 3: high
59
+
60
+ # 3. Skema Respons API Akhir
61
class APIResponse(BaseModel):
    """Envelope for every endpoint response."""
    message: str  # human-readable outcome description
    # Default to None so the model validates under Pydantic v2, where an
    # Optional field without an explicit default is still *required*.
    # None signals an error path; success paths carry a ResultData payload.
    data: Optional[ResultData] = None
64
+
65
+ # --- UTILITY FUNCTIONS ---
66
+
67
def calculate_stress_status(stress_level: float) -> int:
    """Map a 0-100 stress score onto a discrete status bucket.

    Returns 3 (high) for scores >= 75, 2 (medium) for >= 50,
    1 (low) for >= 25, and 0 (safe) otherwise.
    """
    # Descending threshold table: first bucket the score reaches wins.
    for threshold, status in ((75, 3), (50, 2), (25, 1)):
        if stress_level >= threshold:
            return status
    return 0  # safe
77
+
78
+ # --- ENDPOINT UTAMA ---
79
+
80
def _score_stress_probability(text: str) -> float:
    """Return the model's probability that one cleaned tweet is 'stress' (class 1).

    Tokenizes with the same settings used in training (max_length=128,
    padded/truncated), runs a forward pass on DEVICE, and softmaxes the
    logits. Caller is expected to wrap calls in torch.no_grad().
    """
    enc = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=128,  # keep consistent with training
        return_tensors="pt",
    )
    input_ids = enc["input_ids"].to(DEVICE)
    attention_mask = enc["attention_mask"].to(DEVICE)
    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    # probs[1] is the positive ('stress') class probability
    probs = torch.softmax(outputs.logits, dim=1).cpu().numpy()[0]
    return probs[1]


@app.post("/api/predict_stress", response_model=APIResponse)
def predict_stress(request: StressRequest):
    """Fetch a user's recent tweets, score each for stress, and aggregate.

    Returns an APIResponse whose ``data`` is None (with an explanatory
    message) when the bearer token is missing or no tweets could be
    fetched; otherwise ``data`` carries the aggregated ResultData.
    """
    username = request.x_username
    tweet_count = request.tweet_count

    # Guard: without the bearer token the Twitter client cannot work.
    if not os.environ.get("TWITTER_BEARER_TOKEN"):
        return APIResponse(
            message="Error: TWITTER_BEARER_TOKEN tidak diatur sebagai environment variable.",
            data=None,
        )

    # 1. Fetch tweets (empty/None means private, missing, or API failure).
    raw_tweets = get_tweets_by_username(username, tweet_count)
    if not raw_tweets:
        return APIResponse(
            message=f"Gagal mengambil tweet dari @{username}. Akun mungkin private atau tidak ditemukan.",
            data=None,
        )

    # 2. Pre-process and score each tweet (no_grad: inference only).
    cleaned_texts = [preprocess_text(t) for t in raw_tweets]
    with torch.no_grad():
        stress_probabilities = [_score_stress_probability(text) for text in cleaned_texts]

    # 3. Aggregate: mean probability of the 'stress' class, scaled to 0-100.
    avg_stress_score = np.mean(stress_probabilities) if stress_probabilities else 0
    stress_level_100 = int(round(avg_stress_score * 100))
    status = calculate_stress_status(stress_level_100)

    # 4. Build the response payload.
    result_data = ResultData(
        x_username=username,
        total_tweets=len(raw_tweets),
        stress_level=stress_level_100,
        keywords={"placeholder": 0.0},  # keyword mining to be implemented later
        stress_status=status,
    )

    return APIResponse(
        message=f"Analisis stres untuk @{username} berhasil. Ditemukan {result_data.total_tweets} tweets.",
        data=result_data,
    )
preprocessing.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model-fastapi/preprocessing.py
2
+ import re
3
+ import emoji
4
+
5
def clean_text(t):
    """Normalise a raw tweet for the model.

    Lowercases, strips URLs and @mentions, unwraps hashtags
    (``#word`` -> ``word``), and collapses runs of whitespace.

    Emoji stripping (e.g. ``emoji.replace_emoji``) is deliberately not
    applied here; enable it only if the training pipeline removed emojis too.
    """
    text = str(t).lower()
    # Apply each substitution in order: URLs, mentions, hashtag markers.
    for pattern, replacement in (
        (r"http\S+|www\.\S+", ""),
        (r"@\w+", ""),
        (r"#(\w+)", r"\1"),
    ):
        text = re.sub(pattern, replacement, text)
    return re.sub(r"\s+", " ", text).strip()
14
+
15
def preprocess_text(text: str) -> str:
    """Public pre-processing entry point used by main.py; delegates to clean_text."""
    return clean_text(text)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-multipart
4
+ transformers
5
+ torch
6
+ pandas
7
+ numpy
8
+ tweepy
9
+ emoji