Spaces:

reyhanadr
/

Sentiment_BitcoinHalving_IndoBERTweet

Sleeping

App Files Files Community

reyhanadr committed on Sep 26, 2025

Commit

f165c76

1 Parent(s): abf6e68

add another probability score

Browse files

Files changed (1) hide show

main.py +28 -26

main.py CHANGED Viewed

@@ -4,9 +4,10 @@ import re
 from transformers import BertTokenizer, BertForSequenceClassification
 from fastapi import FastAPI
 from pydantic import BaseModel
 # ====================================================================
-# 1. KELAS LOGIKA ANDA (Disalin dari kode Anda)
 # ====================================================================
 class TextCleaner:
@@ -52,8 +53,11 @@ class SentimentPredictor:
         self.model = model
         self.device = torch.device("cpu")
         self.model.to(self.device)
-    def predict(self, text: str) -> (str, float):
         inputs = self.tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=280)
         inputs = {k: v.to(self.device) for k, v in inputs.items()}
@@ -61,43 +65,36 @@ class SentimentPredictor:
             outputs = self.model(**inputs)
         logits = outputs.logits
-        predicted_label = torch.argmax(logits, dim=1).item()
-        probabilities = torch.softmax(logits, dim=1)
-        confidence_score = probabilities[0][predicted_label].item()
-        if predicted_label == 2:
-            sentiment = 'Negatif'
-        elif predicted_label == 1:
-            sentiment = 'Netral'
-        else: # predicted_label == 0
-            sentiment = 'Positif'
-        return sentiment, confidence_score
 # ====================================================================
-# 2. INISIALISASI MODEL & APLIKASI FASTAPI
-# (Ini hanya dijalankan sekali saat API pertama kali dimulai)
 # ====================================================================
 print("Memuat model dan tokenizer...")
-# Muat tokenizer dan model dasar
 tokenizer = BertTokenizer.from_pretrained('indolem/indobertweet-base-uncased')
 model = BertForSequenceClassification.from_pretrained('indolem/indobertweet-base-uncased', num_labels=3)
-# Muat bobot model yang sudah Anda latih
 model_path = 'model_indoBERTweet_100Epochs_sentiment.pth'
 state_dict = torch.load(model_path, map_location=torch.device('cpu'))
 model.load_state_dict(state_dict, strict=False)
 model.eval()
 print("Model berhasil dimuat.")
-# Buat instance dari kelas-kelas Anda
 text_cleaner = TextCleaner()
 sentiment_predictor = SentimentPredictor(tokenizer, model)
-# Inisialisasi aplikasi FastAPI
-# Baris ini ditambahkan untuk memaksa build ulang
 app = FastAPI(
     title="API Klasifikasi Sentimen",
     description="Sebuah API untuk menganalisis sentimen teks Bahasa Indonesia."
@@ -110,9 +107,11 @@ app = FastAPI(
 class TextInput(BaseModel):
     text: str
 class PredictionOutput(BaseModel):
     sentiment: str
     confidence: float
 # ====================================================================
 # 4. BUAT ENDPOINT PREDIKSI
@@ -124,11 +123,14 @@ def read_root():
 @app.post("/predict", response_model=PredictionOutput)
 def predict_sentiment(request: TextInput):
-    # Langkah 1: Bersihkan teks input
     cleaned_text = text_cleaner.clean_review(request.text)
-    # Langkah 2: Lakukan prediksi pada teks yang sudah bersih
-    sentiment, confidence = sentiment_predictor.predict(cleaned_text)
-    # Langkah 3: Kembalikan hasil prediksi
-    return PredictionOutput(sentiment=sentiment, confidence=confidence)

 from transformers import BertTokenizer, BertForSequenceClassification
 from fastapi import FastAPI
 from pydantic import BaseModel
+from typing import Dict
 # ====================================================================
+# 1. KELAS LOGIKA ANDA (Tidak ada perubahan di TextCleaner)
 # ====================================================================
 class TextCleaner:
         self.model = model
         self.device = torch.device("cpu")
         self.model.to(self.device)
+        # --- [DIUBAH] --- Definisikan mapping label di sini agar mudah digunakan
+        self.label_mapping = {0: 'Positif', 1: 'Netral', 2: 'Negatif'}
+    # --- [DIUBAH] --- Tipe data kembalian (return type) diubah
+    def predict(self, text: str) -> (str, float, Dict[str, float]):
         inputs = self.tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=280)
         inputs = {k: v.to(self.device) for k, v in inputs.items()}
             outputs = self.model(**inputs)
         logits = outputs.logits
+        # Hitung probabilitas untuk semua kelas
+        probabilities = torch.softmax(logits, dim=1)[0] # Ambil hasil pertama dari batch
+        # Dapatkan label dan skor kepercayaan dari probabilitas tertinggi
+        confidence_score = probabilities.max().item()
+        predicted_label_id = probabilities.argmax().item()
+        sentiment = self.label_mapping[predicted_label_id]
+        # --- [DIUBAH] --- Buat dictionary untuk semua skor probabilitas
+        all_scores = {self.label_mapping[i]: prob.item() for i, prob in enumerate(probabilities)}
+        return sentiment, confidence_score, all_scores
 # ====================================================================
+# 2. INISIALISASI MODEL & APLIKASI FASTAPI (Tidak ada perubahan)
 # ====================================================================
 print("Memuat model dan tokenizer...")
 tokenizer = BertTokenizer.from_pretrained('indolem/indobertweet-base-uncased')
 model = BertForSequenceClassification.from_pretrained('indolem/indobertweet-base-uncased', num_labels=3)
 model_path = 'model_indoBERTweet_100Epochs_sentiment.pth'
 state_dict = torch.load(model_path, map_location=torch.device('cpu'))
 model.load_state_dict(state_dict, strict=False)
 model.eval()
 print("Model berhasil dimuat.")
 text_cleaner = TextCleaner()
 sentiment_predictor = SentimentPredictor(tokenizer, model)
 app = FastAPI(
     title="API Klasifikasi Sentimen",
     description="Sebuah API untuk menganalisis sentimen teks Bahasa Indonesia."
 class TextInput(BaseModel):
     text: str
+# --- [DIUBAH] --- Model output diperbarui untuk menyertakan semua skor
 class PredictionOutput(BaseModel):
     sentiment: str
     confidence: float
+    all_scores: Dict[str, float]
 # ====================================================================
 # 4. BUAT ENDPOINT PREDIKSI
 @app.post("/predict", response_model=PredictionOutput)
 def predict_sentiment(request: TextInput):
     cleaned_text = text_cleaner.clean_review(request.text)
+    # --- [DIUBAH] --- Tangkap tiga nilai yang dikembalikan oleh metode predict
+    sentiment, confidence, all_scores = sentiment_predictor.predict(cleaned_text)
+    # --- [DIUBAH] --- Kembalikan hasil prediksi dalam struktur yang baru
+    return PredictionOutput(
+        sentiment=sentiment,
+        confidence=confidence,
+        all_scores=all_scores
+    )