add batch predict
Browse files
main.py
CHANGED
|
@@ -4,23 +4,19 @@ import re
|
|
| 4 |
from transformers import BertTokenizer, BertForSequenceClassification
|
| 5 |
from fastapi import FastAPI
|
| 6 |
from pydantic import BaseModel
|
| 7 |
-
from typing import Dict
|
| 8 |
|
| 9 |
# ====================================================================
|
| 10 |
-
# 1. KELAS LOGIKA ANDA (Tidak ada perubahan)
|
| 11 |
# ====================================================================
|
| 12 |
|
| 13 |
class TextCleaner:
|
| 14 |
def __init__(self):
    """Prepare the list of characters whose long repeats get collapsed."""
    # Punctuation and symbols handled by repeatcharClean.
    punctuation = ['.', ',', ';', ':', '?', '!', '(', ')', '[', ']', '{', '}', '<', '>', '"', '/', '\'', '-', '@']
    # Every lowercase ASCII letter (a-z) is cleaned the same way.
    lowercase = [chr(code) for code in range(ord('a'), ord('z') + 1)]
    self.character = punctuation + lowercase
|
| 19 |
|
| 20 |
def repeatcharClean(self, text):
    """Collapse runs of 3+ identical characters (for chars in self.character) to one.

    Example: 'heloooo' -> 'helo'.
    """
    # Build one character class instead of compiling one pattern per
    # character: a single pass over the text replaces the original loop of
    # ~45 separate compile+sub passes. Behavior is identical because the
    # replacement always leaves one character, so runs can never merge.
    char_class = ''.join(re.escape(c) for c in self.character)
    pattern = re.compile('([' + char_class + r'])\1{2,}')
    return pattern.sub(r'\1', text)
|
|
@@ -29,21 +25,17 @@ class TextCleaner:
|
|
| 29 |
text = text.lower()
|
| 30 |
text = re.sub(r'\s+', ' ', text)
|
| 31 |
text = re.sub(r'[^\x00-\x7F]+', ' ', text)
|
| 32 |
-
|
| 33 |
new_text = []
|
| 34 |
for word in text.split(" "):
|
| 35 |
word = '@USER' if word.startswith('@') and len(word) > 1 else word
|
| 36 |
word = 'HTTPURL' if word.startswith('http') else word
|
| 37 |
new_text.append(word)
|
| 38 |
text = " ".join(new_text)
|
| 39 |
-
|
| 40 |
text = emoji.demojize(text)
|
| 41 |
text = re.sub(r':[A-Za-z_-]+:', ' ', text)
|
| 42 |
text = re.sub(r"([xX;:]'?[dDpPvVoO3)(])", ' ', text)
|
| 43 |
text = re.sub(r'["#$%&()*+,./:;<=>\[\]\\^_`{|}~]', ' ', text)
|
| 44 |
text = self.repeatcharClean(text)
|
| 45 |
-
|
| 46 |
-
# Membersihkan spasi berlebih yang mungkin muncul setelah pembersihan
|
| 47 |
text = re.sub(r'\s+', ' ', text).strip()
|
| 48 |
return text
|
| 49 |
|
|
@@ -53,10 +45,8 @@ class SentimentPredictor:
|
|
| 53 |
self.model = model
|
| 54 |
self.device = torch.device("cpu")
|
| 55 |
self.model.to(self.device)
|
| 56 |
-
# --- [DIUBAH] --- Definisikan mapping label di sini agar mudah digunakan
|
| 57 |
self.label_mapping = {0: 'Positif', 1: 'Netral', 2: 'Negatif'}
|
| 58 |
|
| 59 |
-
# --- [DIUBAH] --- Tipe data kembalian (return type) diubah
|
| 60 |
def predict(self, text: str) -> (str, float, Dict[str, float]):
|
| 61 |
inputs = self.tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=280)
|
| 62 |
inputs = {k: v.to(self.device) for k, v in inputs.items()}
|
|
@@ -65,18 +55,13 @@ class SentimentPredictor:
|
|
| 65 |
outputs = self.model(**inputs)
|
| 66 |
|
| 67 |
logits = outputs.logits
|
|
|
|
| 68 |
|
| 69 |
-
# Hitung probabilitas untuk semua kelas
|
| 70 |
-
probabilities = torch.softmax(logits, dim=1)[0] # Ambil hasil pertama dari batch
|
| 71 |
-
|
| 72 |
-
# Dapatkan label dan skor kepercayaan dari probabilitas tertinggi
|
| 73 |
confidence_score = probabilities.max().item()
|
| 74 |
predicted_label_id = probabilities.argmax().item()
|
| 75 |
sentiment = self.label_mapping[predicted_label_id]
|
| 76 |
|
| 77 |
-
# --- [DIUBAH] --- Buat dictionary untuk semua skor probabilitas
|
| 78 |
all_scores = {self.label_mapping[i]: prob.item() for i, prob in enumerate(probabilities)}
|
| 79 |
-
|
| 80 |
return sentiment, confidence_score, all_scores
|
| 81 |
|
| 82 |
# ====================================================================
|
|
@@ -107,14 +92,20 @@ app = FastAPI(
|
|
| 107 |
class TextInput(BaseModel):
    """Request body for single-text prediction."""

    text: str
|
| 109 |
|
| 110 |
-
# --- [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
class PredictionOutput(BaseModel):
    """Response schema: predicted label, its confidence, and per-label scores."""

    sentiment: str
    confidence: float
    all_scores: Dict[str, float]
|
| 115 |
|
| 116 |
# ====================================================================
|
| 117 |
-
# 4. BUAT ENDPOINT
|
| 118 |
# ====================================================================
|
| 119 |
|
| 120 |
@app.get("/")
|
|
@@ -124,13 +115,22 @@ def read_root():
|
|
| 124 |
@app.post("/predict", response_model=PredictionOutput)
def predict_sentiment(request: TextInput):
    """Clean the incoming text, run the sentiment model, and return all scores."""
    normalized = text_cleaner.clean_review(request.text)
    label, score, per_label_scores = sentiment_predictor.predict(normalized)
    return PredictionOutput(
        sentiment=label,
        confidence=score,
        all_scores=per_label_scores,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
from transformers import BertTokenizer, BertForSequenceClassification
|
| 5 |
from fastapi import FastAPI
|
| 6 |
from pydantic import BaseModel
|
| 7 |
+
from typing import Dict, List
|
| 8 |
|
| 9 |
# ====================================================================
|
| 10 |
+
# 1. KELAS LOGIKA ANDA (Tidak ada perubahan)
|
| 11 |
# ====================================================================
|
| 12 |
|
| 13 |
class TextCleaner:
|
| 14 |
def __init__(self):
    """Prepare the list of characters whose long repeats get collapsed."""
    # Punctuation and symbols handled by repeatcharClean.
    punctuation = ['.', ',', ';', ':', '?', '!', '(', ')', '[', ']', '{', '}', '<', '>', '"', '/', '\'', '-', '@']
    # Every lowercase ASCII letter (a-z) is cleaned the same way.
    lowercase = [chr(code) for code in range(ord('a'), ord('z') + 1)]
    self.character = punctuation + lowercase
|
| 17 |
|
| 18 |
def repeatcharClean(self, text):
    """Collapse runs of 3+ identical characters (for chars in self.character) to one.

    Example: 'heloooo' -> 'helo'.
    """
    # Build one character class instead of compiling one pattern per
    # character: a single pass over the text replaces the original loop of
    # ~45 separate compile+sub passes. Behavior is identical because the
    # replacement always leaves one character, so runs can never merge.
    char_class = ''.join(re.escape(c) for c in self.character)
    pattern = re.compile('([' + char_class + r'])\1{2,}')
    return pattern.sub(r'\1', text)
|
|
|
|
| 25 |
text = text.lower()
|
| 26 |
text = re.sub(r'\s+', ' ', text)
|
| 27 |
text = re.sub(r'[^\x00-\x7F]+', ' ', text)
|
|
|
|
| 28 |
new_text = []
|
| 29 |
for word in text.split(" "):
|
| 30 |
word = '@USER' if word.startswith('@') and len(word) > 1 else word
|
| 31 |
word = 'HTTPURL' if word.startswith('http') else word
|
| 32 |
new_text.append(word)
|
| 33 |
text = " ".join(new_text)
|
|
|
|
| 34 |
text = emoji.demojize(text)
|
| 35 |
text = re.sub(r':[A-Za-z_-]+:', ' ', text)
|
| 36 |
text = re.sub(r"([xX;:]'?[dDpPvVoO3)(])", ' ', text)
|
| 37 |
text = re.sub(r'["#$%&()*+,./:;<=>\[\]\\^_`{|}~]', ' ', text)
|
| 38 |
text = self.repeatcharClean(text)
|
|
|
|
|
|
|
| 39 |
text = re.sub(r'\s+', ' ', text).strip()
|
| 40 |
return text
|
| 41 |
|
|
|
|
| 45 |
self.model = model
|
| 46 |
self.device = torch.device("cpu")
|
| 47 |
self.model.to(self.device)
|
|
|
|
| 48 |
self.label_mapping = {0: 'Positif', 1: 'Netral', 2: 'Negatif'}
|
| 49 |
|
|
|
|
| 50 |
def predict(self, text: str) -> (str, float, Dict[str, float]):
|
| 51 |
inputs = self.tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=280)
|
| 52 |
inputs = {k: v.to(self.device) for k, v in inputs.items()}
|
|
|
|
| 55 |
outputs = self.model(**inputs)
|
| 56 |
|
| 57 |
logits = outputs.logits
|
| 58 |
+
probabilities = torch.softmax(logits, dim=1)[0]
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
confidence_score = probabilities.max().item()
|
| 61 |
predicted_label_id = probabilities.argmax().item()
|
| 62 |
sentiment = self.label_mapping[predicted_label_id]
|
| 63 |
|
|
|
|
| 64 |
all_scores = {self.label_mapping[i]: prob.item() for i, prob in enumerate(probabilities)}
|
|
|
|
| 65 |
return sentiment, confidence_score, all_scores
|
| 66 |
|
| 67 |
# ====================================================================
|
|
|
|
| 92 |
class TextInput(BaseModel):
    """Request body for single-text prediction."""

    text: str
|
| 94 |
|
| 95 |
+
# --- [FIX] --- Definition of BatchTextInput ---
# This model tells FastAPI that the batch endpoint accepts a JSON
# object with a single key "texts" holding a list of strings.
class BatchTextInput(BaseModel):
    """Request body for batch prediction: a list of raw texts."""

    texts: List[str]
# -----------------------------------------------------------
|
| 101 |
+
|
| 102 |
class PredictionOutput(BaseModel):
    """Response schema: predicted label, its confidence, and per-label scores."""

    sentiment: str
    confidence: float
    all_scores: Dict[str, float]
|
| 106 |
|
| 107 |
# ====================================================================
|
| 108 |
+
# 4. BUAT ENDPOINT (Tidak ada perubahan logika)
|
| 109 |
# ====================================================================
|
| 110 |
|
| 111 |
@app.get("/")
|
|
|
|
| 115 |
@app.post("/predict", response_model=PredictionOutput)
def predict_sentiment(request: TextInput):
    """Clean the incoming text, run the sentiment model, and return all scores."""
    normalized = text_cleaner.clean_review(request.text)
    label, score, per_label_scores = sentiment_predictor.predict(normalized)
    return PredictionOutput(
        sentiment=label,
        confidence=score,
        all_scores=per_label_scores,
    )
|
| 124 |
+
|
| 125 |
+
@app.post("/predict-batch", response_model=List[PredictionOutput])
def predict_sentiment_batch(request: BatchTextInput):
    """Run the single-text pipeline over every entry in request.texts.

    NOTE(review): each text is tokenized and classified individually;
    true batched model inference would need SentimentPredictor support.
    """
    def _predict_one(raw_text):
        # Same clean -> predict pipeline as the /predict endpoint.
        label, score, per_label = sentiment_predictor.predict(
            text_cleaner.clean_review(raw_text)
        )
        return PredictionOutput(sentiment=label, confidence=score, all_scores=per_label)

    return [_predict_one(t) for t in request.texts]
|