# skincare/app/utils/predict_afteruse.py
# (Header below is Hugging Face Hub page residue from a copy/paste, kept as a
# comment so the file stays valid Python: uploaded by Maulidaaa, commit
# e735b38 "Update app/utils/predict_afteruse.py", verified.)
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import numpy as np
import os
# Hugging Face access token read from the environment; None when unset
# (presumably from_pretrained then attempts anonymous access — verify if the
# model repo is private/gated).
HF_TOKEN = os.getenv("HF_TOKEN")
# Load the pre-trained model and tokenizer once at import time (network I/O
# on first run; cached afterwards by the transformers library).
tokenizer = BertTokenizer.from_pretrained("Maulidaaa/bert-safe-multilabel", token=HF_TOKEN)
model = BertForSequenceClassification.from_pretrained("Maulidaaa/bert-safe-multilabel", token=HF_TOKEN)
# Inference mode (disables dropout etc.); this module never trains the model.
model.eval()
# After-use effect labels. Order must match the label order used during
# training, since predictions are mapped back by index.
# (Original comment: "Efek after use (harus sesuai saat training)".)
afteruse_labels = [
    "acne fighting", "acne trigger", "anti aging", "brightening", "moisturizing",
    "redness reducing", "skin texture", "soothing", "unknown", "whitening"
]
# Indonesian-language description for each after-use effect label.
# NOTE: the values are user-facing runtime strings — do not translate them.
afteruse_descriptions_id = {
    "acne fighting": "membantu melawan jerawat",
    "acne trigger": "dapat memicu jerawat",
    "anti aging": "mengurangi tanda-tanda penuaan",
    "brightening": "mencerahkan kulit",
    "moisturizing": "melembapkan kulit",
    "redness reducing": "mengurangi kemerahan pada kulit",
    "skin texture": "memperbaiki tekstur kulit",
    "soothing": "menenangkan kulit",
    "unknown": "memiliki efek yang belum diketahui",
    "whitening": "memutihkan kulit"
}
def predict_after_use(input_ingredients, threshold=0.5):
    """Predict after-use effect labels for an ingredient text.

    Args:
        input_ingredients: Ingredient list as a single text string. Falsy
            input (None / empty string) short-circuits to no predictions.
        threshold: Sigmoid probability cutoff for a label to count as
            predicted. Defaults to 0.5, matching the original behavior.

    Returns:
        List of label names from ``afteruse_labels`` whose sigmoid
        probability exceeds ``threshold``.
    """
    if not input_ingredients:
        return []

    # Tokenize the input (truncated to BERT's 512-token limit).
    inputs = tokenizer(
        input_ingredients,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )

    # Forward pass without gradient tracking (inference only).
    with torch.no_grad():
        logits = model(**inputs).logits

    # Multi-label setup: an independent sigmoid per label (not softmax).
    probs = torch.sigmoid(logits).squeeze().cpu().numpy()
    # squeeze() collapses a single-label output to a 0-d scalar;
    # atleast_1d restores a 1-d array so indexing below always works.
    probs = np.atleast_1d(probs)

    # Guard against a mismatch between model output width and label list.
    min_len = min(len(probs), len(afteruse_labels))
    return [afteruse_labels[i] for i in range(min_len) if probs[i] > threshold]
def generate_afteruse_sentence_id(predicted_labels):
    """Build an Indonesian sentence describing the predicted effects.

    Args:
        predicted_labels: Label names as returned by ``predict_after_use``.

    Returns:
        A full sentence in Indonesian; a fixed fallback sentence when no
        labels were predicted. Unknown labels fall back to the label text.
    """
    if not predicted_labels:
        return "Tidak ada efek yang terdeteksi berdasarkan bahan yang diberikan."

    parts = [afteruse_descriptions_id.get(label, label) for label in predicted_labels]

    # Join with Indonesian list grammar: "a", "a dan b", "a, b, dan c".
    if len(parts) > 2:
        joined = f"{', '.join(parts[:-1])}, dan {parts[-1]}"
    elif len(parts) == 2:
        joined = f"{parts[0]} dan {parts[1]}"
    else:
        joined = parts[0]
    return f"Produk ini {joined}."
def predict_after_use_with_probs(input_ingredients, threshold=0.5):
    """Predict after-use effects and return per-label probabilities.

    Args:
        input_ingredients: Ingredient list as a single text string. Falsy
            input (None / empty string) short-circuits to no predictions.
        threshold: Sigmoid probability cutoff for a label to count as
            predicted. Defaults to 0.5, matching the original behavior.

    Returns:
        Tuple ``(predicted_labels, label_probs)``: the labels whose sigmoid
        probability exceeds ``threshold``, and a dict mapping every label
        name to its probability as a float.
    """
    if not input_ingredients:
        # Bug fix: was ``return [], []`` — the second element is a dict on
        # the normal path, so the empty case must return a dict too.
        return [], {}

    # Tokenize the input (truncated to BERT's 512-token limit).
    inputs = tokenizer(
        input_ingredients,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )

    # Forward pass without gradient tracking (inference only).
    with torch.no_grad():
        logits = model(**inputs).logits

    # Multi-label setup: an independent sigmoid per label (not softmax).
    probs = torch.sigmoid(logits).squeeze().cpu().numpy()
    # squeeze() collapses a single-label output to a 0-d scalar;
    # atleast_1d restores a 1-d array so indexing below always works.
    probs = np.atleast_1d(probs)

    # Guard against a mismatch between model output width and label list.
    min_len = min(len(probs), len(afteruse_labels))
    predicted_labels = [
        afteruse_labels[i] for i in range(min_len) if probs[i] > threshold
    ]
    label_probs = {afteruse_labels[i]: float(probs[i]) for i in range(min_len)}
    return predicted_labels, label_probs