import os

import numpy as np
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Hugging Face access token, read from the environment so credentials are not hard-coded.
HF_TOKEN = os.getenv("HF_TOKEN")

# Load the fine-tuned multi-label BERT classifier and its tokenizer from the Hugging Face Hub.
tokenizer = BertTokenizer.from_pretrained("Maulidaaa/bert-safe-multilabel", token=HF_TOKEN)
model = BertForSequenceClassification.from_pretrained("Maulidaaa/bert-safe-multilabel", token=HF_TOKEN)

# Inference only: disable dropout and other training-time behaviour.
model.eval()

# The ten "after use" effect labels; index i must match output unit i of the classifier head.
afteruse_labels = [
    "acne fighting", "acne trigger", "anti aging", "brightening", "moisturizing",
    "redness reducing", "skin texture", "soothing", "unknown", "whitening"
]

# Indonesian description snippets used to turn predicted labels into a readable sentence.
afteruse_descriptions_id = {
    "acne fighting": "membantu melawan jerawat",
    "acne trigger": "dapat memicu jerawat",
    "anti aging": "mengurangi tanda-tanda penuaan",
    "brightening": "mencerahkan kulit",
    "moisturizing": "melembapkan kulit",
    "redness reducing": "mengurangi kemerahan pada kulit",
    "skin texture": "memperbaiki tekstur kulit",
    "soothing": "menenangkan kulit",
    "unknown": "memiliki efek yang belum diketahui",
    "whitening": "memutihkan kulit"
}


def predict_after_use(input_ingredients):
    """Return the effect labels whose sigmoid probability exceeds 0.5 for the given ingredient text."""
    if not input_ingredients:
        return []

    # Tokenize the ingredient text; BERT accepts at most 512 tokens.
    inputs = tokenizer(input_ingredients, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Forward pass without gradient tracking; sigmoid gives an independent probability per label.
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        probs = torch.sigmoid(logits)

    # Collapse the batch dimension and ensure we always have a 1-D array of probabilities.
    probs = np.atleast_1d(probs.squeeze().cpu().numpy())

    print(f"[DEBUG] Model output shape: {logits.shape}")
    print(f"[DEBUG] Jumlah output model: {len(probs)} | Jumlah label: {len(afteruse_labels)}")

    # Guard against a mismatch between the number of model outputs and the number of labels.
    min_len = min(len(probs), len(afteruse_labels))
    predicted_labels = [
        afteruse_labels[i]
        for i in range(min_len)
        if probs[i] > 0.5
    ]

    return predicted_labels


def generate_afteruse_sentence_id(predicted_labels):
    """Compose an Indonesian sentence describing the predicted effects."""
    if not predicted_labels:
        return "Tidak ada efek yang terdeteksi berdasarkan bahan yang diberikan."

    # Fall back to the raw label if a description is missing.
    descriptions = [afteruse_descriptions_id.get(label, label) for label in predicted_labels]

    # Join the descriptions into a natural-sounding sentence.
    if len(descriptions) == 1:
        return f"Produk ini {descriptions[0]}."
    elif len(descriptions) == 2:
        return f"Produk ini {descriptions[0]} dan {descriptions[1]}."
    else:
        return f"Produk ini {', '.join(descriptions[:-1])}, dan {descriptions[-1]}."


def predict_after_use_with_probs(input_ingredients):
    """Variant of predict_after_use that also returns every label's probability."""
    if not input_ingredients:
        return [], {}

    inputs = tokenizer(input_ingredients, return_tensors="pt", truncation=True, padding=True, max_length=512)

    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        probs = torch.sigmoid(logits)

    # Collapse the batch dimension and ensure we always have a 1-D array of probabilities.
    probs = np.atleast_1d(probs.squeeze().cpu().numpy())

    min_len = min(len(probs), len(afteruse_labels))
    predicted_labels = [
        afteruse_labels[i]
        for i in range(min_len)
        if probs[i] > 0.5
    ]
    # Probability for every label, not just those above the 0.5 threshold.
    label_probs = {
        afteruse_labels[i]: float(probs[i])
        for i in range(min_len)
    }

    return predicted_labels, label_probs
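

if __name__ == "__main__":
    # Minimal usage sketch: sample_ingredients is a made-up example string, and the actual
    # labels and probabilities depend entirely on the fine-tuned model's weights.
    sample_ingredients = "aqua, glycerin, niacinamide, salicylic acid, fragrance"

    labels, probabilities = predict_after_use_with_probs(sample_ingredients)
    print("Predicted labels:", labels)
    print("Per-label probabilities:", probabilities)
    print(generate_afteruse_sentence_id(labels))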