import torch
from transformers import BertTokenizer, BertForSequenceClassification
import numpy as np
import os
# Hugging Face access token; None is fine for public repos — from_pretrained
# treats token=None as anonymous access.
HF_TOKEN = os.getenv("HF_TOKEN")
# Load the pre-trained multi-label tokenizer and model from the Hub.
# NOTE: this runs at import time and may download weights over the network.
tokenizer = BertTokenizer.from_pretrained("Maulidaaa/bert-safe-multilabel", token=HF_TOKEN)
model = BertForSequenceClassification.from_pretrained("Maulidaaa/bert-safe-multilabel", token=HF_TOKEN)
# Inference mode: disables dropout / batch-norm updates.
model.eval()
# After-use effect labels (order must match the label order used at training time).
afteruse_labels = [
    "acne fighting", "acne trigger", "anti aging", "brightening", "moisturizing",
    "redness reducing", "skin texture", "soothing", "unknown", "whitening"
]
# Indonesian-language descriptions of each after-use effect, keyed by the
# labels in afteruse_labels. Values are user-facing runtime strings.
afteruse_descriptions_id = {
    "acne fighting": "membantu melawan jerawat",
    "acne trigger": "dapat memicu jerawat",
    "anti aging": "mengurangi tanda-tanda penuaan",
    "brightening": "mencerahkan kulit",
    "moisturizing": "melembapkan kulit",
    "redness reducing": "mengurangi kemerahan pada kulit",
    "skin texture": "memperbaiki tekstur kulit",
    "soothing": "menenangkan kulit",
    "unknown": "memiliki efek yang belum diketahui",
    "whitening": "memutihkan kulit"
}
def predict_after_use(input_ingredients, threshold=0.5):
    """Predict after-use effect labels for an ingredient string.

    Args:
        input_ingredients: Raw ingredient text to classify. Falsy input
            (None / empty string) short-circuits to an empty list.
        threshold: Sigmoid probability cutoff above which a label counts
            as predicted. Defaults to 0.5 (the original behavior).

    Returns:
        list[str]: Entries of ``afteruse_labels`` whose probability
        strictly exceeds ``threshold``.
    """
    if not input_ingredients:
        return []
    # Tokenize the input; truncate to BERT's 512-token limit.
    inputs = tokenizer(input_ingredients, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        # Multi-label head: independent sigmoid per label (not softmax).
        probs = torch.sigmoid(logits)
    # Drop the batch dimension and move to NumPy for plain indexing.
    probs = probs.squeeze().cpu().numpy()
    if probs.ndim == 0:
        # A single-logit model squeezes to a 0-d array; re-wrap so
        # len() and integer indexing below keep working.
        probs = [probs]
    # NOTE(review): debug prints kept for behavior parity; consider the
    # logging module for production use.
    print(f"[DEBUG] Model output shape: {logits.shape}")
    print(f"[DEBUG] Jumlah output model: {len(probs)} | Jumlah label: {len(afteruse_labels)}")
    # Guard against a mismatch between model output size and label count.
    min_len = min(len(probs), len(afteruse_labels))
    predicted_labels = [
        afteruse_labels[i]
        for i in range(min_len)
        if probs[i] > threshold
    ]
    return predicted_labels
def generate_afteruse_sentence_id(predicted_labels):
    """Build an Indonesian sentence describing the predicted effects.

    Falls back to the raw label text for any label missing from
    ``afteruse_descriptions_id``; an empty input yields a fixed
    "no effects detected" sentence.
    """
    if not predicted_labels:
        return "Tidak ada efek yang terdeteksi berdasarkan bahan yang diberikan."
    parts = [afteruse_descriptions_id.get(lbl, lbl) for lbl in predicted_labels]
    # Natural-language list joining: "a", "a dan b", "a, b, dan c".
    if len(parts) == 1:
        body = parts[0]
    elif len(parts) == 2:
        body = f"{parts[0]} dan {parts[1]}"
    else:
        body = f"{', '.join(parts[:-1])}, dan {parts[-1]}"
    return f"Produk ini {body}."
def predict_after_use_with_probs(input_ingredients, threshold=0.5):
    """Predict after-use labels together with per-label probabilities.

    Args:
        input_ingredients: Raw ingredient text to classify. Falsy input
            short-circuits without running the model.
        threshold: Sigmoid probability cutoff above which a label counts
            as predicted. Defaults to 0.5 (the original behavior).

    Returns:
        tuple[list[str], dict[str, float]]: The predicted labels and a
        mapping of every comparable label to its sigmoid probability.
        Empty input returns ``([], {})``.

    Fix vs. original: the empty-input path returned ``[], []`` while the
    normal path returns a dict as the second element; it now consistently
    returns an empty dict.
    """
    if not input_ingredients:
        return [], {}
    inputs = tokenizer(input_ingredients, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        # Multi-label head: independent sigmoid per label (not softmax).
        probs = torch.sigmoid(logits)
    probs = probs.squeeze().cpu().numpy()
    if probs.ndim == 0:
        # 0-d array from a single-logit model: re-wrap for len()/indexing.
        probs = [probs]
    # Guard against a mismatch between model output size and label count.
    min_len = min(len(probs), len(afteruse_labels))
    predicted_labels = [
        afteruse_labels[i]
        for i in range(min_len)
        if probs[i] > threshold
    ]
    label_probs = {
        afteruse_labels[i]: float(probs[i])
        for i in range(min_len)
    }
    return predicted_labels, label_probs
|