import os

import numpy as np
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Hugging Face access token for private/gated repos (None is fine for public ones).
HF_TOKEN = os.getenv("HF_TOKEN")

# Load the pre-trained multilabel model and tokenizer once at import time.
tokenizer = BertTokenizer.from_pretrained("Maulidaaa/bert-safe-multilabel", token=HF_TOKEN)
model = BertForSequenceClassification.from_pretrained("Maulidaaa/bert-safe-multilabel", token=HF_TOKEN)
model.eval()  # inference mode: disables dropout etc.

# After-use effect labels (order must match the label order used during training).
afteruse_labels = [
    "acne fighting", "acne trigger", "anti aging", "brightening",
    "moisturizing", "redness reducing", "skin texture", "soothing",
    "unknown", "whitening",
]

# Indonesian-language description for each after-use effect.
afteruse_descriptions_id = {
    "acne fighting": "membantu melawan jerawat",
    "acne trigger": "dapat memicu jerawat",
    "anti aging": "mengurangi tanda-tanda penuaan",
    "brightening": "mencerahkan kulit",
    "moisturizing": "melembapkan kulit",
    "redness reducing": "mengurangi kemerahan pada kulit",
    "skin texture": "memperbaiki tekstur kulit",
    "soothing": "menenangkan kulit",
    "unknown": "memiliki efek yang belum diketahui",
    "whitening": "memutihkan kulit",
}


def _compute_probs(input_ingredients):
    """Tokenize *input_ingredients*, run the model, and return
    ``(logits, probs)`` where ``probs`` is a 1-D numpy array of
    per-label sigmoid probabilities.

    Shared by predict_after_use() and predict_after_use_with_probs(),
    which previously duplicated this logic.
    """
    inputs = tokenizer(
        input_ingredients,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    with torch.no_grad():
        logits = model(**inputs).logits
    # atleast_1d guards against squeeze() producing a 0-d scalar
    # (single-output model); the old code wrapped it in a Python list.
    probs = np.atleast_1d(torch.sigmoid(logits).squeeze().cpu().numpy())
    return logits, probs


def predict_after_use(input_ingredients, threshold=0.5):
    """Return the after-use effect labels whose predicted probability
    exceeds *threshold* (default 0.5, matching the original behavior).

    Returns an empty list for empty/falsy input.
    """
    if not input_ingredients:
        return []

    logits, probs = _compute_probs(input_ingredients)

    print(f"[DEBUG] Model output shape: {logits.shape}")
    print(f"[DEBUG] Jumlah output model: {len(probs)} | Jumlah label: {len(afteruse_labels)}")

    # Guard against a mismatch between model output size and known labels.
    min_len = min(len(probs), len(afteruse_labels))
    return [afteruse_labels[i] for i in range(min_len) if probs[i] > threshold]


def generate_afteruse_sentence_id(predicted_labels):
    """Build an Indonesian sentence describing *predicted_labels*.

    Labels missing from afteruse_descriptions_id fall back to the raw
    label text. Empty input yields a fixed "no effects detected" sentence.
    """
    if not predicted_labels:
        return "Tidak ada efek yang terdeteksi berdasarkan bahan yang diberikan."

    descriptions = [afteruse_descriptions_id.get(label, label) for label in predicted_labels]
    if len(descriptions) == 1:
        return f"Produk ini {descriptions[0]}."
    if len(descriptions) == 2:
        return f"Produk ini {descriptions[0]} dan {descriptions[1]}."
    # Three or more effects: comma-separated list with a final "dan".
    return f"Produk ini {', '.join(descriptions[:-1])}, dan {descriptions[-1]}."


def predict_after_use_with_probs(input_ingredients, threshold=0.5):
    """Like predict_after_use(), but also return a dict mapping every
    label (up to the model output size) to its predicted probability.

    BUGFIX: the empty-input case previously returned ``[], []`` although
    the normal path returns ``(list, dict)``; it now returns ``[], {}``
    so callers get a consistent type.
    """
    if not input_ingredients:
        return [], {}

    _, probs = _compute_probs(input_ingredients)

    min_len = min(len(probs), len(afteruse_labels))
    predicted_labels = [afteruse_labels[i] for i in range(min_len) if probs[i] > threshold]
    label_probs = {afteruse_labels[i]: float(probs[i]) for i in range(min_len)}
    return predicted_labels, label_probs