Maulidaaa committed on
Commit
b1074aa
·
verified ·
1 Parent(s): 030432c

Delete app/utils/predict_afteruse.py

Browse files
Files changed (1) hide show
  1. app/utils/predict_afteruse.py +0 -103
app/utils/predict_afteruse.py DELETED
@@ -1,103 +0,0 @@
1
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import numpy as np  # NOTE(review): np appears unused in this file — confirm before removing
import os

# Hugging Face access token for the model repo; may be None when the
# environment variable is unset (from_pretrained accepts token=None).
HF_TOKEN = os.getenv("HF_TOKEN")

# Load the pre-trained multi-label BERT classifier and its tokenizer once at
# import time so every prediction call reuses the same instances.
tokenizer = BertTokenizer.from_pretrained("Maulidaaa/bert-safe-multilabel", token=HF_TOKEN)
model = BertForSequenceClassification.from_pretrained("Maulidaaa/bert-safe-multilabel", token=HF_TOKEN)

# Inference only: disable dropout / training-mode behavior.
model.eval()
13
-
14
# After-use effect labels. The order must match the label order used at
# training time, because model output index i is mapped to afteruse_labels[i].
afteruse_labels = [
    "acne fighting", "acne trigger", "anti aging", "brightening", "moisturizing",
    "redness reducing", "skin texture", "soothing", "unknown", "whitening"
]

# English phrase for each label, used to build a human-readable sentence
# in generate_afteruse_sentence_en.
afteruse_descriptions_en = {
    "acne fighting": "helps fight acne",
    "acne trigger": "may trigger acne",
    "anti aging": "reduces signs of aging",
    "brightening": "brightens the skin",
    "moisturizing": "moisturizes the skin",
    "redness reducing": "reduces redness",
    "skin texture": "improves skin texture",
    "soothing": "soothes the skin",
    "unknown": "has unknown effects",
    "whitening": "whitens the skin"
}
32
-
33
def predict_after_use(input_ingredients, threshold=0.5):
    """Predict after-use effect labels for an ingredient string.

    Args:
        input_ingredients: Raw ingredient text to classify. Falsy values
            (None, "") short-circuit to an empty result without touching
            the model.
        threshold: Sigmoid probability cutoff above which a label is
            considered active. Defaults to 0.5 (the original behavior).

    Returns:
        List of label names from ``afteruse_labels`` whose predicted
        probability exceeds ``threshold``.
    """
    if not input_ingredients:
        return []

    # Tokenize the input, truncated/padded to BERT's 512-token limit.
    inputs = tokenizer(input_ingredients, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # Multi-label head: an independent sigmoid per label (not softmax).
    with torch.no_grad():
        logits = model(**inputs).logits
        probs = torch.sigmoid(logits)

    probs = probs.squeeze().cpu().numpy()
    if probs.ndim == 0:
        # squeeze() of a single-logit output yields a 0-d array; wrap it so
        # the indexing below works uniformly.
        probs = [probs]

    print(f"[DEBUG] Model output shape: {logits.shape}")
    print(f"[DEBUG] Jumlah output model: {len(probs)} | Jumlah label: {len(afteruse_labels)}")

    # Guard against a mismatch between model output width and label count.
    min_len = min(len(probs), len(afteruse_labels))
    return [afteruse_labels[i] for i in range(min_len) if probs[i] > threshold]
62
-
63
def generate_afteruse_sentence_en(predicted_labels):
    """Compose an English sentence describing the predicted after-use effects.

    Args:
        predicted_labels: Labels produced by predict_after_use; unknown
            labels fall back to the label text itself.

    Returns:
        A single sentence such as "This product brightens the skin."
    """
    if not predicted_labels:
        return "No effects were detected based on the provided ingredients."

    phrases = [afteruse_descriptions_en.get(lbl, lbl) for lbl in predicted_labels]

    # Join with natural English list punctuation (Oxford comma for 3+).
    if len(phrases) == 1:
        body = phrases[0]
    elif len(phrases) == 2:
        body = f"{phrases[0]} and {phrases[1]}"
    else:
        body = f"{', '.join(phrases[:-1])}, and {phrases[-1]}"
    return f"This product {body}."
75
-
76
-
77
def predict_after_use_with_probs(input_ingredients, threshold=0.5):
    """Predict after-use labels together with their per-label probabilities.

    Args:
        input_ingredients: Raw ingredient text to classify. Falsy values
            (None, "") short-circuit to an empty result without touching
            the model.
        threshold: Sigmoid probability cutoff above which a label is
            considered active. Defaults to 0.5 (the original behavior).

    Returns:
        Tuple ``(predicted_labels, label_probs)`` where ``predicted_labels``
        is the list of labels above ``threshold`` and ``label_probs`` maps
        every label to its sigmoid probability as a float.
    """
    if not input_ingredients:
        # Bug fix: previously returned ([], []) — a list in the second slot —
        # while the normal path returns a dict; keep the types consistent.
        return [], {}

    inputs = tokenizer(input_ingredients, return_tensors="pt", truncation=True, padding=True, max_length=512)

    with torch.no_grad():
        logits = model(**inputs).logits
        probs = torch.sigmoid(logits)

    probs = probs.squeeze().cpu().numpy()
    if probs.ndim == 0:
        # 0-d array from squeeze(); wrap so indexing works uniformly.
        probs = [probs]

    # Guard against a mismatch between model output width and label count.
    min_len = min(len(probs), len(afteruse_labels))
    predicted_labels = [afteruse_labels[i] for i in range(min_len) if probs[i] > threshold]
    label_probs = {afteruse_labels[i]: float(probs[i]) for i in range(min_len)}

    return predicted_labels, label_probs