import streamlit as st
import torch
import torch.nn as nn
import re
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import requests
from bs4 import BeautifulSoup

# Set page configuration
st.set_page_config(page_title="Allergen Detection App", page_icon="🍲", layout="wide")
# Target labels (the model's Indonesian output names)
target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Text cleaning
def clean_text(text):
    text = text.replace('--', ' ')
    text = re.sub(r"http\S+", "", text)          # strip URLs
    text = re.sub(r"\n", " ", text)              # newlines -> spaces
    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)  # drop punctuation/symbols
    text = re.sub(r" {2,}", " ", text)           # collapse repeated spaces
    text = text.strip().lower()
    return text
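
# Example (assumed input): '--', newlines, and punctuation are normalized away:
#   clean_text("Bahan--utama:\n2 butir TELUR!")  ->  "bahan utama 2 butir telur"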
# Multilabel BERT model
class MultilabelBertClassifier(nn.Module):
    def __init__(self, model_name, num_labels):
        super(MultilabelBertClassifier, self).__init__()
        self.bert = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        # Swap in a fresh linear head sized for the multilabel task
        self.bert.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.logits
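
# A minimal shape sanity check (a sketch; never called by the app). It assumes
# the same base checkpoint that load_model() uses below.
def _shape_check_demo():
    demo = MultilabelBertClassifier('indobenchmark/indobert-base-p1', len(target_columns))
    ids = torch.randint(0, 100, (2, 16))   # two fake token-id sequences
    mask = torch.ones_like(ids)
    logits = demo(input_ids=ids, attention_mask=mask)
    assert logits.shape == (2, len(target_columns))  # raw logits: [batch, 5]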

# Load model
def load_model():
    # Tokenizer and base model should come from the same checkpoint so the
    # vocabulary matches the fine-tuned weights
    tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1')
    model = MultilabelBertClassifier('indobenchmark/indobert-base-p1', len(target_columns))
    try:
        state_dict = torch.load('model/alergen_model.pt', map_location=device)
        # Training checkpoints may nest the weights under 'model_state_dict'
        if 'model_state_dict' in state_dict:
            model_state_dict = state_dict['model_state_dict']
        else:
            model_state_dict = state_dict
        # Strip the 'module.' prefix left behind by nn.DataParallel, if any
        new_state_dict = {k[7:] if k.startswith('module.') else k: v for k, v in model_state_dict.items()}
        model.load_state_dict(new_state_dict, strict=False)
        st.success("Model loaded successfully!")
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        st.info("Using the model without the fine-tuned weights.")
    model.to(device)
    model.eval()
    return tokenizer, model
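
# Note: Streamlit reruns the whole script on each interaction, so load_model()
# runs every time. A minimal caching sketch (assuming Streamlit >= 1.18, where
# st.cache_resource is available):
#
#   @st.cache_resource
#   def load_model_cached():
#       return load_model()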

def predict_alergens(ingredients_text, tokenizer, model, threshold=0.5, max_length=128):
    cleaned_text = clean_text(ingredients_text)
    encoding = tokenizer.encode_plus(
        cleaned_text,
        add_special_tokens=True,
        max_length=max_length,
        truncation=True,
        return_tensors='pt',
        padding='max_length'
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        probs = torch.sigmoid(outputs).cpu().numpy()[0]  # per-label sigmoid scores in (0, 1)
    results = []
    for i, label in enumerate(target_columns):
        present = probs[i] > threshold
        percent = float(probs[i]) * 100
        results.append({
            'label': label,
            'present': present,
            'probability': percent
        })
    return results
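
# Returned structure, sketched with illustrative values (not real model output):
#   predict_alergens("telur ayam, tepung terigu", tokenizer, model)
#   -> [{'label': 'susu', 'present': False, 'probability': 3.1},
#       {'label': 'telur', 'present': True, 'probability': 97.4}, ...]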

# Scrape Cookpad
def scrape_ingredients_from_url(url):
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # fail early on HTTP errors
        soup = BeautifulSoup(response.text, 'html.parser')
        ingredients_div = soup.find('div', id='ingredients')
        if not ingredients_div:
            return None
        items = ingredients_div.find_all(['li', 'span'])
        ingredients = [item.get_text(strip=True) for item in items if item.get_text(strip=True)]
        return '\n'.join(ingredients)
    except Exception as e:
        st.error(f"Failed to fetch data from the URL: {e}")
        return None
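
# Usage sketch (illustrative output; the 'ingredients' div id is specific to
# Cookpad's current markup and may change):
#   scrape_ingredients_from_url("https://cookpad.com/id/resep/24678703-gulai-telur-tahu-dan-kacang-panjang")
#   -> "telur\ntahu\nkacang panjang\n..."  (or None on failure)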

# Main App
def main():
    st.title("Recipe Allergen Detection App")
    st.markdown("""
    This app predicts which allergens a food recipe contains, based on its ingredients.
    """)
    with st.spinner("Loading model..."):
        tokenizer, model = load_model()
    col1, col2 = st.columns([3, 2])
    with col1:
        st.subheader("Enter a Recipe URL from Cookpad")
        url = st.text_input("Example: https://cookpad.com/id/resep/24678703-gulai-telur-tahu-dan-kacang-panjang")
        threshold = st.slider(
            "Allergen Detection Threshold",
            min_value=0.1,
            max_value=0.9,
            value=0.5,
            step=0.05,
            help="The lower the threshold, the more sensitive the model is to possible allergens."
        )
        if st.button("Detect Allergens", type="primary"):
            if url:
                with st.spinner("Fetching recipe ingredients from the URL..."):
                    ingredients = scrape_ingredients_from_url(url)
                if ingredients:
                    st.text_area("Ingredient List", ingredients, height=200)
                    with st.spinner("Analyzing ingredients..."):
                        alergens = predict_alergens(ingredients, tokenizer, model, threshold=threshold)
                    with col2:
                        st.subheader("Detection Results")
                        emoji_map = {
                            'susu': '🥛',
                            'kacang': '🥜',
                            'telur': '🥚',
                            'makanan_laut': '🦐',
                            'gandum': '🌾'
                        }
                        detected = []
                        for result in alergens:
                            label = result['label']
                            name = label.replace('_', ' ').title()
                            prob = result['probability']
                            present = result['present']
                            emoji = emoji_map.get(label, '')
                            if present:
                                st.error(f"{emoji} {name}: Detected ⚠️ ({prob:.2f}%)")
                                detected.append(name)
                            else:
                                st.success(f"{emoji} {name}: Not Detected ✅ ({prob:.2f}%)")
                        if detected:
                            st.warning(f"This recipe contains allergens: {', '.join(detected)}")
                        else:
                            st.success("No allergens were detected in this recipe.")
                else:
                    st.warning("Failed to fetch ingredients from the Cookpad page. Make sure the URL is valid.")
            else:
                st.warning("Please enter a recipe URL first.")
    with st.expander("About this App"):
        st.markdown("""
        This app uses an IndoBERT model to detect 5 allergen types in recipe ingredients:
        - Milk (susu) 🥛
        - Nuts (kacang) 🥜
        - Egg (telur) 🥚
        - Seafood (makanan_laut) 🦐
        - Wheat (gandum) 🌾
        """)

if __name__ == "__main__":
    main()