Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import os | |
| import numpy as np | |
| import pandas as pd | |
| import re | |
| import torch | |
| import torch.nn as nn | |
| from torch.utils.data import Dataset | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import matplotlib.pyplot as plt | |
| import warnings | |
# Silence library warnings for the whole process.
warnings.filterwarnings("ignore")

# Set page config (kept ahead of every other st.* call in this file)
st.set_page_config(
    page_title="Deteksi Alergen dalam Resep",
    # The icon literal was mojibake ("π²"), which is not a valid emoji;
    # restored to the pot-of-food emoji those bytes decode to.
    page_icon="🍲",
    layout="wide",
)

# Set device: use the GPU when CUDA is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Clean text function
def clean_text(text):
    """Normalize raw ingredient text before it is tokenized.

    The text has "--" turned into a space, URLs removed, every character that
    is not an ASCII letter, digit or whitespace replaced by a space, runs of
    whitespace (spaces, tabs, newlines, carriage returns) collapsed to a single
    space, and is finally trimmed and lowercased.

    Args:
        text: Raw ingredient string. ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned, lowercase string.
    """
    if not text:
        return ""

    # Done before URL stripping, so a "--" inside a URL still splits it.
    text = text.replace("--", " ")
    text = re.sub(r"http\S+", "", text)
    # Raw string: "\s" in a plain string literal is an invalid escape sequence.
    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
    # Collapse every kind of whitespace, not only repeated spaces and "\n".
    text = re.sub(r"\s+", " ", text)
    return text.strip().lower()
# Define model for multilabel classification
class MultilabelBertClassifier(nn.Module):
    """Pretrained sequence-classification model with a fresh linear head.

    The wrapped model is stored as ``self.bert``. Its ``classifier`` attribute
    is replaced by a new ``nn.Linear`` that maps the encoder's hidden size to
    ``num_labels`` outputs. ``forward`` returns the raw logits; no activation
    is applied here.
    """

    def __init__(self, model_name, num_labels):
        """Load ``model_name`` and attach a ``num_labels``-wide linear head."""
        super().__init__()
        backbone = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels
        )
        # Replace the classification head with our own for multilabel
        backbone.classifier = nn.Linear(backbone.config.hidden_size, num_labels)
        self.bert = backbone

    def forward(self, input_ids, attention_mask):
        """Run the wrapped model and return the ``logits`` of its output."""
        return self.bert(input_ids=input_ids, attention_mask=attention_mask).logits
# Build the tokenizer and classifier used for inference
def load_model():
    """Create the tokenizer and classifier and try to load trained weights.

    Weights are read from ``model/alergen_model.pt``. The checkpoint may be a
    dict that stores the weights under ``'model_state_dict'`` or a bare state
    dict. When loading fails, the failure is reported in the Streamlit UI and
    the model is still returned, but without the trained weights, so its
    predictions are not meaningful.

    Returns:
        tuple: ``(model, tokenizer, target_columns)``. The model has been moved
        to ``device`` and put in eval mode.
    """
    # Target columns
    target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']

    # NOTE(review): the tokenizer is loaded from "-p2" while the model below is
    # built from "-p1". Confirm which checkpoint the weights were trained with
    # and align the two names.
    tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p2')
    model = MultilabelBertClassifier('indobenchmark/indobert-base-p1', len(target_columns))

    # Load model weights if available
    model_path = "model/alergen_model.pt"
    try:
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        checkpoint = torch.load(model_path, map_location=device)
        # Accept both a wrapped checkpoint and a bare state dict.
        if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
            state_dict = checkpoint['model_state_dict']
        else:
            state_dict = checkpoint
        model.load_state_dict(state_dict)
    except Exception as e:
        # UI boundary: report any load failure instead of crashing the app.
        st.error(f"Error loading model: {str(e)}")
        st.warning("Model belum tersedia. Silakan latih model terlebih dahulu atau upload file model.")
    else:
        st.success("Model berhasil dimuat!")

    model.to(device)
    model.eval()
    return model, tokenizer, target_columns
def predict_allergens(ingredients_text, model, tokenizer, target_columns, max_length=128, threshold=0.5):
    """Predict which allergens an ingredient list contains.

    The text is cleaned with ``clean_text``, tokenized (truncated and padded to
    ``max_length``), passed through ``model``, and the logits are turned into
    per-label probabilities with a sigmoid.

    Args:
        ingredients_text: Raw ingredient list.
        model: Callable taking ``input_ids`` and ``attention_mask`` keyword
            arguments and returning logits.
        tokenizer: Hugging Face tokenizer used to encode the cleaned text.
        target_columns: Label names, in the order of the model's outputs.
        max_length: Token length the input is truncated and padded to.
        threshold: A label counts as present when its probability is strictly
            greater than this value.

    Returns:
        dict: ``{label: {'present': bool, 'probability': float}}`` with one
        entry per name in ``target_columns``.
    """
    # Clean the text
    cleaned_text = clean_text(ingredients_text)

    # Tokenize by calling the tokenizer directly; this is the documented
    # replacement for the deprecated encode_plus.
    encoding = tokenizer(
        cleaned_text,
        add_special_tokens=True,
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt',
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask)

    # A single text was encoded, so only the first row of the output is used.
    probabilities = torch.sigmoid(logits)[0].cpu().tolist()

    return {
        target: {
            'present': probabilities[i] > threshold,
            'probability': probabilities[i],
        }
        for i, target in enumerate(target_columns)
    }
# Main application

# Path load_model() in this file reads the weights from. Uploads are saved here
# so that an uploaded model is the one that actually gets loaded.
_UPLOADED_MODEL_PATH = "model/alergen_model.pt"

# Explanation shown for each detected allergen, keyed by target name.
_ALLERGEN_NOTES = {
    'susu': "- **Susu**: Resep mungkin mengandung susu atau produk turunannya",
    'kacang': "- **Kacang**: Resep mungkin mengandung kacang atau produk turunannya",
    'telur': "- **Telur**: Resep mungkin mengandung telur atau produk turunannya",
    'makanan_laut': "- **Makanan Laut**: Resep mungkin mengandung ikan, udang, kerang, atau makanan laut lainnya",
    'gandum': "- **Gandum**: Resep mungkin mengandung gandum atau produk turunannya (termasuk gluten)",
}

_INTRO_MARKDOWN = """
Aplikasi ini menggunakan model IndoBERT untuk mendeteksi kemungkinan alergen dalam resep berdasarkan daftar bahan.
Alergen yang diidentifikasi meliputi:
- Susu
- Kacang
- Telur
- Makanan Laut
- Gandum
"""

_EXAMPLES_MARKDOWN = """
### Contoh Resep 1 (Mengandung Beberapa Alergen)
```
1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat, 4 butir kecil Kentang, 2 buah Tomat merah, 1 buah Ketimun lalap, 4 lembar Selada keriting, 2 lembar Kol putih, 2 porsi Saus kacang homemade, 4 buah Kerupuk udang goreng, Secukupnya emping goreng, 2 sdt Bawang goreng, Secukupnya Kecap manis
```
### Contoh Resep 2 (Mengandung Susu)
```
250 ml susu full cream, 2 sdm tepung maizena, 3 sdm gula pasir, 1/2 sdt vanila ekstrak, secukupnya keju cheddar parut
```
### Contoh Resep 3 (Mengandung Makanan Laut)
```
250 g udang segar, 150 g cumi-cumi, 2 sdm saus tiram, 3 siung bawang putih, 1 ruas jahe, 2 sdm minyak goreng, garam dan merica secukupnya
```
"""

_ABOUT_TEXT = """
Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi alergen dalam resep makanan.
Model ini dilatih untuk mengidentifikasi 5 jenis alergen umum dalam makanan berdasarkan daftar bahan resep.
"""

_MODEL_INFO_MARKDOWN = """
- **Model Dasar**: IndoBERT
- **Jenis**: Multilabel Classification
- **Alergen yang Dideteksi**: Susu, Kacang, Telur, Makanan Laut, Gandum
"""


def _display_name(target):
    """Turn a target name such as 'makanan_laut' into the label 'Makanan Laut'."""
    return target.replace('_', ' ').title()


def _save_uploaded_model(uploaded_model, destination=_UPLOADED_MODEL_PATH):
    """Write the uploaded file to ``destination``, creating its folder if needed."""
    folder = os.path.dirname(destination)
    if folder:
        os.makedirs(folder, exist_ok=True)
    with open(destination, "wb") as f:
        f.write(uploaded_model.getbuffer())


def _render_results_table(labels, probabilities, present):
    """Show one table row per allergen with its detection flag and probability."""
    result_df = pd.DataFrame({
        'Alergen': labels,
        # The original literals were mojibake; restored to check / cross marks.
        'Terdeteksi': ['✅' if flag else '❌' for flag in present],
        'Probabilitas': [f"{prob*100:.2f}%" for prob in probabilities],
    })
    st.dataframe(result_df, use_container_width=True)


def _render_probability_chart(labels, probabilities, present):
    """Draw the per-allergen probability bar chart with the 0.5 threshold line."""
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        bars = ax.bar(
            labels,
            probabilities,
            color=['red' if flag else 'green' for flag in present],
        )
        # Add threshold line
        ax.axhline(y=0.5, color='black', linestyle='--', alpha=0.7)
        ax.text(len(labels)-1, 0.51, 'Threshold (0.5)', ha='right', va='bottom')
        # Customize the chart
        ax.set_ylim(0, 1)
        ax.set_ylabel('Probabilitas')
        ax.set_title('Probabilitas Deteksi Alergen')
        # Add values on top of bars
        for bar in bars:
            height = bar.get_height()
            ax.annotate(f'{height:.2f}',
                        xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates; close it so reruns do not
        # accumulate open figures.
        plt.close(fig)


def _render_explanation(results):
    """List the detected allergens with a short explanation for each."""
    st.markdown("### Penjelasan Hasil:")
    detected = [target for target, data in results.items() if data['present']]
    if detected:
        names = ', '.join(_display_name(target) for target in detected)
        st.markdown(f"Resep ini kemungkinan mengandung alergen: **{names}**")
        # Provide specific explanation for each detected allergen
        for target in detected:
            note = _ALLERGEN_NOTES.get(target.lower())
            if note:
                st.markdown(note)
    else:
        st.markdown("Tidak terdeteksi alergen umum dalam resep ini.")
    st.warning("Catatan: Prediksi ini hanya bersifat indikatif. Selalu verifikasi dengan informasi resmi untuk keamanan konsumsi.")


def main():
    """Render the Streamlit page: model upload, ingredient input and results.

    On "Deteksi Alergen" the ingredient text is cleaned and classified, and the
    result is shown as a table, a bar chart (second column) and a textual
    explanation. Input that is empty after cleaning produces an error message
    instead of a prediction.
    """
    st.title("Deteksi Alergen dalam Resep")
    st.markdown(_INTRO_MARKDOWN)

    # Sidebar for model upload
    st.sidebar.header("Upload Model")
    uploaded_model = st.sidebar.file_uploader("Upload model allergen (alergen_model.pt)", type=["pt"])
    if uploaded_model is not None:
        # NOTE(review): load_model() passes this file to torch.load, which
        # unpickles it; only trusted model files should be uploaded.
        _save_uploaded_model(uploaded_model)
        st.sidebar.success("Model telah diupload dan dimuat!")

    # Load model
    model, tokenizer, target_columns = load_model()

    # Input area
    st.header("Masukkan Daftar Bahan Resep")
    ingredients = st.text_area("Bahan-bahan:", height=200,
                               placeholder="Contoh: 1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat...")

    col1, col2 = st.columns(2)
    with col1:
        if st.button("Deteksi Alergen", type="primary"):
            # Clean first so whitespace- or punctuation-only input is rejected
            # instead of being sent to the model as an empty string.
            cleaned_text = clean_text(ingredients) if ingredients else ""
            if cleaned_text:
                with st.spinner("Menganalisis bahan-bahan..."):
                    st.markdown("### Bahan yang diproses:")
                    st.text(cleaned_text)

                    # Get predictions
                    results = predict_allergens(ingredients, model, tokenizer, target_columns)

                    # Display results
                    st.markdown("### Hasil Deteksi Alergen:")
                    labels = [_display_name(target) for target in results]
                    probabilities = [data['probability'] for data in results.values()]
                    present = [data['present'] for data in results.values()]
                    _render_results_table(labels, probabilities, present)

                    # Display chart in the second column
                    with col2:
                        _render_probability_chart(labels, probabilities, present)

                    # Show detailed explanation
                    _render_explanation(results)
            else:
                st.error("Mohon masukkan daftar bahan terlebih dahulu.")

    # Examples section
    with st.expander("Contoh Resep"):
        st.markdown(_EXAMPLES_MARKDOWN)

    # About section
    st.sidebar.markdown("---")
    st.sidebar.header("Tentang")
    st.sidebar.info(_ABOUT_TEXT)

    # Model information
    st.sidebar.markdown("---")
    st.sidebar.header("Informasi Model")
    st.sidebar.markdown(_MODEL_INFO_MARKDOWN)
# Script entry point: build the page only when this file is run directly.
if __name__ == "__main__":
    main()