| import torch |
| from collections import OrderedDict |
| from transformers import BertTokenizer, BertForSequenceClassification |
| import os |
| from deep_translator import GoogleTranslator |
|
|
| |
def load_glossary(file_path):
    """Load a ``term = replacement`` glossary from a UTF-8 text file.

    Each non-blank line is split on its first ``=``. The part before it is
    stripped and lower-cased to form the key; the part after it is stripped
    and stored as the value, so a value may itself contain ``=``.

    Lines without ``=`` or with an empty key are reported and skipped, so one
    bad line does not discard the rest of the file. An empty key is rejected
    because ``str.replace('', x)`` would insert ``x`` between every character
    of the text the glossary is later applied to.

    Args:
        file_path: Path of the glossary file.

    Returns:
        dict mapping lower-cased term to replacement. Empty when the file
        cannot be opened or decoded (the error is printed, not raised).
    """
    glossary = {}
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            for line_no, raw_line in enumerate(file, start=1):
                line = raw_line.strip()
                if not line:
                    continue
                # partition() splits on the first '=' only and never raises.
                key, separator, value = line.partition('=')
                key = key.strip().lower()
                if not separator or not key:
                    print(f"Skipping malformed glossary line {line_no}: {line!r}")
                    continue
                glossary[key] = value.strip()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error loading glossary: {e}")
    return glossary
|
|
| |
# Hugging Face access token taken from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# The tokenizer and the classifier are loaded from the same Hub repository
# when this module is imported.
_MODEL_REPO = "Maulidaaa/bert-safe-model"
tokenizer = BertTokenizer.from_pretrained(_MODEL_REPO, use_auth_token=HF_TOKEN)
model = BertForSequenceClassification.from_pretrained(_MODEL_REPO, use_auth_token=HF_TOKEN)

# Term-replacement table applied to translated text. The path is relative,
# so it is resolved against the current working directory.
glossary = load_glossary('glossary.txt')
|
|
| |
def translate_with_glossary(text, target_lang='id'):
    """Translate *text* with Google Translate, then apply the module glossary.

    The translated text is lower-cased before the glossary is applied so that
    terms are matched regardless of how the translator capitalised them (the
    glossary keys are lower-case). The result is finally passed through
    ``str.capitalize()``, i.e. first character upper-case, the rest lower-case.

    Args:
        text: Text to translate. Non-string values (for example ``None`` or a
            NaN taken from a DataFrame cell) and blank strings yield ``""``
            without calling the translator.
        target_lang: Target language code handed to ``GoogleTranslator``.

    Returns:
        The translated, glossary-adjusted, capitalised string. If translation
        or replacement fails, the error is printed and ``text.capitalize()``
        is returned instead.
    """
    # Missing or blank input has nothing to translate; returning early also
    # keeps the fallback below from calling .capitalize() on a non-string.
    if not isinstance(text, str) or not text.strip():
        return ""

    try:
        translated = GoogleTranslator(source='auto', target=target_lang).translate(text)

        # Lower-case once up front: capitalize() lower-cases the tail anyway,
        # and this lets capitalised occurrences of a glossary term match.
        adjusted = translated.lower()
        for en_term, id_term in glossary.items():
            if not en_term:
                # replace('', x) would insert x between every character.
                continue
            adjusted = adjusted.replace(en_term.lower(), id_term.lower())

        return adjusted.capitalize()

    except Exception as e:
        # Best-effort boundary: any translator/network failure falls back to
        # the untranslated text rather than propagating.
        print(f"Error during translation: {e}")
        return text.capitalize()
|
|
| |
def predict(desc):
    """Classify a description as ``"Safe"`` or ``"Not Safe"``.

    A falsy description is reported as ``"Not Safe"`` without running the
    model. Otherwise the text is tokenised (truncated to 512 tokens), scored
    by the module-level ``model`` with gradients disabled, and the result is
    ``"Safe"`` only when the highest-scoring class index is 1.
    """
    if desc:
        encoded = tokenizer(
            desc,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        )
        with torch.no_grad():
            scores = model(**encoded).logits
        label_id = scores.argmax(dim=1).item()
        if label_id == 1:
            return "Safe"
    return "Not Safe"
|
|
| |
def _cell_or_default(row, column, default):
    """Return ``row[column]``, or *default* when the column is absent or the cell is missing."""
    # Function-scope import: this module does not import pandas at top level.
    import pandas as pd

    value = row.get(column, default)
    # row.get() only falls back for an absent column; an empty cell comes
    # back as NaN/None and must be replaced explicitly.
    return default if pd.isna(value) else value


def predict_with_description(ingredient, df, target_lang='id'):
    """Look up an ingredient in *df*, classify it and translate its texts.

    The ingredient is compared, ignoring case and surrounding whitespace,
    with the ``'INCI name'`` and ``'IUPAC Name'`` columns; the first matching
    row is used. Missing columns or empty cells fall back to placeholder
    strings instead of raising. When nothing matches, "not found"
    placeholders are used throughout.

    Args:
        ingredient: Ingredient name to look up (string).
        df: DataFrame with ``'INCI name'`` and ``'IUPAC Name'`` columns and,
            optionally, ``'Description'``, ``'Function'``, ``'Restriction'``,
            ``'Risk Level'`` and ``'Risk Description'``.
        target_lang: Language code passed to ``translate_with_glossary``.

    Returns:
        OrderedDict with the keys ``"Ingredient Name"``, ``"Description"``,
        ``"Function"``, ``"Restriction"``, ``"Risk Level"``,
        ``"Risk Description"`` and ``"Prediction"``, in that order.
        Description, function and risk description are translated; the
        prediction comes from ``predict`` on the untranslated description.
    """
    ingredient_key = ingredient.strip().lower()

    # Build the comparison keys directly instead of copying the whole frame
    # just to attach two helper columns.
    inci_keys = df['INCI name'].str.strip().str.lower()
    iupac_keys = df['IUPAC Name'].str.strip().str.lower()
    matches = df[(inci_keys == ingredient_key) | (iupac_keys == ingredient_key)]

    if not matches.empty:
        row = matches.iloc[0]
        # A row matched through its IUPAC name may have no INCI name; fall
        # back to the caller's spelling in that case.
        inci_name = str(_cell_or_default(row, 'INCI name', ingredient)).capitalize()
        desc = str(_cell_or_default(row, 'Description', 'Description not available'))
        func = str(_cell_or_default(row, 'Function', 'Function not available')).capitalize()
        restriction = _cell_or_default(row, 'Restriction', 'None')
        risk_lvl = _cell_or_default(row, 'Risk Level', 'Unknown')
        risk_desc = str(_cell_or_default(row, 'Risk Description', 'Risk info not available'))
    else:
        inci_name = ingredient.title()
        desc = "Description not found"
        func = "Function not found"
        restriction = "Restriction not found"
        risk_lvl = "Unknown"
        risk_desc = "Risk info not available"

    # Classification runs on the original (untranslated) description.
    result = predict(desc)

    translated_desc = translate_with_glossary(desc, target_lang)
    translated_risk_desc = translate_with_glossary(risk_desc, target_lang)
    translated_function = translate_with_glossary(func, target_lang)

    return OrderedDict([
        ("Ingredient Name", inci_name),
        ("Description", translated_desc),
        ("Function", translated_function),
        ("Restriction", restriction),
        ("Risk Level", risk_lvl),
        ("Risk Description", translated_risk_desc),
        ("Prediction", result),
    ])
|
|