Update app/utils/prediction.py
Browse files- app/utils/prediction.py +19 -7
app/utils/prediction.py
CHANGED
|
@@ -6,9 +6,6 @@ from deep_translator import GoogleTranslator # Import GoogleTranslator for tran
|
|
| 6 |
|
| 7 |
# Load glossary
|
| 8 |
def load_glossary(file_path):
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
glossary = {}
|
| 13 |
try:
|
| 14 |
with open(file_path, 'r', encoding='utf-8') as file:
|
|
@@ -30,6 +27,18 @@ model = BertForSequenceClassification.from_pretrained("Maulidaaa/bert-safe-model
|
|
| 30 |
glossary = load_glossary('glossary.txt')
|
| 31 |
|
| 32 |
# Translate function using Google Translator and glossary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
print(f"Error during translation: {e}")
|
| 34 |
return text.capitalize()
|
| 35 |
|
|
@@ -51,7 +60,10 @@ def predict_with_description(ingredient, df, target_lang='id'):
|
|
| 51 |
df_match['IUPAC Name_lower'] = df_match['IUPAC Name'].str.lower()
|
| 52 |
|
| 53 |
ingredient_lower = ingredient.lower()
|
| 54 |
-
match_row = df_match[
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
if not match_row.empty:
|
| 57 |
row = match_row.iloc[0]
|
|
@@ -70,8 +82,8 @@ def predict_with_description(ingredient, df, target_lang='id'):
|
|
| 70 |
risk_desc = "Risk info not available"
|
| 71 |
|
| 72 |
result = predict(desc)
|
| 73 |
-
|
| 74 |
-
# Translate only Description and Risk Description using glossary and Google Translator
|
| 75 |
translated_desc = translate_with_glossary(desc, target_lang)
|
| 76 |
translated_risk_desc = translate_with_glossary(risk_desc, target_lang)
|
| 77 |
translated_function = translate_with_glossary(func, target_lang)
|
|
@@ -84,4 +96,4 @@ def predict_with_description(ingredient, df, target_lang='id'):
|
|
| 84 |
("Risk Level", risk_lvl),
|
| 85 |
("Risk Description", translated_risk_desc),
|
| 86 |
("Prediction", result),
|
| 87 |
-
])
|
|
|
|
| 6 |
|
| 7 |
# Load glossary
|
| 8 |
def load_glossary(file_path):
|
|
|
|
|
|
|
|
|
|
| 9 |
glossary = {}
|
| 10 |
try:
|
| 11 |
with open(file_path, 'r', encoding='utf-8') as file:
|
|
|
|
| 27 |
glossary = load_glossary('glossary.txt')
|
| 28 |
|
| 29 |
# Translate function using Google Translator and glossary
|
| 30 |
+
def translate_with_glossary(text, target_lang='id'):
    """Translate *text* with Google Translator, then apply glossary overrides.

    Args:
        text: Text to translate; the source language is auto-detected.
        target_lang: Language code handed to ``GoogleTranslator`` as the
            translation target (defaults to ``'id'``).

    Returns:
        The translated text with every glossary term substituted, passed
        through ``str.capitalize()`` (first character upper-cased, the rest
        lower-cased). If translation fails for any reason, the original
        text is returned capitalized instead. Missing values (``None`` or
        NaN) yield an empty string; any other non-string value is
        stringified and capitalized without being translated.
    """
    # Non-string input cannot be translated, and the error fallback below
    # would itself crash on `.capitalize()`. Handle it up front.
    # NOTE(review): presumably such values come from empty dataframe cells
    # in the caller -- confirm.
    if not isinstance(text, str):
        # NaN is the only float that is not equal to itself.
        if text is None or (isinstance(text, float) and text != text):
            return ""
        return str(text).capitalize()

    try:
        # Translate the text first
        translated = GoogleTranslator(source='auto', target=target_lang).translate(text)

        # The glossary terms are compared lower-cased, so the translated
        # text must be lower-cased too or capitalised occurrences (e.g. at
        # the start of a sentence) are never replaced. Nothing is lost:
        # capitalize() below lower-cases everything after the first
        # character anyway.
        translated = translated.lower()

        # Replace terms based on glossary
        for en_term, id_term in glossary.items():
            translated = translated.replace(en_term.lower(), id_term.lower())

        return translated.capitalize()

    except Exception as e:
        # Best effort: any translator failure falls back to the
        # untranslated input rather than propagating to the caller.
        print(f"Error during translation: {e}")
        return text.capitalize()
|
| 44 |
|
|
|
|
| 60 |
df_match['IUPAC Name_lower'] = df_match['IUPAC Name'].str.lower()
|
| 61 |
|
| 62 |
ingredient_lower = ingredient.lower()
|
| 63 |
+
match_row = df_match[
|
| 64 |
+
(df_match['INCI name_lower'] == ingredient_lower) |
|
| 65 |
+
(df_match['IUPAC Name_lower'] == ingredient_lower)
|
| 66 |
+
]
|
| 67 |
|
| 68 |
if not match_row.empty:
|
| 69 |
row = match_row.iloc[0]
|
|
|
|
| 82 |
risk_desc = "Risk info not available"
|
| 83 |
|
| 84 |
result = predict(desc)
|
| 85 |
+
|
| 86 |
+
# Translate only Description, Function, and Risk Description using glossary and Google Translator
|
| 87 |
translated_desc = translate_with_glossary(desc, target_lang)
|
| 88 |
translated_risk_desc = translate_with_glossary(risk_desc, target_lang)
|
| 89 |
translated_function = translate_with_glossary(func, target_lang)
|
|
|
|
| 96 |
("Risk Level", risk_lvl),
|
| 97 |
("Risk Description", translated_risk_desc),
|
| 98 |
("Prediction", result),
|
| 99 |
+
])
|