Spaces:

Maulidaaa
/

skincare

Sleeping

App Files Files Community

skincare / app /utils /prediction.py

Maulidaaa

Update app/utils/prediction.py

d9e4ce9 verified about 1 year ago

raw

history blame contribute delete

3.83 kB

	import torch
	from collections import OrderedDict
	from transformers import BertTokenizer, BertForSequenceClassification
	import os
	from deep_translator import GoogleTranslator # Import GoogleTranslator for translation

	# Load glossary
	def load_glossary(file_path):
	    """Load a ``key=value`` glossary file into a dictionary.

	    Every non-blank line is split at its first ``=``. The key is stripped and
	    lower-cased; the value is stripped. Lines that have no ``=`` or an empty
	    key are reported and skipped, so a single malformed entry does not discard
	    the remainder of the file.

	    Args:
	        file_path: Path of a UTF-8 encoded text file.

	    Returns:
	        A dict mapping lower-cased keys to their values. If the file cannot be
	        opened or decoded the error is printed (not raised) and whatever was
	        read so far is returned, possibly an empty dict.
	    """
	    glossary = {}
	    try:
	        with open(file_path, 'r', encoding='utf-8') as file:
	            for line_no, line in enumerate(file, start=1):
	                entry = line.strip()
	                if not entry:
	                    continue
	                # Split on the first "=" only, so a value may itself contain "=".
	                key, sep, value = entry.partition('=')
	                key = key.strip().lower()
	                # An empty key must never be stored: str.replace('', x) would
	                # insert x between every character of the text being translated.
	                if not sep or not key:
	                    print(f"Skipping malformed glossary line {line_no}: {entry!r}")
	                    continue
	                glossary[key] = value.strip()
	    except (OSError, ValueError) as e:
	        # OSError: missing/unreadable file. ValueError: covers UnicodeDecodeError.
	        print(f"Error loading glossary: {e}")
	    return glossary

	# Load the pre-trained model and tokenizer
	# Access token read from the HF_TOKEN environment variable; os.getenv returns
	# None when the variable is not set.
	HF_TOKEN = os.getenv("HF_TOKEN")
	# Tokenizer and classifier are loaded once, at import time, from the same
	# checkpoint name and are then used by predict().
	# NOTE(review): recent transformers releases appear to prefer `token=` over
	# `use_auth_token=` — confirm against the pinned version before changing.
	tokenizer = BertTokenizer.from_pretrained("Maulidaaa/bert-safe-model", use_auth_token=HF_TOKEN)
	model = BertForSequenceClassification.from_pretrained("Maulidaaa/bert-safe-model", use_auth_token=HF_TOKEN)

	# Module-level glossary consumed by translate_with_glossary(). The relative
	# path 'glossary.txt' is resolved against the process working directory.
	glossary = load_glossary('glossary.txt')

	# Translate function using Google Translator and glossary
	def translate_with_glossary(text, target_lang='id'):
	    """Translate *text* and then apply the module-level glossary substitutions.

	    The text is sent to ``GoogleTranslator(source='auto', target=target_lang)``.
	    Each glossary key found in the translated text is then replaced by its
	    lower-cased value, and the result is returned with only its first
	    character upper-cased.

	    Args:
	        text: Text to translate. Values that are not strings (for example None
	            or a NaN read from a DataFrame cell) and blank strings are returned
	            unchanged without calling the translator.
	        target_lang: Target language code handed to the translator.

	    Returns:
	        The translated, glossary-adjusted, capitalised string. If translation
	        fails or yields nothing, ``text.capitalize()`` is returned instead.
	    """
	    # Nothing to translate; also keeps the fallback below from calling
	    # .capitalize() on a non-string and raising from inside the except handler.
	    if not isinstance(text, str) or not text.strip():
	        return text
	    try:
	        translated = GoogleTranslator(source='auto', target=target_lang).translate(text)
	        if not translated:
	            return text.capitalize()
	        # Glossary keys are lower-case, so match them against lower-cased text.
	        # The final capitalize() lower-cases everything after the first character
	        # anyway, so this only adds matches that differed in letter case.
	        translated = translated.lower()
	        for en_term, id_term in glossary.items():
	            # An empty key would make replace() insert the value between every character.
	            if en_term:
	                translated = translated.replace(en_term.lower(), id_term.lower())
	        return translated.capitalize()
	    except Exception as e:
	        # Deliberate best-effort boundary: the translator performs network I/O
	        # and may fail in library-specific ways; fall back to the source text.
	        print(f"Error during translation: {e}")
	        return text.capitalize()

	# Prediction function
	def predict(desc):
	    """Classify an ingredient description as "Safe" or "Not Safe".

	    Args:
	        desc: Description text. Anything that is not a non-blank string
	            (None, "", whitespace only, or a non-string such as a NaN float
	            from an empty DataFrame cell) counts as "no description".

	    Returns:
	        "Safe" when the highest-scoring class index produced by the model is 1,
	        otherwise "Not Safe". "Not Safe" is also returned, without running the
	        model, when there is no usable description.
	    """
	    # `not desc` alone lets truthy non-strings (NaN is truthy) reach the
	    # tokenizer, which rejects them; treat them like a missing description.
	    if not isinstance(desc, str) or not desc.strip():
	        return "Not Safe"
	    inputs = tokenizer(desc, return_tensors="pt", truncation=True, padding=True, max_length=512)
	    # Inference only, so gradient tracking is switched off.
	    with torch.no_grad():
	        logits = model(**inputs).logits
	    pred = torch.argmax(logits, dim=1).item()
	    return "Safe" if pred == 1 else "Not Safe"

	# Function to predict with ingredient description, translate terms, and return results
	def _cell_or_default(row, column, default):
	    """Return ``row[column]``, or *default* if the column is absent or the cell is None/NaN/NA."""
	    import pandas as pd  # local import keeps this helper self-contained

	    value = row.get(column, default)
	    return default if pd.isna(value) else value


	def predict_with_description(ingredient, df, target_lang='id'):
	    """Look up an ingredient, classify its description and translate its text fields.

	    Args:
	        ingredient: Ingredient name. Surrounding whitespace is trimmed and the
	            name is compared case-insensitively with the 'INCI name' and
	            'IUPAC Name' columns of *df*.
	        df: pandas DataFrame containing at least the 'INCI name' and
	            'IUPAC Name' columns. 'Description', 'Function', 'Restriction',
	            'Risk Level' and 'Risk Description' are read when present.
	            The frame is not modified.
	        target_lang: Language code forwarded to translate_with_glossary().

	    Returns:
	        OrderedDict with the keys "Ingredient Name", "Description", "Function",
	        "Restriction", "Risk Level", "Risk Description" and "Prediction", in
	        that order. Description, Function and Risk Description are translated;
	        the other fields are returned as found (or as placeholder text).
	    """
	    ingredient = ingredient.strip()
	    ingredient_lower = ingredient.lower()
	    # Compare against standalone lower-cased Series rather than copying the
	    # whole frame and attaching two helper columns on every call.
	    is_match = (
	        (df['INCI name'].str.lower() == ingredient_lower)
	        | (df['IUPAC Name'].str.lower() == ingredient_lower)
	    )
	    match_row = df[is_match]

	    if not match_row.empty:
	        row = match_row.iloc[0]
	        # row.get()'s default only covers an absent column; an empty cell comes
	        # back as NaN, which has no .capitalize(). _cell_or_default covers both.
	        # The row may also have matched on the IUPAC name with no INCI name set.
	        inci_name = str(_cell_or_default(row, 'INCI name', ingredient)).capitalize()
	        desc = str(_cell_or_default(row, 'Description', 'Description not available'))
	        func = str(_cell_or_default(row, 'Function', 'Function not available')).capitalize()
	        restriction = _cell_or_default(row, 'Restriction', 'None')
	        risk_lvl = _cell_or_default(row, 'Risk Level', 'Unknown')
	        risk_desc = str(_cell_or_default(row, 'Risk Description', 'Risk info not available'))
	    else:
	        inci_name = ingredient.title()
	        desc = "Description not found"
	        func = "Function not found"
	        restriction = "Restriction not found"
	        risk_lvl = "Unknown"
	        risk_desc = "Risk info not available"

	    # NOTE(review): for unknown ingredients the placeholder text above is what
	    # gets classified and translated — confirm that this is the intended result.
	    result = predict(desc)

	    # Only Description, Function and Risk Description are translated.
	    translated_desc = translate_with_glossary(desc, target_lang)
	    translated_risk_desc = translate_with_glossary(risk_desc, target_lang)
	    translated_function = translate_with_glossary(func, target_lang)

	    return OrderedDict([
	        ("Ingredient Name", inci_name),
	        ("Description", translated_desc),
	        ("Function", translated_function),
	        ("Restriction", restriction),
	        ("Risk Level", risk_lvl),
	        ("Risk Description", translated_risk_desc),
	        ("Prediction", result),
	    ])