Spaces:

georgeek
/

HF-LLM-Intent-Detection

Sleeping

App Files Files Community

HF-LLM-Intent-Detection / src /OLD_PAGE_Hugging Face.py

georgeek

Transfer

5ecde30 11 months ago

raw

history blame contribute delete

4.61 kB

	import streamlit as st
	# A bare string expression at script level is displayed by Streamlit's
	# "magic" feature; this one is the markdown for a horizontal rule.
	'---'

	# Page-wide CSS: grey background for text inputs and a light grey body.
	# The string is kept exactly as it appears in the file.
	custom_css = """
	<style>
	.stTextInput > div > div > input {
	background-color: #d3d3d3;
	}
	body {
	background-color: #f0f0f0;
	}
	</style>
	"""
	# unsafe_allow_html is required so the <style> tag is passed through as HTML.
	st.markdown(custom_css, unsafe_allow_html=True)

	# Page heading, a second horizontal rule, and a one-line description.
	st.header('Watson Assistant VDF TOBi improvement')
	'---'
	st.write('The model is trained on the TOBi 🤖 intents in Romanian language.')

	import os
	import pandas as pd
	import re
	from time import time
	from src.E_Model_utils import load_model, train_model, get_embeddings
	from src.E_Faiss_utils import load_embeddings_and_index, normalize_embeddings
	from src.A_Preprocess import load_data, clean_text
	import warnings
	warnings.filterwarnings("ignore", category=FutureWarning)



	# Sidebar selector for the embedding model. "other" is the first option and
	# is treated below as "do not load any model".
	model_name = st.sidebar.radio(
	    "Selectează modelul 👇",
	    [
	        "other",
	        "e5_small_fine_tuned_model",
	        "multilingual-e5-small",
	        "all-MiniLM-L6-v2",
	        "all-distilroberta-v1",
	    ],
	)

	# Sidebar labels whose loadable identifier differs from the label itself.
	# NOTE(review): the fine-tuned entry loads the base e5 checkpoint, exactly as
	# the original code did (loading from model_path was commented out) - confirm
	# whether it should load "output/fine-tuned-model" instead.
	model_ids = {
	    "multilingual-e5-small": "intfloat/multilingual-e5-small",
	    "e5_small_fine_tuned_model": "intfloat/multilingual-e5-small",
	}

	# Bind `model` unconditionally so later references never raise NameError
	# when "other" is selected.
	model = None
	if model_name != "other":
	    # TODO: add a loading spinner while the model loads.
	    model_path = "output/fine-tuned-model"
	    st.write("Model path:", model_path)

	    # Labels without an override are passed to load_model unchanged.
	    model = load_model(model_ids.get(model_name, model_name))

	st.write(f"Modelul selectat: {model_name}")

	# Report success only when a model was actually loaded.
	if model is not None:
	    st.write("Model loaded successfully!")

	# NOTE: loading the embeddings/index at this point is disabled; the search
	# code further down loads the index itself.

	# Halt the script run here. Everything below this call is currently
	# disabled and only runs if this st.stop() is removed.
	st.stop()
	# Upload the intents data (CSV expected to contain an 'intent' column).
	uploaded_file = st.file_uploader("Încarcă fișierul cu intenții", type="csv")

	if uploaded_file is not None:
	    data = pd.read_csv(uploaded_file)
	    st.write("CSV file successfully uploaded!")

	    # Keep the data in session state so it survives Streamlit reruns.
	    st.session_state.data = data
	else:
	    # No new upload: fall back to data stored by an earlier run, if any.
	    data = st.session_state.data if 'data' in st.session_state else None

	if data is not None:
	    # Intent labels, indexed by row position in the uploaded CSV.
	    intents = data['intent'].tolist()

	    user_text = st.text_input("Te rog introdu un text.")

	    if user_text:
	        if st.button("Identifică Intenția"):
	            start = time()
	            st.write("Procesare text...")

	            # Embed the cleaned query and normalise it before searching.
	            cleaned_text = clean_text(user_text)
	            input_embedding = get_embeddings(model, [cleaned_text])
	            normalized_embedding = normalize_embeddings(input_embedding)

	            # Only the index is needed here; the stored embeddings are unused.
	            _, index = load_embeddings_and_index(f"embeddings/{model_name}_vector_db.index")
	            D, I = index.search(normalized_embedding, 1)  # nearest neighbour only

	            nearest = int(I[0][0])
	            # Guard the lookup: presumably a FAISS index, which pads missing
	            # neighbours with -1 (that would silently pick the last intent),
	            # and the index may not match the uploaded CSV - TODO confirm.
	            if 0 <= nearest < len(intents):
	                intent = intents[nearest]
	                distance = D[0][0]

	                # Map the returned distance into (0, 1]; smaller distance
	                # gives a value closer to 1.
	                similarity = 1 / (1 + distance)

	                st.write(f"Intenția identificată: {intent}")
	                st.write(f"Nivel de încredere: {similarity:.4f}")
	                st.write(f"Timp de răspuns: {time() - start:.4f} secunde")
	            else:
	                st.write("Nu a fost găsită nicio intenție potrivită.")
	    else:
	        # Nothing typed yet: prompt the user for input.
	        st.write("Te rog introdu un text.")


	# Halt the script run here; the code below this call does not execute.
	st.stop()
	# Endpoint for intent identification (older variant of the search flow).
	# NOTE(review): relies on `model`, `model_name` and `data` being bound by
	# earlier sections of this script - confirm before re-enabling this path.
	input_text = st.text_input("Introdu mai jos textul! 👇", label_visibility="visible")

	if st.button("Identifică Intenția"):
	    if input_text:
	        # Start timing at the click so the reported time covers only the
	        # work done for this request.
	        start = time()

	        cleaned_text = clean_text(input_text)
	        input_embedding = get_embeddings(model, [cleaned_text])
	        normalized_embedding = normalize_embeddings(input_embedding)

	        # Load the index here: nothing on this code path bound `index`
	        # before it was used. The stored embeddings are not needed.
	        _, index = load_embeddings_and_index(f"embeddings/{model_name}_vector_db.index")
	        D, I = index.search(normalized_embedding, 1)  # nearest neighbour only

	        intents = data['intent'].tolist()
	        nearest = int(I[0][0])
	        # Guard the lookup: presumably a FAISS index, which pads missing
	        # neighbours with -1, and the index may not match `data` - TODO confirm.
	        if 0 <= nearest < len(intents):
	            # The original indexed an undefined name (`intentions`) here.
	            intent = intents[nearest]
	            distance = D[0][0]

	            # Map the returned distance into (0, 1]; smaller distance gives
	            # a value closer to 1.
	            similarity = 1 / (1 + distance)

	            st.write(f"Intenția identificată: {intent}")
	            st.write(f"Nivel de încredere: {similarity:.4f}")
	            st.write(f"Timp de răspuns: {time() - start:.4f} secunde")
	        else:
	            st.write("Nu a fost găsită nicio intenție potrivită.")
	    else:
	        # Button clicked with an empty text box.
	        st.write("Te rog introdu un text.")


	# End of page: stop the script run.
	st.stop()