Spaces:
Sleeping
Sleeping
import streamlit as st

# Custom CSS injected into the page: grey fill for text-input fields and a
# light-grey page background.
_PAGE_STYLE = """
<style>
.stTextInput > div > div > input {
background-color: #d3d3d3;
}
body {
background-color: #f0f0f0;
}
</style>
"""

# A '---' string written as Markdown renders a horizontal rule; the explicit
# st.write() call is what a bare string expression resolves to under
# Streamlit "magic".
st.write('---')
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)
st.header('Watson Assistant VDF TOBi improvement')
st.write('---')
st.write('The model is trained on the TOBi 🤖 intents in Romanian language.')
# Standard library
import os
import re
import warnings
from time import time

# Third-party
import pandas as pd

# Project-local helpers
from src.A_Preprocess import clean_text, load_data
from src.E_Faiss_utils import load_embeddings_and_index, normalize_embeddings
from src.E_Model_utils import get_embeddings, load_model, train_model

# Keep FutureWarning messages out of the app output.
warnings.filterwarnings("ignore", category=FutureWarning)
# Sidebar model picker. "other" is the first (and therefore default) entry and
# has no model behind it.
model_name = st.sidebar.radio(
    "Selectează modelul 👇",
    [
        "other",
        "e5_small_fine_tuned_model",
        "multilingual-e5-small",
        "all-MiniLM-L6-v2",
        "all-distilroberta-v1",
    ],
)

# Sidebar choices whose identifier passed to load_model() differs from the
# displayed name; any choice not listed here is passed through unchanged.
# NOTE(review): the fine-tuned entry resolves to the base checkpoint because
# the call that loaded `model_path` was commented out in the original code -
# confirm whether the fine-tuned weights should be loaded instead.
_MODEL_SOURCES = {
    "multilingual-e5-small": "intfloat/multilingual-e5-small",
    "e5_small_fine_tuned_model": "intfloat/multilingual-e5-small",
}

if model_name == "other":
    # Nothing is loaded for this choice and the intent lookup further down
    # needs `model`, so end this run here instead of failing later with a
    # NameError.
    st.info("Selectează un model din bara laterală pentru a continua.")
    st.stop()

# future improvement: add a loading spinner
model_path = "output/fine-tuned-model"
st.write("Model path:", model_path)
model = load_model(_MODEL_SOURCES.get(model_name, model_name))
st.write(f"Modelul selectat: {model_name}")
st.write("Model loaded successfully!")
# NOTE(review): the original had an st.stop() right after the model was
# loaded, whose nesting could not be recovered from the source; left in place
# it prevents the upload / intent-identification section from ever running
# with a loaded model, so the script now halts only for "other" (above).
# --- Intent data: upload a CSV or reuse the one kept in the session ---------
# The parsed CSV is stored in st.session_state so it is still available on
# script runs where the uploader returns nothing.
_REQUIRED_COLUMNS = ("utterance", "intent")

uploaded_file = st.file_uploader("Încarcă fișierul cu intenții", type="csv")
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    missing_columns = [col for col in _REQUIRED_COLUMNS if col not in data.columns]
    if missing_columns:
        # Report a readable error instead of a raw KeyError traceback.
        st.error(
            "The CSV file is missing required column(s): "
            + ", ".join(missing_columns)
        )
        st.stop()
    st.write("CSV file successfully uploaded!")
    # Save data to session state
    st.session_state.data = data
else:
    # If no file is uploaded, try to load data from session state
    data = st.session_state.data if 'data' in st.session_state else None

# --- Intent identification ---------------------------------------------------
if data is not None:
    # Row i of the CSV is looked up with the neighbour id returned by the
    # index search below.
    # NOTE(review): this assumes the index file was built from the same rows,
    # in the same order, as the uploaded CSV - confirm.
    intents = data['intent'].tolist()
    user_text = st.text_input("Te rog introdu un text.")
    if user_text:
        if st.button("Identifică Intenția"):
            start = time()
            st.write("Procesare text...")
            cleaned_text = clean_text(user_text)
            input_embedding = get_embeddings(model, [cleaned_text])
            normalized_embedding = normalize_embeddings(input_embedding)
            # Only the index is needed here; the stored embeddings are unused.
            _, index = load_embeddings_and_index(
                f"embeddings/{model_name}_vector_db.index"
            )
            # Search for the single nearest neighbour.
            distances, neighbours = index.search(normalized_embedding, 1)
            best = int(neighbours[0][0])
            if 0 <= best < len(intents):
                intent = intents[best]
                distance = distances[0][0]
                similarity = 1 / (1 + distance)
                st.write(f"Intenția identificată: {intent}")
                st.write(f"Nivel de încredere: {similarity:.4f}")
                st.write(f"Timp de răspuns: {time() - start:.4f} secunde")
            else:
                # A negative id means the search found no neighbour; an id past
                # the end means the index does not match the uploaded CSV.
                # Either way, indexing `intents` would be wrong (a negative id
                # would silently pick an entry from the end of the list).
                st.error(
                    "Nu s-a putut identifica intenția: indexul de embeddings "
                    "nu corespunde fișierului încărcat."
                )
    else:
        st.write("Te rog introdu un text.")
# End the run here so the older lookup code further down is not executed.
st.stop()
# Endpoint for intent identification (older single-text-box variant).
# NOTE(review): the st.stop() immediately before this section appears to make
# it unreachable; if it is revived, note that its button label duplicates the
# one used earlier in the script - confirm whether this section can be removed.
input_text = st.text_input("Introdu mai jos textul! 👇", label_visibility="visible")
if st.button("Identifică Intenția"):
    if input_text:
        start = time()
        cleaned_text = clean_text(input_text)
        input_embedding = get_embeddings(model, [cleaned_text])
        normalized_embedding = normalize_embeddings(input_embedding)
        # The index has to be loaded here: no `index` variable is defined
        # earlier in the script.
        _, index = load_embeddings_and_index(
            f"embeddings/{model_name}_vector_db.index"
        )
        # Search for the single nearest neighbour.
        D, I = index.search(normalized_embedding, 1)
        # assumes `data` (the intents table loaded earlier in the script) is
        # not None at this point - TODO confirm
        intents = data['intent'].tolist()
        intent = intents[I[0][0]]
        distance = D[0][0]
        similarity = 1 / (1 + distance)
        st.write(f"Intenția identificată: {intent}")
        st.write(f"Nivel de încredere: {similarity:.4f}")
        st.write(f"Timp de răspuns: {time() - start:.4f} secunde")
    else:
        st.write("Te rog introdu un text.")
st.stop()