Spaces:
Running
Running
| import pandas as pd | |
| import streamlit as st | |
| import datetime | |
| import socket | |
| from src.functions_llm import load_model, compare_label_results | |
| from src.functions_db import connect_to_db, Activity, LLM | |
# Model identifiers offered in the sidebar selectbox. The final entry is not a
# model id: the single-sentence logic further down replaces it with a value
# typed in by the user.
list_of_models = [
    "1mpreccable/10k_trained_bert",
    "naloui/results",
    "another model from hub",
]

# Connect to the application database. `session`, `activity` and `llm` are used
# further down to record activity, store predictions and read the history.
session, user, activity, llm, _ = connect_to_db(
    address="sqlite:///src/databases/main.db",
)
## --------------
## SIDEBAR PARAMS
## --------------
# Everything created inside this context is rendered in the sidebar.
with st.sidebar:
    st.title("App parameters")
    # Model to run the predictions with.
    model_url = st.selectbox("Choose your model", list_of_models)
    st.divider()
    # Optional table of texts for batch analysis.
    imported_df = st.file_uploader(
        "Upload your df in format csv or xlsx",
        type=["csv", 'xlsx'],
    )
## --------------
## MAINBAR PARAMS
## --------------
tab1, tab2 = st.tabs(["LLM", "DB_Extraction"])

# Everything created inside this context is rendered in the first tab.
with tab1:
    st.title("LLM project")

    # Display model description
    if model_url:
        st.write(f"Selected model: **{model_url}**")

    # Sentence for analysis, plus the button that triggers a prediction run.
    input_sentence = st.text_area("Enter a sentence for sentiment analysis")
    button_launch = st.button('Launch Prediction')
# Logic for a single sentence typed in manually by the user.

# The last selectbox entry is a placeholder, not a model id: replace it with
# the id typed by the user. This is resolved before checking for an input
# sentence so that code further down never sees the placeholder as a model id.
# While the field is empty, model_url is falsy and no prediction is attempted.
if model_url == "another model from hub":
    model_url = tab1.text_input("Please provide the model URL here:").strip()

if model_url and input_sentence and button_launch:
    # Address of the machine running the app. Resolving the local hostname can
    # fail (e.g. when the hostname has no DNS/hosts entry); that must not abort
    # the prediction.
    # NOTE(review): assumes add_activity accepts an arbitrary string here - confirm.
    try:
        host_address = socket.gethostbyname(socket.gethostname())
    except OSError:
        host_address = "unknown"

    # Add activity to the database
    activity.add_activity(
        session,
        "admin",
        datetime.date.today(),
        datetime.datetime.now(),
        host_address,
        "LLM project",
    )

    sentiment_analyzer = load_model(model_url)

    # Get sentiment prediction (skipped when no analyzer could be loaded)
    result = sentiment_analyzer(input_sentence) if sentiment_analyzer else None

    # Display result
    if result:
        sentiment = result[0]['label']
        score = result[0]['score']
        tab1.write(f"Sentiment: **{sentiment}** with a confidence score of **{score:.2f}**")

        # Translate the raw label into a readable verdict when it is a known one.
        if sentiment in ('LABEL_1', 'POSITIVE'):
            tab1.write("Positive")
        elif sentiment in ('LABEL_0', 'NEGATIVE'):
            tab1.write("Negative")

        # Option to download results
        results_df = pd.DataFrame([{'Sentence': input_sentence, 'Sentiment': sentiment, 'Score': score}])
        csv = results_df.to_csv(index=False).encode('utf-8')
        tab1.download_button(
            label="Download results as CSV",
            data=csv,
            file_name='sentiment_analysis_results.csv',
            mime='text/csv',
            # Explicit key keeps this button distinct from any other download
            # button with the same label rendered during the same run.
            key="single_sentence_download",
        )

        # Add LLM history to the database
        llm.add_llm(
            session,
            "admin",
            input_sentence,
            sentiment,
            model_url,
            "LLM project",
        )
# Logic for a file uploaded by the user.


def _read_uploaded_table(uploaded_file):
    """Parse an uploaded csv/xlsx file into a DataFrame.

    Returns None when the file name has neither extension. The extension is
    compared case-insensitively so that e.g. ``DATA.CSV`` is accepted.
    """
    file_name = uploaded_file.name.lower()
    if file_name.endswith('csv'):
        return pd.read_csv(uploaded_file, encoding='unicode_escape')
    if file_name.endswith('xlsx'):
        return pd.read_excel(uploaded_file)
    return None


def _predict_rows(analyzer, texts, progress_bar):
    """Run ``analyzer`` on every text and return the first prediction of each.

    ``progress_bar`` is advanced after every row.
    """
    predictions = []
    total = len(texts)
    for position, text in enumerate(texts, start=1):
        predictions.append(analyzer(text)[0])
        progress_bar.progress(position / total)
    return predictions


if model_url and imported_df:
    df = _read_uploaded_table(imported_df)
    if df is None:
        tab1.write("Please upload a file in csv or xlsx format")
    else:
        # Limit to 500 rows while testing. Copy so that the result columns
        # added below are not written into a slice of the parsed frame.
        df = df.head(500).copy()
        name_of_the_column = tab1.selectbox("Select the column for sentiment analysis", df.columns)
        column_with_real_results = tab1.selectbox("Select the column with real results", df.columns)

        columns_chosen = name_of_the_column is not None and column_with_real_results is not None
        if button_launch and columns_chosen:
            # Load the model only once a run was actually requested, not on
            # every rerun of the script.
            sentiment_analyzer = load_model(model_url)
            if sentiment_analyzer:
                text_data = df[name_of_the_column].tolist()

                # Initialize progress bar
                progress_bar = tab1.progress(0)
                results = _predict_rows(sentiment_analyzer, text_data, progress_bar)

                df['Sentiment'] = [res['label'] for res in results]
                df['Score'] = [res['score'] for res in results]
                # Every label other than LABEL_1 / POSITIVE is reported as Negative.
                df['Sentiment_Label'] = df['Sentiment'].apply(
                    lambda x: 'Positive' if x in ['LABEL_1', 'POSITIVE'] else 'Negative'
                )
                tab1.write(df)

                # Compare the predicted labels with the column of real results.
                # TODO: also report the share of positive and negative reviews.
                result = compare_label_results(df, 'Sentiment_Label', column_with_real_results)
                tab1.write(f"Accuracy: {result:.2%}")

                # Option to download results
                csv = df.to_csv(index=False).encode('utf-8')
                tab1.download_button(
                    label="Download results as CSV",
                    data=csv,
                    file_name='sentiment_analysis_results.csv',
                    mime='text/csv',
                    # Explicit key keeps this button distinct from any other
                    # download button with the same label rendered during the
                    # same run.
                    key="batch_download",
                )
################################################################################
# TAB 2 DB
################################################################################
# Columns shown in the history table, in display order. Passing them explicitly
# keeps the headers visible even when no history rows exist yet (a DataFrame
# built from an empty list would otherwise have no columns at all).
llm_history_columns = [
    'id',
    'input_sentence',
    'output_label',
    'model_url',
    'type_of_activity',
    'user_id',
]

# Read every stored LLM record and flatten it into one table row per record.
llm_histories = session.query(LLM).all()
llm_histories_df = pd.DataFrame(
    [
        {column: getattr(llm_history, column) for column in llm_history_columns}
        for llm_history in llm_histories
    ],
    columns=llm_history_columns,
)

tab2.write("LLM history:")
tab2.data_editor(llm_histories_df)