Spaces:

EN-collab
/

HQ_Project_EN

Running

App Files Files Community

HQ_Project_EN / pages /Project_1.1_-_LLM.py

1mpreccable

Initial commit with cleared history

cff87c7 9 months ago

raw

history blame contribute delete

7.55 kB

	import pandas as pd
	import streamlit as st
	import datetime
	import socket

	from src.functions_llm import load_model, compare_label_results
	from src.functions_db import connect_to_db, Activity, LLM


	# Model identifiers offered in the sidebar selectbox. The last entry is a
	# placeholder: when it is picked, the page asks the user for a model URL.
	list_of_models = [
	    "1mpreccable/10k_trained_bert",
	    "naloui/results",
	    "another model from hub",
	]

	# connect to the database
	# (the fifth value returned by connect_to_db is not used on this page)
	session, user, activity, llm, _ = connect_to_db(
	    address="sqlite:///src/databases/main.db",
	)


	## --------------
	## SIDEBAR PARAMS
	## --------------

	# Everything created inside this context is rendered in the sidebar.
	with st.sidebar:
	    st.title("App parameters")

	    # Model picker; the chosen entry is used by both prediction flows below.
	    model_url = st.selectbox("Choose your model", list_of_models)
	    st.divider()

	    # Optional dataset for batch prediction (None until a file is uploaded).
	    imported_df = st.file_uploader(
	        "Upload your df in format csv or xlsx",
	        type=["csv", 'xlsx'],
	    )

	## --------------
	## MAINBAR PARAMS
	## --------------
	tab1, tab2 = st.tabs(["LLM", "DB_Extraction"])

	# Build the static part of the first tab: title, model reminder, inputs.
	with tab1:
	    st.title("LLM project")

	    # Display model description
	    if model_url:
	        st.write(f"Selected model: {model_url}")

	    # Sentence for analysis
	    input_sentence = st.text_area("Enter a sentence for sentiment analysis")
	    # True only on the rerun triggered by clicking the button.
	    button_launch = st.button('Launch Prediction')

	# Single-sentence flow: active once a model is selected and the text area
	# contains something typed by the user.
	if model_url and input_sentence:
	    # The placeholder entry is replaced by whatever URL the user types in.
	    if model_url == "another model from hub":
	        model_url = tab1.text_input("Please provide the model URL here:")

	    if button_launch:
	        # Add activity to the database
	        activity_date = datetime.date.today()
	        activity_time = datetime.datetime.now()
	        host_ip = socket.gethostbyname(socket.gethostname())
	        activity.add_activity(
	            session,
	            "admin",
	            activity_date,
	            activity_time,
	            host_ip,
	            "LLM project",
	        )

	        analyzer = load_model(model_url)

	        # Get sentiment prediction (skipped when load_model gave a falsy value)
	        prediction = analyzer(input_sentence) if analyzer else None

	        # Display result
	        if prediction:
	            top_hit = prediction[0]
	            sentiment, score = top_hit['label'], top_hit['score']
	            tab1.write(f"Sentiment: {sentiment} with a confidence score of {score:.2f}")

	            # Map the raw label onto readable text; other labels print nothing.
	            readable_label = {
	                'LABEL_1': "Positive",
	                'POSITIVE': "Positive",
	                'LABEL_0': "Negative",
	                'NEGATIVE': "Negative",
	            }.get(sentiment)
	            if readable_label is not None:
	                tab1.write(readable_label)

	            # Option to download results
	            csv_bytes = (
	                pd.DataFrame([{'Sentence': input_sentence, 'Sentiment': sentiment, 'Score': score}])
	                .to_csv(index=False)
	                .encode('utf-8')
	            )
	            tab1.download_button(
	                label="Download results as CSV",
	                data=csv_bytes,
	                file_name='sentiment_analysis_results.csv',
	                mime='text/csv',
	            )

	            # Add LLM history to the database
	            llm.add_llm(
	                session,
	                "admin",
	                input_sentence,
	                sentiment,
	                model_url,
	                "LLM project",
	            )

	#logic for file uploaded by user
	# Batch flow: read the uploaded csv/xlsx, let the user pick the text column
	# and the ground-truth column, classify every row, then show the table, the
	# accuracy returned by compare_label_results and a CSV download.
	if model_url and imported_df:
	    # Compare the extension case-insensitively so "DATA.CSV" is recognised.
	    uploaded_name = imported_df.name.lower()
	    if uploaded_name.endswith('csv'):
	        df = pd.read_csv(imported_df, encoding='unicode_escape')
	    elif uploaded_name.endswith('xlsx'):
	        df = pd.read_excel(imported_df)
	    else:
	        df = None
	        tab1.write("Please upload a file in csv or xlsx format")

	    # Both formats share one code path from here on.
	    if df is not None:
	        # Limit to 500 rows while testing. Copy so that the columns added
	        # below are written to an independent frame, not a slice.
	        df = df[:500].copy()

	        name_of_the_column = tab1.selectbox("Select the column for sentiment analysis", df.columns)
	        column_with_real_results = tab1.selectbox("Select the column with real results", df.columns)

	        sentiment_analyzer = load_model(model_url)
	        if sentiment_analyzer and name_of_the_column and button_launch and column_with_real_results:
	            text_data = df[name_of_the_column].tolist()
	            total_rows = len(text_data)

	            # Initialize progress bar
	            progress_bar = tab1.progress(0)
	            results = []
	            for row_number, text in enumerate(text_data, start=1):
	                # Keep the first entry of what the analyzer returns for this text.
	                results.append(sentiment_analyzer(text)[0])
	                # Update progress bar
	                progress_bar.progress(row_number / total_rows)

	            df['Sentiment'] = [res['label'] for res in results]
	            df['Score'] = [res['score'] for res in results]

	            # Any label other than LABEL_1 / POSITIVE is reported as 'Negative'.
	            df['Sentiment_Label'] = df['Sentiment'].apply(
	                lambda x: 'Positive' if x in ['LABEL_1', 'POSITIVE'] else 'Negative'
	            )

	            tab1.write(df)

	            # Compare predicted labels with the user-selected ground-truth column.
	            accuracy = compare_label_results(df, 'Sentiment_Label', column_with_real_results)
	            tab1.write(f"Accuracy: {accuracy:.2%}")

	            # Option to download results. The explicit key keeps this button
	            # distinct from the single-sentence download button, which has
	            # the same label, file name and MIME type.
	            csv = df.to_csv(index=False).encode('utf-8')
	            tab1.download_button(
	                label="Download results as CSV",
	                data=csv,
	                file_name='sentiment_analysis_results.csv',
	                mime='text/csv',
	                key='batch_results_download',
	            )
	            # TODO: add % of positive and negative reviews and compare with the real results


	################################################################################
	# TAB 2 DB
	################################################################################

	# Show every stored LLM record in the second tab as an editable table.
	# Attributes copied from each LLM row, in display order.
	llm_history_columns = [
	    'id',
	    'input_sentence',
	    'output_label',
	    'model_url',
	    'type_of_activity',
	    'user_id',
	]

	llm_histories = session.query(LLM).all()

	# Passing columns explicitly keeps the headers visible even when the query
	# returns no rows (a frame built from an empty list has no columns at all).
	llm_histories_df = pd.DataFrame(
	    [
	        {column: getattr(llm_history, column) for column in llm_history_columns}
	        for llm_history in llm_histories
	    ],
	    columns=llm_history_columns,
	)

	tab2.write("LLM history:")
	tab2.data_editor(llm_histories_df)