# app.py — Streamlit Q&A analysis app
# (Hugging Face Space "restart", commit 2b3b522, author: ombhojane)
import streamlit as st
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModel
import torch
from scipy.spatial.distance import cosine
import re
from collections import Counter
import nltk
from nltk.corpus import stopwords
import matplotlib.pyplot as plt
nltk.download('stopwords')
@st.cache_data
def load_models():
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
sentiment_analysis = pipeline("sentiment-analysis")
zero_shot_classifier = pipeline("zero-shot-classification")
return tokenizer, model, sentiment_analysis, zero_shot_classifier
tokenizer, model, sentiment_analysis, zero_shot_classifier = load_models()
stop_words = set(stopwords.words('english'))
# Define necessary functions for encoding, similarity calculation, and parameter analysis here...
def encode(text):
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
with torch.no_grad():
outputs = model(**inputs)
# Ensure the output is a 1-D tensor by using .squeeze()
return outputs.last_hidden_state.mean(dim=1).squeeze()
def calculate_similarity(embedding1, embedding2):
# Ensure embeddings are numpy arrays in 1-D format
embedding1_np = embedding1.numpy()
embedding2_np = embedding2.numpy()
# Check if embeddings are already 1-D, if not, flatten them
if embedding1_np.ndim > 1:
embedding1_np = embedding1_np.flatten()
if embedding2_np.ndim > 1:
embedding2_np = embedding2_np.flatten()
return 1 - cosine(embedding1_np, embedding2_np)
# Analysis Functions for Each Parameter
def analyze_positivity(text):
result = sentiment_analysis(text)
score = result[0]['score']
return 50 + (score * 50) if result[0]['label'] == 'POSITIVE' else 50 - (score * 50)
def analyze_decisiveness(text):
candidate_labels = ['confident', 'uncertain']
result = zero_shot_classifier(text, candidate_labels=candidate_labels)
score = max(result['scores'])
return score * 100
def analyze_specificity(text):
words = [word for word in re.findall(r'\b\w+\b', text.lower()) if word not in stop_words and word.isalpha()]
unique_words = set(words)
technical_terms = {'algorithm', 'data', 'analysis', 'design', 'programming'} # Example set
technical_term_count = sum(1 for word in unique_words if word in technical_terms)
specificity_score = (len(unique_words) / len(words) * 50) + (technical_term_count * 50 / len(technical_terms))
return min(100, specificity_score)
def analyze_self_awareness(text):
reflection_keywords = ['strengths', 'weaknesses', 'learned', 'improved', 'challenge', 'goal', 'feedback']
reflection_count = sum(text.lower().count(keyword) for keyword in reflection_keywords)
total_words = len(re.findall(r'\b\w+\b', text.lower()))
reflection_density = (reflection_count / total_words) * 100
positive_score = analyze_sentiment_for_self_awareness(text)
self_awareness_score = (reflection_density * 0.7) + (positive_score * 0.3)
return min(100, self_awareness_score)
def analyze_sentiment_for_self_awareness(text):
result = sentiment_analysis(text)
positive_score = result[0]['score'] if result[0]['label'] == 'POSITIVE' else 0
return positive_score * 100
def analyze_career_knowledge(text):
career_topics = ["data science", "software engineering", "product management"]
result = zero_shot_classifier(text, candidate_labels=career_topics)
highest_score = max(result['scores'])
return highest_score * 100
def analyze_decision_anxiety(text):
anxiety_keywords = ['uncertain', 'doubt', 'unsure', 'anxious', 'overwhelmed']
anxiety_keyword_count = sum(text.lower().count(keyword) for keyword in anxiety_keywords)
sentiment_result = sentiment_analysis(text)
negative_score = sentiment_result[0]['score'] if sentiment_result[0]['label'] == 'NEGATIVE' else 0
anxiety_score = negative_score + (anxiety_keyword_count * 0.1)
return min(100, anxiety_score * 100)
def analyze_values_alignment(text):
# Placeholder: Replace with your values and method for calculating semantic similarity
values = "innovation, teamwork, impact"
embedding_text = encode(text)
embedding_values = encode(values)
similarity_score = calculate_similarity(embedding_text, embedding_values)
return similarity_score * 100
def analyze_interests_alignment(text):
# Placeholder: Replace with your interests and method for calculating semantic similarity
interests = "technology, programming, design"
embedding_text = encode(text)
embedding_interests = encode(interests)
similarity_score = calculate_similarity(embedding_text, embedding_interests)
return similarity_score * 100
def analyze_qa_pair(question, answer):
combined_text = f"{question} {answer}"
# Calculate scores for each parameter using the combined text
positivity_score = analyze_positivity(combined_text)
decisiveness_score = analyze_decisiveness(combined_text)
specificity_score = analyze_specificity(combined_text)
self_awareness_score = analyze_self_awareness(combined_text)
career_knowledge_score = analyze_career_knowledge(combined_text)
decision_anxiety_score = analyze_decision_anxiety(combined_text)
values_alignment_score = analyze_values_alignment(combined_text)
interests_alignment_score = analyze_interests_alignment(combined_text)
return {
"Positivity": positivity_score,
"Decisiveness": decisiveness_score,
"Specificity": specificity_score,
"Self-awareness": self_awareness_score,
"Career Knowledge": career_knowledge_score,
"Decision Anxiety": decision_anxiety_score,
"Values Alignment": values_alignment_score,
"Interests Alignment": interests_alignment_score,
}
# Plotting helper (the Excel Q&A data is loaded in main() below)
def plot_average_scores(avg_scores):
"""
Plots the average scores across all Q&A pairs with parameters on the x-axis
and values on the y-axis ranging from 0-100.
"""
parameters = list(avg_scores.keys())
scores = list(avg_scores.values())
fig, ax = plt.subplots(figsize=(10, 8))
ax.bar(parameters, scores, color='skyblue')
# Set limits for y-axis from 0 to 100
ax.set_ylim(0, 100)
# Rotate parameter labels to fit and make readable
plt.xticks(rotation=45, ha="right")
# Setting labels and title
ax.set_ylabel('Average Score')
ax.set_title('Average Scores Across All Q&A Pairs')
plt.tight_layout()
return fig
# Streamlit app main function
def main():
st.title("Q&A Analysis App")
uploaded_file = st.file_uploader("Choose an Excel file with Q&A", type=["xlsx"])
if uploaded_file is not None:
df = pd.read_excel(uploaded_file)
avg_scores = process_and_analyze_qa(df)
# Display parameter values in a table
st.subheader("Average Scores Across All Q&A Pairs:")
st.table(pd.DataFrame(avg_scores.items(), columns=['Parameter', 'Value']))
# Generate and display the custom graph
fig = plot_average_scores(avg_scores)
st.pyplot(fig)
def process_and_analyze_qa(df):
scores_dict = {key: [] for key in analyze_qa_pair("Q", "A").keys()}
for question in df.columns:
for answer in df[question].dropna():
scores = analyze_qa_pair(question, answer)
for key, value in scores.items():
scores_dict[key].append(value)
avg_scores = {key: sum(values) / len(values) for key, values in scores_dict.items() if values}
return avg_scores
if __name__ == "__main__":
main()