| import streamlit as st |
| import pandas as pd |
| from transformers import pipeline, AutoTokenizer, AutoModel |
| import torch |
| from scipy.spatial.distance import cosine |
| import re |
| from collections import Counter |
| import nltk |
| from nltk.corpus import stopwords |
| import matplotlib.pyplot as plt |
|
|
# Fetch the NLTK stopword corpus needed by stopwords.words('english') below.
nltk.download('stopwords')
|
|
|
|
|
|
# st.cache_resource (not st.cache_data) is the correct cache for ML models:
# cache_data tries to hash/serialize return values, which fails or is wasteful
# for tokenizers, models, and pipelines. cache_resource keeps one shared,
# unserialized instance per session.
@st.cache_resource
def load_models():
    """Load and cache all NLP models used by the app.

    Returns:
        tuple: (tokenizer, model, sentiment_analysis, zero_shot_classifier)
            - MiniLM tokenizer/model for sentence embeddings,
            - a default sentiment-analysis pipeline,
            - a default zero-shot-classification pipeline.
    """
    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
    model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
    sentiment_analysis = pipeline("sentiment-analysis")
    zero_shot_classifier = pipeline("zero-shot-classification")
    return tokenizer, model, sentiment_analysis, zero_shot_classifier
|
|
# Load the cached models once at import time; every analysis helper below uses them.
tokenizer, model, sentiment_analysis, zero_shot_classifier = load_models()


# English stopwords used by analyze_specificity to filter out non-content words.
stop_words = set(stopwords.words('english'))
|
|
| |
def encode(text):
    """Embed *text* as a single vector by mean-pooling MiniLM token states."""
    tokens = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        hidden = model(**tokens)
    # Average over the sequence dimension to collapse tokens into one embedding.
    return hidden.last_hidden_state.mean(dim=1).squeeze()
|
|
def calculate_similarity(embedding1, embedding2):
    """Cosine similarity (1 = identical direction) between two torch embeddings."""
    # ravel() covers both the 1-D case and any extra leading dimensions.
    vec_a = embedding1.numpy().ravel()
    vec_b = embedding2.numpy().ravel()
    # scipy's `cosine` is a distance, so similarity = 1 - distance.
    return 1 - cosine(vec_a, vec_b)
|
|
|
|
| |
def analyze_positivity(text):
    """Map sentiment onto a 0-100 scale: 50 is neutral, >50 positive, <50 negative."""
    top = sentiment_analysis(text)[0]
    confidence = top['score']
    if top['label'] == 'POSITIVE':
        return 50 + confidence * 50
    return 50 - confidence * 50
|
|
def analyze_decisiveness(text):
    """Score 0-100 for how confident (vs. uncertain) the text sounds.

    Uses zero-shot classification over ['confident', 'uncertain'] and returns
    the probability assigned specifically to 'confident', scaled to 0-100.
    """
    candidate_labels = ['confident', 'uncertain']
    result = zero_shot_classifier(text, candidate_labels=candidate_labels)
    # Bug fix: max(result['scores']) rewarded whichever label won, so highly
    # *uncertain* text also scored as highly decisive. Read the 'confident'
    # label's score instead (the pipeline returns labels sorted by score, so
    # we must look it up by name, not position).
    confident_index = result['labels'].index('confident')
    score = result['scores'][confident_index]
    return score * 100
|
|
def analyze_specificity(text):
    """Score 0-100 combining vocabulary diversity with technical-term coverage.

    Diversity (unique/total content words) contributes up to 50 points;
    the fraction of known technical terms used contributes up to 50 more.
    """
    words = [word for word in re.findall(r'\b\w+\b', text.lower())
             if word not in stop_words and word.isalpha()]
    # Bug fix: empty or stopword-only text previously raised ZeroDivisionError.
    if not words:
        return 0
    unique_words = set(words)
    technical_terms = {'algorithm', 'data', 'analysis', 'design', 'programming'}
    technical_term_count = sum(1 for word in unique_words if word in technical_terms)
    specificity_score = (len(unique_words) / len(words) * 50) + (technical_term_count * 50 / len(technical_terms))
    return min(100, specificity_score)
|
|
|
|
def analyze_self_awareness(text):
    """Score 0-100 blending reflection-keyword density (70%) with positive sentiment (30%)."""
    reflection_keywords = ['strengths', 'weaknesses', 'learned', 'improved', 'challenge', 'goal', 'feedback']
    total_words = len(re.findall(r'\b\w+\b', text.lower()))
    # Bug fix: empty text previously raised ZeroDivisionError; treat it as
    # showing no self-awareness (and skip the model call entirely).
    if total_words == 0:
        return 0
    reflection_count = sum(text.lower().count(keyword) for keyword in reflection_keywords)
    reflection_density = (reflection_count / total_words) * 100
    positive_score = analyze_sentiment_for_self_awareness(text)
    self_awareness_score = (reflection_density * 0.7) + (positive_score * 0.3)
    return min(100, self_awareness_score)
|
|
def analyze_sentiment_for_self_awareness(text):
    """Return the POSITIVE-sentiment confidence scaled to 0-100 (0 for non-positive text)."""
    top = sentiment_analysis(text)[0]
    if top['label'] != 'POSITIVE':
        return 0
    return top['score'] * 100
|
|
|
|
def analyze_career_knowledge(text):
    """Score 0-100: confidence of the strongest zero-shot match among career topics."""
    career_topics = ["data science", "software engineering", "product management"]
    classification = zero_shot_classifier(text, candidate_labels=career_topics)
    # The best-matching topic's probability stands in for topical knowledge.
    return max(classification['scores']) * 100
|
|
def analyze_decision_anxiety(text):
    """Score 0-100 combining negative sentiment with anxiety-keyword frequency."""
    anxiety_keywords = ['uncertain', 'doubt', 'unsure', 'anxious', 'overwhelmed']
    lowered = text.lower()
    keyword_hits = sum(lowered.count(word) for word in anxiety_keywords)
    top = sentiment_analysis(text)[0]
    negative_score = top['score'] if top['label'] == 'NEGATIVE' else 0
    # Each keyword occurrence adds 0.1 before scaling; capped at 100.
    combined = negative_score + (keyword_hits * 0.1)
    return min(100, combined * 100)
|
|
def analyze_values_alignment(text):
    """Score 0-100: embedding similarity between the text and a fixed values statement."""
    values = "innovation, teamwork, impact"
    similarity = calculate_similarity(encode(text), encode(values))
    return similarity * 100
|
|
def analyze_interests_alignment(text):
    """Score 0-100: embedding similarity between the text and a fixed interests statement."""
    interests = "technology, programming, design"
    similarity = calculate_similarity(encode(text), encode(interests))
    return similarity * 100
|
|
def analyze_qa_pair(question, answer):
    """Run every analysis metric over one question/answer pair.

    The question and answer are concatenated and each analyzer scores the
    combined text. Returns a dict mapping metric name -> 0-100 score.
    """
    combined_text = f"{question} {answer}"
    # Dispatch table keeps metric names and their analyzers in one place.
    analyzers = {
        "Positivity": analyze_positivity,
        "Decisiveness": analyze_decisiveness,
        "Specificity": analyze_specificity,
        "Self-awareness": analyze_self_awareness,
        "Career Knowledge": analyze_career_knowledge,
        "Decision Anxiety": analyze_decision_anxiety,
        "Values Alignment": analyze_values_alignment,
        "Interests Alignment": analyze_interests_alignment,
    }
    return {name: analyzer(combined_text) for name, analyzer in analyzers.items()}
| |
|
|
| |
def plot_average_scores(avg_scores):
    """
    Plots the average scores across all Q&A pairs with parameters on the x-axis
    and values on the y-axis ranging from 0-100.
    """
    labels = list(avg_scores)
    values = [avg_scores[label] for label in labels]

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.bar(labels, values, color='skyblue')

    # Fix the axis so scores are visually comparable across runs.
    ax.set_ylim(0, 100)
    ax.set_ylabel('Average Score')
    ax.set_title('Average Scores Across All Q&A Pairs')

    # Slant long parameter names so they don't overlap.
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    return fig
|
|
|
|
|
|
|
|
| |
def main():
    """Streamlit entry point: upload an Excel Q&A sheet and display average scores."""
    st.title("Q&A Analysis App")
    uploaded_file = st.file_uploader("Choose an Excel file with Q&A", type=["xlsx"])

    # Guard clause: nothing to do until the user uploads a file.
    if uploaded_file is None:
        return

    df = pd.read_excel(uploaded_file)
    avg_scores = process_and_analyze_qa(df)

    st.subheader("Average Scores Across All Q&A Pairs:")
    st.table(pd.DataFrame(avg_scores.items(), columns=['Parameter', 'Value']))

    st.pyplot(plot_average_scores(avg_scores))
|
|
| |
|
|
|
|
def process_and_analyze_qa(df):
    """Score every (question, answer) pair in *df* and average per metric.

    Each DataFrame column header is treated as a question and every non-null
    cell in that column as one answer to it.

    Returns:
        dict: metric name -> mean 0-100 score across all pairs
              (empty if the frame contains no answers).
    """
    # Perf fix: the original called analyze_qa_pair("Q", "A") just to discover
    # the metric names, running all eight model inferences on dummy text.
    # Accumulating with setdefault needs no priming call.
    scores_dict = {}
    for question in df.columns:
        for answer in df[question].dropna():
            for key, value in analyze_qa_pair(question, answer).items():
                scores_dict.setdefault(key, []).append(value)

    return {key: sum(values) / len(values) for key, values in scores_dict.items()}
|
|
|
|
|
|
|
|
# Run the Streamlit app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|