"""Streamlit app: compare two LLM responses with multiple text-similarity metrics."""
import warnings
from difflib import SequenceMatcher

import bert_score
import gensim.downloader as api
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PyPDF2
import seaborn as sns
import streamlit as st
import streamlit_shadcn_ui as ui
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from scipy.spatial.distance import cosine
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import pairwise_distances
# Suppress specific FutureWarning from transformers
warnings.filterwarnings("ignore", category=FutureWarning, message=".*clean_up_tokenization_spaces.*")


# Initialize models. Streamlit reruns this script top-to-bottom on every
# interaction; caching the sentence encoder avoids reloading the model
# (a multi-hundred-MB download/instantiation) on each rerun.
@st.cache_resource
def _load_sentence_model():
    """Load the sentence-embedding model once per server process."""
    return SentenceTransformer('all-mpnet-base-v2')


model = _load_sentence_model()
tfidf_vectorizer = TfidfVectorizer()
# Initialize session state for results table if not already present.
# One row per comparison run; rows accumulate across "Submit" clicks
# within the same browser session.
if 'results_df' not in st.session_state:
    st.session_state.results_df = pd.DataFrame(columns=[
        "LLM1", "LLM2",
        "Context Similarity (%)",
        "Levenshtein Similarity (%)",
        "Jaccard Similarity (%)",
        "BLEU Score",
        "ROUGE-L (%)",
        "BERTScore (%)",
        "WMD"
    ])
def chunk_text(text, chunk_size=500):
    """Split *text* into consecutive slices of at most *chunk_size* characters.

    The final slice may be shorter; an empty string yields an empty list.
    """
    starts = range(0, len(text), chunk_size)
    return [text[start:start + chunk_size] for start in starts]
def create_embeddings(chunks):
    """Encode each text chunk into a dense sentence embedding.

    On failure the error is surfaced in the Streamlit UI and an empty
    array is returned so callers can check `.size`.
    """
    try:
        return model.encode(chunks, show_progress_bar=False)
    except Exception as e:
        st.error(f"Error creating embeddings: {e}")
        return np.array([])
# Function to calculate similarity ratio and find matches
def calculate_similarity_ratio_and_find_matches(embeddings1, embeddings2):
    """Return the cosine-similarity matrix between two chunk-embedding sets
    and the mean best-match similarity for the first set.

    Bug fix: the original used a raw dot product as "similarity" —
    all-mpnet-base-v2 embeddings are not unit-normalized, so the downstream
    "Context Similarity (%)" could exceed 100%. Rows are now normalized so
    the dot product is a true cosine similarity in [-1, 1].
    """
    try:
        norms1 = np.linalg.norm(embeddings1, axis=1, keepdims=True)
        norms2 = np.linalg.norm(embeddings2, axis=1, keepdims=True)
        # Guard zero-norm rows to avoid division by zero (their similarity is 0).
        unit1 = embeddings1 / np.where(norms1 == 0, 1, norms1)
        unit2 = embeddings2 / np.where(norms2 == 0, 1, norms2)
        similarities = unit1 @ unit2.T  # cosine similarity matrix
        max_similarities = np.max(similarities, axis=1)  # best match per chunk in embeddings1
        average_similarity = float(np.mean(max_similarities))
        return similarities, average_similarity
    except Exception as e:
        st.error(f"Error calculating similarity ratio: {e}")
        return np.array([]), 0
# Function to calculate word similarity ratio
def calculate_word_similarity_ratio(text1, text2):
    """Mean best-match cosine similarity between per-word embeddings.

    For each word in *text1*, the highest cosine similarity against any word
    of *text2* is taken; the average of those maxima is returned (0 when
    either text has no words).

    Performance fix: the original computed an O(n*m) Python loop of scipy
    `cosine` calls; this vectorizes the whole similarity matrix in numpy.
    """
    try:
        words1 = text1.split()
        words2 = text2.split()
        if not words1 or not words2:
            return 0
        emb1 = np.asarray(model.encode(words1), dtype=float)
        emb2 = np.asarray(model.encode(words2), dtype=float)
        n1 = np.linalg.norm(emb1, axis=1, keepdims=True)
        n2 = np.linalg.norm(emb2, axis=1, keepdims=True)
        # Normalize (guarding zero rows) so the matrix product is cosine similarity.
        sims = (emb1 / np.where(n1 == 0, 1, n1)) @ (emb2 / np.where(n2 == 0, 1, n2)).T
        best_per_word = sims.max(axis=1)
        return float(np.mean(best_per_word)) if best_per_word.size > 0 else 0
    except Exception as e:
        st.error(f"Error calculating word similarity ratio: {e}")
        return 0
def calculate_bleu_score(reference, candidate):
    """Sentence-level BLEU of *candidate* against the single *reference*."""
    reference_tokens = reference.split()
    candidate_tokens = candidate.split()
    return sentence_bleu([reference_tokens], candidate_tokens)
def calculate_rouge_l_score(reference, candidate):
    """ROUGE-L F1 between the two texts, scaled to a 0-100 percentage."""
    rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    result = rouge.score(reference, candidate)
    return 100 * result['rougeL'].fmeasure
def calculate_bertscore(reference, candidate):
    """BERTScore F1 of *candidate* vs *reference*, as a 0-100 percentage."""
    _precision, _recall, f1 = bert_score.score(
        [candidate], [reference], model_type='bert-base-uncased'
    )
    return f1.mean().item() * 100
# Function to calculate WMD
def _get_wmd_model():
    """Load the word2vec model once and memoize it on the function object."""
    if not hasattr(_get_wmd_model, "_model"):
        _get_wmd_model._model = api.load("word2vec-google-news-300")
    return _get_wmd_model._model


def calculate_wmd(reference, candidate):
    """Word Mover's Distance between the two texts (lower = more similar).

    Bug fix: the original loaded the ~1.6 GB "word2vec-google-news-300"
    model on EVERY call; the model is now loaded once and reused.
    """
    return _get_wmd_model().wmdistance(reference.split(), candidate.split())
# Function to extract text from PDF
def extract_pdf_text(pdf_file):
    """Concatenate the extracted text of every page of an uploaded PDF.

    Returns "" (and shows the error in the UI) on any parse failure.

    Bug fix: `page.extract_text()` can return None (e.g. image-only pages),
    which made the original `text += ...` raise TypeError; such pages are
    now skipped. Also builds the result with join instead of quadratic +=.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        parts = []
        for page in reader.pages:
            page_text = page.extract_text()
            if page_text:
                parts.append(page_text)
        return "".join(parts)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
        return ""
def calculate_levenshtein_ratio(text1, text2):
    """Similarity of the two strings in [0, 1].

    NOTE(review): despite the name this is difflib's SequenceMatcher ratio
    (2*M/T over matching blocks), not a true Levenshtein-distance measure.
    """
    matcher = SequenceMatcher(None, text1, text2)
    return matcher.ratio()
def calculate_jaccard_similarity(text1, text2):
    """Jaccard similarity over binary token-presence vectors of the two texts.

    Tokenization follows CountVectorizer defaults (lowercased, >=2-char
    word tokens). Returns 0 when the union is empty.
    """
    binary_matrix = CountVectorizer(binary=True).fit_transform([text1, text2]).toarray()
    vec_a, vec_b = binary_matrix[0], binary_matrix[1]
    # |A ∩ B| / |A ∪ B| via elementwise min/max of the 0/1 vectors.
    intersection = np.sum(np.minimum(vec_a, vec_b))
    union = np.sum(np.maximum(vec_a, vec_b))
    if union == 0:
        return 0
    return intersection / union
# Function to calculate TF-IDF cosine similarity
def calculate_tfidf_cosine_similarity(text1, text2):
    """Cosine similarity in [0, 1] of the two texts' TF-IDF vectors.

    Fix: uses a fresh vectorizer per call instead of refitting the shared
    module-level `tfidf_vectorizer`, so this no longer mutates global state
    as a side effect (and matches the local-vectorizer style of
    calculate_jaccard_similarity). Per-call results are unchanged.
    """
    tfidf_matrix = TfidfVectorizer().fit_transform([text1, text2])
    return 1 - pairwise_distances(tfidf_matrix, metric='cosine')[0, 1]
# ---------------- Streamlit UI: sidebar and response inputs ----------------
st.sidebar.title("LLM Details")
llm1_name = st.sidebar.text_input("What is LLM1?", "LLM1")
llm2_name = st.sidebar.text_input("What is LLM2?", "LLM2")

st.title("Text-Based Similarity Comparison")

# Side-by-side inputs: each side accepts either a PDF upload or pasted text,
# with an uploaded PDF taking precedence over the text area.
col1, col2 = st.columns(2)

with col1:
    st.write(f"**{llm1_name} response**")
    upload_pdf_1 = st.file_uploader(f"Upload PDF for {llm1_name} response", type="pdf", key="pdf1")
    if upload_pdf_1:
        text_input_1 = extract_pdf_text(upload_pdf_1)
    else:
        text_input_1 = st.text_area(f" Text for {llm1_name}", height=150, key="text1")

with col2:
    st.write(f"**{llm2_name} response**")
    upload_pdf_2 = st.file_uploader(f"Upload PDF for {llm2_name} response", type="pdf", key="pdf2")
    if upload_pdf_2:
        text_input_2 = extract_pdf_text(upload_pdf_2)
    else:
        text_input_2 = st.text_area(f" Text for {llm2_name}", height=150, key="text2")
if (text_input_1 and text_input_2) or (upload_pdf_1 and upload_pdf_2):
    if st.button("Submit"):
        # Chunk both responses and embed each chunk for context-level comparison.
        chunks_1 = chunk_text(text_input_1)
        chunks_2 = chunk_text(text_input_2)
        embeddings_1 = create_embeddings(chunks_1)
        embeddings_2 = create_embeddings(chunks_2)
        # Proceed only if both embedding steps succeeded.
        if embeddings_1.size > 0 and embeddings_2.size > 0:
            similarities, similarity_ratio = calculate_similarity_ratio_and_find_matches(embeddings_1, embeddings_2)
            # Word-level similarity per positionally-paired chunk (extra chunks
            # of the longer text are ignored).
            word_similarities = []
            min_chunks = min(len(chunks_1), len(chunks_2))
            for i in range(min_chunks):
                word_similarity_ratio = calculate_word_similarity_ratio(chunks_1[i], chunks_2[i])
                word_similarities.append(word_similarity_ratio * 100)
            # Whole-text metrics.
            levenshtein_ratio = calculate_levenshtein_ratio(text_input_1, text_input_2) * 100
            jaccard_similarity = calculate_jaccard_similarity(text_input_1, text_input_2) * 100
            tfidf_cosine_similarity = calculate_tfidf_cosine_similarity(text_input_1, text_input_2) * 100
            bleu_score = calculate_bleu_score(text_input_1, text_input_2) * 100
            rouge_l_score = calculate_rouge_l_score(text_input_1, text_input_2)
            bertscore = calculate_bertscore(text_input_1, text_input_2)
            wmd = calculate_wmd(text_input_1, text_input_2)
            # Persist this run in the session table. Fix: TF-IDF cosine
            # similarity was computed and displayed but never stored; it is
            # now recorded (pd.concat adds the column to the table by union).
            new_row = pd.Series({
                "LLM1": llm1_name,
                "LLM2": llm2_name,
                "Context Similarity (%)": similarity_ratio * 100,
                "Levenshtein Similarity (%)": levenshtein_ratio,
                "Jaccard Similarity (%)": jaccard_similarity,
                "TF-IDF Cosine Similarity (%)": tfidf_cosine_similarity,
                "BLEU Score": bleu_score,
                "ROUGE-L (%)": rouge_l_score,
                "BERTScore (%)": bertscore,
                "WMD": wmd
            })
            st.session_state.results_df = pd.concat([st.session_state.results_df, new_row.to_frame().T], ignore_index=True)
            # Display metrics
            st.subheader("Results")
            st.write(f"**Context Similarity:** {similarity_ratio * 100:.2f}%")
            st.write(f"**Levenshtein Similarity:** {levenshtein_ratio:.2f}%")
            st.write(f"**Jaccard Similarity:** {jaccard_similarity:.2f}%")
            st.write(f"**TF-IDF Cosine Similarity:** {tfidf_cosine_similarity:.2f}%")
            st.write(f"**BLEU Score:** {bleu_score:.2f}")
            st.write(f"**ROUGE-L Score:** {rouge_l_score:.2f}%")
            st.write(f"**BERTScore:** {bertscore:.2f}%")
            st.write(f"**Word Mover's Distance (WMD):** {wmd:.4f}")
            # Bar chart over all accumulated percentage metrics (WMD excluded:
            # it is a distance on a different scale, not a percentage).
            st.subheader("Metrics Comparison")
            sns.set(style="whitegrid")
            fig, ax = plt.subplots(figsize=(10, 6))
            sns.barplot(data=st.session_state.results_df.drop(columns=["LLM1", "LLM2", "WMD"]), palette="viridis")
            plt.xticks(rotation=45)
            plt.title("Text Similarity Metrics")
            plt.tight_layout()
            st.pyplot(fig)
            # Per-chunk word similarity vs. the overall context similarity line.
            st.subheader("Similarity Over Chunks")
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.plot(word_similarities, marker='o', linestyle='-', color='b', label='Word Similarity')
            ax.axhline(similarity_ratio * 100, color='r', linestyle='--', label='Context Similarity')
            plt.xlabel("Chunk Index")
            plt.ylabel("Similarity (%)")
            plt.legend()
            plt.title("Similarity across Text Chunks")
            plt.tight_layout()
            st.pyplot(fig)
            # Display results dataframe
            st.subheader("Detailed Results Table")
            st.write(st.session_state.results_df)
else:
    st.warning("Please enter both responses or upload PDF files.")