# app.py — Streamlit LLM-response similarity comparison tool.
# (Hugging Face Space export metadata: author vIVANsy, "Update app.py",
#  commit f161f54 verified, 9.83 kB — kept here as a comment so the file
#  remains valid Python.)
import streamlit as st
import warnings
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import pairwise_distances
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
import numpy as np
import PyPDF2
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from difflib import SequenceMatcher
import streamlit_shadcn_ui as ui
import bert_score
import gensim.downloader as api
# Suppress specific FutureWarning from transformers
# (the clean_up_tokenization_spaces deprecation notice emitted via the tokenizer).
warnings.filterwarnings("ignore", category=FutureWarning, message=".*clean_up_tokenization_spaces.*")
# Initialize models
# NOTE(review): both objects are created at import time, i.e. on every fresh
# Streamlit session start — the sentence-transformer download/load can be slow
# on cold start.
model = SentenceTransformer('all-mpnet-base-v2')
# Re-fit on each pair of texts inside calculate_tfidf_cosine_similarity.
tfidf_vectorizer = TfidfVectorizer()
# Streamlit re-executes this script top-to-bottom on every interaction, so the
# accumulating results table lives in session_state and is created only once.
if 'results_df' not in st.session_state:
    result_columns = [
        "LLM1", "LLM2",
        "Context Similarity (%)",
        "Levenshtein Similarity (%)",
        "Jaccard Similarity (%)",
        "BLEU Score",
        "ROUGE-L (%)",
        "BERTScore (%)",
        "WMD",
    ]
    st.session_state.results_df = pd.DataFrame(columns=result_columns)
# Function to chunk text into smaller parts
def chunk_text(text, chunk_size=500):
    """Split *text* into consecutive pieces of at most *chunk_size* characters.

    Returns an empty list for empty input; the final chunk may be shorter
    than *chunk_size*.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        pieces.append(text[start:start + chunk_size])
    return pieces
# Function to create embeddings
def create_embeddings(chunks):
    """Encode a list of text chunks with the module-level SentenceTransformer.

    On failure, reports the error in the Streamlit UI and returns an empty
    numpy array so callers can test ``.size``.
    """
    try:
        return model.encode(chunks, show_progress_bar=False)
    except Exception as e:
        st.error(f"Error creating embeddings: {e}")
        return np.array([])
# Function to calculate similarity ratio and find matches
def calculate_similarity_ratio_and_find_matches(embeddings1, embeddings2):
    """Score how well chunks in *embeddings1* are covered by *embeddings2*.

    Builds the full dot-product matrix (rows = embeddings1 chunks, columns =
    embeddings2 chunks), takes each row's best match, and averages those
    maxima. Returns ``(similarity_matrix, average_best_similarity)``; on
    error reports to the UI and returns ``(empty array, 0)``.
    """
    try:
        sim_matrix = embeddings1 @ embeddings2.T
        best_match_per_chunk = sim_matrix.max(axis=1)
        return sim_matrix, np.mean(best_match_per_chunk)
    except Exception as e:
        st.error(f"Error calculating similarity ratio: {e}")
        return np.array([]), 0
# Function to calculate word similarity ratio
def calculate_word_similarity_ratio(text1, text2):
    """Average, over every word of *text1*, of its best cosine similarity
    to any word of *text2* (word embeddings from the module-level model).

    Returns 0 when either text has no words or on error (error shown in UI).
    NOTE(review): encodes every word individually — O(w1 * w2) comparisons,
    expensive for long chunks.
    """
    try:
        tokens1 = text1.split()
        tokens2 = text2.split()
        if not tokens1 or not tokens2:
            return 0
        vecs1 = model.encode(tokens1)
        vecs2 = model.encode(tokens2)
        best_scores = []
        for v1 in vecs1:
            best = max((1 - cosine(v1, v2) for v2 in vecs2), default=0)
            best_scores.append(best)
        scores = np.array(best_scores)
        if scores.size > 0:
            return np.mean(scores)
        return 0
    except Exception as e:
        st.error(f"Error calculating word similarity ratio: {e}")
        return 0
# Function to calculate BLEU score
def calculate_bleu_score(reference, candidate):
    """Sentence-level BLEU of *candidate* against a single *reference*
    (whitespace tokenisation, nltk defaults)."""
    reference_tokens = reference.split()
    candidate_tokens = candidate.split()
    return sentence_bleu([reference_tokens], candidate_tokens)
# Function to calculate ROUGE-L score
def calculate_rouge_l_score(reference, candidate):
    """ROUGE-L F1 between the two texts as a percentage (0-100), with
    stemming enabled."""
    rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    result = rouge.score(reference, candidate)
    return result['rougeL'].fmeasure * 100
# Function to calculate BERTScore
def calculate_bertscore(reference, candidate):
    """BERTScore F1 of *candidate* vs *reference* as a percentage (0-100),
    using bert-base-uncased embeddings."""
    precision, recall, f1 = bert_score.score([candidate], [reference], model_type='bert-base-uncased')
    return f1.mean().item() * 100
# Function to calculate WMD
def calculate_wmd(reference, candidate):
    """Word Mover's Distance between the two texts (lower = more similar).

    Fixes: the original reloaded the ~1.6 GB word2vec-google-news-300 model
    on EVERY call (and its local name shadowed the module-level sentence
    transformer ``model``). The vectors are now loaded lazily on first use
    and cached on the function object for the lifetime of the process.
    """
    if getattr(calculate_wmd, "_w2v", None) is None:
        calculate_wmd._w2v = api.load("word2vec-google-news-300")
    return calculate_wmd._w2v.wmdistance(reference.split(), candidate.split())
# Function to extract text from PDF
def extract_pdf_text(pdf_file):
    """Concatenate the extracted text of every page of *pdf_file*.

    Fixes: ``page.extract_text()`` can return ``None`` for pages without a
    text layer, which made the original ``text += ...`` raise TypeError;
    such pages now contribute an empty string. Also builds the result with
    ``str.join`` instead of repeated ``+=``.

    Returns "" (and shows the error in the UI) if the PDF cannot be read.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        parts = []
        for page in reader.pages:
            parts.append(page.extract_text() or "")
        return "".join(parts)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
        return ""
# Function to calculate Levenshtein distance
def calculate_levenshtein_ratio(text1, text2):
    """Similarity ratio in [0, 1] between the two texts.

    NOTE(review): despite the name this is difflib's SequenceMatcher ratio
    (2*M/T over matching blocks), not a true Levenshtein edit distance.
    """
    matcher = SequenceMatcher(None, text1, text2)
    return matcher.ratio()
# Function to calculate Jaccard similarity
def calculate_jaccard_similarity(text1, text2):
    """Jaccard similarity in [0, 1] over the token vocabularies of the two
    texts (tokenisation as done by sklearn's CountVectorizer).

    With binary counts, elementwise min/max of the two rows give the
    intersection and union sizes. Returns 0 when both texts produce no
    tokens (union of zero).
    """
    binary_matrix = CountVectorizer(binary=True).fit_transform([text1, text2]).toarray()
    row_a, row_b = binary_matrix[0], binary_matrix[1]
    intersection = np.sum(np.minimum(row_a, row_b))
    union = np.sum(np.maximum(row_a, row_b))
    if union == 0:
        return 0
    return intersection / union
# Function to calculate TF-IDF cosine similarity
def calculate_tfidf_cosine_similarity(text1, text2):
    """Cosine similarity in [0, 1] between the TF-IDF vectors of the two
    texts (the shared module-level vectorizer is re-fit on this pair)."""
    tfidf_matrix = tfidf_vectorizer.fit_transform([text1, text2])
    cosine_distance = pairwise_distances(tfidf_matrix, metric='cosine')[0, 1]
    return 1 - cosine_distance
# --- Streamlit UI: sidebar naming + side-by-side response inputs ---
st.sidebar.title("LLM Details")
llm1_name = st.sidebar.text_input("What is LLM1?", "LLM1")
llm2_name = st.sidebar.text_input("What is LLM2?", "LLM2")

st.title("Text-Based Similarity Comparison")

# Each response comes either from an uploaded PDF (takes precedence) or a
# free-text area.
col1, col2 = st.columns(2)
with col1:
    st.write(f"**{llm1_name} response**")
    upload_pdf_1 = st.file_uploader(f"Upload PDF for {llm1_name} response", type="pdf", key="pdf1")
    if upload_pdf_1:
        text_input_1 = extract_pdf_text(upload_pdf_1)
    else:
        text_input_1 = st.text_area(f" Text for {llm1_name}", height=150, key="text1")
with col2:
    st.write(f"**{llm2_name} response**")
    upload_pdf_2 = st.file_uploader(f"Upload PDF for {llm2_name} response", type="pdf", key="pdf2")
    if upload_pdf_2:
        text_input_2 = extract_pdf_text(upload_pdf_2)
    else:
        text_input_2 = st.text_area(f" Text for {llm2_name}", height=150, key="text2")
# Run the comparison only once both responses are available.
if (text_input_1 and text_input_2) or (upload_pdf_1 and upload_pdf_2):
    if st.button("Submit"):
        # Chunk both responses and embed each chunk.
        chunks_1 = chunk_text(text_input_1)
        chunks_2 = chunk_text(text_input_2)
        embeddings_1 = create_embeddings(chunks_1)
        embeddings_2 = create_embeddings(chunks_2)

        if embeddings_1.size > 0 and embeddings_2.size > 0:
            similarities, similarity_ratio = calculate_similarity_ratio_and_find_matches(
                embeddings_1, embeddings_2
            )

            # Word-level similarity for positionally paired chunks.
            word_similarities = []
            min_chunks = min(len(chunks_1), len(chunks_2))
            for i in range(min_chunks):
                word_similarity_ratio = calculate_word_similarity_ratio(chunks_1[i], chunks_2[i])
                word_similarities.append(word_similarity_ratio * 100)

            # Whole-text metrics.
            levenshtein_ratio = calculate_levenshtein_ratio(text_input_1, text_input_2) * 100
            jaccard_similarity = calculate_jaccard_similarity(text_input_1, text_input_2) * 100
            tfidf_cosine_similarity = calculate_tfidf_cosine_similarity(text_input_1, text_input_2) * 100
            bleu_score = calculate_bleu_score(text_input_1, text_input_2) * 100
            rouge_l_score = calculate_rouge_l_score(text_input_1, text_input_2)
            bertscore = calculate_bertscore(text_input_1, text_input_2)
            wmd = calculate_wmd(text_input_1, text_input_2)

            # Append this comparison to the session-persistent results table.
            # (TF-IDF cosine similarity is displayed below but not stored.)
            new_row = pd.Series({
                "LLM1": llm1_name,
                "LLM2": llm2_name,
                "Context Similarity (%)": similarity_ratio * 100,
                "Levenshtein Similarity (%)": levenshtein_ratio,
                "Jaccard Similarity (%)": jaccard_similarity,
                "BLEU Score": bleu_score,
                "ROUGE-L (%)": rouge_l_score,
                "BERTScore (%)": bertscore,
                "WMD": wmd,
            })
            st.session_state.results_df = pd.concat(
                [st.session_state.results_df, new_row.to_frame().T], ignore_index=True
            )

            # Individual metric readouts.
            st.subheader("Results")
            st.write(f"**Context Similarity:** {similarity_ratio * 100:.2f}%")
            st.write(f"**Levenshtein Similarity:** {levenshtein_ratio:.2f}%")
            st.write(f"**Jaccard Similarity:** {jaccard_similarity:.2f}%")
            st.write(f"**TF-IDF Cosine Similarity:** {tfidf_cosine_similarity:.2f}%")
            st.write(f"**BLEU Score:** {bleu_score:.2f}")
            st.write(f"**ROUGE-L Score:** {rouge_l_score:.2f}%")
            st.write(f"**BERTScore:** {bertscore:.2f}%")
            st.write(f"**Word Mover's Distance (WMD):** {wmd:.4f}")

            # Bar chart over the accumulated table (WMD excluded: not a %).
            st.subheader("Metrics Comparison")
            sns.set(style="whitegrid")
            fig, ax = plt.subplots(figsize=(10, 6))
            sns.barplot(data=st.session_state.results_df.drop(columns=["LLM1", "LLM2", "WMD"]), palette="viridis")
            plt.xticks(rotation=45)
            plt.title("Text Similarity Metrics")
            plt.tight_layout()
            st.pyplot(fig)

            # Per-chunk word similarity against the overall context similarity.
            st.subheader("Similarity Over Chunks")
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.plot(word_similarities, marker='o', linestyle='-', color='b', label='Word Similarity')
            ax.axhline(similarity_ratio * 100, color='r', linestyle='--', label='Context Similarity')
            plt.xlabel("Chunk Index")
            plt.ylabel("Similarity (%)")
            plt.legend()
            plt.title("Similarity across Text Chunks")
            plt.tight_layout()
            st.pyplot(fig)

            # Every comparison made during this session.
            st.subheader("Detailed Results Table")
            st.write(st.session_state.results_df)
else:
    st.warning("Please enter both responses or upload PDF files.")