Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -36,23 +36,6 @@ def load_models():
|
|
| 36 |
|
| 37 |
model, tfidf_vectorizer, word2vec_model = load_models()
|
| 38 |
|
| 39 |
-
def calculate_combined_similarity(paraphrasing_similarity, direct_text_comparison_similarity, summarization_similarity):
    """Blend the three similarity metrics into one score limited to 0-100.

    The inputs are weighted 0.6 (paraphrasing), 0.3 (direct text comparison)
    and 0.1 (summarization) and then summed.

    Args:
        paraphrasing_similarity: Paraphrasing similarity score.
        direct_text_comparison_similarity: Direct text comparison score.
        summarization_similarity: Summarization similarity score.

    Returns:
        The weighted sum clamped to the range [0, 100]. Returns 0 when the
        weighted sum is NaN. Also returns 0 when the computation raises; in
        that case the error is reported through ``st.error``.
    """
    # Imported locally so this block does not depend on the file's import
    # section, which is not visible here.
    import math

    try:
        combined_similarity = (
            paraphrasing_similarity * 0.6
            + direct_text_comparison_similarity * 0.3
            + summarization_similarity * 0.1
        )

        # NaN compares false against everything, so min(100, nan) keeps 100
        # and the plain clamp below would report an undefined score as a
        # perfect match. Treat it as "no similarity" instead.
        if math.isnan(combined_similarity):
            return 0

        # Ensure the score is between 0 and 100%.
        return max(0, min(100, combined_similarity))
    except Exception as e:
        st.error(f"Error calculating combined similarity: {e}")
        return 0
|
| 55 |
-
|
| 56 |
# Initialize session state for results table if not already present
|
| 57 |
if 'results_df' not in st.session_state:
|
| 58 |
st.session_state.results_df = pd.DataFrame(columns=[
|
|
@@ -120,13 +103,8 @@ def calculate_word_similarity_ratio(text1, text2):
|
|
| 120 |
|
| 121 |
@st.cache_data
|
| 122 |
def calculate_bleu_score(reference, candidate):
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
return sentence_bleu([reference.split()], candidate.split()) * 100
|
| 126 |
-
except Exception as e:
|
| 127 |
-
st.error(f"Error calculating BLEU score: {e}")
|
| 128 |
-
return 0
|
| 129 |
-
|
| 130 |
|
| 131 |
@st.cache_data
|
| 132 |
def calculate_rouge_l_score(reference, candidate):
|
|
@@ -216,9 +194,8 @@ def calculate_direct_text_comparison_similarity(text1, text2):
|
|
| 216 |
@st.cache_data
|
| 217 |
def calculate_summarization_similarity(text1, text2):
|
| 218 |
try:
|
| 219 |
-
wmd = calculate_wmd(text1, text2)
|
| 220 |
-
|
| 221 |
-
return (1 - normalized_wmd) * 100 # Scale to percentage
|
| 222 |
except Exception as e:
|
| 223 |
st.error(f"Error calculating summarization similarity: {e}")
|
| 224 |
return 0
|
|
@@ -261,9 +238,15 @@ with col1:
|
|
| 261 |
paraphrasing_similarity = calculate_paraphrasing_similarity(text_input_1, text_input_2)
|
| 262 |
direct_text_comparison_similarity = calculate_direct_text_comparison_similarity(text_input_1, text_input_2)
|
| 263 |
summarization_similarity = calculate_summarization_similarity(text_input_1, text_input_2)
|
| 264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
# Combine all metrics into a single similarity score
|
| 266 |
-
total_similarity =
|
|
|
|
|
|
|
| 267 |
|
| 268 |
# Update results table in session state
|
| 269 |
new_row = pd.Series({
|
|
@@ -304,59 +287,92 @@ with col1:
|
|
| 304 |
}
|
| 305 |
</style>
|
| 306 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
|
| 308 |
# Apply the style
|
| 309 |
st.markdown(combined_score_style, unsafe_allow_html=True)
|
| 310 |
-
|
|
|
|
| 311 |
# Display the combined similarity score
|
| 312 |
st.markdown(f'<div class="combined-score">Combined Similarity Score: {total_similarity:.2f}%</div>', unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
-
def plot_radar_chart(data, labels):
    """Draw one filled radar polygon per entry and return the figure.

    Args:
        data: Sized sequence of dict-like entries. Each entry is read with
            ``entry.get(label, 0)`` for every label and must have a
            ``"name"`` key, which is used as the legend label.
        labels: Sequence of axis labels; one spoke is drawn per label.

    Returns:
        The matplotlib figure containing the polar plot.
    """
    num_vars = len(labels)

    # Evenly spaced spoke angles; the first angle is repeated at the end so
    # each polygon closes.
    angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
    angles += angles[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))

    # Build the palette once; it is the same for every entry, so there is
    # no need to recreate it twice per loop iteration.
    num_entries = len(data)
    palette = sns.color_palette("husl", num_entries) if num_entries else []

    for idx, entry in enumerate(data):
        values = [entry.get(label, 0) for label in labels]
        values += values[:1]  # close the polygon
        color = palette[idx]
        ax.fill(angles, values, color=color, alpha=0.25, label=entry["name"])
        ax.plot(angles, values, color=color, linewidth=2, linestyle='solid')

    ax.set_yticklabels([])
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(labels)
    # Set title and legend on the axes created above rather than on
    # pyplot's implicit "current axes".
    ax.set_title("Radar Chart of Similarity Metrics")
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    return fig
|
| 334 |
-
|
| 335 |
with col2:
|
| 336 |
|
| 337 |
# Display radar chart
|
| 338 |
if st.session_state.radar_chart_data:
|
| 339 |
st.subheader("Metrics Comparison")
|
| 340 |
st.markdown("*Larger area = More similarity of responses.*")
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
st.pyplot(fig)
|
| 345 |
|
| 346 |
-
# Display metrics sliders beside the radar chart
|
| 347 |
-
st.subheader("Similarity Factors")
|
| 348 |
-
st.markdown("*100 being the best case*")
|
| 349 |
-
metrics = st.session_state.radar_chart_data[-1]
|
| 350 |
-
for metric_name in ["paraphrasing_similarity", "direct_text_comparison_similarity", "summarization_similarity"]:
|
| 351 |
-
st.slider(
|
| 352 |
-
slider_labels[metric_name],
|
| 353 |
-
0, 100,
|
| 354 |
-
int(metrics[metric_name]),
|
| 355 |
-
key=metric_name,
|
| 356 |
-
disabled=True, # Make the slider non-editable
|
| 357 |
-
format="%.0f" # Format the slider value to be an integer
|
| 358 |
-
)
|
| 359 |
-
|
| 360 |
# Display metrics sliders beside the radar chart
|
| 361 |
if st.session_state.radar_chart_data:
|
| 362 |
st.subheader("Similarity Factors")
|
|
@@ -434,4 +450,4 @@ if st.sidebar.button("About"):
|
|
| 434 |
- Scikit-learn
|
| 435 |
- NLTK
|
| 436 |
- Gensim
|
| 437 |
-
""")
|
|
|
|
| 36 |
|
| 37 |
model, tfidf_vectorizer, word2vec_model = load_models()
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# Initialize session state for results table if not already present
|
| 40 |
if 'results_df' not in st.session_state:
|
| 41 |
st.session_state.results_df = pd.DataFrame(columns=[
|
|
|
|
| 103 |
|
| 104 |
@st.cache_data
def calculate_bleu_score(reference, candidate):
    """Return the sentence-level BLEU score of ``candidate`` against ``reference``.

    Both strings are split on whitespace. ``reference`` is passed as the
    only reference translation. The value is returned exactly as produced by
    ``sentence_bleu``, with no rescaling.
    """
    from nltk.translate.bleu_score import sentence_bleu

    reference_tokens = reference.split()
    candidate_tokens = candidate.split()
    references = [reference_tokens]
    return sentence_bleu(references, candidate_tokens)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
@st.cache_data
|
| 110 |
def calculate_rouge_l_score(reference, candidate):
|
|
|
|
| 194 |
@st.cache_data
def calculate_summarization_similarity(text1, text2):
    """Turn the distance from ``calculate_wmd`` into a 0-100 similarity score.

    The score is ``(1 - wmd) * 100``, clamped to [0, 100].

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        The clamped similarity score. Returns 0 when the score is NaN. Also
        returns 0 when the computation raises; in that case the error is
        reported through ``st.error``.
    """
    # Imported locally so this block does not depend on the file's import
    # section, which is not visible here.
    import math

    try:
        # NOTE(review): calculate_wmd is defined outside this view.
        # Presumably it returns a Word Mover's Distance, which is not
        # bounded by 1 and may be infinite - confirm.
        wmd = calculate_wmd(text1, text2)
        similarity = (1 - wmd) * 100

        # NaN would slip through min/max (min(100, nan) keeps 100), so map
        # it to "no similarity" explicitly.
        if math.isnan(similarity):
            return 0

        # A distance above 1 would otherwise yield a negative (or -inf)
        # percentage; keep the result inside the 0-100 scale.
        return max(0, min(100, similarity))
    except Exception as e:
        st.error(f"Error calculating summarization similarity: {e}")
        return 0
|
|
|
|
| 238 |
paraphrasing_similarity = calculate_paraphrasing_similarity(text_input_1, text_input_2)
|
| 239 |
direct_text_comparison_similarity = calculate_direct_text_comparison_similarity(text_input_1, text_input_2)
|
| 240 |
summarization_similarity = calculate_summarization_similarity(text_input_1, text_input_2)
|
| 241 |
+
if summarization_similarity<0:
|
| 242 |
+
summarization_similarity=0
|
| 243 |
+
if direct_text_comparison_similarity<0:
|
| 244 |
+
direct_text_comparison_similarity=0
|
| 245 |
+
|
| 246 |
# Combine all metrics into a single similarity score
|
| 247 |
+
total_similarity = (paraphrasing_similarity * 0.6 + # High weight
|
| 248 |
+
direct_text_comparison_similarity * 0.3 + # Moderate weight
|
| 249 |
+
summarization_similarity * 0.1) # Low weight
|
| 250 |
|
| 251 |
# Update results table in session state
|
| 252 |
new_row = pd.Series({
|
|
|
|
| 287 |
}
|
| 288 |
</style>
|
| 289 |
"""
|
| 290 |
+
good_case = """
|
| 291 |
+
<style>
|
| 292 |
+
.good {
|
| 293 |
+
font-size: 48px;
|
| 294 |
+
font-weight: bold;
|
| 295 |
+
color: #4CAF50; /* Green color for positive emphasis */
|
| 296 |
+
background-color: #f0f0f5;
|
| 297 |
+
padding: 20px;
|
| 298 |
+
border-radius: 15px;
|
| 299 |
+
text-align: center;
|
| 300 |
+
margin-top: 30px;
|
| 301 |
+
box-shadow: 2px 2px 12px rgba(0, 0, 0, 0.1);
|
| 302 |
+
}
|
| 303 |
+
</style>
|
| 304 |
+
"""
|
| 305 |
+
bad_case = """
|
| 306 |
+
<style>
|
| 307 |
+
.bad {
|
| 308 |
+
font-size: 48px;
|
| 309 |
+
font-weight: bold;
|
| 310 |
+
color: #FF0000; /* Red color for negative emphasis */
|
| 311 |
+
background-color: #f0f0f5;
|
| 312 |
+
padding: 20px;
|
| 313 |
+
border-radius: 15px;
|
| 314 |
+
text-align: center;
|
| 315 |
+
margin-top: 30px;
|
| 316 |
+
box-shadow: 2px 2px 12px rgba(0, 0, 0, 0.1);
|
| 317 |
+
}
|
| 318 |
+
</style>
|
| 319 |
+
"""
|
| 320 |
|
| 321 |
# Apply the style
|
| 322 |
st.markdown(combined_score_style, unsafe_allow_html=True)
|
| 323 |
+
st.markdown(good_case, unsafe_allow_html=True)
|
| 324 |
+
st.markdown(bad_case, unsafe_allow_html=True)
|
| 325 |
# Display the combined similarity score
|
| 326 |
st.markdown(f'<div class="combined-score">Combined Similarity Score: {total_similarity:.2f}%</div>', unsafe_allow_html=True)
|
| 327 |
+
# Calculate context-words difference
|
| 328 |
+
context_words_diff = int(paraphrasing_similarity) - int(direct_text_comparison_similarity)
|
| 329 |
+
|
| 330 |
+
# Display distinguishing factor
|
| 331 |
+
if total_similarity >= 100:
|
| 332 |
+
st.markdown(f'<div class="bad">Similar Responses</div>', unsafe_allow_html=True)
|
| 333 |
+
elif total_similarity >= 55:
|
| 334 |
+
if context_words_diff >= 42 and context_words_diff < 57.08:
|
| 335 |
+
st.markdown(f'<div class="bad">Similar Responses</div>', unsafe_allow_html=True)
|
| 336 |
+
elif context_words_diff > 35:
|
| 337 |
+
st.markdown(f'<div class="good">Response 2 is better.</div>', unsafe_allow_html=True)
|
| 338 |
+
else:
|
| 339 |
+
st.markdown(f'<div class="bad">Similar Responses</div>', unsafe_allow_html=True)
|
| 340 |
+
else:
|
| 341 |
+
st.markdown(f'<div class="bad">Similar Responses</div>', unsafe_allow_html=True)
|
| 342 |
+
|
| 343 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
with col2:
|
| 345 |
|
| 346 |
# Display radar chart
|
| 347 |
if st.session_state.radar_chart_data:
|
| 348 |
st.subheader("Metrics Comparison")
|
| 349 |
st.markdown("*Larger area = More similarity of responses.*")
|
| 350 |
+
labels = ["Context similarity", "Words Similarity", "Summarization Similarity"]
|
| 351 |
+
num_vars = len(labels)
|
| 352 |
+
angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
|
| 353 |
+
angles += angles[:1]
|
| 354 |
+
|
| 355 |
+
fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
|
| 356 |
+
|
| 357 |
+
# Plot each response with a different color
|
| 358 |
+
color_palette = sns.color_palette("husl", len(st.session_state.radar_chart_data))
|
| 359 |
+
for idx, data in enumerate(st.session_state.radar_chart_data):
|
| 360 |
+
values = [
|
| 361 |
+
data["paraphrasing_similarity"],
|
| 362 |
+
data["direct_text_comparison_similarity"],
|
| 363 |
+
data["summarization_similarity"]
|
| 364 |
+
]
|
| 365 |
+
values += values[:1]
|
| 366 |
+
ax.fill(angles, values, color=color_palette[idx], alpha=0.25, label=data["name"])
|
| 367 |
+
ax.plot(angles, values, color=color_palette[idx], linewidth=2, linestyle='solid')
|
| 368 |
+
|
| 369 |
+
ax.set_yticklabels([])
|
| 370 |
+
ax.set_xticks(angles[:-1])
|
| 371 |
+
ax.set_xticklabels(labels)
|
| 372 |
+
plt.title("Radar Chart of Similarity Metrics")
|
| 373 |
+
plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
|
| 374 |
st.pyplot(fig)
|
| 375 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
# Display metrics sliders beside the radar chart
|
| 377 |
if st.session_state.radar_chart_data:
|
| 378 |
st.subheader("Similarity Factors")
|
|
|
|
| 450 |
- Scikit-learn
|
| 451 |
- NLTK
|
| 452 |
- Gensim
|
| 453 |
+
""")
|