# app.py — Semantic Similarity Explainer (Streamlit app)
import streamlit as st
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import requests
import json
import os
# Page configuration — must run before any other Streamlit UI call.
_PAGE_CONFIG = {
    "page_title": "Semantic Similarity Explainer",
    "page_icon": "πŸ”",
    "layout": "wide",
}
st.set_page_config(**_PAGE_CONFIG)
# App heading plus a short explanation of what the tool does.
_DESCRIPTION = """
This app calculates the **semantic similarity** between two sentences using transformer-based embeddings (all-MiniLM-L6-v2) and uses AI to explain why that specific score makes sense.
"""
st.title("πŸ” Semantic Similarity Explainer with AI")
st.markdown(_DESCRIPTION)
# Session state: keep a record of past comparisons across Streamlit reruns.
if "history" not in st.session_state:
    st.session_state["history"] = []
# Identifier of the sentence-embedding model pulled from the HF hub.
_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'

@st.cache_resource
def load_model():
    """Return the SentenceTransformer instance, cached for the process lifetime."""
    return SentenceTransformer(_MODEL_NAME)

# Load (or fetch from cache) the embedding model up front.
with st.spinner("Loading transformer model..."):
    model = load_model()
# Read the OpenRouter key from the environment (Hugging Face Spaces secrets).
api_key = os.getenv("OPENROUTER_API_KEY")

# Sidebar: connection status, manual key-entry fallback, and usage notes.
with st.sidebar:
    st.header("βš™οΈ Configuration")
    if not api_key:
        # No secret configured — let the user paste a key for this session.
        st.error("❌ API Key not found in environment")
        api_key = st.text_input("OpenRouter API Key", type="password", help="Get your API key from https://openrouter.ai/keys")
        st.info("πŸ’‘ Tip: Set OPENROUTER_API_KEY in Hugging Face Spaces secrets to avoid typing it every time")
    else:
        st.success("🟒 API Connected")
    st.markdown("---")
    st.markdown("""
### How it works:
1. Enter two sentences
2. Generate embeddings using transformer
3. Calculate cosine similarity
4. AI explains the similarity score
5. View the full prompt sent to AI
""")
    st.info("""
**Model:** all-MiniLM-L6-v2
This transformer model creates 384-dimensional embeddings that capture semantic meaning, not just word overlap.
""")
# Main content: two side-by-side text inputs for the sentences to compare.
input_left, input_right = st.columns(2)
with input_left:
    sentence1 = st.text_input("Enter first sentence:", placeholder="e.g., you are hot")
with input_right:
    sentence2 = st.text_input("Enter second sentence:", placeholder="e.g., you are cold")
# Calculate button: embed both sentences, compute cosine similarity, then ask
# the AI (via OpenRouter) to explain why this exact score occurred.
if st.button("🎯 Calculate & Explain", type="primary"):
    # Guard clauses: both sentences and an API key are required.
    if not sentence1 or not sentence2:
        st.error("Please enter both sentences!")
    elif not api_key:
        st.error("Please enter your OpenRouter API key in the sidebar!")
    else:
        try:
            # Normalize to lowercase for consistency
            sentence1_normalized = sentence1.lower().strip()
            sentence2_normalized = sentence2.lower().strip()
            # Generate embeddings for both sentences in a single batch.
            with st.spinner("Generating semantic embeddings..."):
                embeddings = model.encode([sentence1_normalized, sentence2_normalized])
                embedding1 = embeddings[0].reshape(1, -1)
                embedding2 = embeddings[1].reshape(1, -1)
            # Calculate cosine similarity between the two embedding vectors.
            similarity = cosine_similarity(embedding1, embedding2)[0][0]
            # st.progress / st.metric reject numpy scalar types, so convert to
            # a plain Python float first.
            similarity_float = float(similarity)
            similarity_rounded = round(similarity_float, 2)
            # Display similarity score
            st.success("**Semantic similarity between:**")
            st.info(f'"{sentence1}" and "{sentence2}" β†’ **{similarity_rounded:.2f}**')
            # Human-readable bucket for the similarity meter.
            if similarity_rounded < 0.3:
                similarity_desc = "Low similarity"
            elif similarity_rounded < 0.7:
                similarity_desc = "Moderate similarity"
            else:
                similarity_desc = "High similarity"
            # Cosine similarity can dip slightly below 0 for unrelated
            # sentences; clamp into [0, 1] so st.progress never raises.
            st.progress(min(1.0, max(0.0, similarity_rounded)), text=similarity_desc)
            # Comprehensive prompt asking the AI to explain WHY this specific
            # score occurred for these specific sentences.
            detailed_prompt = f"""You are an expert in Natural Language Processing and semantic similarity analysis using transformer-based embeddings.
I have calculated the semantic similarity between two sentences using the 'all-MiniLM-L6-v2' transformer model, which creates 384-dimensional vector embeddings that capture deep semantic meaning.
**ANALYSIS REQUEST:**
Sentence 1: "{sentence1}"
Sentence 2: "{sentence2}"
Cosine Similarity Score: {similarity_rounded:.2f}
Please provide a detailed explanation of WHY these two specific sentences resulted in a similarity score of {similarity_rounded:.2f}.
**Your analysis should cover:**
1. **Score Interpretation**: What does {similarity_rounded:.2f} mean on the 0.00-1.00 scale? Is this low, moderate, or high similarity?
2. **Semantic Analysis**:
- What are the key semantic elements in each sentence?
- What similarities did the transformer model detect?
- What differences contributed to the score not being higher/lower?
3. **Linguistic Features**:
- Sentence structure patterns
- Word relationships (synonyms, antonyms, related concepts)
- Grammatical similarities
- Contextual meaning
4. **Transformer Model Behavior**:
- How does all-MiniLM-L6-v2 process these sentences?
- What semantic features likely contributed most to this score?
- Why this score makes sense from a deep learning perspective
5. **Intuitive Validation**: Does this {similarity_rounded:.2f} score match what a human would expect when comparing these sentences?
Please be specific about these exact sentences and this exact score of {similarity_rounded:.2f}. Explain the reasoning behind this particular numerical result."""
            # Call OpenRouter API with the detailed prompt
            with st.spinner("πŸ€– AI is analyzing why you got this specific similarity score..."):
                headers = {
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                    "HTTP-Referer": "https://github.com/semantic-similarity-app",
                    "X-Title": "Semantic Similarity Explainer"
                }
                data = {
                    "model": "openai/gpt-3.5-turbo",
                    "messages": [
                        {
                            "role": "system",
                            "content": "You are an NLP expert who explains similarity scores in simple, short terms that anyone can understand."
                        },
                        {
                            "role": "user",
                            "content": detailed_prompt
                        }
                    ],
                    "temperature": 0.10,  # low temperature for focused, consistent explanations
                    "max_tokens": 400  # keep responses short
                }
                # timeout prevents the app from hanging indefinitely on a
                # stalled network connection.
                response = requests.post(
                    "https://openrouter.ai/api/v1/chat/completions",
                    headers=headers,
                    json=data,
                    timeout=60
                )
            if response.status_code == 200:
                result = response.json()
                explanation = result['choices'][0]['message']['content']
                # Display results in tabs
                tab1, tab2, tab3 = st.tabs(["πŸ€– AI Explanation", "πŸ“ Prompt Sent to AI", "πŸ”§ Technical Details"])
                with tab1:
                    st.markdown("### 🧠 Why You Got This Similarity Score")
                    st.markdown("**AI Analysis:**")
                    # Styled container for the AI explanation.
                    with st.container():
                        st.markdown(f"""
<div style="background-color: #f0f2f6; padding: 20px; border-radius: 10px; border-left: 4px solid #1f77b4;">
{explanation}
</div>
""", unsafe_allow_html=True)
                with tab2:
                    st.markdown("### πŸ“€ Exact Prompt Sent to GPT-3.5-Turbo")
                    st.markdown("This is exactly what was sent to the AI to generate the explanation:")
                    st.code(detailed_prompt, language="text")
                    st.markdown("**API Details:**")
                    # Echo the actual request payload so this display can
                    # never drift out of sync with what was sent (the previous
                    # hard-coded values showed temperature 0.3 / 800 tokens
                    # and a different system message than the real request).
                    st.json({
                        "model": data["model"],
                        "temperature": data["temperature"],
                        "max_tokens": data["max_tokens"],
                        "system_message": data["messages"][0]["content"]
                    })
                with tab3:
                    st.markdown("### πŸ”§ Technical Details")
                    col1, col2 = st.columns(2)
                    with col1:
                        st.markdown("**Sentence 1 Analysis:**")
                        st.text(f"Original: {sentence1}")
                        st.text(f"Normalized: {sentence1_normalized}")
                        st.text(f"Embedding shape: {embedding1.shape}")
                        st.text(f"Embedding L2 norm: {np.linalg.norm(embedding1):.4f}")
                        st.markdown("**First 10 embedding dimensions:**")
                        for i, val in enumerate(embedding1[0][:10]):
                            st.text(f"Dim {i}: {val:.4f}")
                    with col2:
                        st.markdown("**Sentence 2 Analysis:**")
                        st.text(f"Original: {sentence2}")
                        st.text(f"Normalized: {sentence2_normalized}")
                        st.text(f"Embedding shape: {embedding2.shape}")
                        st.text(f"Embedding L2 norm: {np.linalg.norm(embedding2):.4f}")
                        st.markdown("**First 10 embedding dimensions:**")
                        for i, val in enumerate(embedding2[0][:10]):
                            st.text(f"Dim {i}: {val:.4f}")
                    st.markdown("---")
                    st.markdown("**Similarity Computation Details:**")
                    col1, col2, col3 = st.columns(3)
                    with col1:
                        st.metric("Embedding Dimensions", "384")
                        st.metric("Exact Similarity", f"{similarity_float:.6f}")
                    with col2:
                        st.metric("Rounded Similarity", f"{similarity_rounded:.2f}")
                        dot_product = np.dot(embedding1[0], embedding2[0])
                        st.metric("Dot Product", f"{dot_product:.4f}")
                    with col3:
                        # Angle between the embedding vectors; np.clip guards
                        # against float rounding pushing |cos| past 1.
                        angle = np.arccos(np.clip(similarity_float, -1.0, 1.0))
                        angle_degrees = np.degrees(angle)
                        st.metric("Vector Angle (degrees)", f"{angle_degrees:.2f}Β°")
                        st.metric("Model Used", "all-MiniLM-L6-v2")
                # Save to history
                st.session_state.history.append({
                    "sentence1": sentence1,
                    "sentence2": sentence2,
                    "similarity": similarity_rounded,
                    "explanation": explanation
                })
                st.success("βœ… Analysis complete! Check the tabs above for detailed explanations.")
            else:
                st.error(f"❌ API Error: {response.status_code}")
                st.error(f"Response: {response.text}")
        except Exception as e:
            # Top-level boundary: surface the error in the UI rather than
            # letting the Streamlit run crash.
            st.error(f"❌ An error occurred: {str(e)}")
            st.error("Please check your API key and internet connection.")
# Past results, newest first, capped at the five most recent entries.
if st.session_state.history:
    st.markdown("---")
    st.markdown("### πŸ“œ Previous Calculations")
    for entry in st.session_state.history[-5:][::-1]:
        header = f"'{entry['sentence1']}' vs '{entry['sentence2']}' β†’ Score: {entry['similarity']:.2f}"
        with st.expander(header):
            st.markdown(entry['explanation'])
# Reference guide: how to read cosine-similarity values from this model.
_SCORE_GUIDE = """
### How to Interpret Cosine Similarity Scores
**What the numbers mean:**
- **0.90 - 1.00**: Nearly identical meaning (e.g., "The car is fast" vs "The automobile is quick")
- **0.70 - 0.89**: High semantic similarity (e.g., "I love dogs" vs "I adore puppies")
- **0.50 - 0.69**: Moderate similarity (e.g., "You are hot" vs "You are cold" - same structure, opposite meaning)
- **0.30 - 0.49**: Low similarity (e.g., "I like pizza" vs "Mathematics is difficult")
- **0.00 - 0.29**: Very low similarity (e.g., "Hello world" vs "Quantum physics equations")
**Why transformer embeddings are powerful:**
- They understand **context** and **meaning**, not just word overlap
- They capture **relationships** between words (synonyms, antonyms, related concepts)
- They consider **sentence structure** and **grammatical patterns**
- They detect **semantic similarity** even with different words
"""
with st.expander("ℹ️ Understanding Semantic Similarity Scores"):
    st.markdown(_SCORE_GUIDE)
# Footer (rendered as raw HTML for centering).
_FOOTER_HTML = """
<div style='text-align: center'>
<p>πŸš€ Made with ❀️ using Streamlit | Powered by Sentence Transformers & OpenRouter API</p>
<p><small>Each calculation automatically sends your sentences and similarity score to GPT-3.5-turbo for detailed analysis</small></p>
</div>
"""
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)