"""Streamlit app: semantic similarity between two sentences, explained by an LLM.

Embeds both sentences with sentence-transformers (all-MiniLM-L6-v2), scores
them with cosine similarity, then asks GPT-3.5-turbo (via OpenRouter) to
explain why that specific score makes sense.

NOTE(review): the source this was recovered from was collapsed onto a few
physical lines and truncated after the tab1 display code; the tail (tab
bodies, error branch, except clause, footer) is a minimal reconstruction and
is marked with NOTE(review) comments below — confirm against the full file.
Mojibake in user-facing strings ("đ", "â", ...) has been restored to the
most plausible emoji/arrows.
"""

import json
import os

import numpy as np
import requests
import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Page config
st.set_page_config(
    page_title="Semantic Similarity Explainer",
    page_icon="🔍",  # NOTE(review): restored from mojibake "đ"
    layout="wide",
)

# Title and description
st.title("🔍 Semantic Similarity Explainer with AI")
st.markdown("""
This app calculates the **semantic similarity** between two sentences using
transformer-based embeddings (all-MiniLM-L6-v2) and uses AI to explain why
that specific score makes sense.
""")

# Initialize session state (history persists across Streamlit reruns)
if 'history' not in st.session_state:
    st.session_state.history = []


@st.cache_resource  # load the model once per server process, not per rerun
def load_model():
    """Return the cached all-MiniLM-L6-v2 sentence-transformer model."""
    return SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


# Load the model (spinner shown only on the first, uncached load)
with st.spinner("Loading transformer model..."):
    model = load_model()

# Get API key from environment variables (Hugging Face Spaces secrets)
api_key = os.getenv("OPENROUTER_API_KEY")

# Sidebar for configuration
with st.sidebar:
    st.header("⚙️ Configuration")
    if api_key:
        st.success("🟢 API Connected")
    else:
        st.error("❌ API Key not found in environment")
        # Fall back to manual entry when the secret is not configured
        api_key = st.text_input(
            "OpenRouter API Key",
            type="password",
            help="Get your API key from https://openrouter.ai/keys",
        )
        st.info(
            "💡 Tip: Set OPENROUTER_API_KEY in Hugging Face Spaces secrets "
            "to avoid typing it every time"
        )

    st.markdown("---")
    st.markdown("""
### How it works:
1. Enter two sentences
2. Generate embeddings using transformer
3. Calculate cosine similarity
4. AI explains the similarity score
5. View the full prompt sent to AI
""")
    st.info("""
**Model:** all-MiniLM-L6-v2
This transformer model creates 384-dimensional embeddings that capture
semantic meaning, not just word overlap.
""")

# Main content
col1, col2 = st.columns(2)
with col1:
    sentence1 = st.text_input("Enter first sentence:", placeholder="e.g., you are hot")
with col2:
    sentence2 = st.text_input("Enter second sentence:", placeholder="e.g., you are cold")

# Calculate button
if st.button("🎯 Calculate & Explain", type="primary"):
    if not sentence1 or not sentence2:
        st.error("Please enter both sentences!")
    elif not api_key:
        st.error("Please enter your OpenRouter API key in the sidebar!")
    else:
        try:
            # Normalize to lowercase for consistency
            sentence1_normalized = sentence1.lower().strip()
            sentence2_normalized = sentence2.lower().strip()

            # Generate embeddings
            with st.spinner("Generating semantic embeddings..."):
                embeddings = model.encode([sentence1_normalized, sentence2_normalized])
                embedding1 = embeddings[0].reshape(1, -1)
                embedding2 = embeddings[1].reshape(1, -1)
                # Calculate cosine similarity
                similarity = cosine_similarity(embedding1, embedding2)[0][0]

            # Convert numpy.float32 -> builtin float: st.progress rejects
            # numpy scalars, so this cast is required, not cosmetic.
            similarity_float = float(similarity)
            similarity_rounded = round(similarity_float, 2)

            # Display similarity score (plain string: nothing to interpolate)
            st.success("**Semantic similarity between:**")
            st.info(f'"{sentence1}" and "{sentence2}" → **{similarity_rounded:.2f}**')

            # Show similarity meter
            if similarity_rounded < 0.3:
                similarity_desc = "Low similarity"
            elif similarity_rounded < 0.7:
                similarity_desc = "Moderate similarity"
            else:
                similarity_desc = "High similarity"
            st.progress(float(similarity_rounded), text=similarity_desc)

            # Comprehensive prompt asking the AI to explain WHY this exact
            # score occurred for these exact sentences.
            detailed_prompt = f"""You are an expert in Natural Language Processing and semantic similarity analysis using transformer-based embeddings.

I have calculated the semantic similarity between two sentences using the 'all-MiniLM-L6-v2' transformer model, which creates 384-dimensional vector embeddings that capture deep semantic meaning.

**ANALYSIS REQUEST:**
Sentence 1: "{sentence1}"
Sentence 2: "{sentence2}"
Cosine Similarity Score: {similarity_rounded:.2f}

Please provide a detailed explanation of WHY these two specific sentences resulted in a similarity score of {similarity_rounded:.2f}.

**Your analysis should cover:**

1. **Score Interpretation**: What does {similarity_rounded:.2f} mean on the 0.00-1.00 scale? Is this low, moderate, or high similarity?

2. **Semantic Analysis**:
   - What are the key semantic elements in each sentence?
   - What similarities did the transformer model detect?
   - What differences contributed to the score not being higher/lower?

3. **Linguistic Features**:
   - Sentence structure patterns
   - Word relationships (synonyms, antonyms, related concepts)
   - Grammatical similarities
   - Contextual meaning

4. **Transformer Model Behavior**:
   - How does all-MiniLM-L6-v2 process these sentences?
   - What semantic features likely contributed most to this score?
   - Why this score makes sense from a deep learning perspective

5. **Intuitive Validation**: Does this {similarity_rounded:.2f} score match what a human would expect when comparing these sentences?

Please be specific about these exact sentences and this exact score of {similarity_rounded:.2f}. Explain the reasoning behind this particular numerical result."""

            # Call OpenRouter API with the detailed prompt
            with st.spinner("🤖 AI is analyzing why you got this specific similarity score..."):
                headers = {
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                    "HTTP-Referer": "https://github.com/semantic-similarity-app",
                    "X-Title": "Semantic Similarity Explainer",
                }
                data = {
                    "model": "openai/gpt-3.5-turbo",
                    "messages": [
                        {
                            "role": "system",
                            "content": "You are an NLP expert who explains similarity scores in simple, short terms that anyone can understand."
                        },
                        {
                            "role": "user",
                            "content": detailed_prompt
                        }
                    ],
                    "temperature": 0.10,  # near-deterministic: keep explanations focused and repeatable
                    "max_tokens": 400     # cap response length for short, readable answers
                }
                # timeout prevents a stuck API call from hanging the app forever
                response = requests.post(
                    "https://openrouter.ai/api/v1/chat/completions",
                    headers=headers,
                    json=data,
                    timeout=60,
                )

            if response.status_code == 200:
                result = response.json()
                explanation = result['choices'][0]['message']['content']

                # Display results in tabs
                tab1, tab2, tab3 = st.tabs([
                    "🤖 AI Explanation",
                    "📝 Prompt Sent to AI",
                    "🔧 Technical Details",
                ])

                with tab1:
                    st.markdown("### 🧠 Why You Got This Similarity Score")
                    st.markdown("**AI Analysis:**")
                    with st.container():
                        # NOTE(review): the original styled f-string markdown
                        # here was truncated in the recovered source; plain
                        # markdown rendering of the explanation is the minimal
                        # faithful reconstruction.
                        st.markdown(explanation)

                with tab2:
                    # NOTE(review): reconstructed — tab body missing from source
                    st.markdown("### 📝 Full Prompt Sent to AI")
                    st.code(detailed_prompt, language="markdown")

                with tab3:
                    # NOTE(review): reconstructed — tab body missing from source
                    st.markdown("### 🔧 Technical Details")
                    st.write({
                        "Model": "sentence-transformers/all-MiniLM-L6-v2",
                        "Embedding dimensions": int(embeddings.shape[1]),
                        "Raw cosine similarity": similarity_float,
                        "Rounded score": similarity_rounded,
                    })

                # Keep a record of this comparison for the session
                st.session_state.history.append({
                    "sentence1": sentence1,
                    "sentence2": sentence2,
                    "score": similarity_rounded,
                })
            else:
                # NOTE(review): reconstructed — non-200 branch missing from source
                st.error(f"API Error {response.status_code}: {response.text}")

        except Exception as e:
            # NOTE(review): reconstructed — the except clause was truncated in
            # the recovered source; surface the error instead of crashing the app.
            st.error(f"An error occurred: {e}")

# Footer (text recovered from the truncated source tail)
st.markdown("---")
st.markdown(
    "🔍 Made with ❤️ using Streamlit | Powered by Sentence Transformers & OpenRouter API"
)
st.caption(
    "Each calculation automatically sends your sentences and similarity score "
    "to GPT-3.5-turbo for detailed analysis"
)