uumerrr684 committed on
Commit
7b83a38
·
verified ·
1 Parent(s): 90a486a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -68
app.py CHANGED
@@ -57,10 +57,10 @@ with st.sidebar:
57
  col1, col2 = st.columns(2)
58
 
59
  with col1:
60
- sentence1 = st.text_input("Enter first sentence:", placeholder="e.g., you are pretty")
61
 
62
  with col2:
63
- sentence2 = st.text_input("Enter second sentence:", placeholder="e.g., you are ugly")
64
 
65
  # Calculate button
66
  if st.button("🎯 Calculate & Explain", type="primary"):
@@ -83,65 +83,84 @@ if st.button("🎯 Calculate & Explain", type="primary"):
83
  # Calculate cosine similarity
84
  similarity = cosine_similarity(embedding1, embedding2)[0][0]
85
 
86
- # Round to 2 decimal places
87
- similarity_rounded = round(similarity, 2)
 
88
 
89
  # Display similarity score
90
  st.success(f"**Semantic similarity between:**")
91
  st.info(f'"{sentence1}" and "{sentence2}" → **{similarity_rounded:.2f}**')
92
 
93
- # Show similarity meter
94
- progress_color = "normal"
95
  if similarity_rounded < 0.3:
96
- progress_color = "normal"
97
  similarity_desc = "Low similarity"
98
  elif similarity_rounded < 0.7:
99
  similarity_desc = "Moderate similarity"
100
  else:
101
  similarity_desc = "High similarity"
102
 
103
- st.progress(similarity_rounded, text=similarity_desc)
 
104
 
105
- # Create the prompt for the AI
106
- prompt = f"""You are an expert in Natural Language Processing and semantic similarity using transformer-based embeddings.
107
 
108
- I have calculated the semantic similarity between two sentences using the 'all-MiniLM-L6-v2' transformer model, which generates 384-dimensional dense vector embeddings that capture semantic meaning.
109
 
110
- Original Sentence 1: "{sentence1}"
111
- Original Sentence 2: "{sentence2}"
 
 
112
 
113
- Normalized (lowercase) for embedding:
114
- Sentence 1: "{sentence1_normalized}"
115
- Sentence 2: "{sentence2_normalized}"
116
 
117
- Calculated Semantic Similarity Score: {similarity_rounded:.2f}
118
 
119
- Please explain:
120
- 1. What this similarity score means (0.00 = completely different meaning, 1.00 = identical meaning)
121
- 2. Why these two specific sentences resulted in a score of {similarity_rounded:.2f}
122
- 3. What semantic features (meaning, context, sentiment) contributed to this score
123
- 4. How transformer embeddings capture deeper meaning beyond just word overlap
124
- 5. Whether this score makes intuitive sense given the semantic relationship between the sentences
125
 
126
- Note: This uses semantic embeddings, not TF-IDF, so the score reflects actual meaning similarity, not just word overlap."""
 
 
 
127
 
128
- # Call OpenRouter API
129
- with st.spinner("Getting AI explanation..."):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  headers = {
131
  "Authorization": f"Bearer {api_key}",
132
  "Content-Type": "application/json",
133
- "HTTP-Referer": "https://github.com/yourusername/semantic-similarity-app",
134
  "X-Title": "Semantic Similarity Explainer"
135
  }
136
 
137
  data = {
138
  "model": "openai/gpt-3.5-turbo",
139
  "messages": [
140
- {"role": "system", "content": "You are an expert in NLP and transformer-based semantic similarity analysis. Provide clear, educational explanations about how embeddings capture meaning."},
141
- {"role": "user", "content": prompt}
 
 
 
 
 
 
142
  ],
143
- "temperature": 0.7,
144
- "max_tokens": 600
145
  }
146
 
147
  response = requests.post(
@@ -155,54 +174,70 @@ Note: This uses semantic embeddings, not TF-IDF, so the score reflects actual me
155
  explanation = result['choices'][0]['message']['content']
156
 
157
  # Display results in tabs
158
- tab1, tab2, tab3 = st.tabs(["📊 AI Explanation", "📝 Full Prompt Sent", "🔧 Technical Details"])
159
 
160
  with tab1:
161
- st.markdown("### AI Explanation")
162
- st.markdown(explanation)
 
 
 
 
 
 
 
 
163
 
164
  with tab2:
165
- st.markdown("### Full Prompt Sent to GPT-3.5-turbo")
166
- st.code(prompt, language="text")
 
 
 
 
 
 
 
 
 
167
 
168
  with tab3:
169
- st.markdown("### Technical Details")
170
 
171
  col1, col2 = st.columns(2)
172
 
173
  with col1:
174
- st.markdown("**Sentence 1 Details:**")
175
  st.text(f"Original: {sentence1}")
176
  st.text(f"Normalized: {sentence1_normalized}")
177
  st.text(f"Embedding shape: {embedding1.shape}")
178
- st.text(f"Embedding norm: {np.linalg.norm(embedding1):.4f}")
179
 
180
- # Show first 10 dimensions of embedding
181
  st.markdown("**First 10 embedding dimensions:**")
182
  embedding_preview = embedding1[0][:10]
183
  for i, val in enumerate(embedding_preview):
184
  st.text(f"Dim {i}: {val:.4f}")
185
 
186
  with col2:
187
- st.markdown("**Sentence 2 Details:**")
188
  st.text(f"Original: {sentence2}")
189
  st.text(f"Normalized: {sentence2_normalized}")
190
  st.text(f"Embedding shape: {embedding2.shape}")
191
- st.text(f"Embedding norm: {np.linalg.norm(embedding2):.4f}")
192
 
193
- # Show first 10 dimensions of embedding
194
  st.markdown("**First 10 embedding dimensions:**")
195
  embedding_preview = embedding2[0][:10]
196
  for i, val in enumerate(embedding_preview):
197
  st.text(f"Dim {i}: {val:.4f}")
198
 
199
  st.markdown("---")
200
- st.markdown("**Embedding Statistics:**")
 
201
  col1, col2, col3 = st.columns(3)
202
 
203
  with col1:
204
  st.metric("Embedding Dimensions", "384")
205
- st.metric("Exact Similarity", f"{similarity:.6f}")
206
 
207
  with col2:
208
  st.metric("Rounded Similarity", f"{similarity_rounded:.2f}")
@@ -211,10 +246,10 @@ Note: This uses semantic embeddings, not TF-IDF, so the score reflects actual me
211
 
212
  with col3:
213
  # Calculate angle between vectors
214
- angle = np.arccos(np.clip(similarity, -1.0, 1.0))
215
  angle_degrees = np.degrees(angle)
216
- st.metric("Angle (degrees)", f"{angle_degrees:.2f}°")
217
- st.metric("Model", "all-MiniLM-L6-v2")
218
 
219
  # Save to history
220
  st.session_state.history.append({
@@ -224,12 +259,15 @@ Note: This uses semantic embeddings, not TF-IDF, so the score reflects actual me
224
  "explanation": explanation
225
  })
226
 
 
 
227
  else:
228
- st.error(f"API Error: {response.status_code}")
229
- st.error(response.text)
230
 
231
  except Exception as e:
232
- st.error(f"An error occurred: {str(e)}")
 
233
 
234
  # Display history
235
  if st.session_state.history:
@@ -237,33 +275,33 @@ if st.session_state.history:
237
  st.markdown("### 📜 Previous Calculations")
238
 
239
  for i, item in enumerate(reversed(st.session_state.history[-5:])): # Show last 5
240
- with st.expander(f"'{item['sentence1']}' vs '{item['sentence2']}' - Score: {item['similarity']:.2f}"):
241
  st.markdown(item['explanation'])
242
 
243
  # Info box about semantic similarity
244
- with st.expander("ℹ️ Understanding Semantic Similarity"):
245
  st.markdown("""
246
- ### Semantic Similarity vs Word Overlap
247
-
248
- **Transformer-based embeddings** (like all-MiniLM-L6-v2) capture the **actual meaning** of sentences, not just word overlap.
249
 
250
- Examples:
251
- - "The car is fast" vs "The automobile is quick" → High similarity (~0.90)
252
- - "I love dogs" vs "I hate dogs" → Moderate similarity (~0.60) - similar topic, opposite sentiment
253
- - "You are pretty" vs "You are ugly" → Moderate similarity (~0.40-0.50) - same structure, opposite meaning
254
- - "The cat sat on the mat" vs "Python is a programming language" → Low similarity (~0.10)
 
255
 
256
- The model understands:
257
- - **Synonyms** (car/automobile, fast/quick)
258
- - **Context** (word meanings in sentences)
259
- - **Semantic relationships** (opposites, related concepts)
260
- - **Sentence structure** and grammatical patterns
261
  """)
262
 
263
  # Footer
264
  st.markdown("---")
265
  st.markdown("""
266
  <div style='text-align: center'>
267
- <p>Made with ❤️ using Streamlit | Powered by Sentence Transformers & OpenRouter API</p>
 
268
  </div>
269
- """, unsafe_allow_html=True)
 
57
  col1, col2 = st.columns(2)
58
 
59
  with col1:
60
+ sentence1 = st.text_input("Enter first sentence:", placeholder="e.g., you are hot")
61
 
62
  with col2:
63
+ sentence2 = st.text_input("Enter second sentence:", placeholder="e.g., you are cold")
64
 
65
  # Calculate button
66
  if st.button("🎯 Calculate & Explain", type="primary"):
 
83
  # Calculate cosine similarity
84
  similarity = cosine_similarity(embedding1, embedding2)[0][0]
85
 
86
+ # Convert to Python float to fix the progress bar error
87
+ similarity_float = float(similarity)
88
+ similarity_rounded = round(similarity_float, 2)
89
 
90
  # Display similarity score
91
  st.success(f"**Semantic similarity between:**")
92
  st.info(f'"{sentence1}" and "{sentence2}" → **{similarity_rounded:.2f}**')
93
 
94
+ # Show similarity meter (fixed the float32 error)
 
95
  if similarity_rounded < 0.3:
 
96
  similarity_desc = "Low similarity"
97
  elif similarity_rounded < 0.7:
98
  similarity_desc = "Moderate similarity"
99
  else:
100
  similarity_desc = "High similarity"
101
 
102
+ # Convert to regular Python float for progress bar
103
+ st.progress(float(similarity_rounded), text=similarity_desc)
104
 
105
+ # Create a comprehensive prompt for the AI to explain WHY this specific score occurred
106
+ detailed_prompt = f"""You are an expert in Natural Language Processing and semantic similarity analysis using transformer-based embeddings.
107
 
108
+ I have calculated the semantic similarity between two sentences using the 'all-MiniLM-L6-v2' transformer model, which creates 384-dimensional vector embeddings that capture deep semantic meaning.
109
 
110
+ **ANALYSIS REQUEST:**
111
+ Sentence 1: "{sentence1}"
112
+ Sentence 2: "{sentence2}"
113
+ Cosine Similarity Score: {similarity_rounded:.2f}
114
 
115
+ Please provide a detailed explanation of WHY these two specific sentences resulted in a similarity score of {similarity_rounded:.2f}.
 
 
116
 
117
+ **Your analysis should cover:**
118
 
119
+ 1. **Score Interpretation**: What does {similarity_rounded:.2f} mean on the 0.00-1.00 scale? Is this low, moderate, or high similarity?
 
 
 
 
 
120
 
121
+ 2. **Semantic Analysis**:
122
+ - What are the key semantic elements in each sentence?
123
+ - What similarities did the transformer model detect?
124
+ - What differences contributed to the score not being higher/lower?
125
 
126
+ 3. **Linguistic Features**:
127
+ - Sentence structure patterns
128
+ - Word relationships (synonyms, antonyms, related concepts)
129
+ - Grammatical similarities
130
+ - Contextual meaning
131
+
132
+ 4. **Transformer Model Behavior**:
133
+ - How does all-MiniLM-L6-v2 process these sentences?
134
+ - What semantic features likely contributed most to this score?
135
+ - Why this score makes sense from a deep learning perspective
136
+
137
+ 5. **Intuitive Validation**: Does this {similarity_rounded:.2f} score match what a human would expect when comparing these sentences?
138
+
139
+ Please be specific about these exact sentences and this exact score of {similarity_rounded:.2f}. Explain the reasoning behind this particular numerical result."""
140
+
141
+ # Call OpenRouter API with the detailed prompt
142
+ with st.spinner("🤖 AI is analyzing why you got this specific similarity score..."):
143
  headers = {
144
  "Authorization": f"Bearer {api_key}",
145
  "Content-Type": "application/json",
146
+ "HTTP-Referer": "https://github.com/semantic-similarity-app",
147
  "X-Title": "Semantic Similarity Explainer"
148
  }
149
 
150
  data = {
151
  "model": "openai/gpt-3.5-turbo",
152
  "messages": [
153
+ {
154
+ "role": "system",
155
+ "content": "You are an expert NLP researcher specializing in transformer-based semantic similarity analysis. Provide detailed, educational explanations about how specific cosine similarity scores are generated by embedding models."
156
+ },
157
+ {
158
+ "role": "user",
159
+ "content": detailed_prompt
160
+ }
161
  ],
162
+ "temperature": 0.3, # Lower temperature for more focused explanations
163
+ "max_tokens": 800
164
  }
165
 
166
  response = requests.post(
 
174
  explanation = result['choices'][0]['message']['content']
175
 
176
  # Display results in tabs
177
+ tab1, tab2, tab3 = st.tabs(["🤖 AI Explanation", "📝 Prompt Sent to AI", "🔧 Technical Details"])
178
 
179
  with tab1:
180
+ st.markdown("### 🧠 Why You Got This Similarity Score")
181
+ st.markdown("**AI Analysis:**")
182
+
183
+ # Create a nice container for the AI explanation
184
+ with st.container():
185
+ st.markdown(f"""
186
+ <div style="background-color: #f0f2f6; padding: 20px; border-radius: 10px; border-left: 4px solid #1f77b4;">
187
+ {explanation}
188
+ </div>
189
+ """, unsafe_allow_html=True)
190
 
191
  with tab2:
192
+ st.markdown("### 📤 Exact Prompt Sent to GPT-3.5-Turbo")
193
+ st.markdown("This is exactly what was sent to the AI to generate the explanation:")
194
+ st.code(detailed_prompt, language="text")
195
+
196
+ st.markdown("**API Details:**")
197
+ st.json({
198
+ "model": "openai/gpt-3.5-turbo",
199
+ "temperature": 0.3,
200
+ "max_tokens": 800,
201
+ "system_message": "You are an expert NLP researcher..."
202
+ })
203
 
204
  with tab3:
205
+ st.markdown("### πŸ”§ Technical Details")
206
 
207
  col1, col2 = st.columns(2)
208
 
209
  with col1:
210
+ st.markdown("**Sentence 1 Analysis:**")
211
  st.text(f"Original: {sentence1}")
212
  st.text(f"Normalized: {sentence1_normalized}")
213
  st.text(f"Embedding shape: {embedding1.shape}")
214
+ st.text(f"Embedding L2 norm: {np.linalg.norm(embedding1):.4f}")
215
 
 
216
  st.markdown("**First 10 embedding dimensions:**")
217
  embedding_preview = embedding1[0][:10]
218
  for i, val in enumerate(embedding_preview):
219
  st.text(f"Dim {i}: {val:.4f}")
220
 
221
  with col2:
222
+ st.markdown("**Sentence 2 Analysis:**")
223
  st.text(f"Original: {sentence2}")
224
  st.text(f"Normalized: {sentence2_normalized}")
225
  st.text(f"Embedding shape: {embedding2.shape}")
226
+ st.text(f"Embedding L2 norm: {np.linalg.norm(embedding2):.4f}")
227
 
 
228
  st.markdown("**First 10 embedding dimensions:**")
229
  embedding_preview = embedding2[0][:10]
230
  for i, val in enumerate(embedding_preview):
231
  st.text(f"Dim {i}: {val:.4f}")
232
 
233
  st.markdown("---")
234
+ st.markdown("**Similarity Computation Details:**")
235
+
236
  col1, col2, col3 = st.columns(3)
237
 
238
  with col1:
239
  st.metric("Embedding Dimensions", "384")
240
+ st.metric("Exact Similarity", f"{similarity_float:.6f}")
241
 
242
  with col2:
243
  st.metric("Rounded Similarity", f"{similarity_rounded:.2f}")
 
246
 
247
  with col3:
248
  # Calculate angle between vectors
249
+ angle = np.arccos(np.clip(similarity_float, -1.0, 1.0))
250
  angle_degrees = np.degrees(angle)
251
+ st.metric("Vector Angle (degrees)", f"{angle_degrees:.2f}°")
252
+ st.metric("Model Used", "all-MiniLM-L6-v2")
253
 
254
  # Save to history
255
  st.session_state.history.append({
 
259
  "explanation": explanation
260
  })
261
 
262
+ st.success("✅ Analysis complete! Check the tabs above for detailed explanations.")
263
+
264
  else:
265
+ st.error(f"❌ API Error: {response.status_code}")
266
+ st.error(f"Response: {response.text}")
267
 
268
  except Exception as e:
269
+ st.error(f"❌ An error occurred: {str(e)}")
270
+ st.error("Please check your API key and internet connection.")
271
 
272
  # Display history
273
  if st.session_state.history:
 
275
  st.markdown("### 📜 Previous Calculations")
276
 
277
  for i, item in enumerate(reversed(st.session_state.history[-5:])): # Show last 5
278
+ with st.expander(f"'{item['sentence1']}' vs '{item['sentence2']}' → Score: {item['similarity']:.2f}"):
279
  st.markdown(item['explanation'])
280
 
281
  # Info box about semantic similarity
282
+ with st.expander("ℹ️ Understanding Semantic Similarity Scores"):
283
  st.markdown("""
284
+ ### How to Interpret Cosine Similarity Scores
 
 
285
 
286
+ **What the numbers mean:**
287
+ - **0.90 - 1.00**: Nearly identical meaning (e.g., "The car is fast" vs "The automobile is quick")
288
+ - **0.70 - 0.89**: High semantic similarity (e.g., "I love dogs" vs "I adore puppies")
289
+ - **0.50 - 0.69**: Moderate similarity (e.g., "You are hot" vs "You are cold" - same structure, opposite meaning)
290
+ - **0.30 - 0.49**: Low similarity (e.g., "I like pizza" vs "Mathematics is difficult")
291
+ - **0.00 - 0.29**: Very low similarity (e.g., "Hello world" vs "Quantum physics equations")
292
 
293
+ **Why transformer embeddings are powerful:**
294
+ - They understand **context** and **meaning**, not just word overlap
295
+ - They capture **relationships** between words (synonyms, antonyms, related concepts)
296
+ - They consider **sentence structure** and **grammatical patterns**
297
+ - They detect **semantic similarity** even with different words
298
  """)
299
 
300
  # Footer
301
  st.markdown("---")
302
  st.markdown("""
303
  <div style='text-align: center'>
304
+ <p>🚀 Made with ❤️ using Streamlit | Powered by Sentence Transformers & OpenRouter API</p>
305
+ <p><small>Each calculation automatically sends your sentences and similarity score to GPT-3.5-turbo for detailed analysis</small></p>
306
  </div>
307
+ """, unsafe_allow_html=True)