Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,15 @@ from Levenshtein import distance as levenshtein_distance
|
|
| 9 |
from textdistance import jaro_winkler, damerau_levenshtein, cosine
|
| 10 |
from sklearn.feature_extraction.text import CountVectorizer
|
| 11 |
from sklearn.preprocessing import normalize
|
|
|
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# -----------------------
|
| 15 |
# ℹ️ PROJECT INFORMATION & EDUCATION SECTION
|
|
@@ -80,6 +88,23 @@ if option == "String Similarity":
|
|
| 80 |
st.write(f"**Cosine Similarity:** {cosine_sim:.4f}")
|
| 81 |
st.write(f"**Q-Gram Similarity:** {qgram_sim:.4f}")
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
# -----------------------
|
| 84 |
# 2️⃣ MODEL CALIBRATION (RELIABILITY DIAGRAM)
|
| 85 |
# -----------------------
|
|
|
|
| 9 |
from textdistance import jaro_winkler, damerau_levenshtein, cosine
|
| 10 |
from sklearn.feature_extraction.text import CountVectorizer
|
| 11 |
from sklearn.preprocessing import normalize
|
| 12 |
+
import pandas as pd
|
| 13 |
|
| 14 |
+
# -----------------------
|
| 15 |
+
# 🎨 Streamlit App Layout
|
| 16 |
+
# -----------------------
|
| 17 |
+
|
| 18 |
+
st.title("🔍 String Similarity & Model Calibration App")
|
| 19 |
+
st.sidebar.header("📌 Select an Option")
|
| 20 |
+
option = st.sidebar.radio("Choose a Task:", ["String Similarity", "Model Calibration"])
|
| 21 |
|
| 22 |
# -----------------------
|
| 23 |
# ℹ️ PROJECT INFORMATION & EDUCATION SECTION
|
|
|
|
| 88 |
st.write(f"**Cosine Similarity:** {cosine_sim:.4f}")
|
| 89 |
st.write(f"**Q-Gram Similarity:** {qgram_sim:.4f}")
|
| 90 |
|
| 91 |
+
# -----------------------
|
| 92 |
+
# 📊 STRING SIMILARITY EXAMPLES TABLE
|
| 93 |
+
# -----------------------
|
| 94 |
+
|
| 95 |
+
st.subheader("📊 Example Word Comparisons")
|
| 96 |
+
data = {
|
| 97 |
+
"Word 1": ["MARTHA", "HOUSE", "SUNDAY", "NIGHT", "FLIGHT"],
|
| 98 |
+
"Word 2": ["MARHTA", "HORSE", "MONDAY", "KNIGHT", "FIGHT"],
|
| 99 |
+
"Levenshtein Distance": [2, 1, 2, 1, 1],
|
| 100 |
+
"Jaro-Winkler Similarity": [0.9611, 0.8933, 0.7778, 0.9444, 0.9500],
|
| 101 |
+
"Damerau-Levenshtein Distance": [1, 1, 2, 1, 1],
|
| 102 |
+
"Cosine Similarity": [0.8333, 0.7500, 0.6667, 0.8000, 0.9500],
|
| 103 |
+
"Q-Gram Similarity": [0.8571, 0.7143, 0.6667, 0.7778, 0.9231],
|
| 104 |
+
}
|
| 105 |
+
df = pd.DataFrame(data)
|
| 106 |
+
st.table(df)
|
| 107 |
+
|
| 108 |
# -----------------------
|
| 109 |
# 2️⃣ MODEL CALIBRATION (RELIABILITY DIAGRAM)
|
| 110 |
# -----------------------
|