Spaces:

davoodwadi
/

semantic-alignment

Sleeping

davoodwadi committed on Oct 8, 2025

Commit

b093b7d

verified ·

1 Parent(s): 5829c81

Upload 2 files

Files changed (2) hide show

requirements.txt CHANGED Viewed

@@ -6,4 +6,5 @@ PyYAML
 tqdm
 openai
 anthropic
-google-genai

 tqdm
 openai
 anthropic
+google-genai
+sentence-transformers

src/streamlit_app.py CHANGED Viewed

@@ -3,22 +3,47 @@ import pandas as pd
 from pathlib import Path
 import numpy as np
 from openai import OpenAI
 def main():
     st.write('# Semantic Alignment App')
-    st.write('## Business Processes')
-    bp_df = pd.read_csv('labelled_data/business_processes_quoted.csv')
-    st.dataframe(bp_df)
-    st.write('## Information Systems')
-    is_df = pd.read_csv('labelled_data/information_systems.csv')
-    st.dataframe(is_df)

 from pathlib import Path
 import numpy as np
 from openai import OpenAI
+from sentence_transformers import SentenceTransformer
+@st.cache_resource
+def load_model():
+    """Return the sentence-embedding model used by the app.
+
+    Builds a ``SentenceTransformer`` for the 'all-MiniLM-L6-v2' checkpoint
+    on the CPU. The ``st.cache_resource`` decorator caches the returned
+    instance.
+    """
+    return SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
+# Module-level handle to the embedding model. load_model() is wrapped in
+# st.cache_resource, so this call goes through the cached function.
+model = load_model()
 def main():
+    """Render the Semantic Alignment page.
+
+    Shows two tabs. The 'Business Processes' tab loads
+    ``labelled_data/business_processes_quoted.csv``, displays the table and
+    writes the embedding of every ``business_process`` value. The
+    'Information Systems' tab loads ``labelled_data/information_systems.csv``
+    and displays it.
+    """
     st.write('# Semantic Alignment App')
+    tab_bp, tab_is = st.tabs(['Business Processes', 'Information Systems'])
+    with tab_bp:
+        bp_df = pd.read_csv('labelled_data/business_processes_quoted.csv')
+        st.dataframe(bp_df)
+        # Encode the whole column in a single batched call instead of one
+        # model call per row; iterating the result yields one vector per row.
+        embeddings_bp = model.encode(bp_df['business_process'].tolist())
+        for embedding in embeddings_bp:
+            st.write(embedding)
+        # TODO: embed 'business_process_class' as well and compare it with
+        # the process embeddings (e.g. via model.similarity).
+    with tab_is:
+        is_df = pd.read_csv('labelled_data/information_systems.csv')
+        st.dataframe(is_df)