Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- requirements.txt +2 -1
- src/streamlit_app.py +32 -7
requirements.txt
CHANGED
|
@@ -6,4 +6,5 @@ PyYAML
|
|
| 6 |
tqdm
|
| 7 |
openai
|
| 8 |
anthropic
|
| 9 |
-
google-genai
|
|
|
|
|
|
| 6 |
tqdm
|
| 7 |
openai
|
| 8 |
anthropic
|
| 9 |
+
google-genai
|
| 10 |
+
sentence-transformers
|
src/streamlit_app.py
CHANGED
|
@@ -3,22 +3,47 @@ import pandas as pd
|
|
| 3 |
from pathlib import Path
|
| 4 |
import numpy as np
|
| 5 |
from openai import OpenAI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def main():
|
| 8 |
st.write('# Semantic Alignment App')
|
| 9 |
|
| 10 |
-
st.write('## Business Processes')
|
| 11 |
-
bp_df = pd.read_csv('labelled_data/business_processes_quoted.csv')
|
| 12 |
-
st.dataframe(bp_df)
|
| 13 |
|
| 14 |
-
st.write('## Information Systems')
|
| 15 |
-
is_df = pd.read_csv('labelled_data/information_systems.csv')
|
| 16 |
-
st.dataframe(is_df)
|
| 17 |
-
|
| 18 |
|
|
|
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
import numpy as np
|
| 5 |
from openai import OpenAI
|
| 6 |
+
from sentence_transformers import SentenceTransformer
|
| 7 |
+
|
| 8 |
+
@st.cache_resource
def load_model():
    """Create the sentence-embedding model used by the app.

    Builds a ``SentenceTransformer`` from the ``all-MiniLM-L6-v2``
    checkpoint, pinned to the CPU. The ``st.cache_resource`` decorator
    stores the returned object so later calls reuse it instead of
    constructing the model again.

    Returns:
        The ``SentenceTransformer`` instance.
    """
    return SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
|
| 14 |
+
|
| 15 |
+
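# Module-level handle to the embedding model. load_model() is wrapped in
# st.cache_resource, so this call returns the cached instance after the first run.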
|
| 16 |
+
model = load_model()
|
| 17 |
|
| 18 |
def main():
    """Render the Semantic Alignment app with one tab per labelled dataset.

    The "Business Processes" tab shows the labelled CSV and then writes the
    sentence embedding of every ``business_process`` value, one per row.
    The "Information Systems" tab shows its labelled CSV only.

    Both CSV paths are relative, so they resolve against the current
    working directory of the Streamlit process.
    """
    st.write('# Semantic Alignment App')

    tab_bp, tab_is = st.tabs(['Business Processes', 'Information Systems'])

    with tab_bp:
        bp_df = pd.read_csv('labelled_data/business_processes_quoted.csv')
        st.dataframe(bp_df)

        # Encode the whole column in a single call instead of once per row:
        # encode() accepts a list of sentences and batches them internally,
        # which avoids a separate forward pass for every row.
        business_processes = bp_df['business_process'].tolist()
        if business_processes:
            bp_embeddings = model.encode(business_processes)
        else:
            # Nothing to embed; skip the model call for an empty CSV.
            bp_embeddings = []

        for bp_embedding in bp_embeddings:
            st.write(bp_embedding)

        # TODO: embed bp_df['business_process_class'] the same way and show
        # model.similarity(bp_embeddings, class_embeddings). The class
        # embeddings were previously computed per row but never used.

    with tab_is:
        is_df = pd.read_csv('labelled_data/information_systems.csv')
        st.dataframe(is_df)
|
| 46 |
+
|
| 47 |
|
| 48 |
|
| 49 |
|