davoodwadi committed on
Commit
b093b7d
·
verified ·
1 Parent(s): 5829c81

Upload 2 files

Browse files
Files changed (2) hide show
  1. requirements.txt +2 -1
  2. src/streamlit_app.py +32 -7
requirements.txt CHANGED
@@ -6,4 +6,5 @@ PyYAML
6
  tqdm
7
  openai
8
  anthropic
9
- google-genai
 
 
6
  tqdm
7
  openai
8
  anthropic
9
+ google-genai
10
+ sentence-transformers
src/streamlit_app.py CHANGED
@@ -3,22 +3,47 @@ import pandas as pd
3
  from pathlib import Path
4
  import numpy as np
5
  from openai import OpenAI
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  def main():
8
  st.write('# Semantic Alignment App')
9
 
10
- st.write('## Business Processes')
11
- bp_df = pd.read_csv('labelled_data/business_processes_quoted.csv')
12
- st.dataframe(bp_df)
13
 
14
- st.write('## Information Systems')
15
- is_df = pd.read_csv('labelled_data/information_systems.csv')
16
- st.dataframe(is_df)
17
-
18
 
 
19
 
 
 
 
 
20
 
 
 
 
 
 
 
 
 
 
 
 
21
 
 
 
 
 
 
22
 
23
 
24
 
 
3
  from pathlib import Path
4
  import numpy as np
5
  from openai import OpenAI
6
+ from sentence_transformers import SentenceTransformer
7
+
8
+ @st.cache_resource
9
+ def load_model():
10
+ """Loads the SentenceTransformer model and caches it."""
11
+ device = 'cpu'
12
+ model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
13
+ return model
14
+
15
+ # Load the model by calling the cached function
16
+ model = load_model()
17
 
18
  def main():
19
  st.write('# Semantic Alignment App')
20
 
 
 
 
21
 
 
 
 
 
22
 
23
+ tab_bp, tab_is = st.tabs(['Business Processes', 'Information Systems'])
24
 
25
+ with tab_bp:
26
+ # st.write('## Business Processes')
27
+ bp_df = pd.read_csv('labelled_data/business_processes_quoted.csv')
28
+ st.dataframe(bp_df)
29
 
30
+ # embeddings_bp = model.encode(bp_df['business_process'].values)
31
+ # embeddings_bp_classes = model.encode(bp_df['business_process_class'].values)
32
+ for i, row in bp_df.iterrows():
33
+ # st.write(row)
34
+ bpc, bp = row['business_process_class'], row['business_process']
35
+ embeddings_bp = model.encode(bp)
36
+ embeddings_bpc = model.encode(bpc)
37
+
38
+ st.write(embeddings_bp)
39
+ # bp_similarities = model.similarity(embeddings_bp, embeddings_bpc)
40
+ # st.write(bp_similarities)
41
 
42
+ with tab_is:
43
+ # st.write('## Information Systems')
44
+ is_df = pd.read_csv('labelled_data/information_systems.csv')
45
+ st.dataframe(is_df)
46
+
47
 
48
 
49