import streamlit as st import pandas as pd from pathlib import Path import numpy as np from openai import OpenAI from sentence_transformers import SentenceTransformer from sklearn.preprocessing import MinMaxScaler from io import StringIO @st.cache_resource def load_model(): """Loads the SentenceTransformer model and caches it.""" model = SentenceTransformer("all-MiniLM-L6-v2") return model # Load the model by calling the cached function model = load_model() # model = SentenceTransformer("all-MiniLM-L6-v2") minmax = MinMaxScaler() pillar_weights_csv = '''Pillar,Normalized Weight Student-centric,0.28 Digitally advanced,0.22 Fostering technologies to stay afloat,0.15 Scaling functionalities to create new value,0.20 Justifying value to design change,0.15 ''' def main(): # st.write('# Semantic Alignment App') tab_bp, tab_is, tab_matching = st.tabs(['Business Processes', 'Information Systems', 'BP-IS Matching'], default='BP-IS Matching') with tab_bp: # st.write('## Business Processes') bp_df = pd.read_csv('labelled_data/business_processes_quoted.csv') st.dataframe(bp_df) st.write(bp_df.shape) with tab_is: # st.write('## Information Systems') is_df = pd.read_csv('labelled_data/information_systems.csv') st.dataframe(is_df) st.markdown('''# New dataframe''') st.markdown(f'''{is_df.shape}''') with tab_matching: business_process_classes = bp_df['business_process_class'].unique() information_system_classes = is_df['information_system_class'].unique() # st.write(business_process_classes) # st.write(information_system_classes) embeddings_bp_classes = model.encode(business_process_classes) embeddings_is_classes = model.encode(information_system_classes) # st.write(embeddings_bp_classes.shape, embeddings_is_classes.shape) similarities = model.similarity(embeddings_bp_classes, embeddings_is_classes) similarities_minmax = minmax.fit_transform(similarities) # st.write('similarities.max()', similarities_minmax.max()) # bp, is # st.write('similarities.min()', similarities_minmax.min()) # bp, is # matrix = np.random.randn(len(business_process_classes), len(information_system_classes)) # matrix = minmax.fit_transform(matrix) * 100 matrix_df = pd.DataFrame(similarities_minmax, columns=information_system_classes , index=business_process_classes) st.write('### Alignment Matrix') st.dataframe(matrix_df) # st.write('### Bigger than 70') # matrix_df[matrix_df>70] st.write('### Max Score for each BP') max_col = similarities_minmax.max(1) max_col_arg = similarities_minmax.argmax(1) # information_system_classes[max_col], business_process_classes s = pd.DataFrame({'information_system_classes':information_system_classes[max_col_arg], 'business_process_classes':business_process_classes, 'score':max_col }) st.dataframe(s) st.write('### Pillars') pillar_weights = pd.read_csv(StringIO(pillar_weights_csv)) # step 1 st.write("### Step 1 (from ChatGPT)") st.dataframe(pillar_weights) unique_pillars = pillar_weights['Pillar'] is_bp_pairs = s.apply(lambda x: x['information_system_classes'] + ' ' + x['business_process_classes'], axis=1) embeddings_unique_pillars = model.encode(unique_pillars) embeddings_is_bp_pairs = model.encode(is_bp_pairs) similarities_pillars = model.similarity(embeddings_unique_pillars, embeddings_is_bp_pairs) similarities_pillars_minmax = minmax.fit_transform(similarities_pillars) similarities_pillars_minmax*=5 # st.write(similarities_pillars_minmax) similarities_pillars_df = pd.DataFrame(similarities_pillars_minmax) similarities_pillars_df.index = unique_pillars similarities_pillars_df.columns = is_bp_pairs # step 2 st.write("### Step 2 - BP-IS pair and Pillars (Embedding Model)") st.write(similarities_pillars_df.T) # step 3 st.write("### Step 3 - Normalized Strategic Score (*0