Spaces:

mlkorra
/

competitive-analysis

Runtime error

App Files Files Community

mlkorra committed on Aug 1, 2021

Commit

7dfe9b5

1 Parent(s): 96a99b3

Update App

Browse files

Files changed (1) hide show

app.py +52 -49

app.py CHANGED Viewed

@@ -23,23 +23,25 @@ import os
 def visualizer(prob_req, embed, df, index, company_name):
-    fname = 'topicmodel/saving_example.sav'
-    reducer= pickle.load((open(fname, 'rb'))) #load the umap dimensionality reduction model trained on rest of probabilities
-    embed_req= reducer.transform(prob_req)
-    #add scatter plot for all embeddings from our dataset
-    fig1 = px.scatter(
-    embed, x=0, y=1,
-    color=df.iloc[index]['headquarters'], labels={'color': 'states'}, hover_name= df.iloc[index]['company_name'] + " with industry group: "+  df.iloc[index]['industry_groups'])
-    #add the data for users request and display
-    fig1.add_trace(
-    go.Scatter(
-        x=embed_req[:,0],
-        y=embed_req[:,1],
-        mode='markers',
-        marker_symbol="hexagon2", marker_size=15,
-        showlegend=True, name= company_name, hovertext= company_name))
-    st.plotly_chart(fig1)
 def clean_text(text):
@@ -67,30 +69,31 @@ def preprocess(name, group, state, states_used, desc):
 @st.cache(persist=True,suppress_st_warning=True)
 def load_topic_model(model_path, name, group, state, states_used, desc):
-    #load Bertopic
-    model=BERTopic.load(model_path)
-    #load dataset (used for creating scatter plot)
-    data_path = 'topicmodel/data.csv'
-    df = pd.read_csv(data_path)
-    #load embeddings reduced by UMAP for the points to be displayed by scatter plot
-    embeddings_path = 'topicmodel/embed.npy'
-    embeddings = np.load(embeddings_path)
-    #preprocess user inputs
-    request= preprocess(name, group, state, states_used, desc)
-    index=[]
-    #only select states that user wants to compare
-    for state_used in states_used:
-        index.extend(df.index[df['headquarters'].str.contains(state_used)].tolist())
-    select=embeddings[index]
-    #use bert topic to get probabilities
-    topic, prob_req= model.transform([request])
-    st.text("Modelling done! plotting results now...")
-    return topic, prob_req, select, df, index
 def app():
@@ -115,7 +118,7 @@ def app():
     #state= st.selectbox('Select state the company is based in', states)
     #states_used = st.multiselect('Select states you want to analyse', states)
-    examples = [['Coursera','Education','California',['California','Washington','Ohio'],'We are a social entrepreneurship company that partners with the top universities in the world to offer courses online for anyone to take, for free. We envision a future where the top universities are educating not only thousands of students, but millions. Our technology enables the best professors to teach tens or hundreds of thousands of students']]
     if check_examples:
         example = examples[0]
@@ -137,13 +140,13 @@ def app():
         states_used = st.multiselect('Select states you want to analyse', states)
     if(st.button("Analyse Competition")):
-            if companyname=="" or companydesc=="" or companygrp=="" or states_used==[]:
-                st.error("Some fields are empty!")
-            else:
-                model_path = 'topicmodel/my_model.pkl'
-                topic,prob_req,embed,df,index = load_topic_model(model_path, companyname, companygrp, state, states_used, companydesc)
-                visualizer(prob_req, embed, df, index, companyname)
 if __name__ == "__main__":

 def visualizer(prob_req, embed, df, index, company_name):
+    with st.spinner("Visualizing the results !!!"):
+        fname = 'topicmodel/saving_example.sav'
+        reducer= pickle.load((open(fname, 'rb'))) #load the umap dimensionality reduction model trained on rest of probabilities
+        embed_req= reducer.transform(prob_req)
+        #add scatter plot for all embeddings from our dataset
+        fig1 = px.scatter(
+        embed, x=0, y=1,
+        color=df.iloc[index]['headquarters'], labels={'color': 'states'}, hover_name= df.iloc[index]['company_name'] + " with industry group: "+  df.iloc[index]['industry_groups'])
+        #add the data for users request and display
+        fig1.add_trace(
+        go.Scatter(
+            x=embed_req[:,0],
+            y=embed_req[:,1],
+            mode='markers',
+            marker_symbol="hexagon2", marker_size=15,
+            showlegend=True, name= company_name, hovertext= company_name))
+        st.plotly_chart(fig1)
 def clean_text(text):
 @st.cache(persist=True,suppress_st_warning=True)
 def load_topic_model(model_path, name, group, state, states_used, desc):
+    with st.spinner("Creating Topic Models ....."):
+        #load Bertopic
+        model=BERTopic.load(model_path)
+        #load dataset (used for creating scatter plot)
+        data_path = 'topicmodel/data.csv'
+        df = pd.read_csv(data_path)
+        #load embeddings reduced by UMAP for the points to be displayed by scatter plot
+        embeddings_path = 'topicmodel/embed.npy'
+        embeddings = np.load(embeddings_path)
+        #preprocess user inputs
+        request= preprocess(name, group, state, states_used, desc)
+        index=[]
+        #only select states that user wants to compare
+        for state_used in states_used:
+            index.extend(df.index[df['headquarters'].str.contains(state_used)].tolist())
+        select=embeddings[index]
+        #use bert topic to get probabilities
+        topic, prob_req= model.transform([request])
+        st.text("Modelling done! plotting results now...")
+        return topic, prob_req, select, df, index
 def app():
     #state= st.selectbox('Select state the company is based in', states)
     #states_used = st.multiselect('Select states you want to analyse', states)
+    examples = [['Coursera','Education','California',['California','New York','Ohio'],'We are a social entrepreneurship company that partners with the top universities in the world to offer courses online for anyone to take, for free. We envision a future where the top universities are educating not only thousands of students, but millions. Our technology enables the best professors to teach tens or hundreds of thousands of students']]
     if check_examples:
         example = examples[0]
         states_used = st.multiselect('Select states you want to analyse', states)
     if(st.button("Analyse Competition")):
+        if companyname=="" or companydesc=="" or companygrp=="" or states_used==[]:
+            st.error("Some fields are empty!")
+        else:
+            model_path = 'topicmodel/my_model.pkl'
+            topic,prob_req,embed,df,index = load_topic_model(model_path, companyname, companygrp, state, states_used, companydesc)
+            visualizer(prob_req, embed, df, index, companyname)
 if __name__ == "__main__":