naveed92 committed on
Commit
f00881a
·
1 Parent(s): 41ba13c

add product and industry search functions

Browse files
Files changed (1) hide show
  1. app.py +78 -18
app.py CHANGED
@@ -16,13 +16,26 @@ def load_pandas_xlsx(path):
16
  data = pd.read_excel(path)
17
  return data
18
 
19
- # @st.cache_data
20
  @st.cache_data
21
  def build_company_df(input_df):
22
  # build company df
23
  output_df = input_df[['companyLabel', 'companyLabelJA', 'company']].drop_duplicates()
24
  return output_df
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def search_df(inp, df, col):
27
  mask = df[col].str.contains(inp, case=False, regex=False)
28
  select_df = df[mask]
@@ -48,8 +61,16 @@ st.success("Company Data Loaded!")
48
  ### Pre computation Steps ###
49
 
50
  # Pre compute unique number of companies per industry
51
- industry_to_counts = competitor_df[['company', 'companyLabel', 'companyLabelJA', 'industry', 'industryLabel', 'industryLabelJA']].drop_duplicates().groupby(['industry', 'industryLabel', 'industryLabelJA'])['company'].count().sort_values(ascending=False).reset_index().copy()
52
- industry_to_counts = industry_to_counts.rename(columns={'company': 'n_competitors'})
 
 
 
 
 
 
 
 
53
 
54
  ### end ###
55
 
@@ -146,7 +167,7 @@ if option == "By Company":
146
 
147
  competitors = competitor_df[competitor_df.industry == industry.industry][['companyLabel', 'companyLabelJA', 'company', 'country', 'countryLabel']].drop_duplicates().copy()
148
  st.dataframe(competitors)
149
- print("------")
150
 
151
  st.title("Analysis by country")
152
 
@@ -176,30 +197,69 @@ if option == "By Company":
176
  st.dataframe(competitors_by_country)
177
 
178
 
179
- # st.title("Competitors per industry")
 
 
 
 
 
 
 
 
180
 
181
- # for row in wikidata_industries.itertuples():
182
- # competitors = competitor_df[competitor_df.industry == row.industry][['companyLabel', 'companyLabelJA', 'company']].drop_duplicates()
183
- # st.write(f"For industry: {row.industryLabel}")
184
- # st.dataframe(competitors)
185
- # print("------")
 
 
186
 
 
 
187
 
188
- ##
 
 
189
 
190
- # https://docs.streamlit.io/develop/api-reference/widgets/st.selectbox
 
 
 
191
 
192
- # company_input = st.selectbox(label="Company", options=unique_companies, placeholder="Choose a company")
193
 
194
- # company_input = st.text_input("Company", "Enter company here")
195
 
196
- elif option == "By Industry":
197
-
198
- st.write("Industry search work in progress")
199
 
200
  elif option == "By Product":
201
 
202
- st.write("Product search work in progress")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
  else:
205
 
 
16
  data = pd.read_excel(path)
17
  return data
18
 
 
19
  @st.cache_data
20
  def build_company_df(input_df):
21
  # build company df
22
  output_df = input_df[['companyLabel', 'companyLabelJA', 'company']].drop_duplicates()
23
  return output_df
24
 
25
+ @st.cache_data
26
+ def build_industry_df(input_df):
27
+ # Pre compute unique number of companies per industry
28
+ output_df = input_df[['company', 'companyLabel', 'companyLabelJA', 'industry', 'industryLabel', 'industryLabelJA']].drop_duplicates().groupby(['industry', 'industryLabel', 'industryLabelJA'])['company'].count().sort_values(ascending=False).reset_index().copy()
29
+ output_df = output_df.rename(columns={'company': 'n_competitors'})
30
+ return output_df
31
+
32
+ @st.cache_data
33
+ def build_product_df(input_df):
34
+ # Pre compute unique number of companies per product
35
+ output_df = input_df[['company', 'companyLabel', 'companyLabelJA', 'product', 'productLabel', 'productLabelJA']].drop_duplicates().groupby(['product', 'productLabel', 'productLabelJA'])['company'].count().sort_values(ascending=False).reset_index().copy()
36
+ output_df = output_df.rename(columns={'company': 'n_competitors'})
37
+ return output_df
38
+
39
  def search_df(inp, df, col):
40
  mask = df[col].str.contains(inp, case=False, regex=False)
41
  select_df = df[mask]
 
61
  ### Pre computation Steps ###
62
 
63
  # Pre compute unique number of companies per industry
64
+ # industry_to_counts = competitor_df[['company', 'companyLabel', 'companyLabelJA', 'industry', 'industryLabel', 'industryLabelJA']].drop_duplicates().groupby(['industry', 'industryLabel', 'industryLabelJA'])['company'].count().sort_values(ascending=False).reset_index().copy()
65
+ # industry_to_counts = industry_to_counts.rename(columns={'company': 'n_competitors'})
66
+
67
+ industry_to_counts = build_industry_df(competitor_df)
68
+
69
+ # Pre compute unique number of companies per product
70
+ # product_to_counts = competitor_df[['company', 'companyLabel', 'companyLabelJA', 'product', 'productLabel', 'productLabelJA']].drop_duplicates().groupby(['product', 'productLabel', 'productLabelJA'])['company'].count().sort_values(ascending=False).reset_index().copy()
71
+ # product_to_counts = product_to_counts.rename(columns={'company': 'n_competitors'})
72
+
73
+ product_to_counts = build_product_df(competitor_df)
74
 
75
  ### end ###
76
 
 
167
 
168
  competitors = competitor_df[competitor_df.industry == industry.industry][['companyLabel', 'companyLabelJA', 'company', 'country', 'countryLabel']].drop_duplicates().copy()
169
  st.dataframe(competitors)
170
+ # print("------")
171
 
172
  st.title("Analysis by country")
173
 
 
197
  st.dataframe(competitors_by_country)
198
 
199
 
200
+ elif option == "By Industry":
201
+
202
+ st.title("Searching by Industry")
203
+
204
+ # Get input
205
+ industry_input = st_keyup("Enter an industry name", value="retail", key="1", debounce=500)
206
+
207
+ # Perform search
208
+ industry_select_df = search_df(industry_input, industry_to_counts, 'industryLabel')
209
 
210
+ # Show search results
211
+ with st.status("Searching ...", state="running", expanded=False) as status:
212
+ status.update(label=f"{len(industry_select_df)} results found", state="complete", expanded=True)
213
+
214
+ ### Selection for Industry ###
215
+ st.dataframe(industry_select_df, on_select="rerun", key="industry", selection_mode="single-row")
216
+ select_industry = st.session_state.industry
217
 
218
+ # expand if industry is selected
219
+ if len(select_industry['selection']['rows']) > 0:
220
 
221
+ industry_id = select_industry['selection']['rows'][0]
222
+
223
+ industry = industry_select_df.iloc[industry_id]
224
 
225
+ st.title(f"All Competitors for {industry.industryLabel}")
226
+
227
+ competitors = competitor_df[competitor_df.industry == industry.industry][['companyLabel', 'companyLabelJA', 'company', 'country', 'countryLabel']].drop_duplicates().copy()
228
+ st.dataframe(competitors)
229
 
 
230
 
 
231
 
 
 
 
232
 
233
  elif option == "By Product":
234
 
235
+ st.title("Searching by Product")
236
+
237
+ # Get input
238
+ product_input = st_keyup("Enter an product name", value="computer", key="2", debounce=500)
239
+
240
+ # Perform search
241
+ product_select_df = search_df(product_input, product_to_counts, 'productLabel')
242
+
243
+ # Show search results
244
+ with st.status("Searching ...", state="running", expanded=False) as status:
245
+ status.update(label=f"{len(product_select_df)} results found", state="complete", expanded=True)
246
+
247
+ ### Selection for Product ###
248
+ st.dataframe(product_select_df, on_select="rerun", key="product", selection_mode="single-row")
249
+ select_product = st.session_state.product
250
+
251
+ # expand if product is selected
252
+ if len(select_product['selection']['rows']) > 0:
253
+
254
+ product_id = select_product['selection']['rows'][0]
255
+
256
+ product = product_select_df.iloc[product_id]
257
+
258
+ st.title(f"All Competitors for {product.productLabel}")
259
+
260
+ competitors = competitor_df[competitor_df['product'] == product['product']][['companyLabel', 'companyLabelJA', 'company', 'country', 'countryLabel']].drop_duplicates().copy()
261
+ st.dataframe(competitors)
262
+
263
 
264
  else:
265